Merge pull request #10378 [BEAM-8481] Fix a race condition in proto stubs generation.
diff --git a/.gitignore b/.gitignore
index ce56f55..5732b9c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,6 +35,8 @@
**/dist/**/*
**/distribute-*/**/*
**/env/**/*
+**/.mypy_cache
+**/.dmypy.json
sdks/python/**/*.c
sdks/python/**/*.so
sdks/python/**/*.egg
@@ -88,3 +90,6 @@
# JetBrains Education files
!**/study_project.xml
**/.coursecreator/**/*
+
+.pytest_cache
+.pytest_cache/**/*
\ No newline at end of file
diff --git a/.test-infra/jenkins/CommonTestProperties.groovy b/.test-infra/jenkins/CommonTestProperties.groovy
index 0d750ee..7bf585e 100644
--- a/.test-infra/jenkins/CommonTestProperties.groovy
+++ b/.test-infra/jenkins/CommonTestProperties.groovy
@@ -21,6 +21,7 @@
class CommonTestProperties {
enum SDK {
PYTHON,
+ PYTHON_37,
JAVA
}
@@ -42,6 +43,11 @@
DATAFLOW: "TestDataflowRunner",
DIRECT: "DirectRunner",
PORTABLE: "PortableRunner"
+ ],
+ PYTHON_37: [
+ DATAFLOW: "TestDataflowRunner",
+ DIRECT: "DirectRunner",
+ PORTABLE: "PortableRunner"
]
]
@@ -51,8 +57,7 @@
this.option = option
}
-
- String getDepenedencyBySDK(SDK sdk) {
+ String getDependencyBySDK(SDK sdk) {
RUNNER_DEPENDENCY_MAP.get(sdk.toString()).get(this.toString())
}
@@ -62,4 +67,4 @@
PR,
POST_COMMIT
}
-}
\ No newline at end of file
+}
diff --git a/.test-infra/jenkins/LoadTestsBuilder.groovy b/.test-infra/jenkins/LoadTestsBuilder.groovy
index c259033..d7ed2d5 100644
--- a/.test-infra/jenkins/LoadTestsBuilder.groovy
+++ b/.test-infra/jenkins/LoadTestsBuilder.groovy
@@ -42,11 +42,8 @@
shell("echo *** ${title} ***")
gradle {
rootBuildScriptDir(commonJobProperties.checkoutDir)
- tasks(getGradleTaskName(sdk))
+ setGradleTask(delegate, runner, sdk, options, mainClass)
commonJobProperties.setGradleSwitches(delegate)
- switches("-PloadTest.mainClass=\"${mainClass}\"")
- switches("-Prunner=${runner.getDepenedencyBySDK(sdk)}")
- switches("-PloadTest.args=\"${parseOptions(options)}\"")
}
}
}
@@ -59,10 +56,21 @@
}
}
+ private static void setGradleTask(context, Runner runner, SDK sdk, Map<String, ?> options, String mainClass) {
+ context.tasks(getGradleTaskName(sdk))
+ context.switches("-PloadTest.mainClass=\"${mainClass}\"")
+ context.switches("-Prunner=${runner.getDependencyBySDK(sdk)}")
+ context.switches("-PloadTest.args=\"${parseOptions(options)}\"")
+
+ if (sdk == SDK.PYTHON_37) {
+ context.switches("-PpythonVersion=3.7")
+ }
+ }
+
private static String getGradleTaskName(SDK sdk) {
if (sdk == SDK.JAVA) {
return ':sdks:java:testing:load-tests:run'
- } else if (sdk == SDK.PYTHON) {
+ } else if (sdk == SDK.PYTHON || sdk == SDK.PYTHON_37) {
return ':sdks:python:apache_beam:testing:load_tests:run'
} else {
throw new RuntimeException("No task name defined for SDK: $SDK")
diff --git a/.test-infra/jenkins/NexmarkBuilder.groovy b/.test-infra/jenkins/NexmarkBuilder.groovy
index 32a4e13..9cdef21 100644
--- a/.test-infra/jenkins/NexmarkBuilder.groovy
+++ b/.test-infra/jenkins/NexmarkBuilder.groovy
@@ -81,7 +81,7 @@
rootBuildScriptDir(commonJobProperties.checkoutDir)
tasks(':sdks:java:testing:nexmark:run')
commonJobProperties.setGradleSwitches(delegate)
- switches("-Pnexmark.runner=${runner.getDepenedencyBySDK(sdk)}")
+ switches("-Pnexmark.runner=${runner.getDependencyBySDK(sdk)}")
switches("-Pnexmark.args=\"${parseOptions(options)}\"")
}
}
diff --git a/.test-infra/jenkins/README.md b/.test-infra/jenkins/README.md
index bd876d3..471d914 100644
--- a/.test-infra/jenkins/README.md
+++ b/.test-infra/jenkins/README.md
@@ -21,7 +21,7 @@
## Beam Jenkins
-Beam Jenkins overview page: [link](https://builds.apache.org/view/A-D/view/Beam/view)
+Beam Jenkins overview page: [link](https://builds.apache.org/view/A-D/view/Beam/view/All/)
### PreCommit Jobs
@@ -129,6 +129,7 @@
| beam_LoadTests_Java_ParDo_Dataflow_Batch | [cron](https://builds.apache.org/job/beam_LoadTests_Java_ParDo_Dataflow_Batch/), [phrase](https://builds.apache.org/job/beam_LoadTests_Java_ParDo_Dataflow_Batch_PR/) | `Run Load Tests Java ParDo Dataflow Batch` | [](https://builds.apache.org/job/beam_LoadTests_Java_ParDo_Dataflow_Batch/) |
| beam_LoadTests_Java_ParDo_Dataflow_Streaming | [cron](https://builds.apache.org/job/beam_LoadTests_Java_ParDo_Dataflow_Streaming/), [phrase](https://builds.apache.org/job/beam_LoadTests_Java_ParDo_Dataflow_Streaming_PR/) | `Run Load Tests Java ParDo Dataflow Streaming` | [](https://builds.apache.org/job/beam_LoadTests_Java_ParDo_Dataflow_Streaming/) |
| beam_LoadTests_Python_ParDo_Dataflow_Batch | [cron](https://builds.apache.org/job/beam_LoadTests_Python_ParDo_Dataflow_Batch/), [phrase](https://builds.apache.org/job/beam_LoadTests_Python_ParDo_Dataflow_Batch_PR/) | `Run Python Load Tests ParDo Dataflow Batch` | [](https://builds.apache.org/job/beam_LoadTests_Python_ParDo_Dataflow_Batch/) |
+| beam_LoadTests_Python_37_ParDo_Dataflow_Batch | [cron](https://builds.apache.org/job/beam_LoadTests_Python_37_ParDo_Dataflow_Batch/), [phrase](https://builds.apache.org/job/beam_LoadTests_Python_37_ParDo_Dataflow_Batch_PR/) | `Run Python 3.7 Load Tests ParDo Dataflow Batch` | [](https://builds.apache.org/job/beam_LoadTests_Python_37_ParDo_Dataflow_Batch/) |
| beam_LoadTests_Python_ParDo_Flink_Batch | [cron](https://builds.apache.org/job/beam_LoadTests_Python_ParDo_Flink_Batch/), [phrase](https://builds.apache.org/job/beam_LoadTests_Python_ParDo_Flink_Batch_PR/) | `Run Python Load Tests ParDo Flink Batch` | [](https://builds.apache.org/job/beam_LoadTests_Python_ParDo_Flink_Batch/) |
### Inventory Jobs
diff --git a/.test-infra/jenkins/job_CancelStaleDataflowJobs.groovy b/.test-infra/jenkins/job_CancelStaleDataflowJobs.groovy
index 20760e8..a03a1d0 100644
--- a/.test-infra/jenkins/job_CancelStaleDataflowJobs.groovy
+++ b/.test-infra/jenkins/job_CancelStaleDataflowJobs.groovy
@@ -25,7 +25,7 @@
commonJobProperties.setTopLevelMainJobProperties(delegate)
// Sets that this is a cron job, run once randomly per day.
- commonJobProperties.setCronJob(delegate, 'H H * * *')
+ commonJobProperties.setCronJob(delegate, '0 */4 * * *')
// Allows triggering this build against pull requests.
commonJobProperties.enablePhraseTriggeringFromPullRequest(
diff --git a/.test-infra/jenkins/job_LoadTests_Combine_Flink_Python.groovy b/.test-infra/jenkins/job_LoadTests_Combine_Flink_Python.groovy
index 6a2fa92..4bf2878 100644
--- a/.test-infra/jenkins/job_LoadTests_Combine_Flink_Python.groovy
+++ b/.test-infra/jenkins/job_LoadTests_Combine_Flink_Python.groovy
@@ -105,9 +105,9 @@
List<Map> testScenarios = scenarios(datasetName, pythonHarnessImageTag)
publisher.publish(':sdks:python:container:py2:docker', 'python2.7_sdk')
- publisher.publish(':runners:flink:1.9:job-server-container:docker', 'flink-job-server')
+ publisher.publish(':runners:flink:1.9:job-server-container:docker', 'flink1.9_job_server')
def flink = new Flink(scope, 'beam_LoadTests_Python_Combine_Flink_Batch')
- flink.setUp([pythonHarnessImageTag], numberOfWorkers, publisher.getFullImageName('flink-job-server'))
+ flink.setUp([pythonHarnessImageTag], numberOfWorkers, publisher.getFullImageName('flink1.9_job_server'))
defineTestSteps(scope, testScenarios, [
'Combine Python Load test: 2GB Fanout 4',
diff --git a/.test-infra/jenkins/job_LoadTests_GBK_Flink_Python.groovy b/.test-infra/jenkins/job_LoadTests_GBK_Flink_Python.groovy
index ddb570f..5277d28 100644
--- a/.test-infra/jenkins/job_LoadTests_GBK_Flink_Python.groovy
+++ b/.test-infra/jenkins/job_LoadTests_GBK_Flink_Python.groovy
@@ -172,9 +172,9 @@
List<Map> testScenarios = scenarios(datasetName, pythonHarnessImageTag)
publisher.publish(':sdks:python:container:py2:docker', 'python2.7_sdk')
- publisher.publish(':runners:flink:1.9:job-server-container:docker', 'flink-job-server')
+ publisher.publish(':runners:flink:1.9:job-server-container:docker', 'flink1.9_job_server')
def flink = new Flink(scope, 'beam_LoadTests_Python_GBK_Flink_Batch')
- flink.setUp([pythonHarnessImageTag], numberOfWorkers, publisher.getFullImageName('flink-job-server'))
+ flink.setUp([pythonHarnessImageTag], numberOfWorkers, publisher.getFullImageName('flink1.9_job_server'))
def configurations = testScenarios.findAll { it.pipelineOptions?.parallelism?.value == numberOfWorkers }
loadTestsBuilder.loadTests(scope, sdk, configurations, "GBK", "batch")
diff --git a/.test-infra/jenkins/job_LoadTests_ParDo_Flink_Python.groovy b/.test-infra/jenkins/job_LoadTests_ParDo_Flink_Python.groovy
index 7c9a7ca..c775c9e 100644
--- a/.test-infra/jenkins/job_LoadTests_ParDo_Flink_Python.groovy
+++ b/.test-infra/jenkins/job_LoadTests_ParDo_Flink_Python.groovy
@@ -129,9 +129,9 @@
List<Map> testScenarios = scenarios(datasetName, pythonHarnessImageTag)
publisher.publish(':sdks:python:container:py2:docker', 'python2.7_sdk')
- publisher.publish(':runners:flink:1.9:job-server-container:docker', 'flink-job-server')
+ publisher.publish(':runners:flink:1.9:job-server-container:docker', 'flink1.9_job_server')
Flink flink = new Flink(scope, 'beam_LoadTests_Python_ParDo_Flink_Batch')
- flink.setUp([pythonHarnessImageTag], numberOfWorkers, publisher.getFullImageName('flink-job-server'))
+ flink.setUp([pythonHarnessImageTag], numberOfWorkers, publisher.getFullImageName('flink1.9_job_server'))
loadTestsBuilder.loadTests(scope, CommonTestProperties.SDK.PYTHON, testScenarios, 'ParDo', 'batch')
}
diff --git a/.test-infra/jenkins/job_LoadTests_ParDo_Python_37.groovy b/.test-infra/jenkins/job_LoadTests_ParDo_Python_37.groovy
new file mode 100644
index 0000000..71c5d6f
--- /dev/null
+++ b/.test-infra/jenkins/job_LoadTests_ParDo_Python_37.groovy
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import CommonJobProperties as commonJobProperties
+import LoadTestsBuilder as loadTestsBuilder
+import PhraseTriggeringPostCommitBuilder
+
+def now = new Date().format("MMddHHmmss", TimeZone.getTimeZone('UTC'))
+
+def loadTestConfigurations = { datasetName -> [
+ [
+ title : 'ParDo Python Load test: 2GB 100 byte records 10 times',
+ test : 'apache_beam.testing.load_tests.pardo_test:ParDoTest.testParDo',
+ runner : CommonTestProperties.Runner.DATAFLOW,
+ pipelineOptions: [
+ job_name : 'load-tests-python37-dataflow-batch-pardo-1-' + now,
+ project : 'apache-beam-testing',
+ temp_location : 'gs://temp-storage-for-perf-tests/loadtests',
+ publish_to_big_query : true,
+ metrics_dataset : datasetName,
+ metrics_table : 'python37_dataflow_batch_pardo_1',
+ input_options : '\'{' +
+ '"num_records": 20000000,' +
+ '"key_size": 10,' +
+ '"value_size": 90}\'',
+ iterations : 10,
+ number_of_counter_operations: 0,
+ number_of_counters : 0,
+ num_workers : 5,
+ autoscaling_algorithm: 'NONE',
+ ]
+ ],
+]}
+
+def batchLoadTestJob = { scope, triggeringContext ->
+ scope.description('Runs Python 3.7 ParDo load tests on Dataflow runner in batch mode')
+ commonJobProperties.setTopLevelMainJobProperties(scope, 'master', 120)
+
+ def datasetName = loadTestsBuilder.getBigQueryDataset('load_test', triggeringContext)
+ for (testConfiguration in loadTestConfigurations(datasetName)) {
+ loadTestsBuilder.loadTest(scope, testConfiguration.title, testConfiguration.runner, CommonTestProperties.SDK.PYTHON_37, testConfiguration.pipelineOptions, testConfiguration.test)
+ }
+}
+
+PhraseTriggeringPostCommitBuilder.postCommitJob(
+ 'beam_LoadTests_Python_37_ParDo_Dataflow_Batch',
+ 'Run Python 3.7 Load Tests ParDo Dataflow Batch',
+ 'Load Tests Python 3.7 ParDo Dataflow Batch suite',
+ this
+) {
+ batchLoadTestJob(delegate, CommonTestProperties.TriggeringContext.PR)
+}
+
+CronJobBuilder.cronJob('beam_LoadTests_Python_37_ParDo_Dataflow_Batch', 'H 13 * * *', this) {
+ batchLoadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT)
+}
diff --git a/.test-infra/jenkins/job_LoadTests_coGBK_Flink_Python.groovy b/.test-infra/jenkins/job_LoadTests_coGBK_Flink_Python.groovy
index 29a53c4..e8fcb24 100644
--- a/.test-infra/jenkins/job_LoadTests_coGBK_Flink_Python.groovy
+++ b/.test-infra/jenkins/job_LoadTests_coGBK_Flink_Python.groovy
@@ -157,9 +157,9 @@
List<Map> testScenarios = scenarios(datasetName, pythonHarnessImageTag)
publisher.publish(':sdks:python:container:py2:docker', 'python2.7_sdk')
- publisher.publish(':runners:flink:1.9:job-server-container:docker', 'flink-job-server')
+ publisher.publish(':runners:flink:1.9:job-server-container:docker', 'flink1.9_job_server')
def flink = new Flink(scope, 'beam_LoadTests_Python_CoGBK_Flink_Batch')
- flink.setUp([pythonHarnessImageTag], numberOfWorkers, publisher.getFullImageName('flink-job-server'))
+ flink.setUp([pythonHarnessImageTag], numberOfWorkers, publisher.getFullImageName('flink1.9_job_server'))
loadTestsBuilder.loadTests(scope, CommonTestProperties.SDK.PYTHON, testScenarios, 'CoGBK', 'batch')
}
diff --git a/.test-infra/jenkins/job_PerformanceTests_SQLIO_Java.groovy b/.test-infra/jenkins/job_PerformanceTests_SQLIO_Java.groovy
new file mode 100644
index 0000000..3de488a
--- /dev/null
+++ b/.test-infra/jenkins/job_PerformanceTests_SQLIO_Java.groovy
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import CommonJobProperties as common
+
+def jobConfigs = [
+ [
+ title : 'SQL BigQueryIO with push-down Batch Performance Test Java',
+ triggerPhrase: 'Run SQLBigQueryIO Batch Performance Test Java',
+ name : 'beam_SQLBigQueryIO_Batch_Performance_Test_Java',
+ itClass : 'org.apache.beam.sdk.extensions.sql.meta.provider.bigquery.BigQueryIOPushDownIT',
+ properties: [
+ project : 'apache-beam-testing',
+ tempLocation : 'gs://temp-storage-for-perf-tests/loadtests',
+ tempRoot : 'gs://temp-storage-for-perf-tests/loadtests',
+ metricsBigQueryDataset: 'beam_performance',
+ metricsBigQueryTable : 'sql_bqio_read_java_batch',
+ runner : "DataflowRunner",
+ maxNumWorkers : '5',
+ numWorkers : '5',
+ autoscalingAlgorithm : 'NONE',
+ ]
+ ]
+]
+
+jobConfigs.forEach { jobConfig -> createPostCommitJob(jobConfig)}
+
+private void createPostCommitJob(jobConfig) {
+ job(jobConfig.name) {
+ description(jobConfig.title)
+ common.setTopLevelMainJobProperties(delegate)
+ common.enablePhraseTriggeringFromPullRequest(delegate, jobConfig.title, jobConfig.triggerPhrase)
+ common.setAutoJob(delegate, 'H */6 * * *')
+ publishers {
+ archiveJunit('**/build/test-results/**/*.xml')
+ }
+
+ steps {
+ gradle {
+ rootBuildScriptDir(common.checkoutDir)
+ common.setGradleSwitches(delegate)
+ switches("--info")
+ switches("-DintegrationTestPipelineOptions=\'${common.joinOptionsWithNestedJsonValues(jobConfig.properties)}\'")
+ switches("-DintegrationTestRunner=dataflow")
+ tasks(":sdks:java:extensions:sql:perf-tests:integrationTest --tests ${jobConfig.itClass}")
+ }
+ }
+ }
+}
diff --git a/.test-infra/jenkins/job_PostCommit_PortableJar_Flink.groovy b/.test-infra/jenkins/job_PostCommit_PortableJar_Flink.groovy
index 80b2aa3..b526bce 100644
--- a/.test-infra/jenkins/job_PostCommit_PortableJar_Flink.groovy
+++ b/.test-infra/jenkins/job_PostCommit_PortableJar_Flink.groovy
@@ -25,7 +25,7 @@
description('Tests creation and execution of portable pipeline Jars on the Flink runner.')
// Set common parameters.
- commonJobProperties.setTopLevelMainJobProperties(delegate)
+ commonJobProperties.setTopLevelMainJobProperties(delegate, 'master', 120)
// Gradle goals for this job.
steps {
diff --git a/.test-infra/jenkins/job_PostCommit_PortableJar_Spark.groovy b/.test-infra/jenkins/job_PostCommit_PortableJar_Spark.groovy
new file mode 100644
index 0000000..2a9f34d
--- /dev/null
+++ b/.test-infra/jenkins/job_PostCommit_PortableJar_Spark.groovy
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import CommonJobProperties as commonJobProperties
+import PostcommitJobBuilder
+
+// Tests creation and execution of portable pipeline Jars on the Spark runner.
+PostcommitJobBuilder.postCommitJob('beam_PostCommit_PortableJar_Spark',
+ 'Run PortableJar_Spark PostCommit', 'Spark Portable Jar Tests', this) {
+ description('Tests creation and execution of portable pipeline Jars on the Spark runner.')
+
+ // Set common parameters.
+ commonJobProperties.setTopLevelMainJobProperties(delegate)
+
+ // Gradle goals for this job.
+ steps {
+ gradle {
+ rootBuildScriptDir(commonJobProperties.checkoutDir)
+ tasks(':runners:spark:job-server:testPipelineJar')
+ commonJobProperties.setGradleSwitches(delegate)
+ }
+ }
+}
diff --git a/.test-infra/jenkins/job_PostCommit_Python_MongoDBIO_Load_Test.groovy b/.test-infra/jenkins/job_PostCommit_Python_MongoDBIO_Load_Test.groovy
new file mode 100644
index 0000000..be8265f
--- /dev/null
+++ b/.test-infra/jenkins/job_PostCommit_Python_MongoDBIO_Load_Test.groovy
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import CommonJobProperties as common
+import Kubernetes
+
+String jobName = "beam_python_mongoio_load_test"
+
+job(jobName) {
+ common.setTopLevelMainJobProperties(delegate)
+ common.setAutoJob(delegate, 'H */6 * * *')
+ common.enablePhraseTriggeringFromPullRequest(
+ delegate,
+ 'Python MongoDBIO Load Test',
+ 'Run Python MongoDBIO Load Test')
+
+ String namespace = common.getKubernetesNamespace(jobName)
+ String kubeconfigPath = common.getKubeconfigLocationForNamespace(namespace)
+ Kubernetes k8s = Kubernetes.create(delegate, kubeconfigPath, namespace)
+
+ k8s.apply(common.makePathAbsolute("src/.test-infra/kubernetes/mongodb/load-balancer/mongo.yml"))
+ String mongoHostName = "LOAD_BALANCER_IP"
+ k8s.loadBalancerIP("mongo-load-balancer-service", mongoHostName)
+
+ Map pipelineOptions = [
+ temp_location: 'gs://temp-storage-for-perf-tests/loadtests',
+ project : 'apache-beam-testing',
+ mongo_uri : "mongodb://\$${mongoHostName}:27017",
+ num_documents: '1000000',
+ batch_size : '10000',
+ runner : 'DataflowRunner',
+ num_workers : '5'
+ ]
+
+ steps {
+ gradle {
+ rootBuildScriptDir(common.checkoutDir)
+ common.setGradleSwitches(delegate)
+ switches("-Popts=\'${common.mapToArgString(pipelineOptions)}\'")
+ tasks(":sdks:python:test-suites:dataflow:py35:mongodbioIT")
+ }
+ }
+}
diff --git a/.test-infra/jenkins/job_PreCommit_Python.groovy b/.test-infra/jenkins/job_PreCommit_Python.groovy
index 0fe418a..63914a0 100644
--- a/.test-infra/jenkins/job_PreCommit_Python.groovy
+++ b/.test-infra/jenkins/job_PreCommit_Python.groovy
@@ -30,23 +30,6 @@
]
)
builder.build {
- // Publish all test results to Jenkins. Note that Nose documentation
- // specifically mentions that it produces JUnit compatible test results.
- publishers {
- archiveJunit('**/nosetests*.xml')
- }
-}
-
-// Temporary job for testing pytest-based testing.
-// TODO(BEAM-3713): Remove this job once nose tests are replaced.
-PrecommitJobBuilder builderPytest = new PrecommitJobBuilder(
- scope: this,
- nameBase: 'Python_pytest',
- gradleTask: ':pythonPreCommitPytest',
- commitTriggering: false,
- timeoutMins: 180,
-)
-builderPytest.build {
// Publish all test results to Jenkins.
publishers {
archiveJunit('**/pytest*.xml')
diff --git a/build.gradle b/build.gradle
index 1b633f8..b1702fa 100644
--- a/build.gradle
+++ b/build.gradle
@@ -214,14 +214,6 @@
// have caught. Note that the same tests will still run in postcommit.
}
-// TODO(BEAM-3713): Temporary task for testing pytest.
-task pythonPreCommitPytest() {
- dependsOn ":sdks:python:test-suites:tox:py2:preCommitPy2Pytest"
- dependsOn ":sdks:python:test-suites:tox:py35:preCommitPy35Pytest"
- dependsOn ":sdks:python:test-suites:tox:py36:preCommitPy36Pytest"
- dependsOn ":sdks:python:test-suites:tox:py37:preCommitPy37Pytest"
-}
-
task pythonLintPreCommit() {
dependsOn ":sdks:python:test-suites:tox:py2:lint"
dependsOn ":sdks:python:test-suites:tox:py37:lint"
@@ -326,7 +318,14 @@
dependsOn project.getTasksByName('publishMavenJavaPublicationToMavenLocal', true /* recursively */)
classpath = project.configurations.linkageCheckerJava
main = 'com.google.cloud.tools.opensource.classpath.LinkageCheckerMain'
- args '-a', project.javaLinkageArtifactIds.split(',').collect({"${project.ext.mavenGroupId}:${it}:${project.version}"}).join(',')
+ args '-a', project.javaLinkageArtifactIds.split(',').collect({
+ if (it.contains(':')) {
+ "${project.ext.mavenGroupId}:${it}"
+ } else {
+ // specify the version if not provided
+ "${project.ext.mavenGroupId}:${it}:${project.version}"
+ }
+ }).join(',')
doLast {
println "NOTE: This task published artifacts into your local Maven repository. You may want to remove them manually."
}
diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index c4467cd..7e1cf0f 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -362,17 +362,18 @@
def apex_malhar_version = "3.4.0"
def aws_java_sdk_version = "1.11.519"
def aws_java_sdk2_version = "2.5.71"
- def cassandra_driver_version = "3.6.0"
+ def cassandra_driver_version = "3.8.0"
+ def classgraph_version = "4.8.56"
def generated_grpc_beta_version = "0.44.0"
- def generated_grpc_ga_version = "1.43.0"
+ def generated_grpc_ga_version = "1.83.0"
def generated_grpc_dc_beta_version = "0.27.0-alpha"
- def google_auth_version = "0.12.0"
+ def google_auth_version = "0.19.0"
def google_clients_version = "1.28.0"
def google_cloud_bigdataoss_version = "1.9.16"
def google_cloud_core_version = "1.61.0"
def google_cloud_spanner_version = "1.6.0"
def grpc_version = "1.17.1"
- def guava_version = "20.0"
+ def guava_version = "25.1-jre"
def hadoop_version = "2.8.5"
def hamcrest_version = "2.1"
def jackson_version = "2.9.10"
@@ -382,8 +383,8 @@
def netty_version = "4.1.30.Final"
def postgres_version = "42.2.2"
def powermock_version = "2.0.2"
- def proto_google_common_protos_version = "1.12.0"
- def protobuf_version = "3.6.0"
+ def proto_google_common_protos_version = "1.17.0"
+ def protobuf_version = "3.11.1"
def quickcheck_version = "0.8"
def spark_version = "2.4.4"
def spark_structured_streaming_version = "2.4.0"
@@ -426,24 +427,24 @@
bigdataoss_util : "com.google.cloud.bigdataoss:util:$google_cloud_bigdataoss_version",
cassandra_driver_core : "com.datastax.cassandra:cassandra-driver-core:$cassandra_driver_version",
cassandra_driver_mapping : "com.datastax.cassandra:cassandra-driver-mapping:$cassandra_driver_version",
- commons_codec : "commons-codec:commons-codec:1.10",
+ classgraph : "io.github.classgraph:classgraph:$classgraph_version",
+ commons_codec : "commons-codec:commons-codec:1.14",
commons_compress : "org.apache.commons:commons-compress:1.19",
- commons_csv : "org.apache.commons:commons-csv:1.4",
- commons_io_1x : "commons-io:commons-io:1.3.2",
- commons_io_2x : "commons-io:commons-io:2.5",
- commons_lang3 : "org.apache.commons:commons-lang3:3.6",
+ commons_csv : "org.apache.commons:commons-csv:1.7",
+ commons_io : "commons-io:commons-io:2.6",
+ commons_lang3 : "org.apache.commons:commons-lang3:3.9",
commons_math3 : "org.apache.commons:commons-math3:3.6.1",
error_prone_annotations : "com.google.errorprone:error_prone_annotations:2.0.15",
gax_grpc : "com.google.api:gax-grpc:1.38.0",
google_api_client : "com.google.api-client:google-api-client:$google_clients_version",
google_api_client_jackson2 : "com.google.api-client:google-api-client-jackson2:$google_clients_version",
google_api_client_java6 : "com.google.api-client:google-api-client-java6:$google_clients_version",
- google_api_common : "com.google.api:api-common:1.7.0",
+ google_api_common : "com.google.api:api-common:1.8.1",
google_api_services_bigquery : "com.google.apis:google-api-services-bigquery:v2-rev20181221-$google_clients_version",
google_api_services_clouddebugger : "com.google.apis:google-api-services-clouddebugger:v2-rev20181114-$google_clients_version",
google_api_services_cloudresourcemanager : "com.google.apis:google-api-services-cloudresourcemanager:v1-rev20181015-$google_clients_version",
google_api_services_dataflow : "com.google.apis:google-api-services-dataflow:v1b3-rev20190927-$google_clients_version",
- google_api_services_pubsub : "com.google.apis:google-api-services-pubsub:v1-rev20181213-$google_clients_version",
+ google_api_services_pubsub : "com.google.apis:google-api-services-pubsub:v1-rev20191111-$google_clients_version",
google_api_services_storage : "com.google.apis:google-api-services-storage:v1-rev20181109-$google_clients_version",
google_auth_library_credentials : "com.google.auth:google-auth-library-credentials:$google_auth_version",
google_auth_library_oauth2_http : "com.google.auth:google-auth-library-oauth2-http:$google_auth_version",
@@ -453,7 +454,7 @@
google_cloud_core : "com.google.cloud:google-cloud-core:$google_cloud_core_version",
google_cloud_core_grpc : "com.google.cloud:google-cloud-core-grpc:$google_cloud_core_version",
google_cloud_dataflow_java_proto_library_all: "com.google.cloud.dataflow:google-cloud-dataflow-java-proto-library-all:0.5.160304",
- google_cloud_datastore_v1_proto_client : "com.google.cloud.datastore:datastore-v1-proto-client:1.6.0",
+ google_cloud_datastore_v1_proto_client : "com.google.cloud.datastore:datastore-v1-proto-client:1.6.3",
google_cloud_spanner : "com.google.cloud:google-cloud-spanner:$google_cloud_spanner_version",
google_http_client : "com.google.http-client:google-http-client:$google_clients_version",
google_http_client_jackson : "com.google.http-client:google-http-client-jackson:$google_clients_version",
@@ -490,6 +491,8 @@
jackson_module_scala : "com.fasterxml.jackson.module:jackson-module-scala_2.11:$jackson_version",
jaxb_api : "javax.xml.bind:jaxb-api:$jaxb_api_version",
joda_time : "joda-time:joda-time:2.10.3",
+ jsonassert : "org.skyscreamer:jsonassert:1.5.0",
+ jsr305 : "com.google.code.findbugs:jsr305:3.0.2",
junit : "junit:junit:4.13-beta-3",
kafka : "org.apache.kafka:kafka_2.11:$kafka_version",
kafka_clients : "org.apache.kafka:kafka-clients:$kafka_version",
@@ -504,7 +507,7 @@
powermock_mockito : "org.powermock:powermock-api-mockito2:$powermock_version",
protobuf_java : "com.google.protobuf:protobuf-java:$protobuf_version",
protobuf_java_util : "com.google.protobuf:protobuf-java-util:$protobuf_version",
- proto_google_cloud_bigquery_storage_v1beta1 : "com.google.api.grpc:proto-google-cloud-bigquerystorage-v1beta1:$generated_grpc_beta_version",
+ proto_google_cloud_bigquery_storage_v1beta1 : "com.google.api.grpc:proto-google-cloud-bigquerystorage-v1beta1:0.83.0",
proto_google_cloud_bigtable_v2 : "com.google.api.grpc:proto-google-cloud-bigtable-v2:$generated_grpc_beta_version",
proto_google_cloud_datacatalog_v1beta1 : "com.google.api.grpc:proto-google-cloud-datacatalog-v1beta1:$generated_grpc_dc_beta_version",
proto_google_cloud_datastore_v1 : "com.google.api.grpc:proto-google-cloud-datastore-v1:$generated_grpc_beta_version",
@@ -738,6 +741,10 @@
// spotbugs-annotations artifact is licensed under LGPL and cannot be included in the
// Apache Beam distribution, but may be relied on during build.
// See: https://www.apache.org/legal/resolved.html#prohibited
+ // Special case for jsr305 (a transitive dependency of spotbugs-annotations):
+ // sdks/java/core's FieldValueTypeInformation needs javax.annotations.Nullable at runtime.
+ // Therefore, the java core module declares jsr305 dependency (BSD license) as "compile".
+ // https://github.com/findbugsproject/findbugs/blob/master/findbugs/licenses/LICENSE-jsr305.txt
"com.github.spotbugs:spotbugs-annotations:3.1.12",
"net.jcip:jcip-annotations:1.0",
]
@@ -948,7 +955,7 @@
FileTree exposedClasses = project.zipTree(it).matching {
include "**/*.class"
// BEAM-5919: Exclude paths for Java 9 multi-release jars.
- exclude "META-INF/versions/*/module-info.class"
+ exclude "**/module-info.class"
configuration.shadowJarValidationExcludes.each {
exclude "$it"
exclude "META-INF/versions/*/$it"
@@ -1489,7 +1496,7 @@
archivesBaseName: configuration.archivesBaseName,
automaticModuleName: configuration.automaticModuleName,
shadowJarValidationExcludes: it.shadowJarValidationExcludes,
- shadowClosure: GrpcVendoring.shadowClosure() << {
+ shadowClosure: GrpcVendoring_1_21_0.shadowClosure() << {
// We perform all the code relocations but don't include
// any of the actual dependencies since they will be supplied
// by org.apache.beam:beam-vendor-grpc-v1p21p0:0.1
@@ -1530,7 +1537,7 @@
}
}
- project.dependencies GrpcVendoring.dependenciesClosure() << { shadow project.ext.library.java.vendored_grpc_1_21_0 }
+ project.dependencies GrpcVendoring_1_21_0.dependenciesClosure() << { shadow project.ext.library.java.vendored_grpc_1_21_0 }
}
/** ***********************************************************************************************/
@@ -1773,7 +1780,7 @@
project.exec { commandLine virtualenvCmd }
project.exec {
executable 'sh'
- args '-c', ". ${project.ext.envdir}/bin/activate && pip install --retries 10 --upgrade tox==3.11.1 grpcio-tools==1.3.5"
+ args '-c', ". ${project.ext.envdir}/bin/activate && pip install --retries 10 --upgrade tox==3.11.1 -r ${project.rootDir}/sdks/python/build-requirements.txt"
}
}
// Gradle will delete outputs whenever it thinks they are stale. Putting a
@@ -1827,6 +1834,9 @@
}
}
project.clean.dependsOn project.cleanPython
+ // Force this subproject's clean to run before the main :clean, to avoid
+ // racing on deletes.
+ project.rootProject.clean.dependsOn project.clean
// Return a joined String from a Map that contains all commandline args of
// IT test.
diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring_1_21_0.groovy
similarity index 99%
rename from buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring.groovy
rename to buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring_1_21_0.groovy
index 96c6bf8..3c34a6d 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring_1_21_0.groovy
@@ -23,7 +23,7 @@
/**
* Utilities for working with our vendored version of gRPC.
*/
-class GrpcVendoring {
+class GrpcVendoring_1_21_0 {
/** Returns the list of compile time dependencies. */
static List<String> dependencies() {
return [
diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring_1_26_0.groovy
similarity index 68%
copy from buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring.groovy
copy to buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring_1_26_0.groovy
index 96c6bf8..8c70aa2 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring_1_26_0.groovy
@@ -23,28 +23,41 @@
/**
* Utilities for working with our vendored version of gRPC.
*/
-class GrpcVendoring {
+class GrpcVendoring_1_26_0 {
/** Returns the list of compile time dependencies. */
static List<String> dependencies() {
return [
'com.google.guava:guava:26.0-jre',
- 'com.google.protobuf:protobuf-java:3.7.1',
- 'com.google.protobuf:protobuf-java-util:3.7.1',
- 'com.google.code.gson:gson:2.7',
- 'io.grpc:grpc-auth:1.21.0',
- 'io.grpc:grpc-core:1.21.0',
- 'io.grpc:grpc-context:1.21.0',
- 'io.grpc:grpc-netty:1.21.0',
- 'io.grpc:grpc-protobuf:1.21.0',
- 'io.grpc:grpc-stub:1.21.0',
- 'io.netty:netty-transport-native-epoll:4.1.34.Final',
+ 'com.google.protobuf:protobuf-java:3.11.0',
+ 'com.google.protobuf:protobuf-java-util:3.11.0',
+ 'com.google.code.gson:gson:2.8.6',
+ 'io.grpc:grpc-auth:1.26.0',
+ 'io.grpc:grpc-core:1.26.0',
+ 'io.grpc:grpc-context:1.26.0',
+ 'io.grpc:grpc-netty:1.26.0',
+ 'io.grpc:grpc-protobuf:1.26.0',
+ 'io.grpc:grpc-stub:1.26.0',
+ 'io.netty:netty-transport-native-epoll:4.1.42.Final',
// tcnative version from https://github.com/grpc/grpc-java/blob/master/SECURITY.md#netty
- 'io.netty:netty-tcnative-boringssl-static:2.0.22.Final',
- 'com.google.auth:google-auth-library-credentials:0.13.0',
- 'io.grpc:grpc-testing:1.21.0',
+ 'io.netty:netty-tcnative-boringssl-static:2.0.26.Final',
+ 'com.google.auth:google-auth-library-credentials:0.18.0',
+ 'io.grpc:grpc-testing:1.26.0',
'com.google.api.grpc:proto-google-common-protos:1.12.0',
- 'io.opencensus:opencensus-api:0.21.0',
- 'io.opencensus:opencensus-contrib-grpc-metrics:0.21.0',
+ 'io.opencensus:opencensus-api:0.24.0',
+ 'io.opencensus:opencensus-contrib-grpc-metrics:0.24.0',
+ 'io.perfmark:perfmark-api:0.19.0',
+ 'com.github.jponge:lzma-java:1.3',
+ 'com.google.protobuf.nano:protobuf-javanano:3.0.0-alpha-5',
+ 'com.jcraft:jzlib:1.1.3',
+ 'com.ning:compress-lzf:1.0.3',
+ 'net.jpountz.lz4:lz4:1.3.0',
+ 'org.bouncycastle:bcpkix-jdk15on:1.54',
+ 'org.bouncycastle:bcprov-jdk15on:1.54',
+ 'org.conscrypt:conscrypt-openjdk-uber:1.3.0',
+ 'org.eclipse.jetty.alpn:alpn-api:1.1.2.v20150522',
+ 'org.eclipse.jetty.npn:npn-api:1.1.1.v20141010',
+ 'org.jboss.marshalling:jboss-marshalling:1.4.11.Final',
+ 'org.jboss.modules:jboss-modules:1.1.0.Beta1'
]
}
@@ -54,7 +67,19 @@
*/
static List<String> runtimeDependencies() {
return [
- 'com.google.errorprone:error_prone_annotations:2.3.2',
+ 'com.google.errorprone:error_prone_annotations:2.3.3',
+ 'commons-logging:commons-logging:1.2',
+ 'org.apache.logging.log4j:log4j-api:2.6.2',
+ 'org.slf4j:slf4j-api:1.7.21'
+ ]
+ }
+
+ /**
+ * Returns the list of test dependencies.
+ */
+ static List<String> testDependencies() {
+ return [
+ 'junit:junit:4.12',
]
}
@@ -73,7 +98,7 @@
// those libraries may provide. The 'validateShadedJarDoesntLeakNonOrgApacheBeamClasses'
// ensures that there are no classes outside of the 'org.apache.beam' namespace.
- String version = "v1p21p0";
+ String version = "v1p26p0";
String prefix = "org.apache.beam.vendor.grpc.${version}";
List<String> packagesToRelocate = [
// guava uses the com.google.common and com.google.thirdparty package namespaces
@@ -81,7 +106,6 @@
"com.google.thirdparty",
"com.google.protobuf",
"com.google.gson",
- "io.grpc",
"com.google.auth",
"com.google.api",
"com.google.cloud",
@@ -89,8 +113,23 @@
"com.google.longrunning",
"com.google.rpc",
"com.google.type",
+ "io.grpc",
+ "io.netty",
"io.opencensus",
- "io.netty"
+ "io.perfmark",
+ "com.google.protobuf.nano",
+ "com.jcraft",
+ "com.ning",
+ "com.sun",
+ "lzma",
+ "net.jpountz",
+ "org.bouncycastle",
+ "org.cservenak.streams",
+ "org.conscrypt",
+ "org.eclipse.jetty.alpn",
+ "org.eclipse.jetty.npn",
+ "org.jboss.marshalling",
+ "org.jboss.modules"
]
return packagesToRelocate.collectEntries {
@@ -108,19 +147,25 @@
/** Returns the list of shading exclusions. */
static List<String> exclusions() {
return [
- // Don't include android annotations, errorprone, checkerframework, JDK8 annotations, objenesis, junit, and mockito in the vendored jar
+ // Don't include android annotations, errorprone, checkerframework, JDK8 annotations, objenesis, junit,
+ // commons-logging, log4j, slf4j and mockito in the vendored jar
"android/annotation/**/",
"com/google/errorprone/**",
"com/google/instrumentation/**",
"com/google/j2objc/annotations/**",
"javax/annotation/**",
"junit/**",
+ "org/apache/commons/logging/**",
+ "org/apache/log/**",
+ "org/apache/log4j/**",
+ "org/apache/logging/log4j/**",
"org/checkerframework/**",
"org/codehaus/mojo/animal_sniffer/**",
"org/hamcrest/**",
"org/junit/**",
"org/mockito/**",
"org/objenesis/**",
+ "org/slf4j/**",
]
}
diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/VendorJavaPlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/VendorJavaPlugin.groovy
index 24ca6e1..77c3019 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/VendorJavaPlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/VendorJavaPlugin.groovy
@@ -21,6 +21,7 @@
import org.gradle.api.GradleException
import org.gradle.api.Plugin
import org.gradle.api.Project
+import org.gradle.api.artifacts.ProjectDependency
import org.gradle.api.file.FileTree
import org.gradle.api.publish.maven.MavenPublication
@@ -51,6 +52,7 @@
static class VendorJavaPluginConfig {
List<String> dependencies
List<String> runtimeDependencies
+ List<String> testDependencies
Map<String, String> relocations
List<String> exclusions
String groupId
@@ -96,7 +98,8 @@
project.dependencies {
config.dependencies.each { compile it }
- config.runtimeDependencies.each { runtime it }
+ config.runtimeDependencies.each { runtimeOnly it }
+ config.testDependencies.each { compileOnly it }
}
// Create a task which emulates the maven-archiver plugin in generating a
@@ -132,7 +135,7 @@
include "**/*.class"
exclude "org/apache/beam/vendor/**"
// BEAM-5919: Exclude paths for Java 9 multi-release jars.
- exclude "META-INF/versions/*/module-info.class"
+ exclude "**/module-info.class"
exclude "META-INF/versions/*/org/apache/beam/vendor/**"
}
if (exposedClasses.files) {
@@ -281,6 +284,55 @@
}
pom.withXml {
+ def root = asNode()
+ def dependenciesNode = root.appendNode('dependencies')
+ def generateDependenciesFromConfiguration = { param ->
+ project.configurations."${param.configuration}".allDependencies.each {
+ def dependencyNode = dependenciesNode.appendNode('dependency')
+ def appendClassifier = { dep ->
+ dep.artifacts.each { art ->
+ if (art.hasProperty('classifier')) {
+ dependencyNode.appendNode('classifier', art.classifier)
+ }
+ }
+ }
+
+ if (it instanceof ProjectDependency) {
+ dependencyNode.appendNode('groupId', it.getDependencyProject().mavenGroupId)
+ dependencyNode.appendNode('artifactId', it.getDependencyProject().archivesBaseName)
+ dependencyNode.appendNode('version', it.version)
+ dependencyNode.appendNode('scope', param.scope)
+ appendClassifier(it)
+ } else {
+ dependencyNode.appendNode('groupId', it.group)
+ dependencyNode.appendNode('artifactId', it.name)
+ dependencyNode.appendNode('version', it.version)
+ dependencyNode.appendNode('scope', param.scope)
+ appendClassifier(it)
+ }
+
+ // Start with any exclusions that were added via configuration exclude rules.
+ // Then add all the exclusions that are specific to the dependency (if any
+ // were declared). Finally build the node that represents all exclusions.
+ def exclusions = []
+ exclusions += project.configurations."${param.configuration}".excludeRules
+ if (it.hasProperty('excludeRules')) {
+ exclusions += it.excludeRules
+ }
+ if (!exclusions.empty) {
+ def exclusionsNode = dependencyNode.appendNode('exclusions')
+ exclusions.each { exclude ->
+ def exclusionNode = exclusionsNode.appendNode('exclusion')
+ exclusionNode.appendNode('groupId', exclude.group)
+ exclusionNode.appendNode('artifactId', exclude.module)
+ }
+ }
+ }
+ }
+
+ generateDependenciesFromConfiguration(configuration: 'runtimeOnly', scope: 'runtime')
+ generateDependenciesFromConfiguration(configuration: 'compileOnly', scope: 'provided')
+
// NB: This must come after asNode() logic, as it seems asNode()
// removes XML comments.
// TODO: Load this from file?
diff --git a/model/fn-execution/src/main/proto/beam_fn_api.proto b/model/fn-execution/src/main/proto/beam_fn_api.proto
index c868bab..17440e6 100644
--- a/model/fn-execution/src/main/proto/beam_fn_api.proto
+++ b/model/fn-execution/src/main/proto/beam_fn_api.proto
@@ -207,18 +207,13 @@
// Either an absolute timestamp or a relative timestamp can represent a
// scheduled execution time.
message DelayedBundleApplication {
- // Recommended time at which the application should be scheduled to execute
- // by the runner. Times in the past may be scheduled to execute immediately.
- // TODO(BEAM-8536): Migrate usage of absolute time to requested_time_delay.
- google.protobuf.Timestamp requested_execution_time = 1;
-
// (Required) The application that should be scheduled.
- BundleApplication application = 2;
+ BundleApplication application = 1;
// Recommended time delay at which the application should be scheduled to
// execute by the runner. Time delay that equals 0 may be scheduled to execute
// immediately. The unit of time delay should be microsecond.
- google.protobuf.Duration requested_time_delay = 3;
+ google.protobuf.Duration requested_time_delay = 2;
}
// A request to process a given bundle.
@@ -431,7 +426,7 @@
//
// Set to 0 to "checkpoint" as soon as possible (keeping as little work as
// possible and returning the remainder).
- float fraction_of_remainder = 1;
+ double fraction_of_remainder = 1;
// A set of allowed element indices where the SDK may split. When this is
// empty, there are no constraints on where to split.
@@ -486,12 +481,12 @@
// The last element of the input channel that should be entirely considered
// part of the primary, identified by its absolute index in the (ordered)
// channel.
- int32 last_primary_element = 2;
+ int64 last_primary_element = 2;
// The first element of the input channel that should be entirely considered
// part of the residual, identified by its absolute index in the (ordered)
// channel.
- int32 first_residual_element = 3;
+ int64 first_residual_element = 3;
}
// Partitions of input data channels into primary and residual elements,
@@ -894,3 +889,32 @@
// Stop the SDK worker.
rpc StopWorker (StopWorkerRequest) returns (StopWorkerResponse) {}
}
+
+// Request from runner to SDK Harness asking for its status. For more details see
+// https://s.apache.org/beam-fn-api-harness-status
+message WorkerStatusRequest {
+ // (Required) Unique ID identifying this request.
+ string id = 1;
+}
+
+// Response from SDK Harness to runner containing the debug related status info.
+message WorkerStatusResponse {
+ // (Required) Unique ID from the original request.
+ string id = 1;
+
+ // (Optional) Error message if exception encountered generating the status response.
+ string error = 2;
+
+ // (Optional) Status debugging info reported by the SDK harness worker. Content and
+ // format are not strongly enforced but should be print-friendly and
+ // appropriate as an HTTP response body for the end user. For details of the preferred
+ // info to include in the message see
+ // https://s.apache.org/beam-fn-api-harness-status
+ string status_info = 3;
+}
+
+// API for SDKs to report debug-related statuses to runner during pipeline execution.
+service BeamFnWorkerStatus {
+ rpc WorkerStatus (stream WorkerStatusResponse)
+ returns (stream WorkerStatusRequest) {}
+}
diff --git a/model/fn-execution/src/main/proto/beam_provision_api.proto b/model/fn-execution/src/main/proto/beam_provision_api.proto
index 442e626..e6b4ed8 100644
--- a/model/fn-execution/src/main/proto/beam_provision_api.proto
+++ b/model/fn-execution/src/main/proto/beam_provision_api.proto
@@ -29,6 +29,7 @@
option java_package = "org.apache.beam.model.fnexecution.v1";
option java_outer_classname = "ProvisionApi";
+import "endpoints.proto";
import "google/protobuf/struct.proto";
// A service to provide runtime provisioning information to the SDK harness
@@ -71,6 +72,12 @@
// (required) The artifact retrieval token produced by
// ArtifactStagingService.CommitManifestResponse.
string retrieval_token = 6;
+
+ // (optional) The endpoint that the runner is hosting for the SDK to submit
+ // status reports to during pipeline execution. This field will only be
+ // populated if the runner supports SDK status reports. For more details see
+ // https://s.apache.org/beam-fn-api-harness-status
+ org.apache.beam.model.pipeline.v1.ApiServiceDescriptor status_endpoint = 7;
}
// Resources specify limits for local resources, such memory and cpu. It
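The new BeamFnWorkerStatus service above, together with the status_endpoint now advertised in ProvisionInfo, lets a runner poll the SDK harness for print-friendly debug status during pipeline execution. Below is a minimal client-side sketch of how a harness might answer those polls, assuming the stubs generated from beam_fn_api.proto are importable as beam_fn_api_pb2 / beam_fn_api_pb2_grpc (module names are illustrative, not the committed package paths):

```python
import queue
import threading

import grpc

# Assumed names for the generated stubs; actual package paths depend on the build.
import beam_fn_api_pb2
import beam_fn_api_pb2_grpc


def serve_worker_status(status_endpoint_url):
    """Answers runner status polls over the BeamFnWorkerStatus stream.

    status_endpoint_url is the url taken from ProvisionInfo.status_endpoint.
    """
    channel = grpc.insecure_channel(status_endpoint_url)
    stub = beam_fn_api_pb2_grpc.BeamFnWorkerStatusStub(channel)

    outgoing = queue.Queue()

    def responses():
        while True:
            yield outgoing.get()

    # One bidirectional stream: the harness sends WorkerStatusResponse messages
    # and receives WorkerStatusRequest messages with matching ids.
    for request in stub.WorkerStatus(responses()):
        outgoing.put(
            beam_fn_api_pb2.WorkerStatusResponse(
                id=request.id,
                status_info='active threads: %d' % threading.active_count()))
```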
diff --git a/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml b/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml
index 9de15ac..c9a4288 100644
--- a/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml
+++ b/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml
@@ -263,6 +263,44 @@
---
+# ParamWindowedValueCoder with constant value of:
+# timestamp: Instant.ofEpochMilli(1000)
+# windows: [IntervalWindow(10, 20)]
+# pane info: PaneInfo(false, true, PaneInfo.Timing.ON_TIME, 30, 40)
+coder:
+ urn: "beam:coder:param_windowed_value:v1"
+ payload: "\x80\x00\x00\x00\x00\x00\x03è\x00\x00\x00\x01\x80\x00\x00\x00\x00\x00\x00\x14\n&\x1E(\x00"
+ components: [{urn: "beam:coder:varint:v1"},
+ {urn: "beam:coder:interval_window:v1"}]
+examples:
+ "\u0002": {
+ value: 2,
+ timestamp: 1000,
+ pane: {is_first: False, is_last: True, timing: ON_TIME, index: 30, on_time_index: 40},
+ windows: [{end: 20, span: 10}]
+ }
+
+---
+
+# ParamWindowedValueCoder with constant value of:
+# timestamp: BoundedWindow.TIMESTAMP_MIN_VALUE
+# windows: [GlobalWindow.INSTANCE]
+# pane info: PaneInfo.NO_FIRING
+coder:
+ urn: "beam:coder:param_windowed_value:v1"
+ payload: "\x7Fß;dZ\x1C¬\t\x00\x00\x00\x01\x0F\x00"
+ components: [{urn: "beam:coder:varint:v1"},
+ {urn: "beam:coder:global_window:v1"}]
+examples:
+ "\u0002": {
+ value: 2,
+ timestamp: -9223372036854775,
+ pane: {is_first: True, is_last: True, timing: UNKNOWN, index: 0, on_time_index: 0},
+ windows: ["global"]
+ }
+
+---
+
coder:
urn: "beam:coder:double:v1"
examples:
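The new test vectors above exercise beam:coder:param_windowed_value:v1, which encodes only the element value and, on decode, reattaches a constant timestamp, window set, and pane info taken from the coder payload. A rough, self-contained sketch of that behaviour (illustrative only, not Beam's actual coder implementation):

```python
from collections import namedtuple

WindowedValue = namedtuple(
    'WindowedValue', ['value', 'timestamp', 'windows', 'pane_info'])


class ParamWindowedValueCoder:
    """Sketch of beam:coder:param_windowed_value:v1 semantics."""

    def __init__(self, value_coder, timestamp, windows, pane_info):
        self.value_coder = value_coder
        # Constants carried in the coder payload rather than on the wire.
        self.timestamp = timestamp
        self.windows = windows
        self.pane_info = pane_info

    def encode(self, windowed_value):
        # Only the value is encoded; the window metadata is dropped.
        return self.value_coder.encode(windowed_value.value)

    def decode(self, data):
        # Decode the value and reattach the parameterized metadata.
        return WindowedValue(
            self.value_coder.decode(data),
            self.timestamp, self.windows, self.pane_info)
```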
diff --git a/model/pipeline/src/main/proto/beam_runner_api.proto b/model/pipeline/src/main/proto/beam_runner_api.proto
index 6143930..df5d59d 100644
--- a/model/pipeline/src/main/proto/beam_runner_api.proto
+++ b/model/pipeline/src/main/proto/beam_runner_api.proto
@@ -164,6 +164,11 @@
// there is none, or it is not relevant (such as use by the Fn API)
// then it may be omitted.
DisplayData display_data = 6;
+
+ // (Optional) Environment in which the current PTransform should be executed.
+ // The runner that executes the pipeline may choose to override this if needed.
+ // If not specified, the environment will be decided by the runner.
+ string environment_id = 7;
}
message StandardPTransforms {
@@ -364,8 +369,8 @@
// The payload for the primitive ParDo transform.
message ParDoPayload {
- // (Required) The SdkFunctionSpec of the DoFn.
- SdkFunctionSpec do_fn = 1;
+ // (Required) The FunctionSpec of the DoFn.
+ FunctionSpec do_fn = 1;
// (Required) Additional pieces of context the DoFn may require that
// are not otherwise represented in the payload.
@@ -439,7 +444,7 @@
message CombiningStateSpec {
string accumulator_coder_id = 1;
- SdkFunctionSpec combine_fn = 2;
+ FunctionSpec combine_fn = 2;
}
message MapStateSpec {
@@ -467,8 +472,8 @@
// The payload for the primitive Read transform.
message ReadPayload {
- // (Required) The SdkFunctionSpec of the source for this Read.
- SdkFunctionSpec source = 1;
+ // (Required) The FunctionSpec of the source for this Read.
+ FunctionSpec source = 1;
// (Required) Whether the source is bounded or unbounded
IsBounded.Enum is_bounded = 2;
@@ -479,15 +484,15 @@
// The payload for the WindowInto transform.
message WindowIntoPayload {
- // (Required) The SdkFunctionSpec of the WindowFn.
- SdkFunctionSpec window_fn = 1;
+ // (Required) The FunctionSpec of the WindowFn.
+ FunctionSpec window_fn = 1;
}
// The payload for the special-but-not-primitive Combine transform.
message CombinePayload {
- // (Required) The SdkFunctionSpec of the CombineFn.
- SdkFunctionSpec combine_fn = 1;
+ // (Required) The FunctionSpec of the CombineFn.
+ FunctionSpec combine_fn = 1;
// (Required) A reference to the Coder to use for accumulators of the CombineFn
string accumulator_coder_id = 2;
@@ -562,11 +567,11 @@
// The payload for the special-but-not-primitive WriteFiles transform.
message WriteFilesPayload {
- // (Required) The SdkFunctionSpec of the FileBasedSink.
- SdkFunctionSpec sink = 1;
+ // (Required) The FunctionSpec of the FileBasedSink.
+ FunctionSpec sink = 1;
// (Required) The format function.
- SdkFunctionSpec format_function = 2;
+ FunctionSpec format_function = 2;
bool windowed_writes = 3;
@@ -588,7 +593,7 @@
// (Optional) If this coder is parametric, such as ListCoder(VarIntCoder),
// this is a list of the components. In order for encodings to be identical,
- // the SdkFunctionSpec and all components must be identical, recursively.
+ // the FunctionSpec and all components must be identical, recursively.
repeated string component_coder_ids = 2;
}
@@ -672,6 +677,16 @@
// Components: The element coder and the window coder, in that order
WINDOWED_VALUE = 8 [(beam_urn) = "beam:coder:windowed_value:v1"];
+ // A windowed value coder with parameterized timestamp, windows and pane info.
+ // Encodes an element with only the value of the windowed value.
+ // Decodes the value and assigns the parameterized timestamp, windows and pane info to the
+ // windowed value.
+ // Components: The element coder and the window coder, in that order
+ // The payload of this coder is an encoded windowed value using the
+ // beam:coder:windowed_value:v1 coder parameterized by a beam:coder:bytes:v1
+ // element coder and the window coder that this param_windowed_value coder uses.
+ PARAM_WINDOWED_VALUE = 14 [(beam_urn) = "beam:coder:param_windowed_value:v1"];
+
// Encodes an iterable of elements, some of which may be stored elsewhere.
//
// The encoding for a state-backed iterable is the same as that for
@@ -741,10 +756,10 @@
// TODO: consider inlining field on PCollection
message WindowingStrategy {
- // (Required) The SdkFunctionSpec of the UDF that assigns windows,
+ // (Required) The FunctionSpec of the UDF that assigns windows,
// merges windows, and shifts timestamps before they are
// combined according to the OutputTime.
- SdkFunctionSpec window_fn = 1;
+ FunctionSpec window_fn = 1;
// (Required) Whether or not the window fn is merging.
//
@@ -787,6 +802,11 @@
//
// This knowledge is required for some optimizations
bool assigns_to_one_window = 10;
+
+ // (Optional) Environment in which the current window_fn should be applied.
+ // The runner that executes the pipeline may choose to override this if needed.
+ // If not specified, the environment will be decided by the runner.
+ string environment_id = 11;
}
// Whether or not a PCollection's WindowFn is non-merging, merging, or
@@ -1043,20 +1063,35 @@
// URN)
FunctionSpec access_pattern = 1;
- // (Required) The SdkFunctionSpec of the UDF that adapts a particular
+ // (Required) The FunctionSpec of the UDF that adapts a particular
// access_pattern to a user-facing view type.
//
// For example, View.asSingleton() may include a `view_fn` that adapts a
// specially-designed multimap to a single value per window.
- SdkFunctionSpec view_fn = 2;
+ FunctionSpec view_fn = 2;
- // (Required) The SdkFunctionSpec of the UDF that maps a main input window
+ // (Required) The FunctionSpec of the UDF that maps a main input window
// to a side input window.
//
// For example, when the main input is in fixed windows of one hour, this
// can specify that the side input should be accessed according to the day
// in which that hour falls.
- SdkFunctionSpec window_mapping_fn = 3;
+ FunctionSpec window_mapping_fn = 3;
+}
+
+// Settings that decide the coder type of wire coder.
+message WireCoderSetting {
+ // (Required) The URN of the wire coder.
+ // Note that only windowed value coder or parameterized windowed value coder are supported.
+ string urn = 1;
+
+ // (Optional) The data specifying any parameters to the URN. If
+ // the URN is beam:coder:windowed_value:v1, this may be omitted. If the URN is
+ // beam:coder:param_windowed_value:v1, the payload is an encoded windowed
+ // value using the beam:coder:windowed_value:v1 coder parameterized by
+ // a beam:coder:bytes:v1 element coder and the window coder that this
+ // param_windowed_value coder uses.
+ bytes payload = 2;
}
// An environment for executing UDFs. By default, an SDK container URL, but
@@ -1099,18 +1134,6 @@
map<string, string> params = 2; // Arbitrary extra parameters to pass
}
-// A specification of a user defined function.
-//
-message SdkFunctionSpec {
-
- // (Required) A full specification of this function.
- FunctionSpec spec = 1;
-
- // (Required) Reference to an execution environment capable of
- // invoking this function.
- string environment_id = 2;
-}
-
extend google.protobuf.EnumValueOptions {
// An extension to be used for specifying the standard URN of various
// pipeline entities, e.g. transforms, functions, coders etc.
@@ -1258,7 +1281,7 @@
oneof root {
Coder coder = 2;
CombinePayload combine_payload = 3;
- SdkFunctionSpec sdk_function_spec = 4;
+ FunctionSpec function_spec = 4;
ParDoPayload par_do_payload = 6;
PTransform ptransform = 7;
PCollection pcollection = 8;
@@ -1266,7 +1289,6 @@
SideInput side_input = 11;
WindowIntoPayload window_into_payload = 12;
WindowingStrategy windowing_strategy = 13;
- FunctionSpec function_spec = 14;
}
}
@@ -1280,6 +1302,9 @@
// because ExecutableStages use environments directly. This may change in the future.
Environment environment = 1;
+ // set the wire coder of this executable stage
+ WireCoderSetting wire_coder_setting = 9;
+
// (Required) Input PCollection id. This must be present as a value in the inputs of any
// PTransform the ExecutableStagePayload is the payload of.
string input = 2;
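With SdkFunctionSpec removed, user functions are carried as plain FunctionSpecs and the execution environment is referenced directly from the enclosing PTransform (or WindowingStrategy) via the new environment_id field. A minimal sketch of the resulting ParDo representation, assuming the stubs generated from this file are importable as beam_runner_api_pb2 (the DoFn URN and environment id are illustrative):

```python
import beam_runner_api_pb2  # assumed name of the stubs generated from beam_runner_api.proto

# The DoFn is now a plain FunctionSpec; there is no wrapping SdkFunctionSpec.
do_fn = beam_runner_api_pb2.FunctionSpec(
    urn='beam:dofn:example:v1',       # illustrative URN
    payload=b'serialized user DoFn')

pardo = beam_runner_api_pb2.ParDoPayload(do_fn=do_fn)

transform = beam_runner_api_pb2.PTransform(
    unique_name='MyParDo',
    spec=beam_runner_api_pb2.FunctionSpec(
        urn='beam:transform:pardo:v1',
        payload=pardo.SerializeToString()),
    # New in this change: the environment is referenced on the PTransform itself.
    environment_id='python_env')
```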
diff --git a/model/pipeline/src/main/proto/schema.proto b/model/pipeline/src/main/proto/schema.proto
index 1e62d17..2cf404e 100644
--- a/model/pipeline/src/main/proto/schema.proto
+++ b/model/pipeline/src/main/proto/schema.proto
@@ -87,4 +87,56 @@
string urn = 1;
bytes payload = 2;
FieldType representation = 3;
+ FieldType argument_type = 4;
+ FieldValue argument = 5;
}
+
+message Row {
+ repeated FieldValue values = 1;
+}
+
+message FieldValue {
+ oneof field_value {
+ AtomicTypeValue atomic_value = 1;
+ ArrayTypeValue array_value = 2;
+ IterableTypeValue iterable_value = 3;
+ MapTypeValue map_value = 4;
+ Row row_value = 5;
+ LogicalTypeValue logical_type_value = 6;
+ }
+}
+
+message AtomicTypeValue {
+ oneof value {
+ int32 byte = 1;
+ int32 int16 = 2;
+ int32 int32 = 3;
+ int64 int64 = 4;
+ float float = 5;
+ double double = 6;
+ string string = 7;
+ bool boolean = 8;
+ bytes bytes = 9;
+ }
+}
+
+message ArrayTypeValue {
+ repeated FieldValue element = 1;
+}
+
+message IterableTypeValue {
+ repeated FieldValue element = 1;
+}
+
+message MapTypeValue {
+ repeated MapTypeEntry entries = 1;
+}
+
+message MapTypeEntry {
+ FieldValue key = 1;
+ FieldValue value = 2;
+}
+
+message LogicalTypeValue {
+ FieldValue value = 1;
+}
\ No newline at end of file
diff --git a/release/src/main/scripts/publish_docker_images.sh b/release/src/main/scripts/publish_docker_images.sh
old mode 100644
new mode 100755
index 44a133e..fb6ba0d
--- a/release/src/main/scripts/publish_docker_images.sh
+++ b/release/src/main/scripts/publish_docker_images.sh
@@ -24,43 +24,78 @@
set -e
-source release/src/main/scripts/build_release_candidate.sh
+PYTHON_VER=("python2.7" "python3.5" "python3.6" "python3.7")
+FLINK_VER=($(ls -1 runners/flink | awk '/^[0-9]+\.[0-9]+$/{print}'))
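+
+# For each SDK image: pull the already-verified RC image from Docker Hub, retag it as the
+# final release version and as latest, push both tags, then remove the local copies.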
echo "Publish SDK docker images to Docker Hub."
+
+echo "================Setting Up Environment Variables==========="
+echo "Which release version are you working on: "
+read RELEASE
+
+echo "================Setting Up RC candidate Variables==========="
+echo "From which RC candidate do you create publish docker image? (ex: rc0, rc1) "
+read RC_VERSION
+
+echo "================Confirmimg Release and RC version==========="
+echo "We are using ${RC_VERSION} to create docker images for ${RELEASE}."
echo "Do you want to proceed? [y|N]"
read confirmation
if [[ $confirmation = "y" ]]; then
- echo "============Publishing SDK docker images on docker hub========="
- cd ~
- if [[ -d ${LOCAL_PYTHON_STAGING_DIR} ]]; then
- rm -rf ${LOCAL_PYTHON_STAGING_DIR}
- fi
- mkdir -p ${LOCAL_PYTHON_STAGING_DIR}
- cd ${LOCAL_PYTHON_STAGING_DIR}
-
- echo '-------------------Cloning Beam Release Branch-----------------'
- git clone ${GIT_REPO_URL}
- cd ${BEAM_ROOT_DIR}
- git checkout ${RELEASE_BRANCH}
echo '-------------------Generating and Pushing Python images-----------------'
- ./gradlew :sdks:python:container:buildAll -Pdocker-tag=${RELEASE}
for ver in "${PYTHON_VER[@]}"; do
+ # Pull verified RC from dockerhub.
+ docker pull apachebeam/${ver}_sdk:${RELEASE}_${RC_VERSION}
+
+ # Tag with ${RELEASE} and push to dockerhub.
+ docker tag apachebeam/${ver}_sdk:${RELEASE}_${RC_VERSION} apachebeam/${ver}_sdk:${RELEASE}
docker push apachebeam/${ver}_sdk:${RELEASE}
- docker tag apachebeam/${ver}_sdk:${RELEASE} apachebeam/${ver}_sdk:latest
+
+ # Tag with latest and push to dockerhub.
+ docker tag apachebeam/${ver}_sdk:${RELEASE}_${RC_VERSION} apachebeam/${ver}_sdk:latest
docker push apachebeam/${ver}_sdk:latest
+
+ # Clean up local images
+ docker rmi -f apachebeam/${ver}_sdk:${RELEASE}_${RC_VERSION}
+ docker rmi -f apachebeam/${ver}_sdk:${RELEASE}
+ docker rmi -f apachebeam/${ver}_sdk:latest
done
echo '-------------------Generating and Pushing Java images-----------------'
- ./gradlew :sdks:java:container:dockerPush -Pdocker-tag=${RELEASE}
- docker tag apachebeam/java_sdk:${RELEASE} apachebeam/java_sdk:latest
+ # Pull verified RC from dockerhub.
+ docker pull apachebeam/java_sdk:${RELEASE}_${RC_VERSION}
+
+ # Tag with ${RELEASE} and push to dockerhub.
+ docker tag apachebeam/java_sdk:${RELEASE}_${RC_VERSION} apachebeam/java_sdk:${RELEASE}
+ docker push apachebeam/java_sdk:${RELEASE}
+
+ # Tag with latest and push to dockerhub.
+ docker tag apachebeam/java_sdk:${RELEASE}_${RC_VERSION} apachebeam/java_sdk:latest
docker push apachebeam/java_sdk:latest
+ # Clean up local images
+ docker rmi -f apachebeam/java_sdk:${RELEASE}_${RC_VERSION}
+ docker rmi -f apachebeam/java_sdk:${RELEASE}
+ docker rmi -f apachebeam/java_sdk:latest
+
echo '-------------------Generating and Pushing Go images-----------------'
- ./gradlew :sdks:go:container:dockerPush -Pdocker-tag=${RELEASE}
- docker tag apachebeam/go_sdk:${RELEASE} apachebeam/go_sdk:latest
+ # Pull verified RC from dockerhub.
+ docker pull apachebeam/go_sdk:${RELEASE}_${RC_VERSION}
+
+ # Tag with ${RELEASE} and push to dockerhub.
+ docker tag apachebeam/go_sdk:${RELEASE}_${RC_VERSION} apachebeam/go_sdk:${RELEASE}
+ docker push apachebeam/go_sdk:${RELEASE}
+
+ # Tag with latest and push to dockerhub.
+ docker tag apachebeam/go_sdk:${RELEASE}_${RC_VERSION} apachebeam/go_sdk:latest
docker push apachebeam/go_sdk:latest
+ # Clean up local images
+ docker rmi -f apachebeam/go_sdk:${RELEASE}_${RC_VERSION}
+ docker rmi -f apachebeam/go_sdk:${RELEASE}
+ docker rmi -f apachebeam/go_sdk:latest
+
echo '-------------Generating and Pushing Flink job server images-------------'
echo "Building containers for the following Flink versions:" "${FLINK_VER[@]}"
for ver in "${FLINK_VER[@]}"; do
@@ -71,20 +106,6 @@
docker push "${FLINK_IMAGE_NAME}:latest"
done
- rm -rf ~/${PYTHON_ARTIFACTS_DIR}
-
- echo "-------------------Clean up SDK docker images at local-------------------"
- for ver in "${PYTHON_VER[@]}"; do
- docker rmi -f apachebeam/${ver}_sdk:${RELEASE}
- docker rmi -f apachebeam/${ver}_sdk:latest
- done
-
- docker rmi -f apachebeam/java_sdk:${RELEASE}
- docker rmi -f apachebeam/java_sdk:latest
-
- docker rmi -f apachebeam/go_sdk:${RELEASE}
- docker rmi -f apachebeam/go_sdk:latest
-
for ver in "${FLINK_VER[@]}"; do
FLINK_IMAGE_NAME=apachebeam/flink${ver}_job_server
docker rmi -f "${FLINK_IMAGE_NAME}:${RELEASE}"
diff --git a/release/src/main/scripts/verify_release_build.sh b/release/src/main/scripts/verify_release_build.sh
index 8442e9f..f9b0480 100755
--- a/release/src/main/scripts/verify_release_build.sh
+++ b/release/src/main/scripts/verify_release_build.sh
@@ -46,7 +46,6 @@
# To run all PostCommit jobs
"Run Go PostCommit"
"Run Java PostCommit"
- "Run Java PostCommit"
"Run Java PortabilityApi PostCommit"
"Run Java Flink PortableValidatesRunner Batch"
"Run Java Flink PortableValidatesRunner Streaming"
@@ -152,7 +151,7 @@
if [[ ! -z `which hub` ]]; then
git checkout -b ${WORKING_BRANCH} origin/${RELEASE_BRANCH} --quiet
- touch empty_file.txt
+ touch empty_file.json
git add .
git commit -m "Add empty file in order to create a test PR" --quiet
git push -f ${GITHUB_USERNAME} --quiet
diff --git a/runners/apex/build.gradle b/runners/apex/build.gradle
index 739fd9d..d204a0b 100644
--- a/runners/apex/build.gradle
+++ b/runners/apex/build.gradle
@@ -42,7 +42,6 @@
compile library.java.apex_common
compile library.java.malhar_library
compile library.java.apex_engine
- compile library.java.commons_lang3
compile library.java.apex_engine
testCompile project(path: ":sdks:java:core", configuration: "shadowTest")
// ApexStateInternalsTest extends abstract StateInternalsTest
diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java
index d1ae4ec..15e3968 100644
--- a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java
+++ b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java
@@ -62,11 +62,11 @@
import org.apache.apex.api.Launcher.LauncherException;
import org.apache.apex.api.Launcher.ShutdownMode;
import org.apache.apex.api.YarnAppLauncher;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.SerializationUtils;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Splitter;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets;
import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.SerializationUtils;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/TranslationContext.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/TranslationContext.java
index 6d1f4b0..4a19f14 100644
--- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/TranslationContext.java
+++ b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/TranslationContext.java
@@ -28,6 +28,8 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair;
import org.apache.beam.runners.apex.ApexPipelineOptions;
import org.apache.beam.runners.apex.translation.utils.ApexStateInternals;
import org.apache.beam.runners.apex.translation.utils.ApexStateInternals.ApexStateBackend;
@@ -46,8 +48,6 @@
import org.apache.beam.sdk.values.PValue;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables;
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.apache.commons.lang3.tuple.Pair;
/** Maintains context data for {@link TransformTranslator}s. */
@SuppressWarnings({"rawtypes", "unchecked", "TypeParameterUnusedInFormals"})
diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexGroupByKeyOperator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexGroupByKeyOperator.java
index 9a56496..6f90f58 100644
--- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexGroupByKeyOperator.java
+++ b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexGroupByKeyOperator.java
@@ -129,8 +129,8 @@
this.keyCoder = ((KvCoder<K, V>) input.getCoder()).getKeyCoder();
this.valueCoder = ((KvCoder<K, V>) input.getCoder()).getValueCoder();
this.stateInternalsFactory = stateBackend.newStateInternalsFactory(keyCoder);
- TimerInternals.TimerDataCoder timerCoder =
- TimerInternals.TimerDataCoder.of(windowingStrategy.getWindowFn().windowCoder());
+ TimerInternals.TimerDataCoderV2 timerCoder =
+ TimerInternals.TimerDataCoderV2.of(windowingStrategy.getWindowFn().windowCoder());
this.timerInternals = new ApexTimerInternals<>(timerCoder);
}
diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java
index 9d4b110..4841c6a 100644
--- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java
+++ b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java
@@ -187,8 +187,8 @@
this.inputCoder = inputCoder;
this.outputCoders = outputCoders;
- TimerInternals.TimerDataCoder timerCoder =
- TimerInternals.TimerDataCoder.of(windowingStrategy.getWindowFn().windowCoder());
+ TimerInternals.TimerDataCoderV2 timerCoder =
+ TimerInternals.TimerDataCoderV2.of(windowingStrategy.getWindowFn().windowCoder());
this.currentKeyTimerInternals = new ApexTimerInternals<>(timerCoder);
this.doFnSchemaInformation = doFnSchemaInformation;
this.sideInputMapping = sideInputMapping;
@@ -384,7 +384,12 @@
checkArgument(namespace instanceof WindowNamespace);
BoundedWindow window = ((WindowNamespace<?>) namespace).getWindow();
pushbackDoFnRunner.onTimer(
- timerData.getTimerId(), window, timerData.getTimestamp(), timerData.getDomain());
+ timerData.getTimerId(),
+ timerData.getTimerFamilyId(),
+ window,
+ timerData.getTimestamp(),
+ timerData.getOutputTimestamp(),
+ timerData.getDomain());
}
pushbackDoFnRunner.finishBundle();
}
diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternals.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternals.java
index b4028e7..682cbed 100644
--- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternals.java
+++ b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternals.java
@@ -57,7 +57,7 @@
private transient Instant currentOutputWatermark;
private transient Coder<K> keyCoder;
- public ApexTimerInternals(TimerDataCoder timerDataCoder) {
+ public ApexTimerInternals(TimerDataCoderV2 timerDataCoder) {
this.eventTimeTimeTimers = new TimerSet(timerDataCoder);
this.processingTimeTimers = new TimerSet(timerDataCoder);
}
@@ -77,8 +77,14 @@
@Override
public void setTimer(
- StateNamespace namespace, String timerId, Instant target, TimeDomain timeDomain) {
- TimerData timerData = TimerData.of(timerId, namespace, target, timeDomain);
+ StateNamespace namespace,
+ String timerId,
+ String timerFamilyId,
+ Instant target,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
+ TimerData timerData =
+ TimerData.of(timerId, timerFamilyId, namespace, target, outputTimestamp, timeDomain);
setTimer(timerData);
}
@@ -196,10 +202,10 @@
protected static class TimerSet implements Serializable {
private final Map<Slice, Set<Slice>> activeTimers = new HashMap<>();
- private final TimerDataCoder timerDataCoder;
+ private final TimerDataCoderV2 timerDataCoder;
private long minTimestamp = Long.MAX_VALUE;
- protected TimerSet(TimerDataCoder timerDataCoder) {
+ protected TimerSet(TimerDataCoderV2 timerDataCoder) {
this.timerDataCoder = timerDataCoder;
}
diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternalsTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternalsTest.java
index 0bd890a..df3f600 100644
--- a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternalsTest.java
+++ b/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternalsTest.java
@@ -29,7 +29,7 @@
import org.apache.beam.runners.apex.translation.operators.ApexTimerInternals.TimerProcessor;
import org.apache.beam.runners.core.StateNamespaces;
import org.apache.beam.runners.core.TimerInternals.TimerData;
-import org.apache.beam.runners.core.TimerInternals.TimerDataCoder;
+import org.apache.beam.runners.core.TimerInternals.TimerDataCoderV2;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.state.TimeDomain;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
@@ -46,7 +46,7 @@
final Map<String, Collection<TimerData>> firedTimers = new HashMap<>();
TimerProcessor<String> timerProcessor = firedTimers::put;
- TimerDataCoder timerDataCoder = TimerDataCoder.of(GlobalWindow.Coder.INSTANCE);
+ TimerDataCoderV2 timerDataCoder = TimerDataCoderV2.of(GlobalWindow.Coder.INSTANCE);
String key1 = "key1";
Instant instant0 = new Instant(0);
Instant instant1 = new Instant(1);
@@ -56,11 +56,13 @@
timerInternals.setContext(key1, StringUtf8Coder.of(), Instant.now(), null);
TimerData timerData0 =
- TimerData.of("timerData0", StateNamespaces.global(), instant0, TimeDomain.EVENT_TIME);
+ TimerData.of(
+ "timerData0", StateNamespaces.global(), instant0, instant0, TimeDomain.EVENT_TIME);
timerInternals.setTimer(timerData0);
TimerData timerData1 =
- TimerData.of("timerData1", StateNamespaces.global(), instant1, TimeDomain.EVENT_TIME);
+ TimerData.of(
+ "timerData1", StateNamespaces.global(), instant1, instant1, TimeDomain.EVENT_TIME);
timerInternals.setTimer(timerData1);
timerInternals.fireReadyTimers(instant0.getMillis(), timerProcessor, TimeDomain.EVENT_TIME);
@@ -85,7 +87,7 @@
@Test
public void testDeleteTimer() {
- TimerDataCoder timerDataCoder = TimerDataCoder.of(GlobalWindow.Coder.INSTANCE);
+ TimerDataCoderV2 timerDataCoder = TimerDataCoderV2.of(GlobalWindow.Coder.INSTANCE);
String key1 = "key1";
Instant instant0 = new Instant(0);
Instant instant1 = new Instant(1);
@@ -94,11 +96,13 @@
timerInternals.setContext(key1, StringUtf8Coder.of(), Instant.now(), null);
TimerData timerData0 =
- TimerData.of("timerData0", StateNamespaces.global(), instant0, TimeDomain.EVENT_TIME);
+ TimerData.of(
+ "timerData0", StateNamespaces.global(), instant0, instant0, TimeDomain.EVENT_TIME);
timerInternals.setTimer(timerData0);
TimerData timerData1 =
- TimerData.of("timerData1", StateNamespaces.global(), instant1, TimeDomain.EVENT_TIME);
+ TimerData.of(
+ "timerData1", StateNamespaces.global(), instant1, instant1, TimeDomain.EVENT_TIME);
timerInternals.setTimer(timerData1);
Map<?, Set<Slice>> timerMap = timerInternals.getTimerSet(TimeDomain.EVENT_TIME).getMap();
@@ -121,7 +125,7 @@
@Test
public void testSerialization() {
- TimerDataCoder timerDataCoder = TimerDataCoder.of(GlobalWindow.Coder.INSTANCE);
+ TimerDataCoderV2 timerDataCoder = TimerDataCoderV2.of(GlobalWindow.Coder.INSTANCE);
TimerData timerData =
TimerData.of(
"arbitrary-id", StateNamespaces.global(), new Instant(0), TimeDomain.EVENT_TIME);
diff --git a/runners/core-construction-java/build.gradle b/runners/core-construction-java/build.gradle
index e7f4899..b9c842f 100644
--- a/runners/core-construction-java/build.gradle
+++ b/runners/core-construction-java/build.gradle
@@ -38,6 +38,7 @@
compile project(path: ":sdks:java:core", configuration: "shadow")
compile library.java.vendored_grpc_1_21_0
compile library.java.vendored_guava_26_0_jre
+ compile library.java.classgraph
compile library.java.jackson_core
compile library.java.jackson_databind
compile library.java.joda_time
@@ -49,6 +50,7 @@
testCompile library.java.jackson_annotations
testCompile library.java.jackson_dataformat_yaml
testCompile project(path: ":model:fn-execution", configuration: "shadow")
+ testCompile project(path: ":sdks:java:core", configuration: "testRuntime")
testRuntimeOnly library.java.slf4j_jdk14
}
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/CoderTranslators.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/CoderTranslators.java
index 2bf5264..6f156dd 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/CoderTranslators.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/CoderTranslators.java
@@ -125,6 +125,26 @@
};
}
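+ /**
+  * Translator for {@link WindowedValue.ParamWindowedValueCoder}: its components are the value
+  * and window coders, and its payload is the coder's serialized parameter bytes.
+  */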
+ static CoderTranslator<WindowedValue.ParamWindowedValueCoder<?>> paramWindowedValue() {
+ return new CoderTranslator<WindowedValue.ParamWindowedValueCoder<?>>() {
+ @Override
+ public List<? extends Coder<?>> getComponents(WindowedValue.ParamWindowedValueCoder<?> from) {
+ return ImmutableList.of(from.getValueCoder(), from.getWindowCoder());
+ }
+
+ @Override
+ public byte[] getPayload(WindowedValue.ParamWindowedValueCoder<?> from) {
+ return WindowedValue.ParamWindowedValueCoder.getPayload(from);
+ }
+
+ @Override
+ public WindowedValue.ParamWindowedValueCoder<?> fromComponents(
+ List<Coder<?>> components, byte[] payload) {
+ return WindowedValue.ParamWindowedValueCoder.fromComponents(components, payload);
+ }
+ };
+ }
+
static CoderTranslator<RowCoder> row() {
return new CoderTranslator<RowCoder>() {
@Override
@@ -134,7 +154,7 @@
@Override
public byte[] getPayload(RowCoder from) {
- return SchemaTranslation.schemaToProto(from.getSchema()).toByteArray();
+ return SchemaTranslation.schemaToProto(from.getSchema(), true).toByteArray();
}
@Override
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/CombineTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/CombineTranslation.java
index 76881e2..e5edc35 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/CombineTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/CombineTranslation.java
@@ -27,7 +27,6 @@
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.model.pipeline.v1.RunnerApi.CombinePayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
import org.apache.beam.runners.core.construction.PTransformTranslation.TransformPayloadTranslator;
import org.apache.beam.sdk.coders.CannotProvideCoderException;
import org.apache.beam.sdk.coders.Coder;
@@ -234,15 +233,10 @@
.build();
}
- public static SdkFunctionSpec toProto(
- GlobalCombineFn<?, ?, ?> combineFn, SdkComponents components) {
- return SdkFunctionSpec.newBuilder()
- .setEnvironmentId(components.getOnlyEnvironmentId())
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(JAVA_SERIALIZED_COMBINE_FN_URN)
- .setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(combineFn)))
- .build())
+ public static FunctionSpec toProto(GlobalCombineFn<?, ?, ?> combineFn, SdkComponents components) {
+ return FunctionSpec.newBuilder()
+ .setUrn(JAVA_SERIALIZED_COMBINE_FN_URN)
+ .setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(combineFn)))
.build();
}
}
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Environments.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Environments.java
index 79b0111..ed94642 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Environments.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Environments.java
@@ -23,56 +23,21 @@
import java.util.Optional;
import javax.annotation.Nullable;
import org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor;
-import org.apache.beam.model.pipeline.v1.RunnerApi.CombinePayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.Components;
import org.apache.beam.model.pipeline.v1.RunnerApi.DockerPayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.Environment;
import org.apache.beam.model.pipeline.v1.RunnerApi.ExternalPayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.PTransform;
-import org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.ProcessPayload;
-import org.apache.beam.model.pipeline.v1.RunnerApi.ReadPayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.StandardEnvironments;
-import org.apache.beam.model.pipeline.v1.RunnerApi.WindowIntoPayload;
import org.apache.beam.sdk.util.ReleaseInfo;
import org.apache.beam.sdk.util.common.ReflectHelpers;
import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.ByteString;
-import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.InvalidProtocolBufferException;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.MoreObjects;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Strings;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
/** Utilities for interacting with portability {@link Environment environments}. */
public class Environments {
- private static final ImmutableMap<String, EnvironmentIdExtractor> KNOWN_URN_SPEC_EXTRACTORS =
- ImmutableMap.<String, EnvironmentIdExtractor>builder()
- .put(PTransformTranslation.COMBINE_PER_KEY_TRANSFORM_URN, Environments::combineExtractor)
- .put(
- PTransformTranslation.COMBINE_PER_KEY_PRECOMBINE_TRANSFORM_URN,
- Environments::combineExtractor)
- .put(
- PTransformTranslation.COMBINE_PER_KEY_MERGE_ACCUMULATORS_TRANSFORM_URN,
- Environments::combineExtractor)
- .put(
- PTransformTranslation.COMBINE_PER_KEY_EXTRACT_OUTPUTS_TRANSFORM_URN,
- Environments::combineExtractor)
- .put(PTransformTranslation.PAR_DO_TRANSFORM_URN, Environments::parDoExtractor)
- .put(PTransformTranslation.SPLITTABLE_PROCESS_ELEMENTS_URN, Environments::parDoExtractor)
- .put(
- PTransformTranslation.SPLITTABLE_PAIR_WITH_RESTRICTION_URN,
- Environments::parDoExtractor)
- .put(
- PTransformTranslation.SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN,
- Environments::parDoExtractor)
- .put(
- PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN,
- Environments::parDoExtractor)
- .put(PTransformTranslation.READ_TRANSFORM_URN, Environments::readExtractor)
- .put(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN, Environments::windowExtractor)
- .build();
-
- private static final EnvironmentIdExtractor DEFAULT_SPEC_EXTRACTOR = transform -> null;
-
private static final ObjectMapper MAPPER =
new ObjectMapper()
.registerModules(ObjectMapper.findModules(ReflectHelpers.findClassLoader()));
@@ -181,73 +146,31 @@
}
public static Optional<Environment> getEnvironment(String ptransformId, Components components) {
- try {
- PTransform ptransform = components.getTransformsOrThrow(ptransformId);
- String envId =
- KNOWN_URN_SPEC_EXTRACTORS
- .getOrDefault(ptransform.getSpec().getUrn(), DEFAULT_SPEC_EXTRACTOR)
- .getEnvironmentId(ptransform);
- if (Strings.isNullOrEmpty(envId)) {
- // Some PTransform payloads may have an unspecified (empty) Environment ID, for example a
- // WindowIntoPayload with a known WindowFn. Others will never have an Environment ID, such
- // as a GroupByKeyPayload, and the Default extractor returns null in this case.
- return Optional.empty();
- } else {
- return Optional.of(components.getEnvironmentsOrThrow(envId));
- }
- } catch (IOException e) {
- throw new RuntimeException(e);
+ PTransform ptransform = components.getTransformsOrThrow(ptransformId);
+ String envId = ptransform.getEnvironmentId();
+ if (Strings.isNullOrEmpty(envId)) {
+ // Some PTransform payloads may have an unspecified (empty) Environment ID, for example a
+ // WindowIntoPayload with a known WindowFn. Others will never have an Environment ID, such
+ // as a GroupByKeyPayload, and we return an empty Optional in this case.
+ return Optional.empty();
+ } else {
+ return Optional.of(components.getEnvironmentsOrThrow(envId));
}
}
public static Optional<Environment> getEnvironment(
PTransform ptransform, RehydratedComponents components) {
- try {
- String envId =
- KNOWN_URN_SPEC_EXTRACTORS
- .getOrDefault(ptransform.getSpec().getUrn(), DEFAULT_SPEC_EXTRACTOR)
- .getEnvironmentId(ptransform);
- if (!Strings.isNullOrEmpty(envId)) {
- // Some PTransform payloads may have an empty (default) Environment ID, for example a
- // WindowIntoPayload with a known WindowFn. Others will never have an Environment ID, such
- // as a GroupByKeyPayload, and the Default extractor returns null in this case.
- return Optional.of(components.getEnvironment(envId));
- } else {
- return Optional.empty();
- }
- } catch (IOException e) {
- throw new RuntimeException(e);
+ String envId = ptransform.getEnvironmentId();
+ if (Strings.isNullOrEmpty(envId)) {
+ return Optional.empty();
+ } else {
+ // Some PTransform payloads may have an empty (default) Environment ID, for example a
+ // WindowIntoPayload with a known WindowFn. Others will never have an Environment ID, such
+ // as a GroupByKeyPayload, and the check above returns an empty Optional in that case.
+ return Optional.of(components.getEnvironment(envId));
}
}
- private interface EnvironmentIdExtractor {
- @Nullable
- String getEnvironmentId(PTransform transform) throws IOException;
- }
-
- private static String parDoExtractor(PTransform pTransform)
- throws InvalidProtocolBufferException {
- return ParDoPayload.parseFrom(pTransform.getSpec().getPayload()).getDoFn().getEnvironmentId();
- }
-
- private static String combineExtractor(PTransform pTransform)
- throws InvalidProtocolBufferException {
- return CombinePayload.parseFrom(pTransform.getSpec().getPayload())
- .getCombineFn()
- .getEnvironmentId();
- }
-
- private static String readExtractor(PTransform transform) throws InvalidProtocolBufferException {
- return ReadPayload.parseFrom(transform.getSpec().getPayload()).getSource().getEnvironmentId();
- }
-
- private static String windowExtractor(PTransform transform)
- throws InvalidProtocolBufferException {
- return WindowIntoPayload.parseFrom(transform.getSpec().getPayload())
- .getWindowFn()
- .getEnvironmentId();
- }
-
private static class ProcessPayloadReferenceJSON {
@Nullable private String os;
@Nullable private String arch;
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/External.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/External.java
index 1a842b6..d58346b 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/External.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/External.java
@@ -66,16 +66,17 @@
return namespaceCounter.getAndIncrement();
}
- public static <OutputT> SingleOutputExpandableTransform<OutputT> of(
- String urn, byte[] payload, String endpoint) {
+ public static <InputT extends PInput, OutputT>
+ SingleOutputExpandableTransform<InputT, OutputT> of(
+ String urn, byte[] payload, String endpoint) {
Endpoints.ApiServiceDescriptor apiDesc =
Endpoints.ApiServiceDescriptor.newBuilder().setUrl(endpoint).build();
return new SingleOutputExpandableTransform<>(urn, payload, apiDesc, getFreshNamespaceIndex());
}
/** Expandable transform for output type of PCollection. */
- public static class SingleOutputExpandableTransform<OutputT>
- extends ExpandableTransform<PCollection<OutputT>> {
+ public static class SingleOutputExpandableTransform<InputT extends PInput, OutputT>
+ extends ExpandableTransform<InputT, PCollection<OutputT>> {
SingleOutputExpandableTransform(
String urn,
byte[] payload,
@@ -90,14 +91,20 @@
return Iterables.getOnlyElement(output.values());
}
- public MultiOutputExpandableTransform withMultiOutputs() {
- return new MultiOutputExpandableTransform(
+ public MultiOutputExpandableTransform<InputT> withMultiOutputs() {
+ return new MultiOutputExpandableTransform<>(
+ getUrn(), getPayload(), getEndpoint(), getNamespaceIndex());
+ }
+
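+ /** Returns a copy of this transform with the output element type rebound to {@code T}. */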
+ public <T> SingleOutputExpandableTransform<InputT, T> withOutputType() {
+ return new SingleOutputExpandableTransform<>(
getUrn(), getPayload(), getEndpoint(), getNamespaceIndex());
}
}
/** Expandable transform for output type of PCollectionTuple. */
- public static class MultiOutputExpandableTransform extends ExpandableTransform<PCollectionTuple> {
+ public static class MultiOutputExpandableTransform<InputT extends PInput>
+ extends ExpandableTransform<InputT, PCollectionTuple> {
MultiOutputExpandableTransform(
String urn,
byte[] payload,
@@ -119,8 +126,8 @@
}
/** Base Expandable Transform which calls ExpansionService to expand itself. */
- public abstract static class ExpandableTransform<OutputT extends POutput>
- extends PTransform<PInput, OutputT> {
+ public abstract static class ExpandableTransform<InputT extends PInput, OutputT extends POutput>
+ extends PTransform<InputT, OutputT> {
private final String urn;
private final byte[] payload;
private final Endpoints.ApiServiceDescriptor endpoint;
@@ -142,7 +149,7 @@
}
@Override
- public OutputT expand(PInput input) {
+ public OutputT expand(InputT input) {
Pipeline p = input.getPipeline();
SdkComponents components = SdkComponents.create(p.getOptions());
RunnerApi.PTransform.Builder ptransformBuilder =
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ExternalTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ExternalTranslation.java
index 937e982..39be910 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ExternalTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ExternalTranslation.java
@@ -37,7 +37,7 @@
/** Translator for ExpandableTransform. */
public static class ExternalTranslator
- implements PTransformTranslation.TransformTranslator<External.ExpandableTransform<?>> {
+ implements PTransformTranslation.TransformTranslator<External.ExpandableTransform<?, ?>> {
public static PTransformTranslation.TransformTranslator create() {
return new ExternalTranslator();
}
@@ -125,6 +125,7 @@
transformBuilder
.setUniqueName(proto.getUniqueName())
.setSpec(proto.getSpec())
+ .setEnvironmentId(proto.getEnvironmentId())
.addAllSubtransforms(proto.getSubtransformsList());
for (Map.Entry<String, String> inputEntry : proto.getInputsMap().entrySet()) {
transformBuilder.putInputs(
@@ -144,6 +145,7 @@
.setUniqueName(expandedTransform.getUniqueName())
.setSpec(expandedTransform.getSpec())
.addAllSubtransforms(expandedTransform.getSubtransformsList())
+ .setEnvironmentId(expandedTransform.getEnvironmentId())
.putAllInputs(expandedTransform.getInputsMap());
for (Map.Entry<String, String> outputEntry : expandedTransform.getOutputsMap().entrySet()) {
rootTransformBuilder.putOutputs(
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ModelCoderRegistrar.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ModelCoderRegistrar.java
index 854f523..8ff9063 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ModelCoderRegistrar.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ModelCoderRegistrar.java
@@ -34,6 +34,7 @@
import org.apache.beam.sdk.coders.VarLongCoder;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder;
+import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.BiMap;
@@ -60,6 +61,9 @@
.put(LengthPrefixCoder.class, ModelCoders.LENGTH_PREFIX_CODER_URN)
.put(GlobalWindow.Coder.class, ModelCoders.GLOBAL_WINDOW_CODER_URN)
.put(FullWindowedValueCoder.class, ModelCoders.WINDOWED_VALUE_CODER_URN)
+ .put(
+ WindowedValue.ParamWindowedValueCoder.class,
+ ModelCoders.PARAM_WINDOWED_VALUE_CODER_URN)
.put(DoubleCoder.class, ModelCoders.DOUBLE_CODER_URN)
.put(RowCoder.class, ModelCoders.ROW_CODER_URN)
.build();
@@ -80,6 +84,7 @@
.put(Timer.Coder.class, CoderTranslators.timer())
.put(LengthPrefixCoder.class, CoderTranslators.lengthPrefix())
.put(FullWindowedValueCoder.class, CoderTranslators.fullWindowedValue())
+ .put(WindowedValue.ParamWindowedValueCoder.class, CoderTranslators.paramWindowedValue())
.put(DoubleCoder.class, CoderTranslators.atomic(DoubleCoder.class))
.put(RowCoder.class, CoderTranslators.row())
.build();
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ModelCoders.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ModelCoders.java
index 486e39c..3d6d4dd 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ModelCoders.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ModelCoders.java
@@ -25,6 +25,7 @@
import org.apache.beam.model.pipeline.v1.RunnerApi.Coder;
import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.StandardCoders;
+import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.ByteString;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet;
/** Utilities and constants to interact with coders that are part of the Beam Model. */
@@ -53,6 +54,8 @@
getUrn(StandardCoders.Enum.INTERVAL_WINDOW);
public static final String WINDOWED_VALUE_CODER_URN = getUrn(StandardCoders.Enum.WINDOWED_VALUE);
+ public static final String PARAM_WINDOWED_VALUE_CODER_URN =
+ getUrn(StandardCoders.Enum.PARAM_WINDOWED_VALUE);
public static final String ROW_CODER_URN = getUrn(StandardCoders.Enum.ROW);
@@ -70,7 +73,8 @@
INTERVAL_WINDOW_CODER_URN,
WINDOWED_VALUE_CODER_URN,
DOUBLE_CODER_URN,
- ROW_CODER_URN);
+ ROW_CODER_URN,
+ PARAM_WINDOWED_VALUE_CODER_URN);
public static Set<String> urns() {
return MODEL_CODER_URNS;
@@ -90,6 +94,18 @@
.build();
}
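+ /**
+  * Builds a {@link Coder} proto for the parameterized windowed value coder, attaching the given
+  * payload and the element and window coder ids as component coders.
+  */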
+ public static Coder paramWindowedValueCoder(
+ String elementCoderId, String windowCoderId, byte[] payload) {
+ return Coder.newBuilder()
+ .setSpec(
+ FunctionSpec.newBuilder()
+ .setUrn(PARAM_WINDOWED_VALUE_CODER_URN)
+ .setPayload(ByteString.copyFrom(payload)))
+ .addComponentCoderIds(elementCoderId)
+ .addComponentCoderIds(windowCoderId)
+ .build();
+ }
+
/** Components of a Windowed Value {@link Coder} with names. */
@AutoValue
public abstract static class WindowedValueCoderComponents {
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PCollectionViewTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PCollectionViewTranslation.java
index d9ad758..d9ddb93 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PCollectionViewTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PCollectionViewTranslation.java
@@ -72,12 +72,12 @@
}
/**
- * Converts a {@link org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec} into a {@link
+ * Converts a {@link org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec} into a {@link
* ViewFn} using the URN.
*/
- public static ViewFn<?, ?> viewFnFromProto(RunnerApi.SdkFunctionSpec viewFn)
+ public static ViewFn<?, ?> viewFnFromProto(RunnerApi.FunctionSpec viewFn)
throws InvalidProtocolBufferException {
- RunnerApi.FunctionSpec spec = viewFn.getSpec();
+ RunnerApi.FunctionSpec spec = viewFn;
checkArgument(
spec.getUrn().equals(ParDoTranslation.CUSTOM_JAVA_VIEW_FN_URN),
"Can't deserialize unknown %s type %s",
@@ -89,12 +89,12 @@
}
/**
- * Converts a {@link org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec} into a {@link
+ * Converts a {@link org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec} into a {@link
* WindowMappingFn} using the URN.
*/
- public static WindowMappingFn<?> windowMappingFnFromProto(
- RunnerApi.SdkFunctionSpec windowMappingFn) throws InvalidProtocolBufferException {
- RunnerApi.FunctionSpec spec = windowMappingFn.getSpec();
+ public static WindowMappingFn<?> windowMappingFnFromProto(RunnerApi.FunctionSpec windowMappingFn)
+ throws InvalidProtocolBufferException {
+ RunnerApi.FunctionSpec spec = windowMappingFn;
checkArgument(
spec.getUrn().equals(ParDoTranslation.CUSTOM_JAVA_WINDOW_MAPPING_FN_URN),
"Can't deserialize unknown %s type %s",
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java
index 8c73964..95f50ea 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java
@@ -19,6 +19,7 @@
import static org.apache.beam.runners.core.construction.BeamUrns.getUrn;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
+import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState;
import java.io.IOException;
import java.util.Collection;
@@ -38,6 +39,7 @@
import org.apache.beam.runners.core.construction.ExternalTranslation.ExternalTranslator;
import org.apache.beam.runners.core.construction.ParDoTranslation.ParDoTranslator;
import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.io.Read;
import org.apache.beam.sdk.runners.AppliedPTransform;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.display.DisplayData;
@@ -48,6 +50,7 @@
import org.apache.beam.sdk.values.PValue;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Joiner;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSortedSet;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables;
@@ -58,68 +61,131 @@
* buffers}.
*/
public class PTransformTranslation {
+ // We specifically copy the values here so that they can be used in switch case statements
+ // and we validate that the value matches the actual URN in the static block below.
- public static final String PAR_DO_TRANSFORM_URN = getUrn(StandardPTransforms.Primitives.PAR_DO);
- public static final String FLATTEN_TRANSFORM_URN = getUrn(StandardPTransforms.Primitives.FLATTEN);
- public static final String GROUP_BY_KEY_TRANSFORM_URN =
- getUrn(StandardPTransforms.Primitives.GROUP_BY_KEY);
- public static final String IMPULSE_TRANSFORM_URN = getUrn(StandardPTransforms.Primitives.IMPULSE);
- public static final String ASSIGN_WINDOWS_TRANSFORM_URN =
- getUrn(StandardPTransforms.Primitives.ASSIGN_WINDOWS);
- public static final String TEST_STREAM_TRANSFORM_URN =
- getUrn(StandardPTransforms.Primitives.TEST_STREAM);
- public static final String MAP_WINDOWS_TRANSFORM_URN =
- getUrn(StandardPTransforms.Primitives.MAP_WINDOWS);
+ // Primitives
+ public static final String PAR_DO_TRANSFORM_URN = "beam:transform:pardo:v1";
+ public static final String FLATTEN_TRANSFORM_URN = "beam:transform:flatten:v1";
+ public static final String GROUP_BY_KEY_TRANSFORM_URN = "beam:transform:group_by_key:v1";
+ public static final String IMPULSE_TRANSFORM_URN = "beam:transform:impulse:v1";
+ public static final String ASSIGN_WINDOWS_TRANSFORM_URN = "beam:transform:window_into:v1";
+ public static final String TEST_STREAM_TRANSFORM_URN = "beam:transform:teststream:v1";
+ public static final String MAP_WINDOWS_TRANSFORM_URN = "beam:transform:map_windows:v1";
+ // DeprecatedPrimitives
/**
* @deprecated SDKs should move away from creating `Read` transforms and migrate to using Impulse
* + SplittableDoFns.
*/
- @Deprecated
- public static final String READ_TRANSFORM_URN =
- getUrn(StandardPTransforms.DeprecatedPrimitives.READ);
+ @Deprecated public static final String READ_TRANSFORM_URN = "beam:transform:read:v1";
+
/**
* @deprecated runners should move away from translating `CreatePCollectionView` and treat this as
* part of the translation for a `ParDo` side input.
*/
@Deprecated
- public static final String CREATE_VIEW_TRANSFORM_URN =
- getUrn(StandardPTransforms.DeprecatedPrimitives.CREATE_VIEW);
+ public static final String CREATE_VIEW_TRANSFORM_URN = "beam:transform:create_view:v1";
- public static final String COMBINE_PER_KEY_TRANSFORM_URN =
- getUrn(StandardPTransforms.Composites.COMBINE_PER_KEY);
- public static final String COMBINE_GLOBALLY_TRANSFORM_URN =
- getUrn(StandardPTransforms.Composites.COMBINE_GLOBALLY);
- public static final String COMBINE_GROUPED_VALUES_TRANSFORM_URN =
- getUrn(CombineComponents.COMBINE_GROUPED_VALUES);
+ // Composites
+ public static final String COMBINE_PER_KEY_TRANSFORM_URN = "beam:transform:combine_per_key:v1";
+ public static final String COMBINE_GLOBALLY_TRANSFORM_URN = "beam:transform:combine_globally:v1";
+ public static final String RESHUFFLE_URN = "beam:transform:reshuffle:v1";
+ public static final String WRITE_FILES_TRANSFORM_URN = "beam:transform:write_files:v1";
+
+ // CombineComponents
public static final String COMBINE_PER_KEY_PRECOMBINE_TRANSFORM_URN =
- getUrn(CombineComponents.COMBINE_PER_KEY_PRECOMBINE);
+ "beam:transform:combine_per_key_precombine:v1";
public static final String COMBINE_PER_KEY_MERGE_ACCUMULATORS_TRANSFORM_URN =
- getUrn(CombineComponents.COMBINE_PER_KEY_MERGE_ACCUMULATORS);
+ "beam:transform:combine_per_key_merge_accumulators:v1";
public static final String COMBINE_PER_KEY_EXTRACT_OUTPUTS_TRANSFORM_URN =
- getUrn(CombineComponents.COMBINE_PER_KEY_EXTRACT_OUTPUTS);
- public static final String RESHUFFLE_URN = getUrn(StandardPTransforms.Composites.RESHUFFLE);
- public static final String WRITE_FILES_TRANSFORM_URN =
- getUrn(StandardPTransforms.Composites.WRITE_FILES);
+ "beam:transform:combine_per_key_extract_outputs:v1";
+ public static final String COMBINE_GROUPED_VALUES_TRANSFORM_URN =
+ "beam:transform:combine_grouped_values:v1";
// SplittableParDoComponents
public static final String SPLITTABLE_PAIR_WITH_RESTRICTION_URN =
- getUrn(SplittableParDoComponents.PAIR_WITH_RESTRICTION);
+ "beam:transform:sdf_pair_with_restriction:v1";
public static final String SPLITTABLE_SPLIT_RESTRICTION_URN =
- getUrn(SplittableParDoComponents.SPLIT_RESTRICTION);
+ "beam:transform:sdf_split_restriction:v1";
+ /**
+ * @deprecated runners should move away from using `SplittableProcessKeyedElements` and prefer to
+ * internalize any necessary SplittableDoFn expansion.
+ */
+ @Deprecated
public static final String SPLITTABLE_PROCESS_KEYED_URN =
- getUrn(SplittableParDoComponents.PROCESS_KEYED_ELEMENTS);
- public static final String SPLITTABLE_PROCESS_ELEMENTS_URN =
- getUrn(SplittableParDoComponents.PROCESS_ELEMENTS);
- public static final String SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN =
- getUrn(SplittableParDoComponents.SPLIT_AND_SIZE_RESTRICTIONS);
- public static final String SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN =
- getUrn(SplittableParDoComponents.PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS);
+ "beam:transform:sdf_process_keyed_elements:v1";
- public static final String ITERABLE_SIDE_INPUT =
- getUrn(RunnerApi.StandardSideInputTypes.Enum.ITERABLE);
- public static final String MULTIMAP_SIDE_INPUT =
- getUrn(RunnerApi.StandardSideInputTypes.Enum.MULTIMAP);
+ public static final String SPLITTABLE_PROCESS_ELEMENTS_URN =
+ "beam:transform:sdf_process_elements:v1";
+ public static final String SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN =
+ "beam:transform:sdf_split_and_size_restrictions:v1";
+ public static final String SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN =
+ "beam:transform:sdf_process_sized_element_and_restrictions:v1";
+
+ static {
+ checkState(PAR_DO_TRANSFORM_URN.equals(getUrn(StandardPTransforms.Primitives.PAR_DO)));
+ checkState(FLATTEN_TRANSFORM_URN.equals(getUrn(StandardPTransforms.Primitives.FLATTEN)));
+ checkState(
+ GROUP_BY_KEY_TRANSFORM_URN.equals(getUrn(StandardPTransforms.Primitives.GROUP_BY_KEY)));
+ checkState(IMPULSE_TRANSFORM_URN.equals(getUrn(StandardPTransforms.Primitives.IMPULSE)));
+ checkState(
+ ASSIGN_WINDOWS_TRANSFORM_URN.equals(getUrn(StandardPTransforms.Primitives.ASSIGN_WINDOWS)));
+ checkState(
+ TEST_STREAM_TRANSFORM_URN.equals(getUrn(StandardPTransforms.Primitives.TEST_STREAM)));
+ checkState(
+ MAP_WINDOWS_TRANSFORM_URN.equals(getUrn(StandardPTransforms.Primitives.MAP_WINDOWS)));
+
+ // DeprecatedPrimitives
+ checkState(READ_TRANSFORM_URN.equals(getUrn(StandardPTransforms.DeprecatedPrimitives.READ)));
+ checkState(
+ CREATE_VIEW_TRANSFORM_URN.equals(
+ getUrn(StandardPTransforms.DeprecatedPrimitives.CREATE_VIEW)));
+
+ // Composites
+ checkState(
+ COMBINE_PER_KEY_TRANSFORM_URN.equals(
+ getUrn(StandardPTransforms.Composites.COMBINE_PER_KEY)));
+ checkState(
+ COMBINE_GLOBALLY_TRANSFORM_URN.equals(
+ getUrn(StandardPTransforms.Composites.COMBINE_GLOBALLY)));
+ checkState(RESHUFFLE_URN.equals(getUrn(StandardPTransforms.Composites.RESHUFFLE)));
+ checkState(
+ WRITE_FILES_TRANSFORM_URN.equals(getUrn(StandardPTransforms.Composites.WRITE_FILES)));
+
+ // CombineComponents
+ checkState(
+ COMBINE_PER_KEY_PRECOMBINE_TRANSFORM_URN.equals(
+ getUrn(CombineComponents.COMBINE_PER_KEY_PRECOMBINE)));
+ checkState(
+ COMBINE_PER_KEY_MERGE_ACCUMULATORS_TRANSFORM_URN.equals(
+ getUrn(CombineComponents.COMBINE_PER_KEY_MERGE_ACCUMULATORS)));
+ checkState(
+ COMBINE_PER_KEY_EXTRACT_OUTPUTS_TRANSFORM_URN.equals(
+ getUrn(CombineComponents.COMBINE_PER_KEY_EXTRACT_OUTPUTS)));
+ checkState(
+ COMBINE_GROUPED_VALUES_TRANSFORM_URN.equals(
+ getUrn(CombineComponents.COMBINE_GROUPED_VALUES)));
+
+ // SplittableParDoComponents
+ checkState(
+ SPLITTABLE_PAIR_WITH_RESTRICTION_URN.equals(
+ getUrn(SplittableParDoComponents.PAIR_WITH_RESTRICTION)));
+ checkState(
+ SPLITTABLE_SPLIT_RESTRICTION_URN.equals(
+ getUrn(SplittableParDoComponents.SPLIT_RESTRICTION)));
+ checkState(
+ SPLITTABLE_PROCESS_KEYED_URN.equals(
+ getUrn(SplittableParDoComponents.PROCESS_KEYED_ELEMENTS)));
+ checkState(
+ SPLITTABLE_PROCESS_ELEMENTS_URN.equals(getUrn(SplittableParDoComponents.PROCESS_ELEMENTS)));
+ checkState(
+ SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN.equals(
+ getUrn(SplittableParDoComponents.SPLIT_AND_SIZE_RESTRICTIONS)));
+ checkState(
+ SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN.equals(
+ getUrn(SplittableParDoComponents.PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS)));
+ }
private static final Collection<TransformTranslator<?>> KNOWN_TRANSLATORS =
loadKnownTranslators();
@@ -278,7 +344,7 @@
if (spec != null) {
transformBuilder.setSpec(spec);
}
-
+ transformBuilder.setEnvironmentId(components.getOnlyEnvironmentId());
return transformBuilder.build();
}
}
@@ -292,6 +358,11 @@
private static final Map<Class<? extends PTransform>, TransformPayloadTranslator>
KNOWN_PAYLOAD_TRANSLATORS = loadTransformPayloadTranslators();
+ // TODO: BEAM-9001 - set environment ID in all transforms and allow runners to override.
+ private static List<String> sdkTransformsWithEnvironment =
+ ImmutableList.of(
+ PAR_DO_TRANSFORM_URN, COMBINE_PER_KEY_TRANSFORM_URN, ASSIGN_WINDOWS_TRANSFORM_URN);
+
private static Map<Class<? extends PTransform>, TransformPayloadTranslator>
loadTransformPayloadTranslators() {
HashMap<Class<? extends PTransform>, TransformPayloadTranslator> translators =
@@ -342,6 +413,16 @@
.translate(appliedPTransform, components);
if (spec != null) {
transformBuilder.setSpec(spec);
+
+ if (sdkTransformsWithEnvironment.contains(spec.getUrn())) {
+ transformBuilder.setEnvironmentId(components.getOnlyEnvironmentId());
+ } else if (spec.getUrn().equals(READ_TRANSFORM_URN)
+ && (appliedPTransform.getTransform().getClass() == Read.Bounded.class)) {
+ // Only assign an environment to bounded reads. Unbounded reads are not assigned an
+ // environment because they are a runner-translated transform, unless, in the future, an
+ // adapter becomes available for splittable DoFn.
+ transformBuilder.setEnvironmentId(components.getOnlyEnvironmentId());
+ }
}
return transformBuilder.build();
}
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
index 280e2f3..8a191e5 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
@@ -18,6 +18,11 @@
package org.apache.beam.runners.core.construction;
import static org.apache.beam.runners.core.construction.PTransformTranslation.PAR_DO_TRANSFORM_URN;
+import static org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_PAIR_WITH_RESTRICTION_URN;
+import static org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_PROCESS_ELEMENTS_URN;
+import static org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN;
+import static org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN;
+import static org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_SPLIT_RESTRICTION_URN;
import static org.apache.beam.sdk.transforms.reflect.DoFnSignatures.getStateSpecOrThrow;
import static org.apache.beam.sdk.transforms.reflect.DoFnSignatures.getTimerSpecOrThrow;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
@@ -37,7 +42,6 @@
import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.Parameter.Type;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.SideInput;
import org.apache.beam.model.pipeline.v1.RunnerApi.SideInput.Builder;
import org.apache.beam.runners.core.construction.PTransformTranslation.TransformPayloadTranslator;
@@ -127,6 +131,7 @@
.setUrn(PAR_DO_TRANSFORM_URN)
.setPayload(payload.toByteString())
.build());
+ builder.setEnvironmentId(components.getOnlyEnvironmentId());
String mainInputName = getMainInputName(builder, payload);
PCollection<KV<?, ?>> mainInput =
@@ -207,7 +212,7 @@
return payloadForParDoLike(
new ParDoLike() {
@Override
- public SdkFunctionSpec translateDoFn(SdkComponents newComponents) {
+ public FunctionSpec translateDoFn(SdkComponents newComponents) {
return ParDoTranslation.translateDoFn(
parDo.getFn(),
parDo.getMainOutputTag(),
@@ -414,7 +419,13 @@
public static RunnerApi.PCollection getMainInput(
RunnerApi.PTransform ptransform, Components components) throws IOException {
checkArgument(
- ptransform.getSpec().getUrn().equals(PAR_DO_TRANSFORM_URN),
+ PAR_DO_TRANSFORM_URN.equals(ptransform.getSpec().getUrn())
+ || SPLITTABLE_PAIR_WITH_RESTRICTION_URN.equals(ptransform.getSpec().getUrn())
+ || SPLITTABLE_SPLIT_RESTRICTION_URN.equals(ptransform.getSpec().getUrn())
+ || SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN.equals(ptransform.getSpec().getUrn())
+ || SPLITTABLE_PROCESS_ELEMENTS_URN.equals(ptransform.getSpec().getUrn())
+ || SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN.equals(
+ ptransform.getSpec().getUrn()),
"Unexpected payload type %s",
ptransform.getSpec().getUrn());
return components.getPcollectionsOrThrow(
@@ -425,7 +436,13 @@
public static String getMainInputName(RunnerApi.PTransformOrBuilder ptransform)
throws IOException {
checkArgument(
- ptransform.getSpec().getUrn().equals(PAR_DO_TRANSFORM_URN),
+ PAR_DO_TRANSFORM_URN.equals(ptransform.getSpec().getUrn())
+ || SPLITTABLE_PAIR_WITH_RESTRICTION_URN.equals(ptransform.getSpec().getUrn())
+ || SPLITTABLE_SPLIT_RESTRICTION_URN.equals(ptransform.getSpec().getUrn())
+ || SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN.equals(ptransform.getSpec().getUrn())
+ || SPLITTABLE_PROCESS_ELEMENTS_URN.equals(ptransform.getSpec().getUrn())
+ || SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN.equals(
+ ptransform.getSpec().getUrn()),
"Unexpected payload type %s",
ptransform.getSpec().getUrn());
ParDoPayload payload = ParDoPayload.parseFrom(ptransform.getSpec().getPayload());
@@ -508,7 +525,7 @@
case BAG_SPEC:
return StateSpecs.bag(components.getCoder(stateSpec.getBagSpec().getElementCoderId()));
case COMBINING_SPEC:
- FunctionSpec combineFnSpec = stateSpec.getCombiningSpec().getCombineFn().getSpec();
+ FunctionSpec combineFnSpec = stateSpec.getCombiningSpec().getCombineFn();
if (!combineFnSpec.getUrn().equals(CombineTranslation.JAVA_SERIALIZED_COMBINE_FN_URN)) {
throw new UnsupportedOperationException(
@@ -576,36 +593,32 @@
}
}
- public static SdkFunctionSpec translateDoFn(
+ public static FunctionSpec translateDoFn(
DoFn<?, ?> fn,
TupleTag<?> tag,
Map<String, PCollectionView<?>> sideInputMapping,
DoFnSchemaInformation doFnSchemaInformation,
SdkComponents components) {
- return SdkFunctionSpec.newBuilder()
- .setEnvironmentId(components.getOnlyEnvironmentId())
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(CUSTOM_JAVA_DO_FN_URN)
- .setPayload(
- ByteString.copyFrom(
- SerializableUtils.serializeToByteArray(
- DoFnWithExecutionInformation.of(
- fn, tag, sideInputMapping, doFnSchemaInformation))))
- .build())
+ return FunctionSpec.newBuilder()
+ .setUrn(CUSTOM_JAVA_DO_FN_URN)
+ .setPayload(
+ ByteString.copyFrom(
+ SerializableUtils.serializeToByteArray(
+ DoFnWithExecutionInformation.of(
+ fn, tag, sideInputMapping, doFnSchemaInformation))))
.build();
}
public static DoFnWithExecutionInformation doFnWithExecutionInformationFromProto(
- SdkFunctionSpec fnSpec) {
+ FunctionSpec fnSpec) {
checkArgument(
- fnSpec.getSpec().getUrn().equals(CUSTOM_JAVA_DO_FN_URN),
+ fnSpec.getUrn().equals(CUSTOM_JAVA_DO_FN_URN),
"Expected %s to be %s with URN %s, but URN was %s",
DoFn.class.getSimpleName(),
FunctionSpec.class.getSimpleName(),
CUSTOM_JAVA_DO_FN_URN,
- fnSpec.getSpec().getUrn());
- byte[] serializedFn = fnSpec.getSpec().getPayload().toByteArray();
+ fnSpec.getUrn());
+ byte[] serializedFn = fnSpec.getPayload().toByteArray();
return (DoFnWithExecutionInformation)
SerializableUtils.deserializeFromByteArray(serializedFn, "Custom DoFn With Execution Info");
}
@@ -662,14 +675,10 @@
return builder.build();
}
- public static SdkFunctionSpec translateViewFn(ViewFn<?, ?> viewFn, SdkComponents components) {
- return SdkFunctionSpec.newBuilder()
- .setEnvironmentId(components.getOnlyEnvironmentId())
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(CUSTOM_JAVA_VIEW_FN_URN)
- .setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(viewFn)))
- .build())
+ public static FunctionSpec translateViewFn(ViewFn<?, ?> viewFn, SdkComponents components) {
+ return FunctionSpec.newBuilder()
+ .setUrn(CUSTOM_JAVA_VIEW_FN_URN)
+ .setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(viewFn)))
.build();
}
@@ -696,22 +705,17 @@
return payload.getSplittable();
}
- public static SdkFunctionSpec translateWindowMappingFn(
+ public static FunctionSpec translateWindowMappingFn(
WindowMappingFn<?> windowMappingFn, SdkComponents components) {
- return SdkFunctionSpec.newBuilder()
- .setEnvironmentId(components.getOnlyEnvironmentId())
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(CUSTOM_JAVA_WINDOW_MAPPING_FN_URN)
- .setPayload(
- ByteString.copyFrom(SerializableUtils.serializeToByteArray(windowMappingFn)))
- .build())
+ return FunctionSpec.newBuilder()
+ .setUrn(CUSTOM_JAVA_WINDOW_MAPPING_FN_URN)
+ .setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(windowMappingFn)))
.build();
}
/** These methods drive to-proto translation from Java and from rehydrated ParDos. */
public interface ParDoLike {
- SdkFunctionSpec translateDoFn(SdkComponents newComponents);
+ FunctionSpec translateDoFn(SdkComponents newComponents);
List<RunnerApi.Parameter> translateParameters();
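With SdkFunctionSpec gone, a user DoFn now round-trips through a bare FunctionSpec whose URN and payload are read directly. A minimal sketch of that round trip against the methods changed above (the identity DoFn and the tag are illustrative only, not part of this change):

import java.util.Collections;
import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.DoFnSchemaInformation;
import org.apache.beam.sdk.util.DoFnWithExecutionInformation;
import org.apache.beam.sdk.values.TupleTag;

// Serialize a DoFn into the flattened FunctionSpec and rehydrate it again.
DoFn<String, String> fn =
    new DoFn<String, String>() {
      @ProcessElement
      public void process(ProcessContext c) {
        c.output(c.element());
      }
    };
FunctionSpec spec =
    ParDoTranslation.translateDoFn(
        fn,
        new TupleTag<String>() {},       // main output tag
        Collections.emptyMap(),          // no side input mapping
        DoFnSchemaInformation.create(),
        SdkComponents.create());
// URN and payload are now read straight off the spec, with no nested getSpec().
DoFnWithExecutionInformation info =
    ParDoTranslation.doFnWithExecutionInformationFromProto(spec);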
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ReadTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ReadTranslation.java
index f5b9c7f..81bafab 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ReadTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ReadTranslation.java
@@ -27,7 +27,6 @@
import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.IsBounded;
import org.apache.beam.model.pipeline.v1.RunnerApi.ReadPayload;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
import org.apache.beam.runners.core.construction.PTransformTranslation.TransformPayloadTranslator;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.Read;
@@ -65,7 +64,7 @@
.build();
}
- public static SdkFunctionSpec toProto(Source<?> source, SdkComponents components) {
+ public static FunctionSpec toProto(Source<?> source, SdkComponents components) {
if (source instanceof BoundedSource) {
return toProto((BoundedSource) source, components);
} else if (source instanceof UnboundedSource) {
@@ -76,14 +75,10 @@
}
}
- private static SdkFunctionSpec toProto(BoundedSource<?> source, SdkComponents components) {
- return SdkFunctionSpec.newBuilder()
- .setEnvironmentId(components.getOnlyEnvironmentId())
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(JAVA_SERIALIZED_BOUNDED_SOURCE)
- .setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(source)))
- .build())
+ private static FunctionSpec toProto(BoundedSource<?> source, SdkComponents components) {
+ return FunctionSpec.newBuilder()
+ .setUrn(JAVA_SERIALIZED_BOUNDED_SOURCE)
+ .setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(source)))
.build();
}
@@ -92,7 +87,7 @@
checkArgument(payload.getIsBounded().equals(IsBounded.Enum.BOUNDED));
return (BoundedSource<?>)
SerializableUtils.deserializeFromByteArray(
- payload.getSource().getSpec().getPayload().toByteArray(), "BoundedSource");
+ payload.getSource().getPayload().toByteArray(), "BoundedSource");
}
public static <T> BoundedSource<T> boundedSourceFromTransform(
@@ -118,15 +113,10 @@
.getPayload());
}
- private static SdkFunctionSpec toProto(UnboundedSource<?, ?> source, SdkComponents components) {
- return SdkFunctionSpec.newBuilder()
- // Do not assign an environment. Unbounded reads are a Runner translated transform,
- // unless, in the future, we have an adapter available for splittable DoFn.
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(JAVA_SERIALIZED_UNBOUNDED_SOURCE)
- .setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(source)))
- .build())
+ private static FunctionSpec toProto(UnboundedSource<?, ?> source, SdkComponents components) {
+ return FunctionSpec.newBuilder()
+ .setUrn(JAVA_SERIALIZED_UNBOUNDED_SOURCE)
+ .setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(source)))
.build();
}
@@ -134,7 +124,7 @@
checkArgument(payload.getIsBounded().equals(IsBounded.Enum.UNBOUNDED));
return (UnboundedSource<?, ?>)
SerializableUtils.deserializeFromByteArray(
- payload.getSource().getSpec().getPayload().toByteArray(), "UnboundedSource");
+ payload.getSource().getPayload().toByteArray(), "UnboundedSource");
}
public static PCollection.IsBounded sourceIsBounded(AppliedPTransform<?, ?, ?> transform) {
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java
index 2373188..f89874e 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java
@@ -30,7 +30,6 @@
import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.Parameter;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.SideInput;
import org.apache.beam.model.pipeline.v1.RunnerApi.StateSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.TimerSpec;
@@ -366,7 +365,7 @@
ParDoTranslation.payloadForParDoLike(
new ParDoLike() {
@Override
- public SdkFunctionSpec translateDoFn(SdkComponents newComponents) {
+ public FunctionSpec translateDoFn(SdkComponents newComponents) {
// Schemas not yet supported on splittable DoFn.
return ParDoTranslation.translateDoFn(
fn,
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Timer.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Timer.java
index 072dbc7..090ee32 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Timer.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Timer.java
@@ -47,7 +47,12 @@
/** Returns a timer for the given timestamp with a user specified payload. */
public static <T> Timer<T> of(Instant timestamp, @Nullable T payload) {
- return new AutoValue_Timer(timestamp, payload);
+ return new AutoValue_Timer(timestamp, timestamp, payload);
+ }
+
+ /** Returns a timer for the given timestamp with a user specified payload and outputTimestamp. */
+ public static <T> Timer<T> of(Instant timestamp, Instant outputTimestamp, @Nullable T payload) {
+ return new AutoValue_Timer(timestamp, outputTimestamp, payload);
}
/**
@@ -58,6 +63,9 @@
*/
public abstract Instant getTimestamp();
+ /** Returns the output timestamp. */
+ public abstract Instant getOutputTimestamp();
+
/** A user supplied payload. */
@Nullable
public abstract T getPayload();
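A short sketch of the two factory methods above; the existing two-argument overload now simply reuses the firing timestamp as the output timestamp:

import org.joda.time.Instant;

Instant fireAt = new Instant(1000L);
Instant holdAt = new Instant(500L);

Timer<String> defaulted = Timer.of(fireAt, "payload");         // getOutputTimestamp() == fireAt
Timer<String> withHold = Timer.of(fireAt, holdAt, "payload");  // explicit output-time hold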
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslation.java
index a57aa9b..63f662f 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslation.java
@@ -25,7 +25,6 @@
import org.apache.beam.model.pipeline.v1.RunnerApi.Components;
import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.OutputTime;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
import org.apache.beam.model.pipeline.v1.StandardWindowFns.FixedWindowsPayload;
import org.apache.beam.model.pipeline.v1.StandardWindowFns.GlobalWindowsPayload;
import org.apache.beam.model.pipeline.v1.StandardWindowFns.SessionsPayload;
@@ -216,25 +215,19 @@
* RunnerApi.MessageWithComponents#getFunctionSpec()} is a {@link RunnerApi.FunctionSpec} for the
* input {@link WindowFn}.
*/
- public static SdkFunctionSpec toProto(WindowFn<?, ?> windowFn, SdkComponents components) {
+ public static FunctionSpec toProto(WindowFn<?, ?> windowFn, SdkComponents components) {
ByteString serializedFn = ByteString.copyFrom(SerializableUtils.serializeToByteArray(windowFn));
if (windowFn instanceof GlobalWindows) {
- return SdkFunctionSpec.newBuilder()
- .setEnvironmentId(components.getOnlyEnvironmentId())
- .setSpec(FunctionSpec.newBuilder().setUrn(GLOBAL_WINDOWS_URN))
- .build();
+ return FunctionSpec.newBuilder().setUrn(GLOBAL_WINDOWS_URN).build();
} else if (windowFn instanceof FixedWindows) {
FixedWindowsPayload fixedWindowsPayload =
FixedWindowsPayload.newBuilder()
.setSize(Durations.fromMillis(((FixedWindows) windowFn).getSize().getMillis()))
.setOffset(Timestamps.fromMillis(((FixedWindows) windowFn).getOffset().getMillis()))
.build();
- return SdkFunctionSpec.newBuilder()
- .setEnvironmentId(components.getOnlyEnvironmentId())
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(FIXED_WINDOWS_URN)
- .setPayload(fixedWindowsPayload.toByteString()))
+ return FunctionSpec.newBuilder()
+ .setUrn(FIXED_WINDOWS_URN)
+ .setPayload(fixedWindowsPayload.toByteString())
.build();
} else if (windowFn instanceof SlidingWindows) {
SlidingWindowsPayload slidingWindowsPayload =
@@ -243,32 +236,23 @@
.setOffset(Timestamps.fromMillis(((SlidingWindows) windowFn).getOffset().getMillis()))
.setPeriod(Durations.fromMillis(((SlidingWindows) windowFn).getPeriod().getMillis()))
.build();
- return SdkFunctionSpec.newBuilder()
- .setEnvironmentId(components.getOnlyEnvironmentId())
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(SLIDING_WINDOWS_URN)
- .setPayload(slidingWindowsPayload.toByteString()))
+ return FunctionSpec.newBuilder()
+ .setUrn(SLIDING_WINDOWS_URN)
+ .setPayload(slidingWindowsPayload.toByteString())
.build();
} else if (windowFn instanceof Sessions) {
SessionsPayload sessionsPayload =
SessionsPayload.newBuilder()
.setGapSize(Durations.fromMillis(((Sessions) windowFn).getGapDuration().getMillis()))
.build();
- return SdkFunctionSpec.newBuilder()
- .setEnvironmentId(components.getOnlyEnvironmentId())
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(SESSION_WINDOWS_URN)
- .setPayload(sessionsPayload.toByteString()))
+ return FunctionSpec.newBuilder()
+ .setUrn(SESSION_WINDOWS_URN)
+ .setPayload(sessionsPayload.toByteString())
.build();
} else {
- return SdkFunctionSpec.newBuilder()
- .setEnvironmentId(components.getOnlyEnvironmentId())
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(SERIALIZED_JAVA_WINDOWFN_URN)
- .setPayload(serializedFn))
+ return FunctionSpec.newBuilder()
+ .setUrn(SERIALIZED_JAVA_WINDOWFN_URN)
+ .setPayload(serializedFn)
.build();
}
}
@@ -295,7 +279,7 @@
*/
public static RunnerApi.WindowingStrategy toProto(
WindowingStrategy<?, ?> windowingStrategy, SdkComponents components) throws IOException {
- SdkFunctionSpec windowFnSpec = toProto(windowingStrategy.getWindowFn(), components);
+ FunctionSpec windowFnSpec = toProto(windowingStrategy.getWindowFn(), components);
RunnerApi.WindowingStrategy.Builder windowingStrategyProto =
RunnerApi.WindowingStrategy.newBuilder()
@@ -308,7 +292,8 @@
.setAssignsToOneWindow(windowingStrategy.getWindowFn().assignsToOneWindow())
.setOnTimeBehavior(toProto(windowingStrategy.getOnTimeBehavior()))
.setWindowCoderId(
- components.registerCoder(windowingStrategy.getWindowFn().windowCoder()));
+ components.registerCoder(windowingStrategy.getWindowFn().windowCoder()))
+ .setEnvironmentId(components.getOnlyEnvironmentId());
return windowingStrategyProto.build();
}
@@ -340,7 +325,7 @@
RunnerApi.WindowingStrategy proto, RehydratedComponents components)
throws InvalidProtocolBufferException {
- SdkFunctionSpec windowFnSpec = proto.getWindowFn();
+ FunctionSpec windowFnSpec = proto.getWindowFn();
WindowFn<?, ?> windowFn = windowFnFromProto(windowFnSpec);
TimestampCombiner timestampCombiner = timestampCombinerFromProto(proto.getOutputTime());
AccumulationMode accumulationMode = fromProto(proto.getAccumulationMode());
@@ -358,34 +343,32 @@
.withOnTimeBehavior(onTimeBehavior);
}
- public static WindowFn<?, ?> windowFnFromProto(SdkFunctionSpec windowFnSpec) {
+ public static WindowFn<?, ?> windowFnFromProto(FunctionSpec windowFnSpec) {
try {
- String s = windowFnSpec.getSpec().getUrn();
+ String s = windowFnSpec.getUrn();
if (s.equals(getUrn(GlobalWindowsPayload.Enum.PROPERTIES))) {
return new GlobalWindows();
} else if (s.equals(getUrn(FixedWindowsPayload.Enum.PROPERTIES))) {
- FixedWindowsPayload fixedParams =
- FixedWindowsPayload.parseFrom(windowFnSpec.getSpec().getPayload());
+ FixedWindowsPayload fixedParams = FixedWindowsPayload.parseFrom(windowFnSpec.getPayload());
return FixedWindows.of(Duration.millis(Durations.toMillis(fixedParams.getSize())))
.withOffset(Duration.millis(Timestamps.toMillis(fixedParams.getOffset())));
} else if (s.equals(getUrn(SlidingWindowsPayload.Enum.PROPERTIES))) {
SlidingWindowsPayload slidingParams =
- SlidingWindowsPayload.parseFrom(windowFnSpec.getSpec().getPayload());
+ SlidingWindowsPayload.parseFrom(windowFnSpec.getPayload());
return SlidingWindows.of(Duration.millis(Durations.toMillis(slidingParams.getSize())))
.every(Duration.millis(Durations.toMillis(slidingParams.getPeriod())))
.withOffset(Duration.millis(Timestamps.toMillis(slidingParams.getOffset())));
} else if (s.equals(getUrn(SessionsPayload.Enum.PROPERTIES))) {
- SessionsPayload sessionParams =
- SessionsPayload.parseFrom(windowFnSpec.getSpec().getPayload());
+ SessionsPayload sessionParams = SessionsPayload.parseFrom(windowFnSpec.getPayload());
return Sessions.withGapDuration(
Duration.millis(Durations.toMillis(sessionParams.getGapSize())));
} else if (s.equals(SERIALIZED_JAVA_WINDOWFN_URN)) {
return (WindowFn<?, ?>)
SerializableUtils.deserializeFromByteArray(
- windowFnSpec.getSpec().getPayload().toByteArray(), "WindowFn");
+ windowFnSpec.getPayload().toByteArray(), "WindowFn");
} else {
throw new IllegalArgumentException(
- "Unknown or unsupported WindowFn: " + windowFnSpec.getSpec().getUrn());
+ "Unknown or unsupported WindowFn: " + windowFnSpec.getUrn());
}
} catch (InvalidProtocolBufferException e) {
throw new IllegalArgumentException(
@@ -393,7 +376,7 @@
"%s for %s with URN %s did not contain expected proto message for payload",
FunctionSpec.class.getSimpleName(),
WindowFn.class.getSimpleName(),
- windowFnSpec.getSpec().getUrn()),
+ windowFnSpec.getUrn()),
e);
}
}
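Because the WindowFn translation now emits a plain FunctionSpec, a known windowing fn can be round-tripped without consulting an environment id. A minimal sketch, assuming only the methods shown in this file:

import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.WindowFn;
import org.joda.time.Duration;

SdkComponents components = SdkComponents.create();
FunctionSpec windowFnSpec =
    WindowingStrategyTranslation.toProto(FixedWindows.of(Duration.standardMinutes(5)), components);
// URN and payload now live directly on the spec.
WindowFn<?, ?> roundTripped = WindowingStrategyTranslation.windowFnFromProto(windowFnSpec);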
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WriteFilesTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WriteFilesTranslation.java
index e86c450..0df16a2 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WriteFilesTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WriteFilesTranslation.java
@@ -30,7 +30,6 @@
import java.util.Map;
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.SideInput;
import org.apache.beam.model.pipeline.v1.RunnerApi.WriteFilesPayload;
import org.apache.beam.runners.core.construction.PTransformTranslation.TransformPayloadTranslator;
@@ -66,7 +65,7 @@
return payloadForWriteFilesLike(
new WriteFilesLike() {
@Override
- public SdkFunctionSpec translateSink(SdkComponents newComponents) {
+ public FunctionSpec translateSink(SdkComponents newComponents) {
// TODO: register the environment
return toProto(transform.getSink());
}
@@ -96,31 +95,27 @@
components);
}
- private static SdkFunctionSpec toProto(FileBasedSink<?, ?, ?> sink) {
+ private static FunctionSpec toProto(FileBasedSink<?, ?, ?> sink) {
return toProto(CUSTOM_JAVA_FILE_BASED_SINK_URN, sink);
}
- private static SdkFunctionSpec toProto(String urn, Serializable serializable) {
- return SdkFunctionSpec.newBuilder()
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(urn)
- .setPayload(
- ByteString.copyFrom(SerializableUtils.serializeToByteArray(serializable)))
- .build())
+ private static FunctionSpec toProto(String urn, Serializable serializable) {
+ return FunctionSpec.newBuilder()
+ .setUrn(urn)
+ .setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(serializable)))
.build();
}
@VisibleForTesting
- static FileBasedSink<?, ?, ?> sinkFromProto(SdkFunctionSpec sinkProto) throws IOException {
+ static FileBasedSink<?, ?, ?> sinkFromProto(FunctionSpec sinkProto) throws IOException {
checkArgument(
- sinkProto.getSpec().getUrn().equals(CUSTOM_JAVA_FILE_BASED_SINK_URN),
+ sinkProto.getUrn().equals(CUSTOM_JAVA_FILE_BASED_SINK_URN),
"Cannot extract %s instance from %s with URN %s",
FileBasedSink.class.getSimpleName(),
FunctionSpec.class.getSimpleName(),
- sinkProto.getSpec().getUrn());
+ sinkProto.getUrn());
- byte[] serializedSink = sinkProto.getSpec().getPayload().toByteArray();
+ byte[] serializedSink = sinkProto.getPayload().toByteArray();
return (FileBasedSink<?, ?, ?>)
SerializableUtils.deserializeFromByteArray(
@@ -252,7 +247,7 @@
}
@Override
- public SdkFunctionSpec translateSink(SdkComponents newComponents) {
+ public FunctionSpec translateSink(SdkComponents newComponents) {
// TODO: re-register the environment with the new components
return payload.getSink();
}
@@ -304,7 +299,7 @@
/** These methods drive to-proto translation from Java and from rehydrated WriteFiles. */
private interface WriteFilesLike {
- SdkFunctionSpec translateSink(SdkComponents newComponents);
+ FunctionSpec translateSink(SdkComponents newComponents);
Map<String, RunnerApi.SideInput> translateSideInputs(SdkComponents components);
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/expansion/ExpansionService.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/expansion/ExpansionService.java
index ae1a3d7..aab393b 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/expansion/ExpansionService.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/expansion/ExpansionService.java
@@ -277,12 +277,12 @@
} else if (output instanceof PCollectionTuple) {
return ((PCollectionTuple) output)
.getAll().entrySet().stream()
- .collect(Collectors.toMap(entry -> entry.getKey().toString(), Map.Entry::getValue));
+ .collect(Collectors.toMap(entry -> entry.getKey().getId(), Map.Entry::getValue));
} else if (output instanceof PCollectionList<?>) {
PCollectionList<?> listOutput = (PCollectionList<?>) output;
return IntStream.range(0, listOutput.size())
.boxed()
- .collect(Collectors.toMap(index -> "output_" + index, listOutput::get));
+ .collect(Collectors.toMap(Object::toString, listOutput::get));
} else {
throw new UnsupportedOperationException("Unknown output type: " + output.getClass());
}
@@ -334,18 +334,31 @@
throw new UnsupportedOperationException(
"Unknown urn: " + request.getTransform().getSpec().getUrn());
}
- registeredTransforms
- .get(request.getTransform().getSpec().getUrn())
- .apply(
- pipeline,
- request.getTransform().getUniqueName(),
- request.getTransform().getSpec(),
- inputs);
+ Map<String, PCollection<?>> outputs =
+ registeredTransforms
+ .get(request.getTransform().getSpec().getUrn())
+ .apply(
+ pipeline,
+ request.getTransform().getUniqueName(),
+ request.getTransform().getSpec(),
+ inputs);
// Needed to find which transform was new...
SdkComponents sdkComponents =
rehydratedComponents.getSdkComponents().withNewIdPrefix(request.getNamespace());
sdkComponents.registerEnvironment(Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
+ Map<String, String> outputMap =
+ outputs.entrySet().stream()
+ .collect(
+ Collectors.toMap(
+ Map.Entry::getKey,
+ output -> {
+ try {
+ return sdkComponents.registerPCollection(output.getValue());
+ } catch (IOException exn) {
+ throw new RuntimeException(exn);
+ }
+ }));
pipeline.replaceAll(ImmutableList.of(JavaReadViaImpulse.boundedOverride()));
RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents);
String expandedTransformId =
@@ -359,6 +372,8 @@
.getTransformsOrThrow(expandedTransformId)
.toBuilder()
.setUniqueName(expandedTransformId)
+ .clearOutputs()
+ .putAllOutputs(outputMap)
.build();
LOG.debug("Expanded to {}", expandedTransform);
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/ExecutableStage.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/ExecutableStage.java
index 8bdd718..dd2d374 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/ExecutableStage.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/ExecutableStage.java
@@ -17,6 +17,8 @@
*/
package org.apache.beam.runners.core.construction.graph;
+import static org.apache.beam.runners.core.construction.BeamUrns.getUrn;
+
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
@@ -31,6 +33,7 @@
import org.apache.beam.model.pipeline.v1.RunnerApi.PCollection;
import org.apache.beam.model.pipeline.v1.RunnerApi.PTransform;
import org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline;
+import org.apache.beam.model.pipeline.v1.RunnerApi.WireCoderSetting;
import org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode;
import org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode;
@@ -70,6 +73,14 @@
Environment getEnvironment();
/**
+ * Returns the {@link WireCoderSetting} this stage uses.
+ *
+ * <p>A {@link WireCoderSetting} consists of settings used to configure the type of the wire
+ * coder.
+ */
+ WireCoderSetting getWireCoderSetting();
+
+ /**
* Returns the root {@link PCollectionNode} of this {@link ExecutableStage}. This {@link
* ExecutableStage} executes by reading elements from a Remote gRPC Read Node.
*
@@ -134,6 +145,7 @@
ExecutableStagePayload.Builder payload = ExecutableStagePayload.newBuilder();
payload.setEnvironment(getEnvironment());
+ payload.setWireCoderSetting(getWireCoderSetting());
// Populate inputs and outputs of the stage payload and outer PTransform simultaneously.
PCollectionNode input = getInputPCollection();
@@ -208,6 +220,7 @@
static ExecutableStage fromPayload(ExecutableStagePayload payload) {
Components components = payload.getComponents();
Environment environment = payload.getEnvironment();
+ WireCoderSetting wireCoderSetting = payload.getWireCoderSetting();
PCollectionNode input =
PipelineNode.pCollection(
@@ -233,6 +246,20 @@
.map(id -> PipelineNode.pCollection(id, components.getPcollectionsOrThrow(id)))
.collect(Collectors.toList());
return ImmutableExecutableStage.of(
- components, environment, input, sideInputs, userStates, timers, transforms, outputs);
+ components,
+ environment,
+ input,
+ sideInputs,
+ userStates,
+ timers,
+ transforms,
+ outputs,
+ wireCoderSetting);
}
+
+ /** The default wire coder setting, i.e., the WINDOWED_VALUE coder. */
+ WireCoderSetting DEFAULT_WIRE_CODER_SETTING =
+ WireCoderSetting.newBuilder()
+ .setUrn(getUrn(RunnerApi.StandardCoders.Enum.WINDOWED_VALUE))
+ .build();
}
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/GreedyPipelineFuser.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/GreedyPipelineFuser.java
index 61d9546..00cca32 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/GreedyPipelineFuser.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/GreedyPipelineFuser.java
@@ -415,7 +415,8 @@
stage.getUserStates(),
stage.getTimers(),
pTransformNodes,
- stage.getOutputPCollections());
+ stage.getOutputPCollections(),
+ stage.getWireCoderSetting());
}
/**
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/GreedyStageFuser.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/GreedyStageFuser.java
index 2334458..87f2076 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/GreedyStageFuser.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/GreedyStageFuser.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.runners.core.construction.graph;
+import static org.apache.beam.runners.core.construction.graph.ExecutableStage.DEFAULT_WIRE_CODER_SETTING;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
import java.util.ArrayDeque;
@@ -138,7 +139,8 @@
userStates,
timers,
fusedTransforms.build(),
- materializedPCollections);
+ materializedPCollections,
+ DEFAULT_WIRE_CODER_SETTING);
}
private static Environment getStageEnvironment(
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/ImmutableExecutableStage.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/ImmutableExecutableStage.java
index a996ac0..0092056 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/ImmutableExecutableStage.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/ImmutableExecutableStage.java
@@ -22,6 +22,7 @@
import java.util.stream.Collectors;
import org.apache.beam.model.pipeline.v1.RunnerApi.Components;
import org.apache.beam.model.pipeline.v1.RunnerApi.Environment;
+import org.apache.beam.model.pipeline.v1.RunnerApi.WireCoderSetting;
import org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode;
import org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet;
@@ -37,7 +38,8 @@
Collection<UserStateReference> userStates,
Collection<TimerReference> timers,
Collection<PTransformNode> transforms,
- Collection<PCollectionNode> outputs) {
+ Collection<PCollectionNode> outputs,
+ WireCoderSetting wireCoderSetting) {
Components prunedComponents =
components
.toBuilder()
@@ -47,7 +49,15 @@
.collect(Collectors.toMap(PTransformNode::getId, PTransformNode::getTransform)))
.build();
return of(
- prunedComponents, environment, input, sideInputs, userStates, timers, transforms, outputs);
+ prunedComponents,
+ environment,
+ input,
+ sideInputs,
+ userStates,
+ timers,
+ transforms,
+ outputs,
+ wireCoderSetting);
}
public static ImmutableExecutableStage of(
@@ -58,7 +68,8 @@
Collection<UserStateReference> userStates,
Collection<TimerReference> timers,
Collection<PTransformNode> transforms,
- Collection<PCollectionNode> outputs) {
+ Collection<PCollectionNode> outputs,
+ WireCoderSetting wireCoderSetting) {
return new AutoValue_ImmutableExecutableStage(
components,
environment,
@@ -67,7 +78,8 @@
ImmutableSet.copyOf(userStates),
ImmutableSet.copyOf(timers),
ImmutableSet.copyOf(transforms),
- ImmutableSet.copyOf(outputs));
+ ImmutableSet.copyOf(outputs),
+ wireCoderSetting);
}
@Override
@@ -94,4 +106,7 @@
@Override
public abstract Collection<PCollectionNode> getOutputPCollections();
+
+ @Override
+ public abstract WireCoderSetting getWireCoderSetting();
}
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/OutputDeduplicator.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/OutputDeduplicator.java
index 157ece0..def7de8 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/OutputDeduplicator.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/OutputDeduplicator.java
@@ -308,7 +308,8 @@
stage.getUserStates(),
stage.getTimers(),
updatedTransforms,
- updatedOutputs);
+ updatedOutputs,
+ stage.getWireCoderSetting());
}
/**
@@ -325,6 +326,7 @@
output.getKey(), originalToPartial.get(output.getValue()).getId());
}
}
+ updatedTransformBuilder.setEnvironmentId(transform.getEnvironmentId());
return updatedTransformBuilder.build();
}
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/SplittableParDoExpander.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/SplittableParDoExpander.java
index 77f0211..a320412 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/SplittableParDoExpander.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/SplittableParDoExpander.java
@@ -162,6 +162,7 @@
FunctionSpec.newBuilder()
.setUrn(PTransformTranslation.SPLITTABLE_PAIR_WITH_RESTRICTION_URN)
.setPayload(splittableParDo.getSpec().getPayload()));
+ pairWithRestriction.setEnvironmentId(splittableParDo.getEnvironmentId());
rval.getComponentsBuilder()
.putTransforms(pairWithRestrictionId, pairWithRestriction.build());
}
@@ -180,6 +181,7 @@
FunctionSpec.newBuilder()
.setUrn(PTransformTranslation.SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN)
.setPayload(splittableParDo.getSpec().getPayload()));
+ splitAndSize.setEnvironmentId(splittableParDo.getEnvironmentId());
rval.getComponentsBuilder().putTransforms(splitAndSizeId, splitAndSize.build());
}
@@ -201,6 +203,7 @@
.setUrn(
PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN)
.setPayload(splittableParDo.getSpec().getPayload()));
+ processSizedElementsAndRestrictions.setEnvironmentId(splittableParDo.getEnvironmentId());
rval.getComponentsBuilder()
.putTransforms(
processSizedElementsAndRestrictionsId,
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/ClasspathScanningResourcesDetector.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/ClasspathScanningResourcesDetector.java
new file mode 100644
index 0000000..bfa1fa1e
--- /dev/null
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/ClasspathScanningResourcesDetector.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.core.construction.resources;
+
+import io.github.classgraph.ClassGraph;
+import java.io.File;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Attempts to detect all the resources to be staged using the classgraph library.
+ *
+ * <p>See <a
+ * href="https://github.com/classgraph/classgraph">https://github.com/classgraph/classgraph</a>
+ */
+public class ClasspathScanningResourcesDetector implements PipelineResourcesDetector {
+
+ private transient ClassGraph classGraph;
+
+ public ClasspathScanningResourcesDetector(ClassGraph classGraph) {
+ this.classGraph = classGraph;
+ }
+
+ /**
+ * Detects classpath resources and returns a list of absolute paths to them.
+ *
+ * @param classLoader The classloader to use to detect resources to stage (optional).
+ * @return A list of absolute paths to the resources the class loader uses.
+ */
+ @Override
+ public List<String> detect(ClassLoader classLoader) {
+ List<File> classpathContents =
+ classGraph.disableNestedJarScanning().addClassLoader(classLoader).getClasspathFiles();
+
+ return classpathContents.stream().map(File::getAbsolutePath).collect(Collectors.toList());
+ }
+}
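A usage sketch for the new detector; unlike the old URLClassLoader-based scan, any ClassLoader can be passed:

import io.github.classgraph.ClassGraph;
import java.util.List;

ClasspathScanningResourcesDetector detector =
    new ClasspathScanningResourcesDetector(new ClassGraph());
// Absolute paths of everything the given classloader puts on the classpath.
List<String> resources = detector.detect(Thread.currentThread().getContextClassLoader());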
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PipelineResources.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/PipelineResources.java
similarity index 74%
rename from runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PipelineResources.java
rename to runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/PipelineResources.java
index f63e082..dda68f1 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PipelineResources.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/PipelineResources.java
@@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.beam.runners.core.construction;
+package org.apache.beam.runners.core.construction.resources;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
@@ -23,12 +23,10 @@
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
-import java.net.URISyntaxException;
-import java.net.URL;
-import java.net.URLClassLoader;
-import java.util.ArrayList;
import java.util.List;
+import java.util.function.Predicate;
import java.util.stream.Collectors;
+import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.util.ZipFiles;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Strings;
@@ -40,34 +38,30 @@
public class PipelineResources {
/**
- * Attempts to detect all the resources the class loader has access to. This does not recurse to
- * class loader parents stopping it from pulling in resources from the system class loader.
+ * Uses the algorithm provided via {@link
+ * org.apache.beam.runners.core.construction.resources.PipelineResourcesOptions} to detect
+ * classpath resources.
*
- * @param classLoader The URLClassLoader to use to detect resources to stage.
- * @throws IllegalArgumentException If either the class loader is not a URLClassLoader or one of
- * the resources the class loader exposes is not a file resource.
+ * @param classLoader The ClassLoader to use to detect resources to stage (optional).
+ * @param options pipeline options
* @return A list of absolute paths to the resources the class loader uses.
*/
- public static List<String> detectClassPathResourcesToStage(ClassLoader classLoader) {
- if (!(classLoader instanceof URLClassLoader)) {
- String message =
- String.format(
- "Unable to use ClassLoader to detect classpath elements. "
- + "Current ClassLoader is %s, only URLClassLoaders are supported.",
- classLoader);
- throw new IllegalArgumentException(message);
- }
+ public static List<String> detectClassPathResourcesToStage(
+ ClassLoader classLoader, PipelineOptions options) {
- List<String> files = new ArrayList<>();
- for (URL url : ((URLClassLoader) classLoader).getURLs()) {
- try {
- files.add(new File(url.toURI()).getAbsolutePath());
- } catch (IllegalArgumentException | URISyntaxException e) {
- String message = String.format("Unable to convert url (%s) to file.", url);
- throw new IllegalArgumentException(message, e);
- }
- }
- return files;
+ PipelineResourcesOptions artifactsRelatedOptions = options.as(PipelineResourcesOptions.class);
+ List<String> detectedResources =
+ artifactsRelatedOptions.getPipelineResourcesDetector().detect(classLoader);
+
+ return detectedResources.stream().filter(isStageable()).collect(Collectors.toList());
+ }
+
+ /**
+ * Returns a predicate for filtering all resources that are impossible to stage (like gradle
+ * wrapper jars).
+ */
+ private static Predicate<String> isStageable() {
+ return resourcePath -> !resourcePath.contains("gradle/wrapper");
}
/**
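The staging entry point now takes PipelineOptions and delegates to whichever detector those options carry. A minimal usage sketch with default options:

import java.util.List;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

PipelineOptions options = PipelineOptionsFactory.create();
// Uses the detector configured on PipelineResourcesOptions (ClassGraph-based by default)
// and drops resources that cannot be staged, e.g. gradle wrapper jars.
List<String> filesToStage =
    PipelineResources.detectClassPathResourcesToStage(
        PipelineResources.class.getClassLoader(), options);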
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/FlinkClassloading.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/PipelineResourcesDetector.java
similarity index 63%
copy from runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/FlinkClassloading.java
copy to runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/PipelineResourcesDetector.java
index a114f40..103c694 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/FlinkClassloading.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/PipelineResourcesDetector.java
@@ -15,16 +15,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.beam.runners.flink.translation.utils;
+package org.apache.beam.runners.core.construction.resources;
-import com.fasterxml.jackson.databind.type.TypeFactory;
+import java.io.Serializable;
+import java.util.List;
-/** Utilities for dealing with classloading. */
-public class FlinkClassloading {
+/** Interface for an algorithm detecting classpath resources for pipelines. */
+public interface PipelineResourcesDetector extends Serializable {
- public static void deleteStaticCaches() {
- // Clear cache to get rid of any references to the Flink Classloader
- // See https://jira.apache.org/jira/browse/BEAM-6460
- TypeFactory.defaultInstance().clearCache();
+ List<String> detect(ClassLoader classLoader);
+
+ /** Provides pipeline resources detection algorithm. */
+ interface Factory {
+ PipelineResourcesDetector getPipelineResourcesDetector();
}
}
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/PipelineResourcesOptions.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/PipelineResourcesOptions.java
new file mode 100644
index 0000000..884664a
--- /dev/null
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/PipelineResourcesOptions.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.core.construction.resources;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import io.github.classgraph.ClassGraph;
+import org.apache.beam.sdk.options.Default;
+import org.apache.beam.sdk.options.DefaultValueFactory;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.util.InstanceBuilder;
+
+/** Pipeline options dedicated to detecting classpath resources. */
+public interface PipelineResourcesOptions extends PipelineOptions {
+
+ /**
+ * The class of the pipeline resources detector factory that should be created and used to create
+ * the detector. If not set explicitly, a default class will be used to instantiate the factory.
+ */
+ @JsonIgnore
+ @Description(
+ "The class of the pipeline resources detector factory that should be created and used to create "
+ + "the detector. If not set explicitly, a default class will be used to instantiate the factory.")
+ @Default.Class(ClasspathScanningResourcesDetectorFactory.class)
+ Class<? extends PipelineResourcesDetector.Factory> getPipelineResourcesDetectorFactoryClass();
+
+ void setPipelineResourcesDetectorFactoryClass(
+ Class<? extends PipelineResourcesDetector.Factory> factoryClass);
+
+ /**
+ * Instance of a pipeline resources detection algorithm. If not set explicitly, a default
+ * implementation will be used.
+ */
+ @JsonIgnore
+ @Description(
+ "Instance of a pipeline resources detection algorithm. If not set explicitly, a default implementation will be used")
+ @Default.InstanceFactory(PipelineResourcesDetectorFactory.class)
+ PipelineResourcesDetector getPipelineResourcesDetector();
+
+ void setPipelineResourcesDetector(PipelineResourcesDetector pipelineResourcesDetector);
+
+ /**
+ * Creates {@link PipelineResourcesDetector} instance based on provided pipeline options or
+ * default values set for them.
+ */
+ class PipelineResourcesDetectorFactory implements DefaultValueFactory<PipelineResourcesDetector> {
+
+ @Override
+ public PipelineResourcesDetector create(PipelineOptions options) {
+ PipelineResourcesOptions resourcesOptions = options.as(PipelineResourcesOptions.class);
+
+ PipelineResourcesDetector.Factory resourcesToStage =
+ InstanceBuilder.ofType(PipelineResourcesDetector.Factory.class)
+ .fromClass(resourcesOptions.getPipelineResourcesDetectorFactoryClass())
+ .fromFactoryMethod("create")
+ .build();
+
+ return resourcesToStage.getPipelineResourcesDetector();
+ }
+ }
+
+ /** Constructs the default {@link PipelineResourcesDetector} instance. */
+ class ClasspathScanningResourcesDetectorFactory implements PipelineResourcesDetector.Factory {
+
+ public static ClasspathScanningResourcesDetectorFactory create() {
+ return new ClasspathScanningResourcesDetectorFactory();
+ }
+
+ @Override
+ public PipelineResourcesDetector getPipelineResourcesDetector() {
+ return new ClasspathScanningResourcesDetector(new ClassGraph());
+ }
+ }
+}
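A sketch of configuring the detection algorithm through the new options interface, either by factory class or by instance:

import io.github.classgraph.ClassGraph;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

PipelineResourcesOptions opts =
    PipelineOptionsFactory.create().as(PipelineResourcesOptions.class);
// Swap the factory class used to build the detector...
opts.setPipelineResourcesDetectorFactoryClass(
    PipelineResourcesOptions.ClasspathScanningResourcesDetectorFactory.class);
// ...or provide a detector instance directly.
opts.setPipelineResourcesDetector(new ClasspathScanningResourcesDetector(new ClassGraph()));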
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/FlinkClassloading.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/package-info.java
similarity index 65%
copy from runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/FlinkClassloading.java
copy to runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/package-info.java
index a114f40..6e64dad 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/FlinkClassloading.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/resources/package-info.java
@@ -15,16 +15,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.beam.runners.flink.translation.utils;
-import com.fasterxml.jackson.databind.type.TypeFactory;
-
-/** Utilities for dealing with classloading. */
-public class FlinkClassloading {
-
- public static void deleteStaticCaches() {
- // Clear cache to get rid of any references to the Flink Classloader
- // See https://jira.apache.org/jira/browse/BEAM-6460
- TypeFactory.defaultInstance().clearCache();
- }
-}
+/** Classes used to detect and stage pipeline classpath resources. */
+package org.apache.beam.runners.core.construction.resources;
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CoderTranslationTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CoderTranslationTest.java
index c2c6804..b2adc2b 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CoderTranslationTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CoderTranslationTest.java
@@ -51,6 +51,7 @@
import org.apache.beam.sdk.schemas.logicaltypes.FixedBytes;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder;
+import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet;
@@ -79,6 +80,7 @@
.add(
FullWindowedValueCoder.of(
IterableCoder.of(VarLongCoder.of()), IntervalWindowCoder.of()))
+ .add(WindowedValue.ParamWindowedValueCoder.of(IterableCoder.of(VarLongCoder.of())))
.add(DoubleCoder.of())
.add(
RowCoder.of(
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CombineTranslationTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CombineTranslationTest.java
index 73b26a3..ee026a4 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CombineTranslationTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CombineTranslationTest.java
@@ -108,7 +108,7 @@
assertEquals(
combineFn,
SerializableUtils.deserializeFromByteArray(
- combineProto.getCombineFn().getSpec().getPayload().toByteArray(), "CombineFn"));
+ combineProto.getCombineFn().getPayload().toByteArray(), "CombineFn"));
}
}
@@ -151,7 +151,7 @@
assertEquals(
combineFn,
SerializableUtils.deserializeFromByteArray(
- combineProto.getCombineFn().getSpec().getPayload().toByteArray(), "CombineFn"));
+ combineProto.getCombineFn().getPayload().toByteArray(), "CombineFn"));
}
@Test
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CommonCoderTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CommonCoderTest.java
index 2de2fe8..56fae2f 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CommonCoderTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CommonCoderTest.java
@@ -106,6 +106,9 @@
.put(
getUrn(StandardCoders.Enum.WINDOWED_VALUE),
WindowedValue.FullWindowedValueCoder.class)
+ .put(
+ getUrn(StandardCoders.Enum.PARAM_WINDOWED_VALUE),
+ WindowedValue.ParamWindowedValueCoder.class)
.put(getUrn(StandardCoders.Enum.ROW), RowCoder.class)
.build();
@@ -272,7 +275,8 @@
return convertedElements;
} else if (s.equals(getUrn(StandardCoders.Enum.GLOBAL_WINDOW))) {
return GlobalWindow.INSTANCE;
- } else if (s.equals(getUrn(StandardCoders.Enum.WINDOWED_VALUE))) {
+ } else if (s.equals(getUrn(StandardCoders.Enum.WINDOWED_VALUE))
+ || s.equals(getUrn(StandardCoders.Enum.PARAM_WINDOWED_VALUE))) {
Map<String, Object> kvMap = (Map<String, Object>) value;
Coder valueCoder = ((WindowedValue.FullWindowedValueCoder) coder).getValueCoder();
Coder windowCoder = ((WindowedValue.FullWindowedValueCoder) coder).getWindowCoder();
@@ -438,6 +442,9 @@
} else if (s.equals(getUrn(StandardCoders.Enum.WINDOWED_VALUE))) {
assertEquals(expectedValue, actualValue);
+ } else if (s.equals(getUrn(StandardCoders.Enum.PARAM_WINDOWED_VALUE))) {
+ assertEquals(expectedValue, actualValue);
+
} else if (s.equals(getUrn(StandardCoders.Enum.DOUBLE))) {
assertEquals(expectedValue, actualValue);
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/EnvironmentsTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/EnvironmentsTest.java
index 5cbe98a..68ff4ce 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/EnvironmentsTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/EnvironmentsTest.java
@@ -24,32 +24,19 @@
import java.io.IOException;
import java.io.Serializable;
import java.util.Optional;
-import org.apache.beam.model.pipeline.v1.RunnerApi.CombinePayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.DockerPayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.Environment;
import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.PTransform;
import org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.ProcessPayload;
-import org.apache.beam.model.pipeline.v1.RunnerApi.ReadPayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.StandardEnvironments;
-import org.apache.beam.model.pipeline.v1.RunnerApi.WindowIntoPayload;
import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.io.CountingSource;
-import org.apache.beam.sdk.io.Read;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.DoFnSchemaInformation;
import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.Sum;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.FixedWindows;
-import org.apache.beam.sdk.transforms.windowing.PartitioningWindowFn;
-import org.apache.beam.sdk.transforms.windowing.WindowFn;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TupleTagList;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@@ -112,9 +99,10 @@
}
@Test
- public void getEnvironmentParDo() throws IOException {
+ public void getEnvironmentPTransform() throws IOException {
SdkComponents components = SdkComponents.create();
- components.registerEnvironment(Environments.createDockerEnvironment("java"));
+ Environment env = Environments.createDockerEnvironment("java");
+ components.registerEnvironment(env);
ParDoPayload payload =
ParDoTranslation.translateParDo(
ParDo.of(
@@ -128,145 +116,18 @@
components);
RehydratedComponents rehydratedComponents =
RehydratedComponents.forComponents(components.toComponents());
- PTransform builder =
+ PTransform ptransform =
PTransform.newBuilder()
.setSpec(
FunctionSpec.newBuilder()
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(payload.toByteString())
.build())
+ .setEnvironmentId(components.getOnlyEnvironmentId())
.build();
- Environment env = Environments.getEnvironment(builder, rehydratedComponents).get();
+ Environment env1 = Environments.getEnvironment(ptransform, rehydratedComponents).get();
assertThat(
- env,
- equalTo(
- components
- .toComponents()
- .getEnvironmentsOrThrow(payload.getDoFn().getEnvironmentId())));
- }
-
- @Test
- public void getEnvironmentWindowIntoKnown() throws IOException {
- SdkComponents components = SdkComponents.create();
- components.registerEnvironment(Environments.createDockerEnvironment("java"));
- WindowIntoPayload payload =
- WindowIntoPayload.newBuilder()
- .setWindowFn(
- WindowingStrategyTranslation.toProto(
- FixedWindows.of(Duration.standardMinutes(5L)), components))
- .build();
- RehydratedComponents rehydratedComponents =
- RehydratedComponents.forComponents(components.toComponents());
- PTransform builder =
- PTransform.newBuilder()
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
- .setPayload(payload.toByteString())
- .build())
- .build();
- Environment env = Environments.getEnvironment(builder, rehydratedComponents).get();
- assertThat(
- env,
- equalTo(
- components
- .toComponents()
- .getEnvironmentsOrThrow(payload.getWindowFn().getEnvironmentId())));
- }
-
- @Test
- public void getEnvironmentWindowIntoCustom() throws IOException {
- SdkComponents components = SdkComponents.create();
- components.registerEnvironment(Environments.createDockerEnvironment("java"));
- WindowIntoPayload payload =
- WindowIntoPayload.newBuilder()
- .setWindowFn(
- WindowingStrategyTranslation.toProto(
- new PartitioningWindowFn<Object, BoundedWindow>() {
- @Override
- public BoundedWindow assignWindow(Instant timestamp) {
- return null;
- }
-
- @Override
- public boolean isCompatible(WindowFn<?, ?> other) {
- return false;
- }
-
- @Override
- public Coder<BoundedWindow> windowCoder() {
- return null;
- }
- },
- components))
- .build();
- RehydratedComponents rehydratedComponents =
- RehydratedComponents.forComponents(components.toComponents());
- PTransform builder =
- PTransform.newBuilder()
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
- .setPayload(payload.toByteString())
- .build())
- .build();
- Environment env = Environments.getEnvironment(builder, rehydratedComponents).get();
- assertThat(
- env,
- equalTo(
- components
- .toComponents()
- .getEnvironmentsOrThrow(payload.getWindowFn().getEnvironmentId())));
- }
-
- @Test
- public void getEnvironmentRead() throws IOException {
- SdkComponents components = SdkComponents.create();
- components.registerEnvironment(Environments.createDockerEnvironment("java"));
- ReadPayload payload = ReadTranslation.toProto(Read.from(CountingSource.upTo(10)), components);
- RehydratedComponents rehydratedComponents =
- RehydratedComponents.forComponents(components.toComponents());
- PTransform builder =
- PTransform.newBuilder()
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(PTransformTranslation.COMBINE_PER_KEY_TRANSFORM_URN)
- .setPayload(payload.toByteString())
- .build())
- .build();
- Environment env = Environments.getEnvironment(builder, rehydratedComponents).get();
- assertThat(
- env,
- equalTo(
- components
- .toComponents()
- .getEnvironmentsOrThrow(payload.getSource().getEnvironmentId())));
- }
-
- @Test
- public void getEnvironmentCombine() throws IOException {
- SdkComponents components = SdkComponents.create();
- components.registerEnvironment(Environments.createDockerEnvironment("java"));
- CombinePayload payload =
- CombinePayload.newBuilder()
- .setCombineFn(CombineTranslation.toProto(Sum.ofLongs(), components))
- .build();
- RehydratedComponents rehydratedComponents =
- RehydratedComponents.forComponents(components.toComponents());
- PTransform builder =
- PTransform.newBuilder()
- .setSpec(
- FunctionSpec.newBuilder()
- .setUrn(PTransformTranslation.COMBINE_PER_KEY_TRANSFORM_URN)
- .setPayload(payload.toByteString())
- .build())
- .build();
- Environment env = Environments.getEnvironment(builder, rehydratedComponents).get();
- assertThat(
- env,
- equalTo(
- components
- .toComponents()
- .getEnvironmentsOrThrow(payload.getCombineFn().getEnvironmentId())));
+ env1,
+ equalTo(components.toComponents().getEnvironmentsOrThrow(ptransform.getEnvironmentId())));
}
}
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ExternalTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ExternalTest.java
index 397b444..b399472 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ExternalTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ExternalTest.java
@@ -139,8 +139,8 @@
testPipeline
.apply(Create.of("1", "2", "2", "3", "3", "3"))
.apply(
- External.<KV<String, Integer>>of(
- "beam:transforms:xlang:count", new byte[] {}, target))
+ External.of("beam:transforms:xlang:count", new byte[] {}, target)
+ .<KV<String, Long>>withOutputType())
.apply(
"toString",
MapElements.into(TypeDescriptors.strings())
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/SchemaTranslationTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/SchemaTranslationTest.java
index 9324bab..6022d70 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/SchemaTranslationTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/SchemaTranslationTest.java
@@ -85,7 +85,7 @@
@Test
public void toAndFromProto() throws Exception {
- SchemaApi.Schema schemaProto = SchemaTranslation.schemaToProto(schema);
+ SchemaApi.Schema schemaProto = SchemaTranslation.schemaToProto(schema, true);
Schema decodedSchema = SchemaTranslation.fromProto(schemaProto);
assertThat(decodedSchema, equalTo(schema));
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/expansion/ExpansionServiceTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/expansion/ExpansionServiceTest.java
index 3e5cf51..6024c10 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/expansion/ExpansionServiceTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/expansion/ExpansionServiceTest.java
@@ -99,6 +99,8 @@
assertEquals(TEST_NAMESPACE + TEST_NAME, expandedTransform.getUniqueName());
// Verify it has the right input.
assertEquals(inputPcollId, Iterables.getOnlyElement(expandedTransform.getInputsMap().values()));
+ // Verify it has the right output.
+ assertEquals("output", Iterables.getOnlyElement(expandedTransform.getOutputsMap().keySet()));
// Loose check that it's composite, and its children are represented.
assertNotEquals(expandedTransform.getSubtransformsCount(), 0);
for (String subtransform : expandedTransform.getSubtransformsList()) {
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/ExecutableStageTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/ExecutableStageTest.java
index f7a0509..863165c 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/ExecutableStageTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/ExecutableStageTest.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.runners.core.construction.graph;
+import static org.apache.beam.runners.core.construction.graph.ExecutableStage.DEFAULT_WIRE_CODER_SETTING;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsInAnyOrder;
@@ -33,7 +34,6 @@
import org.apache.beam.model.pipeline.v1.RunnerApi.PCollection;
import org.apache.beam.model.pipeline.v1.RunnerApi.PTransform;
import org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.SideInput;
import org.apache.beam.model.pipeline.v1.RunnerApi.StateSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.TimerSpec;
@@ -65,12 +65,13 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("foo"))
+ .setDoFn(FunctionSpec.newBuilder())
.putSideInputs("side_input", SideInput.getDefaultInstance())
.putStateSpecs("user_state", StateSpec.getDefaultInstance())
.putTimerSpecs("timer", TimerSpec.getDefaultInstance())
.build()
.toByteString()))
+ .setEnvironmentId("foo")
.build();
PCollection input = PCollection.newBuilder().setUniqueName("input.out").build();
PCollection sideInput = PCollection.newBuilder().setUniqueName("sideInput.in").build();
@@ -104,7 +105,8 @@
Collections.singleton(userStateRef),
Collections.singleton(timerRef),
Collections.singleton(PipelineNode.pTransform("pt", pt)),
- Collections.singleton(PipelineNode.pCollection("output.out", output)));
+ Collections.singleton(PipelineNode.pCollection("output.out", output)),
+ DEFAULT_WIRE_CODER_SETTING);
PTransform stagePTransform = stage.toPTransform("foo");
assertThat(stagePTransform.getOutputsMap(), hasValue("output.out"));
@@ -130,9 +132,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PTransform windowTransform =
PTransform.newBuilder()
@@ -143,9 +146,10 @@
.setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
Components components =
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/GreedyPipelineFuserTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/GreedyPipelineFuserTest.java
index 87c0f72..0a32f9e 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/GreedyPipelineFuserTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/GreedyPipelineFuserTest.java
@@ -41,7 +41,6 @@
import org.apache.beam.model.pipeline.v1.RunnerApi.PTransform;
import org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.SideInput;
import org.apache.beam.model.pipeline.v1.RunnerApi.StateSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.TimerSpec;
@@ -117,9 +116,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("read.out", pc(name))
.putTransforms(
@@ -133,9 +133,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("parDo.out", pc("parDo.out"))
.putTransforms(
@@ -149,10 +150,10 @@
.setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(
- SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("window.out", pc("window.out"))
.build();
@@ -238,9 +239,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("read.out", pc("read.out"))
.putTransforms(
@@ -265,9 +267,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("parDo.out", pc("parDo.out"))
.build();
@@ -314,9 +317,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("read.out", pc("read.out"))
.putTransforms(
@@ -330,9 +334,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("go"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("go")
.build())
.putPcollections("go.out", pc("go.out"))
.putTransforms(
@@ -346,10 +351,10 @@
.setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(
- SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("py.out", pc("py.out"))
.build();
@@ -414,9 +419,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("pyRead.out", pc("pyRead.out"))
.putTransforms(
@@ -440,9 +446,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("go"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("go")
.build())
.putPcollections("goRead.out", pc("goRead.out"))
.putTransforms(
@@ -468,9 +475,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("pyParDo.out", pc("pyParDo.out"))
.putTransforms(
@@ -484,9 +492,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("go"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("go")
.build())
.putPcollections("goParDo.out", pc("goParDo.out"))
.putEnvironments("go", Environments.createDockerEnvironment("go"))
@@ -587,9 +596,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("pyRead.out", pc("pyRead.out"))
.putTransforms(
@@ -613,9 +623,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("go"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("go")
.build())
.putPcollections("goRead.out", pc("goRead.out"))
.putTransforms(
@@ -641,9 +652,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("go"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("go")
.build())
.putPcollections("goParDo.out", pc("goParDo.out"))
.putEnvironments("go", Environments.createDockerEnvironment("go"))
@@ -708,9 +720,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("read.out", pc("read.out"))
.putTransforms(
@@ -724,10 +737,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(
- SdkFunctionSpec.newBuilder().setEnvironmentId("py").build())
+ .setDoFn(FunctionSpec.newBuilder().build())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("parDo.out", pc("parDo.out"))
.build();
@@ -791,9 +804,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("read.out", pc("read.out"))
.putTransforms(
@@ -817,9 +831,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("sideRead.out", pc("sideRead.out"))
.putTransforms(
@@ -833,10 +848,11 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString())
.build())
+ .setEnvironmentId("py")
.build())
.putPcollections("leftParDo.out", pc("leftParDo.out"))
.putTransforms(
@@ -850,10 +866,11 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString())
.build())
+ .setEnvironmentId("py")
.build())
.putPcollections("rightParDo.out", pc("rightParDo.out"))
.putTransforms(
@@ -868,11 +885,12 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.putSideInputs("side", SideInput.getDefaultInstance())
.build()
.toByteString())
.build())
+ .setEnvironmentId("py")
.build())
.putPcollections("sideParDo.out", pc("sideParDo.out"))
.putEnvironments("py", Environments.createDockerEnvironment("py"))
@@ -930,9 +948,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PTransform statefulTransform =
PTransform.newBuilder()
@@ -944,10 +963,11 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.putStateSpecs("state", StateSpec.getDefaultInstance())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
Components components =
@@ -998,9 +1018,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PTransform timerTransform =
PTransform.newBuilder()
@@ -1014,10 +1035,11 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.putTimerSpecs("timer", TimerSpec.getDefaultInstance())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
Components components =
@@ -1067,11 +1089,12 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.putStateSpecs("state", StateSpec.getDefaultInstance())
.putTimerSpecs("timer", TimerSpec.getDefaultInstance())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
Components components =
@@ -1122,9 +1145,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("read.out", pc("read.out"))
.putTransforms(
@@ -1138,9 +1162,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("go"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("go")
.build())
.putPcollections("go.out", pc("go.out"))
.putTransforms(
@@ -1154,10 +1179,10 @@
.setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(
- SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("py.out", pc("py.out"))
.putTransforms(
@@ -1216,9 +1241,10 @@
.setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build();
PTransform read1Transform =
@@ -1231,9 +1257,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build();
PTransform read2Transform =
PTransform.newBuilder()
@@ -1245,9 +1272,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build();
PTransform impulse1Transform =
@@ -1259,7 +1287,7 @@
.setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
.build();
@@ -1272,7 +1300,7 @@
.setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
.build();
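
The hunks above (and in the stage fuser test that follows) all apply the same mechanical change: the environment id moves off the removed SdkFunctionSpec and onto the PTransform itself, while setDoFn/setWindowFn now take a plain FunctionSpec. A condensed sketch of the new proto shape, with illustrative names only:

    // Sketch of the shape these tests now build: the environment id lives on the PTransform
    // itself, and ParDoPayload.setDoFn takes a plain FunctionSpec (SdkFunctionSpec is gone).
    PTransform parDo =
        PTransform.newBuilder()
            .setUniqueName("parDo")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("py")
            .build();
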
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/GreedyStageFuserTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/GreedyStageFuserTest.java
index af918d4..8dc0f1e 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/GreedyStageFuserTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/GreedyStageFuserTest.java
@@ -34,7 +34,6 @@
import org.apache.beam.model.pipeline.v1.RunnerApi.PCollection;
import org.apache.beam.model.pipeline.v1.RunnerApi.PTransform;
import org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.SideInput;
import org.apache.beam.model.pipeline.v1.RunnerApi.StateSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.TimerSpec;
@@ -123,10 +122,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(
- SdkFunctionSpec.newBuilder().setEnvironmentId("go"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("go")
.build())
.putPcollections("go.out", PCollection.newBuilder().setUniqueName("go.out").build())
.putTransforms(
@@ -139,10 +138,10 @@
.setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(
- SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("py")
.build())
.putPcollections("py.out", PCollection.newBuilder().setUniqueName("py.out").build())
.putEnvironments("go", Environments.createDockerEnvironment("go"))
@@ -205,9 +204,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PTransform windowTransform =
PTransform.newBuilder()
@@ -218,9 +218,10 @@
.setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
QueryablePipeline p =
@@ -262,9 +263,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PTransform statefulTransform =
PTransform.newBuilder()
@@ -275,10 +277,11 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.putStateSpecs("state", StateSpec.getDefaultInstance())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
QueryablePipeline p =
@@ -321,9 +324,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PTransform timerTransform =
PTransform.newBuilder()
@@ -334,10 +338,11 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.putTimerSpecs("timer", TimerSpec.getDefaultInstance())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
QueryablePipeline p =
@@ -382,9 +387,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PTransform parDoTransform =
PTransform.newBuilder()
@@ -395,9 +401,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PTransform flattenTransform =
PTransform.newBuilder()
@@ -415,9 +422,10 @@
.setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
QueryablePipeline p =
@@ -464,9 +472,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PTransform otherEnvRead =
PTransform.newBuilder()
@@ -477,9 +486,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("rare"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("rare")
.build();
PTransform flattenTransform =
PTransform.newBuilder()
@@ -497,9 +507,10 @@
.setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
Components components =
@@ -561,10 +572,11 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString())
.build())
+ .setEnvironmentId("py")
.build();
PTransform goRead =
PTransform.newBuilder()
@@ -575,10 +587,11 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("go"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString())
.build())
+ .setEnvironmentId("go")
.build();
PTransform pyParDo =
@@ -590,10 +603,11 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("py"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString())
.build())
+ .setEnvironmentId("py")
.build();
PTransform goWindow =
PTransform.newBuilder()
@@ -604,10 +618,11 @@
.setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(SdkFunctionSpec.newBuilder().setEnvironmentId("go"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString())
.build())
+ .setEnvironmentId("go")
.build();
PCollection flattenPc = PCollection.newBuilder().setUniqueName("flatten.out").build();
@@ -682,9 +697,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PCollection parDoOutput = PCollection.newBuilder().setUniqueName("parDo.out").build();
@@ -704,10 +720,10 @@
.setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(
- SdkFunctionSpec.newBuilder().setEnvironmentId("rare"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("rare")
.build())
.putPcollections(
"window.out", PCollection.newBuilder().setUniqueName("window.out").build())
@@ -749,9 +765,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
QueryablePipeline p =
QueryablePipeline.forPrimitivesIn(
@@ -770,10 +787,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(
- SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build())
.putPcollections(
"parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build())
@@ -787,10 +804,10 @@
.setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(
- SdkFunctionSpec.newBuilder().setEnvironmentId("rare"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("rare")
.build())
.putPcollections(
"window.out", PCollection.newBuilder().setUniqueName("window.out").build())
@@ -828,9 +845,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
QueryablePipeline p =
@@ -863,11 +881,11 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(
- SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.putSideInputs("side_input", SideInput.getDefaultInstance())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build())
.putPcollections(
"parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build())
@@ -881,10 +899,10 @@
.setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
.setPayload(
WindowIntoPayload.newBuilder()
- .setWindowFn(
- SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setWindowFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build())
.putPcollections(
"window.out", PCollection.newBuilder().setUniqueName("window.out").build())
@@ -912,9 +930,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PTransform parDoTransform =
@@ -928,10 +947,11 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.putSideInputs("side_input", SideInput.getDefaultInstance())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PCollection sideInputPCollection =
PCollection.newBuilder().setUniqueName("side_read.out").build();
@@ -996,9 +1016,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PTransform processMain =
PTransform.newBuilder()
@@ -1010,10 +1031,11 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.putSideInputs("side", SideInput.getDefaultInstance())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PCollection sidePC = PCollection.newBuilder().setUniqueName("sidePC").build();
@@ -1051,9 +1073,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PTransform parDoTransform =
PTransform.newBuilder()
@@ -1064,10 +1087,11 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.putStateSpecs("state_spec", StateSpec.getDefaultInstance())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
PCollection userStateMainInputPCollection =
PCollection.newBuilder().setUniqueName("read.out").build();
@@ -1124,9 +1148,10 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(SdkFunctionSpec.newBuilder().setEnvironmentId("common"))
+ .setDoFn(FunctionSpec.newBuilder())
.build()
.toByteString()))
+ .setEnvironmentId("common")
.build();
QueryablePipeline p =
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/ImmutableExecutableStageTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/ImmutableExecutableStageTest.java
index fd034d8..578d506 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/ImmutableExecutableStageTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/ImmutableExecutableStageTest.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.runners.core.construction.graph;
+import static org.apache.beam.runners.core.construction.graph.ExecutableStage.DEFAULT_WIRE_CODER_SETTING;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.equalTo;
@@ -58,7 +59,7 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(
ParDoPayload.newBuilder()
- .setDoFn(RunnerApi.SdkFunctionSpec.newBuilder().setEnvironmentId("foo"))
+ .setDoFn(RunnerApi.FunctionSpec.newBuilder())
.putSideInputs("side_input", RunnerApi.SideInput.getDefaultInstance())
.putStateSpecs("user_state", RunnerApi.StateSpec.getDefaultInstance())
.putTimerSpecs("timer", RunnerApi.TimerSpec.getDefaultInstance())
@@ -98,7 +99,8 @@
Collections.singleton(userStateRef),
Collections.singleton(timerRef),
Collections.singleton(PipelineNode.pTransform("pt", pt)),
- Collections.singleton(PipelineNode.pCollection("output.out", output)));
+ Collections.singleton(PipelineNode.pCollection("output.out", output)),
+ DEFAULT_WIRE_CODER_SETTING);
assertThat(stage.getComponents().containsTransforms("pt"), is(true));
assertThat(stage.getComponents().containsTransforms("other_pt"), is(false));
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/OutputDeduplicatorTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/OutputDeduplicatorTest.java
index daae554..8e83eef 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/OutputDeduplicatorTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/OutputDeduplicatorTest.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.runners.core.construction.graph;
+import static org.apache.beam.runners.core.construction.graph.ExecutableStage.DEFAULT_WIRE_CODER_SETTING;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables.getOnlyElement;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.empty;
@@ -119,7 +120,8 @@
ImmutableList.of(),
ImmutableList.of(),
ImmutableList.of(PipelineNode.pTransform("one", one)),
- ImmutableList.of(PipelineNode.pCollection(oneOut.getUniqueName(), oneOut)));
+ ImmutableList.of(PipelineNode.pCollection(oneOut.getUniqueName(), oneOut)),
+ DEFAULT_WIRE_CODER_SETTING);
ExecutableStage twoStage =
ImmutableExecutableStage.of(
components,
@@ -129,7 +131,8 @@
ImmutableList.of(),
ImmutableList.of(),
ImmutableList.of(PipelineNode.pTransform("two", two)),
- ImmutableList.of(PipelineNode.pCollection(twoOut.getUniqueName(), twoOut)));
+ ImmutableList.of(PipelineNode.pCollection(twoOut.getUniqueName(), twoOut)),
+ DEFAULT_WIRE_CODER_SETTING);
PTransformNode redTransform = PipelineNode.pTransform("red", red);
PTransformNode blueTransform = PipelineNode.pTransform("blue", blue);
QueryablePipeline pipeline = QueryablePipeline.forPrimitivesIn(components);
@@ -237,7 +240,8 @@
ImmutableList.of(),
ImmutableList.of(
PipelineNode.pTransform("one", one), PipelineNode.pTransform("shared", shared)),
- ImmutableList.of(PipelineNode.pCollection(sharedOut.getUniqueName(), sharedOut)));
+ ImmutableList.of(PipelineNode.pCollection(sharedOut.getUniqueName(), sharedOut)),
+ DEFAULT_WIRE_CODER_SETTING);
ExecutableStage twoStage =
ImmutableExecutableStage.of(
components,
@@ -248,7 +252,8 @@
ImmutableList.of(),
ImmutableList.of(
PipelineNode.pTransform("two", two), PipelineNode.pTransform("shared", shared)),
- ImmutableList.of(PipelineNode.pCollection(sharedOut.getUniqueName(), sharedOut)));
+ ImmutableList.of(PipelineNode.pCollection(sharedOut.getUniqueName(), sharedOut)),
+ DEFAULT_WIRE_CODER_SETTING);
PTransformNode redTransform = PipelineNode.pTransform("red", red);
PTransformNode blueTransform = PipelineNode.pTransform("blue", blue);
QueryablePipeline pipeline = QueryablePipeline.forPrimitivesIn(components);
@@ -367,7 +372,8 @@
ImmutableList.of(),
ImmutableList.of(),
ImmutableList.of(PipelineNode.pTransform("one", one), sharedTransform),
- ImmutableList.of(PipelineNode.pCollection(sharedOut.getUniqueName(), sharedOut)));
+ ImmutableList.of(PipelineNode.pCollection(sharedOut.getUniqueName(), sharedOut)),
+ DEFAULT_WIRE_CODER_SETTING);
PTransformNode redTransform = PipelineNode.pTransform("red", red);
PTransformNode blueTransform = PipelineNode.pTransform("blue", blue);
QueryablePipeline pipeline = QueryablePipeline.forPrimitivesIn(components);
@@ -540,7 +546,8 @@
PipelineNode.pTransform("otherShared", otherShared)),
ImmutableList.of(
PipelineNode.pCollection(sharedOut.getUniqueName(), sharedOut),
- PipelineNode.pCollection(otherSharedOut.getUniqueName(), otherSharedOut)));
+ PipelineNode.pCollection(otherSharedOut.getUniqueName(), otherSharedOut)),
+ DEFAULT_WIRE_CODER_SETTING);
ExecutableStage oneStage =
ImmutableExecutableStage.of(
components,
@@ -551,7 +558,8 @@
ImmutableList.of(),
ImmutableList.of(
PipelineNode.pTransform("one", one), PipelineNode.pTransform("shared", shared)),
- ImmutableList.of(PipelineNode.pCollection(sharedOut.getUniqueName(), sharedOut)));
+ ImmutableList.of(PipelineNode.pCollection(sharedOut.getUniqueName(), sharedOut)),
+ DEFAULT_WIRE_CODER_SETTING);
ExecutableStage twoStage =
ImmutableExecutableStage.of(
components,
@@ -564,7 +572,8 @@
PipelineNode.pTransform("two", two),
PipelineNode.pTransform("otherShared", otherShared)),
ImmutableList.of(
- PipelineNode.pCollection(otherSharedOut.getUniqueName(), otherSharedOut)));
+ PipelineNode.pCollection(otherSharedOut.getUniqueName(), otherSharedOut)),
+ DEFAULT_WIRE_CODER_SETTING);
PTransformNode redTransform = PipelineNode.pTransform("red", red);
PTransformNode blueTransform = PipelineNode.pTransform("blue", blue);
QueryablePipeline pipeline = QueryablePipeline.forPrimitivesIn(components);
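
Every ImmutableExecutableStage.of call site in these tests now ends with the DEFAULT_WIRE_CODER_SETTING constant imported above. A minimal sketch of the updated call, assuming the argument order shown in these tests and purely illustrative variable names for everything already passed before this change:

    // Only the trailing wire coder setting argument is new; the earlier arguments
    // (components, environment, input, side inputs, user states, timers, transforms,
    // outputs) are placeholders for whatever the call site already provides.
    ExecutableStage stage =
        ImmutableExecutableStage.of(
            components,
            environment,
            inputPCollection,
            sideInputs,
            userStates,
            timers,
            transformNodes,
            outputPCollections,
            DEFAULT_WIRE_CODER_SETTING);
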
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/resources/ClasspathScanningResourcesDetectorTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/resources/ClasspathScanningResourcesDetectorTest.java
new file mode 100644
index 0000000..afd0472
--- /dev/null
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/resources/ClasspathScanningResourcesDetectorTest.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.core.construction.resources;
+
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.hamcrest.CoreMatchers.hasItem;
+import static org.hamcrest.CoreMatchers.hasItems;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.containsInRelativeOrder;
+import static org.hamcrest.Matchers.not;
+import static org.junit.Assert.assertFalse;
+
+import io.github.classgraph.ClassGraph;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.util.List;
+import java.util.jar.JarOutputStream;
+import java.util.jar.Manifest;
+import org.apache.beam.sdk.testing.RestoreSystemProperties;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.mockito.Mockito;
+
+public class ClasspathScanningResourcesDetectorTest {
+
+ @Rule public transient TemporaryFolder tmpFolder = new TemporaryFolder();
+
+ @Rule public transient RestoreSystemProperties systemProperties = new RestoreSystemProperties();
+
+ @Test
+ public void shouldDetectDirectories() throws Exception {
+ File folder = tmpFolder.newFolder("folder1");
+ ClassLoader classLoader = new URLClassLoader(new URL[] {folder.toURI().toURL()});
+ ClasspathScanningResourcesDetector detector =
+ new ClasspathScanningResourcesDetector(new ClassGraph());
+
+ List<String> result = detector.detect(classLoader);
+
+ assertThat(result, hasItem(containsString(folder.getAbsolutePath())));
+ }
+
+ @Test
+ public void shouldDetectJarFiles() throws Exception {
+ File jarFile = createTestTmpJarFile("test");
+ ClassLoader classLoader = new URLClassLoader(new URL[] {jarFile.toURI().toURL()});
+ ClasspathScanningResourcesDetector detector =
+ new ClasspathScanningResourcesDetector(new ClassGraph());
+
+ List<String> result = detector.detect(classLoader);
+
+ assertThat(result, hasItem(containsString(jarFile.getAbsolutePath())));
+ }
+
+ @Test
+ public void shouldDetectResourcesInOrderTheyAppearInURLClassLoader() throws Exception {
+ File file1 = createTestTmpJarFile("test1");
+ File file2 = createTestTmpJarFile("test2");
+ ClassLoader classLoader =
+ new URLClassLoader(new URL[] {file1.toURI().toURL(), file2.toURI().toURL()});
+
+ ClasspathScanningResourcesDetector detector =
+ new ClasspathScanningResourcesDetector(new ClassGraph());
+
+ List<String> result = detector.detect(classLoader);
+
+ assertThat(
+ result,
+ containsInRelativeOrder(
+ containsString(file1.getAbsolutePath()), containsString(file2.getAbsolutePath())));
+ }
+
+ private File createTestTmpJarFile(String name) throws IOException {
+ File jarFile = tmpFolder.newFile(name);
+ try (JarOutputStream os = new JarOutputStream(new FileOutputStream(jarFile), new Manifest())) {}
+ return jarFile;
+ }
+
+ @Test
+ public void shouldNotDetectOrdinaryFiles() throws Exception {
+ File textFile = tmpFolder.newFile("ordinaryTextFile.txt");
+ ClassLoader classLoader = new URLClassLoader(new URL[] {textFile.toURI().toURL()});
+ ClasspathScanningResourcesDetector detector =
+ new ClasspathScanningResourcesDetector(new ClassGraph());
+
+ List<String> result = detector.detect(classLoader);
+
+ assertThat(result, not(hasItem(containsString(textFile.getAbsolutePath()))));
+ }
+
+ @Test
+ public void shouldDetectClassPathResourceFromJavaClassPathEnvVariable() throws IOException {
+ String path = tmpFolder.newFolder("folder").getAbsolutePath();
+ System.setProperty("java.class.path", path);
+ ClasspathScanningResourcesDetector detector =
+ new ClasspathScanningResourcesDetector(new ClassGraph());
+
+ List<String> resources = detector.detect(null);
+
+ assertThat(resources, hasItems(containsString(path)));
+ }
+
+ @Test
+ public void shouldNotDetectClassPathResourceThatIsNotAFile() throws Exception {
+ String url = "http://www.google.com/all-the-secrets.jar";
+ ClassLoader classLoader = new URLClassLoader(new URL[] {new URL(url)});
+ ClasspathScanningResourcesDetector detector =
+ new ClasspathScanningResourcesDetector(new ClassGraph());
+
+ List<String> result = detector.detect(classLoader);
+
+ assertThat(result, not(hasItem(containsString(url))));
+ }
+
+ /*
+ * The ClassGraph library used by the tested algorithm can also detect resources from the
+ * "java.class.path" system property. Even when the supplied classloader is of no use, resource
+ * paths can still be detected and loaded from that property.
+ */
+ @Test
+ public void shouldStillDetectResourcesEvenIfClassloaderIsUseless() {
+ ClassLoader uselessClassLoader = Mockito.mock(ClassLoader.class);
+ ClasspathScanningResourcesDetector detector =
+ new ClasspathScanningResourcesDetector(new ClassGraph());
+
+ List<String> detectedResources = detector.detect(uselessClassLoader);
+
+ assertFalse(detectedResources.isEmpty());
+ }
+}
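
The new test class above exercises the detector directly; the usage pattern it relies on boils down to the following sketch (the jar path is hypothetical and used only for illustration):

    // Sketch of the pattern the tests above exercise.
    File jar = new File("/tmp/my-pipeline.jar");
    ClassLoader classLoader = new URLClassLoader(new URL[] {jar.toURI().toURL()});

    ClasspathScanningResourcesDetector detector =
        new ClasspathScanningResourcesDetector(new ClassGraph());
    List<String> classpathEntries = detector.detect(classLoader);
    // Entries are absolute paths of directories and jars, reported in class loader order;
    // non-file URLs and ordinary (non-jar) files are not included.
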
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/PipelineResourcesTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/resources/PipelineResourcesTest.java
similarity index 65%
rename from runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/PipelineResourcesTest.java
rename to runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/resources/PipelineResourcesTest.java
index 27b42e2..b84d3e7 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/PipelineResourcesTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/resources/PipelineResourcesTest.java
@@ -15,9 +15,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.beam.runners.core.construction;
+package org.apache.beam.runners.core.construction.resources;
import static junit.framework.TestCase.assertTrue;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.empty;
+import static org.hamcrest.Matchers.not;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThrows;
@@ -28,51 +32,44 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.junit.Rule;
import org.junit.Test;
-import org.junit.rules.ExpectedException;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
-import org.mockito.Mockito;
/** Tests for PipelineResources. */
@RunWith(JUnit4.class)
public class PipelineResourcesTest {
@Rule public transient TemporaryFolder tmpFolder = new TemporaryFolder();
- @Rule public transient ExpectedException thrown = ExpectedException.none();
@Test
- public void detectClassPathResourceWithFileResources() throws Exception {
+ public void testDetectsResourcesToStage() throws IOException {
File file = tmpFolder.newFile("file");
- File file2 = tmpFolder.newFile("file2");
+ URLClassLoader classLoader = new URLClassLoader(new URL[] {file.toURI().toURL()});
+ PipelineResourcesOptions options =
+ PipelineOptionsFactory.create().as(PipelineResourcesOptions.class);
+
+ List<String> detectedResources =
+ PipelineResources.detectClassPathResourcesToStage(classLoader, options);
+
+ assertThat(detectedResources, not(empty()));
+ }
+
+ @Test
+ public void testDetectedResourcesListDoNotContainNotStageableResources() throws IOException {
+ File unstageableResource = tmpFolder.newFolder(".gradle/wrapper/unstageableResource");
URLClassLoader classLoader =
- new URLClassLoader(new URL[] {file.toURI().toURL(), file2.toURI().toURL()});
+ new URLClassLoader(new URL[] {unstageableResource.toURI().toURL()});
+ PipelineResourcesOptions options =
+ PipelineOptionsFactory.create().as(PipelineResourcesOptions.class);
- assertEquals(
- ImmutableList.of(file.getAbsolutePath(), file2.getAbsolutePath()),
- PipelineResources.detectClassPathResourcesToStage(classLoader));
- }
+ List<String> detectedResources =
+ PipelineResources.detectClassPathResourcesToStage(classLoader, options);
- @Test
- public void detectClassPathResourcesWithUnsupportedClassLoader() {
- ClassLoader mockClassLoader = Mockito.mock(ClassLoader.class);
- thrown.expect(IllegalArgumentException.class);
- thrown.expectMessage("Unable to use ClassLoader to detect classpath elements.");
-
- PipelineResources.detectClassPathResourcesToStage(mockClassLoader);
- }
-
- @Test
- public void detectClassPathResourceWithNonFileResources() throws Exception {
- String url = "http://www.google.com/all-the-secrets.jar";
- URLClassLoader classLoader = new URLClassLoader(new URL[] {new URL(url)});
- thrown.expect(IllegalArgumentException.class);
- thrown.expectMessage("Unable to convert url (" + url + ") to file.");
-
- PipelineResources.detectClassPathResourcesToStage(classLoader);
+ assertThat(detectedResources, not(contains(unstageableResource.getAbsolutePath())));
}
@Test
@@ -105,12 +102,15 @@
@Test
public void testIfThrowsWhenThereIsNoTemporaryFolderForJars() throws IOException {
- thrown.expect(IllegalArgumentException.class);
- thrown.expectMessage("Please provide temporary location for storing the jar files.");
-
List<String> filesToStage = new ArrayList<>();
filesToStage.add(tmpFolder.newFolder().getAbsolutePath());
- PipelineResources.prepareFilesForStaging(filesToStage, null);
+ IllegalArgumentException exception =
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> PipelineResources.prepareFilesForStaging(filesToStage, null));
+
+ assertEquals(
+ "Please provide temporary location for storing the jar files.", exception.getMessage());
}
}
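
For callers, the resource detection entry point now takes a PipelineResourcesOptions in addition to the class loader, as the rewritten tests show. A minimal usage sketch, assuming a hypothetical MyPipeline class whose loader holds the classpath to stage:

    // Sketch; MyPipeline is a hypothetical class, used only to obtain a class loader.
    PipelineResourcesOptions options =
        PipelineOptionsFactory.create().as(PipelineResourcesOptions.class);
    List<String> filesToStage =
        PipelineResources.detectClassPathResourcesToStage(
            MyPipeline.class.getClassLoader(), options);
    // Entries known not to be stageable (for example Gradle wrapper directories, as asserted
    // in the test above) are filtered out of the returned list.
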
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java
index cf28436..e2fc262 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunner.java
@@ -38,7 +38,13 @@
* Calls a {@link DoFn DoFn's} {@link DoFn.OnTimer @OnTimer} method for the given timer in the
* given window.
*/
- void onTimer(String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain);
+ void onTimer(
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain);
/**
* Calls a {@link DoFn DoFn's} {@link DoFn.FinishBundle @FinishBundle} method and performs
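
A runner that dispatches fired timers now forwards the timer family id and the output timestamp as well. A sketch of such a dispatch, assuming TimerData accessors that mirror the fields of the TimerData.of factory used later in this change:

    // Sketch only: `runner` is a DoFnRunner, `timer` a TimerInternals.TimerData,
    // `window` the window the timer was set in.
    runner.onTimer(
        timer.getTimerId(),
        timer.getTimerFamilyId(),
        window,
        timer.getTimestamp(),
        timer.getOutputTimestamp(),
        timer.getDomain());
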
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/InMemoryTimerInternals.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/InMemoryTimerInternals.java
index 7b01c04..286e60b 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/InMemoryTimerInternals.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/InMemoryTimerInternals.java
@@ -103,11 +103,19 @@
@Override
public void setTimer(
- StateNamespace namespace, String timerId, Instant target, TimeDomain timeDomain) {
- setTimer(TimerData.of(timerId, namespace, target, timeDomain));
+ StateNamespace namespace,
+ String timerId,
+ String timerFamilyId,
+ Instant target,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
+ setTimer(TimerData.of(timerId, timerFamilyId, namespace, target, outputTimestamp, timeDomain));
}
- /** @deprecated use {@link #setTimer(StateNamespace, String, Instant, TimeDomain)}. */
+ /**
+ * @deprecated use {@link #setTimer(StateNamespace, String, String, Instant, Instant,
+ * TimeDomain)}.
+ */
@Deprecated
@Override
public void setTimer(TimerData timerData) {
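
Callers of TimerInternals now use the six-argument form. A minimal sketch, assuming namespace and firingTime are already in scope; the empty string stands in for the (unused) timer family id, and the output timestamp simply repeats the firing time here:

    // Sketch only: setting an event time timer through the new six-argument API.
    timerInternals.setTimer(
        namespace, "my-timer", "", firingTime, firingTime, TimeDomain.EVENT_TIME);
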
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/KeyedWorkItemCoder.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/KeyedWorkItemCoder.java
index 2949548..fc395de 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/KeyedWorkItemCoder.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/KeyedWorkItemCoder.java
@@ -22,7 +22,7 @@
import java.io.OutputStream;
import java.util.List;
import org.apache.beam.runners.core.TimerInternals.TimerData;
-import org.apache.beam.runners.core.TimerInternals.TimerDataCoder;
+import org.apache.beam.runners.core.TimerInternals.TimerDataCoderV2;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.CoderException;
import org.apache.beam.sdk.coders.IterableCoder;
@@ -54,7 +54,7 @@
this.keyCoder = keyCoder;
this.elemCoder = elemCoder;
this.windowCoder = windowCoder;
- this.timersCoder = IterableCoder.of(TimerDataCoder.of(windowCoder));
+ this.timersCoder = IterableCoder.of(TimerDataCoderV2.of(windowCoder));
this.elemsCoder = IterableCoder.of(FullWindowedValueCoder.of(elemCoder, windowCoder));
}
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/LateDataDroppingDoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/LateDataDroppingDoFnRunner.java
index 4865e82..8f19b5f 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/LateDataDroppingDoFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/LateDataDroppingDoFnRunner.java
@@ -82,8 +82,13 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
- doFnRunner.onTimer(timerId, window, timestamp, timeDomain);
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
+ doFnRunner.onTimer(timerId, timerFamilyId, window, timestamp, outputTimestamp, timeDomain);
}
@Override
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/ProcessFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/ProcessFnRunner.java
index d65b5f4..c310c49 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/ProcessFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/ProcessFnRunner.java
@@ -83,7 +83,12 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
throw new UnsupportedOperationException("User timers unsupported in ProcessFn");
}
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/PushbackSideInputDoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/PushbackSideInputDoFnRunner.java
index cc2e86a..32a61af 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/PushbackSideInputDoFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/PushbackSideInputDoFnRunner.java
@@ -43,7 +43,13 @@
Iterable<WindowedValue<InputT>> processElementInReadyWindows(WindowedValue<InputT> elem);
/** Calls the underlying {@link DoFn.OnTimer} method. */
- void onTimer(String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain);
+ void onTimer(
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain);
/** Calls the underlying {@link DoFn.FinishBundle} method. */
void finishBundle();
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/SimpleDoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/SimpleDoFnRunner.java
index 2b105fb..8af4d13 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/SimpleDoFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/SimpleDoFnRunner.java
@@ -186,18 +186,22 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
// The effective timestamp is when derived elements will have their timestamp set, if not
- // otherwise specified. If this is an event time timer, then they have the timestamp of the
- // timer itself. Otherwise, they are set to the input timestamp, which is by definition
+ // otherwise specified. If this is an event time timer, then they have the timer's output
+ // timestamp. Otherwise, they are set to the input timestamp, which is by definition
// non-late.
Instant effectiveTimestamp;
switch (timeDomain) {
case EVENT_TIME:
- effectiveTimestamp = timestamp;
+ effectiveTimestamp = outputTimestamp;
break;
-
case PROCESSING_TIME:
case SYNCHRONIZED_PROCESSING_TIME:
effectiveTimestamp = stepContext.timerInternals().currentInputWatermarkTime();
@@ -907,6 +911,8 @@
private final StateNamespace namespace;
private final String timerId;
private final TimerSpec spec;
+ private Instant target;
+ private Instant outputTimestamp;
private Duration period = Duration.ZERO;
private Duration offset = Duration.ZERO;
@@ -925,30 +931,14 @@
@Override
public void set(Instant target) {
- // Verifies that the time domain of this timer is acceptable for absolute timers.
- if (!TimeDomain.EVENT_TIME.equals(spec.getTimeDomain())) {
- throw new IllegalStateException(
- "Can only set relative timers in processing time domain. Use #setRelative()");
- }
-
- // Ensures that the target time is reasonable. For event time timers this means that the time
- // should be prior to window GC time.
- if (TimeDomain.EVENT_TIME.equals(spec.getTimeDomain())) {
- Instant windowExpiry = window.maxTimestamp().plus(allowedLateness);
- checkArgument(
- !target.isAfter(windowExpiry),
- "Attempted to set event time timer for %s but that is after"
- + " the expiration of window %s",
- target,
- windowExpiry);
- }
-
- setUnderlyingTimer(target);
+ this.target = target;
+ verifyAbsoluteTimeDomain();
+ setAndVerifyOutputTimestamp();
+ setUnderlyingTimer();
}
@Override
public void setRelative() {
- Instant target;
Instant now = getCurrentTime();
if (period.equals(Duration.ZERO)) {
target = now.plus(offset);
@@ -957,7 +947,9 @@
target = millisSinceStart == 0 ? now : now.plus(period).minus(millisSinceStart);
}
target = minTargetAndGcTime(target);
- setUnderlyingTimer(target);
+
+ setAndVerifyOutputTimestamp();
+ setUnderlyingTimer();
}
@Override
@@ -986,13 +978,58 @@
return target;
}
+ @Override
+ public Timer withOutputTimestamp(Instant outputTimestamp) {
+ this.outputTimestamp = outputTimestamp;
+ return this;
+ }
+
+ /** Verifies that the time domain of this timer is acceptable for absolute timers. */
+ private void verifyAbsoluteTimeDomain() {
+ if (!TimeDomain.EVENT_TIME.equals(spec.getTimeDomain())) {
+ throw new IllegalStateException(
+            "Can only set relative timers in processing time domain. Use #setRelative()");
+ }
+ }
+
+    /**
+     * Ensures that:
+     *
+     * <ul>
+     *   <li>Users can't set {@code outputTimestamp} for processing time timers.
+     *   <li>Event time timers' {@code outputTimestamp} is set before window expiration.
+     * </ul>
+     */
+ private void setAndVerifyOutputTimestamp() {
+ // Output timestamp is currently not supported in processing time timers.
+ if (outputTimestamp != null && !TimeDomain.EVENT_TIME.equals(spec.getTimeDomain())) {
+ throw new IllegalStateException("Cannot set outputTimestamp in processing time domain.");
+ }
+      // Output timestamp defaults to the timer's firing time if not set by the user.
+ if (outputTimestamp == null) {
+ outputTimestamp = target;
+ }
+
+ if (TimeDomain.EVENT_TIME.equals(spec.getTimeDomain())) {
+ Instant windowExpiry = window.maxTimestamp().plus(allowedLateness);
+ checkArgument(
+ !target.isAfter(windowExpiry),
+            "Attempted to set an event time timer for %s but that is"
+ + " after the expiration of window %s",
+ target,
+ windowExpiry);
+ }
+ }
+
/**
* Sets the timer for the target time without checking anything about whether it is a reasonable
* thing to do. For example, absolute processing time timers are not really sensible since the
* user has no way to compute a good choice of time.
*/
- private void setUnderlyingTimer(Instant target) {
- timerInternals.setTimer(namespace, timerId, target, spec.getTimeDomain());
+ private void setUnderlyingTimer() {
+ timerInternals.setTimer(
+ namespace, timerId, "", target, outputTimestamp, spec.getTimeDomain());
}
private Instant getCurrentTime() {
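For orientation, a minimal sketch of how a pipeline author would exercise the output-timestamp plumbing implemented above, assuming the user-facing Timer#withOutputTimestamp API that this change supports; the DoFn and timer names below are illustrative only and not part of this diff.

import org.apache.beam.sdk.state.TimeDomain;
import org.apache.beam.sdk.state.Timer;
import org.apache.beam.sdk.state.TimerSpec;
import org.apache.beam.sdk.state.TimerSpecs;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.KV;
import org.joda.time.Duration;
import org.joda.time.Instant;

class DelayedEmitFn extends DoFn<KV<String, Integer>, Integer> {
  @TimerId("emit")
  private final TimerSpec emitSpec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

  @ProcessElement
  public void process(ProcessContext c, @TimerId("emit") Timer emit) {
    // Fire one minute past the element's timestamp, but hold the output watermark at the
    // element's timestamp so that elements emitted from onEmit are not considered late.
    Instant fireAt = c.timestamp().plus(Duration.standardMinutes(1));
    emit.withOutputTimestamp(c.timestamp()).set(fireAt);
  }

  @OnTimer("emit")
  public void onEmit(OnTimerContext ctx) {
    ctx.output(0);
  }
}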
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunner.java
index 36a89fe..b27e046 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunner.java
@@ -108,8 +108,13 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
- underlying.onTimer(timerId, window, timestamp, timeDomain);
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
+ underlying.onTimer(timerId, timerFamilyId, window, timestamp, outputTimestamp, timeDomain);
}
@Override
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
index 14a9502..f6170ce 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
@@ -38,7 +38,7 @@
/**
* A customized {@link DoFnRunner} that handles late data dropping and garbage collection for
* stateful {@link DoFn DoFns}. It registers a GC timer in {@link #processElement(WindowedValue)}
- * and does cleanup in {@link #onTimer(String, BoundedWindow, Instant, TimeDomain)}
+ * and does cleanup in {@link #onTimer(String, String, BoundedWindow, Instant, Instant, TimeDomain)}
*
* @param <InputT> the type of the {@link DoFn} (main) input elements
* @param <OutputT> the type of the {@link DoFn} (main) output elements
@@ -117,7 +117,12 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
if (cleanupTimer.isForWindow(timerId, window, timestamp, timeDomain)) {
stateCleaner.clearForWindow(window);
       // This should invoke the onWindowExpiration of the DoFn
@@ -134,7 +139,7 @@
window,
cleanupTimer.currentInputWatermarkTime());
} else {
- doFnRunner.onTimer(timerId, window, timestamp, timeDomain);
+ doFnRunner.onTimer(timerId, timerFamilyId, window, timestamp, outputTimestamp, timeDomain);
}
}
}
@@ -208,7 +213,12 @@
// make sure this fires after any window.maxTimestamp() timers
gcTime = gcTime.plus(GC_DELAY_MS);
timerInternals.setTimer(
- StateNamespaces.window(windowCoder, window), GC_TIMER_ID, gcTime, TimeDomain.EVENT_TIME);
+ StateNamespaces.window(windowCoder, window),
+ GC_TIMER_ID,
+ "",
+ gcTime,
+ window.maxTimestamp(),
+ TimeDomain.EVENT_TIME);
}
@Override
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/TimerInternals.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/TimerInternals.java
index a766143..e9083d7 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/TimerInternals.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/TimerInternals.java
@@ -54,9 +54,18 @@
*
* <p>It is an error to set a timer for two different time domains.
*/
- void setTimer(StateNamespace namespace, String timerId, Instant target, TimeDomain timeDomain);
+ void setTimer(
+ StateNamespace namespace,
+ String timerId,
+ String timerFamilyId,
+ Instant target,
+ Instant outputTimestamp,
+ TimeDomain timeDomain);
- /** @deprecated use {@link #setTimer(StateNamespace, String, Instant, TimeDomain)}. */
+ /**
+ * @deprecated use {@link #setTimer(StateNamespace, String, String, Instant, Instant,
+ * TimeDomain)}.
+ */
@Deprecated
void setTimer(TimerData timerData);
@@ -161,35 +170,82 @@
public abstract String getTimerId();
+ public abstract String getTimerFamilyId();
+
public abstract StateNamespace getNamespace();
public abstract Instant getTimestamp();
+ /**
+     * The timestamp that the timer assigns to elements output from the {@link
+     * org.apache.beam.sdk.transforms.DoFn.OnTimer} method. For event time timers, the output
+     * watermark is held at this timestamp until the timer fires.
+ */
+ public abstract Instant getOutputTimestamp();
+
public abstract TimeDomain getDomain();
// When adding a new field, make sure to add it to the compareTo() method.
- /**
- * Construct a {@link TimerData} for the given parameters, where the timer ID is automatically
- * generated.
- */
+    /**
+     * Construct a {@link TimerData} for the given parameters; the timer family ID defaults to the
+     * timer ID.
+     */
public static TimerData of(
- String timerId, StateNamespace namespace, Instant timestamp, TimeDomain domain) {
- return new AutoValue_TimerInternals_TimerData(timerId, namespace, timestamp, domain);
+ String timerId,
+ StateNamespace namespace,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain domain) {
+ return new AutoValue_TimerInternals_TimerData(
+ timerId, timerId, namespace, timestamp, outputTimestamp, domain);
}
/**
- * Construct a {@link TimerData} for the given parameters, where the timer ID is
+     * Construct a {@link TimerData} for the given parameters, including an explicit timer family ID
+     * and {@code outputTimestamp}.
+ */
+ public static TimerData of(
+ String timerId,
+ String timerFamilyId,
+ StateNamespace namespace,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain domain) {
+ return new AutoValue_TimerInternals_TimerData(
+ timerId, timerFamilyId, namespace, timestamp, outputTimestamp, domain);
+ }
+
+ /**
+     * Construct a {@link TimerData} for the given parameters. The timer family ID defaults to the
+     * timer ID, and {@code outputTimestamp} defaults to the timer {@code timestamp}.
+ */
+ public static TimerData of(
+ String timerId, StateNamespace namespace, Instant timestamp, TimeDomain domain) {
+ return new AutoValue_TimerInternals_TimerData(
+ timerId, timerId, namespace, timestamp, timestamp, domain);
+ }
+
+ /**
+     * Construct a {@link TimerData} for the given parameters, where the timer ID is
* deterministically generated from the {@code timestamp} and {@code domain}.
*/
- public static TimerData of(StateNamespace namespace, Instant timestamp, TimeDomain domain) {
+ public static TimerData of(
+ StateNamespace namespace, Instant timestamp, Instant outputTimestamp, TimeDomain domain) {
String timerId =
new StringBuilder()
.append(domain.ordinal())
.append(':')
.append(timestamp.getMillis())
.toString();
- return of(timerId, namespace, timestamp, domain);
+ return of(timerId, namespace, timestamp, outputTimestamp, domain);
+ }
+
+ /**
+ * Construct a {@link TimerData} for the given parameters, where the timer ID is
+ * deterministically generated from the {@code timestamp} and {@code domain}. Also, output
+ * timestamp is set to the timer timestamp by default.
+ */
+ public static TimerData of(StateNamespace namespace, Instant timestamp, TimeDomain domain) {
+ return of(namespace, timestamp, timestamp, domain);
}
/**
@@ -207,8 +263,10 @@
ComparisonChain chain =
ComparisonChain.start()
.compare(this.getTimestamp(), that.getTimestamp())
+ .compare(this.getOutputTimestamp(), that.getOutputTimestamp())
.compare(this.getDomain(), that.getDomain())
- .compare(this.getTimerId(), that.getTimerId());
+ .compare(this.getTimerId(), that.getTimerId())
+ .compare(this.getTimerFamilyId(), that.getTimerFamilyId());
if (chain.result() == 0 && !this.getNamespace().equals(that.getNamespace())) {
// Obtaining the stringKey may be expensive; only do so if required
chain = chain.compare(getNamespace().stringKey(), that.getNamespace().stringKey());
@@ -218,6 +276,56 @@
}
/** A {@link Coder} for {@link TimerData}. */
+ class TimerDataCoderV2 extends StructuredCoder<TimerData> {
+ private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of();
+ private static final InstantCoder INSTANT_CODER = InstantCoder.of();
+ private final Coder<? extends BoundedWindow> windowCoder;
+
+ public static TimerDataCoderV2 of(Coder<? extends BoundedWindow> windowCoder) {
+ return new TimerDataCoderV2(windowCoder);
+ }
+
+ private TimerDataCoderV2(Coder<? extends BoundedWindow> windowCoder) {
+ this.windowCoder = windowCoder;
+ }
+
+ @Override
+ public void encode(TimerData timer, OutputStream outStream) throws CoderException, IOException {
+ STRING_CODER.encode(timer.getTimerId(), outStream);
+ STRING_CODER.encode(timer.getTimerFamilyId(), outStream);
+ STRING_CODER.encode(timer.getNamespace().stringKey(), outStream);
+ INSTANT_CODER.encode(timer.getTimestamp(), outStream);
+ INSTANT_CODER.encode(timer.getOutputTimestamp(), outStream);
+ STRING_CODER.encode(timer.getDomain().name(), outStream);
+ }
+
+ @Override
+ public TimerData decode(InputStream inStream) throws CoderException, IOException {
+ String timerId = STRING_CODER.decode(inStream);
+ String timerFamilyId = STRING_CODER.decode(inStream);
+ StateNamespace namespace =
+ StateNamespaces.fromString(STRING_CODER.decode(inStream), windowCoder);
+ Instant timestamp = INSTANT_CODER.decode(inStream);
+ Instant outputTimestamp = INSTANT_CODER.decode(inStream);
+ TimeDomain domain = TimeDomain.valueOf(STRING_CODER.decode(inStream));
+ return TimerData.of(timerId, timerFamilyId, namespace, timestamp, outputTimestamp, domain);
+ }
+
+ @Override
+ public List<? extends Coder<?>> getCoderArguments() {
+ return Arrays.asList(windowCoder);
+ }
+
+ @Override
+ public void verifyDeterministic() throws NonDeterministicException {
+ verifyDeterministic(this, "window coder must be deterministic", windowCoder);
+ }
+ }
+
+ /**
+   * A {@link Coder} for {@link TimerData}. Kept so that encoding and decoding remain backward
+   * compatible for Dataflow.
+ */
class TimerDataCoder extends StructuredCoder<TimerData> {
private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of();
private static final InstantCoder INSTANT_CODER = InstantCoder.of();
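A quick sketch (not part of the change) of the defaulting behavior of the TimerData factories above: the timer family ID falls back to the timer ID, and the output timestamp falls back to the firing timestamp unless supplied explicitly.

import org.apache.beam.runners.core.StateNamespaces;
import org.apache.beam.runners.core.TimerInternals.TimerData;
import org.apache.beam.sdk.state.TimeDomain;
import org.joda.time.Instant;

class TimerDataDefaultsSketch {
  static void demo() {
    // Family ID defaults to the timer ID; output timestamp defaults to the firing timestamp.
    TimerData a =
        TimerData.of("t", StateNamespaces.global(), new Instant(5), TimeDomain.EVENT_TIME);
    assert "t".equals(a.getTimerFamilyId());
    assert new Instant(5).equals(a.getOutputTimestamp());

    // Explicit family ID and output timestamp; the output timestamp acts as the watermark hold.
    TimerData b =
        TimerData.of(
            "t",
            "family",
            StateNamespaces.global(),
            new Instant(5),
            new Instant(3),
            TimeDomain.EVENT_TIME);
    assert new Instant(3).equals(b.getOutputTimestamp());
  }
}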
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/InMemoryTimerInternalsTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/InMemoryTimerInternalsTest.java
index 6be35b7..dd106329 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/InMemoryTimerInternalsTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/InMemoryTimerInternalsTest.java
@@ -71,8 +71,8 @@
Instant laterTimestamp = new Instant(42);
underTest.advanceInputWatermark(new Instant(0));
- underTest.setTimer(NS1, ID1, earlyTimestamp, TimeDomain.EVENT_TIME);
- underTest.setTimer(NS1, ID1, laterTimestamp, TimeDomain.EVENT_TIME);
+ underTest.setTimer(NS1, ID1, ID1, earlyTimestamp, earlyTimestamp, TimeDomain.EVENT_TIME);
+ underTest.setTimer(NS1, ID1, ID1, laterTimestamp, laterTimestamp, TimeDomain.EVENT_TIME);
underTest.advanceInputWatermark(earlyTimestamp.plus(1L));
assertThat(underTest.removeNextEventTimer(), nullValue());
@@ -86,7 +86,7 @@
public void testDeletionIdempotent() throws Exception {
InMemoryTimerInternals underTest = new InMemoryTimerInternals();
Instant timestamp = new Instant(42);
- underTest.setTimer(NS1, ID1, timestamp, TimeDomain.EVENT_TIME);
+ underTest.setTimer(NS1, ID1, ID1, timestamp, timestamp, TimeDomain.EVENT_TIME);
underTest.deleteTimer(NS1, ID1);
underTest.deleteTimer(NS1, ID1);
}
@@ -97,7 +97,7 @@
Instant timestamp = new Instant(42);
underTest.advanceInputWatermark(new Instant(0));
- underTest.setTimer(NS1, ID1, timestamp, TimeDomain.EVENT_TIME);
+ underTest.setTimer(NS1, ID1, ID1, timestamp, timestamp, TimeDomain.EVENT_TIME);
underTest.deleteTimer(NS1, ID1);
underTest.advanceInputWatermark(new Instant(43));
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/SimpleDoFnRunnerTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/SimpleDoFnRunnerTest.java
index b790314..90bb5aa 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/SimpleDoFnRunnerTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/SimpleDoFnRunnerTest.java
@@ -119,7 +119,12 @@
thrown.expectCause(is(fn.exceptionToThrow));
runner.onTimer(
- ThrowingDoFn.TIMER_ID, GlobalWindow.INSTANCE, new Instant(0), TimeDomain.EVENT_TIME);
+ ThrowingDoFn.TIMER_ID,
+ ThrowingDoFn.TIMER_ID,
+ GlobalWindow.INSTANCE,
+ new Instant(0),
+ new Instant(0),
+ TimeDomain.EVENT_TIME);
}
/**
@@ -156,6 +161,8 @@
.setTimer(
StateNamespaces.window(new GlobalWindows().windowCoder(), GlobalWindow.INSTANCE),
DoFnWithTimers.TIMER_ID,
+ "",
+ currentTime.plus(DoFnWithTimers.TIMER_OFFSET),
currentTime.plus(DoFnWithTimers.TIMER_OFFSET),
TimeDomain.EVENT_TIME);
}
@@ -237,8 +244,10 @@
// the method call.
runner.onTimer(
DoFnWithTimers.TIMER_ID,
+ DoFnWithTimers.TIMER_ID,
GlobalWindow.INSTANCE,
currentTime.plus(offset),
+ currentTime.plus(offset),
TimeDomain.EVENT_TIME);
assertThat(
@@ -246,8 +255,10 @@
contains(
TimerData.of(
DoFnWithTimers.TIMER_ID,
+ DoFnWithTimers.TIMER_ID,
StateNamespaces.window(windowFn.windowCoder(), GlobalWindow.INSTANCE),
currentTime.plus(offset),
+ currentTime.plus(offset),
TimeDomain.EVENT_TIME)));
}
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunnerTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunnerTest.java
index 28b387e..10ac7a8 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunnerTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunnerTest.java
@@ -283,7 +283,13 @@
// Mocking is not easily compatible with annotation analysis, so we manually record
// the method call.
- runner.onTimer(timerId, window, new Instant(timestamp), TimeDomain.EVENT_TIME);
+ runner.onTimer(
+ timerId,
+ timerId,
+ window,
+ new Instant(timestamp),
+ new Instant(timestamp),
+ TimeDomain.EVENT_TIME);
assertThat(
underlying.firedTimers,
@@ -320,12 +326,19 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
firedTimers.add(
TimerData.of(
timerId,
+ timerFamilyId,
StateNamespaces.window(IntervalWindow.getCoder(), (IntervalWindow) window),
timestamp,
+ outputTimestamp,
timeDomain));
}
@@ -458,7 +471,13 @@
StateNamespace namespace = timer.getNamespace();
checkArgument(namespace instanceof StateNamespaces.WindowNamespace);
BoundedWindow window = ((StateNamespaces.WindowNamespace) namespace).getWindow();
- toTrigger.onTimer(timer.getTimerId(), window, timer.getTimestamp(), timer.getDomain());
+ toTrigger.onTimer(
+ timer.getTimerId(),
+ timer.getTimerFamilyId(),
+ window,
+ timer.getTimestamp(),
+ timer.getOutputTimestamp(),
+ timer.getDomain());
}
}
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/StatefulDoFnRunnerTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/StatefulDoFnRunnerTest.java
index 85b3c0b..be4e321 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/StatefulDoFnRunnerTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/StatefulDoFnRunnerTest.java
@@ -220,7 +220,13 @@
StateNamespace namespace = timer.getNamespace();
checkArgument(namespace instanceof StateNamespaces.WindowNamespace);
BoundedWindow window = ((StateNamespaces.WindowNamespace) namespace).getWindow();
- toTrigger.onTimer(timer.getTimerId(), window, timer.getTimestamp(), timer.getDomain());
+ toTrigger.onTimer(
+ timer.getTimerId(),
+ timer.getTimerFamilyId(),
+ window,
+ timer.getTimestamp(),
+ timer.getOutputTimestamp(),
+ timer.getDomain());
}
}
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/TimerInternalsTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/TimerInternalsTest.java
index d1e5221..ab2978f 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/TimerInternalsTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/TimerInternalsTest.java
@@ -22,7 +22,7 @@
import static org.junit.Assert.assertThat;
import org.apache.beam.runners.core.TimerInternals.TimerData;
-import org.apache.beam.runners.core.TimerInternals.TimerDataCoder;
+import org.apache.beam.runners.core.TimerInternals.TimerDataCoderV2;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.state.TimeDomain;
import org.apache.beam.sdk.testing.CoderProperties;
@@ -40,13 +40,13 @@
@Test
public void testTimerDataCoder() throws Exception {
CoderProperties.coderDecodeEncodeEqual(
- TimerDataCoder.of(GlobalWindow.Coder.INSTANCE),
+ TimerDataCoderV2.of(GlobalWindow.Coder.INSTANCE),
TimerData.of(
"arbitrary-id", StateNamespaces.global(), new Instant(0), TimeDomain.EVENT_TIME));
Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
CoderProperties.coderDecodeEncodeEqual(
- TimerDataCoder.of(windowCoder),
+ TimerDataCoderV2.of(windowCoder),
TimerData.of(
"another-id",
StateNamespaces.window(
@@ -57,7 +57,7 @@
@Test
public void testCoderIsSerializableWithWellKnownCoderType() {
- CoderProperties.coderSerializable(TimerDataCoder.of(GlobalWindow.Coder.INSTANCE));
+ CoderProperties.coderSerializable(TimerDataCoderV2.of(GlobalWindow.Coder.INSTANCE));
}
@Test
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectTimerInternals.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectTimerInternals.java
index 8f3ab48..0261bf6 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectTimerInternals.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectTimerInternals.java
@@ -47,11 +47,20 @@
@Override
public void setTimer(
- StateNamespace namespace, String timerId, Instant target, TimeDomain timeDomain) {
- timerUpdateBuilder.setTimer(TimerData.of(timerId, namespace, target, timeDomain));
+ StateNamespace namespace,
+ String timerId,
+ String timerFamilyId,
+ Instant target,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
+ timerUpdateBuilder.setTimer(
+ TimerData.of(timerId, timerFamilyId, namespace, target, outputTimestamp, timeDomain));
}
- /** @deprecated use {@link #setTimer(StateNamespace, String, Instant, TimeDomain)}. */
+ /**
+ * @deprecated use {@link #setTimer(StateNamespace, String, String, Instant, Instant,
+ * TimeDomain)}.
+ */
@Deprecated
@Override
public void setTimer(TimerData timerData) {
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
index 31eb80b..5f41175 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
@@ -222,7 +222,13 @@
public void onTimer(TimerData timer, BoundedWindow window) {
try {
- fnRunner.onTimer(timer.getTimerId(), window, timer.getTimestamp(), timer.getDomain());
+ fnRunner.onTimer(
+ timer.getTimerId(),
+ timer.getTimerFamilyId(),
+ window,
+ timer.getTimestamp(),
+ timer.getOutputTimestamp(),
+ timer.getDomain());
} catch (Exception e) {
throw UserCodeException.wrap(e);
}
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/StatefulParDoEvaluatorFactory.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/StatefulParDoEvaluatorFactory.java
index e1080e5..8972319 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/StatefulParDoEvaluatorFactory.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/StatefulParDoEvaluatorFactory.java
@@ -44,12 +44,14 @@
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.runners.AppliedPTransform;
import org.apache.beam.sdk.state.StateSpec;
+import org.apache.beam.sdk.state.WatermarkHoldState;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.reflect.DoFnSignature;
import org.apache.beam.sdk.transforms.reflect.DoFnSignature.StateDeclaration;
import org.apache.beam.sdk.transforms.reflect.DoFnSignatures;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.TimestampCombiner;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
@@ -71,6 +73,8 @@
private final ParDoEvaluatorFactory<KV<K, InputT>, OutputT> delegateFactory;
+ private final EvaluationContext evaluationContext;
+
StatefulParDoEvaluatorFactory(EvaluationContext evaluationContext, PipelineOptions options) {
this.delegateFactory =
new ParDoEvaluatorFactory<>(
@@ -92,6 +96,8 @@
CacheBuilder.newBuilder()
.weakValues()
.build(new CleanupSchedulingLoader(evaluationContext));
+
+ this.evaluationContext = evaluationContext;
}
@Override
@@ -146,7 +152,13 @@
application.getTransform().getSchemaInformation(),
application.getTransform().getSideInputMapping());
- return new StatefulParDoEvaluator<>(delegateEvaluator);
+ DirectStepContext stepContext =
+ evaluationContext
+ .getExecutionContext(application, inputBundle.getKey())
+ .getStepContext(evaluationContext.getStepName(application));
+
+ stepContext.stateInternals().commit();
+ return new StatefulParDoEvaluator<>(delegateEvaluator, stepContext);
}
private class CleanupSchedulingLoader
@@ -241,10 +253,14 @@
private final List<TimerData> pushedBackTimers = new ArrayList<>();
private final DirectTimerInternals timerInternals;
+ DirectStepContext stepContext;
+
public StatefulParDoEvaluator(
- DoFnLifecycleManagerRemovingTransformEvaluator<KV<K, InputT>> delegateEvaluator) {
+ DoFnLifecycleManagerRemovingTransformEvaluator<KV<K, InputT>> delegateEvaluator,
+ DirectStepContext stepContext) {
this.delegateEvaluator = delegateEvaluator;
this.timerInternals = delegateEvaluator.getParDoEvaluator().getStepContext().timerInternals();
+ this.stepContext = stepContext;
}
@Override
@@ -269,6 +285,12 @@
WindowNamespace<?> windowNamespace = (WindowNamespace) timer.getNamespace();
BoundedWindow timerWindow = windowNamespace.getWindow();
delegateEvaluator.onTimer(timer, timerWindow);
+
+ StateTag<WatermarkHoldState> timerWatermarkHoldTag = setTimerTag(timer);
+
+ stepContext.stateInternals().state(timer.getNamespace(), timerWatermarkHoldTag).clear();
+ stepContext.stateInternals().commit();
+
if (timerInternals.containsUpdateForTimeBefore(currentInputWatermark)) {
break;
}
@@ -278,15 +300,41 @@
@Override
public TransformResult<KeyedWorkItem<K, KV<K, InputT>>> finishBundle() throws Exception {
+
TransformResult<KV<K, InputT>> delegateResult = delegateEvaluator.finishBundle();
+ boolean isTimerDeclared = false;
+ for (TimerData timerData : delegateResult.getTimerUpdate().getSetTimers()) {
+ StateTag<WatermarkHoldState> timerWatermarkHoldTag = setTimerTag(timerData);
+
+ stepContext
+ .stateInternals()
+ .state(timerData.getNamespace(), timerWatermarkHoldTag)
+ .add(timerData.getOutputTimestamp());
+ isTimerDeclared = true;
+ }
+
+ CopyOnAccessInMemoryStateInternals state;
+ Instant watermarkHold;
+
+ if (isTimerDeclared && delegateResult.getState() != null) { // For both State and Timer Holds
+ state = delegateResult.getState();
+ watermarkHold = stepContext.commitState().getEarliestWatermarkHold();
+ } else if (isTimerDeclared) { // For only Timer holds
+ state = stepContext.commitState();
+ watermarkHold = state.getEarliestWatermarkHold();
+    } else { // For only State (non-timer) holds
+ state = delegateResult.getState();
+ watermarkHold = delegateResult.getWatermarkHold();
+ }
+
TimerUpdate timerUpdate =
delegateResult.getTimerUpdate().withPushedBackTimers(pushedBackTimers);
pushedBackTimers.clear();
StepTransformResult.Builder<KeyedWorkItem<K, KV<K, InputT>>> regroupedResult =
StepTransformResult.<KeyedWorkItem<K, KV<K, InputT>>>withHold(
- delegateResult.getTransform(), delegateResult.getWatermarkHold())
+ delegateResult.getTransform(), watermarkHold)
.withTimerUpdate(timerUpdate)
- .withState(delegateResult.getState())
+ .withState(state)
.withMetricUpdates(delegateResult.getLogicalMetricUpdates())
.addOutput(Lists.newArrayList(delegateResult.getOutputBundles()));
@@ -306,4 +354,10 @@
return regroupedResult.build();
}
}
+
+ private static StateTag<WatermarkHoldState> setTimerTag(TimerData timerData) {
+ return StateTags.makeSystemTagInternal(
+ StateTags.watermarkStateInternal(
+ "timer-" + timerData.getTimerId(), TimestampCombiner.EARLIEST));
+ }
}
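The bookkeeping above boils down to a per-timer watermark hold; a small hypothetical helper (mirroring setTimerTag, finishBundle, and the timer-firing path in this change, not code that exists in the repo) makes the lifecycle explicit.

import org.apache.beam.runners.core.StateInternals;
import org.apache.beam.runners.core.StateTag;
import org.apache.beam.runners.core.StateTags;
import org.apache.beam.runners.core.TimerInternals.TimerData;
import org.apache.beam.sdk.state.WatermarkHoldState;
import org.apache.beam.sdk.transforms.windowing.TimestampCombiner;

class TimerWatermarkHoldSketch {
  private static StateTag<WatermarkHoldState> holdTagFor(TimerData timer) {
    return StateTags.makeSystemTagInternal(
        StateTags.watermarkStateInternal(
            "timer-" + timer.getTimerId(), TimestampCombiner.EARLIEST));
  }

  /** When a timer is set: hold the output watermark at the timer's output timestamp. */
  static void onTimerSet(StateInternals state, TimerData timer) {
    state.state(timer.getNamespace(), holdTagFor(timer)).add(timer.getOutputTimestamp());
  }

  /** When the timer fires: release the hold so the output watermark may advance past it. */
  static void onTimerFired(StateInternals state, TimerData timer) {
    state.state(timer.getNamespace(), holdTagFor(timer)).clear();
  }
}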
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/WatermarkManager.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/WatermarkManager.java
index d9e7ac2..5df3bb5 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/WatermarkManager.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/WatermarkManager.java
@@ -327,7 +327,7 @@
if (pendingTimers.isEmpty()) {
return BoundedWindow.TIMESTAMP_MAX_VALUE;
} else {
- return pendingTimers.firstEntry().getElement().getTimestamp();
+ return pendingTimers.firstEntry().getElement().getOutputTimestamp();
}
}
@@ -465,7 +465,8 @@
Instant oldWatermark = currentWatermark.get();
Instant newWatermark =
INSTANT_ORDERING.min(
- inputWatermark.get(), inputWatermark.getEarliestTimerTimestamp(), holds.getMinHold());
+ inputWatermark.get(), holds.getMinHold(), inputWatermark.getEarliestTimerTimestamp());
+
newWatermark = INSTANT_ORDERING.max(oldWatermark, newWatermark);
currentWatermark.set(newWatermark);
return updateAndTrace(getName(), oldWatermark, newWatermark);
diff --git a/runners/flink/1.7/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java b/runners/flink/1.7/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
index e29f97e..807faf5 100644
--- a/runners/flink/1.7/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
+++ b/runners/flink/1.7/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
@@ -20,6 +20,8 @@
import java.io.EOFException;
import java.io.IOException;
import java.util.Objects;
+import javax.annotation.Nullable;
+import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.runners.flink.translation.wrappers.DataInputViewWrapper;
import org.apache.beam.runners.flink.translation.wrappers.DataOutputViewWrapper;
import org.apache.beam.sdk.coders.Coder;
@@ -40,9 +42,26 @@
private final Coder<T> coder;
+ /**
+   * Deserializing {@link SerializablePipelineOptions} registers {@link
+   * org.apache.beam.sdk.io.FileSystems}, which is needed for {@link
+   * org.apache.beam.sdk.transforms.Reshuffle} translation.
+ */
+ @SuppressWarnings("unused")
+ @Nullable
+ private final SerializablePipelineOptions pipelineOptions;
+
public CoderTypeSerializer(Coder<T> coder) {
Preconditions.checkNotNull(coder);
this.coder = coder;
+ this.pipelineOptions = null;
+ }
+
+ public CoderTypeSerializer(
+ Coder<T> coder, @Nullable SerializablePipelineOptions pipelineOptions) {
+ Preconditions.checkNotNull(coder);
+ this.coder = coder;
+ this.pipelineOptions = pipelineOptions;
}
@Override
diff --git a/runners/flink/1.8/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java b/runners/flink/1.8/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
index 2ff1cda..276e49c 100644
--- a/runners/flink/1.8/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
+++ b/runners/flink/1.8/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
@@ -19,6 +19,8 @@
import java.io.EOFException;
import java.io.IOException;
+import javax.annotation.Nullable;
+import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.runners.flink.translation.wrappers.DataInputViewWrapper;
import org.apache.beam.runners.flink.translation.wrappers.DataOutputViewWrapper;
import org.apache.beam.sdk.coders.Coder;
@@ -41,9 +43,26 @@
private final Coder<T> coder;
+ /**
+   * Deserializing {@link SerializablePipelineOptions} registers {@link
+   * org.apache.beam.sdk.io.FileSystems}, which is needed for {@link
+   * org.apache.beam.sdk.transforms.Reshuffle} translation.
+ */
+ @SuppressWarnings("unused")
+ @Nullable
+ private final SerializablePipelineOptions pipelineOptions;
+
public CoderTypeSerializer(Coder<T> coder) {
Preconditions.checkNotNull(coder);
this.coder = coder;
+ this.pipelineOptions = null;
+ }
+
+ public CoderTypeSerializer(
+ Coder<T> coder, @Nullable SerializablePipelineOptions pipelineOptions) {
+ Preconditions.checkNotNull(coder);
+ this.coder = coder;
+ this.pipelineOptions = pipelineOptions;
}
@Override
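A rough usage sketch of the new constructor (an assumed call pattern, not code from this change): a translator that needs FileSystems registered on remote TaskManagers threads the pipeline options through the serializer, so that deserializing the serializer also restores the options.

import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.runners.flink.translation.types.CoderTypeSerializer;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

class SerializerWithOptionsSketch {
  static CoderTypeSerializer<String> create() {
    PipelineOptions options = PipelineOptionsFactory.create();
    // Deserializing the SerializablePipelineOptions field on a TaskManager registers FileSystems
    // before the wrapped coder is used.
    return new CoderTypeSerializer<>(
        StringUtf8Coder.of(), new SerializablePipelineOptions(options));
  }
}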
diff --git a/runners/flink/flink_runner.gradle b/runners/flink/flink_runner.gradle
index c7f8cc0..ef6c82f 100644
--- a/runners/flink/flink_runner.gradle
+++ b/runners/flink/flink_runner.gradle
@@ -128,6 +128,7 @@
configurations {
validatesRunner
+ miniCluster
}
dependencies {
@@ -153,7 +154,6 @@
testCompile project(path: ":sdks:java:core", configuration: "shadowTest")
// FlinkStateInternalsTest extends abstract StateInternalsTest
testCompile project(path: ":runners:core-java", configuration: "testRuntime")
- testCompile library.java.commons_lang3
testCompile library.java.hamcrest_core
testCompile library.java.junit
testCompile library.java.mockito_core
@@ -170,6 +170,7 @@
validatesRunner project(path: ":sdks:java:core", configuration: "shadowTest")
validatesRunner project(path: ":runners:core-java", configuration: "testRuntime")
validatesRunner project(project.path)
+ miniCluster "org.apache.flink:flink-runtime-web_2.11:$flink_version"
}
class ValidatesRunnerConfig {
@@ -190,11 +191,15 @@
])
systemProperty "beamTestPipelineOptions", pipelineOptions
classpath = configurations.validatesRunner
- testClassesDirs = files(project(":sdks:java:core").sourceSets.test.output.classesDirs, project(":runners:core-java").sourceSets.test.output.classesDirs)
+ testClassesDirs = files(
+ project(":sdks:java:core").sourceSets.test.output.classesDirs,
+ project(":runners:core-java").sourceSets.test.output.classesDirs,
+ )
// maxParallelForks decreased from 4 in order to avoid OOM errors
maxParallelForks 2
useJUnit {
includeCategories 'org.apache.beam.sdk.testing.ValidatesRunner'
+ excludeCategories 'org.apache.beam.sdk.testing.UsesTestStreamWithOutputTimestamp'
excludeCategories 'org.apache.beam.sdk.testing.FlattenWithHeterogeneousCoders'
excludeCategories 'org.apache.beam.sdk.testing.LargeKeys$Above100MB'
excludeCategories 'org.apache.beam.sdk.testing.UsesCommittedMetrics'
@@ -223,3 +228,29 @@
// Generates :runners:flink:1.9:runQuickstartJavaFlinkLocal
createJavaExamplesArchetypeValidationTask(type: 'Quickstart', runner: 'FlinkLocal')
+
+/**
+ * Updates the documentation with the current pipeline options.
+ */
+def createPipelineOptionsTableTask(String target) {
+ tasks.register("generatePipelineOptionsTable${target}", JavaExec) {
+ group = 'Website'
+ description = "Generates a table with pipeline options for the Flink Runner documentation page"
+ classpath = sourceSets.test.runtimeClasspath
+ main = 'org.apache.beam.runners.flink.website.PipelineOptionsTableGenerator'
+ args = [target]
+ standardOutput = new ByteArrayOutputStream()
+ doLast {
+ def dest = file("${project(':website').getProjectDir()}/src/_includes/flink_${target.toLowerCase()}_pipeline_options.html")
+ if (!dest.exists()) {
+ throw new GradleException("Pipeline options file is not in expected location: ${dest}")
+ }
+ dest.write(standardOutput.toString())
+ }
+ }
+}
+createPipelineOptionsTableTask('Java')
+createPipelineOptionsTableTask('Python')
+// Update the pipeline options documentation before running the tests
+test.dependsOn(generatePipelineOptionsTableJava)
+test.dependsOn(generatePipelineOptionsTablePython)
diff --git a/runners/flink/job-server/flink_job_server.gradle b/runners/flink/job-server/flink_job_server.gradle
index f0cae9d..27a116f 100644
--- a/runners/flink/job-server/flink_job_server.gradle
+++ b/runners/flink/job-server/flink_job_server.gradle
@@ -181,20 +181,76 @@
]
)
-task testPipelineJar() {
- dependsOn shadowJar
- dependsOn ":sdks:python:container:py35:docker"
- doLast{
- exec {
- executable "sh"
- def options = [
- "--flink_job_server_jar ${shadowJar.archivePath}",
- "--env_dir ${project.rootProject.buildDir}/gradleenv/${project.path.hashCode()}",
- "--python_root_dir ${project.rootDir}/sdks/python",
- "--python_version 3.5",
- "--python_container_image apachebeam/python3.5_sdk:${project['python_sdk_version']}",
- ]
- args "-c", "../../job-server/test_pipeline_jar.sh ${options.join(' ')}"
+def addTestJavaJarCreator(String pyVersion) {
+ def pyBuildPath = pyVersion.startsWith("2") ? "2" : pyVersion.replaceAll("\\.", "")
+ project.tasks.create(name: "testJavaJarCreatorPy${pyBuildPath}") {
+ dependsOn shadowJar
+ dependsOn ":sdks:python:container:py${pyBuildPath}:docker"
+ doLast{
+ exec {
+ executable "sh"
+ def options = [
+ "--flink_job_server_jar ${shadowJar.archivePath}",
+ "--env_dir ${project.rootProject.buildDir}/gradleenv/${project.path.hashCode()}",
+ "--python_root_dir ${project.rootDir}/sdks/python",
+ "--python_version ${pyVersion}",
+ "--python_container_image apachebeam/python${pyVersion}_sdk:${project['python_sdk_version']}",
+ ]
+ args "-c", "../../job-server/test_pipeline_jar.sh ${options.join(' ')}"
+ }
}
}
}
+
+// miniCluster jar starts an embedded Flink cluster intended for use in testing.
+task miniCluster(type: Jar, dependsOn: shadowJar) {
+ archiveBaseName = "${project.archivesBaseName}-mini-cluster"
+ dependencies {
+ runtime project(path: flinkRunnerProject, configuration: "miniCluster")
+ }
+ from zipTree(shadowJar.archivePath).matching {
+ // If these classes aren't excluded from the mini cluster jar, they will be loaded instead of
+ // the corresponding classes in the submitted job jar, preventing pipeline resources from
+ // loading successfully.
+ exclude "**/FlinkPipelineRunner*"
+ exclude "**/PortablePipelineJarUtils*"
+ }
+ manifest {
+ attributes('Main-Class': 'org.apache.beam.runners.flink.FlinkMiniClusterEntryPoint')
+ }
+ zip64 true // jar needs to contain more than 65535 files
+}
+
+def addTestFlinkUberJarPy(String pyVersion) {
+ def pyBuildPath = pyVersion.startsWith("2") ? "2" : pyVersion.replaceAll("\\.", "")
+ project.tasks.create(name: "testFlinkUberJarPy${pyBuildPath}") {
+ dependsOn miniCluster
+ dependsOn shadowJar
+ dependsOn ":sdks:python:container:py${pyBuildPath}:docker"
+ doLast{
+ exec {
+ executable "sh"
+ def options = [
+ "--flink_job_server_jar ${shadowJar.archivePath}",
+ "--flink_mini_cluster_jar ${miniCluster.archivePath}",
+ "--env_dir ${project.rootProject.buildDir}/gradleenv/${project.path.hashCode()}",
+ "--python_root_dir ${project.rootDir}/sdks/python",
+ "--python_version ${pyVersion}",
+ "--python_container_image apachebeam/python${pyVersion}_sdk:${project['python_sdk_version']}",
+ ]
+ args "-c", "../../job-server/test_flink_uber_jar.sh ${options.join(' ')}"
+ }
+ }
+ }
+}
+
+["2.7", "3.5", "3.6", "3.7"].each{ pyVersion ->
+ addTestJavaJarCreator(pyVersion)
+ addTestFlinkUberJarPy(pyVersion)
+}
+
+task testPipelineJar() {
+ dependsOn testJavaJarCreatorPy37
+ dependsOn testFlinkUberJarPy36
+ dependsOn testFlinkUberJarPy37
+}
diff --git a/runners/flink/job-server/test_flink_uber_jar.sh b/runners/flink/job-server/test_flink_uber_jar.sh
new file mode 100755
index 0000000..f24c32a
--- /dev/null
+++ b/runners/flink/job-server/test_flink_uber_jar.sh
@@ -0,0 +1,141 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# test_flink_uber_jar.sh tests the Python FlinkRunner class.
+
+set -e
+set -v
+
+while [[ $# -gt 0 ]]
+do
+key="$1"
+case $key in
+ --flink_job_server_jar)
+ FLINK_JOB_SERVER_JAR="$2"
+ shift # past argument
+ shift # past value
+ ;;
+ --flink_mini_cluster_jar)
+ FLINK_MINI_CLUSTER_JAR="$2"
+ shift # past argument
+ shift # past value
+ ;;
+ --env_dir)
+ ENV_DIR="$2"
+ shift # past argument
+ shift # past value
+ ;;
+ --python_root_dir)
+ PYTHON_ROOT_DIR="$2"
+ shift # past argument
+ shift # past value
+ ;;
+ --python_version)
+ PYTHON_VERSION="$2"
+ shift # past argument
+ shift # past value
+ ;;
+ --python_container_image)
+ PYTHON_CONTAINER_IMAGE="$2"
+ shift # past argument
+ shift # past value
+ ;;
+ *) # unknown option
+ echo "Unknown option: $1"
+ exit 1
+ ;;
+esac
+done
+
+
+# Go to the root of the repository
+cd "$(git rev-parse --show-toplevel)"
+
+# Verify docker command exists
+command -v docker
+docker -v
+
+# Verify container has already been built
+docker images --format "{{.Repository}}:{{.Tag}}" | grep "$PYTHON_CONTAINER_IMAGE"
+
+# Set up Python environment
+virtualenv -p "python$PYTHON_VERSION" "$ENV_DIR"
+. $ENV_DIR/bin/activate
+pip install --retries 10 -e "$PYTHON_ROOT_DIR"
+
+# Hacky python script to find a free port. Note there is a small chance the chosen port could
+# get taken before being claimed.
+SOCKET_SCRIPT="
+from __future__ import print_function
+import socket
+s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+s.bind(('localhost', 0))
+print(s.getsockname()[1])
+s.close()
+"
+FLINK_PORT=$(python -c "$SOCKET_SCRIPT")
+
+echo "Starting Flink mini cluster listening on port $FLINK_PORT"
+java -jar "$FLINK_MINI_CLUSTER_JAR" --rest-port "$FLINK_PORT" &
+
+PIPELINE_PY="
+import apache_beam as beam
+from apache_beam.options.pipeline_options import PipelineOptions
+from apache_beam.options.pipeline_options import SetupOptions
+from apache_beam.testing.util import assert_that
+from apache_beam.testing.util import equal_to
+from apache_beam.transforms import Create
+from apache_beam.transforms import Map
+
+# To test that our main session is getting plumbed through artifact staging
+# correctly, create a global variable. If the main session is not plumbed
+# through properly, global_var will be undefined and the pipeline will fail.
+global_var = 1
+
+pipeline_options = PipelineOptions()
+pipeline_options.view_as(SetupOptions).save_main_session = True
+pipeline = beam.Pipeline(options=pipeline_options)
+pcoll = (pipeline
+ | Create([0, 1, 2])
+ | Map(lambda x: x + global_var))
+assert_that(pcoll, equal_to([1, 2, 3]))
+
+result = pipeline.run()
+result.wait_until_finish()
+"
+
+(python -c "$PIPELINE_PY" \
+ --runner FlinkRunner \
+ --flink_job_server_jar "$FLINK_JOB_SERVER_JAR" \
+ --parallelism 1 \
+ --environment_type DOCKER \
+ --environment_config "$PYTHON_CONTAINER_IMAGE" \
+ --flink_master "localhost:$FLINK_PORT" \
+ --flink_submit_uber_jar \
+) || TEST_EXIT_CODE=$? # don't fail fast here; clean up before exiting
+
+kill %1 || echo "Failed to shut down Flink mini cluster"
+
+rm -rf "$ENV_DIR"
+
+if [[ "$TEST_EXIT_CODE" -eq 0 ]]; then
+ echo ">>> SUCCESS"
+else
+ echo ">>> FAILURE"
+fi
+exit $TEST_EXIT_CODE
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java
index 229eca5..28351d5 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java
@@ -34,6 +34,7 @@
import org.apache.beam.runners.core.construction.ReadTranslation;
import org.apache.beam.runners.flink.translation.functions.FlinkAssignWindows;
import org.apache.beam.runners.flink.translation.functions.FlinkDoFnFunction;
+import org.apache.beam.runners.flink.translation.functions.FlinkIdentityFunction;
import org.apache.beam.runners.flink.translation.functions.FlinkMergingNonShuffleReduceFunction;
import org.apache.beam.runners.flink.translation.functions.FlinkMultiOutputPruningFunction;
import org.apache.beam.runners.flink.translation.functions.FlinkPartialReduceFunction;
@@ -84,6 +85,7 @@
import org.apache.flink.api.java.operators.GroupCombineOperator;
import org.apache.flink.api.java.operators.GroupReduceOperator;
import org.apache.flink.api.java.operators.Grouping;
+import org.apache.flink.api.java.operators.MapOperator;
import org.apache.flink.api.java.operators.MapPartitionOperator;
import org.apache.flink.api.java.operators.SingleInputUdfOperator;
@@ -306,11 +308,25 @@
@Override
public void translateNode(
Reshuffle<K, InputT> transform, FlinkBatchTranslationContext context) {
-
- DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
+ final DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
context.getInputDataSet(context.getInput(transform));
-
- context.setOutputDataSet(context.getOutput(transform), inputDataSet.rebalance());
+ // Construct an instance of CoderTypeInformation which contains the pipeline options.
+      // This will be used to initialize FileSystems.
+ @SuppressWarnings("unchecked")
+ final CoderTypeInformation<WindowedValue<KV<K, InputT>>> outputType =
+ ((CoderTypeInformation) inputDataSet.getType())
+ .withPipelineOptions(context.getPipelineOptions());
+ // We insert a NOOP here to initialize the FileSystems via the above CoderTypeInformation.
+ // The output type coder may be relying on file system access. The shuffled data may have to
+ // be deserialized on a different machine using this coder where FileSystems has not been
+ // initialized.
+ final DataSet<WindowedValue<KV<K, InputT>>> retypedDataSet =
+ new MapOperator<>(
+ inputDataSet,
+ outputType,
+ FlinkIdentityFunction.of(),
+ getCurrentTransformName(context));
+ context.setOutputDataSet(context.getOutput(transform), retypedDataSet.rebalance());
}
}
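FlinkIdentityFunction is added elsewhere in this PR, so its exact shape is not shown here; a pass-through MapFunction along these lines (a hedged sketch, not the real class) is all the NOOP needs to be, since the operator exists only to carry the new CoderTypeInformation.

import org.apache.flink.api.common.functions.MapFunction;

/** Hypothetical identity function; the actual FlinkIdentityFunction may differ. */
final class IdentityFunctionSketch<T> implements MapFunction<T, T> {
  static <T> IdentityFunctionSketch<T> of() {
    return new IdentityFunctionSketch<>();
  }

  @Override
  public T map(T value) {
    // Pass-through: the surrounding MapOperator only re-types the DataSet so that its
    // CoderTypeInformation (now carrying pipeline options) initializes FileSystems on workers.
    return value;
  }
}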
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkJobInvoker.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkJobInvoker.java
index a123653..40257b9 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkJobInvoker.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkJobInvoker.java
@@ -17,13 +17,13 @@
*/
package org.apache.beam.runners.flink;
-import static org.apache.beam.runners.core.construction.PipelineResources.detectClassPathResourcesToStage;
+import static org.apache.beam.runners.core.construction.resources.PipelineResources.detectClassPathResourcesToStage;
-import java.io.IOException;
import java.util.UUID;
import javax.annotation.Nullable;
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.runners.core.construction.PipelineOptionsTranslation;
+import org.apache.beam.runners.flink.translation.utils.Workarounds;
import org.apache.beam.runners.fnexecution.jobsubmission.JobInvocation;
import org.apache.beam.runners.fnexecution.jobsubmission.JobInvoker;
import org.apache.beam.runners.fnexecution.jobsubmission.PortablePipelineJarCreator;
@@ -31,6 +31,7 @@
import org.apache.beam.runners.fnexecution.provisioning.JobInfo;
import org.apache.beam.sdk.options.PortablePipelineOptions;
import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.Struct;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Strings;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.ListeningExecutorService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -55,8 +56,9 @@
RunnerApi.Pipeline pipeline,
Struct options,
@Nullable String retrievalToken,
- ListeningExecutorService executorService)
- throws IOException {
+ ListeningExecutorService executorService) {
+ Workarounds.restoreOriginalStdOutAndStdErrIfApplicable();
+
// TODO: How to make Java/Python agree on names of keys and their values?
LOG.trace("Parsing pipeline options");
FlinkPipelineOptions flinkOptions =
@@ -72,13 +74,13 @@
PortablePipelineOptions portableOptions = flinkOptions.as(PortablePipelineOptions.class);
PortablePipelineRunner pipelineRunner;
- if (portableOptions.getOutputExecutablePath() == null
- || portableOptions.getOutputExecutablePath().isEmpty()) {
+ if (Strings.isNullOrEmpty(portableOptions.getOutputExecutablePath())) {
pipelineRunner =
new FlinkPipelineRunner(
flinkOptions,
serverConfig.getFlinkConfDir(),
- detectClassPathResourcesToStage(FlinkJobInvoker.class.getClassLoader()));
+ detectClassPathResourcesToStage(
+ FlinkJobInvoker.class.getClassLoader(), flinkOptions));
} else {
pipelineRunner = new PortablePipelineJarCreator(FlinkPipelineRunner.class);
}
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkMiniClusterEntryPoint.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkMiniClusterEntryPoint.java
new file mode 100644
index 0000000..fec9a88
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkMiniClusterEntryPoint.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.RestOptions;
+import org.apache.flink.runtime.minicluster.MiniCluster;
+import org.apache.flink.runtime.minicluster.MiniClusterConfiguration;
+import org.kohsuke.args4j.CmdLineException;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Entry point for starting an embedded Flink cluster. */
+public class FlinkMiniClusterEntryPoint {
+
+ private static final Logger LOG = LoggerFactory.getLogger(FlinkMiniClusterEntryPoint.class);
+
+ static class MiniClusterArgs {
+ @Option(name = "--rest-port")
+ int restPort = 0;
+
+ @Option(name = "--num-task-managers")
+ int numTaskManagers = 1;
+
+ @Option(name = "--num-task-slots-per-taskmanager")
+ int numSlotsPerTaskManager = 1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ MiniClusterArgs miniClusterArgs = parseArgs(args);
+
+ Configuration flinkConfig = new Configuration();
+ flinkConfig.setInteger(RestOptions.PORT, miniClusterArgs.restPort);
+
+ MiniClusterConfiguration clusterConfig =
+ new MiniClusterConfiguration.Builder()
+ .setConfiguration(flinkConfig)
+ .setNumTaskManagers(miniClusterArgs.numTaskManagers)
+ .setNumSlotsPerTaskManager(miniClusterArgs.numSlotsPerTaskManager)
+ .build();
+
+ try (MiniCluster miniCluster = new MiniCluster(clusterConfig)) {
+ miniCluster.start();
+ System.out.println(
+ String.format(
+ "Started Flink mini cluster (%s TaskManagers with %s task slots) with Rest API at %s",
+ miniClusterArgs.numTaskManagers,
+ miniClusterArgs.numSlotsPerTaskManager,
+ miniCluster.getRestAddress()));
+ Thread.sleep(Long.MAX_VALUE);
+ }
+ }
+
+ private static MiniClusterArgs parseArgs(String[] args) {
+ MiniClusterArgs configuration = new MiniClusterArgs();
+ CmdLineParser parser = new CmdLineParser(configuration);
+ try {
+ parser.parseArgument(args);
+ } catch (CmdLineException e) {
+ LOG.error("Unable to parse command line arguments.", e);
+ printUsage(parser);
+ throw new IllegalArgumentException("Unable to parse command line arguments.", e);
+ }
+ return configuration;
+ }
+
+ private static void printUsage(CmdLineParser parser) {
+ System.err.println(
+ String.format(
+ "Usage: java %s arguments...", FlinkMiniClusterEntryPoint.class.getSimpleName()));
+ parser.printUsage(System.err);
+ System.err.println();
+ }
+}
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineExecutionEnvironment.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineExecutionEnvironment.java
index 0b23d43..7ff4ee4 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineExecutionEnvironment.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineExecutionEnvironment.java
@@ -19,7 +19,7 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
-import org.apache.beam.runners.core.construction.PipelineResources;
+import org.apache.beam.runners.core.construction.resources.PipelineResources;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.MoreObjects;
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java
index e9cb8dc..d2244bc 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java
@@ -153,11 +153,11 @@
void setStateBackendFactory(Class<? extends FlinkStateBackendFactory> stateBackendFactory);
- @Description("Enable/disable Beam metrics in Flink Runner")
- @Default.Boolean(true)
- Boolean getEnableMetrics();
+ @Description("Disable Beam metrics in Flink Runner")
+ @Default.Boolean(false)
+ Boolean getDisableMetrics();
- void setEnableMetrics(Boolean enableMetrics);
+  void setDisableMetrics(Boolean disableMetrics);
/** Enables or disables externalized checkpoints. */
@Description(
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java
index 33d2c76..137aa00 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java
@@ -17,7 +17,7 @@
*/
package org.apache.beam.runners.flink;
-import static org.apache.beam.runners.core.construction.PipelineResources.detectClassPathResourcesToStage;
+import static org.apache.beam.runners.core.construction.resources.PipelineResources.detectClassPathResourcesToStage;
import static org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.hasUnboundedPCollections;
import java.util.List;
@@ -178,7 +178,8 @@
new FlinkPipelineRunner(
flinkOptions,
configuration.flinkConfDir,
- detectClassPathResourcesToStage(FlinkPipelineRunner.class.getClassLoader()));
+ detectClassPathResourcesToStage(
+ FlinkPipelineRunner.class.getClassLoader(), flinkOptions));
JobInfo jobInfo =
JobInfo.create(
invocationId,
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunner.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunner.java
index 649b5ec..9454ba2 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunner.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunner.java
@@ -17,7 +17,7 @@
*/
package org.apache.beam.runners.flink;
-import static org.apache.beam.runners.core.construction.PipelineResources.detectClassPathResourcesToStage;
+import static org.apache.beam.runners.core.construction.resources.PipelineResources.detectClassPathResourcesToStage;
import java.util.ArrayList;
import java.util.HashSet;
@@ -26,6 +26,7 @@
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.beam.runners.core.metrics.MetricsPusher;
+import org.apache.beam.runners.flink.translation.utils.Workarounds;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.PipelineRunner;
@@ -77,7 +78,7 @@
if (flinkOptions.getFilesToStage() == null) {
flinkOptions.setFilesToStage(
- detectClassPathResourcesToStage(FlinkRunner.class.getClassLoader()));
+ detectClassPathResourcesToStage(FlinkRunner.class.getClassLoader(), options));
LOG.info(
"PipelineOptions.filesToStage was not specified. "
+ "Defaulting to files from the classpath: will stage {} files. "
@@ -89,9 +90,10 @@
return new FlinkRunner(flinkOptions);
}
- private FlinkRunner(FlinkPipelineOptions options) {
+ protected FlinkRunner(FlinkPipelineOptions options) {
this.options = options;
this.ptransformViewsWithNonDeterministicKeyCoders = new HashSet<>();
+ Workarounds.restoreOriginalStdOutAndStdErrIfApplicable();
}
@Override
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTransformTranslators.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTransformTranslators.java
index cdb3060..d98a601 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTransformTranslators.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTransformTranslators.java
@@ -1310,46 +1310,12 @@
new CreateStreamingFlinkViewPayloadTranslator())
.put(
SplittableParDoViaKeyedWorkItems.ProcessElements.class,
- new SplittableParDoProcessElementsTranslator())
- .put(
- SplittableParDoViaKeyedWorkItems.GBKIntoKeyedWorkItems.class,
- new SplittableParDoGbkIntoKeyedWorkItemsPayloadTranslator())
+ PTransformTranslation.TransformPayloadTranslator.NotSerializable.forUrn(
+ SPLITTABLE_PROCESS_URN))
.build();
}
}
- /**
- * A translator just to vend the URN. This will need to be moved to runners-core-construction-java
- * once SDF is reorganized appropriately.
- */
- private static class SplittableParDoProcessElementsPayloadTranslator
- extends PTransformTranslation.TransformPayloadTranslator.NotSerializable<
- SplittableParDoViaKeyedWorkItems.ProcessElements<?, ?, ?, ?>> {
-
- private SplittableParDoProcessElementsPayloadTranslator() {}
-
- @Override
- public String getUrn(SplittableParDoViaKeyedWorkItems.ProcessElements<?, ?, ?, ?> transform) {
- return SPLITTABLE_PROCESS_URN;
- }
- }
-
- /**
- * A translator just to vend the URN. This will need to be moved to runners-core-construction-java
- * once SDF is reorganized appropriately.
- */
- private static class SplittableParDoGbkIntoKeyedWorkItemsPayloadTranslator
- extends PTransformTranslation.TransformPayloadTranslator.NotSerializable<
- SplittableParDoViaKeyedWorkItems.GBKIntoKeyedWorkItems<?, ?>> {
-
- private SplittableParDoGbkIntoKeyedWorkItemsPayloadTranslator() {}
-
- @Override
- public String getUrn(SplittableParDoViaKeyedWorkItems.GBKIntoKeyedWorkItems<?, ?> transform) {
- return SplittableParDo.SPLITTABLE_GBKIKWI_URN;
- }
- }
-
/** A translator just to vend the URN. */
private static class CreateStreamingFlinkViewPayloadTranslator
extends PTransformTranslation.TransformPayloadTranslator.NotSerializable<
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/DoFnRunnerWithMetricsUpdate.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/DoFnRunnerWithMetricsUpdate.java
index 9d853a2..ce54d5b 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/DoFnRunnerWithMetricsUpdate.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/DoFnRunnerWithMetricsUpdate.java
@@ -68,12 +68,14 @@
@Override
public void onTimer(
final String timerId,
+ final String timerFamilyId,
final BoundedWindow window,
final Instant timestamp,
+ final Instant outputTimestamp,
final TimeDomain timeDomain) {
try (Closeable ignored =
MetricsEnvironment.scopedMetricsContainer(container.getMetricsContainer(stepName))) {
- delegate.onTimer(timerId, window, timestamp, timeDomain);
+ delegate.onTimer(timerId, timerFamilyId, window, timestamp, outputTimestamp, timeDomain);
} catch (IOException e) {
throw new RuntimeException(e);
}
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/FlinkMetricContainer.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/FlinkMetricContainer.java
index 2db34a1..e389be5 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/FlinkMetricContainer.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/FlinkMetricContainer.java
@@ -46,6 +46,11 @@
/**
* Helper class for holding a {@link MetricsContainerImpl} and forwarding Beam metrics to Flink
* accumulators and metrics.
+ *
+ * <p>Use of accumulators can be turned off because they are memory and network intensive. The
+ * accumulator results are only meaningful for batch applications or for testing streaming
+ * applications which have a defined end. They are not essential during execution because metrics
+ * will also be reported through the configured metrics reporter.
*/
public class FlinkMetricContainer {
@@ -56,18 +61,30 @@
private static final String METRIC_KEY_SEPARATOR =
GlobalConfiguration.loadConfiguration().getString(MetricOptions.SCOPE_DELIMITER);
+ private final MetricsContainerStepMap metricsContainers;
private final RuntimeContext runtimeContext;
private final Map<String, Counter> flinkCounterCache;
private final Map<String, FlinkDistributionGauge> flinkDistributionGaugeCache;
private final Map<String, FlinkGauge> flinkGaugeCache;
- private final MetricsAccumulator metricsAccumulator;
public FlinkMetricContainer(RuntimeContext runtimeContext) {
this.runtimeContext = runtimeContext;
this.flinkCounterCache = new HashMap<>();
this.flinkDistributionGaugeCache = new HashMap<>();
this.flinkGaugeCache = new HashMap<>();
+ this.metricsContainers = new MetricsContainerStepMap();
+ }
+ public MetricsContainerImpl getMetricsContainer(String stepName) {
+ return metricsContainers.getContainer(stepName);
+ }
+
+ /**
+ * This should be called at the end of the Flink job; it sets up an accumulator that pushes the
+ * metrics to the PipelineResult. It should not be called earlier, to avoid the overhead that
+ * accumulators cause at runtime.
+ */
+ public void registerMetricsForPipelineResult() {
Accumulator<MetricsContainerStepMap, MetricsContainerStepMap> metricsAccumulator =
runtimeContext.getAccumulator(ACCUMULATOR_NAME);
if (metricsAccumulator == null) {
@@ -80,13 +97,7 @@
LOG.error("Failed to create metrics accumulator.", e);
}
}
- this.metricsAccumulator = (MetricsAccumulator) metricsAccumulator;
- }
-
- public MetricsContainerImpl getMetricsContainer(String stepName) {
- return metricsAccumulator != null
- ? metricsAccumulator.getLocalValue().getContainer(stepName)
- : null;
+ metricsAccumulator.add(metricsContainers);
}
/**
@@ -103,7 +114,7 @@
* given step.
*/
void updateMetrics(String stepName) {
- MetricResults metricResults = asAttemptedOnlyMetricResults(metricsAccumulator.getLocalValue());
+ MetricResults metricResults = asAttemptedOnlyMetricResults(metricsContainers);
MetricQueryResults metricQueryResults =
metricResults.queryMetrics(MetricsFilter.builder().addStep(stepName).build());
updateCounters(metricQueryResults.getCounters());
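For orientation, a minimal sketch of the lifecycle this change establishes, assuming a hypothetical rich function; only FlinkMetricContainer, getMetricsContainer and registerMetricsForPipelineResult come from this diff, everything else is illustrative:

// Sketch only (not part of this PR): how the per-operator metric container is
// expected to be used after this change. The class name is illustrative.
public class ExampleMetricsAwareFunction
    extends org.apache.flink.api.common.functions.AbstractRichFunction {

  private transient FlinkMetricContainer metricContainer;

  @Override
  public void open(org.apache.flink.configuration.Configuration parameters) {
    // Created once per task; metrics are collected locally, no accumulator traffic yet.
    metricContainer = new FlinkMetricContainer(getRuntimeContext());
  }

  @Override
  public void close() {
    // Registers the accumulator once, at the end of the job, so the collected
    // metrics end up in the PipelineResult without per-bundle overhead.
    metricContainer.registerMetricsForPipelineResult();
  }
}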
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/MetricsAccumulator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/MetricsAccumulator.java
index f55c2d8..d4f2c48 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/MetricsAccumulator.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/MetricsAccumulator.java
@@ -21,8 +21,13 @@
import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.accumulators.SimpleAccumulator;
-/** Accumulator of {@link MetricsContainerStepMap}. */
+/**
+ * Accumulator of {@link MetricsContainerStepMap}. This accumulator will only be reported to Flink
+ * when the job ends. This avoids the runtime overhead of accumulators that are continuously sent
+ * to the job manager.
+ */
public class MetricsAccumulator implements SimpleAccumulator<MetricsContainerStepMap> {
+
private MetricsContainerStepMap metricsContainers = new MetricsContainerStepMap();
@Override
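As a usage note (a sketch, not part of this PR): because the accumulator is only reported when the job ends, its values are what a caller sees when querying metrics from the PipelineResult after the pipeline finishes. The pipeline variable below is illustrative; Beam's metrics query API is assumed to be imported.

// Sketch: querying the metrics that the accumulator carries into the PipelineResult.
PipelineResult result = pipeline.run();
result.waitUntilFinish();
MetricQueryResults metrics =
    result.metrics().queryMetrics(MetricsFilter.builder().build());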
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/ReaderInvocationUtil.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/ReaderInvocationUtil.java
index 995bacc..736a2dd 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/ReaderInvocationUtil.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/metrics/ReaderInvocationUtil.java
@@ -40,7 +40,7 @@
String stepName, PipelineOptions options, FlinkMetricContainer container) {
FlinkPipelineOptions flinkPipelineOptions = options.as(FlinkPipelineOptions.class);
this.stepName = stepName;
- enableMetrics = flinkPipelineOptions.getEnableMetrics();
+ this.enableMetrics = !flinkPipelineOptions.getDisableMetrics();
this.container = container;
}
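The flag inversion above means metrics are on by default and users now opt out. A sketch of the corresponding option call, assuming the setter that usually accompanies the getDisableMetrics getter exercised in the tests below:

// Sketch: disabling Beam metrics on the Flink runner (setter name assumed to
// mirror getDisableMetrics()).
FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
options.setDisableMetrics(true);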
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java
index f5d2bd1..a34d840 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java
@@ -26,7 +26,7 @@
import org.apache.beam.runners.flink.FlinkPipelineOptions;
import org.apache.beam.runners.flink.metrics.DoFnRunnerWithMetricsUpdate;
import org.apache.beam.runners.flink.metrics.FlinkMetricContainer;
-import org.apache.beam.runners.flink.translation.utils.FlinkClassloading;
+import org.apache.beam.runners.flink.translation.utils.Workarounds;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.options.PipelineOptions;
@@ -72,6 +72,7 @@
private final Map<String, PCollectionView<?>> sideInputMapping;
private transient DoFnInvoker<InputT, OutputT> doFnInvoker;
+ private transient FlinkMetricContainer metricContainer;
public FlinkDoFnFunction(
DoFn<InputT, OutputT> doFn,
@@ -131,10 +132,9 @@
doFnSchemaInformation,
sideInputMapping);
- if ((serializedOptions.get().as(FlinkPipelineOptions.class)).getEnableMetrics()) {
- doFnRunner =
- new DoFnRunnerWithMetricsUpdate<>(
- stepName, doFnRunner, new FlinkMetricContainer(getRuntimeContext()));
+ FlinkPipelineOptions pipelineOptions = serializedOptions.get().as(FlinkPipelineOptions.class);
+ if (!pipelineOptions.getDisableMetrics()) {
+ doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, metricContainer);
}
doFnRunner.startBundle();
@@ -153,14 +153,16 @@
// options where they are needed.
FileSystems.setDefaultPipelineOptions(serializedOptions.get());
doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn);
+ metricContainer = new FlinkMetricContainer(getRuntimeContext());
}
@Override
public void close() throws Exception {
try {
+ metricContainer.registerMetricsForPipelineResult();
Optional.ofNullable(doFnInvoker).ifPresent(DoFnInvoker::invokeTeardown);
} finally {
- FlinkClassloading.deleteStaticCaches();
+ Workarounds.deleteStaticCaches();
}
}
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkExecutableStageFunction.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkExecutableStageFunction.java
index f53dc61..4e1c709 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkExecutableStageFunction.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkExecutableStageFunction.java
@@ -31,6 +31,7 @@
import org.apache.beam.runners.core.TimerInternals;
import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.runners.core.construction.graph.ExecutableStage;
+import org.apache.beam.runners.flink.FlinkPipelineOptions;
import org.apache.beam.runners.flink.metrics.FlinkMetricContainer;
import org.apache.beam.runners.fnexecution.control.BundleProgressHandler;
import org.apache.beam.runners.fnexecution.control.ExecutableStageContext;
@@ -95,7 +96,7 @@
// Worker-local fields. These should only be constructed and consumed on Flink TaskManagers.
private transient RuntimeContext runtimeContext;
- private transient FlinkMetricContainer container;
+ private transient FlinkMetricContainer metricContainer;
private transient StateRequestHandler stateRequestHandler;
private transient ExecutableStageContext stageContext;
private transient StageBundleFactory stageBundleFactory;
@@ -124,12 +125,13 @@
}
@Override
- public void open(Configuration parameters) throws Exception {
+ public void open(Configuration parameters) {
+ FlinkPipelineOptions options = pipelineOptions.get().as(FlinkPipelineOptions.class);
// Register standard file systems.
- FileSystems.setDefaultPipelineOptions(pipelineOptions.get());
+ FileSystems.setDefaultPipelineOptions(options);
executableStage = ExecutableStage.fromPayload(stagePayload);
runtimeContext = getRuntimeContext();
- container = new FlinkMetricContainer(getRuntimeContext());
+ metricContainer = new FlinkMetricContainer(runtimeContext);
// TODO: Wire this into the distributed cache and make it pluggable.
stageContext = contextFactory.get(jobInfo);
stageBundleFactory = stageContext.getStageBundleFactory(executableStage);
@@ -143,12 +145,12 @@
new BundleProgressHandler() {
@Override
public void onProgress(ProcessBundleProgressResponse progress) {
- container.updateMetrics(stepName, progress.getMonitoringInfosList());
+ metricContainer.updateMetrics(stepName, progress.getMonitoringInfosList());
}
@Override
public void onCompleted(ProcessBundleResponse response) {
- container.updateMetrics(stepName, response.getMonitoringInfosList());
+ metricContainer.updateMetrics(stepName, response.getMonitoringInfosList());
}
};
}
@@ -281,6 +283,7 @@
@Override
public void close() throws Exception {
+ metricContainer.registerMetricsForPipelineResult();
// close may be called multiple times when an exception is thrown
if (stageContext != null) {
try (AutoCloseable bundleFactoryCloser = stageBundleFactory;
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkIdentityFunction.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkIdentityFunction.java
new file mode 100644
index 0000000..f9128e7
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkIdentityFunction.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import org.apache.flink.api.common.functions.MapFunction;
+
+/**
+ * A map function that outputs the input element without any change.
+ *
+ * @param <T> Input element type.
+ */
+public class FlinkIdentityFunction<T> implements MapFunction<T, T> {
+
+ private static final FlinkIdentityFunction<?> INSTANCE = new FlinkIdentityFunction<>();
+
+ @SuppressWarnings("unchecked")
+ public static <T> FlinkIdentityFunction<T> of() {
+ return (FlinkIdentityFunction) INSTANCE;
+ }
+
+ private FlinkIdentityFunction() {}
+
+ @Override
+ public T map(T value) {
+ return value;
+ }
+}
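A short usage sketch (the ExecutionEnvironment and data set are illustrative); the shared singleton is safe because the function holds no state:

// Sketch: forwarding elements unchanged, e.g. to insert a no-op mapping step.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> input = env.fromElements("a", "b", "c");
DataSet<String> same = input.map(FlinkIdentityFunction.of());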
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunction.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunction.java
index eac0298..d7fc2de 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunction.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunction.java
@@ -35,7 +35,7 @@
import org.apache.beam.runners.flink.FlinkPipelineOptions;
import org.apache.beam.runners.flink.metrics.DoFnRunnerWithMetricsUpdate;
import org.apache.beam.runners.flink.metrics.FlinkMetricContainer;
-import org.apache.beam.runners.flink.translation.utils.FlinkClassloading;
+import org.apache.beam.runners.flink.translation.utils.Workarounds;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.options.PipelineOptions;
@@ -72,7 +72,9 @@
private final Map<TupleTag<?>, Coder<?>> outputCoderMap;
private final DoFnSchemaInformation doFnSchemaInformation;
private final Map<String, PCollectionView<?>> sideInputMapping;
+
private transient DoFnInvoker doFnInvoker;
+ private transient FlinkMetricContainer metricContainer;
public FlinkStatefulDoFnFunction(
DoFn<KV<K, V>, OutputT> dofn,
@@ -157,10 +159,9 @@
doFnSchemaInformation,
sideInputMapping);
- if ((serializedOptions.get().as(FlinkPipelineOptions.class)).getEnableMetrics()) {
- doFnRunner =
- new DoFnRunnerWithMetricsUpdate<>(
- stepName, doFnRunner, new FlinkMetricContainer(getRuntimeContext()));
+ FlinkPipelineOptions pipelineOptions = serializedOptions.get().as(FlinkPipelineOptions.class);
+ if (!pipelineOptions.getDisableMetrics()) {
+ doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, metricContainer);
}
doFnRunner.startBundle();
@@ -214,7 +215,13 @@
StateNamespace namespace = timer.getNamespace();
checkArgument(namespace instanceof StateNamespaces.WindowNamespace);
BoundedWindow window = ((StateNamespaces.WindowNamespace) namespace).getWindow();
- doFnRunner.onTimer(timer.getTimerId(), window, timer.getTimestamp(), timer.getDomain());
+ doFnRunner.onTimer(
+ timer.getTimerId(),
+ timer.getTimerFamilyId(),
+ window,
+ timer.getTimestamp(),
+ timer.getOutputTimestamp(),
+ timer.getDomain());
}
@Override
@@ -223,15 +230,17 @@
// deserialization method. However, this is a hack, and we want to properly initialize the
// options where they are needed.
FileSystems.setDefaultPipelineOptions(serializedOptions.get());
+ metricContainer = new FlinkMetricContainer(getRuntimeContext());
doFnInvoker = DoFnInvokers.tryInvokeSetupFor(dofn);
}
@Override
public void close() throws Exception {
try {
+ metricContainer.registerMetricsForPipelineResult();
Optional.ofNullable(doFnInvoker).ifPresent(DoFnInvoker::invokeTeardown);
} finally {
- FlinkClassloading.deleteStaticCaches();
+ Workarounds.deleteStaticCaches();
}
}
}
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java
index c03bef9..5e76923 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java
@@ -19,7 +19,10 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
+import javax.annotation.Nullable;
+import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.typeinfo.AtomicType;
import org.apache.flink.api.common.typeinfo.TypeInformation;
@@ -33,10 +36,18 @@
public class CoderTypeInformation<T> extends TypeInformation<T> implements AtomicType<T> {
private final Coder<T> coder;
+ @Nullable private final SerializablePipelineOptions pipelineOptions;
public CoderTypeInformation(Coder<T> coder) {
checkNotNull(coder);
this.coder = coder;
+ this.pipelineOptions = null;
+ }
+
+ private CoderTypeInformation(Coder<T> coder, PipelineOptions pipelineOptions) {
+ checkNotNull(coder);
+ this.coder = coder;
+ this.pipelineOptions = new SerializablePipelineOptions(pipelineOptions);
}
public Coder<T> getCoder() {
@@ -70,9 +81,8 @@
}
@Override
- @SuppressWarnings("unchecked")
public TypeSerializer<T> createSerializer(ExecutionConfig config) {
- return new CoderTypeSerializer<>(coder);
+ return new CoderTypeSerializer<>(coder, pipelineOptions);
}
@Override
@@ -80,6 +90,18 @@
return 2;
}
+ /**
+ * Creates a new {@link CoderTypeInformation} with {@link PipelineOptions} that can be used for
+ * {@link org.apache.beam.sdk.io.FileSystems} registration.
+ *
+ * @param pipelineOptions Options of the current pipeline.
+ * @return New type information.
+ * @see <a href="https://issues.apache.org/jira/browse/BEAM-8577">Jira issue</a>
+ */
+ public CoderTypeInformation<T> withPipelineOptions(PipelineOptions pipelineOptions) {
+ return new CoderTypeInformation<>(getCoder(), pipelineOptions);
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) {
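A sketch of the intended call site for the new factory (the coder and options variables are illustrative): attaching options lets the serializer created from this type information register FileSystems on the task managers.

// Sketch: carrying PipelineOptions along with the coder-backed type information.
PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
CoderTypeInformation<String> typeInfo =
    new CoderTypeInformation<>(StringUtf8Coder.of()).withPipelineOptions(pipelineOptions);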
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/Workarounds.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/Workarounds.java
new file mode 100644
index 0000000..77baba3
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/Workarounds.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.utils;
+
+import com.fasterxml.jackson.databind.type.TypeFactory;
+import java.io.FileDescriptor;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.client.program.OptimizerPlanEnvironment;
+
+/** Workarounds for dealing with limitations of Flink or its libraries. */
+public class Workarounds {
+
+ public static void deleteStaticCaches() {
+ // Clear cache to get rid of any references to the Flink Classloader
+ // See https://jira.apache.org/jira/browse/BEAM-6460
+ TypeFactory.defaultInstance().clearCache();
+ }
+
+ /**
+ * Flink uses the {@link org.apache.flink.client.program.OptimizerPlanEnvironment}, which replaces
+ * stdout/stderr during job graph creation. This was intended only for previewing the plan, but
+ * other parts of Flink, e.g. the REST API, have started to use this code as well. To be able to
+ * inspect the output before execution, we use this method to restore the original stdout/stderr.
+ */
+ public static void restoreOriginalStdOutAndStdErrIfApplicable() {
+ if (ExecutionEnvironment.getExecutionEnvironment() instanceof OptimizerPlanEnvironment) {
+ System.setOut(new PrintStream(new FileOutputStream(FileDescriptor.out)));
+ System.setErr(new PrintStream(new FileOutputStream(FileDescriptor.err)));
+ }
+ }
+}
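A sketch of where the restore call is meant to sit (class name illustrative): it is invoked as early as possible, for example in a runner or invoker constructor, so that anything printed afterwards reaches the real stdout/stderr even under Flink's OptimizerPlanEnvironment.

// Sketch: undoing Flink's stdout/stderr capture before any user-visible output.
public class ExampleEntryPoint {
  public ExampleEntryPoint() {
    Workarounds.restoreOriginalStdOutAndStdErrIfApplicable();
  }
}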
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java
index a9c787b..b22fd88 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java
@@ -52,6 +52,7 @@
private boolean inputAvailable = false;
private transient ReaderInvocationUtil<T, BoundedSource.BoundedReader<T>> readerInvoker;
+ private transient FlinkMetricContainer metricContainer;
public SourceInputFormat(
String stepName, BoundedSource<T> initialSource, PipelineOptions options) {
@@ -67,7 +68,7 @@
@Override
public void open(SourceInputSplit<T> sourceInputSplit) throws IOException {
- FlinkMetricContainer metricContainer = new FlinkMetricContainer(getRuntimeContext());
+ metricContainer = new FlinkMetricContainer(getRuntimeContext());
readerInvoker = new ReaderInvocationUtil<>(stepName, serializedOptions.get(), metricContainer);
@@ -145,6 +146,7 @@
@Override
public void close() throws IOException {
+ metricContainer.registerMetricsForPipelineResult();
// TODO null check can be removed once FLINK-3796 is fixed
if (reader != null) {
reader.close();
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
index 42e68ac..b7c650c 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
@@ -58,8 +58,8 @@
import org.apache.beam.runners.flink.metrics.DoFnRunnerWithMetricsUpdate;
import org.apache.beam.runners.flink.metrics.FlinkMetricContainer;
import org.apache.beam.runners.flink.translation.types.CoderTypeSerializer;
-import org.apache.beam.runners.flink.translation.utils.FlinkClassloading;
import org.apache.beam.runners.flink.translation.utils.NoopLock;
+import org.apache.beam.runners.flink.translation.utils.Workarounds;
import org.apache.beam.runners.flink.translation.wrappers.streaming.stableinput.BufferingDoFnRunner;
import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkBroadcastStateInternals;
import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals;
@@ -171,7 +171,7 @@
final KeySelector<WindowedValue<InputT>, ?> keySelector;
- private final TimerInternals.TimerDataCoder timerCoder;
+ private final TimerInternals.TimerDataCoderV2 timerCoder;
/** Max number of elements to include in a bundle. */
private final long maxBundleSize;
@@ -244,7 +244,7 @@
this.keySelector = keySelector;
this.timerCoder =
- TimerInternals.TimerDataCoder.of(windowingStrategy.getWindowFn().windowCoder());
+ TimerInternals.TimerDataCoderV2.of(windowingStrategy.getWindowFn().windowCoder());
FlinkPipelineOptions flinkOptions = options.as(FlinkPipelineOptions.class);
@@ -438,7 +438,7 @@
}
doFnRunner = createWrappingDoFnRunner(doFnRunner);
- if (options.getEnableMetrics()) {
+ if (!options.getDisableMetrics()) {
flinkMetricContainer = new FlinkMetricContainer(getRuntimeContext());
doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, flinkMetricContainer);
}
@@ -467,7 +467,7 @@
public void dispose() throws Exception {
try {
Optional.ofNullable(checkFinishBundleTimer).ifPresent(timer -> timer.cancel(true));
- FlinkClassloading.deleteStaticCaches();
+ Workarounds.deleteStaticCaches();
Optional.ofNullable(doFnInvoker).ifPresent(DoFnInvoker::invokeTeardown);
} finally {
// This releases all task's resources. We need to call this last
@@ -480,6 +480,7 @@
@Override
public void close() throws Exception {
try {
+ flinkMetricContainer.registerMetricsForPipelineResult();
// This is our last change to block shutdown of this operator while
// there are still remaining processing-time timers. Flink will ignore pending
// processing-time timers when upstream operators have shut down and will also
@@ -812,7 +813,12 @@
BoundedWindow window = ((WindowNamespace) namespace).getWindow();
timerInternals.cleanupPendingTimer(timer.getNamespace());
pushbackDoFnRunner.onTimer(
- timerData.getTimerId(), window, timerData.getTimestamp(), timerData.getDomain());
+ timerData.getTimerId(),
+ timerData.getTimerFamilyId(),
+ window,
+ timerData.getTimestamp(),
+ timerData.getOutputTimestamp(),
+ timerData.getDomain());
}
private void setCurrentInputWatermark(long currentInputWatermark) {
@@ -1087,11 +1093,20 @@
@Override
public void setTimer(
- StateNamespace namespace, String timerId, Instant target, TimeDomain timeDomain) {
- setTimer(TimerData.of(timerId, namespace, target, timeDomain));
+ StateNamespace namespace,
+ String timerId,
+ String timerFamilyId,
+ Instant target,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
+ setTimer(
+ TimerData.of(timerId, timerFamilyId, namespace, target, outputTimestamp, timeDomain));
}
- /** @deprecated use {@link #setTimer(StateNamespace, String, Instant, TimeDomain)}. */
+ /**
+ * @deprecated use {@link #setTimer(StateNamespace, String, String, Instant, Instant,
+ * TimeDomain)}.
+ */
@Deprecated
@Override
public void setTimer(TimerData timer) {
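A sketch of a caller migrating from the deprecated single-timestamp form to the widened signature (the timer id, firing time and window are illustrative): the family id may be left empty when timer families are not used, and the output timestamp acts as the watermark hold for the timer's output.

// Sketch: setting a timer with the new signature introduced above.
timerInternals.setTimer(
    StateNamespaces.window(windowCoder, window),
    "cleanup",             // timerId (illustrative)
    "",                    // timerFamilyId, empty when unused
    firingTime,            // when the timer fires
    window.maxTimestamp(), // outputTimestamp (watermark hold)
    TimeDomain.EVENT_TIME);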
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/ExecutableStageDoFnOperator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/ExecutableStageDoFnOperator.java
index ba951b1..45fc2a1 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/ExecutableStageDoFnOperator.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/ExecutableStageDoFnOperator.java
@@ -661,7 +661,12 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
Object timerKey = keyForTimer.get();
Preconditions.checkNotNull(timerKey, "Key for timer needs to be set before calling onTimer");
Preconditions.checkNotNull(remoteBundle, "Call to onTimer outside of a bundle");
@@ -839,7 +844,9 @@
timerInternals.setTimer(
StateNamespaces.window(windowCoder, window),
GC_TIMER_ID,
+ "",
gcTime,
+ window.maxTimestamp(),
TimeDomain.EVENT_TIME);
} finally {
stateBackendLock.unlock();
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapper.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapper.java
index 56744f6..a3b40d0 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapper.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapper.java
@@ -27,7 +27,7 @@
import org.apache.beam.runners.flink.metrics.FlinkMetricContainer;
import org.apache.beam.runners.flink.metrics.ReaderInvocationUtil;
import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
-import org.apache.beam.runners.flink.translation.utils.FlinkClassloading;
+import org.apache.beam.runners.flink.translation.utils.Workarounds;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.SerializableCoder;
@@ -136,6 +136,9 @@
/** false if checkpointCoder is null or no restore state by starting first. */
private transient boolean isRestored = false;
+ /** Metrics container which will be reported as Flink accumulators at the end of the job. */
+ private transient FlinkMetricContainer metricContainer;
+
@SuppressWarnings("unchecked")
public UnboundedSourceWrapper(
String stepName,
@@ -177,6 +180,7 @@
public void open(Configuration parameters) throws Exception {
FileSystems.setDefaultPipelineOptions(serializedOptions.get());
runtimeContext = (StreamingRuntimeContext) getRuntimeContext();
+ metricContainer = new FlinkMetricContainer(runtimeContext);
// figure out which split sources we're responsible for
int subtaskIndex = runtimeContext.getIndexOfThisSubtask();
@@ -220,8 +224,6 @@
context = ctx;
- FlinkMetricContainer metricContainer = new FlinkMetricContainer(getRuntimeContext());
-
ReaderInvocationUtil<OutputT, UnboundedSource.UnboundedReader<OutputT>> readerInvoker =
new ReaderInvocationUtil<>(stepName, serializedOptions.get(), metricContainer);
@@ -347,6 +349,7 @@
@Override
public void close() throws Exception {
+ metricContainer.registerMetricsForPipelineResult();
try {
super.close();
if (localReaders != null) {
@@ -355,7 +358,7 @@
}
}
} finally {
- FlinkClassloading.deleteStaticCaches();
+ Workarounds.deleteStaticCaches();
}
}
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/stableinput/BufferedElements.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/stableinput/BufferedElements.java
index b0f9304..5c5ca6a 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/stableinput/BufferedElements.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/stableinput/BufferedElements.java
@@ -67,20 +67,30 @@
static final class Timer implements BufferedElement {
private final String timerId;
+ private final String timerFamilyId;
private final BoundedWindow window;
private final Instant timestamp;
+ private final Instant outputTimestamp;
private final TimeDomain timeDomain;
- Timer(String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
+ Timer(
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
this.timerId = timerId;
this.window = window;
this.timestamp = timestamp;
this.timeDomain = timeDomain;
+ this.outputTimestamp = outputTimestamp;
+ this.timerFamilyId = timerFamilyId;
}
@Override
public void processWith(DoFnRunner doFnRunner) {
- doFnRunner.onTimer(timerId, window, timestamp, timeDomain);
+ doFnRunner.onTimer(timerId, timerFamilyId, window, timestamp, outputTimestamp, timeDomain);
}
@Override
@@ -130,8 +140,10 @@
outStream.write(TIMER_MAGIC_BYTE);
Timer timer = (Timer) value;
STRING_CODER.encode(timer.timerId, outStream);
+ STRING_CODER.encode(timer.timerFamilyId, outStream);
windowCoder.encode(timer.window, outStream);
INSTANT_CODER.encode(timer.timestamp, outStream);
+ INSTANT_CODER.encode(timer.outputTimestamp, outStream);
outStream.write(timer.timeDomain.ordinal());
} else {
throw new IllegalStateException("Unexpected element " + value);
@@ -147,8 +159,10 @@
case TIMER_MAGIC_BYTE:
return new Timer(
STRING_CODER.decode(inStream),
+ STRING_CODER.decode(inStream),
windowCoder.decode(inStream),
INSTANT_CODER.decode(inStream),
+ INSTANT_CODER.decode(inStream),
TimeDomain.values()[inStream.read()]);
default:
throw new IllegalStateException(
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/stableinput/BufferingDoFnRunner.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/stableinput/BufferingDoFnRunner.java
index 80aabc2..367ed32 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/stableinput/BufferingDoFnRunner.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/stableinput/BufferingDoFnRunner.java
@@ -117,9 +117,15 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
currentBufferingElementsHandler.buffer(
- new BufferedElements.Timer(timerId, window, timestamp, timeDomain));
+ new BufferedElements.Timer(
+ timerId, timerFamilyId, window, timestamp, outputTimestamp, timeDomain));
}
@Override
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkJobInvokerTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkJobInvokerTest.java
new file mode 100644
index 0000000..28c2cde
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkJobInvokerTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import org.apache.beam.model.pipeline.v1.RunnerApi;
+import org.apache.beam.runners.core.construction.PipelineOptionsTranslation;
+import org.apache.beam.runners.core.construction.PipelineTranslation;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.io.GenerateSequence;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.Struct;
+import org.apache.flink.client.program.OptimizerPlanEnvironment;
+import org.apache.flink.client.program.PackagedProgram;
+import org.apache.flink.client.program.ProgramInvocationException;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.optimizer.Optimizer;
+import org.hamcrest.CoreMatchers;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.core.StringContains;
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Tests for {@link FlinkJobInvoker}. */
+public class FlinkJobInvokerTest {
+
+ @Test
+ public void testEnsureStdoutStdErrIsRestored() throws Exception {
+ PackagedProgram packagedProgram = new PackagedProgram(getClass());
+ OptimizerPlanEnvironment env = new OptimizerPlanEnvironment(new Optimizer(new Configuration()));
+ try {
+ // Flink will throw an error because no job graph will be generated by the main method
+ env.getOptimizedPlan(packagedProgram);
+ Assert.fail("This should have failed to create the Flink Plan.");
+ } catch (ProgramInvocationException e) {
+ // Test that Flink wasn't able to intercept the stdout/stderr and we printed to the regular
+ // output instead
+ MatcherAssert.assertThat(
+ e.getMessage(),
+ CoreMatchers.allOf(
+ StringContains.containsString("System.out: (none)"),
+ StringContains.containsString("System.err: (none)")));
+ }
+ }
+
+ /** Main method for {@code testEnsureStdoutStdErrIsRestored()}. */
+ public static void main(String[] args) {
+ Pipeline p = Pipeline.create();
+ p.apply(GenerateSequence.from(0));
+
+ RunnerApi.Pipeline pipeline = PipelineTranslation.toProto(p);
+ Struct options = PipelineOptionsTranslation.toProto(PipelineOptionsFactory.create());
+
+ FlinkJobInvoker flinkJobInvoker =
+ FlinkJobInvoker.create(new FlinkJobServerDriver.FlinkServerConfiguration());
+ // This will call Workarounds.restoreOriginalStdOutAndStdErrIfApplicable(), which we want to test
+ flinkJobInvoker.invokeWithExecutor(pipeline, options, "retrievalToken", null);
+ }
+}
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkPipelineOptionsTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkPipelineOptionsTest.java
index e5e297a..48c4ed5 100644
--- a/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkPipelineOptionsTest.java
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkPipelineOptionsTest.java
@@ -23,6 +23,7 @@
import java.util.Collections;
import java.util.HashMap;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.SerializationUtils;
import org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
@@ -37,7 +38,6 @@
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.WindowingStrategy;
-import org.apache.commons.lang3.SerializationUtils;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.ExecutionMode;
import org.apache.flink.api.common.typeinfo.TypeHint;
@@ -91,6 +91,7 @@
assertThat(options.getExecutionModeForBatch(), is(ExecutionMode.PIPELINED.name()));
assertThat(options.getSavepointPath(), is(nullValue()));
assertThat(options.getAllowNonRestoredState(), is(false));
+ assertThat(options.getDisableMetrics(), is(false));
}
@Test(expected = Exception.class)
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkRunnerTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkRunnerTest.java
new file mode 100644
index 0000000..182f65e
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkRunnerTest.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import static org.hamcrest.CoreMatchers.allOf;
+
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.io.GenerateSequence;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.flink.client.program.OptimizerPlanEnvironment;
+import org.apache.flink.client.program.PackagedProgram;
+import org.apache.flink.client.program.ProgramInvocationException;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.optimizer.Optimizer;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.core.StringContains;
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Test for {@link FlinkRunner}. */
+public class FlinkRunnerTest {
+
+ @Test
+ public void testEnsureStdoutStdErrIsRestored() throws Exception {
+ PackagedProgram packagedProgram = new PackagedProgram(getClass());
+ OptimizerPlanEnvironment env = new OptimizerPlanEnvironment(new Optimizer(new Configuration()));
+ try {
+ // Flink will throw an error because no job graph will be generated by the main method
+ env.getOptimizedPlan(packagedProgram);
+ Assert.fail("This should have failed to create the Flink Plan.");
+ } catch (ProgramInvocationException e) {
+ // Test that Flink wasn't able to intercept the stdout/stderr and we printed to the regular
+ // output instead
+ MatcherAssert.assertThat(
+ e.getMessage(),
+ allOf(
+ StringContains.containsString("System.out: (none)"),
+ StringContains.containsString("System.err: (none)")));
+ }
+ }
+
+ /** Main method for {@code testEnsureStdoutStdErrIsRestored()}. */
+ public static void main(String[] args) {
+ FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
+ options.setRunner(NotExecutingFlinkRunner.class);
+ Pipeline p = Pipeline.create(options);
+ p.apply(GenerateSequence.from(0));
+
+ // This will call Workarounds.restoreOriginalStdOutAndStdErrIfApplicable() through the
+ // constructor of FlinkRunner
+ p.run();
+ }
+
+ private static class NotExecutingFlinkRunner extends FlinkRunner {
+
+ protected NotExecutingFlinkRunner(FlinkPipelineOptions options) {
+ // Stdout/Stderr is restored here
+ super(options);
+ }
+
+ @SuppressWarnings("unused")
+ public static NotExecutingFlinkRunner fromOptions(PipelineOptions options) {
+ return new NotExecutingFlinkRunner(options.as(FlinkPipelineOptions.class));
+ }
+
+ @Override
+ public PipelineResult run(Pipeline pipeline) {
+ // Do not execute to test the stdout printing
+ return null;
+ }
+ }
+}
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/metrics/FlinkMetricContainerTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/metrics/FlinkMetricContainerTest.java
index b4bad56..a85ad65 100644
--- a/runners/flink/src/test/java/org/apache/beam/runners/flink/metrics/FlinkMetricContainerTest.java
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/metrics/FlinkMetricContainerTest.java
@@ -21,8 +21,8 @@
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertNotNull;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.anyObject;
-import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.argThat;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.verify;
@@ -67,6 +67,8 @@
@Mock private RuntimeContext runtimeContext;
@Mock private MetricGroup metricGroup;
+ FlinkMetricContainer container;
+
@Before
public void beforeTest() {
MockitoAnnotations.initMocks(this);
@@ -74,6 +76,7 @@
anyString()))
.thenReturn(new MetricsAccumulator());
when(runtimeContext.getMetricGroup()).thenReturn(metricGroup);
+ container = new FlinkMetricContainer(runtimeContext);
}
@Test
@@ -88,7 +91,6 @@
SimpleCounter flinkCounter = new SimpleCounter();
when(metricGroup.counter("namespace.name")).thenReturn(flinkCounter);
- FlinkMetricContainer container = new FlinkMetricContainer(runtimeContext);
MetricsContainer step = container.getMetricsContainer("step");
MetricName metricName = MetricName.named("namespace", "name");
Counter counter = step.getCounter(metricName);
@@ -106,7 +108,6 @@
new FlinkMetricContainer.FlinkGauge(GaugeResult.empty());
when(metricGroup.gauge(eq("namespace.name"), anyObject())).thenReturn(flinkGauge);
- FlinkMetricContainer container = new FlinkMetricContainer(runtimeContext);
MetricsContainer step = container.getMetricsContainer("step");
MetricName metricName = MetricName.named("namespace", "name");
Gauge gauge = step.getGauge(metricName);
@@ -122,8 +123,6 @@
@Test
public void testMonitoringInfoUpdate() {
- FlinkMetricContainer container = new FlinkMetricContainer(runtimeContext);
-
SimpleCounter userCounter = new SimpleCounter();
when(metricGroup.counter("ns1.metric1")).thenReturn(userCounter);
@@ -173,8 +172,7 @@
@Test
public void testDropUnexpectedMonitoringInfoTypes() {
- FlinkMetricContainer flinkContainer = new FlinkMetricContainer(runtimeContext);
- MetricsContainerImpl step = flinkContainer.getMetricsContainer("step");
+ MetricsContainerImpl step = container.getMetricsContainer("step");
MonitoringInfo intCounter =
MonitoringInfo.newBuilder()
@@ -237,7 +235,7 @@
SimpleCounter counter = new SimpleCounter();
when(metricGroup.counter("ns1.int_counter")).thenReturn(counter);
- flinkContainer.updateMetrics(
+ container.updateMetrics(
"step", ImmutableList.of(intCounter, doubleCounter, intDistribution, doubleDistribution));
// Flink's MetricGroup should only have asked for one counter (the integer-typed one) to be
@@ -280,7 +278,6 @@
new FlinkMetricContainer.FlinkDistributionGauge(DistributionResult.IDENTITY_ELEMENT);
when(metricGroup.gauge(eq("namespace.name"), anyObject())).thenReturn(flinkGauge);
- FlinkMetricContainer container = new FlinkMetricContainer(runtimeContext);
MetricsContainer step = container.getMetricsContainer("step");
MetricName metricName = MetricName.named("namespace", "name");
Distribution distribution = step.getDistribution(metricName);
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunctionTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunctionTest.java
new file mode 100644
index 0000000..779f9c1
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunctionTest.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import java.util.Collections;
+import java.util.Map;
+import org.apache.beam.runners.flink.metrics.FlinkMetricContainer;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.DoFnSchemaInformation;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.sdk.values.WindowingStrategy;
+import org.apache.flink.api.common.functions.RuntimeContext;
+import org.apache.flink.configuration.Configuration;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.powermock.reflect.Whitebox;
+
+/** Tests for {@link FlinkDoFnFunction}. */
+public class FlinkDoFnFunctionTest {
+
+ @Test
+ public void testAccumulatorRegistrationOnOperatorClose() throws Exception {
+ FlinkDoFnFunction doFnFunction =
+ new TestDoFnFunction(
+ "step",
+ WindowingStrategy.globalDefault(),
+ Collections.emptyMap(),
+ PipelineOptionsFactory.create(),
+ Collections.emptyMap(),
+ new TupleTag<>(),
+ null,
+ Collections.emptyMap(),
+ DoFnSchemaInformation.create(),
+ Collections.emptyMap());
+
+ doFnFunction.open(new Configuration());
+
+ String metricContainerFieldName = "metricContainer";
+ FlinkMetricContainer monitoredContainer =
+ Mockito.spy(
+ (FlinkMetricContainer)
+ Whitebox.getInternalState(doFnFunction, metricContainerFieldName));
+ Whitebox.setInternalState(doFnFunction, metricContainerFieldName, monitoredContainer);
+
+ doFnFunction.close();
+ Mockito.verify(monitoredContainer).registerMetricsForPipelineResult();
+ }
+
+ private static class TestDoFnFunction extends FlinkDoFnFunction {
+
+ public TestDoFnFunction(
+ String stepName,
+ WindowingStrategy windowingStrategy,
+ Map sideInputs,
+ PipelineOptions options,
+ Map outputMap,
+ TupleTag mainOutputTag,
+ Coder inputCoder,
+ Map outputCoderMap,
+ DoFnSchemaInformation doFnSchemaInformation,
+ Map sideInputMapping) {
+ super(
+ new IdentityFn(),
+ stepName,
+ windowingStrategy,
+ sideInputs,
+ options,
+ outputMap,
+ mainOutputTag,
+ inputCoder,
+ outputCoderMap,
+ doFnSchemaInformation,
+ sideInputMapping);
+ }
+
+ @Override
+ public RuntimeContext getRuntimeContext() {
+ return Mockito.mock(RuntimeContext.class);
+ }
+
+ private static class IdentityFn<T> extends DoFn<T, T> {
+ @ProcessElement
+ public void processElement(ProcessContext c) {
+ c.output(c.element());
+ }
+ }
+ }
+}
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/functions/FlinkExecutableStageFunctionTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/functions/FlinkExecutableStageFunctionTest.java
index 93f7cd2..61d8906 100644
--- a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/functions/FlinkExecutableStageFunctionTest.java
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/functions/FlinkExecutableStageFunctionTest.java
@@ -32,6 +32,7 @@
import org.apache.beam.model.pipeline.v1.RunnerApi.Components;
import org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.PCollection;
+import org.apache.beam.runners.flink.metrics.FlinkMetricContainer;
import org.apache.beam.runners.fnexecution.control.BundleProgressHandler;
import org.apache.beam.runners.fnexecution.control.ExecutableStageContext;
import org.apache.beam.runners.fnexecution.control.OutputReceiverFactory;
@@ -197,6 +198,11 @@
}
@Override
+ public void split(double fractionOfRemainder) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public void close() throws Exception {
if (once) {
return;
@@ -248,6 +254,21 @@
verifyNoMoreInteractions(stageBundleFactory);
}
+ @Test
+ public void testAccumulatorRegistrationOnOperatorClose() throws Exception {
+ FlinkExecutableStageFunction<Integer> function = getFunction(Collections.emptyMap());
+ function.open(new Configuration());
+
+ String metricContainerFieldName = "metricContainer";
+ FlinkMetricContainer monitoredContainer =
+ Mockito.spy(
+ (FlinkMetricContainer) Whitebox.getInternalState(function, metricContainerFieldName));
+ Whitebox.setInternalState(function, metricContainerFieldName, monitoredContainer);
+
+ function.close();
+ Mockito.verify(monitoredContainer).registerMetricsForPipelineResult();
+ }
+
/**
* Creates a {@link FlinkExecutableStageFunction}. Sets the runtime context to {@link
* #runtimeContext}. The context factory is mocked to return {@link #stageContext} every time. The
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunctionTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunctionTest.java
new file mode 100644
index 0000000..4f9707c
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunctionTest.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import java.util.Collections;
+import java.util.Map;
+import org.apache.beam.runners.flink.metrics.FlinkMetricContainer;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.DoFnSchemaInformation;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.sdk.values.WindowingStrategy;
+import org.apache.flink.api.common.functions.RuntimeContext;
+import org.apache.flink.configuration.Configuration;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.powermock.reflect.Whitebox;
+
+/** Tests for {@link FlinkStatefulDoFnFunction}. */
+public class FlinkStatefulDoFnFunctionTest {
+
+ @Test
+ public void testAccumulatorRegistrationOnOperatorClose() throws Exception {
+ FlinkStatefulDoFnFunction doFnFunction =
+ new TestDoFnFunction(
+ "step",
+ WindowingStrategy.globalDefault(),
+ Collections.emptyMap(),
+ PipelineOptionsFactory.create(),
+ Collections.emptyMap(),
+ new TupleTag<>(),
+ null,
+ Collections.emptyMap(),
+ DoFnSchemaInformation.create(),
+ Collections.emptyMap());
+
+ doFnFunction.open(new Configuration());
+
+ String metricContainerFieldName = "metricContainer";
+ FlinkMetricContainer monitoredContainer =
+ Mockito.spy(
+ (FlinkMetricContainer)
+ Whitebox.getInternalState(doFnFunction, metricContainerFieldName));
+ Whitebox.setInternalState(doFnFunction, metricContainerFieldName, monitoredContainer);
+
+ doFnFunction.close();
+ Mockito.verify(monitoredContainer).registerMetricsForPipelineResult();
+ }
+
+ private static class TestDoFnFunction extends FlinkStatefulDoFnFunction {
+
+ public TestDoFnFunction(
+ String stepName,
+ WindowingStrategy windowingStrategy,
+ Map sideInputs,
+ PipelineOptions options,
+ Map outputMap,
+ TupleTag mainOutputTag,
+ Coder inputCoder,
+ Map outputCoderMap,
+ DoFnSchemaInformation doFnSchemaInformation,
+ Map sideInputMapping) {
+ super(
+ new IdentityFn(),
+ stepName,
+ windowingStrategy,
+ sideInputs,
+ options,
+ outputMap,
+ mainOutputTag,
+ inputCoder,
+ outputCoderMap,
+ doFnSchemaInformation,
+ sideInputMapping);
+ }
+
+ @Override
+ public RuntimeContext getRuntimeContext() {
+ return Mockito.mock(RuntimeContext.class);
+ }
+
+ private static class IdentityFn<T> extends DoFn<T, T> {
+ @ProcessElement
+ public void processElement(ProcessContext c) {
+ c.output(c.element());
+ }
+ }
+ }
+}
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormatTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormatTest.java
new file mode 100644
index 0000000..4e8affd
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormatTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers;
+
+import org.apache.beam.runners.flink.metrics.FlinkMetricContainer;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.io.CountingSource;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.flink.api.common.functions.RuntimeContext;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.powermock.reflect.Whitebox;
+
+/** Tests for {@link SourceInputFormat}. */
+public class SourceInputFormatTest {
+
+ @Test
+ public void testAccumulatorRegistrationOnOperatorClose() throws Exception {
+ SourceInputFormat<Long> sourceInputFormat =
+ new TestSourceInputFormat<>(
+ "step", CountingSource.upTo(10), PipelineOptionsFactory.create());
+
+ sourceInputFormat.open(sourceInputFormat.createInputSplits(1)[0]);
+
+ String metricContainerFieldName = "metricContainer";
+ FlinkMetricContainer monitoredContainer =
+ Mockito.spy(
+ (FlinkMetricContainer)
+ Whitebox.getInternalState(sourceInputFormat, metricContainerFieldName));
+ Whitebox.setInternalState(sourceInputFormat, metricContainerFieldName, monitoredContainer);
+
+ sourceInputFormat.close();
+ Mockito.verify(monitoredContainer).registerMetricsForPipelineResult();
+ }
+
+ private static class TestSourceInputFormat<T> extends SourceInputFormat<T> {
+
+ public TestSourceInputFormat(
+ String stepName, BoundedSource initialSource, PipelineOptions options) {
+ super(stepName, initialSource, options);
+ }
+
+ @Override
+ public RuntimeContext getRuntimeContext() {
+ return Mockito.mock(RuntimeContext.class);
+ }
+ }
+}
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperatorTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperatorTest.java
index 220ffc9..235a2e3 100644
--- a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperatorTest.java
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperatorTest.java
@@ -39,6 +39,7 @@
import javax.annotation.Nullable;
import org.apache.beam.runners.core.StatefulDoFnRunner;
import org.apache.beam.runners.flink.FlinkPipelineOptions;
+import org.apache.beam.runners.flink.metrics.FlinkMetricContainer;
import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
import org.apache.beam.runners.flink.translation.types.CoderTypeSerializer;
import org.apache.beam.sdk.Pipeline;
@@ -94,6 +95,7 @@
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
+import org.mockito.Mockito;
import org.powermock.reflect.Whitebox;
/** Tests for {@link DoFnOperator}. */
@@ -1874,13 +1876,43 @@
assertThrows(Error.class, () -> testHarness.snapshot(0, 0));
}
+ @Test
+ public void testAccumulatorRegistrationOnOperatorClose() throws Exception {
+ DoFnOperator doFnOperator = getOperatorForCleanupInspection();
+ OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
+ new OneInputStreamOperatorTestHarness<>(doFnOperator);
+
+ testHarness.open();
+
+ String metricContainerFieldName = "flinkMetricContainer";
+ FlinkMetricContainer monitoredContainer =
+ Mockito.spy(
+ (FlinkMetricContainer)
+ Whitebox.getInternalState(doFnOperator, metricContainerFieldName));
+ Whitebox.setInternalState(doFnOperator, metricContainerFieldName, monitoredContainer);
+
+ testHarness.close();
+ Mockito.verify(monitoredContainer).registerMetricsForPipelineResult();
+ }
+
/**
* Ensures Jackson cache is cleaned to get rid of any references to the Flink Classloader. See
* https://jira.apache.org/jira/browse/BEAM-6460
*/
@Test
public void testRemoveCachedClassReferences() throws Exception {
+ OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
+ new OneInputStreamOperatorTestHarness<>(getOperatorForCleanupInspection());
+ LRUMap typeCache =
+ (LRUMap) Whitebox.getInternalState(TypeFactory.defaultInstance(), "_typeCache");
+ assertThat(typeCache.size(), greaterThan(0));
+ testHarness.open();
+ testHarness.close();
+ assertThat(typeCache.size(), is(0));
+ }
+
+ private static DoFnOperator getOperatorForCleanupInspection() {
FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
options.setParallelism(4);
@@ -1901,34 +1933,23 @@
outputTag,
WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));
- DoFnOperator<String, String> doFnOperator =
- new DoFnOperator<>(
- doFn,
- "stepName",
- windowedValueCoder,
- null,
- Collections.emptyMap(),
- outputTag,
- Collections.emptyList(),
- outputManagerFactory,
- WindowingStrategy.globalDefault(),
- new HashMap<>(), /* side-input mapping */
- Collections.emptyList(), /* side inputs */
- options,
- null,
- null,
- DoFnSchemaInformation.create(),
- Collections.emptyMap());
-
- OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
- new OneInputStreamOperatorTestHarness<>(doFnOperator);
-
- LRUMap typeCache =
- (LRUMap) Whitebox.getInternalState(TypeFactory.defaultInstance(), "_typeCache");
- assertThat(typeCache.size(), greaterThan(0));
- testHarness.open();
- testHarness.close();
- assertThat(typeCache.size(), is(0));
+ return new DoFnOperator<>(
+ doFn,
+ "stepName",
+ windowedValueCoder,
+ null,
+ Collections.emptyMap(),
+ outputTag,
+ Collections.emptyList(),
+ outputManagerFactory,
+ WindowingStrategy.globalDefault(),
+ new HashMap<>(), /* side-input mapping */
+ Collections.emptyList(), /* side inputs */
+ options,
+ null,
+ null,
+ DoFnSchemaInformation.create(),
+ Collections.emptyMap());
}
private Iterable<WindowedValue<String>> stripStreamRecord(Iterable<?> input) {
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/ExecutableStageDoFnOperatorTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/ExecutableStageDoFnOperatorTest.java
index 9f7eff4..e3b46e6 100644
--- a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/ExecutableStageDoFnOperatorTest.java
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/ExecutableStageDoFnOperatorTest.java
@@ -29,8 +29,8 @@
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.verify;
@@ -53,6 +53,8 @@
import org.apache.beam.model.pipeline.v1.RunnerApi.Components;
import org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload;
import org.apache.beam.model.pipeline.v1.RunnerApi.PCollection;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.SerializationUtils;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.mutable.MutableObject;
import org.apache.beam.runners.core.InMemoryStateInternals;
import org.apache.beam.runners.core.InMemoryTimerInternals;
import org.apache.beam.runners.core.StateNamespace;
@@ -101,8 +103,6 @@
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables;
import org.apache.beam.vendor.sdk.v2.sdk.extensions.protobuf.ByteStringCoder;
-import org.apache.commons.lang3.SerializationUtils;
-import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.flink.api.common.cache.DistributedCache;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.common.typeinfo.TypeInformation;
@@ -315,6 +315,11 @@
}
@Override
+ public void split(double fractionOfRemainder) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public void close() throws Exception {
if (onceEmitted) {
return;
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapperTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapperTest.java
index 7b0f9b8..5a04f7e 100644
--- a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapperTest.java
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapperTest.java
@@ -36,6 +36,7 @@
import java.util.stream.LongStream;
import org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource;
import org.apache.beam.runners.flink.FlinkPipelineOptions;
+import org.apache.beam.runners.flink.metrics.FlinkMetricContainer;
import org.apache.beam.runners.flink.streaming.StreamSources;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.io.CountingSource;
@@ -71,6 +72,7 @@
import org.junit.runners.JUnit4;
import org.junit.runners.Parameterized;
import org.mockito.Mockito;
+import org.powermock.reflect.Whitebox;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -775,6 +777,34 @@
.boxed()
.toArray()));
}
+
+ @Test
+ public void testAccumulatorRegistrationOnOperatorClose() throws Exception {
+ FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
+
+ TestCountingSource source = new TestCountingSource(20).withoutSplitting();
+
+ UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> sourceWrapper =
+ new UnboundedSourceWrapper<>("noReader", options, source, 2);
+
+ StreamingRuntimeContext mock = Mockito.mock(StreamingRuntimeContext.class);
+ Mockito.when(mock.getNumberOfParallelSubtasks()).thenReturn(1);
+ Mockito.when(mock.getExecutionConfig()).thenReturn(new ExecutionConfig());
+ Mockito.when(mock.getIndexOfThisSubtask()).thenReturn(0);
+ sourceWrapper.setRuntimeContext(mock);
+
+ sourceWrapper.open(new Configuration());
+
+ String metricContainerFieldName = "metricContainer";
+ FlinkMetricContainer monitoredContainer =
+ Mockito.spy(
+ (FlinkMetricContainer)
+ Whitebox.getInternalState(sourceWrapper, metricContainerFieldName));
+ Whitebox.setInternalState(sourceWrapper, metricContainerFieldName, monitoredContainer);
+
+ sourceWrapper.close();
+ Mockito.verify(monitoredContainer).registerMetricsForPipelineResult();
+ }
}
private static final class TestStreamStatusMaintainer implements StreamStatusMaintainer {
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/stableinput/BufferedElementsTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/stableinput/BufferedElementsTest.java
index 9ebdefc..0828a22 100644
--- a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/stableinput/BufferedElementsTest.java
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/stableinput/BufferedElementsTest.java
@@ -52,7 +52,12 @@
WindowedValue.of("test", new Instant(2), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
BufferedElement timerElement =
new BufferedElements.Timer(
- "timerId", GlobalWindow.INSTANCE, new Instant(1), TimeDomain.EVENT_TIME);
+ "timerId",
+ "timerId",
+ GlobalWindow.INSTANCE,
+ new Instant(1),
+ new Instant(1),
+ TimeDomain.EVENT_TIME);
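+ // (Explanatory note.) The two extra arguments are presumably the timer family id and the
+ // output timestamp, mirroring the widened onTimer(...) signature elsewhere in this change.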
testRoundTrip(ImmutableList.of(element), coder);
testRoundTrip(ImmutableList.of(timerElement), coder);
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/website/PipelineOptionsTableGenerator.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/website/PipelineOptionsTableGenerator.java
new file mode 100644
index 0000000..8ca490e
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/website/PipelineOptionsTableGenerator.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.website;
+
+import edu.umd.cs.findbugs.annotations.Nullable;
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Optional;
+import org.apache.beam.runners.flink.FlinkPipelineOptions;
+import org.apache.beam.sdk.options.Default;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.CaseFormat;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
+
+/**
+ * A main class which is called by the Gradle generatePipelineOptionsTable* tasks to update the list
+ * of available pipeline options for the Beam website.
+ */
+public class PipelineOptionsTableGenerator {
+
+ private static final List<String> supportedLanguages = ImmutableList.of("java", "python");
+
+ private static class Option {
+ String name;
+ String description;
+ @Nullable String defaultValue;
+
+ public Option(String name, String description, @Nullable String defaultValue) {
+ this.name = name;
+ this.description = description;
+ this.defaultValue = defaultValue;
+ }
+ }
+
+ public static void main(String[] args) {
+ if (args.length != 1) {
+ throw new RuntimeException(
+ "Please specify the language (" + supportedLanguages + ") as the only argument.");
+ }
+ String arg = args[0].toLowerCase();
+ if (!supportedLanguages.contains(arg)) {
+ throw new RuntimeException("The language is not supported: " + arg);
+ }
+ boolean isPython = arg.equals("python");
+
+ printHeader();
+ List<Option> options = extractOptions(isPython);
+ printOptionsTable(options);
+ }
+
+ private static void printHeader() {
+ System.out.println(
+ "<!--\n"
+ + "Licensed under the Apache License, Version 2.0 (the \"License\");\n"
+ + "you may not use this file except in compliance with the License.\n"
+ + "You may obtain a copy of the License at\n"
+ + "\n"
+ + "http://www.apache.org/licenses/LICENSE-2.0\n"
+ + "\n"
+ + "Unless required by applicable law or agreed to in writing, software\n"
+ + "distributed under the License is distributed on an \"AS IS\" BASIS,\n"
+ + "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n"
+ + "See the License for the specific language governing permissions and\n"
+ + "limitations under the License.\n"
+ + "-->");
+ System.out.println(
+ "<!--\n"
+ + "This is an auto-generated file.\n"
+ + "Use generatePipelineOptionsTableJava and generatePipelineOptionsTablePython respectively\n"
+ + "which should be called before running the tests.\n"
+ + "-->");
+ }
+
+ /**
+ * Returns the extracted list of options via reflection on FlinkPipelineOptions. Options are
+ * returned sorted in alphabetical order, since Java does not guarantee any consistent order of
+ * the class methods.
+ */
+ private static List<Option> extractOptions(boolean isPython) {
+ List<Option> options = new ArrayList<>();
+ for (Method method : FlinkPipelineOptions.class.getDeclaredMethods()) {
+ String name;
+ String description;
+ String defaultValue = null;
+ name = method.getName();
+ if (name.matches("^(get|is).*")) {
+ name = name.replaceFirst("^(get|is)", "");
+
+ if (isPython) {
+ name = CaseFormat.UPPER_CAMEL.to(CaseFormat.LOWER_UNDERSCORE, name);
+ } else {
+ name = Character.toLowerCase(name.charAt(0)) + name.substring(1);
+ }
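+ // e.g. a getter named "getMaxBundleSize" yields "max_bundle_size" for Python and
+ // "maxBundleSize" for Java.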
+
+ Description descriptionAnnotation = method.getAnnotation(Description.class);
+ if (descriptionAnnotation == null) {
+ throw new RuntimeException(
+ "All pipeline options should have a description. Please add one for " + name);
+ }
+ description = descriptionAnnotation.value();
+
+ Optional<String> defaultValueFromAnnotation = getDefaultValueFromAnnotation(method);
+ if (defaultValueFromAnnotation.isPresent()) {
+ defaultValue = defaultValueFromAnnotation.get();
+ }
+
+ options.add(new Option(name, description, defaultValue));
+ }
+ }
+ options.sort(Comparator.comparing(option -> option.name));
+ return options;
+ }
+
+ private static void printOptionsTable(List<Option> options) {
+ System.out.println("<table class=\"table table-bordered\">");
+ for (Option option : options) {
+ System.out.println("<tr>");
+ System.out.println(" <td><code>" + option.name + "</code></td>");
+ System.out.println(" <td>" + option.description + "</td>");
+ if (option.defaultValue != null) {
+ System.out.println(" <td>Default: <code>" + option.defaultValue + "</code></td>");
+ } else {
+ System.out.println(" <td></td>");
+ }
+ System.out.println("</tr>");
+ }
+ System.out.println("</table>");
+ }
+
+ /** Returns a string representation of the {@link Default} value on the passed in method. */
+ private static Optional<String> getDefaultValueFromAnnotation(Method method) {
+ for (Annotation annotation : method.getAnnotations()) {
+ if (annotation instanceof Default.Class) {
+ return Optional.of(((Default.Class) annotation).value().getSimpleName());
+ } else if (annotation instanceof Default.String) {
+ return Optional.of(((Default.String) annotation).value());
+ } else if (annotation instanceof Default.Boolean) {
+ return Optional.of(Boolean.toString(((Default.Boolean) annotation).value()));
+ } else if (annotation instanceof Default.Character) {
+ return Optional.of(Character.toString(((Default.Character) annotation).value()));
+ } else if (annotation instanceof Default.Byte) {
+ return Optional.of(Byte.toString(((Default.Byte) annotation).value()));
+ } else if (annotation instanceof Default.Short) {
+ return Optional.of(Short.toString(((Default.Short) annotation).value()));
+ } else if (annotation instanceof Default.Integer) {
+ return Optional.of(Integer.toString(((Default.Integer) annotation).value()));
+ } else if (annotation instanceof Default.Long) {
+ return Optional.of(Long.toString(((Default.Long) annotation).value()));
+ } else if (annotation instanceof Default.Float) {
+ return Optional.of(Float.toString(((Default.Float) annotation).value()));
+ } else if (annotation instanceof Default.Double) {
+ return Optional.of(Double.toString(((Default.Double) annotation).value()));
+ } else if (annotation instanceof Default.Enum) {
+ return Optional.of(((Default.Enum) annotation).value());
+ } else if (annotation instanceof Default.InstanceFactory) {
+ return Optional.of(((Default.InstanceFactory) annotation).value().getSimpleName());
+ }
+ }
+ return Optional.empty();
+ }
+}
diff --git a/runners/google-cloud-dataflow-java/build.gradle b/runners/google-cloud-dataflow-java/build.gradle
index 1a01351..d8ddffa 100644
--- a/runners/google-cloud-dataflow-java/build.gradle
+++ b/runners/google-cloud-dataflow-java/build.gradle
@@ -39,7 +39,7 @@
filter org.apache.tools.ant.filters.ReplaceTokens, tokens: [
'dataflow.legacy_environment_major_version' : '7',
'dataflow.fnapi_environment_major_version' : '7',
- 'dataflow.container_version' : 'beam-master-20190829'
+ 'dataflow.container_version' : 'beam-master-20191226'
]
}
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
index 1da964dd..367f6e0 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
@@ -18,7 +18,7 @@
package org.apache.beam.runners.dataflow;
import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.apache.beam.runners.core.construction.PipelineResources.detectClassPathResourcesToStage;
+import static org.apache.beam.runners.core.construction.resources.PipelineResources.detectClassPathResourcesToStage;
import static org.apache.beam.sdk.util.CoderUtils.encodeToByteArray;
import static org.apache.beam.sdk.util.SerializableUtils.serializeToByteArray;
import static org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString;
@@ -121,8 +121,6 @@
import org.apache.beam.sdk.transforms.Combine.GroupedValues;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.DoFn.ProcessContext;
-import org.apache.beam.sdk.transforms.DoFn.ProcessElement;
import org.apache.beam.sdk.transforms.GroupByKey;
import org.apache.beam.sdk.transforms.GroupIntoBatches;
import org.apache.beam.sdk.transforms.Impulse;
@@ -278,7 +276,7 @@
if (dataflowOptions.getFilesToStage() == null) {
dataflowOptions.setFilesToStage(
- detectClassPathResourcesToStage(DataflowRunner.class.getClassLoader()));
+ detectClassPathResourcesToStage(DataflowRunner.class.getClassLoader(), options));
if (dataflowOptions.getFilesToStage().isEmpty()) {
throw new IllegalArgumentException("No files to stage has been found.");
} else {
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/PrimitiveParDoSingleFactory.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/PrimitiveParDoSingleFactory.java
index 390f0f0..4db0e3d 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/PrimitiveParDoSingleFactory.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/PrimitiveParDoSingleFactory.java
@@ -193,7 +193,7 @@
return ParDoTranslation.payloadForParDoLike(
new ParDoTranslation.ParDoLike() {
@Override
- public RunnerApi.SdkFunctionSpec translateDoFn(SdkComponents newComponents) {
+ public RunnerApi.FunctionSpec translateDoFn(SdkComponents newComponents) {
return ParDoTranslation.translateDoFn(
parDo.getFn(),
parDo.getMainOutputTag(),
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/SchemaCoderCloudObjectTranslator.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/SchemaCoderCloudObjectTranslator.java
index e166a22..0e8eb91 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/SchemaCoderCloudObjectTranslator.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/SchemaCoderCloudObjectTranslator.java
@@ -59,7 +59,7 @@
base,
SCHEMA,
StringUtils.byteArrayToJsonString(
- SchemaTranslation.schemaToProto(target.getSchema()).toByteArray()));
+ SchemaTranslation.schemaToProto(target.getSchema(), true).toByteArray()));
return base;
}
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineJobTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineJobTest.java
index 0d2907d..1f2a8cc 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineJobTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineJobTest.java
@@ -23,8 +23,8 @@
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.lessThanOrEqualTo;
import static org.junit.Assert.assertEquals;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.anyLong;
-import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.mock;
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
index 4438e14..e965947 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
@@ -38,10 +38,10 @@
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.junit.Assume.assumeFalse;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.anyListOf;
-import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Matchers.isA;
import static org.mockito.Mockito.mock;
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/TestDataflowRunnerTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/TestDataflowRunnerTest.java
index 75d4166..f86136f 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/TestDataflowRunnerTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/TestDataflowRunnerTest.java
@@ -24,8 +24,8 @@
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.fail;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.spy;
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
index 4013d18..69e87ee 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
@@ -27,9 +27,9 @@
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyListOf;
-import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
diff --git a/runners/google-cloud-dataflow-java/worker/build.gradle b/runners/google-cloud-dataflow-java/worker/build.gradle
index 4aba6eb..6b866d7 100644
--- a/runners/google-cloud-dataflow-java/worker/build.gradle
+++ b/runners/google-cloud-dataflow-java/worker/build.gradle
@@ -104,6 +104,7 @@
shadowTest project(path: ":runners:direct-java", configuration: "shadow")
shadowTest library.java.hamcrest_core
shadowTest library.java.hamcrest_library
+ shadowTest library.java.jsonassert
shadowTest library.java.junit
shadowTest library.java.mockito_core
}
diff --git a/runners/google-cloud-dataflow-java/worker/legacy-worker/build.gradle b/runners/google-cloud-dataflow-java/worker/legacy-worker/build.gradle
index 6dc99f2..5ee2d63 100644
--- a/runners/google-cloud-dataflow-java/worker/legacy-worker/build.gradle
+++ b/runners/google-cloud-dataflow-java/worker/legacy-worker/build.gradle
@@ -78,6 +78,7 @@
library.java.hamcrest_core, // Test only
library.java.hamcrest_library, // Test only
library.java.junit, // Test only
+ library.java.jsonassert // Test only
]
applyJavaNature(
@@ -224,6 +225,7 @@
shadowTest project(path: ":sdks:java:core", configuration: "shadowTest")
shadowTest library.java.hamcrest_core
shadowTest library.java.hamcrest_library
+ shadowTest library.java.jsonassert
shadowTest library.java.junit
shadowTest library.java.mockito_core
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/ApplianceShuffleEntryReader.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/ApplianceShuffleEntryReader.java
index 71228c5..d184364 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/ApplianceShuffleEntryReader.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/ApplianceShuffleEntryReader.java
@@ -44,9 +44,9 @@
new ChunkingShuffleBatchReader(executionContext, operationContext, applianceShuffleReader);
if (cache) {
- // Limit the size of the cache.
- final int maxBatches = 32;
- batchReader = new CachingShuffleBatchReader(batchReader, maxBatches);
+ // Limit the size of the cache to ~32 full shuffle batches.
+ final long maxBytes = 128L * 1024 * 1024;
+ batchReader = new CachingShuffleBatchReader(batchReader, maxBytes);
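+ // (128 MiB divided by ~32 batches works out to roughly 4 MiB per full shuffle batch.)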
}
entryReader = new BatchingShuffleEntryReader(batchReader);
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/ChunkingShuffleBatchReader.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/ChunkingShuffleBatchReader.java
index 7f25c81..8939fa7 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/ChunkingShuffleBatchReader.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/ChunkingShuffleBatchReader.java
@@ -59,14 +59,18 @@
}
DataInputStream input = new DataInputStream(new ByteArrayInputStream(result.chunk));
ArrayList<ShuffleEntry> entries = new ArrayList<>();
+ long batchSize = 0;
while (input.available() > 0) {
- entries.add(getShuffleEntry(input));
+ ShuffleEntry entry = getShuffleEntry(input);
+ batchSize += entry.length();
+ entries.add(entry);
}
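+ // (Explanatory note.) batchSize is the total entry bytes in this batch, letting the caching
+ // layer bound its size by bytes instead of batch count (see the CachingShuffleBatchReader
+ // change above).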
return new Batch(
entries,
result.nextStartPosition == null
? null
- : ByteArrayShufflePosition.of(result.nextStartPosition));
+ : ByteArrayShufflePosition.of(result.nextStartPosition),
+ batchSize);
}
/**
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowProcessFnRunner.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowProcessFnRunner.java
index 2903cc0..3cb92c9 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowProcessFnRunner.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowProcessFnRunner.java
@@ -109,7 +109,12 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
throw new UnsupportedOperationException("Unsupported for ProcessFn");
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelper.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelper.java
index a990c38..5449462 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelper.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelper.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.runners.dataflow.worker;
+import com.google.common.base.Strings;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@@ -41,6 +42,7 @@
private static final String CONTROL_API_SERVICE_DESCRIPTOR = "CONTROL_API_SERVICE_DESCRIPTOR";
private static final String LOGGING_API_SERVICE_DESCRIPTOR = "LOGGING_API_SERVICE_DESCRIPTOR";
+ private static final String STATUS_API_SERVICE_DESCRIPTOR = "STATUS_API_SERVICE_DESCRIPTOR";
private static final String ROOT_LOGGER_NAME = "";
private static final String PIPELINE_PATH = "PIPELINE_PATH";
@@ -113,6 +115,18 @@
return parseApiServiceDescriptorFromText(System.getenv().get(CONTROL_API_SERVICE_DESCRIPTOR));
}
+ @Nullable
+ public static Endpoints.ApiServiceDescriptor getStatusDescriptor()
+ throws TextFormat.ParseException {
+ String statusApiDescriptor = System.getenv().get(STATUS_API_SERVICE_DESCRIPTOR);
+ if (Strings.isNullOrEmpty(statusApiDescriptor)) {
+ // Missing STATUS_API_SERVICE_DESCRIPTOR env var is a signal that the worker status API
+ // is unsupported by the current runner.
+ return null;
+ }
+ return parseApiServiceDescriptorFromText(statusApiDescriptor);
+ }
+
// TODO: make env logic private to main() so it is never done outside of initializing the process
public static @Nullable RunnerApi.Pipeline getPipelineFromEnv() throws IOException {
String pipelinePath = System.getenv(PIPELINE_PATH);
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/FetchAndFilterStreamingSideInputsOperation.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/FetchAndFilterStreamingSideInputsOperation.java
index 6c1ddde..4ba2487 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/FetchAndFilterStreamingSideInputsOperation.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/FetchAndFilterStreamingSideInputsOperation.java
@@ -75,7 +75,7 @@
Coder<WindowedValue<T>> inputCoder,
WindowingStrategy<?, W> windowingStrategy,
DataflowExecutionContext.DataflowStepContext stepContext,
- Map<PCollectionView<?>, RunnerApi.SdkFunctionSpec> pCollectionViewToWindowMappingFns) {
+ Map<PCollectionView<?>, RunnerApi.FunctionSpec> pCollectionViewToWindowMappingFns) {
super(receivers, context);
this.sideInputFetcher =
@@ -167,9 +167,9 @@
FnDataService beamFnDataService,
ApiServiceDescriptor dataServiceApiServiceDescriptor,
Coder<BoundedWindow> mainInputWindowCoder,
- Map<PCollectionView<?>, RunnerApi.SdkFunctionSpec> pCollectionViewsToWindowMappingFns) {
+ Map<PCollectionView<?>, RunnerApi.FunctionSpec> pCollectionViewsToWindowMappingFns) {
ImmutableList.Builder<PCollectionView<?>> wrappedViews = ImmutableList.builder();
- for (Map.Entry<PCollectionView<?>, RunnerApi.SdkFunctionSpec> entry :
+ for (Map.Entry<PCollectionView<?>, RunnerApi.FunctionSpec> entry :
pCollectionViewsToWindowMappingFns.entrySet()) {
WindowMappingFn windowMappingFn =
new FnApiWindowMappingFn(
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/FnApiWindowMappingFn.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/FnApiWindowMappingFn.java
index dcf15f4..b49a9d6 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/FnApiWindowMappingFn.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/FnApiWindowMappingFn.java
@@ -34,7 +34,6 @@
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.PCollection;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
import org.apache.beam.runners.core.construction.CoderTranslation;
import org.apache.beam.runners.core.construction.RehydratedComponents;
import org.apache.beam.runners.core.construction.SdkComponents;
@@ -79,11 +78,11 @@
@AutoValue
public abstract static class CacheKey {
- public static CacheKey create(SdkFunctionSpec windowMappingFn, BoundedWindow mainWindow) {
+ public static CacheKey create(FunctionSpec windowMappingFn, BoundedWindow mainWindow) {
return new AutoValue_FnApiWindowMappingFn_CacheKey(windowMappingFn, mainWindow);
}
- public abstract SdkFunctionSpec getWindowMappingFn();
+ public abstract FunctionSpec getWindowMappingFn();
public abstract BoundedWindow getMainWindow();
}
@@ -94,7 +93,7 @@
private final IdGenerator idGenerator;
private final FnDataService beamFnDataService;
private final InstructionRequestHandler instructionRequestHandler;
- private final SdkFunctionSpec windowMappingFn;
+ private final FunctionSpec windowMappingFn;
private final Coder<WindowedValue<KV<byte[], BoundedWindow>>> outboundCoder;
private final Coder<WindowedValue<KV<byte[], TargetWindowT>>> inboundCoder;
private final ProcessBundleDescriptor processBundleDescriptor;
@@ -104,7 +103,7 @@
InstructionRequestHandler instructionRequestHandler,
ApiServiceDescriptor dataServiceApiServiceDescriptor,
FnDataService beamFnDataService,
- SdkFunctionSpec windowMappingFn,
+ FunctionSpec windowMappingFn,
Coder<BoundedWindow> mainInputWindowCoder,
Coder<TargetWindowT> sideInputWindowCoder) {
this.idGenerator = idGenerator;
@@ -219,7 +218,7 @@
}
}
- private TargetWindowT loadIfNeeded(SdkFunctionSpec windowMappingFn, BoundedWindow mainWindow) {
+ private TargetWindowT loadIfNeeded(FunctionSpec windowMappingFn, BoundedWindow mainWindow) {
try {
String processRequestInstructionId = idGenerator.getId();
InstructionRequest processRequest =
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/GroupAlsoByWindowFnRunner.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/GroupAlsoByWindowFnRunner.java
index e50f1bd..6296461 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/GroupAlsoByWindowFnRunner.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/GroupAlsoByWindowFnRunner.java
@@ -82,7 +82,12 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
throw new UnsupportedOperationException(
String.format("Timers are not supported by %s", GroupAlsoByWindowFn.class.getSimpleName()));
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/SimpleParDoFn.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/SimpleParDoFn.java
index 712a017..4c011ed 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/SimpleParDoFn.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/SimpleParDoFn.java
@@ -357,7 +357,13 @@
private void processUserTimer(TimerData timer) throws Exception {
if (fnSignature.timerDeclarations().containsKey(timer.getTimerId())) {
BoundedWindow window = ((WindowNamespace) timer.getNamespace()).getWindow();
- fnRunner.onTimer(timer.getTimerId(), window, timer.getTimestamp(), timer.getDomain());
+ fnRunner.onTimer(
+ timer.getTimerId(),
+ timer.getTimerFamilyId(),
+ window,
+ timer.getTimestamp(),
+ timer.getOutputTimestamp(),
+ timer.getDomain());
}
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorker.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorker.java
index 4def350..d6d017a 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorker.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorker.java
@@ -37,6 +37,7 @@
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -183,6 +184,7 @@
static final long TARGET_COMMIT_BUNDLE_BYTES = 32 << 20;
static final int MAX_COMMIT_QUEUE_BYTES = 500 << 20; // 500MB
static final int NUM_COMMIT_STREAMS = 1;
+ static final int GET_WORK_STREAM_TIMEOUT_MINUTES = 3;
static final Duration COMMIT_STREAM_TIMEOUT = Duration.standardMinutes(1);
private static final int DEFAULT_STATUS_PORT = 8081;
@@ -424,7 +426,7 @@
private final Counter<Long, Long> javaHarnessMaxMemory;
private final Counter<Integer, Integer> windmillMaxObservedWorkItemCommitBytes;
private final Counter<Integer, Integer> memoryThrashing;
- private Timer refreshActiveWorkTimer;
+ private Timer refreshWorkTimer;
private Timer statusPageTimer;
private final boolean publishCounters;
@@ -785,9 +787,9 @@
0,
options.getWindmillHarnessUpdateReportingPeriod().getMillis());
+ refreshWorkTimer = new Timer("RefreshWork");
if (options.getActiveWorkRefreshPeriodMillis() > 0) {
- refreshActiveWorkTimer = new Timer("RefreshActiveWork");
- refreshActiveWorkTimer.schedule(
+ refreshWorkTimer.schedule(
new TimerTask() {
@Override
public void run() {
@@ -797,6 +799,18 @@
options.getActiveWorkRefreshPeriodMillis(),
options.getActiveWorkRefreshPeriodMillis());
}
+ if (windmillServiceEnabled && options.getStuckCommitDurationMillis() > 0) {
+ int periodMillis = Math.max(options.getStuckCommitDurationMillis() / 10, 100);
+ refreshWorkTimer.schedule(
+ new TimerTask() {
+ @Override
+ public void run() {
+ invalidateStuckCommits();
+ }
+ },
+ periodMillis,
+ periodMillis);
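+ // The scan period is a tenth of the stuck-commit threshold, floored at 100 ms; e.g. a
+ // 5 minute threshold is checked every 30 seconds.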
+ }
if (options.getPeriodicStatusPageOutputDirectory() != null) {
statusPageTimer = new Timer("DumpStatusPages");
@@ -863,8 +877,8 @@
globalConfigRefreshTimer.cancel();
}
globalWorkerUpdatesTimer.cancel();
- if (refreshActiveWorkTimer != null) {
- refreshActiveWorkTimer.cancel();
+ if (refreshWorkTimer != null) {
+ refreshWorkTimer.cancel();
}
if (statusPageTimer != null) {
statusPageTimer.cancel();
@@ -925,7 +939,11 @@
computationMap.put(
computationId,
new ComputationState(
- computationId, mapTask, workUnitExecutor, transformUserNameToStateFamily));
+ computationId,
+ mapTask,
+ workUnitExecutor,
+ transformUserNameToStateFamily,
+ stateCache.forComputation(computationId)));
}
}
@@ -1014,7 +1032,9 @@
// Reconnect every now and again to enable better load balancing.
// If at any point the server closes the stream, we will reconnect immediately; otherwise
// we half-close the stream after some time and create a new one.
- stream.closeAfterDefaultTimeout();
+ if (!stream.awaitTermination(GET_WORK_STREAM_TIMEOUT_MINUTES, TimeUnit.MINUTES)) {
+ stream.close();
+ }
} catch (InterruptedException e) {
// Continue processing until !running.get()
}
@@ -1080,11 +1100,12 @@
private final Windmill.WorkItem workItem;
private final Instant startTime;
+ private Instant stateStartTime;
private State state;
public Work(Windmill.WorkItem workItem) {
this.workItem = workItem;
- this.startTime = Instant.now();
+ this.startTime = this.stateStartTime = Instant.now();
this.state = State.QUEUED;
}
@@ -1102,6 +1123,11 @@
public void setState(State state) {
this.state = state;
+ this.stateStartTime = Instant.now();
+ }
+
+ public Instant getStateStartTime() {
+ return stateStartTime;
}
}
@@ -1423,8 +1449,8 @@
sleep(retryLocallyDelayMs);
workUnitExecutor.forceExecute(work);
} else {
- // Consider the item invalid. It will eventually be retried by Windmill if it still needs
- // to be processed.
+ // Consider the item invalid. It will eventually be retried by Windmill if it still needs to
+ // be processed.
computationState.completeWork(key, workItem.getWorkToken());
}
} finally {
@@ -1494,7 +1520,7 @@
Windmill.CommitWorkRequest commitRequest = commitRequestBuilder.build();
LOG.trace("Commit: {}", commitRequest);
activeCommitBytes.set(commitBytes);
- commitWork(commitRequest);
+ windmillServer.commitWork(commitRequest);
activeCommitBytes.set(0);
for (Map.Entry<ComputationState, Windmill.ComputationCommitWorkRequest.Builder> entry :
computationRequestMap.entrySet()) {
@@ -1506,71 +1532,82 @@
}
}
+ // Adds the commit to the commitStream if it fits, returning true iff it is consumed.
+ private boolean addCommitToStream(Commit commit, CommitWorkStream commitStream) {
+ Preconditions.checkNotNull(commit);
+ final ComputationState state = commit.getComputationState();
+ final Windmill.WorkItemCommitRequest request = commit.getRequest();
+ final int size = commit.getSize();
+ commit.getWork().setState(State.COMMITTING);
+ activeCommitBytes.addAndGet(size);
+ if (commitStream.commitWorkItem(
+ state.computationId,
+ request,
+ (Windmill.CommitStatus status) -> {
+ if (status != Windmill.CommitStatus.OK) {
+ stateCache.forComputation(state.computationId).invalidate(request.getKey());
+ }
+ activeCommitBytes.addAndGet(-size);
+ // This may throw an exception if the commit was not active, which is possible if it
+ // was deemed stuck.
+ state.completeWork(request.getKey(), request.getWorkToken());
+ })) {
+ return true;
+ } else {
+ // Back out the stats changes since the commit wasn't consumed.
+ commit.getWork().setState(State.COMMIT_QUEUED);
+ activeCommitBytes.addAndGet(-size);
+ return false;
+ }
+ }
+
+ // Helper to batch additional commits into the commit stream as long as they fit.
+ // Returns a commit that was removed from the queue but not consumed, or null if there is none.
+ private Commit batchCommitsToStream(CommitWorkStream commitStream) {
+ int commits = 1;
+ while (running.get()) {
+ Commit commit;
+ try {
+ if (commits < 5) {
+ commit = commitQueue.poll(10 - 2 * commits, TimeUnit.MILLISECONDS);
+ } else {
+ commit = commitQueue.poll();
+ }
+ } catch (InterruptedException e) {
+ // Continue processing until !running.get()
+ continue;
+ }
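+ // commits starts at 1, so the first four polls here wait 8, 6, 4 and 2 ms; subsequent
+ // polls do not block.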
+ if (commit == null || !addCommitToStream(commit, commitStream)) {
+ return commit;
+ }
+ commits++;
+ }
+ return null;
+ }
+
private void streamingCommitLoop() {
StreamPool<CommitWorkStream> streamPool =
new StreamPool<>(
NUM_COMMIT_STREAMS, COMMIT_STREAM_TIMEOUT, windmillServer::commitWorkStream);
- Commit commit = null;
+ Commit initialCommit = null;
while (running.get()) {
- // Batch commits as long as there are more and we can fit them in the current request.
- // We lazily initialize the commit stream to make sure that we only create one after
- // we have a commit.
- CommitWorkStream commitStream = null;
- int commits = 0;
- while (running.get()) {
- // There may be a commit left over from the previous iteration but if not, pull one.
- if (commit == null) {
- try {
- if (commits == 0) {
- commit = commitQueue.take();
- } else if (commits < 10) {
- commit = commitQueue.poll(20 - 2 * commits, TimeUnit.MILLISECONDS);
- } else {
- commit = commitQueue.poll();
- }
- } catch (InterruptedException e) {
- // Continue processing until !running.get()
- continue;
- }
- if (commit == null) {
- // No longer batching, break loop to trigger flush.
- break;
- }
- }
-
- commits++;
- final ComputationState state = commit.getComputationState();
- final Windmill.WorkItemCommitRequest request = commit.getRequest();
- final int size = commit.getSize();
- commit.getWork().setState(State.COMMITTING);
- if (commitStream == null) {
- commitStream = streamPool.getStream();
- }
- if (commitStream.commitWorkItem(
- state.computationId,
- request,
- (Windmill.CommitStatus status) -> {
- if (status != Windmill.CommitStatus.OK) {
- stateCache.forComputation(state.computationId).invalidate(request.getKey());
- }
- state.completeWork(request.getKey(), request.getWorkToken());
- activeCommitBytes.addAndGet(-size);
- })) {
- // The commit was consumed.
- commit = null;
- // It's possible we could decrement from the callback above before adding here but since
- // it's just for the status page we don't care.
- activeCommitBytes.addAndGet(size);
- } else {
- // The commit was not consumed, leave it set and it will be added to the subsequent stream
- // on the next iteration after this stream is flushed.
- break;
+ if (initialCommit == null) {
+ try {
+ initialCommit = commitQueue.take();
+ } catch (InterruptedException e) {
+ continue;
}
}
- if (commitStream != null) {
- commitStream.flush();
- streamPool.releaseStream(commitStream);
+ // We initialize the commit stream only after we have a commit to make sure it is fresh.
+ CommitWorkStream commitStream = streamPool.getStream();
+ if (!addCommitToStream(initialCommit, commitStream)) {
+ throw new AssertionError("Initial commit on flushed stream should always be accepted.");
}
+ // Batch additional commits to the stream and possibly make an un-batched commit the next
+ // initial commit.
+ initialCommit = batchCommitsToStream(commitStream);
+ commitStream.flush();
+ streamPool.releaseStream(commitStream);
}
}
@@ -1583,10 +1620,6 @@
.build());
}
- private void commitWork(Windmill.CommitWorkRequest request) {
- windmillServer.commitWork(request);
- }
-
private void getConfigFromWindmill(String computation) {
Windmill.GetConfigRequest request =
Windmill.GetConfigRequest.newBuilder().addComputations(computation).build();
@@ -2056,6 +2089,14 @@
metricTrackingWindmillServer.refreshActiveWork(active);
}
+ private void invalidateStuckCommits() {
+ Instant stuckCommitDeadline =
+ Instant.now().minus(Duration.millis(options.getStuckCommitDurationMillis()));
+ for (Map.Entry<String, ComputationState> entry : computationMap.entrySet()) {
+ entry.getValue().invalidateStuckCommits(stuckCommitDeadline);
+ }
+ }
+
/**
* Class representing the state of a computation.
*
@@ -2068,16 +2109,18 @@
private final ImmutableMap<String, String> transformUserNameToStateFamily;
// Map from key to work for the key. The first item in the queue is
// actively processing. Synchronized by itself.
- private final Map<ByteString, Queue<Work>> activeWork = new HashMap<>();
+ private final Map<ByteString, Deque<Work>> activeWork = new HashMap<>();
private final BoundedQueueExecutor executor;
private final ConcurrentMap<SdkWorkerHarness, ConcurrentLinkedQueue<ExecutionState>>
executionStateQueues = new ConcurrentHashMap<>();
+ private final WindmillStateCache.ForComputation computationStateCache;
public ComputationState(
String computationId,
MapTask mapTask,
BoundedQueueExecutor executor,
- Map<String, String> transformUserNameToStateFamily) {
+ Map<String, String> transformUserNameToStateFamily,
+ WindmillStateCache.ForComputation computationStateCache) {
this.computationId = computationId;
this.mapTask = mapTask;
this.executor = executor;
@@ -2085,6 +2128,7 @@
transformUserNameToStateFamily != null
? ImmutableMap.copyOf(transformUserNameToStateFamily)
: ImmutableMap.of();
+ this.computationStateCache = computationStateCache;
Preconditions.checkNotNull(mapTask.getStageName());
Preconditions.checkNotNull(mapTask.getSystemName());
}
@@ -2110,20 +2154,23 @@
/** Mark the given key and work as active. */
public boolean activateWork(ByteString key, Work work) {
synchronized (activeWork) {
- Queue<Work> queue = activeWork.get(key);
- if (queue == null) {
- queue = new ArrayDeque<>();
- activeWork.put(key, queue);
- queue.add(work);
- // Fall through to execute without the lock held.
- } else {
- if (queue.peek().getWorkItem().getWorkToken() != work.getWorkItem().getWorkToken()) {
- // Queue the work for later processing.
- queue.add(work);
- return true;
+ Deque<Work> queue = activeWork.get(key);
+ if (queue != null) {
+ Preconditions.checkState(!queue.isEmpty());
+ // Ensure we don't already have this work token queued.
+ for (Work queuedWork : queue) {
+ if (queuedWork.getWorkItem().getWorkToken() == work.getWorkItem().getWorkToken()) {
+ return false;
+ }
}
- // Skip the work if duplicate
- return false;
+ // Queue the work for later processing.
+ queue.addLast(work);
+ return true;
+ } else {
+ queue = new ArrayDeque<>();
+ queue.addLast(work);
+ activeWork.put(key, queue);
+ // Fall through to execute without the lock held.
}
}
executor.execute(work);
@@ -2132,11 +2179,11 @@
/** Marks the work for the given key as complete. Schedules queued work for the key if any. */
public void completeWork(ByteString key, long workToken) {
- Work work = null;
+ Work nextWork;
synchronized (activeWork) {
Queue<Work> queue = activeWork.get(key);
Preconditions.checkNotNull(queue);
- Work completedWork = queue.poll();
+ Work completedWork = queue.peek();
// avoid Preconditions.checkNotNull and checkState here to prevent eagerly evaluating the
// format string parameters for the error message.
if (completedWork == null) {
@@ -2153,20 +2200,45 @@
completedWork.getWorkItem().getWorkToken(),
workToken));
}
- if (queue.peek() == null) {
- activeWork.remove(key);
- return;
+ queue.remove(); // We consumed the matching work item.
+ nextWork = queue.peek();
+ if (nextWork == null) {
+ Preconditions.checkState(queue == activeWork.remove(key));
}
- work = queue.peek();
}
- executor.forceExecute(work);
+ if (nextWork != null) {
+ executor.forceExecute(nextWork);
+ }
+ }
+
+ public void invalidateStuckCommits(Instant stuckCommitDeadline) {
+ synchronized (activeWork) {
+ // Determine the stuck commit keys but complete them outside of iterating over
+ // activeWork as completeWork may delete the entry from activeWork.
+ Map<ByteString, Long> stuckCommits = new HashMap<>();
+ for (Map.Entry<ByteString, Deque<Work>> entry : activeWork.entrySet()) {
+ ByteString key = entry.getKey();
+ Work work = entry.getValue().peek();
+ if (work.getState() == State.COMMITTING
+ && work.getStateStartTime().isBefore(stuckCommitDeadline)) {
+ LOG.error(
+ "Detected key with sharding key {} stuck in COMMITTING state, completing it with error. Key ",
+ work.workItem.getShardingKey());
+ stuckCommits.put(key, work.getWorkItem().getWorkToken());
+ }
+ }
+ for (Map.Entry<ByteString, Long> stuckCommit : stuckCommits.entrySet()) {
+ computationStateCache.invalidate(stuckCommit.getKey());
+ completeWork(stuckCommit.getKey(), stuckCommit.getValue());
+ }
+ }
}
/** Adds any work started before the refreshDeadline to the GetDataRequest builder. */
public List<Windmill.KeyedGetDataRequest> getKeysToRefresh(Instant refreshDeadline) {
List<Windmill.KeyedGetDataRequest> result = new ArrayList<>();
synchronized (activeWork) {
- for (Map.Entry<ByteString, Queue<Work>> entry : activeWork.entrySet()) {
+ for (Map.Entry<ByteString, Deque<Work>> entry : activeWork.entrySet()) {
ByteString key = entry.getKey();
for (Work work : entry.getValue()) {
if (work.getStartTime().isBefore(refreshDeadline)) {
@@ -2183,6 +2255,12 @@
return result;
}
+ private String elapsedString(Instant start, Instant end) {
+ Duration activeFor = new Duration(start, end);
+ // Duration's toString always starts with "PT"; remove that here.
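+ // e.g. a 90 second duration ("PT90S") is rendered as "90S".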
+ return activeFor.toString().substring(2);
+ }
+
public void printActiveWork(PrintWriter writer) {
final Instant now = Instant.now();
// The max number of keys in COMMITTING or COMMIT_QUEUED status to be shown.
@@ -2192,17 +2270,16 @@
"<table border=\"1\" "
+ "style=\"border-collapse:collapse;padding:5px;border-spacing:5px;border:1px\">");
writer.println(
- "<tr><th>Key</th><th>Token</th><th>Queued</th><th>Active For</th><th>State</th></tr>");
+ "<tr><th>Key</th><th>Token</th><th>Queued</th><th>Active For</th><th>State</th><th>State Active For</th></tr>");
// We use a StringBuilder in the synchronized section to buffer writes since the provided
// PrintWriter may block when flushing.
StringBuilder builder = new StringBuilder();
synchronized (activeWork) {
- for (Map.Entry<ByteString, Queue<Work>> entry : activeWork.entrySet()) {
+ for (Map.Entry<ByteString, Deque<Work>> entry : activeWork.entrySet()) {
Queue<Work> queue = entry.getValue();
+ Preconditions.checkNotNull(queue);
Work work = queue.peek();
- if (work == null) {
- continue;
- }
+ Preconditions.checkNotNull(work);
Windmill.WorkItem workItem = work.getWorkItem();
State state = work.getState();
if (state == State.COMMITTING || state == State.COMMIT_QUEUED) {
@@ -2218,12 +2295,12 @@
builder.append("</td><td>");
builder.append(queue.size() - 1);
builder.append("</td><td>");
- Duration activeFor = new Duration(work.getStartTime(), now);
- // Duration's toString always starts with "PT"; remove that here.
- builder.append(activeFor.toString().substring(2));
+ builder.append(elapsedString(work.getStartTime(), now));
builder.append("</td><td>");
builder.append(state);
builder.append("</td></tr>\n");
+ builder.append(elapsedString(work.getStateStartTime(), now));
+ builder.append("</td></tr>\n");
}
}
writer.print(builder.toString());
@@ -2231,7 +2308,7 @@
if (commitPendingCount >= maxCommitPending) {
writer.println("<br>");
writer.print("Skipped keys in COMMITTING/COMMIT_QUEUED: ");
- writer.println(maxCommitPending - commitPendingCount);
+ writer.println(commitPendingCount - maxCommitPending);
writer.println("<br>");
}
}
@@ -2326,20 +2403,13 @@
+ MAX_WORK_UNITS_QUEUED
+ "<br>");
writer.print("Commit Queue: ");
- writer.print(commitQueue.weight() >> 20);
- writer.print("MB, ");
+ appendHumanizedBytes(commitQueue.weight(), writer);
+ writer.print(", ");
writer.print(commitQueue.size());
writer.println(" elements<br>");
writer.print("Active commit: ");
- long commitBytes = activeCommitBytes.get();
- if (commitBytes == 0) {
- writer.print("none");
- } else {
- writer.print("~");
- writer.print((commitBytes >> 20) + 1);
- writer.print("MB");
- }
+ appendHumanizedBytes(activeCommitBytes.get(), writer);
writer.println("<br>");
metricTrackingWindmillServer.printHtml(writer);
@@ -2354,5 +2424,20 @@
writer.println("<br>");
}
}
+
+ private void appendHumanizedBytes(long bytes, PrintWriter writer) {
+ if (bytes < (4 << 10)) {
+ writer.print(bytes);
+ writer.print("B");
+ } else if (bytes < (4 << 20)) {
+ writer.print("~");
+ writer.print(bytes >> 10);
+ writer.print("KB");
+ } else {
+ writer.print("~");
+ writer.print(bytes >> 20);
+ writer.print("MB");
+ }
+ }
}
}
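
The stuck-commit handling added above collects the offending keys in a first pass under the activeWork lock and only then invalidates and completes them, because completeWork may remove entries from the map being iterated. A standalone sketch of that two-phase scan-then-mutate pattern follows; the Work and State types here are simplified stand-ins, not the worker's classes.

```java
import java.time.Instant;
import java.util.Deque;
import java.util.HashMap;
import java.util.Map;

public class StuckCommitSketch {
  enum State { PROCESSING, COMMITTING }

  static final class Work {
    final long token;
    final State state;
    final Instant stateStart;

    Work(long token, State state, Instant stateStart) {
      this.token = token;
      this.state = state;
      this.stateStart = stateStart;
    }
  }

  final Map<String, Deque<Work>> activeWork = new HashMap<>();

  void invalidateStuckCommits(Instant stuckCommitDeadline) {
    synchronized (activeWork) {
      // Phase 1: collect stuck keys without mutating the map we are iterating.
      Map<String, Long> stuckCommits = new HashMap<>();
      for (Map.Entry<String, Deque<Work>> entry : activeWork.entrySet()) {
        Work head = entry.getValue().peek();
        if (head != null
            && head.state == State.COMMITTING
            && head.stateStart.isBefore(stuckCommitDeadline)) {
          stuckCommits.put(entry.getKey(), head.token);
        }
      }
      // Phase 2: complete the stuck items; this may remove keys from activeWork.
      for (Map.Entry<String, Long> stuck : stuckCommits.entrySet()) {
        completeWork(stuck.getKey(), stuck.getValue());
      }
    }
  }

  void completeWork(String key, long workToken) {
    Deque<Work> queue = activeWork.get(key);
    if (queue != null && !queue.isEmpty() && queue.peek().token == workToken) {
      queue.remove(); // Consume the matching work item.
      if (queue.isEmpty()) {
        activeWork.remove(key);
      }
    }
  }
}
```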
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingKeyedWorkItemSideInputDoFnRunner.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingKeyedWorkItemSideInputDoFnRunner.java
index bf5efee..41da4b3 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingKeyedWorkItemSideInputDoFnRunner.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingKeyedWorkItemSideInputDoFnRunner.java
@@ -133,7 +133,12 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
throw new UnsupportedOperationException(
"Attempt to deliver a timer to a DoFn, but timers are not supported in Dataflow.");
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingModeExecutionContext.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingModeExecutionContext.java
index a114b6f..76aa8b0 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingModeExecutionContext.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingModeExecutionContext.java
@@ -602,6 +602,8 @@
.setTimer(
StateNamespaces.window(windowCoder, window),
timerId,
+ "",
+ cleanupTime,
cleanupTime,
TimeDomain.EVENT_TIME);
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingSideInputDoFnRunner.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingSideInputDoFnRunner.java
index 02a1292..f7e86c7 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingSideInputDoFnRunner.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingSideInputDoFnRunner.java
@@ -77,7 +77,12 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
throw new UnsupportedOperationException(
"Attempt to deliver a timer to a DoFn, but timers are not supported in Dataflow.");
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingSideInputFetcher.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingSideInputFetcher.java
index 7c5babf..2c00c99 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingSideInputFetcher.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingSideInputFetcher.java
@@ -32,6 +32,7 @@
import org.apache.beam.runners.core.StateTags;
import org.apache.beam.runners.core.TimerInternals.TimerData;
import org.apache.beam.runners.core.TimerInternals.TimerDataCoder;
+import org.apache.beam.runners.core.TimerInternals.TimerDataCoderV2;
import org.apache.beam.runners.dataflow.worker.windmill.Windmill;
import org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest;
import org.apache.beam.sdk.coders.AtomicCoder;
@@ -60,6 +61,7 @@
private final StateTag<BagState<WindowedValue<InputT>>> elementsAddr;
private final StateTag<BagState<TimerData>> timersAddr;
+ private final StateTag<BagState<TimerData>> oldTimersAddr;
private final StateTag<WatermarkHoldState> watermarkHoldingAddr;
private final StateTag<ValueState<Map<W, Set<Windmill.GlobalDataRequest>>>> blockedMapAddr;
@@ -85,8 +87,11 @@
this.elementsAddr =
StateTags.makeSystemTagInternal(
StateTags.bag("elem", WindowedValue.getFullCoder(inputCoder, mainWindowCoder)));
- this.timersAddr =
+ this.oldTimersAddr =
StateTags.makeSystemTagInternal(StateTags.bag("timer", TimerDataCoder.of(mainWindowCoder)));
+ this.timersAddr =
+ StateTags.makeSystemTagInternal(
+ StateTags.bag("timerV2", TimerDataCoderV2.of(mainWindowCoder)));
StateTag<WatermarkHoldState> watermarkTag =
StateTags.watermarkStateInternal(
"holdForSideinput", windowingStrategy.getTimestampCombiner());
@@ -169,6 +174,7 @@
List<BagState<TimerData>> timers = Lists.newArrayList();
for (W window : readyWindows) {
timers.add(timerBag(window).readLater());
+ timers.add(timerOldBag(window).readLater());
}
return timers;
}
@@ -275,6 +281,12 @@
.state(StateNamespaces.window(mainWindowCoder, window), timersAddr);
}
+ BagState<TimerData> timerOldBag(W window) {
+ return stepContext
+ .stateInternals()
+ .state(StateNamespaces.window(mainWindowCoder, window), oldTimersAddr);
+ }
+
private <SideWindowT extends BoundedWindow> Windmill.GlobalDataRequest buildGlobalDataRequest(
PCollectionView<?> view, BoundedWindow mainWindow) {
@SuppressWarnings("unchecked")
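
The fetcher now writes timers only to the new "timerV2" bag (TimerDataCoderV2) but still reads the legacy "timer" bag, so timers persisted under the old coder keep firing after the upgrade. A tiny standalone sketch of that read-both migration pattern; the Bag interface below is an illustrative stand-in, not Beam's BagState.

```java
import java.util.ArrayList;
import java.util.List;

public class DualBagMigrationSketch {
  // Minimal stand-in for a bag of persisted values.
  interface Bag<T> {
    Iterable<T> read();

    void add(T value);
  }

  // Reads cover both bags so state written before the coder change is honored.
  static <T> List<T> readAll(Bag<T> legacyBag, Bag<T> v2Bag) {
    List<T> all = new ArrayList<>();
    for (T t : legacyBag.read()) {
      all.add(t);
    }
    for (T t : v2Bag.read()) {
      all.add(t);
    }
    return all;
  }

  // New values are written only under the new coder's tag.
  static <T> void write(Bag<T> v2Bag, T timer) {
    v2Bag.add(timer);
  }
}
```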
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateCache.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateCache.java
index d74b0db..b419a38 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateCache.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateCache.java
@@ -106,10 +106,9 @@
public void invalidate(ByteString processingKey) {
synchronized (this) {
ComputationKey key = new ComputationKey(computation, processingKey);
- for (StateId id : keyIndex.get(key)) {
+ for (StateId id : keyIndex.removeAll(key)) {
stateCache.invalidate(id);
}
- keyIndex.removeAll(key);
}
}
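
The invalidate change above relies on Multimap.removeAll returning the removed values, so the per-key index is drained and the matching cache entries invalidated in a single pass. A small sketch of the same pattern, assuming plain Guava in place of the vendored copy and a HashMap standing in for the state cache (class and field names are illustrative):

```java
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import java.util.HashMap;
import java.util.Map;

public class RemoveAllInvalidateSketch {
  private final Multimap<String, String> keyIndex = HashMultimap.create();
  private final Map<String, byte[]> stateCache = new HashMap<>();

  void put(String key, String stateId, byte[] value) {
    keyIndex.put(key, stateId);
    stateCache.put(stateId, value);
  }

  // removeAll drains the index for the key and returns the removed state ids,
  // which are then invalidated in one pass, mirroring the diff.
  void invalidate(String key) {
    for (String stateId : keyIndex.removeAll(key)) {
      stateCache.remove(stateId);
    }
  }
}
```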
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillTimerInternals.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillTimerInternals.java
index c2deb2f..fb33ed3 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillTimerInternals.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillTimerInternals.java
@@ -94,8 +94,16 @@
@Override
public void setTimer(
- StateNamespace namespace, String timerId, Instant timestamp, TimeDomain timeDomain) {
- timers.put(timerId, namespace, TimerData.of(timerId, namespace, timestamp, timeDomain));
+ StateNamespace namespace,
+ String timerId,
+ String timerFamilyId,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
+ timers.put(
+ timerId,
+ namespace,
+ TimerData.of(timerId, timerFamilyId, namespace, timestamp, outputTimestamp, timeDomain));
timerStillPresent.put(timerId, namespace, true);
}
@@ -176,7 +184,7 @@
.setStateFamily(stateFamily)
.setReset(true)
.addTimestamps(
- WindmillTimeUtils.harnessToWindmillTimestamp(timerData.getTimestamp()));
+ WindmillTimeUtils.harnessToWindmillTimestamp(timerData.getOutputTimestamp()));
}
} else {
// Deleting a timer. If it is a user timer, clear the hold
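
Both WindmillTimerInternals and StreamingModeExecutionContext above now pass a timer family id and an explicit output timestamp, and the Windmill watermark hold is taken at the output timestamp rather than the firing timestamp. A hedged sketch of what a caller looks like under the expanded signature; the TimerSetter interface and TimeDomain enum below are simplified stand-ins for Beam's TimerInternals and TimeDomain, not the real API.

```java
import org.joda.time.Instant;

public class SetTimerSketch {
  enum TimeDomain { EVENT_TIME, PROCESSING_TIME }

  // Stand-in for the expanded TimerInternals#setTimer signature shown above.
  interface TimerSetter {
    void setTimer(
        String namespace,
        String timerId,
        String timerFamilyId,
        Instant timestamp,
        Instant outputTimestamp,
        TimeDomain timeDomain);
  }

  static void scheduleCleanup(TimerSetter timers, Instant cleanupTime) {
    // Mirrors the StreamingModeExecutionContext call: empty timer family, and the
    // output timestamp equal to the firing timestamp for the window cleanup timer.
    timers.setTimer(
        "window-namespace", "cleanup", "", cleanupTime, cleanupTime, TimeDomain.EVENT_TIME);
  }
}
```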
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/fn/control/TimerReceiver.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/fn/control/TimerReceiver.java
index 286ea8a..e3d2277 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/fn/control/TimerReceiver.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/fn/control/TimerReceiver.java
@@ -119,7 +119,8 @@
String timerId = timerSpec.timerId();
TimerInternals timerInternals = stepContext.namespacedToUser().timerInternals();
- timerInternals.setTimer(namespace, timerId, timer.getTimestamp(), timeDomain);
+ timerInternals.setTimer(
+ namespace, timerId, "", timer.getTimestamp(), timer.getOutputTimestamp(), timeDomain);
timerIdToKey.put(timerId, windowedValue.getValue().getKey());
timerIdToPayload.put(timerId, timer.getPayload());
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/CreateExecutableStageNodeFunction.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/CreateExecutableStageNodeFunction.java
index fc42534..9b75433 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/CreateExecutableStageNodeFunction.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/CreateExecutableStageNodeFunction.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.runners.dataflow.worker.graph;
+import static org.apache.beam.runners.core.construction.graph.ExecutableStage.DEFAULT_WIRE_CODER_SETTING;
import static org.apache.beam.runners.dataflow.util.Structs.getBytes;
import static org.apache.beam.runners.dataflow.util.Structs.getString;
import static org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders.forSideInputInfos;
@@ -160,8 +161,6 @@
// For intermediate PCollections we fabricate, we make a bogus WindowingStrategy
// TODO: create a correct windowing strategy, including coders and environment
- // An SdkFunctionSpec is invalid without a working environment reference. We can revamp that
- // when we inline SdkFunctionSpec and FunctionSpec, both slated for inlining wherever they occur
// Default to use the Java environment if pipeline doesn't have environment specified.
if (pipeline.getComponents().getEnvironmentsMap().isEmpty()) {
@@ -389,7 +388,7 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(parDoPayload.toByteString());
} else {
- // legacy path - bytes are the SdkFunctionSpec's payload field, basically, and
+ // legacy path - bytes are the FunctionSpec's payload field, basically, and
// SDKs expect it in the PTransform's payload field
byte[] userFnBytes = getBytes(userFnSpec, PropertyNames.SERIALIZED_FN);
transformSpec
@@ -487,7 +486,8 @@
executableStageUserStateReference,
executableStageTimers,
executableStageTransforms,
- executableStageOutputs);
+ executableStageOutputs,
+ DEFAULT_WIRE_CODER_SETTING);
return ExecutableStageNode.create(
executableStage,
ptransformIdToNameContexts.build(),
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/InsertFetchAndFilterStreamingSideInputNodes.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/InsertFetchAndFilterStreamingSideInputNodes.java
index 6ba81d1..0d96981 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/InsertFetchAndFilterStreamingSideInputNodes.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/InsertFetchAndFilterStreamingSideInputNodes.java
@@ -132,7 +132,7 @@
}
// Gather all the side input window mapping fns which we need to request the SDK to map
- ImmutableMap.Builder<PCollectionView<?>, RunnerApi.SdkFunctionSpec>
+ ImmutableMap.Builder<PCollectionView<?>, RunnerApi.FunctionSpec>
pCollectionViewsToWindowMapingsFns = ImmutableMap.builder();
parDoPayload
.getSideInputsMap()
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/Nodes.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/Nodes.java
index 853af69..327234c 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/Nodes.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/Nodes.java
@@ -349,7 +349,7 @@
public abstract static class FetchAndFilterStreamingSideInputsNode extends Node {
public static FetchAndFilterStreamingSideInputsNode create(
WindowingStrategy<?, ?> windowingStrategy,
- Map<PCollectionView<?>, RunnerApi.SdkFunctionSpec> pCollectionViewsToWindowMappingFns,
+ Map<PCollectionView<?>, RunnerApi.FunctionSpec> pCollectionViewsToWindowMappingFns,
NameContext nameContext) {
return new AutoValue_Nodes_FetchAndFilterStreamingSideInputsNode(
windowingStrategy, pCollectionViewsToWindowMappingFns, nameContext);
@@ -357,7 +357,7 @@
public abstract WindowingStrategy<?, ?> getWindowingStrategy();
- public abstract Map<PCollectionView<?>, RunnerApi.SdkFunctionSpec>
+ public abstract Map<PCollectionView<?>, RunnerApi.FunctionSpec>
getPCollectionViewsToWindowMappingFns();
public abstract NameContext getNameContext();
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/RegisterNodeFunction.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/RegisterNodeFunction.java
index 0c33ee8..b44e42c 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/RegisterNodeFunction.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/RegisterNodeFunction.java
@@ -190,8 +190,6 @@
// For intermediate PCollections we fabricate, we make a bogus WindowingStrategy
// TODO: create a correct windowing strategy, including coders and environment
- // An SdkFunctionSpec is invalid without a working environment reference. We can revamp that
- // when we inline SdkFunctionSpec and FunctionSpec, both slated for inlining wherever they occur
SdkComponents sdkComponents = SdkComponents.create(pipeline.getComponents());
// Default to use the Java environment if pipeline doesn't have environment specified.
@@ -351,7 +349,7 @@
.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
.setPayload(parDoPayload.toByteString());
} else {
- // legacy path - bytes are the SdkFunctionSpec's payload field, basically, and
+ // legacy path - bytes are the FunctionSpec's payload field, basically, and
// SDKs expect it in the PTransform's payload field
byte[] userFnBytes = getBytes(userFnSpec, PropertyNames.SERIALIZED_FN);
transformSpec
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/options/StreamingDataflowWorkerOptions.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/options/StreamingDataflowWorkerOptions.java
index dbad980..b5d3224 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/options/StreamingDataflowWorkerOptions.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/options/StreamingDataflowWorkerOptions.java
@@ -69,6 +69,12 @@
void setActiveWorkRefreshPeriodMillis(int value);
+ @Description("Necessary duration for a commit to be considered stuck and invalidated.")
+ @Default.Integer(10 * 60 * 1000)
+ int getStuckCommitDurationMillis();
+
+ void setStuckCommitDurationMillis(int value);
+
@Description(
"Period for sending 'global get config' requests to the service. The duration is "
+ "specified as seconds in 'PTx.yS' format, e.g. 'PT5.125S'."
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/common/worker/CachingShuffleBatchReader.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/common/worker/CachingShuffleBatchReader.java
index fc87bc4..c769881 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/common/worker/CachingShuffleBatchReader.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/common/worker/CachingShuffleBatchReader.java
@@ -18,8 +18,8 @@
package org.apache.beam.runners.dataflow.worker.util.common.worker;
import java.io.IOException;
+import java.time.Duration;
import java.util.concurrent.ExecutionException;
-import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Objects;
@@ -27,26 +27,41 @@
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.CacheBuilder;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.CacheLoader;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LoadingCache;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.Weigher;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.Ints;
/** A {@link ShuffleBatchReader} that caches batches as they're read. */
public class CachingShuffleBatchReader implements ShuffleBatchReader {
private final ShuffleBatchReader reader;
@VisibleForTesting final LoadingCache<BatchRange, Batch> cache;
- /** Limit the size of the cache to 1000 batches. */
- private static final int MAXIMUM_BATCHES = 1000;
+ /**
+ * Limit the size of the cache to 1GiB of batches.
+ *
+ * <p>If this increases beyond Integer.MAX_VALUE then {@link BatchWeigher} must be updated.
+ * Because a single batch may be larger than 1GiB, the actual in-memory cache size may exceed this value.
+ */
+ private static final int MAXIMUM_WEIGHT = 1024 * 1024 * 1024;
// Ensure that batches in the cache are expired quickly
// for improved GC performance.
- private static final long EXPIRE_AFTER_MS = 250;
+ private static final Duration EXPIRE_AFTER = Duration.ofMillis(250);
+ /**
+ * Creates the caching reader.
+ *
+ * @param shuffleReader wrapped reader.
+ * @param maximumWeightBytes maximum bytes for the cache.
+ * @param expireAfterAccess cache items may be evicted after the elapsed duration.
+ */
public CachingShuffleBatchReader(
- ShuffleBatchReader shuffleReader, int maximumBatches, long expireAfterAccessMillis) {
+ ShuffleBatchReader shuffleReader, long maximumWeightBytes, Duration expireAfterAccess) {
this.reader = shuffleReader;
this.cache =
CacheBuilder.newBuilder()
- .maximumSize(maximumBatches)
- .expireAfterAccess(expireAfterAccessMillis, TimeUnit.MILLISECONDS)
+ .maximumWeight(maximumWeightBytes)
+ .weigher(new BatchWeigher())
+ .expireAfterAccess(expireAfterAccess)
.<BatchRange, Batch>build(
new CacheLoader<BatchRange, Batch>() {
@Override
@@ -58,12 +73,24 @@
});
}
+ /**
+ * Creates the caching reader with a maximum size of {@link MAXIMUM_WEIGHT} and an element expiry
+ * duration of {@link EXPIRE_AFTER}.
+ *
+ * @param shuffleReader wrapped reader.
+ */
public CachingShuffleBatchReader(ShuffleBatchReader shuffleReader) {
- this(shuffleReader, MAXIMUM_BATCHES, EXPIRE_AFTER_MS);
+ this(shuffleReader, MAXIMUM_WEIGHT, EXPIRE_AFTER);
}
- public CachingShuffleBatchReader(ShuffleBatchReader shuffleReader, int maximumBatches) {
- this(shuffleReader, maximumBatches, EXPIRE_AFTER_MS);
+ /**
+ * Creates the caching reader with an element expiry duration of {@link EXPIRE_AFTER}.
+ *
+ * @param shuffleReader wrapped reader.
+ * @param maximumWeightBytes maximum bytes for the cache.
+ */
+ public CachingShuffleBatchReader(ShuffleBatchReader shuffleReader, long maximumWeightBytes) {
+ this(shuffleReader, maximumWeightBytes, EXPIRE_AFTER);
}
@Override
@@ -102,4 +129,18 @@
return Objects.hashCode(startPosition, endPosition);
}
}
+
+ /**
+ * Returns the weight of a Batch, in bytes, within the range [0, Integer.MAX_VALUE].
+ *
+ * <p>The cache holds {@link MAXIMUM_WEIGHT} bytes. If {@link MAXIMUM_WEIGHT} is increased beyond
+ * Integer.MAX_VALUE bytes, a new weighing heuristic will be required to avoid under-representing
+ * the number of bytes in memory.
+ */
+ static final class BatchWeigher implements Weigher<BatchRange, Batch> {
+ @Override
+ public int weigh(BatchRange key, Batch value) {
+ return Ints.saturatedCast(value.bytes);
+ }
+ }
}
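
The reader above switches from a count-bounded cache to a byte-weighted one. As a self-contained illustration of the same CacheBuilder configuration, here is a sketch against plain Guava; the worker uses the vendored copy under org.apache.beam.vendor.guava, the Batch stand-in and the 1 GiB / 250 ms values mirror the constants in the diff, and expireAfterAccess(Duration) assumes Guava 25 or later.

```java
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.Weigher;
import com.google.common.primitives.Ints;
import java.time.Duration;

public class WeightedCacheSketch {
  // Stand-in for ShuffleBatchReader.Batch: a value that knows its own byte size.
  static final class Batch {
    final long bytes;

    Batch(long bytes) {
      this.bytes = bytes;
    }
  }

  public static void main(String[] args) {
    LoadingCache<String, Batch> cache =
        CacheBuilder.newBuilder()
            // Bound the cache by total bytes rather than by entry count.
            .maximumWeight(1024L * 1024L * 1024L)
            .weigher(
                new Weigher<String, Batch>() {
                  @Override
                  public int weigh(String key, Batch value) {
                    // Saturate to Integer.MAX_VALUE, as BatchWeigher does above.
                    return Ints.saturatedCast(value.bytes);
                  }
                })
            // Expire entries shortly after last access, mirroring EXPIRE_AFTER.
            .expireAfterAccess(Duration.ofMillis(250))
            .<String, Batch>build(
                new CacheLoader<String, Batch>() {
                  @Override
                  public Batch load(String key) {
                    return new Batch(key.length()); // Placeholder load for the sketch.
                  }
                });

    System.out.println(cache.getUnchecked("example").bytes);
  }
}
```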
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/common/worker/ShuffleBatchReader.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/common/worker/ShuffleBatchReader.java
index d1676eb..29890c8 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/common/worker/ShuffleBatchReader.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/common/worker/ShuffleBatchReader.java
@@ -30,10 +30,13 @@
public static class Batch {
public final List<ShuffleEntry> entries;
@Nullable public final ShufflePosition nextStartPosition;
+ public final long bytes;
- public Batch(List<ShuffleEntry> entries, @Nullable ShufflePosition nextStartPosition) {
+ public Batch(
+ List<ShuffleEntry> entries, @Nullable ShufflePosition nextStartPosition, long bytes) {
this.entries = entries;
this.nextStartPosition = nextStartPosition;
+ this.bytes = bytes;
}
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/GrpcWindmillServer.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/GrpcWindmillServer.java
index 98f74c7..c64803d 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/GrpcWindmillServer.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/GrpcWindmillServer.java
@@ -117,8 +117,6 @@
// high.
private static final long DEFAULT_UNARY_RPC_DEADLINE_SECONDS = 300;
private static final long DEFAULT_STREAM_RPC_DEADLINE_SECONDS = 300;
- // Stream clean close seconds must be set lower than the stream deadline seconds.
- private static final long DEFAULT_STREAM_CLEAN_CLOSE_SECONDS = 180;
private static final Duration MIN_BACKOFF = Duration.millis(1);
private static final Duration MAX_BACKOFF = Duration.standardSeconds(30);
@@ -137,13 +135,14 @@
syncStubList = new ArrayList<>();
private WindmillApplianceGrpc.WindmillApplianceBlockingStub syncApplianceStub = null;
private long unaryDeadlineSeconds = DEFAULT_UNARY_RPC_DEADLINE_SECONDS;
+ private long streamDeadlineSeconds = DEFAULT_STREAM_RPC_DEADLINE_SECONDS;
private ImmutableSet<HostAndPort> endpoints;
private int logEveryNStreamFailures = 20;
private Duration maxBackoff = MAX_BACKOFF;
private final ThrottleTimer getWorkThrottleTimer = new ThrottleTimer();
private final ThrottleTimer getDataThrottleTimer = new ThrottleTimer();
private final ThrottleTimer commitWorkThrottleTimer = new ThrottleTimer();
- Random rand = new Random();
+ private final Random rand = new Random();
private final Set<AbstractWindmillStream<?, ?>> streamRegistry =
Collections.newSetFromMap(new ConcurrentHashMap<AbstractWindmillStream<?, ?>, Boolean>());
@@ -213,7 +212,7 @@
private synchronized void initializeLocalHost(int port) throws IOException {
this.logEveryNStreamFailures = 1;
this.maxBackoff = Duration.millis(500);
- this.unaryDeadlineSeconds = 10; // For local testing use a short deadline.
+ this.unaryDeadlineSeconds = 10; // For local testing use short deadlines.
Channel channel = localhostChannel(port);
if (streamingEngineEnabled()) {
this.stubList.add(CloudWindmillServiceV1Alpha1Grpc.newStub(channel));
@@ -599,7 +598,7 @@
this.clientFactory = clientFactory;
}
- /** Called on each response from the server */
+ /** Called on each response from the server. */
protected abstract void onResponse(ResponseT response);
/** Called when a new underlying stream to the server has been opened. */
protected abstract void onNewStream();
@@ -607,7 +606,7 @@
protected abstract boolean hasPendingRequests();
/**
* Called when the stream is throttled due to resource exhausted errors. Will be called for each
- * resource exhausted error not just the first. onResponse() must stop throttling on reciept of
+ * resource exhausted error not just the first. onResponse() must stop throttling on receipt of
* the first good message.
*/
protected abstract void startThrottleTimer();
@@ -746,15 +745,6 @@
}
@Override
- public final void closeAfterDefaultTimeout() throws InterruptedException {
- if (!finishLatch.await(DEFAULT_STREAM_CLEAN_CLOSE_SECONDS, TimeUnit.SECONDS)) {
- // If the stream did not close due to error in the specified amount of time, half-close
- // the stream cleanly.
- close();
- }
- }
-
- @Override
public final Instant startTime() {
return new Instant(startTimeMs.get());
}
@@ -773,7 +763,7 @@
super(
responseObserver ->
stub()
- .withDeadlineAfter(DEFAULT_STREAM_RPC_DEADLINE_SECONDS, TimeUnit.SECONDS)
+ .withDeadlineAfter(streamDeadlineSeconds, TimeUnit.SECONDS)
.getWorkStream(responseObserver));
this.request = request;
this.receiver = receiver;
@@ -946,7 +936,7 @@
super(
responseObserver ->
stub()
- .withDeadlineAfter(DEFAULT_STREAM_RPC_DEADLINE_SECONDS, TimeUnit.SECONDS)
+ .withDeadlineAfter(streamDeadlineSeconds, TimeUnit.SECONDS)
.getDataStream(responseObserver));
startStream();
}
@@ -1161,7 +1151,7 @@
private class Batcher {
long queuedBytes = 0;
- Map<Long, PendingRequest> queue = new HashMap<>();
+ final Map<Long, PendingRequest> queue = new HashMap<>();
boolean canAccept(PendingRequest request) {
return queue.isEmpty()
@@ -1178,6 +1168,7 @@
void flush() {
flushInternal(queue);
queuedBytes = 0;
+ queue.clear();
}
}
@@ -1187,7 +1178,7 @@
super(
responseObserver ->
stub()
- .withDeadlineAfter(DEFAULT_STREAM_RPC_DEADLINE_SECONDS, TimeUnit.SECONDS)
+ .withDeadlineAfter(streamDeadlineSeconds, TimeUnit.SECONDS)
.commitWorkStream(responseObserver));
startStream();
}
@@ -1219,16 +1210,27 @@
protected void onResponse(StreamingCommitResponse response) {
commitWorkThrottleTimer.stop();
+ RuntimeException finalException = null;
for (int i = 0; i < response.getRequestIdCount(); ++i) {
long requestId = response.getRequestId(i);
PendingRequest done = pending.remove(requestId);
if (done == null) {
LOG.error("Got unknown commit request ID: {}", requestId);
} else {
- done.onDone.accept(
- (i < response.getStatusCount()) ? response.getStatus(i) : CommitStatus.OK);
+ try {
+ done.onDone.accept(
+ (i < response.getStatusCount()) ? response.getStatus(i) : CommitStatus.OK);
+ } catch (RuntimeException e) {
+ // Catch possible exceptions to ensure that an exception for one commit does not prevent
+ // other commits from being processed.
+ LOG.warn("Exception while processing commit response {} ", e);
+ finalException = e;
+ }
}
}
+ if (finalException != null) {
+ throw finalException;
+ }
}
@Override
@@ -1252,7 +1254,7 @@
batcher.flush();
}
- private final void flushInternal(Map<Long, PendingRequest> requests) {
+ private void flushInternal(Map<Long, PendingRequest> requests) {
if (requests.isEmpty()) {
return;
}
@@ -1266,7 +1268,6 @@
} else {
issueBatchedRequest(requests);
}
- requests.clear();
}
private void issueSingleRequest(final long id, PendingRequest pendingRequest) {
@@ -1278,13 +1279,13 @@
.setShardingKey(pendingRequest.request.getShardingKey())
.setSerializedWorkItemCommit(pendingRequest.request.toByteString());
StreamingCommitWorkRequest chunk = requestBuilder.build();
- try {
- synchronized (this) {
- pending.put(id, pendingRequest);
+ synchronized (this) {
+ pending.put(id, pendingRequest);
+ try {
send(chunk);
+ } catch (IllegalStateException e) {
+ // Stream was broken, request will be retried when stream is reopened.
}
- } catch (IllegalStateException e) {
- // Stream was broken, request will be retried when stream is reopened.
}
}
@@ -1303,13 +1304,13 @@
chunkBuilder.setSerializedWorkItemCommit(request.request.toByteString());
}
StreamingCommitWorkRequest request = requestBuilder.build();
- try {
- synchronized (this) {
- pending.putAll(requests);
+ synchronized (this) {
+ pending.putAll(requests);
+ try {
send(request);
+ } catch (IllegalStateException e) {
+ // Stream was broken, request will be retried when stream is reopened.
}
- } catch (IllegalStateException e) {
- // Stream was broken, request will be retried when stream is reopened.
}
}
@@ -1492,7 +1493,7 @@
}
}
- /** Returns if the specified type is currently being throttled */
+ /** Returns if the specified type is currently being throttled. */
public synchronized boolean throttled() {
return startTime != -1;
}
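
The commit-response handler above now shields the loop from a failing per-commit callback and rethrows only after every response in the batch has been processed. A generic sketch of that capture-and-rethrow-after-the-loop pattern (not Windmill-specific):

```java
import java.util.List;
import java.util.function.Consumer;

public class CaptureAndRethrowSketch {
  // Invoke every callback; remember the last failure and rethrow it only after
  // all callbacks have had a chance to run.
  static <T> void acceptAll(List<Consumer<T>> callbacks, T value) {
    RuntimeException finalException = null;
    for (Consumer<T> callback : callbacks) {
      try {
        callback.accept(value);
      } catch (RuntimeException e) {
        finalException = e;
      }
    }
    if (finalException != null) {
      throw finalException;
    }
  }
}
```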
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillServerStub.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillServerStub.java
index 2b5453b..31c5114 100644
--- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillServerStub.java
+++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillServerStub.java
@@ -106,12 +106,6 @@
/** Waits for the server to close its end of the connection, with timeout. */
boolean awaitTermination(int time, TimeUnit unit) throws InterruptedException;
- /**
- * Cleanly closes the stream after implementation-speficied timeout, unless the stream is
- * aborted before the timeout is reached.
- */
- void closeAfterDefaultTimeout() throws InterruptedException;
-
/** Returns when the stream was opened. */
Instant startTime();
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowBatchWorkerHarnessTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowBatchWorkerHarnessTest.java
index 3682545..3451743 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowBatchWorkerHarnessTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowBatchWorkerHarnessTest.java
@@ -18,7 +18,7 @@
package org.apache.beam.runners.dataflow.worker;
import static org.junit.Assert.assertEquals;
-import static org.mockito.Matchers.anyString;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.doCallRealMethod;
import static org.mockito.Mockito.when;
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java
index 947d290..b96ee2f 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java
@@ -18,7 +18,7 @@
package org.apache.beam.runners.dataflow.worker;
import static org.junit.Assert.assertEquals;
-import static org.mockito.Matchers.anyString;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.doCallRealMethod;
import static org.mockito.Mockito.when;
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelperTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelperTest.java
index 72c894a..66be62d 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelperTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelperTest.java
@@ -18,6 +18,7 @@
package org.apache.beam.runners.dataflow.worker;
import static org.hamcrest.Matchers.equalTo;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -82,4 +83,9 @@
assertThat(decoded, equalTo(descriptor));
assertThat(decoded.getUrl(), equalTo("some_test_url"));
}
+
+ @Test
+ public void testParseStatusApiDescriptor() throws TextFormat.ParseException {
+ assertNull(DataflowWorkerHarnessHelper.getStatusDescriptor());
+ }
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/FakeWindmillServer.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/FakeWindmillServer.java
index 2c27606..e788578 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/FakeWindmillServer.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/FakeWindmillServer.java
@@ -64,6 +64,8 @@
private final AtomicInteger expectedExceptionCount;
private final ErrorCollector errorCollector;
private boolean isReady = true;
+ private boolean dropStreamingCommits = false;
+ private final AtomicInteger droppedStreamingCommits;
public FakeWindmillServer(ErrorCollector errorCollector) {
workToOffer = new ConcurrentLinkedQueue<>();
@@ -73,6 +75,11 @@
expectedExceptionCount = new AtomicInteger();
this.errorCollector = errorCollector;
statsReceived = new ArrayList<>();
+ droppedStreamingCommits = new AtomicInteger();
+ }
+
+ public void setDropStreamingCommits(boolean dropStreamingCommits) {
+ this.dropStreamingCommits = dropStreamingCommits;
}
public void addWorkToOffer(Windmill.GetWorkResponse work) {
@@ -188,7 +195,12 @@
final CountDownLatch done = new CountDownLatch(1);
return new GetWorkStream() {
@Override
- public void closeAfterDefaultTimeout() {
+ public void close() {
+ done.countDown();
+ }
+
+ @Override
+ public boolean awaitTermination(int time, TimeUnit unit) throws InterruptedException {
while (done.getCount() > 0) {
Windmill.GetWorkResponse response = workToOffer.poll();
if (response == null) {
@@ -210,15 +222,6 @@
}
}
}
- }
-
- @Override
- public void close() {
- done.countDown();
- }
-
- @Override
- public boolean awaitTermination(int time, TimeUnit unit) throws InterruptedException {
return done.await(time, unit);
}
@@ -280,9 +283,6 @@
}
@Override
- public void closeAfterDefaultTimeout() {}
-
- @Override
public Instant startTime() {
return startTime;
}
@@ -303,9 +303,15 @@
errorCollector.checkThat(
request.getShardingKey(), allOf(greaterThan(0L), lessThan(Long.MAX_VALUE)));
errorCollector.checkThat(request.getCacheToken(), not(equalTo(0L)));
- commitsReceived.put(request.getWorkToken(), request);
- onDone.accept(Windmill.CommitStatus.OK);
- return true; // The request was accepted.
+ if (dropStreamingCommits) {
+ droppedStreamingCommits.incrementAndGet();
+ } else {
+ commitsReceived.put(request.getWorkToken(), request);
+ onDone.accept(Windmill.CommitStatus.OK);
+ }
+ // Return true to indicate the request was accepted even when the commit is
+ // dropped to simulate a lost commit.
+ return true;
}
@Override
@@ -320,9 +326,6 @@
}
@Override
- public void closeAfterDefaultTimeout() {}
-
- @Override
public Instant startTime() {
return startTime;
}
@@ -358,6 +361,15 @@
return commitsReceived;
}
+ public void waitForDroppedCommits(int droppedCommits) {
+ LOG.debug("waitForDroppedCommits: {}", droppedCommits);
+ int maxTries = 10;
+ while (maxTries-- > 0 && droppedStreamingCommits.get() < droppedCommits) {
+ Uninterruptibles.sleepUninterruptibly(1000, TimeUnit.MILLISECONDS);
+ }
+ assertEquals(droppedCommits, droppedStreamingCommits.get());
+ }
+
public void setExpectedExceptionCount(int i) {
expectedExceptionCount.getAndAdd(i);
}
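
waitForDroppedCommits above polls for up to ten seconds before asserting. A small standalone sketch of that bounded-poll-then-assert pattern; Thread.sleep stands in for Guava's Uninterruptibles, and an AssertionError replaces JUnit's assertEquals.

```java
import java.util.concurrent.atomic.AtomicInteger;

public class BoundedPollSketch {
  // Poll a counter until it reaches the expected value or the attempts run out,
  // then make a hard assertion so test failures surface clearly.
  static void waitForCount(AtomicInteger counter, int expected) throws InterruptedException {
    int maxTries = 10;
    while (maxTries-- > 0 && counter.get() < expected) {
      Thread.sleep(1000);
    }
    if (counter.get() != expected) {
      throw new AssertionError("expected " + expected + " but saw " + counter.get());
    }
  }
}
```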
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/FnApiWindowMappingFnTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/FnApiWindowMappingFnTest.java
index 81d4a67..91380b8 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/FnApiWindowMappingFnTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/FnApiWindowMappingFnTest.java
@@ -30,7 +30,7 @@
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.RegisterResponse;
import org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
+import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
import org.apache.beam.runners.core.construction.ParDoTranslation;
import org.apache.beam.runners.core.construction.SdkComponents;
import org.apache.beam.runners.fnexecution.control.InstructionRequestHandler;
@@ -62,7 +62,7 @@
public class FnApiWindowMappingFnTest {
private static final ApiServiceDescriptor DATA_SERVICE =
ApiServiceDescriptor.newBuilder().setUrl("test://data").build();
- private static final SdkFunctionSpec WINDOW_MAPPING_SPEC =
+ private static final FunctionSpec WINDOW_MAPPING_SPEC =
ParDoTranslation.translateWindowMappingFn(
new GlobalWindows().getDefaultWindowMappingFn(),
SdkComponents.create(PipelineOptionsFactory.create()));
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/HotKeyLoggerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/HotKeyLoggerTest.java
index 467034a..2728e26 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/HotKeyLoggerTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/HotKeyLoggerTest.java
@@ -20,8 +20,8 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.verify;
import static org.powermock.api.mockito.PowerMockito.mock;
import static org.powermock.api.mockito.PowerMockito.mockStatic;
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java
index 2cb5ada..c954bdb 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java
@@ -2089,7 +2089,8 @@
"computation",
defaultMapTask(Arrays.asList(makeSourceInstruction(StringUtf8Coder.of()))),
mockExecutor,
- ImmutableMap.of());
+ ImmutableMap.of(),
+ null);
ByteString key1 = ByteString.copyFromUtf8("key1");
ByteString key2 = ByteString.copyFromUtf8("key2");
@@ -2600,4 +2601,39 @@
assertThat(commit.getSerializedSize(), isWithinBundleSizeLimits);
}
+
+ @Test
+ public void testStuckCommit() throws Exception {
+ if (!streamingEngine) {
+ // Stuck commits have only been observed with streaming engine and thus recovery from them is
+ // not implemented for non-streaming engine.
+ return;
+ }
+
+ List<ParallelInstruction> instructions =
+ Arrays.asList(
+ makeSourceInstruction(StringUtf8Coder.of()),
+ makeSinkInstruction(StringUtf8Coder.of(), 0));
+
+ FakeWindmillServer server = new FakeWindmillServer(errorCollector);
+ StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
+ options.setStuckCommitDurationMillis(2000);
+ StreamingDataflowWorker worker = makeWorker(instructions, options, true /* publishCounters */);
+ worker.start();
+ // Prevent commit callbacks from being called to simulate a stuck commit.
+ server.setDropStreamingCommits(true);
+
+ // Add some work for key 1.
+ server.addWorkToOffer(makeInput(10, TimeUnit.MILLISECONDS.toMicros(2), keyStringForIndex(1)));
+ server.waitForDroppedCommits(1);
+ server.setDropStreamingCommits(false);
+ // Enqueue another work item for key 1.
+ server.addWorkToOffer(makeInput(1, TimeUnit.MILLISECONDS.toMicros(1)));
+ // Ensure that the second work item processes.
+ Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
+ worker.stop();
+
+ assertTrue(result.containsKey(1L));
+ assertEquals(makeExpectedOutput(1, TimeUnit.MILLISECONDS.toMicros(1)).build(), result.get(1L));
+ }
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/ElementCountMonitoringInfoToCounterUpdateTransformerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/ElementCountMonitoringInfoToCounterUpdateTransformerTest.java
index 24b2491..3970724 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/ElementCountMonitoringInfoToCounterUpdateTransformerTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/ElementCountMonitoringInfoToCounterUpdateTransformerTest.java
@@ -17,8 +17,9 @@
*/
package org.apache.beam.runners.dataflow.worker.fn.control;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
+import static org.apache.beam.runners.dataflow.worker.testing.GenericJsonAssert.assertEqualsAsJson;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.when;
@@ -56,7 +57,7 @@
mockSpecValidator, pcollectionNameMapping);
Optional<String> error = Optional.of("Error text");
when(mockSpecValidator.validate(any())).thenReturn(error);
- assertEquals(null, testObject.transform(null));
+ assertNull(testObject.transform(null));
}
@Test
@@ -85,7 +86,7 @@
new ElementCountMonitoringInfoToCounterUpdateTransformer(
mockSpecValidator, pcollectionNameMapping);
when(mockSpecValidator.validate(any())).thenReturn(Optional.empty());
- assertEquals(null, testObject.transform(monitoringInfo));
+ assertNull(testObject.transform(monitoringInfo));
}
@Test
@@ -106,12 +107,12 @@
when(mockSpecValidator.validate(any())).thenReturn(Optional.empty());
CounterUpdate result = testObject.transform(monitoringInfo);
- assertNotEquals(null, result);
+ assertNotNull(result);
- assertEquals(
- "{cumulative=true, integer={highBits=0, lowBits=0}, "
- + "nameAndKind={kind=SUM, "
- + "name=transformedValue-ElementCount}}",
- result.toString());
+ assertEqualsAsJson(
+ "{cumulative:true, integer:{highBits:0, lowBits:0}, "
+ + "nameAndKind:{kind:'SUM', "
+ + "name:'transformedValue-ElementCount'}}",
+ result);
}
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/FnApiMonitoringInfoToCounterUpdateTransformerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/FnApiMonitoringInfoToCounterUpdateTransformerTest.java
index 3dec6e1..e89d059 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/FnApiMonitoringInfoToCounterUpdateTransformerTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/FnApiMonitoringInfoToCounterUpdateTransformerTest.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.runners.dataflow.worker.fn.control;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.when;
@@ -86,6 +87,6 @@
CounterUpdate result = testObject.transform(monitoringInfo);
- assertSame(null, result);
+ assertNull(result);
}
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/MSecMonitoringInfoToCounterUpdateTransformerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/MSecMonitoringInfoToCounterUpdateTransformerTest.java
index 0dbedb7..95a65bc 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/MSecMonitoringInfoToCounterUpdateTransformerTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/MSecMonitoringInfoToCounterUpdateTransformerTest.java
@@ -17,8 +17,10 @@
*/
package org.apache.beam.runners.dataflow.worker.fn.control;
+import static org.apache.beam.runners.dataflow.worker.testing.GenericJsonAssert.assertEqualsAsJson;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -68,7 +70,7 @@
MonitoringInfo.newBuilder()
.setUrn("beam:metric:pardo_execution_time:start_bundle_msecs:v1:invalid")
.build();
- assertEquals(null, testObject.transform(monitoringInfo));
+ assertNull(testObject.transform(monitoringInfo));
}
@Test
@@ -142,14 +144,13 @@
CounterUpdate result = testObject.transform(monitoringInfo);
// Validate
- assertNotEquals(null, result);
-
- assertEquals(
- "{cumulative=true, integer={highBits=0, lowBits=0}, "
- + "structuredNameAndMetadata={metadata={kind=SUM}, "
- + "name={executionStepName=anyStageName, name=supportedCounter, origin=SYSTEM, "
- + "originalStepName=anyOriginalName}}}",
- result.toString());
+ assertNotNull(result);
+ assertEqualsAsJson(
+ "{cumulative:true, integer:{highBits:0, lowBits:0}, "
+ + "structuredNameAndMetadata:{metadata:{kind:'SUM'}, "
+ + "name:{executionStepName:'anyStageName', name:'supportedCounter', origin:'SYSTEM', "
+ + "originalStepName:'anyOriginalName'}}}",
+ result);
}
@Test
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/MeanByteCountMonitoringInfoToCounterUpdateTransformerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/MeanByteCountMonitoringInfoToCounterUpdateTransformerTest.java
index d554b07..e4efa1c 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/MeanByteCountMonitoringInfoToCounterUpdateTransformerTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/MeanByteCountMonitoringInfoToCounterUpdateTransformerTest.java
@@ -17,8 +17,9 @@
*/
package org.apache.beam.runners.dataflow.worker.fn.control;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
+import static org.apache.beam.runners.dataflow.worker.testing.GenericJsonAssert.assertEqualsAsJson;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.when;
@@ -56,7 +57,7 @@
mockSpecValidator, pcollectionNameMapping);
Optional<String> error = Optional.of("Error text");
when(mockSpecValidator.validate(any())).thenReturn(error);
- assertEquals(null, testObject.transform(null));
+ assertNull(testObject.transform(null));
}
@Test
@@ -85,7 +86,7 @@
new MeanByteCountMonitoringInfoToCounterUpdateTransformer(
mockSpecValidator, pcollectionNameMapping);
when(mockSpecValidator.validate(any())).thenReturn(Optional.empty());
- assertEquals(null, testObject.transform(monitoringInfo));
+ assertNull(testObject.transform(monitoringInfo));
}
@Test
@@ -107,12 +108,12 @@
CounterUpdate result = testObject.transform(monitoringInfo);
- assertNotEquals(null, result);
- assertEquals(
- "{cumulative=true, integerMean={count={highBits=0, lowBits=0}, "
- + "sum={highBits=0, lowBits=0}}, "
- + "nameAndKind={kind=MEAN, "
- + "name=transformedValue-MeanByteCount}}",
- result.toString());
+ assertNotNull(result);
+ assertEqualsAsJson(
+ "{cumulative:true, integerMean:{count:{highBits:0, lowBits:0}, "
+ + "sum:{highBits:0, lowBits:0}}, "
+ + "nameAndKind:{kind:'MEAN', "
+ + "name:'transformedValue-MeanByteCount'}}",
+ result);
}
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/UserDistributionMonitoringInfoToCounterUpdateTransformerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/UserDistributionMonitoringInfoToCounterUpdateTransformerTest.java
index 1d9c9f5..b0b4884 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/UserDistributionMonitoringInfoToCounterUpdateTransformerTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/UserDistributionMonitoringInfoToCounterUpdateTransformerTest.java
@@ -17,8 +17,9 @@
*/
package org.apache.beam.runners.dataflow.worker.fn.control;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
+import static org.apache.beam.runners.dataflow.worker.testing.GenericJsonAssert.assertEqualsAsJson;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -58,7 +59,7 @@
mockSpecValidator, stepContextMapping);
Optional<String> error = Optional.of("Error text");
when(mockSpecValidator.validate(any())).thenReturn(error);
- assertEquals(null, testObject.transform(null));
+ assertNull(testObject.transform(null));
}
@Test
@@ -89,7 +90,7 @@
new UserDistributionMonitoringInfoToCounterUpdateTransformer(
mockSpecValidator, stepContextMapping);
when(mockSpecValidator.validate(any())).thenReturn(Optional.empty());
- assertEquals(null, testObject.transform(monitoringInfo));
+ assertNull(testObject.transform(monitoringInfo));
}
@Test
@@ -114,15 +115,15 @@
when(mockSpecValidator.validate(any())).thenReturn(Optional.empty());
CounterUpdate result = testObject.transform(monitoringInfo);
- assertNotEquals(null, result);
+ assertNotNull(result);
- assertEquals(
- "{cumulative=true, distribution={count={highBits=0, lowBits=0}, "
- + "max={highBits=0, lowBits=0}, min={highBits=0, lowBits=0}, "
- + "sum={highBits=0, lowBits=0}}, "
- + "structuredNameAndMetadata={metadata={kind=DISTRIBUTION}, "
- + "name={name=anyName, origin=USER, originNamespace=anyNamespace, "
- + "originalStepName=anyOriginalName}}}",
- result.toString());
+ assertEqualsAsJson(
+ "{cumulative:true, distribution:{count:{highBits:0, lowBits:0}, "
+ + "max:{highBits:0, lowBits:0}, min:{highBits:0, lowBits:0}, "
+ + "sum:{highBits:0, lowBits:0}}, "
+ + "structuredNameAndMetadata:{metadata:{kind:'DISTRIBUTION'}, "
+ + "name:{name:'anyName', origin:'USER', originNamespace:'anyNamespace', "
+ + "originalStepName:'anyOriginalName'}}}",
+ result);
}
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/UserMonitoringInfoToCounterUpdateTransformerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/UserMonitoringInfoToCounterUpdateTransformerTest.java
index e2992f5..f24fba0 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/UserMonitoringInfoToCounterUpdateTransformerTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/fn/control/UserMonitoringInfoToCounterUpdateTransformerTest.java
@@ -17,8 +17,9 @@
*/
package org.apache.beam.runners.dataflow.worker.fn.control;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
+import static org.apache.beam.runners.dataflow.worker.testing.GenericJsonAssert.assertEqualsAsJson;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -57,7 +58,7 @@
new UserMonitoringInfoToCounterUpdateTransformer(mockSpecValidator, stepContextMapping);
Optional<String> error = Optional.of("Error text");
when(mockSpecValidator.validate(any())).thenReturn(error);
- assertEquals(null, testObject.transform(null));
+ assertNull(testObject.transform(null));
}
@Test
@@ -86,7 +87,7 @@
UserMonitoringInfoToCounterUpdateTransformer testObject =
new UserMonitoringInfoToCounterUpdateTransformer(mockSpecValidator, stepContextMapping);
when(mockSpecValidator.validate(any())).thenReturn(Optional.empty());
- assertEquals(null, testObject.transform(monitoringInfo));
+ assertNull(testObject.transform(monitoringInfo));
}
@Test
@@ -110,13 +111,13 @@
when(mockSpecValidator.validate(any())).thenReturn(Optional.empty());
CounterUpdate result = testObject.transform(monitoringInfo);
- assertNotEquals(null, result);
+ assertNotNull(result);
- assertEquals(
- "{cumulative=true, integer={highBits=0, lowBits=0}, "
- + "structuredNameAndMetadata={metadata={kind=SUM}, "
- + "name={name=anyName, origin=USER, originNamespace=anyNamespace, "
- + "originalStepName=anyOriginalName}}}",
- result.toString());
+ assertEqualsAsJson(
+ "{cumulative:true, integer:{highBits:0, lowBits:0}, "
+ + "structuredNameAndMetadata:{metadata:{kind:'SUM'}, "
+ + "name:{name:'anyName', origin:'USER', originNamespace:'anyNamespace', "
+ + "originalStepName:'anyOriginalName'}}}",
+ result);
}
}
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/graph/InsertFetchAndFilterStreamingSideInputNodesTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/graph/InsertFetchAndFilterStreamingSideInputNodesTest.java
index 5cf303c..d1115c9 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/graph/InsertFetchAndFilterStreamingSideInputNodesTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/graph/InsertFetchAndFilterStreamingSideInputNodesTest.java
@@ -30,8 +30,8 @@
import java.util.Objects;
import javax.annotation.Nullable;
import org.apache.beam.model.pipeline.v1.RunnerApi;
+import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
import org.apache.beam.runners.core.construction.PTransformTranslation;
import org.apache.beam.runners.core.construction.ParDoTranslation;
import org.apache.beam.runners.core.construction.PipelineTranslation;
@@ -194,9 +194,9 @@
&& b instanceof FetchAndFilterStreamingSideInputsNode) {
FetchAndFilterStreamingSideInputsNode nodeA = (FetchAndFilterStreamingSideInputsNode) a;
FetchAndFilterStreamingSideInputsNode nodeB = (FetchAndFilterStreamingSideInputsNode) b;
- Map.Entry<PCollectionView<?>, SdkFunctionSpec> nodeAEntry =
+ Map.Entry<PCollectionView<?>, FunctionSpec> nodeAEntry =
Iterables.getOnlyElement(nodeA.getPCollectionViewsToWindowMappingFns().entrySet());
- Map.Entry<PCollectionView<?>, SdkFunctionSpec> nodeBEntry =
+ Map.Entry<PCollectionView<?>, FunctionSpec> nodeBEntry =
Iterables.getOnlyElement(nodeB.getPCollectionViewsToWindowMappingFns().entrySet());
return Objects.equals(
nodeAEntry.getKey().getTagInternal(), nodeBEntry.getKey().getTagInternal())
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/graph/LengthPrefixUnknownCodersTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/graph/LengthPrefixUnknownCodersTest.java
index ebe0d4e..a8b37b0 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/graph/LengthPrefixUnknownCodersTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/graph/LengthPrefixUnknownCodersTest.java
@@ -23,9 +23,14 @@
import static org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders.forInstructionOutput;
import static org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders.forInstructionOutputNode;
import static org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders.forParallelInstruction;
+import static org.apache.beam.runners.dataflow.worker.testing.GenericJsonAssert.assertEqualsAsJson;
+import static org.apache.beam.runners.dataflow.worker.testing.GenericJsonMatcher.jsonOf;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotSame;
+import com.google.api.client.json.GenericJson;
import com.google.api.client.json.jackson.JacksonFactory;
import com.google.api.services.dataflow.model.InstructionOutput;
import com.google.api.services.dataflow.model.ParDoInstruction;
@@ -36,10 +41,8 @@
import com.google.api.services.dataflow.model.Source;
import com.google.api.services.dataflow.model.WriteInstruction;
import java.util.ArrayList;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import org.apache.beam.runners.dataflow.util.CloudObject;
import org.apache.beam.runners.dataflow.util.CloudObjects;
import org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge;
@@ -105,7 +108,7 @@
public void testLengthPrefixUnknownCoders() throws Exception {
Map<String, Object> lengthPrefixedCoderCloudObject =
forCodec(CloudObjects.asCloudObject(windowedValueCoder, /*sdkComponents=*/ null), false);
- assertEquals(
+ assertEqualsAsJson(
CloudObjects.asCloudObject(prefixedWindowedValueCoder, /*sdkComponents=*/ null),
lengthPrefixedCoderCloudObject);
}
@@ -126,7 +129,7 @@
KvCoder.of(StringUtf8Coder.of(), LengthPrefixCoder.of(VarIntCoder.of())),
GlobalWindow.Coder.INSTANCE);
- assertEquals(
+ assertEqualsAsJson(
CloudObjects.asCloudObject(expectedCoder, /*sdkComponents=*/ null),
lengthPrefixedCoderCloudObject);
}
@@ -141,7 +144,7 @@
Map<String, Object> lengthPrefixedCoderCloudObject =
forCodec(CloudObjects.asCloudObject(windowedValueCoder, /*sdkComponents=*/ null), true);
- assertEquals(
+ assertEqualsAsJson(
CloudObjects.asCloudObject(prefixedAndReplacedWindowedValueCoder, /*sdkComponents=*/ null),
lengthPrefixedCoderCloudObject);
}
@@ -153,11 +156,11 @@
output.setFactory(new JacksonFactory());
InstructionOutput prefixedOutput = forInstructionOutput(output, false);
- assertEquals(
+ assertEqualsAsJson(
CloudObjects.asCloudObject(prefixedWindowedValueCoder, /*sdkComponents=*/ null),
prefixedOutput.getCodec());
// Should not mutate the instruction.
- assertEquals(
+ assertEqualsAsJson(
output.getCodec(), CloudObjects.asCloudObject(windowedValueCoder, /*sdkComponents=*/ null));
}
@@ -170,11 +173,11 @@
instruction.setRead(readInstruction);
ParallelInstruction prefixedInstruction = forParallelInstruction(instruction, false);
- assertEquals(
+ assertEqualsAsJson(
CloudObjects.asCloudObject(prefixedWindowedValueCoder, /*sdkComponents=*/ null),
prefixedInstruction.getRead().getSource().getCodec());
// Should not mutate the instruction.
- assertEquals(
+ assertEqualsAsJson(
readInstruction.getSource().getCodec(),
CloudObjects.asCloudObject(windowedValueCoder, /*sdkComponents=*/ null));
}
@@ -188,11 +191,11 @@
instruction.setWrite(writeInstruction);
ParallelInstruction prefixedInstruction = forParallelInstruction(instruction, false);
- assertEquals(
+ assertEqualsAsJson(
CloudObjects.asCloudObject(prefixedWindowedValueCoder, /*sdkComponents=*/ null),
prefixedInstruction.getWrite().getSink().getCodec());
// Should not mutate the instruction.
- assertEquals(
+ assertEqualsAsJson(
CloudObjects.asCloudObject(windowedValueCoder, /*sdkComponents=*/ null),
writeInstruction.getSink().getCodec());
}
@@ -208,11 +211,11 @@
instruction.setParDo(parDo);
ParallelInstruction prefixedInstruction = forParallelInstruction(instruction, false);
- assertEquals(
+ assertEqualsAsJson(
CloudObjects.asCloudObject(prefixedWindowedValueCoder, /*sdkComponents=*/ null),
prefixedInstruction.getParDo().getUserFn().get(WorkerPropertyNames.INPUT_CODER));
// Should not mutate the instruction.
- assertEquals(
+ assertEqualsAsJson(
CloudObjects.asCloudObject(windowedValueCoder, /*sdkComponents=*/ null),
parDo.getUserFn().get(WorkerPropertyNames.INPUT_CODER));
}
@@ -226,7 +229,7 @@
}
@Test
- public void testLengthPrefixAndReplaceForRunnerNetwork() {
+ public void testLengthPrefixAndReplaceForRunnerNetwork() throws Exception {
Node readNode = createReadNode("Read", "Source", windowedValueCoder);
Edge readNodeEdge = DefaultEdge.create();
Node readNodeOut = createInstructionOutputNode("Read.out", windowedValueCoder);
@@ -243,7 +246,7 @@
MutableNetwork<Node, Edge> prefixedNetwork = andReplaceForRunnerNetwork(network);
- Set prefixedInstructions = new HashSet<>();
+ ImmutableSet.Builder<GenericJson> prefixedInstructions = ImmutableSet.builder();
for (Node node : prefixedNetwork.nodes()) {
if (node instanceof ParallelInstructionNode) {
prefixedInstructions.add(((ParallelInstructionNode) node).getParallelInstruction());
@@ -252,11 +255,11 @@
}
}
- Set expectedInstructions =
- ImmutableSet.of(
- prefixedReadNode.getParallelInstruction(), prefixedReadNodeOut.getInstructionOutput());
-
- assertEquals(expectedInstructions, prefixedInstructions);
+ assertThat(
+ prefixedInstructions.build(),
+ containsInAnyOrder(
+ jsonOf(prefixedReadNodeOut.getInstructionOutput()),
+ jsonOf(prefixedReadNode.getParallelInstruction())));
}
@Test
@@ -265,7 +268,7 @@
network.addNode(instructionOutputNode);
network.addNode(grpcPortNode);
network.addEdge(grpcPortNode, instructionOutputNode, DefaultEdge.create());
- assertEquals(
+ assertEqualsAsJson(
CloudObjects.asCloudObject(prefixedWindowedValueCoder, /*sdkComponents=*/ null),
((InstructionOutputNode) forInstructionOutputNode(network).apply(instructionOutputNode))
.getInstructionOutput()
@@ -278,7 +281,7 @@
network.addNode(instructionOutputNode);
network.addNode(grpcPortNode);
network.addEdge(instructionOutputNode, grpcPortNode, DefaultEdge.create());
- assertEquals(
+ assertEqualsAsJson(
CloudObjects.asCloudObject(prefixedWindowedValueCoder, /*sdkComponents=*/ null),
((InstructionOutputNode) forInstructionOutputNode(network).apply(instructionOutputNode))
.getInstructionOutput()
@@ -292,7 +295,7 @@
network.addNode(instructionOutputNode);
network.addNode(readNode);
network.addEdge(readNode, instructionOutputNode, DefaultEdge.create());
- assertEquals(
+ assertEqualsAsJson(
CloudObjects.asCloudObject(windowedValueCoder, /*sdkComponents=*/ null),
((InstructionOutputNode) forInstructionOutputNode(network).apply(instructionOutputNode))
.getInstructionOutput()
@@ -306,7 +309,7 @@
ImmutableList.of(
createSideInputInfosWithCoders(windowedValueCoder, prefixedWindowedValueCoder)),
false);
- assertEquals(
+ assertEqualsAsJson(
ImmutableList.of(
createSideInputInfosWithCoders(prefixedWindowedValueCoder, prefixedWindowedValueCoder)),
prefixedSideInputInfos);
@@ -316,7 +319,7 @@
ImmutableList.of(
createSideInputInfosWithCoders(windowedValueCoder, prefixedWindowedValueCoder)),
true);
- assertEquals(
+ assertEqualsAsJson(
ImmutableList.of(
createSideInputInfosWithCoders(
prefixedAndReplacedWindowedValueCoder, prefixedAndReplacedWindowedValueCoder)),
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/graph/NodesTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/graph/NodesTest.java
index 3888103..d87ec98 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/graph/NodesTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/graph/NodesTest.java
@@ -28,7 +28,6 @@
import org.apache.beam.model.fnexecution.v1.BeamFnApi;
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
import org.apache.beam.runners.dataflow.worker.DataflowPortabilityPCollectionView;
import org.apache.beam.runners.dataflow.worker.NameContextsForTests;
import org.apache.beam.runners.dataflow.worker.counters.NameContext;
@@ -146,12 +145,10 @@
@Test
public void testFetchReadySideInputsAndFilterBlockedStreamingSideInputsNode() {
WindowingStrategy windowingStrategy = WindowingStrategy.globalDefault();
- Map<PCollectionView<?>, RunnerApi.SdkFunctionSpec> pcollectionViewsToWindowMappingFns =
+ Map<PCollectionView<?>, RunnerApi.FunctionSpec> pcollectionViewsToWindowMappingFns =
ImmutableMap.of(
mock(PCollectionView.class),
- SdkFunctionSpec.newBuilder()
- .setSpec(FunctionSpec.newBuilder().setUrn("beam:test:urn:1.0"))
- .build());
+ FunctionSpec.newBuilder().setUrn("beam:test:urn:1.0").build());
NameContext nameContext = NameContextsForTests.nameContextForTest();
assertSame(
FetchAndFilterStreamingSideInputsNode.create(
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/testing/GenericJsonAssert.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/testing/GenericJsonAssert.java
new file mode 100644
index 0000000..f4e7268
--- /dev/null
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/testing/GenericJsonAssert.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.dataflow.worker.testing;
+
+import com.google.api.client.json.GenericJson;
+import com.google.api.client.json.jackson2.JacksonFactory;
+import java.io.IOException;
+import org.json.JSONException;
+import org.skyscreamer.jsonassert.JSONAssert;
+
+/** Assertions on {@link GenericJson} class. */
+public class GenericJsonAssert {
+
+ private static final JacksonFactory jacksonFactory = JacksonFactory.getDefaultInstance();
+
+ /**
+ * Asserts that {@code actual} has the same JSON representation as {@code expected}.
+ *
+ * @param expected expected JSON string, {@link GenericJson}, {@link java.util.Map}, or {@link
+ * Iterable} of {@link GenericJson}.
+ * @param actual actual object whose JSON representation is compared against {@code expected}.
+ */
+ public static void assertEqualsAsJson(Object expected, Object actual) {
+
+ try {
+ String expectedJsonText =
+ expected instanceof String ? (String) expected : jacksonFactory.toString(expected);
+ String actualJsonText = jacksonFactory.toString(actual);
+ JSONAssert.assertEquals(expectedJsonText, actualJsonText, true);
+ } catch (JSONException ex) {
+ throw new IllegalArgumentException("Could not parse JSON", ex);
+ } catch (IOException ex) {
+ throw new IllegalArgumentException("Could not generate JSON text", ex);
+ }
+ }
+}
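A minimal usage sketch of the new assertion (the test values below are hypothetical, not taken from this change): the expected side may be given as a relaxed JSON string, so field order and GenericJson bookkeeping no longer matter the way they did with the old toString() comparisons.

import static org.apache.beam.runners.dataflow.worker.testing.GenericJsonAssert.assertEqualsAsJson;

import com.google.api.client.json.GenericJson;
import org.junit.Test;

public class GenericJsonAssertUsageSketch {
  @Test
  public void comparesByJsonValueNotFieldOrder() {
    GenericJson actual = new GenericJson();
    actual.set("origin", "USER");
    actual.set("name", "anyName");

    // Keys may be unquoted and strings single-quoted; JSONassert's lenient parser accepts both.
    assertEqualsAsJson("{name:'anyName', origin:'USER'}", actual);
  }
}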
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/testing/GenericJsonMatcher.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/testing/GenericJsonMatcher.java
new file mode 100644
index 0000000..e576a33
--- /dev/null
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/testing/GenericJsonMatcher.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.dataflow.worker.testing;
+
+import com.google.api.client.json.GenericJson;
+import com.google.api.client.json.jackson2.JacksonFactory;
+import java.io.IOException;
+import org.hamcrest.Description;
+import org.hamcrest.TypeSafeMatcher;
+import org.json.JSONException;
+import org.skyscreamer.jsonassert.JSONCompare;
+import org.skyscreamer.jsonassert.JSONCompareMode;
+import org.skyscreamer.jsonassert.JSONCompareResult;
+
+/**
+ * Matcher to compare {@link GenericJson}s using JSONassert's {@link JSONCompare}. This matcher does
+ * not rely on {@link GenericJson#equals(Object)}, which may use fields irrelevant to JSON values.
+ */
+public final class GenericJsonMatcher extends TypeSafeMatcher<GenericJson> {
+
+ private String expectedJsonText;
+
+ private static final JacksonFactory jacksonFactory = JacksonFactory.getDefaultInstance();
+
+ private GenericJsonMatcher(GenericJson expected) {
+ try {
+ expectedJsonText = jacksonFactory.toString(expected);
+ } catch (IOException ex) {
+ throw new IllegalArgumentException("Could not generate JSON text", ex);
+ }
+ }
+
+ public static GenericJsonMatcher jsonOf(GenericJson genericJson) {
+ return new GenericJsonMatcher(genericJson);
+ }
+
+ @Override
+ protected boolean matchesSafely(GenericJson actual) {
+ try {
+ String actualJsonText = jacksonFactory.toString(actual);
+ JSONCompareResult result =
+ JSONCompare.compareJSON(expectedJsonText, actualJsonText, JSONCompareMode.STRICT);
+ return result.passed();
+ } catch (IOException | JSONException ex) {
+ throw new IllegalArgumentException("Could not parse JSON", ex);
+ }
+ }
+
+ @Override
+ public void describeTo(Description description) {
+ description.appendText(expectedJsonText);
+ }
+}
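A brief usage sketch (the test values are hypothetical): because jsonOf produces a Hamcrest matcher, it composes with collection matchers such as contains or containsInAnyOrder, which a plain JSON-equality assertion cannot do.

import static org.apache.beam.runners.dataflow.worker.testing.GenericJsonMatcher.jsonOf;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.contains;

import com.google.api.client.json.GenericJson;
import java.util.Collections;
import org.junit.Test;

public class GenericJsonMatcherUsageSketch {
  @Test
  public void matchesByJsonValueInsideCollections() {
    GenericJson expected = new GenericJson();
    expected.set("kind", "SUM");

    GenericJson actual = new GenericJson();
    actual.set("kind", "SUM");

    // Comparison is by JSON value, not GenericJson#equals, so factory internals are ignored.
    assertThat(Collections.singletonList(actual), contains(jsonOf(expected)));
  }
}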
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/testing/GenericJsonMatcherTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/testing/GenericJsonMatcherTest.java
new file mode 100644
index 0000000..cac9fe3
--- /dev/null
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/testing/GenericJsonMatcherTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.dataflow.worker.testing;
+
+import static org.apache.beam.runners.dataflow.worker.testing.GenericJsonMatcher.jsonOf;
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import com.google.api.client.json.GenericJson;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for {@link GenericJsonMatcher}. */
+@RunWith(JUnit4.class)
+public class GenericJsonMatcherTest {
+
+ @Test
+ public void testMatch() {
+ GenericJson expected = new GenericJson();
+ expected.set("foo", "bar");
+ GenericJson actual = new GenericJson();
+ actual.set("foo", "bar");
+
+ assertThat(expected, is(jsonOf(actual)));
+ }
+
+ @Test
+ public void testMatchFailure() {
+ GenericJson expected = new GenericJson();
+ expected.set("foo", "expected");
+ GenericJson actual = new GenericJson();
+ actual.set("foo", "actual");
+
+ try {
+ assertThat(actual, is(jsonOf(expected)));
+ } catch (AssertionError ex) {
+ assertEquals(
+ "\nExpected: is {\"foo\":\"expected\"}\n but: was <{foo=actual}>", ex.getMessage());
+
+ // pass
+ return;
+ }
+ fail("The difference in JSON should raise AssertionError");
+ }
+}
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/common/worker/BatchingShuffleEntryReaderTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/common/worker/BatchingShuffleEntryReaderTest.java
index 9ee670b..faa4b05 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/common/worker/BatchingShuffleEntryReaderTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/common/worker/BatchingShuffleEntryReaderTest.java
@@ -18,9 +18,9 @@
package org.apache.beam.runners.dataflow.worker.util.common.worker;
import static com.google.api.client.util.Lists.newArrayList;
+import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.equalTo;
-import static org.junit.Assert.assertThat;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
import static org.mockito.Mockito.when;
@@ -68,8 +68,9 @@
ArrayList<ShuffleEntry> entries = new ArrayList<>();
entries.add(e1);
entries.add(e2);
+ long batchSize = (long) e1.length() + e2.length();
when(batchReader.read(START_POSITION, END_POSITION))
- .thenReturn(new ShuffleBatchReader.Batch(entries, null));
+ .thenReturn(new ShuffleBatchReader.Batch(entries, null, batchSize));
List<ShuffleEntry> results = newArrayList(reader.read(START_POSITION, END_POSITION));
assertThat(results, contains(e1, e2));
}
@@ -81,8 +82,9 @@
ArrayList<ShuffleEntry> entries = new ArrayList<>();
entries.add(e1);
entries.add(e2);
+ long batchSize = (long) e1.length() + e2.length();
when(batchReader.read(START_POSITION, END_POSITION))
- .thenReturn(new ShuffleBatchReader.Batch(entries, null));
+ .thenReturn(new ShuffleBatchReader.Batch(entries, null, batchSize));
Reiterator<ShuffleEntry> it = reader.read(START_POSITION, END_POSITION);
assertThat(it.hasNext(), equalTo(Boolean.TRUE));
assertThat(it.next(), equalTo(e1));
@@ -102,9 +104,9 @@
ShuffleEntry e2 = new ShuffleEntry(KEY, SKEY, VALUE);
List<ShuffleEntry> e2s = Collections.singletonList(e2);
when(batchReader.read(START_POSITION, END_POSITION))
- .thenReturn(new ShuffleBatchReader.Batch(e1s, NEXT_START_POSITION));
+ .thenReturn(new ShuffleBatchReader.Batch(e1s, NEXT_START_POSITION, e1.length()));
when(batchReader.read(NEXT_START_POSITION, END_POSITION))
- .thenReturn(new ShuffleBatchReader.Batch(e2s, null));
+ .thenReturn(new ShuffleBatchReader.Batch(e2s, null, e2.length()));
List<ShuffleEntry> results = newArrayList(reader.read(START_POSITION, END_POSITION));
assertThat(results, contains(e1, e2));
@@ -120,11 +122,11 @@
ShuffleEntry e3 = new ShuffleEntry(KEY, SKEY, VALUE);
List<ShuffleEntry> e3s = Collections.singletonList(e3);
when(batchReader.read(START_POSITION, END_POSITION))
- .thenReturn(new ShuffleBatchReader.Batch(e1s, NEXT_START_POSITION));
+ .thenReturn(new ShuffleBatchReader.Batch(e1s, NEXT_START_POSITION, 0));
when(batchReader.read(NEXT_START_POSITION, END_POSITION))
- .thenReturn(new ShuffleBatchReader.Batch(e2s, SECOND_NEXT_START_POSITION));
+ .thenReturn(new ShuffleBatchReader.Batch(e2s, SECOND_NEXT_START_POSITION, 0));
when(batchReader.read(SECOND_NEXT_START_POSITION, END_POSITION))
- .thenReturn(new ShuffleBatchReader.Batch(e3s, null));
+ .thenReturn(new ShuffleBatchReader.Batch(e3s, null, e3.length()));
List<ShuffleEntry> results = newArrayList(reader.read(START_POSITION, END_POSITION));
assertThat(results, contains(e3));
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/common/worker/CachingShuffleBatchReaderTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/common/worker/CachingShuffleBatchReaderTest.java
index 88db3b2..27dbc1d 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/common/worker/CachingShuffleBatchReaderTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/common/worker/CachingShuffleBatchReaderTest.java
@@ -17,9 +17,9 @@
*/
package org.apache.beam.runners.dataflow.worker.util.common.worker;
+import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
-import static org.junit.Assert.assertThat;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
@@ -37,7 +37,7 @@
@RunWith(JUnit4.class)
public final class CachingShuffleBatchReaderTest {
private final ShuffleBatchReader.Batch testBatch =
- new ShuffleBatchReader.Batch(new ArrayList<ShuffleEntry>(), null);
+ new ShuffleBatchReader.Batch(new ArrayList<ShuffleEntry>(), null, 0);
@Test
public void readerShouldCacheReads() throws IOException {
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/BundleSplitHandler.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/BundleSplitHandler.java
new file mode 100644
index 0000000..cb03238
--- /dev/null
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/BundleSplitHandler.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.fnexecution.control;
+
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleSplitResponse;
+
+/**
+ * A handler which is invoked whenever an active bundle is split. The active bundle will continue
+ * processing until it is complete.
+ *
+ * <p>The returned split response describes the work performed so far: a {@code primary} portion
+ * that the SDK remains responsible for processing and a {@code residual} portion that the runner
+ * is responsible for scheduling for future processing. See <a
+ * href="https://s.apache.org/beam-breaking-fusion">breaking the fusion barrier</a> for further
+ * details.
+ */
+public interface BundleSplitHandler {
+ void split(ProcessBundleSplitResponse splitResponse);
+
+ /** Returns a bundle split handler that throws on any split response. */
+ static BundleSplitHandler unsupported() {
+ return new BundleSplitHandler() {
+ @Override
+ public void split(ProcessBundleSplitResponse splitResponse) {
+ throw new UnsupportedOperationException(
+ String.format(
+ "%s does not support splitting.", BundleSplitHandler.class.getSimpleName()));
+ }
+ };
+ };
+}
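As an illustration only (this class is not part of the change, and the generated proto accessor names are assumed from the BeamFnApi repeated fields), a runner-side handler could collect the residual roots of each split for later rescheduling:

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.DelayedBundleApplication;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleSplitResponse;
import org.apache.beam.runners.fnexecution.control.BundleSplitHandler;

/** Sketch of a split handler that records residual roots; the runner reschedules them later. */
class CollectingSplitHandler implements BundleSplitHandler {
  private final List<DelayedBundleApplication> residuals = new CopyOnWriteArrayList<>();

  @Override
  public void split(ProcessBundleSplitResponse splitResponse) {
    // Primary roots stay with the SDK; residual roots become the runner's responsibility.
    residuals.addAll(splitResponse.getResidualRootsList());
  }

  List<DelayedBundleApplication> residuals() {
    return residuals;
  }
}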
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/DefaultJobBundleFactory.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/DefaultJobBundleFactory.java
index a3f6f01..006790f 100644
--- a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/DefaultJobBundleFactory.java
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/DefaultJobBundleFactory.java
@@ -19,9 +19,12 @@
import com.google.auto.value.AutoValue;
import java.io.IOException;
+import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingDeque;
+import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import javax.annotation.concurrent.ThreadSafe;
@@ -58,6 +61,7 @@
import org.apache.beam.sdk.fn.data.FnDataReceiver;
import org.apache.beam.sdk.fn.stream.OutboundObserverFactory;
import org.apache.beam.sdk.function.ThrowingFunction;
+import org.apache.beam.sdk.options.ExperimentalOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PortablePipelineOptions;
import org.apache.beam.sdk.options.PortablePipelineOptions.RetrievalServiceType;
@@ -88,12 +92,16 @@
private final String factoryId = factoryIdGenerator.getId();
private final ImmutableList<LoadingCache<Environment, WrappedSdkHarnessClient>> environmentCaches;
- private final AtomicInteger stageBundleCount = new AtomicInteger();
+ private final AtomicInteger stageBundleFactoryCount = new AtomicInteger();
private final Map<String, EnvironmentFactory.Provider> environmentFactoryProviderMap;
private final ExecutorService executor;
private final MapControlClientPool clientPool;
private final IdGenerator stageIdGenerator;
private final int environmentExpirationMillis;
+ private final Semaphore availableCachesSemaphore;
+ private final LinkedBlockingDeque<LoadingCache<Environment, WrappedSdkHarnessClient>>
+ availableCaches;
+ private final boolean loadBalanceBundles;
public static DefaultJobBundleFactory create(JobInfo jobInfo) {
PipelineOptions pipelineOptions =
@@ -124,10 +132,13 @@
this.clientPool = MapControlClientPool.create();
this.stageIdGenerator = () -> factoryId + "-" + stageIdSuffixGenerator.getId();
this.environmentExpirationMillis = getEnvironmentExpirationMillis(jobInfo);
+ this.loadBalanceBundles = shouldLoadBalanceBundles(jobInfo);
this.environmentCaches =
createEnvironmentCaches(
serverFactory -> createServerInfo(jobInfo, serverFactory),
getMaxEnvironmentClients(jobInfo));
+ this.availableCachesSemaphore = new Semaphore(environmentCaches.size(), true);
+ this.availableCaches = new LinkedBlockingDeque<>(environmentCaches);
}
@VisibleForTesting
@@ -141,8 +152,11 @@
this.clientPool = MapControlClientPool.create();
this.stageIdGenerator = stageIdGenerator;
this.environmentExpirationMillis = getEnvironmentExpirationMillis(jobInfo);
+ this.loadBalanceBundles = shouldLoadBalanceBundles(jobInfo);
this.environmentCaches =
createEnvironmentCaches(serverFactory -> serverInfo, getMaxEnvironmentClients(jobInfo));
+ this.availableCachesSemaphore = new Semaphore(environmentCaches.size(), true);
+ this.availableCaches = new LinkedBlockingDeque<>(environmentCaches);
}
private ImmutableList<LoadingCache<Environment, WrappedSdkHarnessClient>> createEnvironmentCaches(
@@ -211,6 +225,26 @@
return maxEnvironments;
}
+ private static boolean shouldLoadBalanceBundles(JobInfo jobInfo) {
+ PipelineOptions pipelineOptions =
+ PipelineOptionsTranslation.fromProto(jobInfo.pipelineOptions());
+ boolean loadBalanceBundles =
+ pipelineOptions.as(PortablePipelineOptions.class).getLoadBalanceBundles();
+ if (loadBalanceBundles) {
+ int stateCacheSize =
+ Integer.parseInt(
+ MoreObjects.firstNonNull(
+ ExperimentalOptions.getExperimentValue(
+ pipelineOptions, ExperimentalOptions.STATE_CACHE_SIZE),
+ "0"));
+ Preconditions.checkArgument(
+ stateCacheSize == 0,
+ "%s must be 0 when using bundle load balancing",
+ ExperimentalOptions.STATE_CACHE_SIZE);
+ }
+ return loadBalanceBundles;
+ }
+
@Override
public StageBundleFactory forStage(ExecutableStage executableStage) {
return new SimpleStageBundleFactory(executableStage);
@@ -227,6 +261,58 @@
executor.shutdown();
}
+ private static ImmutableMap.Builder<String, RemoteOutputReceiver<?>> getOutputReceivers(
+ ExecutableProcessBundleDescriptor processBundleDescriptor,
+ OutputReceiverFactory outputReceiverFactory) {
+ ImmutableMap.Builder<String, RemoteOutputReceiver<?>> outputReceivers = ImmutableMap.builder();
+ for (Map.Entry<String, Coder> remoteOutputCoder :
+ processBundleDescriptor.getRemoteOutputCoders().entrySet()) {
+ String outputTransform = remoteOutputCoder.getKey();
+ Coder coder = remoteOutputCoder.getValue();
+ String bundleOutputPCollection =
+ Iterables.getOnlyElement(
+ processBundleDescriptor
+ .getProcessBundleDescriptor()
+ .getTransformsOrThrow(outputTransform)
+ .getInputsMap()
+ .values());
+ FnDataReceiver outputReceiver = outputReceiverFactory.create(bundleOutputPCollection);
+ outputReceivers.put(outputTransform, RemoteOutputReceiver.of(coder, outputReceiver));
+ }
+ return outputReceivers;
+ }
+
+ private static class PreparedClient {
+ private BundleProcessor processor;
+ private ExecutableProcessBundleDescriptor processBundleDescriptor;
+ private WrappedSdkHarnessClient wrappedClient;
+ }
+
+ private PreparedClient prepare(
+ WrappedSdkHarnessClient wrappedClient, ExecutableStage executableStage) {
+ PreparedClient preparedClient = new PreparedClient();
+ try {
+ preparedClient.wrappedClient = wrappedClient;
+ preparedClient.processBundleDescriptor =
+ ProcessBundleDescriptors.fromExecutableStage(
+ stageIdGenerator.getId(),
+ executableStage,
+ wrappedClient.getServerInfo().getDataServer().getApiServiceDescriptor(),
+ wrappedClient.getServerInfo().getStateServer().getApiServiceDescriptor());
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to create ProcessBundleDescriptor.", e);
+ }
+
+ preparedClient.processor =
+ wrappedClient
+ .getClient()
+ .getProcessor(
+ preparedClient.processBundleDescriptor.getProcessBundleDescriptor(),
+ preparedClient.processBundleDescriptor.getRemoteInputDestinations(),
+ wrappedClient.getServerInfo().getStateServer().getService());
+ return preparedClient;
+ }
+
/**
* A {@link StageBundleFactory} for remotely processing bundles that supports environment
* expiration.
@@ -235,37 +321,16 @@
private final ExecutableStage executableStage;
private final int environmentIndex;
- private BundleProcessor processor;
- private ExecutableProcessBundleDescriptor processBundleDescriptor;
- private WrappedSdkHarnessClient wrappedClient;
+ private final HashMap<WrappedSdkHarnessClient, PreparedClient> preparedClients = new HashMap<>();
+ private PreparedClient currentClient;
private SimpleStageBundleFactory(ExecutableStage executableStage) {
this.executableStage = executableStage;
- this.environmentIndex = stageBundleCount.getAndIncrement() % environmentCaches.size();
- prepare(
- environmentCaches.get(environmentIndex).getUnchecked(executableStage.getEnvironment()));
- }
-
- private void prepare(WrappedSdkHarnessClient wrappedClient) {
- try {
- this.wrappedClient = wrappedClient;
- this.processBundleDescriptor =
- ProcessBundleDescriptors.fromExecutableStage(
- stageIdGenerator.getId(),
- executableStage,
- wrappedClient.getServerInfo().getDataServer().getApiServiceDescriptor(),
- wrappedClient.getServerInfo().getStateServer().getApiServiceDescriptor());
- } catch (IOException e) {
- throw new RuntimeException("Failed to create ProcessBundleDescriptor.", e);
- }
-
- this.processor =
- wrappedClient
- .getClient()
- .getProcessor(
- processBundleDescriptor.getProcessBundleDescriptor(),
- processBundleDescriptor.getRemoteInputDestinations(),
- wrappedClient.getServerInfo().getStateServer().getService());
+ this.environmentIndex = stageBundleFactoryCount.getAndIncrement() % environmentCaches.size();
+ WrappedSdkHarnessClient client =
+ environmentCaches.get(environmentIndex).getUnchecked(executableStage.getEnvironment());
+ this.currentClient = prepare(client, executableStage);
+ this.preparedClients.put(client, currentClient);
}
@Override
@@ -276,38 +341,53 @@
throws Exception {
// TODO: Consider having BundleProcessor#newBundle take in an OutputReceiverFactory rather
// than constructing the receiver map here. Every bundle factory will need this.
- ImmutableMap.Builder<String, RemoteOutputReceiver<?>> outputReceivers =
- ImmutableMap.builder();
- for (Map.Entry<String, Coder> remoteOutputCoder :
- processBundleDescriptor.getRemoteOutputCoders().entrySet()) {
- String outputTransform = remoteOutputCoder.getKey();
- Coder coder = remoteOutputCoder.getValue();
- String bundleOutputPCollection =
- Iterables.getOnlyElement(
- processBundleDescriptor
- .getProcessBundleDescriptor()
- .getTransformsOrThrow(outputTransform)
- .getInputsMap()
- .values());
- FnDataReceiver outputReceiver = outputReceiverFactory.create(bundleOutputPCollection);
- outputReceivers.put(outputTransform, RemoteOutputReceiver.of(coder, outputReceiver));
+
+ if (environmentExpirationMillis == 0 && !loadBalanceBundles) {
+ return currentClient.processor.newBundle(
+ getOutputReceivers(currentClient.processBundleDescriptor, outputReceiverFactory)
+ .build(),
+ stateRequestHandler,
+ progressHandler);
}
- if (environmentExpirationMillis == 0) {
- return processor.newBundle(outputReceivers.build(), stateRequestHandler, progressHandler);
- }
+ final LoadingCache<Environment, WrappedSdkHarnessClient> currentCache;
+ if (loadBalanceBundles) {
+ // The semaphore is used to ensure fairness, i.e. waiting threads acquire a cache in arrival order.
+ availableCachesSemaphore.acquire();
+ // Take a cache from the blocking queue so that multiple bundles can be served concurrently.
+ currentCache = availableCaches.take();
+ WrappedSdkHarnessClient client =
+ currentCache.getUnchecked(executableStage.getEnvironment());
+ client.ref();
- final WrappedSdkHarnessClient client =
- environmentCaches.get(environmentIndex).getUnchecked(executableStage.getEnvironment());
- client.ref();
+ currentClient = preparedClients.get(client);
+ if (currentClient == null) {
+ // we are using this client for the first time
+ preparedClients.put(client, currentClient = prepare(client, executableStage));
+ // cleanup any expired clients
+ preparedClients.keySet().removeIf(c -> c.bundleRefCount.get() == 0);
+ }
- if (client != wrappedClient) {
- // reset after environment expired
- prepare(client);
+ } else {
+ currentCache = environmentCaches.get(environmentIndex);
+ WrappedSdkHarnessClient client =
+ currentCache.getUnchecked(executableStage.getEnvironment());
+ client.ref();
+
+ if (currentClient.wrappedClient != client) {
+ // reset after environment expired
+ preparedClients.clear();
+ currentClient = prepare(client, executableStage);
+ preparedClients.put(client, currentClient);
+ }
}
final RemoteBundle bundle =
- processor.newBundle(outputReceivers.build(), stateRequestHandler, progressHandler);
+ currentClient.processor.newBundle(
+ getOutputReceivers(currentClient.processBundleDescriptor, outputReceiverFactory)
+ .build(),
+ stateRequestHandler,
+ progressHandler);
return new RemoteBundle() {
@Override
public String getId() {
@@ -320,22 +400,31 @@
}
@Override
+ public void split(double fractionOfRemainder) {
+ bundle.split(fractionOfRemainder);
+ }
+
+ @Override
public void close() throws Exception {
bundle.close();
- client.unref();
+ currentClient.wrappedClient.unref();
+ if (loadBalanceBundles) {
+ availableCaches.offer(currentCache);
+ availableCachesSemaphore.release();
+ }
}
};
}
@Override
public ExecutableProcessBundleDescriptor getProcessBundleDescriptor() {
- return processBundleDescriptor;
+ return currentClient.processBundleDescriptor;
}
@Override
public void close() throws Exception {
// Clear reference to encourage cache eviction. Values are weakly referenced.
- wrappedClient = null;
+ preparedClients.clear();
}
}
@@ -344,7 +433,7 @@
* now, there is a 1:1 relationship between data services and harness clients. The servers are
* packaged here to tie server lifetimes to harness client lifetimes.
*/
- protected static class WrappedSdkHarnessClient implements AutoCloseable {
+ protected static class WrappedSdkHarnessClient {
private final RemoteEnvironment environment;
private final SdkHarnessClient client;
@@ -374,17 +463,24 @@
return serverInfo;
}
- @Override
- public void close() throws Exception {
- try (AutoCloseable envCloser = environment) {
- // Wrap resources in try-with-resources to ensure all are cleaned up.
- }
- try (AutoCloseable stateServer = serverInfo.getStateServer();
+ public void close() {
+ // DO NOT ADD ANYTHING HERE WHICH MIGHT CAUSE THE BLOCK BELOW TO NOT BE EXECUTED.
+ // If we exit prematurely (e.g. due to an exception), resources won't be cleaned up properly.
+ // Please make an AutoCloseable and add it to the try statement below.
+ try (AutoCloseable envCloser = environment;
+ AutoCloseable stateServer = serverInfo.getStateServer();
AutoCloseable dateServer = serverInfo.getDataServer();
AutoCloseable controlServer = serverInfo.getControlServer();
AutoCloseable loggingServer = serverInfo.getLoggingServer();
AutoCloseable retrievalServer = serverInfo.getRetrievalServer();
- AutoCloseable provisioningServer = serverInfo.getProvisioningServer()) {}
+ AutoCloseable provisioningServer = serverInfo.getProvisioningServer()) {
+ // Wrap resources in try-with-resources to ensure all are cleaned up.
+ // This will close _all_ of these even in the presence of exceptions.
+ // The first exception encountered will be the base exception,
+ // the next one will be added via Throwable#addSuppressed.
+ } catch (Exception e) {
+ LOG.warn("Error cleaning up servers {}", environment.getEnvironment(), e);
+ }
// TODO: Wait for executor shutdown?
}
@@ -397,11 +493,7 @@
if (count == 0) {
// Close environment after it was removed from cache and all bundles finished.
LOG.info("Closing environment {}", environment.getEnvironment());
- try {
- close();
- } catch (Exception e) {
- LOG.warn("Error cleaning up environment {}", environment.getEnvironment(), e);
- }
+ close();
}
return count;
}
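For illustration only (the option and experiment names come from the references above; the setter is assumed to follow the usual PipelineOptions getter/setter convention, and the surrounding pipeline setup is not shown), bundle load balancing is enabled through PortablePipelineOptions and requires the SDK state cache to stay disabled:

import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.PortablePipelineOptions;

class LoadBalancingOptionsSketch {
  static PortablePipelineOptions loadBalancingOptions() {
    PortablePipelineOptions options =
        PipelineOptionsFactory.create().as(PortablePipelineOptions.class);
    // Hand each new bundle to whichever SDK environment becomes available first.
    options.setLoadBalanceBundles(true);
    // shouldLoadBalanceBundles(...) rejects a non-zero state_cache_size experiment,
    // so do not add "state_cache_size=<n>" to the experiments list when this is on.
    return options;
  }
}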
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/ProcessBundleDescriptors.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/ProcessBundleDescriptors.java
index d4932a9..cd81c0a 100644
--- a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/ProcessBundleDescriptors.java
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/ProcessBundleDescriptors.java
@@ -122,10 +122,12 @@
// The order of these does not matter.
inputDestinationsBuilder.put(
stage.getInputPCollection().getId(),
- addStageInput(dataEndpoint, stage.getInputPCollection(), components));
+ addStageInput(
+ dataEndpoint, stage.getInputPCollection(), components, stage.getWireCoderSetting()));
remoteOutputCodersBuilder.putAll(
- addStageOutputs(dataEndpoint, stage.getOutputPCollections(), components));
+ addStageOutputs(
+ dataEndpoint, stage.getOutputPCollections(), components, stage.getWireCoderSetting()));
Map<String, Map<String, SideInputSpec>> sideInputSpecs = addSideInputs(stage, components);
@@ -189,11 +191,13 @@
private static Map<String, Coder<WindowedValue<?>>> addStageOutputs(
ApiServiceDescriptor dataEndpoint,
Collection<PCollectionNode> outputPCollections,
- Components.Builder components)
+ Components.Builder components,
+ RunnerApi.WireCoderSetting wireCoderSetting)
throws IOException {
Map<String, Coder<WindowedValue<?>>> remoteOutputCoders = new LinkedHashMap<>();
for (PCollectionNode outputPCollection : outputPCollections) {
- OutputEncoding outputEncoding = addStageOutput(dataEndpoint, components, outputPCollection);
+ OutputEncoding outputEncoding =
+ addStageOutput(dataEndpoint, components, outputPCollection, wireCoderSetting);
remoteOutputCoders.put(outputEncoding.getPTransformId(), outputEncoding.getCoder());
}
return remoteOutputCoders;
@@ -202,12 +206,16 @@
private static RemoteInputDestination<WindowedValue<?>> addStageInput(
ApiServiceDescriptor dataEndpoint,
PCollectionNode inputPCollection,
- Components.Builder components)
+ Components.Builder components,
+ RunnerApi.WireCoderSetting wireCoderSetting)
throws IOException {
- String inputWireCoderId = WireCoders.addSdkWireCoder(inputPCollection, components);
+ String inputWireCoderId =
+ WireCoders.addSdkWireCoder(inputPCollection, components, wireCoderSetting);
@SuppressWarnings("unchecked")
Coder<WindowedValue<?>> wireCoder =
- (Coder) WireCoders.instantiateRunnerWireCoder(inputPCollection, components.build());
+ (Coder)
+ WireCoders.instantiateRunnerWireCoder(
+ inputPCollection, components.build(), wireCoderSetting);
RemoteGrpcPort inputPort =
RemoteGrpcPort.newBuilder()
@@ -226,12 +234,16 @@
private static OutputEncoding addStageOutput(
ApiServiceDescriptor dataEndpoint,
Components.Builder components,
- PCollectionNode outputPCollection)
+ PCollectionNode outputPCollection,
+ RunnerApi.WireCoderSetting wireCoderSetting)
throws IOException {
- String outputWireCoderId = WireCoders.addSdkWireCoder(outputPCollection, components);
+ String outputWireCoderId =
+ WireCoders.addSdkWireCoder(outputPCollection, components, wireCoderSetting);
@SuppressWarnings("unchecked")
Coder<WindowedValue<?>> wireCoder =
- (Coder) WireCoders.instantiateRunnerWireCoder(outputPCollection, components.build());
+ (Coder)
+ WireCoders.instantiateRunnerWireCoder(
+ outputPCollection, components.build(), wireCoderSetting);
RemoteGrpcPort outputPort =
RemoteGrpcPort.newBuilder()
.setApiServiceDescriptor(dataEndpoint)
@@ -385,7 +397,8 @@
addStageInput(
dataEndpoint,
PipelineNode.pCollection(inputTimerPCollectionId, timerCollectionSpec),
- components));
+ components,
+ stage.getWireCoderSetting()));
String outputTimerPCollectionId =
SyntheticComponents.uniqueId(
String.format(
@@ -397,7 +410,8 @@
addStageOutput(
dataEndpoint,
components,
- PipelineNode.pCollection(outputTimerPCollectionId, timerCollectionSpec));
+ PipelineNode.pCollection(outputTimerPCollectionId, timerCollectionSpec),
+ stage.getWireCoderSetting());
outputTransformCodersBuilder.put(outputEncoding.getPTransformId(), outputEncoding.getCoder());
components.putTransforms(
timerReference.transform().getId(),
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/RemoteBundle.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/RemoteBundle.java
index 30fea85..cd7ac11 100644
--- a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/RemoteBundle.java
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/RemoteBundle.java
@@ -39,6 +39,15 @@
Map<String, FnDataReceiver> getInputReceivers();
/**
+ * Ask the remote bundle to split its current processing based upon its knowledge of remaining
+ * work. A fraction of 0 is equivalent to asking the SDK to checkpoint.
+ *
+ * <p>This method will return after the request has been issued. Any splits will be forwarded to
+ * the {@link BundleSplitHandler}.
+ */
+ void split(double fractionOfRemainder);
+
+ /**
* Closes this bundle. This causes the input {@link FnDataReceiver} to be closed (future calls to
* that {@link FnDataReceiver} will throw an exception), and causes the {@link RemoteBundle} to
* produce any buffered outputs. The call to {@link #close()} will block until all of the outputs
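A hedged usage sketch of the new method (the helper below and its surrounding wiring are assumed; only RemoteBundle#split itself comes from this change): a runner that wants the SDK to checkpoint its remaining work can request a split with a fraction of 0 while the bundle is still open.

// Sketch: `bundle` is an active RemoteBundle obtained from BundleProcessor#newBundle.
static void checkpoint(RemoteBundle bundle) throws Exception {
  // A fraction of 0 asks the SDK to stop where it is and return the rest as residual work.
  bundle.split(0.0);
  // The resulting split, if any, is delivered asynchronously to the registered BundleSplitHandler;
  // closing the bundle still blocks until all buffered outputs have been produced.
  bundle.close();
}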
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/SdkHarnessClient.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/SdkHarnessClient.java
index 2799e58..08d50b0 100644
--- a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/SdkHarnessClient.java
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/SdkHarnessClient.java
@@ -25,10 +25,13 @@
import java.util.concurrent.CompletionStage;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.beam.model.fnexecution.v1.BeamFnApi;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionRequest;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionResponse;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleRequest;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleSplitRequest;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleSplitRequest.DesiredSplit;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.RegisterResponse;
import org.apache.beam.model.pipeline.v1.Endpoints;
import org.apache.beam.runners.fnexecution.data.FnDataService;
@@ -140,6 +143,7 @@
outputReceivers,
stateRequestHandler,
progressHandler,
+ BundleSplitHandler.unsupported(),
request -> {
throw new UnsupportedOperationException(
String.format(
@@ -174,6 +178,7 @@
Map<String, RemoteOutputReceiver<?>> outputReceivers,
StateRequestHandler stateRequestHandler,
BundleProgressHandler progressHandler,
+ BundleSplitHandler splitHandler,
BundleCheckpointHandler checkpointHandler,
BundleFinalizationHandler finalizationHandler) {
String bundleId = idGenerator.getId();
@@ -205,14 +210,15 @@
outputClients.put(receiver.getKey(), outputClient);
}
- ImmutableMap.Builder<String, CloseableFnDataReceiver> dataReceiversBuilder =
+ ImmutableMap.Builder<String, CountingFnDataReceiver> dataReceiversBuilder =
ImmutableMap.builder();
for (Map.Entry<String, RemoteInputDestination> remoteInput : remoteInputs.entrySet()) {
dataReceiversBuilder.put(
remoteInput.getKey(),
- fnApiDataService.send(
- LogicalEndpoint.of(bundleId, remoteInput.getValue().getPTransformId()),
- (Coder) remoteInput.getValue().getCoder()));
+ new CountingFnDataReceiver(
+ fnApiDataService.send(
+ LogicalEndpoint.of(bundleId, remoteInput.getValue().getPTransformId()),
+ (Coder) remoteInput.getValue().getCoder())));
}
return new ActiveBundle(
@@ -222,6 +228,7 @@
outputClients,
stateDelegator.registerForProcessBundleInstructionId(bundleId, stateRequestHandler),
progressHandler,
+ splitHandler,
checkpointHandler,
finalizationHandler);
}
@@ -231,122 +238,122 @@
return fnApiDataService.receive(
LogicalEndpoint.of(bundleId, ptransformId), receiver.getCoder(), receiver.getReceiver());
}
- }
- /** An active bundle for a particular {@link BeamFnApi.ProcessBundleDescriptor}. */
- public static class ActiveBundle implements RemoteBundle {
- private final String bundleId;
- private final CompletionStage<BeamFnApi.ProcessBundleResponse> response;
- private final Map<String, CloseableFnDataReceiver> inputReceivers;
- private final Map<String, InboundDataClient> outputClients;
- private final StateDelegator.Registration stateRegistration;
- private final BundleProgressHandler progressHandler;
- private final BundleCheckpointHandler checkpointHandler;
- private final BundleFinalizationHandler finalizationHandler;
+ /** An active bundle for a particular {@link BeamFnApi.ProcessBundleDescriptor}. */
+ public class ActiveBundle implements RemoteBundle {
+ private final String bundleId;
+ private final CompletionStage<BeamFnApi.ProcessBundleResponse> response;
+ private final Map<String, CountingFnDataReceiver> inputReceivers;
+ private final Map<String, InboundDataClient> outputClients;
+ private final StateDelegator.Registration stateRegistration;
+ private final BundleProgressHandler progressHandler;
+ private final BundleSplitHandler splitHandler;
+ private final BundleCheckpointHandler checkpointHandler;
+ private final BundleFinalizationHandler finalizationHandler;
- private ActiveBundle(
- String bundleId,
- CompletionStage<ProcessBundleResponse> response,
- Map<String, CloseableFnDataReceiver> inputReceivers,
- Map<String, InboundDataClient> outputClients,
- StateDelegator.Registration stateRegistration,
- BundleProgressHandler progressHandler,
- BundleCheckpointHandler checkpointHandler,
- BundleFinalizationHandler finalizationHandler) {
- this.bundleId = bundleId;
- this.response = response;
- this.inputReceivers = inputReceivers;
- this.outputClients = outputClients;
- this.stateRegistration = stateRegistration;
- this.progressHandler = progressHandler;
- this.checkpointHandler = checkpointHandler;
- this.finalizationHandler = finalizationHandler;
- }
+ private ActiveBundle(
+ String bundleId,
+ CompletionStage<ProcessBundleResponse> response,
+ Map<String, CountingFnDataReceiver> inputReceivers,
+ Map<String, InboundDataClient> outputClients,
+ StateDelegator.Registration stateRegistration,
+ BundleProgressHandler progressHandler,
+ BundleSplitHandler splitHandler,
+ BundleCheckpointHandler checkpointHandler,
+ BundleFinalizationHandler finalizationHandler) {
+ this.bundleId = bundleId;
+ this.response = response;
+ this.inputReceivers = inputReceivers;
+ this.outputClients = outputClients;
+ this.stateRegistration = stateRegistration;
+ this.progressHandler = progressHandler;
+ this.splitHandler = splitHandler;
+ this.checkpointHandler = checkpointHandler;
+ this.finalizationHandler = finalizationHandler;
+ }
- /** Returns an id used to represent this bundle. */
- @Override
- public String getId() {
- return bundleId;
- }
+ /** Returns an id used to represent this bundle. */
+ @Override
+ public String getId() {
+ return bundleId;
+ }
- /**
- * Get a map of PCollection ids to {@link FnDataReceiver receiver}s which consume input
- * elements, forwarding them to the remote environment.
- */
- @Override
- public Map<String, FnDataReceiver> getInputReceivers() {
- return (Map) inputReceivers;
- }
+ /**
+ * Get a map of PCollection ids to {@link FnDataReceiver receiver}s which consume input
+ * elements, forwarding them to the remote environment.
+ */
+ @Override
+ public Map<String, FnDataReceiver> getInputReceivers() {
+ return (Map) inputReceivers;
+ }
- /**
- * Blocks until bundle processing is finished. This is comprised of:
- *
- * <ul>
- * <li>closing each {@link #getInputReceivers() input receiver}.
- * <li>waiting for the SDK to say that processing the bundle is finished.
- * <li>waiting for all inbound data clients to complete
- * </ul>
- *
- * <p>This method will throw an exception if bundle processing has failed. {@link
- * Throwable#getSuppressed()} will return all the reasons as to why processing has failed.
- */
- @Override
- public void close() throws Exception {
- Exception exception = null;
- for (CloseableFnDataReceiver<?> inputReceiver : inputReceivers.values()) {
+ @Override
+ public void split(double fractionOfRemainder) {
+ Map<String, DesiredSplit> splits = new HashMap<>();
+ for (Map.Entry<String, CountingFnDataReceiver> ptransformToInput :
+ inputReceivers.entrySet()) {
+ splits.put(
+ ptransformToInput.getKey(),
+ DesiredSplit.newBuilder()
+ .setFractionOfRemainder(fractionOfRemainder)
+ .setEstimatedInputElements(ptransformToInput.getValue().getCount())
+ .build());
+ }
+ InstructionRequest request =
+ InstructionRequest.newBuilder()
+ .setInstructionId(idGenerator.getId())
+ .setProcessBundleSplit(
+ ProcessBundleSplitRequest.newBuilder()
+ .setInstructionId(bundleId)
+ .putAllDesiredSplits(splits)
+ .build())
+ .build();
+ CompletionStage<InstructionResponse> response = fnApiControlClient.handle(request);
+ response.thenAccept(
+ instructionResponse -> splitHandler.split(instructionResponse.getProcessBundleSplit()));
+ }
+
+ /**
+ * Blocks until bundle processing is finished. This is comprised of:
+ *
+ * <ul>
+ * <li>closing each {@link #getInputReceivers() input receiver}.
+ * <li>waiting for the SDK to say that processing the bundle is finished.
+ * <li>waiting for all inbound data clients to complete
+ * </ul>
+ *
+ * <p>This method will throw an exception if bundle processing has failed. {@link
+ * Throwable#getSuppressed()} will return all the reasons as to why processing has failed.
+ */
+ @Override
+ public void close() throws Exception {
+ Exception exception = null;
+ for (CloseableFnDataReceiver<?> inputReceiver : inputReceivers.values()) {
+ try {
+ inputReceiver.close();
+ } catch (Exception e) {
+ if (exception == null) {
+ exception = e;
+ } else {
+ exception.addSuppressed(e);
+ }
+ }
+ }
try {
- inputReceiver.close();
- } catch (Exception e) {
+ // We don't have to worry about the completion stage.
if (exception == null) {
- exception = e;
+ BeamFnApi.ProcessBundleResponse completedResponse = MoreFutures.get(response);
+ progressHandler.onCompleted(completedResponse);
+ if (completedResponse.getResidualRootsCount() > 0) {
+ checkpointHandler.onCheckpoint(completedResponse);
+ }
+ if (completedResponse.getRequiresFinalization()) {
+ finalizationHandler.requestsFinalization(bundleId);
+ }
} else {
- exception.addSuppressed(e);
- }
- }
- }
- try {
- // We don't have to worry about the completion stage.
- if (exception == null) {
- BeamFnApi.ProcessBundleResponse completedResponse = MoreFutures.get(response);
- progressHandler.onCompleted(completedResponse);
- if (completedResponse.getResidualRootsCount() > 0) {
- checkpointHandler.onCheckpoint(completedResponse);
- }
- if (completedResponse.getRequiresFinalization()) {
- finalizationHandler.requestsFinalization(bundleId);
- }
- } else {
- // TODO: [BEAM-3962] Handle aborting the bundle being processed.
- throw new IllegalStateException(
- "Processing bundle failed, TODO: [BEAM-3962] abort bundle.");
- }
- } catch (Exception e) {
- if (exception == null) {
- exception = e;
- } else {
- exception.addSuppressed(e);
- }
- }
- try {
- if (exception == null) {
- stateRegistration.deregister();
- } else {
- stateRegistration.abort();
- }
- } catch (Exception e) {
- if (exception == null) {
- exception = e;
- } else {
- exception.addSuppressed(e);
- }
- }
- for (InboundDataClient outputClient : outputClients.values()) {
- try {
- // If we failed processing this bundle, we should cancel all inbound data.
- if (exception == null) {
- outputClient.awaitCompletion();
- } else {
- outputClient.cancel();
+ // TODO: [BEAM-3962] Handle aborting the bundle being processed.
+ throw new IllegalStateException(
+ "Processing bundle failed, TODO: [BEAM-3962] abort bundle.");
}
} catch (Exception e) {
if (exception == null) {
@@ -355,9 +362,37 @@
exception.addSuppressed(e);
}
}
- }
- if (exception != null) {
- throw exception;
+ try {
+ if (exception == null) {
+ stateRegistration.deregister();
+ } else {
+ stateRegistration.abort();
+ }
+ } catch (Exception e) {
+ if (exception == null) {
+ exception = e;
+ } else {
+ exception.addSuppressed(e);
+ }
+ }
+ for (InboundDataClient outputClient : outputClients.values()) {
+ try {
+ if (exception == null) {
+ outputClient.awaitCompletion();
+ } else {
+ outputClient.cancel();
+ }
+ } catch (Exception e) {
+ if (exception == null) {
+ exception = e;
+ } else {
+ exception.addSuppressed(e);
+ }
+ }
+ }
+ if (exception != null) {
+ throw exception;
+ }
}
}
}
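
The close() path above accumulates every failure into a single exception via addSuppressed, which is what lets Throwable#getSuppressed() report all the reasons processing failed. A minimal, self-contained sketch of that aggregation pattern, with plain AutoCloseable resources standing in for the input receivers, state delegator, and inbound data clients:

import java.util.Arrays;
import java.util.List;

public class SuppressedCloseSketch {
  /** Close every resource, throwing one exception that carries the rest as suppressed. */
  static void closeAll(List<AutoCloseable> resources) throws Exception {
    Exception exception = null;
    for (AutoCloseable resource : resources) {
      try {
        resource.close();
      } catch (Exception e) {
        if (exception == null) {
          exception = e;
        } else {
          exception.addSuppressed(e);
        }
      }
    }
    if (exception != null) {
      throw exception; // Throwable#getSuppressed() lists every other close failure
    }
  }

  public static void main(String[] args) {
    AutoCloseable ok = () -> System.out.println("closed ok");
    AutoCloseable bad1 = () -> { throw new IllegalStateException("first failure"); };
    AutoCloseable bad2 = () -> { throw new IllegalStateException("second failure"); };
    try {
      closeAll(Arrays.asList(ok, bad1, bad2));
    } catch (Exception e) {
      System.out.println("primary: " + e.getMessage());
      for (Throwable suppressed : e.getSuppressed()) {
        System.out.println("suppressed: " + suppressed.getMessage());
      }
    }
  }
}
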
@@ -476,6 +511,38 @@
}
}
+ /**
+ * A {@link CloseableFnDataReceiver} which counts the number of elements that have been accepted.
+ */
+ private static class CountingFnDataReceiver<T> implements CloseableFnDataReceiver<T> {
+ private final CloseableFnDataReceiver delegate;
+ private long count;
+
+ private CountingFnDataReceiver(CloseableFnDataReceiver delegate) {
+ this.delegate = delegate;
+ }
+
+ public long getCount() {
+ return count;
+ }
+
+ @Override
+ public void accept(T input) throws Exception {
+ count += 1;
+ delegate.accept(input);
+ }
+
+ @Override
+ public void flush() throws Exception {
+ delegate.flush();
+ }
+
+ @Override
+ public void close() throws Exception {
+ delegate.close();
+ }
+ }
+
/** Registers a {@link BeamFnApi.ProcessBundleDescriptor} for future processing. */
private BundleProcessor create(
BeamFnApi.ProcessBundleDescriptor processBundleDescriptor,
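
The new split(fractionOfRemainder) request reports DesiredSplit.estimated_input_elements per transform, and CountingFnDataReceiver supplies that count by wrapping each input receiver. A self-contained sketch of the same counting-delegate pattern, using a simplified Receiver interface as a stand-in for CloseableFnDataReceiver (an assumption for illustration only):

import java.util.concurrent.atomic.AtomicLong;

public class CountingReceiverSketch {
  /** Simplified stand-in for CloseableFnDataReceiver. */
  interface Receiver<T> extends AutoCloseable {
    void accept(T input) throws Exception;
  }

  /** Wraps a delegate and counts accepted elements, mirroring CountingFnDataReceiver. */
  static final class Counting<T> implements Receiver<T> {
    private final Receiver<T> delegate;
    private final AtomicLong count = new AtomicLong();

    Counting(Receiver<T> delegate) {
      this.delegate = delegate;
    }

    long getCount() {
      return count.get();
    }

    @Override
    public void accept(T input) throws Exception {
      count.incrementAndGet(); // count before forwarding, like the real receiver
      delegate.accept(input);
    }

    @Override
    public void close() throws Exception {
      delegate.close();
    }
  }

  public static void main(String[] args) throws Exception {
    Counting<String> counting =
        new Counting<>(
            new Receiver<String>() {
              @Override public void accept(String input) { System.out.println("got " + input); }
              @Override public void close() {}
            });
    counting.accept("a");
    counting.accept("b");
    // This count would feed DesiredSplit.estimated_input_elements in a split request.
    System.out.println("elements seen: " + counting.getCount());
  }
}
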
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactory.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactory.java
index 154aaec..5ec4a39 100644
--- a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactory.java
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactory.java
@@ -159,25 +159,31 @@
containerId = docker.runImage(containerImage, dockerOptsBuilder.build(), argsBuilder.build());
LOG.debug("Created Docker Container with Container ID {}", containerId);
// Wait on a client from the gRPC server.
- try {
- instructionHandler = clientSource.take(workerId, Duration.ofMinutes(1));
- } catch (TimeoutException timeoutEx) {
- RuntimeException runtimeException =
- new RuntimeException(
- String.format(
- "Docker container %s failed to start up successfully within 1 minute.",
- containerImage),
- timeoutEx);
+ while (instructionHandler == null) {
try {
- String containerLogs = docker.getContainerLogs(containerId);
- LOG.error("Docker container {} logs:\n{}", containerId, containerLogs);
- } catch (Exception getLogsException) {
- runtimeException.addSuppressed(getLogsException);
+ // If the container is not running anymore, we abort.
+ if (!docker.isContainerRunning(containerId)) {
+ IllegalStateException illegalStateException =
+ new IllegalStateException(
+ String.format("No container running for id %s", containerId));
+ try {
+ String containerLogs = docker.getContainerLogs(containerId);
+ LOG.error("Docker container {} logs:\n{}", containerId, containerLogs);
+ } catch (Exception getLogsException) {
+ illegalStateException.addSuppressed(getLogsException);
+ }
+ throw illegalStateException;
+ }
+ instructionHandler = clientSource.take(workerId, Duration.ofSeconds(5));
+ } catch (TimeoutException timeoutEx) {
+ LOG.info(
+ "Still waiting for startup of environment {} for worker id {}",
+ dockerPayload.getContainerImage(),
+ workerId);
+ } catch (InterruptedException interruptEx) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException(interruptEx);
}
- throw runtimeException;
- } catch (InterruptedException interruptEx) {
- Thread.currentThread().interrupt();
- throw new RuntimeException(interruptEx);
}
} catch (Exception e) {
if (containerId != null) {
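
Instead of a single one-minute take() that fails on timeout, the factory now polls in short intervals and checks container liveness between attempts, so startup failures surface quickly while slow startups no longer abort. A standalone sketch of that retry loop; the ClientSource interface and isAlive probe below are simplified stand-ins for the control client pool and the Docker liveness check:

import java.time.Duration;
import java.util.concurrent.TimeoutException;
import java.util.function.BooleanSupplier;

public class StartupPollSketch {
  /** Stand-in for a client source whose take() times out until the worker connects. */
  interface ClientSource<T> {
    T take(String workerId, Duration timeout) throws TimeoutException, InterruptedException;
  }

  static <T> T waitForClient(ClientSource<T> source, String workerId, BooleanSupplier isAlive) {
    T handler = null;
    while (handler == null) {
      try {
        // Abort as soon as the environment (container, process, ...) is no longer alive.
        if (!isAlive.getAsBoolean()) {
          throw new IllegalStateException("Environment for worker " + workerId + " is gone");
        }
        handler = source.take(workerId, Duration.ofSeconds(5));
      } catch (TimeoutException e) {
        System.out.println("Still waiting for startup of worker " + workerId);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
      }
    }
    return handler;
  }

  public static void main(String[] args) {
    // Simulate a worker that connects after roughly 7 seconds while the environment stays alive.
    long start = System.currentTimeMillis();
    ClientSource<String> source =
        (id, timeout) -> {
          if (System.currentTimeMillis() - start < 7_000) {
            Thread.sleep(timeout.toMillis()); // emulate a blocking take that times out
            throw new TimeoutException();
          }
          return "instruction-handler-for-" + id;
        };
    System.out.println(waitForClient(source, "worker-1", () -> true));
  }
}
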
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/EmbeddedEnvironmentFactory.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/EmbeddedEnvironmentFactory.java
index a696770..a06a5d7 100644
--- a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/EmbeddedEnvironmentFactory.java
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/EmbeddedEnvironmentFactory.java
@@ -23,6 +23,7 @@
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
+import java.util.concurrent.TimeoutException;
import org.apache.beam.fn.harness.FnHarness;
import org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor;
import org.apache.beam.model.pipeline.v1.RunnerApi.Environment;
@@ -123,8 +124,23 @@
}
});
- // TODO: find some way to populate the actual ID in FnHarness.main()
- InstructionRequestHandler handler = clientSource.take("", Duration.ofMinutes(1L));
+ InstructionRequestHandler handler = null;
+ // Wait on a client from the gRPC server.
+ while (handler == null) {
+ try {
+ // If the executor has shut down, we abort.
+ if (executor.isShutdown()) {
+ throw new IllegalStateException("FnHarness startup failed");
+ }
+ // TODO: find some way to populate the actual ID in FnHarness.main()
+ handler = clientSource.take("", Duration.ofSeconds(5L));
+ } catch (TimeoutException timeoutEx) {
+ LOG.info("Still waiting for startup of FnHarness");
+ } catch (InterruptedException interruptEx) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException(interruptEx);
+ }
+ }
return RemoteEnvironment.forHandler(environment, handler);
}
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/ProcessEnvironmentFactory.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/ProcessEnvironmentFactory.java
index a90a245..539d76a 100644
--- a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/ProcessEnvironmentFactory.java
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/ProcessEnvironmentFactory.java
@@ -135,7 +135,7 @@
try {
// If the process is not alive anymore, we abort.
process.isAliveOrThrow();
- instructionHandler = clientSource.take(workerId, Duration.ofMinutes(2));
+ instructionHandler = clientSource.take(workerId, Duration.ofSeconds(5));
} catch (TimeoutException timeoutEx) {
LOG.info(
"Still waiting for startup of environment '{}' for worker id {}",
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/ProcessManager.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/ProcessManager.java
index 65fcdf2..f1d88d0 100644
--- a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/ProcessManager.java
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/ProcessManager.java
@@ -47,19 +47,12 @@
/** A list of all managers to ensure all processes shut down on JVM exit. */
private static final List<ProcessManager> ALL_PROCESS_MANAGERS = new ArrayList<>();
- static {
- // Install a shutdown hook to ensure processes are stopped/killed.
- Runtime.getRuntime().addShutdownHook(ShutdownHook.create());
- }
+ @VisibleForTesting static Thread shutdownHook = null;
private final Map<String, Process> processes;
public static ProcessManager create() {
- synchronized (ALL_PROCESS_MANAGERS) {
- ProcessManager processManager = new ProcessManager();
- ALL_PROCESS_MANAGERS.add(processManager);
- return processManager;
- }
+ return new ProcessManager();
}
private ProcessManager() {
@@ -126,6 +119,7 @@
return startProcess(id, command, args, env, outputFile);
}
+ @SuppressFBWarnings("ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD")
public RunningProcess startProcess(
String id, String command, List<String> args, Map<String, String> env, File outputFile)
throws IOException {
@@ -149,6 +143,15 @@
LOG.debug("Attempting to start process with command: {}", pb.command());
Process newProcess = pb.start();
Process oldProcess = processes.put(id, newProcess);
+ synchronized (ALL_PROCESS_MANAGERS) {
+ if (!ALL_PROCESS_MANAGERS.contains(this)) {
+ ALL_PROCESS_MANAGERS.add(this);
+ }
+ if (shutdownHook == null) {
+ shutdownHook = ShutdownHook.create();
+ Runtime.getRuntime().addShutdownHook(shutdownHook);
+ }
+ }
if (oldProcess != null) {
stopProcess(id, oldProcess);
stopProcess(id, newProcess);
@@ -159,10 +162,23 @@
}
/** Stops a previously started process identified by its unique id. */
+ @SuppressFBWarnings("ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD")
public void stopProcess(String id) {
checkNotNull(id, "Process id must not be null");
- Process process = checkNotNull(processes.remove(id), "Process for id does not exist: " + id);
- stopProcess(id, process);
+ try {
+ Process process = checkNotNull(processes.remove(id), "Process for id does not exist: " + id);
+ stopProcess(id, process);
+ } finally {
+ synchronized (ALL_PROCESS_MANAGERS) {
+ if (processes.isEmpty()) {
+ ALL_PROCESS_MANAGERS.remove(this);
+ }
+ if (ALL_PROCESS_MANAGERS.isEmpty() && shutdownHook != null) {
+ Runtime.getRuntime().removeShutdownHook(shutdownHook);
+ shutdownHook = null;
+ }
+ }
+ }
}
private void stopProcess(String id, Process process) {
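
ProcessManager now installs the JVM shutdown hook lazily when the first process starts and removes it once no live processes remain, instead of registering a hook at class-load time. A small self-contained sketch of that register-on-first-use / remove-on-last-use pattern, tracking plain string ids rather than real subprocesses:

import java.util.HashSet;
import java.util.Set;

public class LazyShutdownHookSketch {
  private static final Object LOCK = new Object();
  private static Thread shutdownHook; // installed only while something needs cleanup
  private static final Set<String> LIVE = new HashSet<>();

  static void onStart(String id) {
    synchronized (LOCK) {
      LIVE.add(id);
      if (shutdownHook == null) {
        shutdownHook = new Thread(() -> System.err.println("cleaning up: " + LIVE));
        Runtime.getRuntime().addShutdownHook(shutdownHook);
      }
    }
  }

  static void onStop(String id) {
    synchronized (LOCK) {
      LIVE.remove(id);
      if (LIVE.isEmpty() && shutdownHook != null) {
        // Nothing left to clean up; unregister so the hook does not leak across jobs.
        Runtime.getRuntime().removeShutdownHook(shutdownHook);
        shutdownHook = null;
      }
    }
  }

  public static void main(String[] args) {
    onStart("1");
    onStart("2");
    onStop("1"); // hook stays installed, "2" is still alive
    onStop("2"); // hook removed again
    System.out.println("hook installed: " + (shutdownHook != null));
  }
}
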
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/jobsubmission/PortablePipelineJarCreator.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/jobsubmission/PortablePipelineJarCreator.java
index 951a8cb..c14098a 100644
--- a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/jobsubmission/PortablePipelineJarCreator.java
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/jobsubmission/PortablePipelineJarCreator.java
@@ -49,13 +49,14 @@
import org.apache.beam.model.jobmanagement.v1.JobApi;
import org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline;
import org.apache.beam.runners.core.construction.PipelineOptionsTranslation;
-import org.apache.beam.runners.core.construction.PipelineResources;
+import org.apache.beam.runners.core.construction.resources.PipelineResources;
import org.apache.beam.runners.fnexecution.GrpcFnServer;
import org.apache.beam.runners.fnexecution.InProcessServerFactory;
import org.apache.beam.runners.fnexecution.artifact.BeamFileSystemArtifactRetrievalService;
import org.apache.beam.runners.fnexecution.provisioning.JobInfo;
import org.apache.beam.sdk.fn.test.InProcessManagedChannelFactory;
import org.apache.beam.sdk.metrics.MetricResults;
+import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PortablePipelineOptions;
import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.MessageOrBuilder;
import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.util.JsonFormat;
@@ -106,7 +107,7 @@
new JarOutputStream(new FileOutputStream(outputFile), createManifest(mainClass, jobName));
outputChannel = Channels.newChannel(outputStream);
PortablePipelineJarUtils.writeDefaultJobName(outputStream, jobName);
- writeClassPathResources(mainClass.getClassLoader());
+ writeClassPathResources(mainClass.getClassLoader(), pipelineOptions);
writeAsJson(pipeline, PortablePipelineJarUtils.getPipelineUri(jobName));
writeAsJson(
PipelineOptionsTranslation.toProto(pipelineOptions),
@@ -144,9 +145,10 @@
}
/** Copy resources from {@code classLoader} to {@link #outputStream}. */
- private void writeClassPathResources(ClassLoader classLoader) throws IOException {
+ private void writeClassPathResources(ClassLoader classLoader, PipelineOptions options)
+ throws IOException {
List<String> classPathResources =
- PipelineResources.detectClassPathResourcesToStage(classLoader);
+ PipelineResources.detectClassPathResourcesToStage(classLoader, options);
Preconditions.checkArgument(
classPathResources.size() == 1, "Expected exactly one jar on " + classLoader.toString());
copyResourcesFromJar(new JarFile(classPathResources.get(0)));
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/splittabledofn/SDFFeederViaStateAndTimers.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/splittabledofn/SDFFeederViaStateAndTimers.java
index 38a885f..920dae6 100644
--- a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/splittabledofn/SDFFeederViaStateAndTimers.java
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/splittabledofn/SDFFeederViaStateAndTimers.java
@@ -44,7 +44,7 @@
import org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.ByteString;
-import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.util.Timestamps;
+import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.util.Durations;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables;
import org.joda.time.Instant;
@@ -152,7 +152,8 @@
holdState.add(watermarkHold);
Instant requestedWakeupTime =
- new Instant(Timestamps.toMillis(residual.getRequestedExecutionTime()));
+ new Instant(
+ System.currentTimeMillis() + Durations.toMillis(residual.getRequestedTimeDelay()));
Instant wakeupTime =
timerInternals.currentProcessingTime().isBefore(requestedWakeupTime)
? requestedWakeupTime
@@ -160,7 +161,12 @@
// Set a timer to continue processing this element.
timerInternals.setTimer(
- stateNamespace, "sdfContinuation", wakeupTime, TimeDomain.PROCESSING_TIME);
+ stateNamespace,
+ "sdfContinuation",
+ "sdfContinuation",
+ wakeupTime,
+ wakeupTime,
+ TimeDomain.PROCESSING_TIME);
}
/** Signals that a split happened. */
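
The residual application now carries a relative requested_time_delay rather than an absolute requested_execution_time, so the continuation timer fires at the current wall clock plus that delay, but never earlier than the current processing time. A short sketch of the computation, using java.time instead of the vendored protobuf Durations and Joda Instant used in the real code:

import java.time.Duration;
import java.time.Instant;

public class WakeupTimeSketch {
  /** Wakeup = max(now + requestedDelay, currentProcessingTime), mirroring the timer logic. */
  static Instant wakeupTime(Instant currentProcessingTime, Duration requestedTimeDelay) {
    Instant requested = Instant.now().plus(requestedTimeDelay);
    return currentProcessingTime.isBefore(requested) ? requested : currentProcessingTime;
  }

  public static void main(String[] args) {
    Instant wakeup = wakeupTime(Instant.now(), Duration.ofSeconds(30));
    System.out.println("continuation timer would fire at " + wakeup);
  }
}
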
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/status/BeamWorkerStatusGrpcService.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/status/BeamWorkerStatusGrpcService.java
new file mode 100644
index 0000000..ce7498b
--- /dev/null
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/status/BeamWorkerStatusGrpcService.java
@@ -0,0 +1,223 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.fnexecution.status;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.WorkerStatusRequest;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.WorkerStatusResponse;
+import org.apache.beam.model.fnexecution.v1.BeamFnWorkerStatusGrpc.BeamFnWorkerStatusImplBase;
+import org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor;
+import org.apache.beam.runners.fnexecution.FnService;
+import org.apache.beam.runners.fnexecution.HeaderAccessor;
+import org.apache.beam.vendor.grpc.v1p21p0.io.grpc.stub.StreamObserver;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Strings;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A Fn Status service which can collect run-time status information from SDK harnesses for
+ * debugging purposes.
+ */
+public class BeamWorkerStatusGrpcService extends BeamFnWorkerStatusImplBase implements FnService {
+
+ private static final Logger LOG = LoggerFactory.getLogger(BeamWorkerStatusGrpcService.class);
+ private static final String DEFAULT_EXCEPTION_RESPONSE =
+ "Error: exception encountered getting status from SDK harness";
+
+ private final HeaderAccessor headerAccessor;
+ private final Map<String, CompletableFuture<WorkerStatusClient>> connectedClient =
+ Collections.synchronizedMap(new HashMap<>());
+ private final AtomicBoolean isClosed = new AtomicBoolean();
+
+ private BeamWorkerStatusGrpcService(
+ ApiServiceDescriptor apiServiceDescriptor, HeaderAccessor headerAccessor) {
+ this.headerAccessor = headerAccessor;
+ LOG.info("Launched Beam Fn Status service at {}", apiServiceDescriptor);
+ }
+
+ /**
+ * Create a new instance of {@link BeamWorkerStatusGrpcService}.
+ *
+ * @param apiServiceDescriptor describes the configuration for the endpoint the server will
+ * expose.
+ * @param headerAccessor gRPC header accessor used to obtain the SDK harness worker id.
+ * @return {@link BeamWorkerStatusGrpcService}
+ */
+ public static BeamWorkerStatusGrpcService create(
+ ApiServiceDescriptor apiServiceDescriptor, HeaderAccessor headerAccessor) {
+ return new BeamWorkerStatusGrpcService(apiServiceDescriptor, headerAccessor);
+ }
+
+ @Override
+ public void close() throws Exception {
+ if (isClosed.getAndSet(true)) {
+ return;
+ }
+ synchronized (connectedClient) {
+ for (CompletableFuture<WorkerStatusClient> clientFuture : connectedClient.values()) {
+ if (clientFuture.isDone()) {
+ clientFuture.get().close();
+ }
+ }
+ connectedClient.clear();
+ }
+ }
+
+ @Override
+ public StreamObserver<WorkerStatusResponse> workerStatus(
+ StreamObserver<WorkerStatusRequest> requestObserver) {
+ if (isClosed.get()) {
+ throw new IllegalStateException("BeamWorkerStatusGrpcService already closed.");
+ }
+ String workerId = headerAccessor.getSdkWorkerId();
+ LOG.info("Beam Fn Status client connected with id {}", workerId);
+
+ WorkerStatusClient fnApiStatusClient =
+ WorkerStatusClient.forRequestObserver(workerId, requestObserver);
+ connectedClient.compute(
+ workerId,
+ (k, existingClientFuture) -> {
+ if (existingClientFuture != null) {
+ try {
+ if (existingClientFuture.isDone()) {
+ LOG.info(
+ "SDK Worker {} was connected to status server previously, disconnecting old client",
+ workerId);
+ existingClientFuture.get().close();
+ } else {
+ existingClientFuture.complete(fnApiStatusClient);
+ return existingClientFuture;
+ }
+ } catch (IOException | InterruptedException | ExecutionException e) {
+ LOG.warn("Error closing worker status client", e);
+ }
+ }
+ return CompletableFuture.completedFuture(fnApiStatusClient);
+ });
+ return fnApiStatusClient.getResponseObserver();
+ }
+
+ /**
+ * Get the latest SDK worker status from the client's corresponding SDK harness.
+ *
+ * @param workerId worker id of the SDK harness.
+ * @param timeout max time to wait for the status response.
+ * @param timeUnit time unit of the timeout.
+ * @return the worker status response from the SDK harness, or an error message if it could not
+ * be obtained within the timeout.
+ */
+ public String getSingleWorkerStatus(String workerId, long timeout, TimeUnit timeUnit) {
+ if (isClosed.get()) {
+ throw new IllegalStateException("BeamWorkerStatusGrpcService already closed.");
+ }
+ try {
+ return getWorkerStatus(workerId).get(timeout, timeUnit);
+ } catch (InterruptedException | ExecutionException | TimeoutException e) {
+ return handleAndReturnExceptionResponse(e);
+ }
+ }
+
+ /**
+ * Get all the statuses from all connected SDK harnesses within the specified timeout. Any errors
+ * getting status from the SDK harnesses will be returned in the map.
+ *
+ * @param timeout max time waiting for the response from each SDK harness.
+ * @param timeUnit timeout time unit.
+ * @return All the statuses in a map keyed by the SDK harness id.
+ */
+ public Map<String, String> getAllWorkerStatuses(long timeout, TimeUnit timeUnit) {
+ if (isClosed.get()) {
+ throw new IllegalStateException("BeamWorkerStatusGrpcService already closed.");
+ }
+ // return result in worker id sorted map.
+ Map<String, String> allStatuses = new ConcurrentSkipListMap<>(Comparator.naturalOrder());
+ Set<String> connectedClientIdsCopy;
+ synchronized (connectedClient) {
+ connectedClientIdsCopy = ImmutableSet.copyOf(connectedClient.keySet());
+ }
+ connectedClientIdsCopy
+ .parallelStream()
+ .forEach(
+ workerId ->
+ allStatuses.put(workerId, getSingleWorkerStatus(workerId, timeout, timeUnit)));
+
+ return allStatuses;
+ }
+
+ @VisibleForTesting
+ CompletableFuture<String> getWorkerStatus(String workerId) {
+ CompletableFuture<WorkerStatusClient> statusClient;
+ try {
+ statusClient = getStatusClient(workerId);
+ if (!statusClient.isDone()) {
+ return CompletableFuture.completedFuture("Error: Not connected.");
+ }
+ CompletableFuture<WorkerStatusResponse> future = statusClient.get().getWorkerStatus();
+ return future.thenApply(this::getStatusErrorOrInfo);
+ } catch (ExecutionException | InterruptedException e) {
+ return CompletableFuture.completedFuture(handleAndReturnExceptionResponse(e));
+ }
+ }
+
+ /**
+ * Get the status API client connected to the SDK harness with the specified workerId.
+ *
+ * @param workerId worker id of the SDK harness.
+ * @return CompletableFuture of {@link WorkerStatusClient}.
+ */
+ @VisibleForTesting
+ CompletableFuture<WorkerStatusClient> getStatusClient(String workerId) {
+ return connectedClient.computeIfAbsent(workerId, k -> new CompletableFuture<>());
+ }
+
+ /**
+ * Return Error field from WorkerStatusResponse if not empty, otherwise return the StatusInfo
+ * field.
+ */
+ private String getStatusErrorOrInfo(WorkerStatusResponse response) {
+ return !Strings.isNullOrEmpty(response.getError())
+ ? response.getError()
+ : response.getStatusInfo();
+ }
+
+ private String handleAndReturnExceptionResponse(Exception e) {
+ LOG.warn(DEFAULT_EXCEPTION_RESPONSE, e);
+ if (e instanceof InterruptedException) {
+ Thread.currentThread().interrupt();
+ }
+ StringBuilder response = new StringBuilder();
+ response
+ .append(DEFAULT_EXCEPTION_RESPONSE)
+ .append(": ")
+ .append(e.getClass().getCanonicalName());
+ if (e.getMessage() != null) {
+ response.append(": ").append(e.getMessage());
+ }
+ return response.toString();
+ }
+}
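
A runner hosts this service like any other FnService and can then pull status snapshots from every connected harness. A rough usage sketch assembled from the calls exercised in the new BeamWorkerStatusGrpcServiceTest further below; the in-process server, endpoint name, and 10-second timeout are illustrative assumptions, not prescribed usage:

import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor;
import org.apache.beam.runners.fnexecution.GrpcContextHeaderAccessorProvider;
import org.apache.beam.runners.fnexecution.GrpcFnServer;
import org.apache.beam.runners.fnexecution.InProcessServerFactory;
import org.apache.beam.runners.fnexecution.status.BeamWorkerStatusGrpcService;

public class WorkerStatusServiceSketch {
  public static void main(String[] args) throws Exception {
    // Create the status service; SDK harnesses connect to the advertised descriptor.
    BeamWorkerStatusGrpcService service =
        BeamWorkerStatusGrpcService.create(
            ApiServiceDescriptor.newBuilder().setUrl("worker-status").build(),
            GrpcContextHeaderAccessorProvider.getHeaderAccessor());
    GrpcFnServer<BeamWorkerStatusGrpcService> server =
        GrpcFnServer.allocatePortAndCreateFor(service, InProcessServerFactory.create());

    // Once harnesses have connected, dump a status snapshot from every worker.
    Map<String, String> statuses = service.getAllWorkerStatuses(10, TimeUnit.SECONDS);
    statuses.forEach((workerId, status) -> System.out.println(workerId + ":\n" + status));

    server.close();
    service.close();
  }
}
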
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/status/WorkerStatusClient.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/status/WorkerStatusClient.java
new file mode 100644
index 0000000..dbd7fa8
--- /dev/null
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/status/WorkerStatusClient.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.fnexecution.status;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.WorkerStatusRequest;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.WorkerStatusResponse;
+import org.apache.beam.sdk.fn.IdGenerator;
+import org.apache.beam.sdk.fn.IdGenerators;
+import org.apache.beam.sdk.fn.stream.SynchronizedStreamObserver;
+import org.apache.beam.vendor.grpc.v1p21p0.io.grpc.stub.StreamObserver;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Client for handling requests and responses over the Fn Worker Status API between the runner
+ * and the SDK harness.
+ */
+class WorkerStatusClient implements Closeable {
+
+ public static final Logger LOG = LoggerFactory.getLogger(WorkerStatusClient.class);
+ private final IdGenerator idGenerator = IdGenerators.incrementingLongs();
+ private final StreamObserver<WorkerStatusRequest> requestReceiver;
+ private final Map<String, CompletableFuture<WorkerStatusResponse>> pendingResponses =
+ Collections.synchronizedMap(new HashMap<>());
+ private final String workerId;
+ private AtomicBoolean isClosed = new AtomicBoolean(false);
+
+ private WorkerStatusClient(String workerId, StreamObserver<WorkerStatusRequest> requestReceiver) {
+ this.requestReceiver = SynchronizedStreamObserver.wrapping(requestReceiver);
+ this.workerId = workerId;
+ }
+
+ /**
+ * Create a new status API client with the SDK harness worker id and request observer.
+ *
+ * @param workerId SDK harness worker id.
+ * @param requestObserver The outbound request observer this client uses to send new status
+ * requests to its corresponding SDK harness.
+ * @return {@link WorkerStatusClient}
+ */
+ public static WorkerStatusClient forRequestObserver(
+ String workerId, StreamObserver<WorkerStatusRequest> requestObserver) {
+ return new WorkerStatusClient(workerId, requestObserver);
+ }
+
+ /**
+ * Get the latest SDK worker status from the client's corresponding SDK harness. A generated id
+ * will be used to specify the request_id field.
+ *
+ * @return {@link CompletableFuture} of the SDK harness status response.
+ */
+ public CompletableFuture<WorkerStatusResponse> getWorkerStatus() {
+ WorkerStatusRequest request =
+ WorkerStatusRequest.newBuilder().setId(idGenerator.getId()).build();
+ return getWorkerStatus(request);
+ }
+
+ /**
+ * Get the latest SDK worker status from the client's corresponding SDK harness using the given request.
+ *
+ * @param request WorkerStatusRequest to be sent to SDK harness.
+ * @return {@link CompletableFuture} of the SDK harness status response.
+ */
+ CompletableFuture<WorkerStatusResponse> getWorkerStatus(WorkerStatusRequest request) {
+ CompletableFuture<WorkerStatusResponse> future = new CompletableFuture<>();
+ if (isClosed.get()) {
+ future.completeExceptionally(new RuntimeException("Worker status client already closed."));
+ return future;
+ }
+ this.pendingResponses.put(request.getId(), future);
+ this.requestReceiver.onNext(request);
+ return future;
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (isClosed.getAndSet(true)) {
+ return;
+ }
+ synchronized (pendingResponses) {
+ for (CompletableFuture<WorkerStatusResponse> pendingResponse : pendingResponses.values()) {
+ pendingResponse.completeExceptionally(
+ new RuntimeException("Fn Status Api client shut down while waiting for the request"));
+ }
+ pendingResponses.clear();
+ }
+ requestReceiver.onCompleted();
+ }
+
+ /** Check if the client connection has already been closed. */
+ public boolean isClosed() {
+ return isClosed.get();
+ }
+
+ /** Get the worker id for the client's corresponding SDK harness. */
+ public String getWorkerId() {
+ return this.workerId;
+ }
+
+ /** Get the response observer of this client for retrieving inbound worker status responses. */
+ public StreamObserver<WorkerStatusResponse> getResponseObserver() {
+ return new ResponseStreamObserver();
+ }
+
+ /**
+ * ResponseObserver for handling status responses. Each request is cached by its request_id.
+ * Upon receiving a response from the SDK harness through this StreamObserver, the future
+ * mapped to the same request_id is completed accordingly.
+ */
+ private class ResponseStreamObserver implements StreamObserver<WorkerStatusResponse> {
+
+ @Override
+ public void onNext(WorkerStatusResponse response) {
+ if (isClosed.get()) {
+ return;
+ }
+ CompletableFuture<WorkerStatusResponse> future = pendingResponses.remove(response.getId());
+ if (future != null) {
+ future.complete(response);
+ } else {
+ LOG.warn(
+ String.format(
+ "Received response for status with unknown response id %s and status %s",
+ response.getId(), response.getStatusInfo()));
+ }
+ }
+
+ @Override
+ public void onError(Throwable throwable) {
+ LOG.error("{} received error {}", WorkerStatusClient.class.getSimpleName(), throwable);
+ onCompleted();
+ }
+
+ @Override
+ public void onCompleted() {
+ try {
+ close();
+ } catch (IOException e) {
+ LOG.warn("Error closing Fn status api client", e);
+ }
+ }
+ }
+}
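
Internally, the client correlates each outgoing WorkerStatusRequest with a pending CompletableFuture keyed by request id and completes that future when the matching response arrives on the stream. A self-contained sketch of that correlation pattern, with plain strings standing in for the proto request and response messages:

import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

public class RequestCorrelationSketch {
  private final AtomicLong ids = new AtomicLong();
  private final Map<String, CompletableFuture<String>> pending = new ConcurrentHashMap<>();

  /** Issue a request and return a future completed once the matching response arrives. */
  CompletableFuture<String> request() {
    String id = Long.toString(ids.incrementAndGet());
    CompletableFuture<String> future = new CompletableFuture<>();
    pending.put(id, future);
    // In the real client this is requestReceiver.onNext(a WorkerStatusRequest carrying this id).
    System.out.println("sent status request " + id);
    return future;
  }

  /** Called when a response with the given id arrives (see ResponseStreamObserver#onNext). */
  void onResponse(String id, String statusInfo) {
    CompletableFuture<String> future = pending.remove(id);
    if (future != null) {
      future.complete(statusInfo);
    } else {
      System.out.println("response for unknown request id " + id);
    }
  }

  public static void main(String[] args) throws Exception {
    RequestCorrelationSketch client = new RequestCorrelationSketch();
    CompletableFuture<String> status = client.request();
    client.onResponse("1", "threads: 12, heap: 256MB"); // simulated harness reply
    System.out.println(status.get());
  }
}
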
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/FlinkClassloading.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/status/package-info.java
similarity index 65%
rename from runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/FlinkClassloading.java
rename to runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/status/package-info.java
index a114f40..65edce9 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/FlinkClassloading.java
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/status/package-info.java
@@ -15,16 +15,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.beam.runners.flink.translation.utils;
-import com.fasterxml.jackson.databind.type.TypeFactory;
-
-/** Utilities for dealing with classloading. */
-public class FlinkClassloading {
-
- public static void deleteStaticCaches() {
- // Clear cache to get rid of any references to the Flink Classloader
- // See https://jira.apache.org/jira/browse/BEAM-6460
- TypeFactory.defaultInstance().clearCache();
- }
-}
+/** Worker Status API services. */
+package org.apache.beam.runners.fnexecution.status;
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/wire/WireCoders.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/wire/WireCoders.java
index 46d894a..f37901b 100644
--- a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/wire/WireCoders.java
+++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/wire/WireCoders.java
@@ -17,6 +17,8 @@
*/
package org.apache.beam.runners.fnexecution.wire;
+import static org.apache.beam.runners.core.construction.BeamUrns.getUrn;
+import static org.apache.beam.runners.core.construction.graph.ExecutableStage.DEFAULT_WIRE_CODER_SETTING;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
import java.io.IOException;
@@ -41,8 +43,10 @@
* @return id of a windowed value coder containing the PCollection's element coder
*/
public static String addSdkWireCoder(
- PCollectionNode pCollectionNode, RunnerApi.Components.Builder components) {
- return addWireCoder(pCollectionNode, components, false);
+ PCollectionNode pCollectionNode,
+ RunnerApi.Components.Builder components,
+ RunnerApi.WireCoderSetting wireCoderSetting) {
+ return addWireCoder(pCollectionNode, components, false, wireCoderSetting);
}
/**
@@ -54,22 +58,38 @@
* @return id of a windowed value coder containing the PCollection's element coder
*/
public static String addRunnerWireCoder(
- PCollectionNode pCollectionNode, RunnerApi.Components.Builder components) {
- return addWireCoder(pCollectionNode, components, true);
+ PCollectionNode pCollectionNode,
+ RunnerApi.Components.Builder components,
+ RunnerApi.WireCoderSetting wireCoderSetting) {
+ return addWireCoder(pCollectionNode, components, true, wireCoderSetting);
}
/**
* Instantiates a runner-side wire coder for the given PCollection. Any component coders that are
* unknown by the runner are replaced with length-prefixed byte arrays.
*
- * @return a windowed value coder containing the PCollection's element coder
+ * @return a full windowed value coder containing the PCollection's element coder
*/
public static <T> Coder<WindowedValue<T>> instantiateRunnerWireCoder(
PCollectionNode pCollectionNode, RunnerApi.Components components) throws IOException {
+ return instantiateRunnerWireCoder(pCollectionNode, components, DEFAULT_WIRE_CODER_SETTING);
+ }
+
+ /**
+ * Instantiates a runner-side wire coder for the given PCollection. Any component coders that are
+ * unknown by the runner are replaced with length-prefixed byte arrays.
+ *
+ * @return a full or parameterized windowed value coder containing the PCollection's element coder
+ */
+ public static <T> Coder<WindowedValue<T>> instantiateRunnerWireCoder(
+ PCollectionNode pCollectionNode,
+ RunnerApi.Components components,
+ RunnerApi.WireCoderSetting wireCoderSetting)
+ throws IOException {
// NOTE: We discard the new set of components so we don't bother to ensure it's consistent with
// the caller's view.
RunnerApi.Components.Builder builder = components.toBuilder();
- String protoCoderId = addRunnerWireCoder(pCollectionNode, builder);
+ String protoCoderId = addRunnerWireCoder(pCollectionNode, builder, wireCoderSetting);
Coder<?> javaCoder = RehydratedComponents.forComponents(builder.build()).getCoder(protoCoderId);
checkArgument(
javaCoder instanceof WindowedValue.FullWindowedValueCoder,
@@ -83,13 +103,31 @@
private static String addWireCoder(
PCollectionNode pCollectionNode,
RunnerApi.Components.Builder components,
- boolean useByteArrayCoder) {
+ boolean useByteArrayCoder,
+ RunnerApi.WireCoderSetting wireCoderSetting) {
String elementCoderId = pCollectionNode.getPCollection().getCoderId();
String windowingStrategyId = pCollectionNode.getPCollection().getWindowingStrategyId();
String windowCoderId =
components.getWindowingStrategiesOrThrow(windowingStrategyId).getWindowCoderId();
- RunnerApi.Coder windowedValueCoder =
- ModelCoders.windowedValueCoder(elementCoderId, windowCoderId);
+
+ // decide type of windowedValueCoder according to the wire coder setting.
+ RunnerApi.Coder windowedValueCoder;
+ String wireCoderUrn = wireCoderSetting.getUrn();
+ if (wireCoderUrn.equals(getUrn(RunnerApi.StandardCoders.Enum.WINDOWED_VALUE))
+ || wireCoderUrn.isEmpty()) {
+ windowedValueCoder = ModelCoders.windowedValueCoder(elementCoderId, windowCoderId);
+ } else {
+ checkArgument(
+ wireCoderUrn.equals(getUrn(RunnerApi.StandardCoders.Enum.PARAM_WINDOWED_VALUE)),
+ "Unexpected wire coder urn %s, currently only %s or %s are supported!",
+ wireCoderUrn,
+ getUrn(RunnerApi.StandardCoders.Enum.WINDOWED_VALUE),
+ getUrn(RunnerApi.StandardCoders.Enum.PARAM_WINDOWED_VALUE));
+ windowedValueCoder =
+ ModelCoders.paramWindowedValueCoder(
+ elementCoderId, windowCoderId, wireCoderSetting.getPayload().toByteArray());
+ }
+
// Add the original WindowedValue<T, W> coder to the components;
String windowedValueId =
SyntheticComponents.uniqueId(
diff --git a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/control/DefaultJobBundleFactoryTest.java b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/control/DefaultJobBundleFactoryTest.java
index 1b43154..b5ac3c6 100644
--- a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/control/DefaultJobBundleFactoryTest.java
+++ b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/control/DefaultJobBundleFactoryTest.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.runners.fnexecution.control;
+import static org.hamcrest.Matchers.containsString;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
@@ -26,7 +27,10 @@
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
+import java.util.Timer;
+import java.util.TimerTask;
import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionResponse;
import org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor;
import org.apache.beam.model.pipeline.v1.RunnerApi.Coder;
@@ -55,6 +59,7 @@
import org.apache.beam.sdk.fn.IdGenerator;
import org.apache.beam.sdk.fn.IdGenerators;
import org.apache.beam.sdk.fn.data.CloseableFnDataReceiver;
+import org.apache.beam.sdk.options.ExperimentalOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.PortablePipelineOptions;
import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.ByteString;
@@ -362,7 +367,7 @@
StageBundleFactory bf1 = bundleFactory.forStage(getExecutableStage(environment));
StageBundleFactory bf2 = bundleFactory.forStage(getExecutableStage(environment));
// NOTE: We hang on to stage bundle references to ensure their underlying environments are not
- // garbage collected. For additional safety, we print the factories to ensure the referernces
+ // garbage collected. For additional safety, we print the factories to ensure the references
// are not optimized away.
System.out.println("bundle factory 1:" + bf1);
System.out.println("bundle factory 1:" + bf2);
@@ -395,6 +400,78 @@
}
}
+ @Test
+ public void loadBalancesBundles() throws Exception {
+ PortablePipelineOptions portableOptions =
+ PipelineOptionsFactory.as(PortablePipelineOptions.class);
+ portableOptions.setSdkWorkerParallelism(2);
+ portableOptions.setLoadBalanceBundles(true);
+ Struct pipelineOptions = PipelineOptionsTranslation.toProto(portableOptions);
+
+ try (DefaultJobBundleFactory bundleFactory =
+ new DefaultJobBundleFactory(
+ JobInfo.create("testJob", "testJob", "token", pipelineOptions),
+ envFactoryProviderMap,
+ stageIdGenerator,
+ serverInfo)) {
+ OutputReceiverFactory orf = mock(OutputReceiverFactory.class);
+ StateRequestHandler srh = mock(StateRequestHandler.class);
+ when(srh.getCacheTokens()).thenReturn(Collections.emptyList());
+ StageBundleFactory sbf = bundleFactory.forStage(getExecutableStage(environment));
+ RemoteBundle b1 = sbf.getBundle(orf, srh, BundleProgressHandler.ignored());
+ verify(envFactory, Mockito.times(1)).createEnvironment(environment);
+ final RemoteBundle b2 = sbf.getBundle(orf, srh, BundleProgressHandler.ignored());
+ verify(envFactory, Mockito.times(2)).createEnvironment(environment);
+
+ long tms = System.currentTimeMillis();
+ AtomicBoolean closed = new AtomicBoolean();
+ // close to free up environment for another bundle
+ TimerTask closeBundleTask =
+ new TimerTask() {
+ @Override
+ public void run() {
+ try {
+ b2.close();
+ closed.set(true);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ };
+ new Timer().schedule(closeBundleTask, 100);
+
+ RemoteBundle b3 = sbf.getBundle(orf, srh, BundleProgressHandler.ignored());
+ // ensure we waited for close
+ Assert.assertTrue(System.currentTimeMillis() - tms >= 100 && closed.get());
+
+ verify(envFactory, Mockito.times(2)).createEnvironment(environment);
+ b3.close();
+ b1.close();
+ }
+ }
+
+ @Test
+ public void rejectsStateCachingWithLoadBalancing() throws Exception {
+ PortablePipelineOptions portableOptions =
+ PipelineOptionsFactory.as(PortablePipelineOptions.class);
+ portableOptions.setLoadBalanceBundles(true);
+ ExperimentalOptions options = portableOptions.as(ExperimentalOptions.class);
+ ExperimentalOptions.addExperiment(options, "state_cache_size=1");
+ Struct pipelineOptions = PipelineOptionsTranslation.toProto(options);
+
+ Exception e =
+ Assert.assertThrows(
+ IllegalArgumentException.class,
+ () ->
+ new DefaultJobBundleFactory(
+ JobInfo.create("testJob", "testJob", "token", pipelineOptions),
+ envFactoryProviderMap,
+ stageIdGenerator,
+ serverInfo)
+ .close());
+ Assert.assertThat(e.getMessage(), containsString("state_cache_size"));
+ }
+
private DefaultJobBundleFactory createDefaultJobBundleFactory(
Map<String, EnvironmentFactory.Provider> envFactoryProviderMap) {
return new DefaultJobBundleFactory(
diff --git a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/control/RemoteExecutionTest.java b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/control/RemoteExecutionTest.java
index dc73203..d9d51d4 100644
--- a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/control/RemoteExecutionTest.java
+++ b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/control/RemoteExecutionTest.java
@@ -60,7 +60,6 @@
import org.apache.beam.runners.fnexecution.GrpcFnServer;
import org.apache.beam.runners.fnexecution.InProcessServerFactory;
import org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors.ExecutableProcessBundleDescriptor;
-import org.apache.beam.runners.fnexecution.control.SdkHarnessClient.ActiveBundle;
import org.apache.beam.runners.fnexecution.control.SdkHarnessClient.BundleProcessor;
import org.apache.beam.runners.fnexecution.data.GrpcDataService;
import org.apache.beam.runners.fnexecution.logging.GrpcLoggingService;
@@ -286,7 +285,7 @@
}
// The impulse example
- try (ActiveBundle bundle =
+ try (RemoteBundle bundle =
processor.newBundle(outputReceivers, BundleProgressHandler.ignored())) {
Iterables.getOnlyElement(bundle.getInputReceivers().values())
.accept(WindowedValue.valueInGlobalWindow(new byte[0]));
@@ -350,7 +349,7 @@
(FnDataReceiver<? super WindowedValue<?>>) outputContents::add));
}
- try (ActiveBundle bundle =
+ try (RemoteBundle bundle =
processor.newBundle(outputReceivers, BundleProgressHandler.ignored())) {
Iterables.getOnlyElement(bundle.getInputReceivers().values())
.accept(
@@ -359,7 +358,7 @@
}
try {
- try (ActiveBundle bundle =
+ try (RemoteBundle bundle =
processor.newBundle(outputReceivers, BundleProgressHandler.ignored())) {
Iterables.getOnlyElement(bundle.getInputReceivers().values())
.accept(
@@ -372,7 +371,7 @@
assertTrue(e.getMessage().contains("testBundleExecutionFailure"));
}
- try (ActiveBundle bundle =
+ try (RemoteBundle bundle =
processor.newBundle(outputReceivers, BundleProgressHandler.ignored())) {
Iterables.getOnlyElement(bundle.getInputReceivers().values())
.accept(
@@ -504,7 +503,7 @@
});
BundleProgressHandler progressHandler = BundleProgressHandler.ignored();
- try (ActiveBundle bundle =
+ try (RemoteBundle bundle =
processor.newBundle(outputReceivers, stateRequestHandler, progressHandler)) {
Iterables.getOnlyElement(bundle.getInputReceivers().values())
.accept(WindowedValue.valueInGlobalWindow("X"));
@@ -818,7 +817,7 @@
}
};
- try (ActiveBundle bundle =
+ try (RemoteBundle bundle =
processor.newBundle(outputReceivers, stateRequestHandler, progressHandler)) {
Iterables.getOnlyElement(bundle.getInputReceivers().values())
.accept(
@@ -959,7 +958,7 @@
}
});
- try (ActiveBundle bundle =
+ try (RemoteBundle bundle =
processor.newBundle(
outputReceivers, stateRequestHandler, BundleProgressHandler.ignored())) {
Iterables.getOnlyElement(bundle.getInputReceivers().values())
@@ -1102,7 +1101,7 @@
// output.
DateTimeUtils.setCurrentMillisFixed(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis());
- try (ActiveBundle bundle =
+ try (RemoteBundle bundle =
processor.newBundle(
outputReceivers, StateRequestHandler.unsupported(), BundleProgressHandler.ignored())) {
bundle
@@ -1222,7 +1221,7 @@
(Coder<WindowedValue<?>>) remoteOutputCoder.getValue(), outputValues::add));
}
- try (ActiveBundle bundle =
+ try (RemoteBundle bundle =
processor.newBundle(
outputReceivers,
StateRequestHandler.unsupported(),
diff --git a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/control/SdkHarnessClientTest.java b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/control/SdkHarnessClientTest.java
index ef0b2ac..57b5cbb 100644
--- a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/control/SdkHarnessClientTest.java
+++ b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/control/SdkHarnessClientTest.java
@@ -53,8 +53,8 @@
import org.apache.beam.runners.core.construction.CoderTranslation;
import org.apache.beam.runners.core.construction.PipelineTranslation;
import org.apache.beam.runners.fnexecution.EmbeddedSdkHarness;
-import org.apache.beam.runners.fnexecution.control.SdkHarnessClient.ActiveBundle;
import org.apache.beam.runners.fnexecution.control.SdkHarnessClient.BundleProcessor;
+import org.apache.beam.runners.fnexecution.control.SdkHarnessClient.BundleProcessor.ActiveBundle;
import org.apache.beam.runners.fnexecution.data.FnDataService;
import org.apache.beam.runners.fnexecution.data.RemoteInputDestination;
import org.apache.beam.runners.fnexecution.state.StateDelegator;
@@ -228,7 +228,7 @@
(FullWindowedValueCoder) coder, SDK_GRPC_READ_TRANSFORM)));
when(dataService.send(any(), eq(coder))).thenReturn(mock(CloseableFnDataReceiver.class));
- try (ActiveBundle activeBundle =
+ try (RemoteBundle activeBundle =
processor.newBundle(Collections.emptyMap(), BundleProgressHandler.ignored())) {
// Correlating the ProcessBundleRequest and ProcessBundleResponse is owned by the underlying
// FnApiControlClient. The SdkHarnessClient owns just wrapping the request and unwrapping
@@ -256,7 +256,7 @@
SDK_GRPC_READ_TRANSFORM)));
Collection<WindowedValue<String>> outputs = new ArrayList<>();
- try (ActiveBundle activeBundle =
+ try (RemoteBundle activeBundle =
processor.newBundle(
Collections.singletonMap(
SDK_GRPC_WRITE_TRANSFORM,
@@ -311,7 +311,7 @@
BundleProgressHandler mockProgressHandler = mock(BundleProgressHandler.class);
try {
- try (ActiveBundle activeBundle =
+ try (RemoteBundle activeBundle =
processor.newBundle(
ImmutableMap.of(SDK_GRPC_WRITE_TRANSFORM, mockRemoteOutputReceiver),
mockProgressHandler)) {
@@ -363,7 +363,7 @@
RemoteOutputReceiver mockRemoteOutputReceiver = mock(RemoteOutputReceiver.class);
try {
- try (ActiveBundle activeBundle =
+ try (RemoteBundle activeBundle =
processor.newBundle(
ImmutableMap.of(SDK_GRPC_WRITE_TRANSFORM, mockRemoteOutputReceiver),
mockStateHandler,
@@ -408,7 +408,7 @@
BundleProgressHandler mockProgressHandler = mock(BundleProgressHandler.class);
try {
- try (ActiveBundle activeBundle =
+ try (RemoteBundle activeBundle =
processor.newBundle(
ImmutableMap.of(SDK_GRPC_WRITE_TRANSFORM, mockRemoteOutputReceiver),
mockProgressHandler)) {
@@ -457,7 +457,7 @@
RemoteOutputReceiver mockRemoteOutputReceiver = mock(RemoteOutputReceiver.class);
try {
- try (ActiveBundle activeBundle =
+ try (RemoteBundle activeBundle =
processor.newBundle(
ImmutableMap.of(SDK_GRPC_WRITE_TRANSFORM, mockRemoteOutputReceiver),
mockStateHandler,
@@ -503,7 +503,7 @@
BundleProgressHandler mockProgressHandler = mock(BundleProgressHandler.class);
try {
- try (ActiveBundle activeBundle =
+ try (RemoteBundle activeBundle =
processor.newBundle(
ImmutableMap.of(SDK_GRPC_WRITE_TRANSFORM, mockRemoteOutputReceiver),
mockProgressHandler)) {
@@ -559,7 +559,7 @@
RemoteOutputReceiver mockRemoteOutputReceiver = mock(RemoteOutputReceiver.class);
try {
- try (ActiveBundle activeBundle =
+ try (RemoteBundle activeBundle =
processor.newBundle(
ImmutableMap.of(SDK_GRPC_WRITE_TRANSFORM, mockRemoteOutputReceiver),
mockStateHandler,
@@ -649,6 +649,7 @@
RemoteOutputReceiver mockRemoteOutputReceiver = mock(RemoteOutputReceiver.class);
BundleProgressHandler mockProgressHandler = mock(BundleProgressHandler.class);
+ BundleSplitHandler mockSplitHandler = mock(BundleSplitHandler.class);
BundleCheckpointHandler mockCheckpointHandler = mock(BundleCheckpointHandler.class);
BundleFinalizationHandler mockFinalizationHandler = mock(BundleFinalizationHandler.class);
@@ -663,6 +664,7 @@
throw new UnsupportedOperationException();
},
mockProgressHandler,
+ mockSplitHandler,
mockCheckpointHandler,
mockFinalizationHandler)) {
processBundleResponseFuture.complete(
@@ -671,7 +673,7 @@
verify(mockProgressHandler).onCompleted(response);
verify(mockCheckpointHandler).onCheckpoint(response);
- verifyZeroInteractions(mockFinalizationHandler);
+ verifyZeroInteractions(mockFinalizationHandler, mockSplitHandler);
}
@Test
@@ -698,6 +700,7 @@
RemoteOutputReceiver mockRemoteOutputReceiver = mock(RemoteOutputReceiver.class);
BundleProgressHandler mockProgressHandler = mock(BundleProgressHandler.class);
+ BundleSplitHandler mockSplitHandler = mock(BundleSplitHandler.class);
BundleCheckpointHandler mockCheckpointHandler = mock(BundleCheckpointHandler.class);
BundleFinalizationHandler mockFinalizationHandler = mock(BundleFinalizationHandler.class);
@@ -711,6 +714,7 @@
throw new UnsupportedOperationException();
},
mockProgressHandler,
+ mockSplitHandler,
mockCheckpointHandler,
mockFinalizationHandler)) {
bundleId = activeBundle.getId();
@@ -720,7 +724,7 @@
verify(mockProgressHandler).onCompleted(response);
verify(mockFinalizationHandler).requestsFinalization(bundleId);
- verifyZeroInteractions(mockCheckpointHandler);
+ verifyZeroInteractions(mockCheckpointHandler, mockSplitHandler);
}
private CompletableFuture<InstructionResponse> createRegisterResponse() {
diff --git a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactoryTest.java b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactoryTest.java
index a199cb2..f163640 100644
--- a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactoryTest.java
+++ b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactoryTest.java
@@ -20,14 +20,13 @@
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;
-import static org.mockito.Matchers.anyString;
+import static org.mockito.Matchers.any;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import java.util.Arrays;
import java.util.Collection;
-import java.util.concurrent.TimeoutException;
import org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor;
import org.apache.beam.model.pipeline.v1.RunnerApi.Environment;
import org.apache.beam.runners.core.construction.Environments;
@@ -158,15 +157,11 @@
}
@Test(expected = RuntimeException.class)
- public void logsDockerOutputOnTimeoutException() throws Exception {
+ public void logsDockerOutputOnStartupFailed() throws Exception {
when(docker.runImage(Mockito.eq(IMAGE_NAME), Mockito.any(), Mockito.any()))
.thenReturn(CONTAINER_ID);
- when(docker.isContainerRunning(Mockito.eq(CONTAINER_ID))).thenReturn(true);
- DockerEnvironmentFactory factory =
- getFactory(
- (workerId, timeout) -> {
- throw new TimeoutException();
- });
+ when(docker.isContainerRunning(Mockito.eq(CONTAINER_ID))).thenReturn(false);
+ DockerEnvironmentFactory factory = getFactory((workerId, timeout) -> client);
factory.createEnvironment(ENVIRONMENT);
@@ -188,7 +183,7 @@
@Test
public void createsMultipleEnvironments() throws Exception {
- when(docker.isContainerRunning(anyString())).thenReturn(true);
+ when(docker.isContainerRunning(any())).thenReturn(true);
DockerEnvironmentFactory factory = getFactory((workerId, timeout) -> client);
Environment fooEnv = Environments.createDockerEnvironment("foo");
diff --git a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/ProcessEnvironmentFactoryTest.java b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/ProcessEnvironmentFactoryTest.java
index a38b1b5..519face 100644
--- a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/ProcessEnvironmentFactoryTest.java
+++ b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/ProcessEnvironmentFactoryTest.java
@@ -20,9 +20,9 @@
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.anyList;
import static org.mockito.Matchers.anyMap;
-import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
diff --git a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/ProcessManagerTest.java b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/ProcessManagerTest.java
index d0c02c6..a1377f3 100644
--- a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/ProcessManagerTest.java
+++ b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/ProcessManagerTest.java
@@ -21,7 +21,9 @@
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.ByteArrayOutputStream;
@@ -157,4 +159,32 @@
// assertThat(outputStr, containsString("testing123"));
assertFalse(ProcessManager.INHERIT_IO_FILE.exists());
}
+
+ @Test
+ public void testShutdownHook() throws IOException {
+ ProcessManager processManager = ProcessManager.create();
+
+ // no process alive, no shutdown hook
+ assertNull(ProcessManager.shutdownHook);
+
+ processManager.startProcess(
+ "1", "bash", Arrays.asList("-c", "echo 'testing123'"), Collections.emptyMap());
+ // the shutdown hook will be created when process is started
+ assertNotNull(ProcessManager.shutdownHook);
+ // check the shutdown hook is registered
+ assertTrue(Runtime.getRuntime().removeShutdownHook(ProcessManager.shutdownHook));
+ // add back the shutdown hook
+ Runtime.getRuntime().addShutdownHook(ProcessManager.shutdownHook);
+
+ processManager.startProcess(
+ "2", "bash", Arrays.asList("-c", "echo 'testing123'"), Collections.emptyMap());
+
+ processManager.stopProcess("1");
+ // the shutdown hook will be not removed if there are still processes alive
+ assertNotNull(ProcessManager.shutdownHook);
+
+ processManager.stopProcess("2");
+ // the shutdown hook will be removed when there is no process alive
+ assertNull(ProcessManager.shutdownHook);
+ }
}
diff --git a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/status/BeamWorkerStatusGrpcServiceTest.java b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/status/BeamWorkerStatusGrpcServiceTest.java
new file mode 100644
index 0000000..c9d6544
--- /dev/null
+++ b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/status/BeamWorkerStatusGrpcServiceTest.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.fnexecution.status;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import org.apache.beam.fn.harness.control.AddHarnessIdInterceptor;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.WorkerStatusRequest;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.WorkerStatusResponse;
+import org.apache.beam.model.fnexecution.v1.BeamFnWorkerStatusGrpc;
+import org.apache.beam.model.fnexecution.v1.BeamFnWorkerStatusGrpc.BeamFnWorkerStatusStub;
+import org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor;
+import org.apache.beam.runners.fnexecution.GrpcContextHeaderAccessorProvider;
+import org.apache.beam.runners.fnexecution.GrpcFnServer;
+import org.apache.beam.runners.fnexecution.InProcessServerFactory;
+import org.apache.beam.vendor.grpc.v1p21p0.io.grpc.ManagedChannel;
+import org.apache.beam.vendor.grpc.v1p21p0.io.grpc.inprocess.InProcessChannelBuilder;
+import org.apache.beam.vendor.grpc.v1p21p0.io.grpc.stub.StreamObserver;
+import org.apache.beam.vendor.grpc.v1p21p0.io.grpc.testing.GrpcCleanupRule;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+@RunWith(JUnit4.class)
+public class BeamWorkerStatusGrpcServiceTest {
+
+ @Rule public final GrpcCleanupRule grpcCleanup = new GrpcCleanupRule();
+ private static final String ID = "id";
+ private BeamWorkerStatusGrpcService service;
+ private GrpcFnServer<BeamWorkerStatusGrpcService> server;
+ private ManagedChannel channel;
+ private BeamFnWorkerStatusStub stub;
+ @Mock private StreamObserver<WorkerStatusRequest> mockObserver;
+
+ @Before
+ public void setUp() throws Exception {
+ MockitoAnnotations.initMocks(this);
+ service =
+ BeamWorkerStatusGrpcService.create(
+ ApiServiceDescriptor.newBuilder().setUrl(UUID.randomUUID().toString()).build(),
+ GrpcContextHeaderAccessorProvider.getHeaderAccessor());
+ server = GrpcFnServer.allocatePortAndCreateFor(service, InProcessServerFactory.create());
+ channel = InProcessChannelBuilder.forName(server.getApiServiceDescriptor().getUrl()).build();
+ stub =
+ BeamFnWorkerStatusGrpc.newStub(channel)
+ .withInterceptors(AddHarnessIdInterceptor.create(ID));
+ grpcCleanup.register(server.getServer());
+ grpcCleanup.register(channel);
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ if (service != null) {
+ service.close();
+ }
+ }
+
+ @Test
+ public void testClientConnected() throws Exception {
+ StreamObserver<WorkerStatusResponse> workerStatusResponseStreamObserver =
+ stub.workerStatus(mockObserver);
+ WorkerStatusClient client = waitAndGetStatusClient(ID);
+ assertNotNull(client);
+ }
+
+ @Test
+ public void testGetWorkerStatusNoResponse() throws Exception {
+ StreamObserver<WorkerStatusResponse> unused = stub.workerStatus(mockObserver);
+ waitAndGetStatusClient(ID);
+ String response = service.getSingleWorkerStatus("id", 1, TimeUnit.MILLISECONDS);
+ assertEquals(
+ "Error: exception encountered getting status from SDK harness: java.util.concurrent.TimeoutException",
+ response);
+ }
+
+ @Test
+ public void testGetWorkerStatusSuccess() throws Exception {
+ StreamObserver<WorkerStatusResponse> observer = stub.workerStatus(mockObserver);
+ waitAndGetStatusClient(ID);
+ doAnswer(
+ (invocation) -> {
+ WorkerStatusRequest request = (WorkerStatusRequest) invocation.getArguments()[0];
+ observer.onNext(
+ WorkerStatusResponse.newBuilder()
+ .setId(request.getId())
+ .setStatusInfo("status")
+ .build());
+ return null;
+ })
+ .when(mockObserver)
+ .onNext(any());
+
+ CompletableFuture<String> future = service.getWorkerStatus(ID);
+ String response = future.get(5, TimeUnit.SECONDS);
+ assertEquals("status", response);
+ }
+
+ @Test
+ public void testGetWorkerStatusReturnError() throws Exception {
+ StreamObserver<WorkerStatusResponse> observer = stub.workerStatus(mockObserver);
+ waitAndGetStatusClient(ID);
+ doAnswer(
+ (invocation) -> {
+ WorkerStatusRequest request = (WorkerStatusRequest) invocation.getArguments()[0];
+ observer.onNext(
+ WorkerStatusResponse.newBuilder()
+ .setId(request.getId())
+ .setError("error")
+ .build());
+ return null;
+ })
+ .when(mockObserver)
+ .onNext(any());
+
+ CompletableFuture<String> future = service.getWorkerStatus(ID);
+ String response = future.get(5, TimeUnit.SECONDS);
+ assertEquals("error", response);
+ }
+
+ @Test
+ public void testGetAllWorkerStatuses() throws Exception {
+ Set<String> ids = Sets.newHashSet("id0", "id3", "id11", "id12", "id21");
+ for (String id : ids) {
+ StreamObserver<WorkerStatusRequest> requestObserverMock = mock(StreamObserver.class);
+ BeamFnWorkerStatusStub workerStatusStub =
+ BeamFnWorkerStatusGrpc.newStub(channel)
+ .withInterceptors(AddHarnessIdInterceptor.create(id));
+ StreamObserver<WorkerStatusResponse> observer =
+ workerStatusStub.workerStatus(requestObserverMock);
+ // wait for the connection before proceeding, to avoid a race condition.
+ waitAndGetStatusClient(id);
+ doAnswer(
+ (invocation) -> {
+ WorkerStatusRequest request = (WorkerStatusRequest) invocation.getArguments()[0];
+ observer.onNext(
+ WorkerStatusResponse.newBuilder()
+ .setId(request.getId())
+ .setStatusInfo("status")
+ .build());
+ return null;
+ })
+ .when(requestObserverMock)
+ .onNext(any());
+ }
+ Map<String, String> allWorkerStatuses = service.getAllWorkerStatuses(5, TimeUnit.SECONDS);
+
+ assertEquals(ids, allWorkerStatuses.keySet());
+
+ for (String id : ids) {
+ assertEquals("status", allWorkerStatuses.get(id));
+ }
+ }
+
+ private WorkerStatusClient waitAndGetStatusClient(String id)
+ throws InterruptedException, ExecutionException, TimeoutException {
+ // wait for status client connection, and get the corresponding client.
+ CompletableFuture<WorkerStatusClient> clientFuture = service.getStatusClient(id);
+ return clientFuture.get(1, TimeUnit.SECONDS);
+ }
+}
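The success, error, and all-statuses tests above repeat the same Mockito wiring: the mocked request observer answers each WorkerStatusRequest by pushing a WorkerStatusResponse back through the client-side response observer. A hedged sketch of that pattern as a helper method (hypothetical name, assuming the same static imports and vendored gRPC types as the test class):

  // Hypothetical helper capturing the doAnswer(...) wiring used repeatedly above.
  private static void replyWithStatus(
      StreamObserver<WorkerStatusRequest> mockRequestObserver,
      StreamObserver<WorkerStatusResponse> responseObserver,
      String statusInfo) {
    doAnswer(
            invocation -> {
              WorkerStatusRequest request = invocation.getArgument(0);
              // Echo the request id so WorkerStatusClient can match the response to its request.
              responseObserver.onNext(
                  WorkerStatusResponse.newBuilder()
                      .setId(request.getId())
                      .setStatusInfo(statusInfo)
                      .build());
              return null;
            })
        .when(mockRequestObserver)
        .onNext(any());
  }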
diff --git a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/status/WorkerStatusClientTest.java b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/status/WorkerStatusClientTest.java
new file mode 100644
index 0000000..0aca49a
--- /dev/null
+++ b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/status/WorkerStatusClientTest.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.fnexecution.status;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.verify;
+
+import java.io.IOException;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.WorkerStatusRequest;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.WorkerStatusResponse;
+import org.apache.beam.vendor.grpc.v1p21p0.io.grpc.stub.StreamObserver;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+@RunWith(JUnit4.class)
+public class WorkerStatusClientTest {
+
+ @Mock public StreamObserver<BeamFnApi.WorkerStatusRequest> mockObserver;
+ private WorkerStatusClient client;
+
+ @Before
+ public void setup() {
+ MockitoAnnotations.initMocks(this);
+ client = WorkerStatusClient.forRequestObserver("ID", mockObserver);
+ }
+
+ @Test
+ public void testGetWorkerStatusSuccess() throws Exception {
+ CompletableFuture<WorkerStatusResponse> workerStatus =
+ client.getWorkerStatus(WorkerStatusRequest.newBuilder().setId("123").build());
+ client
+ .getResponseObserver()
+ .onNext(WorkerStatusResponse.newBuilder().setId("123").setStatusInfo("status").build());
+ Assert.assertEquals("status", workerStatus.get().getStatusInfo());
+ }
+
+ @Test
+ public void testGetWorkerStatusError() throws Exception {
+ CompletableFuture<WorkerStatusResponse> workerStatus =
+ client.getWorkerStatus(WorkerStatusRequest.newBuilder().setId("123").build());
+ client
+ .getResponseObserver()
+ .onNext(WorkerStatusResponse.newBuilder().setId("123").setError("error").build());
+ Assert.assertEquals("error", workerStatus.get().getError());
+ }
+
+ @Test
+ public void testGetWorkerStatusRequestSent() {
+ CompletableFuture<WorkerStatusResponse> workerStatus = client.getWorkerStatus();
+ verify(mockObserver).onNext(any(WorkerStatusRequest.class));
+ }
+
+ @Test
+ public void testUnknownRequestIdResponseIgnored() {
+ CompletableFuture<WorkerStatusResponse> workerStatus = client.getWorkerStatus();
+ client
+ .getResponseObserver()
+ .onNext(WorkerStatusResponse.newBuilder().setId("unknown").setStatusInfo("status").build());
+ Assert.assertFalse(workerStatus.isDone());
+ }
+
+ @Test
+ public void testCloseOutstandingRequest() throws IOException {
+ CompletableFuture<WorkerStatusResponse> workerStatus = client.getWorkerStatus();
+ client.close();
+ Assert.assertThrows(ExecutionException.class, workerStatus::get);
+ }
+}
diff --git a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/translation/BatchSideInputHandlerFactoryTest.java b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/translation/BatchSideInputHandlerFactoryTest.java
index 1afb93f..e1ccad6 100644
--- a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/translation/BatchSideInputHandlerFactoryTest.java
+++ b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/translation/BatchSideInputHandlerFactoryTest.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.runners.fnexecution.translation;
+import static org.apache.beam.runners.core.construction.graph.ExecutableStage.DEFAULT_WIRE_CODER_SETTING;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsInAnyOrder;
@@ -234,6 +235,7 @@
Collections.emptyList(),
Collections.emptyList(),
Collections.emptyList(),
- Collections.emptyList());
+ Collections.emptyList(),
+ DEFAULT_WIRE_CODER_SETTING);
}
}
diff --git a/runners/jet/src/main/java/org/apache/beam/runners/jet/processors/StatefulParDoP.java b/runners/jet/src/main/java/org/apache/beam/runners/jet/processors/StatefulParDoP.java
index e291117..76e8375 100644
--- a/runners/jet/src/main/java/org/apache/beam/runners/jet/processors/StatefulParDoP.java
+++ b/runners/jet/src/main/java/org/apache/beam/runners/jet/processors/StatefulParDoP.java
@@ -92,7 +92,13 @@
TimerInternals.TimerData timer, DoFnRunner<KV<?, ?>, ?> doFnRunner) {
StateNamespace namespace = timer.getNamespace();
BoundedWindow window = ((StateNamespaces.WindowNamespace) namespace).getWindow();
- doFnRunner.onTimer(timer.getTimerId(), window, timer.getTimestamp(), timer.getDomain());
+ doFnRunner.onTimer(
+ timer.getTimerId(),
+ timer.getTimerFamilyId(),
+ window,
+ timer.getTimestamp(),
+ timer.getOutputTimestamp(),
+ timer.getDomain());
}
@Override
diff --git a/runners/portability/java/src/main/java/org/apache/beam/runners/portability/PortableRunner.java b/runners/portability/java/src/main/java/org/apache/beam/runners/portability/PortableRunner.java
index d03e56e..72c3cbc 100644
--- a/runners/portability/java/src/main/java/org/apache/beam/runners/portability/PortableRunner.java
+++ b/runners/portability/java/src/main/java/org/apache/beam/runners/portability/PortableRunner.java
@@ -17,7 +17,7 @@
*/
package org.apache.beam.runners.portability;
-import static org.apache.beam.runners.core.construction.PipelineResources.detectClassPathResourcesToStage;
+import static org.apache.beam.runners.core.construction.resources.PipelineResources.detectClassPathResourcesToStage;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState;
import java.io.File;
@@ -104,7 +104,8 @@
s -> pathsToStage.addAll(Arrays.asList(s.replaceFirst("jar_packages=", "").split(","))));
}
if (portableOptions.getFilesToStage() == null) {
- pathsToStage.addAll(detectClassPathResourcesToStage(PortableRunner.class.getClassLoader()));
+ pathsToStage.addAll(
+ detectClassPathResourcesToStage(PortableRunner.class.getClassLoader(), options));
if (pathsToStage.isEmpty()) {
throw new IllegalArgumentException("No classpath elements found.");
}
diff --git a/runners/samza/build.gradle b/runners/samza/build.gradle
index ae6f48f..469c961 100644
--- a/runners/samza/build.gradle
+++ b/runners/samza/build.gradle
@@ -34,7 +34,7 @@
validatesRunner
}
-def samza_version = "1.1.0"
+def samza_version = "1.3.0"
dependencies {
compile library.java.vendored_guava_26_0_jre
@@ -46,7 +46,7 @@
compile library.java.slf4j_api
compile library.java.joda_time
compile library.java.commons_compress
- compile library.java.commons_io_2x
+ compile library.java.commons_io
compile library.java.args4j
compile "org.apache.samza:samza-api:$samza_version"
compile "org.apache.samza:samza-core_2.11:$samza_version"
@@ -58,7 +58,6 @@
compile "org.apache.kafka:kafka-clients:0.11.0.2"
testCompile project(path: ":sdks:java:core", configuration: "shadowTest")
testCompile project(path: ":runners:core-java", configuration: "testRuntime")
- testCompile library.java.commons_lang3
testCompile library.java.hamcrest_core
testCompile library.java.junit
testCompile library.java.mockito_core
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptionsValidator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptionsValidator.java
index f965e5a..591c0ee 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptionsValidator.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptionsValidator.java
@@ -19,10 +19,10 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState;
+import static org.apache.samza.config.TaskConfig.MAX_CONCURRENCY;
import java.util.HashMap;
import java.util.Map;
-import org.apache.samza.config.TaskConfig;
/** Validates that the {@link SamzaPipelineOptions} conforms to all the criteria. */
public class SamzaPipelineOptionsValidator {
@@ -41,7 +41,7 @@
isPortable(pipelineOptions),
"Bundling is not supported in non portable mode. Please disable by setting maxBundleSize to 1.");
- String taskConcurrencyConfig = TaskConfig.MAX_CONCURRENCY();
+ String taskConcurrencyConfig = MAX_CONCURRENCY;
Map<String, String> configs =
pipelineOptions.getConfigOverride() == null
? new HashMap<>()
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineResult.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineResult.java
index e6d27fa..98ca20f 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineResult.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineResult.java
@@ -18,6 +18,7 @@
package org.apache.beam.runners.samza;
import static org.apache.beam.runners.core.metrics.MetricsContainerStepMap.asAttemptedOnlyMetricResults;
+import static org.apache.samza.config.TaskConfig.TASK_SHUTDOWN_MS;
import javax.annotation.Nullable;
import org.apache.beam.sdk.Pipeline;
@@ -26,7 +27,6 @@
import org.apache.beam.sdk.util.UserCodeException;
import org.apache.samza.application.StreamApplication;
import org.apache.samza.config.Config;
-import org.apache.samza.config.TaskConfig;
import org.apache.samza.job.ApplicationStatus;
import org.apache.samza.runtime.ApplicationRunner;
import org.joda.time.Duration;
@@ -39,6 +39,7 @@
private static final long DEFAULT_SHUTDOWN_MS = 5000L;
// allow some buffer on top of samza's own shutdown timeout
private static final long SHUTDOWN_TIMEOUT_BUFFER = 5000L;
+ private static final long DEFAULT_TASK_SHUTDOWN_MS = 30000L;
private final SamzaExecutionContext executionContext;
private final ApplicationRunner runner;
@@ -57,7 +58,7 @@
this.app = app;
this.listener = listener;
this.shutdownTiemoutMs =
- config.getLong(TaskConfig.SHUTDOWN_MS(), DEFAULT_SHUTDOWN_MS) + SHUTDOWN_TIMEOUT_BUFFER;
+ config.getLong(TASK_SHUTDOWN_MS, DEFAULT_TASK_SHUTDOWN_MS) + SHUTDOWN_TIMEOUT_BUFFER;
}
@Override
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunner.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunner.java
index 4a94626..4b1c6b9 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunner.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunner.java
@@ -55,6 +55,7 @@
*/
public class SamzaRunner extends PipelineRunner<SamzaPipelineResult> {
private static final Logger LOG = LoggerFactory.getLogger(SamzaRunner.class);
+ private static final String BEAM_DOT_GRAPH = "beamDotGraph";
public static SamzaRunner fromOptions(PipelineOptions opts) {
final SamzaPipelineOptions samzaOptions =
@@ -74,8 +75,12 @@
}
public PortablePipelineResult runPortablePipeline(RunnerApi.Pipeline pipeline) {
+ final String dotGraph = PipelineDotRenderer.toDotString(pipeline);
+ LOG.info("Portable pipeline to run:\n{}", dotGraph);
+
final ConfigBuilder configBuilder = new ConfigBuilder(options);
SamzaPortablePipelineTranslator.createConfig(pipeline, configBuilder, options);
+ configBuilder.put(BEAM_DOT_GRAPH, dotGraph);
final Config config = configBuilder.build();
options.setConfigOverride(config);
@@ -109,12 +114,14 @@
pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides());
- LOG.info("Beam pipeline DOT graph:\n{}", PipelineDotRenderer.toDotString(pipeline));
+ final String dotGraph = PipelineDotRenderer.toDotString(pipeline);
+ LOG.info("Beam pipeline DOT graph:\n{}", dotGraph);
final Map<PValue, String> idMap = PViewToIdMapper.buildIdMap(pipeline);
-
final ConfigBuilder configBuilder = new ConfigBuilder(options);
+
SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
+ configBuilder.put(BEAM_DOT_GRAPH, dotGraph);
final Config config = configBuilder.build();
options.setConfigOverride(config);
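Both the portable and classic paths now render the pipeline to DOT and stash it in the job config under the BEAM_DOT_GRAPH key, so any component holding the Samza Config can read it back later for diagnostics. A hedged retrieval sketch (assumes a Config instance named config is in scope; "beamDotGraph" is the value of BEAM_DOT_GRAPH):

// Hypothetical consumer of the graph stored above; Samza's Config is a Map<String, String>.
String dotGraph = config.get("beamDotGraph");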
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/TestSamzaRunner.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/TestSamzaRunner.java
index 2dfb37c..15bdd94 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/TestSamzaRunner.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/TestSamzaRunner.java
@@ -17,6 +17,9 @@
*/
package org.apache.beam.runners.samza;
+import static org.apache.samza.config.JobConfig.JOB_LOGGED_STORE_BASE_DIR;
+import static org.apache.samza.config.JobConfig.JOB_NON_LOGGED_STORE_BASE_DIR;
+
import java.io.File;
import java.nio.file.Paths;
import java.util.HashMap;
@@ -28,10 +31,10 @@
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsValidator;
import org.apache.commons.io.FileUtils;
-import org.apache.samza.config.JobConfig;
/** Test {@link SamzaRunner}. */
public class TestSamzaRunner extends PipelineRunner<PipelineResult> {
+
private final SamzaRunner delegate;
public static TestSamzaRunner fromOptions(PipelineOptions options) {
@@ -51,8 +54,8 @@
// ignore
}
- config.put(JobConfig.JOB_LOGGED_STORE_BASE_DIR(), storeDir.getAbsolutePath());
- config.put(JobConfig.JOB_NON_LOGGED_STORE_BASE_DIR(), storeDir.getAbsolutePath());
+ config.put(JOB_LOGGED_STORE_BASE_DIR, storeDir.getAbsolutePath());
+ config.put(JOB_NON_LOGGED_STORE_BASE_DIR, storeDir.getAbsolutePath());
if (samzaOptions.getConfigOverride() != null) {
config.putAll(samzaOptions.getConfigOverride());
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/UnboundedSourceSystem.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/UnboundedSourceSystem.java
index b118a3c..c77ddb1 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/UnboundedSourceSystem.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/UnboundedSourceSystem.java
@@ -34,6 +34,7 @@
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.stream.Collectors;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.StringUtils;
import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.runners.core.serialization.Base64Serializer;
import org.apache.beam.runners.samza.SamzaPipelineOptions;
@@ -48,7 +49,6 @@
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
-import org.apache.commons.lang3.StringUtils;
import org.apache.samza.Partition;
import org.apache.samza.SamzaException;
import org.apache.samza.config.Config;
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/DoFnRunnerWithMetrics.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/DoFnRunnerWithMetrics.java
index 101ee80..aefcf6d 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/DoFnRunnerWithMetrics.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/DoFnRunnerWithMetrics.java
@@ -57,8 +57,16 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
- withMetrics(() -> underlying.onTimer(timerId, window, timestamp, timeDomain));
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
+ withMetrics(
+ () ->
+ underlying.onTimer(
+ timerId, timerFamilyId, window, timestamp, outputTimestamp, timeDomain));
}
@Override
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnOp.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnOp.java
index 795b8c0..cd140c7 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnOp.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnOp.java
@@ -452,7 +452,13 @@
// Need to pass in the keyed TimerData here
((DoFnRunnerWithKeyedInternals) fnRunner).onTimer(keyedTimerData, window);
} else {
- pushbackFnRunner.onTimer(timer.getTimerId(), window, timer.getTimestamp(), timer.getDomain());
+ pushbackFnRunner.onTimer(
+ timer.getTimerId(),
+ timer.getTimerFamilyId(),
+ window,
+ timer.getTimestamp(),
+ timer.getOutputTimestamp(),
+ timer.getDomain());
}
}
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnRunnerWithKeyedInternals.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnRunnerWithKeyedInternals.java
index 6fb2bd3..3b2d1cb 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnRunnerWithKeyedInternals.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnRunnerWithKeyedInternals.java
@@ -62,7 +62,13 @@
try {
final TimerInternals.TimerData timer = keyedTimerData.getTimerData();
- onTimer(timer.getTimerId(), window, timer.getTimestamp(), timer.getDomain());
+ onTimer(
+ timer.getTimerId(),
+ timer.getTimerFamilyId(),
+ window,
+ timer.getTimestamp(),
+ timer.getOutputTimestamp(),
+ timer.getDomain());
} finally {
clearKeyedInternals();
}
@@ -70,10 +76,15 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
checkState(keyedInternals.getKey() != null, "Key is not set for timer");
- underlying.onTimer(timerId, window, timestamp, timeDomain);
+ underlying.onTimer(timerId, timerFamilyId, window, timestamp, outputTimestamp, timeDomain);
}
@Override
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedInternals.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedInternals.java
index d504929..330fb24 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedInternals.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedInternals.java
@@ -118,8 +118,14 @@
@Override
public void setTimer(
- StateNamespace namespace, String timerId, Instant target, TimeDomain timeDomain) {
- getInternals().setTimer(namespace, timerId, target, timeDomain);
+ StateNamespace namespace,
+ String timerId,
+ String timerFamilyId,
+ Instant target,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
+ getInternals()
+ .setTimer(namespace, timerId, timerFamilyId, target, outputTimestamp, timeDomain);
}
@Override
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedTimerData.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedTimerData.java
index 2f3b809..a6214be 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedTimerData.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedTimerData.java
@@ -120,9 +120,9 @@
}
/**
- * Coder for {@link KeyedTimerData}. Note we don't use the {@link
- * org.apache.beam.runners.core.TimerInternals.TimerDataCoder} here directly since we want to
- * en/decode timestamp first so the timers will be sorted in the state.
+ * Coder for {@link KeyedTimerData}. Note we don't use the {@link TimerInternals.TimerDataCoderV2}
+ * here directly since we want to en/decode the timestamp first so the timers are kept sorted in
+ * the state.
*/
public static class KeyedTimerDataCoder<K> extends StructuredCoder<KeyedTimerData<K>> {
private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of();
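The KeyedTimerDataCoder Javadoc above gives the reason timers stay ordered: with the timestamp encoded first (big-endian), byte-wise lexicographic order in the underlying store matches chronological order. A small self-contained illustration, independent of Beam's coders and assuming non-negative timestamps:

import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class TimestampFirstKeyOrderDemo {
  // Encode the timestamp before the timer id, mirroring the idea behind KeyedTimerDataCoder.
  static byte[] key(long timestampMillis, String timerId) {
    byte[] id = timerId.getBytes(StandardCharsets.UTF_8);
    return ByteBuffer.allocate(8 + id.length).putLong(timestampMillis).put(id).array();
  }

  public static void main(String[] args) {
    byte[] earlier = key(1_000L, "zzz-timer");
    byte[] later = key(2_000L, "aaa-timer");
    // Unsigned byte-wise comparison, which is effectively what a sorted key-value store does.
    int cmp = new BigInteger(1, earlier).compareTo(new BigInteger(1, later));
    System.out.println(cmp < 0); // true: the earlier fire time sorts first despite the "later" id.
  }
}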
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaDoFnRunners.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaDoFnRunners.java
index 3b1b938..99439a2 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaDoFnRunners.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaDoFnRunners.java
@@ -262,7 +262,12 @@
@Override
public void onTimer(
- String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {}
+ String timerId,
+ String timerFamilyId,
+ BoundedWindow window,
+ Instant timestamp,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {}
@Override
public void finishBundle() {
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaStoreStateInternals.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaStoreStateInternals.java
index 3bd9834..28f333a 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaStoreStateInternals.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaStoreStateInternals.java
@@ -21,6 +21,7 @@
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
+import java.io.Serializable;
import java.lang.ref.SoftReference;
import java.util.AbstractMap;
import java.util.ArrayList;
@@ -64,7 +65,11 @@
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.Ints;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedBytes;
+import org.apache.samza.config.Config;
import org.apache.samza.context.TaskContext;
+import org.apache.samza.serializers.Serde;
+import org.apache.samza.serializers.SerdeFactory;
import org.apache.samza.storage.kv.Entry;
import org.apache.samza.storage.kv.KeyValueIterator;
import org.apache.samza.storage.kv.KeyValueStore;
@@ -78,14 +83,14 @@
new ThreadLocal<>();
// the stores include both beamStore for system states as well as stores for user state
- private final Map<String, KeyValueStore<byte[], byte[]>> stores;
+ private final Map<String, KeyValueStore<ByteArray, byte[]>> stores;
private final K key;
private final byte[] keyBytes;
private final int batchGetSize;
private final String stageId;
private SamzaStoreStateInternals(
- Map<String, KeyValueStore<byte[], byte[]>> stores,
+ Map<String, KeyValueStore<ByteArray, byte[]>> stores,
@Nullable K key,
@Nullable byte[] keyBytes,
String stageId,
@@ -98,8 +103,8 @@
}
@SuppressWarnings("unchecked")
- static KeyValueStore<byte[], byte[]> getBeamStore(TaskContext context) {
- return (KeyValueStore<byte[], byte[]>) context.getStore(SamzaStoreStateInternals.BEAM_STORE);
+ static KeyValueStore<ByteArray, byte[]> getBeamStore(TaskContext context) {
+ return (KeyValueStore<ByteArray, byte[]>) context.getStore(SamzaStoreStateInternals.BEAM_STORE);
}
static Factory createStateInternalFactory(
@@ -109,7 +114,7 @@
SamzaPipelineOptions pipelineOptions,
DoFnSignature signature) {
final int batchGetSize = pipelineOptions.getStoreBatchGetSize();
- final Map<String, KeyValueStore<byte[], byte[]>> stores = new HashMap<>();
+ final Map<String, KeyValueStore<ByteArray, byte[]>> stores = new HashMap<>();
stores.put(BEAM_STORE, getBeamStore(context));
final Coder stateKeyCoder;
@@ -119,7 +124,8 @@
.keySet()
.forEach(
stateId ->
- stores.put(stateId, (KeyValueStore<byte[], byte[]>) context.getStore(stateId)));
+ stores.put(
+ stateId, (KeyValueStore<ByteArray, byte[]>) context.getStore(stateId)));
stateKeyCoder = keyCoder;
} else {
stateKeyCoder = VoidCoder.of();
@@ -208,13 +214,13 @@
/** Factory class to create {@link SamzaStoreStateInternals}. */
public static class Factory<K> implements StateInternalsFactory<K> {
private final String stageId;
- private final Map<String, KeyValueStore<byte[], byte[]>> stores;
+ private final Map<String, KeyValueStore<ByteArray, byte[]>> stores;
private final Coder<K> keyCoder;
private final int batchGetSize;
public Factory(
String stageId,
- Map<String, KeyValueStore<byte[], byte[]>> stores,
+ Map<String, KeyValueStore<ByteArray, byte[]>> stores,
Coder<K> keyCoder,
int batchGetSize) {
this.stageId = stageId;
@@ -254,14 +260,14 @@
private final Coder<T> coder;
private final byte[] encodedStoreKey;
private final String namespace;
- protected final KeyValueStore<byte[], byte[]> store;
+ protected final KeyValueStore<ByteArray, byte[]> store;
protected AbstractSamzaState(
StateNamespace namespace, StateTag<? extends State> address, Coder<T> coder) {
this.coder = coder;
this.namespace = namespace.stringKey();
- final KeyValueStore<byte[], byte[]> userStore = stores.get(address.getId());
+ final KeyValueStore<ByteArray, byte[]> userStore = stores.get(address.getId());
this.store = userStore != null ? userStore : stores.get(BEAM_STORE);
final ByteArrayOutputStream baos = getThreadLocalBaos();
@@ -308,7 +314,11 @@
};
}
- protected byte[] getEncodedStoreKey() {
+ protected ByteArray getEncodedStoreKey() {
+ return ByteArray.of(encodedStoreKey);
+ }
+
+ protected byte[] getEncodedStoreKeyBytes() {
return encodedStoreKey;
}
@@ -393,7 +403,7 @@
public void add(T value) {
synchronized (store) {
final int size = getSize();
- final byte[] encodedKey = encodeKey(size);
+ final ByteArray encodedKey = encodeKey(size);
store.put(encodedKey, encodeValue(value));
store.put(getEncodedStoreKey(), Ints.toByteArray(size + 1));
}
@@ -416,7 +426,7 @@
}
final List<T> values = new ArrayList<>(size);
- final List<byte[]> keys = new ArrayList<>(size);
+ final List<ByteArray> keys = new ArrayList<>(size);
int start = 0;
while (start < size) {
final int end = Math.min(size, start + batchGetSize);
@@ -442,7 +452,7 @@
synchronized (store) {
final int size = getSize();
if (size != 0) {
- final List<byte[]> keys = new ArrayList<>(size);
+ final List<ByteArray> keys = new ArrayList<>(size);
for (int i = 0; i < size; i++) {
keys.add(encodeKey(i));
}
@@ -457,12 +467,12 @@
return sizeBytes == null ? 0 : Ints.fromByteArray(sizeBytes);
}
- private byte[] encodeKey(int size) {
+ private ByteArray encodeKey(int size) {
final ByteArrayOutputStream baos = getThreadLocalBaos();
try (DataOutputStream dos = new DataOutputStream(baos)) {
- dos.write(getEncodedStoreKey());
+ dos.write(getEncodedStoreKeyBytes());
dos.writeInt(size);
- return baos.toByteArray();
+ return ByteArray.of(baos.toByteArray());
} catch (Exception e) {
throw new RuntimeException(e);
}
@@ -567,7 +577,7 @@
private final Coder<KeyT> keyCoder;
private final int storeKeySize;
- private final List<KeyValueIterator<byte[], byte[]>> openIterators =
+ private final List<KeyValueIterator<ByteArray, byte[]>> openIterators =
Collections.synchronizedList(new ArrayList<>());
private int maxKeySize;
@@ -580,22 +590,22 @@
super(namespace, address, valueCoder);
this.keyCoder = keyCoder;
- this.storeKeySize = getEncodedStoreKey().length;
+ this.storeKeySize = getEncodedStoreKeyBytes().length;
// initial max key size is around 100k, so we can restore timer keys
this.maxKeySize = this.storeKeySize + 100_000;
}
@Override
public void put(KeyT key, ValueT value) {
- final byte[] encodedKey = encodeKey(key);
- maxKeySize = Math.max(maxKeySize, encodedKey.length);
+ final ByteArray encodedKey = encodeKey(key);
+ maxKeySize = Math.max(maxKeySize, encodedKey.getValue().length);
store.put(encodedKey, encodeValue(value));
}
@Override
@Nullable
public ReadableState<ValueT> putIfAbsent(KeyT key, ValueT value) {
- final byte[] encodedKey = encodeKey(key);
+ final ByteArray encodedKey = encodeKey(key);
final ValueT current = decodeValue(store.get(encodedKey));
if (current == null) {
put(key, value);
@@ -665,8 +675,8 @@
@Override
public ReadableState<Iterator<Map.Entry<KeyT, ValueT>>> readIterator() {
- final byte[] maxKey = createMaxKey();
- final KeyValueIterator<byte[], byte[]> kvIter = store.range(getEncodedStoreKey(), maxKey);
+ final ByteArray maxKey = createMaxKey();
+ final KeyValueIterator<ByteArray, byte[]> kvIter = store.range(getEncodedStoreKey(), maxKey);
openIterators.add(kvIter);
return new ReadableState<Iterator<Map.Entry<KeyT, ValueT>>>() {
@@ -686,7 +696,7 @@
@Override
public Map.Entry<KeyT, ValueT> next() {
- Entry<byte[], byte[]> entry = kvIter.next();
+ Entry<ByteArray, byte[]> entry = kvIter.next();
return new AbstractMap.SimpleEntry<>(
decodeKey(entry.getKey()), decodeValue(entry.getValue()));
}
@@ -705,16 +715,16 @@
* properly, we need to load the content into memory.
*/
private <OutputT> Iterable<OutputT> createIterable(
- SerializableFunction<org.apache.samza.storage.kv.Entry<byte[], byte[]>, OutputT> fn) {
- final byte[] maxKey = createMaxKey();
- final KeyValueIterator<byte[], byte[]> kvIter = store.range(getEncodedStoreKey(), maxKey);
- final List<Entry<byte[], byte[]>> iterable = ImmutableList.copyOf(kvIter);
+ SerializableFunction<org.apache.samza.storage.kv.Entry<ByteArray, byte[]>, OutputT> fn) {
+ final ByteArray maxKey = createMaxKey();
+ final KeyValueIterator<ByteArray, byte[]> kvIter = store.range(getEncodedStoreKey(), maxKey);
+ final List<Entry<ByteArray, byte[]>> iterable = ImmutableList.copyOf(kvIter);
kvIter.close();
return new Iterable<OutputT>() {
@Override
public Iterator<OutputT> iterator() {
- final Iterator<Entry<byte[], byte[]>> iter = iterable.iterator();
+ final Iterator<Entry<ByteArray, byte[]>> iter = iterable.iterator();
return new Iterator<OutputT>() {
@Override
@@ -733,41 +743,42 @@
@Override
public void clear() {
- final byte[] maxKey = createMaxKey();
- final KeyValueIterator<byte[], byte[]> kvIter = store.range(getEncodedStoreKey(), maxKey);
+ final ByteArray maxKey = createMaxKey();
+ final KeyValueIterator<ByteArray, byte[]> kvIter = store.range(getEncodedStoreKey(), maxKey);
while (kvIter.hasNext()) {
store.delete(kvIter.next().getKey());
}
kvIter.close();
}
- private byte[] encodeKey(KeyT key) {
+ private ByteArray encodeKey(KeyT key) {
try {
final ByteArrayOutputStream baos = getThreadLocalBaos();
- baos.write(getEncodedStoreKey());
+ baos.write(getEncodedStoreKeyBytes());
keyCoder.encode(key, baos);
- return baos.toByteArray();
+ return ByteArray.of(baos.toByteArray());
} catch (Exception e) {
throw new RuntimeException(e);
}
}
- private KeyT decodeKey(byte[] keyBytes) {
+ private KeyT decodeKey(ByteArray keyBytes) {
try {
- final byte[] realKey = Arrays.copyOfRange(keyBytes, storeKeySize, keyBytes.length);
+ final byte[] realKey =
+ Arrays.copyOfRange(keyBytes.value, storeKeySize, keyBytes.value.length);
return keyCoder.decode(new ByteArrayInputStream(realKey));
} catch (Exception e) {
throw new RuntimeException(e);
}
}
- private byte[] createMaxKey() {
+ private ByteArray createMaxKey() {
byte[] maxKey = new byte[maxKeySize];
Arrays.fill(maxKey, (byte) 0xff);
- final byte[] encodedKey = getEncodedStoreKey();
+ final byte[] encodedKey = getEncodedStoreKeyBytes();
System.arraycopy(encodedKey, 0, maxKey, 0, encodedKey.length);
- return maxKey;
+ return ByteArray.of(maxKey);
}
@Override
@@ -893,4 +904,64 @@
clearInternal();
}
}
+
+ /** Wrapper of byte[] so it can be used as a key in the KeyValueStore for caching. */
+ public static class ByteArray implements Serializable, Comparable<ByteArray> {
+
+ private final byte[] value;
+
+ public static ByteArray of(byte[] value) {
+ return new ByteArray(value);
+ }
+
+ private ByteArray(byte[] value) {
+ this.value = value;
+ }
+
+ public byte[] getValue() {
+ return value;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ ByteArray byteArray = (ByteArray) o;
+ return Arrays.equals(value, byteArray.value);
+ }
+
+ @Override
+ public int hashCode() {
+ return value != null ? Arrays.hashCode(value) : 0;
+ }
+
+ @Override
+ public int compareTo(ByteArray other) {
+ return UnsignedBytes.lexicographicalComparator().compare(value, other.value);
+ }
+ }
+
+ /** Factory class to provide {@link ByteArraySerde}. */
+ public static class ByteArraySerdeFactory implements SerdeFactory<ByteArray> {
+
+ @Override
+ public Serde<ByteArray> getSerde(String name, Config config) {
+ return new ByteArraySerde();
+ }
+
+ /** Serde for {@link ByteArray}. */
+ public static class ByteArraySerde implements Serde<ByteArray> {
+
+ @Override
+ public byte[] toBytes(ByteArray byteArray) {
+ return byteArray.value;
+ }
+
+ @Override
+ public ByteArray fromBytes(byte[] bytes) {
+ return ByteArray.of(bytes);
+ }
+ }
+ }
}
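The new ByteArray wrapper exists because raw byte arrays inherit identity-based equals() and hashCode() from Object, so two arrays with equal contents never hit the same map or cache entry. A minimal plain-JDK demonstration of the problem the wrapper (with its Arrays.equals/Arrays.hashCode overrides) solves:

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

public class RawByteArrayKeyDemo {
  public static void main(String[] args) {
    Map<byte[], String> byRawBytes = new HashMap<>();
    byRawBytes.put("state-key".getBytes(StandardCharsets.UTF_8), "value");
    // A fresh array with identical contents misses, because byte[] keys compare by identity.
    System.out.println(byRawBytes.get("state-key".getBytes(StandardCharsets.UTF_8))); // prints: null
  }
}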
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactory.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactory.java
index 676129d..9ac082b 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactory.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactory.java
@@ -184,8 +184,14 @@
@Override
public void setTimer(
- StateNamespace namespace, String timerId, Instant target, TimeDomain timeDomain) {
- setTimer(TimerData.of(timerId, namespace, target, timeDomain));
+ StateNamespace namespace,
+ String timerId,
+ String timerFamilyId,
+ Instant target,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
+ setTimer(
+ TimerData.of(timerId, timerFamilyId, namespace, target, outputTimestamp, timeDomain));
}
@Override
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ConfigBuilder.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ConfigBuilder.java
index accdd12..b3ee221 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ConfigBuilder.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ConfigBuilder.java
@@ -18,27 +18,30 @@
package org.apache.beam.runners.samza.translation;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
+import static org.apache.samza.config.JobConfig.JOB_ID;
+import static org.apache.samza.config.JobConfig.JOB_NAME;
+import static org.apache.samza.config.TaskConfig.COMMIT_MS;
+import static org.apache.samza.config.TaskConfig.GROUPER_FACTORY;
import java.io.File;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.StringUtils;
import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.runners.core.serialization.Base64Serializer;
import org.apache.beam.runners.samza.SamzaExecutionEnvironment;
import org.apache.beam.runners.samza.SamzaPipelineOptions;
import org.apache.beam.runners.samza.container.BeamContainerRunner;
+import org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
-import org.apache.commons.lang3.StringUtils;
import org.apache.samza.config.ApplicationConfig;
import org.apache.samza.config.Config;
import org.apache.samza.config.ConfigFactory;
-import org.apache.samza.config.JobConfig;
import org.apache.samza.config.JobCoordinatorConfig;
import org.apache.samza.config.MapConfig;
-import org.apache.samza.config.TaskConfig;
import org.apache.samza.config.ZkConfig;
import org.apache.samza.config.factories.PropertiesConfigFactory;
import org.apache.samza.container.grouper.task.SingleContainerGrouperFactory;
@@ -84,8 +87,8 @@
config.put(ApplicationConfig.APP_NAME, options.getJobName());
config.put(ApplicationConfig.APP_ID, options.getJobInstance());
- config.put(JobConfig.JOB_NAME(), options.getJobName());
- config.put(JobConfig.JOB_ID(), options.getJobInstance());
+ config.put(JOB_NAME, options.getJobName());
+ config.put(JOB_ID, options.getJobInstance());
config.put(
"beamPipelineOptions",
@@ -196,8 +199,8 @@
.put(
JobCoordinatorConfig.JOB_COORDINATOR_FACTORY,
PassthroughJobCoordinatorFactory.class.getName())
- .put(TaskConfig.GROUPER_FACTORY(), SingleContainerGrouperFactory.class.getName())
- .put(TaskConfig.COMMIT_MS(), "-1")
+ .put(GROUPER_FACTORY, SingleContainerGrouperFactory.class.getName())
+ .put(COMMIT_MS, "-1")
.put("processor.id", "1")
.put(
// TODO: remove after SAMZA-1531 is resolved
@@ -231,9 +234,12 @@
.put(
"stores.beamStore.factory",
"org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory")
- .put("stores.beamStore.key.serde", "byteSerde")
+ .put("stores.beamStore.key.serde", "byteArraySerde")
.put("stores.beamStore.msg.serde", "byteSerde")
- .put("serializers.registry.byteSerde.class", ByteSerdeFactory.class.getName());
+ .put("serializers.registry.byteSerde.class", ByteSerdeFactory.class.getName())
+ .put(
+ "serializers.registry.byteArraySerde.class",
+ SamzaStoreStateInternals.ByteArraySerdeFactory.class.getName());
if (options.getStateDurable()) {
LOG.info("stateDurable is enabled");
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ParDoBoundMultiTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ParDoBoundMultiTranslator.java
index 6550ebf..805af2c 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ParDoBoundMultiTranslator.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ParDoBoundMultiTranslator.java
@@ -310,7 +310,7 @@
config.put(
"stores." + storeId + ".factory",
"org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory");
- config.put("stores." + storeId + ".key.serde", "byteSerde");
+ config.put("stores." + storeId + ".key.serde", "byteArraySerde");
config.put("stores." + storeId + ".msg.serde", "byteSerde");
if (options.getStateDurable()) {
diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactoryTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactoryTest.java
index 27d8ba8..0f536c3 100644
--- a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactoryTest.java
+++ b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactoryTest.java
@@ -19,21 +19,26 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
-import static org.mockito.Matchers.anyString;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import org.apache.beam.runners.core.StateNamespace;
import org.apache.beam.runners.core.StateNamespaces;
import org.apache.beam.runners.core.TimerInternals;
import org.apache.beam.runners.samza.SamzaPipelineOptions;
+import org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArray;
+import org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArraySerdeFactory;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.state.TimeDomain;
@@ -44,9 +49,13 @@
import org.apache.samza.context.TaskContext;
import org.apache.samza.metrics.MetricsRegistryMap;
import org.apache.samza.operators.Scheduler;
+import org.apache.samza.serializers.ByteSerde;
+import org.apache.samza.serializers.Serde;
import org.apache.samza.storage.kv.KeyValueStore;
import org.apache.samza.storage.kv.KeyValueStoreMetrics;
import org.apache.samza.storage.kv.RocksDbKeyValueStore;
+import org.apache.samza.storage.kv.SerializedKeyValueStore;
+import org.apache.samza.storage.kv.SerializedKeyValueStoreMetrics;
import org.joda.time.Instant;
import org.junit.Test;
import org.rocksdb.FlushOptions;
@@ -58,23 +67,30 @@
* timers.
*/
public class SamzaTimerInternalsFactoryTest {
- private static RocksDbKeyValueStore createStore(String name) {
+ private static KeyValueStore<ByteArray, byte[]> createStore(String name) {
final Options options = new Options();
options.setCreateIfMissing(true);
- return new RocksDbKeyValueStore(
- new File(System.getProperty("java.io.tmpdir") + "/" + name),
- options,
- new MapConfig(),
- false,
- "beamStore",
- new WriteOptions(),
- new FlushOptions(),
- new KeyValueStoreMetrics("beamStore", new MetricsRegistryMap()));
+ RocksDbKeyValueStore rocksStore =
+ new RocksDbKeyValueStore(
+ new File(System.getProperty("java.io.tmpdir") + "/" + name),
+ options,
+ new MapConfig(),
+ false,
+ "beamStore",
+ new WriteOptions(),
+ new FlushOptions(),
+ new KeyValueStoreMetrics("beamStore", new MetricsRegistryMap()));
+
+ return new SerializedKeyValueStore<>(
+ rocksStore,
+ new ByteArraySerdeFactory.ByteArraySerde(),
+ new ByteSerde(),
+ new SerializedKeyValueStoreMetrics("beamStore", new MetricsRegistryMap()));
}
private static SamzaStoreStateInternals.Factory<?> createNonKeyedStateInternalsFactory(
- SamzaPipelineOptions pipelineOptions, RocksDbKeyValueStore store) {
+ SamzaPipelineOptions pipelineOptions, KeyValueStore<ByteArray, byte[]> store) {
final TaskContext context = mock(TaskContext.class);
when(context.getStore(anyString())).thenReturn((KeyValueStore) store);
final TupleTag<?> mainOutputTag = new TupleTag<>("output");
@@ -87,7 +103,7 @@
Scheduler<KeyedTimerData<String>> timerRegistry,
String timerStateId,
SamzaPipelineOptions pipelineOptions,
- RocksDbKeyValueStore store) {
+ KeyValueStore<ByteArray, byte[]> store) {
final SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory =
createNonKeyedStateInternalsFactory(pipelineOptions, store);
@@ -121,7 +137,7 @@
final SamzaPipelineOptions pipelineOptions =
PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
- final RocksDbKeyValueStore store = createStore("store1");
+ final KeyValueStore<ByteArray, byte[]> store = createStore("store1");
final SamzaTimerInternalsFactory<String> timerInternalsFactory =
createTimerInternalsFactory(null, "timer", pipelineOptions, store);
@@ -157,7 +173,7 @@
final SamzaPipelineOptions pipelineOptions =
PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
- RocksDbKeyValueStore store = createStore("store2");
+ KeyValueStore<ByteArray, byte[]> store = createStore("store2");
final SamzaTimerInternalsFactory<String> timerInternalsFactory =
createTimerInternalsFactory(null, "timer", pipelineOptions, store);
@@ -200,7 +216,7 @@
final SamzaPipelineOptions pipelineOptions =
PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
- RocksDbKeyValueStore store = createStore("store3");
+ KeyValueStore<ByteArray, byte[]> store = createStore("store3");
TestTimerRegistry timerRegistry = new TestTimerRegistry();
final SamzaTimerInternalsFactory<String> timerInternalsFactory =
@@ -244,7 +260,7 @@
final SamzaPipelineOptions pipelineOptions =
PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
- RocksDbKeyValueStore store = createStore("store4");
+ KeyValueStore<ByteArray, byte[]> store = createStore("store4");
final SamzaTimerInternalsFactory<String> timerInternalsFactory =
createTimerInternalsFactory(null, "timer", pipelineOptions, store);
@@ -278,4 +294,21 @@
store.close();
}
+
+ @Test
+ public void testByteArray() {
+ ByteArray key1 = ByteArray.of("hello world".getBytes(StandardCharsets.UTF_8));
+ Serde<ByteArray> serde = new ByteArraySerdeFactory().getSerde("", null);
+ byte[] keyBytes = serde.toBytes(key1);
+ ByteArray key2 = serde.fromBytes(keyBytes);
+ assertEquals(key1, key2);
+
+ Map<ByteArray, String> map = new HashMap<>();
+ map.put(key1, "found it");
+ assertEquals("found it", map.get(key2));
+
+ map.remove(key1);
+ assertTrue(!map.containsKey(key2));
+ assertTrue(map.isEmpty());
+ }
}
diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/translation/ConfigGeneratorTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/translation/ConfigGeneratorTest.java
index f71585d..c4b910a 100644
--- a/runners/samza/src/test/java/org/apache/beam/runners/samza/translation/ConfigGeneratorTest.java
+++ b/runners/samza/src/test/java/org/apache/beam/runners/samza/translation/ConfigGeneratorTest.java
@@ -73,7 +73,7 @@
assertEquals(
RocksDbKeyValueStorageEngineFactory.class.getName(),
config.get("stores.beamStore.factory"));
- assertEquals("byteSerde", config.get("stores.beamStore.key.serde"));
+ assertEquals("byteArraySerde", config.get("stores.beamStore.key.serde"));
assertEquals("byteSerde", config.get("stores.beamStore.msg.serde"));
assertNull(config.get("stores.beamStore.changelog"));
@@ -203,7 +203,7 @@
assertEquals(
RocksDbKeyValueStorageEngineFactory.class.getName(),
config.get("stores.testState.factory"));
- assertEquals("byteSerde", config.get("stores.testState.key.serde"));
+ assertEquals("byteArraySerde", config.get("stores.testState.key.serde"));
assertEquals("byteSerde", config.get("stores.testState.msg.serde"));
assertNull(config.get("stores.testState.changelog"));
diff --git a/runners/spark/build.gradle b/runners/spark/build.gradle
index 80130bc7..1d70ad1 100644
--- a/runners/spark/build.gradle
+++ b/runners/spark/build.gradle
@@ -70,8 +70,7 @@
provided library.java.spark_streaming
provided library.java.spark_network_common
provided library.java.hadoop_common
- provided library.java.commons_lang3
- provided library.java.commons_io_2x
+ provided library.java.commons_io
provided library.java.hamcrest_core
provided library.java.hamcrest_library
provided "com.esotericsoftware.kryo:kryo:2.21"
@@ -202,6 +201,7 @@
// Portability
excludeCategories 'org.apache.beam.sdk.testing.UsesImpulse'
excludeCategories 'org.apache.beam.sdk.testing.UsesCrossLanguageTransforms'
+ excludeCategories 'org.apache.beam.sdk.testing.FlattenWithHeterogeneousCoders'
}
filter {
// Combine with context not implemented
diff --git a/runners/spark/job-server/build.gradle b/runners/spark/job-server/build.gradle
index 6fb7581..30dc24d 100644
--- a/runners/spark/job-server/build.gradle
+++ b/runners/spark/job-server/build.gradle
@@ -38,7 +38,7 @@
def sparkRunnerProject = project.parent.path
-description = project(sparkRunnerProject).description + " :: Job Server"
+description = "Apache Beam :: Runners :: Spark :: Job Server"
configurations {
validatesPortableRunner
@@ -124,3 +124,32 @@
task validatesPortableRunner() {
dependsOn validatesPortableRunnerBatch
}
+
+def addTestJavaJarCreator(String pyVersion) {
+ def pyBuildPath = pyVersion.startsWith("2") ? "2" : pyVersion.replaceAll("\\.", "")
+ project.tasks.create(name: "testJavaJarCreatorPy${pyBuildPath}") {
+ dependsOn shadowJar
+ dependsOn ":sdks:python:container:py${pyBuildPath}:docker"
+ doLast{
+ exec {
+ executable "sh"
+ def options = [
+ "--spark_job_server_jar ${shadowJar.archivePath}",
+ "--env_dir ${project.rootProject.buildDir}/gradleenv/${project.path.hashCode()}",
+ "--python_root_dir ${project.rootDir}/sdks/python",
+ "--python_version ${pyVersion}",
+ "--python_container_image apachebeam/python${pyVersion}_sdk:${project['python_sdk_version']}",
+ ]
+ args "-c", "./test_spark_pipeline_jar.sh ${options.join(' ')}"
+ }
+ }
+ }
+}
+
+["2.7", "3.5", "3.6", "3.7"].each { pyVersion ->
+ addTestJavaJarCreator(pyVersion)
+}
+
+task testPipelineJar() {
+ dependsOn testJavaJarCreatorPy37
+}
diff --git a/runners/spark/job-server/container/Dockerfile b/runners/spark/job-server/container/Dockerfile
new file mode 100644
index 0000000..11a5f0e
--- /dev/null
+++ b/runners/spark/job-server/container/Dockerfile
@@ -0,0 +1,28 @@
+###############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+
+FROM openjdk:8
+MAINTAINER "Apache Beam <dev@beam.apache.org>"
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y libltdl7
+
+ADD beam-runners-spark-job-server.jar /opt/apache/beam/jars/
+ADD spark-job-server.sh /opt/apache/beam/
+
+WORKDIR /opt/apache/beam
+ENTRYPOINT ["./spark-job-server.sh"]
diff --git a/runners/spark/job-server/container/build.gradle b/runners/spark/job-server/container/build.gradle
new file mode 100644
index 0000000..c7244a3
--- /dev/null
+++ b/runners/spark/job-server/container/build.gradle
@@ -0,0 +1,64 @@
+import org.apache.beam.gradle.BeamModulePlugin
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Build a Docker image to bootstrap SparkJobServerDriver, which requires a Java environment.
+ * Alternatively, it can be bootstrapped through the runShadow goal
+ * or by directly running the generated JAR file.
+ */
+
+apply plugin: 'org.apache.beam.module'
+apply plugin: 'base'
+applyDockerNature()
+
+def sparkJobServerProject = project.parent.path
+
+description = "Apache Beam :: Runners :: Spark :: Job Server :: Container"
+
+configurations {
+ dockerDependency
+}
+
+dependencies {
+ dockerDependency project(path: sparkJobServerProject, configuration: "shadow")
+}
+
+task copyDockerfileDependencies(type: Copy) {
+ // Required Jars
+ from configurations.dockerDependency
+ rename 'beam-runners-spark-job-server.*.jar', 'beam-runners-spark-job-server.jar'
+ into "build"
+ // Entry script
+ from "spark-job-server.sh"
+ into "build"
+ // Dockerfile
+ from "Dockerfile"
+ into "build"
+}
+
+docker {
+ name containerImageName(name: 'spark-job-server',
+ root: project.rootProject.hasProperty(["docker-repository-root"]) ?
+ project.rootProject["docker-repository-root"] : "apachebeam")
+ files "./build/"
+}
+
+// Ensure that we build the required resources and copy file dependencies from related projects
+dockerPrepare.dependsOn copyDockerfileDependencies
diff --git a/runners/spark/job-server/container/spark-job-server.sh b/runners/spark/job-server/container/spark-job-server.sh
new file mode 100755
index 0000000..e39c5ae
--- /dev/null
+++ b/runners/spark/job-server/container/spark-job-server.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+###############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+
+### Just a simple script to bootstrap the SparkJobServerDriver
+### For the environment, see the Dockerfile
+
+# The following (forking to the background, then waiting) makes it possible to use CTRL+C to kill the container.
+# We run as PID 1, which does not handle signals. By forking the Java process to the background,
+# a PID > 1 is created which does handle signals. After the command shuts down, the script and
+# thus the container will also exit.
+
+java -cp "jars/*" org.apache.beam.runners.spark.SparkJobServerDriver "$@" &
+wait
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkJobInvoker.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkJobInvoker.java
index 3e01f6d..2ea261f 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkJobInvoker.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkJobInvoker.java
@@ -23,8 +23,12 @@
import org.apache.beam.runners.core.construction.PipelineOptionsTranslation;
import org.apache.beam.runners.fnexecution.jobsubmission.JobInvocation;
import org.apache.beam.runners.fnexecution.jobsubmission.JobInvoker;
+import org.apache.beam.runners.fnexecution.jobsubmission.PortablePipelineJarCreator;
+import org.apache.beam.runners.fnexecution.jobsubmission.PortablePipelineRunner;
import org.apache.beam.runners.fnexecution.provisioning.JobInfo;
+import org.apache.beam.sdk.options.PortablePipelineOptions;
import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.Struct;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Strings;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.ListeningExecutorService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -88,7 +92,13 @@
sparkOptions.getJobName(),
retrievalToken,
PipelineOptionsTranslation.toProto(sparkOptions));
- SparkPipelineRunner pipelineRunner = new SparkPipelineRunner(sparkOptions);
+ PortablePipelineRunner pipelineRunner;
+ if (Strings.isNullOrEmpty(
+ sparkOptions.as(PortablePipelineOptions.class).getOutputExecutablePath())) {
+ pipelineRunner = new SparkPipelineRunner(sparkOptions);
+ } else {
+ pipelineRunner = new PortablePipelineJarCreator(SparkPipelineRunner.class);
+ }
return new JobInvocation(jobInfo, executorService, pipeline, pipelineRunner);
}
}
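A minimal sketch (not part of the patch) of how the new branch is exercised from pipeline options: when PortablePipelineOptions carries a non-empty output executable path, SparkJobInvoker hands the job to PortablePipelineJarCreator instead of SparkPipelineRunner. The output path below is a hypothetical example.

    import org.apache.beam.sdk.options.PipelineOptionsFactory;
    import org.apache.beam.sdk.options.PortablePipelineOptions;

    PortablePipelineOptions options =
        PipelineOptionsFactory.create().as(PortablePipelineOptions.class);
    // Hypothetical output location; any writable path should do.
    options.setOutputExecutablePath("/tmp/my-pipeline.jar");
    // With this set, createJobInvocation(...) selects PortablePipelineJarCreator,
    // which packages the pipeline into a runnable jar; leaving it null or empty
    // keeps the original SparkPipelineRunner execution path.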
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkNativePipelineVisitor.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkNativePipelineVisitor.java
index b85a71e..0b91c4f 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkNativePipelineVisitor.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkNativePipelineVisitor.java
@@ -21,6 +21,7 @@
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.StringUtils;
import org.apache.beam.runners.spark.translation.EvaluationContext;
import org.apache.beam.runners.spark.translation.SparkPipelineTranslator;
import org.apache.beam.runners.spark.translation.TransformEvaluator;
@@ -32,7 +33,6 @@
import org.apache.beam.sdk.values.POutput;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Joiner;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists;
-import org.apache.commons.lang3.StringUtils;
/**
* Pipeline visitor for translating a Beam pipeline into equivalent Spark operations. Used for
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineOptions.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineOptions.java
index 100227c..544bec6 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineOptions.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineOptions.java
@@ -20,7 +20,7 @@
import java.io.File;
import java.util.List;
import java.util.stream.Collectors;
-import org.apache.beam.runners.core.construction.PipelineResources;
+import org.apache.beam.runners.core.construction.resources.PipelineResources;
import org.apache.beam.sdk.annotations.Experimental;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunner.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunner.java
index 1d3f92d..d0c1c0f 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunner.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunner.java
@@ -17,21 +17,24 @@
*/
package org.apache.beam.runners.spark;
-import static org.apache.beam.runners.core.construction.PipelineResources.detectClassPathResourcesToStage;
+import static org.apache.beam.runners.core.construction.resources.PipelineResources.detectClassPathResourcesToStage;
import static org.apache.beam.runners.spark.SparkPipelineOptions.prepareFilesToStage;
+import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline;
import org.apache.beam.runners.core.construction.PTransformTranslation;
+import org.apache.beam.runners.core.construction.PipelineOptionsTranslation;
import org.apache.beam.runners.core.construction.graph.ExecutableStage;
import org.apache.beam.runners.core.construction.graph.GreedyPipelineFuser;
import org.apache.beam.runners.core.construction.graph.PipelineTrimmer;
import org.apache.beam.runners.core.construction.graph.ProtoOverrides;
import org.apache.beam.runners.core.construction.graph.SplittableParDoExpander;
import org.apache.beam.runners.core.metrics.MetricsPusher;
+import org.apache.beam.runners.fnexecution.jobsubmission.PortablePipelineJarUtils;
import org.apache.beam.runners.fnexecution.jobsubmission.PortablePipelineResult;
import org.apache.beam.runners.fnexecution.jobsubmission.PortablePipelineRunner;
import org.apache.beam.runners.fnexecution.provisioning.JobInfo;
@@ -40,9 +43,18 @@
import org.apache.beam.runners.spark.translation.SparkBatchPortablePipelineTranslator;
import org.apache.beam.runners.spark.translation.SparkContextFactory;
import org.apache.beam.runners.spark.translation.SparkTranslationContext;
+import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.metrics.MetricsEnvironment;
import org.apache.beam.sdk.metrics.MetricsOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.options.PortablePipelineOptions;
+import org.apache.beam.sdk.options.PortablePipelineOptions.RetrievalServiceType;
+import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.Struct;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions;
import org.apache.spark.api.java.JavaSparkContext;
+import org.kohsuke.args4j.CmdLineException;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -82,7 +94,8 @@
if (pipelineOptions.getFilesToStage() == null) {
pipelineOptions.setFilesToStage(
- detectClassPathResourcesToStage(SparkPipelineRunner.class.getClassLoader()));
+ detectClassPathResourcesToStage(
+ SparkPipelineRunner.class.getClassLoader(), pipelineOptions));
LOG.info(
"PipelineOptions.filesToStage was not specified. Defaulting to files from the classpath");
}
@@ -127,4 +140,74 @@
executorService.shutdown();
return result;
}
+
+ /**
+ * Main method to be called only as the entry point to an executable jar with the structure defined
+ * in {@link PortablePipelineJarUtils}.
+ */
+ public static void main(String[] args) throws Exception {
+ // Register standard file systems.
+ FileSystems.setDefaultPipelineOptions(PipelineOptionsFactory.create());
+
+ SparkPipelineRunnerConfiguration configuration = parseArgs(args);
+ String baseJobName =
+ configuration.baseJobName == null
+ ? PortablePipelineJarUtils.getDefaultJobName()
+ : configuration.baseJobName;
+ Preconditions.checkArgument(
+ baseJobName != null,
+ "No default job name found. Job name must be set using --base-job-name.");
+ Pipeline pipeline = PortablePipelineJarUtils.getPipelineFromClasspath(baseJobName);
+ Struct originalOptions = PortablePipelineJarUtils.getPipelineOptionsFromClasspath(baseJobName);
+
+ // Spark pipeline jars distribute and retrieve artifacts via the classpath.
+ PortablePipelineOptions portablePipelineOptions =
+ PipelineOptionsTranslation.fromProto(originalOptions).as(PortablePipelineOptions.class);
+ portablePipelineOptions.setRetrievalServiceType(RetrievalServiceType.CLASSLOADER);
+ String retrievalToken = PortablePipelineJarUtils.getArtifactManifestUri(baseJobName);
+
+ SparkPipelineOptions sparkOptions = portablePipelineOptions.as(SparkPipelineOptions.class);
+ String invocationId =
+ String.format("%s_%s", sparkOptions.getJobName(), UUID.randomUUID().toString());
+ if (sparkOptions.getAppName() == null) {
+ LOG.debug("App name was null. Using invocationId {}", invocationId);
+ sparkOptions.setAppName(invocationId);
+ }
+
+ SparkPipelineRunner runner = new SparkPipelineRunner(sparkOptions);
+ JobInfo jobInfo =
+ JobInfo.create(
+ invocationId,
+ sparkOptions.getJobName(),
+ retrievalToken,
+ PipelineOptionsTranslation.toProto(sparkOptions));
+ try {
+ runner.run(pipeline, jobInfo);
+ } catch (Exception e) {
+ throw new RuntimeException(String.format("Job %s failed.", invocationId), e);
+ }
+ LOG.info("Job {} finished successfully.", invocationId);
+ }
+
+ private static class SparkPipelineRunnerConfiguration {
+ @Option(
+ name = "--base-job-name",
+ usage =
+ "The job to run. This must correspond to a subdirectory of the jar's BEAM-PIPELINE "
+ + "directory. *Only needs to be specified if the jar contains multiple pipelines.*")
+ private String baseJobName = null;
+ }
+
+ private static SparkPipelineRunnerConfiguration parseArgs(String[] args) {
+ SparkPipelineRunnerConfiguration configuration = new SparkPipelineRunnerConfiguration();
+ CmdLineParser parser = new CmdLineParser(configuration);
+ try {
+ parser.parseArgument(args);
+ } catch (CmdLineException e) {
+ LOG.error("Unable to parse command line arguments.", e);
+ parser.printUsage(System.err);
+ throw new IllegalArgumentException("Unable to parse command line arguments.", e);
+ }
+ return configuration;
+ }
}
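The new main method makes the jar produced by PortablePipelineJarCreator self-executing. A hedged sketch of invoking that entry point programmatically; the job name is a hypothetical example, and --base-job-name is only required when the jar bundles more than one pipeline.

    // Minimal sketch: equivalent to running the generated jar directly, which
    // ends up in SparkPipelineRunner.main.
    public class RunPortableSparkJar {
      public static void main(String[] args) throws Exception {
        org.apache.beam.runners.spark.SparkPipelineRunner.main(
            new String[] {"--base-job-name=my-job"}); // hypothetical job name
      }
    }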
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
index c4f17f8..d99aef0 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
@@ -17,7 +17,7 @@
*/
package org.apache.beam.runners.spark;
-import static org.apache.beam.runners.core.construction.PipelineResources.detectClassPathResourcesToStage;
+import static org.apache.beam.runners.core.construction.resources.PipelineResources.detectClassPathResourcesToStage;
import static org.apache.beam.runners.spark.SparkPipelineOptions.prepareFilesToStage;
import java.util.Collection;
@@ -128,7 +128,7 @@
if (sparkOptions.getFilesToStage() == null) {
sparkOptions.setFilesToStage(
- detectClassPathResourcesToStage(SparkRunner.class.getClassLoader()));
+ detectClassPathResourcesToStage(SparkRunner.class.getClassLoader(), options));
LOG.info(
"PipelineOptions.filesToStage was not specified. "
+ "Defaulting to files from the classpath: will stage {} files. "
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/MetricsAccumulator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/MetricsAccumulator.java
index a18b92d..2d83c3c 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/MetricsAccumulator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/MetricsAccumulator.java
@@ -37,7 +37,7 @@
* For resilience, {@link AccumulatorV2 Accumulators} are required to be wrapped in a Singleton.
*
* @see <a
- * href="https://spark.apache.org/docs/latest/api/java/org/apache/spark/util/AccumulatorV2.html">accumulatorsV2</a>
+ * href="https://spark.apache.org/docs/2.4.4/streaming-programming-guide.html#accumulators-broadcast-variables-and-checkpoints">accumulatorsV2</a>
*/
public class MetricsAccumulator {
private static final Logger LOG = LoggerFactory.getLogger(MetricsAccumulator.class);
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/stateful/SparkGroupAlsoByWindowViaWindowSet.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/stateful/SparkGroupAlsoByWindowViaWindowSet.java
index f8ff5e6..b741050 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/stateful/SparkGroupAlsoByWindowViaWindowSet.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/stateful/SparkGroupAlsoByWindowViaWindowSet.java
@@ -368,7 +368,7 @@
private final FullWindowedValueCoder<InputT> wvCoder;
private final Coder<K> keyCoder;
private final List<Integer> sourceIds;
- private final TimerInternals.TimerDataCoder timerDataCoder;
+ private final TimerInternals.TimerDataCoderV2 timerDataCoder;
private final WindowingStrategy<?, W> windowingStrategy;
private final SerializablePipelineOptions options;
private final IterableCoder<WindowedValue<InputT>> itrWvCoder;
@@ -461,9 +461,9 @@
return FullWindowedValueCoder.of(KvCoder.of(keyCoder, IterableCoder.of(iCoder)), wCoder);
}
- private static <W extends BoundedWindow> TimerInternals.TimerDataCoder timerDataCoderOf(
+ private static <W extends BoundedWindow> TimerInternals.TimerDataCoderV2 timerDataCoderOf(
final WindowingStrategy<?, W> windowingStrategy) {
- return TimerInternals.TimerDataCoder.of(windowingStrategy.getWindowFn().windowCoder());
+ return TimerInternals.TimerDataCoderV2.of(windowingStrategy.getWindowFn().windowCoder());
}
private static void checkpointIfNeeded(
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/stateful/SparkTimerInternals.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/stateful/SparkTimerInternals.java
index 6cdcef4..1d5b36b 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/stateful/SparkTimerInternals.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/stateful/SparkTimerInternals.java
@@ -155,7 +155,12 @@
@Override
public void setTimer(
- StateNamespace namespace, String timerId, Instant target, TimeDomain timeDomain) {
+ StateNamespace namespace,
+ String timerId,
+ String timerFamilyId,
+ Instant target,
+ Instant outputTimestamp,
+ TimeDomain timeDomain) {
throw new UnsupportedOperationException("Setting a timer by ID not yet supported.");
}
@@ -165,12 +170,12 @@
}
public static Collection<byte[]> serializeTimers(
- Collection<TimerData> timers, TimerDataCoder timerDataCoder) {
+ Collection<TimerData> timers, TimerDataCoderV2 timerDataCoder) {
return CoderHelpers.toByteArrays(timers, timerDataCoder);
}
public static Iterator<TimerData> deserializeTimers(
- Collection<byte[]> serTimers, TimerDataCoder timerDataCoder) {
+ Collection<byte[]> serTimers, TimerDataCoderV2 timerDataCoder) {
return CoderHelpers.fromByteArrays(serTimers, timerDataCoder).iterator();
}
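With the move to TimerDataCoderV2, the timer serialization helpers change signature. A minimal round-trip sketch under the assumption of a global windowing strategy; `timers` is a placeholder for timers collected during a bundle.

    import java.util.Collection;
    import java.util.Collections;
    import java.util.Iterator;
    import org.apache.beam.runners.core.TimerInternals;
    import org.apache.beam.sdk.transforms.windowing.GlobalWindow;

    // Build the V2 coder from the window coder, then round-trip the timers.
    TimerInternals.TimerDataCoderV2 timerCoder =
        TimerInternals.TimerDataCoderV2.of(GlobalWindow.Coder.INSTANCE);
    Collection<TimerInternals.TimerData> timers = Collections.emptyList(); // placeholder
    Collection<byte[]> serialized = SparkTimerInternals.serializeTimers(timers, timerCoder);
    Iterator<TimerInternals.TimerData> restored =
        SparkTimerInternals.deserializeTimers(serialized, timerCoder);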
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingRunner.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingRunner.java
index 10b95d1..00ed246 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingRunner.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingRunner.java
@@ -17,7 +17,7 @@
*/
package org.apache.beam.runners.spark.structuredstreaming;
-import static org.apache.beam.runners.core.construction.PipelineResources.detectClassPathResourcesToStage;
+import static org.apache.beam.runners.core.construction.resources.PipelineResources.detectClassPathResourcesToStage;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -111,7 +111,8 @@
if (sparkOptions.getFilesToStage() == null) {
sparkOptions.setFilesToStage(
- detectClassPathResourcesToStage(SparkStructuredStreamingRunner.class.getClassLoader()));
+ detectClassPathResourcesToStage(
+ SparkStructuredStreamingRunner.class.getClassLoader(), options));
LOG.info(
"PipelineOptions.filesToStage was not specified. "
+ "Defaulting to files from the classpath: will stage {} files. "
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsAccumulator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsAccumulator.java
index ac44bf8..f674ef9 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsAccumulator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsAccumulator.java
@@ -28,7 +28,7 @@
* For resilience, {@link AccumulatorV2 Accumulators} are required to be wrapped in a Singleton.
*
* @see <a
- * href="https://spark.apache.org/docs/latest/api/java/org/apache/spark/util/AccumulatorV2.html">accumulatorsV2</a>
+ * href="https://spark.apache.org/docs/2.4.4/streaming-programming-guide.html#accumulators-broadcast-variables-and-checkpoints">accumulatorsV2</a>
*/
public class MetricsAccumulator {
private static final Logger LOG = LoggerFactory.getLogger(MetricsAccumulator.class);
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java
index b623e55..7aab4e1 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java
@@ -18,7 +18,7 @@
package org.apache.beam.runners.spark.structuredstreaming.translation;
import org.apache.beam.runners.core.construction.PTransformTranslation;
-import org.apache.beam.runners.core.construction.PipelineResources;
+import org.apache.beam.runners.core.construction.resources.PipelineResources;
import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions;
import org.apache.beam.runners.spark.structuredstreaming.translation.batch.PipelineTranslatorBatch;
import org.apache.beam.runners.spark.structuredstreaming.translation.streaming.PipelineTranslatorStreaming;
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SchemaHelpers.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SchemaHelpers.java
index 13fbfb8..b778c46 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SchemaHelpers.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SchemaHelpers.java
@@ -24,14 +24,16 @@
/** A {@link SchemaHelpers} for the Spark Batch Runner. */
public class SchemaHelpers {
+ private static final StructType BINARY_SCHEMA =
+ new StructType(
+ new StructField[] {
+ StructField.apply("binaryStructField", DataTypes.BinaryType, true, Metadata.empty())
+ });
+
public static StructType binarySchema() {
// we use a binary schema for now because:
// using an empty schema raises an IndexOutOfBoundsException
// using a NullType schema stores null in the elements
- StructField[] array = new StructField[1];
- StructField binaryStructField =
- StructField.apply("binaryStructField", DataTypes.BinaryType, true, Metadata.empty());
- array[0] = binaryStructField;
- return new StructType(array);
+ return BINARY_SCHEMA;
}
}
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnRunnerWithMetrics.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnRunnerWithMetrics.java
index 46dc282..55d97ba 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnRunnerWithMetrics.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnRunnerWithMetrics.java
@@ -71,11 +71,13 @@
@Override
public void onTimer(
final String timerId,
+ final String timerFamilyId,
final BoundedWindow window,
final Instant timestamp,
+ final Instant outputTimestamp,
final TimeDomain timeDomain) {
try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(metricsContainer())) {
- delegate.onTimer(timerId, window, timestamp, timeDomain);
+ delegate.onTimer(timerId, timerFamilyId, window, timestamp, outputTimestamp, timeDomain);
} catch (IOException e) {
throw new RuntimeException(e);
}
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/EncoderHelpers.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/EncoderHelpers.java
index 3310546..b77f94c 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/EncoderHelpers.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/EncoderHelpers.java
@@ -19,11 +19,12 @@
import static org.apache.spark.sql.types.DataTypes.BinaryType;
-import java.io.ByteArrayInputStream;
import java.io.Serializable;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import java.util.Objects;
+import javax.annotation.Nullable;
import org.apache.beam.runners.spark.structuredstreaming.translation.SchemaHelpers;
import org.apache.beam.sdk.coders.Coder;
import org.apache.spark.sql.Encoder;
@@ -48,29 +49,23 @@
/** {@link Encoders} utility class. */
public class EncoderHelpers {
-
- /*
- --------- Bridges from Beam Coders to Spark Encoders
- */
-
/**
* Wrap a Beam coder into a Spark Encoder using Catalyst Expression Encoders (which uses java code
* generation).
*/
- public static <T> Encoder<T> fromBeamCoder(Coder<T> beamCoder) {
+ public static <T> Encoder<T> fromBeamCoder(Coder<T> coder) {
+ Class<? super T> clazz = coder.getEncodedTypeDescriptor().getRawType();
+ ClassTag<T> classTag = ClassTag$.MODULE$.apply(clazz);
+ List<Expression> serializers =
+ Collections.singletonList(
+ new EncodeUsingBeamCoder<>(new BoundReference(0, new ObjectType(clazz), true), coder));
- List<Expression> serialiserList = new ArrayList<>();
- Class<? super T> claz = beamCoder.getEncodedTypeDescriptor().getRawType();
-
- serialiserList.add(
- new EncodeUsingBeamCoder<>(new BoundReference(0, new ObjectType(claz), true), beamCoder));
- ClassTag<T> classTag = ClassTag$.MODULE$.apply(claz);
return new ExpressionEncoder<>(
SchemaHelpers.binarySchema(),
false,
- JavaConversions.collectionAsScalaIterable(serialiserList).toSeq(),
+ JavaConversions.collectionAsScalaIterable(serializers).toSeq(),
new DecodeUsingBeamCoder<>(
- new Cast(new GetColumnByOrdinal(0, BinaryType), BinaryType), classTag, beamCoder),
+ new Cast(new GetColumnByOrdinal(0, BinaryType), BinaryType), classTag, coder),
classTag);
}
@@ -82,12 +77,12 @@
public static class EncodeUsingBeamCoder<T> extends UnaryExpression
implements NonSQLExpression, Serializable {
- private Expression child;
- private Coder<T> beamCoder;
+ private final Expression child;
+ private final Coder<T> coder;
- public EncodeUsingBeamCoder(Expression child, Coder<T> beamCoder) {
+ public EncodeUsingBeamCoder(Expression child, Coder<T> coder) {
this.child = child;
- this.beamCoder = beamCoder;
+ this.coder = coder;
}
@Override
@@ -97,48 +92,31 @@
@Override
public ExprCode doGenCode(CodegenContext ctx, ExprCode ev) {
- // Code to serialize.
- String accessCode =
- ctx.addReferenceObj("beamCoder", beamCoder, beamCoder.getClass().getName());
+ String accessCode = ctx.addReferenceObj("coder", coder, coder.getClass().getName());
ExprCode input = child.genCode(ctx);
+ String javaType = CodeGenerator.javaType(dataType());
+ List<String> parts = new ArrayList<>();
+ List<Object> args = new ArrayList<>();
/*
CODE GENERATED
- byte[] ${ev.value};
- try {
- java.io.ByteArrayOutputStream baos = new java.io.ByteArrayOutputStream();
- if ({input.isNull})
- ${ev.value} = null;
- else{
- $beamCoder.encode(${input.value}, baos);
- ${ev.value} = baos.toByteArray();
- }
- } catch (Exception e) {
- throw org.apache.beam.sdk.util.UserCodeException.wrap(e);
- }
+ final ${javaType} ${ev.value} = org.apache.beam.runners.spark.structuredstreaming.translation.helpers.EncoderHelpers.EncodeUsingBeamCoder.encode(${input.value}, ${coder});
*/
- List<String> parts = new ArrayList<>();
- parts.add("byte[] ");
+ parts.add("final ");
+ args.add(javaType);
+ parts.add(" ");
+ args.add(ev.value());
parts.add(
- ";try { java.io.ByteArrayOutputStream baos = new java.io.ByteArrayOutputStream(); if (");
- parts.add(") ");
- parts.add(" = null; else{");
- parts.add(".encode(");
- parts.add(", baos); ");
- parts.add(
- " = baos.toByteArray();}} catch (Exception e) {throw org.apache.beam.sdk.util.UserCodeException.wrap(e);}");
+ " = org.apache.beam.runners.spark.structuredstreaming.translation.helpers.EncoderHelpers.EncodeUsingBeamCoder.encode(");
+ args.add(input.isNull());
+ parts.add(", ");
+ args.add(input.value());
+ parts.add(", ");
+ args.add(accessCode);
+ parts.add(");");
StringContext sc =
new StringContext(JavaConversions.collectionAsScalaIterable(parts).toSeq());
-
- List<Object> args = new ArrayList<>();
-
- args.add(ev.value());
- args.add(input.isNull());
- args.add(ev.value());
- args.add(accessCode);
- args.add(input.value());
- args.add(ev.value());
Block code =
(new Block.BlockHelper(sc)).code(JavaConversions.collectionAsScalaIterable(args).toSeq());
@@ -156,7 +134,7 @@
case 0:
return child;
case 1:
- return beamCoder;
+ return coder;
default:
throw new ArrayIndexOutOfBoundsException("productElement out of bounds");
}
@@ -181,12 +159,20 @@
return false;
}
EncodeUsingBeamCoder<?> that = (EncodeUsingBeamCoder<?>) o;
- return beamCoder.equals(that.beamCoder) && child.equals(that.child);
+ return child.equals(that.child) && coder.equals(that.coder);
}
@Override
public int hashCode() {
- return Objects.hash(super.hashCode(), child, beamCoder);
+ return Objects.hash(super.hashCode(), child, coder);
+ }
+
+ /**
+ * Convert value to byte array (invoked by generated code in {@link #doGenCode(CodegenContext,
+ * ExprCode)}).
+ */
+ public static <T> byte[] encode(boolean isNull, @Nullable T value, Coder<T> coder) {
+ return isNull ? null : CoderHelpers.toByteArray(value, coder);
}
}
@@ -198,14 +184,14 @@
public static class DecodeUsingBeamCoder<T> extends UnaryExpression
implements NonSQLExpression, Serializable {
- private Expression child;
- private ClassTag<T> classTag;
- private Coder<T> beamCoder;
+ private final Expression child;
+ private final ClassTag<T> classTag;
+ private final Coder<T> coder;
- public DecodeUsingBeamCoder(Expression child, ClassTag<T> classTag, Coder<T> beamCoder) {
+ public DecodeUsingBeamCoder(Expression child, ClassTag<T> classTag, Coder<T> coder) {
this.child = child;
this.classTag = classTag;
- this.beamCoder = beamCoder;
+ this.coder = coder;
}
@Override
@@ -215,65 +201,39 @@
@Override
public ExprCode doGenCode(CodegenContext ctx, ExprCode ev) {
- // Code to deserialize.
- String accessCode =
- ctx.addReferenceObj("beamCoder", beamCoder, beamCoder.getClass().getName());
+ String accessCode = ctx.addReferenceObj("coder", coder, coder.getClass().getName());
ExprCode input = child.genCode(ctx);
String javaType = CodeGenerator.javaType(dataType());
- /*
- CODE GENERATED:
- final $javaType ${ev.value}
- try {
- ${ev.value} =
- ${input.isNull} ?
- ${CodeGenerator.defaultValue(dataType)} :
- ($javaType) $beamCoder.decode(new java.io.ByteArrayInputStream(${input.value}));
- } catch (Exception e) {
- throw org.apache.beam.sdk.util.UserCodeException.wrap(e);
- }
- */
-
List<String> parts = new ArrayList<>();
+ List<Object> args = new ArrayList<>();
+ /*
+ CODE GENERATED:
+ final ${javaType} ${ev.value} = (${javaType}) org.apache.beam.runners.spark.structuredstreaming.translation.helpers.EncoderHelpers.DecodeUsingBeamCoder.decode(${input.value}, ${coder});
+ */
parts.add("final ");
+ args.add(javaType);
parts.add(" ");
- parts.add(";try { ");
- parts.add(" = ");
- parts.add("? ");
- parts.add(": (");
- parts.add(") ");
- parts.add(".decode(new java.io.ByteArrayInputStream(");
+ args.add(ev.value());
+ parts.add(" = (");
+ args.add(javaType);
parts.add(
- ")); } catch (Exception e) {throw org.apache.beam.sdk.util.UserCodeException.wrap(e);}");
+ ") org.apache.beam.runners.spark.structuredstreaming.translation.helpers.EncoderHelpers.DecodeUsingBeamCoder.decode(");
+ args.add(input.isNull());
+ parts.add(", ");
+ args.add(input.value());
+ parts.add(", ");
+ args.add(accessCode);
+ parts.add(");");
StringContext sc =
new StringContext(JavaConversions.collectionAsScalaIterable(parts).toSeq());
-
- List<Object> args = new ArrayList<>();
- args.add(javaType);
- args.add(ev.value());
- args.add(ev.value());
- args.add(input.isNull());
- args.add(CodeGenerator.defaultValue(dataType(), false));
- args.add(javaType);
- args.add(accessCode);
- args.add(input.value());
Block code =
(new Block.BlockHelper(sc)).code(JavaConversions.collectionAsScalaIterable(args).toSeq());
-
return ev.copy(input.code().$plus(code), input.isNull(), ev.value());
}
@Override
- public Object nullSafeEval(Object input) {
- try {
- return beamCoder.decode(new ByteArrayInputStream((byte[]) input));
- } catch (Exception e) {
- throw new IllegalStateException("Error decoding bytes for coder: " + beamCoder, e);
- }
- }
-
- @Override
public DataType dataType() {
return new ObjectType(classTag.runtimeClass());
}
@@ -286,7 +246,7 @@
case 1:
return classTag;
case 2:
- return beamCoder;
+ return coder;
default:
throw new ArrayIndexOutOfBoundsException("productElement out of bounds");
}
@@ -311,14 +271,20 @@
return false;
}
DecodeUsingBeamCoder<?> that = (DecodeUsingBeamCoder<?>) o;
- return child.equals(that.child)
- && classTag.equals(that.classTag)
- && beamCoder.equals(that.beamCoder);
+ return child.equals(that.child) && classTag.equals(that.classTag) && coder.equals(that.coder);
}
@Override
public int hashCode() {
- return Objects.hash(super.hashCode(), child, classTag, beamCoder);
+ return Objects.hash(super.hashCode(), child, classTag, coder);
+ }
+
+ /**
+ * Convert value from byte array (invoked by generated code in {@link #doGenCode(CodegenContext,
+ * ExprCode)}).
+ */
+ public static <T> T decode(boolean isNull, @Nullable byte[] serialized, Coder<T> coder) {
+ return isNull ? null : CoderHelpers.fromByteArray(serialized, coder);
}
}
}
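The rewritten code generation delegates serialization to the static encode/decode helpers instead of emitting inline try/catch blocks. A hedged round-trip sketch of that contract using VarIntCoder; this mirrors what the generated Catalyst expressions call.

    import org.apache.beam.runners.spark.structuredstreaming.translation.helpers.EncoderHelpers;
    import org.apache.beam.sdk.coders.VarIntCoder;

    // encode(...) returns null when isNull is true; decode(...) reverses the encoding.
    byte[] bytes = EncoderHelpers.EncodeUsingBeamCoder.encode(false, 42, VarIntCoder.of());
    Integer roundTripped =
        EncoderHelpers.DecodeUsingBeamCoder.decode(false, bytes, VarIntCoder.of());
    // roundTripped is 42; a true isNull flag yields null from both helpers.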
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnRunnerWithMetrics.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnRunnerWithMetrics.java
index 845dc63..013f860 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnRunnerWithMetrics.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnRunnerWithMetrics.java
@@ -71,11 +71,13 @@
@Override
public void onTimer(
final String timerId,
+ final String timerFamilyId,
final BoundedWindow window,
final Instant timestamp,
+ final Instant outputTimestamp,
final TimeDomain timeDomain) {
try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(metricsContainer())) {
- delegate.onTimer(timerId, window, timestamp, timeDomain);
+ delegate.onTimer(timerId, timerFamilyId, window, timestamp, outputTimestamp, timeDomain);
} catch (IOException e) {
throw new RuntimeException(e);
}
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkProcessContext.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkProcessContext.java
index e978f46..9cbbeda 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkProcessContext.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkProcessContext.java
@@ -161,7 +161,13 @@
StateNamespace namespace = timer.getNamespace();
checkArgument(namespace instanceof StateNamespaces.WindowNamespace);
BoundedWindow window = ((StateNamespaces.WindowNamespace) namespace).getWindow();
- doFnRunner.onTimer(timer.getTimerId(), window, timer.getTimestamp(), timer.getDomain());
+ doFnRunner.onTimer(
+ timer.getTimerId(),
+ timer.getTimerFamilyId(),
+ window,
+ timer.getTimestamp(),
+ timer.getOutputTimestamp(),
+ timer.getDomain());
}
}
}
diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/utils/EncodersTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/EncoderHelpersTest.java
similarity index 79%
rename from runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/utils/EncodersTest.java
rename to runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/EncoderHelpersTest.java
index 8327fd8..54db4fa 100644
--- a/runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/utils/EncodersTest.java
+++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/helpers/EncoderHelpersTest.java
@@ -15,13 +15,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.beam.runners.spark.structuredstreaming.utils;
+package org.apache.beam.runners.spark.structuredstreaming.translation.helpers;
import static org.junit.Assert.assertEquals;
-import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
-import org.apache.beam.runners.spark.structuredstreaming.translation.helpers.EncoderHelpers;
import org.apache.beam.sdk.coders.VarIntCoder;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
@@ -31,7 +30,7 @@
/** Test of the wrapping of Beam Coders as Spark ExpressionEncoders. */
@RunWith(JUnit4.class)
-public class EncodersTest {
+public class EncoderHelpersTest {
@Test
public void beamCoderToSparkEncoderTest() {
@@ -40,13 +39,9 @@
.appName("beamCoderToSparkEncoderTest")
.master("local[4]")
.getOrCreate();
- List<Integer> data = new ArrayList<>();
- data.add(1);
- data.add(2);
- data.add(3);
+ List<Integer> data = Arrays.asList(1, 2, 3);
Dataset<Integer> dataset =
sparkSession.createDataset(data, EncoderHelpers.fromBeamCoder(VarIntCoder.of()));
- List<Integer> results = dataset.collectAsList();
- assertEquals(data, results);
+ assertEquals(data, dataset.collectAsList());
}
}
diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/SparkExecutableStageFunctionTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/SparkExecutableStageFunctionTest.java
index 5a59fdd..f89f115 100644
--- a/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/SparkExecutableStageFunctionTest.java
+++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/SparkExecutableStageFunctionTest.java
@@ -161,6 +161,11 @@
}
@Override
+ public void split(double fractionOfRemainder) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public void close() throws Exception {
if (once) {
return;
diff --git a/sdks/go/container/boot.go b/sdks/go/container/boot.go
index bc2f9ab..7b82cd3 100644
--- a/sdks/go/container/boot.go
+++ b/sdks/go/container/boot.go
@@ -122,6 +122,10 @@
"--semi_persist_dir=" + *semiPersistDir,
"--options=" + options,
}
+ if info.GetStatusEndpoint() != nil {
+ args = append(args, "--status_endpoint=" + info.GetStatusEndpoint().GetUrl())
+ }
+
log.Fatalf("User program exited: %v", execx.Execute(prog, args...))
}
diff --git a/sdks/go/gogradle.lock b/sdks/go/gogradle.lock
index 3713503..75eecbe 100644
--- a/sdks/go/gogradle.lock
+++ b/sdks/go/gogradle.lock
@@ -200,7 +200,7 @@
- "git@github.com:golang/protobuf.git"
vcs: "git"
name: "github.com/golang/protobuf"
- commit: "3a3da3a4e26776cc22a79ef46d5d58477532dede"
+ commit: "ed6926b37a637426117ccab59282c3839528a700"
transitive: false
- urls:
- "https://github.com/golang/snappy.git"
diff --git a/sdks/go/pkg/beam/core/runtime/exec/datasource.go b/sdks/go/pkg/beam/core/runtime/exec/datasource.go
index 60d2a8d..06131b7 100644
--- a/sdks/go/pkg/beam/core/runtime/exec/datasource.go
+++ b/sdks/go/pkg/beam/core/runtime/exec/datasource.go
@@ -262,7 +262,7 @@
// Split takes a sorted set of potential split indices, selects and actuates
// split on an appropriate split index, and returns the selected split index
// if successful. Returns an error when unable to split.
-func (n *DataSource) Split(splits []int64, frac float32) (int64, error) {
+func (n *DataSource) Split(splits []int64, frac float64) (int64, error) {
if splits == nil {
return 0, fmt.Errorf("failed to split: requested splits were empty")
}
@@ -275,7 +275,7 @@
// the promised split index to this value.
for _, s := range splits {
// Never split on the first element, or the current element.
- if s > 0 && s > c && s < n.splitIdx {
+ if s > 0 && s > c && s <= n.splitIdx {
n.splitIdx = s
fs := n.splitIdx
n.mu.Unlock()
diff --git a/sdks/go/pkg/beam/core/runtime/exec/plan.go b/sdks/go/pkg/beam/core/runtime/exec/plan.go
index d221c7e..d87ff54 100644
--- a/sdks/go/pkg/beam/core/runtime/exec/plan.go
+++ b/sdks/go/pkg/beam/core/runtime/exec/plan.go
@@ -201,7 +201,7 @@
type SplitPoints struct {
// Splits is a list of desired split indices.
Splits []int64
- Frac float32
+ Frac float64
}
// Split takes a set of potential split indexes, and if successful returns
diff --git a/sdks/go/pkg/beam/core/runtime/exec/translate.go b/sdks/go/pkg/beam/core/runtime/exec/translate.go
index 139db34..6134506 100644
--- a/sdks/go/pkg/beam/core/runtime/exec/translate.go
+++ b/sdks/go/pkg/beam/core/runtime/exec/translate.go
@@ -168,7 +168,7 @@
if !ok {
return nil, errors.Errorf("windowing strategy %v not found", id)
}
- wfn, err := unmarshalWindowFn(ws.GetWindowFn().GetSpec())
+ wfn, err := unmarshalWindowFn(ws.GetWindowFn())
if err != nil {
return nil, err
}
@@ -345,13 +345,13 @@
if err := proto.Unmarshal(payload, &pardo); err != nil {
return nil, errors.Wrapf(err, "invalid ParDo payload for %v", transform)
}
- data = string(pardo.GetDoFn().GetSpec().GetPayload())
+ data = string(pardo.GetDoFn().GetPayload())
case urnPerKeyCombinePre, urnPerKeyCombineMerge, urnPerKeyCombineExtract:
var cmb pb.CombinePayload
if err := proto.Unmarshal(payload, &cmb); err != nil {
return nil, errors.Wrapf(err, "invalid CombinePayload payload for %v", transform)
}
- data = string(cmb.GetCombineFn().GetSpec().GetPayload())
+ data = string(cmb.GetCombineFn().GetPayload())
default:
// TODO(herohde) 12/4/2017: we see DoFns directly with Dataflow. Handle that
// case here, for now, so that the harness can use this logic.
@@ -479,7 +479,7 @@
if err := proto.Unmarshal(payload, &wp); err != nil {
return nil, errors.Wrapf(err, "invalid WindowInto payload for %v", transform)
}
- wfn, err := unmarshalWindowFn(wp.GetWindowFn().GetSpec())
+ wfn, err := unmarshalWindowFn(wp.GetWindowFn())
if err != nil {
return nil, err
}
diff --git a/sdks/go/pkg/beam/core/runtime/graphx/translate.go b/sdks/go/pkg/beam/core/runtime/graphx/translate.go
index 17dc253..2f915c5 100644
--- a/sdks/go/pkg/beam/core/runtime/graphx/translate.go
+++ b/sdks/go/pkg/beam/core/runtime/graphx/translate.go
@@ -161,16 +161,14 @@
edge := s.Edges[1].Edge
acID := m.coders.Add(edge.AccumCoder)
payload := &pb.CombinePayload{
- CombineFn: &pb.SdkFunctionSpec{
- Spec: &pb.FunctionSpec{
- Urn: URNJavaDoFn,
- Payload: []byte(mustEncodeMultiEdgeBase64(edge)),
- },
- EnvironmentId: m.addDefaultEnv(),
+ CombineFn: &pb.FunctionSpec{
+ Urn: URNJavaDoFn,
+ Payload: []byte(mustEncodeMultiEdgeBase64(edge)),
},
AccumulatorCoderId: acID,
}
transform.Spec = &pb.FunctionSpec{Urn: URNCombinePerKey, Payload: protox.MustEncode(payload)}
+ transform.EnvironmentId = m.addDefaultEnv()
}
func (m *marshaller) addMultiEdge(edge NamedEdge) []string {
@@ -197,6 +195,7 @@
// allPIds tracks additional PTransformIDs generated for the pipeline
var allPIds []string
var spec *pb.FunctionSpec
+ var transformEnvID = ""
switch edge.Edge.Op {
case graph.Impulse:
// TODO(herohde) 7/18/2018: Encode data?
@@ -218,14 +217,11 @@
m.makeNode(out, m.coders.Add(makeBytesKeyedCoder(in.From.Coder)), in.From)
payload := &pb.ParDoPayload{
- DoFn: &pb.SdkFunctionSpec{
- Spec: &pb.FunctionSpec{
+ DoFn: &pb.FunctionSpec{
+ Urn: URNIterableSideInputKey,
+ Payload: []byte(protox.MustEncodeBase64(&v1.TransformPayload{
Urn: URNIterableSideInputKey,
- Payload: []byte(protox.MustEncodeBase64(&v1.TransformPayload{
- Urn: URNIterableSideInputKey,
- })),
- },
- EnvironmentId: m.addDefaultEnv(),
+ })),
},
}
@@ -236,8 +232,9 @@
Urn: URNParDo,
Payload: protox.MustEncode(payload),
},
- Inputs: map[string]string{"i0": nodeID(in.From)},
- Outputs: map[string]string{"i0": out},
+ Inputs: map[string]string{"i0": nodeID(in.From)},
+ Outputs: map[string]string{"i0": out},
+ EnvironmentId: m.addDefaultEnv(),
}
m.transforms[keyedID] = keyed
allPIds = append(allPIds, keyedID)
@@ -249,17 +246,11 @@
AccessPattern: &pb.FunctionSpec{
Urn: URNMultimapSideInput,
},
- ViewFn: &pb.SdkFunctionSpec{
- Spec: &pb.FunctionSpec{
- Urn: "foo",
- },
- EnvironmentId: m.addDefaultEnv(),
+ ViewFn: &pb.FunctionSpec{
+ Urn: "foo",
},
- WindowMappingFn: &pb.SdkFunctionSpec{
- Spec: &pb.FunctionSpec{
- Urn: "bar",
- },
- EnvironmentId: m.addDefaultEnv(),
+ WindowMappingFn: &pb.FunctionSpec{
+ Urn: "bar",
},
}
@@ -272,27 +263,23 @@
}
payload := &pb.ParDoPayload{
- DoFn: &pb.SdkFunctionSpec{
- Spec: &pb.FunctionSpec{
- Urn: URNJavaDoFn,
- Payload: []byte(mustEncodeMultiEdgeBase64(edge.Edge)),
- },
- EnvironmentId: m.addDefaultEnv(),
+ DoFn: &pb.FunctionSpec{
+ Urn: URNJavaDoFn,
+ Payload: []byte(mustEncodeMultiEdgeBase64(edge.Edge)),
},
SideInputs: si,
}
+ transformEnvID = m.addDefaultEnv()
spec = &pb.FunctionSpec{Urn: URNParDo, Payload: protox.MustEncode(payload)}
case graph.Combine:
payload := &pb.ParDoPayload{
- DoFn: &pb.SdkFunctionSpec{
- Spec: &pb.FunctionSpec{
- Urn: URNJavaDoFn,
- Payload: []byte(mustEncodeMultiEdgeBase64(edge.Edge)),
- },
- EnvironmentId: m.addDefaultEnv(),
+ DoFn: &pb.FunctionSpec{
+ Urn: URNJavaDoFn,
+ Payload: []byte(mustEncodeMultiEdgeBase64(edge.Edge)),
},
}
+ transformEnvID = m.addDefaultEnv()
spec = &pb.FunctionSpec{Urn: URNParDo, Payload: protox.MustEncode(payload)}
case graph.Flatten:
@@ -303,9 +290,7 @@
case graph.WindowInto:
payload := &pb.WindowIntoPayload{
- WindowFn: &pb.SdkFunctionSpec{
- Spec: makeWindowFn(edge.Edge.WindowFn),
- },
+ WindowFn: makeWindowFn(edge.Edge.WindowFn),
}
spec = &pb.FunctionSpec{Urn: URNWindow, Payload: protox.MustEncode(payload)}
@@ -317,10 +302,11 @@
}
transform := &pb.PTransform{
- UniqueName: edge.Name,
- Spec: spec,
- Inputs: inputs,
- Outputs: outputs,
+ UniqueName: edge.Name,
+ Spec: spec,
+ Inputs: inputs,
+ Outputs: outputs,
+ EnvironmentId: transformEnvID,
}
m.transforms[id] = transform
allPIds = append(allPIds, id)
@@ -348,15 +334,12 @@
injectID := fmt.Sprintf("%v_inject%v", id, i)
payload := &pb.ParDoPayload{
- DoFn: &pb.SdkFunctionSpec{
- Spec: &pb.FunctionSpec{
- Urn: URNInject,
- Payload: []byte(protox.MustEncodeBase64(&v1.TransformPayload{
- Urn: URNInject,
- Inject: &v1.InjectPayload{N: (int32)(i)},
- })),
- },
- EnvironmentId: m.addDefaultEnv(),
+ DoFn: &pb.FunctionSpec{
+ Urn: URNInject,
+ Payload: []byte(protox.MustEncodeBase64(&v1.TransformPayload{
+ Urn: URNInject,
+ Inject: &v1.InjectPayload{N: (int32)(i)},
+ })),
},
}
inject := &pb.PTransform{
@@ -365,8 +348,9 @@
Urn: URNParDo,
Payload: protox.MustEncode(payload),
},
- Inputs: map[string]string{"i0": nodeID(in.From)},
- Outputs: map[string]string{"i0": out},
+ Inputs: map[string]string{"i0": nodeID(in.From)},
+ Outputs: map[string]string{"i0": out},
+ EnvironmentId: m.addDefaultEnv(),
}
m.transforms[injectID] = inject
subtransforms = append(subtransforms, injectID)
@@ -412,14 +396,11 @@
expandID := fmt.Sprintf("%v_expand", id)
payload := &pb.ParDoPayload{
- DoFn: &pb.SdkFunctionSpec{
- Spec: &pb.FunctionSpec{
+ DoFn: &pb.FunctionSpec{
+ Urn: URNExpand,
+ Payload: []byte(protox.MustEncodeBase64(&v1.TransformPayload{
Urn: URNExpand,
- Payload: []byte(protox.MustEncodeBase64(&v1.TransformPayload{
- Urn: URNExpand,
- })),
- },
- EnvironmentId: m.addDefaultEnv(),
+ })),
},
}
expand := &pb.PTransform{
@@ -428,8 +409,9 @@
Urn: URNParDo,
Payload: protox.MustEncode(payload),
},
- Inputs: map[string]string{"i0": gbkOut},
- Outputs: map[string]string{"i0": nodeID(outNode)},
+ Inputs: map[string]string{"i0": gbkOut},
+ Outputs: map[string]string{"i0": nodeID(outNode)},
+ EnvironmentId: m.addDefaultEnv(),
}
m.transforms[id] = expand
subtransforms = append(subtransforms, id)
@@ -481,6 +463,7 @@
func (m *marshaller) addWindowingStrategy(w *window.WindowingStrategy) string {
ws := marshalWindowingStrategy(m.coders, w)
+ ws.EnvironmentId = m.addDefaultEnv()
return m.internWindowingStrategy(ws)
}
@@ -500,9 +483,7 @@
// the given coder context.
func marshalWindowingStrategy(c *CoderMarshaller, w *window.WindowingStrategy) *pb.WindowingStrategy {
ws := &pb.WindowingStrategy{
- WindowFn: &pb.SdkFunctionSpec{
- Spec: makeWindowFn(w.Fn),
- },
+ WindowFn: makeWindowFn(w.Fn),
MergeStatus: pb.MergeStatus_NON_MERGING,
AccumulationMode: pb.AccumulationMode_DISCARDING,
WindowCoderId: c.AddWindowCoder(makeWindowCoder(w.Fn)),
diff --git a/sdks/go/pkg/beam/core/runtime/harness/datamgr.go b/sdks/go/pkg/beam/core/runtime/harness/datamgr.go
index bd37124..7615356 100644
--- a/sdks/go/pkg/beam/core/runtime/harness/datamgr.go
+++ b/sdks/go/pkg/beam/core/runtime/harness/datamgr.go
@@ -116,7 +116,7 @@
return nil, err
}
ch.forceRecreate = func(id string, err error) {
- log.Warnf(ctx, "forcing channel[%v] reconnection on port %v due to %v", id, port, err)
+ log.Warnf(ctx, "forcing DataChannel[%v] reconnection on port %v due to %v", id, port, err)
m.mu.Lock()
delete(m.ports, port.URL)
m.mu.Unlock()
@@ -150,14 +150,14 @@
writers map[clientID]*dataWriter
readers map[clientID]*dataReader
-
// readErr indicates a client.Recv error and is used to prevent new readers.
readErr error
+
// a closure that forces the data manager to recreate this stream.
forceRecreate func(id string, err error)
cancelFn context.CancelFunc // Allows writers to stop the grpc reading goroutine.
- mu sync.Mutex // guards both the readers and writers maps.
+ mu sync.Mutex // guards mutable internal data, notably the maps and readErr.
}
func newDataChannel(ctx context.Context, port exec.Port) (*DataChannel, error) {
@@ -198,6 +198,8 @@
// OpenRead returns an io.ReadCloser of the data elements for the given instruction and ptransform.
func (c *DataChannel) OpenRead(ctx context.Context, ptransformID string, instID instructionID) io.ReadCloser {
+ c.mu.Lock()
+ defer c.mu.Unlock()
cid := clientID{ptransformID: ptransformID, instID: instID}
if c.readErr != nil {
log.Errorf(ctx, "opening a reader %v on a closed channel", cid)
@@ -256,7 +258,9 @@
if local, ok := cache[id]; ok {
r = local
} else {
+ c.mu.Lock()
r = c.makeReader(ctx, id)
+ c.mu.Unlock()
cache[id] = r
}
@@ -306,10 +310,8 @@
return r.err
}
+// makeReader creates a dataReader. It expects to be called while c.mu is held.
func (c *DataChannel) makeReader(ctx context.Context, id clientID) *dataReader {
- c.mu.Lock()
- defer c.mu.Unlock()
-
if r, ok := c.readers[id]; ok {
return r
}
@@ -392,7 +394,7 @@
recordStreamSend(msg)
if err := w.ch.client.Send(msg); err != nil {
if err == io.EOF {
- log.Warnf(context.TODO(), "dataWriter[%v;%v].Close EOF on send; fetching real error", w.id, w.ch.id)
+ log.Warnf(context.TODO(), "dataWriter[%v;%v] EOF on send; fetching real error", w.id, w.ch.id)
err = nil
for err == nil {
// Per GRPC stream documentation, if there's an EOF, we must call Recv
@@ -401,7 +403,7 @@
_, err = w.ch.client.Recv()
}
}
- log.Warnf(context.TODO(), "dataWriter[%v;%v].Close error on send: %v", w.id, w.ch.id, err)
+ log.Warnf(context.TODO(), "dataWriter[%v;%v] error on send: %v", w.id, w.ch.id, err)
w.ch.terminateStreamOnError(err)
return err
}
diff --git a/sdks/go/pkg/beam/core/runtime/harness/datamgr_test.go b/sdks/go/pkg/beam/core/runtime/harness/datamgr_test.go
index 351d97b..05354b3 100644
--- a/sdks/go/pkg/beam/core/runtime/harness/datamgr_test.go
+++ b/sdks/go/pkg/beam/core/runtime/harness/datamgr_test.go
@@ -30,9 +30,9 @@
const extraData = 2
-// fakeClient attempts to mimic the semantics of a GRPC stream
+// fakeDataClient attempts to mimic the semantics of a GRPC stream
// and also permit configurability.
-type fakeClient struct {
+type fakeDataClient struct {
t *testing.T
done chan bool
calls int
@@ -40,7 +40,7 @@
skipFirstError bool
}
-func (f *fakeClient) Recv() (*pb.Elements, error) {
+func (f *fakeDataClient) Recv() (*pb.Elements, error) {
f.calls++
data := []byte{1, 2, 3, 4}
elemData := pb.Elements_Data{
@@ -77,7 +77,7 @@
}
}
-func (f *fakeClient) Send(*pb.Elements) error {
+func (f *fakeDataClient) Send(*pb.Elements) error {
// We skip errors on the first call to test that errors can be returned
// on the sentinel value send in dataWriter.Close
// Otherwise, we return an io.EOF similar to semantics documented
@@ -100,12 +100,12 @@
tests := []struct {
name string
expectedError error
- caseFn func(t *testing.T, r io.ReadCloser, client *fakeClient, c *DataChannel)
+ caseFn func(t *testing.T, r io.ReadCloser, client *fakeDataClient, c *DataChannel)
}{
{
name: "onClose",
expectedError: io.EOF,
- caseFn: func(t *testing.T, r io.ReadCloser, client *fakeClient, c *DataChannel) {
+ caseFn: func(t *testing.T, r io.ReadCloser, client *fakeDataClient, c *DataChannel) {
// We don't read up all the buffered data, but immediately close the reader.
// Previously, since nothing was consuming the incoming gRPC data, the whole
// data channel would get stuck, and the client.Recv() call was eventually
@@ -119,13 +119,13 @@
}, {
name: "onSentinel",
expectedError: io.EOF,
- caseFn: func(t *testing.T, r io.ReadCloser, client *fakeClient, c *DataChannel) {
- // fakeClient eventually returns a sentinel element.
+ caseFn: func(t *testing.T, r io.ReadCloser, client *fakeDataClient, c *DataChannel) {
+ // fakeDataClient eventually returns a sentinel element.
},
}, {
name: "onRecvError",
expectedError: expectedError,
- caseFn: func(t *testing.T, r io.ReadCloser, client *fakeClient, c *DataChannel) {
+ caseFn: func(t *testing.T, r io.ReadCloser, client *fakeDataClient, c *DataChannel) {
// The SDK starts reading in a goroutine immediately after open.
// Set the 2nd Recv call to have an error.
client.err = expectedError
@@ -135,7 +135,7 @@
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
done := make(chan bool, 1)
- client := &fakeClient{t: t, done: done}
+ client := &fakeDataClient{t: t, done: done}
ctx, cancelFn := context.WithCancel(context.Background())
c := makeDataChannel(ctx, "id", client, cancelFn)
@@ -181,22 +181,22 @@
tests := []struct {
name string
- caseFn func(t *testing.T, w io.WriteCloser, client *fakeClient) error
+ caseFn func(t *testing.T, w io.WriteCloser, client *fakeDataClient) error
}{
{
name: "onClose_Flush",
- caseFn: func(t *testing.T, w io.WriteCloser, client *fakeClient) error {
+ caseFn: func(t *testing.T, w io.WriteCloser, client *fakeDataClient) error {
return w.Close()
},
}, {
name: "onClose_Sentinel",
- caseFn: func(t *testing.T, w io.WriteCloser, client *fakeClient) error {
+ caseFn: func(t *testing.T, w io.WriteCloser, client *fakeDataClient) error {
client.skipFirstError = true
return w.Close()
},
}, {
name: "onWrite",
- caseFn: func(t *testing.T, w io.WriteCloser, client *fakeClient) error {
+ caseFn: func(t *testing.T, w io.WriteCloser, client *fakeDataClient) error {
_, err := w.Write([]byte{'d', 'o', 'n', 'e'})
return err
},
@@ -205,7 +205,7 @@
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
done := make(chan bool, 1)
- client := &fakeClient{t: t, done: done, err: expectedError}
+ client := &fakeDataClient{t: t, done: done, err: expectedError}
ctx, cancelFn := context.WithCancel(context.Background())
c := makeDataChannel(ctx, "id", client, cancelFn)
diff --git a/sdks/go/pkg/beam/core/runtime/harness/harness.go b/sdks/go/pkg/beam/core/runtime/harness/harness.go
index 28bdf4c..58d4d2d 100644
--- a/sdks/go/pkg/beam/core/runtime/harness/harness.go
+++ b/sdks/go/pkg/beam/core/runtime/harness/harness.go
@@ -287,8 +287,8 @@
ProcessBundleSplit: &fnpb.ProcessBundleSplitResponse{
ChannelSplits: []*fnpb.ProcessBundleSplitResponse_ChannelSplit{
&fnpb.ProcessBundleSplitResponse_ChannelSplit{
- LastPrimaryElement: int32(split - 1),
- FirstResidualElement: int32(split),
+ LastPrimaryElement: split - 1,
+ FirstResidualElement: split,
},
},
},
diff --git a/sdks/go/pkg/beam/core/runtime/harness/init/init.go b/sdks/go/pkg/beam/core/runtime/harness/init/init.go
index 889c33b..4043ada 100644
--- a/sdks/go/pkg/beam/core/runtime/harness/init/init.go
+++ b/sdks/go/pkg/beam/core/runtime/harness/init/init.go
@@ -42,6 +42,7 @@
id = flag.String("id", "", "Local identifier (required in worker mode).")
loggingEndpoint = flag.String("logging_endpoint", "", "Local logging gRPC endpoint (required in worker mode).")
controlEndpoint = flag.String("control_endpoint", "", "Local control gRPC endpoint (required in worker mode).")
+ statusEndpoint = flag.String("status_endpoint", "", "Local status gRPC endpoint (optional in worker mode).")
semiPersistDir = flag.String("semi_persist_dir", "/tmp", "Local semi-persistent directory (optional in worker mode).")
options = flag.String("options", "", "JSON-encoded pipeline options (required in worker mode).")
)
diff --git a/sdks/go/pkg/beam/core/runtime/harness/statemgr.go b/sdks/go/pkg/beam/core/runtime/harness/statemgr.go
index 9669888..b75b1ce7 100644
--- a/sdks/go/pkg/beam/core/runtime/harness/statemgr.go
+++ b/sdks/go/pkg/beam/core/runtime/harness/statemgr.go
@@ -83,10 +83,10 @@
s.mu.Unlock()
return nil, errors.Errorf("instruction %v no longer processing", s.instID)
}
- local := s.mgr
+ localMgr := s.mgr
s.mu.Unlock()
- return local.Open(ctx, port) // don't hold lock over potentially slow operation
+ return localMgr.Open(ctx, port) // don't hold lock over potentially slow operation
}
// Close closes all open readers.
@@ -161,11 +161,11 @@
r.mu.Unlock()
return 0, errors.New("side input closed")
}
- local := r.ch
+ localChannel := r.ch
r.mu.Unlock()
req := &pb.StateRequest{
- // Id: set by channel
+ // Id: set by StateChannel
InstructionId: string(r.instID),
StateKey: r.key,
Request: &pb.StateRequest_Get{
@@ -174,7 +174,7 @@
},
},
}
- resp, err := local.Send(req)
+ resp, err := localChannel.Send(req)
if err != nil {
return 0, err
}
@@ -204,7 +204,7 @@
func (r *stateKeyReader) Close() error {
r.mu.Lock()
r.closed = true
- r.ch = nil
+ r.ch = nil // StateChannels might be re-used if they're ok, so don't close them here.
r.mu.Unlock()
return nil
}
@@ -232,25 +232,57 @@
if err != nil {
return nil, err
}
+ ch.forceRecreate = func(id string, err error) {
+ log.Warnf(ctx, "forcing StateChannel[%v] reconnection on port %v due to %v", id, port, err)
+ m.mu.Lock()
+ delete(m.ports, port.URL)
+ m.mu.Unlock()
+ }
m.ports[port.URL] = ch
return ch, nil
}
+type stateClient interface {
+ Send(*pb.StateRequest) error
+ Recv() (*pb.StateResponse, error)
+}
+
// StateChannel manages state transactions over a single gRPC connection.
// It does not need to track readers and writers as carefully as the
// DataChannel, because the state protocol is request-based.
type StateChannel struct {
id string
- client pb.BeamFnState_StateClient
+ client stateClient
requests chan *pb.StateRequest
nextRequestNo int32
responses map[string]chan<- *pb.StateResponse
mu sync.Mutex
+
+ // a closure that forces the state manager to recreate this stream.
+ forceRecreate func(id string, err error)
+ cancelFn context.CancelFunc
+ closedErr error
+ DoneCh <-chan struct{}
+}
+
+func (c *StateChannel) terminateStreamOnError(err error) {
+ c.mu.Lock()
+ if c.forceRecreate != nil {
+ c.closedErr = err
+ c.forceRecreate(c.id, err)
+ c.forceRecreate = nil
+ }
+ c.responses = nil
+ c.requests = nil
+ // Cancelling context after forcing recreation to ensure closedErr is set.
+ c.cancelFn()
+ c.mu.Unlock()
}
func newStateChannel(ctx context.Context, port exec.Port) (*StateChannel, error) {
+ ctx, cancelFn := context.WithCancel(ctx)
cc, err := dial(ctx, port.URL, 15*time.Second)
if err != nil {
return nil, errors.Wrapf(err, "failed to connect to state service %v", port.URL)
@@ -260,25 +292,31 @@
cc.Close()
return nil, errors.Wrapf(err, "failed to create state client %v", port.URL)
}
+ return makeStateChannel(ctx, cancelFn, port.URL, client), nil
+}
+func makeStateChannel(ctx context.Context, cancelFn context.CancelFunc, id string, client stateClient) *StateChannel {
ret := &StateChannel{
- id: port.URL,
+ id: id,
client: client,
requests: make(chan *pb.StateRequest, 10),
responses: make(map[string]chan<- *pb.StateResponse),
+ cancelFn: cancelFn,
+ DoneCh: ctx.Done(),
}
go ret.read(ctx)
go ret.write(ctx)
- return ret, nil
+ return ret
}
func (c *StateChannel) read(ctx context.Context) {
for {
+ // Cancelling the context will cause this call to return an error.
msg, err := c.client.Recv()
if err != nil {
+ c.terminateStreamOnError(err)
if err == io.EOF {
- // TODO(herohde) 10/12/2017: can this happen before shutdown? Reconnect?
log.Warnf(ctx, "StateChannel[%v].read: closed", c.id)
return
}
@@ -307,21 +345,44 @@
}
func (c *StateChannel) write(ctx context.Context) {
- for req := range c.requests {
- err := c.client.Send(req)
- if err == nil {
- continue // ok
+ var err error
+ var id string
+ for {
+ var req *pb.StateRequest
+ select {
+ case req = <-c.requests:
+ case <-c.DoneCh: // Close the goroutine on context cancel.
+ return
}
+ err = c.client.Send(req)
+ if err != nil {
+ id = req.Id
+ break // non-nil errors mean the stream is broken and can't be re-used.
+ }
+ }
- // Failed to send. Return error.
- c.mu.Lock()
- ch, ok := c.responses[req.Id]
- delete(c.responses, req.Id)
- c.mu.Unlock()
+ if err == io.EOF {
+ log.Warnf(ctx, "StateChannel[%v].write EOF on send; fetching real error", c.id)
+ err = nil
+ for err == nil {
+ // Per GRPC stream documentation, if there's an EOF, we must call Recv
+ // until a non-nil error is returned, to ensure resources are cleaned up.
+ // https://godoc.org/google.golang.org/grpc#ClientConn.NewStream
+ _, err = c.client.Recv()
+ }
+ }
+ log.Errorf(ctx, "StateChannel[%v].write error on send: %v", c.id, err)
- if ok {
- ch <- &pb.StateResponse{Id: req.Id, Error: fmt.Sprintf("failed to send: %v", err)}
- } // else ignore: already received response due to race
+ // Failed to send. Return error & unblock Send.
+ c.mu.Lock()
+ ch, ok := c.responses[id]
+ delete(c.responses, id)
+ c.mu.Unlock()
+ // Clean up everything else, this stream is done.
+ c.terminateStreamOnError(err)
+
+ if ok {
+ ch <- &pb.StateResponse{Id: id, Error: fmt.Sprintf("StateChannel[%v].write failed to send: %v", c.id, err)}
}
}
@@ -332,13 +393,23 @@
ch := make(chan *pb.StateResponse, 1)
c.mu.Lock()
+ if c.closedErr != nil {
+ defer c.mu.Unlock()
+ return nil, errors.Wrapf(c.closedErr, "StateChannel[%v].Send(%v): channel closed due to: %v", c.id, id, c.closedErr)
+ }
c.responses[id] = ch
c.mu.Unlock()
c.requests <- req
- // TODO(herohde) 7/21/2018: time out?
- resp := <-ch
+ var resp *pb.StateResponse
+ select {
+ case resp = <-ch:
+ case <-c.DoneCh:
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ return nil, errors.Wrapf(c.closedErr, "StateChannel[%v].Send(%v): context canceled", c.id, id)
+ }
if resp.Error != "" {
return nil, errors.New(resp.Error)
}
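
The statemgr.go changes above all serve one shutdown pattern: on the first fatal stream error, record it in `closedErr`, tell the manager to forget this channel via the `forceRecreate` closure (so the next `Open` dials a fresh stream), and cancel the channel's context so anything blocked on `DoneCh` unblocks. A minimal, self-contained sketch of that pattern follows; the names (`fakeChannel`, `terminate`, `send`) are illustrative only, not Beam's actual API.

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"sync"
)

// fakeChannel mirrors the StateChannel shutdown pattern in miniature.
type fakeChannel struct {
	mu            sync.Mutex
	closedErr     error
	forceRecreate func(err error) // set by the owning manager
	cancelFn      context.CancelFunc
	doneCh        <-chan struct{}
}

func newFakeChannel(ctx context.Context, onRecreate func(err error)) *fakeChannel {
	ctx, cancel := context.WithCancel(ctx)
	return &fakeChannel{forceRecreate: onRecreate, cancelFn: cancel, doneCh: ctx.Done()}
}

// terminate is the analogue of terminateStreamOnError: record the error,
// notify the manager exactly once, then cancel so blocked goroutines return.
func (c *fakeChannel) terminate(err error) {
	c.mu.Lock()
	if c.forceRecreate != nil {
		c.closedErr = err
		c.forceRecreate(err)
		c.forceRecreate = nil
	}
	c.cancelFn()
	c.mu.Unlock()
}

// send fails fast once the channel has been terminated, like Send checking closedErr.
func (c *fakeChannel) send() error {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.closedErr != nil {
		return fmt.Errorf("channel closed due to: %w", c.closedErr)
	}
	return nil
}

func main() {
	ch := newFakeChannel(context.Background(), func(err error) {
		fmt.Println("manager drops the cached channel; the next Open dials fresh:", err)
	})
	ch.terminate(errors.New("stream broken"))
	<-ch.doneCh            // goroutines selecting on DoneCh unblock here
	fmt.Println(ch.send()) // later sends fail fast with the recorded error
}
```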
diff --git a/sdks/go/pkg/beam/core/runtime/harness/statemgr_test.go b/sdks/go/pkg/beam/core/runtime/harness/statemgr_test.go
new file mode 100644
index 0000000..6079ceb
--- /dev/null
+++ b/sdks/go/pkg/beam/core/runtime/harness/statemgr_test.go
@@ -0,0 +1,247 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package harness
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "log"
+ "strings"
+ "testing"
+ "time"
+
+ pb "github.com/apache/beam/sdks/go/pkg/beam/model/fnexecution_v1"
+)
+
+// fakeStateClient replicates the call and response protocol
+// of the state channel.
+type fakeStateClient struct {
+ // Blocks the read routine
+ recv chan *pb.StateResponse
+ recvErr error
+ // Blocks the write routine
+ send chan *pb.StateRequest
+ sendErr error
+}
+
+func (f *fakeStateClient) Recv() (*pb.StateResponse, error) {
+ // Blocks until something is sent.
+ return <-f.recv, f.recvErr
+}
+
+func (f *fakeStateClient) Send(req *pb.StateRequest) error {
+ f.send <- req // blocks until consumed.
+ return f.sendErr
+}
+
+func TestStateChannel(t *testing.T) {
+ // The warnings logged when channels close are quite noisy for this test.
+ log.SetOutput(ioutil.Discard)
+
+ expectedError := fmt.Errorf("EXPECTED ERROR")
+
+ tests := []struct {
+ name string
+ caseFn func(t *testing.T, c *StateChannel, client *fakeStateClient) error
+ expectedErr error
+ validateCancelled bool
+ }{
+ {
+ name: "HappyPath",
+ caseFn: func(t *testing.T, c *StateChannel, client *fakeStateClient) error {
+ // Verify that we can send a bunch of requests and get the expected paired response back.
+ // This is not a real StateAPI protocol, since that's handled at a higher level than the StateChannel,
+ // but validates that the routing occurs correctly.
+ const count = 10
+ for i := 0; i < count; i++ {
+ go func() {
+ req := <-client.send
+ client.recv <- &pb.StateResponse{
+ Id: req.Id, // Ids need to match up to ensure routing can occur properly.
+ Response: &pb.StateResponse_Get{
+ Get: &pb.StateGetResponse{
+ ContinuationToken: req.GetGet().GetContinuationToken(),
+ },
+ },
+ }
+ }()
+ }
+ for i := 0; i < count; i++ {
+ token := []byte(fmt.Sprintf("%d", i))
+ resp, err := c.Send(&pb.StateRequest{
+ Request: &pb.StateRequest_Get{
+ Get: &pb.StateGetRequest{
+ ContinuationToken: token,
+ },
+ },
+ })
+ if err != nil {
+ t.Fatalf("unexpected error from Send: %v", err)
+ }
+ if got, want := string(resp.GetGet().GetContinuationToken()), string(token); got != want {
+ t.Fatalf("req/response mismatch from Send: got %v, want %v", got, want)
+ }
+ }
+ return nil
+ },
+ }, {
+ name: "readEOF",
+ caseFn: func(t *testing.T, c *StateChannel, client *fakeStateClient) error {
+ go func() {
+ req := <-client.send // Send should succeed.
+
+ client.recvErr = io.EOF
+ client.recv <- &pb.StateResponse{
+ Id: req.Id,
+ }
+ }()
+ _, err := c.Send(&pb.StateRequest{})
+ return err
+ },
+ expectedErr: io.EOF,
+ validateCancelled: true,
+ }, {
+ name: "readOtherErr",
+ caseFn: func(t *testing.T, c *StateChannel, client *fakeStateClient) error {
+ go func() {
+ req := <-client.send // Send should succeed.
+
+ client.recvErr = expectedError
+ client.recv <- &pb.StateResponse{
+ Id: req.Id,
+ }
+ }()
+ _, err := c.Send(&pb.StateRequest{})
+ return err
+ },
+ expectedErr: expectedError,
+ validateCancelled: true,
+ }, {
+ name: "readResponseChannelDeleted",
+ caseFn: func(t *testing.T, c *StateChannel, client *fakeStateClient) error {
+ go func() {
+ req := <-client.send // Send should succeed.
+
+ c.mu.Lock()
+ ch := c.responses[req.Id]
+ delete(c.responses, req.Id)
+ c.mu.Unlock()
+
+ resp := &pb.StateResponse{
+ Id: req.Id,
+ }
+ client.recv <- resp
+ // unblock Send.
+ ch <- resp
+ }()
+ _, err := c.Send(&pb.StateRequest{})
+ return err
+ },
+ }, {
+ name: "writeEOF",
+ caseFn: func(t *testing.T, c *StateChannel, client *fakeStateClient) error {
+ go func() {
+ client.sendErr = io.EOF
+ req := <-client.send
+ // This can be plumbed through on either side, write or read;
+ // the important part is that we get it.
+ client.recvErr = expectedError
+ client.recv <- &pb.StateResponse{
+ Id: req.Id,
+ }
+ }()
+ _, err := c.Send(&pb.StateRequest{})
+ return err
+ },
+ expectedErr: expectedError,
+ validateCancelled: true,
+ }, {
+ name: "writeOtherError",
+ caseFn: func(t *testing.T, c *StateChannel, client *fakeStateClient) error {
+ go func() {
+ client.sendErr = expectedError
+ <-client.send
+ // Shouldn't need to unblock any Recv calls.
+ }()
+ _, err := c.Send(&pb.StateRequest{})
+ return err
+ },
+ expectedErr: expectedError,
+ validateCancelled: true,
+ },
+ }
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ client := &fakeStateClient{
+ recv: make(chan *pb.StateResponse),
+ send: make(chan *pb.StateRequest),
+ }
+ ctx, cancelFn := context.WithCancel(context.Background())
+ c := makeStateChannel(ctx, cancelFn, "id", client)
+ forceRecreateCalled := false
+ var forceRecreateError error
+ c.forceRecreate = func(_ string, err error) {
+ forceRecreateCalled = true
+ forceRecreateError = err
+ }
+
+ retErr := test.caseFn(t, c, client)
+
+ if got, want := retErr, test.expectedErr; !contains(got, want) {
+ t.Errorf("Unexpected error: got %v, want %v", got, want)
+ }
+
+ // Verify that new Sends return the same error on their reads after client.Recv is done.
+ go func() {
+ // Ensure that the client isn't helping us.
+ client.sendErr = nil
+ client.recvErr = nil
+ // Drain the next send, and ensure the response is unblocked.
+ req := <-client.send
+ client.recv <- &pb.StateResponse{Id: req.Id} // Ids need to match up to ensure routing can occur properly.
+ }()
+ if _, err := c.Send(&pb.StateRequest{}); !contains(err, test.expectedErr) {
+ t.Errorf("Unexpected error from Send: got %v, want %v", err, test.expectedErr)
+ }
+
+ if test.validateCancelled {
+ select {
+ case <-ctx.Done(): // Assert that the context must have been cancelled on read failures.
+ case <-time.After(time.Second * 5):
+ t.Fatal("context wasn't cancelled")
+ }
+ if !forceRecreateCalled {
+ t.Fatal("forceRecreate wasn't called")
+ }
+
+ if got, want := forceRecreateError, test.expectedErr; !contains(got, want) {
+ t.Errorf("Unexpected error from forceRecreate: got %v, want %v", got, want)
+ }
+ }
+ })
+ }
+}
+
+// This likely can't be replaced by the "errors" package helpers,
+// since we serialize errors in some cases.
+func contains(got, want error) bool {
+ if got == want {
+ return true
+ }
+ return strings.Contains(got.Error(), want.Error())
+}
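
The `contains` helper above exists because errors are flattened to strings when they cross the state stream: the write path reports failures through `StateResponse.Error`, and `Send` rebuilds an error from that string, so identity comparison against a sentinel such as `io.EOF` no longer holds. A tiny self-contained sketch of that round trip (illustrative, not part of the change):

```go
package main

import (
	"errors"
	"fmt"
	"io"
	"strings"
)

func main() {
	// The error is serialized into a message string on the sending side...
	wire := fmt.Sprintf("StateChannel[id].write failed to send: %v", io.EOF)
	// ...and reconstructed as a fresh error value on receipt.
	got := errors.New(wire)

	fmt.Println(got == io.EOF)                                 // false: identity is lost
	fmt.Println(strings.Contains(got.Error(), io.EOF.Error())) // true: substring matching still works
}
```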
diff --git a/sdks/go/pkg/beam/core/runtime/pipelinex/clone.go b/sdks/go/pkg/beam/core/runtime/pipelinex/clone.go
index 444af56..12465a8 100644
--- a/sdks/go/pkg/beam/core/runtime/pipelinex/clone.go
+++ b/sdks/go/pkg/beam/core/runtime/pipelinex/clone.go
@@ -52,6 +52,7 @@
ret.Subtransforms, _ = reflectx.ShallowClone(t.Subtransforms).([]string)
ret.Inputs, _ = reflectx.ShallowClone(t.Inputs).(map[string]string)
ret.Outputs, _ = reflectx.ShallowClone(t.Outputs).(map[string]string)
+ ret.EnvironmentId = t.EnvironmentId
return ret
}
diff --git a/sdks/go/pkg/beam/model/PROTOBUF.md b/sdks/go/pkg/beam/model/PROTOBUF.md
index 6a03268..5c9b162 100644
--- a/sdks/go/pkg/beam/model/PROTOBUF.md
+++ b/sdks/go/pkg/beam/model/PROTOBUF.md
@@ -20,16 +20,48 @@
# Rebuilding generated protobuf code
If you make changes to .proto files, you will need to rebuild the generated Go code.
-To do that, you will need:
-* [The protobuf compiler](https://github.com/google/protobuf/releases)
-* A proper Go development setup per `BUILD.md` (variables GOPATH and GOBIN set properly)
-* `go get -u github.com/golang/protobuf/protoc-gen-go`
+First, follow this one-time setup:
-> **Note:** Newer releases of the protobuf compiler may be incompatible with the
-> protobuf version in Beam. For guaranteed compatibility, use the latest release
-> available from the date of the golang/protobuf release used by Beam. (Currently
-> v3.5.2)
+1. Download [the protobuf compiler](https://github.com/google/protobuf/releases).
+ The simplest approach is to download one of the prebuilt binaries and extract
+ it to a directory on your `$PATH`.
+1. Set up a Go development environment per [the official
+ instructions](https://golang.org/doc/install). `$GOPATH` must be set.
+ If it's not set, follow
+ [these instructions](https://github.com/golang/go/wiki/SettingGOPATH).
+1. Add `$GOBIN` to your `$PATH`. (Note: If `$GOBIN` is not set, add `$GOPATH/bin`
+ instead.)
-If all this setup is complete, simply run `go generate` in the current directory
-(`pkg/beam/model`).
+To generate the code:
+
+1. Navigate to this directory (`pkg/beam/model`).
+1. `go get -u github.com/golang/protobuf/protoc-gen-go`
+1. `go generate`
+
+## Generated Go code fails to build
+
+Occasionally, after following the steps above and updating the generated .pb.go
+files, they may fail to build. This usually indicates a version mismatch in the
+[golang/protobuf](https://github.com/golang/protobuf) package. Specifically,
+the version of protoc-gen-go in the local Go workspace (used during
+`go generate`) differs from the cached version of golang/protobuf used for
+building Beam (specified in [gogradle.lock](https://github.com/apache/beam/blob/master/sdks/go/gogradle.lock)).
+
+The preferred way to fix this issue is to update the fixed Beam version of
+golang/protobuf to a recent commit. This can be done by manually changing the
+commit hash for golang/protobuf in [gogradle.lock](https://github.com/apache/beam/blob/master/sdks/go/gogradle.lock).
+
+If that fails due to dependency issues, an alternate approach is to downgrade
+the local version of protoc-gen-go to match the commit in gogradle.lock, with
+the following commands.
+
+```bash
+# Replace <commit hash> with the commit of golang/protobuf in gogradle.lock.
+go get -d -u github.com/golang/protobuf/protoc-gen-go
+git -C "$(go env GOPATH)"/src/github.com/golang/protobuf checkout <commit hash>
+go install github.com/golang/protobuf/protoc-gen-go
+```
+
+> **Note:** This leaves the local repository of protoc-gen-go in a detached
+> head state, which may cause problems when updating it in the future. To fix
+> this, navigate to the protoc-gen-go directory and run `git checkout master`.
diff --git a/sdks/go/pkg/beam/model/fnexecution_v1/beam_fn_api.pb.go b/sdks/go/pkg/beam/model/fnexecution_v1/beam_fn_api.pb.go
index 6516ed3..19bdfa4 100644
--- a/sdks/go/pkg/beam/model/fnexecution_v1/beam_fn_api.pb.go
+++ b/sdks/go/pkg/beam/model/fnexecution_v1/beam_fn_api.pb.go
@@ -3,18 +3,19 @@
package fnexecution_v1
-import proto "github.com/golang/protobuf/proto"
-import fmt "fmt"
-import math "math"
-import pipeline_v1 "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
-import _ "github.com/golang/protobuf/protoc-gen-go/descriptor"
-import duration "github.com/golang/protobuf/ptypes/duration"
-import timestamp "github.com/golang/protobuf/ptypes/timestamp"
-import _ "github.com/golang/protobuf/ptypes/wrappers"
-
import (
- context "golang.org/x/net/context"
+ context "context"
+ fmt "fmt"
+ pipeline_v1 "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
+ proto "github.com/golang/protobuf/proto"
+ _ "github.com/golang/protobuf/protoc-gen-go/descriptor"
+ duration "github.com/golang/protobuf/ptypes/duration"
+ timestamp "github.com/golang/protobuf/ptypes/timestamp"
+ _ "github.com/golang/protobuf/ptypes/wrappers"
grpc "google.golang.org/grpc"
+ codes "google.golang.org/grpc/codes"
+ status "google.golang.org/grpc/status"
+ math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
@@ -26,7 +27,7 @@
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
-const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
type LogEntry_Severity_Enum int32
@@ -60,6 +61,7 @@
6: "ERROR",
7: "CRITICAL",
}
+
var LogEntry_Severity_Enum_value = map[string]int32{
"UNSPECIFIED": 0,
"TRACE": 1,
@@ -74,8 +76,9 @@
func (x LogEntry_Severity_Enum) String() string {
return proto.EnumName(LogEntry_Severity_Enum_name, int32(x))
}
+
func (LogEntry_Severity_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{27, 1, 0}
+ return fileDescriptor_6d954c03a4758710, []int{27, 1, 0}
}
// A descriptor for connecting to a remote port using the Beam Fn Data API.
@@ -98,16 +101,17 @@
func (m *RemoteGrpcPort) String() string { return proto.CompactTextString(m) }
func (*RemoteGrpcPort) ProtoMessage() {}
func (*RemoteGrpcPort) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{0}
+ return fileDescriptor_6d954c03a4758710, []int{0}
}
+
func (m *RemoteGrpcPort) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_RemoteGrpcPort.Unmarshal(m, b)
}
func (m *RemoteGrpcPort) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_RemoteGrpcPort.Marshal(b, m, deterministic)
}
-func (dst *RemoteGrpcPort) XXX_Merge(src proto.Message) {
- xxx_messageInfo_RemoteGrpcPort.Merge(dst, src)
+func (m *RemoteGrpcPort) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_RemoteGrpcPort.Merge(m, src)
}
func (m *RemoteGrpcPort) XXX_Size() int {
return xxx_messageInfo_RemoteGrpcPort.Size(m)
@@ -158,16 +162,17 @@
func (m *InstructionRequest) String() string { return proto.CompactTextString(m) }
func (*InstructionRequest) ProtoMessage() {}
func (*InstructionRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{1}
+ return fileDescriptor_6d954c03a4758710, []int{1}
}
+
func (m *InstructionRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_InstructionRequest.Unmarshal(m, b)
}
func (m *InstructionRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_InstructionRequest.Marshal(b, m, deterministic)
}
-func (dst *InstructionRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_InstructionRequest.Merge(dst, src)
+func (m *InstructionRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_InstructionRequest.Merge(m, src)
}
func (m *InstructionRequest) XXX_Size() int {
return xxx_messageInfo_InstructionRequest.Size(m)
@@ -178,6 +183,13 @@
var xxx_messageInfo_InstructionRequest proto.InternalMessageInfo
+func (m *InstructionRequest) GetInstructionId() string {
+ if m != nil {
+ return m.InstructionId
+ }
+ return ""
+}
+
type isInstructionRequest_Request interface {
isInstructionRequest_Request()
}
@@ -185,24 +197,32 @@
type InstructionRequest_Register struct {
Register *RegisterRequest `protobuf:"bytes,1000,opt,name=register,proto3,oneof"`
}
+
type InstructionRequest_ProcessBundle struct {
ProcessBundle *ProcessBundleRequest `protobuf:"bytes,1001,opt,name=process_bundle,json=processBundle,proto3,oneof"`
}
+
type InstructionRequest_ProcessBundleProgress struct {
ProcessBundleProgress *ProcessBundleProgressRequest `protobuf:"bytes,1002,opt,name=process_bundle_progress,json=processBundleProgress,proto3,oneof"`
}
+
type InstructionRequest_ProcessBundleSplit struct {
ProcessBundleSplit *ProcessBundleSplitRequest `protobuf:"bytes,1003,opt,name=process_bundle_split,json=processBundleSplit,proto3,oneof"`
}
+
type InstructionRequest_FinalizeBundle struct {
FinalizeBundle *FinalizeBundleRequest `protobuf:"bytes,1004,opt,name=finalize_bundle,json=finalizeBundle,proto3,oneof"`
}
-func (*InstructionRequest_Register) isInstructionRequest_Request() {}
-func (*InstructionRequest_ProcessBundle) isInstructionRequest_Request() {}
+func (*InstructionRequest_Register) isInstructionRequest_Request() {}
+
+func (*InstructionRequest_ProcessBundle) isInstructionRequest_Request() {}
+
func (*InstructionRequest_ProcessBundleProgress) isInstructionRequest_Request() {}
-func (*InstructionRequest_ProcessBundleSplit) isInstructionRequest_Request() {}
-func (*InstructionRequest_FinalizeBundle) isInstructionRequest_Request() {}
+
+func (*InstructionRequest_ProcessBundleSplit) isInstructionRequest_Request() {}
+
+func (*InstructionRequest_FinalizeBundle) isInstructionRequest_Request() {}
func (m *InstructionRequest) GetRequest() isInstructionRequest_Request {
if m != nil {
@@ -211,13 +231,6 @@
return nil
}
-func (m *InstructionRequest) GetInstructionId() string {
- if m != nil {
- return m.InstructionId
- }
- return ""
-}
-
func (m *InstructionRequest) GetRegister() *RegisterRequest {
if x, ok := m.GetRequest().(*InstructionRequest_Register); ok {
return x.Register
@@ -253,9 +266,9 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*InstructionRequest) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _InstructionRequest_OneofMarshaler, _InstructionRequest_OneofUnmarshaler, _InstructionRequest_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*InstructionRequest) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*InstructionRequest_Register)(nil),
(*InstructionRequest_ProcessBundle)(nil),
(*InstructionRequest_ProcessBundleProgress)(nil),
@@ -264,126 +277,6 @@
}
}
-func _InstructionRequest_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*InstructionRequest)
- // request
- switch x := m.Request.(type) {
- case *InstructionRequest_Register:
- b.EncodeVarint(1000<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Register); err != nil {
- return err
- }
- case *InstructionRequest_ProcessBundle:
- b.EncodeVarint(1001<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.ProcessBundle); err != nil {
- return err
- }
- case *InstructionRequest_ProcessBundleProgress:
- b.EncodeVarint(1002<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.ProcessBundleProgress); err != nil {
- return err
- }
- case *InstructionRequest_ProcessBundleSplit:
- b.EncodeVarint(1003<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.ProcessBundleSplit); err != nil {
- return err
- }
- case *InstructionRequest_FinalizeBundle:
- b.EncodeVarint(1004<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.FinalizeBundle); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("InstructionRequest.Request has unexpected type %T", x)
- }
- return nil
-}
-
-func _InstructionRequest_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*InstructionRequest)
- switch tag {
- case 1000: // request.register
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(RegisterRequest)
- err := b.DecodeMessage(msg)
- m.Request = &InstructionRequest_Register{msg}
- return true, err
- case 1001: // request.process_bundle
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(ProcessBundleRequest)
- err := b.DecodeMessage(msg)
- m.Request = &InstructionRequest_ProcessBundle{msg}
- return true, err
- case 1002: // request.process_bundle_progress
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(ProcessBundleProgressRequest)
- err := b.DecodeMessage(msg)
- m.Request = &InstructionRequest_ProcessBundleProgress{msg}
- return true, err
- case 1003: // request.process_bundle_split
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(ProcessBundleSplitRequest)
- err := b.DecodeMessage(msg)
- m.Request = &InstructionRequest_ProcessBundleSplit{msg}
- return true, err
- case 1004: // request.finalize_bundle
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(FinalizeBundleRequest)
- err := b.DecodeMessage(msg)
- m.Request = &InstructionRequest_FinalizeBundle{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _InstructionRequest_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*InstructionRequest)
- // request
- switch x := m.Request.(type) {
- case *InstructionRequest_Register:
- s := proto.Size(x.Register)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *InstructionRequest_ProcessBundle:
- s := proto.Size(x.ProcessBundle)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *InstructionRequest_ProcessBundleProgress:
- s := proto.Size(x.ProcessBundleProgress)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *InstructionRequest_ProcessBundleSplit:
- s := proto.Size(x.ProcessBundleSplit)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *InstructionRequest_FinalizeBundle:
- s := proto.Size(x.FinalizeBundle)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
// The response for an associated request the SDK had been asked to fulfill.
// Stable
type InstructionResponse struct {
@@ -414,16 +307,17 @@
func (m *InstructionResponse) String() string { return proto.CompactTextString(m) }
func (*InstructionResponse) ProtoMessage() {}
func (*InstructionResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{2}
+ return fileDescriptor_6d954c03a4758710, []int{2}
}
+
func (m *InstructionResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_InstructionResponse.Unmarshal(m, b)
}
func (m *InstructionResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_InstructionResponse.Marshal(b, m, deterministic)
}
-func (dst *InstructionResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_InstructionResponse.Merge(dst, src)
+func (m *InstructionResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_InstructionResponse.Merge(m, src)
}
func (m *InstructionResponse) XXX_Size() int {
return xxx_messageInfo_InstructionResponse.Size(m)
@@ -434,39 +328,6 @@
var xxx_messageInfo_InstructionResponse proto.InternalMessageInfo
-type isInstructionResponse_Response interface {
- isInstructionResponse_Response()
-}
-
-type InstructionResponse_Register struct {
- Register *RegisterResponse `protobuf:"bytes,1000,opt,name=register,proto3,oneof"`
-}
-type InstructionResponse_ProcessBundle struct {
- ProcessBundle *ProcessBundleResponse `protobuf:"bytes,1001,opt,name=process_bundle,json=processBundle,proto3,oneof"`
-}
-type InstructionResponse_ProcessBundleProgress struct {
- ProcessBundleProgress *ProcessBundleProgressResponse `protobuf:"bytes,1002,opt,name=process_bundle_progress,json=processBundleProgress,proto3,oneof"`
-}
-type InstructionResponse_ProcessBundleSplit struct {
- ProcessBundleSplit *ProcessBundleSplitResponse `protobuf:"bytes,1003,opt,name=process_bundle_split,json=processBundleSplit,proto3,oneof"`
-}
-type InstructionResponse_FinalizeBundle struct {
- FinalizeBundle *FinalizeBundleResponse `protobuf:"bytes,1004,opt,name=finalize_bundle,json=finalizeBundle,proto3,oneof"`
-}
-
-func (*InstructionResponse_Register) isInstructionResponse_Response() {}
-func (*InstructionResponse_ProcessBundle) isInstructionResponse_Response() {}
-func (*InstructionResponse_ProcessBundleProgress) isInstructionResponse_Response() {}
-func (*InstructionResponse_ProcessBundleSplit) isInstructionResponse_Response() {}
-func (*InstructionResponse_FinalizeBundle) isInstructionResponse_Response() {}
-
-func (m *InstructionResponse) GetResponse() isInstructionResponse_Response {
- if m != nil {
- return m.Response
- }
- return nil
-}
-
func (m *InstructionResponse) GetInstructionId() string {
if m != nil {
return m.InstructionId
@@ -481,6 +342,47 @@
return ""
}
+type isInstructionResponse_Response interface {
+ isInstructionResponse_Response()
+}
+
+type InstructionResponse_Register struct {
+ Register *RegisterResponse `protobuf:"bytes,1000,opt,name=register,proto3,oneof"`
+}
+
+type InstructionResponse_ProcessBundle struct {
+ ProcessBundle *ProcessBundleResponse `protobuf:"bytes,1001,opt,name=process_bundle,json=processBundle,proto3,oneof"`
+}
+
+type InstructionResponse_ProcessBundleProgress struct {
+ ProcessBundleProgress *ProcessBundleProgressResponse `protobuf:"bytes,1002,opt,name=process_bundle_progress,json=processBundleProgress,proto3,oneof"`
+}
+
+type InstructionResponse_ProcessBundleSplit struct {
+ ProcessBundleSplit *ProcessBundleSplitResponse `protobuf:"bytes,1003,opt,name=process_bundle_split,json=processBundleSplit,proto3,oneof"`
+}
+
+type InstructionResponse_FinalizeBundle struct {
+ FinalizeBundle *FinalizeBundleResponse `protobuf:"bytes,1004,opt,name=finalize_bundle,json=finalizeBundle,proto3,oneof"`
+}
+
+func (*InstructionResponse_Register) isInstructionResponse_Response() {}
+
+func (*InstructionResponse_ProcessBundle) isInstructionResponse_Response() {}
+
+func (*InstructionResponse_ProcessBundleProgress) isInstructionResponse_Response() {}
+
+func (*InstructionResponse_ProcessBundleSplit) isInstructionResponse_Response() {}
+
+func (*InstructionResponse_FinalizeBundle) isInstructionResponse_Response() {}
+
+func (m *InstructionResponse) GetResponse() isInstructionResponse_Response {
+ if m != nil {
+ return m.Response
+ }
+ return nil
+}
+
func (m *InstructionResponse) GetRegister() *RegisterResponse {
if x, ok := m.GetResponse().(*InstructionResponse_Register); ok {
return x.Register
@@ -516,9 +418,9 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*InstructionResponse) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _InstructionResponse_OneofMarshaler, _InstructionResponse_OneofUnmarshaler, _InstructionResponse_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*InstructionResponse) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*InstructionResponse_Register)(nil),
(*InstructionResponse_ProcessBundle)(nil),
(*InstructionResponse_ProcessBundleProgress)(nil),
@@ -527,126 +429,6 @@
}
}
-func _InstructionResponse_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*InstructionResponse)
- // response
- switch x := m.Response.(type) {
- case *InstructionResponse_Register:
- b.EncodeVarint(1000<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Register); err != nil {
- return err
- }
- case *InstructionResponse_ProcessBundle:
- b.EncodeVarint(1001<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.ProcessBundle); err != nil {
- return err
- }
- case *InstructionResponse_ProcessBundleProgress:
- b.EncodeVarint(1002<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.ProcessBundleProgress); err != nil {
- return err
- }
- case *InstructionResponse_ProcessBundleSplit:
- b.EncodeVarint(1003<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.ProcessBundleSplit); err != nil {
- return err
- }
- case *InstructionResponse_FinalizeBundle:
- b.EncodeVarint(1004<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.FinalizeBundle); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("InstructionResponse.Response has unexpected type %T", x)
- }
- return nil
-}
-
-func _InstructionResponse_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*InstructionResponse)
- switch tag {
- case 1000: // response.register
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(RegisterResponse)
- err := b.DecodeMessage(msg)
- m.Response = &InstructionResponse_Register{msg}
- return true, err
- case 1001: // response.process_bundle
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(ProcessBundleResponse)
- err := b.DecodeMessage(msg)
- m.Response = &InstructionResponse_ProcessBundle{msg}
- return true, err
- case 1002: // response.process_bundle_progress
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(ProcessBundleProgressResponse)
- err := b.DecodeMessage(msg)
- m.Response = &InstructionResponse_ProcessBundleProgress{msg}
- return true, err
- case 1003: // response.process_bundle_split
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(ProcessBundleSplitResponse)
- err := b.DecodeMessage(msg)
- m.Response = &InstructionResponse_ProcessBundleSplit{msg}
- return true, err
- case 1004: // response.finalize_bundle
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(FinalizeBundleResponse)
- err := b.DecodeMessage(msg)
- m.Response = &InstructionResponse_FinalizeBundle{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _InstructionResponse_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*InstructionResponse)
- // response
- switch x := m.Response.(type) {
- case *InstructionResponse_Register:
- s := proto.Size(x.Register)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *InstructionResponse_ProcessBundle:
- s := proto.Size(x.ProcessBundle)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *InstructionResponse_ProcessBundleProgress:
- s := proto.Size(x.ProcessBundleProgress)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *InstructionResponse_ProcessBundleSplit:
- s := proto.Size(x.ProcessBundleSplit)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *InstructionResponse_FinalizeBundle:
- s := proto.Size(x.FinalizeBundle)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
// A list of objects which can be referred to by the runner in
// future requests.
// Stable
@@ -662,16 +444,17 @@
func (m *RegisterRequest) String() string { return proto.CompactTextString(m) }
func (*RegisterRequest) ProtoMessage() {}
func (*RegisterRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{3}
+ return fileDescriptor_6d954c03a4758710, []int{3}
}
+
func (m *RegisterRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_RegisterRequest.Unmarshal(m, b)
}
func (m *RegisterRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_RegisterRequest.Marshal(b, m, deterministic)
}
-func (dst *RegisterRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_RegisterRequest.Merge(dst, src)
+func (m *RegisterRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_RegisterRequest.Merge(m, src)
}
func (m *RegisterRequest) XXX_Size() int {
return xxx_messageInfo_RegisterRequest.Size(m)
@@ -700,16 +483,17 @@
func (m *RegisterResponse) String() string { return proto.CompactTextString(m) }
func (*RegisterResponse) ProtoMessage() {}
func (*RegisterResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{4}
+ return fileDescriptor_6d954c03a4758710, []int{4}
}
+
func (m *RegisterResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_RegisterResponse.Unmarshal(m, b)
}
func (m *RegisterResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_RegisterResponse.Marshal(b, m, deterministic)
}
-func (dst *RegisterResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_RegisterResponse.Merge(dst, src)
+func (m *RegisterResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_RegisterResponse.Merge(m, src)
}
func (m *RegisterResponse) XXX_Size() int {
return xxx_messageInfo_RegisterResponse.Size(m)
@@ -748,16 +532,17 @@
func (m *ProcessBundleDescriptor) String() string { return proto.CompactTextString(m) }
func (*ProcessBundleDescriptor) ProtoMessage() {}
func (*ProcessBundleDescriptor) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{5}
+ return fileDescriptor_6d954c03a4758710, []int{5}
}
+
func (m *ProcessBundleDescriptor) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProcessBundleDescriptor.Unmarshal(m, b)
}
func (m *ProcessBundleDescriptor) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProcessBundleDescriptor.Marshal(b, m, deterministic)
}
-func (dst *ProcessBundleDescriptor) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProcessBundleDescriptor.Merge(dst, src)
+func (m *ProcessBundleDescriptor) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProcessBundleDescriptor.Merge(m, src)
}
func (m *ProcessBundleDescriptor) XXX_Size() int {
return xxx_messageInfo_ProcessBundleDescriptor.Size(m)
@@ -856,16 +641,17 @@
func (m *BundleApplication) String() string { return proto.CompactTextString(m) }
func (*BundleApplication) ProtoMessage() {}
func (*BundleApplication) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{6}
+ return fileDescriptor_6d954c03a4758710, []int{6}
}
+
func (m *BundleApplication) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_BundleApplication.Unmarshal(m, b)
}
func (m *BundleApplication) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_BundleApplication.Marshal(b, m, deterministic)
}
-func (dst *BundleApplication) XXX_Merge(src proto.Message) {
- xxx_messageInfo_BundleApplication.Merge(dst, src)
+func (m *BundleApplication) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_BundleApplication.Merge(m, src)
}
func (m *BundleApplication) XXX_Size() int {
return xxx_messageInfo_BundleApplication.Size(m)
@@ -922,16 +708,12 @@
// Either an absolute timestamp or a relative timestamp can represent a
// scheduled execution time.
type DelayedBundleApplication struct {
- // Recommended time at which the application should be scheduled to execute
- // by the runner. Times in the past may be scheduled to execute immediately.
- // TODO(BEAM-8536): Migrate usage of absolute time to requested_time_delay.
- RequestedExecutionTime *timestamp.Timestamp `protobuf:"bytes,1,opt,name=requested_execution_time,json=requestedExecutionTime,proto3" json:"requested_execution_time,omitempty"`
// (Required) The application that should be scheduled.
- Application *BundleApplication `protobuf:"bytes,2,opt,name=application,proto3" json:"application,omitempty"`
+ Application *BundleApplication `protobuf:"bytes,1,opt,name=application,proto3" json:"application,omitempty"`
// Recommended time delay at which the application should be scheduled to
// execute by the runner. Time delay that equals 0 may be scheduled to execute
// immediately. The unit of time delay should be microsecond.
- RequestedTimeDelay *duration.Duration `protobuf:"bytes,3,opt,name=requested_time_delay,json=requestedTimeDelay,proto3" json:"requested_time_delay,omitempty"`
+ RequestedTimeDelay *duration.Duration `protobuf:"bytes,2,opt,name=requested_time_delay,json=requestedTimeDelay,proto3" json:"requested_time_delay,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
@@ -941,16 +723,17 @@
func (m *DelayedBundleApplication) String() string { return proto.CompactTextString(m) }
func (*DelayedBundleApplication) ProtoMessage() {}
func (*DelayedBundleApplication) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{7}
+ return fileDescriptor_6d954c03a4758710, []int{7}
}
+
func (m *DelayedBundleApplication) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DelayedBundleApplication.Unmarshal(m, b)
}
func (m *DelayedBundleApplication) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_DelayedBundleApplication.Marshal(b, m, deterministic)
}
-func (dst *DelayedBundleApplication) XXX_Merge(src proto.Message) {
- xxx_messageInfo_DelayedBundleApplication.Merge(dst, src)
+func (m *DelayedBundleApplication) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_DelayedBundleApplication.Merge(m, src)
}
func (m *DelayedBundleApplication) XXX_Size() int {
return xxx_messageInfo_DelayedBundleApplication.Size(m)
@@ -961,13 +744,6 @@
var xxx_messageInfo_DelayedBundleApplication proto.InternalMessageInfo
-func (m *DelayedBundleApplication) GetRequestedExecutionTime() *timestamp.Timestamp {
- if m != nil {
- return m.RequestedExecutionTime
- }
- return nil
-}
-
func (m *DelayedBundleApplication) GetApplication() *BundleApplication {
if m != nil {
return m.Application
@@ -1000,16 +776,17 @@
func (m *ProcessBundleRequest) String() string { return proto.CompactTextString(m) }
func (*ProcessBundleRequest) ProtoMessage() {}
func (*ProcessBundleRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{8}
+ return fileDescriptor_6d954c03a4758710, []int{8}
}
+
func (m *ProcessBundleRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProcessBundleRequest.Unmarshal(m, b)
}
func (m *ProcessBundleRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProcessBundleRequest.Marshal(b, m, deterministic)
}
-func (dst *ProcessBundleRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProcessBundleRequest.Merge(dst, src)
+func (m *ProcessBundleRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProcessBundleRequest.Merge(m, src)
}
func (m *ProcessBundleRequest) XXX_Size() int {
return xxx_messageInfo_ProcessBundleRequest.Size(m)
@@ -1054,16 +831,17 @@
func (m *ProcessBundleRequest_CacheToken) String() string { return proto.CompactTextString(m) }
func (*ProcessBundleRequest_CacheToken) ProtoMessage() {}
func (*ProcessBundleRequest_CacheToken) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{8, 0}
+ return fileDescriptor_6d954c03a4758710, []int{8, 0}
}
+
func (m *ProcessBundleRequest_CacheToken) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProcessBundleRequest_CacheToken.Unmarshal(m, b)
}
func (m *ProcessBundleRequest_CacheToken) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProcessBundleRequest_CacheToken.Marshal(b, m, deterministic)
}
-func (dst *ProcessBundleRequest_CacheToken) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProcessBundleRequest_CacheToken.Merge(dst, src)
+func (m *ProcessBundleRequest_CacheToken) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProcessBundleRequest_CacheToken.Merge(m, src)
}
func (m *ProcessBundleRequest_CacheToken) XXX_Size() int {
return xxx_messageInfo_ProcessBundleRequest_CacheToken.Size(m)
@@ -1081,11 +859,13 @@
type ProcessBundleRequest_CacheToken_UserState_ struct {
UserState *ProcessBundleRequest_CacheToken_UserState `protobuf:"bytes,1,opt,name=user_state,json=userState,proto3,oneof"`
}
+
type ProcessBundleRequest_CacheToken_SideInput_ struct {
SideInput *ProcessBundleRequest_CacheToken_SideInput `protobuf:"bytes,2,opt,name=side_input,json=sideInput,proto3,oneof"`
}
func (*ProcessBundleRequest_CacheToken_UserState_) isProcessBundleRequest_CacheToken_Type() {}
+
func (*ProcessBundleRequest_CacheToken_SideInput_) isProcessBundleRequest_CacheToken_Type() {}
func (m *ProcessBundleRequest_CacheToken) GetType() isProcessBundleRequest_CacheToken_Type {
@@ -1116,80 +896,14 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*ProcessBundleRequest_CacheToken) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _ProcessBundleRequest_CacheToken_OneofMarshaler, _ProcessBundleRequest_CacheToken_OneofUnmarshaler, _ProcessBundleRequest_CacheToken_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*ProcessBundleRequest_CacheToken) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*ProcessBundleRequest_CacheToken_UserState_)(nil),
(*ProcessBundleRequest_CacheToken_SideInput_)(nil),
}
}
-func _ProcessBundleRequest_CacheToken_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*ProcessBundleRequest_CacheToken)
- // type
- switch x := m.Type.(type) {
- case *ProcessBundleRequest_CacheToken_UserState_:
- b.EncodeVarint(1<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.UserState); err != nil {
- return err
- }
- case *ProcessBundleRequest_CacheToken_SideInput_:
- b.EncodeVarint(2<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.SideInput); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("ProcessBundleRequest_CacheToken.Type has unexpected type %T", x)
- }
- return nil
-}
-
-func _ProcessBundleRequest_CacheToken_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*ProcessBundleRequest_CacheToken)
- switch tag {
- case 1: // type.user_state
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(ProcessBundleRequest_CacheToken_UserState)
- err := b.DecodeMessage(msg)
- m.Type = &ProcessBundleRequest_CacheToken_UserState_{msg}
- return true, err
- case 2: // type.side_input
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(ProcessBundleRequest_CacheToken_SideInput)
- err := b.DecodeMessage(msg)
- m.Type = &ProcessBundleRequest_CacheToken_SideInput_{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _ProcessBundleRequest_CacheToken_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*ProcessBundleRequest_CacheToken)
- // type
- switch x := m.Type.(type) {
- case *ProcessBundleRequest_CacheToken_UserState_:
- s := proto.Size(x.UserState)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *ProcessBundleRequest_CacheToken_SideInput_:
- s := proto.Size(x.SideInput)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
// A flag to indicate a cache token is valid for user state.
type ProcessBundleRequest_CacheToken_UserState struct {
XXX_NoUnkeyedLiteral struct{} `json:"-"`
@@ -1203,16 +917,17 @@
func (m *ProcessBundleRequest_CacheToken_UserState) String() string { return proto.CompactTextString(m) }
func (*ProcessBundleRequest_CacheToken_UserState) ProtoMessage() {}
func (*ProcessBundleRequest_CacheToken_UserState) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{8, 0, 0}
+ return fileDescriptor_6d954c03a4758710, []int{8, 0, 0}
}
+
func (m *ProcessBundleRequest_CacheToken_UserState) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProcessBundleRequest_CacheToken_UserState.Unmarshal(m, b)
}
func (m *ProcessBundleRequest_CacheToken_UserState) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProcessBundleRequest_CacheToken_UserState.Marshal(b, m, deterministic)
}
-func (dst *ProcessBundleRequest_CacheToken_UserState) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProcessBundleRequest_CacheToken_UserState.Merge(dst, src)
+func (m *ProcessBundleRequest_CacheToken_UserState) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProcessBundleRequest_CacheToken_UserState.Merge(m, src)
}
func (m *ProcessBundleRequest_CacheToken_UserState) XXX_Size() int {
return xxx_messageInfo_ProcessBundleRequest_CacheToken_UserState.Size(m)
@@ -1238,16 +953,17 @@
func (m *ProcessBundleRequest_CacheToken_SideInput) String() string { return proto.CompactTextString(m) }
func (*ProcessBundleRequest_CacheToken_SideInput) ProtoMessage() {}
func (*ProcessBundleRequest_CacheToken_SideInput) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{8, 0, 1}
+ return fileDescriptor_6d954c03a4758710, []int{8, 0, 1}
}
+
func (m *ProcessBundleRequest_CacheToken_SideInput) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProcessBundleRequest_CacheToken_SideInput.Unmarshal(m, b)
}
func (m *ProcessBundleRequest_CacheToken_SideInput) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProcessBundleRequest_CacheToken_SideInput.Marshal(b, m, deterministic)
}
-func (dst *ProcessBundleRequest_CacheToken_SideInput) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProcessBundleRequest_CacheToken_SideInput.Merge(dst, src)
+func (m *ProcessBundleRequest_CacheToken_SideInput) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProcessBundleRequest_CacheToken_SideInput.Merge(m, src)
}
func (m *ProcessBundleRequest_CacheToken_SideInput) XXX_Size() int {
return xxx_messageInfo_ProcessBundleRequest_CacheToken_SideInput.Size(m)
@@ -1292,16 +1008,17 @@
func (m *ProcessBundleResponse) String() string { return proto.CompactTextString(m) }
func (*ProcessBundleResponse) ProtoMessage() {}
func (*ProcessBundleResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{9}
+ return fileDescriptor_6d954c03a4758710, []int{9}
}
+
func (m *ProcessBundleResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProcessBundleResponse.Unmarshal(m, b)
}
func (m *ProcessBundleResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProcessBundleResponse.Marshal(b, m, deterministic)
}
-func (dst *ProcessBundleResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProcessBundleResponse.Merge(dst, src)
+func (m *ProcessBundleResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProcessBundleResponse.Merge(m, src)
}
func (m *ProcessBundleResponse) XXX_Size() int {
return xxx_messageInfo_ProcessBundleResponse.Size(m)
@@ -1356,16 +1073,17 @@
func (m *ProcessBundleProgressRequest) String() string { return proto.CompactTextString(m) }
func (*ProcessBundleProgressRequest) ProtoMessage() {}
func (*ProcessBundleProgressRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{10}
+ return fileDescriptor_6d954c03a4758710, []int{10}
}
+
func (m *ProcessBundleProgressRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProcessBundleProgressRequest.Unmarshal(m, b)
}
func (m *ProcessBundleProgressRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProcessBundleProgressRequest.Marshal(b, m, deterministic)
}
-func (dst *ProcessBundleProgressRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProcessBundleProgressRequest.Merge(dst, src)
+func (m *ProcessBundleProgressRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProcessBundleProgressRequest.Merge(m, src)
}
func (m *ProcessBundleProgressRequest) XXX_Size() int {
return xxx_messageInfo_ProcessBundleProgressRequest.Size(m)
@@ -1395,16 +1113,17 @@
func (m *Metrics) String() string { return proto.CompactTextString(m) }
func (*Metrics) ProtoMessage() {}
func (*Metrics) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{11}
+ return fileDescriptor_6d954c03a4758710, []int{11}
}
+
func (m *Metrics) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Metrics.Unmarshal(m, b)
}
func (m *Metrics) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Metrics.Marshal(b, m, deterministic)
}
-func (dst *Metrics) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Metrics.Merge(dst, src)
+func (m *Metrics) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Metrics.Merge(m, src)
}
func (m *Metrics) XXX_Size() int {
return xxx_messageInfo_Metrics.Size(m)
@@ -1448,16 +1167,17 @@
func (m *Metrics_PTransform) String() string { return proto.CompactTextString(m) }
func (*Metrics_PTransform) ProtoMessage() {}
func (*Metrics_PTransform) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{11, 0}
+ return fileDescriptor_6d954c03a4758710, []int{11, 0}
}
+
func (m *Metrics_PTransform) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Metrics_PTransform.Unmarshal(m, b)
}
func (m *Metrics_PTransform) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Metrics_PTransform.Marshal(b, m, deterministic)
}
-func (dst *Metrics_PTransform) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Metrics_PTransform.Merge(dst, src)
+func (m *Metrics_PTransform) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Metrics_PTransform.Merge(m, src)
}
func (m *Metrics_PTransform) XXX_Size() int {
return xxx_messageInfo_Metrics_PTransform.Size(m)
@@ -1518,16 +1238,17 @@
func (m *Metrics_PTransform_Measured) String() string { return proto.CompactTextString(m) }
func (*Metrics_PTransform_Measured) ProtoMessage() {}
func (*Metrics_PTransform_Measured) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{11, 0, 0}
+ return fileDescriptor_6d954c03a4758710, []int{11, 0, 0}
}
+
func (m *Metrics_PTransform_Measured) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Metrics_PTransform_Measured.Unmarshal(m, b)
}
func (m *Metrics_PTransform_Measured) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Metrics_PTransform_Measured.Marshal(b, m, deterministic)
}
-func (dst *Metrics_PTransform_Measured) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Metrics_PTransform_Measured.Merge(dst, src)
+func (m *Metrics_PTransform_Measured) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Metrics_PTransform_Measured.Merge(m, src)
}
func (m *Metrics_PTransform_Measured) XXX_Size() int {
return xxx_messageInfo_Metrics_PTransform_Measured.Size(m)
@@ -1572,16 +1293,17 @@
func (m *Metrics_PTransform_ProcessedElements) String() string { return proto.CompactTextString(m) }
func (*Metrics_PTransform_ProcessedElements) ProtoMessage() {}
func (*Metrics_PTransform_ProcessedElements) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{11, 0, 1}
+ return fileDescriptor_6d954c03a4758710, []int{11, 0, 1}
}
+
func (m *Metrics_PTransform_ProcessedElements) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Metrics_PTransform_ProcessedElements.Unmarshal(m, b)
}
func (m *Metrics_PTransform_ProcessedElements) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Metrics_PTransform_ProcessedElements.Marshal(b, m, deterministic)
}
-func (dst *Metrics_PTransform_ProcessedElements) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Metrics_PTransform_ProcessedElements.Merge(dst, src)
+func (m *Metrics_PTransform_ProcessedElements) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Metrics_PTransform_ProcessedElements.Merge(m, src)
}
func (m *Metrics_PTransform_ProcessedElements) XXX_Size() int {
return xxx_messageInfo_Metrics_PTransform_ProcessedElements.Size(m)
@@ -1626,16 +1348,17 @@
func (m *Metrics_PTransform_ActiveElements) String() string { return proto.CompactTextString(m) }
func (*Metrics_PTransform_ActiveElements) ProtoMessage() {}
func (*Metrics_PTransform_ActiveElements) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{11, 0, 2}
+ return fileDescriptor_6d954c03a4758710, []int{11, 0, 2}
}
+
func (m *Metrics_PTransform_ActiveElements) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Metrics_PTransform_ActiveElements.Unmarshal(m, b)
}
func (m *Metrics_PTransform_ActiveElements) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Metrics_PTransform_ActiveElements.Marshal(b, m, deterministic)
}
-func (dst *Metrics_PTransform_ActiveElements) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Metrics_PTransform_ActiveElements.Merge(dst, src)
+func (m *Metrics_PTransform_ActiveElements) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Metrics_PTransform_ActiveElements.Merge(m, src)
}
func (m *Metrics_PTransform_ActiveElements) XXX_Size() int {
return xxx_messageInfo_Metrics_PTransform_ActiveElements.Size(m)
@@ -1687,16 +1410,17 @@
func (m *Metrics_User) String() string { return proto.CompactTextString(m) }
func (*Metrics_User) ProtoMessage() {}
func (*Metrics_User) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{11, 1}
+ return fileDescriptor_6d954c03a4758710, []int{11, 1}
}
+
func (m *Metrics_User) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Metrics_User.Unmarshal(m, b)
}
func (m *Metrics_User) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Metrics_User.Marshal(b, m, deterministic)
}
-func (dst *Metrics_User) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Metrics_User.Merge(dst, src)
+func (m *Metrics_User) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Metrics_User.Merge(m, src)
}
func (m *Metrics_User) XXX_Size() int {
return xxx_messageInfo_Metrics_User.Size(m)
@@ -1707,6 +1431,13 @@
var xxx_messageInfo_Metrics_User proto.InternalMessageInfo
+func (m *Metrics_User) GetMetricName() *Metrics_User_MetricName {
+ if m != nil {
+ return m.MetricName
+ }
+ return nil
+}
+
type isMetrics_User_Data interface {
isMetrics_User_Data()
}
@@ -1714,16 +1445,20 @@
type Metrics_User_CounterData_ struct {
CounterData *Metrics_User_CounterData `protobuf:"bytes,1001,opt,name=counter_data,json=counterData,proto3,oneof"`
}
+
type Metrics_User_DistributionData_ struct {
DistributionData *Metrics_User_DistributionData `protobuf:"bytes,1002,opt,name=distribution_data,json=distributionData,proto3,oneof"`
}
+
type Metrics_User_GaugeData_ struct {
GaugeData *Metrics_User_GaugeData `protobuf:"bytes,1003,opt,name=gauge_data,json=gaugeData,proto3,oneof"`
}
-func (*Metrics_User_CounterData_) isMetrics_User_Data() {}
+func (*Metrics_User_CounterData_) isMetrics_User_Data() {}
+
func (*Metrics_User_DistributionData_) isMetrics_User_Data() {}
-func (*Metrics_User_GaugeData_) isMetrics_User_Data() {}
+
+func (*Metrics_User_GaugeData_) isMetrics_User_Data() {}
func (m *Metrics_User) GetData() isMetrics_User_Data {
if m != nil {
@@ -1732,13 +1467,6 @@
return nil
}
-func (m *Metrics_User) GetMetricName() *Metrics_User_MetricName {
- if m != nil {
- return m.MetricName
- }
- return nil
-}
-
func (m *Metrics_User) GetCounterData() *Metrics_User_CounterData {
if x, ok := m.GetData().(*Metrics_User_CounterData_); ok {
return x.CounterData
@@ -1760,99 +1488,15 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*Metrics_User) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _Metrics_User_OneofMarshaler, _Metrics_User_OneofUnmarshaler, _Metrics_User_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*Metrics_User) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*Metrics_User_CounterData_)(nil),
(*Metrics_User_DistributionData_)(nil),
(*Metrics_User_GaugeData_)(nil),
}
}
-func _Metrics_User_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*Metrics_User)
- // data
- switch x := m.Data.(type) {
- case *Metrics_User_CounterData_:
- b.EncodeVarint(1001<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.CounterData); err != nil {
- return err
- }
- case *Metrics_User_DistributionData_:
- b.EncodeVarint(1002<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.DistributionData); err != nil {
- return err
- }
- case *Metrics_User_GaugeData_:
- b.EncodeVarint(1003<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.GaugeData); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("Metrics_User.Data has unexpected type %T", x)
- }
- return nil
-}
-
-func _Metrics_User_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*Metrics_User)
- switch tag {
- case 1001: // data.counter_data
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Metrics_User_CounterData)
- err := b.DecodeMessage(msg)
- m.Data = &Metrics_User_CounterData_{msg}
- return true, err
- case 1002: // data.distribution_data
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Metrics_User_DistributionData)
- err := b.DecodeMessage(msg)
- m.Data = &Metrics_User_DistributionData_{msg}
- return true, err
- case 1003: // data.gauge_data
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Metrics_User_GaugeData)
- err := b.DecodeMessage(msg)
- m.Data = &Metrics_User_GaugeData_{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _Metrics_User_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*Metrics_User)
- // data
- switch x := m.Data.(type) {
- case *Metrics_User_CounterData_:
- s := proto.Size(x.CounterData)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Metrics_User_DistributionData_:
- s := proto.Size(x.DistributionData)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Metrics_User_GaugeData_:
- s := proto.Size(x.GaugeData)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
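Aside (illustration only, not part of the generated file): replacing the hand-written oneof marshaler/unmarshaler/sizer trio with XXX_OneofWrappers does not change the caller-facing API; the oneof is still populated through its generated wrapper struct and read back through the typed getters. A minimal sketch under stated assumptions — the fnpb import path follows the Beam Go SDK layout of this era, and CounterData's Value field is taken from the metrics proto rather than from this diff:

package main

import (
	"fmt"

	// Assumed import path for the generated fn_execution package.
	fnpb "github.com/apache/beam/sdks/go/pkg/beam/model/fnexecution_v1"
)

func main() {
	u := &fnpb.Metrics_User{
		MetricName: &fnpb.Metrics_User_MetricName{Namespace: "example", Name: "elements"},
		// The oneof is set via its wrapper struct, exactly as before this change.
		Data: &fnpb.Metrics_User_CounterData_{
			CounterData: &fnpb.Metrics_User_CounterData{Value: 42},
		},
	}
	if c := u.GetCounterData(); c != nil {
		fmt.Println(u.GetMetricName().GetName(), c.GetValue())
	}
}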
// A key for identifying a metric at the most granular level.
type Metrics_User_MetricName struct {
// (Required): The namespace of this metric.
@@ -1868,16 +1512,17 @@
func (m *Metrics_User_MetricName) String() string { return proto.CompactTextString(m) }
func (*Metrics_User_MetricName) ProtoMessage() {}
func (*Metrics_User_MetricName) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{11, 1, 0}
+ return fileDescriptor_6d954c03a4758710, []int{11, 1, 0}
}
+
func (m *Metrics_User_MetricName) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Metrics_User_MetricName.Unmarshal(m, b)
}
func (m *Metrics_User_MetricName) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Metrics_User_MetricName.Marshal(b, m, deterministic)
}
-func (dst *Metrics_User_MetricName) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Metrics_User_MetricName.Merge(dst, src)
+func (m *Metrics_User_MetricName) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Metrics_User_MetricName.Merge(m, src)
}
func (m *Metrics_User_MetricName) XXX_Size() int {
return xxx_messageInfo_Metrics_User_MetricName.Size(m)
@@ -1914,16 +1559,17 @@
func (m *Metrics_User_CounterData) String() string { return proto.CompactTextString(m) }
func (*Metrics_User_CounterData) ProtoMessage() {}
func (*Metrics_User_CounterData) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{11, 1, 1}
+ return fileDescriptor_6d954c03a4758710, []int{11, 1, 1}
}
+
func (m *Metrics_User_CounterData) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Metrics_User_CounterData.Unmarshal(m, b)
}
func (m *Metrics_User_CounterData) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Metrics_User_CounterData.Marshal(b, m, deterministic)
}
-func (dst *Metrics_User_CounterData) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Metrics_User_CounterData.Merge(dst, src)
+func (m *Metrics_User_CounterData) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Metrics_User_CounterData.Merge(m, src)
}
func (m *Metrics_User_CounterData) XXX_Size() int {
return xxx_messageInfo_Metrics_User_CounterData.Size(m)
@@ -1956,16 +1602,17 @@
func (m *Metrics_User_DistributionData) String() string { return proto.CompactTextString(m) }
func (*Metrics_User_DistributionData) ProtoMessage() {}
func (*Metrics_User_DistributionData) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{11, 1, 2}
+ return fileDescriptor_6d954c03a4758710, []int{11, 1, 2}
}
+
func (m *Metrics_User_DistributionData) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Metrics_User_DistributionData.Unmarshal(m, b)
}
func (m *Metrics_User_DistributionData) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Metrics_User_DistributionData.Marshal(b, m, deterministic)
}
-func (dst *Metrics_User_DistributionData) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Metrics_User_DistributionData.Merge(dst, src)
+func (m *Metrics_User_DistributionData) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Metrics_User_DistributionData.Merge(m, src)
}
func (m *Metrics_User_DistributionData) XXX_Size() int {
return xxx_messageInfo_Metrics_User_DistributionData.Size(m)
@@ -2017,16 +1664,17 @@
func (m *Metrics_User_GaugeData) String() string { return proto.CompactTextString(m) }
func (*Metrics_User_GaugeData) ProtoMessage() {}
func (*Metrics_User_GaugeData) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{11, 1, 3}
+ return fileDescriptor_6d954c03a4758710, []int{11, 1, 3}
}
+
func (m *Metrics_User_GaugeData) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Metrics_User_GaugeData.Unmarshal(m, b)
}
func (m *Metrics_User_GaugeData) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Metrics_User_GaugeData.Marshal(b, m, deterministic)
}
-func (dst *Metrics_User_GaugeData) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Metrics_User_GaugeData.Merge(dst, src)
+func (m *Metrics_User_GaugeData) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Metrics_User_GaugeData.Merge(m, src)
}
func (m *Metrics_User_GaugeData) XXX_Size() int {
return xxx_messageInfo_Metrics_User_GaugeData.Size(m)
@@ -2069,16 +1717,17 @@
func (m *ProcessBundleProgressResponse) String() string { return proto.CompactTextString(m) }
func (*ProcessBundleProgressResponse) ProtoMessage() {}
func (*ProcessBundleProgressResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{12}
+ return fileDescriptor_6d954c03a4758710, []int{12}
}
+
func (m *ProcessBundleProgressResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProcessBundleProgressResponse.Unmarshal(m, b)
}
func (m *ProcessBundleProgressResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProcessBundleProgressResponse.Marshal(b, m, deterministic)
}
-func (dst *ProcessBundleProgressResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProcessBundleProgressResponse.Merge(dst, src)
+func (m *ProcessBundleProgressResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProcessBundleProgressResponse.Merge(m, src)
}
func (m *ProcessBundleProgressResponse) XXX_Size() int {
return xxx_messageInfo_ProcessBundleProgressResponse.Size(m)
@@ -2130,16 +1779,17 @@
func (m *ProcessBundleSplitRequest) String() string { return proto.CompactTextString(m) }
func (*ProcessBundleSplitRequest) ProtoMessage() {}
func (*ProcessBundleSplitRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{13}
+ return fileDescriptor_6d954c03a4758710, []int{13}
}
+
func (m *ProcessBundleSplitRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProcessBundleSplitRequest.Unmarshal(m, b)
}
func (m *ProcessBundleSplitRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProcessBundleSplitRequest.Marshal(b, m, deterministic)
}
-func (dst *ProcessBundleSplitRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProcessBundleSplitRequest.Merge(dst, src)
+func (m *ProcessBundleSplitRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProcessBundleSplitRequest.Merge(m, src)
}
func (m *ProcessBundleSplitRequest) XXX_Size() int {
return xxx_messageInfo_ProcessBundleSplitRequest.Size(m)
@@ -2171,7 +1821,7 @@
//
// Set to 0 to "checkpoint" as soon as possible (keeping as little work as
// possible and returning the remainder).
- FractionOfRemainder float32 `protobuf:"fixed32,1,opt,name=fraction_of_remainder,json=fractionOfRemainder,proto3" json:"fraction_of_remainder,omitempty"`
+ FractionOfRemainder float64 `protobuf:"fixed64,1,opt,name=fraction_of_remainder,json=fractionOfRemainder,proto3" json:"fraction_of_remainder,omitempty"`
// A set of allowed element indices where the SDK may split. When this is
// empty, there are no constraints on where to split.
AllowedSplitPoints []int64 `protobuf:"varint,3,rep,packed,name=allowed_split_points,json=allowedSplitPoints,proto3" json:"allowed_split_points,omitempty"`
@@ -2190,16 +1840,17 @@
func (m *ProcessBundleSplitRequest_DesiredSplit) String() string { return proto.CompactTextString(m) }
func (*ProcessBundleSplitRequest_DesiredSplit) ProtoMessage() {}
func (*ProcessBundleSplitRequest_DesiredSplit) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{13, 0}
+ return fileDescriptor_6d954c03a4758710, []int{13, 0}
}
+
func (m *ProcessBundleSplitRequest_DesiredSplit) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProcessBundleSplitRequest_DesiredSplit.Unmarshal(m, b)
}
func (m *ProcessBundleSplitRequest_DesiredSplit) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProcessBundleSplitRequest_DesiredSplit.Marshal(b, m, deterministic)
}
-func (dst *ProcessBundleSplitRequest_DesiredSplit) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProcessBundleSplitRequest_DesiredSplit.Merge(dst, src)
+func (m *ProcessBundleSplitRequest_DesiredSplit) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProcessBundleSplitRequest_DesiredSplit.Merge(m, src)
}
func (m *ProcessBundleSplitRequest_DesiredSplit) XXX_Size() int {
return xxx_messageInfo_ProcessBundleSplitRequest_DesiredSplit.Size(m)
@@ -2210,7 +1861,7 @@
var xxx_messageInfo_ProcessBundleSplitRequest_DesiredSplit proto.InternalMessageInfo
-func (m *ProcessBundleSplitRequest_DesiredSplit) GetFractionOfRemainder() float32 {
+func (m *ProcessBundleSplitRequest_DesiredSplit) GetFractionOfRemainder() float64 {
if m != nil {
return m.FractionOfRemainder
}
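Aside (illustration only): fraction_of_remainder is now fixed64 on the wire and float64 in the generated struct, so split fractions carry full double precision. A fragment reusing the fnpb alias assumed in the sketch above and only the fields visible in this hunk:

// Ask the SDK to checkpoint half of the remaining work, restricted to the
// allowed element indices.
desired := &fnpb.ProcessBundleSplitRequest_DesiredSplit{
	FractionOfRemainder: 0.5,
	AllowedSplitPoints:  []int64{10, 20, 30},
}
_ = desired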
@@ -2260,16 +1911,17 @@
func (m *ProcessBundleSplitResponse) String() string { return proto.CompactTextString(m) }
func (*ProcessBundleSplitResponse) ProtoMessage() {}
func (*ProcessBundleSplitResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{14}
+ return fileDescriptor_6d954c03a4758710, []int{14}
}
+
func (m *ProcessBundleSplitResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProcessBundleSplitResponse.Unmarshal(m, b)
}
func (m *ProcessBundleSplitResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProcessBundleSplitResponse.Marshal(b, m, deterministic)
}
-func (dst *ProcessBundleSplitResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProcessBundleSplitResponse.Merge(dst, src)
+func (m *ProcessBundleSplitResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProcessBundleSplitResponse.Merge(m, src)
}
func (m *ProcessBundleSplitResponse) XXX_Size() int {
return xxx_messageInfo_ProcessBundleSplitResponse.Size(m)
@@ -2315,11 +1967,11 @@
// The last element of the input channel that should be entirely considered
// part of the primary, identified by its absolute index in the (ordered)
// channel.
- LastPrimaryElement int32 `protobuf:"varint,2,opt,name=last_primary_element,json=lastPrimaryElement,proto3" json:"last_primary_element,omitempty"`
+ LastPrimaryElement int64 `protobuf:"varint,2,opt,name=last_primary_element,json=lastPrimaryElement,proto3" json:"last_primary_element,omitempty"`
// The first element of the input channel that should be entirely considered
// part of the residual, identified by its absolute index in the (ordered)
// channel.
- FirstResidualElement int32 `protobuf:"varint,3,opt,name=first_residual_element,json=firstResidualElement,proto3" json:"first_residual_element,omitempty"`
+ FirstResidualElement int64 `protobuf:"varint,3,opt,name=first_residual_element,json=firstResidualElement,proto3" json:"first_residual_element,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
@@ -2331,16 +1983,17 @@
func (m *ProcessBundleSplitResponse_ChannelSplit) String() string { return proto.CompactTextString(m) }
func (*ProcessBundleSplitResponse_ChannelSplit) ProtoMessage() {}
func (*ProcessBundleSplitResponse_ChannelSplit) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{14, 0}
+ return fileDescriptor_6d954c03a4758710, []int{14, 0}
}
+
func (m *ProcessBundleSplitResponse_ChannelSplit) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProcessBundleSplitResponse_ChannelSplit.Unmarshal(m, b)
}
func (m *ProcessBundleSplitResponse_ChannelSplit) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProcessBundleSplitResponse_ChannelSplit.Marshal(b, m, deterministic)
}
-func (dst *ProcessBundleSplitResponse_ChannelSplit) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProcessBundleSplitResponse_ChannelSplit.Merge(dst, src)
+func (m *ProcessBundleSplitResponse_ChannelSplit) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProcessBundleSplitResponse_ChannelSplit.Merge(m, src)
}
func (m *ProcessBundleSplitResponse_ChannelSplit) XXX_Size() int {
return xxx_messageInfo_ProcessBundleSplitResponse_ChannelSplit.Size(m)
@@ -2358,14 +2011,14 @@
return ""
}
-func (m *ProcessBundleSplitResponse_ChannelSplit) GetLastPrimaryElement() int32 {
+func (m *ProcessBundleSplitResponse_ChannelSplit) GetLastPrimaryElement() int64 {
if m != nil {
return m.LastPrimaryElement
}
return 0
}
-func (m *ProcessBundleSplitResponse_ChannelSplit) GetFirstResidualElement() int32 {
+func (m *ProcessBundleSplitResponse_ChannelSplit) GetFirstResidualElement() int64 {
if m != nil {
return m.FirstResidualElement
}
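Aside (illustration only): the channel-split indices widen from int32 to int64, so splits can be described on input channels longer than 2^31 elements. A fragment under the same fnpb assumption:

// Primary keeps elements [0, 99] of the input channel; the residual starts
// at absolute index 100.
cs := &fnpb.ProcessBundleSplitResponse_ChannelSplit{
	LastPrimaryElement:   99,
	FirstResidualElement: 100,
}
_ = cs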
@@ -2385,16 +2038,17 @@
func (m *FinalizeBundleRequest) String() string { return proto.CompactTextString(m) }
func (*FinalizeBundleRequest) ProtoMessage() {}
func (*FinalizeBundleRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{15}
+ return fileDescriptor_6d954c03a4758710, []int{15}
}
+
func (m *FinalizeBundleRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_FinalizeBundleRequest.Unmarshal(m, b)
}
func (m *FinalizeBundleRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_FinalizeBundleRequest.Marshal(b, m, deterministic)
}
-func (dst *FinalizeBundleRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_FinalizeBundleRequest.Merge(dst, src)
+func (m *FinalizeBundleRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_FinalizeBundleRequest.Merge(m, src)
}
func (m *FinalizeBundleRequest) XXX_Size() int {
return xxx_messageInfo_FinalizeBundleRequest.Size(m)
@@ -2422,16 +2076,17 @@
func (m *FinalizeBundleResponse) String() string { return proto.CompactTextString(m) }
func (*FinalizeBundleResponse) ProtoMessage() {}
func (*FinalizeBundleResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{16}
+ return fileDescriptor_6d954c03a4758710, []int{16}
}
+
func (m *FinalizeBundleResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_FinalizeBundleResponse.Unmarshal(m, b)
}
func (m *FinalizeBundleResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_FinalizeBundleResponse.Marshal(b, m, deterministic)
}
-func (dst *FinalizeBundleResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_FinalizeBundleResponse.Merge(dst, src)
+func (m *FinalizeBundleResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_FinalizeBundleResponse.Merge(m, src)
}
func (m *FinalizeBundleResponse) XXX_Size() int {
return xxx_messageInfo_FinalizeBundleResponse.Size(m)
@@ -2456,16 +2111,17 @@
func (m *Elements) String() string { return proto.CompactTextString(m) }
func (*Elements) ProtoMessage() {}
func (*Elements) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{17}
+ return fileDescriptor_6d954c03a4758710, []int{17}
}
+
func (m *Elements) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Elements.Unmarshal(m, b)
}
func (m *Elements) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Elements.Marshal(b, m, deterministic)
}
-func (dst *Elements) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Elements.Merge(dst, src)
+func (m *Elements) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Elements.Merge(m, src)
}
func (m *Elements) XXX_Size() int {
return xxx_messageInfo_Elements.Size(m)
@@ -2514,16 +2170,17 @@
func (m *Elements_Data) String() string { return proto.CompactTextString(m) }
func (*Elements_Data) ProtoMessage() {}
func (*Elements_Data) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{17, 0}
+ return fileDescriptor_6d954c03a4758710, []int{17, 0}
}
+
func (m *Elements_Data) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Elements_Data.Unmarshal(m, b)
}
func (m *Elements_Data) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Elements_Data.Marshal(b, m, deterministic)
}
-func (dst *Elements_Data) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Elements_Data.Merge(dst, src)
+func (m *Elements_Data) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Elements_Data.Merge(m, src)
}
func (m *Elements_Data) XXX_Size() int {
return xxx_messageInfo_Elements_Data.Size(m)
@@ -2582,16 +2239,17 @@
func (m *StateRequest) String() string { return proto.CompactTextString(m) }
func (*StateRequest) ProtoMessage() {}
func (*StateRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{18}
+ return fileDescriptor_6d954c03a4758710, []int{18}
}
+
func (m *StateRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateRequest.Unmarshal(m, b)
}
func (m *StateRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateRequest.Marshal(b, m, deterministic)
}
-func (dst *StateRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateRequest.Merge(dst, src)
+func (m *StateRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateRequest.Merge(m, src)
}
func (m *StateRequest) XXX_Size() int {
return xxx_messageInfo_StateRequest.Size(m)
@@ -2602,31 +2260,6 @@
var xxx_messageInfo_StateRequest proto.InternalMessageInfo
-type isStateRequest_Request interface {
- isStateRequest_Request()
-}
-
-type StateRequest_Get struct {
- Get *StateGetRequest `protobuf:"bytes,1000,opt,name=get,proto3,oneof"`
-}
-type StateRequest_Append struct {
- Append *StateAppendRequest `protobuf:"bytes,1001,opt,name=append,proto3,oneof"`
-}
-type StateRequest_Clear struct {
- Clear *StateClearRequest `protobuf:"bytes,1002,opt,name=clear,proto3,oneof"`
-}
-
-func (*StateRequest_Get) isStateRequest_Request() {}
-func (*StateRequest_Append) isStateRequest_Request() {}
-func (*StateRequest_Clear) isStateRequest_Request() {}
-
-func (m *StateRequest) GetRequest() isStateRequest_Request {
- if m != nil {
- return m.Request
- }
- return nil
-}
-
func (m *StateRequest) GetId() string {
if m != nil {
return m.Id
@@ -2648,6 +2281,35 @@
return nil
}
+type isStateRequest_Request interface {
+ isStateRequest_Request()
+}
+
+type StateRequest_Get struct {
+ Get *StateGetRequest `protobuf:"bytes,1000,opt,name=get,proto3,oneof"`
+}
+
+type StateRequest_Append struct {
+ Append *StateAppendRequest `protobuf:"bytes,1001,opt,name=append,proto3,oneof"`
+}
+
+type StateRequest_Clear struct {
+ Clear *StateClearRequest `protobuf:"bytes,1002,opt,name=clear,proto3,oneof"`
+}
+
+func (*StateRequest_Get) isStateRequest_Request() {}
+
+func (*StateRequest_Append) isStateRequest_Request() {}
+
+func (*StateRequest_Clear) isStateRequest_Request() {}
+
+func (m *StateRequest) GetRequest() isStateRequest_Request {
+ if m != nil {
+ return m.Request
+ }
+ return nil
+}
+
func (m *StateRequest) GetGet() *StateGetRequest {
if x, ok := m.GetRequest().(*StateRequest_Get); ok {
return x.Get
@@ -2669,99 +2331,15 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*StateRequest) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _StateRequest_OneofMarshaler, _StateRequest_OneofUnmarshaler, _StateRequest_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*StateRequest) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*StateRequest_Get)(nil),
(*StateRequest_Append)(nil),
(*StateRequest_Clear)(nil),
}
}
-func _StateRequest_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*StateRequest)
- // request
- switch x := m.Request.(type) {
- case *StateRequest_Get:
- b.EncodeVarint(1000<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Get); err != nil {
- return err
- }
- case *StateRequest_Append:
- b.EncodeVarint(1001<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Append); err != nil {
- return err
- }
- case *StateRequest_Clear:
- b.EncodeVarint(1002<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Clear); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("StateRequest.Request has unexpected type %T", x)
- }
- return nil
-}
-
-func _StateRequest_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*StateRequest)
- switch tag {
- case 1000: // request.get
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(StateGetRequest)
- err := b.DecodeMessage(msg)
- m.Request = &StateRequest_Get{msg}
- return true, err
- case 1001: // request.append
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(StateAppendRequest)
- err := b.DecodeMessage(msg)
- m.Request = &StateRequest_Append{msg}
- return true, err
- case 1002: // request.clear
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(StateClearRequest)
- err := b.DecodeMessage(msg)
- m.Request = &StateRequest_Clear{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _StateRequest_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*StateRequest)
- // request
- switch x := m.Request.(type) {
- case *StateRequest_Get:
- s := proto.Size(x.Get)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *StateRequest_Append:
- s := proto.Size(x.Append)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *StateRequest_Clear:
- s := proto.Size(x.Clear)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
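Aside (illustration only): StateRequest keeps the same caller-facing shape after the oneof rewrite; exactly one wrapper struct is assigned to Request and read back through the typed getter. Fragment under the same fnpb assumption:

req := &fnpb.StateRequest{
	Id: "state-1",
	// One of Get/Append/Clear, each via its wrapper type.
	Request: &fnpb.StateRequest_Get{Get: &fnpb.StateGetRequest{}},
}
if get := req.GetGet(); get != nil {
	// dispatch as a state get
	_ = get
}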
type StateResponse struct {
 // (Required) A reference provided by the SDK which represents a request's
// execution. The StateResponse must have the matching id when responding
@@ -2787,16 +2365,17 @@
func (m *StateResponse) String() string { return proto.CompactTextString(m) }
func (*StateResponse) ProtoMessage() {}
func (*StateResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{19}
+ return fileDescriptor_6d954c03a4758710, []int{19}
}
+
func (m *StateResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateResponse.Unmarshal(m, b)
}
func (m *StateResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateResponse.Marshal(b, m, deterministic)
}
-func (dst *StateResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateResponse.Merge(dst, src)
+func (m *StateResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateResponse.Merge(m, src)
}
func (m *StateResponse) XXX_Size() int {
return xxx_messageInfo_StateResponse.Size(m)
@@ -2807,31 +2386,6 @@
var xxx_messageInfo_StateResponse proto.InternalMessageInfo
-type isStateResponse_Response interface {
- isStateResponse_Response()
-}
-
-type StateResponse_Get struct {
- Get *StateGetResponse `protobuf:"bytes,1000,opt,name=get,proto3,oneof"`
-}
-type StateResponse_Append struct {
- Append *StateAppendResponse `protobuf:"bytes,1001,opt,name=append,proto3,oneof"`
-}
-type StateResponse_Clear struct {
- Clear *StateClearResponse `protobuf:"bytes,1002,opt,name=clear,proto3,oneof"`
-}
-
-func (*StateResponse_Get) isStateResponse_Response() {}
-func (*StateResponse_Append) isStateResponse_Response() {}
-func (*StateResponse_Clear) isStateResponse_Response() {}
-
-func (m *StateResponse) GetResponse() isStateResponse_Response {
- if m != nil {
- return m.Response
- }
- return nil
-}
-
func (m *StateResponse) GetId() string {
if m != nil {
return m.Id
@@ -2846,6 +2400,35 @@
return ""
}
+type isStateResponse_Response interface {
+ isStateResponse_Response()
+}
+
+type StateResponse_Get struct {
+ Get *StateGetResponse `protobuf:"bytes,1000,opt,name=get,proto3,oneof"`
+}
+
+type StateResponse_Append struct {
+ Append *StateAppendResponse `protobuf:"bytes,1001,opt,name=append,proto3,oneof"`
+}
+
+type StateResponse_Clear struct {
+ Clear *StateClearResponse `protobuf:"bytes,1002,opt,name=clear,proto3,oneof"`
+}
+
+func (*StateResponse_Get) isStateResponse_Response() {}
+
+func (*StateResponse_Append) isStateResponse_Response() {}
+
+func (*StateResponse_Clear) isStateResponse_Response() {}
+
+func (m *StateResponse) GetResponse() isStateResponse_Response {
+ if m != nil {
+ return m.Response
+ }
+ return nil
+}
+
func (m *StateResponse) GetGet() *StateGetResponse {
if x, ok := m.GetResponse().(*StateResponse_Get); ok {
return x.Get
@@ -2867,99 +2450,15 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*StateResponse) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _StateResponse_OneofMarshaler, _StateResponse_OneofUnmarshaler, _StateResponse_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*StateResponse) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*StateResponse_Get)(nil),
(*StateResponse_Append)(nil),
(*StateResponse_Clear)(nil),
}
}
-func _StateResponse_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*StateResponse)
- // response
- switch x := m.Response.(type) {
- case *StateResponse_Get:
- b.EncodeVarint(1000<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Get); err != nil {
- return err
- }
- case *StateResponse_Append:
- b.EncodeVarint(1001<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Append); err != nil {
- return err
- }
- case *StateResponse_Clear:
- b.EncodeVarint(1002<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Clear); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("StateResponse.Response has unexpected type %T", x)
- }
- return nil
-}
-
-func _StateResponse_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*StateResponse)
- switch tag {
- case 1000: // response.get
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(StateGetResponse)
- err := b.DecodeMessage(msg)
- m.Response = &StateResponse_Get{msg}
- return true, err
- case 1001: // response.append
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(StateAppendResponse)
- err := b.DecodeMessage(msg)
- m.Response = &StateResponse_Append{msg}
- return true, err
- case 1002: // response.clear
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(StateClearResponse)
- err := b.DecodeMessage(msg)
- m.Response = &StateResponse_Clear{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _StateResponse_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*StateResponse)
- // response
- switch x := m.Response.(type) {
- case *StateResponse_Get:
- s := proto.Size(x.Get)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *StateResponse_Append:
- s := proto.Size(x.Append)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *StateResponse_Clear:
- s := proto.Size(x.Clear)
- n += 2 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
type StateKey struct {
// (Required) One of the following state keys must be set.
//
@@ -2979,16 +2478,17 @@
func (m *StateKey) String() string { return proto.CompactTextString(m) }
func (*StateKey) ProtoMessage() {}
func (*StateKey) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{20}
+ return fileDescriptor_6d954c03a4758710, []int{20}
}
+
func (m *StateKey) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateKey.Unmarshal(m, b)
}
func (m *StateKey) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateKey.Marshal(b, m, deterministic)
}
-func (dst *StateKey) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateKey.Merge(dst, src)
+func (m *StateKey) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateKey.Merge(m, src)
}
func (m *StateKey) XXX_Size() int {
return xxx_messageInfo_StateKey.Size(m)
@@ -3006,23 +2506,31 @@
type StateKey_Runner_ struct {
Runner *StateKey_Runner `protobuf:"bytes,1,opt,name=runner,proto3,oneof"`
}
+
type StateKey_MultimapSideInput_ struct {
MultimapSideInput *StateKey_MultimapSideInput `protobuf:"bytes,2,opt,name=multimap_side_input,json=multimapSideInput,proto3,oneof"`
}
+
type StateKey_BagUserState_ struct {
BagUserState *StateKey_BagUserState `protobuf:"bytes,3,opt,name=bag_user_state,json=bagUserState,proto3,oneof"`
}
+
type StateKey_IterableSideInput_ struct {
IterableSideInput *StateKey_IterableSideInput `protobuf:"bytes,4,opt,name=iterable_side_input,json=iterableSideInput,proto3,oneof"`
}
+
type StateKey_MultimapKeysSideInput_ struct {
MultimapKeysSideInput *StateKey_MultimapKeysSideInput `protobuf:"bytes,5,opt,name=multimap_keys_side_input,json=multimapKeysSideInput,proto3,oneof"`
}
-func (*StateKey_Runner_) isStateKey_Type() {}
-func (*StateKey_MultimapSideInput_) isStateKey_Type() {}
-func (*StateKey_BagUserState_) isStateKey_Type() {}
-func (*StateKey_IterableSideInput_) isStateKey_Type() {}
+func (*StateKey_Runner_) isStateKey_Type() {}
+
+func (*StateKey_MultimapSideInput_) isStateKey_Type() {}
+
+func (*StateKey_BagUserState_) isStateKey_Type() {}
+
+func (*StateKey_IterableSideInput_) isStateKey_Type() {}
+
func (*StateKey_MultimapKeysSideInput_) isStateKey_Type() {}
func (m *StateKey) GetType() isStateKey_Type {
@@ -3067,9 +2575,9 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*StateKey) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _StateKey_OneofMarshaler, _StateKey_OneofUnmarshaler, _StateKey_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*StateKey) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*StateKey_Runner_)(nil),
(*StateKey_MultimapSideInput_)(nil),
(*StateKey_BagUserState_)(nil),
@@ -3078,126 +2586,6 @@
}
}
-func _StateKey_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*StateKey)
- // type
- switch x := m.Type.(type) {
- case *StateKey_Runner_:
- b.EncodeVarint(1<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Runner); err != nil {
- return err
- }
- case *StateKey_MultimapSideInput_:
- b.EncodeVarint(2<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.MultimapSideInput); err != nil {
- return err
- }
- case *StateKey_BagUserState_:
- b.EncodeVarint(3<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.BagUserState); err != nil {
- return err
- }
- case *StateKey_IterableSideInput_:
- b.EncodeVarint(4<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.IterableSideInput); err != nil {
- return err
- }
- case *StateKey_MultimapKeysSideInput_:
- b.EncodeVarint(5<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.MultimapKeysSideInput); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("StateKey.Type has unexpected type %T", x)
- }
- return nil
-}
-
-func _StateKey_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*StateKey)
- switch tag {
- case 1: // type.runner
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(StateKey_Runner)
- err := b.DecodeMessage(msg)
- m.Type = &StateKey_Runner_{msg}
- return true, err
- case 2: // type.multimap_side_input
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(StateKey_MultimapSideInput)
- err := b.DecodeMessage(msg)
- m.Type = &StateKey_MultimapSideInput_{msg}
- return true, err
- case 3: // type.bag_user_state
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(StateKey_BagUserState)
- err := b.DecodeMessage(msg)
- m.Type = &StateKey_BagUserState_{msg}
- return true, err
- case 4: // type.iterable_side_input
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(StateKey_IterableSideInput)
- err := b.DecodeMessage(msg)
- m.Type = &StateKey_IterableSideInput_{msg}
- return true, err
- case 5: // type.multimap_keys_side_input
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(StateKey_MultimapKeysSideInput)
- err := b.DecodeMessage(msg)
- m.Type = &StateKey_MultimapKeysSideInput_{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _StateKey_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*StateKey)
- // type
- switch x := m.Type.(type) {
- case *StateKey_Runner_:
- s := proto.Size(x.Runner)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *StateKey_MultimapSideInput_:
- s := proto.Size(x.MultimapSideInput)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *StateKey_BagUserState_:
- s := proto.Size(x.BagUserState)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *StateKey_IterableSideInput_:
- s := proto.Size(x.IterableSideInput)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *StateKey_MultimapKeysSideInput_:
- s := proto.Size(x.MultimapKeysSideInput)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
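Aside (illustration only): StateKey follows the same oneof pattern; code that previously leaned on the removed sizer/marshaler helpers simply switches on the wrapper type. Fragment under the same fnpb assumption:

key := &fnpb.StateKey{
	Type: &fnpb.StateKey_BagUserState_{BagUserState: &fnpb.StateKey_BagUserState{}},
}
switch key.GetType().(type) {
case *fnpb.StateKey_Runner_:
	// opaque, runner-supplied key
case *fnpb.StateKey_BagUserState_:
	// a user-state cell
default:
	// side-input variants elided
}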
type StateKey_Runner struct {
// (Required) Opaque information supplied by the runner. Used to support
// remote references.
@@ -3216,16 +2604,17 @@
func (m *StateKey_Runner) String() string { return proto.CompactTextString(m) }
func (*StateKey_Runner) ProtoMessage() {}
func (*StateKey_Runner) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{20, 0}
+ return fileDescriptor_6d954c03a4758710, []int{20, 0}
}
+
func (m *StateKey_Runner) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateKey_Runner.Unmarshal(m, b)
}
func (m *StateKey_Runner) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateKey_Runner.Marshal(b, m, deterministic)
}
-func (dst *StateKey_Runner) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateKey_Runner.Merge(dst, src)
+func (m *StateKey_Runner) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateKey_Runner.Merge(m, src)
}
func (m *StateKey_Runner) XXX_Size() int {
return xxx_messageInfo_StateKey_Runner.Size(m)
@@ -3271,16 +2660,17 @@
func (m *StateKey_IterableSideInput) String() string { return proto.CompactTextString(m) }
func (*StateKey_IterableSideInput) ProtoMessage() {}
func (*StateKey_IterableSideInput) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{20, 1}
+ return fileDescriptor_6d954c03a4758710, []int{20, 1}
}
+
func (m *StateKey_IterableSideInput) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateKey_IterableSideInput.Unmarshal(m, b)
}
func (m *StateKey_IterableSideInput) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateKey_IterableSideInput.Marshal(b, m, deterministic)
}
-func (dst *StateKey_IterableSideInput) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateKey_IterableSideInput.Merge(dst, src)
+func (m *StateKey_IterableSideInput) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateKey_IterableSideInput.Merge(m, src)
}
func (m *StateKey_IterableSideInput) XXX_Size() int {
return xxx_messageInfo_StateKey_IterableSideInput.Size(m)
@@ -3343,16 +2733,17 @@
func (m *StateKey_MultimapSideInput) String() string { return proto.CompactTextString(m) }
func (*StateKey_MultimapSideInput) ProtoMessage() {}
func (*StateKey_MultimapSideInput) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{20, 2}
+ return fileDescriptor_6d954c03a4758710, []int{20, 2}
}
+
func (m *StateKey_MultimapSideInput) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateKey_MultimapSideInput.Unmarshal(m, b)
}
func (m *StateKey_MultimapSideInput) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateKey_MultimapSideInput.Marshal(b, m, deterministic)
}
-func (dst *StateKey_MultimapSideInput) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateKey_MultimapSideInput.Merge(dst, src)
+func (m *StateKey_MultimapSideInput) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateKey_MultimapSideInput.Merge(m, src)
}
func (m *StateKey_MultimapSideInput) XXX_Size() int {
return xxx_messageInfo_StateKey_MultimapSideInput.Size(m)
@@ -3419,16 +2810,17 @@
func (m *StateKey_MultimapKeysSideInput) String() string { return proto.CompactTextString(m) }
func (*StateKey_MultimapKeysSideInput) ProtoMessage() {}
func (*StateKey_MultimapKeysSideInput) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{20, 3}
+ return fileDescriptor_6d954c03a4758710, []int{20, 3}
}
+
func (m *StateKey_MultimapKeysSideInput) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateKey_MultimapKeysSideInput.Unmarshal(m, b)
}
func (m *StateKey_MultimapKeysSideInput) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateKey_MultimapKeysSideInput.Marshal(b, m, deterministic)
}
-func (dst *StateKey_MultimapKeysSideInput) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateKey_MultimapKeysSideInput.Merge(dst, src)
+func (m *StateKey_MultimapKeysSideInput) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateKey_MultimapKeysSideInput.Merge(m, src)
}
func (m *StateKey_MultimapKeysSideInput) XXX_Size() int {
return xxx_messageInfo_StateKey_MultimapKeysSideInput.Size(m)
@@ -3479,16 +2871,17 @@
func (m *StateKey_BagUserState) String() string { return proto.CompactTextString(m) }
func (*StateKey_BagUserState) ProtoMessage() {}
func (*StateKey_BagUserState) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{20, 4}
+ return fileDescriptor_6d954c03a4758710, []int{20, 4}
}
+
func (m *StateKey_BagUserState) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateKey_BagUserState.Unmarshal(m, b)
}
func (m *StateKey_BagUserState) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateKey_BagUserState.Marshal(b, m, deterministic)
}
-func (dst *StateKey_BagUserState) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateKey_BagUserState.Merge(dst, src)
+func (m *StateKey_BagUserState) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateKey_BagUserState.Merge(m, src)
}
func (m *StateKey_BagUserState) XXX_Size() int {
return xxx_messageInfo_StateKey_BagUserState.Size(m)
@@ -3544,16 +2937,17 @@
func (m *StateGetRequest) String() string { return proto.CompactTextString(m) }
func (*StateGetRequest) ProtoMessage() {}
func (*StateGetRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{21}
+ return fileDescriptor_6d954c03a4758710, []int{21}
}
+
func (m *StateGetRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateGetRequest.Unmarshal(m, b)
}
func (m *StateGetRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateGetRequest.Marshal(b, m, deterministic)
}
-func (dst *StateGetRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateGetRequest.Merge(dst, src)
+func (m *StateGetRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateGetRequest.Merge(m, src)
}
func (m *StateGetRequest) XXX_Size() int {
return xxx_messageInfo_StateGetRequest.Size(m)
@@ -3591,16 +2985,17 @@
func (m *StateGetResponse) String() string { return proto.CompactTextString(m) }
func (*StateGetResponse) ProtoMessage() {}
func (*StateGetResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{22}
+ return fileDescriptor_6d954c03a4758710, []int{22}
}
+
func (m *StateGetResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateGetResponse.Unmarshal(m, b)
}
func (m *StateGetResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateGetResponse.Marshal(b, m, deterministic)
}
-func (dst *StateGetResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateGetResponse.Merge(dst, src)
+func (m *StateGetResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateGetResponse.Merge(m, src)
}
func (m *StateGetResponse) XXX_Size() int {
return xxx_messageInfo_StateGetResponse.Size(m)
@@ -3640,16 +3035,17 @@
func (m *StateAppendRequest) String() string { return proto.CompactTextString(m) }
func (*StateAppendRequest) ProtoMessage() {}
func (*StateAppendRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{23}
+ return fileDescriptor_6d954c03a4758710, []int{23}
}
+
func (m *StateAppendRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateAppendRequest.Unmarshal(m, b)
}
func (m *StateAppendRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateAppendRequest.Marshal(b, m, deterministic)
}
-func (dst *StateAppendRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateAppendRequest.Merge(dst, src)
+func (m *StateAppendRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateAppendRequest.Merge(m, src)
}
func (m *StateAppendRequest) XXX_Size() int {
return xxx_messageInfo_StateAppendRequest.Size(m)
@@ -3678,16 +3074,17 @@
func (m *StateAppendResponse) String() string { return proto.CompactTextString(m) }
func (*StateAppendResponse) ProtoMessage() {}
func (*StateAppendResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{24}
+ return fileDescriptor_6d954c03a4758710, []int{24}
}
+
func (m *StateAppendResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateAppendResponse.Unmarshal(m, b)
}
func (m *StateAppendResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateAppendResponse.Marshal(b, m, deterministic)
}
-func (dst *StateAppendResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateAppendResponse.Merge(dst, src)
+func (m *StateAppendResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateAppendResponse.Merge(m, src)
}
func (m *StateAppendResponse) XXX_Size() int {
return xxx_messageInfo_StateAppendResponse.Size(m)
@@ -3709,16 +3106,17 @@
func (m *StateClearRequest) String() string { return proto.CompactTextString(m) }
func (*StateClearRequest) ProtoMessage() {}
func (*StateClearRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{25}
+ return fileDescriptor_6d954c03a4758710, []int{25}
}
+
func (m *StateClearRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateClearRequest.Unmarshal(m, b)
}
func (m *StateClearRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateClearRequest.Marshal(b, m, deterministic)
}
-func (dst *StateClearRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateClearRequest.Merge(dst, src)
+func (m *StateClearRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateClearRequest.Merge(m, src)
}
func (m *StateClearRequest) XXX_Size() int {
return xxx_messageInfo_StateClearRequest.Size(m)
@@ -3740,16 +3138,17 @@
func (m *StateClearResponse) String() string { return proto.CompactTextString(m) }
func (*StateClearResponse) ProtoMessage() {}
func (*StateClearResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{26}
+ return fileDescriptor_6d954c03a4758710, []int{26}
}
+
func (m *StateClearResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateClearResponse.Unmarshal(m, b)
}
func (m *StateClearResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateClearResponse.Marshal(b, m, deterministic)
}
-func (dst *StateClearResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateClearResponse.Merge(dst, src)
+func (m *StateClearResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateClearResponse.Merge(m, src)
}
func (m *StateClearResponse) XXX_Size() int {
return xxx_messageInfo_StateClearResponse.Size(m)
@@ -3796,16 +3195,17 @@
func (m *LogEntry) String() string { return proto.CompactTextString(m) }
func (*LogEntry) ProtoMessage() {}
func (*LogEntry) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{27}
+ return fileDescriptor_6d954c03a4758710, []int{27}
}
+
func (m *LogEntry) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_LogEntry.Unmarshal(m, b)
}
func (m *LogEntry) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_LogEntry.Marshal(b, m, deterministic)
}
-func (dst *LogEntry) XXX_Merge(src proto.Message) {
- xxx_messageInfo_LogEntry.Merge(dst, src)
+func (m *LogEntry) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_LogEntry.Merge(m, src)
}
func (m *LogEntry) XXX_Size() int {
return xxx_messageInfo_LogEntry.Size(m)
@@ -3886,16 +3286,17 @@
func (m *LogEntry_List) String() string { return proto.CompactTextString(m) }
func (*LogEntry_List) ProtoMessage() {}
func (*LogEntry_List) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{27, 0}
+ return fileDescriptor_6d954c03a4758710, []int{27, 0}
}
+
func (m *LogEntry_List) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_LogEntry_List.Unmarshal(m, b)
}
func (m *LogEntry_List) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_LogEntry_List.Marshal(b, m, deterministic)
}
-func (dst *LogEntry_List) XXX_Merge(src proto.Message) {
- xxx_messageInfo_LogEntry_List.Merge(dst, src)
+func (m *LogEntry_List) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_LogEntry_List.Merge(m, src)
}
func (m *LogEntry_List) XXX_Size() int {
return xxx_messageInfo_LogEntry_List.Size(m)
@@ -3936,16 +3337,17 @@
func (m *LogEntry_Severity) String() string { return proto.CompactTextString(m) }
func (*LogEntry_Severity) ProtoMessage() {}
func (*LogEntry_Severity) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{27, 1}
+ return fileDescriptor_6d954c03a4758710, []int{27, 1}
}
+
func (m *LogEntry_Severity) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_LogEntry_Severity.Unmarshal(m, b)
}
func (m *LogEntry_Severity) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_LogEntry_Severity.Marshal(b, m, deterministic)
}
-func (dst *LogEntry_Severity) XXX_Merge(src proto.Message) {
- xxx_messageInfo_LogEntry_Severity.Merge(dst, src)
+func (m *LogEntry_Severity) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_LogEntry_Severity.Merge(m, src)
}
func (m *LogEntry_Severity) XXX_Size() int {
return xxx_messageInfo_LogEntry_Severity.Size(m)
@@ -3966,16 +3368,17 @@
func (m *LogControl) String() string { return proto.CompactTextString(m) }
func (*LogControl) ProtoMessage() {}
func (*LogControl) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{28}
+ return fileDescriptor_6d954c03a4758710, []int{28}
}
+
func (m *LogControl) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_LogControl.Unmarshal(m, b)
}
func (m *LogControl) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_LogControl.Marshal(b, m, deterministic)
}
-func (dst *LogControl) XXX_Merge(src proto.Message) {
- xxx_messageInfo_LogControl.Merge(dst, src)
+func (m *LogControl) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_LogControl.Merge(m, src)
}
func (m *LogControl) XXX_Size() int {
return xxx_messageInfo_LogControl.Size(m)
@@ -4002,16 +3405,17 @@
func (m *StartWorkerRequest) String() string { return proto.CompactTextString(m) }
func (*StartWorkerRequest) ProtoMessage() {}
func (*StartWorkerRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{29}
+ return fileDescriptor_6d954c03a4758710, []int{29}
}
+
func (m *StartWorkerRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StartWorkerRequest.Unmarshal(m, b)
}
func (m *StartWorkerRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StartWorkerRequest.Marshal(b, m, deterministic)
}
-func (dst *StartWorkerRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StartWorkerRequest.Merge(dst, src)
+func (m *StartWorkerRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StartWorkerRequest.Merge(m, src)
}
func (m *StartWorkerRequest) XXX_Size() int {
return xxx_messageInfo_StartWorkerRequest.Size(m)
@@ -4075,16 +3479,17 @@
func (m *StartWorkerResponse) String() string { return proto.CompactTextString(m) }
func (*StartWorkerResponse) ProtoMessage() {}
func (*StartWorkerResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{30}
+ return fileDescriptor_6d954c03a4758710, []int{30}
}
+
func (m *StartWorkerResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StartWorkerResponse.Unmarshal(m, b)
}
func (m *StartWorkerResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StartWorkerResponse.Marshal(b, m, deterministic)
}
-func (dst *StartWorkerResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StartWorkerResponse.Merge(dst, src)
+func (m *StartWorkerResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StartWorkerResponse.Merge(m, src)
}
func (m *StartWorkerResponse) XXX_Size() int {
return xxx_messageInfo_StartWorkerResponse.Size(m)
@@ -4113,16 +3518,17 @@
func (m *StopWorkerRequest) String() string { return proto.CompactTextString(m) }
func (*StopWorkerRequest) ProtoMessage() {}
func (*StopWorkerRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{31}
+ return fileDescriptor_6d954c03a4758710, []int{31}
}
+
func (m *StopWorkerRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StopWorkerRequest.Unmarshal(m, b)
}
func (m *StopWorkerRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StopWorkerRequest.Marshal(b, m, deterministic)
}
-func (dst *StopWorkerRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StopWorkerRequest.Merge(dst, src)
+func (m *StopWorkerRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StopWorkerRequest.Merge(m, src)
}
func (m *StopWorkerRequest) XXX_Size() int {
return xxx_messageInfo_StopWorkerRequest.Size(m)
@@ -4151,16 +3557,17 @@
func (m *StopWorkerResponse) String() string { return proto.CompactTextString(m) }
func (*StopWorkerResponse) ProtoMessage() {}
func (*StopWorkerResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_fn_api_a35062cff5082b14, []int{32}
+ return fileDescriptor_6d954c03a4758710, []int{32}
}
+
func (m *StopWorkerResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StopWorkerResponse.Unmarshal(m, b)
}
func (m *StopWorkerResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StopWorkerResponse.Marshal(b, m, deterministic)
}
-func (dst *StopWorkerResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StopWorkerResponse.Merge(dst, src)
+func (m *StopWorkerResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StopWorkerResponse.Merge(m, src)
}
func (m *StopWorkerResponse) XXX_Size() int {
return xxx_messageInfo_StopWorkerResponse.Size(m)
@@ -4179,6 +3586,7 @@
}
func init() {
+ proto.RegisterEnum("org.apache.beam.model.fn_execution.v1.LogEntry_Severity_Enum", LogEntry_Severity_Enum_name, LogEntry_Severity_Enum_value)
proto.RegisterType((*RemoteGrpcPort)(nil), "org.apache.beam.model.fn_execution.v1.RemoteGrpcPort")
proto.RegisterType((*InstructionRequest)(nil), "org.apache.beam.model.fn_execution.v1.InstructionRequest")
proto.RegisterType((*InstructionResponse)(nil), "org.apache.beam.model.fn_execution.v1.InstructionResponse")
@@ -4247,7 +3655,213 @@
proto.RegisterType((*StartWorkerResponse)(nil), "org.apache.beam.model.fn_execution.v1.StartWorkerResponse")
proto.RegisterType((*StopWorkerRequest)(nil), "org.apache.beam.model.fn_execution.v1.StopWorkerRequest")
proto.RegisterType((*StopWorkerResponse)(nil), "org.apache.beam.model.fn_execution.v1.StopWorkerResponse")
- proto.RegisterEnum("org.apache.beam.model.fn_execution.v1.LogEntry_Severity_Enum", LogEntry_Severity_Enum_name, LogEntry_Severity_Enum_value)
+}
+
+func init() { proto.RegisterFile("beam_fn_api.proto", fileDescriptor_6d954c03a4758710) }
+
+var fileDescriptor_6d954c03a4758710 = []byte{
+ // 3212 bytes of a gzipped FileDescriptorProto
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xbc, 0x5a, 0xcd, 0x6f, 0x1b, 0xc7,
+ 0xd9, 0xf7, 0x8a, 0x94, 0x44, 0x3e, 0xa4, 0x24, 0x72, 0x24, 0xd9, 0xf4, 0xbe, 0xce, 0xfb, 0x3a,
+ 0x7c, 0x1b, 0x40, 0x48, 0x11, 0xfa, 0x13, 0x89, 0x9d, 0x26, 0x4e, 0x24, 0x8a, 0xb6, 0x19, 0xcb,
+ 0x36, 0xbb, 0xb2, 0xeb, 0x36, 0x69, 0xb2, 0x58, 0x71, 0x87, 0xf4, 0xc2, 0xcb, 0xdd, 0xcd, 0xcc,
+ 0x52, 0x96, 0xdc, 0xa0, 0xe9, 0x17, 0x5a, 0xb4, 0x68, 0x9b, 0x4b, 0x0f, 0x69, 0x6f, 0x6d, 0x81,
+ 0x02, 0xbd, 0xf4, 0x0f, 0xc8, 0xb5, 0x87, 0xa2, 0xa7, 0xfe, 0x03, 0xb9, 0x14, 0x68, 0x81, 0xb6,
+ 0xe9, 0xb9, 0x40, 0x6f, 0xc5, 0x7c, 0xec, 0x07, 0x97, 0x4b, 0x67, 0x49, 0xc9, 0xbd, 0xed, 0xcc,
+ 0xec, 0xf3, 0xfb, 0xcd, 0x3c, 0xfb, 0x9b, 0x67, 0x9e, 0x99, 0x59, 0xa8, 0xee, 0x61, 0x63, 0xa0,
+ 0xf7, 0x1c, 0xdd, 0xf0, 0xac, 0x86, 0x47, 0x5c, 0xdf, 0x45, 0x2f, 0xb8, 0xa4, 0xdf, 0x30, 0x3c,
+ 0xa3, 0xfb, 0x10, 0x37, 0x58, 0x6b, 0x63, 0xe0, 0x9a, 0xd8, 0x6e, 0xf4, 0x1c, 0x1d, 0x1f, 0xe0,
+ 0xee, 0xd0, 0xb7, 0x5c, 0xa7, 0xb1, 0x7f, 0x41, 0x5d, 0xe7, 0x96, 0x64, 0xe8, 0x38, 0x98, 0x44,
+ 0xd6, 0xea, 0x0a, 0x76, 0x4c, 0xcf, 0xb5, 0x1c, 0x9f, 0xca, 0x8a, 0xb3, 0x7d, 0xd7, 0xed, 0xdb,
+ 0xf8, 0x1c, 0x2f, 0xed, 0x0d, 0x7b, 0xe7, 0x4c, 0x4c, 0xbb, 0xc4, 0xf2, 0x7c, 0x97, 0xc8, 0x37,
+ 0xfe, 0x2f, 0xf9, 0x86, 0x6f, 0x0d, 0x30, 0xf5, 0x8d, 0x81, 0x27, 0x5f, 0xf8, 0xdf, 0x31, 0x88,
+ 0x21, 0x31, 0x78, 0x3f, 0x26, 0xb4, 0x3f, 0x26, 0x86, 0xe7, 0x61, 0x12, 0x74, 0x61, 0x69, 0x80,
+ 0x7d, 0x62, 0x75, 0x65, 0xb1, 0xfe, 0x0b, 0x05, 0x96, 0x35, 0x3c, 0x70, 0x7d, 0x7c, 0x83, 0x78,
+ 0xdd, 0x8e, 0x4b, 0x7c, 0x34, 0x80, 0x93, 0x86, 0x67, 0xe9, 0x14, 0x93, 0x7d, 0xab, 0x8b, 0xf5,
+ 0xa8, 0x8b, 0x35, 0xe5, 0xac, 0xb2, 0x51, 0xba, 0xf8, 0x4a, 0x23, 0xdd, 0x29, 0x9e, 0xe5, 0x61,
+ 0xdb, 0x72, 0x70, 0x63, 0xff, 0x42, 0x63, 0xd3, 0xb3, 0x76, 0x85, 0xfd, 0x76, 0x68, 0xae, 0xad,
+ 0x19, 0x29, 0xb5, 0xe8, 0x34, 0x14, 0xba, 0xae, 0x89, 0x89, 0x6e, 0x99, 0xb5, 0xb9, 0xb3, 0xca,
+ 0x46, 0x51, 0x5b, 0xe4, 0xe5, 0xb6, 0x59, 0xff, 0x6b, 0x1e, 0x50, 0xdb, 0xa1, 0x3e, 0x19, 0x76,
+ 0xd9, 0x08, 0x35, 0xfc, 0xfe, 0x10, 0x53, 0x1f, 0xbd, 0x00, 0xcb, 0x56, 0x54, 0xcb, 0xec, 0x14,
+ 0x6e, 0xb7, 0x14, 0xab, 0x6d, 0x9b, 0xe8, 0x3e, 0x14, 0x08, 0xee, 0x5b, 0xd4, 0xc7, 0xa4, 0xf6,
+ 0xb7, 0x45, 0xde, 0xf5, 0x97, 0x1b, 0x99, 0xbe, 0x67, 0x43, 0x93, 0x76, 0x92, 0xf1, 0xe6, 0x09,
+ 0x2d, 0x84, 0x42, 0x18, 0x96, 0x3d, 0xe2, 0x76, 0x31, 0xa5, 0xfa, 0xde, 0xd0, 0x31, 0x6d, 0x5c,
+ 0xfb, 0xbb, 0x00, 0xff, 0x52, 0x46, 0xf0, 0x8e, 0xb0, 0xde, 0xe2, 0xc6, 0x11, 0xc3, 0x92, 0x17,
+ 0xaf, 0x47, 0xdf, 0x84, 0x53, 0xa3, 0x34, 0xba, 0x47, 0xdc, 0x3e, 0xc1, 0x94, 0xd6, 0xfe, 0x21,
+ 0xf8, 0x9a, 0xb3, 0xf0, 0x75, 0x24, 0x48, 0xc4, 0xbb, 0xee, 0xa5, 0xb5, 0xa3, 0x21, 0xac, 0x25,
+ 0xf8, 0xa9, 0x67, 0x5b, 0x7e, 0xed, 0x33, 0x41, 0xfe, 0xe6, 0x2c, 0xe4, 0xbb, 0x0c, 0x21, 0x62,
+ 0x46, 0xde, 0x58, 0x23, 0x7a, 0x08, 0x2b, 0x3d, 0xcb, 0x31, 0x6c, 0xeb, 0x09, 0x0e, 0xdc, 0xfb,
+ 0x4f, 0xc1, 0xf8, 0x5a, 0x46, 0xc6, 0xeb, 0xd2, 0x3c, 0xe9, 0xdf, 0xe5, 0xde, 0x48, 0xc3, 0x56,
+ 0x11, 0x16, 0x89, 0x68, 0xac, 0x7f, 0x67, 0x1e, 0x56, 0x47, 0x74, 0x46, 0x3d, 0xd7, 0xa1, 0x38,
+ 0xab, 0xd0, 0xd6, 0x60, 0x1e, 0x13, 0xe2, 0x12, 0x29, 0x5f, 0x51, 0x40, 0x5f, 0x19, 0x97, 0xdf,
+ 0x2b, 0x53, 0xcb, 0x4f, 0x74, 0x64, 0x44, 0x7f, 0xbd, 0x49, 0xfa, 0x7b, 0x6d, 0x36, 0xfd, 0x85,
+ 0x14, 0x09, 0x01, 0x7e, 0xf8, 0xb9, 0x02, 0xdc, 0x3e, 0x9a, 0x00, 0x43, 0xe2, 0x09, 0x0a, 0xdc,
+ 0x7f, 0xba, 0x02, 0x37, 0x8f, 0xa0, 0xc0, 0x90, 0x3a, 0x4d, 0x82, 0xd6, 0x44, 0x09, 0xbe, 0x3e,
+ 0xa3, 0x04, 0x43, 0xba, 0xa4, 0x06, 0x81, 0x69, 0x44, 0xb4, 0xd6, 0x7f, 0xa2, 0xc0, 0x4a, 0x22,
+ 0xee, 0xa0, 0x27, 0x70, 0x3a, 0xe1, 0x82, 0x91, 0x68, 0x9c, 0xdb, 0x28, 0x5d, 0xbc, 0x36, 0x8b,
+ 0x1b, 0x62, 0x41, 0xf9, 0x94, 0x97, 0xde, 0x50, 0x47, 0x50, 0x49, 0xea, 0xb0, 0xfe, 0x6b, 0x80,
+ 0x53, 0x13, 0x80, 0xd0, 0x32, 0xcc, 0x85, 0x13, 0x64, 0xce, 0x32, 0x91, 0x03, 0xe0, 0x13, 0xc3,
+ 0xa1, 0x3d, 0x97, 0x0c, 0x68, 0x6d, 0x8e, 0x77, 0xf6, 0xce, 0xd1, 0x3a, 0xdb, 0xb8, 0x17, 0x02,
+ 0xb6, 0x1c, 0x9f, 0x1c, 0x6a, 0x31, 0x06, 0xe4, 0x43, 0xd9, 0xeb, 0xba, 0xb6, 0x8d, 0xf9, 0xb4,
+ 0xa4, 0xb5, 0x1c, 0x67, 0xec, 0x1c, 0x91, 0xb1, 0x13, 0x83, 0x14, 0x9c, 0x23, 0x2c, 0xe8, 0x47,
+ 0x0a, 0xac, 0x3d, 0xb6, 0x1c, 0xd3, 0x7d, 0x6c, 0x39, 0x7d, 0x9d, 0xfa, 0xc4, 0xf0, 0x71, 0xdf,
+ 0xc2, 0xb4, 0x96, 0xe7, 0xf4, 0x0f, 0x8e, 0x48, 0xff, 0x20, 0x80, 0xde, 0x0d, 0x91, 0x45, 0x2f,
+ 0x56, 0x1f, 0x8f, 0xb7, 0xa0, 0x3d, 0x58, 0xe0, 0x4b, 0x27, 0xad, 0xcd, 0x73, 0xf6, 0xb7, 0x8e,
+ 0xc8, 0xde, 0xe4, 0x60, 0x82, 0x50, 0x22, 0x33, 0x37, 0x63, 0x67, 0xdf, 0x22, 0xae, 0x33, 0xc0,
+ 0x8e, 0x4f, 0x6b, 0x0b, 0xc7, 0xe2, 0xe6, 0x56, 0x0c, 0x52, 0xba, 0x39, 0xce, 0x82, 0x0e, 0xe0,
+ 0x0c, 0xf5, 0x0d, 0x1f, 0xeb, 0x13, 0x32, 0x93, 0xc5, 0xa3, 0x65, 0x26, 0xa7, 0x39, 0x78, 0x5a,
+ 0x93, 0x6a, 0xc3, 0x4a, 0x42, 0x75, 0xa8, 0x02, 0xb9, 0x47, 0xf8, 0x50, 0x4a, 0x9d, 0x3d, 0xa2,
+ 0x26, 0xcc, 0xef, 0x1b, 0xf6, 0x10, 0xf3, 0x15, 0xa0, 0x74, 0xf1, 0xa5, 0x0c, 0xfd, 0xe8, 0x84,
+ 0xa8, 0x9a, 0xb0, 0x7d, 0x75, 0xee, 0x8a, 0xa2, 0xba, 0x50, 0x1d, 0x53, 0x5c, 0x0a, 0xdf, 0xf6,
+ 0x28, 0x5f, 0x23, 0x0b, 0x5f, 0x33, 0x84, 0x8d, 0x13, 0x7e, 0x00, 0xb5, 0x49, 0x1a, 0x4b, 0xe1,
+ 0x7d, 0x6b, 0x94, 0xf7, 0x72, 0x06, 0xde, 0x24, 0xfa, 0x61, 0x9c, 0xbd, 0x0b, 0xa5, 0x98, 0xc6,
+ 0x52, 0x08, 0xaf, 0x8d, 0x12, 0x6e, 0x64, 0x20, 0xe4, 0x80, 0x09, 0x9f, 0x8e, 0xc9, 0xeb, 0x78,
+ 0x7c, 0x1a, 0x83, 0x8d, 0x11, 0xd6, 0xff, 0x9d, 0x83, 0xaa, 0x50, 0xf8, 0xa6, 0xe7, 0xd9, 0x56,
+ 0x97, 0xa7, 0xe7, 0xe8, 0x79, 0x28, 0x87, 0xd1, 0x2a, 0x4a, 0x25, 0x4a, 0x61, 0x5d, 0xdb, 0x64,
+ 0xa9, 0xb0, 0xe5, 0x78, 0x43, 0x3f, 0x96, 0x0a, 0xf3, 0x72, 0xdb, 0x44, 0x35, 0x58, 0xc4, 0x36,
+ 0x66, 0x4c, 0xb5, 0xdc, 0x59, 0x65, 0xa3, 0xac, 0x05, 0x45, 0xf4, 0x0d, 0xa8, 0xba, 0x43, 0x9f,
+ 0x59, 0x3d, 0x36, 0x7c, 0x4c, 0x06, 0x06, 0x79, 0x14, 0x44, 0x9f, 0xac, 0xe1, 0x76, 0xac, 0xb3,
+ 0x8d, 0xbb, 0x1c, 0xf1, 0x41, 0x08, 0x28, 0xe6, 0x64, 0xc5, 0x4d, 0x54, 0xa3, 0x0e, 0x80, 0x45,
+ 0xf5, 0x3d, 0x77, 0xe8, 0x98, 0xd8, 0xac, 0xcd, 0x9f, 0x55, 0x36, 0x96, 0x2f, 0x5e, 0xc8, 0xe0,
+ 0xb9, 0x36, 0xdd, 0x12, 0x36, 0x8d, 0x96, 0x33, 0x1c, 0x68, 0x45, 0x2b, 0x28, 0xa3, 0xaf, 0x43,
+ 0x65, 0xe0, 0x3a, 0x96, 0xef, 0x12, 0x16, 0x50, 0x2d, 0xa7, 0xe7, 0x06, 0x31, 0x26, 0x0b, 0xee,
+ 0xed, 0xd0, 0xb4, 0xed, 0xf4, 0x5c, 0x6d, 0x65, 0x30, 0x52, 0xa6, 0xaa, 0x0e, 0xeb, 0xa9, 0x43,
+ 0x4b, 0xd1, 0xc3, 0xf9, 0x51, 0x3d, 0xa8, 0x0d, 0xb1, 0xb1, 0x6a, 0x04, 0x1b, 0xab, 0xc6, 0xbd,
+ 0x60, 0x67, 0x16, 0xff, 0xf6, 0xbf, 0x57, 0xa0, 0xb6, 0x8d, 0x6d, 0xe3, 0x10, 0x9b, 0xe3, 0x12,
+ 0x78, 0x1b, 0x4a, 0x46, 0x54, 0x94, 0xdb, 0xa9, 0x2b, 0xb3, 0x7e, 0x24, 0x2d, 0x0e, 0x86, 0x6e,
+ 0xc1, 0x9a, 0x4c, 0x67, 0xb1, 0xa9, 0xb3, 0x4d, 0xa3, 0x6e, 0xb2, 0x6e, 0xc8, 0xde, 0x9f, 0x1e,
+ 0xeb, 0xfd, 0xb6, 0xdc, 0x36, 0x6a, 0x28, 0x34, 0x63, 0x03, 0xe2, 0x7d, 0xaf, 0xff, 0x34, 0x0f,
+ 0x6b, 0x69, 0xdb, 0x14, 0xf4, 0x06, 0x9c, 0x99, 0x98, 0x90, 0x44, 0xa2, 0x3e, 0x3d, 0x21, 0xa7,
+ 0x68, 0x9b, 0xc8, 0x82, 0x72, 0x97, 0x8d, 0x54, 0xf7, 0xdd, 0x47, 0xd8, 0x09, 0xf2, 0x82, 0xeb,
+ 0x47, 0xd8, 0x3a, 0x35, 0x9a, 0xcc, 0xea, 0x1e, 0x83, 0xd3, 0x4a, 0xdd, 0xf0, 0x99, 0xaa, 0x7f,
+ 0x9c, 0x03, 0x88, 0xda, 0xd0, 0xfb, 0x00, 0x43, 0x8a, 0x89, 0xce, 0x43, 0xbd, 0xf4, 0x7d, 0xe7,
+ 0x78, 0x78, 0x1b, 0xf7, 0x29, 0x26, 0xbb, 0x0c, 0xf7, 0xe6, 0x09, 0xad, 0x38, 0x0c, 0x0a, 0x8c,
+ 0x92, 0x5a, 0x26, 0xd6, 0xf9, 0x24, 0x96, 0x5f, 0xe2, 0xb8, 0x28, 0x77, 0x2d, 0x13, 0xb7, 0x19,
+ 0x2e, 0xa3, 0xa4, 0x41, 0x81, 0xed, 0x45, 0xb8, 0x67, 0x6b, 0xc0, 0xa3, 0x84, 0x28, 0xa8, 0x25,
+ 0x28, 0x86, 0x5d, 0x54, 0x5f, 0x84, 0x62, 0x68, 0x8c, 0x9e, 0x1b, 0xe9, 0xa2, 0xf8, 0x7c, 0x11,
+ 0xdc, 0xd6, 0x02, 0xe4, 0xfd, 0x43, 0x0f, 0xd7, 0x3f, 0x9d, 0x83, 0xf5, 0xd4, 0x7d, 0x03, 0xba,
+ 0x09, 0x8b, 0xf2, 0x44, 0x41, 0xfa, 0xb4, 0x91, 0x71, 0x80, 0xb7, 0x85, 0x95, 0x16, 0x98, 0xb3,
+ 0x8d, 0x0d, 0xc1, 0xd4, 0x32, 0x87, 0x86, 0xad, 0x13, 0xd7, 0xf5, 0x03, 0x71, 0xbc, 0x91, 0x11,
+ 0x70, 0xd2, 0xb4, 0xd3, 0x96, 0x02, 0x58, 0x8d, 0xa1, 0xa6, 0x46, 0x98, 0xdc, 0x71, 0x45, 0x18,
+ 0x74, 0x09, 0xd6, 0xd9, 0x84, 0xb2, 0x08, 0xa6, 0xba, 0xcc, 0xf6, 0xc5, 0x6c, 0xcf, 0x9f, 0x55,
+ 0x36, 0x0a, 0xda, 0x5a, 0xd0, 0x78, 0x3d, 0xd6, 0x56, 0x6f, 0xc1, 0x99, 0xa7, 0xed, 0xd2, 0x33,
+ 0x6e, 0x44, 0xeb, 0x1f, 0xaf, 0xc2, 0xa2, 0x74, 0x2b, 0x32, 0xa0, 0xe4, 0xc5, 0xf2, 0x6f, 0x65,
+ 0x2a, 0x57, 0x4a, 0x90, 0x46, 0xc7, 0x4f, 0x24, 0xdc, 0x71, 0x4c, 0xf5, 0xd3, 0x12, 0x40, 0x94,
+ 0xc6, 0xa0, 0x27, 0x10, 0xec, 0xa6, 0xb0, 0xa9, 0xcb, 0xd5, 0x29, 0x10, 0xc5, 0xad, 0x69, 0x89,
+ 0x43, 0xd8, 0x60, 0x22, 0x60, 0xb3, 0x25, 0x21, 0xb5, 0xaa, 0x97, 0xac, 0x42, 0xef, 0xc3, 0x8a,
+ 0xd1, 0xf5, 0xad, 0x7d, 0x1c, 0x11, 0x8b, 0xe9, 0x76, 0x73, 0x76, 0xe2, 0x4d, 0x0e, 0x18, 0xb2,
+ 0x2e, 0x1b, 0x23, 0x65, 0x64, 0x01, 0xc4, 0x16, 0x5c, 0x21, 0xa0, 0xf6, 0xec, 0x6c, 0xc9, 0xb5,
+ 0x36, 0x06, 0x8e, 0x6e, 0x40, 0x9e, 0x05, 0x15, 0xb9, 0xaa, 0x5f, 0x9a, 0x92, 0x84, 0xcd, 0x7c,
+ 0x8d, 0x03, 0xa8, 0x7f, 0xc9, 0x41, 0xe1, 0x36, 0x36, 0xe8, 0x90, 0x60, 0x13, 0xfd, 0x58, 0x81,
+ 0x35, 0x91, 0x6e, 0x48, 0x9f, 0xe9, 0x5d, 0x77, 0x28, 0x3e, 0x19, 0xa3, 0x79, 0x7b, 0xf6, 0xb1,
+ 0x04, 0x14, 0x0d, 0x1e, 0x44, 0xa4, 0xc7, 0x9a, 0x1c, 0x5c, 0x0c, 0x0e, 0x59, 0x63, 0x0d, 0xe8,
+ 0x23, 0x05, 0xd6, 0x65, 0x22, 0x93, 0xe8, 0x8f, 0x08, 0x03, 0xef, 0x1c, 0x43, 0x7f, 0xc4, 0xda,
+ 0x9f, 0xd2, 0xa1, 0x55, 0x77, 0xbc, 0x05, 0x6d, 0x40, 0xc5, 0x77, 0x7d, 0xc3, 0x16, 0xcb, 0x29,
+ 0xf5, 0x82, 0xe4, 0x4b, 0xd1, 0x96, 0x79, 0x3d, 0x5b, 0x2f, 0x77, 0x59, 0xad, 0xda, 0x82, 0x53,
+ 0x13, 0x86, 0x9a, 0x92, 0x58, 0xac, 0xc5, 0x13, 0x8b, 0x5c, 0x3c, 0x53, 0xbd, 0x0e, 0xb5, 0x49,
+ 0x3d, 0x9c, 0x0a, 0x87, 0x42, 0x75, 0x6c, 0xd6, 0xa0, 0xf7, 0xa0, 0x30, 0x90, 0x7e, 0x90, 0x93,
+ 0x72, 0xeb, 0xe8, 0x1e, 0xd5, 0x42, 0x4c, 0xf5, 0xa3, 0x1c, 0x2c, 0x8f, 0x4e, 0x99, 0x67, 0x4d,
+ 0x89, 0x5e, 0x02, 0xd4, 0x23, 0x86, 0x88, 0x89, 0x04, 0x0f, 0x0c, 0xcb, 0xb1, 0x9c, 0x3e, 0x77,
+ 0x87, 0xa2, 0x55, 0x83, 0x16, 0x2d, 0x68, 0x40, 0xbf, 0x54, 0xe0, 0xf4, 0xa8, 0xc2, 0x68, 0xcc,
+ 0x4c, 0xcc, 0x60, 0x7c, 0x5c, 0xf1, 0x62, 0x54, 0x6b, 0x34, 0xec, 0x85, 0xd0, 0xdb, 0x29, 0x37,
+ 0xbd, 0x55, 0x7d, 0x0b, 0xce, 0x3c, 0xcd, 0x70, 0x2a, 0x19, 0xbc, 0x0e, 0x2b, 0x9f, 0x9f, 0xe6,
+ 0x4e, 0x36, 0xff, 0xd3, 0x3c, 0xe4, 0x59, 0xec, 0x40, 0x3a, 0x94, 0xc4, 0x1a, 0xad, 0x3b, 0xc6,
+ 0x20, 0x48, 0x9d, 0xae, 0xcd, 0x10, 0x85, 0x64, 0xe1, 0x8e, 0x31, 0xc0, 0x1a, 0x0c, 0xc2, 0x67,
+ 0x84, 0xa1, 0xcc, 0xa7, 0x3a, 0x26, 0xba, 0x69, 0xf8, 0x46, 0x70, 0xa0, 0xf9, 0xc6, 0x2c, 0x14,
+ 0x4d, 0x01, 0xb4, 0x6d, 0xf8, 0xc6, 0xcd, 0x13, 0x5a, 0xa9, 0x1b, 0x15, 0x91, 0x0f, 0x55, 0xd3,
+ 0xa2, 0x3e, 0xb1, 0xf6, 0xb8, 0xa9, 0xe0, 0x9a, 0xf2, 0x2c, 0x73, 0x84, 0x6b, 0x3b, 0x86, 0x26,
+ 0x09, 0x2b, 0x66, 0xa2, 0x0e, 0xe9, 0x00, 0x7d, 0x63, 0xd8, 0xc7, 0x82, 0xee, 0xb3, 0xe9, 0x4e,
+ 0x12, 0x47, 0xe8, 0x6e, 0x30, 0x18, 0xc9, 0x53, 0xec, 0x07, 0x05, 0xf5, 0x1a, 0x40, 0xe4, 0x57,
+ 0x74, 0x06, 0x8a, 0xec, 0x2b, 0x51, 0xcf, 0xe8, 0x62, 0xb9, 0x89, 0x8c, 0x2a, 0x10, 0x82, 0x3c,
+ 0xff, 0x86, 0x39, 0xde, 0xc0, 0x9f, 0xd5, 0xff, 0x67, 0x9b, 0xf0, 0xc8, 0x4b, 0xa1, 0x20, 0x94,
+ 0x98, 0x20, 0xd4, 0xf7, 0xa0, 0x92, 0x1c, 0x2d, 0x7b, 0x93, 0xbb, 0x37, 0x78, 0x93, 0x17, 0x98,
+ 0xc4, 0xe8, 0x70, 0x20, 0xe5, 0xc4, 0x1e, 0x59, 0xcd, 0xc0, 0x72, 0x38, 0x67, 0x4e, 0x63, 0x8f,
+ 0xbc, 0xc6, 0x38, 0xe0, 0x29, 0x11, 0xab, 0x31, 0x0e, 0xd4, 0x77, 0xa0, 0x18, 0x0e, 0x2f, 0xbd,
+ 0x0b, 0xe8, 0x0a, 0x14, 0xc3, 0xcb, 0xb0, 0x0c, 0x9b, 0xb2, 0xe8, 0x65, 0x96, 0xc5, 0x32, 0xe7,
+ 0xab, 0x87, 0x50, 0x49, 0x66, 0x34, 0x29, 0x33, 0xe2, 0xee, 0xe8, 0xc6, 0xef, 0xea, 0xcc, 0x11,
+ 0x21, 0xbe, 0x2f, 0xfc, 0xcd, 0x1c, 0x3c, 0xf7, 0xd4, 0x73, 0xf0, 0x63, 0x4c, 0xa4, 0x9f, 0x6d,
+ 0x82, 0xfb, 0x2e, 0x2c, 0x79, 0xc4, 0x1a, 0x18, 0xe4, 0x50, 0x66, 0xe9, 0x22, 0x2b, 0x99, 0x7d,
+ 0x1b, 0x5b, 0x96, 0x70, 0x3c, 0x3b, 0xaf, 0x7f, 0x3b, 0x0f, 0xa7, 0x27, 0x5e, 0x1a, 0x65, 0xbd,
+ 0x91, 0x79, 0x02, 0xcb, 0x26, 0xa6, 0x16, 0xc1, 0xa6, 0xb8, 0x33, 0x08, 0xc6, 0xbf, 0x7b, 0xd4,
+ 0x5b, 0xab, 0xc6, 0xb6, 0x80, 0xe5, 0x75, 0x32, 0x77, 0x58, 0x32, 0xe3, 0x75, 0xea, 0xef, 0x14,
+ 0x28, 0xc7, 0xdf, 0x42, 0x17, 0x61, 0x3d, 0x5c, 0xa5, 0xdc, 0x9e, 0x5c, 0x71, 0x4c, 0x2c, 0xae,
+ 0x53, 0x15, 0x6d, 0x35, 0x68, 0xbc, 0xdb, 0xd3, 0x82, 0x26, 0x74, 0x1e, 0xd6, 0x0c, 0xdb, 0x76,
+ 0x1f, 0x07, 0x03, 0xd0, 0xc5, 0x35, 0x32, 0x1f, 0x46, 0x4e, 0x43, 0xb2, 0x8d, 0xe3, 0x77, 0x78,
+ 0x0b, 0xba, 0x02, 0x35, 0x4c, 0x7d, 0x6b, 0x60, 0xb0, 0xfd, 0xff, 0x48, 0x5a, 0x47, 0xe5, 0x5c,
+ 0x3c, 0x19, 0xb6, 0xc7, 0x73, 0x15, 0xaa, 0x7e, 0xa4, 0x00, 0x1a, 0x1f, 0x56, 0xca, 0xc4, 0xe8,
+ 0x8e, 0x4e, 0x8c, 0xdb, 0xc7, 0xea, 0xcc, 0xf8, 0x64, 0xf9, 0x57, 0x0e, 0xd4, 0xc9, 0xd7, 0x36,
+ 0xe3, 0x0a, 0x54, 0x8e, 0x53, 0x81, 0xff, 0xb5, 0x7d, 0xe8, 0x10, 0x96, 0xbb, 0x0f, 0x0d, 0xc7,
+ 0xc1, 0xf6, 0xa8, 0x48, 0xef, 0x1c, 0xf9, 0x62, 0xab, 0xd1, 0x14, 0xb8, 0xa2, 0x72, 0xa9, 0x1b,
+ 0x2b, 0x51, 0xf5, 0xe7, 0x0a, 0x94, 0xe3, 0xed, 0x59, 0x0e, 0x26, 0xcf, 0xc3, 0x9a, 0x6d, 0x50,
+ 0x5f, 0x0f, 0xdc, 0x1e, 0x1c, 0x45, 0x0a, 0x61, 0x21, 0xd6, 0xd6, 0x11, 0x4d, 0x52, 0x55, 0xe8,
+ 0x32, 0x9c, 0xec, 0x59, 0x84, 0xfa, 0x7a, 0xe8, 0xca, 0xf8, 0xf1, 0x65, 0x4e, 0x5b, 0xe3, 0xad,
+ 0x9a, 0x6c, 0x94, 0x56, 0xf5, 0x6b, 0xb0, 0x9e, 0x7a, 0x7d, 0x9b, 0x75, 0x03, 0x5c, 0x83, 0x93,
+ 0xe9, 0x77, 0x6f, 0xf5, 0x4f, 0x14, 0x28, 0x84, 0x79, 0xe9, 0x4d, 0xb1, 0x1e, 0x48, 0xdd, 0x5c,
+ 0xce, 0xe8, 0xef, 0x30, 0xb3, 0x63, 0x6b, 0x94, 0x26, 0x56, 0x14, 0x13, 0xf2, 0x7c, 0xc5, 0xca,
+ 0x18, 0x97, 0x92, 0xae, 0x9e, 0x1b, 0x77, 0x35, 0x92, 0x7d, 0x13, 0xa7, 0xbc, 0xfc, 0xb9, 0xfe,
+ 0xb3, 0x1c, 0x94, 0xf9, 0xd9, 0x4d, 0xe0, 0x8e, 0xe4, 0x5d, 0xdb, 0x38, 0xfd, 0x5c, 0x1a, 0xfd,
+ 0x0e, 0x14, 0xc5, 0x2d, 0x0a, 0x9b, 0xd8, 0x39, 0x3e, 0x89, 0xcf, 0x65, 0x1c, 0x3c, 0xa7, 0xbf,
+ 0x85, 0x0f, 0xb5, 0x02, 0x95, 0x4f, 0xe8, 0x16, 0xe4, 0xfa, 0xd8, 0x9f, 0xf6, 0xd7, 0x0a, 0x0e,
+ 0x74, 0x03, 0xc7, 0x7e, 0x03, 0x60, 0x28, 0xe8, 0x1e, 0x2c, 0x18, 0x9e, 0x87, 0x1d, 0x33, 0x48,
+ 0xfe, 0xae, 0x4e, 0x83, 0xb7, 0xc9, 0x4d, 0x23, 0x48, 0x89, 0x85, 0xbe, 0x0c, 0xf3, 0x5d, 0x1b,
+ 0x1b, 0x24, 0xc8, 0xf2, 0xae, 0x4c, 0x03, 0xda, 0x64, 0x96, 0x11, 0xa6, 0x40, 0x8a, 0xff, 0x36,
+ 0xf0, 0xc9, 0x1c, 0x2c, 0xc9, 0xcf, 0x22, 0x23, 0x53, 0xf2, 0xbb, 0xa4, 0xff, 0x19, 0xb0, 0x33,
+ 0xe2, 0xb8, 0x57, 0xa6, 0x76, 0x5c, 0x78, 0x9d, 0xcc, 0x3d, 0x77, 0x3f, 0xe9, 0xb9, 0x57, 0x67,
+ 0xf1, 0x5c, 0x88, 0x19, 0xb8, 0x4e, 0x4b, 0xb8, 0xee, 0xea, 0x0c, 0xae, 0x0b, 0x41, 0xa5, 0xef,
+ 0xe2, 0xd7, 0xdd, 0x7f, 0x28, 0x40, 0x21, 0x10, 0x15, 0xea, 0xc0, 0x82, 0xf8, 0x79, 0x4a, 0xa6,
+ 0x3e, 0x2f, 0x4f, 0xa9, 0xca, 0x86, 0xc6, 0xad, 0x59, 0xf7, 0x05, 0x0e, 0xa2, 0xb0, 0x3a, 0x18,
+ 0xda, 0x6c, 0xbd, 0xf3, 0xf4, 0xb1, 0x33, 0xd8, 0xcd, 0x69, 0xe1, 0x6f, 0x4b, 0xa8, 0xf8, 0xa1,
+ 0x6b, 0x75, 0x90, 0xac, 0x44, 0x26, 0x2c, 0xef, 0x19, 0x7d, 0x3d, 0x76, 0xcc, 0x9c, 0x9b, 0xea,
+ 0xcf, 0x8c, 0x90, 0x6f, 0xcb, 0xe8, 0xc7, 0x8f, 0x94, 0xcb, 0x7b, 0xb1, 0x32, 0x1b, 0x9a, 0xe5,
+ 0x63, 0x62, 0xec, 0xd9, 0x38, 0x3e, 0xb4, 0xfc, 0x6c, 0x43, 0x6b, 0x4b, 0xa8, 0x91, 0xa1, 0x59,
+ 0xc9, 0x4a, 0xf4, 0x2d, 0x05, 0x6a, 0xa1, 0x43, 0x1f, 0xe1, 0x43, 0x1a, 0xa7, 0x9e, 0xe7, 0xd4,
+ 0xad, 0x59, 0xbd, 0x7a, 0x0b, 0x1f, 0xd2, 0x38, 0xfd, 0xfa, 0x20, 0xad, 0x41, 0x55, 0x61, 0x41,
+ 0x7c, 0xe6, 0x78, 0x6a, 0x52, 0xe6, 0xa9, 0x89, 0x4a, 0xa0, 0x3a, 0x36, 0x90, 0x2c, 0x0b, 0x5b,
+ 0x1d, 0x96, 0xa2, 0x71, 0xc4, 0x22, 0x72, 0x78, 0x02, 0xde, 0x36, 0xd1, 0x49, 0x58, 0x10, 0x97,
+ 0xed, 0x32, 0x26, 0xcb, 0x92, 0xfa, 0x3d, 0x05, 0xaa, 0x63, 0xc2, 0x78, 0xc6, 0xa4, 0xc1, 0xd0,
+ 0xf3, 0xd1, 0xd0, 0xf7, 0x61, 0x3d, 0xd5, 0x91, 0xcf, 0x7a, 0xf8, 0x1f, 0x42, 0x39, 0x2e, 0xd3,
+ 0x8c, 0x74, 0xd1, 0xdc, 0x88, 0xd1, 0x85, 0x37, 0x26, 0xd3, 0x0c, 0x3c, 0xbc, 0x9b, 0x78, 0x13,
+ 0x56, 0x12, 0x8b, 0x0a, 0x7a, 0x09, 0x50, 0xd7, 0x75, 0x7c, 0xcb, 0x19, 0xf2, 0xcc, 0x4b, 0x5c,
+ 0x36, 0x49, 0xbd, 0x54, 0xe3, 0x2d, 0xfc, 0x2e, 0xa5, 0x7e, 0x1f, 0x2a, 0xc9, 0xe8, 0x3a, 0x25,
+ 0x44, 0xb8, 0x6c, 0xcf, 0xc5, 0x96, 0xed, 0x0d, 0x40, 0xe3, 0xab, 0x53, 0xf8, 0xa6, 0x12, 0x7b,
+ 0x73, 0x1d, 0x56, 0x53, 0xa2, 0x71, 0x7d, 0x15, 0xaa, 0x63, 0x2b, 0x51, 0x7d, 0x4d, 0xa2, 0x8e,
+ 0xc4, 0xd8, 0xfa, 0xaf, 0xf2, 0x50, 0xd8, 0x71, 0xe5, 0x21, 0xd1, 0xd7, 0xa0, 0x40, 0xf1, 0x3e,
+ 0x26, 0x96, 0x2f, 0x26, 0xc9, 0x72, 0xe6, 0xf3, 0x86, 0x00, 0xa2, 0xb1, 0x2b, 0xed, 0xc5, 0xfd,
+ 0x6c, 0x08, 0x37, 0xfb, 0x26, 0x1c, 0xd5, 0xd8, 0xfe, 0x96, 0x52, 0xa3, 0x1f, 0x9c, 0x3e, 0x04,
+ 0x45, 0x7e, 0x67, 0x45, 0x8c, 0x2e, 0xe6, 0x1f, 0xb7, 0xa8, 0x89, 0x42, 0x4a, 0x4e, 0x33, 0x9f,
+ 0x25, 0xa5, 0x5a, 0x18, 0x97, 0xdd, 0xf3, 0x50, 0xb6, 0xdd, 0xbe, 0x6e, 0xbb, 0xf2, 0xde, 0x75,
+ 0x51, 0xbc, 0x62, 0xbb, 0xfd, 0x1d, 0x59, 0xc5, 0x54, 0xe7, 0x3f, 0x24, 0xd8, 0x30, 0x6b, 0x05,
+ 0xde, 0x28, 0x4b, 0xea, 0x57, 0x21, 0xbf, 0x63, 0x51, 0x1f, 0x75, 0x80, 0xbd, 0xae, 0x63, 0xc7,
+ 0x27, 0x16, 0x0e, 0x36, 0x1c, 0xe7, 0xa6, 0x74, 0xaa, 0x06, 0xb6, 0x78, 0xb2, 0x30, 0x55, 0x09,
+ 0x14, 0x02, 0x1f, 0xd7, 0x7b, 0x90, 0x67, 0x6e, 0x46, 0x2b, 0x50, 0xba, 0x7f, 0x67, 0xb7, 0xd3,
+ 0x6a, 0xb6, 0xaf, 0xb7, 0x5b, 0xdb, 0x95, 0x13, 0xa8, 0x08, 0xf3, 0xf7, 0xb4, 0xcd, 0x66, 0xab,
+ 0xa2, 0xb0, 0xc7, 0xed, 0xd6, 0xd6, 0xfd, 0x1b, 0x95, 0x39, 0x54, 0x80, 0x7c, 0xfb, 0xce, 0xf5,
+ 0xbb, 0x95, 0x1c, 0x02, 0x58, 0xb8, 0x73, 0xf7, 0x5e, 0xbb, 0xd9, 0xaa, 0xe4, 0x59, 0xed, 0x83,
+ 0x4d, 0xed, 0x4e, 0x65, 0x9e, 0xbd, 0xda, 0xd2, 0xb4, 0xbb, 0x5a, 0x65, 0x01, 0x95, 0xa1, 0xd0,
+ 0xd4, 0xda, 0xf7, 0xda, 0xcd, 0xcd, 0x9d, 0xca, 0x62, 0xbd, 0x0c, 0xb0, 0xe3, 0xf6, 0x9b, 0xae,
+ 0xe3, 0x13, 0xd7, 0xae, 0xff, 0x39, 0xcf, 0x95, 0x44, 0xfc, 0x07, 0x2e, 0x79, 0x14, 0xfd, 0x73,
+ 0xf6, 0x3f, 0x50, 0x7c, 0xcc, 0x2b, 0xa2, 0x49, 0x5c, 0x10, 0x15, 0x6d, 0x13, 0xed, 0x41, 0xa5,
+ 0x2b, 0xcc, 0xf5, 0xe0, 0xdf, 0x66, 0xa9, 0x82, 0x99, 0xff, 0xbd, 0x59, 0x91, 0x80, 0x2d, 0x89,
+ 0xc7, 0x38, 0x6c, 0xb7, 0xdf, 0xb7, 0x9c, 0x7e, 0xc4, 0x91, 0x3b, 0x22, 0x87, 0x04, 0x0c, 0x39,
+ 0x4c, 0xa8, 0x1a, 0xc4, 0xb7, 0x7a, 0x46, 0xd7, 0x8f, 0x48, 0xf2, 0x47, 0x23, 0xa9, 0x04, 0x88,
+ 0x21, 0x4b, 0x8f, 0xdf, 0x88, 0xed, 0x5b, 0x94, 0x09, 0x38, 0xa4, 0x99, 0x3f, 0x1a, 0x4d, 0x35,
+ 0x84, 0x0c, 0x79, 0xde, 0x85, 0x05, 0xcf, 0x20, 0xc6, 0x80, 0xd6, 0x80, 0x0b, 0x73, 0x8a, 0x95,
+ 0x38, 0xf1, 0xf5, 0x1b, 0x1d, 0x8e, 0x23, 0x7f, 0xf9, 0x12, 0xa0, 0xea, 0x55, 0x28, 0xc5, 0xaa,
+ 0x3f, 0xef, 0x0c, 0xb9, 0x18, 0xdf, 0xc9, 0x7f, 0x91, 0x07, 0xb6, 0x88, 0x44, 0x06, 0xd7, 0x30,
+ 0x2f, 0x56, 0x62, 0x79, 0x71, 0xfd, 0x3c, 0x0b, 0x77, 0xae, 0x97, 0x5d, 0x8e, 0xf5, 0x17, 0x99,
+ 0x82, 0x23, 0x8b, 0xa7, 0xa1, 0x5f, 0xfc, 0x58, 0x81, 0xa5, 0x2d, 0x6c, 0x0c, 0xae, 0x3b, 0x72,
+ 0x02, 0xa0, 0xef, 0x2b, 0xb0, 0x18, 0x3c, 0x67, 0x4d, 0x9a, 0x53, 0x7e, 0x13, 0x56, 0xaf, 0xce,
+ 0x62, 0x2b, 0x82, 0xf9, 0x89, 0x0d, 0xe5, 0xbc, 0x72, 0xf1, 0x03, 0x00, 0xd1, 0x33, 0xbe, 0x97,
+ 0x74, 0xe4, 0x9e, 0xf2, 0xdc, 0x94, 0xfb, 0x52, 0x75, 0x5a, 0x03, 0xc9, 0xfe, 0x03, 0x05, 0x4a,
+ 0x82, 0x5e, 0x2c, 0xe4, 0x07, 0x30, 0x2f, 0x1e, 0x2e, 0x4d, 0x93, 0xd0, 0xc9, 0x11, 0xa9, 0x97,
+ 0xa7, 0x33, 0x92, 0xcb, 0x97, 0xe8, 0xc9, 0x0f, 0xc3, 0x4f, 0xb4, 0x23, 0xe6, 0x2b, 0x3a, 0x80,
+ 0xc5, 0xe0, 0xf1, 0xf2, 0xb4, 0x4b, 0x18, 0x0b, 0xdc, 0xea, 0x85, 0xec, 0x56, 0x41, 0x5c, 0x14,
+ 0x7d, 0xf9, 0xed, 0x1c, 0xd4, 0x44, 0x5f, 0x5a, 0x07, 0x3e, 0x26, 0x8e, 0x61, 0x0b, 0x95, 0x75,
+ 0x5c, 0xa1, 0x9c, 0x52, 0x4c, 0xd7, 0xe8, 0xea, 0xcc, 0x13, 0x4e, 0x7d, 0x75, 0x16, 0xd3, 0xc0,
+ 0x6b, 0xe8, 0xbb, 0x0a, 0x40, 0x34, 0x03, 0x50, 0xf6, 0xfd, 0x6d, 0x62, 0x9a, 0xa9, 0x57, 0x67,
+ 0xb0, 0x0c, 0x7a, 0xb1, 0xb5, 0x09, 0x5f, 0x98, 0x64, 0x1d, 0x37, 0xde, 0x2a, 0x0a, 0x87, 0x6e,
+ 0x7a, 0xd6, 0xdb, 0xcb, 0xb1, 0x26, 0x7d, 0xff, 0xc2, 0xde, 0x02, 0x4f, 0x1e, 0x2e, 0xfd, 0x27,
+ 0x00, 0x00, 0xff, 0xff, 0x08, 0x83, 0xb9, 0x20, 0x84, 0x33, 0x00, 0x00,
}
// Reference imports to suppress errors if they are not otherwise used.
@@ -4313,6 +3927,14 @@
Control(BeamFnControl_ControlServer) error
}
+// UnimplementedBeamFnControlServer can be embedded to have forward compatible implementations.
+type UnimplementedBeamFnControlServer struct {
+}
+
+func (*UnimplementedBeamFnControlServer) Control(srv BeamFnControl_ControlServer) error {
+ return status.Errorf(codes.Unimplemented, "method Control not implemented")
+}
+
func RegisterBeamFnControlServer(s *grpc.Server, srv BeamFnControlServer) {
s.RegisterService(&_BeamFnControl_serviceDesc, srv)
}
@@ -4411,6 +4033,14 @@
Data(BeamFnData_DataServer) error
}
+// UnimplementedBeamFnDataServer can be embedded to have forward compatible implementations.
+type UnimplementedBeamFnDataServer struct {
+}
+
+func (*UnimplementedBeamFnDataServer) Data(srv BeamFnData_DataServer) error {
+ return status.Errorf(codes.Unimplemented, "method Data not implemented")
+}
+
func RegisterBeamFnDataServer(s *grpc.Server, srv BeamFnDataServer) {
s.RegisterService(&_BeamFnData_serviceDesc, srv)
}
@@ -4509,6 +4139,14 @@
State(BeamFnState_StateServer) error
}
+// UnimplementedBeamFnStateServer can be embedded to have forward compatible implementations.
+type UnimplementedBeamFnStateServer struct {
+}
+
+func (*UnimplementedBeamFnStateServer) State(srv BeamFnState_StateServer) error {
+ return status.Errorf(codes.Unimplemented, "method State not implemented")
+}
+
func RegisterBeamFnStateServer(s *grpc.Server, srv BeamFnStateServer) {
s.RegisterService(&_BeamFnState_serviceDesc, srv)
}
@@ -4609,6 +4247,14 @@
Logging(BeamFnLogging_LoggingServer) error
}
+// UnimplementedBeamFnLoggingServer can be embedded to have forward compatible implementations.
+type UnimplementedBeamFnLoggingServer struct {
+}
+
+func (*UnimplementedBeamFnLoggingServer) Logging(srv BeamFnLogging_LoggingServer) error {
+ return status.Errorf(codes.Unimplemented, "method Logging not implemented")
+}
+
func RegisterBeamFnLoggingServer(s *grpc.Server, srv BeamFnLoggingServer) {
s.RegisterService(&_BeamFnLogging_serviceDesc, srv)
}
@@ -4698,6 +4344,17 @@
StopWorker(context.Context, *StopWorkerRequest) (*StopWorkerResponse, error)
}
+// UnimplementedBeamFnExternalWorkerPoolServer can be embedded to have forward compatible implementations.
+type UnimplementedBeamFnExternalWorkerPoolServer struct {
+}
+
+func (*UnimplementedBeamFnExternalWorkerPoolServer) StartWorker(ctx context.Context, req *StartWorkerRequest) (*StartWorkerResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method StartWorker not implemented")
+}
+func (*UnimplementedBeamFnExternalWorkerPoolServer) StopWorker(ctx context.Context, req *StopWorkerRequest) (*StopWorkerResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method StopWorker not implemented")
+}
+
func RegisterBeamFnExternalWorkerPoolServer(s *grpc.Server, srv BeamFnExternalWorkerPoolServer) {
s.RegisterService(&_BeamFnExternalWorkerPool_serviceDesc, srv)
}
@@ -4754,212 +4411,3 @@
Streams: []grpc.StreamDesc{},
Metadata: "beam_fn_api.proto",
}
-
-func init() { proto.RegisterFile("beam_fn_api.proto", fileDescriptor_beam_fn_api_a35062cff5082b14) }
-
-var fileDescriptor_beam_fn_api_a35062cff5082b14 = []byte{
- // 3241 bytes of a gzipped FileDescriptorProto
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xbc, 0x5a, 0xdd, 0x6f, 0x1b, 0xc7,
- 0xb5, 0xf7, 0xf2, 0x43, 0x22, 0x0f, 0x29, 0x89, 0x1c, 0x49, 0x36, 0xbd, 0xd7, 0xb9, 0xd7, 0xe1,
- 0xbd, 0x01, 0x84, 0x5c, 0x84, 0xb6, 0x65, 0x23, 0xb1, 0x73, 0x13, 0x27, 0x12, 0x45, 0xdb, 0x8c,
- 0x65, 0x9b, 0x77, 0x25, 0x5f, 0xdf, 0x9b, 0xdc, 0x64, 0xb1, 0xe2, 0x0e, 0xe9, 0x85, 0xc9, 0xdd,
- 0xcd, 0xcc, 0x52, 0xb6, 0xdc, 0xa0, 0xe9, 0x17, 0x5a, 0xb4, 0x68, 0x9b, 0x97, 0x3e, 0x24, 0x7d,
- 0x6b, 0x0b, 0x14, 0xe8, 0x4b, 0xff, 0x80, 0xfc, 0x03, 0x45, 0x9f, 0xfa, 0x0f, 0xe4, 0xa5, 0x40,
- 0x0b, 0xb4, 0x4d, 0x9f, 0x0b, 0xf4, 0xad, 0x98, 0x8f, 0xfd, 0xe0, 0x72, 0xe9, 0x2c, 0x29, 0xb9,
- 0x6f, 0x3b, 0x73, 0xf6, 0xfc, 0x7e, 0x33, 0x67, 0xcf, 0x9c, 0x39, 0x67, 0x66, 0xa1, 0x7a, 0x80,
- 0x8d, 0xa1, 0xde, 0xb3, 0x75, 0xc3, 0xb5, 0x1a, 0x2e, 0x71, 0x3c, 0x07, 0xbd, 0xe4, 0x90, 0x7e,
- 0xc3, 0x70, 0x8d, 0xee, 0x43, 0xdc, 0x60, 0xd2, 0xc6, 0xd0, 0x31, 0xf1, 0xa0, 0xd1, 0xb3, 0x75,
- 0xfc, 0x04, 0x77, 0x47, 0x9e, 0xe5, 0xd8, 0x8d, 0xc3, 0x4b, 0xea, 0x3a, 0xd7, 0x24, 0x23, 0xdb,
- 0xc6, 0x24, 0xd4, 0x56, 0x57, 0xb0, 0x6d, 0xba, 0x8e, 0x65, 0x7b, 0x54, 0x76, 0x9c, 0xef, 0x3b,
- 0x4e, 0x7f, 0x80, 0x2f, 0xf0, 0xd6, 0xc1, 0xa8, 0x77, 0xc1, 0xc4, 0xb4, 0x4b, 0x2c, 0xd7, 0x73,
- 0x88, 0x7c, 0xe3, 0xdf, 0xe2, 0x6f, 0x78, 0xd6, 0x10, 0x53, 0xcf, 0x18, 0xba, 0xf2, 0x85, 0x7f,
- 0x9d, 0x80, 0x18, 0x11, 0x83, 0x8f, 0x63, 0x8a, 0xfc, 0x31, 0x31, 0x5c, 0x17, 0x13, 0x7f, 0x08,
- 0x4b, 0x43, 0xec, 0x11, 0xab, 0x2b, 0x9b, 0xf5, 0x9f, 0x2a, 0xb0, 0xac, 0xe1, 0xa1, 0xe3, 0xe1,
- 0x9b, 0xc4, 0xed, 0x76, 0x1c, 0xe2, 0xa1, 0x21, 0x9c, 0x36, 0x5c, 0x4b, 0xa7, 0x98, 0x1c, 0x5a,
- 0x5d, 0xac, 0x87, 0x43, 0xac, 0x29, 0xe7, 0x95, 0x8d, 0xd2, 0xe6, 0x6b, 0x8d, 0x64, 0xa3, 0xb8,
- 0x96, 0x8b, 0x07, 0x96, 0x8d, 0x1b, 0x87, 0x97, 0x1a, 0x5b, 0xae, 0xb5, 0x27, 0xf4, 0x77, 0x02,
- 0x75, 0x6d, 0xcd, 0x48, 0xe8, 0x45, 0x67, 0xa1, 0xd0, 0x75, 0x4c, 0x4c, 0x74, 0xcb, 0xac, 0x65,
- 0xce, 0x2b, 0x1b, 0x45, 0x6d, 0x91, 0xb7, 0xdb, 0x66, 0xfd, 0x8f, 0x39, 0x40, 0x6d, 0x9b, 0x7a,
- 0x64, 0xd4, 0x65, 0x33, 0xd4, 0xf0, 0x87, 0x23, 0x4c, 0x3d, 0xf4, 0x12, 0x2c, 0x5b, 0x61, 0x2f,
- 0xd3, 0x53, 0xb8, 0xde, 0x52, 0xa4, 0xb7, 0x6d, 0xa2, 0xfb, 0x50, 0x20, 0xb8, 0x6f, 0x51, 0x0f,
- 0x93, 0xda, 0x9f, 0x16, 0xf9, 0xd0, 0x5f, 0x6d, 0xa4, 0xfa, 0x9e, 0x0d, 0x4d, 0xea, 0x49, 0xc6,
- 0x5b, 0xa7, 0xb4, 0x00, 0x0a, 0x61, 0x58, 0x76, 0x89, 0xd3, 0xc5, 0x94, 0xea, 0x07, 0x23, 0xdb,
- 0x1c, 0xe0, 0xda, 0x9f, 0x05, 0xf8, 0x7f, 0xa5, 0x04, 0xef, 0x08, 0xed, 0x6d, 0xae, 0x1c, 0x32,
- 0x2c, 0xb9, 0xd1, 0x7e, 0xf4, 0x75, 0x38, 0x33, 0x4e, 0xa3, 0xbb, 0xc4, 0xe9, 0x13, 0x4c, 0x69,
- 0xed, 0x2f, 0x82, 0xaf, 0x39, 0x0f, 0x5f, 0x47, 0x82, 0x84, 0xbc, 0xeb, 0x6e, 0x92, 0x1c, 0x8d,
- 0x60, 0x2d, 0xc6, 0x4f, 0xdd, 0x81, 0xe5, 0xd5, 0xbe, 0x14, 0xe4, 0x6f, 0xcf, 0x43, 0xbe, 0xc7,
- 0x10, 0x42, 0x66, 0xe4, 0x4e, 0x08, 0xd1, 0x43, 0x58, 0xe9, 0x59, 0xb6, 0x31, 0xb0, 0x9e, 0x62,
- 0xdf, 0xbc, 0x7f, 0x15, 0x8c, 0x6f, 0xa4, 0x64, 0xbc, 0x21, 0xd5, 0xe3, 0xf6, 0x5d, 0xee, 0x8d,
- 0x09, 0xb6, 0x8b, 0xb0, 0x48, 0x84, 0xb0, 0xfe, 0xad, 0x3c, 0xac, 0x8e, 0xf9, 0x19, 0x75, 0x1d,
- 0x9b, 0xe2, 0xb4, 0x8e, 0xb6, 0x06, 0x79, 0x4c, 0x88, 0x43, 0xa4, 0xfb, 0x8a, 0x06, 0xfa, 0x9f,
- 0x49, 0xf7, 0x7b, 0x6d, 0x66, 0xf7, 0x13, 0x03, 0x19, 0xf3, 0xbf, 0xde, 0x34, 0xff, 0x7b, 0x63,
- 0x3e, 0xff, 0x0b, 0x28, 0x62, 0x0e, 0xf8, 0xf1, 0x57, 0x3a, 0xe0, 0xce, 0xf1, 0x1c, 0x30, 0x20,
- 0x9e, 0xe2, 0x81, 0x87, 0xcf, 0xf6, 0xc0, 0xad, 0x63, 0x78, 0x60, 0x40, 0x9d, 0xe4, 0x82, 0xd6,
- 0x54, 0x17, 0x7c, 0x73, 0x4e, 0x17, 0x0c, 0xe8, 0xe2, 0x3e, 0x08, 0xcc, 0x47, 0x84, 0xb4, 0xfe,
- 0x23, 0x05, 0x56, 0x62, 0x71, 0x07, 0x3d, 0x85, 0xb3, 0x31, 0x13, 0x8c, 0x45, 0xe3, 0xec, 0x46,
- 0x69, 0xf3, 0xfa, 0x3c, 0x66, 0x88, 0x04, 0xe5, 0x33, 0x6e, 0xb2, 0xa0, 0x8e, 0xa0, 0x12, 0xf7,
- 0xc3, 0xfa, 0x2f, 0x00, 0xce, 0x4c, 0x01, 0x42, 0xcb, 0x90, 0x09, 0x16, 0x48, 0xc6, 0x32, 0x91,
- 0x0d, 0xe0, 0x11, 0xc3, 0xa6, 0x3d, 0x87, 0x0c, 0x69, 0x2d, 0xc3, 0x07, 0x7b, 0xf7, 0x78, 0x83,
- 0x6d, 0xec, 0x07, 0x80, 0x2d, 0xdb, 0x23, 0x47, 0x5a, 0x84, 0x01, 0x79, 0x50, 0x76, 0xbb, 0xce,
- 0x60, 0x80, 0xf9, 0xb2, 0xa4, 0xb5, 0x2c, 0x67, 0xec, 0x1c, 0x93, 0xb1, 0x13, 0x81, 0x14, 0x9c,
- 0x63, 0x2c, 0xe8, 0x07, 0x0a, 0xac, 0x3d, 0xb6, 0x6c, 0xd3, 0x79, 0x6c, 0xd9, 0x7d, 0x9d, 0x7a,
- 0xc4, 0xf0, 0x70, 0xdf, 0xc2, 0xb4, 0x96, 0xe3, 0xf4, 0x0f, 0x8e, 0x49, 0xff, 0xc0, 0x87, 0xde,
- 0x0b, 0x90, 0xc5, 0x28, 0x56, 0x1f, 0x4f, 0x4a, 0xd0, 0x01, 0x2c, 0xf0, 0xad, 0x93, 0xd6, 0xf2,
- 0x9c, 0xfd, 0x9d, 0x63, 0xb2, 0x37, 0x39, 0x98, 0x20, 0x94, 0xc8, 0xcc, 0xcc, 0xd8, 0x3e, 0xb4,
- 0x88, 0x63, 0x0f, 0xb1, 0xed, 0xd1, 0xda, 0xc2, 0x89, 0x98, 0xb9, 0x15, 0x81, 0x94, 0x66, 0x8e,
- 0xb2, 0xa0, 0x27, 0x70, 0x8e, 0x7a, 0x86, 0x87, 0xf5, 0x29, 0x99, 0xc9, 0xe2, 0xf1, 0x32, 0x93,
- 0xb3, 0x1c, 0x3c, 0x49, 0xa4, 0x0e, 0x60, 0x25, 0xe6, 0x75, 0xa8, 0x02, 0xd9, 0x47, 0xf8, 0x48,
- 0xba, 0x3a, 0x7b, 0x44, 0x4d, 0xc8, 0x1f, 0x1a, 0x83, 0x11, 0xe6, 0x3b, 0x40, 0x69, 0xf3, 0x95,
- 0x14, 0xe3, 0xe8, 0x04, 0xa8, 0x9a, 0xd0, 0x7d, 0x3d, 0x73, 0x55, 0x51, 0x1d, 0xa8, 0x4e, 0x78,
- 0x5c, 0x02, 0xdf, 0xce, 0x38, 0x5f, 0x23, 0x0d, 0x5f, 0x33, 0x80, 0x8d, 0x12, 0x7e, 0x04, 0xb5,
- 0x69, 0x3e, 0x96, 0xc0, 0xfb, 0xce, 0x38, 0xef, 0x95, 0x14, 0xbc, 0x71, 0xf4, 0xa3, 0x28, 0x7b,
- 0x17, 0x4a, 0x11, 0x1f, 0x4b, 0x20, 0xbc, 0x3e, 0x4e, 0xb8, 0x91, 0x82, 0x90, 0x03, 0xc6, 0x6c,
- 0x3a, 0xe1, 0x5e, 0x27, 0x63, 0xd3, 0x08, 0x6c, 0x84, 0xb0, 0xfe, 0xf7, 0x2c, 0x54, 0x85, 0x87,
- 0x6f, 0xb9, 0xee, 0xc0, 0xea, 0xf2, 0xf4, 0x1c, 0xbd, 0x08, 0xe5, 0x20, 0x5a, 0x85, 0xa9, 0x44,
- 0x29, 0xe8, 0x6b, 0x9b, 0x2c, 0x15, 0xb6, 0x6c, 0x77, 0xe4, 0x45, 0x52, 0x61, 0xde, 0x6e, 0x9b,
- 0xa8, 0x06, 0x8b, 0x78, 0x80, 0x19, 0x53, 0x2d, 0x7b, 0x5e, 0xd9, 0x28, 0x6b, 0x7e, 0x13, 0x7d,
- 0x0d, 0xaa, 0xce, 0xc8, 0x63, 0x5a, 0x8f, 0x0d, 0x0f, 0x93, 0xa1, 0x41, 0x1e, 0xf9, 0xd1, 0x27,
- 0x6d, 0xb8, 0x9d, 0x18, 0x6c, 0xe3, 0x1e, 0x47, 0x7c, 0x10, 0x00, 0x8a, 0x35, 0x59, 0x71, 0x62,
- 0xdd, 0xa8, 0x03, 0x60, 0x51, 0xfd, 0xc0, 0x19, 0xd9, 0x26, 0x36, 0x6b, 0xf9, 0xf3, 0xca, 0xc6,
- 0xf2, 0xe6, 0xa5, 0x14, 0x96, 0x6b, 0xd3, 0x6d, 0xa1, 0xd3, 0x68, 0xd9, 0xa3, 0xa1, 0x56, 0xb4,
- 0xfc, 0x36, 0xfa, 0x7f, 0xa8, 0x0c, 0x1d, 0xdb, 0xf2, 0x1c, 0xc2, 0x02, 0xaa, 0x65, 0xf7, 0x1c,
- 0x3f, 0xc6, 0xa4, 0xc1, 0xbd, 0x13, 0xa8, 0xb6, 0xed, 0x9e, 0xa3, 0xad, 0x0c, 0xc7, 0xda, 0x54,
- 0xd5, 0x61, 0x3d, 0x71, 0x6a, 0x09, 0xfe, 0x70, 0x71, 0xdc, 0x1f, 0xd4, 0x86, 0x28, 0xac, 0x1a,
- 0x7e, 0x61, 0xd5, 0xd8, 0xf7, 0x2b, 0xb3, 0xe8, 0xb7, 0xff, 0x2c, 0x03, 0xb5, 0x1d, 0x3c, 0x30,
- 0x8e, 0xb0, 0x39, 0xe9, 0x02, 0xfb, 0x50, 0x93, 0x29, 0x27, 0x36, 0xc3, 0x2f, 0xa0, 0xb3, 0x12,
- 0x4f, 0xd6, 0x56, 0xcf, 0x62, 0x39, 0x1d, 0xe8, 0xb6, 0x7c, 0x55, 0x26, 0x44, 0xef, 0x42, 0xc9,
- 0x08, 0x49, 0xe4, 0x70, 0xaf, 0xce, 0xfb, 0xe9, 0xb5, 0x28, 0x18, 0xba, 0x0d, 0x6b, 0xe1, 0x88,
- 0xd9, 0x38, 0x75, 0x93, 0x4d, 0x8e, 0xfb, 0x60, 0x69, 0xf3, 0xec, 0xc4, 0x68, 0x77, 0x64, 0x31,
- 0xaa, 0xa1, 0x40, 0x8d, 0x8d, 0x91, 0x5b, 0xa4, 0xfe, 0xe3, 0x1c, 0xac, 0x25, 0x15, 0x3f, 0xe8,
- 0x2d, 0x38, 0x37, 0x35, 0xcd, 0x09, 0x97, 0xca, 0xd9, 0x29, 0x99, 0x4a, 0xdb, 0x44, 0x16, 0x94,
- 0xbb, 0x6c, 0xa6, 0xba, 0xe7, 0x3c, 0xc2, 0xb6, 0x9f, 0x6d, 0xdc, 0x38, 0x46, 0x41, 0xd6, 0x68,
- 0x32, 0xad, 0x7d, 0x06, 0xa7, 0x95, 0xba, 0xc1, 0x33, 0x55, 0x7f, 0x9b, 0x01, 0x08, 0x65, 0xe8,
- 0x43, 0x80, 0x11, 0xc5, 0x44, 0xe7, 0x1b, 0x88, 0xfc, 0x88, 0x9d, 0x93, 0xe1, 0x6d, 0xdc, 0xa7,
- 0x98, 0xec, 0x31, 0xdc, 0x5b, 0xa7, 0xb4, 0xe2, 0xc8, 0x6f, 0x30, 0x4a, 0x6a, 0x99, 0x58, 0xe7,
- 0xa1, 0x41, 0x7e, 0xee, 0x93, 0xa2, 0xdc, 0xb3, 0x4c, 0xdc, 0x66, 0xb8, 0x8c, 0x92, 0xfa, 0x0d,
- 0x56, 0xe1, 0x70, 0xcb, 0xd6, 0x80, 0xc7, 0x1e, 0xd1, 0x50, 0x4b, 0x50, 0x0c, 0x86, 0xa8, 0xbe,
- 0x0c, 0xc5, 0x40, 0x19, 0xbd, 0x30, 0x36, 0x44, 0xf1, 0xf9, 0x42, 0xb8, 0xed, 0x05, 0xc8, 0x79,
- 0x47, 0x2e, 0xae, 0x7f, 0x91, 0x81, 0xf5, 0xc4, 0x6a, 0x04, 0xdd, 0x82, 0x45, 0x79, 0x4e, 0x21,
- 0x6d, 0xda, 0x48, 0x39, 0xc1, 0x3b, 0x42, 0x4b, 0xf3, 0xd5, 0x59, 0xb9, 0x44, 0x30, 0xb5, 0xcc,
- 0x91, 0x31, 0xd0, 0x89, 0xe3, 0x78, 0xbe, 0x73, 0xbc, 0x95, 0x12, 0x70, 0xda, 0x62, 0xd6, 0x96,
- 0x7c, 0x58, 0x8d, 0xa1, 0x26, 0xc6, 0xad, 0xec, 0x49, 0xc5, 0x2d, 0x74, 0x19, 0xd6, 0xd9, 0x82,
- 0xb2, 0x08, 0xa6, 0xba, 0xac, 0x21, 0xc4, 0x6a, 0xcf, 0x9d, 0x57, 0x36, 0x0a, 0xda, 0x9a, 0x2f,
- 0xbc, 0x11, 0x91, 0xd5, 0x5b, 0x70, 0xee, 0x59, 0xb5, 0x7f, 0xca, 0xf2, 0xb6, 0xfe, 0xe9, 0x2a,
- 0x2c, 0x4a, 0xb3, 0x22, 0x03, 0x4a, 0x6e, 0x24, 0xab, 0x57, 0x66, 0x32, 0xa5, 0x04, 0x69, 0x74,
- 0xbc, 0x58, 0x1a, 0x1f, 0xc5, 0x54, 0xbf, 0x28, 0x01, 0x84, 0xc9, 0x11, 0x7a, 0x0a, 0x7e, 0x8d,
- 0xc6, 0x62, 0xa6, 0xd8, 0xf3, 0x7c, 0xa7, 0xb8, 0x3d, 0x2b, 0x71, 0x00, 0xeb, 0x2f, 0x04, 0x6c,
- 0xb6, 0x24, 0xa4, 0x56, 0x75, 0xe3, 0x5d, 0xe8, 0x43, 0x58, 0x31, 0xba, 0x9e, 0x75, 0x88, 0x43,
- 0x62, 0xb1, 0xdc, 0x6e, 0xcd, 0x4f, 0xbc, 0xc5, 0x01, 0x03, 0xd6, 0x65, 0x63, 0xac, 0x8d, 0x2c,
- 0x80, 0xc8, 0x36, 0x2e, 0x1c, 0xa8, 0x3d, 0x3f, 0x5b, 0x7c, 0x07, 0x8f, 0x80, 0xa3, 0x9b, 0x90,
- 0x63, 0x41, 0x45, 0xe6, 0x0a, 0x97, 0x67, 0x24, 0x61, 0x2b, 0x5f, 0xe3, 0x00, 0xea, 0x1f, 0xb2,
- 0x50, 0xb8, 0x83, 0x0d, 0x3a, 0x22, 0xd8, 0x44, 0x3f, 0x54, 0x60, 0x4d, 0x24, 0x31, 0xd2, 0x66,
- 0x7a, 0xd7, 0x19, 0x89, 0x4f, 0xc6, 0x68, 0xde, 0x9d, 0x7f, 0x2e, 0x3e, 0x45, 0x83, 0x07, 0x11,
- 0x69, 0xb1, 0x26, 0x07, 0x17, 0x93, 0x43, 0xd6, 0x84, 0x00, 0x7d, 0xa2, 0xc0, 0xba, 0x4c, 0x8f,
- 0x62, 0xe3, 0x11, 0x61, 0xe0, 0xbd, 0x13, 0x18, 0x8f, 0xc8, 0x28, 0x12, 0x06, 0xb4, 0xea, 0x4c,
- 0x4a, 0xd0, 0x06, 0x54, 0x3c, 0xc7, 0x33, 0x06, 0x62, 0x3b, 0xa5, 0xae, 0x9f, 0xd2, 0x29, 0xda,
- 0x32, 0xef, 0x67, 0xfb, 0xe5, 0x1e, 0xeb, 0x55, 0x5b, 0x70, 0x66, 0xca, 0x54, 0x13, 0xd2, 0x95,
- 0xb5, 0x68, 0xba, 0x92, 0x8d, 0xe6, 0xbf, 0x37, 0xa0, 0x36, 0x6d, 0x84, 0x33, 0xe1, 0x50, 0xa8,
- 0x4e, 0xac, 0x1a, 0xf4, 0x01, 0x14, 0x86, 0xd2, 0x0e, 0x72, 0x51, 0x6e, 0x1f, 0xdf, 0xa2, 0x5a,
- 0x80, 0xa9, 0x7e, 0x92, 0x85, 0xe5, 0xf1, 0x25, 0xf3, 0xbc, 0x29, 0xd1, 0x2b, 0x80, 0x7a, 0xc4,
- 0x10, 0x31, 0x91, 0xe0, 0xa1, 0x61, 0xd9, 0x96, 0xdd, 0xe7, 0xe6, 0x50, 0xb4, 0xaa, 0x2f, 0xd1,
- 0x7c, 0x01, 0xfa, 0x99, 0x02, 0x67, 0xc7, 0x3d, 0x8c, 0x46, 0xd4, 0xc4, 0x0a, 0xc6, 0x27, 0x15,
- 0x2f, 0xc6, 0x7d, 0x8d, 0x06, 0xa3, 0x10, 0xfe, 0x76, 0xc6, 0x49, 0x96, 0xaa, 0xef, 0xc0, 0xb9,
- 0x67, 0x29, 0xce, 0xe4, 0x06, 0x6f, 0xc2, 0xca, 0x57, 0x27, 0xcf, 0xd3, 0xd5, 0x7f, 0x97, 0x87,
- 0x1c, 0x8b, 0x1d, 0x48, 0x87, 0x92, 0xd8, 0xa3, 0x75, 0xdb, 0x08, 0xf2, 0xdf, 0xeb, 0x73, 0x44,
- 0x21, 0xd9, 0xb8, 0x6b, 0x0c, 0xb1, 0x06, 0xc3, 0xe0, 0x19, 0x61, 0x28, 0xf3, 0xa5, 0x8e, 0x89,
- 0x6e, 0x1a, 0x9e, 0xe1, 0x1f, 0x93, 0xbe, 0x35, 0x0f, 0x45, 0x53, 0x00, 0xed, 0x18, 0x9e, 0x71,
- 0xeb, 0x94, 0x56, 0xea, 0x86, 0x4d, 0xe4, 0x41, 0xd5, 0xb4, 0xa8, 0x47, 0xac, 0x03, 0x91, 0xcd,
- 0x73, 0xae, 0x19, 0x4f, 0x48, 0xc7, 0xb8, 0x76, 0x22, 0x68, 0x92, 0xb0, 0x62, 0xc6, 0xfa, 0x90,
- 0x0e, 0xd0, 0x37, 0x46, 0x7d, 0x2c, 0xe8, 0xbe, 0x9c, 0xed, 0x7c, 0x72, 0x8c, 0xee, 0x26, 0x83,
- 0x91, 0x3c, 0xc5, 0xbe, 0xdf, 0x50, 0xaf, 0x03, 0x84, 0x76, 0x45, 0xe7, 0xa0, 0xc8, 0xbe, 0x12,
- 0x75, 0x8d, 0x2e, 0x96, 0xa5, 0x69, 0xd8, 0x81, 0x10, 0xe4, 0xf8, 0x37, 0xcc, 0x72, 0x01, 0x7f,
- 0x56, 0xff, 0x9d, 0x95, 0xf6, 0xa1, 0x95, 0x02, 0x87, 0x50, 0x22, 0x0e, 0xa1, 0x7e, 0x00, 0x95,
- 0xf8, 0x6c, 0xd9, 0x9b, 0xdc, 0xbc, 0xfe, 0x9b, 0xbc, 0xc1, 0x5c, 0x8c, 0x8e, 0x86, 0xd2, 0x9d,
- 0xd8, 0x23, 0xeb, 0x19, 0x5a, 0x36, 0xe7, 0xcc, 0x6a, 0xec, 0x91, 0xf7, 0x18, 0x4f, 0x78, 0x4a,
- 0xc4, 0x7a, 0x8c, 0x27, 0xea, 0x7b, 0x50, 0x0c, 0xa6, 0x97, 0x3c, 0x04, 0x74, 0x15, 0x8a, 0xc1,
- 0x15, 0x5b, 0x8a, 0x52, 0x2f, 0x7c, 0x99, 0x65, 0xb1, 0xcc, 0xf8, 0xea, 0x11, 0x54, 0xe2, 0x19,
- 0x4d, 0xc2, 0x8a, 0xb8, 0x37, 0x5e, 0x4e, 0x5e, 0x9b, 0x3b, 0x22, 0x44, 0xab, 0xcd, 0x5f, 0x66,
- 0xe0, 0x85, 0x67, 0x9e, 0xae, 0x9f, 0x60, 0x22, 0xfd, 0x7c, 0x13, 0xdc, 0xf7, 0x61, 0xc9, 0x25,
- 0xd6, 0xd0, 0x20, 0x47, 0x32, 0x4b, 0x17, 0x59, 0xc9, 0xfc, 0x65, 0x6c, 0x59, 0xc2, 0xf1, 0xec,
- 0xbc, 0xfe, 0xcd, 0x1c, 0x9c, 0x9d, 0x7a, 0x15, 0x95, 0xf6, 0x9e, 0xe7, 0x29, 0x2c, 0x9b, 0x98,
- 0x5a, 0x04, 0x9b, 0xe2, 0x26, 0xc2, 0x9f, 0xff, 0xde, 0x71, 0xef, 0xc2, 0x1a, 0x3b, 0x02, 0x96,
- 0xf7, 0xc9, 0xdc, 0x61, 0xc9, 0x8c, 0xf6, 0xa9, 0xbf, 0x56, 0xa0, 0x1c, 0x7d, 0x0b, 0x6d, 0xc2,
- 0x7a, 0xb0, 0x4b, 0x39, 0x3d, 0xb9, 0xe3, 0x98, 0x58, 0x5c, 0xd2, 0x66, 0xb4, 0x55, 0x5f, 0x78,
- 0xaf, 0xa7, 0xf9, 0x22, 0x74, 0x11, 0xd6, 0x8c, 0xc1, 0xc0, 0x79, 0xec, 0x4f, 0x40, 0x17, 0x97,
- 0xd3, 0x7c, 0x1a, 0x59, 0x0d, 0x49, 0x19, 0xc7, 0xef, 0x70, 0x09, 0xba, 0x0a, 0x35, 0x4c, 0x3d,
- 0x6b, 0x68, 0xb0, 0xfa, 0x7f, 0x2c, 0xad, 0xa3, 0x72, 0x2d, 0x9e, 0x0e, 0xe4, 0xd1, 0x5c, 0x85,
- 0xaa, 0x9f, 0x28, 0x80, 0x26, 0xa7, 0x95, 0xb0, 0x30, 0xba, 0xe3, 0x0b, 0xe3, 0xce, 0x89, 0x1a,
- 0x33, 0xba, 0x58, 0xfe, 0x96, 0x05, 0x75, 0xfa, 0x65, 0xd0, 0xa4, 0x07, 0x2a, 0x27, 0xe9, 0x81,
- 0xff, 0xb4, 0x3a, 0x74, 0x04, 0xcb, 0xdd, 0x87, 0x86, 0x6d, 0xe3, 0xc1, 0xb8, 0x93, 0xde, 0x3d,
- 0xf6, 0x75, 0x59, 0xa3, 0x29, 0x70, 0x45, 0xe7, 0x52, 0x37, 0xd2, 0xa2, 0xea, 0x67, 0x0a, 0x94,
- 0xa3, 0xf2, 0x34, 0xc7, 0x9d, 0x17, 0x61, 0x6d, 0x60, 0x50, 0x4f, 0xf7, 0xcd, 0xee, 0x1f, 0x70,
- 0x32, 0x47, 0xc8, 0x6b, 0x88, 0xc9, 0x3a, 0x42, 0x24, 0xbd, 0x0a, 0x5d, 0x81, 0xd3, 0x3d, 0x8b,
- 0x50, 0x4f, 0x0f, 0x4c, 0x19, 0x3d, 0x14, 0xcd, 0x6b, 0x6b, 0x5c, 0xaa, 0x49, 0xa1, 0xd4, 0xaa,
- 0x5f, 0x87, 0xf5, 0xc4, 0x4b, 0xe1, 0xb4, 0x05, 0x70, 0x0d, 0x4e, 0x27, 0xdf, 0xe8, 0xd5, 0x3f,
- 0x57, 0xa0, 0x10, 0xe4, 0xa5, 0xb7, 0xc4, 0x7e, 0x20, 0xfd, 0xe6, 0x4a, 0x4a, 0x7b, 0x07, 0x99,
- 0x1d, 0xdb, 0xa3, 0x34, 0xb1, 0xa3, 0x98, 0x90, 0xe3, 0x3b, 0x56, 0xca, 0xb8, 0x14, 0x37, 0x75,
- 0x66, 0xd2, 0xd4, 0x48, 0x8e, 0x4d, 0x9c, 0x1d, 0xf3, 0xe7, 0xfa, 0x4f, 0xb2, 0x50, 0xe6, 0x67,
- 0x37, 0xbe, 0x39, 0xe2, 0x37, 0x78, 0x93, 0xf4, 0x99, 0x24, 0xfa, 0x5d, 0x28, 0x8a, 0xbb, 0x19,
- 0xb6, 0xb0, 0xc5, 0xc1, 0xe0, 0x85, 0x94, 0x93, 0xe7, 0xf4, 0xb7, 0xf1, 0x91, 0x56, 0xa0, 0xf2,
- 0x09, 0xdd, 0x86, 0x6c, 0x1f, 0x7b, 0xb3, 0xfe, 0xb0, 0xc1, 0x81, 0x6e, 0xe2, 0xc8, 0xcf, 0x05,
- 0x0c, 0x05, 0xed, 0xc3, 0x82, 0xe1, 0xba, 0xd8, 0x36, 0xfd, 0xe4, 0xef, 0xda, 0x2c, 0x78, 0x5b,
- 0x5c, 0x35, 0x84, 0x94, 0x58, 0xe8, 0xbf, 0x21, 0xdf, 0x1d, 0x60, 0x83, 0xf8, 0x59, 0xde, 0xd5,
- 0x59, 0x40, 0x9b, 0x4c, 0x33, 0xc4, 0x14, 0x48, 0xd1, 0x9f, 0x11, 0x3e, 0xcf, 0xc0, 0x92, 0xfc,
- 0x2c, 0x32, 0x32, 0xc5, 0xbf, 0x4b, 0xf2, 0xff, 0x06, 0xbb, 0x63, 0x86, 0x7b, 0x6d, 0x66, 0xc3,
- 0x05, 0x97, 0xd4, 0xdc, 0x72, 0xf7, 0xe3, 0x96, 0x7b, 0x7d, 0x1e, 0xcb, 0x05, 0x98, 0xbe, 0xe9,
- 0xb4, 0x98, 0xe9, 0xae, 0xcd, 0x61, 0xba, 0x00, 0x54, 0xda, 0x2e, 0x7a, 0x89, 0xfe, 0x9b, 0x02,
- 0x14, 0x7c, 0xa7, 0x42, 0x1d, 0x58, 0x10, 0xbf, 0x64, 0xc9, 0xd4, 0xe7, 0xd5, 0x19, 0xbd, 0xb2,
- 0xa1, 0x71, 0x6d, 0x36, 0x7c, 0x81, 0x83, 0x28, 0xac, 0x0e, 0x47, 0x03, 0xb6, 0xdf, 0xb9, 0xfa,
- 0xc4, 0x19, 0xec, 0xd6, 0xac, 0xf0, 0x77, 0x24, 0x54, 0xf4, 0xd0, 0xb5, 0x3a, 0x8c, 0x77, 0x22,
- 0x13, 0x96, 0x0f, 0x8c, 0xbe, 0x1e, 0x39, 0x66, 0xce, 0xce, 0xf4, 0xbf, 0x47, 0xc0, 0xb7, 0x6d,
- 0xf4, 0xa3, 0x47, 0xca, 0xe5, 0x83, 0x48, 0x9b, 0x4d, 0xcd, 0xf2, 0x30, 0x31, 0x0e, 0x06, 0x38,
- 0x3a, 0xb5, 0xdc, 0x7c, 0x53, 0x6b, 0x4b, 0xa8, 0xb1, 0xa9, 0x59, 0xf1, 0x4e, 0xf4, 0x0d, 0x05,
- 0x6a, 0x81, 0x41, 0x1f, 0xe1, 0x23, 0x1a, 0xa5, 0xce, 0x73, 0xea, 0xd6, 0xbc, 0x56, 0xbd, 0x8d,
- 0x8f, 0x68, 0x94, 0x7e, 0x7d, 0x98, 0x24, 0x50, 0x55, 0x58, 0x10, 0x9f, 0x39, 0x9a, 0x9a, 0x94,
- 0x79, 0x6a, 0xa2, 0x12, 0xa8, 0x4e, 0x4c, 0x24, 0xcd, 0xc6, 0x56, 0x87, 0xa5, 0x70, 0x1e, 0x91,
- 0x88, 0x1c, 0x9c, 0x80, 0xb7, 0x4d, 0x74, 0x1a, 0x16, 0xc4, 0x15, 0xbe, 0x8c, 0xc9, 0xb2, 0xa5,
- 0x7e, 0x47, 0x81, 0xea, 0x84, 0x63, 0x3c, 0x67, 0x52, 0x7f, 0xea, 0xb9, 0x70, 0xea, 0x87, 0xb0,
- 0x9e, 0x68, 0xc8, 0xe7, 0x3d, 0xfd, 0x8f, 0xa1, 0x1c, 0x75, 0xd3, 0x94, 0x74, 0xe1, 0xda, 0x88,
- 0xd0, 0x05, 0x37, 0x26, 0xb3, 0x4c, 0x3c, 0xb8, 0x9b, 0x78, 0x1b, 0x56, 0x62, 0x9b, 0x0a, 0x7a,
- 0x05, 0x50, 0xd7, 0xb1, 0x3d, 0xcb, 0x1e, 0x19, 0xe2, 0xde, 0x8e, 0x5f, 0x89, 0x08, 0x7f, 0xa9,
- 0x46, 0x25, 0xfc, 0x2e, 0xa5, 0x7e, 0x1f, 0x2a, 0xf1, 0xe8, 0x3a, 0x23, 0x44, 0xb0, 0x6d, 0x67,
- 0x22, 0xdb, 0xf6, 0x06, 0xa0, 0xc9, 0xdd, 0x29, 0x78, 0x53, 0x89, 0xbc, 0xb9, 0x0e, 0xab, 0x09,
- 0xd1, 0xb8, 0xbe, 0x0a, 0xd5, 0x89, 0x9d, 0xa8, 0xbe, 0x26, 0x51, 0xc7, 0x62, 0x6c, 0xfd, 0xe7,
- 0x39, 0x28, 0xec, 0x3a, 0xf2, 0x90, 0xe8, 0xff, 0xa0, 0x40, 0xf1, 0x21, 0x26, 0x96, 0x27, 0x16,
- 0xc9, 0x72, 0xea, 0xf3, 0x06, 0x1f, 0xa2, 0xb1, 0x27, 0xf5, 0xc5, 0xad, 0x6f, 0x00, 0x37, 0x7f,
- 0x11, 0x8e, 0x6a, 0xac, 0xbe, 0xa5, 0xd4, 0xe8, 0xfb, 0xa7, 0x0f, 0x7e, 0x93, 0xdf, 0x59, 0x11,
- 0xa3, 0x8b, 0xf9, 0xc7, 0x2d, 0x6a, 0xa2, 0x91, 0x90, 0xd3, 0xe4, 0xd3, 0xa4, 0x54, 0x0b, 0x93,
- 0x6e, 0xf7, 0x22, 0x94, 0x07, 0x4e, 0x5f, 0x1f, 0x38, 0xf2, 0xde, 0x75, 0x51, 0xbc, 0x32, 0x70,
- 0xfa, 0xbb, 0xb2, 0x8b, 0x79, 0x9d, 0xf7, 0x90, 0x60, 0xc3, 0xac, 0x15, 0xb8, 0x50, 0xb6, 0xd4,
- 0xff, 0x85, 0xdc, 0xae, 0x45, 0x3d, 0xd4, 0x01, 0xf6, 0xba, 0x8e, 0x6d, 0x8f, 0x58, 0xd8, 0x2f,
- 0x38, 0x2e, 0xcc, 0x68, 0x54, 0x0d, 0x06, 0xe2, 0xc9, 0xc2, 0x54, 0x25, 0x50, 0xf0, 0x6d, 0x5c,
- 0xef, 0x41, 0x8e, 0x99, 0x19, 0xad, 0x40, 0xe9, 0xfe, 0xdd, 0xbd, 0x4e, 0xab, 0xd9, 0xbe, 0xd1,
- 0x6e, 0xed, 0x54, 0x4e, 0xa1, 0x22, 0xe4, 0xf7, 0xb5, 0xad, 0x66, 0xab, 0xa2, 0xb0, 0xc7, 0x9d,
- 0xd6, 0xf6, 0xfd, 0x9b, 0x95, 0x0c, 0x2a, 0x40, 0xae, 0x7d, 0xf7, 0xc6, 0xbd, 0x4a, 0x16, 0x01,
- 0x2c, 0xdc, 0xbd, 0xb7, 0xdf, 0x6e, 0xb6, 0x2a, 0x39, 0xd6, 0xfb, 0x60, 0x4b, 0xbb, 0x5b, 0xc9,
- 0xb3, 0x57, 0x5b, 0x9a, 0x76, 0x4f, 0xab, 0x2c, 0xa0, 0x32, 0x14, 0x9a, 0x5a, 0x7b, 0xbf, 0xdd,
- 0xdc, 0xda, 0xad, 0x2c, 0xd6, 0xcb, 0x00, 0xbb, 0x4e, 0xbf, 0xe9, 0xd8, 0x1e, 0x71, 0x06, 0xf5,
- 0xdf, 0xe7, 0xb8, 0x27, 0x11, 0xef, 0x81, 0x43, 0x1e, 0x85, 0x7f, 0xb2, 0xfd, 0x0b, 0x14, 0x1f,
- 0xf3, 0x8e, 0x70, 0x11, 0x17, 0x44, 0x47, 0xdb, 0x44, 0x07, 0x50, 0xe9, 0x0a, 0x75, 0xdd, 0xff,
- 0x63, 0x5a, 0x7a, 0xc1, 0xdc, 0x7f, 0xf4, 0xac, 0x48, 0xc0, 0x96, 0xc4, 0x63, 0x1c, 0x03, 0xa7,
- 0xdf, 0xb7, 0xec, 0x7e, 0xc8, 0x91, 0x3d, 0x26, 0x87, 0x04, 0x0c, 0x38, 0x4c, 0xa8, 0x1a, 0xc4,
- 0xb3, 0x7a, 0x46, 0xd7, 0x0b, 0x49, 0x72, 0xc7, 0x23, 0xa9, 0xf8, 0x88, 0x01, 0x4b, 0x8f, 0xdf,
- 0x88, 0x1d, 0x5a, 0x94, 0x39, 0x70, 0x40, 0x93, 0x3f, 0x1e, 0x4d, 0x35, 0x80, 0x0c, 0x78, 0xde,
- 0x87, 0x05, 0xd7, 0x20, 0xc6, 0x90, 0xd6, 0x80, 0x3b, 0xe6, 0x0c, 0x3b, 0x71, 0xec, 0xeb, 0x37,
- 0x3a, 0x1c, 0x47, 0xfe, 0x48, 0x26, 0x40, 0xd5, 0x6b, 0x50, 0x8a, 0x74, 0x7f, 0xd5, 0x19, 0x72,
- 0x31, 0x5a, 0xc9, 0xff, 0x27, 0x0f, 0x6c, 0x21, 0x89, 0x0c, 0xae, 0x41, 0x5e, 0xac, 0x44, 0xf2,
- 0xe2, 0xfa, 0x45, 0x16, 0xee, 0x1c, 0x37, 0xbd, 0x3b, 0xd6, 0x5f, 0x66, 0x1e, 0x1c, 0x6a, 0x3c,
- 0x0b, 0x7d, 0xf3, 0x53, 0x05, 0x96, 0xb6, 0xb1, 0x31, 0xbc, 0x61, 0xcb, 0x05, 0x80, 0xbe, 0xab,
- 0xc0, 0xa2, 0xff, 0x9c, 0x36, 0x69, 0x4e, 0xf8, 0xf9, 0x58, 0xbd, 0x36, 0x8f, 0xae, 0x08, 0xe6,
- 0xa7, 0x36, 0x94, 0x8b, 0xca, 0xe6, 0x47, 0x00, 0x62, 0x64, 0xbc, 0x96, 0xb4, 0x65, 0x4d, 0x79,
- 0x61, 0xc6, 0xba, 0x54, 0x9d, 0x55, 0x41, 0xb2, 0x7f, 0x4f, 0x81, 0x92, 0xa0, 0x17, 0x1b, 0xf9,
- 0x13, 0xc8, 0x8b, 0x87, 0xcb, 0xb3, 0x24, 0x74, 0x72, 0x46, 0xea, 0x95, 0xd9, 0x94, 0xe4, 0xf6,
- 0x25, 0x46, 0xf2, 0xfd, 0xe0, 0x13, 0xed, 0x8a, 0xf5, 0x8a, 0x9e, 0xc0, 0xa2, 0xff, 0x78, 0x65,
- 0xd6, 0x2d, 0x8c, 0x05, 0x6e, 0xf5, 0x52, 0x7a, 0x2d, 0x3f, 0x2e, 0x8a, 0xb1, 0xfc, 0x2a, 0x03,
- 0x35, 0x31, 0x96, 0xd6, 0x13, 0x0f, 0x13, 0xdb, 0x18, 0x08, 0x2f, 0xeb, 0x38, 0xc2, 0x73, 0x4a,
- 0x11, 0xbf, 0x46, 0xd7, 0xe6, 0x5e, 0x70, 0xea, 0xeb, 0xf3, 0xa8, 0xfa, 0x56, 0x43, 0xdf, 0x56,
- 0x00, 0xc2, 0x15, 0x80, 0xd2, 0xd7, 0xb7, 0xb1, 0x65, 0xa6, 0x5e, 0x9b, 0x43, 0xd3, 0x1f, 0xc5,
- 0xf6, 0x16, 0xfc, 0xc7, 0x34, 0xed, 0xa8, 0xf2, 0x76, 0x51, 0x18, 0x74, 0xcb, 0xb5, 0xde, 0x5d,
- 0x8e, 0x88, 0xf4, 0xc3, 0x4b, 0x07, 0x0b, 0x3c, 0x79, 0xb8, 0xfc, 0x8f, 0x00, 0x00, 0x00, 0xff,
- 0xff, 0xdb, 0x09, 0x17, 0x4b, 0xda, 0x33, 0x00, 0x00,
-}
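
The regenerated stubs above introduce Unimplemented*Server types whose generated comments say they "can be embedded to have forward compatible implementations." As an illustration only (not part of this diff), here is a minimal Go sketch of that pattern for the external worker pool service; the workerPool struct and the server wiring are hypothetical, while UnimplementedBeamFnExternalWorkerPoolServer, the request/response types, and RegisterBeamFnExternalWorkerPoolServer are taken from the generated code above.

package main

import (
	"context"
	"log"
	"net"

	fnpb "github.com/apache/beam/sdks/go/pkg/beam/model/fnexecution_v1"
	"google.golang.org/grpc"
)

// workerPool is a hypothetical external worker pool. Embedding
// UnimplementedBeamFnExternalWorkerPoolServer keeps the type forward
// compatible: any RPC added to the service later is answered with
// codes.Unimplemented instead of breaking the build.
type workerPool struct {
	fnpb.UnimplementedBeamFnExternalWorkerPoolServer
}

// StartWorker overrides the embedded stub; the actual worker launch is elided.
func (p *workerPool) StartWorker(ctx context.Context, req *fnpb.StartWorkerRequest) (*fnpb.StartWorkerResponse, error) {
	// Launch an SDK harness process for the request here (hypothetical logic).
	return &fnpb.StartWorkerResponse{}, nil
}

// StopWorker is deliberately not overridden, so the embedded
// UnimplementedBeamFnExternalWorkerPoolServer handles it.

func main() {
	lis, err := net.Listen("tcp", "localhost:0")
	if err != nil {
		log.Fatal(err)
	}
	s := grpc.NewServer()
	fnpb.RegisterBeamFnExternalWorkerPoolServer(s, &workerPool{})
	log.Fatal(s.Serve(lis))
}
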
diff --git a/sdks/go/pkg/beam/model/fnexecution_v1/beam_provision_api.pb.go b/sdks/go/pkg/beam/model/fnexecution_v1/beam_provision_api.pb.go
index a0a51d9..cbaae18 100644
--- a/sdks/go/pkg/beam/model/fnexecution_v1/beam_provision_api.pb.go
+++ b/sdks/go/pkg/beam/model/fnexecution_v1/beam_provision_api.pb.go
@@ -3,14 +3,16 @@
package fnexecution_v1
-import proto "github.com/golang/protobuf/proto"
-import fmt "fmt"
-import math "math"
-import _struct "github.com/golang/protobuf/ptypes/struct"
-
import (
- context "golang.org/x/net/context"
+ context "context"
+ fmt "fmt"
+ pipeline_v1 "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
+ proto "github.com/golang/protobuf/proto"
+ _struct "github.com/golang/protobuf/ptypes/struct"
grpc "google.golang.org/grpc"
+ codes "google.golang.org/grpc/codes"
+ status "google.golang.org/grpc/status"
+ math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
@@ -22,7 +24,7 @@
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
-const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
// A request to get the provision info of a SDK harness worker instance.
type GetProvisionInfoRequest struct {
@@ -35,16 +37,17 @@
func (m *GetProvisionInfoRequest) String() string { return proto.CompactTextString(m) }
func (*GetProvisionInfoRequest) ProtoMessage() {}
func (*GetProvisionInfoRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_provision_api_8f4efccfb023d8b2, []int{0}
+ return fileDescriptor_92e393e5933c7d6f, []int{0}
}
+
func (m *GetProvisionInfoRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GetProvisionInfoRequest.Unmarshal(m, b)
}
func (m *GetProvisionInfoRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GetProvisionInfoRequest.Marshal(b, m, deterministic)
}
-func (dst *GetProvisionInfoRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_GetProvisionInfoRequest.Merge(dst, src)
+func (m *GetProvisionInfoRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_GetProvisionInfoRequest.Merge(m, src)
}
func (m *GetProvisionInfoRequest) XXX_Size() int {
return xxx_messageInfo_GetProvisionInfoRequest.Size(m)
@@ -67,16 +70,17 @@
func (m *GetProvisionInfoResponse) String() string { return proto.CompactTextString(m) }
func (*GetProvisionInfoResponse) ProtoMessage() {}
func (*GetProvisionInfoResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_provision_api_8f4efccfb023d8b2, []int{1}
+ return fileDescriptor_92e393e5933c7d6f, []int{1}
}
+
func (m *GetProvisionInfoResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GetProvisionInfoResponse.Unmarshal(m, b)
}
func (m *GetProvisionInfoResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GetProvisionInfoResponse.Marshal(b, m, deterministic)
}
-func (dst *GetProvisionInfoResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_GetProvisionInfoResponse.Merge(dst, src)
+func (m *GetProvisionInfoResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_GetProvisionInfoResponse.Merge(m, src)
}
func (m *GetProvisionInfoResponse) XXX_Size() int {
return xxx_messageInfo_GetProvisionInfoResponse.Size(m)
@@ -114,26 +118,32 @@
ResourceLimits *Resources `protobuf:"bytes,4,opt,name=resource_limits,json=resourceLimits,proto3" json:"resource_limits,omitempty"`
// (required) The artifact retrieval token produced by
// ArtifactStagingService.CommitManifestResponse.
- RetrievalToken string `protobuf:"bytes,6,opt,name=retrieval_token,json=retrievalToken,proto3" json:"retrieval_token,omitempty"`
- XXX_NoUnkeyedLiteral struct{} `json:"-"`
- XXX_unrecognized []byte `json:"-"`
- XXX_sizecache int32 `json:"-"`
+ RetrievalToken string `protobuf:"bytes,6,opt,name=retrieval_token,json=retrievalToken,proto3" json:"retrieval_token,omitempty"`
+ // (optional) The endpoint that the runner is hosting for the SDK to submit
+ // status reports to during pipeline execution. This field will only be
+ // populated if the runner supports SDK status reports. For more details see
+ // https://s.apache.org/beam-fn-api-harness-status
+ StatusEndpoint *pipeline_v1.ApiServiceDescriptor `protobuf:"bytes,7,opt,name=status_endpoint,json=statusEndpoint,proto3" json:"status_endpoint,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
}
func (m *ProvisionInfo) Reset() { *m = ProvisionInfo{} }
func (m *ProvisionInfo) String() string { return proto.CompactTextString(m) }
func (*ProvisionInfo) ProtoMessage() {}
func (*ProvisionInfo) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_provision_api_8f4efccfb023d8b2, []int{2}
+ return fileDescriptor_92e393e5933c7d6f, []int{2}
}
+
func (m *ProvisionInfo) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProvisionInfo.Unmarshal(m, b)
}
func (m *ProvisionInfo) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProvisionInfo.Marshal(b, m, deterministic)
}
-func (dst *ProvisionInfo) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProvisionInfo.Merge(dst, src)
+func (m *ProvisionInfo) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProvisionInfo.Merge(m, src)
}
func (m *ProvisionInfo) XXX_Size() int {
return xxx_messageInfo_ProvisionInfo.Size(m)
@@ -186,6 +196,13 @@
return ""
}
+func (m *ProvisionInfo) GetStatusEndpoint() *pipeline_v1.ApiServiceDescriptor {
+ if m != nil {
+ return m.StatusEndpoint
+ }
+ return nil
+}
+
// Resources specify limits for local resources, such memory and cpu. It
// is used to inform SDK harnesses of their allocated footprint.
type Resources struct {
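
The new status_endpoint field carries an ApiServiceDescriptor that is only populated when the runner supports SDK status reports. A hedged sketch of reading it through the generated nil-safe getters follows; reportTarget and the main wiring are illustrative and not part of the SDK, while GetInfo, GetStatusEndpoint, and GetUrl are assumed from the generated fnexecution_v1 and pipeline_v1 packages.

package main

import (
	"log"

	fnpb "github.com/apache/beam/sdks/go/pkg/beam/model/fnexecution_v1"
)

// reportTarget returns the URL of the runner's status endpoint, or "" when
// the runner did not populate the optional status_endpoint field.
func reportTarget(resp *fnpb.GetProvisionInfoResponse) string {
	ep := resp.GetInfo().GetStatusEndpoint() // generated getters are nil-safe
	if ep == nil {
		return ""
	}
	return ep.GetUrl()
}

func main() {
	// Hypothetical wiring: a real harness would receive resp from the
	// runner's ProvisionService.GetProvisionInfo call.
	var resp *fnpb.GetProvisionInfoResponse
	log.Printf("status endpoint: %q", reportTarget(resp))
}
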
@@ -205,16 +222,17 @@
func (m *Resources) String() string { return proto.CompactTextString(m) }
func (*Resources) ProtoMessage() {}
func (*Resources) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_provision_api_8f4efccfb023d8b2, []int{3}
+ return fileDescriptor_92e393e5933c7d6f, []int{3}
}
+
func (m *Resources) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Resources.Unmarshal(m, b)
}
func (m *Resources) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Resources.Marshal(b, m, deterministic)
}
-func (dst *Resources) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Resources.Merge(dst, src)
+func (m *Resources) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Resources.Merge(m, src)
}
func (m *Resources) XXX_Size() int {
return xxx_messageInfo_Resources.Size(m)
@@ -259,16 +277,17 @@
func (m *Resources_Memory) String() string { return proto.CompactTextString(m) }
func (*Resources_Memory) ProtoMessage() {}
func (*Resources_Memory) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_provision_api_8f4efccfb023d8b2, []int{3, 0}
+ return fileDescriptor_92e393e5933c7d6f, []int{3, 0}
}
+
func (m *Resources_Memory) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Resources_Memory.Unmarshal(m, b)
}
func (m *Resources_Memory) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Resources_Memory.Marshal(b, m, deterministic)
}
-func (dst *Resources_Memory) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Resources_Memory.Merge(dst, src)
+func (m *Resources_Memory) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Resources_Memory.Merge(m, src)
}
func (m *Resources_Memory) XXX_Size() int {
return xxx_messageInfo_Resources_Memory.Size(m)
@@ -300,16 +319,17 @@
func (m *Resources_Cpu) String() string { return proto.CompactTextString(m) }
func (*Resources_Cpu) ProtoMessage() {}
func (*Resources_Cpu) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_provision_api_8f4efccfb023d8b2, []int{3, 1}
+ return fileDescriptor_92e393e5933c7d6f, []int{3, 1}
}
+
func (m *Resources_Cpu) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Resources_Cpu.Unmarshal(m, b)
}
func (m *Resources_Cpu) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Resources_Cpu.Marshal(b, m, deterministic)
}
-func (dst *Resources_Cpu) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Resources_Cpu.Merge(dst, src)
+func (m *Resources_Cpu) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Resources_Cpu.Merge(m, src)
}
func (m *Resources_Cpu) XXX_Size() int {
return xxx_messageInfo_Resources_Cpu.Size(m)
@@ -340,16 +360,17 @@
func (m *Resources_Disk) String() string { return proto.CompactTextString(m) }
func (*Resources_Disk) ProtoMessage() {}
func (*Resources_Disk) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_provision_api_8f4efccfb023d8b2, []int{3, 2}
+ return fileDescriptor_92e393e5933c7d6f, []int{3, 2}
}
+
func (m *Resources_Disk) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Resources_Disk.Unmarshal(m, b)
}
func (m *Resources_Disk) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Resources_Disk.Marshal(b, m, deterministic)
}
-func (dst *Resources_Disk) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Resources_Disk.Merge(dst, src)
+func (m *Resources_Disk) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Resources_Disk.Merge(m, src)
}
func (m *Resources_Disk) XXX_Size() int {
return xxx_messageInfo_Resources_Disk.Size(m)
@@ -377,6 +398,47 @@
proto.RegisterType((*Resources_Disk)(nil), "org.apache.beam.model.fn_execution.v1.Resources.Disk")
}
+func init() { proto.RegisterFile("beam_provision_api.proto", fileDescriptor_92e393e5933c7d6f) }
+
+var fileDescriptor_92e393e5933c7d6f = []byte{
+ // 556 bytes of a gzipped FileDescriptorProto
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x9c, 0x93, 0xdb, 0x6e, 0xd3, 0x40,
+ 0x10, 0x86, 0x95, 0x43, 0xdd, 0x66, 0x80, 0x24, 0x5a, 0x01, 0x75, 0x4d, 0x91, 0x50, 0x04, 0x82,
+ 0xab, 0x2d, 0x2d, 0xa0, 0xde, 0x81, 0x9a, 0x86, 0x43, 0x24, 0xa0, 0x95, 0xcb, 0x0d, 0xdc, 0x18,
+ 0x1f, 0x26, 0xe9, 0x26, 0xf1, 0xee, 0xb2, 0xbb, 0x0e, 0x87, 0x27, 0xe0, 0x9e, 0x97, 0xe0, 0xc9,
+ 0x78, 0x0e, 0xe4, 0xb5, 0x1d, 0x5a, 0x20, 0x52, 0xda, 0x3b, 0x7b, 0x66, 0xff, 0x6f, 0xe7, 0x9f,
+ 0xd9, 0x01, 0x37, 0xc2, 0x30, 0x0d, 0xa4, 0x12, 0x73, 0xa6, 0x99, 0xe0, 0x41, 0x28, 0x19, 0x95,
+ 0x4a, 0x18, 0x41, 0xee, 0x09, 0x35, 0xa6, 0xa1, 0x0c, 0xe3, 0x53, 0xa4, 0xf9, 0x21, 0x9a, 0x8a,
+ 0x04, 0x67, 0x74, 0xc4, 0x03, 0xfc, 0x82, 0x71, 0x66, 0x98, 0xe0, 0x74, 0xbe, 0xeb, 0x75, 0x90,
+ 0x27, 0x52, 0x30, 0x6e, 0x74, 0xa1, 0xf3, 0xb6, 0xc7, 0x42, 0x8c, 0x67, 0xb8, 0x63, 0xff, 0xa2,
+ 0x6c, 0xb4, 0xa3, 0x8d, 0xca, 0x62, 0x53, 0x64, 0x7b, 0x5b, 0xb0, 0xf9, 0x12, 0xcd, 0x71, 0x75,
+ 0xdf, 0x90, 0x8f, 0x84, 0x8f, 0x9f, 0x32, 0xd4, 0xa6, 0x97, 0x80, 0xfb, 0x6f, 0x4a, 0x4b, 0xc1,
+ 0x35, 0x92, 0x57, 0xd0, 0x64, 0x7c, 0x24, 0xdc, 0xda, 0x9d, 0xda, 0x83, 0x2b, 0x7b, 0x8f, 0xe9,
+ 0x4a, 0xb5, 0xd1, 0xf3, 0x2c, 0x4b, 0xe8, 0x7d, 0x6f, 0xc0, 0xb5, 0x73, 0x71, 0x72, 0x03, 0x9c,
+ 0x89, 0x88, 0x02, 0x96, 0x58, 0x7a, 0xcb, 0x5f, 0x9b, 0x88, 0x68, 0x98, 0x90, 0x2d, 0xd8, 0xc8,
+ 0xc3, 0x3c, 0x4c, 0xd1, 0xad, 0xdb, 0xc4, 0xfa, 0x44, 0x44, 0x6f, 0xc3, 0x14, 0xc9, 0x2d, 0x68,
+ 0x7d, 0x16, 0x6a, 0x8a, 0x2a, 0x17, 0xad, 0xd9, 0xdc, 0x46, 0x11, 0x18, 0x26, 0xa4, 0x0f, 0x5d,
+ 0xc9, 0x24, 0xce, 0x18, 0xc7, 0x40, 0xc8, 0xbc, 0x14, 0xed, 0x36, 0x6c, 0xd9, 0x9b, 0xb4, 0x68,
+ 0x0d, 0xad, 0x5a, 0x43, 0x4f, 0x6c, 0x6b, 0xfc, 0x4e, 0x25, 0x38, 0x2a, 0xce, 0x93, 0xf7, 0xd0,
+ 0x51, 0xa8, 0x45, 0xa6, 0x62, 0x0c, 0x66, 0x2c, 0x65, 0x46, 0xbb, 0x4d, 0x8b, 0x78, 0xb8, 0xa2,
+ 0x73, 0xbf, 0x54, 0x6b, 0xbf, 0x5d, 0x81, 0x5e, 0x5b, 0x0e, 0xb9, 0x9f, 0xa3, 0x8d, 0x62, 0x38,
+ 0x0f, 0x67, 0x81, 0x11, 0x53, 0xe4, 0xae, 0x63, 0x1d, 0xb4, 0x17, 0xe1, 0x77, 0x79, 0x94, 0x7c,
+ 0x84, 0x8e, 0x36, 0xa1, 0xc9, 0x74, 0x50, 0x4d, 0xd8, 0x5d, 0xb7, 0x35, 0xec, 0x2f, 0xa9, 0xa1,
+ 0x32, 0x91, 0xdf, 0x7f, 0x20, 0xd9, 0x09, 0xaa, 0x39, 0x8b, 0x71, 0x80, 0x3a, 0x56, 0x4c, 0x1a,
+ 0xa1, 0xfc, 0x76, 0xc1, 0x7b, 0x5e, 0xe2, 0x7a, 0xbf, 0xea, 0xd0, 0x5a, 0x14, 0x4a, 0x8e, 0xc0,
+ 0x49, 0x31, 0x15, 0xea, 0x6b, 0x39, 0xe4, 0xfd, 0x8b, 0x5a, 0xa5, 0x6f, 0xac, 0xdc, 0x2f, 0x31,
+ 0xe4, 0x05, 0x34, 0x62, 0x99, 0xd9, 0xd9, 0xad, 0xfe, 0x64, 0xfe, 0xd0, 0x0e, 0x65, 0xe6, 0xe7,
+ 0x00, 0x32, 0x86, 0xeb, 0x1a, 0x53, 0x16, 0x48, 0x54, 0x9a, 0x69, 0x83, 0xdc, 0x04, 0x09, 0xd3,
+ 0xd3, 0x72, 0xa8, 0x4f, 0x2e, 0x0c, 0x1e, 0x30, 0x3d, 0xf5, 0x49, 0x8e, 0x3c, 0x5e, 0x10, 0xf3,
+ 0x98, 0xb7, 0x0d, 0x4e, 0x61, 0x81, 0x10, 0x68, 0x6a, 0xf6, 0x0d, 0x6d, 0x27, 0x9a, 0xbe, 0xfd,
+ 0xf6, 0x6e, 0x43, 0xe3, 0x50, 0x66, 0xe4, 0x26, 0x38, 0xfa, 0x34, 0x54, 0xa8, 0x6d, 0xb2, 0xee,
+ 0x97, 0x7f, 0x9e, 0x07, 0xcd, 0x1c, 0xf2, 0x3f, 0xe9, 0xde, 0xcf, 0x1a, 0x74, 0x17, 0x6f, 0xbe,
+ 0x9c, 0x0b, 0xf9, 0x51, 0x83, 0xee, 0xdf, 0xfb, 0x46, 0x9e, 0xae, 0xe8, 0x66, 0xc9, 0x0e, 0x7b,
+ 0xcf, 0x2e, 0xad, 0x2f, 0x16, 0xbd, 0x3f, 0x80, 0xbb, 0xcb, 0x08, 0x67, 0x01, 0xfd, 0xab, 0x0b,
+ 0xf9, 0x81, 0x64, 0x1f, 0xda, 0x67, 0xb2, 0xc1, 0x7c, 0x37, 0x72, 0xec, 0x86, 0x3d, 0xfa, 0x1d,
+ 0x00, 0x00, 0xff, 0xff, 0xb2, 0x30, 0x35, 0xd6, 0xde, 0x04, 0x00, 0x00,
+}
+
// Reference imports to suppress errors if they are not otherwise used.
var _ context.Context
var _ grpc.ClientConn
@@ -416,6 +478,14 @@
GetProvisionInfo(context.Context, *GetProvisionInfoRequest) (*GetProvisionInfoResponse, error)
}
+// UnimplementedProvisionServiceServer can be embedded to have forward compatible implementations.
+type UnimplementedProvisionServiceServer struct {
+}
+
+func (*UnimplementedProvisionServiceServer) GetProvisionInfo(ctx context.Context, req *GetProvisionInfoRequest) (*GetProvisionInfoResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method GetProvisionInfo not implemented")
+}
+
func RegisterProvisionServiceServer(s *grpc.Server, srv ProvisionServiceServer) {
s.RegisterService(&_ProvisionService_serviceDesc, srv)
}
@@ -450,43 +520,3 @@
Streams: []grpc.StreamDesc{},
Metadata: "beam_provision_api.proto",
}
-
-func init() {
- proto.RegisterFile("beam_provision_api.proto", fileDescriptor_beam_provision_api_8f4efccfb023d8b2)
-}
-
-var fileDescriptor_beam_provision_api_8f4efccfb023d8b2 = []byte{
- // 506 bytes of a gzipped FileDescriptorProto
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x9c, 0x93, 0xdb, 0x6e, 0xd3, 0x40,
- 0x10, 0x86, 0x95, 0x43, 0x4d, 0x33, 0x40, 0x1a, 0xad, 0x80, 0xba, 0xa6, 0x48, 0x28, 0x02, 0xc1,
- 0xd5, 0x96, 0x16, 0x10, 0x77, 0x20, 0xd2, 0x0a, 0x88, 0x04, 0xb4, 0x72, 0xb9, 0x81, 0x1b, 0xcb,
- 0x87, 0x49, 0xba, 0x49, 0xec, 0x59, 0x76, 0xd7, 0xe1, 0xf0, 0x1a, 0xbc, 0x03, 0xe2, 0xc9, 0x78,
- 0x0e, 0xb4, 0xeb, 0xc4, 0xb4, 0x40, 0xa5, 0x94, 0x3b, 0xfb, 0x9f, 0xfd, 0x3f, 0xcd, 0xfc, 0xbb,
- 0x03, 0x7e, 0x82, 0x71, 0x1e, 0x49, 0x45, 0x73, 0xa1, 0x05, 0x15, 0x51, 0x2c, 0x05, 0x97, 0x8a,
- 0x0c, 0xb1, 0xbb, 0xa4, 0xc6, 0x3c, 0x96, 0x71, 0x7a, 0x82, 0xdc, 0x1e, 0xe2, 0x39, 0x65, 0x38,
- 0xe3, 0xa3, 0x22, 0xc2, 0xcf, 0x98, 0x96, 0x46, 0x50, 0xc1, 0xe7, 0xbb, 0xc1, 0xf6, 0x98, 0x68,
- 0x3c, 0xc3, 0x1d, 0x67, 0x4a, 0xca, 0xd1, 0x8e, 0x36, 0xaa, 0x4c, 0x4d, 0x05, 0xe9, 0x6f, 0xc1,
- 0xe6, 0x4b, 0x34, 0x47, 0x4b, 0xfc, 0xb0, 0x18, 0x51, 0x88, 0x1f, 0x4b, 0xd4, 0xa6, 0x9f, 0x81,
- 0xff, 0x77, 0x49, 0x4b, 0x2a, 0x34, 0xb2, 0x57, 0xd0, 0x16, 0xc5, 0x88, 0xfc, 0xc6, 0xed, 0xc6,
- 0xfd, 0xcb, 0x7b, 0x8f, 0xf8, 0x4a, 0xad, 0xf0, 0xb3, 0x2c, 0x47, 0xe8, 0x7f, 0x6f, 0xc2, 0xd5,
- 0x33, 0x3a, 0xbb, 0x0e, 0xde, 0x84, 0x92, 0x48, 0x64, 0x8e, 0xde, 0x09, 0xd7, 0x26, 0x94, 0x0c,
- 0x33, 0xb6, 0x05, 0xeb, 0x56, 0x2e, 0xe2, 0x1c, 0xfd, 0xa6, 0x2b, 0x5c, 0x9a, 0x50, 0xf2, 0x36,
- 0xce, 0x91, 0xdd, 0x84, 0xce, 0x27, 0x52, 0x53, 0x54, 0xd6, 0xb4, 0xe6, 0x6a, 0xeb, 0x95, 0x30,
- 0xcc, 0xd8, 0x00, 0x7a, 0x52, 0x48, 0x9c, 0x89, 0x02, 0x23, 0x92, 0xb6, 0x15, 0xed, 0xb7, 0x5c,
- 0xdb, 0x9b, 0xbc, 0x8a, 0x86, 0x2f, 0xa3, 0xe1, 0xc7, 0x2e, 0x9a, 0x70, 0x63, 0x69, 0x38, 0xac,
- 0xce, 0xb3, 0xf7, 0xb0, 0xa1, 0x50, 0x53, 0xa9, 0x52, 0x8c, 0x66, 0x22, 0x17, 0x46, 0xfb, 0x6d,
- 0x87, 0x78, 0xb0, 0xe2, 0xe4, 0xe1, 0xc2, 0xad, 0xc3, 0xee, 0x12, 0xf4, 0xda, 0x71, 0xd8, 0x3d,
- 0x8b, 0x36, 0x4a, 0xe0, 0x3c, 0x9e, 0x45, 0x86, 0xa6, 0x58, 0xf8, 0x9e, 0x9b, 0xa0, 0x5b, 0xcb,
- 0xef, 0xac, 0xda, 0xff, 0xd9, 0x84, 0x4e, 0x8d, 0x61, 0x87, 0xe0, 0xe5, 0x98, 0x93, 0xfa, 0xb2,
- 0xb8, 0x82, 0x27, 0x17, 0x6d, 0x84, 0xbf, 0x71, 0xf6, 0x70, 0x81, 0x61, 0x2f, 0xa0, 0x95, 0xca,
- 0xd2, 0x25, 0xbb, 0xfa, 0x85, 0xfe, 0xa6, 0xed, 0xcb, 0x32, 0xb4, 0x00, 0x36, 0x86, 0x6b, 0x1a,
- 0x73, 0x11, 0x49, 0x54, 0x5a, 0x68, 0x83, 0x85, 0x89, 0x32, 0xa1, 0xa7, 0x8b, 0xc8, 0x1f, 0x5f,
- 0x18, 0x7c, 0x20, 0xf4, 0x34, 0x64, 0x16, 0x79, 0x54, 0x13, 0xad, 0x16, 0x6c, 0x83, 0x57, 0x8d,
- 0xc0, 0x18, 0xb4, 0xb5, 0xf8, 0x8a, 0x2e, 0x89, 0x76, 0xe8, 0xbe, 0x83, 0x5b, 0xd0, 0xda, 0x97,
- 0x25, 0xbb, 0x01, 0x9e, 0x3e, 0x89, 0x15, 0x6a, 0x57, 0x6c, 0x86, 0x8b, 0xbf, 0x20, 0x80, 0xb6,
- 0x85, 0xfc, 0xcb, 0xba, 0xf7, 0xa3, 0x01, 0xbd, 0xfa, 0x45, 0x1e, 0xa3, 0x9a, 0x8b, 0x14, 0xd9,
- 0xb7, 0x06, 0xf4, 0xfe, 0xdc, 0x06, 0xf6, 0x74, 0xc5, 0x69, 0xce, 0xd9, 0xb0, 0xe0, 0xd9, 0x7f,
- 0xfb, 0xab, 0x35, 0x1c, 0x1c, 0xc0, 0x9d, 0xf3, 0x08, 0xa7, 0x01, 0x83, 0x2b, 0xb5, 0xfd, 0xb9,
- 0x14, 0x1f, 0xba, 0xa7, 0xaa, 0xd1, 0x7c, 0x37, 0xf1, 0xdc, 0xfb, 0x7f, 0xf8, 0x2b, 0x00, 0x00,
- 0xff, 0xff, 0xb1, 0xb0, 0xe9, 0xeb, 0x6b, 0x04, 0x00, 0x00,
-}
diff --git a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_artifact_api.pb.go b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_artifact_api.pb.go
index d2c8261..3c0c138 100644
--- a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_artifact_api.pb.go
+++ b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_artifact_api.pb.go
@@ -3,13 +3,15 @@
package jobmanagement_v1
-import proto "github.com/golang/protobuf/proto"
-import fmt "fmt"
-import math "math"
-
import (
- context "golang.org/x/net/context"
+ context "context"
+ fmt "fmt"
+ _ "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
+ proto "github.com/golang/protobuf/proto"
grpc "google.golang.org/grpc"
+ codes "google.golang.org/grpc/codes"
+ status "google.golang.org/grpc/status"
+ math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
@@ -21,7 +23,30 @@
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
-const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
+
+type CommitManifestResponse_Constants int32
+
+const (
+ // Token indicating that no artifacts were staged and therefore no retrieval attempt is necessary.
+ CommitManifestResponse_NO_ARTIFACTS_STAGED_TOKEN CommitManifestResponse_Constants = 0
+)
+
+var CommitManifestResponse_Constants_name = map[int32]string{
+ 0: "NO_ARTIFACTS_STAGED_TOKEN",
+}
+
+var CommitManifestResponse_Constants_value = map[string]int32{
+ "NO_ARTIFACTS_STAGED_TOKEN": 0,
+}
+
+func (x CommitManifestResponse_Constants) String() string {
+ return proto.EnumName(CommitManifestResponse_Constants_name, int32(x))
+}
+
+func (CommitManifestResponse_Constants) EnumDescriptor() ([]byte, []int) {
+ return fileDescriptor_8ef4db42c81e3972, []int{11, 0}
+}
// An artifact identifier and associated metadata.
type ArtifactMetadata struct {
@@ -41,16 +66,17 @@
func (m *ArtifactMetadata) String() string { return proto.CompactTextString(m) }
func (*ArtifactMetadata) ProtoMessage() {}
func (*ArtifactMetadata) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_artifact_api_09b5b695a8be46db, []int{0}
+ return fileDescriptor_8ef4db42c81e3972, []int{0}
}
+
func (m *ArtifactMetadata) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ArtifactMetadata.Unmarshal(m, b)
}
func (m *ArtifactMetadata) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ArtifactMetadata.Marshal(b, m, deterministic)
}
-func (dst *ArtifactMetadata) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ArtifactMetadata.Merge(dst, src)
+func (m *ArtifactMetadata) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ArtifactMetadata.Merge(m, src)
}
func (m *ArtifactMetadata) XXX_Size() int {
return xxx_messageInfo_ArtifactMetadata.Size(m)
@@ -94,16 +120,17 @@
func (m *Manifest) String() string { return proto.CompactTextString(m) }
func (*Manifest) ProtoMessage() {}
func (*Manifest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_artifact_api_09b5b695a8be46db, []int{1}
+ return fileDescriptor_8ef4db42c81e3972, []int{1}
}
+
func (m *Manifest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Manifest.Unmarshal(m, b)
}
func (m *Manifest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Manifest.Marshal(b, m, deterministic)
}
-func (dst *Manifest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Manifest.Merge(dst, src)
+func (m *Manifest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Manifest.Merge(m, src)
}
func (m *Manifest) XXX_Size() int {
return xxx_messageInfo_Manifest.Size(m)
@@ -134,16 +161,17 @@
func (m *ProxyManifest) String() string { return proto.CompactTextString(m) }
func (*ProxyManifest) ProtoMessage() {}
func (*ProxyManifest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_artifact_api_09b5b695a8be46db, []int{2}
+ return fileDescriptor_8ef4db42c81e3972, []int{2}
}
+
func (m *ProxyManifest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProxyManifest.Unmarshal(m, b)
}
func (m *ProxyManifest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProxyManifest.Marshal(b, m, deterministic)
}
-func (dst *ProxyManifest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProxyManifest.Merge(dst, src)
+func (m *ProxyManifest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProxyManifest.Merge(m, src)
}
func (m *ProxyManifest) XXX_Size() int {
return xxx_messageInfo_ProxyManifest.Size(m)
@@ -180,16 +208,17 @@
func (m *ProxyManifest_Location) String() string { return proto.CompactTextString(m) }
func (*ProxyManifest_Location) ProtoMessage() {}
func (*ProxyManifest_Location) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_artifact_api_09b5b695a8be46db, []int{2, 0}
+ return fileDescriptor_8ef4db42c81e3972, []int{2, 0}
}
+
func (m *ProxyManifest_Location) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProxyManifest_Location.Unmarshal(m, b)
}
func (m *ProxyManifest_Location) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProxyManifest_Location.Marshal(b, m, deterministic)
}
-func (dst *ProxyManifest_Location) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProxyManifest_Location.Merge(dst, src)
+func (m *ProxyManifest_Location) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProxyManifest_Location.Merge(m, src)
}
func (m *ProxyManifest_Location) XXX_Size() int {
return xxx_messageInfo_ProxyManifest_Location.Size(m)
@@ -228,16 +257,17 @@
func (m *GetManifestRequest) String() string { return proto.CompactTextString(m) }
func (*GetManifestRequest) ProtoMessage() {}
func (*GetManifestRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_artifact_api_09b5b695a8be46db, []int{3}
+ return fileDescriptor_8ef4db42c81e3972, []int{3}
}
+
func (m *GetManifestRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GetManifestRequest.Unmarshal(m, b)
}
func (m *GetManifestRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GetManifestRequest.Marshal(b, m, deterministic)
}
-func (dst *GetManifestRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_GetManifestRequest.Merge(dst, src)
+func (m *GetManifestRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_GetManifestRequest.Merge(m, src)
}
func (m *GetManifestRequest) XXX_Size() int {
return xxx_messageInfo_GetManifestRequest.Size(m)
@@ -267,16 +297,17 @@
func (m *GetManifestResponse) String() string { return proto.CompactTextString(m) }
func (*GetManifestResponse) ProtoMessage() {}
func (*GetManifestResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_artifact_api_09b5b695a8be46db, []int{4}
+ return fileDescriptor_8ef4db42c81e3972, []int{4}
}
+
func (m *GetManifestResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GetManifestResponse.Unmarshal(m, b)
}
func (m *GetManifestResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GetManifestResponse.Marshal(b, m, deterministic)
}
-func (dst *GetManifestResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_GetManifestResponse.Merge(dst, src)
+func (m *GetManifestResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_GetManifestResponse.Merge(m, src)
}
func (m *GetManifestResponse) XXX_Size() int {
return xxx_messageInfo_GetManifestResponse.Size(m)
@@ -310,16 +341,17 @@
func (m *GetArtifactRequest) String() string { return proto.CompactTextString(m) }
func (*GetArtifactRequest) ProtoMessage() {}
func (*GetArtifactRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_artifact_api_09b5b695a8be46db, []int{5}
+ return fileDescriptor_8ef4db42c81e3972, []int{5}
}
+
func (m *GetArtifactRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GetArtifactRequest.Unmarshal(m, b)
}
func (m *GetArtifactRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GetArtifactRequest.Marshal(b, m, deterministic)
}
-func (dst *GetArtifactRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_GetArtifactRequest.Merge(dst, src)
+func (m *GetArtifactRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_GetArtifactRequest.Merge(m, src)
}
func (m *GetArtifactRequest) XXX_Size() int {
return xxx_messageInfo_GetArtifactRequest.Size(m)
@@ -356,16 +388,17 @@
func (m *ArtifactChunk) String() string { return proto.CompactTextString(m) }
func (*ArtifactChunk) ProtoMessage() {}
func (*ArtifactChunk) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_artifact_api_09b5b695a8be46db, []int{6}
+ return fileDescriptor_8ef4db42c81e3972, []int{6}
}
+
func (m *ArtifactChunk) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ArtifactChunk.Unmarshal(m, b)
}
func (m *ArtifactChunk) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ArtifactChunk.Marshal(b, m, deterministic)
}
-func (dst *ArtifactChunk) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ArtifactChunk.Merge(dst, src)
+func (m *ArtifactChunk) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ArtifactChunk.Merge(m, src)
}
func (m *ArtifactChunk) XXX_Size() int {
return xxx_messageInfo_ArtifactChunk.Size(m)
@@ -398,16 +431,17 @@
func (m *PutArtifactMetadata) String() string { return proto.CompactTextString(m) }
func (*PutArtifactMetadata) ProtoMessage() {}
func (*PutArtifactMetadata) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_artifact_api_09b5b695a8be46db, []int{7}
+ return fileDescriptor_8ef4db42c81e3972, []int{7}
}
+
func (m *PutArtifactMetadata) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_PutArtifactMetadata.Unmarshal(m, b)
}
func (m *PutArtifactMetadata) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_PutArtifactMetadata.Marshal(b, m, deterministic)
}
-func (dst *PutArtifactMetadata) XXX_Merge(src proto.Message) {
- xxx_messageInfo_PutArtifactMetadata.Merge(dst, src)
+func (m *PutArtifactMetadata) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_PutArtifactMetadata.Merge(m, src)
}
func (m *PutArtifactMetadata) XXX_Size() int {
return xxx_messageInfo_PutArtifactMetadata.Size(m)
@@ -449,16 +483,17 @@
func (m *PutArtifactRequest) String() string { return proto.CompactTextString(m) }
func (*PutArtifactRequest) ProtoMessage() {}
func (*PutArtifactRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_artifact_api_09b5b695a8be46db, []int{8}
+ return fileDescriptor_8ef4db42c81e3972, []int{8}
}
+
func (m *PutArtifactRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_PutArtifactRequest.Unmarshal(m, b)
}
func (m *PutArtifactRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_PutArtifactRequest.Marshal(b, m, deterministic)
}
-func (dst *PutArtifactRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_PutArtifactRequest.Merge(dst, src)
+func (m *PutArtifactRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_PutArtifactRequest.Merge(m, src)
}
func (m *PutArtifactRequest) XXX_Size() int {
return xxx_messageInfo_PutArtifactRequest.Size(m)
@@ -476,12 +511,14 @@
type PutArtifactRequest_Metadata struct {
Metadata *PutArtifactMetadata `protobuf:"bytes,1,opt,name=metadata,proto3,oneof"`
}
+
type PutArtifactRequest_Data struct {
Data *ArtifactChunk `protobuf:"bytes,2,opt,name=data,proto3,oneof"`
}
func (*PutArtifactRequest_Metadata) isPutArtifactRequest_Content() {}
-func (*PutArtifactRequest_Data) isPutArtifactRequest_Content() {}
+
+func (*PutArtifactRequest_Data) isPutArtifactRequest_Content() {}
func (m *PutArtifactRequest) GetContent() isPutArtifactRequest_Content {
if m != nil {
@@ -504,80 +541,14 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*PutArtifactRequest) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _PutArtifactRequest_OneofMarshaler, _PutArtifactRequest_OneofUnmarshaler, _PutArtifactRequest_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*PutArtifactRequest) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*PutArtifactRequest_Metadata)(nil),
(*PutArtifactRequest_Data)(nil),
}
}
-func _PutArtifactRequest_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*PutArtifactRequest)
- // content
- switch x := m.Content.(type) {
- case *PutArtifactRequest_Metadata:
- b.EncodeVarint(1<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Metadata); err != nil {
- return err
- }
- case *PutArtifactRequest_Data:
- b.EncodeVarint(2<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Data); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("PutArtifactRequest.Content has unexpected type %T", x)
- }
- return nil
-}
-
-func _PutArtifactRequest_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*PutArtifactRequest)
- switch tag {
- case 1: // content.metadata
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(PutArtifactMetadata)
- err := b.DecodeMessage(msg)
- m.Content = &PutArtifactRequest_Metadata{msg}
- return true, err
- case 2: // content.data
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(ArtifactChunk)
- err := b.DecodeMessage(msg)
- m.Content = &PutArtifactRequest_Data{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _PutArtifactRequest_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*PutArtifactRequest)
- // content
- switch x := m.Content.(type) {
- case *PutArtifactRequest_Metadata:
- s := proto.Size(x.Metadata)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *PutArtifactRequest_Data:
- s := proto.Size(x.Data)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
type PutArtifactResponse struct {
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
@@ -588,16 +559,17 @@
func (m *PutArtifactResponse) String() string { return proto.CompactTextString(m) }
func (*PutArtifactResponse) ProtoMessage() {}
func (*PutArtifactResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_artifact_api_09b5b695a8be46db, []int{9}
+ return fileDescriptor_8ef4db42c81e3972, []int{9}
}
+
func (m *PutArtifactResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_PutArtifactResponse.Unmarshal(m, b)
}
func (m *PutArtifactResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_PutArtifactResponse.Marshal(b, m, deterministic)
}
-func (dst *PutArtifactResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_PutArtifactResponse.Merge(dst, src)
+func (m *PutArtifactResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_PutArtifactResponse.Merge(m, src)
}
func (m *PutArtifactResponse) XXX_Size() int {
return xxx_messageInfo_PutArtifactResponse.Size(m)
@@ -625,16 +597,17 @@
func (m *CommitManifestRequest) String() string { return proto.CompactTextString(m) }
func (*CommitManifestRequest) ProtoMessage() {}
func (*CommitManifestRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_artifact_api_09b5b695a8be46db, []int{10}
+ return fileDescriptor_8ef4db42c81e3972, []int{10}
}
+
func (m *CommitManifestRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_CommitManifestRequest.Unmarshal(m, b)
}
func (m *CommitManifestRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_CommitManifestRequest.Marshal(b, m, deterministic)
}
-func (dst *CommitManifestRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_CommitManifestRequest.Merge(dst, src)
+func (m *CommitManifestRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_CommitManifestRequest.Merge(m, src)
}
func (m *CommitManifestRequest) XXX_Size() int {
return xxx_messageInfo_CommitManifestRequest.Size(m)
@@ -674,16 +647,17 @@
func (m *CommitManifestResponse) String() string { return proto.CompactTextString(m) }
func (*CommitManifestResponse) ProtoMessage() {}
func (*CommitManifestResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_artifact_api_09b5b695a8be46db, []int{11}
+ return fileDescriptor_8ef4db42c81e3972, []int{11}
}
+
func (m *CommitManifestResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_CommitManifestResponse.Unmarshal(m, b)
}
func (m *CommitManifestResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_CommitManifestResponse.Marshal(b, m, deterministic)
}
-func (dst *CommitManifestResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_CommitManifestResponse.Merge(dst, src)
+func (m *CommitManifestResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_CommitManifestResponse.Merge(m, src)
}
func (m *CommitManifestResponse) XXX_Size() int {
return xxx_messageInfo_CommitManifestResponse.Size(m)
@@ -702,6 +676,7 @@
}
func init() {
+ proto.RegisterEnum("org.apache.beam.model.job_management.v1.CommitManifestResponse_Constants", CommitManifestResponse_Constants_name, CommitManifestResponse_Constants_value)
proto.RegisterType((*ArtifactMetadata)(nil), "org.apache.beam.model.job_management.v1.ArtifactMetadata")
proto.RegisterType((*Manifest)(nil), "org.apache.beam.model.job_management.v1.Manifest")
proto.RegisterType((*ProxyManifest)(nil), "org.apache.beam.model.job_management.v1.ProxyManifest")
@@ -717,6 +692,56 @@
proto.RegisterType((*CommitManifestResponse)(nil), "org.apache.beam.model.job_management.v1.CommitManifestResponse")
}
+func init() { proto.RegisterFile("beam_artifact_api.proto", fileDescriptor_8ef4db42c81e3972) }
+
+var fileDescriptor_8ef4db42c81e3972 = []byte{
+ // 690 bytes of a gzipped FileDescriptorProto
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x56, 0xcf, 0x4f, 0x13, 0x41,
+ 0x18, 0x65, 0x0a, 0x62, 0xfb, 0x55, 0xb0, 0x19, 0x52, 0xa8, 0x4d, 0x4c, 0x9a, 0x35, 0x91, 0x9e,
+ 0x36, 0x50, 0x23, 0x89, 0x11, 0x25, 0xa5, 0x22, 0x10, 0xf9, 0xe5, 0xb6, 0x5e, 0xf0, 0x30, 0x0e,
+ 0xed, 0x50, 0x56, 0xd8, 0x99, 0xba, 0x3b, 0x6d, 0xf4, 0xee, 0xc1, 0x18, 0x2f, 0x5e, 0x3d, 0x79,
+ 0x36, 0x1e, 0xbd, 0x78, 0xf5, 0xe6, 0x1f, 0xe3, 0x3f, 0xe0, 0xc9, 0xec, 0xec, 0xce, 0xd2, 0xa5,
+ 0x25, 0xd9, 0x56, 0x6e, 0xd3, 0x99, 0xbe, 0x37, 0xef, 0x7b, 0xdf, 0x9b, 0x2f, 0x0b, 0x0b, 0x47,
+ 0x8c, 0x3a, 0x84, 0xba, 0xd2, 0x3e, 0xa6, 0x4d, 0x49, 0x68, 0xc7, 0x36, 0x3b, 0xae, 0x90, 0x02,
+ 0x2f, 0x0a, 0xb7, 0x6d, 0xd2, 0x0e, 0x6d, 0x9e, 0x30, 0xd3, 0xff, 0x8f, 0xe9, 0x88, 0x16, 0x3b,
+ 0x33, 0x5f, 0x8b, 0x23, 0xe2, 0x50, 0x4e, 0xdb, 0xcc, 0x61, 0x5c, 0x9a, 0xbd, 0xe5, 0x62, 0x5e,
+ 0x31, 0xb8, 0x5d, 0xce, 0x99, 0x7b, 0x8e, 0x37, 0x5e, 0x41, 0xae, 0x1a, 0xb2, 0xee, 0x32, 0x49,
+ 0x5b, 0x54, 0x52, 0x8c, 0x61, 0x8a, 0x53, 0x87, 0x15, 0x50, 0x09, 0x95, 0x33, 0x96, 0x5a, 0xe3,
+ 0x12, 0x64, 0x3b, 0xcc, 0x75, 0x6c, 0xcf, 0xb3, 0x05, 0xf7, 0x0a, 0xa9, 0x12, 0x2a, 0xcf, 0x58,
+ 0xfd, 0x5b, 0x78, 0x1e, 0xa6, 0xbd, 0x13, 0x5a, 0xb9, 0xbf, 0x52, 0x98, 0x52, 0xb8, 0xf0, 0x97,
+ 0x41, 0x21, 0xbd, 0x4b, 0xb9, 0x7d, 0xcc, 0x3c, 0x89, 0x5f, 0x40, 0x5a, 0xd7, 0x50, 0x40, 0xa5,
+ 0xc9, 0x72, 0xb6, 0xf2, 0xc0, 0x4c, 0x58, 0x80, 0x79, 0x51, 0xa6, 0x15, 0x51, 0x19, 0x7f, 0x10,
+ 0xcc, 0x1c, 0xb8, 0xe2, 0xed, 0xbb, 0xe8, 0xa2, 0x5d, 0x48, 0x3b, 0xe1, 0x5a, 0x95, 0x91, 0xad,
+ 0x2c, 0x27, 0xbe, 0x48, 0x93, 0x58, 0x11, 0x05, 0x7e, 0x09, 0xe9, 0x33, 0xd1, 0xa4, 0xd2, 0x16,
+ 0xbc, 0x90, 0x52, 0xba, 0xd7, 0x12, 0xd3, 0xc5, 0x84, 0x99, 0x3b, 0x21, 0x8d, 0x15, 0x11, 0x16,
+ 0x97, 0x20, 0xad, 0x77, 0x87, 0x5a, 0x9f, 0x83, 0xc9, 0xae, 0x6b, 0x2b, 0xcb, 0x33, 0x96, 0xbf,
+ 0x34, 0x1e, 0x01, 0xde, 0x64, 0x32, 0xd2, 0xc9, 0xde, 0x74, 0x7d, 0x91, 0x8b, 0x70, 0xd3, 0x65,
+ 0xd2, 0xb5, 0x59, 0x8f, 0x9e, 0x11, 0x29, 0x4e, 0x19, 0x0f, 0x69, 0x66, 0xa3, 0xed, 0x86, 0xbf,
+ 0x6b, 0xb4, 0x60, 0x2e, 0x06, 0xf7, 0x3a, 0x82, 0x7b, 0xec, 0x8a, 0x3d, 0x33, 0x9e, 0x2b, 0x91,
+ 0xba, 0x6b, 0x5a, 0xe4, 0xb0, 0x02, 0x87, 0x08, 0x4f, 0x0d, 0x15, 0x7e, 0x07, 0x66, 0x34, 0x5f,
+ 0xed, 0xa4, 0xcb, 0x4f, 0x7d, 0x36, 0x3f, 0x0a, 0x8a, 0xed, 0x86, 0xa5, 0xd6, 0xc6, 0x57, 0x04,
+ 0x73, 0x07, 0x5d, 0x39, 0x90, 0xea, 0x0a, 0xe4, 0x3d, 0x49, 0xdb, 0x36, 0x6f, 0x13, 0x8f, 0xa9,
+ 0xcc, 0xc6, 0x4c, 0x9a, 0x0b, 0x0f, 0xeb, 0xc1, 0x99, 0xba, 0xd0, 0xcf, 0xab, 0x13, 0xe2, 0x95,
+ 0xa4, 0xff, 0xcb, 0xab, 0xa6, 0x32, 0x7e, 0x23, 0xc0, 0x7d, 0x12, 0xb5, 0x37, 0x87, 0x7d, 0xb7,
+ 0x05, 0x0d, 0x58, 0x4d, 0x9e, 0xb2, 0xc1, 0x8a, 0xb7, 0x26, 0xce, 0xaf, 0xc4, 0x3b, 0xa1, 0x53,
+ 0x41, 0x15, 0x2b, 0x23, 0x57, 0xa1, 0xfc, 0xde, 0x9a, 0x08, 0x3c, 0x5e, 0xcf, 0xc0, 0xf5, 0xa6,
+ 0xe0, 0x92, 0x71, 0x69, 0xe4, 0x63, 0x6e, 0xeb, 0x30, 0x19, 0x5f, 0x10, 0xe4, 0x6b, 0xc2, 0x71,
+ 0xec, 0x81, 0x98, 0x5e, 0xf1, 0xd3, 0xbc, 0xb4, 0xad, 0xa9, 0x4b, 0xdb, 0x6a, 0x7c, 0x42, 0x30,
+ 0x7f, 0x51, 0x5c, 0xf8, 0x08, 0x12, 0x3f, 0xa2, 0x6d, 0xc8, 0xd4, 0x04, 0xf7, 0x24, 0xe5, 0xd2,
+ 0xc3, 0xab, 0x70, 0x6b, 0x6f, 0x9f, 0x54, 0xad, 0xc6, 0xf6, 0xd3, 0x6a, 0xad, 0x51, 0x27, 0xf5,
+ 0x46, 0x75, 0x73, 0xe3, 0x09, 0x69, 0xec, 0x3f, 0xdb, 0xd8, 0xcb, 0x4d, 0x14, 0x6f, 0x7f, 0xfb,
+ 0xf1, 0xf7, 0xd7, 0xb5, 0x05, 0x42, 0xb8, 0x88, 0x66, 0xb8, 0x47, 0x7c, 0x65, 0xac, 0x45, 0x48,
+ 0xe5, 0x67, 0x0a, 0xe6, 0xb5, 0x81, 0x75, 0x2d, 0xd7, 0xed, 0xd9, 0x4d, 0x86, 0x3f, 0x22, 0xc8,
+ 0xf6, 0xd9, 0x8b, 0x1f, 0x8e, 0x13, 0x88, 0xd0, 0xf9, 0xe2, 0xea, 0x78, 0xe0, 0xc0, 0x99, 0x32,
+ 0xc2, 0x9f, 0x11, 0xcc, 0xc6, 0x6d, 0xc3, 0x8f, 0x13, 0x53, 0x0e, 0x0d, 0x43, 0x71, 0x6d, 0x6c,
+ 0x7c, 0xa0, 0xaa, 0xf2, 0x3d, 0x05, 0x85, 0x73, 0xa9, 0x61, 0x87, 0xb4, 0x7b, 0x1f, 0x10, 0x64,
+ 0xfb, 0x26, 0xdd, 0x08, 0xee, 0x0d, 0x8e, 0xd7, 0x11, 0xdc, 0x1b, 0x36, 0x5c, 0xdf, 0x07, 0x52,
+ 0xc6, 0x68, 0xe4, 0xe0, 0x10, 0x2d, 0x8e, 0xf9, 0x7c, 0x97, 0xd0, 0xfa, 0x26, 0xdc, 0xbd, 0x14,
+ 0x1a, 0x43, 0xae, 0x67, 0x35, 0xb4, 0xda, 0xb1, 0x0f, 0x73, 0xb1, 0x63, 0xd2, 0x5b, 0x3e, 0x9a,
+ 0x56, 0x9f, 0x0f, 0xf7, 0xfe, 0x05, 0x00, 0x00, 0xff, 0xff, 0x11, 0x85, 0xb1, 0xe8, 0x99, 0x08,
+ 0x00, 0x00,
+}
+
// Reference imports to suppress errors if they are not otherwise used.
var _ context.Context
var _ grpc.ClientConn
@@ -804,6 +829,17 @@
CommitManifest(context.Context, *CommitManifestRequest) (*CommitManifestResponse, error)
}
+// UnimplementedArtifactStagingServiceServer can be embedded to have forward compatible implementations.
+type UnimplementedArtifactStagingServiceServer struct {
+}
+
+func (*UnimplementedArtifactStagingServiceServer) PutArtifact(srv ArtifactStagingService_PutArtifactServer) error {
+ return status.Errorf(codes.Unimplemented, "method PutArtifact not implemented")
+}
+func (*UnimplementedArtifactStagingServiceServer) CommitManifest(ctx context.Context, req *CommitManifestRequest) (*CommitManifestResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method CommitManifest not implemented")
+}
+
func RegisterArtifactStagingServiceServer(s *grpc.Server, srv ArtifactStagingServiceServer) {
s.RegisterService(&_ArtifactStagingService_serviceDesc, srv)
}
@@ -938,6 +974,17 @@
GetArtifact(*GetArtifactRequest, ArtifactRetrievalService_GetArtifactServer) error
}
+// UnimplementedArtifactRetrievalServiceServer can be embedded to have forward compatible implementations.
+type UnimplementedArtifactRetrievalServiceServer struct {
+}
+
+func (*UnimplementedArtifactRetrievalServiceServer) GetManifest(ctx context.Context, req *GetManifestRequest) (*GetManifestResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method GetManifest not implemented")
+}
+func (*UnimplementedArtifactRetrievalServiceServer) GetArtifact(req *GetArtifactRequest, srv ArtifactRetrievalService_GetArtifactServer) error {
+ return status.Errorf(codes.Unimplemented, "method GetArtifact not implemented")
+}
+
func RegisterArtifactRetrievalServiceServer(s *grpc.Server, srv ArtifactRetrievalServiceServer) {
s.RegisterService(&_ArtifactRetrievalService_serviceDesc, srv)
}
@@ -999,50 +1046,3 @@
},
Metadata: "beam_artifact_api.proto",
}
-
-func init() {
- proto.RegisterFile("beam_artifact_api.proto", fileDescriptor_beam_artifact_api_09b5b695a8be46db)
-}
-
-var fileDescriptor_beam_artifact_api_09b5b695a8be46db = []byte{
- // 618 bytes of a gzipped FileDescriptorProto
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x96, 0xc1, 0x6e, 0xd3, 0x4c,
- 0x10, 0xc7, 0xbb, 0x6e, 0xd5, 0x2f, 0x1d, 0x7f, 0x2d, 0xd5, 0x56, 0x2d, 0x56, 0x4e, 0x91, 0x91,
- 0x68, 0x4e, 0x56, 0x6b, 0x44, 0x25, 0x44, 0xa1, 0x6a, 0x7a, 0x68, 0x0f, 0x8d, 0x54, 0x5c, 0xb8,
- 0x94, 0x83, 0xd9, 0x24, 0xdb, 0x64, 0x69, 0xbc, 0x6b, 0xec, 0x4d, 0x04, 0x77, 0x0e, 0x88, 0x1b,
- 0x57, 0x4e, 0x3c, 0x00, 0x2f, 0xc0, 0x23, 0xf0, 0x30, 0xbc, 0x03, 0xf2, 0xda, 0xeb, 0xc6, 0x8d,
- 0x23, 0x39, 0xa1, 0xb7, 0xcd, 0x6e, 0xe6, 0x3f, 0xbf, 0xf9, 0xcf, 0xec, 0xca, 0xf0, 0xb0, 0x43,
- 0x49, 0xe0, 0x93, 0x48, 0xb2, 0x6b, 0xd2, 0x95, 0x3e, 0x09, 0x99, 0x13, 0x46, 0x42, 0x0a, 0xbc,
- 0x2b, 0xa2, 0xbe, 0x43, 0x42, 0xd2, 0x1d, 0x50, 0x27, 0xf9, 0x8f, 0x13, 0x88, 0x1e, 0x1d, 0x3a,
- 0xef, 0x45, 0xc7, 0x0f, 0x08, 0x27, 0x7d, 0x1a, 0x50, 0x2e, 0x9d, 0xf1, 0xbe, 0xfd, 0x0e, 0x36,
- 0x8f, 0xb3, 0xf0, 0x36, 0x95, 0xa4, 0x47, 0x24, 0xc1, 0x18, 0x56, 0x38, 0x09, 0xa8, 0x85, 0x1a,
- 0xa8, 0xb9, 0xe6, 0xa9, 0x35, 0x6e, 0x80, 0x19, 0xd2, 0x28, 0x60, 0x71, 0xcc, 0x04, 0x8f, 0x2d,
- 0xa3, 0x81, 0x9a, 0xeb, 0xde, 0xe4, 0x16, 0xde, 0x81, 0xd5, 0x78, 0x40, 0xdc, 0xa7, 0x07, 0xd6,
- 0x8a, 0x8a, 0xcb, 0x7e, 0xd9, 0x04, 0x6a, 0x6d, 0xc2, 0xd9, 0x35, 0x8d, 0x25, 0x7e, 0x03, 0x35,
- 0x0d, 0x6b, 0xa1, 0xc6, 0x72, 0xd3, 0x74, 0x9f, 0x39, 0x15, 0x49, 0x9d, 0xbb, 0x98, 0x5e, 0x2e,
- 0x65, 0xff, 0x41, 0xb0, 0x7e, 0x11, 0x89, 0x8f, 0x9f, 0xf2, 0x44, 0x6d, 0xa8, 0x05, 0xd9, 0x5a,
- 0x95, 0x61, 0xba, 0xfb, 0x95, 0x13, 0x69, 0x11, 0x2f, 0x97, 0xc0, 0x6f, 0xa1, 0x36, 0x14, 0x5d,
- 0x22, 0x99, 0xe0, 0x96, 0xa1, 0xb8, 0x8f, 0x2a, 0xcb, 0x15, 0xc0, 0x9c, 0xf3, 0x4c, 0xc6, 0xcb,
- 0x05, 0xeb, 0x7b, 0x50, 0xd3, 0xbb, 0xa5, 0xd6, 0x6f, 0xc2, 0xf2, 0x28, 0x62, 0xca, 0xf2, 0x35,
- 0x2f, 0x59, 0xda, 0x2f, 0x00, 0x9f, 0x52, 0x99, 0x73, 0xd2, 0x0f, 0xa3, 0x04, 0x72, 0x17, 0x1e,
- 0x44, 0x54, 0x46, 0x8c, 0x8e, 0xc9, 0xd0, 0x97, 0xe2, 0x86, 0xf2, 0x4c, 0x66, 0x23, 0xdf, 0x7e,
- 0x9d, 0xec, 0xda, 0x3d, 0xd8, 0x2a, 0x84, 0xc7, 0xa1, 0xe0, 0x31, 0xbd, 0x67, 0xcf, 0xec, 0x57,
- 0x0a, 0x52, 0x77, 0x4d, 0x43, 0x96, 0x15, 0x58, 0x02, 0x6e, 0x94, 0x82, 0x3f, 0x82, 0x75, 0xad,
- 0x77, 0x32, 0x18, 0xf1, 0x9b, 0x44, 0x2d, 0x19, 0x05, 0xa5, 0xf6, 0xbf, 0xa7, 0xd6, 0xf6, 0x0f,
- 0x04, 0x5b, 0x17, 0x23, 0x39, 0x35, 0xd5, 0x2e, 0x6c, 0xc7, 0x92, 0xf4, 0x19, 0xef, 0xfb, 0x31,
- 0x55, 0x33, 0x5b, 0x30, 0x69, 0x2b, 0x3b, 0xbc, 0x4c, 0xcf, 0x54, 0xc2, 0x64, 0x5e, 0x83, 0x2c,
- 0x5e, 0x21, 0xfd, 0xdb, 0xbc, 0x6a, 0x29, 0xfb, 0x37, 0x02, 0x3c, 0x81, 0xa8, 0xbd, 0xb9, 0x9a,
- 0xc8, 0x96, 0x36, 0xe0, 0xb0, 0xfa, 0x94, 0x4d, 0x57, 0x7c, 0xb6, 0x74, 0x9b, 0x12, 0x9f, 0x67,
- 0x4e, 0xa5, 0x55, 0x1c, 0xcc, 0x5d, 0x85, 0xf2, 0xfb, 0x6c, 0x29, 0xf5, 0xb8, 0xb5, 0x06, 0xff,
- 0x75, 0x05, 0x97, 0x94, 0x4b, 0x7b, 0xbb, 0xe0, 0xb6, 0x1e, 0x26, 0xfb, 0x3b, 0x82, 0xed, 0x13,
- 0x11, 0x04, 0x6c, 0x6a, 0x4c, 0xef, 0xf9, 0x6a, 0xce, 0x6c, 0xab, 0x31, 0xb3, 0xad, 0xf6, 0x31,
- 0xec, 0xdc, 0x65, 0xcb, 0xee, 0x40, 0xd5, 0x3b, 0xe4, 0xfe, 0x32, 0x60, 0x47, 0x17, 0x7d, 0xa9,
- 0x53, 0x44, 0x63, 0xd6, 0xa5, 0xf8, 0x2b, 0x02, 0x73, 0xc2, 0x12, 0xfc, 0x7c, 0x91, 0x26, 0x66,
- 0x6e, 0xd5, 0x0f, 0x17, 0x0b, 0x4e, 0xcb, 0x69, 0x22, 0xfc, 0x0d, 0xc1, 0x46, 0xb1, 0x56, 0xfc,
- 0xb2, 0xb2, 0x64, 0x69, 0x03, 0xeb, 0x47, 0x0b, 0xc7, 0xa7, 0x54, 0xee, 0x4f, 0x03, 0xac, 0x5b,
- 0xd4, 0xcc, 0x56, 0xed, 0xde, 0x17, 0x04, 0xe6, 0xc4, 0xeb, 0x34, 0x87, 0x7b, 0xd3, 0x4f, 0xe2,
- 0x1c, 0xee, 0x95, 0x3d, 0x88, 0x9f, 0x53, 0x94, 0x05, 0x1a, 0x39, 0xfd, 0xf0, 0xd5, 0x17, 0xbc,
- 0x72, 0x7b, 0xa8, 0x75, 0x0a, 0x8f, 0x67, 0x86, 0x16, 0x22, 0x5b, 0xa6, 0x0e, 0x3d, 0x0e, 0xd9,
- 0xd5, 0x66, 0xe1, 0xd8, 0x1f, 0xef, 0x77, 0x56, 0xd5, 0xb7, 0xc1, 0x93, 0xbf, 0x01, 0x00, 0x00,
- 0xff, 0xff, 0xb2, 0x30, 0x58, 0x4f, 0x36, 0x08, 0x00, 0x00,
-}
diff --git a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_expansion_api.pb.go b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_expansion_api.pb.go
index 61718ce..6cd7ce4 100644
--- a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_expansion_api.pb.go
+++ b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_expansion_api.pb.go
@@ -3,14 +3,15 @@
package jobmanagement_v1
-import proto "github.com/golang/protobuf/proto"
-import fmt "fmt"
-import math "math"
-import pipeline_v1 "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
-
import (
- context "golang.org/x/net/context"
+ context "context"
+ fmt "fmt"
+ pipeline_v1 "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
+ proto "github.com/golang/protobuf/proto"
grpc "google.golang.org/grpc"
+ codes "google.golang.org/grpc/codes"
+ status "google.golang.org/grpc/status"
+ math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
@@ -22,7 +23,7 @@
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
-const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
type ExpansionRequest struct {
// Set of components needed to interpret the transform, or which
@@ -46,16 +47,17 @@
func (m *ExpansionRequest) String() string { return proto.CompactTextString(m) }
func (*ExpansionRequest) ProtoMessage() {}
func (*ExpansionRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_expansion_api_7d6074648ff0899a, []int{0}
+ return fileDescriptor_0877284f21c25569, []int{0}
}
+
func (m *ExpansionRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ExpansionRequest.Unmarshal(m, b)
}
func (m *ExpansionRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ExpansionRequest.Marshal(b, m, deterministic)
}
-func (dst *ExpansionRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ExpansionRequest.Merge(dst, src)
+func (m *ExpansionRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ExpansionRequest.Merge(m, src)
}
func (m *ExpansionRequest) XXX_Size() int {
return xxx_messageInfo_ExpansionRequest.Size(m)
@@ -106,16 +108,17 @@
func (m *ExpansionResponse) String() string { return proto.CompactTextString(m) }
func (*ExpansionResponse) ProtoMessage() {}
func (*ExpansionResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_expansion_api_7d6074648ff0899a, []int{1}
+ return fileDescriptor_0877284f21c25569, []int{1}
}
+
func (m *ExpansionResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ExpansionResponse.Unmarshal(m, b)
}
func (m *ExpansionResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ExpansionResponse.Marshal(b, m, deterministic)
}
-func (dst *ExpansionResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ExpansionResponse.Merge(dst, src)
+func (m *ExpansionResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ExpansionResponse.Merge(m, src)
}
func (m *ExpansionResponse) XXX_Size() int {
return xxx_messageInfo_ExpansionResponse.Size(m)
@@ -152,6 +155,30 @@
proto.RegisterType((*ExpansionResponse)(nil), "org.apache.beam.model.expansion.v1.ExpansionResponse")
}
+func init() { proto.RegisterFile("beam_expansion_api.proto", fileDescriptor_0877284f21c25569) }
+
+var fileDescriptor_0877284f21c25569 = []byte{
+ // 285 bytes of a gzipped FileDescriptorProto
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xcc, 0x92, 0xc1, 0x4a, 0xc3, 0x40,
+ 0x10, 0x86, 0x89, 0x62, 0x21, 0xab, 0x87, 0xba, 0x28, 0x84, 0xe2, 0xa1, 0xe4, 0xd4, 0x8b, 0x03,
+ 0xa9, 0xfa, 0x00, 0x56, 0x3d, 0x89, 0x20, 0xd1, 0x93, 0x97, 0xb0, 0x49, 0xc7, 0xba, 0xd2, 0x9d,
+ 0x59, 0x77, 0x93, 0xe0, 0x2b, 0xf8, 0x60, 0xde, 0x7c, 0x28, 0x31, 0xa5, 0x49, 0x10, 0xc1, 0x7a,
+ 0xf3, 0xb8, 0xc3, 0xff, 0xcd, 0xf2, 0xcf, 0xff, 0x8b, 0x28, 0x47, 0x65, 0x32, 0x7c, 0xb5, 0x8a,
+ 0xbc, 0x66, 0xca, 0x94, 0xd5, 0x60, 0x1d, 0x97, 0x2c, 0x63, 0x76, 0x0b, 0x50, 0x56, 0x15, 0x4f,
+ 0x08, 0x5f, 0x22, 0x30, 0x3c, 0xc7, 0x25, 0xb4, 0x52, 0xa8, 0x93, 0xd1, 0x61, 0x43, 0xbb, 0x8a,
+ 0x08, 0x5d, 0x87, 0xc6, 0x1f, 0x81, 0x18, 0x5e, 0xad, 0x75, 0x29, 0xbe, 0x54, 0xe8, 0x4b, 0x79,
+ 0x23, 0x44, 0xc1, 0xc6, 0x32, 0x21, 0x95, 0x3e, 0x0a, 0xc6, 0xc1, 0x64, 0x77, 0x7a, 0x0c, 0x3f,
+ 0x7f, 0x62, 0xb5, 0xc5, 0xa5, 0x26, 0x84, 0x3a, 0x81, 0x8b, 0x16, 0x4a, 0x7b, 0x0b, 0xe4, 0xb5,
+ 0x08, 0x4b, 0xa7, 0xc8, 0x3f, 0xb2, 0x33, 0xd1, 0xd6, 0xc6, 0xdb, 0x6e, 0xef, 0xd7, 0x50, 0xda,
+ 0xf1, 0xf2, 0x48, 0x84, 0xa4, 0x0c, 0x7a, 0xab, 0x0a, 0x8c, 0xb6, 0xc7, 0xc1, 0x24, 0x4c, 0xbb,
+ 0x41, 0xfc, 0x1e, 0x88, 0xfd, 0x9e, 0x1d, 0x6f, 0x99, 0x3c, 0xfe, 0x6b, 0x3f, 0x07, 0x62, 0x07,
+ 0x9d, 0x63, 0x17, 0x89, 0xc6, 0xcb, 0xea, 0x31, 0x7d, 0xeb, 0xc7, 0x72, 0x87, 0xae, 0xd6, 0x05,
+ 0xca, 0x4a, 0x0c, 0x9a, 0xd9, 0x5c, 0x9e, 0xc2, 0xef, 0x89, 0xc3, 0xf7, 0x58, 0x47, 0x67, 0x7f,
+ 0xa4, 0x56, 0xd7, 0x9b, 0x5d, 0x8a, 0x0d, 0xfa, 0x35, 0xdb, 0x6b, 0xc1, 0x73, 0xab, 0x1f, 0x86,
+ 0xcf, 0x9c, 0x1b, 0x45, 0x6a, 0x81, 0x06, 0xa9, 0xcc, 0xea, 0x24, 0x1f, 0x34, 0x7d, 0x3b, 0xf9,
+ 0x0c, 0x00, 0x00, 0xff, 0xff, 0xb4, 0xc3, 0x3e, 0x66, 0xc6, 0x02, 0x00, 0x00,
+}
+
// Reference imports to suppress errors if they are not otherwise used.
var _ context.Context
var _ grpc.ClientConn
@@ -189,6 +216,14 @@
Expand(context.Context, *ExpansionRequest) (*ExpansionResponse, error)
}
+// UnimplementedExpansionServiceServer can be embedded to have forward compatible implementations.
+type UnimplementedExpansionServiceServer struct {
+}
+
+func (*UnimplementedExpansionServiceServer) Expand(ctx context.Context, req *ExpansionRequest) (*ExpansionResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method Expand not implemented")
+}
+
func RegisterExpansionServiceServer(s *grpc.Server, srv ExpansionServiceServer) {
s.RegisterService(&_ExpansionService_serviceDesc, srv)
}
@@ -223,29 +258,3 @@
Streams: []grpc.StreamDesc{},
Metadata: "beam_expansion_api.proto",
}
-
-func init() {
- proto.RegisterFile("beam_expansion_api.proto", fileDescriptor_beam_expansion_api_7d6074648ff0899a)
-}
-
-var fileDescriptor_beam_expansion_api_7d6074648ff0899a = []byte{
- // 285 bytes of a gzipped FileDescriptorProto
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xcc, 0x92, 0xc1, 0x4a, 0xc3, 0x40,
- 0x10, 0x86, 0x89, 0x62, 0x21, 0xab, 0x87, 0xba, 0x28, 0x84, 0xe2, 0xa1, 0xe4, 0xd4, 0x8b, 0x03,
- 0xa9, 0xfa, 0x00, 0x56, 0x3d, 0x89, 0x20, 0xd1, 0x93, 0x97, 0xb0, 0x49, 0xc7, 0xba, 0xd2, 0x9d,
- 0x59, 0x77, 0x93, 0xe0, 0x2b, 0xf8, 0x60, 0xde, 0x7c, 0x28, 0x31, 0xa5, 0x49, 0x10, 0xc1, 0x7a,
- 0xf3, 0xb8, 0xc3, 0xff, 0xcd, 0xf2, 0xcf, 0xff, 0x8b, 0x28, 0x47, 0x65, 0x32, 0x7c, 0xb5, 0x8a,
- 0xbc, 0x66, 0xca, 0x94, 0xd5, 0x60, 0x1d, 0x97, 0x2c, 0x63, 0x76, 0x0b, 0x50, 0x56, 0x15, 0x4f,
- 0x08, 0x5f, 0x22, 0x30, 0x3c, 0xc7, 0x25, 0xb4, 0x52, 0xa8, 0x93, 0xd1, 0x61, 0x43, 0xbb, 0x8a,
- 0x08, 0x5d, 0x87, 0xc6, 0x1f, 0x81, 0x18, 0x5e, 0xad, 0x75, 0x29, 0xbe, 0x54, 0xe8, 0x4b, 0x79,
- 0x23, 0x44, 0xc1, 0xc6, 0x32, 0x21, 0x95, 0x3e, 0x0a, 0xc6, 0xc1, 0x64, 0x77, 0x7a, 0x0c, 0x3f,
- 0x7f, 0x62, 0xb5, 0xc5, 0xa5, 0x26, 0x84, 0x3a, 0x81, 0x8b, 0x16, 0x4a, 0x7b, 0x0b, 0xe4, 0xb5,
- 0x08, 0x4b, 0xa7, 0xc8, 0x3f, 0xb2, 0x33, 0xd1, 0xd6, 0xc6, 0xdb, 0x6e, 0xef, 0xd7, 0x50, 0xda,
- 0xf1, 0xf2, 0x48, 0x84, 0xa4, 0x0c, 0x7a, 0xab, 0x0a, 0x8c, 0xb6, 0xc7, 0xc1, 0x24, 0x4c, 0xbb,
- 0x41, 0xfc, 0x1e, 0x88, 0xfd, 0x9e, 0x1d, 0x6f, 0x99, 0x3c, 0xfe, 0x6b, 0x3f, 0x07, 0x62, 0x07,
- 0x9d, 0x63, 0x17, 0x89, 0xc6, 0xcb, 0xea, 0x31, 0x7d, 0xeb, 0xc7, 0x72, 0x87, 0xae, 0xd6, 0x05,
- 0xca, 0x4a, 0x0c, 0x9a, 0xd9, 0x5c, 0x9e, 0xc2, 0xef, 0x89, 0xc3, 0xf7, 0x58, 0x47, 0x67, 0x7f,
- 0xa4, 0x56, 0xd7, 0x9b, 0x5d, 0x8a, 0x0d, 0xfa, 0x35, 0xdb, 0x6b, 0xc1, 0x73, 0xab, 0x1f, 0x86,
- 0xcf, 0x9c, 0x1b, 0x45, 0x6a, 0x81, 0x06, 0xa9, 0xcc, 0xea, 0x24, 0x1f, 0x34, 0x7d, 0x3b, 0xf9,
- 0x0c, 0x00, 0x00, 0xff, 0xff, 0xb4, 0xc3, 0x3e, 0x66, 0xc6, 0x02, 0x00, 0x00,
-}
diff --git a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_job_api.pb.go b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_job_api.pb.go
index 609c74d..18f6aaa 100644
--- a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_job_api.pb.go
+++ b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_job_api.pb.go
@@ -3,16 +3,17 @@
package jobmanagement_v1
-import proto "github.com/golang/protobuf/proto"
-import fmt "fmt"
-import math "math"
-import pipeline_v1 "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
-import _struct "github.com/golang/protobuf/ptypes/struct"
-import timestamp "github.com/golang/protobuf/ptypes/timestamp"
-
import (
- context "golang.org/x/net/context"
+ context "context"
+ fmt "fmt"
+ pipeline_v1 "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
+ proto "github.com/golang/protobuf/proto"
+ _struct "github.com/golang/protobuf/ptypes/struct"
+ timestamp "github.com/golang/protobuf/ptypes/timestamp"
grpc "google.golang.org/grpc"
+ codes "google.golang.org/grpc/codes"
+ status "google.golang.org/grpc/status"
+ math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
@@ -24,7 +25,7 @@
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
-const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
type JobMessage_MessageImportance int32
@@ -45,6 +46,7 @@
4: "JOB_MESSAGE_WARNING",
5: "JOB_MESSAGE_ERROR",
}
+
var JobMessage_MessageImportance_value = map[string]int32{
"MESSAGE_IMPORTANCE_UNSPECIFIED": 0,
"JOB_MESSAGE_DEBUG": 1,
@@ -57,8 +59,9 @@
func (x JobMessage_MessageImportance) String() string {
return proto.EnumName(JobMessage_MessageImportance_name, int32(x))
}
+
func (JobMessage_MessageImportance) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{14, 0}
+ return fileDescriptor_97c7b84f742157ae, []int{14, 0}
}
type JobState_Enum int32
@@ -104,6 +107,7 @@
10: "CANCELLING",
11: "UPDATING",
}
+
var JobState_Enum_value = map[string]int32{
"UNSPECIFIED": 0,
"STOPPED": 1,
@@ -122,8 +126,9 @@
func (x JobState_Enum) String() string {
return proto.EnumName(JobState_Enum_name, int32(x))
}
+
func (JobState_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{16, 0}
+ return fileDescriptor_97c7b84f742157ae, []int{16, 0}
}
type PipelineOptionType_Enum int32
@@ -146,6 +151,7 @@
4: "ARRAY",
5: "OBJECT",
}
+
var PipelineOptionType_Enum_value = map[string]int32{
"STRING": 0,
"BOOLEAN": 1,
@@ -158,8 +164,9 @@
func (x PipelineOptionType_Enum) String() string {
return proto.EnumName(PipelineOptionType_Enum_name, int32(x))
}
+
func (PipelineOptionType_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{21, 0}
+ return fileDescriptor_97c7b84f742157ae, []int{21, 0}
}
// Prepare is a synchronous request that returns a preparationId back
@@ -179,16 +186,17 @@
func (m *PrepareJobRequest) String() string { return proto.CompactTextString(m) }
func (*PrepareJobRequest) ProtoMessage() {}
func (*PrepareJobRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{0}
+ return fileDescriptor_97c7b84f742157ae, []int{0}
}
+
func (m *PrepareJobRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_PrepareJobRequest.Unmarshal(m, b)
}
func (m *PrepareJobRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_PrepareJobRequest.Marshal(b, m, deterministic)
}
-func (dst *PrepareJobRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_PrepareJobRequest.Merge(dst, src)
+func (m *PrepareJobRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_PrepareJobRequest.Merge(m, src)
}
func (m *PrepareJobRequest) XXX_Size() int {
return xxx_messageInfo_PrepareJobRequest.Size(m)
@@ -239,16 +247,17 @@
func (m *PrepareJobResponse) String() string { return proto.CompactTextString(m) }
func (*PrepareJobResponse) ProtoMessage() {}
func (*PrepareJobResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{1}
+ return fileDescriptor_97c7b84f742157ae, []int{1}
}
+
func (m *PrepareJobResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_PrepareJobResponse.Unmarshal(m, b)
}
func (m *PrepareJobResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_PrepareJobResponse.Marshal(b, m, deterministic)
}
-func (dst *PrepareJobResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_PrepareJobResponse.Merge(dst, src)
+func (m *PrepareJobResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_PrepareJobResponse.Merge(m, src)
}
func (m *PrepareJobResponse) XXX_Size() int {
return xxx_messageInfo_PrepareJobResponse.Size(m)
@@ -300,16 +309,17 @@
func (m *RunJobRequest) String() string { return proto.CompactTextString(m) }
func (*RunJobRequest) ProtoMessage() {}
func (*RunJobRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{2}
+ return fileDescriptor_97c7b84f742157ae, []int{2}
}
+
func (m *RunJobRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_RunJobRequest.Unmarshal(m, b)
}
func (m *RunJobRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_RunJobRequest.Marshal(b, m, deterministic)
}
-func (dst *RunJobRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_RunJobRequest.Merge(dst, src)
+func (m *RunJobRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_RunJobRequest.Merge(m, src)
}
func (m *RunJobRequest) XXX_Size() int {
return xxx_messageInfo_RunJobRequest.Size(m)
@@ -345,16 +355,17 @@
func (m *RunJobResponse) String() string { return proto.CompactTextString(m) }
func (*RunJobResponse) ProtoMessage() {}
func (*RunJobResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{3}
+ return fileDescriptor_97c7b84f742157ae, []int{3}
}
+
func (m *RunJobResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_RunJobResponse.Unmarshal(m, b)
}
func (m *RunJobResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_RunJobResponse.Marshal(b, m, deterministic)
}
-func (dst *RunJobResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_RunJobResponse.Merge(dst, src)
+func (m *RunJobResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_RunJobResponse.Merge(m, src)
}
func (m *RunJobResponse) XXX_Size() int {
return xxx_messageInfo_RunJobResponse.Size(m)
@@ -386,16 +397,17 @@
func (m *CancelJobRequest) String() string { return proto.CompactTextString(m) }
func (*CancelJobRequest) ProtoMessage() {}
func (*CancelJobRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{4}
+ return fileDescriptor_97c7b84f742157ae, []int{4}
}
+
func (m *CancelJobRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_CancelJobRequest.Unmarshal(m, b)
}
func (m *CancelJobRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_CancelJobRequest.Marshal(b, m, deterministic)
}
-func (dst *CancelJobRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_CancelJobRequest.Merge(dst, src)
+func (m *CancelJobRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_CancelJobRequest.Merge(m, src)
}
func (m *CancelJobRequest) XXX_Size() int {
return xxx_messageInfo_CancelJobRequest.Size(m)
@@ -425,16 +437,17 @@
func (m *CancelJobResponse) String() string { return proto.CompactTextString(m) }
func (*CancelJobResponse) ProtoMessage() {}
func (*CancelJobResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{5}
+ return fileDescriptor_97c7b84f742157ae, []int{5}
}
+
func (m *CancelJobResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_CancelJobResponse.Unmarshal(m, b)
}
func (m *CancelJobResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_CancelJobResponse.Marshal(b, m, deterministic)
}
-func (dst *CancelJobResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_CancelJobResponse.Merge(dst, src)
+func (m *CancelJobResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_CancelJobResponse.Merge(m, src)
}
func (m *CancelJobResponse) XXX_Size() int {
return xxx_messageInfo_CancelJobResponse.Size(m)
@@ -467,16 +480,17 @@
func (m *JobInfo) String() string { return proto.CompactTextString(m) }
func (*JobInfo) ProtoMessage() {}
func (*JobInfo) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{6}
+ return fileDescriptor_97c7b84f742157ae, []int{6}
}
+
func (m *JobInfo) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_JobInfo.Unmarshal(m, b)
}
func (m *JobInfo) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_JobInfo.Marshal(b, m, deterministic)
}
-func (dst *JobInfo) XXX_Merge(src proto.Message) {
- xxx_messageInfo_JobInfo.Merge(dst, src)
+func (m *JobInfo) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_JobInfo.Merge(m, src)
}
func (m *JobInfo) XXX_Size() int {
return xxx_messageInfo_JobInfo.Size(m)
@@ -527,16 +541,17 @@
func (m *GetJobsRequest) String() string { return proto.CompactTextString(m) }
func (*GetJobsRequest) ProtoMessage() {}
func (*GetJobsRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{7}
+ return fileDescriptor_97c7b84f742157ae, []int{7}
}
+
func (m *GetJobsRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GetJobsRequest.Unmarshal(m, b)
}
func (m *GetJobsRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GetJobsRequest.Marshal(b, m, deterministic)
}
-func (dst *GetJobsRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_GetJobsRequest.Merge(dst, src)
+func (m *GetJobsRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_GetJobsRequest.Merge(m, src)
}
func (m *GetJobsRequest) XXX_Size() int {
return xxx_messageInfo_GetJobsRequest.Size(m)
@@ -558,16 +573,17 @@
func (m *GetJobsResponse) String() string { return proto.CompactTextString(m) }
func (*GetJobsResponse) ProtoMessage() {}
func (*GetJobsResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{8}
+ return fileDescriptor_97c7b84f742157ae, []int{8}
}
+
func (m *GetJobsResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GetJobsResponse.Unmarshal(m, b)
}
func (m *GetJobsResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GetJobsResponse.Marshal(b, m, deterministic)
}
-func (dst *GetJobsResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_GetJobsResponse.Merge(dst, src)
+func (m *GetJobsResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_GetJobsResponse.Merge(m, src)
}
func (m *GetJobsResponse) XXX_Size() int {
return xxx_messageInfo_GetJobsResponse.Size(m)
@@ -599,16 +615,17 @@
func (m *GetJobStateRequest) String() string { return proto.CompactTextString(m) }
func (*GetJobStateRequest) ProtoMessage() {}
func (*GetJobStateRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{9}
+ return fileDescriptor_97c7b84f742157ae, []int{9}
}
+
func (m *GetJobStateRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GetJobStateRequest.Unmarshal(m, b)
}
func (m *GetJobStateRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GetJobStateRequest.Marshal(b, m, deterministic)
}
-func (dst *GetJobStateRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_GetJobStateRequest.Merge(dst, src)
+func (m *GetJobStateRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_GetJobStateRequest.Merge(m, src)
}
func (m *GetJobStateRequest) XXX_Size() int {
return xxx_messageInfo_GetJobStateRequest.Size(m)
@@ -638,16 +655,17 @@
func (m *JobStateEvent) String() string { return proto.CompactTextString(m) }
func (*JobStateEvent) ProtoMessage() {}
func (*JobStateEvent) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{10}
+ return fileDescriptor_97c7b84f742157ae, []int{10}
}
+
func (m *JobStateEvent) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_JobStateEvent.Unmarshal(m, b)
}
func (m *JobStateEvent) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_JobStateEvent.Marshal(b, m, deterministic)
}
-func (dst *JobStateEvent) XXX_Merge(src proto.Message) {
- xxx_messageInfo_JobStateEvent.Merge(dst, src)
+func (m *JobStateEvent) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_JobStateEvent.Merge(m, src)
}
func (m *JobStateEvent) XXX_Size() int {
return xxx_messageInfo_JobStateEvent.Size(m)
@@ -686,16 +704,17 @@
func (m *GetJobPipelineRequest) String() string { return proto.CompactTextString(m) }
func (*GetJobPipelineRequest) ProtoMessage() {}
func (*GetJobPipelineRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{11}
+ return fileDescriptor_97c7b84f742157ae, []int{11}
}
+
func (m *GetJobPipelineRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GetJobPipelineRequest.Unmarshal(m, b)
}
func (m *GetJobPipelineRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GetJobPipelineRequest.Marshal(b, m, deterministic)
}
-func (dst *GetJobPipelineRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_GetJobPipelineRequest.Merge(dst, src)
+func (m *GetJobPipelineRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_GetJobPipelineRequest.Merge(m, src)
}
func (m *GetJobPipelineRequest) XXX_Size() int {
return xxx_messageInfo_GetJobPipelineRequest.Size(m)
@@ -724,16 +743,17 @@
func (m *GetJobPipelineResponse) String() string { return proto.CompactTextString(m) }
func (*GetJobPipelineResponse) ProtoMessage() {}
func (*GetJobPipelineResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{12}
+ return fileDescriptor_97c7b84f742157ae, []int{12}
}
+
func (m *GetJobPipelineResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GetJobPipelineResponse.Unmarshal(m, b)
}
func (m *GetJobPipelineResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GetJobPipelineResponse.Marshal(b, m, deterministic)
}
-func (dst *GetJobPipelineResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_GetJobPipelineResponse.Merge(dst, src)
+func (m *GetJobPipelineResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_GetJobPipelineResponse.Merge(m, src)
}
func (m *GetJobPipelineResponse) XXX_Size() int {
return xxx_messageInfo_GetJobPipelineResponse.Size(m)
@@ -766,16 +786,17 @@
func (m *JobMessagesRequest) String() string { return proto.CompactTextString(m) }
func (*JobMessagesRequest) ProtoMessage() {}
func (*JobMessagesRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{13}
+ return fileDescriptor_97c7b84f742157ae, []int{13}
}
+
func (m *JobMessagesRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_JobMessagesRequest.Unmarshal(m, b)
}
func (m *JobMessagesRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_JobMessagesRequest.Marshal(b, m, deterministic)
}
-func (dst *JobMessagesRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_JobMessagesRequest.Merge(dst, src)
+func (m *JobMessagesRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_JobMessagesRequest.Merge(m, src)
}
func (m *JobMessagesRequest) XXX_Size() int {
return xxx_messageInfo_JobMessagesRequest.Size(m)
@@ -807,16 +828,17 @@
func (m *JobMessage) String() string { return proto.CompactTextString(m) }
func (*JobMessage) ProtoMessage() {}
func (*JobMessage) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{14}
+ return fileDescriptor_97c7b84f742157ae, []int{14}
}
+
func (m *JobMessage) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_JobMessage.Unmarshal(m, b)
}
func (m *JobMessage) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_JobMessage.Marshal(b, m, deterministic)
}
-func (dst *JobMessage) XXX_Merge(src proto.Message) {
- xxx_messageInfo_JobMessage.Merge(dst, src)
+func (m *JobMessage) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_JobMessage.Merge(m, src)
}
func (m *JobMessage) XXX_Size() int {
return xxx_messageInfo_JobMessage.Size(m)
@@ -869,16 +891,17 @@
func (m *JobMessagesResponse) String() string { return proto.CompactTextString(m) }
func (*JobMessagesResponse) ProtoMessage() {}
func (*JobMessagesResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{15}
+ return fileDescriptor_97c7b84f742157ae, []int{15}
}
+
func (m *JobMessagesResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_JobMessagesResponse.Unmarshal(m, b)
}
func (m *JobMessagesResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_JobMessagesResponse.Marshal(b, m, deterministic)
}
-func (dst *JobMessagesResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_JobMessagesResponse.Merge(dst, src)
+func (m *JobMessagesResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_JobMessagesResponse.Merge(m, src)
}
func (m *JobMessagesResponse) XXX_Size() int {
return xxx_messageInfo_JobMessagesResponse.Size(m)
@@ -896,12 +919,14 @@
type JobMessagesResponse_MessageResponse struct {
MessageResponse *JobMessage `protobuf:"bytes,1,opt,name=message_response,json=messageResponse,proto3,oneof"`
}
+
type JobMessagesResponse_StateResponse struct {
StateResponse *JobStateEvent `protobuf:"bytes,2,opt,name=state_response,json=stateResponse,proto3,oneof"`
}
func (*JobMessagesResponse_MessageResponse) isJobMessagesResponse_Response() {}
-func (*JobMessagesResponse_StateResponse) isJobMessagesResponse_Response() {}
+
+func (*JobMessagesResponse_StateResponse) isJobMessagesResponse_Response() {}
func (m *JobMessagesResponse) GetResponse() isJobMessagesResponse_Response {
if m != nil {
@@ -924,80 +949,14 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*JobMessagesResponse) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _JobMessagesResponse_OneofMarshaler, _JobMessagesResponse_OneofUnmarshaler, _JobMessagesResponse_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*JobMessagesResponse) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*JobMessagesResponse_MessageResponse)(nil),
(*JobMessagesResponse_StateResponse)(nil),
}
}
-func _JobMessagesResponse_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*JobMessagesResponse)
- // response
- switch x := m.Response.(type) {
- case *JobMessagesResponse_MessageResponse:
- b.EncodeVarint(1<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.MessageResponse); err != nil {
- return err
- }
- case *JobMessagesResponse_StateResponse:
- b.EncodeVarint(2<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.StateResponse); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("JobMessagesResponse.Response has unexpected type %T", x)
- }
- return nil
-}
-
-func _JobMessagesResponse_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*JobMessagesResponse)
- switch tag {
- case 1: // response.message_response
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(JobMessage)
- err := b.DecodeMessage(msg)
- m.Response = &JobMessagesResponse_MessageResponse{msg}
- return true, err
- case 2: // response.state_response
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(JobStateEvent)
- err := b.DecodeMessage(msg)
- m.Response = &JobMessagesResponse_StateResponse{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _JobMessagesResponse_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*JobMessagesResponse)
- // response
- switch x := m.Response.(type) {
- case *JobMessagesResponse_MessageResponse:
- s := proto.Size(x.MessageResponse)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *JobMessagesResponse_StateResponse:
- s := proto.Size(x.StateResponse)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
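The regeneration replaces the hand-rolled XXX_OneofFuncs marshaler/unmarshaler/sizer trio with the newer XXX_OneofWrappers hook. Calling code is unaffected: oneof members are still populated through the generated wrapper structs shown above. A minimal sketch of that usage, assuming the generated package is importable as jobmanagement_v1 (the import path and function name are illustrative, not part of this diff):

package example

import (
	jobpb "github.com/apache/beam/sdks/go/pkg/beam/model/jobmanagement_v1" // assumed import path
)

// buildResponse wraps a JobMessage in the generated oneof wrapper struct;
// the XXX_OneofWrappers change only swaps the proto library's internal hooks.
func buildResponse() *jobpb.JobMessagesResponse {
	resp := &jobpb.JobMessagesResponse{
		Response: &jobpb.JobMessagesResponse_MessageResponse{
			MessageResponse: &jobpb.JobMessage{},
		},
	}
	_ = resp.GetResponse() // generated accessor, unchanged by the regeneration
	return resp
}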
// Enumeration of all JobStates
//
// The state transition diagram is:
@@ -1019,16 +978,17 @@
func (m *JobState) String() string { return proto.CompactTextString(m) }
func (*JobState) ProtoMessage() {}
func (*JobState) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{16}
+ return fileDescriptor_97c7b84f742157ae, []int{16}
}
+
func (m *JobState) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_JobState.Unmarshal(m, b)
}
func (m *JobState) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_JobState.Marshal(b, m, deterministic)
}
-func (dst *JobState) XXX_Merge(src proto.Message) {
- xxx_messageInfo_JobState.Merge(dst, src)
+func (m *JobState) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_JobState.Merge(m, src)
}
func (m *JobState) XXX_Size() int {
return xxx_messageInfo_JobState.Size(m)
@@ -1050,16 +1010,17 @@
func (m *GetJobMetricsRequest) String() string { return proto.CompactTextString(m) }
func (*GetJobMetricsRequest) ProtoMessage() {}
func (*GetJobMetricsRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{17}
+ return fileDescriptor_97c7b84f742157ae, []int{17}
}
+
func (m *GetJobMetricsRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GetJobMetricsRequest.Unmarshal(m, b)
}
func (m *GetJobMetricsRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GetJobMetricsRequest.Marshal(b, m, deterministic)
}
-func (dst *GetJobMetricsRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_GetJobMetricsRequest.Merge(dst, src)
+func (m *GetJobMetricsRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_GetJobMetricsRequest.Merge(m, src)
}
func (m *GetJobMetricsRequest) XXX_Size() int {
return xxx_messageInfo_GetJobMetricsRequest.Size(m)
@@ -1088,16 +1049,17 @@
func (m *GetJobMetricsResponse) String() string { return proto.CompactTextString(m) }
func (*GetJobMetricsResponse) ProtoMessage() {}
func (*GetJobMetricsResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{18}
+ return fileDescriptor_97c7b84f742157ae, []int{18}
}
+
func (m *GetJobMetricsResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GetJobMetricsResponse.Unmarshal(m, b)
}
func (m *GetJobMetricsResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GetJobMetricsResponse.Marshal(b, m, deterministic)
}
-func (dst *GetJobMetricsResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_GetJobMetricsResponse.Merge(dst, src)
+func (m *GetJobMetricsResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_GetJobMetricsResponse.Merge(m, src)
}
func (m *GetJobMetricsResponse) XXX_Size() int {
return xxx_messageInfo_GetJobMetricsResponse.Size(m)
@@ -1128,16 +1090,17 @@
func (m *MetricResults) String() string { return proto.CompactTextString(m) }
func (*MetricResults) ProtoMessage() {}
func (*MetricResults) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{19}
+ return fileDescriptor_97c7b84f742157ae, []int{19}
}
+
func (m *MetricResults) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_MetricResults.Unmarshal(m, b)
}
func (m *MetricResults) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_MetricResults.Marshal(b, m, deterministic)
}
-func (dst *MetricResults) XXX_Merge(src proto.Message) {
- xxx_messageInfo_MetricResults.Merge(dst, src)
+func (m *MetricResults) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_MetricResults.Merge(m, src)
}
func (m *MetricResults) XXX_Size() int {
return xxx_messageInfo_MetricResults.Size(m)
@@ -1176,16 +1139,17 @@
func (m *DescribePipelineOptionsRequest) String() string { return proto.CompactTextString(m) }
func (*DescribePipelineOptionsRequest) ProtoMessage() {}
func (*DescribePipelineOptionsRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{20}
+ return fileDescriptor_97c7b84f742157ae, []int{20}
}
+
func (m *DescribePipelineOptionsRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DescribePipelineOptionsRequest.Unmarshal(m, b)
}
func (m *DescribePipelineOptionsRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_DescribePipelineOptionsRequest.Marshal(b, m, deterministic)
}
-func (dst *DescribePipelineOptionsRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_DescribePipelineOptionsRequest.Merge(dst, src)
+func (m *DescribePipelineOptionsRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_DescribePipelineOptionsRequest.Merge(m, src)
}
func (m *DescribePipelineOptionsRequest) XXX_Size() int {
return xxx_messageInfo_DescribePipelineOptionsRequest.Size(m)
@@ -1208,16 +1172,17 @@
func (m *PipelineOptionType) String() string { return proto.CompactTextString(m) }
func (*PipelineOptionType) ProtoMessage() {}
func (*PipelineOptionType) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{21}
+ return fileDescriptor_97c7b84f742157ae, []int{21}
}
+
func (m *PipelineOptionType) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_PipelineOptionType.Unmarshal(m, b)
}
func (m *PipelineOptionType) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_PipelineOptionType.Marshal(b, m, deterministic)
}
-func (dst *PipelineOptionType) XXX_Merge(src proto.Message) {
- xxx_messageInfo_PipelineOptionType.Merge(dst, src)
+func (m *PipelineOptionType) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_PipelineOptionType.Merge(m, src)
}
func (m *PipelineOptionType) XXX_Size() int {
return xxx_messageInfo_PipelineOptionType.Size(m)
@@ -1249,16 +1214,17 @@
func (m *PipelineOptionDescriptor) String() string { return proto.CompactTextString(m) }
func (*PipelineOptionDescriptor) ProtoMessage() {}
func (*PipelineOptionDescriptor) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{22}
+ return fileDescriptor_97c7b84f742157ae, []int{22}
}
+
func (m *PipelineOptionDescriptor) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_PipelineOptionDescriptor.Unmarshal(m, b)
}
func (m *PipelineOptionDescriptor) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_PipelineOptionDescriptor.Marshal(b, m, deterministic)
}
-func (dst *PipelineOptionDescriptor) XXX_Merge(src proto.Message) {
- xxx_messageInfo_PipelineOptionDescriptor.Merge(dst, src)
+func (m *PipelineOptionDescriptor) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_PipelineOptionDescriptor.Merge(m, src)
}
func (m *PipelineOptionDescriptor) XXX_Size() int {
return xxx_messageInfo_PipelineOptionDescriptor.Size(m)
@@ -1316,16 +1282,17 @@
func (m *DescribePipelineOptionsResponse) String() string { return proto.CompactTextString(m) }
func (*DescribePipelineOptionsResponse) ProtoMessage() {}
func (*DescribePipelineOptionsResponse) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_job_api_c1d5a4d112319449, []int{23}
+ return fileDescriptor_97c7b84f742157ae, []int{23}
}
+
func (m *DescribePipelineOptionsResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DescribePipelineOptionsResponse.Unmarshal(m, b)
}
func (m *DescribePipelineOptionsResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_DescribePipelineOptionsResponse.Marshal(b, m, deterministic)
}
-func (dst *DescribePipelineOptionsResponse) XXX_Merge(src proto.Message) {
- xxx_messageInfo_DescribePipelineOptionsResponse.Merge(dst, src)
+func (m *DescribePipelineOptionsResponse) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_DescribePipelineOptionsResponse.Merge(m, src)
}
func (m *DescribePipelineOptionsResponse) XXX_Size() int {
return xxx_messageInfo_DescribePipelineOptionsResponse.Size(m)
@@ -1344,6 +1311,9 @@
}
func init() {
+ proto.RegisterEnum("org.apache.beam.model.job_management.v1.JobMessage_MessageImportance", JobMessage_MessageImportance_name, JobMessage_MessageImportance_value)
+ proto.RegisterEnum("org.apache.beam.model.job_management.v1.JobState_Enum", JobState_Enum_name, JobState_Enum_value)
+ proto.RegisterEnum("org.apache.beam.model.job_management.v1.PipelineOptionType_Enum", PipelineOptionType_Enum_name, PipelineOptionType_Enum_value)
proto.RegisterType((*PrepareJobRequest)(nil), "org.apache.beam.model.job_management.v1.PrepareJobRequest")
proto.RegisterType((*PrepareJobResponse)(nil), "org.apache.beam.model.job_management.v1.PrepareJobResponse")
proto.RegisterType((*RunJobRequest)(nil), "org.apache.beam.model.job_management.v1.RunJobRequest")
@@ -1368,9 +1338,103 @@
proto.RegisterType((*PipelineOptionType)(nil), "org.apache.beam.model.job_management.v1.PipelineOptionType")
proto.RegisterType((*PipelineOptionDescriptor)(nil), "org.apache.beam.model.job_management.v1.PipelineOptionDescriptor")
proto.RegisterType((*DescribePipelineOptionsResponse)(nil), "org.apache.beam.model.job_management.v1.DescribePipelineOptionsResponse")
- proto.RegisterEnum("org.apache.beam.model.job_management.v1.JobMessage_MessageImportance", JobMessage_MessageImportance_name, JobMessage_MessageImportance_value)
- proto.RegisterEnum("org.apache.beam.model.job_management.v1.JobState_Enum", JobState_Enum_name, JobState_Enum_value)
- proto.RegisterEnum("org.apache.beam.model.job_management.v1.PipelineOptionType_Enum", PipelineOptionType_Enum_name, PipelineOptionType_Enum_value)
+}
+
+func init() { proto.RegisterFile("beam_job_api.proto", fileDescriptor_97c7b84f742157ae) }
+
+var fileDescriptor_97c7b84f742157ae = []byte{
+ // 1444 bytes of a gzipped FileDescriptorProto
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xc4, 0x57, 0x5f, 0x6f, 0x1b, 0xc5,
+ 0x16, 0xef, 0x3a, 0x76, 0x6c, 0x1f, 0xd7, 0xce, 0x66, 0xda, 0xdc, 0xa4, 0xd6, 0xbd, 0x6d, 0xee,
+ 0x5e, 0x5d, 0x5a, 0x54, 0xb1, 0x6d, 0x52, 0x89, 0x96, 0x16, 0x0a, 0xeb, 0x78, 0xeb, 0xda, 0x24,
+ 0xb6, 0x35, 0xde, 0x80, 0x00, 0x09, 0xb3, 0xb6, 0x27, 0x66, 0x8b, 0x77, 0x67, 0xd9, 0x1d, 0x5b,
+ 0xad, 0x84, 0x40, 0xe2, 0x85, 0x37, 0x78, 0xe4, 0x3b, 0x20, 0x24, 0xc4, 0x13, 0x0f, 0x7c, 0x06,
+ 0x3e, 0x04, 0x12, 0x9f, 0x81, 0x17, 0x5e, 0xd0, 0xcc, 0xce, 0x3a, 0xde, 0xa4, 0x21, 0xb6, 0xa9,
+ 0xc4, 0x93, 0x77, 0xce, 0x9f, 0xdf, 0xf9, 0x3b, 0x67, 0x8e, 0x01, 0xf5, 0x88, 0xed, 0x76, 0x9f,
+ 0xd0, 0x5e, 0xd7, 0xf6, 0x1d, 0xdd, 0x0f, 0x28, 0xa3, 0xe8, 0x3a, 0x0d, 0x86, 0xba, 0xed, 0xdb,
+ 0xfd, 0x8f, 0x89, 0xce, 0xd9, 0xba, 0x4b, 0x07, 0x64, 0xa4, 0x73, 0x21, 0xd7, 0xf6, 0xec, 0x21,
+ 0x71, 0x89, 0xc7, 0xf4, 0xc9, 0x4e, 0x79, 0x43, 0x28, 0x07, 0x63, 0xcf, 0x23, 0xc1, 0xb1, 0x7e,
+ 0x79, 0x8d, 0x78, 0x03, 0x9f, 0x3a, 0x1e, 0x0b, 0x25, 0xe1, 0xdf, 0x43, 0x4a, 0x87, 0x23, 0x72,
+ 0x4b, 0x9c, 0x7a, 0xe3, 0xa3, 0x5b, 0x21, 0x0b, 0xc6, 0x7d, 0x26, 0xb9, 0xd7, 0x4e, 0x72, 0x99,
+ 0xe3, 0x92, 0x90, 0xd9, 0xae, 0x2f, 0x05, 0x8a, 0x2e, 0x61, 0x81, 0xd3, 0x97, 0x68, 0xda, 0xcf,
+ 0x0a, 0xac, 0xb7, 0x03, 0xe2, 0xdb, 0x01, 0x69, 0xd0, 0x1e, 0x26, 0x9f, 0x8e, 0x49, 0xc8, 0x50,
+ 0x0d, 0x72, 0xbe, 0xe3, 0x93, 0x91, 0xe3, 0x91, 0x2d, 0x65, 0x5b, 0xb9, 0x51, 0xd8, 0xbd, 0xa9,
+ 0x3f, 0x3f, 0x8e, 0x58, 0x4c, 0x9f, 0xec, 0xe8, 0x6d, 0xf9, 0x8d, 0xa7, 0xca, 0xa8, 0x02, 0x6a,
+ 0xfc, 0xdd, 0xa5, 0x3e, 0x73, 0xa8, 0x17, 0x6e, 0xa5, 0x04, 0xe0, 0xa6, 0x1e, 0x79, 0xaa, 0xc7,
+ 0x9e, 0xea, 0x1d, 0x11, 0x07, 0x5e, 0x8b, 0x15, 0x5a, 0x91, 0x3c, 0xba, 0x02, 0x39, 0x9e, 0x2d,
+ 0xcf, 0x76, 0xc9, 0xd6, 0xca, 0xb6, 0x72, 0x23, 0x8f, 0xb3, 0x4f, 0x68, 0xaf, 0x69, 0xbb, 0x44,
+ 0xfb, 0x4d, 0x01, 0x34, 0xeb, 0x7d, 0xe8, 0x53, 0x2f, 0x24, 0xe8, 0xff, 0x50, 0xf2, 0x05, 0xd5,
+ 0xe6, 0x08, 0x5d, 0x67, 0x20, 0x82, 0xc8, 0xe3, 0xe2, 0x0c, 0xb5, 0x3e, 0x40, 0x21, 0x5c, 0xb1,
+ 0x03, 0xe6, 0x1c, 0xd9, 0x7d, 0xd6, 0x0d, 0x99, 0x3d, 0x74, 0xbc, 0x61, 0x37, 0xce, 0xb6, 0xf4,
+ 0xf2, 0xee, 0x1c, 0x61, 0x1b, 0xbe, 0xd3, 0x21, 0xc1, 0xc4, 0xe9, 0x93, 0x2a, 0x09, 0xfb, 0x81,
+ 0xe3, 0x33, 0x1a, 0xe0, 0xcd, 0x18, 0xb9, 0x13, 0x01, 0x9b, 0x12, 0x17, 0xed, 0xc2, 0x46, 0x6c,
+ 0x2b, 0x24, 0x61, 0xc8, 0xfd, 0x63, 0xf4, 0x13, 0xe2, 0xc9, 0xd0, 0x2e, 0x49, 0x66, 0x27, 0xe2,
+ 0x59, 0x9c, 0xa5, 0x75, 0xa1, 0x88, 0xc7, 0xde, 0x4c, 0x7d, 0xe6, 0x0c, 0xf0, 0x3a, 0xac, 0x05,
+ 0xbc, 0xda, 0x64, 0x62, 0x8f, 0xa4, 0x95, 0x94, 0x90, 0x2b, 0x4d, 0xc9, 0x91, 0x81, 0xeb, 0x50,
+ 0x8a, 0x0d, 0xc8, 0x14, 0x6e, 0xc0, 0x2a, 0x4f, 0xfa, 0x14, 0x39, 0xf3, 0x84, 0xf6, 0xea, 0x03,
+ 0xed, 0x65, 0x50, 0xf7, 0x6c, 0xaf, 0x4f, 0x46, 0x33, 0xce, 0x9c, 0x21, 0x6a, 0xc3, 0xfa, 0x8c,
+ 0xa8, 0x84, 0xdd, 0x87, 0x4c, 0xc8, 0x6c, 0x16, 0x75, 0x55, 0x69, 0xf7, 0x55, 0x7d, 0xce, 0xdb,
+ 0xa1, 0x37, 0x68, 0xaf, 0xc3, 0x15, 0x75, 0xd3, 0x1b, 0xbb, 0x38, 0x02, 0xd1, 0x7e, 0x51, 0x20,
+ 0xdb, 0xa0, 0xbd, 0xba, 0x77, 0x44, 0xcf, 0xf0, 0x22, 0xd1, 0x3c, 0xa9, 0x44, 0xf3, 0x3c, 0xb7,
+ 0x37, 0x57, 0x16, 0xec, 0xcd, 0x69, 0x3c, 0xe9, 0x17, 0x11, 0x8f, 0x0a, 0xa5, 0x1a, 0x61, 0x0d,
+ 0xda, 0x0b, 0x65, 0x6e, 0xb5, 0x0f, 0x61, 0x6d, 0x4a, 0x91, 0x29, 0x7c, 0x3b, 0x8a, 0xc8, 0xf1,
+ 0x8e, 0xe8, 0x96, 0xb2, 0xbd, 0x72, 0xa3, 0xb0, 0x7b, 0x7b, 0x11, 0xab, 0x3c, 0x59, 0x22, 0x07,
+ 0xfc, 0x43, 0xbb, 0x09, 0x28, 0xc2, 0x17, 0xce, 0x9c, 0x53, 0xd1, 0x6f, 0x15, 0x28, 0xc6, 0xa2,
+ 0xe6, 0x84, 0x78, 0xec, 0xc5, 0x96, 0x13, 0xdd, 0x83, 0xfc, 0x74, 0x5a, 0xc9, 0xfb, 0x57, 0x3e,
+ 0x55, 0x09, 0x2b, 0x96, 0xc0, 0xc7, 0xc2, 0x9a, 0x0e, 0x1b, 0x51, 0x18, 0xd3, 0x11, 0x74, 0x5e,
+ 0x6f, 0xfe, 0xeb, 0xa4, 0xbc, 0xcc, 0xee, 0x8b, 0x9a, 0x7c, 0x3c, 0xb3, 0x0d, 0xda, 0x3b, 0x20,
+ 0x61, 0x68, 0x0f, 0x49, 0x78, 0x8e, 0x3f, 0x7f, 0xa4, 0x00, 0x8e, 0xa5, 0xd1, 0x7f, 0x00, 0xdc,
+ 0xe8, 0xf3, 0x58, 0x32, 0x2f, 0x29, 0xf5, 0x01, 0x42, 0x90, 0xe6, 0xa1, 0xcb, 0x7e, 0x16, 0xdf,
+ 0x88, 0x00, 0x38, 0xae, 0x4f, 0x03, 0xc6, 0xaf, 0x9c, 0x68, 0xe3, 0xd2, 0xae, 0xb9, 0x48, 0x39,
+ 0xa4, 0x6d, 0x5d, 0xfe, 0xd6, 0xa7, 0x60, 0x78, 0x06, 0x18, 0xfd, 0x17, 0x2e, 0xc6, 0x9e, 0x31,
+ 0xf2, 0x94, 0x89, 0xb6, 0xcf, 0xe3, 0x82, 0xa4, 0x59, 0xe4, 0x29, 0xd3, 0x7e, 0x50, 0x60, 0xfd,
+ 0x14, 0x08, 0xd2, 0xe0, 0xea, 0x81, 0xd9, 0xe9, 0x18, 0x35, 0xb3, 0x5b, 0x3f, 0x68, 0xb7, 0xb0,
+ 0x65, 0x34, 0xf7, 0xcc, 0xee, 0x61, 0xb3, 0xd3, 0x36, 0xf7, 0xea, 0x8f, 0xea, 0x66, 0x55, 0xbd,
+ 0x80, 0x36, 0x60, 0xbd, 0xd1, 0xaa, 0x74, 0x63, 0xb9, 0xaa, 0x59, 0x39, 0xac, 0xa9, 0x0a, 0xda,
+ 0x82, 0xcb, 0x49, 0xb2, 0x65, 0xd4, 0xf7, 0xcd, 0xaa, 0x9a, 0x3a, 0xa9, 0x50, 0x31, 0x3a, 0xf5,
+ 0x3d, 0x75, 0x05, 0x6d, 0xc2, 0xa5, 0x59, 0xf2, 0xbb, 0x06, 0x6e, 0xd6, 0x9b, 0x35, 0x35, 0x7d,
+ 0x52, 0xde, 0xc4, 0xb8, 0x85, 0xd5, 0x0c, 0x7f, 0x45, 0x2e, 0x25, 0x6a, 0x25, 0x7b, 0xe1, 0x23,
+ 0x50, 0xe3, 0x60, 0x03, 0x49, 0x93, 0x3d, 0x71, 0x67, 0x89, 0xcc, 0x3e, 0xbe, 0x80, 0xd7, 0x24,
+ 0xdc, 0xd4, 0x42, 0x17, 0x4a, 0xa2, 0xf5, 0x8f, 0xf1, 0xa3, 0xb6, 0x5f, 0xfc, 0x22, 0x89, 0xfb,
+ 0xf8, 0xf8, 0x02, 0x2e, 0x86, 0xd1, 0x45, 0x8e, 0xe0, 0x2a, 0x00, 0xb9, 0x18, 0x5a, 0xfb, 0x49,
+ 0x81, 0x5c, 0x2c, 0xae, 0x7d, 0xa7, 0x40, 0x9a, 0xdf, 0x3d, 0xb4, 0x06, 0x85, 0x64, 0x15, 0x0a,
+ 0x90, 0xed, 0x58, 0xad, 0x76, 0xdb, 0xac, 0xaa, 0x0a, 0x3f, 0xe0, 0xc3, 0xa6, 0x48, 0x5f, 0x0a,
+ 0xe5, 0x20, 0x5d, 0x6d, 0x35, 0x4d, 0x75, 0x05, 0x01, 0xac, 0x3e, 0x8a, 0x8a, 0x90, 0x46, 0x45,
+ 0xc8, 0xef, 0xf1, 0x62, 0xee, 0xf3, 0x63, 0x86, 0x6b, 0x1c, 0xb6, 0xab, 0x86, 0x65, 0x56, 0xd5,
+ 0x55, 0x74, 0x11, 0x72, 0x55, 0x6c, 0xd4, 0x85, 0x7e, 0x96, 0xb3, 0xc4, 0xc9, 0xac, 0xaa, 0x39,
+ 0xce, 0xea, 0x58, 0x06, 0xb6, 0x38, 0x2b, 0x8f, 0x4a, 0x00, 0x12, 0x84, 0x9f, 0x81, 0x73, 0x05,
+ 0x0a, 0x3f, 0x15, 0xb4, 0x57, 0xe0, 0x72, 0x74, 0x5d, 0x0f, 0xa2, 0xdd, 0xe5, 0x9c, 0xdb, 0xe4,
+ 0xc4, 0xd3, 0x60, 0x2a, 0x2e, 0xd3, 0xdd, 0x86, 0xac, 0xdc, 0x7e, 0x64, 0x1d, 0xe7, 0xcf, 0x73,
+ 0x04, 0x85, 0x49, 0x38, 0x1e, 0xb1, 0x10, 0xc7, 0x30, 0xda, 0x8f, 0x0a, 0x14, 0x13, 0x2c, 0xd4,
+ 0x82, 0xbc, 0xcd, 0x18, 0x71, 0x7d, 0x46, 0x06, 0x72, 0x3e, 0xef, 0xcc, 0x31, 0x41, 0x0e, 0xa8,
+ 0xe7, 0x30, 0x1a, 0x38, 0xde, 0x50, 0x0c, 0xe8, 0x63, 0x0c, 0x0e, 0xd8, 0xa7, 0xae, 0xeb, 0x30,
+ 0x0e, 0x98, 0x5a, 0x1a, 0x70, 0x8a, 0xa1, 0x6d, 0xc3, 0xd5, 0x68, 0x51, 0xe9, 0x91, 0x76, 0xf2,
+ 0x39, 0x8b, 0x5f, 0x1d, 0x02, 0x28, 0xc9, 0xb1, 0x9e, 0xf9, 0x44, 0x6b, 0xc9, 0x8e, 0x01, 0x58,
+ 0xed, 0x58, 0x98, 0x57, 0x46, 0x34, 0x4b, 0xa5, 0xd5, 0xda, 0x37, 0x8d, 0x66, 0xd4, 0x2c, 0xf5,
+ 0xa6, 0x65, 0xd6, 0x4c, 0xac, 0xa6, 0xb8, 0x54, 0xf3, 0xf0, 0xa0, 0x62, 0x62, 0x75, 0x05, 0xe5,
+ 0x21, 0x63, 0x60, 0x6c, 0xbc, 0xa7, 0xa6, 0x39, 0xb9, 0x55, 0x69, 0x98, 0x7b, 0x96, 0x9a, 0xd1,
+ 0x7e, 0x55, 0x60, 0x2b, 0x69, 0xe7, 0x78, 0x81, 0xe2, 0x43, 0x4e, 0x3c, 0xda, 0x51, 0x65, 0xc5,
+ 0x37, 0xb2, 0x20, 0xcd, 0x9e, 0xf9, 0xd1, 0x25, 0x29, 0xed, 0xbe, 0x35, 0x77, 0xf1, 0x4e, 0x07,
+ 0x13, 0xbd, 0x3b, 0x02, 0x0d, 0x6d, 0x43, 0x61, 0x20, 0xed, 0x3a, 0x34, 0xde, 0xc3, 0x66, 0x49,
+ 0xe8, 0x7f, 0x50, 0x1c, 0x90, 0x23, 0x7b, 0x3c, 0x62, 0xdd, 0x89, 0x3d, 0x1a, 0x13, 0x39, 0xf6,
+ 0x2e, 0x4a, 0xe2, 0x3b, 0x9c, 0x86, 0x2e, 0x43, 0x66, 0x18, 0xd0, 0xb1, 0xbf, 0x95, 0x89, 0x7a,
+ 0x51, 0x1c, 0xb4, 0xcf, 0xe1, 0xda, 0x99, 0xc9, 0x96, 0x5d, 0xf9, 0x01, 0x64, 0xe3, 0xf5, 0x23,
+ 0xea, 0x17, 0x63, 0xc9, 0xc0, 0x66, 0xd6, 0xcf, 0x18, 0x71, 0xf7, 0x77, 0x10, 0x2f, 0x8b, 0x5c,
+ 0x50, 0xd1, 0x97, 0x0a, 0x64, 0xe5, 0xc2, 0x8c, 0xee, 0xcf, 0x6f, 0xe6, 0xe4, 0x1f, 0x84, 0xf2,
+ 0x83, 0xa5, 0x74, 0x65, 0xc0, 0x13, 0x58, 0xc1, 0x63, 0x0f, 0xcd, 0x7f, 0xf9, 0x12, 0xcb, 0x6f,
+ 0xf9, 0xee, 0xc2, 0x7a, 0xd2, 0xee, 0x67, 0x90, 0x95, 0xcb, 0x14, 0x9a, 0x1f, 0x23, 0xb9, 0x90,
+ 0x95, 0xef, 0x2d, 0xae, 0x28, 0xad, 0x7f, 0x01, 0xb9, 0x1a, 0x61, 0x62, 0xfa, 0xa2, 0x07, 0x0b,
+ 0xa2, 0xcc, 0x6e, 0x67, 0xe5, 0x25, 0x1f, 0x07, 0xf4, 0xb5, 0x02, 0x85, 0x1a, 0x61, 0x71, 0xcf,
+ 0xa0, 0x87, 0x0b, 0x3a, 0x71, 0x62, 0xb7, 0x2a, 0xbf, 0xb9, 0xb4, 0xfe, 0x34, 0x23, 0xab, 0xd1,
+ 0x3f, 0x04, 0xf4, 0xda, 0xdc, 0x50, 0x27, 0xff, 0x7d, 0x94, 0xef, 0x2f, 0xa3, 0x2a, 0x1d, 0xf8,
+ 0x4a, 0x11, 0x0b, 0xb7, 0xc8, 0x51, 0x87, 0x05, 0xc4, 0x76, 0xff, 0x91, 0xca, 0xdc, 0x56, 0xd0,
+ 0x37, 0x0a, 0xa8, 0x35, 0xc2, 0xe4, 0xaa, 0xb0, 0xb0, 0x2f, 0xa7, 0x37, 0xcd, 0xf2, 0xeb, 0xcb,
+ 0x29, 0x47, 0x99, 0xb9, 0xad, 0xf0, 0x6e, 0x29, 0x26, 0x5e, 0x51, 0xf4, 0xc6, 0x82, 0xa9, 0x49,
+ 0x3e, 0xd6, 0xe5, 0x87, 0xcb, 0xaa, 0xcb, 0x62, 0x7d, 0xaf, 0xc0, 0xe6, 0x19, 0xa3, 0x14, 0xd5,
+ 0xe6, 0xc6, 0xfe, 0xeb, 0x97, 0xaf, 0xfc, 0xf8, 0xef, 0x03, 0xc9, 0xcd, 0xab, 0x02, 0x2f, 0x9d,
+ 0x09, 0x95, 0x40, 0xaa, 0xac, 0x36, 0x68, 0xcf, 0xf0, 0x9d, 0xf7, 0xd5, 0x04, 0xa7, 0x3b, 0xd9,
+ 0xe9, 0xad, 0x8a, 0x7f, 0x3d, 0x77, 0xfe, 0x0c, 0x00, 0x00, 0xff, 0xff, 0xb7, 0xb1, 0x58, 0x7c,
+ 0x58, 0x12, 0x00, 0x00,
}
// Reference imports to suppress errors if they are not otherwise used.
@@ -1577,6 +1641,41 @@
DescribePipelineOptions(context.Context, *DescribePipelineOptionsRequest) (*DescribePipelineOptionsResponse, error)
}
+// UnimplementedJobServiceServer can be embedded to have forward compatible implementations.
+type UnimplementedJobServiceServer struct {
+}
+
+func (*UnimplementedJobServiceServer) Prepare(ctx context.Context, req *PrepareJobRequest) (*PrepareJobResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method Prepare not implemented")
+}
+func (*UnimplementedJobServiceServer) Run(ctx context.Context, req *RunJobRequest) (*RunJobResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method Run not implemented")
+}
+func (*UnimplementedJobServiceServer) GetJobs(ctx context.Context, req *GetJobsRequest) (*GetJobsResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method GetJobs not implemented")
+}
+func (*UnimplementedJobServiceServer) GetState(ctx context.Context, req *GetJobStateRequest) (*JobStateEvent, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method GetState not implemented")
+}
+func (*UnimplementedJobServiceServer) GetPipeline(ctx context.Context, req *GetJobPipelineRequest) (*GetJobPipelineResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method GetPipeline not implemented")
+}
+func (*UnimplementedJobServiceServer) Cancel(ctx context.Context, req *CancelJobRequest) (*CancelJobResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method Cancel not implemented")
+}
+func (*UnimplementedJobServiceServer) GetStateStream(req *GetJobStateRequest, srv JobService_GetStateStreamServer) error {
+ return status.Errorf(codes.Unimplemented, "method GetStateStream not implemented")
+}
+func (*UnimplementedJobServiceServer) GetMessageStream(req *JobMessagesRequest, srv JobService_GetMessageStreamServer) error {
+ return status.Errorf(codes.Unimplemented, "method GetMessageStream not implemented")
+}
+func (*UnimplementedJobServiceServer) GetJobMetrics(ctx context.Context, req *GetJobMetricsRequest) (*GetJobMetricsResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method GetJobMetrics not implemented")
+}
+func (*UnimplementedJobServiceServer) DescribePipelineOptions(ctx context.Context, req *DescribePipelineOptionsRequest) (*DescribePipelineOptionsResponse, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method DescribePipelineOptions not implemented")
+}
+
func RegisterJobServiceServer(s *grpc.Server, srv JobServiceServer) {
s.RegisterService(&_JobService_serviceDesc, srv)
}
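UnimplementedJobServiceServer is the standard base type emitted by newer protoc-gen-go versions: a partial server can embed it so that RPCs added to JobService later return codes.Unimplemented instead of breaking compilation. A minimal sketch of that pattern, assuming the same jobmanagement_v1 import path as above (the stub type and function names are illustrative):

package example

import (
	"context"

	jobpb "github.com/apache/beam/sdks/go/pkg/beam/model/jobmanagement_v1" // assumed import path
	"google.golang.org/grpc"
)

// partialJobService implements only GetJobs; the embedded base supplies
// "Unimplemented" responses for every other JobService RPC.
type partialJobService struct {
	jobpb.UnimplementedJobServiceServer
}

func (s *partialJobService) GetJobs(ctx context.Context, req *jobpb.GetJobsRequest) (*jobpb.GetJobsResponse, error) {
	return &jobpb.GetJobsResponse{}, nil
}

func register(s *grpc.Server) {
	jobpb.RegisterJobServiceServer(s, &partialJobService{})
}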
@@ -1818,100 +1917,3 @@
},
Metadata: "beam_job_api.proto",
}
-
-func init() { proto.RegisterFile("beam_job_api.proto", fileDescriptor_beam_job_api_c1d5a4d112319449) }
-
-var fileDescriptor_beam_job_api_c1d5a4d112319449 = []byte{
- // 1444 bytes of a gzipped FileDescriptorProto
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xc4, 0x57, 0x5f, 0x6f, 0x1b, 0xc5,
- 0x16, 0xef, 0x3a, 0x76, 0x6c, 0x1f, 0xd7, 0xce, 0x66, 0xda, 0xdc, 0xa4, 0xd6, 0xbd, 0x6d, 0xee,
- 0x5e, 0x5d, 0x5a, 0x54, 0xb1, 0x6d, 0x52, 0x89, 0x96, 0x16, 0x0a, 0xeb, 0x78, 0xeb, 0xda, 0x24,
- 0xb6, 0x35, 0xde, 0x80, 0x00, 0x09, 0xb3, 0xb6, 0x27, 0x66, 0x8b, 0x77, 0x67, 0xd9, 0x1d, 0x5b,
- 0xad, 0x84, 0x40, 0xe2, 0x85, 0x37, 0x78, 0xe4, 0x3b, 0x20, 0x24, 0xc4, 0x13, 0x0f, 0x7c, 0x06,
- 0x3e, 0x04, 0x12, 0x9f, 0x81, 0x17, 0x5e, 0xd0, 0xcc, 0xce, 0x3a, 0xde, 0xa4, 0x21, 0xb6, 0xa9,
- 0xc4, 0x93, 0x77, 0xce, 0x9f, 0xdf, 0xf9, 0x3b, 0x67, 0x8e, 0x01, 0xf5, 0x88, 0xed, 0x76, 0x9f,
- 0xd0, 0x5e, 0xd7, 0xf6, 0x1d, 0xdd, 0x0f, 0x28, 0xa3, 0xe8, 0x3a, 0x0d, 0x86, 0xba, 0xed, 0xdb,
- 0xfd, 0x8f, 0x89, 0xce, 0xd9, 0xba, 0x4b, 0x07, 0x64, 0xa4, 0x73, 0x21, 0xd7, 0xf6, 0xec, 0x21,
- 0x71, 0x89, 0xc7, 0xf4, 0xc9, 0x4e, 0x79, 0x43, 0x28, 0x07, 0x63, 0xcf, 0x23, 0xc1, 0xb1, 0x7e,
- 0x79, 0x8d, 0x78, 0x03, 0x9f, 0x3a, 0x1e, 0x0b, 0x25, 0xe1, 0xdf, 0x43, 0x4a, 0x87, 0x23, 0x72,
- 0x4b, 0x9c, 0x7a, 0xe3, 0xa3, 0x5b, 0x21, 0x0b, 0xc6, 0x7d, 0x26, 0xb9, 0xd7, 0x4e, 0x72, 0x99,
- 0xe3, 0x92, 0x90, 0xd9, 0xae, 0x2f, 0x05, 0x8a, 0x2e, 0x61, 0x81, 0xd3, 0x97, 0x68, 0xda, 0xcf,
- 0x0a, 0xac, 0xb7, 0x03, 0xe2, 0xdb, 0x01, 0x69, 0xd0, 0x1e, 0x26, 0x9f, 0x8e, 0x49, 0xc8, 0x50,
- 0x0d, 0x72, 0xbe, 0xe3, 0x93, 0x91, 0xe3, 0x91, 0x2d, 0x65, 0x5b, 0xb9, 0x51, 0xd8, 0xbd, 0xa9,
- 0x3f, 0x3f, 0x8e, 0x58, 0x4c, 0x9f, 0xec, 0xe8, 0x6d, 0xf9, 0x8d, 0xa7, 0xca, 0xa8, 0x02, 0x6a,
- 0xfc, 0xdd, 0xa5, 0x3e, 0x73, 0xa8, 0x17, 0x6e, 0xa5, 0x04, 0xe0, 0xa6, 0x1e, 0x79, 0xaa, 0xc7,
- 0x9e, 0xea, 0x1d, 0x11, 0x07, 0x5e, 0x8b, 0x15, 0x5a, 0x91, 0x3c, 0xba, 0x02, 0x39, 0x9e, 0x2d,
- 0xcf, 0x76, 0xc9, 0xd6, 0xca, 0xb6, 0x72, 0x23, 0x8f, 0xb3, 0x4f, 0x68, 0xaf, 0x69, 0xbb, 0x44,
- 0xfb, 0x4d, 0x01, 0x34, 0xeb, 0x7d, 0xe8, 0x53, 0x2f, 0x24, 0xe8, 0xff, 0x50, 0xf2, 0x05, 0xd5,
- 0xe6, 0x08, 0x5d, 0x67, 0x20, 0x82, 0xc8, 0xe3, 0xe2, 0x0c, 0xb5, 0x3e, 0x40, 0x21, 0x5c, 0xb1,
- 0x03, 0xe6, 0x1c, 0xd9, 0x7d, 0xd6, 0x0d, 0x99, 0x3d, 0x74, 0xbc, 0x61, 0x37, 0xce, 0xb6, 0xf4,
- 0xf2, 0xee, 0x1c, 0x61, 0x1b, 0xbe, 0xd3, 0x21, 0xc1, 0xc4, 0xe9, 0x93, 0x2a, 0x09, 0xfb, 0x81,
- 0xe3, 0x33, 0x1a, 0xe0, 0xcd, 0x18, 0xb9, 0x13, 0x01, 0x9b, 0x12, 0x17, 0xed, 0xc2, 0x46, 0x6c,
- 0x2b, 0x24, 0x61, 0xc8, 0xfd, 0x63, 0xf4, 0x13, 0xe2, 0xc9, 0xd0, 0x2e, 0x49, 0x66, 0x27, 0xe2,
- 0x59, 0x9c, 0xa5, 0x75, 0xa1, 0x88, 0xc7, 0xde, 0x4c, 0x7d, 0xe6, 0x0c, 0xf0, 0x3a, 0xac, 0x05,
- 0xbc, 0xda, 0x64, 0x62, 0x8f, 0xa4, 0x95, 0x94, 0x90, 0x2b, 0x4d, 0xc9, 0x91, 0x81, 0xeb, 0x50,
- 0x8a, 0x0d, 0xc8, 0x14, 0x6e, 0xc0, 0x2a, 0x4f, 0xfa, 0x14, 0x39, 0xf3, 0x84, 0xf6, 0xea, 0x03,
- 0xed, 0x65, 0x50, 0xf7, 0x6c, 0xaf, 0x4f, 0x46, 0x33, 0xce, 0x9c, 0x21, 0x6a, 0xc3, 0xfa, 0x8c,
- 0xa8, 0x84, 0xdd, 0x87, 0x4c, 0xc8, 0x6c, 0x16, 0x75, 0x55, 0x69, 0xf7, 0x55, 0x7d, 0xce, 0xdb,
- 0xa1, 0x37, 0x68, 0xaf, 0xc3, 0x15, 0x75, 0xd3, 0x1b, 0xbb, 0x38, 0x02, 0xd1, 0x7e, 0x51, 0x20,
- 0xdb, 0xa0, 0xbd, 0xba, 0x77, 0x44, 0xcf, 0xf0, 0x22, 0xd1, 0x3c, 0xa9, 0x44, 0xf3, 0x3c, 0xb7,
- 0x37, 0x57, 0x16, 0xec, 0xcd, 0x69, 0x3c, 0xe9, 0x17, 0x11, 0x8f, 0x0a, 0xa5, 0x1a, 0x61, 0x0d,
- 0xda, 0x0b, 0x65, 0x6e, 0xb5, 0x0f, 0x61, 0x6d, 0x4a, 0x91, 0x29, 0x7c, 0x3b, 0x8a, 0xc8, 0xf1,
- 0x8e, 0xe8, 0x96, 0xb2, 0xbd, 0x72, 0xa3, 0xb0, 0x7b, 0x7b, 0x11, 0xab, 0x3c, 0x59, 0x22, 0x07,
- 0xfc, 0x43, 0xbb, 0x09, 0x28, 0xc2, 0x17, 0xce, 0x9c, 0x53, 0xd1, 0x6f, 0x15, 0x28, 0xc6, 0xa2,
- 0xe6, 0x84, 0x78, 0xec, 0xc5, 0x96, 0x13, 0xdd, 0x83, 0xfc, 0x74, 0x5a, 0xc9, 0xfb, 0x57, 0x3e,
- 0x55, 0x09, 0x2b, 0x96, 0xc0, 0xc7, 0xc2, 0x9a, 0x0e, 0x1b, 0x51, 0x18, 0xd3, 0x11, 0x74, 0x5e,
- 0x6f, 0xfe, 0xeb, 0xa4, 0xbc, 0xcc, 0xee, 0x8b, 0x9a, 0x7c, 0x3c, 0xb3, 0x0d, 0xda, 0x3b, 0x20,
- 0x61, 0x68, 0x0f, 0x49, 0x78, 0x8e, 0x3f, 0x7f, 0xa4, 0x00, 0x8e, 0xa5, 0xd1, 0x7f, 0x00, 0xdc,
- 0xe8, 0xf3, 0x58, 0x32, 0x2f, 0x29, 0xf5, 0x01, 0x42, 0x90, 0xe6, 0xa1, 0xcb, 0x7e, 0x16, 0xdf,
- 0x88, 0x00, 0x38, 0xae, 0x4f, 0x03, 0xc6, 0xaf, 0x9c, 0x68, 0xe3, 0xd2, 0xae, 0xb9, 0x48, 0x39,
- 0xa4, 0x6d, 0x5d, 0xfe, 0xd6, 0xa7, 0x60, 0x78, 0x06, 0x18, 0xfd, 0x17, 0x2e, 0xc6, 0x9e, 0x31,
- 0xf2, 0x94, 0x89, 0xb6, 0xcf, 0xe3, 0x82, 0xa4, 0x59, 0xe4, 0x29, 0xd3, 0x7e, 0x50, 0x60, 0xfd,
- 0x14, 0x08, 0xd2, 0xe0, 0xea, 0x81, 0xd9, 0xe9, 0x18, 0x35, 0xb3, 0x5b, 0x3f, 0x68, 0xb7, 0xb0,
- 0x65, 0x34, 0xf7, 0xcc, 0xee, 0x61, 0xb3, 0xd3, 0x36, 0xf7, 0xea, 0x8f, 0xea, 0x66, 0x55, 0xbd,
- 0x80, 0x36, 0x60, 0xbd, 0xd1, 0xaa, 0x74, 0x63, 0xb9, 0xaa, 0x59, 0x39, 0xac, 0xa9, 0x0a, 0xda,
- 0x82, 0xcb, 0x49, 0xb2, 0x65, 0xd4, 0xf7, 0xcd, 0xaa, 0x9a, 0x3a, 0xa9, 0x50, 0x31, 0x3a, 0xf5,
- 0x3d, 0x75, 0x05, 0x6d, 0xc2, 0xa5, 0x59, 0xf2, 0xbb, 0x06, 0x6e, 0xd6, 0x9b, 0x35, 0x35, 0x7d,
- 0x52, 0xde, 0xc4, 0xb8, 0x85, 0xd5, 0x0c, 0x7f, 0x45, 0x2e, 0x25, 0x6a, 0x25, 0x7b, 0xe1, 0x23,
- 0x50, 0xe3, 0x60, 0x03, 0x49, 0x93, 0x3d, 0x71, 0x67, 0x89, 0xcc, 0x3e, 0xbe, 0x80, 0xd7, 0x24,
- 0xdc, 0xd4, 0x42, 0x17, 0x4a, 0xa2, 0xf5, 0x8f, 0xf1, 0xa3, 0xb6, 0x5f, 0xfc, 0x22, 0x89, 0xfb,
- 0xf8, 0xf8, 0x02, 0x2e, 0x86, 0xd1, 0x45, 0x8e, 0xe0, 0x2a, 0x00, 0xb9, 0x18, 0x5a, 0xfb, 0x49,
- 0x81, 0x5c, 0x2c, 0xae, 0x7d, 0xa7, 0x40, 0x9a, 0xdf, 0x3d, 0xb4, 0x06, 0x85, 0x64, 0x15, 0x0a,
- 0x90, 0xed, 0x58, 0xad, 0x76, 0xdb, 0xac, 0xaa, 0x0a, 0x3f, 0xe0, 0xc3, 0xa6, 0x48, 0x5f, 0x0a,
- 0xe5, 0x20, 0x5d, 0x6d, 0x35, 0x4d, 0x75, 0x05, 0x01, 0xac, 0x3e, 0x8a, 0x8a, 0x90, 0x46, 0x45,
- 0xc8, 0xef, 0xf1, 0x62, 0xee, 0xf3, 0x63, 0x86, 0x6b, 0x1c, 0xb6, 0xab, 0x86, 0x65, 0x56, 0xd5,
- 0x55, 0x74, 0x11, 0x72, 0x55, 0x6c, 0xd4, 0x85, 0x7e, 0x96, 0xb3, 0xc4, 0xc9, 0xac, 0xaa, 0x39,
- 0xce, 0xea, 0x58, 0x06, 0xb6, 0x38, 0x2b, 0x8f, 0x4a, 0x00, 0x12, 0x84, 0x9f, 0x81, 0x73, 0x05,
- 0x0a, 0x3f, 0x15, 0xb4, 0x57, 0xe0, 0x72, 0x74, 0x5d, 0x0f, 0xa2, 0xdd, 0xe5, 0x9c, 0xdb, 0xe4,
- 0xc4, 0xd3, 0x60, 0x2a, 0x2e, 0xd3, 0xdd, 0x86, 0xac, 0xdc, 0x7e, 0x64, 0x1d, 0xe7, 0xcf, 0x73,
- 0x04, 0x85, 0x49, 0x38, 0x1e, 0xb1, 0x10, 0xc7, 0x30, 0xda, 0x8f, 0x0a, 0x14, 0x13, 0x2c, 0xd4,
- 0x82, 0xbc, 0xcd, 0x18, 0x71, 0x7d, 0x46, 0x06, 0x72, 0x3e, 0xef, 0xcc, 0x31, 0x41, 0x0e, 0xa8,
- 0xe7, 0x30, 0x1a, 0x38, 0xde, 0x50, 0x0c, 0xe8, 0x63, 0x0c, 0x0e, 0xd8, 0xa7, 0xae, 0xeb, 0x30,
- 0x0e, 0x98, 0x5a, 0x1a, 0x70, 0x8a, 0xa1, 0x6d, 0xc3, 0xd5, 0x68, 0x51, 0xe9, 0x91, 0x76, 0xf2,
- 0x39, 0x8b, 0x5f, 0x1d, 0x02, 0x28, 0xc9, 0xb1, 0x9e, 0xf9, 0x44, 0x6b, 0xc9, 0x8e, 0x01, 0x58,
- 0xed, 0x58, 0x98, 0x57, 0x46, 0x34, 0x4b, 0xa5, 0xd5, 0xda, 0x37, 0x8d, 0x66, 0xd4, 0x2c, 0xf5,
- 0xa6, 0x65, 0xd6, 0x4c, 0xac, 0xa6, 0xb8, 0x54, 0xf3, 0xf0, 0xa0, 0x62, 0x62, 0x75, 0x05, 0xe5,
- 0x21, 0x63, 0x60, 0x6c, 0xbc, 0xa7, 0xa6, 0x39, 0xb9, 0x55, 0x69, 0x98, 0x7b, 0x96, 0x9a, 0xd1,
- 0x7e, 0x55, 0x60, 0x2b, 0x69, 0xe7, 0x78, 0x81, 0xe2, 0x43, 0x4e, 0x3c, 0xda, 0x51, 0x65, 0xc5,
- 0x37, 0xb2, 0x20, 0xcd, 0x9e, 0xf9, 0xd1, 0x25, 0x29, 0xed, 0xbe, 0x35, 0x77, 0xf1, 0x4e, 0x07,
- 0x13, 0xbd, 0x3b, 0x02, 0x0d, 0x6d, 0x43, 0x61, 0x20, 0xed, 0x3a, 0x34, 0xde, 0xc3, 0x66, 0x49,
- 0xe8, 0x7f, 0x50, 0x1c, 0x90, 0x23, 0x7b, 0x3c, 0x62, 0xdd, 0x89, 0x3d, 0x1a, 0x13, 0x39, 0xf6,
- 0x2e, 0x4a, 0xe2, 0x3b, 0x9c, 0x86, 0x2e, 0x43, 0x66, 0x18, 0xd0, 0xb1, 0xbf, 0x95, 0x89, 0x7a,
- 0x51, 0x1c, 0xb4, 0xcf, 0xe1, 0xda, 0x99, 0xc9, 0x96, 0x5d, 0xf9, 0x01, 0x64, 0xe3, 0xf5, 0x23,
- 0xea, 0x17, 0x63, 0xc9, 0xc0, 0x66, 0xd6, 0xcf, 0x18, 0x71, 0xf7, 0x77, 0x10, 0x2f, 0x8b, 0x5c,
- 0x50, 0xd1, 0x97, 0x0a, 0x64, 0xe5, 0xc2, 0x8c, 0xee, 0xcf, 0x6f, 0xe6, 0xe4, 0x1f, 0x84, 0xf2,
- 0x83, 0xa5, 0x74, 0x65, 0xc0, 0x13, 0x58, 0xc1, 0x63, 0x0f, 0xcd, 0x7f, 0xf9, 0x12, 0xcb, 0x6f,
- 0xf9, 0xee, 0xc2, 0x7a, 0xd2, 0xee, 0x67, 0x90, 0x95, 0xcb, 0x14, 0x9a, 0x1f, 0x23, 0xb9, 0x90,
- 0x95, 0xef, 0x2d, 0xae, 0x28, 0xad, 0x7f, 0x01, 0xb9, 0x1a, 0x61, 0x62, 0xfa, 0xa2, 0x07, 0x0b,
- 0xa2, 0xcc, 0x6e, 0x67, 0xe5, 0x25, 0x1f, 0x07, 0xf4, 0xb5, 0x02, 0x85, 0x1a, 0x61, 0x71, 0xcf,
- 0xa0, 0x87, 0x0b, 0x3a, 0x71, 0x62, 0xb7, 0x2a, 0xbf, 0xb9, 0xb4, 0xfe, 0x34, 0x23, 0xab, 0xd1,
- 0x3f, 0x04, 0xf4, 0xda, 0xdc, 0x50, 0x27, 0xff, 0x7d, 0x94, 0xef, 0x2f, 0xa3, 0x2a, 0x1d, 0xf8,
- 0x4a, 0x11, 0x0b, 0xb7, 0xc8, 0x51, 0x87, 0x05, 0xc4, 0x76, 0xff, 0x91, 0xca, 0xdc, 0x56, 0xd0,
- 0x37, 0x0a, 0xa8, 0x35, 0xc2, 0xe4, 0xaa, 0xb0, 0xb0, 0x2f, 0xa7, 0x37, 0xcd, 0xf2, 0xeb, 0xcb,
- 0x29, 0x47, 0x99, 0xb9, 0xad, 0xf0, 0x6e, 0x29, 0x26, 0x5e, 0x51, 0xf4, 0xc6, 0x82, 0xa9, 0x49,
- 0x3e, 0xd6, 0xe5, 0x87, 0xcb, 0xaa, 0xcb, 0x62, 0x7d, 0xaf, 0xc0, 0xe6, 0x19, 0xa3, 0x14, 0xd5,
- 0xe6, 0xc6, 0xfe, 0xeb, 0x97, 0xaf, 0xfc, 0xf8, 0xef, 0x03, 0xc9, 0xcd, 0xab, 0x02, 0x2f, 0x9d,
- 0x09, 0x95, 0x40, 0xaa, 0xac, 0x36, 0x68, 0xcf, 0xf0, 0x9d, 0xf7, 0xd5, 0x04, 0xa7, 0x3b, 0xd9,
- 0xe9, 0xad, 0x8a, 0x7f, 0x3d, 0x77, 0xfe, 0x0c, 0x00, 0x00, 0xff, 0xff, 0xb7, 0xb1, 0x58, 0x7c,
- 0x58, 0x12, 0x00, 0x00,
-}
diff --git a/sdks/go/pkg/beam/model/pipeline_v1/beam_runner_api.pb.go b/sdks/go/pkg/beam/model/pipeline_v1/beam_runner_api.pb.go
index 39184e8..7959fba 100644
--- a/sdks/go/pkg/beam/model/pipeline_v1/beam_runner_api.pb.go
+++ b/sdks/go/pkg/beam/model/pipeline_v1/beam_runner_api.pb.go
@@ -3,16 +3,17 @@
package pipeline_v1
-import proto "github.com/golang/protobuf/proto"
-import fmt "fmt"
-import math "math"
-import descriptor "github.com/golang/protobuf/protoc-gen-go/descriptor"
-import any "github.com/golang/protobuf/ptypes/any"
-import _ "github.com/golang/protobuf/ptypes/timestamp"
-
import (
- context "golang.org/x/net/context"
+ context "context"
+ fmt "fmt"
+ proto "github.com/golang/protobuf/proto"
+ descriptor "github.com/golang/protobuf/protoc-gen-go/descriptor"
+ any "github.com/golang/protobuf/ptypes/any"
+ _ "github.com/golang/protobuf/ptypes/timestamp"
grpc "google.golang.org/grpc"
+ codes "google.golang.org/grpc/codes"
+ status "google.golang.org/grpc/status"
+ math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
@@ -24,7 +25,7 @@
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
-const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
type BeamConstants_Constants int32
@@ -47,6 +48,7 @@
1: "MAX_TIMESTAMP_MILLIS",
2: "GLOBAL_WINDOW_MAX_TIMESTAMP_MILLIS",
}
+
var BeamConstants_Constants_value = map[string]int32{
"MIN_TIMESTAMP_MILLIS": 0,
"MAX_TIMESTAMP_MILLIS": 1,
@@ -56,8 +58,9 @@
func (x BeamConstants_Constants) String() string {
return proto.EnumName(BeamConstants_Constants_name, int32(x))
}
+
func (BeamConstants_Constants) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{0, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{0, 0}
}
type StandardPTransforms_Primitives int32
@@ -121,6 +124,7 @@
6: "MAP_WINDOWS",
7: "MERGE_WINDOWS",
}
+
var StandardPTransforms_Primitives_value = map[string]int32{
"PAR_DO": 0,
"FLATTEN": 1,
@@ -135,8 +139,9 @@
func (x StandardPTransforms_Primitives) String() string {
return proto.EnumName(StandardPTransforms_Primitives_name, int32(x))
}
+
func (StandardPTransforms_Primitives) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{4, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{4, 0}
}
type StandardPTransforms_DeprecatedPrimitives int32
@@ -154,6 +159,7 @@
0: "READ",
1: "CREATE_VIEW",
}
+
var StandardPTransforms_DeprecatedPrimitives_value = map[string]int32{
"READ": 0,
"CREATE_VIEW": 1,
@@ -162,8 +168,9 @@
func (x StandardPTransforms_DeprecatedPrimitives) String() string {
return proto.EnumName(StandardPTransforms_DeprecatedPrimitives_name, int32(x))
}
+
func (StandardPTransforms_DeprecatedPrimitives) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{4, 1}
+ return fileDescriptor_cf57597c3a9659a9, []int{4, 1}
}
type StandardPTransforms_Composites int32
@@ -191,6 +198,7 @@
2: "RESHUFFLE",
3: "WRITE_FILES",
}
+
var StandardPTransforms_Composites_value = map[string]int32{
"COMBINE_PER_KEY": 0,
"COMBINE_GLOBALLY": 1,
@@ -201,8 +209,9 @@
func (x StandardPTransforms_Composites) String() string {
return proto.EnumName(StandardPTransforms_Composites_name, int32(x))
}
+
func (StandardPTransforms_Composites) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{4, 2}
+ return fileDescriptor_cf57597c3a9659a9, []int{4, 2}
}
// Payload for all of these: CombinePayload
@@ -237,6 +246,7 @@
2: "COMBINE_PER_KEY_EXTRACT_OUTPUTS",
3: "COMBINE_GROUPED_VALUES",
}
+
var StandardPTransforms_CombineComponents_value = map[string]int32{
"COMBINE_PER_KEY_PRECOMBINE": 0,
"COMBINE_PER_KEY_MERGE_ACCUMULATORS": 1,
@@ -247,8 +257,9 @@
func (x StandardPTransforms_CombineComponents) String() string {
return proto.EnumName(StandardPTransforms_CombineComponents_name, int32(x))
}
+
func (StandardPTransforms_CombineComponents) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{4, 3}
+ return fileDescriptor_cf57597c3a9659a9, []int{4, 3}
}
// Payload for all of these: ParDoPayload containing the user's SDF
@@ -295,6 +306,7 @@
4: "SPLIT_AND_SIZE_RESTRICTIONS",
5: "PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS",
}
+
var StandardPTransforms_SplittableParDoComponents_value = map[string]int32{
"PAIR_WITH_RESTRICTION": 0,
"SPLIT_RESTRICTION": 1,
@@ -307,8 +319,9 @@
func (x StandardPTransforms_SplittableParDoComponents) String() string {
return proto.EnumName(StandardPTransforms_SplittableParDoComponents_name, int32(x))
}
+
func (StandardPTransforms_SplittableParDoComponents) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{4, 4}
+ return fileDescriptor_cf57597c3a9659a9, []int{4, 4}
}
type StandardSideInputTypes_Enum int32
@@ -330,6 +343,7 @@
0: "ITERABLE",
1: "MULTIMAP",
}
+
var StandardSideInputTypes_Enum_value = map[string]int32{
"ITERABLE": 0,
"MULTIMAP": 1,
@@ -338,8 +352,9 @@
func (x StandardSideInputTypes_Enum) String() string {
return proto.EnumName(StandardSideInputTypes_Enum_name, int32(x))
}
+
func (StandardSideInputTypes_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{5, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{5, 0}
}
type Parameter_Type_Enum int32
@@ -357,6 +372,7 @@
2: "PIPELINE_OPTIONS",
3: "RESTRICTION_TRACKER",
}
+
var Parameter_Type_Enum_value = map[string]int32{
"UNSPECIFIED": 0,
"WINDOW": 1,
@@ -367,8 +383,9 @@
func (x Parameter_Type_Enum) String() string {
return proto.EnumName(Parameter_Type_Enum_name, int32(x))
}
+
func (Parameter_Type_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{8, 0, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{8, 0, 0}
}
type IsBounded_Enum int32
@@ -384,6 +401,7 @@
1: "UNBOUNDED",
2: "BOUNDED",
}
+
var IsBounded_Enum_value = map[string]int32{
"UNSPECIFIED": 0,
"UNBOUNDED": 1,
@@ -393,8 +411,9 @@
func (x IsBounded_Enum) String() string {
return proto.EnumName(IsBounded_Enum_name, int32(x))
}
+
func (IsBounded_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{16, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{16, 0}
}
type StandardCoders_Enum int32
@@ -462,6 +481,15 @@
// of the element
// Components: The element coder and the window coder, in that order
StandardCoders_WINDOWED_VALUE StandardCoders_Enum = 8
+ // A windowed value coder with parameterized timestamp, windows and pane info.
+ // Encodes an element with only the value of the windowed value.
+ // Decodes the value and assigns the parameterized timestamp, windows and pane info to the
+ // windowed value.
+ // Components: The element coder and the window coder, in that order
+ // The payload of this coder is an encoded windowed value using the
+ // beam:coder:windowed_value:v1 coder parameterized by a beam:coder:bytes:v1
+ // element coder and the window coder that this param_windowed_value coder uses.
+ StandardCoders_PARAM_WINDOWED_VALUE StandardCoders_Enum = 14
// Encodes an iterable of elements, some of which may be stored elsewhere.
//
// The encoding for a state-backed iterable is the same as that for
@@ -530,9 +558,11 @@
6: "LENGTH_PREFIX",
7: "GLOBAL_WINDOW",
8: "WINDOWED_VALUE",
+ 14: "PARAM_WINDOWED_VALUE",
9: "STATE_BACKED_ITERABLE",
13: "ROW",
}
+
var StandardCoders_Enum_value = map[string]int32{
"BYTES": 0,
"STRING_UTF8": 10,
@@ -546,15 +576,17 @@
"LENGTH_PREFIX": 6,
"GLOBAL_WINDOW": 7,
"WINDOWED_VALUE": 8,
+ "PARAM_WINDOWED_VALUE": 14,
"STATE_BACKED_ITERABLE": 9,
- "ROW": 13,
+ "ROW": 13,
}
func (x StandardCoders_Enum) String() string {
return proto.EnumName(StandardCoders_Enum_name, int32(x))
}
+
func (StandardCoders_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{24, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{24, 0}
}
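PARAM_WINDOWED_VALUE (value 14) is the only new entry in StandardCoders_Enum; the remaining changes in these maps are descriptor renames and gofmt realignment. A tiny sketch of how the regenerated name/value maps expose the new entry, with an assumed import path and illustrative function name:

package example

import (
	"fmt"

	pipepb "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1" // assumed import path
)

// printNewCoderEntry shows the round trip through the generated enum maps.
func printNewCoderEntry() {
	c := pipepb.StandardCoders_PARAM_WINDOWED_VALUE                       // == 14, added by this regeneration
	fmt.Println(c.String())                                               // "PARAM_WINDOWED_VALUE"
	fmt.Println(pipepb.StandardCoders_Enum_value["PARAM_WINDOWED_VALUE"]) // 14
}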
type MergeStatus_Enum int32
@@ -580,6 +612,7 @@
2: "NEEDS_MERGE",
3: "ALREADY_MERGED",
}
+
var MergeStatus_Enum_value = map[string]int32{
"UNSPECIFIED": 0,
"NON_MERGING": 1,
@@ -590,8 +623,9 @@
func (x MergeStatus_Enum) String() string {
return proto.EnumName(MergeStatus_Enum_name, int32(x))
}
+
func (MergeStatus_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{26, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{26, 0}
}
type AccumulationMode_Enum int32
@@ -612,6 +646,7 @@
2: "ACCUMULATING",
3: "RETRACTING",
}
+
var AccumulationMode_Enum_value = map[string]int32{
"UNSPECIFIED": 0,
"DISCARDING": 1,
@@ -622,8 +657,9 @@
func (x AccumulationMode_Enum) String() string {
return proto.EnumName(AccumulationMode_Enum_name, int32(x))
}
+
func (AccumulationMode_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{27, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{27, 0}
}
type ClosingBehavior_Enum int32
@@ -642,6 +678,7 @@
1: "EMIT_ALWAYS",
2: "EMIT_IF_NONEMPTY",
}
+
var ClosingBehavior_Enum_value = map[string]int32{
"UNSPECIFIED": 0,
"EMIT_ALWAYS": 1,
@@ -651,8 +688,9 @@
func (x ClosingBehavior_Enum) String() string {
return proto.EnumName(ClosingBehavior_Enum_name, int32(x))
}
+
func (ClosingBehavior_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{28, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{28, 0}
}
type OnTimeBehavior_Enum int32
@@ -671,6 +709,7 @@
1: "FIRE_ALWAYS",
2: "FIRE_IF_NONEMPTY",
}
+
var OnTimeBehavior_Enum_value = map[string]int32{
"UNSPECIFIED": 0,
"FIRE_ALWAYS": 1,
@@ -680,8 +719,9 @@
func (x OnTimeBehavior_Enum) String() string {
return proto.EnumName(OnTimeBehavior_Enum_name, int32(x))
}
+
func (OnTimeBehavior_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{29, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{29, 0}
}
type OutputTime_Enum int32
@@ -704,6 +744,7 @@
2: "LATEST_IN_PANE",
3: "EARLIEST_IN_PANE",
}
+
var OutputTime_Enum_value = map[string]int32{
"UNSPECIFIED": 0,
"END_OF_WINDOW": 1,
@@ -714,8 +755,9 @@
func (x OutputTime_Enum) String() string {
return proto.EnumName(OutputTime_Enum_name, int32(x))
}
+
func (OutputTime_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{30, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{30, 0}
}
type TimeDomain_Enum int32
@@ -741,6 +783,7 @@
2: "PROCESSING_TIME",
3: "SYNCHRONIZED_PROCESSING_TIME",
}
+
var TimeDomain_Enum_value = map[string]int32{
"UNSPECIFIED": 0,
"EVENT_TIME": 1,
@@ -751,8 +794,9 @@
func (x TimeDomain_Enum) String() string {
return proto.EnumName(TimeDomain_Enum_name, int32(x))
}
+
func (TimeDomain_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{31, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{31, 0}
}
type StandardEnvironments_Environments int32
@@ -768,6 +812,7 @@
1: "PROCESS",
2: "EXTERNAL",
}
+
var StandardEnvironments_Environments_value = map[string]int32{
"DOCKER": 0,
"PROCESS": 1,
@@ -777,8 +822,9 @@
func (x StandardEnvironments_Environments) String() string {
return proto.EnumName(StandardEnvironments_Environments_name, int32(x))
}
+
func (StandardEnvironments_Environments) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{36, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{37, 0}
}
type DisplayData_Type_Enum int32
@@ -804,6 +850,7 @@
6: "DURATION",
7: "JAVA_CLASS",
}
+
var DisplayData_Type_Enum_value = map[string]int32{
"UNSPECIFIED": 0,
"STRING": 1,
@@ -818,8 +865,9 @@
func (x DisplayData_Type_Enum) String() string {
return proto.EnumName(DisplayData_Type_Enum_name, int32(x))
}
+
func (DisplayData_Type_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{42, 2, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{42, 2, 0}
}
type BeamConstants struct {
@@ -832,16 +880,17 @@
func (m *BeamConstants) String() string { return proto.CompactTextString(m) }
func (*BeamConstants) ProtoMessage() {}
func (*BeamConstants) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{0}
+ return fileDescriptor_cf57597c3a9659a9, []int{0}
}
+
func (m *BeamConstants) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_BeamConstants.Unmarshal(m, b)
}
func (m *BeamConstants) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_BeamConstants.Marshal(b, m, deterministic)
}
-func (dst *BeamConstants) XXX_Merge(src proto.Message) {
- xxx_messageInfo_BeamConstants.Merge(dst, src)
+func (m *BeamConstants) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_BeamConstants.Merge(m, src)
}
func (m *BeamConstants) XXX_Size() int {
return xxx_messageInfo_BeamConstants.Size(m)
@@ -874,16 +923,17 @@
func (m *Components) String() string { return proto.CompactTextString(m) }
func (*Components) ProtoMessage() {}
func (*Components) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{1}
+ return fileDescriptor_cf57597c3a9659a9, []int{1}
}
+
func (m *Components) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Components.Unmarshal(m, b)
}
func (m *Components) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Components.Marshal(b, m, deterministic)
}
-func (dst *Components) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Components.Merge(dst, src)
+func (m *Components) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Components.Merge(m, src)
}
func (m *Components) XXX_Size() int {
return xxx_messageInfo_Components.Size(m)
@@ -958,16 +1008,17 @@
func (m *Pipeline) String() string { return proto.CompactTextString(m) }
func (*Pipeline) ProtoMessage() {}
func (*Pipeline) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{2}
+ return fileDescriptor_cf57597c3a9659a9, []int{2}
}
+
func (m *Pipeline) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Pipeline.Unmarshal(m, b)
}
func (m *Pipeline) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Pipeline.Marshal(b, m, deterministic)
}
-func (dst *Pipeline) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Pipeline.Merge(dst, src)
+func (m *Pipeline) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Pipeline.Merge(m, src)
}
func (m *Pipeline) XXX_Size() int {
return xxx_messageInfo_Pipeline.Size(m)
@@ -1062,26 +1113,31 @@
// (Optional) Static display data for this PTransform application. If
// there is none, or it is not relevant (such as use by the Fn API)
// then it may be omitted.
- DisplayData *DisplayData `protobuf:"bytes,6,opt,name=display_data,json=displayData,proto3" json:"display_data,omitempty"`
- XXX_NoUnkeyedLiteral struct{} `json:"-"`
- XXX_unrecognized []byte `json:"-"`
- XXX_sizecache int32 `json:"-"`
+ DisplayData *DisplayData `protobuf:"bytes,6,opt,name=display_data,json=displayData,proto3" json:"display_data,omitempty"`
+	// (Optional) The environment in which the current PTransform should be executed.
+	// The runner that executes the pipeline may choose to override this if needed. If
+	// not specified, the environment will be decided by the runner.
+ EnvironmentId string `protobuf:"bytes,7,opt,name=environment_id,json=environmentId,proto3" json:"environment_id,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
}
func (m *PTransform) Reset() { *m = PTransform{} }
func (m *PTransform) String() string { return proto.CompactTextString(m) }
func (*PTransform) ProtoMessage() {}
func (*PTransform) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{3}
+ return fileDescriptor_cf57597c3a9659a9, []int{3}
}
+
func (m *PTransform) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_PTransform.Unmarshal(m, b)
}
func (m *PTransform) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_PTransform.Marshal(b, m, deterministic)
}
-func (dst *PTransform) XXX_Merge(src proto.Message) {
- xxx_messageInfo_PTransform.Merge(dst, src)
+func (m *PTransform) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_PTransform.Merge(m, src)
}
func (m *PTransform) XXX_Size() int {
return xxx_messageInfo_PTransform.Size(m)
@@ -1134,6 +1190,13 @@
return nil
}
+func (m *PTransform) GetEnvironmentId() string {
+ if m != nil {
+ return m.EnvironmentId
+ }
+ return ""
+}
+
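The new optional environment_id field and its GetEnvironmentId accessor let a PTransform carry an explicit environment reference; when the field is empty, the runner chooses the environment. A minimal sketch of setting and reading the field (import path, function name, and the environment id string are illustrative):

package example

import (
	pipepb "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1" // assumed import path
)

// transformEnvironment sets the new field and reads it back through the accessor;
// an empty string means "let the runner decide".
func transformEnvironment() string {
	t := &pipepb.PTransform{
		EnvironmentId: "go-docker-env", // hypothetical environment reference
	}
	return t.GetEnvironmentId()
}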
type StandardPTransforms struct {
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
@@ -1144,16 +1207,17 @@
func (m *StandardPTransforms) String() string { return proto.CompactTextString(m) }
func (*StandardPTransforms) ProtoMessage() {}
func (*StandardPTransforms) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{4}
+ return fileDescriptor_cf57597c3a9659a9, []int{4}
}
+
func (m *StandardPTransforms) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StandardPTransforms.Unmarshal(m, b)
}
func (m *StandardPTransforms) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StandardPTransforms.Marshal(b, m, deterministic)
}
-func (dst *StandardPTransforms) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StandardPTransforms.Merge(dst, src)
+func (m *StandardPTransforms) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StandardPTransforms.Merge(m, src)
}
func (m *StandardPTransforms) XXX_Size() int {
return xxx_messageInfo_StandardPTransforms.Size(m)
@@ -1174,16 +1238,17 @@
func (m *StandardSideInputTypes) String() string { return proto.CompactTextString(m) }
func (*StandardSideInputTypes) ProtoMessage() {}
func (*StandardSideInputTypes) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{5}
+ return fileDescriptor_cf57597c3a9659a9, []int{5}
}
+
func (m *StandardSideInputTypes) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StandardSideInputTypes.Unmarshal(m, b)
}
func (m *StandardSideInputTypes) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StandardSideInputTypes.Marshal(b, m, deterministic)
}
-func (dst *StandardSideInputTypes) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StandardSideInputTypes.Merge(dst, src)
+func (m *StandardSideInputTypes) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StandardSideInputTypes.Merge(m, src)
}
func (m *StandardSideInputTypes) XXX_Size() int {
return xxx_messageInfo_StandardSideInputTypes.Size(m)
@@ -1224,16 +1289,17 @@
func (m *PCollection) String() string { return proto.CompactTextString(m) }
func (*PCollection) ProtoMessage() {}
func (*PCollection) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{6}
+ return fileDescriptor_cf57597c3a9659a9, []int{6}
}
+
func (m *PCollection) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_PCollection.Unmarshal(m, b)
}
func (m *PCollection) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_PCollection.Marshal(b, m, deterministic)
}
-func (dst *PCollection) XXX_Merge(src proto.Message) {
- xxx_messageInfo_PCollection.Merge(dst, src)
+func (m *PCollection) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_PCollection.Merge(m, src)
}
func (m *PCollection) XXX_Size() int {
return xxx_messageInfo_PCollection.Size(m)
@@ -1281,8 +1347,8 @@
// The payload for the primitive ParDo transform.
type ParDoPayload struct {
- // (Required) The SdkFunctionSpec of the DoFn.
- DoFn *SdkFunctionSpec `protobuf:"bytes,1,opt,name=do_fn,json=doFn,proto3" json:"do_fn,omitempty"`
+ // (Required) The FunctionSpec of the DoFn.
+ DoFn *FunctionSpec `protobuf:"bytes,1,opt,name=do_fn,json=doFn,proto3" json:"do_fn,omitempty"`
// (Required) Additional pieces of context the DoFn may require that
// are not otherwise represented in the payload.
// (may force runners to execute the ParDo differently)
@@ -1309,16 +1375,17 @@
func (m *ParDoPayload) String() string { return proto.CompactTextString(m) }
func (*ParDoPayload) ProtoMessage() {}
func (*ParDoPayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{7}
+ return fileDescriptor_cf57597c3a9659a9, []int{7}
}
+
func (m *ParDoPayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ParDoPayload.Unmarshal(m, b)
}
func (m *ParDoPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ParDoPayload.Marshal(b, m, deterministic)
}
-func (dst *ParDoPayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ParDoPayload.Merge(dst, src)
+func (m *ParDoPayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ParDoPayload.Merge(m, src)
}
func (m *ParDoPayload) XXX_Size() int {
return xxx_messageInfo_ParDoPayload.Size(m)
@@ -1329,7 +1396,7 @@
var xxx_messageInfo_ParDoPayload proto.InternalMessageInfo
-func (m *ParDoPayload) GetDoFn() *SdkFunctionSpec {
+func (m *ParDoPayload) GetDoFn() *FunctionSpec {
if m != nil {
return m.DoFn
}
@@ -1410,16 +1477,17 @@
func (m *Parameter) String() string { return proto.CompactTextString(m) }
func (*Parameter) ProtoMessage() {}
func (*Parameter) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{8}
+ return fileDescriptor_cf57597c3a9659a9, []int{8}
}
+
func (m *Parameter) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Parameter.Unmarshal(m, b)
}
func (m *Parameter) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Parameter.Marshal(b, m, deterministic)
}
-func (dst *Parameter) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Parameter.Merge(dst, src)
+func (m *Parameter) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Parameter.Merge(m, src)
}
func (m *Parameter) XXX_Size() int {
return xxx_messageInfo_Parameter.Size(m)
@@ -1447,16 +1515,17 @@
func (m *Parameter_Type) String() string { return proto.CompactTextString(m) }
func (*Parameter_Type) ProtoMessage() {}
func (*Parameter_Type) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{8, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{8, 0}
}
+
func (m *Parameter_Type) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Parameter_Type.Unmarshal(m, b)
}
func (m *Parameter_Type) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Parameter_Type.Marshal(b, m, deterministic)
}
-func (dst *Parameter_Type) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Parameter_Type.Merge(dst, src)
+func (m *Parameter_Type) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Parameter_Type.Merge(m, src)
}
func (m *Parameter_Type) XXX_Size() int {
return xxx_messageInfo_Parameter_Type.Size(m)
@@ -1484,16 +1553,17 @@
func (m *StateSpec) String() string { return proto.CompactTextString(m) }
func (*StateSpec) ProtoMessage() {}
func (*StateSpec) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{9}
+ return fileDescriptor_cf57597c3a9659a9, []int{9}
}
+
func (m *StateSpec) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StateSpec.Unmarshal(m, b)
}
func (m *StateSpec) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StateSpec.Marshal(b, m, deterministic)
}
-func (dst *StateSpec) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StateSpec.Merge(dst, src)
+func (m *StateSpec) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StateSpec.Merge(m, src)
}
func (m *StateSpec) XXX_Size() int {
return xxx_messageInfo_StateSpec.Size(m)
@@ -1511,24 +1581,32 @@
type StateSpec_ReadModifyWriteSpec struct {
ReadModifyWriteSpec *ReadModifyWriteStateSpec `protobuf:"bytes,1,opt,name=read_modify_write_spec,json=readModifyWriteSpec,proto3,oneof"`
}
+
type StateSpec_BagSpec struct {
BagSpec *BagStateSpec `protobuf:"bytes,2,opt,name=bag_spec,json=bagSpec,proto3,oneof"`
}
+
type StateSpec_CombiningSpec struct {
CombiningSpec *CombiningStateSpec `protobuf:"bytes,3,opt,name=combining_spec,json=combiningSpec,proto3,oneof"`
}
+
type StateSpec_MapSpec struct {
MapSpec *MapStateSpec `protobuf:"bytes,4,opt,name=map_spec,json=mapSpec,proto3,oneof"`
}
+
type StateSpec_SetSpec struct {
SetSpec *SetStateSpec `protobuf:"bytes,5,opt,name=set_spec,json=setSpec,proto3,oneof"`
}
func (*StateSpec_ReadModifyWriteSpec) isStateSpec_Spec() {}
-func (*StateSpec_BagSpec) isStateSpec_Spec() {}
-func (*StateSpec_CombiningSpec) isStateSpec_Spec() {}
-func (*StateSpec_MapSpec) isStateSpec_Spec() {}
-func (*StateSpec_SetSpec) isStateSpec_Spec() {}
+
+func (*StateSpec_BagSpec) isStateSpec_Spec() {}
+
+func (*StateSpec_CombiningSpec) isStateSpec_Spec() {}
+
+func (*StateSpec_MapSpec) isStateSpec_Spec() {}
+
+func (*StateSpec_SetSpec) isStateSpec_Spec() {}
func (m *StateSpec) GetSpec() isStateSpec_Spec {
if m != nil {
@@ -1572,9 +1650,9 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*StateSpec) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _StateSpec_OneofMarshaler, _StateSpec_OneofUnmarshaler, _StateSpec_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*StateSpec) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*StateSpec_ReadModifyWriteSpec)(nil),
(*StateSpec_BagSpec)(nil),
(*StateSpec_CombiningSpec)(nil),
@@ -1583,126 +1661,6 @@
}
}
-func _StateSpec_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*StateSpec)
- // spec
- switch x := m.Spec.(type) {
- case *StateSpec_ReadModifyWriteSpec:
- b.EncodeVarint(1<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.ReadModifyWriteSpec); err != nil {
- return err
- }
- case *StateSpec_BagSpec:
- b.EncodeVarint(2<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.BagSpec); err != nil {
- return err
- }
- case *StateSpec_CombiningSpec:
- b.EncodeVarint(3<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.CombiningSpec); err != nil {
- return err
- }
- case *StateSpec_MapSpec:
- b.EncodeVarint(4<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.MapSpec); err != nil {
- return err
- }
- case *StateSpec_SetSpec:
- b.EncodeVarint(5<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.SetSpec); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("StateSpec.Spec has unexpected type %T", x)
- }
- return nil
-}
-
-func _StateSpec_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*StateSpec)
- switch tag {
- case 1: // spec.read_modify_write_spec
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(ReadModifyWriteStateSpec)
- err := b.DecodeMessage(msg)
- m.Spec = &StateSpec_ReadModifyWriteSpec{msg}
- return true, err
- case 2: // spec.bag_spec
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(BagStateSpec)
- err := b.DecodeMessage(msg)
- m.Spec = &StateSpec_BagSpec{msg}
- return true, err
- case 3: // spec.combining_spec
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(CombiningStateSpec)
- err := b.DecodeMessage(msg)
- m.Spec = &StateSpec_CombiningSpec{msg}
- return true, err
- case 4: // spec.map_spec
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(MapStateSpec)
- err := b.DecodeMessage(msg)
- m.Spec = &StateSpec_MapSpec{msg}
- return true, err
- case 5: // spec.set_spec
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(SetStateSpec)
- err := b.DecodeMessage(msg)
- m.Spec = &StateSpec_SetSpec{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _StateSpec_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*StateSpec)
- // spec
- switch x := m.Spec.(type) {
- case *StateSpec_ReadModifyWriteSpec:
- s := proto.Size(x.ReadModifyWriteSpec)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *StateSpec_BagSpec:
- s := proto.Size(x.BagSpec)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *StateSpec_CombiningSpec:
- s := proto.Size(x.CombiningSpec)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *StateSpec_MapSpec:
- s := proto.Size(x.MapSpec)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *StateSpec_SetSpec:
- s := proto.Size(x.SetSpec)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
type ReadModifyWriteStateSpec struct {
CoderId string `protobuf:"bytes,1,opt,name=coder_id,json=coderId,proto3" json:"coder_id,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
@@ -1714,16 +1672,17 @@
func (m *ReadModifyWriteStateSpec) String() string { return proto.CompactTextString(m) }
func (*ReadModifyWriteStateSpec) ProtoMessage() {}
func (*ReadModifyWriteStateSpec) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{10}
+ return fileDescriptor_cf57597c3a9659a9, []int{10}
}
+
func (m *ReadModifyWriteStateSpec) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ReadModifyWriteStateSpec.Unmarshal(m, b)
}
func (m *ReadModifyWriteStateSpec) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ReadModifyWriteStateSpec.Marshal(b, m, deterministic)
}
-func (dst *ReadModifyWriteStateSpec) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ReadModifyWriteStateSpec.Merge(dst, src)
+func (m *ReadModifyWriteStateSpec) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ReadModifyWriteStateSpec.Merge(m, src)
}
func (m *ReadModifyWriteStateSpec) XXX_Size() int {
return xxx_messageInfo_ReadModifyWriteStateSpec.Size(m)
@@ -1752,16 +1711,17 @@
func (m *BagStateSpec) String() string { return proto.CompactTextString(m) }
func (*BagStateSpec) ProtoMessage() {}
func (*BagStateSpec) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{11}
+ return fileDescriptor_cf57597c3a9659a9, []int{11}
}
+
func (m *BagStateSpec) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_BagStateSpec.Unmarshal(m, b)
}
func (m *BagStateSpec) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_BagStateSpec.Marshal(b, m, deterministic)
}
-func (dst *BagStateSpec) XXX_Merge(src proto.Message) {
- xxx_messageInfo_BagStateSpec.Merge(dst, src)
+func (m *BagStateSpec) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_BagStateSpec.Merge(m, src)
}
func (m *BagStateSpec) XXX_Size() int {
return xxx_messageInfo_BagStateSpec.Size(m)
@@ -1780,27 +1740,28 @@
}
type CombiningStateSpec struct {
- AccumulatorCoderId string `protobuf:"bytes,1,opt,name=accumulator_coder_id,json=accumulatorCoderId,proto3" json:"accumulator_coder_id,omitempty"`
- CombineFn *SdkFunctionSpec `protobuf:"bytes,2,opt,name=combine_fn,json=combineFn,proto3" json:"combine_fn,omitempty"`
- XXX_NoUnkeyedLiteral struct{} `json:"-"`
- XXX_unrecognized []byte `json:"-"`
- XXX_sizecache int32 `json:"-"`
+ AccumulatorCoderId string `protobuf:"bytes,1,opt,name=accumulator_coder_id,json=accumulatorCoderId,proto3" json:"accumulator_coder_id,omitempty"`
+ CombineFn *FunctionSpec `protobuf:"bytes,2,opt,name=combine_fn,json=combineFn,proto3" json:"combine_fn,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
}
func (m *CombiningStateSpec) Reset() { *m = CombiningStateSpec{} }
func (m *CombiningStateSpec) String() string { return proto.CompactTextString(m) }
func (*CombiningStateSpec) ProtoMessage() {}
func (*CombiningStateSpec) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{12}
+ return fileDescriptor_cf57597c3a9659a9, []int{12}
}
+
func (m *CombiningStateSpec) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_CombiningStateSpec.Unmarshal(m, b)
}
func (m *CombiningStateSpec) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_CombiningStateSpec.Marshal(b, m, deterministic)
}
-func (dst *CombiningStateSpec) XXX_Merge(src proto.Message) {
- xxx_messageInfo_CombiningStateSpec.Merge(dst, src)
+func (m *CombiningStateSpec) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_CombiningStateSpec.Merge(m, src)
}
func (m *CombiningStateSpec) XXX_Size() int {
return xxx_messageInfo_CombiningStateSpec.Size(m)
@@ -1818,7 +1779,7 @@
return ""
}
-func (m *CombiningStateSpec) GetCombineFn() *SdkFunctionSpec {
+func (m *CombiningStateSpec) GetCombineFn() *FunctionSpec {
if m != nil {
return m.CombineFn
}
@@ -1837,16 +1798,17 @@
func (m *MapStateSpec) String() string { return proto.CompactTextString(m) }
func (*MapStateSpec) ProtoMessage() {}
func (*MapStateSpec) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{13}
+ return fileDescriptor_cf57597c3a9659a9, []int{13}
}
+
func (m *MapStateSpec) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_MapStateSpec.Unmarshal(m, b)
}
func (m *MapStateSpec) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_MapStateSpec.Marshal(b, m, deterministic)
}
-func (dst *MapStateSpec) XXX_Merge(src proto.Message) {
- xxx_messageInfo_MapStateSpec.Merge(dst, src)
+func (m *MapStateSpec) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_MapStateSpec.Merge(m, src)
}
func (m *MapStateSpec) XXX_Size() int {
return xxx_messageInfo_MapStateSpec.Size(m)
@@ -1882,16 +1844,17 @@
func (m *SetStateSpec) String() string { return proto.CompactTextString(m) }
func (*SetStateSpec) ProtoMessage() {}
func (*SetStateSpec) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{14}
+ return fileDescriptor_cf57597c3a9659a9, []int{14}
}
+
func (m *SetStateSpec) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_SetStateSpec.Unmarshal(m, b)
}
func (m *SetStateSpec) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_SetStateSpec.Marshal(b, m, deterministic)
}
-func (dst *SetStateSpec) XXX_Merge(src proto.Message) {
- xxx_messageInfo_SetStateSpec.Merge(dst, src)
+func (m *SetStateSpec) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_SetStateSpec.Merge(m, src)
}
func (m *SetStateSpec) XXX_Size() int {
return xxx_messageInfo_SetStateSpec.Size(m)
@@ -1921,16 +1884,17 @@
func (m *TimerSpec) String() string { return proto.CompactTextString(m) }
func (*TimerSpec) ProtoMessage() {}
func (*TimerSpec) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{15}
+ return fileDescriptor_cf57597c3a9659a9, []int{15}
}
+
func (m *TimerSpec) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_TimerSpec.Unmarshal(m, b)
}
func (m *TimerSpec) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_TimerSpec.Marshal(b, m, deterministic)
}
-func (dst *TimerSpec) XXX_Merge(src proto.Message) {
- xxx_messageInfo_TimerSpec.Merge(dst, src)
+func (m *TimerSpec) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_TimerSpec.Merge(m, src)
}
func (m *TimerSpec) XXX_Size() int {
return xxx_messageInfo_TimerSpec.Size(m)
@@ -1965,16 +1929,17 @@
func (m *IsBounded) String() string { return proto.CompactTextString(m) }
func (*IsBounded) ProtoMessage() {}
func (*IsBounded) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{16}
+ return fileDescriptor_cf57597c3a9659a9, []int{16}
}
+
func (m *IsBounded) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_IsBounded.Unmarshal(m, b)
}
func (m *IsBounded) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_IsBounded.Marshal(b, m, deterministic)
}
-func (dst *IsBounded) XXX_Merge(src proto.Message) {
- xxx_messageInfo_IsBounded.Merge(dst, src)
+func (m *IsBounded) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_IsBounded.Merge(m, src)
}
func (m *IsBounded) XXX_Size() int {
return xxx_messageInfo_IsBounded.Size(m)
@@ -1987,8 +1952,8 @@
// The payload for the primitive Read transform.
type ReadPayload struct {
- // (Required) The SdkFunctionSpec of the source for this Read.
- Source *SdkFunctionSpec `protobuf:"bytes,1,opt,name=source,proto3" json:"source,omitempty"`
+ // (Required) The FunctionSpec of the source for this Read.
+ Source *FunctionSpec `protobuf:"bytes,1,opt,name=source,proto3" json:"source,omitempty"`
// (Required) Whether the source is bounded or unbounded
IsBounded IsBounded_Enum `protobuf:"varint,2,opt,name=is_bounded,json=isBounded,proto3,enum=org.apache.beam.model.pipeline.v1.IsBounded_Enum" json:"is_bounded,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
@@ -2000,16 +1965,17 @@
func (m *ReadPayload) String() string { return proto.CompactTextString(m) }
func (*ReadPayload) ProtoMessage() {}
func (*ReadPayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{17}
+ return fileDescriptor_cf57597c3a9659a9, []int{17}
}
+
func (m *ReadPayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ReadPayload.Unmarshal(m, b)
}
func (m *ReadPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ReadPayload.Marshal(b, m, deterministic)
}
-func (dst *ReadPayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ReadPayload.Merge(dst, src)
+func (m *ReadPayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ReadPayload.Merge(m, src)
}
func (m *ReadPayload) XXX_Size() int {
return xxx_messageInfo_ReadPayload.Size(m)
@@ -2020,7 +1986,7 @@
var xxx_messageInfo_ReadPayload proto.InternalMessageInfo
-func (m *ReadPayload) GetSource() *SdkFunctionSpec {
+func (m *ReadPayload) GetSource() *FunctionSpec {
if m != nil {
return m.Source
}
@@ -2036,27 +2002,28 @@
// The payload for the WindowInto transform.
type WindowIntoPayload struct {
- // (Required) The SdkFunctionSpec of the WindowFn.
- WindowFn *SdkFunctionSpec `protobuf:"bytes,1,opt,name=window_fn,json=windowFn,proto3" json:"window_fn,omitempty"`
- XXX_NoUnkeyedLiteral struct{} `json:"-"`
- XXX_unrecognized []byte `json:"-"`
- XXX_sizecache int32 `json:"-"`
+ // (Required) The FunctionSpec of the WindowFn.
+ WindowFn *FunctionSpec `protobuf:"bytes,1,opt,name=window_fn,json=windowFn,proto3" json:"window_fn,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
}
func (m *WindowIntoPayload) Reset() { *m = WindowIntoPayload{} }
func (m *WindowIntoPayload) String() string { return proto.CompactTextString(m) }
func (*WindowIntoPayload) ProtoMessage() {}
func (*WindowIntoPayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{18}
+ return fileDescriptor_cf57597c3a9659a9, []int{18}
}
+
func (m *WindowIntoPayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_WindowIntoPayload.Unmarshal(m, b)
}
func (m *WindowIntoPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_WindowIntoPayload.Marshal(b, m, deterministic)
}
-func (dst *WindowIntoPayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_WindowIntoPayload.Merge(dst, src)
+func (m *WindowIntoPayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_WindowIntoPayload.Merge(m, src)
}
func (m *WindowIntoPayload) XXX_Size() int {
return xxx_messageInfo_WindowIntoPayload.Size(m)
@@ -2067,7 +2034,7 @@
var xxx_messageInfo_WindowIntoPayload proto.InternalMessageInfo
-func (m *WindowIntoPayload) GetWindowFn() *SdkFunctionSpec {
+func (m *WindowIntoPayload) GetWindowFn() *FunctionSpec {
if m != nil {
return m.WindowFn
}
@@ -2076,8 +2043,8 @@
// The payload for the special-but-not-primitive Combine transform.
type CombinePayload struct {
- // (Required) The SdkFunctionSpec of the CombineFn.
- CombineFn *SdkFunctionSpec `protobuf:"bytes,1,opt,name=combine_fn,json=combineFn,proto3" json:"combine_fn,omitempty"`
+ // (Required) The FunctionSpec of the CombineFn.
+ CombineFn *FunctionSpec `protobuf:"bytes,1,opt,name=combine_fn,json=combineFn,proto3" json:"combine_fn,omitempty"`
// (Required) A reference to the Coder to use for accumulators of the CombineFn
AccumulatorCoderId string `protobuf:"bytes,2,opt,name=accumulator_coder_id,json=accumulatorCoderId,proto3" json:"accumulator_coder_id,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
@@ -2089,16 +2056,17 @@
func (m *CombinePayload) String() string { return proto.CompactTextString(m) }
func (*CombinePayload) ProtoMessage() {}
func (*CombinePayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{19}
+ return fileDescriptor_cf57597c3a9659a9, []int{19}
}
+
func (m *CombinePayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_CombinePayload.Unmarshal(m, b)
}
func (m *CombinePayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_CombinePayload.Marshal(b, m, deterministic)
}
-func (dst *CombinePayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_CombinePayload.Merge(dst, src)
+func (m *CombinePayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_CombinePayload.Merge(m, src)
}
func (m *CombinePayload) XXX_Size() int {
return xxx_messageInfo_CombinePayload.Size(m)
@@ -2109,7 +2077,7 @@
var xxx_messageInfo_CombinePayload proto.InternalMessageInfo
-func (m *CombinePayload) GetCombineFn() *SdkFunctionSpec {
+func (m *CombinePayload) GetCombineFn() *FunctionSpec {
if m != nil {
return m.CombineFn
}
@@ -2141,16 +2109,17 @@
func (m *TestStreamPayload) String() string { return proto.CompactTextString(m) }
func (*TestStreamPayload) ProtoMessage() {}
func (*TestStreamPayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{20}
+ return fileDescriptor_cf57597c3a9659a9, []int{20}
}
+
func (m *TestStreamPayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_TestStreamPayload.Unmarshal(m, b)
}
func (m *TestStreamPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_TestStreamPayload.Marshal(b, m, deterministic)
}
-func (dst *TestStreamPayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_TestStreamPayload.Merge(dst, src)
+func (m *TestStreamPayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_TestStreamPayload.Merge(m, src)
}
func (m *TestStreamPayload) XXX_Size() int {
return xxx_messageInfo_TestStreamPayload.Size(m)
@@ -2197,16 +2166,17 @@
func (m *TestStreamPayload_Event) String() string { return proto.CompactTextString(m) }
func (*TestStreamPayload_Event) ProtoMessage() {}
func (*TestStreamPayload_Event) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{20, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{20, 0}
}
+
func (m *TestStreamPayload_Event) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_TestStreamPayload_Event.Unmarshal(m, b)
}
func (m *TestStreamPayload_Event) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_TestStreamPayload_Event.Marshal(b, m, deterministic)
}
-func (dst *TestStreamPayload_Event) XXX_Merge(src proto.Message) {
- xxx_messageInfo_TestStreamPayload_Event.Merge(dst, src)
+func (m *TestStreamPayload_Event) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_TestStreamPayload_Event.Merge(m, src)
}
func (m *TestStreamPayload_Event) XXX_Size() int {
return xxx_messageInfo_TestStreamPayload_Event.Size(m)
@@ -2224,16 +2194,20 @@
type TestStreamPayload_Event_WatermarkEvent struct {
WatermarkEvent *TestStreamPayload_Event_AdvanceWatermark `protobuf:"bytes,1,opt,name=watermark_event,json=watermarkEvent,proto3,oneof"`
}
+
type TestStreamPayload_Event_ProcessingTimeEvent struct {
ProcessingTimeEvent *TestStreamPayload_Event_AdvanceProcessingTime `protobuf:"bytes,2,opt,name=processing_time_event,json=processingTimeEvent,proto3,oneof"`
}
+
type TestStreamPayload_Event_ElementEvent struct {
ElementEvent *TestStreamPayload_Event_AddElements `protobuf:"bytes,3,opt,name=element_event,json=elementEvent,proto3,oneof"`
}
-func (*TestStreamPayload_Event_WatermarkEvent) isTestStreamPayload_Event_Event() {}
+func (*TestStreamPayload_Event_WatermarkEvent) isTestStreamPayload_Event_Event() {}
+
func (*TestStreamPayload_Event_ProcessingTimeEvent) isTestStreamPayload_Event_Event() {}
-func (*TestStreamPayload_Event_ElementEvent) isTestStreamPayload_Event_Event() {}
+
+func (*TestStreamPayload_Event_ElementEvent) isTestStreamPayload_Event_Event() {}
func (m *TestStreamPayload_Event) GetEvent() isTestStreamPayload_Event_Event {
if m != nil {
@@ -2263,99 +2237,15 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*TestStreamPayload_Event) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _TestStreamPayload_Event_OneofMarshaler, _TestStreamPayload_Event_OneofUnmarshaler, _TestStreamPayload_Event_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*TestStreamPayload_Event) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*TestStreamPayload_Event_WatermarkEvent)(nil),
(*TestStreamPayload_Event_ProcessingTimeEvent)(nil),
(*TestStreamPayload_Event_ElementEvent)(nil),
}
}
-func _TestStreamPayload_Event_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*TestStreamPayload_Event)
- // event
- switch x := m.Event.(type) {
- case *TestStreamPayload_Event_WatermarkEvent:
- b.EncodeVarint(1<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.WatermarkEvent); err != nil {
- return err
- }
- case *TestStreamPayload_Event_ProcessingTimeEvent:
- b.EncodeVarint(2<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.ProcessingTimeEvent); err != nil {
- return err
- }
- case *TestStreamPayload_Event_ElementEvent:
- b.EncodeVarint(3<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.ElementEvent); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("TestStreamPayload_Event.Event has unexpected type %T", x)
- }
- return nil
-}
-
-func _TestStreamPayload_Event_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*TestStreamPayload_Event)
- switch tag {
- case 1: // event.watermark_event
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(TestStreamPayload_Event_AdvanceWatermark)
- err := b.DecodeMessage(msg)
- m.Event = &TestStreamPayload_Event_WatermarkEvent{msg}
- return true, err
- case 2: // event.processing_time_event
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(TestStreamPayload_Event_AdvanceProcessingTime)
- err := b.DecodeMessage(msg)
- m.Event = &TestStreamPayload_Event_ProcessingTimeEvent{msg}
- return true, err
- case 3: // event.element_event
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(TestStreamPayload_Event_AddElements)
- err := b.DecodeMessage(msg)
- m.Event = &TestStreamPayload_Event_ElementEvent{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _TestStreamPayload_Event_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*TestStreamPayload_Event)
- // event
- switch x := m.Event.(type) {
- case *TestStreamPayload_Event_WatermarkEvent:
- s := proto.Size(x.WatermarkEvent)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *TestStreamPayload_Event_ProcessingTimeEvent:
- s := proto.Size(x.ProcessingTimeEvent)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *TestStreamPayload_Event_ElementEvent:
- s := proto.Size(x.ElementEvent)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
// Advances the watermark to the specified timestamp.
type TestStreamPayload_Event_AdvanceWatermark struct {
// (Required) The watermark to advance to.
@@ -2375,16 +2265,17 @@
func (m *TestStreamPayload_Event_AdvanceWatermark) String() string { return proto.CompactTextString(m) }
func (*TestStreamPayload_Event_AdvanceWatermark) ProtoMessage() {}
func (*TestStreamPayload_Event_AdvanceWatermark) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{20, 0, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{20, 0, 0}
}
+
func (m *TestStreamPayload_Event_AdvanceWatermark) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_TestStreamPayload_Event_AdvanceWatermark.Unmarshal(m, b)
}
func (m *TestStreamPayload_Event_AdvanceWatermark) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_TestStreamPayload_Event_AdvanceWatermark.Marshal(b, m, deterministic)
}
-func (dst *TestStreamPayload_Event_AdvanceWatermark) XXX_Merge(src proto.Message) {
- xxx_messageInfo_TestStreamPayload_Event_AdvanceWatermark.Merge(dst, src)
+func (m *TestStreamPayload_Event_AdvanceWatermark) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_TestStreamPayload_Event_AdvanceWatermark.Merge(m, src)
}
func (m *TestStreamPayload_Event_AdvanceWatermark) XXX_Size() int {
return xxx_messageInfo_TestStreamPayload_Event_AdvanceWatermark.Size(m)
@@ -2426,16 +2317,17 @@
}
func (*TestStreamPayload_Event_AdvanceProcessingTime) ProtoMessage() {}
func (*TestStreamPayload_Event_AdvanceProcessingTime) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{20, 0, 1}
+ return fileDescriptor_cf57597c3a9659a9, []int{20, 0, 1}
}
+
func (m *TestStreamPayload_Event_AdvanceProcessingTime) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_TestStreamPayload_Event_AdvanceProcessingTime.Unmarshal(m, b)
}
func (m *TestStreamPayload_Event_AdvanceProcessingTime) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_TestStreamPayload_Event_AdvanceProcessingTime.Marshal(b, m, deterministic)
}
-func (dst *TestStreamPayload_Event_AdvanceProcessingTime) XXX_Merge(src proto.Message) {
- xxx_messageInfo_TestStreamPayload_Event_AdvanceProcessingTime.Merge(dst, src)
+func (m *TestStreamPayload_Event_AdvanceProcessingTime) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_TestStreamPayload_Event_AdvanceProcessingTime.Merge(m, src)
}
func (m *TestStreamPayload_Event_AdvanceProcessingTime) XXX_Size() int {
return xxx_messageInfo_TestStreamPayload_Event_AdvanceProcessingTime.Size(m)
@@ -2470,16 +2362,17 @@
func (m *TestStreamPayload_Event_AddElements) String() string { return proto.CompactTextString(m) }
func (*TestStreamPayload_Event_AddElements) ProtoMessage() {}
func (*TestStreamPayload_Event_AddElements) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{20, 0, 2}
+ return fileDescriptor_cf57597c3a9659a9, []int{20, 0, 2}
}
+
func (m *TestStreamPayload_Event_AddElements) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_TestStreamPayload_Event_AddElements.Unmarshal(m, b)
}
func (m *TestStreamPayload_Event_AddElements) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_TestStreamPayload_Event_AddElements.Marshal(b, m, deterministic)
}
-func (dst *TestStreamPayload_Event_AddElements) XXX_Merge(src proto.Message) {
- xxx_messageInfo_TestStreamPayload_Event_AddElements.Merge(dst, src)
+func (m *TestStreamPayload_Event_AddElements) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_TestStreamPayload_Event_AddElements.Merge(m, src)
}
func (m *TestStreamPayload_Event_AddElements) XXX_Size() int {
return xxx_messageInfo_TestStreamPayload_Event_AddElements.Size(m)
@@ -2520,16 +2413,17 @@
func (m *TestStreamPayload_TimestampedElement) String() string { return proto.CompactTextString(m) }
func (*TestStreamPayload_TimestampedElement) ProtoMessage() {}
func (*TestStreamPayload_TimestampedElement) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{20, 1}
+ return fileDescriptor_cf57597c3a9659a9, []int{20, 1}
}
+
func (m *TestStreamPayload_TimestampedElement) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_TestStreamPayload_TimestampedElement.Unmarshal(m, b)
}
func (m *TestStreamPayload_TimestampedElement) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_TestStreamPayload_TimestampedElement.Marshal(b, m, deterministic)
}
-func (dst *TestStreamPayload_TimestampedElement) XXX_Merge(src proto.Message) {
- xxx_messageInfo_TestStreamPayload_TimestampedElement.Merge(dst, src)
+func (m *TestStreamPayload_TimestampedElement) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_TestStreamPayload_TimestampedElement.Merge(m, src)
}
func (m *TestStreamPayload_TimestampedElement) XXX_Size() int {
return xxx_messageInfo_TestStreamPayload_TimestampedElement.Size(m)
@@ -2564,16 +2458,17 @@
func (m *EventsRequest) String() string { return proto.CompactTextString(m) }
func (*EventsRequest) ProtoMessage() {}
func (*EventsRequest) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{21}
+ return fileDescriptor_cf57597c3a9659a9, []int{21}
}
+
func (m *EventsRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_EventsRequest.Unmarshal(m, b)
}
func (m *EventsRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_EventsRequest.Marshal(b, m, deterministic)
}
-func (dst *EventsRequest) XXX_Merge(src proto.Message) {
- xxx_messageInfo_EventsRequest.Merge(dst, src)
+func (m *EventsRequest) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_EventsRequest.Merge(m, src)
}
func (m *EventsRequest) XXX_Size() int {
return xxx_messageInfo_EventsRequest.Size(m)
@@ -2586,10 +2481,10 @@
// The payload for the special-but-not-primitive WriteFiles transform.
type WriteFilesPayload struct {
- // (Required) The SdkFunctionSpec of the FileBasedSink.
- Sink *SdkFunctionSpec `protobuf:"bytes,1,opt,name=sink,proto3" json:"sink,omitempty"`
+ // (Required) The FunctionSpec of the FileBasedSink.
+ Sink *FunctionSpec `protobuf:"bytes,1,opt,name=sink,proto3" json:"sink,omitempty"`
// (Required) The format function.
- FormatFunction *SdkFunctionSpec `protobuf:"bytes,2,opt,name=format_function,json=formatFunction,proto3" json:"format_function,omitempty"`
+ FormatFunction *FunctionSpec `protobuf:"bytes,2,opt,name=format_function,json=formatFunction,proto3" json:"format_function,omitempty"`
WindowedWrites bool `protobuf:"varint,3,opt,name=windowed_writes,json=windowedWrites,proto3" json:"windowed_writes,omitempty"`
RunnerDeterminedSharding bool `protobuf:"varint,4,opt,name=runner_determined_sharding,json=runnerDeterminedSharding,proto3" json:"runner_determined_sharding,omitempty"`
SideInputs map[string]*SideInput `protobuf:"bytes,5,rep,name=side_inputs,json=sideInputs,proto3" json:"side_inputs,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
@@ -2602,16 +2497,17 @@
func (m *WriteFilesPayload) String() string { return proto.CompactTextString(m) }
func (*WriteFilesPayload) ProtoMessage() {}
func (*WriteFilesPayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{22}
+ return fileDescriptor_cf57597c3a9659a9, []int{22}
}
+
func (m *WriteFilesPayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_WriteFilesPayload.Unmarshal(m, b)
}
func (m *WriteFilesPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_WriteFilesPayload.Marshal(b, m, deterministic)
}
-func (dst *WriteFilesPayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_WriteFilesPayload.Merge(dst, src)
+func (m *WriteFilesPayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_WriteFilesPayload.Merge(m, src)
}
func (m *WriteFilesPayload) XXX_Size() int {
return xxx_messageInfo_WriteFilesPayload.Size(m)
@@ -2622,14 +2518,14 @@
var xxx_messageInfo_WriteFilesPayload proto.InternalMessageInfo
-func (m *WriteFilesPayload) GetSink() *SdkFunctionSpec {
+func (m *WriteFilesPayload) GetSink() *FunctionSpec {
if m != nil {
return m.Sink
}
return nil
}
-func (m *WriteFilesPayload) GetFormatFunction() *SdkFunctionSpec {
+func (m *WriteFilesPayload) GetFormatFunction() *FunctionSpec {
if m != nil {
return m.FormatFunction
}
@@ -2668,7 +2564,7 @@
Spec *FunctionSpec `protobuf:"bytes,1,opt,name=spec,proto3" json:"spec,omitempty"`
// (Optional) If this coder is parametric, such as ListCoder(VarIntCoder),
// this is a list of the components. In order for encodings to be identical,
- // the SdkFunctionSpec and all components must be identical, recursively.
+ // the FunctionSpec and all components must be identical, recursively.
ComponentCoderIds []string `protobuf:"bytes,2,rep,name=component_coder_ids,json=componentCoderIds,proto3" json:"component_coder_ids,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
@@ -2679,16 +2575,17 @@
func (m *Coder) String() string { return proto.CompactTextString(m) }
func (*Coder) ProtoMessage() {}
func (*Coder) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{23}
+ return fileDescriptor_cf57597c3a9659a9, []int{23}
}
+
func (m *Coder) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Coder.Unmarshal(m, b)
}
func (m *Coder) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Coder.Marshal(b, m, deterministic)
}
-func (dst *Coder) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Coder.Merge(dst, src)
+func (m *Coder) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Coder.Merge(m, src)
}
func (m *Coder) XXX_Size() int {
return xxx_messageInfo_Coder.Size(m)
@@ -2723,16 +2620,17 @@
func (m *StandardCoders) String() string { return proto.CompactTextString(m) }
func (*StandardCoders) ProtoMessage() {}
func (*StandardCoders) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{24}
+ return fileDescriptor_cf57597c3a9659a9, []int{24}
}
+
func (m *StandardCoders) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StandardCoders.Unmarshal(m, b)
}
func (m *StandardCoders) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StandardCoders.Marshal(b, m, deterministic)
}
-func (dst *StandardCoders) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StandardCoders.Merge(dst, src)
+func (m *StandardCoders) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StandardCoders.Merge(m, src)
}
func (m *StandardCoders) XXX_Size() int {
return xxx_messageInfo_StandardCoders.Size(m)
@@ -2748,10 +2646,10 @@
//
// TODO: consider inlining field on PCollection
type WindowingStrategy struct {
- // (Required) The SdkFunctionSpec of the UDF that assigns windows,
+ // (Required) The FunctionSpec of the UDF that assigns windows,
// merges windows, and shifts timestamps before they are
// combined according to the OutputTime.
- WindowFn *SdkFunctionSpec `protobuf:"bytes,1,opt,name=window_fn,json=windowFn,proto3" json:"window_fn,omitempty"`
+ WindowFn *FunctionSpec `protobuf:"bytes,1,opt,name=window_fn,json=windowFn,proto3" json:"window_fn,omitempty"`
// (Required) Whether or not the window fn is merging.
//
// This knowledge is required for many optimizations.
@@ -2784,7 +2682,11 @@
// (Required) Whether or not the window fn assigns inputs to exactly one window
//
// This knowledge is required for some optimizations
- AssignsToOneWindow bool `protobuf:"varint,10,opt,name=assigns_to_one_window,json=assignsToOneWindow,proto3" json:"assigns_to_one_window,omitempty"`
+ AssignsToOneWindow bool `protobuf:"varint,10,opt,name=assigns_to_one_window,json=assignsToOneWindow,proto3" json:"assigns_to_one_window,omitempty"`
+ // (Optional) The environment in which the current window_fn should be applied.
+ // The runner that executes the pipeline may choose to override this if needed.
+ // If not specified, the environment will be decided by the runner.
+ EnvironmentId string `protobuf:"bytes,11,opt,name=environment_id,json=environmentId,proto3" json:"environment_id,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
@@ -2794,16 +2696,17 @@
func (m *WindowingStrategy) String() string { return proto.CompactTextString(m) }
func (*WindowingStrategy) ProtoMessage() {}
func (*WindowingStrategy) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{25}
+ return fileDescriptor_cf57597c3a9659a9, []int{25}
}
+
func (m *WindowingStrategy) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_WindowingStrategy.Unmarshal(m, b)
}
func (m *WindowingStrategy) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_WindowingStrategy.Marshal(b, m, deterministic)
}
-func (dst *WindowingStrategy) XXX_Merge(src proto.Message) {
- xxx_messageInfo_WindowingStrategy.Merge(dst, src)
+func (m *WindowingStrategy) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_WindowingStrategy.Merge(m, src)
}
func (m *WindowingStrategy) XXX_Size() int {
return xxx_messageInfo_WindowingStrategy.Size(m)
@@ -2814,7 +2717,7 @@
var xxx_messageInfo_WindowingStrategy proto.InternalMessageInfo
-func (m *WindowingStrategy) GetWindowFn() *SdkFunctionSpec {
+func (m *WindowingStrategy) GetWindowFn() *FunctionSpec {
if m != nil {
return m.WindowFn
}
@@ -2884,6 +2787,13 @@
return false
}
+func (m *WindowingStrategy) GetEnvironmentId() string {
+ if m != nil {
+ return m.EnvironmentId
+ }
+ return ""
+}
+
// Whether or not a PCollection's WindowFn is non-merging, merging, or
// merging-but-already-merged, in which case a subsequent GroupByKey is almost
// always going to do something the user does not want
@@ -2897,16 +2807,17 @@
func (m *MergeStatus) String() string { return proto.CompactTextString(m) }
func (*MergeStatus) ProtoMessage() {}
func (*MergeStatus) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{26}
+ return fileDescriptor_cf57597c3a9659a9, []int{26}
}
+
func (m *MergeStatus) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_MergeStatus.Unmarshal(m, b)
}
func (m *MergeStatus) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_MergeStatus.Marshal(b, m, deterministic)
}
-func (dst *MergeStatus) XXX_Merge(src proto.Message) {
- xxx_messageInfo_MergeStatus.Merge(dst, src)
+func (m *MergeStatus) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_MergeStatus.Merge(m, src)
}
func (m *MergeStatus) XXX_Size() int {
return xxx_messageInfo_MergeStatus.Size(m)
@@ -2930,16 +2841,17 @@
func (m *AccumulationMode) String() string { return proto.CompactTextString(m) }
func (*AccumulationMode) ProtoMessage() {}
func (*AccumulationMode) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{27}
+ return fileDescriptor_cf57597c3a9659a9, []int{27}
}
+
func (m *AccumulationMode) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_AccumulationMode.Unmarshal(m, b)
}
func (m *AccumulationMode) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_AccumulationMode.Marshal(b, m, deterministic)
}
-func (dst *AccumulationMode) XXX_Merge(src proto.Message) {
- xxx_messageInfo_AccumulationMode.Merge(dst, src)
+func (m *AccumulationMode) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_AccumulationMode.Merge(m, src)
}
func (m *AccumulationMode) XXX_Size() int {
return xxx_messageInfo_AccumulationMode.Size(m)
@@ -2962,16 +2874,17 @@
func (m *ClosingBehavior) String() string { return proto.CompactTextString(m) }
func (*ClosingBehavior) ProtoMessage() {}
func (*ClosingBehavior) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{28}
+ return fileDescriptor_cf57597c3a9659a9, []int{28}
}
+
func (m *ClosingBehavior) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ClosingBehavior.Unmarshal(m, b)
}
func (m *ClosingBehavior) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ClosingBehavior.Marshal(b, m, deterministic)
}
-func (dst *ClosingBehavior) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ClosingBehavior.Merge(dst, src)
+func (m *ClosingBehavior) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ClosingBehavior.Merge(m, src)
}
func (m *ClosingBehavior) XXX_Size() int {
return xxx_messageInfo_ClosingBehavior.Size(m)
@@ -2994,16 +2907,17 @@
func (m *OnTimeBehavior) String() string { return proto.CompactTextString(m) }
func (*OnTimeBehavior) ProtoMessage() {}
func (*OnTimeBehavior) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{29}
+ return fileDescriptor_cf57597c3a9659a9, []int{29}
}
+
func (m *OnTimeBehavior) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_OnTimeBehavior.Unmarshal(m, b)
}
func (m *OnTimeBehavior) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_OnTimeBehavior.Marshal(b, m, deterministic)
}
-func (dst *OnTimeBehavior) XXX_Merge(src proto.Message) {
- xxx_messageInfo_OnTimeBehavior.Merge(dst, src)
+func (m *OnTimeBehavior) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_OnTimeBehavior.Merge(m, src)
}
func (m *OnTimeBehavior) XXX_Size() int {
return xxx_messageInfo_OnTimeBehavior.Size(m)
@@ -3026,16 +2940,17 @@
func (m *OutputTime) String() string { return proto.CompactTextString(m) }
func (*OutputTime) ProtoMessage() {}
func (*OutputTime) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{30}
+ return fileDescriptor_cf57597c3a9659a9, []int{30}
}
+
func (m *OutputTime) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_OutputTime.Unmarshal(m, b)
}
func (m *OutputTime) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_OutputTime.Marshal(b, m, deterministic)
}
-func (dst *OutputTime) XXX_Merge(src proto.Message) {
- xxx_messageInfo_OutputTime.Merge(dst, src)
+func (m *OutputTime) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_OutputTime.Merge(m, src)
}
func (m *OutputTime) XXX_Size() int {
return xxx_messageInfo_OutputTime.Size(m)
@@ -3057,16 +2972,17 @@
func (m *TimeDomain) String() string { return proto.CompactTextString(m) }
func (*TimeDomain) ProtoMessage() {}
func (*TimeDomain) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{31}
+ return fileDescriptor_cf57597c3a9659a9, []int{31}
}
+
func (m *TimeDomain) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_TimeDomain.Unmarshal(m, b)
}
func (m *TimeDomain) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_TimeDomain.Marshal(b, m, deterministic)
}
-func (dst *TimeDomain) XXX_Merge(src proto.Message) {
- xxx_messageInfo_TimeDomain.Merge(dst, src)
+func (m *TimeDomain) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_TimeDomain.Merge(m, src)
}
func (m *TimeDomain) XXX_Size() int {
return xxx_messageInfo_TimeDomain.Size(m)
@@ -3107,16 +3023,17 @@
func (m *Trigger) String() string { return proto.CompactTextString(m) }
func (*Trigger) ProtoMessage() {}
func (*Trigger) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{32}
+ return fileDescriptor_cf57597c3a9659a9, []int{32}
}
+
func (m *Trigger) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Trigger.Unmarshal(m, b)
}
func (m *Trigger) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Trigger.Marshal(b, m, deterministic)
}
-func (dst *Trigger) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Trigger.Merge(dst, src)
+func (m *Trigger) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Trigger.Merge(m, src)
}
func (m *Trigger) XXX_Size() int {
return xxx_messageInfo_Trigger.Size(m)
@@ -3134,52 +3051,74 @@
type Trigger_AfterAll_ struct {
AfterAll *Trigger_AfterAll `protobuf:"bytes,1,opt,name=after_all,json=afterAll,proto3,oneof"`
}
+
type Trigger_AfterAny_ struct {
AfterAny *Trigger_AfterAny `protobuf:"bytes,2,opt,name=after_any,json=afterAny,proto3,oneof"`
}
+
type Trigger_AfterEach_ struct {
AfterEach *Trigger_AfterEach `protobuf:"bytes,3,opt,name=after_each,json=afterEach,proto3,oneof"`
}
+
type Trigger_AfterEndOfWindow_ struct {
AfterEndOfWindow *Trigger_AfterEndOfWindow `protobuf:"bytes,4,opt,name=after_end_of_window,json=afterEndOfWindow,proto3,oneof"`
}
+
type Trigger_AfterProcessingTime_ struct {
AfterProcessingTime *Trigger_AfterProcessingTime `protobuf:"bytes,5,opt,name=after_processing_time,json=afterProcessingTime,proto3,oneof"`
}
+
type Trigger_AfterSynchronizedProcessingTime_ struct {
AfterSynchronizedProcessingTime *Trigger_AfterSynchronizedProcessingTime `protobuf:"bytes,6,opt,name=after_synchronized_processing_time,json=afterSynchronizedProcessingTime,proto3,oneof"`
}
+
type Trigger_Always_ struct {
Always *Trigger_Always `protobuf:"bytes,12,opt,name=always,proto3,oneof"`
}
+
type Trigger_Default_ struct {
Default *Trigger_Default `protobuf:"bytes,7,opt,name=default,proto3,oneof"`
}
+
type Trigger_ElementCount_ struct {
ElementCount *Trigger_ElementCount `protobuf:"bytes,8,opt,name=element_count,json=elementCount,proto3,oneof"`
}
+
type Trigger_Never_ struct {
Never *Trigger_Never `protobuf:"bytes,9,opt,name=never,proto3,oneof"`
}
+
type Trigger_OrFinally_ struct {
OrFinally *Trigger_OrFinally `protobuf:"bytes,10,opt,name=or_finally,json=orFinally,proto3,oneof"`
}
+
type Trigger_Repeat_ struct {
Repeat *Trigger_Repeat `protobuf:"bytes,11,opt,name=repeat,proto3,oneof"`
}
-func (*Trigger_AfterAll_) isTrigger_Trigger() {}
-func (*Trigger_AfterAny_) isTrigger_Trigger() {}
-func (*Trigger_AfterEach_) isTrigger_Trigger() {}
-func (*Trigger_AfterEndOfWindow_) isTrigger_Trigger() {}
-func (*Trigger_AfterProcessingTime_) isTrigger_Trigger() {}
+func (*Trigger_AfterAll_) isTrigger_Trigger() {}
+
+func (*Trigger_AfterAny_) isTrigger_Trigger() {}
+
+func (*Trigger_AfterEach_) isTrigger_Trigger() {}
+
+func (*Trigger_AfterEndOfWindow_) isTrigger_Trigger() {}
+
+func (*Trigger_AfterProcessingTime_) isTrigger_Trigger() {}
+
func (*Trigger_AfterSynchronizedProcessingTime_) isTrigger_Trigger() {}
-func (*Trigger_Always_) isTrigger_Trigger() {}
-func (*Trigger_Default_) isTrigger_Trigger() {}
-func (*Trigger_ElementCount_) isTrigger_Trigger() {}
-func (*Trigger_Never_) isTrigger_Trigger() {}
-func (*Trigger_OrFinally_) isTrigger_Trigger() {}
-func (*Trigger_Repeat_) isTrigger_Trigger() {}
+
+func (*Trigger_Always_) isTrigger_Trigger() {}
+
+func (*Trigger_Default_) isTrigger_Trigger() {}
+
+func (*Trigger_ElementCount_) isTrigger_Trigger() {}
+
+func (*Trigger_Never_) isTrigger_Trigger() {}
+
+func (*Trigger_OrFinally_) isTrigger_Trigger() {}
+
+func (*Trigger_Repeat_) isTrigger_Trigger() {}
func (m *Trigger) GetTrigger() isTrigger_Trigger {
if m != nil {
@@ -3272,9 +3211,9 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*Trigger) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _Trigger_OneofMarshaler, _Trigger_OneofUnmarshaler, _Trigger_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*Trigger) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*Trigger_AfterAll_)(nil),
(*Trigger_AfterAny_)(nil),
(*Trigger_AfterEach_)(nil),
@@ -3290,252 +3229,6 @@
}
}
-func _Trigger_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*Trigger)
- // trigger
- switch x := m.Trigger.(type) {
- case *Trigger_AfterAll_:
- b.EncodeVarint(1<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.AfterAll); err != nil {
- return err
- }
- case *Trigger_AfterAny_:
- b.EncodeVarint(2<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.AfterAny); err != nil {
- return err
- }
- case *Trigger_AfterEach_:
- b.EncodeVarint(3<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.AfterEach); err != nil {
- return err
- }
- case *Trigger_AfterEndOfWindow_:
- b.EncodeVarint(4<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.AfterEndOfWindow); err != nil {
- return err
- }
- case *Trigger_AfterProcessingTime_:
- b.EncodeVarint(5<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.AfterProcessingTime); err != nil {
- return err
- }
- case *Trigger_AfterSynchronizedProcessingTime_:
- b.EncodeVarint(6<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.AfterSynchronizedProcessingTime); err != nil {
- return err
- }
- case *Trigger_Always_:
- b.EncodeVarint(12<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Always); err != nil {
- return err
- }
- case *Trigger_Default_:
- b.EncodeVarint(7<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Default); err != nil {
- return err
- }
- case *Trigger_ElementCount_:
- b.EncodeVarint(8<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.ElementCount); err != nil {
- return err
- }
- case *Trigger_Never_:
- b.EncodeVarint(9<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Never); err != nil {
- return err
- }
- case *Trigger_OrFinally_:
- b.EncodeVarint(10<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.OrFinally); err != nil {
- return err
- }
- case *Trigger_Repeat_:
- b.EncodeVarint(11<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Repeat); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("Trigger.Trigger has unexpected type %T", x)
- }
- return nil
-}
-
-func _Trigger_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*Trigger)
- switch tag {
- case 1: // trigger.after_all
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Trigger_AfterAll)
- err := b.DecodeMessage(msg)
- m.Trigger = &Trigger_AfterAll_{msg}
- return true, err
- case 2: // trigger.after_any
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Trigger_AfterAny)
- err := b.DecodeMessage(msg)
- m.Trigger = &Trigger_AfterAny_{msg}
- return true, err
- case 3: // trigger.after_each
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Trigger_AfterEach)
- err := b.DecodeMessage(msg)
- m.Trigger = &Trigger_AfterEach_{msg}
- return true, err
- case 4: // trigger.after_end_of_window
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Trigger_AfterEndOfWindow)
- err := b.DecodeMessage(msg)
- m.Trigger = &Trigger_AfterEndOfWindow_{msg}
- return true, err
- case 5: // trigger.after_processing_time
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Trigger_AfterProcessingTime)
- err := b.DecodeMessage(msg)
- m.Trigger = &Trigger_AfterProcessingTime_{msg}
- return true, err
- case 6: // trigger.after_synchronized_processing_time
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Trigger_AfterSynchronizedProcessingTime)
- err := b.DecodeMessage(msg)
- m.Trigger = &Trigger_AfterSynchronizedProcessingTime_{msg}
- return true, err
- case 12: // trigger.always
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Trigger_Always)
- err := b.DecodeMessage(msg)
- m.Trigger = &Trigger_Always_{msg}
- return true, err
- case 7: // trigger.default
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Trigger_Default)
- err := b.DecodeMessage(msg)
- m.Trigger = &Trigger_Default_{msg}
- return true, err
- case 8: // trigger.element_count
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Trigger_ElementCount)
- err := b.DecodeMessage(msg)
- m.Trigger = &Trigger_ElementCount_{msg}
- return true, err
- case 9: // trigger.never
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Trigger_Never)
- err := b.DecodeMessage(msg)
- m.Trigger = &Trigger_Never_{msg}
- return true, err
- case 10: // trigger.or_finally
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Trigger_OrFinally)
- err := b.DecodeMessage(msg)
- m.Trigger = &Trigger_OrFinally_{msg}
- return true, err
- case 11: // trigger.repeat
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Trigger_Repeat)
- err := b.DecodeMessage(msg)
- m.Trigger = &Trigger_Repeat_{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _Trigger_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*Trigger)
- // trigger
- switch x := m.Trigger.(type) {
- case *Trigger_AfterAll_:
- s := proto.Size(x.AfterAll)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Trigger_AfterAny_:
- s := proto.Size(x.AfterAny)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Trigger_AfterEach_:
- s := proto.Size(x.AfterEach)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Trigger_AfterEndOfWindow_:
- s := proto.Size(x.AfterEndOfWindow)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Trigger_AfterProcessingTime_:
- s := proto.Size(x.AfterProcessingTime)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Trigger_AfterSynchronizedProcessingTime_:
- s := proto.Size(x.AfterSynchronizedProcessingTime)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Trigger_Always_:
- s := proto.Size(x.Always)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Trigger_Default_:
- s := proto.Size(x.Default)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Trigger_ElementCount_:
- s := proto.Size(x.ElementCount)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Trigger_Never_:
- s := proto.Size(x.Never)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Trigger_OrFinally_:
- s := proto.Size(x.OrFinally)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Trigger_Repeat_:
- s := proto.Size(x.Repeat)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
// Ready when all subtriggers are ready.
type Trigger_AfterAll struct {
Subtriggers []*Trigger `protobuf:"bytes,1,rep,name=subtriggers,proto3" json:"subtriggers,omitempty"`
@@ -3548,16 +3241,17 @@
func (m *Trigger_AfterAll) String() string { return proto.CompactTextString(m) }
func (*Trigger_AfterAll) ProtoMessage() {}
func (*Trigger_AfterAll) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{32, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{32, 0}
}
+
func (m *Trigger_AfterAll) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Trigger_AfterAll.Unmarshal(m, b)
}
func (m *Trigger_AfterAll) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Trigger_AfterAll.Marshal(b, m, deterministic)
}
-func (dst *Trigger_AfterAll) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Trigger_AfterAll.Merge(dst, src)
+func (m *Trigger_AfterAll) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Trigger_AfterAll.Merge(m, src)
}
func (m *Trigger_AfterAll) XXX_Size() int {
return xxx_messageInfo_Trigger_AfterAll.Size(m)
@@ -3587,16 +3281,17 @@
func (m *Trigger_AfterAny) String() string { return proto.CompactTextString(m) }
func (*Trigger_AfterAny) ProtoMessage() {}
func (*Trigger_AfterAny) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{32, 1}
+ return fileDescriptor_cf57597c3a9659a9, []int{32, 1}
}
+
func (m *Trigger_AfterAny) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Trigger_AfterAny.Unmarshal(m, b)
}
func (m *Trigger_AfterAny) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Trigger_AfterAny.Marshal(b, m, deterministic)
}
-func (dst *Trigger_AfterAny) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Trigger_AfterAny.Merge(dst, src)
+func (m *Trigger_AfterAny) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Trigger_AfterAny.Merge(m, src)
}
func (m *Trigger_AfterAny) XXX_Size() int {
return xxx_messageInfo_Trigger_AfterAny.Size(m)
@@ -3627,16 +3322,17 @@
func (m *Trigger_AfterEach) String() string { return proto.CompactTextString(m) }
func (*Trigger_AfterEach) ProtoMessage() {}
func (*Trigger_AfterEach) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{32, 2}
+ return fileDescriptor_cf57597c3a9659a9, []int{32, 2}
}
+
func (m *Trigger_AfterEach) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Trigger_AfterEach.Unmarshal(m, b)
}
func (m *Trigger_AfterEach) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Trigger_AfterEach.Marshal(b, m, deterministic)
}
-func (dst *Trigger_AfterEach) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Trigger_AfterEach.Merge(dst, src)
+func (m *Trigger_AfterEach) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Trigger_AfterEach.Merge(m, src)
}
func (m *Trigger_AfterEach) XXX_Size() int {
return xxx_messageInfo_Trigger_AfterEach.Size(m)
@@ -3673,16 +3369,17 @@
func (m *Trigger_AfterEndOfWindow) String() string { return proto.CompactTextString(m) }
func (*Trigger_AfterEndOfWindow) ProtoMessage() {}
func (*Trigger_AfterEndOfWindow) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{32, 3}
+ return fileDescriptor_cf57597c3a9659a9, []int{32, 3}
}
+
func (m *Trigger_AfterEndOfWindow) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Trigger_AfterEndOfWindow.Unmarshal(m, b)
}
func (m *Trigger_AfterEndOfWindow) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Trigger_AfterEndOfWindow.Marshal(b, m, deterministic)
}
-func (dst *Trigger_AfterEndOfWindow) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Trigger_AfterEndOfWindow.Merge(dst, src)
+func (m *Trigger_AfterEndOfWindow) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Trigger_AfterEndOfWindow.Merge(m, src)
}
func (m *Trigger_AfterEndOfWindow) XXX_Size() int {
return xxx_messageInfo_Trigger_AfterEndOfWindow.Size(m)
@@ -3721,16 +3418,17 @@
func (m *Trigger_AfterProcessingTime) String() string { return proto.CompactTextString(m) }
func (*Trigger_AfterProcessingTime) ProtoMessage() {}
func (*Trigger_AfterProcessingTime) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{32, 4}
+ return fileDescriptor_cf57597c3a9659a9, []int{32, 4}
}
+
func (m *Trigger_AfterProcessingTime) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Trigger_AfterProcessingTime.Unmarshal(m, b)
}
func (m *Trigger_AfterProcessingTime) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Trigger_AfterProcessingTime.Marshal(b, m, deterministic)
}
-func (dst *Trigger_AfterProcessingTime) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Trigger_AfterProcessingTime.Merge(dst, src)
+func (m *Trigger_AfterProcessingTime) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Trigger_AfterProcessingTime.Merge(m, src)
}
func (m *Trigger_AfterProcessingTime) XXX_Size() int {
return xxx_messageInfo_Trigger_AfterProcessingTime.Size(m)
@@ -3762,16 +3460,17 @@
func (m *Trigger_AfterSynchronizedProcessingTime) String() string { return proto.CompactTextString(m) }
func (*Trigger_AfterSynchronizedProcessingTime) ProtoMessage() {}
func (*Trigger_AfterSynchronizedProcessingTime) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{32, 5}
+ return fileDescriptor_cf57597c3a9659a9, []int{32, 5}
}
+
func (m *Trigger_AfterSynchronizedProcessingTime) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Trigger_AfterSynchronizedProcessingTime.Unmarshal(m, b)
}
func (m *Trigger_AfterSynchronizedProcessingTime) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Trigger_AfterSynchronizedProcessingTime.Marshal(b, m, deterministic)
}
-func (dst *Trigger_AfterSynchronizedProcessingTime) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Trigger_AfterSynchronizedProcessingTime.Merge(dst, src)
+func (m *Trigger_AfterSynchronizedProcessingTime) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Trigger_AfterSynchronizedProcessingTime.Merge(m, src)
}
func (m *Trigger_AfterSynchronizedProcessingTime) XXX_Size() int {
return xxx_messageInfo_Trigger_AfterSynchronizedProcessingTime.Size(m)
@@ -3794,16 +3493,17 @@
func (m *Trigger_Default) String() string { return proto.CompactTextString(m) }
func (*Trigger_Default) ProtoMessage() {}
func (*Trigger_Default) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{32, 6}
+ return fileDescriptor_cf57597c3a9659a9, []int{32, 6}
}
+
func (m *Trigger_Default) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Trigger_Default.Unmarshal(m, b)
}
func (m *Trigger_Default) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Trigger_Default.Marshal(b, m, deterministic)
}
-func (dst *Trigger_Default) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Trigger_Default.Merge(dst, src)
+func (m *Trigger_Default) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Trigger_Default.Merge(m, src)
}
func (m *Trigger_Default) XXX_Size() int {
return xxx_messageInfo_Trigger_Default.Size(m)
@@ -3826,16 +3526,17 @@
func (m *Trigger_ElementCount) String() string { return proto.CompactTextString(m) }
func (*Trigger_ElementCount) ProtoMessage() {}
func (*Trigger_ElementCount) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{32, 7}
+ return fileDescriptor_cf57597c3a9659a9, []int{32, 7}
}
+
func (m *Trigger_ElementCount) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Trigger_ElementCount.Unmarshal(m, b)
}
func (m *Trigger_ElementCount) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Trigger_ElementCount.Marshal(b, m, deterministic)
}
-func (dst *Trigger_ElementCount) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Trigger_ElementCount.Merge(dst, src)
+func (m *Trigger_ElementCount) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Trigger_ElementCount.Merge(m, src)
}
func (m *Trigger_ElementCount) XXX_Size() int {
return xxx_messageInfo_Trigger_ElementCount.Size(m)
@@ -3865,16 +3566,17 @@
func (m *Trigger_Never) String() string { return proto.CompactTextString(m) }
func (*Trigger_Never) ProtoMessage() {}
func (*Trigger_Never) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{32, 8}
+ return fileDescriptor_cf57597c3a9659a9, []int{32, 8}
}
+
func (m *Trigger_Never) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Trigger_Never.Unmarshal(m, b)
}
func (m *Trigger_Never) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Trigger_Never.Marshal(b, m, deterministic)
}
-func (dst *Trigger_Never) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Trigger_Never.Merge(dst, src)
+func (m *Trigger_Never) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Trigger_Never.Merge(m, src)
}
func (m *Trigger_Never) XXX_Size() int {
return xxx_messageInfo_Trigger_Never.Size(m)
@@ -3897,16 +3599,17 @@
func (m *Trigger_Always) String() string { return proto.CompactTextString(m) }
func (*Trigger_Always) ProtoMessage() {}
func (*Trigger_Always) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{32, 9}
+ return fileDescriptor_cf57597c3a9659a9, []int{32, 9}
}
+
func (m *Trigger_Always) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Trigger_Always.Unmarshal(m, b)
}
func (m *Trigger_Always) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Trigger_Always.Marshal(b, m, deterministic)
}
-func (dst *Trigger_Always) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Trigger_Always.Merge(dst, src)
+func (m *Trigger_Always) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Trigger_Always.Merge(m, src)
}
func (m *Trigger_Always) XXX_Size() int {
return xxx_messageInfo_Trigger_Always.Size(m)
@@ -3933,16 +3636,17 @@
func (m *Trigger_OrFinally) String() string { return proto.CompactTextString(m) }
func (*Trigger_OrFinally) ProtoMessage() {}
func (*Trigger_OrFinally) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{32, 10}
+ return fileDescriptor_cf57597c3a9659a9, []int{32, 10}
}
+
func (m *Trigger_OrFinally) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Trigger_OrFinally.Unmarshal(m, b)
}
func (m *Trigger_OrFinally) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Trigger_OrFinally.Marshal(b, m, deterministic)
}
-func (dst *Trigger_OrFinally) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Trigger_OrFinally.Merge(dst, src)
+func (m *Trigger_OrFinally) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Trigger_OrFinally.Merge(m, src)
}
func (m *Trigger_OrFinally) XXX_Size() int {
return xxx_messageInfo_Trigger_OrFinally.Size(m)
@@ -3981,16 +3685,17 @@
func (m *Trigger_Repeat) String() string { return proto.CompactTextString(m) }
func (*Trigger_Repeat) ProtoMessage() {}
func (*Trigger_Repeat) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{32, 11}
+ return fileDescriptor_cf57597c3a9659a9, []int{32, 11}
}
+
func (m *Trigger_Repeat) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Trigger_Repeat.Unmarshal(m, b)
}
func (m *Trigger_Repeat) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Trigger_Repeat.Marshal(b, m, deterministic)
}
-func (dst *Trigger_Repeat) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Trigger_Repeat.Merge(dst, src)
+func (m *Trigger_Repeat) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Trigger_Repeat.Merge(m, src)
}
func (m *Trigger_Repeat) XXX_Size() int {
return xxx_messageInfo_Trigger_Repeat.Size(m)
@@ -4026,16 +3731,17 @@
func (m *TimestampTransform) String() string { return proto.CompactTextString(m) }
func (*TimestampTransform) ProtoMessage() {}
func (*TimestampTransform) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{33}
+ return fileDescriptor_cf57597c3a9659a9, []int{33}
}
+
func (m *TimestampTransform) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_TimestampTransform.Unmarshal(m, b)
}
func (m *TimestampTransform) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_TimestampTransform.Marshal(b, m, deterministic)
}
-func (dst *TimestampTransform) XXX_Merge(src proto.Message) {
- xxx_messageInfo_TimestampTransform.Merge(dst, src)
+func (m *TimestampTransform) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_TimestampTransform.Merge(m, src)
}
func (m *TimestampTransform) XXX_Size() int {
return xxx_messageInfo_TimestampTransform.Size(m)
@@ -4053,11 +3759,13 @@
type TimestampTransform_Delay_ struct {
Delay *TimestampTransform_Delay `protobuf:"bytes,1,opt,name=delay,proto3,oneof"`
}
+
type TimestampTransform_AlignTo_ struct {
AlignTo *TimestampTransform_AlignTo `protobuf:"bytes,2,opt,name=align_to,json=alignTo,proto3,oneof"`
}
-func (*TimestampTransform_Delay_) isTimestampTransform_TimestampTransform() {}
+func (*TimestampTransform_Delay_) isTimestampTransform_TimestampTransform() {}
+
func (*TimestampTransform_AlignTo_) isTimestampTransform_TimestampTransform() {}
func (m *TimestampTransform) GetTimestampTransform() isTimestampTransform_TimestampTransform {
@@ -4081,80 +3789,14 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*TimestampTransform) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _TimestampTransform_OneofMarshaler, _TimestampTransform_OneofUnmarshaler, _TimestampTransform_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*TimestampTransform) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*TimestampTransform_Delay_)(nil),
(*TimestampTransform_AlignTo_)(nil),
}
}
-func _TimestampTransform_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*TimestampTransform)
- // timestamp_transform
- switch x := m.TimestampTransform.(type) {
- case *TimestampTransform_Delay_:
- b.EncodeVarint(1<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Delay); err != nil {
- return err
- }
- case *TimestampTransform_AlignTo_:
- b.EncodeVarint(2<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.AlignTo); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("TimestampTransform.TimestampTransform has unexpected type %T", x)
- }
- return nil
-}
-
-func _TimestampTransform_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*TimestampTransform)
- switch tag {
- case 1: // timestamp_transform.delay
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(TimestampTransform_Delay)
- err := b.DecodeMessage(msg)
- m.TimestampTransform = &TimestampTransform_Delay_{msg}
- return true, err
- case 2: // timestamp_transform.align_to
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(TimestampTransform_AlignTo)
- err := b.DecodeMessage(msg)
- m.TimestampTransform = &TimestampTransform_AlignTo_{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _TimestampTransform_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*TimestampTransform)
- // timestamp_transform
- switch x := m.TimestampTransform.(type) {
- case *TimestampTransform_Delay_:
- s := proto.Size(x.Delay)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *TimestampTransform_AlignTo_:
- s := proto.Size(x.AlignTo)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
type TimestampTransform_Delay struct {
// (Required) The delay, in milliseconds.
DelayMillis int64 `protobuf:"varint,1,opt,name=delay_millis,json=delayMillis,proto3" json:"delay_millis,omitempty"`
@@ -4167,16 +3809,17 @@
func (m *TimestampTransform_Delay) String() string { return proto.CompactTextString(m) }
func (*TimestampTransform_Delay) ProtoMessage() {}
func (*TimestampTransform_Delay) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{33, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{33, 0}
}
+
func (m *TimestampTransform_Delay) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_TimestampTransform_Delay.Unmarshal(m, b)
}
func (m *TimestampTransform_Delay) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_TimestampTransform_Delay.Marshal(b, m, deterministic)
}
-func (dst *TimestampTransform_Delay) XXX_Merge(src proto.Message) {
- xxx_messageInfo_TimestampTransform_Delay.Merge(dst, src)
+func (m *TimestampTransform_Delay) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_TimestampTransform_Delay.Merge(m, src)
}
func (m *TimestampTransform_Delay) XXX_Size() int {
return xxx_messageInfo_TimestampTransform_Delay.Size(m)
@@ -4210,16 +3853,17 @@
func (m *TimestampTransform_AlignTo) String() string { return proto.CompactTextString(m) }
func (*TimestampTransform_AlignTo) ProtoMessage() {}
func (*TimestampTransform_AlignTo) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{33, 1}
+ return fileDescriptor_cf57597c3a9659a9, []int{33, 1}
}
+
func (m *TimestampTransform_AlignTo) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_TimestampTransform_AlignTo.Unmarshal(m, b)
}
func (m *TimestampTransform_AlignTo) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_TimestampTransform_AlignTo.Marshal(b, m, deterministic)
}
-func (dst *TimestampTransform_AlignTo) XXX_Merge(src proto.Message) {
- xxx_messageInfo_TimestampTransform_AlignTo.Merge(dst, src)
+func (m *TimestampTransform_AlignTo) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_TimestampTransform_AlignTo.Merge(m, src)
}
func (m *TimestampTransform_AlignTo) XXX_Size() int {
return xxx_messageInfo_TimestampTransform_AlignTo.Size(m)
@@ -4256,38 +3900,39 @@
// performance possibilities, is "beam:sideinput:multimap" (or some such
// URN)
AccessPattern *FunctionSpec `protobuf:"bytes,1,opt,name=access_pattern,json=accessPattern,proto3" json:"access_pattern,omitempty"`
- // (Required) The SdkFunctionSpec of the UDF that adapts a particular
+ // (Required) The FunctionSpec of the UDF that adapts a particular
// access_pattern to a user-facing view type.
//
// For example, View.asSingleton() may include a `view_fn` that adapts a
// specially-designed multimap to a single value per window.
- ViewFn *SdkFunctionSpec `protobuf:"bytes,2,opt,name=view_fn,json=viewFn,proto3" json:"view_fn,omitempty"`
- // (Required) The SdkFunctionSpec of the UDF that maps a main input window
+ ViewFn *FunctionSpec `protobuf:"bytes,2,opt,name=view_fn,json=viewFn,proto3" json:"view_fn,omitempty"`
+ // (Required) The FunctionSpec of the UDF that maps a main input window
// to a side input window.
//
// For example, when the main input is in fixed windows of one hour, this
// can specify that the side input should be accessed according to the day
// in which that hour falls.
- WindowMappingFn *SdkFunctionSpec `protobuf:"bytes,3,opt,name=window_mapping_fn,json=windowMappingFn,proto3" json:"window_mapping_fn,omitempty"`
- XXX_NoUnkeyedLiteral struct{} `json:"-"`
- XXX_unrecognized []byte `json:"-"`
- XXX_sizecache int32 `json:"-"`
+ WindowMappingFn *FunctionSpec `protobuf:"bytes,3,opt,name=window_mapping_fn,json=windowMappingFn,proto3" json:"window_mapping_fn,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
}
func (m *SideInput) Reset() { *m = SideInput{} }
func (m *SideInput) String() string { return proto.CompactTextString(m) }
func (*SideInput) ProtoMessage() {}
func (*SideInput) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{34}
+ return fileDescriptor_cf57597c3a9659a9, []int{34}
}
+
func (m *SideInput) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_SideInput.Unmarshal(m, b)
}
func (m *SideInput) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_SideInput.Marshal(b, m, deterministic)
}
-func (dst *SideInput) XXX_Merge(src proto.Message) {
- xxx_messageInfo_SideInput.Merge(dst, src)
+func (m *SideInput) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_SideInput.Merge(m, src)
}
func (m *SideInput) XXX_Size() int {
return xxx_messageInfo_SideInput.Size(m)
@@ -4305,20 +3950,76 @@
return nil
}
-func (m *SideInput) GetViewFn() *SdkFunctionSpec {
+func (m *SideInput) GetViewFn() *FunctionSpec {
if m != nil {
return m.ViewFn
}
return nil
}
-func (m *SideInput) GetWindowMappingFn() *SdkFunctionSpec {
+func (m *SideInput) GetWindowMappingFn() *FunctionSpec {
if m != nil {
return m.WindowMappingFn
}
return nil
}
+// Settings that decide the coder type of wire coder.
+type WireCoderSetting struct {
+ // (Required) The URN of the wire coder.
+ // Note that only the windowed value coder and the parameterized windowed value coder are supported.
+ Urn string `protobuf:"bytes,1,opt,name=urn,proto3" json:"urn,omitempty"`
+ // (Optional) The data specifying any parameters to the URN. If
+ // the URN is beam:coder:windowed_value:v1, this may be omitted. If the URN is
+ // beam:coder:param_windowed_value:v1, the payload is an encoded windowed
+ // value using the beam:coder:windowed_value:v1 coder parameterized by
+ // a beam:coder:bytes:v1 element coder and the window coder that this
+ // param_windowed_value coder uses.
+ Payload []byte `protobuf:"bytes,2,opt,name=payload,proto3" json:"payload,omitempty"`
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
+ XXX_unrecognized []byte `json:"-"`
+ XXX_sizecache int32 `json:"-"`
+}
+
+func (m *WireCoderSetting) Reset() { *m = WireCoderSetting{} }
+func (m *WireCoderSetting) String() string { return proto.CompactTextString(m) }
+func (*WireCoderSetting) ProtoMessage() {}
+func (*WireCoderSetting) Descriptor() ([]byte, []int) {
+ return fileDescriptor_cf57597c3a9659a9, []int{35}
+}
+
+func (m *WireCoderSetting) XXX_Unmarshal(b []byte) error {
+ return xxx_messageInfo_WireCoderSetting.Unmarshal(m, b)
+}
+func (m *WireCoderSetting) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+ return xxx_messageInfo_WireCoderSetting.Marshal(b, m, deterministic)
+}
+func (m *WireCoderSetting) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_WireCoderSetting.Merge(m, src)
+}
+func (m *WireCoderSetting) XXX_Size() int {
+ return xxx_messageInfo_WireCoderSetting.Size(m)
+}
+func (m *WireCoderSetting) XXX_DiscardUnknown() {
+ xxx_messageInfo_WireCoderSetting.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_WireCoderSetting proto.InternalMessageInfo
+
+func (m *WireCoderSetting) GetUrn() string {
+ if m != nil {
+ return m.Urn
+ }
+ return ""
+}
+
+func (m *WireCoderSetting) GetPayload() []byte {
+ if m != nil {
+ return m.Payload
+ }
+ return nil
+}
+
// An environment for executing UDFs. By default, an SDK container URL, but
// can also be a process forked by a command, or an externally managed process.
type Environment struct {
@@ -4336,16 +4037,17 @@
func (m *Environment) String() string { return proto.CompactTextString(m) }
func (*Environment) ProtoMessage() {}
func (*Environment) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{35}
+ return fileDescriptor_cf57597c3a9659a9, []int{36}
}
+
func (m *Environment) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Environment.Unmarshal(m, b)
}
func (m *Environment) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Environment.Marshal(b, m, deterministic)
}
-func (dst *Environment) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Environment.Merge(dst, src)
+func (m *Environment) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Environment.Merge(m, src)
}
func (m *Environment) XXX_Size() int {
return xxx_messageInfo_Environment.Size(m)
@@ -4380,16 +4082,17 @@
func (m *StandardEnvironments) String() string { return proto.CompactTextString(m) }
func (*StandardEnvironments) ProtoMessage() {}
func (*StandardEnvironments) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{36}
+ return fileDescriptor_cf57597c3a9659a9, []int{37}
}
+
func (m *StandardEnvironments) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StandardEnvironments.Unmarshal(m, b)
}
func (m *StandardEnvironments) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StandardEnvironments.Marshal(b, m, deterministic)
}
-func (dst *StandardEnvironments) XXX_Merge(src proto.Message) {
- xxx_messageInfo_StandardEnvironments.Merge(dst, src)
+func (m *StandardEnvironments) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_StandardEnvironments.Merge(m, src)
}
func (m *StandardEnvironments) XXX_Size() int {
return xxx_messageInfo_StandardEnvironments.Size(m)
@@ -4412,16 +4115,17 @@
func (m *DockerPayload) String() string { return proto.CompactTextString(m) }
func (*DockerPayload) ProtoMessage() {}
func (*DockerPayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{37}
+ return fileDescriptor_cf57597c3a9659a9, []int{38}
}
+
func (m *DockerPayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DockerPayload.Unmarshal(m, b)
}
func (m *DockerPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_DockerPayload.Marshal(b, m, deterministic)
}
-func (dst *DockerPayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_DockerPayload.Merge(dst, src)
+func (m *DockerPayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_DockerPayload.Merge(m, src)
}
func (m *DockerPayload) XXX_Size() int {
return xxx_messageInfo_DockerPayload.Size(m)
@@ -4453,16 +4157,17 @@
func (m *ProcessPayload) String() string { return proto.CompactTextString(m) }
func (*ProcessPayload) ProtoMessage() {}
func (*ProcessPayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{38}
+ return fileDescriptor_cf57597c3a9659a9, []int{39}
}
+
func (m *ProcessPayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ProcessPayload.Unmarshal(m, b)
}
func (m *ProcessPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ProcessPayload.Marshal(b, m, deterministic)
}
-func (dst *ProcessPayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ProcessPayload.Merge(dst, src)
+func (m *ProcessPayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ProcessPayload.Merge(m, src)
}
func (m *ProcessPayload) XXX_Size() int {
return xxx_messageInfo_ProcessPayload.Size(m)
@@ -4513,16 +4218,17 @@
func (m *ExternalPayload) String() string { return proto.CompactTextString(m) }
func (*ExternalPayload) ProtoMessage() {}
func (*ExternalPayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{39}
+ return fileDescriptor_cf57597c3a9659a9, []int{40}
}
+
func (m *ExternalPayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ExternalPayload.Unmarshal(m, b)
}
func (m *ExternalPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ExternalPayload.Marshal(b, m, deterministic)
}
-func (dst *ExternalPayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ExternalPayload.Merge(dst, src)
+func (m *ExternalPayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ExternalPayload.Merge(m, src)
}
func (m *ExternalPayload) XXX_Size() int {
return xxx_messageInfo_ExternalPayload.Size(m)
@@ -4547,57 +4253,6 @@
return nil
}
-// A specification of a user defined function.
-//
-type SdkFunctionSpec struct {
- // (Required) A full specification of this function.
- Spec *FunctionSpec `protobuf:"bytes,1,opt,name=spec,proto3" json:"spec,omitempty"`
- // (Required) Reference to an execution environment capable of
- // invoking this function.
- EnvironmentId string `protobuf:"bytes,2,opt,name=environment_id,json=environmentId,proto3" json:"environment_id,omitempty"`
- XXX_NoUnkeyedLiteral struct{} `json:"-"`
- XXX_unrecognized []byte `json:"-"`
- XXX_sizecache int32 `json:"-"`
-}
-
-func (m *SdkFunctionSpec) Reset() { *m = SdkFunctionSpec{} }
-func (m *SdkFunctionSpec) String() string { return proto.CompactTextString(m) }
-func (*SdkFunctionSpec) ProtoMessage() {}
-func (*SdkFunctionSpec) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{40}
-}
-func (m *SdkFunctionSpec) XXX_Unmarshal(b []byte) error {
- return xxx_messageInfo_SdkFunctionSpec.Unmarshal(m, b)
-}
-func (m *SdkFunctionSpec) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
- return xxx_messageInfo_SdkFunctionSpec.Marshal(b, m, deterministic)
-}
-func (dst *SdkFunctionSpec) XXX_Merge(src proto.Message) {
- xxx_messageInfo_SdkFunctionSpec.Merge(dst, src)
-}
-func (m *SdkFunctionSpec) XXX_Size() int {
- return xxx_messageInfo_SdkFunctionSpec.Size(m)
-}
-func (m *SdkFunctionSpec) XXX_DiscardUnknown() {
- xxx_messageInfo_SdkFunctionSpec.DiscardUnknown(m)
-}
-
-var xxx_messageInfo_SdkFunctionSpec proto.InternalMessageInfo
-
-func (m *SdkFunctionSpec) GetSpec() *FunctionSpec {
- if m != nil {
- return m.Spec
- }
- return nil
-}
-
-func (m *SdkFunctionSpec) GetEnvironmentId() string {
- if m != nil {
- return m.EnvironmentId
- }
- return ""
-}
-
// A URN along with a parameter object whose schema is determined by the
// URN.
//
@@ -4643,16 +4298,17 @@
func (m *FunctionSpec) String() string { return proto.CompactTextString(m) }
func (*FunctionSpec) ProtoMessage() {}
func (*FunctionSpec) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{41}
+ return fileDescriptor_cf57597c3a9659a9, []int{41}
}
+
func (m *FunctionSpec) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_FunctionSpec.Unmarshal(m, b)
}
func (m *FunctionSpec) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_FunctionSpec.Marshal(b, m, deterministic)
}
-func (dst *FunctionSpec) XXX_Merge(src proto.Message) {
- xxx_messageInfo_FunctionSpec.Merge(dst, src)
+func (m *FunctionSpec) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_FunctionSpec.Merge(m, src)
}
func (m *FunctionSpec) XXX_Size() int {
return xxx_messageInfo_FunctionSpec.Size(m)
@@ -4690,16 +4346,17 @@
func (m *DisplayData) String() string { return proto.CompactTextString(m) }
func (*DisplayData) ProtoMessage() {}
func (*DisplayData) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{42}
+ return fileDescriptor_cf57597c3a9659a9, []int{42}
}
+
func (m *DisplayData) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DisplayData.Unmarshal(m, b)
}
func (m *DisplayData) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_DisplayData.Marshal(b, m, deterministic)
}
-func (dst *DisplayData) XXX_Merge(src proto.Message) {
- xxx_messageInfo_DisplayData.Merge(dst, src)
+func (m *DisplayData) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_DisplayData.Merge(m, src)
}
func (m *DisplayData) XXX_Size() int {
return xxx_messageInfo_DisplayData.Size(m)
@@ -4734,16 +4391,17 @@
func (m *DisplayData_Identifier) String() string { return proto.CompactTextString(m) }
func (*DisplayData_Identifier) ProtoMessage() {}
func (*DisplayData_Identifier) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{42, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{42, 0}
}
+
func (m *DisplayData_Identifier) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DisplayData_Identifier.Unmarshal(m, b)
}
func (m *DisplayData_Identifier) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_DisplayData_Identifier.Marshal(b, m, deterministic)
}
-func (dst *DisplayData_Identifier) XXX_Merge(src proto.Message) {
- xxx_messageInfo_DisplayData_Identifier.Merge(dst, src)
+func (m *DisplayData_Identifier) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_DisplayData_Identifier.Merge(m, src)
}
func (m *DisplayData_Identifier) XXX_Size() int {
return xxx_messageInfo_DisplayData_Identifier.Size(m)
@@ -4798,16 +4456,17 @@
func (m *DisplayData_Item) String() string { return proto.CompactTextString(m) }
func (*DisplayData_Item) ProtoMessage() {}
func (*DisplayData_Item) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{42, 1}
+ return fileDescriptor_cf57597c3a9659a9, []int{42, 1}
}
+
func (m *DisplayData_Item) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DisplayData_Item.Unmarshal(m, b)
}
func (m *DisplayData_Item) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_DisplayData_Item.Marshal(b, m, deterministic)
}
-func (dst *DisplayData_Item) XXX_Merge(src proto.Message) {
- xxx_messageInfo_DisplayData_Item.Merge(dst, src)
+func (m *DisplayData_Item) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_DisplayData_Item.Merge(m, src)
}
func (m *DisplayData_Item) XXX_Size() int {
return xxx_messageInfo_DisplayData_Item.Size(m)
@@ -4870,16 +4529,17 @@
func (m *DisplayData_Type) String() string { return proto.CompactTextString(m) }
func (*DisplayData_Type) ProtoMessage() {}
func (*DisplayData_Type) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{42, 2}
+ return fileDescriptor_cf57597c3a9659a9, []int{42, 2}
}
+
func (m *DisplayData_Type) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DisplayData_Type.Unmarshal(m, b)
}
func (m *DisplayData_Type) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_DisplayData_Type.Marshal(b, m, deterministic)
}
-func (dst *DisplayData_Type) XXX_Merge(src proto.Message) {
- xxx_messageInfo_DisplayData_Type.Merge(dst, src)
+func (m *DisplayData_Type) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_DisplayData_Type.Merge(m, src)
}
func (m *DisplayData_Type) XXX_Size() int {
return xxx_messageInfo_DisplayData_Type.Size(m)
@@ -4905,7 +4565,7 @@
// Types that are valid to be assigned to Root:
// *MessageWithComponents_Coder
// *MessageWithComponents_CombinePayload
- // *MessageWithComponents_SdkFunctionSpec
+ // *MessageWithComponents_FunctionSpec
// *MessageWithComponents_ParDoPayload
// *MessageWithComponents_Ptransform
// *MessageWithComponents_Pcollection
@@ -4913,7 +4573,6 @@
// *MessageWithComponents_SideInput
// *MessageWithComponents_WindowIntoPayload
// *MessageWithComponents_WindowingStrategy
- // *MessageWithComponents_FunctionSpec
Root isMessageWithComponents_Root `protobuf_oneof:"root"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
@@ -4924,16 +4583,17 @@
func (m *MessageWithComponents) String() string { return proto.CompactTextString(m) }
func (*MessageWithComponents) ProtoMessage() {}
func (*MessageWithComponents) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{43}
+ return fileDescriptor_cf57597c3a9659a9, []int{43}
}
+
func (m *MessageWithComponents) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_MessageWithComponents.Unmarshal(m, b)
}
func (m *MessageWithComponents) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_MessageWithComponents.Marshal(b, m, deterministic)
}
-func (dst *MessageWithComponents) XXX_Merge(src proto.Message) {
- xxx_messageInfo_MessageWithComponents.Merge(dst, src)
+func (m *MessageWithComponents) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_MessageWithComponents.Merge(m, src)
}
func (m *MessageWithComponents) XXX_Size() int {
return xxx_messageInfo_MessageWithComponents.Size(m)
@@ -4944,6 +4604,13 @@
var xxx_messageInfo_MessageWithComponents proto.InternalMessageInfo
+func (m *MessageWithComponents) GetComponents() *Components {
+ if m != nil {
+ return m.Components
+ }
+ return nil
+}
+
type isMessageWithComponents_Root interface {
isMessageWithComponents_Root()
}
@@ -4951,48 +4618,62 @@
type MessageWithComponents_Coder struct {
Coder *Coder `protobuf:"bytes,2,opt,name=coder,proto3,oneof"`
}
+
type MessageWithComponents_CombinePayload struct {
CombinePayload *CombinePayload `protobuf:"bytes,3,opt,name=combine_payload,json=combinePayload,proto3,oneof"`
}
-type MessageWithComponents_SdkFunctionSpec struct {
- SdkFunctionSpec *SdkFunctionSpec `protobuf:"bytes,4,opt,name=sdk_function_spec,json=sdkFunctionSpec,proto3,oneof"`
+
+type MessageWithComponents_FunctionSpec struct {
+ FunctionSpec *FunctionSpec `protobuf:"bytes,4,opt,name=function_spec,json=functionSpec,proto3,oneof"`
}
+
type MessageWithComponents_ParDoPayload struct {
ParDoPayload *ParDoPayload `protobuf:"bytes,6,opt,name=par_do_payload,json=parDoPayload,proto3,oneof"`
}
+
type MessageWithComponents_Ptransform struct {
Ptransform *PTransform `protobuf:"bytes,7,opt,name=ptransform,proto3,oneof"`
}
+
type MessageWithComponents_Pcollection struct {
Pcollection *PCollection `protobuf:"bytes,8,opt,name=pcollection,proto3,oneof"`
}
+
type MessageWithComponents_ReadPayload struct {
ReadPayload *ReadPayload `protobuf:"bytes,9,opt,name=read_payload,json=readPayload,proto3,oneof"`
}
+
type MessageWithComponents_SideInput struct {
SideInput *SideInput `protobuf:"bytes,11,opt,name=side_input,json=sideInput,proto3,oneof"`
}
+
type MessageWithComponents_WindowIntoPayload struct {
WindowIntoPayload *WindowIntoPayload `protobuf:"bytes,12,opt,name=window_into_payload,json=windowIntoPayload,proto3,oneof"`
}
+
type MessageWithComponents_WindowingStrategy struct {
WindowingStrategy *WindowingStrategy `protobuf:"bytes,13,opt,name=windowing_strategy,json=windowingStrategy,proto3,oneof"`
}
-type MessageWithComponents_FunctionSpec struct {
- FunctionSpec *FunctionSpec `protobuf:"bytes,14,opt,name=function_spec,json=functionSpec,proto3,oneof"`
-}
-func (*MessageWithComponents_Coder) isMessageWithComponents_Root() {}
-func (*MessageWithComponents_CombinePayload) isMessageWithComponents_Root() {}
-func (*MessageWithComponents_SdkFunctionSpec) isMessageWithComponents_Root() {}
-func (*MessageWithComponents_ParDoPayload) isMessageWithComponents_Root() {}
-func (*MessageWithComponents_Ptransform) isMessageWithComponents_Root() {}
-func (*MessageWithComponents_Pcollection) isMessageWithComponents_Root() {}
-func (*MessageWithComponents_ReadPayload) isMessageWithComponents_Root() {}
-func (*MessageWithComponents_SideInput) isMessageWithComponents_Root() {}
+func (*MessageWithComponents_Coder) isMessageWithComponents_Root() {}
+
+func (*MessageWithComponents_CombinePayload) isMessageWithComponents_Root() {}
+
+func (*MessageWithComponents_FunctionSpec) isMessageWithComponents_Root() {}
+
+func (*MessageWithComponents_ParDoPayload) isMessageWithComponents_Root() {}
+
+func (*MessageWithComponents_Ptransform) isMessageWithComponents_Root() {}
+
+func (*MessageWithComponents_Pcollection) isMessageWithComponents_Root() {}
+
+func (*MessageWithComponents_ReadPayload) isMessageWithComponents_Root() {}
+
+func (*MessageWithComponents_SideInput) isMessageWithComponents_Root() {}
+
func (*MessageWithComponents_WindowIntoPayload) isMessageWithComponents_Root() {}
+
func (*MessageWithComponents_WindowingStrategy) isMessageWithComponents_Root() {}
-func (*MessageWithComponents_FunctionSpec) isMessageWithComponents_Root() {}
func (m *MessageWithComponents) GetRoot() isMessageWithComponents_Root {
if m != nil {
@@ -5001,13 +4682,6 @@
return nil
}
-func (m *MessageWithComponents) GetComponents() *Components {
- if m != nil {
- return m.Components
- }
- return nil
-}
-
func (m *MessageWithComponents) GetCoder() *Coder {
if x, ok := m.GetRoot().(*MessageWithComponents_Coder); ok {
return x.Coder
@@ -5022,9 +4696,9 @@
return nil
}
-func (m *MessageWithComponents) GetSdkFunctionSpec() *SdkFunctionSpec {
- if x, ok := m.GetRoot().(*MessageWithComponents_SdkFunctionSpec); ok {
- return x.SdkFunctionSpec
+func (m *MessageWithComponents) GetFunctionSpec() *FunctionSpec {
+ if x, ok := m.GetRoot().(*MessageWithComponents_FunctionSpec); ok {
+ return x.FunctionSpec
}
return nil
}
@@ -5078,19 +4752,12 @@
return nil
}
-func (m *MessageWithComponents) GetFunctionSpec() *FunctionSpec {
- if x, ok := m.GetRoot().(*MessageWithComponents_FunctionSpec); ok {
- return x.FunctionSpec
- }
- return nil
-}
-
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*MessageWithComponents) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _MessageWithComponents_OneofMarshaler, _MessageWithComponents_OneofUnmarshaler, _MessageWithComponents_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*MessageWithComponents) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*MessageWithComponents_Coder)(nil),
(*MessageWithComponents_CombinePayload)(nil),
- (*MessageWithComponents_SdkFunctionSpec)(nil),
+ (*MessageWithComponents_FunctionSpec)(nil),
(*MessageWithComponents_ParDoPayload)(nil),
(*MessageWithComponents_Ptransform)(nil),
(*MessageWithComponents_Pcollection)(nil),
@@ -5098,238 +4765,9 @@
(*MessageWithComponents_SideInput)(nil),
(*MessageWithComponents_WindowIntoPayload)(nil),
(*MessageWithComponents_WindowingStrategy)(nil),
- (*MessageWithComponents_FunctionSpec)(nil),
}
}
-func _MessageWithComponents_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*MessageWithComponents)
- // root
- switch x := m.Root.(type) {
- case *MessageWithComponents_Coder:
- b.EncodeVarint(2<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Coder); err != nil {
- return err
- }
- case *MessageWithComponents_CombinePayload:
- b.EncodeVarint(3<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.CombinePayload); err != nil {
- return err
- }
- case *MessageWithComponents_SdkFunctionSpec:
- b.EncodeVarint(4<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.SdkFunctionSpec); err != nil {
- return err
- }
- case *MessageWithComponents_ParDoPayload:
- b.EncodeVarint(6<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.ParDoPayload); err != nil {
- return err
- }
- case *MessageWithComponents_Ptransform:
- b.EncodeVarint(7<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Ptransform); err != nil {
- return err
- }
- case *MessageWithComponents_Pcollection:
- b.EncodeVarint(8<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Pcollection); err != nil {
- return err
- }
- case *MessageWithComponents_ReadPayload:
- b.EncodeVarint(9<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.ReadPayload); err != nil {
- return err
- }
- case *MessageWithComponents_SideInput:
- b.EncodeVarint(11<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.SideInput); err != nil {
- return err
- }
- case *MessageWithComponents_WindowIntoPayload:
- b.EncodeVarint(12<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.WindowIntoPayload); err != nil {
- return err
- }
- case *MessageWithComponents_WindowingStrategy:
- b.EncodeVarint(13<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.WindowingStrategy); err != nil {
- return err
- }
- case *MessageWithComponents_FunctionSpec:
- b.EncodeVarint(14<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.FunctionSpec); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("MessageWithComponents.Root has unexpected type %T", x)
- }
- return nil
-}
-
-func _MessageWithComponents_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*MessageWithComponents)
- switch tag {
- case 2: // root.coder
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Coder)
- err := b.DecodeMessage(msg)
- m.Root = &MessageWithComponents_Coder{msg}
- return true, err
- case 3: // root.combine_payload
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(CombinePayload)
- err := b.DecodeMessage(msg)
- m.Root = &MessageWithComponents_CombinePayload{msg}
- return true, err
- case 4: // root.sdk_function_spec
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(SdkFunctionSpec)
- err := b.DecodeMessage(msg)
- m.Root = &MessageWithComponents_SdkFunctionSpec{msg}
- return true, err
- case 6: // root.par_do_payload
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(ParDoPayload)
- err := b.DecodeMessage(msg)
- m.Root = &MessageWithComponents_ParDoPayload{msg}
- return true, err
- case 7: // root.ptransform
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(PTransform)
- err := b.DecodeMessage(msg)
- m.Root = &MessageWithComponents_Ptransform{msg}
- return true, err
- case 8: // root.pcollection
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(PCollection)
- err := b.DecodeMessage(msg)
- m.Root = &MessageWithComponents_Pcollection{msg}
- return true, err
- case 9: // root.read_payload
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(ReadPayload)
- err := b.DecodeMessage(msg)
- m.Root = &MessageWithComponents_ReadPayload{msg}
- return true, err
- case 11: // root.side_input
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(SideInput)
- err := b.DecodeMessage(msg)
- m.Root = &MessageWithComponents_SideInput{msg}
- return true, err
- case 12: // root.window_into_payload
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(WindowIntoPayload)
- err := b.DecodeMessage(msg)
- m.Root = &MessageWithComponents_WindowIntoPayload{msg}
- return true, err
- case 13: // root.windowing_strategy
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(WindowingStrategy)
- err := b.DecodeMessage(msg)
- m.Root = &MessageWithComponents_WindowingStrategy{msg}
- return true, err
- case 14: // root.function_spec
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(FunctionSpec)
- err := b.DecodeMessage(msg)
- m.Root = &MessageWithComponents_FunctionSpec{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _MessageWithComponents_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*MessageWithComponents)
- // root
- switch x := m.Root.(type) {
- case *MessageWithComponents_Coder:
- s := proto.Size(x.Coder)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *MessageWithComponents_CombinePayload:
- s := proto.Size(x.CombinePayload)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *MessageWithComponents_SdkFunctionSpec:
- s := proto.Size(x.SdkFunctionSpec)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *MessageWithComponents_ParDoPayload:
- s := proto.Size(x.ParDoPayload)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *MessageWithComponents_Ptransform:
- s := proto.Size(x.Ptransform)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *MessageWithComponents_Pcollection:
- s := proto.Size(x.Pcollection)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *MessageWithComponents_ReadPayload:
- s := proto.Size(x.ReadPayload)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *MessageWithComponents_SideInput:
- s := proto.Size(x.SideInput)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *MessageWithComponents_WindowIntoPayload:
- s := proto.Size(x.WindowIntoPayload)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *MessageWithComponents_WindowingStrategy:
- s := proto.Size(x.WindowingStrategy)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *MessageWithComponents_FunctionSpec:
- s := proto.Size(x.FunctionSpec)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
// The payload for an executable stage. This will eventually be passed to an SDK in the form of a
// ProcessBundleDescriptor.
type ExecutableStagePayload struct {
@@ -5338,6 +4776,8 @@
// We use an environment rather than environment id
// because ExecutableStages use environments directly. This may change in the future.
Environment *Environment `protobuf:"bytes,1,opt,name=environment,proto3" json:"environment,omitempty"`
+ // The wire coder settings of this executable stage.
+ WireCoderSetting *WireCoderSetting `protobuf:"bytes,9,opt,name=wire_coder_setting,json=wireCoderSetting,proto3" json:"wire_coder_setting,omitempty"`
// (Required) Input PCollection id. This must be present as a value in the inputs of any
// PTransform the ExecutableStagePayload is the payload of.
Input string `protobuf:"bytes,2,opt,name=input,proto3" json:"input,omitempty"`
@@ -5368,16 +4808,17 @@
func (m *ExecutableStagePayload) String() string { return proto.CompactTextString(m) }
func (*ExecutableStagePayload) ProtoMessage() {}
func (*ExecutableStagePayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{44}
+ return fileDescriptor_cf57597c3a9659a9, []int{44}
}
+
func (m *ExecutableStagePayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ExecutableStagePayload.Unmarshal(m, b)
}
func (m *ExecutableStagePayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ExecutableStagePayload.Marshal(b, m, deterministic)
}
-func (dst *ExecutableStagePayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ExecutableStagePayload.Merge(dst, src)
+func (m *ExecutableStagePayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ExecutableStagePayload.Merge(m, src)
}
func (m *ExecutableStagePayload) XXX_Size() int {
return xxx_messageInfo_ExecutableStagePayload.Size(m)
@@ -5395,6 +4836,13 @@
return nil
}
+func (m *ExecutableStagePayload) GetWireCoderSetting() *WireCoderSetting {
+ if m != nil {
+ return m.WireCoderSetting
+ }
+ return nil
+}
+
func (m *ExecutableStagePayload) GetInput() string {
if m != nil {
return m.Input
@@ -5460,16 +4908,17 @@
func (m *ExecutableStagePayload_SideInputId) String() string { return proto.CompactTextString(m) }
func (*ExecutableStagePayload_SideInputId) ProtoMessage() {}
func (*ExecutableStagePayload_SideInputId) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{44, 0}
+ return fileDescriptor_cf57597c3a9659a9, []int{44, 0}
}
+
func (m *ExecutableStagePayload_SideInputId) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ExecutableStagePayload_SideInputId.Unmarshal(m, b)
}
func (m *ExecutableStagePayload_SideInputId) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ExecutableStagePayload_SideInputId.Marshal(b, m, deterministic)
}
-func (dst *ExecutableStagePayload_SideInputId) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ExecutableStagePayload_SideInputId.Merge(dst, src)
+func (m *ExecutableStagePayload_SideInputId) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ExecutableStagePayload_SideInputId.Merge(m, src)
}
func (m *ExecutableStagePayload_SideInputId) XXX_Size() int {
return xxx_messageInfo_ExecutableStagePayload_SideInputId.Size(m)
@@ -5510,16 +4959,17 @@
func (m *ExecutableStagePayload_UserStateId) String() string { return proto.CompactTextString(m) }
func (*ExecutableStagePayload_UserStateId) ProtoMessage() {}
func (*ExecutableStagePayload_UserStateId) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{44, 1}
+ return fileDescriptor_cf57597c3a9659a9, []int{44, 1}
}
+
func (m *ExecutableStagePayload_UserStateId) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ExecutableStagePayload_UserStateId.Unmarshal(m, b)
}
func (m *ExecutableStagePayload_UserStateId) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ExecutableStagePayload_UserStateId.Marshal(b, m, deterministic)
}
-func (dst *ExecutableStagePayload_UserStateId) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ExecutableStagePayload_UserStateId.Merge(dst, src)
+func (m *ExecutableStagePayload_UserStateId) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ExecutableStagePayload_UserStateId.Merge(m, src)
}
func (m *ExecutableStagePayload_UserStateId) XXX_Size() int {
return xxx_messageInfo_ExecutableStagePayload_UserStateId.Size(m)
@@ -5560,16 +5010,17 @@
func (m *ExecutableStagePayload_TimerId) String() string { return proto.CompactTextString(m) }
func (*ExecutableStagePayload_TimerId) ProtoMessage() {}
func (*ExecutableStagePayload_TimerId) Descriptor() ([]byte, []int) {
- return fileDescriptor_beam_runner_api_70c7dbd5f3375954, []int{44, 2}
+ return fileDescriptor_cf57597c3a9659a9, []int{44, 2}
}
+
func (m *ExecutableStagePayload_TimerId) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ExecutableStagePayload_TimerId.Unmarshal(m, b)
}
func (m *ExecutableStagePayload_TimerId) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ExecutableStagePayload_TimerId.Marshal(b, m, deterministic)
}
-func (dst *ExecutableStagePayload_TimerId) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ExecutableStagePayload_TimerId.Merge(dst, src)
+func (m *ExecutableStagePayload_TimerId) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ExecutableStagePayload_TimerId.Merge(m, src)
}
func (m *ExecutableStagePayload_TimerId) XXX_Size() int {
return xxx_messageInfo_ExecutableStagePayload_TimerId.Size(m)
@@ -5599,7 +5050,7 @@
ExtensionType: (*string)(nil),
Field: 185324356,
Name: "org.apache.beam.model.pipeline.v1.beam_urn",
- Tag: "bytes,185324356,opt,name=beam_urn,json=beamUrn",
+ Tag: "bytes,185324356,opt,name=beam_urn",
Filename: "beam_runner_api.proto",
}
@@ -5608,11 +5059,29 @@
ExtensionType: (*string)(nil),
Field: 185324357,
Name: "org.apache.beam.model.pipeline.v1.beam_constant",
- Tag: "bytes,185324357,opt,name=beam_constant,json=beamConstant",
+ Tag: "bytes,185324357,opt,name=beam_constant",
Filename: "beam_runner_api.proto",
}
func init() {
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.BeamConstants_Constants", BeamConstants_Constants_name, BeamConstants_Constants_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardPTransforms_Primitives", StandardPTransforms_Primitives_name, StandardPTransforms_Primitives_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardPTransforms_DeprecatedPrimitives", StandardPTransforms_DeprecatedPrimitives_name, StandardPTransforms_DeprecatedPrimitives_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardPTransforms_Composites", StandardPTransforms_Composites_name, StandardPTransforms_Composites_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardPTransforms_CombineComponents", StandardPTransforms_CombineComponents_name, StandardPTransforms_CombineComponents_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardPTransforms_SplittableParDoComponents", StandardPTransforms_SplittableParDoComponents_name, StandardPTransforms_SplittableParDoComponents_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardSideInputTypes_Enum", StandardSideInputTypes_Enum_name, StandardSideInputTypes_Enum_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.Parameter_Type_Enum", Parameter_Type_Enum_name, Parameter_Type_Enum_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.IsBounded_Enum", IsBounded_Enum_name, IsBounded_Enum_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardCoders_Enum", StandardCoders_Enum_name, StandardCoders_Enum_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.MergeStatus_Enum", MergeStatus_Enum_name, MergeStatus_Enum_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.AccumulationMode_Enum", AccumulationMode_Enum_name, AccumulationMode_Enum_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.ClosingBehavior_Enum", ClosingBehavior_Enum_name, ClosingBehavior_Enum_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.OnTimeBehavior_Enum", OnTimeBehavior_Enum_name, OnTimeBehavior_Enum_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.OutputTime_Enum", OutputTime_Enum_name, OutputTime_Enum_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.TimeDomain_Enum", TimeDomain_Enum_name, TimeDomain_Enum_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardEnvironments_Environments", StandardEnvironments_Environments_name, StandardEnvironments_Environments_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.DisplayData_Type_Enum", DisplayData_Type_Enum_name, DisplayData_Type_Enum_value)
proto.RegisterType((*BeamConstants)(nil), "org.apache.beam.model.pipeline.v1.BeamConstants")
proto.RegisterType((*Components)(nil), "org.apache.beam.model.pipeline.v1.Components")
proto.RegisterMapType((map[string]*Coder)(nil), "org.apache.beam.model.pipeline.v1.Components.CodersEntry")
@@ -5679,6 +5148,7 @@
proto.RegisterType((*TimestampTransform_Delay)(nil), "org.apache.beam.model.pipeline.v1.TimestampTransform.Delay")
proto.RegisterType((*TimestampTransform_AlignTo)(nil), "org.apache.beam.model.pipeline.v1.TimestampTransform.AlignTo")
proto.RegisterType((*SideInput)(nil), "org.apache.beam.model.pipeline.v1.SideInput")
+ proto.RegisterType((*WireCoderSetting)(nil), "org.apache.beam.model.pipeline.v1.WireCoderSetting")
proto.RegisterType((*Environment)(nil), "org.apache.beam.model.pipeline.v1.Environment")
proto.RegisterType((*StandardEnvironments)(nil), "org.apache.beam.model.pipeline.v1.StandardEnvironments")
proto.RegisterType((*DockerPayload)(nil), "org.apache.beam.model.pipeline.v1.DockerPayload")
@@ -5686,7 +5156,6 @@
proto.RegisterMapType((map[string]string)(nil), "org.apache.beam.model.pipeline.v1.ProcessPayload.EnvEntry")
proto.RegisterType((*ExternalPayload)(nil), "org.apache.beam.model.pipeline.v1.ExternalPayload")
proto.RegisterMapType((map[string]string)(nil), "org.apache.beam.model.pipeline.v1.ExternalPayload.ParamsEntry")
- proto.RegisterType((*SdkFunctionSpec)(nil), "org.apache.beam.model.pipeline.v1.SdkFunctionSpec")
proto.RegisterType((*FunctionSpec)(nil), "org.apache.beam.model.pipeline.v1.FunctionSpec")
proto.RegisterType((*DisplayData)(nil), "org.apache.beam.model.pipeline.v1.DisplayData")
proto.RegisterType((*DisplayData_Identifier)(nil), "org.apache.beam.model.pipeline.v1.DisplayData.Identifier")
@@ -5697,28 +5166,344 @@
proto.RegisterType((*ExecutableStagePayload_SideInputId)(nil), "org.apache.beam.model.pipeline.v1.ExecutableStagePayload.SideInputId")
proto.RegisterType((*ExecutableStagePayload_UserStateId)(nil), "org.apache.beam.model.pipeline.v1.ExecutableStagePayload.UserStateId")
proto.RegisterType((*ExecutableStagePayload_TimerId)(nil), "org.apache.beam.model.pipeline.v1.ExecutableStagePayload.TimerId")
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.BeamConstants_Constants", BeamConstants_Constants_name, BeamConstants_Constants_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardPTransforms_Primitives", StandardPTransforms_Primitives_name, StandardPTransforms_Primitives_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardPTransforms_DeprecatedPrimitives", StandardPTransforms_DeprecatedPrimitives_name, StandardPTransforms_DeprecatedPrimitives_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardPTransforms_Composites", StandardPTransforms_Composites_name, StandardPTransforms_Composites_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardPTransforms_CombineComponents", StandardPTransforms_CombineComponents_name, StandardPTransforms_CombineComponents_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardPTransforms_SplittableParDoComponents", StandardPTransforms_SplittableParDoComponents_name, StandardPTransforms_SplittableParDoComponents_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardSideInputTypes_Enum", StandardSideInputTypes_Enum_name, StandardSideInputTypes_Enum_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.Parameter_Type_Enum", Parameter_Type_Enum_name, Parameter_Type_Enum_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.IsBounded_Enum", IsBounded_Enum_name, IsBounded_Enum_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardCoders_Enum", StandardCoders_Enum_name, StandardCoders_Enum_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.MergeStatus_Enum", MergeStatus_Enum_name, MergeStatus_Enum_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.AccumulationMode_Enum", AccumulationMode_Enum_name, AccumulationMode_Enum_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.ClosingBehavior_Enum", ClosingBehavior_Enum_name, ClosingBehavior_Enum_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.OnTimeBehavior_Enum", OnTimeBehavior_Enum_name, OnTimeBehavior_Enum_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.OutputTime_Enum", OutputTime_Enum_name, OutputTime_Enum_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.TimeDomain_Enum", TimeDomain_Enum_name, TimeDomain_Enum_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.StandardEnvironments_Environments", StandardEnvironments_Environments_name, StandardEnvironments_Environments_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.DisplayData_Type_Enum", DisplayData_Type_Enum_name, DisplayData_Type_Enum_value)
proto.RegisterExtension(E_BeamUrn)
proto.RegisterExtension(E_BeamConstant)
}
+func init() { proto.RegisterFile("beam_runner_api.proto", fileDescriptor_cf57597c3a9659a9) }
+
+var fileDescriptor_cf57597c3a9659a9 = []byte{
+ // 5243 bytes of a gzipped FileDescriptorProto
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xbc, 0x5c, 0xcd, 0x6f, 0x23, 0xc9,
+ 0x75, 0x17, 0x49, 0xf1, 0xeb, 0x91, 0xa2, 0x7a, 0x4a, 0x9a, 0x59, 0x6d, 0x7b, 0xbd, 0x33, 0xdb,
+ 0xbb, 0xde, 0x1d, 0x6f, 0xd6, 0xdc, 0x91, 0xb4, 0xb3, 0x3b, 0x23, 0xdb, 0xb3, 0x26, 0xc5, 0xd6,
+ 0xa8, 0x67, 0xf8, 0xe5, 0x26, 0x25, 0xcd, 0xac, 0xed, 0xed, 0x94, 0xd8, 0x45, 0xa9, 0x31, 0xcd,
+ 0x6e, 0xba, 0xbb, 0xa9, 0x59, 0x1a, 0x0e, 0x02, 0x04, 0xc1, 0x1e, 0x72, 0x48, 0x90, 0x1c, 0x0c,
+ 0xe4, 0x14, 0xc0, 0x01, 0x82, 0x24, 0x08, 0x90, 0xc0, 0x4e, 0xf2, 0x07, 0x38, 0xc9, 0x31, 0x01,
+ 0x02, 0x04, 0x08, 0x90, 0x5b, 0xfe, 0x80, 0x5c, 0x02, 0xf8, 0x90, 0x9c, 0x82, 0xfa, 0xe8, 0x66,
+ 0x93, 0x92, 0x66, 0x49, 0xcd, 0x20, 0x37, 0xf6, 0xeb, 0x7a, 0xbf, 0x57, 0xf5, 0xaa, 0xea, 0xd5,
+ 0xab, 0xf7, 0x5e, 0x13, 0xae, 0x1f, 0x13, 0x3c, 0x30, 0xbc, 0x91, 0xe3, 0x10, 0xcf, 0xc0, 0x43,
+ 0xab, 0x3c, 0xf4, 0xdc, 0xc0, 0x45, 0x6f, 0xb9, 0xde, 0x49, 0x19, 0x0f, 0x71, 0xef, 0x94, 0x94,
+ 0x69, 0x8b, 0xf2, 0xc0, 0x35, 0x89, 0x5d, 0x1e, 0x5a, 0x43, 0x62, 0x5b, 0x0e, 0x29, 0x9f, 0x6d,
+ 0xca, 0xab, 0xc4, 0x31, 0x87, 0xae, 0xe5, 0x04, 0x3e, 0xe7, 0x91, 0x5f, 0x3f, 0x71, 0xdd, 0x13,
+ 0x9b, 0x7c, 0xc8, 0x9e, 0x8e, 0x47, 0xfd, 0x0f, 0xb1, 0x33, 0x16, 0xaf, 0x6e, 0xcd, 0xbe, 0x32,
+ 0x89, 0xdf, 0xf3, 0xac, 0x61, 0xe0, 0x7a, 0xa2, 0xc5, 0xcd, 0xd9, 0x16, 0x81, 0x35, 0x20, 0x7e,
+ 0x80, 0x07, 0x43, 0xde, 0x40, 0xf9, 0x55, 0x02, 0x56, 0xaa, 0x04, 0x0f, 0x76, 0x5d, 0xc7, 0x0f,
+ 0xb0, 0x13, 0xf8, 0xca, 0xdf, 0x24, 0x20, 0x1f, 0x3d, 0xa1, 0x4d, 0x58, 0x6f, 0x68, 0x4d, 0xa3,
+ 0xab, 0x35, 0xd4, 0x4e, 0xb7, 0xd2, 0x68, 0x1b, 0x0d, 0xad, 0x5e, 0xd7, 0x3a, 0xd2, 0x92, 0xfc,
+ 0xda, 0x5f, 0xfe, 0xed, 0xff, 0xfe, 0x2a, 0x7d, 0xed, 0x5b, 0xf7, 0xb7, 0xb6, 0xb6, 0xb7, 0x3f,
+ 0xd9, 0xba, 0xb3, 0xfd, 0xf1, 0xbd, 0xbb, 0x1f, 0x7d, 0xf2, 0xc9, 0x5d, 0x74, 0x07, 0xd6, 0x1b,
+ 0x95, 0x27, 0xe7, 0x59, 0x12, 0xf2, 0x0d, 0xc6, 0x22, 0x9d, 0xe3, 0x78, 0x00, 0xca, 0xc3, 0x7a,
+ 0xab, 0x5a, 0xa9, 0x1b, 0x47, 0x5a, 0xb3, 0xd6, 0x3a, 0x32, 0x2e, 0xe4, 0x4f, 0x4e, 0xf3, 0x6f,
+ 0xde, 0xbf, 0x7b, 0xe7, 0x23, 0xc6, 0xaf, 0xfc, 0x7d, 0x0e, 0x60, 0xd7, 0x1d, 0x0c, 0x5d, 0x87,
+ 0xd0, 0x3e, 0xff, 0x08, 0x20, 0xf0, 0xb0, 0xe3, 0xf7, 0x5d, 0x6f, 0xe0, 0x6f, 0x24, 0x6e, 0xa5,
+ 0x6e, 0x17, 0xb6, 0xbe, 0x5b, 0xfe, 0x4a, 0xd5, 0x97, 0x27, 0x10, 0xe5, 0x6e, 0xc4, 0xaf, 0x3a,
+ 0x81, 0x37, 0xd6, 0x63, 0x80, 0xa8, 0x07, 0xc5, 0x61, 0xcf, 0xb5, 0x6d, 0xd2, 0x0b, 0x2c, 0xd7,
+ 0xf1, 0x37, 0x92, 0x4c, 0xc0, 0xa7, 0x8b, 0x09, 0x68, 0xc7, 0x10, 0xb8, 0x88, 0x29, 0x50, 0x34,
+ 0x86, 0xf5, 0xe7, 0x96, 0x63, 0xba, 0xcf, 0x2d, 0xe7, 0xc4, 0xf0, 0x03, 0x0f, 0x07, 0xe4, 0xc4,
+ 0x22, 0xfe, 0x46, 0x8a, 0x09, 0xdb, 0x5b, 0x4c, 0xd8, 0x51, 0x88, 0xd4, 0x89, 0x80, 0xb8, 0xcc,
+ 0xb5, 0xe7, 0xe7, 0xdf, 0xa0, 0xef, 0x43, 0xa6, 0xe7, 0x9a, 0xc4, 0xf3, 0x37, 0x96, 0x99, 0xb0,
+ 0xfb, 0x8b, 0x09, 0xdb, 0x65, 0xbc, 0x1c, 0x5f, 0x00, 0x51, 0x95, 0x11, 0xe7, 0xcc, 0xf2, 0x5c,
+ 0x67, 0x40, 0xdb, 0x6c, 0xa4, 0xaf, 0xa2, 0x32, 0x35, 0x86, 0x20, 0x54, 0x16, 0x07, 0x95, 0x6d,
+ 0x58, 0x9d, 0x99, 0x36, 0x24, 0x41, 0xea, 0x19, 0x19, 0x6f, 0x24, 0x6e, 0x25, 0x6e, 0xe7, 0x75,
+ 0xfa, 0x13, 0xed, 0x42, 0xfa, 0x0c, 0xdb, 0x23, 0xb2, 0x91, 0xbc, 0x95, 0xb8, 0x5d, 0xd8, 0xfa,
+ 0xd6, 0x1c, 0x5d, 0x68, 0x47, 0xa8, 0x3a, 0xe7, 0xdd, 0x49, 0xde, 0x4b, 0xc8, 0x2e, 0x5c, 0x3b,
+ 0x37, 0x87, 0x17, 0xc8, 0xab, 0x4d, 0xcb, 0x2b, 0xcf, 0x23, 0x6f, 0x37, 0x82, 0x8d, 0x0b, 0xfc,
+ 0x29, 0x6c, 0x5c, 0x36, 0x8f, 0x17, 0xc8, 0x7d, 0x34, 0x2d, 0xf7, 0xa3, 0x39, 0xe4, 0xce, 0xa2,
+ 0x8f, 0xe3, 0xd2, 0x7b, 0x50, 0x88, 0x4d, 0xec, 0x05, 0x02, 0x1f, 0x4c, 0x0b, 0xbc, 0x3d, 0xd7,
+ 0xdc, 0x9a, 0xc4, 0x9b, 0xd1, 0xe9, 0xb9, 0x49, 0x7e, 0x35, 0x3a, 0x8d, 0xc1, 0xc6, 0x04, 0x2a,
+ 0xff, 0x91, 0x80, 0x5c, 0x5b, 0x34, 0x43, 0x0d, 0x80, 0x5e, 0xb4, 0xda, 0x98, 0xbc, 0xf9, 0xd6,
+ 0xc7, 0x64, 0x89, 0xea, 0x31, 0x00, 0xf4, 0x01, 0x20, 0xcf, 0x75, 0x03, 0x23, 0xb2, 0x1c, 0x86,
+ 0x65, 0x72, 0x63, 0x91, 0xd7, 0x25, 0xfa, 0x26, 0x5a, 0x56, 0x9a, 0x49, 0x37, 0x5d, 0xd1, 0xb4,
+ 0xfc, 0xa1, 0x8d, 0xc7, 0x86, 0x89, 0x03, 0xbc, 0x91, 0x9a, 0x7b, 0x68, 0x35, 0xce, 0x56, 0xc3,
+ 0x01, 0xd6, 0x0b, 0xe6, 0xe4, 0x41, 0xf9, 0xc5, 0x32, 0xc0, 0x64, 0xed, 0xa2, 0x9b, 0x50, 0x18,
+ 0x39, 0xd6, 0x8f, 0x47, 0xc4, 0x70, 0xf0, 0x80, 0x6c, 0xa4, 0x99, 0x3e, 0x81, 0x93, 0x9a, 0x78,
+ 0x40, 0xd0, 0x2e, 0x2c, 0xfb, 0x43, 0xd2, 0x13, 0x23, 0xff, 0x70, 0x0e, 0xd1, 0x7b, 0x23, 0x87,
+ 0x2d, 0xd3, 0xce, 0x90, 0xf4, 0x74, 0xc6, 0x8c, 0xde, 0x81, 0x15, 0x7f, 0x74, 0x1c, 0x33, 0xbf,
+ 0x7c, 0xc0, 0xd3, 0x44, 0x6a, 0x62, 0x2c, 0x67, 0x38, 0x0a, 0x42, 0x7b, 0x76, 0x7f, 0xa1, 0x6d,
+ 0x58, 0xd6, 0x18, 0xaf, 0x30, 0x31, 0x1c, 0x08, 0x75, 0x21, 0xeb, 0x8e, 0x02, 0x86, 0xc9, 0xcd,
+ 0xd6, 0xce, 0x62, 0x98, 0x2d, 0xce, 0xcc, 0x41, 0x43, 0xa8, 0x73, 0xd3, 0x92, 0x79, 0xe9, 0x69,
+ 0x41, 0xdf, 0x80, 0x52, 0xcc, 0x6c, 0x19, 0x96, 0xb9, 0x91, 0x65, 0x53, 0xb1, 0x12, 0xa3, 0x6a,
+ 0xa6, 0x7c, 0x1f, 0x0a, 0xb1, 0x61, 0x5e, 0xb0, 0x0b, 0xd6, 0xe3, 0xbb, 0x20, 0x1f, 0xdf, 0x46,
+ 0x3b, 0x50, 0x8c, 0x8f, 0x66, 0x11, 0x5e, 0xe5, 0xef, 0x56, 0x60, 0xad, 0x13, 0x60, 0xc7, 0xc4,
+ 0x9e, 0x39, 0xd1, 0x8e, 0xaf, 0xfc, 0x45, 0x0a, 0xa0, 0xed, 0x59, 0x03, 0x2b, 0xb0, 0xce, 0x88,
+ 0x8f, 0xbe, 0x09, 0x99, 0x76, 0x45, 0x37, 0x6a, 0x2d, 0x69, 0x49, 0xfe, 0xfa, 0xcf, 0xe9, 0xa9,
+ 0xfc, 0x1a, 0xd5, 0xc3, 0x4e, 0x34, 0xc7, 0x3b, 0x43, 0xec, 0x99, 0xee, 0xce, 0xd9, 0x26, 0xfa,
+ 0x00, 0xb2, 0x7b, 0xf5, 0x4a, 0xb7, 0xab, 0x36, 0xa5, 0x84, 0x7c, 0x93, 0xb5, 0x7d, 0x7d, 0xa6,
+ 0x6d, 0xdf, 0xc6, 0x41, 0x40, 0x1c, 0xda, 0xfa, 0x63, 0x28, 0x3e, 0xd4, 0x5b, 0x07, 0x6d, 0xa3,
+ 0xfa, 0xd4, 0x78, 0xac, 0x3e, 0x95, 0x92, 0xf2, 0x3b, 0x8c, 0xe5, 0xcd, 0x19, 0x96, 0x13, 0xcf,
+ 0x1d, 0x0d, 0x8d, 0xe3, 0xb1, 0xf1, 0x8c, 0x8c, 0x85, 0x14, 0xad, 0xd1, 0x3e, 0xa8, 0x77, 0x54,
+ 0x29, 0x75, 0x89, 0x14, 0x6b, 0x30, 0x1c, 0xd9, 0x3e, 0xa1, 0xad, 0x3f, 0x81, 0x52, 0xa5, 0xd3,
+ 0xd1, 0x1e, 0x36, 0x85, 0xc3, 0xd1, 0x91, 0x96, 0xe5, 0xb7, 0x19, 0xd3, 0xd7, 0x67, 0x98, 0xf8,
+ 0x01, 0x69, 0x58, 0x4e, 0xc0, 0x06, 0xb3, 0x0d, 0x85, 0xae, 0xda, 0xe9, 0x1a, 0x9d, 0xae, 0xae,
+ 0x56, 0x1a, 0x52, 0x5a, 0x56, 0x18, 0xd7, 0x1b, 0x33, 0x5c, 0x01, 0xf1, 0x03, 0x3f, 0xf0, 0x28,
+ 0xf1, 0x6c, 0x13, 0x7d, 0x04, 0x85, 0x46, 0xa5, 0x1d, 0x89, 0xca, 0x5c, 0x22, 0x6a, 0x80, 0x87,
+ 0x06, 0x17, 0xe7, 0x53, 0xae, 0x7b, 0xb0, 0xd2, 0x50, 0xf5, 0x87, 0x6a, 0xc4, 0x97, 0x95, 0xbf,
+ 0xc1, 0xf8, 0x6e, 0xce, 0xf2, 0x11, 0xef, 0x84, 0xc4, 0x38, 0x95, 0x00, 0xd6, 0x6b, 0x64, 0xe8,
+ 0x91, 0x1e, 0x0e, 0x88, 0x19, 0x9b, 0xb4, 0x77, 0x61, 0x59, 0x57, 0x2b, 0x35, 0x69, 0x49, 0x7e,
+ 0x83, 0x01, 0xdd, 0x98, 0x01, 0xf2, 0x08, 0x36, 0x45, 0x7f, 0x77, 0x75, 0xb5, 0xd2, 0x55, 0x8d,
+ 0x43, 0x4d, 0x3d, 0x92, 0x12, 0x97, 0xf4, 0xb7, 0xe7, 0x11, 0x1c, 0x10, 0xe3, 0xcc, 0x22, 0xcf,
+ 0xa9, 0xd4, 0xff, 0x4e, 0x08, 0x27, 0xcc, 0xb7, 0x02, 0xe2, 0xa3, 0xef, 0xc0, 0xea, 0x6e, 0xab,
+ 0x51, 0xd5, 0x9a, 0xaa, 0xd1, 0x56, 0x75, 0x36, 0x97, 0x4b, 0xf2, 0x7b, 0x0c, 0xe8, 0xad, 0x59,
+ 0x20, 0x77, 0x70, 0x6c, 0x39, 0xc4, 0x18, 0x12, 0x2f, 0x9c, 0xce, 0x07, 0x20, 0x85, 0xdc, 0xdc,
+ 0x33, 0xac, 0x3f, 0x95, 0x12, 0xf2, 0x6d, 0xc6, 0xae, 0x5c, 0xc2, 0x7e, 0x62, 0xbb, 0xc7, 0xd8,
+ 0xb6, 0x19, 0xff, 0x1d, 0xc8, 0xeb, 0x6a, 0x67, 0xff, 0x60, 0x6f, 0xaf, 0xae, 0x4a, 0x49, 0xf9,
+ 0x2d, 0xc6, 0xf8, 0xb5, 0x73, 0xe3, 0xf5, 0x4f, 0x47, 0xfd, 0xbe, 0x4d, 0xc4, 0xa0, 0x8f, 0x74,
+ 0xad, 0xab, 0x1a, 0x7b, 0x5a, 0x5d, 0xed, 0x48, 0xa9, 0xcb, 0xd6, 0x83, 0x67, 0x05, 0xc4, 0xe8,
+ 0x5b, 0x36, 0x61, 0xaa, 0xfe, 0x75, 0x12, 0xae, 0xed, 0x72, 0xf9, 0x31, 0x07, 0x54, 0x07, 0x79,
+ 0x66, 0xec, 0x46, 0x5b, 0x57, 0x05, 0x49, 0x5a, 0x92, 0xb7, 0x18, 0xf4, 0x07, 0x2f, 0x56, 0x83,
+ 0x41, 0x67, 0x90, 0x93, 0x68, 0xff, 0x8e, 0x41, 0x99, 0xc5, 0xe4, 0xcb, 0xa3, 0xb2, 0xbb, 0x7b,
+ 0xd0, 0x38, 0xa8, 0x57, 0xba, 0x2d, 0x9d, 0xfa, 0xd8, 0x3b, 0x0c, 0xfb, 0xa3, 0xaf, 0xc0, 0xe6,
+ 0x6b, 0x06, 0xf7, 0x7a, 0xa3, 0xc1, 0xc8, 0xc6, 0x81, 0xeb, 0xb1, 0x25, 0xf7, 0x43, 0xb8, 0x39,
+ 0x2b, 0x43, 0x7d, 0xd2, 0xd5, 0x2b, 0xbb, 0x5d, 0xa3, 0x75, 0xd0, 0x6d, 0x1f, 0x74, 0xa9, 0x13,
+ 0xfe, 0x09, 0x13, 0xb0, 0xf9, 0x15, 0x02, 0xc8, 0x17, 0x81, 0x87, 0x7b, 0x81, 0x21, 0x0c, 0x29,
+ 0x45, 0x7f, 0x04, 0x37, 0xa2, 0x39, 0xa5, 0x5b, 0x5c, 0xad, 0x19, 0x87, 0x95, 0xfa, 0x01, 0x53,
+ 0x76, 0x99, 0x81, 0xde, 0xbe, 0x6c, 0x66, 0xe9, 0x66, 0x27, 0xa6, 0xc1, 0xcc, 0x14, 0xd3, 0xfb,
+ 0xef, 0x2f, 0xc3, 0xeb, 0x9d, 0xa1, 0x6d, 0x05, 0x01, 0x3e, 0xb6, 0x49, 0x1b, 0x7b, 0x35, 0x37,
+ 0xa6, 0xff, 0x3a, 0x5c, 0x6f, 0x57, 0x34, 0xdd, 0x38, 0xd2, 0xba, 0xfb, 0x86, 0xae, 0x76, 0xba,
+ 0xba, 0xb6, 0xdb, 0xd5, 0x5a, 0x4d, 0x69, 0x49, 0xde, 0x64, 0x82, 0x7e, 0x63, 0x46, 0x90, 0x6f,
+ 0xf6, 0x8d, 0x21, 0xb6, 0x3c, 0xe3, 0xb9, 0x15, 0x9c, 0x1a, 0x1e, 0xf1, 0x03, 0xcf, 0x62, 0x27,
+ 0x1b, 0xed, 0x77, 0x0d, 0xae, 0x75, 0xda, 0x75, 0xad, 0x3b, 0x85, 0x94, 0x90, 0xbf, 0xc5, 0x90,
+ 0xde, 0xbb, 0x00, 0xc9, 0xa7, 0x1d, 0x9b, 0x45, 0x69, 0xc2, 0x8d, 0xb6, 0xde, 0xda, 0x55, 0x3b,
+ 0x1d, 0xaa, 0x57, 0xb5, 0x66, 0xa8, 0x75, 0xb5, 0xa1, 0x36, 0x99, 0x4a, 0x2f, 0x5e, 0x0f, 0xac,
+ 0x53, 0x9e, 0xdb, 0x23, 0xbe, 0x4f, 0x55, 0x4a, 0x4c, 0x83, 0xd8, 0x84, 0x39, 0x46, 0x14, 0xaf,
+ 0x0a, 0x52, 0x88, 0x17, 0x21, 0xa5, 0xe4, 0x0f, 0x18, 0xd2, 0xbb, 0x2f, 0x40, 0x8a, 0x63, 0x3c,
+ 0x81, 0xaf, 0xf1, 0x91, 0x55, 0x9a, 0x35, 0xa3, 0xa3, 0x7d, 0xa6, 0xc6, 0x87, 0x48, 0x6d, 0xe2,
+ 0xc5, 0x73, 0x3d, 0x19, 0x23, 0x76, 0x4c, 0xc3, 0xb7, 0x7e, 0x42, 0xe2, 0x83, 0x65, 0xc8, 0x2e,
+ 0xbc, 0x17, 0xf6, 0x8e, 0xe2, 0x4e, 0x46, 0xcb, 0x44, 0x4d, 0x49, 0x49, 0xcb, 0x55, 0x26, 0xe5,
+ 0x3b, 0x2f, 0xe8, 0x34, 0x95, 0x11, 0x0d, 0x9f, 0x49, 0x9d, 0x11, 0xa8, 0xfc, 0x4e, 0x02, 0x6e,
+ 0x84, 0xe7, 0x56, 0xc7, 0x32, 0x09, 0x3b, 0x3b, 0xbb, 0xe3, 0x21, 0xf1, 0x95, 0x53, 0x58, 0x56,
+ 0x9d, 0xd1, 0x00, 0x7d, 0x08, 0x39, 0xad, 0xab, 0xea, 0x95, 0x6a, 0x9d, 0xee, 0xc1, 0xb8, 0x49,
+ 0xf0, 0x2d, 0x93, 0x18, 0xcc, 0x8f, 0xd8, 0xb1, 0x02, 0xe2, 0xd1, 0x25, 0x45, 0x07, 0xf1, 0x21,
+ 0xe4, 0x1a, 0x07, 0xf5, 0xae, 0xd6, 0xa8, 0xb4, 0xa5, 0xc4, 0x65, 0x0c, 0x83, 0x91, 0x1d, 0x58,
+ 0x03, 0x3c, 0xa4, 0x9d, 0xf8, 0x79, 0x12, 0x0a, 0x31, 0xef, 0x7d, 0xd6, 0xe5, 0x4a, 0x9c, 0x73,
+ 0xb9, 0x5e, 0x87, 0x1c, 0xbb, 0x21, 0x51, 0x2f, 0x80, 0x1f, 0xc5, 0x59, 0xf6, 0xac, 0x99, 0xa8,
+ 0x0d, 0x60, 0xf9, 0xc6, 0xb1, 0x3b, 0x72, 0x4c, 0x62, 0x32, 0x77, 0xb0, 0xb4, 0xb5, 0x39, 0x87,
+ 0xdf, 0xa1, 0xf9, 0x55, 0xce, 0x53, 0xa6, 0x83, 0xd6, 0xf3, 0x56, 0xf8, 0x8c, 0xb6, 0xe0, 0xfa,
+ 0xb9, 0x2b, 0xe5, 0x98, 0x4a, 0x5e, 0x66, 0x92, 0xcf, 0xdd, 0x05, 0xc7, 0x9a, 0x79, 0xce, 0xff,
+ 0x49, 0xbf, 0xbc, 0x5b, 0xfa, 0xb3, 0x2c, 0x14, 0xd9, 0x86, 0x6d, 0xe3, 0xb1, 0xed, 0x62, 0x93,
+ 0xba, 0xf3, 0xa6, 0x6b, 0xf4, 0x9d, 0x2b, 0x3b, 0x9e, 0xa6, 0xbb, 0xe7, 0xa0, 0x3a, 0xc0, 0x10,
+ 0x7b, 0x78, 0x40, 0x02, 0x7a, 0x73, 0xe5, 0x77, 0xf2, 0x0f, 0xe6, 0x71, 0x01, 0x43, 0x26, 0x3d,
+ 0xc6, 0x8f, 0x7e, 0x13, 0x0a, 0x93, 0x39, 0x0e, 0xbd, 0xd4, 0x4f, 0xe7, 0x83, 0x8b, 0x46, 0x56,
+ 0x8e, 0x16, 0x62, 0x18, 0x45, 0xf0, 0x23, 0x02, 0x93, 0x10, 0xd0, 0xf3, 0x93, 0xba, 0xcd, 0xa1,
+ 0xcf, 0xba, 0xb8, 0x04, 0x0a, 0x41, 0xb5, 0x10, 0x49, 0x88, 0x08, 0x54, 0x42, 0x60, 0x0d, 0x88,
+ 0x27, 0x24, 0xa4, 0xaf, 0x26, 0xa1, 0x4b, 0x21, 0xe2, 0x12, 0x82, 0x88, 0x80, 0xde, 0x04, 0xf0,
+ 0x23, 0x23, 0xcc, 0x7c, 0xe3, 0x9c, 0x1e, 0xa3, 0xa0, 0x3b, 0xb0, 0x1e, 0xdb, 0xa7, 0x46, 0xb4,
+ 0xd4, 0xb9, 0xc3, 0x8b, 0x62, 0xef, 0x76, 0xc5, 0xaa, 0xdf, 0x86, 0xeb, 0x1e, 0xf9, 0xf1, 0x88,
+ 0xba, 0x4f, 0x46, 0xdf, 0x72, 0xb0, 0x6d, 0xfd, 0x04, 0xd3, 0xf7, 0x1b, 0x39, 0x06, 0xbe, 0x1e,
+ 0xbe, 0xdc, 0x8b, 0xbd, 0x93, 0x9f, 0xc1, 0xea, 0x8c, 0xa6, 0x2f, 0x70, 0x79, 0xab, 0xd3, 0x97,
+ 0xc6, 0x79, 0x96, 0x46, 0x04, 0x1a, 0x77, 0xae, 0xa9, 0xb0, 0x69, 0xa5, 0xbf, 0x22, 0x61, 0x21,
+ 0xe8, 0x8c, 0xb0, 0x19, 0xfd, 0xbf, 0x1a, 0x61, 0x11, 0x68, 0xdc, 0xf5, 0xff, 0x65, 0x02, 0xf2,
+ 0xd1, 0x6e, 0x40, 0x8f, 0x60, 0x39, 0x18, 0x0f, 0xb9, 0xd1, 0x2a, 0x6d, 0x7d, 0xbc, 0xc8, 0x4e,
+ 0x2a, 0x53, 0xbb, 0xcb, 0xcd, 0x0f, 0xc3, 0x90, 0x3f, 0x83, 0x65, 0x4a, 0x52, 0x74, 0x61, 0x89,
+ 0x57, 0xa1, 0x70, 0xd0, 0xec, 0xb4, 0xd5, 0x5d, 0x6d, 0x4f, 0x53, 0x6b, 0xd2, 0x12, 0x02, 0xc8,
+ 0x70, 0x2f, 0x57, 0x4a, 0xa0, 0x75, 0x90, 0xda, 0x5a, 0x5b, 0xad, 0x53, 0x3f, 0xa1, 0xd5, 0xe6,
+ 0x67, 0x44, 0x12, 0xbd, 0x06, 0x6b, 0xb1, 0x53, 0xc3, 0xa0, 0x4e, 0xc9, 0x63, 0x55, 0x97, 0x52,
+ 0xca, 0xbf, 0xa6, 0x20, 0x1f, 0xe9, 0x0e, 0x79, 0x70, 0x83, 0x7a, 0xb1, 0xc6, 0xc0, 0x35, 0xad,
+ 0xfe, 0xd8, 0xe0, 0xde, 0x5a, 0xec, 0x56, 0xfb, 0xed, 0x39, 0xc6, 0xa1, 0x13, 0x6c, 0x36, 0x18,
+ 0xff, 0x11, 0x65, 0x8f, 0xc0, 0xf7, 0x97, 0xf4, 0x35, 0x6f, 0xe6, 0x1d, 0x95, 0x59, 0x87, 0xdc,
+ 0x31, 0x3e, 0xe1, 0x52, 0x92, 0x73, 0x9b, 0xb0, 0x2a, 0x3e, 0x89, 0x23, 0x67, 0x8f, 0xf1, 0x09,
+ 0x43, 0xfb, 0x1c, 0x4a, 0xdc, 0xed, 0x61, 0x56, 0x9a, 0x62, 0xf2, 0x50, 0xc0, 0xdd, 0xf9, 0x22,
+ 0x11, 0x9c, 0x31, 0x8e, 0xbc, 0x12, 0xc1, 0x85, 0xbd, 0xa5, 0x17, 0x0d, 0x86, 0xbc, 0x3c, 0x77,
+ 0x6f, 0x1b, 0x78, 0x38, 0xd5, 0xdb, 0x01, 0x1e, 0x86, 0x68, 0x3e, 0x09, 0x38, 0x5a, 0x7a, 0x6e,
+ 0xb4, 0x0e, 0x09, 0xa6, 0xd0, 0x7c, 0x12, 0xd0, 0x9f, 0xd5, 0x0c, 0x8f, 0x40, 0x28, 0x77, 0x61,
+ 0xe3, 0xb2, 0x49, 0x98, 0x3a, 0x32, 0x13, 0x53, 0x47, 0xa6, 0x72, 0x0f, 0x8a, 0x71, 0xad, 0xa2,
+ 0xdb, 0x20, 0x85, 0x2e, 0xc3, 0x0c, 0x4b, 0x49, 0xd0, 0x85, 0xd9, 0x51, 0x7e, 0x96, 0x00, 0x74,
+ 0x5e, 0x79, 0xd4, 0x7e, 0xc5, 0x5c, 0xe4, 0x59, 0x10, 0x14, 0x7b, 0x17, 0xda, 0xaf, 0x26, 0x8b,
+ 0x21, 0x31, 0xa7, 0xb5, 0xef, 0x2c, 0xb0, 0x1a, 0xa6, 0x0e, 0xb4, 0xbc, 0x80, 0xd8, 0x73, 0x94,
+ 0x43, 0x28, 0xc6, 0x55, 0x8f, 0x6e, 0x41, 0x91, 0x7a, 0xd7, 0x33, 0x3d, 0x81, 0x67, 0x64, 0x1c,
+ 0xf6, 0xe0, 0x1d, 0x28, 0xb1, 0x2d, 0x6d, 0xcc, 0x38, 0x16, 0x45, 0x46, 0xdd, 0x9d, 0xa8, 0x2a,
+ 0x3e, 0x09, 0x0b, 0xa8, 0xea, 0xcb, 0x04, 0xe4, 0x23, 0xf3, 0x81, 0x3a, 0xfc, 0x8c, 0x31, 0x4c,
+ 0x77, 0x80, 0x2d, 0x47, 0x18, 0x8b, 0xad, 0x39, 0x2d, 0x50, 0x8d, 0x31, 0x71, 0x43, 0xc1, 0x8e,
+ 0x15, 0x4e, 0xa0, 0x43, 0xe0, 0x07, 0xd7, 0xec, 0x10, 0x18, 0x35, 0xec, 0xc8, 0xf7, 0x20, 0x1f,
+ 0xf9, 0x3a, 0xca, 0xf6, 0x65, 0x96, 0x65, 0x05, 0xf2, 0x07, 0xcd, 0x6a, 0xeb, 0xa0, 0x59, 0x53,
+ 0x6b, 0x52, 0x02, 0x15, 0x20, 0x1b, 0x3e, 0x24, 0x95, 0x3f, 0x4f, 0x40, 0x81, 0xae, 0xb3, 0xd0,
+ 0x11, 0x79, 0x08, 0x19, 0xdf, 0x1d, 0x79, 0x3d, 0x72, 0x55, 0x4f, 0x44, 0xb0, 0xcf, 0xf8, 0x6e,
+ 0xc9, 0x97, 0xf7, 0xdd, 0x14, 0x0c, 0xd7, 0x78, 0x78, 0x56, 0x73, 0x82, 0xc8, 0x71, 0xaa, 0x43,
+ 0x5e, 0x84, 0x27, 0xae, 0xee, 0x3c, 0xe5, 0x38, 0xc2, 0x9e, 0xa3, 0xfc, 0x51, 0x02, 0x4a, 0xe2,
+ 0x2a, 0x1b, 0x0a, 0x98, 0x5e, 0xcd, 0x89, 0x97, 0x5d, 0xcd, 0x97, 0xee, 0xa7, 0xe4, 0x65, 0xfb,
+ 0x49, 0xf9, 0xe7, 0x2c, 0x5c, 0xeb, 0x12, 0x3f, 0xe8, 0xb0, 0x60, 0x4a, 0xd8, 0xaf, 0xcb, 0x6d,
+ 0x00, 0xd2, 0x21, 0x43, 0xce, 0x58, 0x00, 0x37, 0x39, 0x77, 0x14, 0xf0, 0x9c, 0x80, 0xb2, 0x4a,
+ 0x21, 0x74, 0x81, 0x84, 0x3a, 0x90, 0x0b, 0x93, 0x72, 0xc2, 0x18, 0x7f, 0x32, 0x07, 0x6a, 0x65,
+ 0x68, 0x75, 0x88, 0x77, 0x66, 0xf5, 0x48, 0x2d, 0xca, 0xca, 0xe9, 0x11, 0x90, 0xfc, 0xb3, 0x34,
+ 0xa4, 0x99, 0x18, 0x74, 0x06, 0xab, 0xcf, 0x71, 0x40, 0xbc, 0x01, 0xf6, 0x9e, 0x19, 0x4c, 0xa4,
+ 0x50, 0xf5, 0xe3, 0xab, 0xf7, 0xbd, 0x5c, 0x31, 0xcf, 0xb0, 0xd3, 0x23, 0x47, 0x21, 0xf0, 0xfe,
+ 0x92, 0x5e, 0x8a, 0xa4, 0x70, 0xb9, 0x5f, 0x26, 0xe0, 0xba, 0xb8, 0x60, 0xd1, 0xb3, 0x86, 0xed,
+ 0x63, 0x2e, 0x9e, 0xdb, 0xad, 0xf6, 0xcb, 0x8b, 0x6f, 0x47, 0xf0, 0x74, 0xbf, 0xd3, 0x03, 0x74,
+ 0x38, 0x45, 0xe1, 0x1d, 0x19, 0xc0, 0x4a, 0x68, 0x7c, 0xb8, 0x7c, 0xae, 0xe4, 0xbd, 0x97, 0x92,
+ 0x6f, 0xaa, 0xe2, 0xa2, 0xbb, 0xbf, 0xa4, 0x17, 0x05, 0x3c, 0x7b, 0x27, 0x6b, 0x20, 0xcd, 0x6a,
+ 0x07, 0xbd, 0x0d, 0x2b, 0x0e, 0x79, 0x6e, 0x44, 0x1a, 0x62, 0x33, 0x90, 0xd2, 0x8b, 0x0e, 0x79,
+ 0x3e, 0x69, 0x24, 0x41, 0x2a, 0xc0, 0x27, 0x62, 0xb5, 0xd2, 0x9f, 0x72, 0x15, 0xae, 0x5f, 0x38,
+ 0x52, 0xf4, 0x4d, 0x90, 0x30, 0x7f, 0x61, 0x98, 0x23, 0x8f, 0xbb, 0xb0, 0x1c, 0x72, 0x55, 0xd0,
+ 0x6b, 0x82, 0x2c, 0xff, 0x6e, 0x02, 0x0a, 0xb1, 0xee, 0xa2, 0x1e, 0xe4, 0xc2, 0x3b, 0xba, 0xc8,
+ 0x5d, 0x3e, 0xbc, 0x92, 0x22, 0xba, 0x61, 0xa6, 0x97, 0x84, 0xd8, 0x7a, 0x04, 0x1c, 0x0e, 0x25,
+ 0x15, 0x0d, 0xa5, 0x9a, 0x85, 0x34, 0x53, 0xbe, 0xfc, 0x03, 0x40, 0xe7, 0x59, 0xd1, 0x7b, 0xb0,
+ 0x4a, 0x1c, 0xba, 0xc9, 0xa2, 0x6b, 0x38, 0x1b, 0x4f, 0x51, 0x2f, 0x09, 0x72, 0xd8, 0xf0, 0x0d,
+ 0xc8, 0x47, 0x39, 0x66, 0xa6, 0xaa, 0x94, 0x3e, 0x21, 0x28, 0xab, 0xb0, 0xc2, 0x26, 0xc1, 0xd7,
+ 0xb9, 0x23, 0xaf, 0xfc, 0x57, 0x0a, 0xae, 0xb1, 0x13, 0x7e, 0xcf, 0xb2, 0x89, 0x1f, 0x6e, 0xf0,
+ 0x5d, 0x58, 0xf6, 0x2d, 0xe7, 0xd9, 0xd5, 0x53, 0x11, 0x96, 0xf3, 0x0c, 0x3d, 0x81, 0xd5, 0xbe,
+ 0xeb, 0x0d, 0x70, 0x60, 0xf4, 0xc5, 0xcb, 0xab, 0x1e, 0xc8, 0x25, 0x8e, 0x13, 0xd2, 0xa8, 0x32,
+ 0xb8, 0xd9, 0x24, 0x26, 0x77, 0x31, 0x7d, 0xa6, 0xc9, 0x9c, 0x5e, 0x0a, 0xc9, 0x6c, 0x48, 0x3e,
+ 0xfa, 0x0e, 0xc8, 0xa2, 0x06, 0xc0, 0xa4, 0x8e, 0xf1, 0xc0, 0x72, 0x88, 0x69, 0xf8, 0xa7, 0xd8,
+ 0x33, 0x2d, 0xe7, 0x84, 0xb9, 0x5f, 0x39, 0x7d, 0x83, 0xb7, 0xa8, 0x45, 0x0d, 0x3a, 0xe2, 0x3d,
+ 0x22, 0xd3, 0x97, 0x50, 0x7e, 0x81, 0xab, 0xcd, 0x93, 0xc9, 0x9b, 0x55, 0xe8, 0x8b, 0x6e, 0xa2,
+ 0xff, 0xaf, 0xd7, 0x27, 0xe5, 0xa7, 0x90, 0x66, 0xb6, 0xfd, 0xd5, 0x64, 0x9b, 0xca, 0xb0, 0x16,
+ 0x65, 0xdc, 0xa2, 0xe3, 0x24, 0xcc, 0x39, 0x5d, 0x8b, 0x5e, 0x89, 0xd3, 0xc4, 0x57, 0xfe, 0x33,
+ 0x0d, 0xa5, 0x30, 0x4a, 0xc4, 0xd3, 0x99, 0xca, 0xbf, 0xa4, 0x85, 0xeb, 0xf0, 0x0e, 0xa4, 0xab,
+ 0x4f, 0xbb, 0x6a, 0x47, 0x5a, 0x92, 0x5f, 0x67, 0xa1, 0x9e, 0x35, 0x16, 0xea, 0x61, 0xa8, 0x3b,
+ 0xc7, 0xe3, 0x80, 0x05, 0x1e, 0xd1, 0x1d, 0x28, 0xd0, 0x5b, 0x48, 0xf3, 0xa1, 0x71, 0xd0, 0xdd,
+ 0xbb, 0x27, 0xc1, 0x54, 0xae, 0x81, 0xb7, 0xa5, 0x97, 0x5a, 0xe7, 0xc4, 0x18, 0x05, 0xfd, 0x7b,
+ 0x94, 0xe3, 0x4d, 0x48, 0x3e, 0x3e, 0x94, 0x12, 0xf2, 0x0d, 0xd6, 0x50, 0x8a, 0x35, 0x7c, 0x76,
+ 0x46, 0xdf, 0x2b, 0xb0, 0x5c, 0x6d, 0xb5, 0xea, 0x52, 0x51, 0xde, 0x60, 0x2d, 0x50, 0x5c, 0xac,
+ 0xeb, 0xda, 0xb4, 0xcd, 0xbb, 0x90, 0x39, 0xac, 0xe8, 0x5a, 0xb3, 0x2b, 0x25, 0x65, 0x99, 0xb5,
+ 0x5a, 0x8f, 0xb5, 0x3a, 0xc3, 0x9e, 0xe5, 0x04, 0xa2, 0x5d, 0xad, 0x75, 0x50, 0xad, 0xab, 0x52,
+ 0xe1, 0x82, 0x76, 0xa6, 0x3b, 0x12, 0x91, 0xad, 0xf7, 0x63, 0xa1, 0xb0, 0xd4, 0x54, 0x36, 0x80,
+ 0xb7, 0x8c, 0x47, 0xc1, 0xde, 0x81, 0x74, 0x57, 0x6b, 0xa8, 0xba, 0xb4, 0x7c, 0x81, 0x5e, 0x98,
+ 0x47, 0xc6, 0xb3, 0x15, 0xab, 0x5a, 0xb3, 0xab, 0xea, 0x87, 0x51, 0x11, 0x87, 0x94, 0x9e, 0x0a,
+ 0xa1, 0x0b, 0x60, 0x27, 0x20, 0xde, 0x19, 0xb6, 0x45, 0xba, 0x82, 0x07, 0xde, 0x57, 0xea, 0x6a,
+ 0xf3, 0x61, 0x77, 0xdf, 0x68, 0xeb, 0xea, 0x9e, 0xf6, 0x44, 0xca, 0x4c, 0x85, 0xda, 0x38, 0x9f,
+ 0x4d, 0x9c, 0x93, 0xe0, 0xd4, 0x18, 0x7a, 0xa4, 0x6f, 0x7d, 0x21, 0xb8, 0xa6, 0x4a, 0x46, 0xa4,
+ 0xec, 0x05, 0x5c, 0x3c, 0x23, 0x10, 0x93, 0xf5, 0x31, 0x94, 0x78, 0xf3, 0x30, 0xf6, 0x2c, 0xe5,
+ 0xa6, 0x32, 0x38, 0x9c, 0x2d, 0xda, 0xdb, 0x7c, 0xd9, 0xb2, 0x60, 0xeb, 0x7a, 0xbb, 0xa2, 0x57,
+ 0x1a, 0xc6, 0x0c, 0x77, 0x69, 0x2a, 0x25, 0xc1, 0xb9, 0x59, 0x14, 0xc9, 0x38, 0x8f, 0x51, 0x83,
+ 0xeb, 0x9d, 0x6e, 0xa5, 0xab, 0x1a, 0x55, 0x7a, 0x75, 0xad, 0x19, 0xd1, 0x04, 0xe4, 0xe5, 0x6f,
+ 0x32, 0x90, 0xb7, 0xa7, 0xd6, 0x10, 0x0e, 0x88, 0x71, 0x8c, 0x7b, 0xcf, 0x88, 0x69, 0xc4, 0x67,
+ 0xe3, 0x16, 0xa4, 0xf4, 0xd6, 0x91, 0xb4, 0x22, 0xbf, 0xc6, 0x78, 0xae, 0xc5, 0x78, 0x3c, 0x36,
+ 0x46, 0xe5, 0xaf, 0x32, 0xa1, 0xaf, 0x18, 0x0b, 0xe5, 0xbd, 0x5a, 0x5f, 0x11, 0x1d, 0x42, 0x91,
+ 0x67, 0x10, 0x68, 0x3f, 0x47, 0xbe, 0x70, 0x71, 0xb7, 0xe7, 0xb9, 0x48, 0x52, 0xb6, 0x0e, 0xe3,
+ 0xe2, 0x4e, 0x6e, 0x61, 0x30, 0xa1, 0xa0, 0x77, 0x43, 0xc3, 0x3a, 0xf1, 0x0d, 0xf9, 0x11, 0xb5,
+ 0xc2, 0xc9, 0xe1, 0x25, 0xa7, 0x06, 0xd9, 0xc0, 0xb3, 0x4e, 0x4e, 0x88, 0x27, 0xee, 0xb0, 0xef,
+ 0xcf, 0x73, 0x44, 0x72, 0x0e, 0x3d, 0x64, 0x45, 0x04, 0xae, 0x45, 0x2e, 0xa7, 0xe5, 0x3a, 0x06,
+ 0x65, 0x61, 0xb7, 0xd8, 0xd2, 0xd6, 0xbd, 0x79, 0x1c, 0xbc, 0x18, 0x6f, 0xc3, 0x35, 0x45, 0xc4,
+ 0x43, 0xc2, 0x33, 0x64, 0x7a, 0x47, 0xe2, 0x59, 0x10, 0xe6, 0x62, 0xb1, 0x30, 0xd9, 0x7c, 0x77,
+ 0x24, 0x9e, 0xc4, 0xa5, 0x87, 0xb1, 0xb8, 0x23, 0xb9, 0x11, 0x01, 0x1d, 0x83, 0xd4, 0xb3, 0x5d,
+ 0xe6, 0xb8, 0x1d, 0x93, 0x53, 0x7c, 0x66, 0xb9, 0x1e, 0x0b, 0xab, 0x95, 0xe6, 0xf2, 0x4d, 0x77,
+ 0x39, 0x6b, 0x55, 0x70, 0x72, 0xf8, 0xd5, 0xde, 0x34, 0x95, 0x39, 0x31, 0xb6, 0xcd, 0x56, 0xb1,
+ 0x8d, 0x03, 0xe2, 0x10, 0xdf, 0x67, 0x71, 0x38, 0xea, 0xc4, 0x70, 0x7a, 0x5d, 0x90, 0xd1, 0xe7,
+ 0x50, 0x6a, 0x39, 0xb4, 0x63, 0x21, 0xf3, 0x46, 0x7e, 0xee, 0xb8, 0xd1, 0x34, 0x23, 0xef, 0xcb,
+ 0x0c, 0x1a, 0xda, 0x84, 0xeb, 0xd8, 0xf7, 0xad, 0x13, 0xc7, 0x37, 0x02, 0xd7, 0x70, 0x9d, 0x30,
+ 0xdf, 0xb9, 0x01, 0xec, 0x0c, 0x45, 0xe2, 0x65, 0xd7, 0x6d, 0x39, 0x84, 0x2f, 0xfe, 0x0b, 0xf2,
+ 0xec, 0x85, 0x0b, 0xf2, 0xec, 0xca, 0x0f, 0xa1, 0x10, 0x5b, 0x93, 0x4a, 0xe3, 0xb2, 0x8b, 0xe4,
+ 0x2a, 0x14, 0x9a, 0xad, 0x26, 0xcb, 0xb9, 0x69, 0xcd, 0x87, 0x52, 0x82, 0x11, 0x54, 0xb5, 0xd6,
+ 0xe1, 0x69, 0x38, 0x29, 0x89, 0x10, 0x94, 0x2a, 0x75, 0x5d, 0xad, 0xd4, 0x44, 0x66, 0xae, 0x26,
+ 0xa5, 0x94, 0x1f, 0x81, 0x34, 0xbb, 0x4c, 0x14, 0xed, 0x32, 0x11, 0x25, 0x80, 0x9a, 0xd6, 0xd9,
+ 0xad, 0xe8, 0x35, 0x2e, 0x41, 0x82, 0x62, 0x94, 0xdc, 0xa3, 0x94, 0x24, 0x6d, 0xa1, 0xab, 0x2c,
+ 0x21, 0x47, 0x9f, 0x53, 0xca, 0xf7, 0x61, 0x75, 0x66, 0x2a, 0x95, 0x07, 0x2f, 0x18, 0x80, 0xda,
+ 0xd0, 0xba, 0x46, 0xa5, 0x7e, 0x54, 0x79, 0xda, 0xe1, 0x81, 0x36, 0x46, 0xd0, 0xf6, 0x8c, 0x66,
+ 0xab, 0xa9, 0x36, 0xda, 0xdd, 0xa7, 0x52, 0x52, 0x69, 0xcf, 0xce, 0xe4, 0x0b, 0x11, 0xf7, 0x34,
+ 0x5d, 0x9d, 0x42, 0x64, 0x84, 0x69, 0xc4, 0x63, 0x80, 0xc9, 0x4a, 0x56, 0xba, 0x97, 0xa1, 0x5d,
+ 0x83, 0x15, 0xb5, 0x59, 0x33, 0x5a, 0x7b, 0x46, 0x14, 0x0a, 0x44, 0x50, 0xaa, 0x57, 0x58, 0xbe,
+ 0x5d, 0x6b, 0x1a, 0xed, 0x4a, 0x93, 0x6a, 0x99, 0xf6, 0xba, 0xa2, 0xd7, 0xb5, 0x38, 0x35, 0xa5,
+ 0xd8, 0x00, 0x93, 0x88, 0x82, 0xf2, 0xf9, 0x0b, 0x34, 0xac, 0x1e, 0xaa, 0xcd, 0x2e, 0x2b, 0x2e,
+ 0x94, 0x12, 0x68, 0x0d, 0x56, 0x45, 0x9a, 0x8a, 0x9e, 0xe8, 0x8c, 0x98, 0x44, 0xb7, 0xe0, 0x8d,
+ 0xce, 0xd3, 0xe6, 0xee, 0xbe, 0xde, 0x6a, 0xb2, 0xd4, 0xd5, 0x6c, 0x8b, 0x94, 0xf2, 0xa7, 0x12,
+ 0x64, 0x85, 0x35, 0x41, 0x3a, 0xe4, 0x71, 0x3f, 0x20, 0x9e, 0x81, 0x6d, 0x5b, 0x18, 0xd6, 0xed,
+ 0xf9, 0x8d, 0x51, 0xb9, 0x42, 0x79, 0x2b, 0xb6, 0xbd, 0xbf, 0xa4, 0xe7, 0xb0, 0xf8, 0x1d, 0xc3,
+ 0x74, 0xc6, 0xc2, 0xe1, 0x5a, 0x1c, 0xd3, 0x19, 0x4f, 0x30, 0x9d, 0x31, 0x3a, 0x00, 0xe0, 0x98,
+ 0x04, 0xf7, 0x4e, 0xc5, 0x0d, 0xeb, 0xa3, 0x45, 0x41, 0x55, 0xdc, 0x3b, 0xdd, 0x5f, 0xd2, 0x79,
+ 0xef, 0xe8, 0x03, 0xb2, 0x61, 0x4d, 0xc0, 0x3a, 0xa6, 0xe1, 0xf6, 0xc3, 0x6d, 0xb9, 0x3c, 0x77,
+ 0xb4, 0x75, 0x1a, 0xdf, 0x31, 0x5b, 0x7d, 0xbe, 0x7f, 0xf7, 0x97, 0x74, 0x09, 0xcf, 0xd0, 0x50,
+ 0x00, 0xd7, 0xb9, 0xb4, 0x99, 0x7b, 0xab, 0x88, 0x3d, 0x3e, 0x58, 0x54, 0xde, 0xf9, 0xfb, 0x29,
+ 0x3e, 0x4f, 0x46, 0x7f, 0x9c, 0x00, 0x85, 0x8b, 0xf5, 0xc7, 0x4e, 0xef, 0xd4, 0x73, 0x1d, 0x96,
+ 0x8e, 0x9c, 0xed, 0x03, 0xaf, 0x0d, 0x7a, 0xb4, 0x68, 0x1f, 0x3a, 0x31, 0xcc, 0x73, 0xfd, 0xb9,
+ 0x89, 0x5f, 0xdc, 0x04, 0x3d, 0x86, 0x0c, 0xb6, 0x9f, 0xe3, 0xb1, 0xbf, 0x51, 0x64, 0xe2, 0x37,
+ 0x17, 0x11, 0xcf, 0x18, 0xf7, 0x97, 0x74, 0x01, 0x81, 0x9a, 0x90, 0x35, 0x49, 0x1f, 0x8f, 0xec,
+ 0x80, 0x9d, 0x25, 0x85, 0xf9, 0x22, 0x79, 0x02, 0xad, 0xc6, 0x39, 0xf7, 0x97, 0xf4, 0x10, 0x04,
+ 0x7d, 0x3e, 0xb9, 0xd8, 0xf7, 0xdc, 0x91, 0x13, 0xb0, 0xd3, 0x63, 0xbe, 0xe8, 0x49, 0x88, 0xaa,
+ 0x86, 0xd1, 0xc7, 0x91, 0x13, 0xc4, 0x6e, 0xf2, 0xec, 0x19, 0xed, 0x43, 0xda, 0x21, 0x67, 0x84,
+ 0x1f, 0x36, 0x85, 0xad, 0x3b, 0x0b, 0xe0, 0x36, 0x29, 0xdf, 0xfe, 0x92, 0xce, 0x01, 0xe8, 0xee,
+ 0x70, 0x3d, 0x9e, 0x71, 0xb2, 0xc7, 0xec, 0x50, 0x59, 0x6c, 0x77, 0xb4, 0xbc, 0x3d, 0xce, 0x4b,
+ 0x77, 0x87, 0x1b, 0x3e, 0xd0, 0xd9, 0xf1, 0xc8, 0x90, 0xe0, 0x80, 0x9d, 0x3d, 0x8b, 0xcd, 0x8e,
+ 0xce, 0x18, 0xe9, 0xec, 0x70, 0x08, 0xf9, 0x09, 0xe4, 0x42, 0x6b, 0x81, 0xea, 0x50, 0x60, 0x15,
+ 0x75, 0xac, 0x69, 0x18, 0x27, 0x58, 0xc4, 0x09, 0x8a, 0xb3, 0x4f, 0x90, 0x9d, 0xf1, 0x2b, 0x46,
+ 0x7e, 0x0a, 0xf9, 0xc8, 0x70, 0xbc, 0x62, 0xe8, 0x5f, 0x24, 0x40, 0x9a, 0x35, 0x1a, 0xa8, 0x05,
+ 0x2b, 0x04, 0x7b, 0xf6, 0xd8, 0xe8, 0x5b, 0xf4, 0x12, 0x16, 0x96, 0x71, 0x2e, 0x22, 0xa4, 0xc8,
+ 0x00, 0xf6, 0x38, 0x3f, 0x6a, 0x40, 0x91, 0xfa, 0x3e, 0x11, 0x5e, 0x72, 0x61, 0xbc, 0x02, 0xe5,
+ 0x17, 0x70, 0xf2, 0x6f, 0xc3, 0xda, 0x05, 0x86, 0x07, 0x9d, 0xc2, 0x7a, 0x14, 0x23, 0x31, 0xce,
+ 0xd5, 0xae, 0xdf, 0x9d, 0x33, 0x9e, 0xce, 0xd8, 0x27, 0xc5, 0xca, 0x6b, 0xc1, 0x39, 0x9a, 0x2f,
+ 0xbf, 0x05, 0x37, 0xbf, 0xc2, 0xea, 0xc8, 0x79, 0xc8, 0x8a, 0xbd, 0x2c, 0x6f, 0x43, 0x31, 0xbe,
+ 0x01, 0xd1, 0xdb, 0xb3, 0x1b, 0x9a, 0xaa, 0x37, 0x3d, 0xbd, 0x2b, 0xe5, 0x2c, 0xa4, 0xd9, 0xee,
+ 0x92, 0x73, 0x90, 0xe1, 0x26, 0x46, 0xfe, 0xc3, 0x04, 0xe4, 0xa3, 0x2d, 0x82, 0x1e, 0xc0, 0x72,
+ 0x94, 0x2d, 0x58, 0x4c, 0x97, 0x8c, 0x8f, 0x7a, 0xff, 0xe1, 0x4e, 0x5d, 0x7c, 0x3a, 0x42, 0x56,
+ 0xb9, 0x0b, 0x19, 0xbe, 0xc5, 0xd0, 0x23, 0x80, 0xc9, 0xc2, 0xba, 0x42, 0xaf, 0x62, 0xdc, 0xd5,
+ 0x7c, 0x74, 0x33, 0x51, 0xfe, 0x21, 0x19, 0x8b, 0xa4, 0x4d, 0xea, 0x70, 0x3b, 0x90, 0x36, 0x89,
+ 0x8d, 0xc7, 0x0b, 0x64, 0x24, 0xcf, 0xa3, 0x94, 0x6b, 0x14, 0x82, 0xda, 0x2f, 0x86, 0x85, 0x3e,
+ 0x83, 0x1c, 0xb6, 0xad, 0x13, 0xc7, 0x08, 0x5c, 0xa1, 0x93, 0xef, 0x5e, 0x0d, 0xb7, 0x42, 0x51,
+ 0xba, 0x2e, 0xb5, 0xe2, 0x98, 0xff, 0x94, 0xdf, 0x87, 0x34, 0x93, 0x86, 0xde, 0x82, 0x22, 0x93,
+ 0x66, 0x0c, 0x2c, 0xdb, 0xb6, 0x7c, 0x11, 0xd0, 0x2c, 0x30, 0x5a, 0x83, 0x91, 0xe4, 0xfb, 0x90,
+ 0x15, 0x08, 0xe8, 0x06, 0x64, 0x86, 0xc4, 0xb3, 0x5c, 0x7e, 0x85, 0x4b, 0xe9, 0xe2, 0x89, 0xd2,
+ 0xdd, 0x7e, 0xdf, 0x27, 0x01, 0x73, 0x12, 0x52, 0xba, 0x78, 0xaa, 0x5e, 0x87, 0xb5, 0x0b, 0xf6,
+ 0x80, 0xf2, 0x7b, 0x49, 0xc8, 0x47, 0x91, 0x24, 0x74, 0x08, 0x25, 0xdc, 0x63, 0x25, 0x41, 0x43,
+ 0x1c, 0x04, 0xc4, 0xbb, 0xf2, 0x5d, 0x76, 0x85, 0xc3, 0xb4, 0x39, 0x0a, 0xda, 0x87, 0xec, 0x99,
+ 0x45, 0x9e, 0xbf, 0x44, 0xd2, 0x2e, 0x43, 0xf9, 0xf7, 0x1c, 0xf4, 0x03, 0xb8, 0x26, 0xae, 0xb0,
+ 0x03, 0x3c, 0x1c, 0x52, 0xe7, 0xa0, 0xef, 0x08, 0x77, 0x6b, 0x61, 0x4c, 0x71, 0x19, 0x6e, 0x70,
+ 0xa0, 0x3d, 0x47, 0x79, 0x00, 0xd2, 0x91, 0xe5, 0xf1, 0x2c, 0x5e, 0x87, 0x04, 0x81, 0xe5, 0x9c,
+ 0x20, 0x09, 0x52, 0x23, 0xa1, 0x87, 0xbc, 0x4e, 0x7f, 0xa2, 0x0d, 0xc8, 0x0e, 0x79, 0xdc, 0x8f,
+ 0x0d, 0xa6, 0xa8, 0x87, 0x8f, 0xca, 0xa7, 0x50, 0x88, 0x55, 0xc2, 0x87, 0xac, 0xc9, 0x0b, 0x59,
+ 0x53, 0x53, 0xac, 0x8f, 0x96, 0x73, 0x09, 0x29, 0xa9, 0xfc, 0x49, 0x02, 0xd6, 0xc3, 0x00, 0x5a,
+ 0xbc, 0x54, 0x5f, 0xf9, 0x32, 0x01, 0xc5, 0x38, 0x01, 0xbd, 0x03, 0x99, 0x5a, 0x8b, 0xe5, 0xe8,
+ 0x97, 0xa6, 0x02, 0x5b, 0xc4, 0x39, 0xdb, 0x31, 0xdd, 0xde, 0x33, 0x1e, 0x36, 0x7a, 0x17, 0xb2,
+ 0xc2, 0xbd, 0x96, 0x12, 0x53, 0xe1, 0x25, 0xda, 0x4c, 0x38, 0x58, 0xb4, 0xdd, 0x6d, 0xc8, 0xa9,
+ 0x4f, 0xba, 0xaa, 0xde, 0xac, 0xd4, 0x67, 0x42, 0x60, 0xb4, 0x21, 0xf9, 0x82, 0x4e, 0x22, 0xb6,
+ 0x77, 0xce, 0x36, 0x95, 0x7b, 0xb0, 0x52, 0x63, 0xf0, 0x61, 0x2c, 0xf9, 0x3d, 0x58, 0xed, 0xb9,
+ 0x4e, 0x80, 0x2d, 0x87, 0x78, 0x86, 0x35, 0xc0, 0x27, 0x61, 0x21, 0x56, 0x29, 0x22, 0x6b, 0x94,
+ 0xaa, 0xfc, 0x5b, 0x02, 0x4a, 0xc2, 0x14, 0x86, 0xbc, 0x25, 0x48, 0xba, 0xbe, 0x68, 0x9e, 0x74,
+ 0x7d, 0x84, 0x60, 0x19, 0x7b, 0xbd, 0x53, 0xa1, 0x31, 0xf6, 0x9b, 0xaa, 0xac, 0xe7, 0x0e, 0x06,
+ 0xd8, 0x09, 0x63, 0x15, 0xe1, 0x23, 0xaa, 0x43, 0x8a, 0x38, 0x67, 0x8b, 0x94, 0xa3, 0x4f, 0x49,
+ 0x2f, 0xab, 0xce, 0x19, 0x8f, 0xd6, 0x52, 0x18, 0xf9, 0x63, 0xc8, 0x85, 0x84, 0x85, 0x2a, 0xba,
+ 0xff, 0x27, 0x01, 0xab, 0xaa, 0x50, 0x50, 0x38, 0xae, 0x78, 0x46, 0x2b, 0xf1, 0x8a, 0x32, 0x5a,
+ 0xe8, 0x10, 0x32, 0x2c, 0xf6, 0x15, 0xa6, 0xde, 0xe6, 0xf1, 0xc6, 0x67, 0x3a, 0xc6, 0x6b, 0x48,
+ 0xc2, 0xca, 0x7e, 0x8e, 0x26, 0xdf, 0x87, 0x42, 0x8c, 0xbc, 0xd0, 0xd8, 0x77, 0xa0, 0x18, 0xdf,
+ 0x50, 0x2f, 0xde, 0x2b, 0xd3, 0x0b, 0x5e, 0xf9, 0xf5, 0x32, 0x14, 0x62, 0x45, 0x6c, 0x48, 0x83,
+ 0xb4, 0x15, 0x90, 0xe8, 0x50, 0xde, 0x5e, 0xac, 0x06, 0xae, 0xac, 0x05, 0x64, 0xa0, 0x73, 0x04,
+ 0xb9, 0x0f, 0xa0, 0x99, 0xc4, 0x09, 0xac, 0xbe, 0x45, 0x3c, 0x6a, 0x56, 0xe3, 0xdf, 0x88, 0x88,
+ 0xde, 0x15, 0x82, 0xc9, 0xe7, 0x21, 0xf4, 0xdc, 0x9d, 0x34, 0x99, 0x6c, 0xd9, 0x09, 0xdf, 0x81,
+ 0xe7, 0x84, 0x8a, 0x49, 0x45, 0x8a, 0x91, 0x7f, 0x99, 0x84, 0x65, 0x2a, 0x17, 0x69, 0x90, 0x14,
+ 0xc0, 0xf3, 0x7d, 0x6b, 0x31, 0xd5, 0xf1, 0xa8, 0xa7, 0x7a, 0xd2, 0xa2, 0x8b, 0x9a, 0xd7, 0x05,
+ 0x25, 0xe7, 0x8e, 0x93, 0xc5, 0xc1, 0x66, 0x2a, 0x83, 0xd0, 0xfb, 0xe1, 0xd4, 0x71, 0x0b, 0xb9,
+ 0x5e, 0xe6, 0xdf, 0x2b, 0x96, 0xc3, 0xef, 0x15, 0xcb, 0x15, 0x27, 0xfc, 0x0c, 0x09, 0xdd, 0x85,
+ 0x82, 0x7f, 0xea, 0x7a, 0x01, 0x0f, 0xa9, 0x8a, 0x2b, 0xe6, 0xc5, 0x1c, 0xc0, 0x1a, 0x1e, 0x32,
+ 0xb6, 0x75, 0x48, 0xdb, 0xf8, 0x98, 0xd8, 0xe2, 0x8b, 0x17, 0xfe, 0x80, 0x5e, 0x87, 0x9c, 0x6d,
+ 0x39, 0xcf, 0x8c, 0x91, 0x67, 0xb3, 0x8b, 0x5b, 0x5e, 0xcf, 0xd2, 0xe7, 0x03, 0xcf, 0x96, 0x7f,
+ 0x4b, 0x54, 0x2b, 0x8d, 0x5e, 0x50, 0xad, 0xc4, 0x73, 0x00, 0xbc, 0xa0, 0x40, 0x6b, 0x76, 0xd5,
+ 0x87, 0xaa, 0x2e, 0x25, 0x51, 0x1e, 0xd2, 0x7b, 0xf5, 0x56, 0xa5, 0x2b, 0xa5, 0x78, 0xa1, 0x41,
+ 0xab, 0xae, 0x56, 0x9a, 0xd2, 0x32, 0x5a, 0x81, 0x7c, 0xf4, 0x35, 0xa3, 0x94, 0x46, 0x45, 0xc8,
+ 0xd5, 0x0e, 0xf4, 0x0a, 0xab, 0x23, 0xce, 0xa0, 0x12, 0xc0, 0xa3, 0xca, 0x61, 0xc5, 0xd8, 0xad,
+ 0x57, 0x3a, 0x1d, 0x29, 0xab, 0xfc, 0x7b, 0x16, 0xae, 0x37, 0x88, 0xef, 0xe3, 0x13, 0x72, 0x64,
+ 0x05, 0xa7, 0xb1, 0xb2, 0xe6, 0x57, 0xfc, 0x81, 0xd2, 0xf7, 0x20, 0xcd, 0xa2, 0xac, 0x8b, 0x7e,
+ 0xb1, 0x45, 0xbd, 0x0e, 0xc6, 0x88, 0x7e, 0x48, 0x4d, 0xab, 0xa8, 0xfb, 0x8e, 0x6d, 0xa2, 0xf9,
+ 0xee, 0x39, 0xd3, 0xb5, 0x06, 0xfb, 0x4b, 0xba, 0xa8, 0x7b, 0x8a, 0xaa, 0x0f, 0x0e, 0x61, 0x25,
+ 0x4c, 0xdc, 0x2d, 0x5a, 0xae, 0x14, 0xdf, 0xf4, 0xf4, 0xd6, 0xd8, 0x8f, 0x1b, 0x81, 0x23, 0x28,
+ 0x0d, 0xb1, 0x67, 0x98, 0x6e, 0xd4, 0xe9, 0xcc, 0xdc, 0xc0, 0xf1, 0xd2, 0x48, 0x0a, 0x3c, 0x8c,
+ 0x17, 0xb2, 0xb6, 0x00, 0x86, 0xd1, 0x8e, 0x14, 0x37, 0xe8, 0xc5, 0x3e, 0x30, 0xdc, 0x5f, 0xd2,
+ 0x63, 0x10, 0x48, 0x87, 0x42, 0xec, 0xa3, 0x50, 0x71, 0x7b, 0x5e, 0xf0, 0x13, 0xc2, 0xfd, 0x25,
+ 0x3d, 0x0e, 0x82, 0x3a, 0x50, 0x64, 0x15, 0x72, 0xe1, 0xd8, 0xf3, 0x73, 0x83, 0xc6, 0x4a, 0x65,
+ 0x28, 0xa8, 0x17, 0xab, 0x9c, 0x69, 0x00, 0x4c, 0x32, 0x95, 0xe2, 0xae, 0xbb, 0x50, 0x8a, 0x90,
+ 0x5e, 0x9b, 0xa3, 0x94, 0x24, 0xea, 0xc3, 0x5a, 0xec, 0xbb, 0x9b, 0xa8, 0xab, 0xc5, 0x05, 0x3f,
+ 0x65, 0x8c, 0xd5, 0xca, 0xec, 0x2f, 0xe9, 0xc2, 0x2d, 0x8b, 0x17, 0xd0, 0x10, 0x40, 0xe7, 0x2b,
+ 0xa2, 0x37, 0x56, 0xae, 0xfe, 0xc5, 0xe4, 0x44, 0x4c, 0x8c, 0x58, 0xcd, 0xc0, 0xb2, 0xe7, 0xba,
+ 0x81, 0xf2, 0x65, 0x16, 0x6e, 0xa8, 0x5f, 0x90, 0xde, 0x88, 0x55, 0xc7, 0x76, 0x02, 0x7c, 0x12,
+ 0xad, 0xf5, 0x36, 0x14, 0x62, 0x61, 0x69, 0xb1, 0xb7, 0x17, 0xfd, 0xb0, 0x31, 0x0e, 0x81, 0x30,
+ 0x1d, 0x9b, 0x17, 0x96, 0x81, 0xf9, 0xdc, 0x59, 0x14, 0xb3, 0xbd, 0x3d, 0xd7, 0xd8, 0xa6, 0xfd,
+ 0x4c, 0x5d, 0x7a, 0x3e, 0xeb, 0x79, 0xae, 0x43, 0x9a, 0x4f, 0xb8, 0x38, 0x77, 0x2d, 0x31, 0x79,
+ 0x17, 0x94, 0x4e, 0xab, 0x73, 0xf9, 0x02, 0x17, 0xa9, 0x66, 0xb2, 0x46, 0x34, 0x73, 0xaa, 0x80,
+ 0xfa, 0xcd, 0xa9, 0xaf, 0xbc, 0x97, 0x59, 0xca, 0x37, 0xfe, 0x99, 0xf6, 0xc6, 0xe4, 0x83, 0xc0,
+ 0x34, 0x7b, 0x19, 0x7d, 0xd4, 0x37, 0x6d, 0x47, 0x33, 0x2f, 0x6b, 0x47, 0xfb, 0x50, 0x18, 0xf9,
+ 0x54, 0xc7, 0x01, 0x0e, 0x88, 0xbf, 0x91, 0x7d, 0xd9, 0x01, 0x1f, 0xf8, 0xc4, 0x63, 0x95, 0x79,
+ 0x74, 0xc0, 0xa3, 0xf0, 0xc1, 0x47, 0x4f, 0x21, 0xc3, 0xd2, 0xad, 0xfe, 0x46, 0x8e, 0x89, 0xa8,
+ 0x5c, 0x5d, 0x04, 0x2b, 0xe0, 0xd3, 0x4c, 0x5d, 0x00, 0xca, 0x2d, 0x28, 0xc4, 0xd4, 0x3c, 0x8f,
+ 0x47, 0xf2, 0x75, 0x00, 0xdb, 0xed, 0x61, 0x9b, 0x7f, 0xd9, 0xc0, 0x17, 0x40, 0x9e, 0x51, 0x9a,
+ 0x78, 0x40, 0x28, 0x60, 0x6c, 0x18, 0xaf, 0x00, 0xf0, 0x31, 0x64, 0x45, 0xa7, 0x5f, 0x1e, 0x6c,
+ 0xeb, 0x0f, 0x12, 0xf1, 0xaa, 0x32, 0xe1, 0xd3, 0xa2, 0x9f, 0x40, 0x86, 0xd7, 0xa6, 0xa0, 0x79,
+ 0x02, 0x89, 0x53, 0x65, 0x2c, 0xf2, 0x4b, 0x94, 0x99, 0x29, 0x4b, 0x77, 0x12, 0x3b, 0x9f, 0x42,
+ 0x8e, 0xfd, 0x63, 0x04, 0x75, 0x49, 0xdf, 0x3a, 0xe7, 0xd2, 0x50, 0x37, 0x84, 0x39, 0x33, 0xad,
+ 0x21, 0xff, 0xcb, 0x81, 0x7f, 0xfc, 0xb3, 0xbf, 0x7e, 0xc2, 0x9d, 0x16, 0xca, 0x75, 0xe0, 0x39,
+ 0x3b, 0x1a, 0xac, 0x30, 0x80, 0x9e, 0xf8, 0xe7, 0x86, 0x79, 0x50, 0xfe, 0x29, 0x44, 0x29, 0x1e,
+ 0xc7, 0xfe, 0x01, 0xa2, 0xfa, 0x6d, 0xf8, 0xea, 0xbf, 0xa9, 0xa8, 0xe6, 0x75, 0x56, 0xb5, 0x52,
+ 0x19, 0x5a, 0x9f, 0x15, 0x42, 0xba, 0x71, 0xb6, 0x79, 0x9c, 0x61, 0xe2, 0xb6, 0xff, 0x2f, 0x00,
+ 0x00, 0xff, 0xff, 0x74, 0x65, 0x10, 0xa9, 0x01, 0x43, 0x00, 0x00,
+}
+
// Reference imports to suppress errors if they are not otherwise used.
var _ context.Context
var _ grpc.ClientConn
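The registrations above run at package init time: importing the generated package is enough to make every message, enum, and extension declared in beam_runner_api.proto resolvable by its fully qualified name. A minimal sketch of that effect, assuming the generated package lives at github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1 (the message name is taken from the RegisterType calls in the diff; this sketch is not part of the change itself):

package main

import (
	"fmt"

	"github.com/golang/protobuf/proto"

	// Importing the generated package (blank import) runs its init() functions,
	// which register the file descriptor, messages, enums, and extensions.
	_ "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
)

func main() {
	// After registration, a message type can be looked up by its full proto name.
	t := proto.MessageType("org.apache.beam.model.pipeline.v1.Components")
	fmt.Println(t) // e.g. *pipeline_v1.Components
}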
@@ -5781,6 +5566,14 @@
Events(*EventsRequest, TestStreamService_EventsServer) error
}
+// UnimplementedTestStreamServiceServer can be embedded to have forward compatible implementations.
+type UnimplementedTestStreamServiceServer struct {
+}
+
+func (*UnimplementedTestStreamServiceServer) Events(req *EventsRequest, srv TestStreamService_EventsServer) error {
+ return status.Errorf(codes.Unimplemented, "method Events not implemented")
+}
+
func RegisterTestStreamServiceServer(s *grpc.Server, srv TestStreamServiceServer) {
s.RegisterService(&_TestStreamService_serviceDesc, srv)
}
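The newly generated UnimplementedTestStreamServiceServer follows the standard gRPC-Go forward-compatibility pattern: a server implementation embeds it so that any method added to the service later still compiles and simply returns codes.Unimplemented, as the stub above shows. A minimal sketch, assuming the same pipeline_v1 import path as in the previous sketch; the testStreamServer type and the listener address are hypothetical and not part of this change:

package main

import (
	"net"

	"google.golang.org/grpc"

	pipeline_v1 "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
)

// testStreamServer embeds the generated stub; the Events RPC is not overridden
// here, so calls to it return the codes.Unimplemented error defined above.
type testStreamServer struct {
	pipeline_v1.UnimplementedTestStreamServiceServer
}

func main() {
	lis, err := net.Listen("tcp", "localhost:0")
	if err != nil {
		panic(err)
	}
	s := grpc.NewServer()
	pipeline_v1.RegisterTestStreamServiceServer(s, &testStreamServer{})
	_ = s.Serve(lis) // serve until stopped; error handling omitted in this sketch
}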
@@ -5819,337 +5612,3 @@
},
Metadata: "beam_runner_api.proto",
}
-
-func init() {
- proto.RegisterFile("beam_runner_api.proto", fileDescriptor_beam_runner_api_70c7dbd5f3375954)
-}
-
-var fileDescriptor_beam_runner_api_70c7dbd5f3375954 = []byte{
- // 5205 bytes of a gzipped FileDescriptorProto
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xbc, 0x7c, 0xdb, 0x6f, 0x23, 0xc9,
- 0x75, 0x37, 0x2f, 0xe2, 0xed, 0x90, 0xa2, 0x5a, 0x25, 0xcd, 0xac, 0xb6, 0xbd, 0xde, 0x99, 0xed,
- 0x5d, 0xef, 0x8e, 0xf7, 0x5b, 0x73, 0x47, 0x9a, 0x99, 0x9d, 0x19, 0xd9, 0x9e, 0x35, 0x29, 0x36,
- 0x47, 0x3d, 0xc3, 0x9b, 0x9b, 0x94, 0x34, 0xb3, 0xb6, 0xb7, 0xdd, 0x62, 0x17, 0xa5, 0x86, 0x9a,
- 0xdd, 0x74, 0x77, 0x53, 0xb3, 0x34, 0x6c, 0x7c, 0x40, 0x10, 0x18, 0x41, 0x02, 0xe4, 0xf2, 0x90,
- 0x00, 0x7e, 0x08, 0x02, 0xd8, 0x40, 0x80, 0x24, 0x0f, 0x09, 0xe2, 0x24, 0x40, 0x5e, 0x9d, 0xe4,
- 0x31, 0x0f, 0x01, 0xf2, 0x94, 0x3f, 0x23, 0x81, 0x1f, 0x92, 0xa7, 0xa0, 0x2e, 0xdd, 0x6c, 0x52,
- 0xd2, 0x2c, 0x29, 0x09, 0x79, 0x63, 0x9f, 0xae, 0xf3, 0x3b, 0x55, 0xa7, 0xaa, 0x4e, 0x9d, 0x73,
- 0xea, 0x34, 0xe1, 0xc6, 0x21, 0xd6, 0x07, 0x9a, 0x3b, 0xb2, 0x6d, 0xec, 0x6a, 0xfa, 0xd0, 0x2c,
- 0x0d, 0x5d, 0xc7, 0x77, 0xd0, 0x3b, 0x8e, 0x7b, 0x54, 0xd2, 0x87, 0x7a, 0xef, 0x18, 0x97, 0x48,
- 0x8b, 0xd2, 0xc0, 0x31, 0xb0, 0x55, 0x1a, 0x9a, 0x43, 0x6c, 0x99, 0x36, 0x2e, 0x9d, 0x6e, 0x8a,
- 0x2b, 0xd8, 0x36, 0x86, 0x8e, 0x69, 0xfb, 0x1e, 0xe3, 0x11, 0xdf, 0x3c, 0x72, 0x9c, 0x23, 0x0b,
- 0x7f, 0x4c, 0x9f, 0x0e, 0x47, 0xfd, 0x8f, 0x75, 0x7b, 0xcc, 0x5f, 0xdd, 0x9e, 0x7d, 0x65, 0x60,
- 0xaf, 0xe7, 0x9a, 0x43, 0xdf, 0x71, 0x79, 0x8b, 0x5b, 0xb3, 0x2d, 0x7c, 0x73, 0x80, 0x3d, 0x5f,
- 0x1f, 0x0c, 0x59, 0x03, 0xe9, 0xd7, 0x71, 0x58, 0xae, 0x60, 0x7d, 0xb0, 0xe3, 0xd8, 0x9e, 0xaf,
- 0xdb, 0xbe, 0x27, 0xfd, 0x4d, 0x1c, 0x72, 0xe1, 0x13, 0xda, 0x84, 0xf5, 0x86, 0xd2, 0xd4, 0xba,
- 0x4a, 0x43, 0xee, 0x74, 0xcb, 0x8d, 0xb6, 0xd6, 0x50, 0xea, 0x75, 0xa5, 0x23, 0xc4, 0xc4, 0x37,
- 0xfe, 0xf2, 0xef, 0xfe, 0xe7, 0xd7, 0xa9, 0xd5, 0x6f, 0x3c, 0xde, 0xda, 0xba, 0x77, 0xef, 0xe1,
- 0xd6, 0xdd, 0x7b, 0x9f, 0x3c, 0x7a, 0x70, 0xff, 0xe1, 0xc3, 0x07, 0xe8, 0x2e, 0xac, 0x37, 0xca,
- 0x2f, 0xce, 0xb2, 0xc4, 0xc5, 0x9b, 0x94, 0x45, 0x38, 0xc3, 0xf1, 0x04, 0xa4, 0xa7, 0xf5, 0x56,
- 0xa5, 0x5c, 0xd7, 0x0e, 0x94, 0x66, 0xb5, 0x75, 0xa0, 0x9d, 0xcb, 0x9f, 0x98, 0xe6, 0xdf, 0x7c,
- 0xfc, 0xe0, 0xee, 0x7d, 0xca, 0x2f, 0xfd, 0x43, 0x16, 0x60, 0xc7, 0x19, 0x0c, 0x1d, 0x1b, 0x93,
- 0x3e, 0xff, 0x00, 0xc0, 0x77, 0x75, 0xdb, 0xeb, 0x3b, 0xee, 0xc0, 0xdb, 0x88, 0xdf, 0x4e, 0xde,
- 0xc9, 0x6f, 0x7d, 0xbb, 0xf4, 0xa5, 0xaa, 0x2f, 0x4d, 0x20, 0x4a, 0xdd, 0x90, 0x5f, 0xb6, 0x7d,
- 0x77, 0xac, 0x46, 0x00, 0x51, 0x0f, 0x0a, 0xc3, 0x9e, 0x63, 0x59, 0xb8, 0xe7, 0x9b, 0x8e, 0xed,
- 0x6d, 0x24, 0xa8, 0x80, 0x4f, 0x17, 0x13, 0xd0, 0x8e, 0x20, 0x30, 0x11, 0x53, 0xa0, 0x68, 0x0c,
- 0xeb, 0xaf, 0x4c, 0xdb, 0x70, 0x5e, 0x99, 0xf6, 0x91, 0xe6, 0xf9, 0xae, 0xee, 0xe3, 0x23, 0x13,
- 0x7b, 0x1b, 0x49, 0x2a, 0xac, 0xb6, 0x98, 0xb0, 0x83, 0x00, 0xa9, 0x13, 0x02, 0x31, 0x99, 0x6b,
- 0xaf, 0xce, 0xbe, 0x41, 0xdf, 0x85, 0x74, 0xcf, 0x31, 0xb0, 0xeb, 0x6d, 0x2c, 0x51, 0x61, 0x8f,
- 0x17, 0x13, 0xb6, 0x43, 0x79, 0x19, 0x3e, 0x07, 0x22, 0x2a, 0xc3, 0xf6, 0xa9, 0xe9, 0x3a, 0xf6,
- 0x80, 0xb4, 0xd9, 0x48, 0x5d, 0x46, 0x65, 0x72, 0x04, 0x81, 0xab, 0x2c, 0x0a, 0x2a, 0x5a, 0xb0,
- 0x32, 0x33, 0x6d, 0x48, 0x80, 0xe4, 0x09, 0x1e, 0x6f, 0xc4, 0x6f, 0xc7, 0xef, 0xe4, 0x54, 0xf2,
- 0x13, 0xed, 0x40, 0xea, 0x54, 0xb7, 0x46, 0x78, 0x23, 0x71, 0x3b, 0x7e, 0x27, 0xbf, 0xf5, 0x8d,
- 0x39, 0xba, 0xd0, 0x0e, 0x51, 0x55, 0xc6, 0xbb, 0x9d, 0x78, 0x14, 0x17, 0x1d, 0x58, 0x3d, 0x33,
- 0x87, 0xe7, 0xc8, 0xab, 0x4e, 0xcb, 0x2b, 0xcd, 0x23, 0x6f, 0x27, 0x84, 0x8d, 0x0a, 0xfc, 0x09,
- 0x6c, 0x5c, 0x34, 0x8f, 0xe7, 0xc8, 0x7d, 0x36, 0x2d, 0xf7, 0xfe, 0x1c, 0x72, 0x67, 0xd1, 0xc7,
- 0x51, 0xe9, 0x3d, 0xc8, 0x47, 0x26, 0xf6, 0x1c, 0x81, 0x4f, 0xa6, 0x05, 0xde, 0x99, 0x6b, 0x6e,
- 0x0d, 0xec, 0xce, 0xe8, 0xf4, 0xcc, 0x24, 0x5f, 0x8f, 0x4e, 0x23, 0xb0, 0x11, 0x81, 0xd2, 0x7f,
- 0xc4, 0x21, 0xdb, 0xe6, 0xcd, 0x50, 0x03, 0xa0, 0x17, 0xae, 0x36, 0x2a, 0x6f, 0xbe, 0xf5, 0x31,
- 0x59, 0xa2, 0x6a, 0x04, 0x00, 0x7d, 0x04, 0xc8, 0x75, 0x1c, 0x5f, 0x0b, 0x2d, 0x87, 0x66, 0x1a,
- 0xcc, 0x58, 0xe4, 0x54, 0x81, 0xbc, 0x09, 0x97, 0x95, 0x62, 0x90, 0x4d, 0x57, 0x30, 0x4c, 0x6f,
- 0x68, 0xe9, 0x63, 0xcd, 0xd0, 0x7d, 0x7d, 0x23, 0x39, 0xf7, 0xd0, 0xaa, 0x8c, 0xad, 0xaa, 0xfb,
- 0xba, 0x9a, 0x37, 0x26, 0x0f, 0xd2, 0xef, 0x2d, 0x01, 0x4c, 0xd6, 0x2e, 0xba, 0x05, 0xf9, 0x91,
- 0x6d, 0xfe, 0x68, 0x84, 0x35, 0x5b, 0x1f, 0xe0, 0x8d, 0x14, 0xd5, 0x27, 0x30, 0x52, 0x53, 0x1f,
- 0x60, 0xb4, 0x03, 0x4b, 0xde, 0x10, 0xf7, 0xf8, 0xc8, 0x3f, 0x9e, 0x43, 0x74, 0x6d, 0x64, 0xd3,
- 0x65, 0xda, 0x19, 0xe2, 0x9e, 0x4a, 0x99, 0xd1, 0x7b, 0xb0, 0xec, 0x8d, 0x0e, 0x23, 0xe6, 0x97,
- 0x0d, 0x78, 0x9a, 0x48, 0x4c, 0x8c, 0x69, 0x0f, 0x47, 0x7e, 0x60, 0xcf, 0x1e, 0x2f, 0xb4, 0x0d,
- 0x4b, 0x0a, 0xe5, 0xe5, 0x26, 0x86, 0x01, 0xa1, 0x2e, 0x64, 0x9c, 0x91, 0x4f, 0x31, 0x99, 0xd9,
- 0xda, 0x5e, 0x0c, 0xb3, 0xc5, 0x98, 0x19, 0x68, 0x00, 0x75, 0x66, 0x5a, 0xd2, 0x57, 0x9e, 0x16,
- 0xf1, 0x31, 0xe4, 0x23, 0xfd, 0x3f, 0x67, 0x79, 0xaf, 0x47, 0x97, 0x77, 0x2e, 0xba, 0x3f, 0xb6,
- 0xa1, 0x10, 0xed, 0xe6, 0x22, 0xbc, 0xd2, 0xdf, 0x2f, 0xc3, 0x5a, 0xc7, 0xd7, 0x6d, 0x43, 0x77,
- 0x8d, 0xc9, 0xb0, 0x3d, 0xe9, 0x2f, 0x92, 0x00, 0x6d, 0xd7, 0x1c, 0x98, 0xbe, 0x79, 0x8a, 0x3d,
- 0xf4, 0x75, 0x48, 0xb7, 0xcb, 0xaa, 0x56, 0x6d, 0x09, 0x31, 0xf1, 0xab, 0xbf, 0x20, 0xc7, 0xed,
- 0x1b, 0x64, 0x80, 0xdb, 0xe1, 0xe4, 0x6d, 0x0f, 0x75, 0xd7, 0x70, 0xb6, 0x4f, 0x37, 0xd1, 0x47,
- 0x90, 0xa9, 0xd5, 0xcb, 0xdd, 0xae, 0xdc, 0x14, 0xe2, 0xe2, 0x2d, 0xda, 0xf6, 0xcd, 0x99, 0xb6,
- 0x7d, 0x4b, 0xf7, 0x7d, 0x6c, 0x93, 0xd6, 0x9f, 0x40, 0xe1, 0xa9, 0xda, 0xda, 0x6b, 0x6b, 0x95,
- 0x97, 0xda, 0x73, 0xf9, 0xa5, 0x90, 0x10, 0xdf, 0xa3, 0x2c, 0x6f, 0xcf, 0xb0, 0x1c, 0xb9, 0xce,
- 0x68, 0xa8, 0x1d, 0x8e, 0xb5, 0x13, 0x3c, 0xe6, 0x52, 0x94, 0x46, 0x7b, 0xaf, 0xde, 0x91, 0x85,
- 0xe4, 0x05, 0x52, 0xcc, 0xc1, 0x70, 0x64, 0x79, 0x98, 0xb4, 0x7e, 0x08, 0xc5, 0x72, 0xa7, 0xa3,
- 0x3c, 0x6d, 0x72, 0x4f, 0xa2, 0x23, 0x2c, 0x89, 0xef, 0x52, 0xa6, 0xaf, 0xce, 0x30, 0xb1, 0x93,
- 0x4f, 0x33, 0x6d, 0x9f, 0x0e, 0xe6, 0x1e, 0xe4, 0xbb, 0x72, 0xa7, 0xab, 0x75, 0xba, 0xaa, 0x5c,
- 0x6e, 0x08, 0x29, 0x51, 0xa2, 0x5c, 0x6f, 0xcd, 0x70, 0xf9, 0xd8, 0xf3, 0x3d, 0xdf, 0x25, 0xc4,
- 0xd3, 0x4d, 0x74, 0x1f, 0xf2, 0x8d, 0x72, 0x3b, 0x14, 0x95, 0xbe, 0x40, 0xd4, 0x40, 0x1f, 0x6a,
- 0x4c, 0x9c, 0x47, 0xb8, 0x1e, 0xc1, 0x72, 0x43, 0x56, 0x9f, 0xca, 0x21, 0x5f, 0x46, 0xfc, 0x1a,
- 0xe5, 0xbb, 0x35, 0xcb, 0x87, 0xdd, 0x23, 0x1c, 0xe1, 0x94, 0x7c, 0x58, 0xaf, 0xe2, 0xa1, 0x8b,
- 0x7b, 0xba, 0x8f, 0x8d, 0xc8, 0xa4, 0xbd, 0x0f, 0x4b, 0xaa, 0x5c, 0xae, 0x0a, 0x31, 0xf1, 0x2d,
- 0x0a, 0x74, 0x73, 0x06, 0xc8, 0xc5, 0xba, 0xc1, 0xfb, 0xbb, 0xa3, 0xca, 0xe5, 0xae, 0xac, 0xed,
- 0x2b, 0xf2, 0x81, 0x10, 0xbf, 0xa0, 0xbf, 0x3d, 0x17, 0xeb, 0x3e, 0xd6, 0x4e, 0x4d, 0xfc, 0x8a,
- 0x48, 0xfd, 0xcf, 0x38, 0xf7, 0xae, 0x3c, 0xd3, 0xc7, 0x1e, 0xfa, 0x16, 0xac, 0xec, 0xb4, 0x1a,
- 0x15, 0xa5, 0x29, 0x6b, 0x6d, 0x59, 0xa5, 0x73, 0x19, 0x13, 0x3f, 0xa0, 0x40, 0xef, 0xcc, 0x02,
- 0x39, 0x83, 0x43, 0xd3, 0xc6, 0xda, 0x10, 0xbb, 0xc1, 0x74, 0x3e, 0x01, 0x21, 0xe0, 0x66, 0x2e,
- 0x5f, 0xfd, 0xa5, 0x10, 0x17, 0xef, 0x50, 0x76, 0xe9, 0x02, 0xf6, 0x23, 0xcb, 0x39, 0xd4, 0x2d,
- 0x8b, 0xf2, 0xdf, 0x85, 0x9c, 0x2a, 0x77, 0x76, 0xf7, 0x6a, 0xb5, 0xba, 0x2c, 0x24, 0xc4, 0x77,
- 0x28, 0xe3, 0x57, 0xce, 0x8c, 0xd7, 0x3b, 0x1e, 0xf5, 0xfb, 0x16, 0xe6, 0x83, 0x3e, 0x50, 0x95,
- 0xae, 0xac, 0xd5, 0x94, 0xba, 0xdc, 0x11, 0x92, 0x17, 0xad, 0x07, 0xd7, 0xf4, 0xb1, 0xd6, 0x37,
- 0x2d, 0x4c, 0x55, 0xfd, 0x9b, 0x04, 0xac, 0xee, 0x30, 0xf9, 0x11, 0xcf, 0x52, 0x05, 0x71, 0x66,
- 0xec, 0x5a, 0x5b, 0x95, 0x39, 0x49, 0x88, 0x89, 0x5b, 0x14, 0xfa, 0xa3, 0xd7, 0xab, 0x41, 0x23,
- 0x33, 0xc8, 0x48, 0xa4, 0x7f, 0x87, 0x20, 0xcd, 0x62, 0xb2, 0xe5, 0x51, 0xde, 0xd9, 0xd9, 0x6b,
- 0xec, 0xd5, 0xcb, 0xdd, 0x96, 0x4a, 0x9c, 0xe7, 0x6d, 0x8a, 0x7d, 0xff, 0x4b, 0xb0, 0xd9, 0x9a,
- 0xd1, 0x7b, 0xbd, 0xd1, 0x60, 0x64, 0xe9, 0xbe, 0xe3, 0xd2, 0x25, 0xf7, 0x7d, 0xb8, 0x35, 0x2b,
- 0x43, 0x7e, 0xd1, 0x55, 0xcb, 0x3b, 0x5d, 0xad, 0xb5, 0xd7, 0x6d, 0xef, 0x75, 0x89, 0x77, 0xfd,
- 0x90, 0x0a, 0xd8, 0xfc, 0x12, 0x01, 0xf8, 0x0b, 0xdf, 0xd5, 0x7b, 0xbe, 0xc6, 0x2d, 0x24, 0x41,
- 0x7f, 0x06, 0x37, 0xc3, 0x39, 0x25, 0x5b, 0x5c, 0xae, 0x6a, 0xfb, 0xe5, 0xfa, 0x1e, 0x55, 0x76,
- 0x89, 0x82, 0xde, 0xb9, 0x68, 0x66, 0xc9, 0x66, 0xc7, 0x86, 0x46, 0xcd, 0x14, 0xd5, 0xfb, 0xef,
- 0x2f, 0xc1, 0x9b, 0x9d, 0xa1, 0x65, 0xfa, 0xbe, 0x7e, 0x68, 0xe1, 0xb6, 0xee, 0x56, 0x9d, 0x88,
- 0xfe, 0xeb, 0x70, 0xa3, 0x5d, 0x56, 0x54, 0xed, 0x40, 0xe9, 0xee, 0x6a, 0xaa, 0xdc, 0xe9, 0xaa,
- 0xca, 0x4e, 0x57, 0x69, 0x35, 0x85, 0x98, 0xb8, 0x49, 0x05, 0xfd, 0xbf, 0x19, 0x41, 0x9e, 0xd1,
- 0xd7, 0x86, 0xba, 0xe9, 0x6a, 0xaf, 0x4c, 0xff, 0x58, 0x73, 0xb1, 0xe7, 0xbb, 0x26, 0x3d, 0xb2,
- 0x48, 0xbf, 0xab, 0xb0, 0xda, 0x69, 0xd7, 0x95, 0xee, 0x14, 0x52, 0x5c, 0xfc, 0x06, 0x45, 0xfa,
- 0xe0, 0x1c, 0x24, 0x8f, 0x74, 0x6c, 0x16, 0xa5, 0x09, 0x37, 0xdb, 0x6a, 0x6b, 0x47, 0xee, 0x74,
- 0x88, 0x5e, 0xe5, 0xaa, 0x26, 0xd7, 0xe5, 0x86, 0xdc, 0xa4, 0x2a, 0x3d, 0x7f, 0x3d, 0xd0, 0x4e,
- 0xb9, 0x4e, 0x0f, 0x7b, 0x1e, 0x51, 0x29, 0x36, 0x34, 0x6c, 0x61, 0xea, 0xf1, 0x10, 0xbc, 0x0a,
- 0x08, 0x01, 0x5e, 0x88, 0x94, 0x14, 0x3f, 0xa2, 0x48, 0xef, 0xbf, 0x06, 0x29, 0x8a, 0xf1, 0x02,
- 0xbe, 0xc2, 0x46, 0x56, 0x6e, 0x56, 0xb5, 0x8e, 0xf2, 0x99, 0x1c, 0x1d, 0x22, 0xb1, 0x89, 0xe7,
- 0xcf, 0xf5, 0x64, 0x8c, 0xba, 0x6d, 0x68, 0x9e, 0xf9, 0x63, 0x1c, 0x1d, 0x2c, 0x45, 0x76, 0xe0,
- 0x83, 0xa0, 0x77, 0x04, 0x77, 0x32, 0x5a, 0x2a, 0x6a, 0x4a, 0x4a, 0x4a, 0xac, 0x50, 0x29, 0xdf,
- 0x7a, 0x4d, 0xa7, 0x89, 0x8c, 0x70, 0xf8, 0x54, 0xea, 0x8c, 0x40, 0xe9, 0xb7, 0xe2, 0x70, 0x33,
- 0x38, 0xb7, 0x3a, 0xa6, 0x81, 0xe9, 0xd9, 0xd9, 0x1d, 0x0f, 0xb1, 0x27, 0x1d, 0xc3, 0x92, 0x6c,
- 0x8f, 0x06, 0xe8, 0x63, 0xc8, 0x2a, 0x5d, 0x59, 0x2d, 0x57, 0xea, 0x64, 0x0f, 0x46, 0x4d, 0x82,
- 0x67, 0x1a, 0x58, 0xa3, 0x0e, 0xc2, 0xb6, 0xe9, 0x63, 0x97, 0x2c, 0x29, 0x32, 0x88, 0x8f, 0x21,
- 0xdb, 0xd8, 0xab, 0x77, 0x95, 0x46, 0xb9, 0x2d, 0xc4, 0x2f, 0x62, 0x18, 0x8c, 0x2c, 0xdf, 0x1c,
- 0xe8, 0x43, 0xd2, 0x89, 0x5f, 0x24, 0x20, 0x1f, 0x71, 0xcb, 0x67, 0x7d, 0xa9, 0xf8, 0x19, 0x5f,
- 0xea, 0x4d, 0xc8, 0xd2, 0xd0, 0x47, 0x33, 0x0d, 0x7e, 0x14, 0x67, 0xe8, 0xb3, 0x62, 0xa0, 0x36,
- 0x80, 0xe9, 0x69, 0x87, 0xce, 0xc8, 0x36, 0xb0, 0x41, 0xfd, 0xbc, 0xe2, 0xd6, 0xe6, 0x1c, 0x0e,
- 0x85, 0xe2, 0x55, 0x18, 0x4f, 0x89, 0x0c, 0x5a, 0xcd, 0x99, 0xc1, 0x33, 0xda, 0x82, 0x1b, 0x67,
- 0x62, 0xc5, 0x31, 0x91, 0xbc, 0x44, 0x25, 0x9f, 0x09, 0xf2, 0xc6, 0x8a, 0x71, 0xc6, 0xb1, 0x49,
- 0x5d, 0xdd, 0xdf, 0xfc, 0x79, 0x06, 0x0a, 0x74, 0xc3, 0xb6, 0xf5, 0xb1, 0xe5, 0xe8, 0x06, 0x7a,
- 0x0a, 0x29, 0xc3, 0xd1, 0xfa, 0x36, 0xf7, 0x28, 0xb7, 0xe6, 0x00, 0xef, 0x18, 0x27, 0xd3, 0x4e,
- 0xa5, 0xe1, 0xd4, 0x6c, 0x54, 0x07, 0x18, 0xea, 0xae, 0x3e, 0xc0, 0x3e, 0x89, 0x4a, 0x59, 0xbc,
- 0xfd, 0xd1, 0x3c, 0xee, 0x5d, 0xc0, 0xa4, 0x46, 0xf8, 0xd1, 0x0f, 0x21, 0x3f, 0x99, 0xe6, 0xc0,
- 0x03, 0xfd, 0x74, 0x3e, 0xb8, 0x70, 0x70, 0xa5, 0x70, 0x2d, 0x06, 0x19, 0x02, 0x2f, 0x24, 0x50,
- 0x09, 0x3e, 0x39, 0x42, 0x89, 0x4b, 0x1c, 0xf8, 0xa3, 0x8b, 0x4b, 0x20, 0x10, 0x44, 0x0b, 0xa1,
- 0x84, 0x90, 0x40, 0x24, 0xf8, 0xe6, 0x00, 0xbb, 0x5c, 0x42, 0xea, 0x72, 0x12, 0xba, 0x04, 0x22,
- 0x2a, 0xc1, 0x0f, 0x09, 0xe8, 0x6d, 0x00, 0x2f, 0xb4, 0xc3, 0xd4, 0xef, 0xcd, 0xaa, 0x11, 0x0a,
- 0xba, 0x0b, 0xeb, 0x91, 0xad, 0xaa, 0x85, 0xab, 0x3d, 0x43, 0xd7, 0x1c, 0x8a, 0xbc, 0xdb, 0xe1,
- 0x0b, 0xff, 0x1e, 0xdc, 0x70, 0xf1, 0x8f, 0x46, 0xc4, 0x83, 0xd2, 0xfa, 0xa6, 0xad, 0x5b, 0xe6,
- 0x8f, 0x75, 0xf2, 0x7e, 0x23, 0x4b, 0xc1, 0xd7, 0x83, 0x97, 0xb5, 0xc8, 0x3b, 0xf1, 0x04, 0x56,
- 0x66, 0x34, 0x7d, 0x8e, 0xd7, 0x5b, 0x99, 0x0e, 0x08, 0xe7, 0x59, 0x1a, 0x21, 0x68, 0xd4, 0xbf,
- 0x26, 0xc2, 0xa6, 0x95, 0x7e, 0x4d, 0xc2, 0x02, 0xd0, 0x19, 0x61, 0x33, 0xfa, 0xbf, 0x1e, 0x61,
- 0x21, 0x68, 0xd4, 0xfb, 0xff, 0x55, 0x1c, 0x72, 0xe1, 0x6e, 0x40, 0xcf, 0x60, 0xc9, 0x1f, 0x0f,
- 0x99, 0xdd, 0x2a, 0x6e, 0x7d, 0xb2, 0xc8, 0x4e, 0x2a, 0x11, 0xd3, 0xcb, 0x2c, 0x10, 0xc5, 0x10,
- 0x3f, 0x83, 0x25, 0x42, 0x92, 0x54, 0x6e, 0x8c, 0x57, 0x20, 0xbf, 0xd7, 0xec, 0xb4, 0xe5, 0x1d,
- 0xa5, 0xa6, 0xc8, 0x55, 0x21, 0x86, 0x00, 0xd2, 0xcc, 0xd1, 0x15, 0xe2, 0x68, 0x1d, 0x84, 0xb6,
- 0xd2, 0x96, 0xeb, 0xc4, 0x55, 0x68, 0xb5, 0xd9, 0x31, 0x91, 0x40, 0x6f, 0xc0, 0x5a, 0xe4, 0xe0,
- 0xd0, 0x88, 0x5f, 0xf2, 0x5c, 0x56, 0x85, 0xa4, 0xf4, 0x6f, 0x49, 0xc8, 0x85, 0xba, 0x43, 0x2e,
- 0xdc, 0x24, 0x8e, 0xac, 0x36, 0x70, 0x0c, 0xb3, 0x3f, 0xd6, 0x98, 0xc3, 0x16, 0x89, 0x58, 0xbf,
- 0x39, 0xc7, 0x38, 0x54, 0xac, 0x1b, 0x0d, 0xca, 0x7f, 0x40, 0xd8, 0x43, 0xf0, 0xdd, 0x98, 0xba,
- 0xe6, 0xce, 0xbc, 0x23, 0x32, 0xeb, 0x90, 0x3d, 0xd4, 0x8f, 0x98, 0x94, 0xc4, 0xdc, 0x71, 0x71,
- 0x45, 0x3f, 0x8a, 0x22, 0x67, 0x0e, 0xf5, 0x23, 0x8a, 0xf6, 0x39, 0x14, 0x99, 0xe7, 0x43, 0x0d,
- 0x35, 0xc1, 0x64, 0x61, 0xfe, 0x83, 0xf9, 0xb2, 0x0c, 0x8c, 0x31, 0x8a, 0xbc, 0x1c, 0xc2, 0x05,
- 0xbd, 0x25, 0xb1, 0x06, 0x45, 0x5e, 0x9a, 0xbb, 0xb7, 0x0d, 0x7d, 0x38, 0xd5, 0xdb, 0x81, 0x3e,
- 0x0c, 0xd0, 0x3c, 0xec, 0x33, 0xb4, 0xd4, 0xdc, 0x68, 0x1d, 0xec, 0x4f, 0xa1, 0x79, 0xd8, 0x27,
- 0x3f, 0x2b, 0x69, 0x96, 0x5d, 0x90, 0x1e, 0xc0, 0xc6, 0x45, 0x93, 0x30, 0x75, 0x6a, 0xc6, 0xa7,
- 0x4e, 0x4d, 0xe9, 0x11, 0x14, 0xa2, 0x5a, 0x45, 0x77, 0x40, 0x08, 0xbc, 0x86, 0x19, 0x96, 0x22,
- 0xa7, 0x73, 0xb3, 0x23, 0xfd, 0x3c, 0x0e, 0xe8, 0xac, 0xf2, 0x88, 0xfd, 0x8a, 0x78, 0xc9, 0xb3,
- 0x20, 0x28, 0xf2, 0x2e, 0xb0, 0x5f, 0xdf, 0xa5, 0xf9, 0x21, 0xea, 0xb7, 0xf6, 0x6d, 0xbe, 0x1a,
- 0x2e, 0x73, 0xa6, 0xe5, 0x38, 0x4a, 0xcd, 0x96, 0xf6, 0xa1, 0x10, 0xd5, 0x3e, 0xba, 0x0d, 0x05,
- 0xe2, 0x63, 0xcf, 0x74, 0x06, 0x4e, 0xf0, 0x38, 0xe8, 0xc4, 0x7b, 0x50, 0xa4, 0xbb, 0x5a, 0x9b,
- 0x71, 0x2f, 0x0a, 0x94, 0xba, 0x33, 0xd1, 0x56, 0x74, 0x1e, 0x16, 0xd0, 0xd6, 0xcf, 0xe2, 0x90,
- 0x0b, 0x2d, 0x08, 0xea, 0xb0, 0x63, 0x46, 0x33, 0x9c, 0x81, 0x6e, 0xda, 0xdc, 0x5e, 0x6c, 0xcd,
- 0x69, 0x84, 0xaa, 0x94, 0x89, 0xd9, 0x0a, 0x7a, 0xb2, 0x30, 0x02, 0x19, 0x02, 0x3b, 0xbb, 0x66,
- 0x87, 0x40, 0xa9, 0x41, 0x47, 0xbe, 0x03, 0xb9, 0xd0, 0xe3, 0x91, 0xee, 0x5d, 0x64, 0x5c, 0x96,
- 0x21, 0xb7, 0xd7, 0xac, 0xb4, 0xf6, 0x9a, 0x55, 0xb9, 0x2a, 0xc4, 0x51, 0x1e, 0x32, 0xc1, 0x43,
- 0x42, 0xfa, 0xab, 0x38, 0xe4, 0xc9, 0x52, 0x0b, 0xdc, 0x91, 0x67, 0x90, 0xf6, 0x9c, 0x91, 0xdb,
- 0xc3, 0x57, 0xf0, 0x47, 0x38, 0xc2, 0x8c, 0x13, 0x97, 0xb8, 0xba, 0x13, 0x27, 0x19, 0xb0, 0xca,
- 0x12, 0xb0, 0x8a, 0xed, 0x87, 0x1e, 0x54, 0x0b, 0x72, 0x3c, 0x4f, 0x71, 0x25, 0x2f, 0x2a, 0xcb,
- 0x40, 0x6a, 0xb6, 0xf4, 0xc7, 0x71, 0x28, 0xf2, 0xb0, 0x36, 0x90, 0x31, 0xbd, 0xac, 0xe3, 0xd7,
- 0xb0, 0xac, 0x2f, 0xdc, 0x5b, 0x89, 0x8b, 0xf6, 0x96, 0xf4, 0xaf, 0x19, 0x58, 0xed, 0x62, 0xcf,
- 0xef, 0xd0, 0xdc, 0x4a, 0xd0, 0xb5, 0x8b, 0xed, 0x01, 0x52, 0x21, 0x8d, 0x4f, 0x69, 0xa2, 0x36,
- 0x31, 0x77, 0xb6, 0xef, 0x8c, 0x80, 0x92, 0x4c, 0x20, 0x54, 0x8e, 0x84, 0x3a, 0x90, 0x0d, 0x2e,
- 0xdf, 0xb8, 0x61, 0x7e, 0x38, 0x07, 0x6a, 0x79, 0x68, 0x76, 0xb0, 0x7b, 0x6a, 0xf6, 0x70, 0x35,
- 0xbc, 0x7d, 0x53, 0x43, 0x20, 0xf1, 0x4f, 0x52, 0x90, 0xa2, 0x62, 0xd0, 0x29, 0xac, 0xbc, 0xd2,
- 0x7d, 0xec, 0x0e, 0x74, 0xf7, 0x44, 0xa3, 0x22, 0xb9, 0xb6, 0x9f, 0x5f, 0xbe, 0xef, 0xa5, 0xb2,
- 0x71, 0xaa, 0xdb, 0x3d, 0x7c, 0x10, 0x00, 0xef, 0xc6, 0xd4, 0x62, 0x28, 0x85, 0xc9, 0xfd, 0x59,
- 0x1c, 0x6e, 0xf0, 0x78, 0x8b, 0x9c, 0x3b, 0x74, 0x43, 0x33, 0xf1, 0xcc, 0x86, 0xb5, 0xaf, 0x2e,
- 0xbe, 0x1d, 0xc2, 0x93, 0x8d, 0x4f, 0x0e, 0xd3, 0xe1, 0x14, 0x85, 0x75, 0x64, 0x00, 0xcb, 0x81,
- 0x15, 0x62, 0xf2, 0x99, 0x92, 0x6b, 0x57, 0x92, 0x6f, 0xc8, 0x3c, 0xee, 0xdd, 0x8d, 0xa9, 0x05,
- 0x0e, 0x4f, 0xdf, 0x89, 0x0a, 0x08, 0xb3, 0xda, 0x41, 0xef, 0xc2, 0xb2, 0x8d, 0x5f, 0x69, 0xa1,
- 0x86, 0xe8, 0x0c, 0x24, 0xd5, 0x82, 0x8d, 0x5f, 0x4d, 0x1a, 0x09, 0x90, 0xf4, 0xf5, 0x23, 0xbe,
- 0x5a, 0xc9, 0x4f, 0xb1, 0x02, 0x37, 0xce, 0x1d, 0x29, 0xfa, 0x3a, 0x08, 0x3a, 0x7b, 0xa1, 0x19,
- 0x23, 0x97, 0xb9, 0xb3, 0x0c, 0x72, 0x85, 0xd3, 0xab, 0x9c, 0x2c, 0xfe, 0x76, 0x1c, 0xf2, 0x91,
- 0xee, 0xa2, 0x1e, 0x64, 0x83, 0x90, 0x9d, 0xdf, 0x51, 0x3e, 0xbd, 0x94, 0x22, 0xba, 0xc1, 0x8d,
- 0x2e, 0x0e, 0xb0, 0xd5, 0x10, 0x38, 0x18, 0x4a, 0x32, 0x1c, 0x4a, 0x25, 0x03, 0x29, 0xaa, 0x7c,
- 0xf1, 0x7b, 0x80, 0xce, 0xb2, 0xa2, 0x0f, 0x60, 0x05, 0xdb, 0x64, 0x93, 0x85, 0x51, 0x39, 0x1d,
- 0x4f, 0x41, 0x2d, 0x72, 0x72, 0xd0, 0xf0, 0x2d, 0xc8, 0x85, 0x77, 0xc9, 0x54, 0x55, 0x49, 0x75,
- 0x42, 0x90, 0x56, 0x60, 0x99, 0x4e, 0x82, 0xa7, 0x32, 0xa7, 0x5e, 0xfa, 0xaf, 0x24, 0xac, 0xd2,
- 0xd3, 0xbe, 0x66, 0x5a, 0xd8, 0x0b, 0x36, 0x78, 0x0d, 0x96, 0x3c, 0xd3, 0x3e, 0xb9, 0x4a, 0x80,
- 0x48, 0xf8, 0xd1, 0xf7, 0x60, 0xa5, 0xef, 0xb8, 0x03, 0xdd, 0xd7, 0xfa, 0xfc, 0xe5, 0x15, 0xce,
- 0xe7, 0x22, 0x83, 0x0a, 0x68, 0x44, 0x25, 0xcc, 0x7e, 0x62, 0x83, 0x39, 0x9d, 0x1e, 0xd5, 0x67,
- 0x56, 0x2d, 0x06, 0x64, 0x3a, 0x30, 0x0f, 0x7d, 0x0b, 0x44, 0x7e, 0xe3, 0x6f, 0x10, 0x57, 0x79,
- 0x60, 0xda, 0xd8, 0xd0, 0xbc, 0x63, 0xdd, 0x35, 0x4c, 0xfb, 0x88, 0x3a, 0x64, 0x59, 0x75, 0x83,
- 0xb5, 0xa8, 0x86, 0x0d, 0x3a, 0xfc, 0x3d, 0xc2, 0xd3, 0x61, 0x29, 0x0b, 0xe9, 0xaa, 0xf3, 0xdc,
- 0xdb, 0xcd, 0xaa, 0xf5, 0x75, 0xb1, 0xe9, 0xff, 0x69, 0x40, 0x25, 0xfd, 0x04, 0x52, 0xd4, 0xc2,
- 0x5f, 0xcf, 0xdd, 0x52, 0x09, 0xd6, 0xc2, 0xfb, 0xb5, 0xf0, 0x50, 0x09, 0x6e, 0x98, 0x56, 0xc3,
- 0x57, 0xfc, 0x4c, 0xf1, 0xa4, 0x3f, 0x4d, 0x41, 0x31, 0x48, 0x1d, 0xb1, 0xcb, 0x4b, 0xe9, 0x77,
- 0x53, 0xdc, 0x93, 0x78, 0x0f, 0x52, 0x95, 0x97, 0x5d, 0xb9, 0x23, 0xc4, 0xc4, 0x37, 0x69, 0xfe,
- 0x67, 0x8d, 0xe6, 0x7f, 0x28, 0xea, 0xf6, 0xe1, 0xd8, 0xa7, 0xd9, 0x48, 0x74, 0x17, 0xf2, 0x24,
- 0x2e, 0x69, 0x3e, 0xd5, 0xf6, 0xba, 0xb5, 0x47, 0x02, 0x4c, 0x5d, 0x40, 0xb0, 0xb6, 0x24, 0xcc,
- 0xb5, 0x8f, 0xb4, 0x91, 0xdf, 0x7f, 0x44, 0x38, 0xde, 0x86, 0xc4, 0xf3, 0x7d, 0x21, 0x2e, 0xde,
- 0xa4, 0x0d, 0x85, 0x48, 0xc3, 0x93, 0x53, 0xf2, 0x5e, 0x82, 0xa5, 0x4a, 0xab, 0x55, 0x17, 0x0a,
- 0xe2, 0x06, 0x6d, 0x81, 0xa2, 0x62, 0x1d, 0xc7, 0x22, 0x6d, 0xde, 0x87, 0xf4, 0x7e, 0x59, 0x55,
- 0x9a, 0x5d, 0x21, 0x21, 0x8a, 0xb4, 0xd5, 0x7a, 0xa4, 0xd5, 0xa9, 0xee, 0x9a, 0xb6, 0xcf, 0xdb,
- 0x55, 0x5b, 0x7b, 0x95, 0xba, 0x2c, 0xe4, 0xcf, 0x69, 0x67, 0x38, 0x23, 0x9e, 0xee, 0xfa, 0x30,
- 0x92, 0x1f, 0x4b, 0x4e, 0x5d, 0x11, 0xb0, 0x96, 0xd1, 0xd4, 0xd8, 0x7b, 0x90, 0xea, 0x2a, 0x0d,
- 0x59, 0x15, 0x96, 0xce, 0xd1, 0x0b, 0x75, 0xd0, 0xd8, 0x15, 0xc6, 0x8a, 0xd2, 0xec, 0xca, 0xea,
- 0x7e, 0x58, 0xb2, 0x21, 0xa4, 0xa6, 0xf2, 0xea, 0x1c, 0xd8, 0xf6, 0xb1, 0x7b, 0xaa, 0x5b, 0xfc,
- 0x0e, 0x83, 0x65, 0xe3, 0x97, 0xeb, 0x72, 0xf3, 0x69, 0x77, 0x57, 0x6b, 0xab, 0x72, 0x4d, 0x79,
- 0x21, 0xa4, 0xa7, 0xf2, 0x6f, 0x8c, 0xcf, 0xc2, 0xf6, 0x91, 0x7f, 0xac, 0x0d, 0x5d, 0xdc, 0x37,
- 0xbf, 0xe0, 0x5c, 0x53, 0x05, 0x22, 0x42, 0xe6, 0x1c, 0x2e, 0x76, 0x4d, 0x10, 0x91, 0xf5, 0x09,
- 0x14, 0x59, 0xf3, 0x20, 0x21, 0x2d, 0x64, 0xa7, 0xae, 0x75, 0x18, 0x5b, 0xb8, 0xb7, 0xd9, 0xb2,
- 0xa5, 0x79, 0xe1, 0x1b, 0x9d, 0x6e, 0xb9, 0x2b, 0x6b, 0x15, 0x12, 0x88, 0x56, 0xb5, 0x50, 0x79,
- 0x39, 0xf1, 0xeb, 0x94, 0xfd, 0xdd, 0xa9, 0xf9, 0xd7, 0x7d, 0xac, 0x1d, 0xea, 0xbd, 0x13, 0x6c,
- 0x68, 0x51, 0x4d, 0xde, 0x86, 0xa4, 0xda, 0x3a, 0x10, 0x96, 0xc5, 0x37, 0x28, 0xcf, 0x6a, 0x84,
- 0xc7, 0xa5, 0xfd, 0x93, 0x7e, 0x27, 0x1d, 0xf8, 0x7c, 0x91, 0xdc, 0xdc, 0xb5, 0xfb, 0x7c, 0x68,
- 0x1f, 0x0a, 0xec, 0x56, 0x80, 0x74, 0x75, 0xe4, 0x71, 0x6f, 0xf5, 0xde, 0x3c, 0x91, 0x21, 0x61,
- 0xeb, 0x50, 0x2e, 0xe6, 0xaf, 0xe6, 0x07, 0x13, 0x0a, 0x7a, 0x3f, 0xb0, 0x8b, 0x13, 0x07, 0x8f,
- 0x9d, 0x33, 0xcb, 0x8c, 0x1c, 0x84, 0x2c, 0x55, 0xc8, 0xf8, 0xae, 0x79, 0x74, 0x84, 0x5d, 0x1e,
- 0x94, 0x7e, 0x38, 0xcf, 0x39, 0xc7, 0x38, 0xd4, 0x80, 0x15, 0x61, 0x58, 0x0d, 0xfd, 0x46, 0xd3,
- 0xb1, 0x35, 0xc2, 0x42, 0xc3, 0xd2, 0xe2, 0xd6, 0xa3, 0x79, 0xbc, 0xb4, 0x08, 0x6f, 0xc3, 0x31,
- 0x78, 0x0a, 0x43, 0xd0, 0x67, 0xc8, 0x24, 0xe2, 0x61, 0x37, 0x1b, 0xd4, 0x4f, 0xa2, 0x79, 0xaf,
- 0xf9, 0x22, 0x1e, 0x76, 0x31, 0x4b, 0x4e, 0x54, 0x1e, 0xf1, 0x38, 0x21, 0x01, 0x1d, 0x82, 0xd0,
- 0xb3, 0x1c, 0xea, 0x7d, 0x1d, 0xe2, 0x63, 0xfd, 0xd4, 0x74, 0x5c, 0x9a, 0x27, 0x2b, 0xce, 0xe5,
- 0x60, 0xee, 0x30, 0xd6, 0x0a, 0xe7, 0x64, 0xf0, 0x2b, 0xbd, 0x69, 0x2a, 0xf5, 0x44, 0x2c, 0x8b,
- 0x2e, 0x64, 0x4b, 0xf7, 0xb1, 0x8d, 0x3d, 0x8f, 0x26, 0xd6, 0x88, 0x27, 0xc2, 0xe8, 0x75, 0x4e,
- 0x46, 0x9f, 0x43, 0xb1, 0x65, 0x93, 0x8e, 0x05, 0xcc, 0x1b, 0xb9, 0xb9, 0x13, 0x41, 0xd3, 0x8c,
- 0xac, 0x2f, 0x33, 0x68, 0x68, 0x13, 0x6e, 0xe8, 0x9e, 0x67, 0x1e, 0xd9, 0x9e, 0xe6, 0x3b, 0x9a,
- 0x63, 0x07, 0x77, 0x98, 0x1b, 0x40, 0x8f, 0x40, 0xc4, 0x5f, 0x76, 0x9d, 0x96, 0x8d, 0xd9, 0xfa,
- 0x97, 0xbe, 0x0f, 0xf9, 0xc8, 0x62, 0x93, 0x1a, 0x17, 0xc5, 0x7b, 0x2b, 0x90, 0x6f, 0xb6, 0x9a,
- 0xf4, 0x82, 0x4c, 0x69, 0x3e, 0x15, 0xe2, 0x94, 0x20, 0xcb, 0xd5, 0x0e, 0xbb, 0x33, 0x13, 0x12,
- 0x08, 0x41, 0xb1, 0x5c, 0x57, 0xe5, 0x72, 0x95, 0x5f, 0xa3, 0x55, 0x85, 0xa4, 0xf4, 0x03, 0x10,
- 0x66, 0xe7, 0x5f, 0x52, 0x2e, 0x12, 0x51, 0x04, 0xa8, 0x2a, 0x9d, 0x9d, 0xb2, 0x5a, 0x65, 0x12,
- 0x04, 0x28, 0x84, 0x37, 0x71, 0x84, 0x92, 0x20, 0x2d, 0x54, 0x99, 0xde, 0x9e, 0x91, 0xe7, 0xa4,
- 0xf4, 0x5d, 0x58, 0x99, 0x99, 0x23, 0xe9, 0xc9, 0x6b, 0x06, 0x20, 0x37, 0x94, 0xae, 0x56, 0xae,
- 0x1f, 0x94, 0x5f, 0x76, 0x58, 0x4a, 0x8c, 0x12, 0x94, 0x9a, 0xd6, 0x6c, 0x35, 0xe5, 0x46, 0xbb,
- 0xfb, 0x52, 0x48, 0x48, 0xed, 0xd9, 0x29, 0x7a, 0x2d, 0x62, 0x4d, 0x51, 0xe5, 0x29, 0x44, 0x4a,
- 0x98, 0x46, 0x3c, 0x04, 0x98, 0x2c, 0x51, 0xa9, 0x7b, 0x11, 0xda, 0x2a, 0x2c, 0xcb, 0xcd, 0xaa,
- 0xd6, 0xaa, 0x69, 0x61, 0xd2, 0x0e, 0x41, 0xb1, 0x5e, 0xa6, 0x97, 0xe3, 0x4a, 0x53, 0x6b, 0x97,
- 0x9b, 0x44, 0xcb, 0xa4, 0xd7, 0x65, 0xb5, 0xae, 0x44, 0xa9, 0x49, 0xc9, 0x02, 0x98, 0x04, 0xfe,
- 0xd2, 0xe7, 0xaf, 0xd1, 0xb0, 0xbc, 0x2f, 0x37, 0xbb, 0xb4, 0xc4, 0x4f, 0x88, 0xa3, 0x35, 0x58,
- 0xe1, 0x77, 0x4a, 0xe4, 0xa4, 0xa5, 0xc4, 0x04, 0xba, 0x0d, 0x6f, 0x75, 0x5e, 0x36, 0x77, 0x76,
- 0xd5, 0x56, 0x93, 0xde, 0x33, 0xcd, 0xb6, 0x48, 0x4a, 0xbf, 0x14, 0x20, 0xc3, 0xcd, 0x04, 0x52,
- 0x21, 0xa7, 0xf7, 0x7d, 0xec, 0x6a, 0xba, 0x65, 0x71, 0xa3, 0x79, 0x6f, 0x7e, 0x2b, 0x53, 0x2a,
- 0x13, 0xde, 0xb2, 0x65, 0xed, 0xc6, 0xd4, 0xac, 0xce, 0x7f, 0x47, 0x30, 0xed, 0x31, 0x77, 0x84,
- 0x16, 0xc7, 0xb4, 0xc7, 0x13, 0x4c, 0x7b, 0x8c, 0xf6, 0x00, 0x18, 0x26, 0xd6, 0x7b, 0xc7, 0x3c,
- 0xfe, 0xb9, 0xbf, 0x28, 0xa8, 0xac, 0xf7, 0x8e, 0x77, 0x63, 0x2a, 0xeb, 0x1d, 0x79, 0x40, 0x16,
- 0xac, 0x71, 0x58, 0xdb, 0xd0, 0x9c, 0x7e, 0xb0, 0xdf, 0x96, 0xe6, 0xce, 0x8b, 0x4e, 0xe3, 0xdb,
- 0x46, 0xab, 0xcf, 0x36, 0xe6, 0x6e, 0x4c, 0x15, 0xf4, 0x19, 0x1a, 0xf2, 0xe1, 0x06, 0x93, 0x36,
- 0x13, 0x55, 0xf2, 0x2c, 0xe1, 0x93, 0x45, 0xe5, 0x9d, 0x8d, 0x1e, 0xf5, 0xb3, 0x64, 0xf4, 0xf3,
- 0x38, 0x48, 0x4c, 0xac, 0x37, 0xb6, 0x7b, 0xc7, 0xae, 0x63, 0xd3, 0xbb, 0xc3, 0xd9, 0x3e, 0xb0,
- 0x0a, 0x9d, 0x67, 0x8b, 0xf6, 0xa1, 0x13, 0xc1, 0x3c, 0xd3, 0x9f, 0x5b, 0xfa, 0xeb, 0x9b, 0xa0,
- 0xe7, 0x90, 0xd6, 0xad, 0x57, 0xfa, 0xd8, 0xdb, 0x28, 0x50, 0xf1, 0x9b, 0x8b, 0x88, 0xa7, 0x8c,
- 0xbb, 0x31, 0x95, 0x43, 0xa0, 0x26, 0x64, 0x0c, 0xdc, 0xd7, 0x47, 0x96, 0x4f, 0x0f, 0x89, 0xf9,
- 0x8e, 0xff, 0x00, 0xad, 0xca, 0x38, 0x77, 0x63, 0x6a, 0x00, 0x82, 0x3e, 0x9f, 0x84, 0xdd, 0x3d,
- 0x67, 0x64, 0xfb, 0xf4, 0x58, 0x98, 0x2f, 0xb7, 0x11, 0xa0, 0xca, 0x41, 0x92, 0x70, 0x64, 0xfb,
- 0x91, 0x38, 0x9b, 0x3e, 0xa3, 0x5d, 0x48, 0xd9, 0xf8, 0x14, 0xb3, 0x53, 0x24, 0xbf, 0x75, 0x77,
- 0x01, 0xdc, 0x26, 0xe1, 0xdb, 0x8d, 0xa9, 0x0c, 0x80, 0xec, 0x0e, 0xc7, 0x65, 0x77, 0x43, 0xd6,
- 0x98, 0x9e, 0x16, 0x8b, 0xed, 0x8e, 0x96, 0x5b, 0x63, 0xbc, 0x64, 0x77, 0x38, 0xc1, 0x03, 0x99,
- 0x1d, 0x17, 0x0f, 0xb1, 0xee, 0x6f, 0xe4, 0x17, 0x9e, 0x1d, 0x95, 0x32, 0x92, 0xd9, 0x61, 0x10,
- 0xe2, 0x0b, 0xc8, 0x06, 0xd6, 0x02, 0xd5, 0x21, 0x4f, 0xeb, 0xda, 0x68, 0xd3, 0x20, 0x8a, 0x5f,
- 0xc4, 0xbb, 0x89, 0xb2, 0x4f, 0x90, 0xed, 0xf1, 0x35, 0x23, 0xbf, 0x84, 0x5c, 0x68, 0x38, 0xae,
- 0x19, 0xfa, 0x6f, 0xe3, 0x20, 0xcc, 0x1a, 0x0d, 0xd4, 0x82, 0x65, 0xac, 0xbb, 0xd6, 0x58, 0xeb,
- 0x9b, 0x24, 0x38, 0x0a, 0x8a, 0x29, 0x17, 0x11, 0x52, 0xa0, 0x00, 0x35, 0xc6, 0x8f, 0x1a, 0x50,
- 0x20, 0x4e, 0x4d, 0x88, 0x97, 0x58, 0x18, 0x2f, 0x4f, 0xf8, 0x39, 0x9c, 0xf8, 0xff, 0x61, 0xed,
- 0x1c, 0xc3, 0x83, 0x8e, 0x61, 0x3d, 0xcc, 0x60, 0x68, 0x67, 0x2a, 0xc8, 0x1f, 0xcc, 0x99, 0xf6,
- 0xa6, 0xec, 0x93, 0x92, 0xe1, 0x35, 0xff, 0x0c, 0xcd, 0x13, 0xdf, 0x81, 0x5b, 0x5f, 0x62, 0x75,
- 0xc4, 0x1c, 0x64, 0xf8, 0x5e, 0x16, 0xef, 0x41, 0x21, 0xba, 0x01, 0xd1, 0xbb, 0xb3, 0x1b, 0x9a,
- 0xa8, 0x37, 0x35, 0xbd, 0x2b, 0xc5, 0x0c, 0xa4, 0xe8, 0xee, 0x12, 0xb3, 0x90, 0x66, 0x26, 0x46,
- 0xfc, 0xa3, 0x38, 0xe4, 0xc2, 0x2d, 0x82, 0x9e, 0xc0, 0x52, 0x98, 0xd4, 0x5f, 0x4c, 0x97, 0x94,
- 0x8f, 0xb8, 0xf5, 0xc1, 0x4e, 0x5d, 0x7c, 0x3a, 0x02, 0x56, 0xb1, 0x0b, 0x69, 0xb6, 0xc5, 0xd0,
- 0x33, 0x80, 0xc9, 0xc2, 0xba, 0x44, 0xaf, 0x22, 0xdc, 0x95, 0x5c, 0x18, 0x72, 0x48, 0xff, 0x94,
- 0x88, 0xe4, 0xb9, 0x26, 0xd5, 0xb0, 0x1d, 0x48, 0x19, 0xd8, 0xd2, 0xc7, 0x0b, 0xdc, 0x1d, 0x9e,
- 0x45, 0x29, 0x55, 0x09, 0x04, 0xb1, 0x5f, 0x14, 0x0b, 0x7d, 0x06, 0x59, 0xdd, 0x32, 0x8f, 0x6c,
- 0xcd, 0x77, 0xb8, 0x4e, 0xbe, 0x7d, 0x39, 0xdc, 0x32, 0x41, 0xe9, 0x3a, 0xc4, 0x8a, 0xeb, 0xec,
- 0xa7, 0xf8, 0x21, 0xa4, 0xa8, 0x34, 0xf4, 0x0e, 0x14, 0xa8, 0x34, 0x6d, 0x60, 0x5a, 0x96, 0xe9,
- 0xf1, 0x74, 0x63, 0x9e, 0xd2, 0x1a, 0x94, 0x24, 0x3e, 0x86, 0x0c, 0x47, 0x40, 0x37, 0x21, 0x3d,
- 0xc4, 0xae, 0xe9, 0xb0, 0xd8, 0x2c, 0xa9, 0xf2, 0x27, 0x42, 0x77, 0xfa, 0x7d, 0x0f, 0xfb, 0xd4,
- 0x49, 0x48, 0xaa, 0xfc, 0xa9, 0x72, 0x03, 0xd6, 0xce, 0xd9, 0x03, 0xd2, 0x1f, 0x26, 0x20, 0x17,
- 0x66, 0x78, 0xd0, 0x3e, 0x14, 0xf5, 0x1e, 0xad, 0xdf, 0x19, 0xea, 0xbe, 0x8f, 0x5d, 0xfb, 0xb2,
- 0x79, 0x9d, 0x65, 0x06, 0xd3, 0x66, 0x28, 0xe8, 0x39, 0x64, 0x4e, 0x4d, 0xfc, 0xea, 0x6a, 0xd7,
- 0x6b, 0x69, 0x02, 0x51, 0xb3, 0xd1, 0xe7, 0xb0, 0xca, 0xc3, 0xd3, 0x81, 0x3e, 0x1c, 0x12, 0xff,
- 0xa0, 0x6f, 0x73, 0x8f, 0xeb, 0x32, 0xb0, 0x3c, 0xd6, 0x6d, 0x30, 0xac, 0x9a, 0x2d, 0x7d, 0x0a,
- 0xf9, 0x48, 0x55, 0x39, 0x12, 0x20, 0x39, 0x72, 0xed, 0x20, 0x69, 0x3c, 0x72, 0x6d, 0xb4, 0x01,
- 0x99, 0x21, 0x4b, 0xc8, 0x51, 0xb1, 0x05, 0x35, 0x78, 0x7c, 0xb6, 0x94, 0x8d, 0x0b, 0x09, 0xe9,
- 0xcf, 0xe2, 0xb0, 0x1e, 0xa4, 0xa7, 0xa2, 0x65, 0xef, 0xd2, 0xcf, 0xe2, 0x50, 0x88, 0x12, 0xd0,
- 0x7b, 0x90, 0xae, 0xb6, 0xe8, 0x9d, 0x78, 0x6c, 0x2a, 0x6d, 0x84, 0xed, 0xd3, 0x6d, 0xc3, 0xe9,
- 0x9d, 0xb0, 0xa4, 0xcc, 0xfb, 0x90, 0xe1, 0x4e, 0xb2, 0x10, 0x9f, 0x4a, 0xde, 0x90, 0x66, 0xdc,
- 0x4d, 0x22, 0xed, 0xee, 0x40, 0x56, 0x7e, 0xd1, 0x95, 0xd5, 0x66, 0xb9, 0x3e, 0x93, 0x60, 0x22,
- 0x0d, 0xf1, 0x17, 0x64, 0x2a, 0x74, 0x6b, 0xfb, 0x74, 0x53, 0x7a, 0x04, 0xcb, 0x55, 0x0a, 0x1f,
- 0xe4, 0x6b, 0x3f, 0x80, 0x95, 0x9e, 0x63, 0xfb, 0xba, 0x69, 0x93, 0x78, 0x7f, 0xa0, 0x1f, 0x05,
- 0xb5, 0x4f, 0xc5, 0x90, 0xac, 0x10, 0xaa, 0xf4, 0xef, 0x71, 0x28, 0x72, 0x83, 0x16, 0xf0, 0x16,
- 0x21, 0xe1, 0x78, 0xbc, 0x79, 0xc2, 0xf1, 0x10, 0x82, 0x25, 0xdd, 0xed, 0x1d, 0x73, 0x8d, 0xd1,
- 0xdf, 0x44, 0x65, 0x3d, 0x67, 0x30, 0xd0, 0xed, 0x20, 0x95, 0x10, 0x3c, 0xa2, 0x3a, 0x24, 0xb1,
- 0x7d, 0xba, 0x48, 0x69, 0xf7, 0x94, 0xf4, 0x92, 0x6c, 0x9f, 0xb2, 0x5c, 0x28, 0x81, 0x11, 0x3f,
- 0x81, 0x6c, 0x40, 0x58, 0xa8, 0x88, 0xfa, 0xbf, 0xe3, 0xb0, 0x22, 0x73, 0x05, 0x05, 0xe3, 0x8a,
- 0xde, 0x1a, 0xc5, 0xaf, 0xe9, 0xd6, 0x08, 0xed, 0x43, 0x9a, 0x56, 0x2c, 0x05, 0xd7, 0x5b, 0xf3,
- 0xf8, 0xd4, 0x33, 0x1d, 0x63, 0x35, 0x1b, 0x41, 0x95, 0x3c, 0x43, 0x13, 0x1f, 0x43, 0x3e, 0x42,
- 0x5e, 0x68, 0xec, 0x3f, 0x85, 0x95, 0x99, 0x3d, 0x71, 0x3d, 0x59, 0xdd, 0xaf, 0x41, 0x31, 0xf2,
- 0x19, 0xcf, 0xe4, 0x9a, 0x70, 0x39, 0x42, 0x55, 0x0c, 0x69, 0x1b, 0x0a, 0x53, 0xb2, 0xf9, 0x7e,
- 0x8b, 0xcf, 0xb1, 0xdf, 0xa4, 0xdf, 0x2c, 0x41, 0x3e, 0x52, 0xb6, 0x86, 0x14, 0x48, 0x99, 0x3e,
- 0x0e, 0x4f, 0xf6, 0x7b, 0x8b, 0x55, 0xbd, 0x95, 0x14, 0x1f, 0x0f, 0x54, 0x86, 0x20, 0xf6, 0x01,
- 0x14, 0x03, 0xdb, 0xbe, 0xd9, 0x37, 0xb1, 0x4b, 0x6c, 0x73, 0xf4, 0x73, 0x0f, 0xde, 0xbb, 0xbc,
- 0x3f, 0xf9, 0xd2, 0x83, 0x1c, 0xde, 0x93, 0x26, 0x13, 0x8b, 0x31, 0xe1, 0xdb, 0x73, 0xed, 0x60,
- 0x5e, 0x92, 0xe1, 0xbc, 0x88, 0xbf, 0x4a, 0xc0, 0x12, 0x91, 0x8b, 0x14, 0x48, 0x70, 0xe0, 0xf9,
- 0x3e, 0x9b, 0x98, 0xea, 0x78, 0xd8, 0x53, 0x35, 0x61, 0x92, 0x3d, 0xc5, 0xca, 0x80, 0x12, 0x73,
- 0x67, 0xd1, 0xa2, 0x60, 0x33, 0x85, 0x40, 0xe8, 0xc3, 0x60, 0xe5, 0x30, 0x1b, 0xbb, 0x5e, 0x62,
- 0x9f, 0x1e, 0x96, 0x82, 0x4f, 0x0f, 0x4b, 0x65, 0x3b, 0xf8, 0xa2, 0x08, 0x3d, 0x80, 0xbc, 0x77,
- 0xec, 0xb8, 0x3e, 0xcb, 0xb9, 0xf2, 0x38, 0xf5, 0x7c, 0x0e, 0xa0, 0x0d, 0xf7, 0x29, 0xdb, 0x3a,
- 0xa4, 0x2c, 0xfd, 0x10, 0x5b, 0xfc, 0xe3, 0x15, 0xf6, 0x80, 0xde, 0x84, 0xac, 0x65, 0xda, 0x27,
- 0xda, 0xc8, 0xb5, 0x68, 0xf4, 0x97, 0x53, 0x33, 0xe4, 0x79, 0xcf, 0xb5, 0xc4, 0x9f, 0xf2, 0xe2,
- 0xa4, 0xd1, 0x6b, 0x8a, 0x93, 0x58, 0x82, 0x9f, 0x15, 0x0f, 0x28, 0xcd, 0xae, 0xfc, 0x54, 0x56,
- 0x85, 0x04, 0xca, 0x41, 0xaa, 0x56, 0x6f, 0x95, 0xbb, 0x42, 0x92, 0x15, 0x15, 0xb4, 0xea, 0x72,
- 0xb9, 0x29, 0x2c, 0xa1, 0x65, 0xc8, 0x85, 0x1f, 0x26, 0x0a, 0x29, 0x54, 0x80, 0x6c, 0x75, 0x4f,
- 0x2d, 0xd3, 0xca, 0xe1, 0x34, 0x2a, 0x02, 0x3c, 0x2b, 0xef, 0x97, 0xb5, 0x9d, 0x7a, 0xb9, 0xd3,
- 0x11, 0x32, 0xd2, 0x3f, 0x66, 0xe1, 0x46, 0x03, 0x7b, 0x9e, 0x7e, 0x84, 0x0f, 0x4c, 0xff, 0x38,
- 0x52, 0xc8, 0x7c, 0xcd, 0xdf, 0x1a, 0x7d, 0x07, 0x52, 0x34, 0x07, 0xbb, 0xe8, 0xc7, 0x57, 0xc4,
- 0x75, 0xa1, 0x8c, 0xe8, 0xfb, 0xc4, 0xb2, 0xf3, 0x4a, 0xef, 0xc8, 0x26, 0x9a, 0x2f, 0x58, 0x9a,
- 0xae, 0x28, 0xd8, 0x8d, 0xa9, 0xbc, 0xcc, 0x29, 0xac, 0x31, 0xf8, 0x21, 0xac, 0x7a, 0xc6, 0x49,
- 0x78, 0x39, 0x17, 0xad, 0x50, 0xba, 0xc4, 0x59, 0xbc, 0x1b, 0x53, 0x57, 0xbc, 0x19, 0x53, 0x74,
- 0x00, 0xc5, 0xa1, 0xee, 0x6a, 0x86, 0x13, 0x76, 0x3f, 0x3d, 0xb7, 0x51, 0x8a, 0xd6, 0x44, 0x92,
- 0xe8, 0x76, 0x18, 0x2d, 0x62, 0x6d, 0x01, 0x0c, 0xc3, 0xbd, 0xc9, 0x03, 0xf2, 0xc5, 0xbe, 0x1a,
- 0xdc, 0x8d, 0xa9, 0x11, 0x08, 0xa4, 0x42, 0x3e, 0xf2, 0xa5, 0x27, 0x0f, 0xc6, 0x17, 0xfc, 0x2e,
- 0x70, 0x37, 0xa6, 0x46, 0x41, 0x50, 0x07, 0x0a, 0xb4, 0x34, 0x2e, 0x18, 0x7b, 0x6e, 0x6e, 0xd0,
- 0x48, 0x81, 0x0c, 0x01, 0x75, 0x23, 0xf5, 0x32, 0x0d, 0x80, 0xc9, 0x85, 0x24, 0x0f, 0x9d, 0x17,
- 0xba, 0x09, 0x24, 0x51, 0x78, 0x78, 0xf3, 0x88, 0xfa, 0xb0, 0x16, 0xf9, 0xe6, 0x26, 0xec, 0x6a,
- 0x61, 0xc1, 0xef, 0x13, 0x23, 0xe5, 0x31, 0xbb, 0x31, 0x95, 0xbb, 0x78, 0xd1, 0x9a, 0x19, 0x0c,
- 0xe8, 0x6c, 0x35, 0xf4, 0xc6, 0xf2, 0xe5, 0x3f, 0x83, 0x9c, 0x88, 0x89, 0x5e, 0xd3, 0xec, 0xc3,
- 0xf2, 0xf4, 0x72, 0x2e, 0x5e, 0xea, 0x10, 0x24, 0xeb, 0xad, 0x1f, 0x79, 0xae, 0xa4, 0x61, 0xc9,
- 0x75, 0x1c, 0x5f, 0xfa, 0x65, 0x1a, 0x6e, 0xca, 0x5f, 0xe0, 0xde, 0x88, 0x96, 0xdb, 0x76, 0x7c,
- 0xfd, 0x28, 0xdc, 0x4d, 0x6d, 0xc8, 0x47, 0xce, 0x46, 0x6e, 0x3d, 0x16, 0xfd, 0x0a, 0x32, 0x0a,
- 0x41, 0x0c, 0x2b, 0x9b, 0x65, 0x7e, 0xea, 0x9b, 0x7c, 0xc6, 0xce, 0x29, 0x94, 0x96, 0xe7, 0xf2,
- 0x44, 0xce, 0xeb, 0xf7, 0x64, 0x61, 0x28, 0xc6, 0x54, 0xb9, 0xf4, 0xdb, 0x53, 0xdf, 0x6b, 0x2f,
- 0xd1, 0xeb, 0xdc, 0xe8, 0x07, 0xd7, 0x1b, 0x93, 0x4f, 0xfb, 0x52, 0xf4, 0x65, 0xf8, 0x79, 0xde,
- 0xb4, 0x19, 0x4d, 0x5f, 0xd5, 0x8c, 0xf6, 0x21, 0x3f, 0xf2, 0xb0, 0x4b, 0x2f, 0xca, 0xb0, 0xb7,
- 0x91, 0xb9, 0xea, 0x80, 0xf7, 0x3c, 0xec, 0xd2, 0x22, 0x3c, 0x32, 0xe0, 0x51, 0xf0, 0xe0, 0xa1,
- 0x97, 0x90, 0xa6, 0x57, 0xa9, 0xde, 0x46, 0x96, 0x8a, 0x28, 0x5f, 0x5e, 0x04, 0xad, 0xd5, 0x53,
- 0x0c, 0x95, 0x03, 0x8a, 0x2d, 0xc8, 0x47, 0xd4, 0x3c, 0x8f, 0x43, 0xf2, 0x55, 0x00, 0xcb, 0xe9,
- 0xe9, 0x16, 0xfb, 0x94, 0x81, 0x2d, 0x80, 0x1c, 0xa5, 0x34, 0xf5, 0x01, 0x26, 0x80, 0x91, 0x61,
- 0x5c, 0x03, 0xe0, 0x73, 0xc8, 0xf0, 0x4e, 0x5f, 0x1d, 0x6c, 0xeb, 0x0f, 0xe2, 0xd1, 0xba, 0x31,
- 0xee, 0x51, 0xa3, 0x1f, 0x43, 0x9a, 0x55, 0x9f, 0xa0, 0x79, 0x92, 0x91, 0x53, 0x85, 0x2a, 0xe2,
- 0x15, 0x0a, 0xc9, 0xa4, 0xd8, 0xdd, 0xf8, 0xf6, 0xa7, 0x90, 0xa5, 0xff, 0xfd, 0x40, 0x3c, 0xd2,
- 0x77, 0xce, 0x78, 0x34, 0xc4, 0x0b, 0xa1, 0xbe, 0x4c, 0x6b, 0xc8, 0xfe, 0x3c, 0xe0, 0x9f, 0xff,
- 0xfc, 0xaf, 0x5f, 0x30, 0x9f, 0x85, 0x70, 0xed, 0xb9, 0xf6, 0xb6, 0x02, 0xcb, 0x14, 0xa0, 0xc7,
- 0xff, 0x83, 0x61, 0x1e, 0x94, 0x7f, 0x09, 0x50, 0x0a, 0x87, 0x91, 0xff, 0x72, 0xa8, 0x7c, 0x13,
- 0xbe, 0xfc, 0x0f, 0x27, 0x2a, 0x39, 0x95, 0x56, 0xa4, 0x94, 0x87, 0xe6, 0x67, 0xf9, 0x80, 0xae,
- 0x9d, 0x6e, 0x1e, 0xa6, 0xa9, 0xb8, 0x7b, 0xff, 0x1b, 0x00, 0x00, 0xff, 0xff, 0xa7, 0x38, 0x87,
- 0x1a, 0xcb, 0x42, 0x00, 0x00,
-}
diff --git a/sdks/go/pkg/beam/model/pipeline_v1/endpoints.pb.go b/sdks/go/pkg/beam/model/pipeline_v1/endpoints.pb.go
index 7644758..23c63e1 100644
--- a/sdks/go/pkg/beam/model/pipeline_v1/endpoints.pb.go
+++ b/sdks/go/pkg/beam/model/pipeline_v1/endpoints.pb.go
@@ -3,9 +3,11 @@
package pipeline_v1
-import proto "github.com/golang/protobuf/proto"
-import fmt "fmt"
-import math "math"
+import (
+ fmt "fmt"
+ proto "github.com/golang/protobuf/proto"
+ math "math"
+)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
@@ -16,7 +18,7 @@
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
-const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
type ApiServiceDescriptor struct {
// (Required) The URL to connect to.
@@ -37,16 +39,17 @@
func (m *ApiServiceDescriptor) String() string { return proto.CompactTextString(m) }
func (*ApiServiceDescriptor) ProtoMessage() {}
func (*ApiServiceDescriptor) Descriptor() ([]byte, []int) {
- return fileDescriptor_endpoints_09141d845209a85b, []int{0}
+ return fileDescriptor_6445e0c85107719d, []int{0}
}
+
func (m *ApiServiceDescriptor) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ApiServiceDescriptor.Unmarshal(m, b)
}
func (m *ApiServiceDescriptor) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ApiServiceDescriptor.Marshal(b, m, deterministic)
}
-func (dst *ApiServiceDescriptor) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ApiServiceDescriptor.Merge(dst, src)
+func (m *ApiServiceDescriptor) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ApiServiceDescriptor.Merge(m, src)
}
func (m *ApiServiceDescriptor) XXX_Size() int {
return xxx_messageInfo_ApiServiceDescriptor.Size(m)
@@ -57,6 +60,13 @@
var xxx_messageInfo_ApiServiceDescriptor proto.InternalMessageInfo
+func (m *ApiServiceDescriptor) GetUrl() string {
+ if m != nil {
+ return m.Url
+ }
+ return ""
+}
+
type isApiServiceDescriptor_Authentication interface {
isApiServiceDescriptor_Authentication()
}
@@ -74,13 +84,6 @@
return nil
}
-func (m *ApiServiceDescriptor) GetUrl() string {
- if m != nil {
- return m.Url
- }
- return ""
-}
-
func (m *ApiServiceDescriptor) GetOauth2ClientCredentialsGrant() *OAuth2ClientCredentialsGrant {
if x, ok := m.GetAuthentication().(*ApiServiceDescriptor_Oauth2ClientCredentialsGrant); ok {
return x.Oauth2ClientCredentialsGrant
@@ -88,61 +91,13 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*ApiServiceDescriptor) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _ApiServiceDescriptor_OneofMarshaler, _ApiServiceDescriptor_OneofUnmarshaler, _ApiServiceDescriptor_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*ApiServiceDescriptor) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*ApiServiceDescriptor_Oauth2ClientCredentialsGrant)(nil),
}
}
-func _ApiServiceDescriptor_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*ApiServiceDescriptor)
- // authentication
- switch x := m.Authentication.(type) {
- case *ApiServiceDescriptor_Oauth2ClientCredentialsGrant:
- b.EncodeVarint(3<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Oauth2ClientCredentialsGrant); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("ApiServiceDescriptor.Authentication has unexpected type %T", x)
- }
- return nil
-}
-
-func _ApiServiceDescriptor_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*ApiServiceDescriptor)
- switch tag {
- case 3: // authentication.oauth2_client_credentials_grant
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(OAuth2ClientCredentialsGrant)
- err := b.DecodeMessage(msg)
- m.Authentication = &ApiServiceDescriptor_Oauth2ClientCredentialsGrant{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _ApiServiceDescriptor_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*ApiServiceDescriptor)
- // authentication
- switch x := m.Authentication.(type) {
- case *ApiServiceDescriptor_Oauth2ClientCredentialsGrant:
- s := proto.Size(x.Oauth2ClientCredentialsGrant)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
type OAuth2ClientCredentialsGrant struct {
// (Required) The URL to submit a "client_credentials" grant type request for
// an OAuth access token which will be used as a bearer token for requests.
@@ -156,16 +111,17 @@
func (m *OAuth2ClientCredentialsGrant) String() string { return proto.CompactTextString(m) }
func (*OAuth2ClientCredentialsGrant) ProtoMessage() {}
func (*OAuth2ClientCredentialsGrant) Descriptor() ([]byte, []int) {
- return fileDescriptor_endpoints_09141d845209a85b, []int{1}
+ return fileDescriptor_6445e0c85107719d, []int{1}
}
+
func (m *OAuth2ClientCredentialsGrant) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_OAuth2ClientCredentialsGrant.Unmarshal(m, b)
}
func (m *OAuth2ClientCredentialsGrant) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_OAuth2ClientCredentialsGrant.Marshal(b, m, deterministic)
}
-func (dst *OAuth2ClientCredentialsGrant) XXX_Merge(src proto.Message) {
- xxx_messageInfo_OAuth2ClientCredentialsGrant.Merge(dst, src)
+func (m *OAuth2ClientCredentialsGrant) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_OAuth2ClientCredentialsGrant.Merge(m, src)
}
func (m *OAuth2ClientCredentialsGrant) XXX_Size() int {
return xxx_messageInfo_OAuth2ClientCredentialsGrant.Size(m)
@@ -188,9 +144,9 @@
proto.RegisterType((*OAuth2ClientCredentialsGrant)(nil), "org.apache.beam.model.pipeline.v1.OAuth2ClientCredentialsGrant")
}
-func init() { proto.RegisterFile("endpoints.proto", fileDescriptor_endpoints_09141d845209a85b) }
+func init() { proto.RegisterFile("endpoints.proto", fileDescriptor_6445e0c85107719d) }
-var fileDescriptor_endpoints_09141d845209a85b = []byte{
+var fileDescriptor_6445e0c85107719d = []byte{
// 235 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x84, 0x90, 0xb1, 0x4a, 0x03, 0x41,
0x10, 0x86, 0x5d, 0x03, 0x42, 0x36, 0xa0, 0xe1, 0xb0, 0x48, 0x11, 0x30, 0xa6, 0x4a, 0xb5, 0x98,
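
The endpoints stubs above illustrate the pattern repeated across every regenerated file in this change: imports are collapsed into a single grouped block, the compatibility guard moves from `ProtoPackageIsVersion2` to `ProtoPackageIsVersion3`, the file-descriptor variable is renamed to the shorter hash form, and only internal `XXX_*` plumbing is reshuffled. The exported accessors are untouched, so SDK code that consumes these messages keeps compiling. A minimal caller-side sketch, assuming the conventional Beam Go SDK import path (the path itself is not part of the diff):

```go
package main

import (
	"fmt"

	// Import path assumed from the file layout sdks/go/pkg/beam/model/pipeline_v1.
	pipeline_v1 "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
)

func main() {
	// GetUrl and GetOauth2ClientCredentialsGrant are the generated accessors
	// shown in the hunks above; only their position within the file changed.
	d := &pipeline_v1.ApiServiceDescriptor{Url: "localhost:8099"}
	fmt.Println("endpoint:", d.GetUrl())
	if d.GetOauth2ClientCredentialsGrant() == nil {
		fmt.Println("no OAuth2 client-credentials grant configured")
	}
}
```
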
diff --git a/sdks/go/pkg/beam/model/pipeline_v1/external_transforms.pb.go b/sdks/go/pkg/beam/model/pipeline_v1/external_transforms.pb.go
index 730c552..5cdf157 100644
--- a/sdks/go/pkg/beam/model/pipeline_v1/external_transforms.pb.go
+++ b/sdks/go/pkg/beam/model/pipeline_v1/external_transforms.pb.go
@@ -3,9 +3,11 @@
package pipeline_v1
-import proto "github.com/golang/protobuf/proto"
-import fmt "fmt"
-import math "math"
+import (
+ fmt "fmt"
+ proto "github.com/golang/protobuf/proto"
+ math "math"
+)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
@@ -16,7 +18,7 @@
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
-const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
type ConfigValue struct {
// Coder and its components (in case of a compound Coder)
@@ -32,16 +34,17 @@
func (m *ConfigValue) String() string { return proto.CompactTextString(m) }
func (*ConfigValue) ProtoMessage() {}
func (*ConfigValue) Descriptor() ([]byte, []int) {
- return fileDescriptor_external_transforms_3f14b624c3936585, []int{0}
+ return fileDescriptor_d0efcf8cb92c1e82, []int{0}
}
+
func (m *ConfigValue) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ConfigValue.Unmarshal(m, b)
}
func (m *ConfigValue) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ConfigValue.Marshal(b, m, deterministic)
}
-func (dst *ConfigValue) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ConfigValue.Merge(dst, src)
+func (m *ConfigValue) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ConfigValue.Merge(m, src)
}
func (m *ConfigValue) XXX_Size() int {
return xxx_messageInfo_ConfigValue.Size(m)
@@ -80,16 +83,17 @@
func (m *ExternalConfigurationPayload) String() string { return proto.CompactTextString(m) }
func (*ExternalConfigurationPayload) ProtoMessage() {}
func (*ExternalConfigurationPayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_external_transforms_3f14b624c3936585, []int{1}
+ return fileDescriptor_d0efcf8cb92c1e82, []int{1}
}
+
func (m *ExternalConfigurationPayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ExternalConfigurationPayload.Unmarshal(m, b)
}
func (m *ExternalConfigurationPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ExternalConfigurationPayload.Marshal(b, m, deterministic)
}
-func (dst *ExternalConfigurationPayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ExternalConfigurationPayload.Merge(dst, src)
+func (m *ExternalConfigurationPayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ExternalConfigurationPayload.Merge(m, src)
}
func (m *ExternalConfigurationPayload) XXX_Size() int {
return xxx_messageInfo_ExternalConfigurationPayload.Size(m)
@@ -113,11 +117,9 @@
proto.RegisterMapType((map[string]*ConfigValue)(nil), "org.apache.beam.model.pipeline.v1.ExternalConfigurationPayload.ConfigurationEntry")
}
-func init() {
- proto.RegisterFile("external_transforms.proto", fileDescriptor_external_transforms_3f14b624c3936585)
-}
+func init() { proto.RegisterFile("external_transforms.proto", fileDescriptor_d0efcf8cb92c1e82) }
-var fileDescriptor_external_transforms_3f14b624c3936585 = []byte{
+var fileDescriptor_d0efcf8cb92c1e82 = []byte{
// 278 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x51, 0x4d, 0x4b, 0xc3, 0x40,
0x10, 0x25, 0x29, 0x7e, 0x64, 0xa3, 0x20, 0x0b, 0x42, 0xac, 0x1e, 0x62, 0x4f, 0x39, 0x2d, 0xb4,
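
The metrics stubs regenerated below show the same migration for messages carrying oneof fields: the hand-rolled `_OneofMarshaler`/`_OneofUnmarshaler`/`_OneofSizer` trio is deleted and `XXX_OneofWrappers` simply enumerates the wrapper structs, while the wrapper types and getters that user code touches are unchanged. A hedged sketch of setting and reading the `Metric` oneof, using the type and field names from the hunks below (import path again assumed, as above):

```go
package main

import (
	"fmt"

	pipeline_v1 "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
)

func main() {
	// Populate the Data oneof by picking one of the generated wrapper structs...
	m := &pipeline_v1.Metric{
		Data: &pipeline_v1.Metric_CounterData{
			CounterData: &pipeline_v1.CounterData{
				Value: &pipeline_v1.CounterData_Int64Value{Int64Value: 42},
			},
		},
	}
	// ...and read it back through the generated getters, which are unaffected
	// by the switch from XXX_OneofFuncs to XXX_OneofWrappers.
	if c := m.GetCounterData(); c != nil {
		fmt.Println("counter value:", c.GetInt64Value())
	}
}
```
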
diff --git a/sdks/go/pkg/beam/model/pipeline_v1/metrics.pb.go b/sdks/go/pkg/beam/model/pipeline_v1/metrics.pb.go
index 9aecebb..24f5185 100644
--- a/sdks/go/pkg/beam/model/pipeline_v1/metrics.pb.go
+++ b/sdks/go/pkg/beam/model/pipeline_v1/metrics.pb.go
@@ -3,11 +3,13 @@
package pipeline_v1
-import proto "github.com/golang/protobuf/proto"
-import fmt "fmt"
-import math "math"
-import descriptor "github.com/golang/protobuf/protoc-gen-go/descriptor"
-import timestamp "github.com/golang/protobuf/ptypes/timestamp"
+import (
+ fmt "fmt"
+ proto "github.com/golang/protobuf/proto"
+ descriptor "github.com/golang/protobuf/protoc-gen-go/descriptor"
+ timestamp "github.com/golang/protobuf/ptypes/timestamp"
+ math "math"
+)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
@@ -18,7 +20,7 @@
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
-const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
type MonitoringInfoSpecs_Enum int32
@@ -47,6 +49,7 @@
5: "TOTAL_MSECS",
6: "USER_DISTRIBUTION_COUNTER",
}
+
var MonitoringInfoSpecs_Enum_value = map[string]int32{
"USER_COUNTER": 0,
"ELEMENT_COUNT": 1,
@@ -61,8 +64,9 @@
func (x MonitoringInfoSpecs_Enum) String() string {
return proto.EnumName(MonitoringInfoSpecs_Enum_name, int32(x))
}
+
func (MonitoringInfoSpecs_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{2, 0}
+ return fileDescriptor_6039342a2ba47b72, []int{2, 0}
}
type MonitoringInfo_MonitoringInfoLabels int32
@@ -89,6 +93,7 @@
5: "NAMESPACE",
6: "NAME",
}
+
var MonitoringInfo_MonitoringInfoLabels_value = map[string]int32{
"TRANSFORM": 0,
"PCOLLECTION": 1,
@@ -102,8 +107,9 @@
func (x MonitoringInfo_MonitoringInfoLabels) String() string {
return proto.EnumName(MonitoringInfo_MonitoringInfoLabels_name, int32(x))
}
+
func (MonitoringInfo_MonitoringInfoLabels) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{4, 0}
+ return fileDescriptor_6039342a2ba47b72, []int{4, 0}
}
type MonitoringInfoTypeUrns_Enum int32
@@ -119,6 +125,7 @@
1: "DISTRIBUTION_INT64_TYPE",
2: "LATEST_INT64_TYPE",
}
+
var MonitoringInfoTypeUrns_Enum_value = map[string]int32{
"SUM_INT64_TYPE": 0,
"DISTRIBUTION_INT64_TYPE": 1,
@@ -128,8 +135,9 @@
func (x MonitoringInfoTypeUrns_Enum) String() string {
return proto.EnumName(MonitoringInfoTypeUrns_Enum_name, int32(x))
}
+
func (MonitoringInfoTypeUrns_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{5, 0}
+ return fileDescriptor_6039342a2ba47b72, []int{5, 0}
}
// A specification containing required set of fields and labels required
@@ -152,16 +160,17 @@
func (m *MonitoringInfoSpec) String() string { return proto.CompactTextString(m) }
func (*MonitoringInfoSpec) ProtoMessage() {}
func (*MonitoringInfoSpec) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{0}
+ return fileDescriptor_6039342a2ba47b72, []int{0}
}
+
func (m *MonitoringInfoSpec) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_MonitoringInfoSpec.Unmarshal(m, b)
}
func (m *MonitoringInfoSpec) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_MonitoringInfoSpec.Marshal(b, m, deterministic)
}
-func (dst *MonitoringInfoSpec) XXX_Merge(src proto.Message) {
- xxx_messageInfo_MonitoringInfoSpec.Merge(dst, src)
+func (m *MonitoringInfoSpec) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_MonitoringInfoSpec.Merge(m, src)
}
func (m *MonitoringInfoSpec) XXX_Size() int {
return xxx_messageInfo_MonitoringInfoSpec.Size(m)
@@ -213,16 +222,17 @@
func (m *Annotation) String() string { return proto.CompactTextString(m) }
func (*Annotation) ProtoMessage() {}
func (*Annotation) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{1}
+ return fileDescriptor_6039342a2ba47b72, []int{1}
}
+
func (m *Annotation) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Annotation.Unmarshal(m, b)
}
func (m *Annotation) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Annotation.Marshal(b, m, deterministic)
}
-func (dst *Annotation) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Annotation.Merge(dst, src)
+func (m *Annotation) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Annotation.Merge(m, src)
}
func (m *Annotation) XXX_Size() int {
return xxx_messageInfo_Annotation.Size(m)
@@ -262,16 +272,17 @@
func (m *MonitoringInfoSpecs) String() string { return proto.CompactTextString(m) }
func (*MonitoringInfoSpecs) ProtoMessage() {}
func (*MonitoringInfoSpecs) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{2}
+ return fileDescriptor_6039342a2ba47b72, []int{2}
}
+
func (m *MonitoringInfoSpecs) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_MonitoringInfoSpecs.Unmarshal(m, b)
}
func (m *MonitoringInfoSpecs) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_MonitoringInfoSpecs.Marshal(b, m, deterministic)
}
-func (dst *MonitoringInfoSpecs) XXX_Merge(src proto.Message) {
- xxx_messageInfo_MonitoringInfoSpecs.Merge(dst, src)
+func (m *MonitoringInfoSpecs) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_MonitoringInfoSpecs.Merge(m, src)
}
func (m *MonitoringInfoSpecs) XXX_Size() int {
return xxx_messageInfo_MonitoringInfoSpecs.Size(m)
@@ -296,16 +307,17 @@
func (m *MonitoringInfoLabelProps) String() string { return proto.CompactTextString(m) }
func (*MonitoringInfoLabelProps) ProtoMessage() {}
func (*MonitoringInfoLabelProps) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{3}
+ return fileDescriptor_6039342a2ba47b72, []int{3}
}
+
func (m *MonitoringInfoLabelProps) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_MonitoringInfoLabelProps.Unmarshal(m, b)
}
func (m *MonitoringInfoLabelProps) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_MonitoringInfoLabelProps.Marshal(b, m, deterministic)
}
-func (dst *MonitoringInfoLabelProps) XXX_Merge(src proto.Message) {
- xxx_messageInfo_MonitoringInfoLabelProps.Merge(dst, src)
+func (m *MonitoringInfoLabelProps) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_MonitoringInfoLabelProps.Merge(m, src)
}
func (m *MonitoringInfoLabelProps) XXX_Size() int {
return xxx_messageInfo_MonitoringInfoLabelProps.Size(m)
@@ -362,16 +374,17 @@
func (m *MonitoringInfo) String() string { return proto.CompactTextString(m) }
func (*MonitoringInfo) ProtoMessage() {}
func (*MonitoringInfo) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{4}
+ return fileDescriptor_6039342a2ba47b72, []int{4}
}
+
func (m *MonitoringInfo) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_MonitoringInfo.Unmarshal(m, b)
}
func (m *MonitoringInfo) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_MonitoringInfo.Marshal(b, m, deterministic)
}
-func (dst *MonitoringInfo) XXX_Merge(src proto.Message) {
- xxx_messageInfo_MonitoringInfo.Merge(dst, src)
+func (m *MonitoringInfo) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_MonitoringInfo.Merge(m, src)
}
func (m *MonitoringInfo) XXX_Size() int {
return xxx_messageInfo_MonitoringInfo.Size(m)
@@ -382,27 +395,6 @@
var xxx_messageInfo_MonitoringInfo proto.InternalMessageInfo
-type isMonitoringInfo_Data interface {
- isMonitoringInfo_Data()
-}
-
-type MonitoringInfo_MonitoringTableData struct {
- MonitoringTableData *MonitoringTableData `protobuf:"bytes,3,opt,name=monitoring_table_data,json=monitoringTableData,proto3,oneof"`
-}
-type MonitoringInfo_Metric struct {
- Metric *Metric `protobuf:"bytes,4,opt,name=metric,proto3,oneof"`
-}
-
-func (*MonitoringInfo_MonitoringTableData) isMonitoringInfo_Data() {}
-func (*MonitoringInfo_Metric) isMonitoringInfo_Data() {}
-
-func (m *MonitoringInfo) GetData() isMonitoringInfo_Data {
- if m != nil {
- return m.Data
- }
- return nil
-}
-
func (m *MonitoringInfo) GetUrn() string {
if m != nil {
return m.Urn
@@ -417,6 +409,29 @@
return ""
}
+type isMonitoringInfo_Data interface {
+ isMonitoringInfo_Data()
+}
+
+type MonitoringInfo_MonitoringTableData struct {
+ MonitoringTableData *MonitoringTableData `protobuf:"bytes,3,opt,name=monitoring_table_data,json=monitoringTableData,proto3,oneof"`
+}
+
+type MonitoringInfo_Metric struct {
+ Metric *Metric `protobuf:"bytes,4,opt,name=metric,proto3,oneof"`
+}
+
+func (*MonitoringInfo_MonitoringTableData) isMonitoringInfo_Data() {}
+
+func (*MonitoringInfo_Metric) isMonitoringInfo_Data() {}
+
+func (m *MonitoringInfo) GetData() isMonitoringInfo_Data {
+ if m != nil {
+ return m.Data
+ }
+ return nil
+}
+
func (m *MonitoringInfo) GetMonitoringTableData() *MonitoringTableData {
if x, ok := m.GetData().(*MonitoringInfo_MonitoringTableData); ok {
return x.MonitoringTableData
@@ -445,80 +460,14 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*MonitoringInfo) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _MonitoringInfo_OneofMarshaler, _MonitoringInfo_OneofUnmarshaler, _MonitoringInfo_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*MonitoringInfo) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*MonitoringInfo_MonitoringTableData)(nil),
(*MonitoringInfo_Metric)(nil),
}
}
-func _MonitoringInfo_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*MonitoringInfo)
- // data
- switch x := m.Data.(type) {
- case *MonitoringInfo_MonitoringTableData:
- b.EncodeVarint(3<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.MonitoringTableData); err != nil {
- return err
- }
- case *MonitoringInfo_Metric:
- b.EncodeVarint(4<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Metric); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("MonitoringInfo.Data has unexpected type %T", x)
- }
- return nil
-}
-
-func _MonitoringInfo_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*MonitoringInfo)
- switch tag {
- case 3: // data.monitoring_table_data
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(MonitoringTableData)
- err := b.DecodeMessage(msg)
- m.Data = &MonitoringInfo_MonitoringTableData{msg}
- return true, err
- case 4: // data.metric
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(Metric)
- err := b.DecodeMessage(msg)
- m.Data = &MonitoringInfo_Metric{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _MonitoringInfo_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*MonitoringInfo)
- // data
- switch x := m.Data.(type) {
- case *MonitoringInfo_MonitoringTableData:
- s := proto.Size(x.MonitoringTableData)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *MonitoringInfo_Metric:
- s := proto.Size(x.Metric)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
type MonitoringInfoTypeUrns struct {
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
@@ -529,16 +478,17 @@
func (m *MonitoringInfoTypeUrns) String() string { return proto.CompactTextString(m) }
func (*MonitoringInfoTypeUrns) ProtoMessage() {}
func (*MonitoringInfoTypeUrns) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{5}
+ return fileDescriptor_6039342a2ba47b72, []int{5}
}
+
func (m *MonitoringInfoTypeUrns) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_MonitoringInfoTypeUrns.Unmarshal(m, b)
}
func (m *MonitoringInfoTypeUrns) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_MonitoringInfoTypeUrns.Marshal(b, m, deterministic)
}
-func (dst *MonitoringInfoTypeUrns) XXX_Merge(src proto.Message) {
- xxx_messageInfo_MonitoringInfoTypeUrns.Merge(dst, src)
+func (m *MonitoringInfoTypeUrns) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_MonitoringInfoTypeUrns.Merge(m, src)
}
func (m *MonitoringInfoTypeUrns) XXX_Size() int {
return xxx_messageInfo_MonitoringInfoTypeUrns.Size(m)
@@ -566,16 +516,17 @@
func (m *Metric) String() string { return proto.CompactTextString(m) }
func (*Metric) ProtoMessage() {}
func (*Metric) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{6}
+ return fileDescriptor_6039342a2ba47b72, []int{6}
}
+
func (m *Metric) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Metric.Unmarshal(m, b)
}
func (m *Metric) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Metric.Marshal(b, m, deterministic)
}
-func (dst *Metric) XXX_Merge(src proto.Message) {
- xxx_messageInfo_Metric.Merge(dst, src)
+func (m *Metric) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_Metric.Merge(m, src)
}
func (m *Metric) XXX_Size() int {
return xxx_messageInfo_Metric.Size(m)
@@ -593,16 +544,20 @@
type Metric_CounterData struct {
CounterData *CounterData `protobuf:"bytes,1,opt,name=counter_data,json=counterData,proto3,oneof"`
}
+
type Metric_DistributionData struct {
DistributionData *DistributionData `protobuf:"bytes,2,opt,name=distribution_data,json=distributionData,proto3,oneof"`
}
+
type Metric_ExtremaData struct {
ExtremaData *ExtremaData `protobuf:"bytes,3,opt,name=extrema_data,json=extremaData,proto3,oneof"`
}
-func (*Metric_CounterData) isMetric_Data() {}
+func (*Metric_CounterData) isMetric_Data() {}
+
func (*Metric_DistributionData) isMetric_Data() {}
-func (*Metric_ExtremaData) isMetric_Data() {}
+
+func (*Metric_ExtremaData) isMetric_Data() {}
func (m *Metric) GetData() isMetric_Data {
if m != nil {
@@ -632,99 +587,15 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*Metric) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _Metric_OneofMarshaler, _Metric_OneofUnmarshaler, _Metric_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*Metric) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*Metric_CounterData)(nil),
(*Metric_DistributionData)(nil),
(*Metric_ExtremaData)(nil),
}
}
-func _Metric_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*Metric)
- // data
- switch x := m.Data.(type) {
- case *Metric_CounterData:
- b.EncodeVarint(1<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.CounterData); err != nil {
- return err
- }
- case *Metric_DistributionData:
- b.EncodeVarint(2<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.DistributionData); err != nil {
- return err
- }
- case *Metric_ExtremaData:
- b.EncodeVarint(3<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.ExtremaData); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("Metric.Data has unexpected type %T", x)
- }
- return nil
-}
-
-func _Metric_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*Metric)
- switch tag {
- case 1: // data.counter_data
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(CounterData)
- err := b.DecodeMessage(msg)
- m.Data = &Metric_CounterData{msg}
- return true, err
- case 2: // data.distribution_data
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(DistributionData)
- err := b.DecodeMessage(msg)
- m.Data = &Metric_DistributionData{msg}
- return true, err
- case 3: // data.extrema_data
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(ExtremaData)
- err := b.DecodeMessage(msg)
- m.Data = &Metric_ExtremaData{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _Metric_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*Metric)
- // data
- switch x := m.Data.(type) {
- case *Metric_CounterData:
- s := proto.Size(x.CounterData)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Metric_DistributionData:
- s := proto.Size(x.DistributionData)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *Metric_ExtremaData:
- s := proto.Size(x.ExtremaData)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
// Data associated with a Counter or Gauge metric.
// This is designed to be compatible with metric collection
// systems such as DropWizard.
@@ -743,16 +614,17 @@
func (m *CounterData) String() string { return proto.CompactTextString(m) }
func (*CounterData) ProtoMessage() {}
func (*CounterData) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{7}
+ return fileDescriptor_6039342a2ba47b72, []int{7}
}
+
func (m *CounterData) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_CounterData.Unmarshal(m, b)
}
func (m *CounterData) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_CounterData.Marshal(b, m, deterministic)
}
-func (dst *CounterData) XXX_Merge(src proto.Message) {
- xxx_messageInfo_CounterData.Merge(dst, src)
+func (m *CounterData) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_CounterData.Merge(m, src)
}
func (m *CounterData) XXX_Size() int {
return xxx_messageInfo_CounterData.Size(m)
@@ -770,15 +642,19 @@
type CounterData_Int64Value struct {
Int64Value int64 `protobuf:"varint,1,opt,name=int64_value,json=int64Value,proto3,oneof"`
}
+
type CounterData_DoubleValue struct {
DoubleValue float64 `protobuf:"fixed64,2,opt,name=double_value,json=doubleValue,proto3,oneof"`
}
+
type CounterData_StringValue struct {
StringValue string `protobuf:"bytes,3,opt,name=string_value,json=stringValue,proto3,oneof"`
}
-func (*CounterData_Int64Value) isCounterData_Value() {}
+func (*CounterData_Int64Value) isCounterData_Value() {}
+
func (*CounterData_DoubleValue) isCounterData_Value() {}
+
func (*CounterData_StringValue) isCounterData_Value() {}
func (m *CounterData) GetValue() isCounterData_Value {
@@ -809,85 +685,15 @@
return ""
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*CounterData) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _CounterData_OneofMarshaler, _CounterData_OneofUnmarshaler, _CounterData_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*CounterData) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*CounterData_Int64Value)(nil),
(*CounterData_DoubleValue)(nil),
(*CounterData_StringValue)(nil),
}
}
-func _CounterData_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*CounterData)
- // value
- switch x := m.Value.(type) {
- case *CounterData_Int64Value:
- b.EncodeVarint(1<<3 | proto.WireVarint)
- b.EncodeVarint(uint64(x.Int64Value))
- case *CounterData_DoubleValue:
- b.EncodeVarint(2<<3 | proto.WireFixed64)
- b.EncodeFixed64(math.Float64bits(x.DoubleValue))
- case *CounterData_StringValue:
- b.EncodeVarint(3<<3 | proto.WireBytes)
- b.EncodeStringBytes(x.StringValue)
- case nil:
- default:
- return fmt.Errorf("CounterData.Value has unexpected type %T", x)
- }
- return nil
-}
-
-func _CounterData_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*CounterData)
- switch tag {
- case 1: // value.int64_value
- if wire != proto.WireVarint {
- return true, proto.ErrInternalBadWireType
- }
- x, err := b.DecodeVarint()
- m.Value = &CounterData_Int64Value{int64(x)}
- return true, err
- case 2: // value.double_value
- if wire != proto.WireFixed64 {
- return true, proto.ErrInternalBadWireType
- }
- x, err := b.DecodeFixed64()
- m.Value = &CounterData_DoubleValue{math.Float64frombits(x)}
- return true, err
- case 3: // value.string_value
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- x, err := b.DecodeStringBytes()
- m.Value = &CounterData_StringValue{x}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _CounterData_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*CounterData)
- // value
- switch x := m.Value.(type) {
- case *CounterData_Int64Value:
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(x.Int64Value))
- case *CounterData_DoubleValue:
- n += 1 // tag and wire
- n += 8
- case *CounterData_StringValue:
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(len(x.StringValue)))
- n += len(x.StringValue)
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
// Extrema messages are used for calculating
// Top-N/Bottom-N metrics.
type ExtremaData struct {
@@ -904,16 +710,17 @@
func (m *ExtremaData) String() string { return proto.CompactTextString(m) }
func (*ExtremaData) ProtoMessage() {}
func (*ExtremaData) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{8}
+ return fileDescriptor_6039342a2ba47b72, []int{8}
}
+
func (m *ExtremaData) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_ExtremaData.Unmarshal(m, b)
}
func (m *ExtremaData) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_ExtremaData.Marshal(b, m, deterministic)
}
-func (dst *ExtremaData) XXX_Merge(src proto.Message) {
- xxx_messageInfo_ExtremaData.Merge(dst, src)
+func (m *ExtremaData) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_ExtremaData.Merge(m, src)
}
func (m *ExtremaData) XXX_Size() int {
return xxx_messageInfo_ExtremaData.Size(m)
@@ -931,11 +738,13 @@
type ExtremaData_IntExtremaData struct {
IntExtremaData *IntExtremaData `protobuf:"bytes,1,opt,name=int_extrema_data,json=intExtremaData,proto3,oneof"`
}
+
type ExtremaData_DoubleExtremaData struct {
DoubleExtremaData *DoubleExtremaData `protobuf:"bytes,2,opt,name=double_extrema_data,json=doubleExtremaData,proto3,oneof"`
}
-func (*ExtremaData_IntExtremaData) isExtremaData_Extrema() {}
+func (*ExtremaData_IntExtremaData) isExtremaData_Extrema() {}
+
func (*ExtremaData_DoubleExtremaData) isExtremaData_Extrema() {}
func (m *ExtremaData) GetExtrema() isExtremaData_Extrema {
@@ -959,80 +768,14 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*ExtremaData) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _ExtremaData_OneofMarshaler, _ExtremaData_OneofUnmarshaler, _ExtremaData_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*ExtremaData) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*ExtremaData_IntExtremaData)(nil),
(*ExtremaData_DoubleExtremaData)(nil),
}
}
-func _ExtremaData_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*ExtremaData)
- // extrema
- switch x := m.Extrema.(type) {
- case *ExtremaData_IntExtremaData:
- b.EncodeVarint(1<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.IntExtremaData); err != nil {
- return err
- }
- case *ExtremaData_DoubleExtremaData:
- b.EncodeVarint(2<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.DoubleExtremaData); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("ExtremaData.Extrema has unexpected type %T", x)
- }
- return nil
-}
-
-func _ExtremaData_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*ExtremaData)
- switch tag {
- case 1: // extrema.int_extrema_data
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(IntExtremaData)
- err := b.DecodeMessage(msg)
- m.Extrema = &ExtremaData_IntExtremaData{msg}
- return true, err
- case 2: // extrema.double_extrema_data
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(DoubleExtremaData)
- err := b.DecodeMessage(msg)
- m.Extrema = &ExtremaData_DoubleExtremaData{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _ExtremaData_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*ExtremaData)
- // extrema
- switch x := m.Extrema.(type) {
- case *ExtremaData_IntExtremaData:
- s := proto.Size(x.IntExtremaData)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *ExtremaData_DoubleExtremaData:
- s := proto.Size(x.DoubleExtremaData)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
type IntExtremaData struct {
IntValues []int64 `protobuf:"varint,1,rep,packed,name=int_values,json=intValues,proto3" json:"int_values,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
@@ -1044,16 +787,17 @@
func (m *IntExtremaData) String() string { return proto.CompactTextString(m) }
func (*IntExtremaData) ProtoMessage() {}
func (*IntExtremaData) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{9}
+ return fileDescriptor_6039342a2ba47b72, []int{9}
}
+
func (m *IntExtremaData) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_IntExtremaData.Unmarshal(m, b)
}
func (m *IntExtremaData) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_IntExtremaData.Marshal(b, m, deterministic)
}
-func (dst *IntExtremaData) XXX_Merge(src proto.Message) {
- xxx_messageInfo_IntExtremaData.Merge(dst, src)
+func (m *IntExtremaData) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_IntExtremaData.Merge(m, src)
}
func (m *IntExtremaData) XXX_Size() int {
return xxx_messageInfo_IntExtremaData.Size(m)
@@ -1082,16 +826,17 @@
func (m *DoubleExtremaData) String() string { return proto.CompactTextString(m) }
func (*DoubleExtremaData) ProtoMessage() {}
func (*DoubleExtremaData) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{10}
+ return fileDescriptor_6039342a2ba47b72, []int{10}
}
+
func (m *DoubleExtremaData) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DoubleExtremaData.Unmarshal(m, b)
}
func (m *DoubleExtremaData) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_DoubleExtremaData.Marshal(b, m, deterministic)
}
-func (dst *DoubleExtremaData) XXX_Merge(src proto.Message) {
- xxx_messageInfo_DoubleExtremaData.Merge(dst, src)
+func (m *DoubleExtremaData) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_DoubleExtremaData.Merge(m, src)
}
func (m *DoubleExtremaData) XXX_Size() int {
return xxx_messageInfo_DoubleExtremaData.Size(m)
@@ -1127,16 +872,17 @@
func (m *DistributionData) String() string { return proto.CompactTextString(m) }
func (*DistributionData) ProtoMessage() {}
func (*DistributionData) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{11}
+ return fileDescriptor_6039342a2ba47b72, []int{11}
}
+
func (m *DistributionData) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DistributionData.Unmarshal(m, b)
}
func (m *DistributionData) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_DistributionData.Marshal(b, m, deterministic)
}
-func (dst *DistributionData) XXX_Merge(src proto.Message) {
- xxx_messageInfo_DistributionData.Merge(dst, src)
+func (m *DistributionData) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_DistributionData.Merge(m, src)
}
func (m *DistributionData) XXX_Size() int {
return xxx_messageInfo_DistributionData.Size(m)
@@ -1154,11 +900,13 @@
type DistributionData_IntDistributionData struct {
IntDistributionData *IntDistributionData `protobuf:"bytes,1,opt,name=int_distribution_data,json=intDistributionData,proto3,oneof"`
}
+
type DistributionData_DoubleDistributionData struct {
DoubleDistributionData *DoubleDistributionData `protobuf:"bytes,2,opt,name=double_distribution_data,json=doubleDistributionData,proto3,oneof"`
}
-func (*DistributionData_IntDistributionData) isDistributionData_Distribution() {}
+func (*DistributionData_IntDistributionData) isDistributionData_Distribution() {}
+
func (*DistributionData_DoubleDistributionData) isDistributionData_Distribution() {}
func (m *DistributionData) GetDistribution() isDistributionData_Distribution {
@@ -1182,80 +930,14 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*DistributionData) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _DistributionData_OneofMarshaler, _DistributionData_OneofUnmarshaler, _DistributionData_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*DistributionData) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*DistributionData_IntDistributionData)(nil),
(*DistributionData_DoubleDistributionData)(nil),
}
}
-func _DistributionData_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*DistributionData)
- // distribution
- switch x := m.Distribution.(type) {
- case *DistributionData_IntDistributionData:
- b.EncodeVarint(1<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.IntDistributionData); err != nil {
- return err
- }
- case *DistributionData_DoubleDistributionData:
- b.EncodeVarint(2<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.DoubleDistributionData); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("DistributionData.Distribution has unexpected type %T", x)
- }
- return nil
-}
-
-func _DistributionData_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*DistributionData)
- switch tag {
- case 1: // distribution.int_distribution_data
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(IntDistributionData)
- err := b.DecodeMessage(msg)
- m.Distribution = &DistributionData_IntDistributionData{msg}
- return true, err
- case 2: // distribution.double_distribution_data
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(DoubleDistributionData)
- err := b.DecodeMessage(msg)
- m.Distribution = &DistributionData_DoubleDistributionData{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _DistributionData_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*DistributionData)
- // distribution
- switch x := m.Distribution.(type) {
- case *DistributionData_IntDistributionData:
- s := proto.Size(x.IntDistributionData)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case *DistributionData_DoubleDistributionData:
- s := proto.Size(x.DoubleDistributionData)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
type IntDistributionData struct {
Count int64 `protobuf:"varint,1,opt,name=count,proto3" json:"count,omitempty"`
Sum int64 `protobuf:"varint,2,opt,name=sum,proto3" json:"sum,omitempty"`
@@ -1270,16 +952,17 @@
func (m *IntDistributionData) String() string { return proto.CompactTextString(m) }
func (*IntDistributionData) ProtoMessage() {}
func (*IntDistributionData) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{12}
+ return fileDescriptor_6039342a2ba47b72, []int{12}
}
+
func (m *IntDistributionData) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_IntDistributionData.Unmarshal(m, b)
}
func (m *IntDistributionData) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_IntDistributionData.Marshal(b, m, deterministic)
}
-func (dst *IntDistributionData) XXX_Merge(src proto.Message) {
- xxx_messageInfo_IntDistributionData.Merge(dst, src)
+func (m *IntDistributionData) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_IntDistributionData.Merge(m, src)
}
func (m *IntDistributionData) XXX_Size() int {
return xxx_messageInfo_IntDistributionData.Size(m)
@@ -1332,16 +1015,17 @@
func (m *DoubleDistributionData) String() string { return proto.CompactTextString(m) }
func (*DoubleDistributionData) ProtoMessage() {}
func (*DoubleDistributionData) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{13}
+ return fileDescriptor_6039342a2ba47b72, []int{13}
}
+
func (m *DoubleDistributionData) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_DoubleDistributionData.Unmarshal(m, b)
}
func (m *DoubleDistributionData) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_DoubleDistributionData.Marshal(b, m, deterministic)
}
-func (dst *DoubleDistributionData) XXX_Merge(src proto.Message) {
- xxx_messageInfo_DoubleDistributionData.Merge(dst, src)
+func (m *DoubleDistributionData) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_DoubleDistributionData.Merge(m, src)
}
func (m *DoubleDistributionData) XXX_Size() int {
return xxx_messageInfo_DoubleDistributionData.Size(m)
@@ -1414,16 +1098,17 @@
func (m *MonitoringTableData) String() string { return proto.CompactTextString(m) }
func (*MonitoringTableData) ProtoMessage() {}
func (*MonitoringTableData) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{14}
+ return fileDescriptor_6039342a2ba47b72, []int{14}
}
+
func (m *MonitoringTableData) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_MonitoringTableData.Unmarshal(m, b)
}
func (m *MonitoringTableData) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_MonitoringTableData.Marshal(b, m, deterministic)
}
-func (dst *MonitoringTableData) XXX_Merge(src proto.Message) {
- xxx_messageInfo_MonitoringTableData.Merge(dst, src)
+func (m *MonitoringTableData) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_MonitoringTableData.Merge(m, src)
}
func (m *MonitoringTableData) XXX_Size() int {
return xxx_messageInfo_MonitoringTableData.Size(m)
@@ -1466,16 +1151,17 @@
func (m *MonitoringTableData_MonitoringColumnValue) String() string { return proto.CompactTextString(m) }
func (*MonitoringTableData_MonitoringColumnValue) ProtoMessage() {}
func (*MonitoringTableData_MonitoringColumnValue) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{14, 0}
+ return fileDescriptor_6039342a2ba47b72, []int{14, 0}
}
+
func (m *MonitoringTableData_MonitoringColumnValue) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_MonitoringTableData_MonitoringColumnValue.Unmarshal(m, b)
}
func (m *MonitoringTableData_MonitoringColumnValue) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_MonitoringTableData_MonitoringColumnValue.Marshal(b, m, deterministic)
}
-func (dst *MonitoringTableData_MonitoringColumnValue) XXX_Merge(src proto.Message) {
- xxx_messageInfo_MonitoringTableData_MonitoringColumnValue.Merge(dst, src)
+func (m *MonitoringTableData_MonitoringColumnValue) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_MonitoringTableData_MonitoringColumnValue.Merge(m, src)
}
func (m *MonitoringTableData_MonitoringColumnValue) XXX_Size() int {
return xxx_messageInfo_MonitoringTableData_MonitoringColumnValue.Size(m)
@@ -1493,22 +1179,28 @@
type MonitoringTableData_MonitoringColumnValue_Int64Value struct {
Int64Value int64 `protobuf:"varint,1,opt,name=int64_value,json=int64Value,proto3,oneof"`
}
+
type MonitoringTableData_MonitoringColumnValue_DoubleValue struct {
DoubleValue float64 `protobuf:"fixed64,2,opt,name=double_value,json=doubleValue,proto3,oneof"`
}
+
type MonitoringTableData_MonitoringColumnValue_StringValue struct {
StringValue string `protobuf:"bytes,3,opt,name=string_value,json=stringValue,proto3,oneof"`
}
+
type MonitoringTableData_MonitoringColumnValue_Timestamp struct {
Timestamp *timestamp.Timestamp `protobuf:"bytes,4,opt,name=timestamp,proto3,oneof"`
}
func (*MonitoringTableData_MonitoringColumnValue_Int64Value) isMonitoringTableData_MonitoringColumnValue_Value() {
}
+
func (*MonitoringTableData_MonitoringColumnValue_DoubleValue) isMonitoringTableData_MonitoringColumnValue_Value() {
}
+
func (*MonitoringTableData_MonitoringColumnValue_StringValue) isMonitoringTableData_MonitoringColumnValue_Value() {
}
+
func (*MonitoringTableData_MonitoringColumnValue_Timestamp) isMonitoringTableData_MonitoringColumnValue_Value() {
}
@@ -1547,9 +1239,9 @@
return nil
}
-// XXX_OneofFuncs is for the internal use of the proto package.
-func (*MonitoringTableData_MonitoringColumnValue) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) {
- return _MonitoringTableData_MonitoringColumnValue_OneofMarshaler, _MonitoringTableData_MonitoringColumnValue_OneofUnmarshaler, _MonitoringTableData_MonitoringColumnValue_OneofSizer, []interface{}{
+// XXX_OneofWrappers is for the internal use of the proto package.
+func (*MonitoringTableData_MonitoringColumnValue) XXX_OneofWrappers() []interface{} {
+ return []interface{}{
(*MonitoringTableData_MonitoringColumnValue_Int64Value)(nil),
(*MonitoringTableData_MonitoringColumnValue_DoubleValue)(nil),
(*MonitoringTableData_MonitoringColumnValue_StringValue)(nil),
@@ -1557,94 +1249,6 @@
}
}
-func _MonitoringTableData_MonitoringColumnValue_OneofMarshaler(msg proto.Message, b *proto.Buffer) error {
- m := msg.(*MonitoringTableData_MonitoringColumnValue)
- // value
- switch x := m.Value.(type) {
- case *MonitoringTableData_MonitoringColumnValue_Int64Value:
- b.EncodeVarint(1<<3 | proto.WireVarint)
- b.EncodeVarint(uint64(x.Int64Value))
- case *MonitoringTableData_MonitoringColumnValue_DoubleValue:
- b.EncodeVarint(2<<3 | proto.WireFixed64)
- b.EncodeFixed64(math.Float64bits(x.DoubleValue))
- case *MonitoringTableData_MonitoringColumnValue_StringValue:
- b.EncodeVarint(3<<3 | proto.WireBytes)
- b.EncodeStringBytes(x.StringValue)
- case *MonitoringTableData_MonitoringColumnValue_Timestamp:
- b.EncodeVarint(4<<3 | proto.WireBytes)
- if err := b.EncodeMessage(x.Timestamp); err != nil {
- return err
- }
- case nil:
- default:
- return fmt.Errorf("MonitoringTableData_MonitoringColumnValue.Value has unexpected type %T", x)
- }
- return nil
-}
-
-func _MonitoringTableData_MonitoringColumnValue_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) {
- m := msg.(*MonitoringTableData_MonitoringColumnValue)
- switch tag {
- case 1: // value.int64_value
- if wire != proto.WireVarint {
- return true, proto.ErrInternalBadWireType
- }
- x, err := b.DecodeVarint()
- m.Value = &MonitoringTableData_MonitoringColumnValue_Int64Value{int64(x)}
- return true, err
- case 2: // value.double_value
- if wire != proto.WireFixed64 {
- return true, proto.ErrInternalBadWireType
- }
- x, err := b.DecodeFixed64()
- m.Value = &MonitoringTableData_MonitoringColumnValue_DoubleValue{math.Float64frombits(x)}
- return true, err
- case 3: // value.string_value
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- x, err := b.DecodeStringBytes()
- m.Value = &MonitoringTableData_MonitoringColumnValue_StringValue{x}
- return true, err
- case 4: // value.timestamp
- if wire != proto.WireBytes {
- return true, proto.ErrInternalBadWireType
- }
- msg := new(timestamp.Timestamp)
- err := b.DecodeMessage(msg)
- m.Value = &MonitoringTableData_MonitoringColumnValue_Timestamp{msg}
- return true, err
- default:
- return false, nil
- }
-}
-
-func _MonitoringTableData_MonitoringColumnValue_OneofSizer(msg proto.Message) (n int) {
- m := msg.(*MonitoringTableData_MonitoringColumnValue)
- // value
- switch x := m.Value.(type) {
- case *MonitoringTableData_MonitoringColumnValue_Int64Value:
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(x.Int64Value))
- case *MonitoringTableData_MonitoringColumnValue_DoubleValue:
- n += 1 // tag and wire
- n += 8
- case *MonitoringTableData_MonitoringColumnValue_StringValue:
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(len(x.StringValue)))
- n += len(x.StringValue)
- case *MonitoringTableData_MonitoringColumnValue_Timestamp:
- s := proto.Size(x.Timestamp)
- n += 1 // tag and wire
- n += proto.SizeVarint(uint64(s))
- n += s
- case nil:
- default:
- panic(fmt.Sprintf("proto: unexpected type %T in oneof", x))
- }
- return n
-}
-
type MonitoringTableData_MonitoringRow struct {
Values []*MonitoringTableData_MonitoringColumnValue `protobuf:"bytes,1,rep,name=values,proto3" json:"values,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
@@ -1656,16 +1260,17 @@
func (m *MonitoringTableData_MonitoringRow) String() string { return proto.CompactTextString(m) }
func (*MonitoringTableData_MonitoringRow) ProtoMessage() {}
func (*MonitoringTableData_MonitoringRow) Descriptor() ([]byte, []int) {
- return fileDescriptor_metrics_f27e09e153e79ab4, []int{14, 1}
+ return fileDescriptor_6039342a2ba47b72, []int{14, 1}
}
+
func (m *MonitoringTableData_MonitoringRow) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_MonitoringTableData_MonitoringRow.Unmarshal(m, b)
}
func (m *MonitoringTableData_MonitoringRow) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_MonitoringTableData_MonitoringRow.Marshal(b, m, deterministic)
}
-func (dst *MonitoringTableData_MonitoringRow) XXX_Merge(src proto.Message) {
- xxx_messageInfo_MonitoringTableData_MonitoringRow.Merge(dst, src)
+func (m *MonitoringTableData_MonitoringRow) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_MonitoringTableData_MonitoringRow.Merge(m, src)
}
func (m *MonitoringTableData_MonitoringRow) XXX_Size() int {
return xxx_messageInfo_MonitoringTableData_MonitoringRow.Size(m)
@@ -1688,7 +1293,7 @@
ExtensionType: (*MonitoringInfoLabelProps)(nil),
Field: 127337796,
Name: "org.apache.beam.model.pipeline.v1.label_props",
- Tag: "bytes,127337796,opt,name=label_props,json=labelProps",
+ Tag: "bytes,127337796,opt,name=label_props",
Filename: "metrics.proto",
}
@@ -1697,11 +1302,14 @@
ExtensionType: (*MonitoringInfoSpec)(nil),
Field: 207174266,
Name: "org.apache.beam.model.pipeline.v1.monitoring_info_spec",
- Tag: "bytes,207174266,opt,name=monitoring_info_spec,json=monitoringInfoSpec",
+ Tag: "bytes,207174266,opt,name=monitoring_info_spec",
Filename: "metrics.proto",
}
func init() {
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.MonitoringInfoSpecs_Enum", MonitoringInfoSpecs_Enum_name, MonitoringInfoSpecs_Enum_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.MonitoringInfo_MonitoringInfoLabels", MonitoringInfo_MonitoringInfoLabels_name, MonitoringInfo_MonitoringInfoLabels_value)
+ proto.RegisterEnum("org.apache.beam.model.pipeline.v1.MonitoringInfoTypeUrns_Enum", MonitoringInfoTypeUrns_Enum_name, MonitoringInfoTypeUrns_Enum_value)
proto.RegisterType((*MonitoringInfoSpec)(nil), "org.apache.beam.model.pipeline.v1.MonitoringInfoSpec")
proto.RegisterType((*Annotation)(nil), "org.apache.beam.model.pipeline.v1.Annotation")
proto.RegisterType((*MonitoringInfoSpecs)(nil), "org.apache.beam.model.pipeline.v1.MonitoringInfoSpecs")
@@ -1720,16 +1328,13 @@
proto.RegisterType((*MonitoringTableData)(nil), "org.apache.beam.model.pipeline.v1.MonitoringTableData")
proto.RegisterType((*MonitoringTableData_MonitoringColumnValue)(nil), "org.apache.beam.model.pipeline.v1.MonitoringTableData.MonitoringColumnValue")
proto.RegisterType((*MonitoringTableData_MonitoringRow)(nil), "org.apache.beam.model.pipeline.v1.MonitoringTableData.MonitoringRow")
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.MonitoringInfoSpecs_Enum", MonitoringInfoSpecs_Enum_name, MonitoringInfoSpecs_Enum_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.MonitoringInfo_MonitoringInfoLabels", MonitoringInfo_MonitoringInfoLabels_name, MonitoringInfo_MonitoringInfoLabels_value)
- proto.RegisterEnum("org.apache.beam.model.pipeline.v1.MonitoringInfoTypeUrns_Enum", MonitoringInfoTypeUrns_Enum_name, MonitoringInfoTypeUrns_Enum_value)
proto.RegisterExtension(E_LabelProps)
proto.RegisterExtension(E_MonitoringInfoSpec)
}
-func init() { proto.RegisterFile("metrics.proto", fileDescriptor_metrics_f27e09e153e79ab4) }
+func init() { proto.RegisterFile("metrics.proto", fileDescriptor_6039342a2ba47b72) }
-var fileDescriptor_metrics_f27e09e153e79ab4 = []byte{
+var fileDescriptor_6039342a2ba47b72 = []byte{
// 1897 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xbc, 0x58, 0xdd, 0x6f, 0x23, 0x57,
0x15, 0xcf, 0xb5, 0x1d, 0xa7, 0x39, 0x4e, 0x82, 0x73, 0xb3, 0xbb, 0x75, 0x47, 0xac, 0xb8, 0xeb,
diff --git a/sdks/go/pkg/beam/model/pipeline_v1/standard_window_fns.pb.go b/sdks/go/pkg/beam/model/pipeline_v1/standard_window_fns.pb.go
index 993b7ec..0dcdba8 100644
--- a/sdks/go/pkg/beam/model/pipeline_v1/standard_window_fns.pb.go
+++ b/sdks/go/pkg/beam/model/pipeline_v1/standard_window_fns.pb.go
@@ -3,11 +3,13 @@
package pipeline_v1
-import proto "github.com/golang/protobuf/proto"
-import fmt "fmt"
-import math "math"
-import duration "github.com/golang/protobuf/ptypes/duration"
-import timestamp "github.com/golang/protobuf/ptypes/timestamp"
+import (
+ fmt "fmt"
+ proto "github.com/golang/protobuf/proto"
+ duration "github.com/golang/protobuf/ptypes/duration"
+ timestamp "github.com/golang/protobuf/ptypes/timestamp"
+ math "math"
+)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
@@ -18,7 +20,7 @@
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
-const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
type GlobalWindowsPayload_Enum int32
@@ -30,6 +32,7 @@
var GlobalWindowsPayload_Enum_name = map[int32]string{
0: "PROPERTIES",
}
+
var GlobalWindowsPayload_Enum_value = map[string]int32{
"PROPERTIES": 0,
}
@@ -37,8 +40,9 @@
func (x GlobalWindowsPayload_Enum) String() string {
return proto.EnumName(GlobalWindowsPayload_Enum_name, int32(x))
}
+
func (GlobalWindowsPayload_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_standard_window_fns_fd35e1520ea21389, []int{0, 0}
+ return fileDescriptor_fab9dd76b0d0d680, []int{0, 0}
}
type FixedWindowsPayload_Enum int32
@@ -51,6 +55,7 @@
var FixedWindowsPayload_Enum_name = map[int32]string{
0: "PROPERTIES",
}
+
var FixedWindowsPayload_Enum_value = map[string]int32{
"PROPERTIES": 0,
}
@@ -58,8 +63,9 @@
func (x FixedWindowsPayload_Enum) String() string {
return proto.EnumName(FixedWindowsPayload_Enum_name, int32(x))
}
+
func (FixedWindowsPayload_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_standard_window_fns_fd35e1520ea21389, []int{1, 0}
+ return fileDescriptor_fab9dd76b0d0d680, []int{1, 0}
}
type SlidingWindowsPayload_Enum int32
@@ -72,6 +78,7 @@
var SlidingWindowsPayload_Enum_name = map[int32]string{
0: "PROPERTIES",
}
+
var SlidingWindowsPayload_Enum_value = map[string]int32{
"PROPERTIES": 0,
}
@@ -79,8 +86,9 @@
func (x SlidingWindowsPayload_Enum) String() string {
return proto.EnumName(SlidingWindowsPayload_Enum_name, int32(x))
}
+
func (SlidingWindowsPayload_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_standard_window_fns_fd35e1520ea21389, []int{2, 0}
+ return fileDescriptor_fab9dd76b0d0d680, []int{2, 0}
}
type SessionsPayload_Enum int32
@@ -93,6 +101,7 @@
var SessionsPayload_Enum_name = map[int32]string{
0: "PROPERTIES",
}
+
var SessionsPayload_Enum_value = map[string]int32{
"PROPERTIES": 0,
}
@@ -100,8 +109,9 @@
func (x SessionsPayload_Enum) String() string {
return proto.EnumName(SessionsPayload_Enum_name, int32(x))
}
+
func (SessionsPayload_Enum) EnumDescriptor() ([]byte, []int) {
- return fileDescriptor_standard_window_fns_fd35e1520ea21389, []int{3, 0}
+ return fileDescriptor_fab9dd76b0d0d680, []int{3, 0}
}
type GlobalWindowsPayload struct {
@@ -114,16 +124,17 @@
func (m *GlobalWindowsPayload) String() string { return proto.CompactTextString(m) }
func (*GlobalWindowsPayload) ProtoMessage() {}
func (*GlobalWindowsPayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_standard_window_fns_fd35e1520ea21389, []int{0}
+ return fileDescriptor_fab9dd76b0d0d680, []int{0}
}
+
func (m *GlobalWindowsPayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GlobalWindowsPayload.Unmarshal(m, b)
}
func (m *GlobalWindowsPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GlobalWindowsPayload.Marshal(b, m, deterministic)
}
-func (dst *GlobalWindowsPayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_GlobalWindowsPayload.Merge(dst, src)
+func (m *GlobalWindowsPayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_GlobalWindowsPayload.Merge(m, src)
}
func (m *GlobalWindowsPayload) XXX_Size() int {
return xxx_messageInfo_GlobalWindowsPayload.Size(m)
@@ -146,16 +157,17 @@
func (m *FixedWindowsPayload) String() string { return proto.CompactTextString(m) }
func (*FixedWindowsPayload) ProtoMessage() {}
func (*FixedWindowsPayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_standard_window_fns_fd35e1520ea21389, []int{1}
+ return fileDescriptor_fab9dd76b0d0d680, []int{1}
}
+
func (m *FixedWindowsPayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_FixedWindowsPayload.Unmarshal(m, b)
}
func (m *FixedWindowsPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_FixedWindowsPayload.Marshal(b, m, deterministic)
}
-func (dst *FixedWindowsPayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_FixedWindowsPayload.Merge(dst, src)
+func (m *FixedWindowsPayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_FixedWindowsPayload.Merge(m, src)
}
func (m *FixedWindowsPayload) XXX_Size() int {
return xxx_messageInfo_FixedWindowsPayload.Size(m)
@@ -193,16 +205,17 @@
func (m *SlidingWindowsPayload) String() string { return proto.CompactTextString(m) }
func (*SlidingWindowsPayload) ProtoMessage() {}
func (*SlidingWindowsPayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_standard_window_fns_fd35e1520ea21389, []int{2}
+ return fileDescriptor_fab9dd76b0d0d680, []int{2}
}
+
func (m *SlidingWindowsPayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_SlidingWindowsPayload.Unmarshal(m, b)
}
func (m *SlidingWindowsPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_SlidingWindowsPayload.Marshal(b, m, deterministic)
}
-func (dst *SlidingWindowsPayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_SlidingWindowsPayload.Merge(dst, src)
+func (m *SlidingWindowsPayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_SlidingWindowsPayload.Merge(m, src)
}
func (m *SlidingWindowsPayload) XXX_Size() int {
return xxx_messageInfo_SlidingWindowsPayload.Size(m)
@@ -245,16 +258,17 @@
func (m *SessionsPayload) String() string { return proto.CompactTextString(m) }
func (*SessionsPayload) ProtoMessage() {}
func (*SessionsPayload) Descriptor() ([]byte, []int) {
- return fileDescriptor_standard_window_fns_fd35e1520ea21389, []int{3}
+ return fileDescriptor_fab9dd76b0d0d680, []int{3}
}
+
func (m *SessionsPayload) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_SessionsPayload.Unmarshal(m, b)
}
func (m *SessionsPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_SessionsPayload.Marshal(b, m, deterministic)
}
-func (dst *SessionsPayload) XXX_Merge(src proto.Message) {
- xxx_messageInfo_SessionsPayload.Merge(dst, src)
+func (m *SessionsPayload) XXX_Merge(src proto.Message) {
+ xxx_messageInfo_SessionsPayload.Merge(m, src)
}
func (m *SessionsPayload) XXX_Size() int {
return xxx_messageInfo_SessionsPayload.Size(m)
@@ -273,21 +287,19 @@
}
func init() {
- proto.RegisterType((*GlobalWindowsPayload)(nil), "org.apache.beam.model.pipeline.v1.GlobalWindowsPayload")
- proto.RegisterType((*FixedWindowsPayload)(nil), "org.apache.beam.model.pipeline.v1.FixedWindowsPayload")
- proto.RegisterType((*SlidingWindowsPayload)(nil), "org.apache.beam.model.pipeline.v1.SlidingWindowsPayload")
- proto.RegisterType((*SessionsPayload)(nil), "org.apache.beam.model.pipeline.v1.SessionsPayload")
proto.RegisterEnum("org.apache.beam.model.pipeline.v1.GlobalWindowsPayload_Enum", GlobalWindowsPayload_Enum_name, GlobalWindowsPayload_Enum_value)
proto.RegisterEnum("org.apache.beam.model.pipeline.v1.FixedWindowsPayload_Enum", FixedWindowsPayload_Enum_name, FixedWindowsPayload_Enum_value)
proto.RegisterEnum("org.apache.beam.model.pipeline.v1.SlidingWindowsPayload_Enum", SlidingWindowsPayload_Enum_name, SlidingWindowsPayload_Enum_value)
proto.RegisterEnum("org.apache.beam.model.pipeline.v1.SessionsPayload_Enum", SessionsPayload_Enum_name, SessionsPayload_Enum_value)
+ proto.RegisterType((*GlobalWindowsPayload)(nil), "org.apache.beam.model.pipeline.v1.GlobalWindowsPayload")
+ proto.RegisterType((*FixedWindowsPayload)(nil), "org.apache.beam.model.pipeline.v1.FixedWindowsPayload")
+ proto.RegisterType((*SlidingWindowsPayload)(nil), "org.apache.beam.model.pipeline.v1.SlidingWindowsPayload")
+ proto.RegisterType((*SessionsPayload)(nil), "org.apache.beam.model.pipeline.v1.SessionsPayload")
}
-func init() {
- proto.RegisterFile("standard_window_fns.proto", fileDescriptor_standard_window_fns_fd35e1520ea21389)
-}
+func init() { proto.RegisterFile("standard_window_fns.proto", fileDescriptor_fab9dd76b0d0d680) }
-var fileDescriptor_standard_window_fns_fd35e1520ea21389 = []byte{
+var fileDescriptor_fab9dd76b0d0d680 = []byte{
// 407 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xbc, 0x93, 0x31, 0x4f, 0xdb, 0x40,
0x14, 0xc7, 0xeb, 0x36, 0x4d, 0xab, 0xcb, 0xd0, 0xd6, 0x6d, 0xa4, 0xc4, 0x43, 0x9b, 0x78, 0x68,
diff --git a/sdks/java/container/boot.go b/sdks/java/container/boot.go
index 67cd387..91e588f 100644
--- a/sdks/java/container/boot.go
+++ b/sdks/java/container/boot.go
@@ -97,6 +97,10 @@
os.Setenv("LOGGING_API_SERVICE_DESCRIPTOR", proto.MarshalTextString(&pb.ApiServiceDescriptor{Url: *loggingEndpoint}))
os.Setenv("CONTROL_API_SERVICE_DESCRIPTOR", proto.MarshalTextString(&pb.ApiServiceDescriptor{Url: *controlEndpoint}))
+ if info.GetStatusEndpoint() != nil {
+ os.Setenv("STATUS_API_SERVICE_DESCRIPTOR", proto.MarshalTextString(info.GetStatusEndpoint()))
+ }
+
const jarsDir = "/opt/apache/beam/jars"
cp := []string{
filepath.Join(jarsDir, "slf4j-api.jar"),
diff --git a/sdks/java/core/build.gradle b/sdks/java/core/build.gradle
index a7ed6c2..a14305d 100644
--- a/sdks/java/core/build.gradle
+++ b/sdks/java/core/build.gradle
@@ -69,6 +69,7 @@
compile library.java.protobuf_java
compile library.java.commons_compress
compile library.java.commons_lang3
+ shadow library.java.jsr305
shadow library.java.jackson_core
shadow library.java.jackson_annotations
shadow library.java.jackson_databind
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileIO.java
index 3785911..78f7559 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileIO.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileIO.java
@@ -135,9 +135,15 @@
* .apply(FileIO.readMatches().withCompression(GZIP))
* .apply(MapElements
* // uses imports from TypeDescriptors
- * .into(KVs(strings(), strings()))
- * .via((ReadableFile f) -> KV.of(
- * f.getMetadata().resourceId().toString(), f.readFullyAsUTF8String())));
+ * .into(kvs(strings(), strings()))
+ * .via((ReadableFile f) -> {
+ * try {
+ * return KV.of(
+ * f.getMetadata().resourceId().toString(), f.readFullyAsUTF8String());
+ * } catch (IOException ex) {
+ * throw new RuntimeException("Failed to read the file", ex);
+ * }
+ * }));
* }</pre>
*
* <h2>Writing files</h2>
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/ExperimentalOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/ExperimentalOptions.java
index b9825ca..017b0d4 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/ExperimentalOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/ExperimentalOptions.java
@@ -30,6 +30,9 @@
@Experimental
@Hidden
public interface ExperimentalOptions extends PipelineOptions {
+
+ String STATE_CACHE_SIZE = "state_cache_size";
+
@Description(
"[Experimental] Apache Beam provides a number of experimental features that can "
+ "be enabled with this flag. If executing against a managed service, please contact the "
@@ -60,4 +63,22 @@
}
options.setExperiments(experiments);
}
+
+ /** Return the value for the specified experiment or null if not present. */
+ static String getExperimentValue(PipelineOptions options, String experiment) {
+ if (options == null) {
+ return null;
+ }
+ List<String> experiments = options.as(ExperimentalOptions.class).getExperiments();
+ if (experiments == null) {
+ return null;
+ }
+ for (String experimentEntry : experiments) {
+ String[] tokens = experimentEntry.split(experiment + "=", -1);
+ if (tokens.length > 1) {
+ return tokens[1];
+ }
+ }
+ return null;
+ }
}
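For context, a minimal usage sketch (not part of this diff) of the new getExperimentValue helper and the STATE_CACHE_SIZE constant; the pipeline arguments and the surrounding scaffolding (imports, enclosing class) are illustrative assumptions:

  // Hypothetical invocation; assumes the usual PipelineOptionsFactory entry point.
  PipelineOptions options =
      PipelineOptionsFactory.fromArgs("--experiments=state_cache_size=100").create();
  // Returns "100" here; returns null when the experiment is not present.
  String cacheSize =
      ExperimentalOptions.getExperimentValue(options, ExperimentalOptions.STATE_CACHE_SIZE);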
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/PortablePipelineOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/PortablePipelineOptions.java
index a531e86..b67a0b8 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/PortablePipelineOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/PortablePipelineOptions.java
@@ -88,6 +88,13 @@
void setEnvironmentExpirationMillis(int environmentExpirationMillis);
+ @Description(
+ "Specifies if bundles should be distributed to the next available free SDK worker. By default SDK workers are pinned to runner tasks for the duration of the pipeline. This option can help for pipelines with long and skewed bundle execution times to increase throughput and improve worker utilization.")
+ @Default.Boolean(false)
+ boolean getLoadBalanceBundles();
+
+ void setLoadBalanceBundles(boolean loadBalanceBundles);
+
@Description("The output path for the executable file to be created.")
@Nullable
String getOutputExecutablePath();
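A minimal sketch (not part of this diff) of how the new loadBalanceBundles flag might be enabled, assuming standard PipelineOptionsFactory parsing; imports are omitted:

  PortablePipelineOptions options =
      PipelineOptionsFactory.fromArgs("--loadBalanceBundles=true")
          .as(PortablePipelineOptions.class);
  // Equivalent programmatic form:
  options.setLoadBalanceBundles(true);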
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/FieldValueSetter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/FieldValueSetter.java
index 5d9e82b..db7caaa 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/FieldValueSetter.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/FieldValueSetter.java
@@ -26,7 +26,7 @@
*
* <p>An interface to set a field of a class.
*
- * <p>Implementations of this interface are generated at runtime to map Row fields back into objet
+ * <p>Implementations of this interface are generated at runtime to map Row fields back into object
* fields.
*/
@Internal
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/FieldValueTypeInformation.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/FieldValueTypeInformation.java
index a6ecc45..33ed888 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/FieldValueTypeInformation.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/FieldValueTypeInformation.java
@@ -22,7 +22,10 @@
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.Arrays;
+import java.util.Collections;
+import java.util.Map;
import javax.annotation.Nullable;
+import org.apache.beam.sdk.schemas.logicaltypes.OneOfType;
import org.apache.beam.sdk.schemas.utils.ReflectUtils;
import org.apache.beam.sdk.values.TypeDescriptor;
@@ -47,6 +50,8 @@
@Nullable
public abstract Method getMethod();
+ public abstract Map<String, FieldValueTypeInformation> getOneOfTypes();
+
/** If the field is a container type, returns the element type. */
@Nullable
public abstract FieldValueTypeInformation getElementType();
@@ -62,7 +67,7 @@
abstract Builder toBuilder();
@AutoValue.Builder
- abstract static class Builder {
+ public abstract static class Builder {
public abstract Builder setName(String name);
public abstract Builder setNullable(boolean nullable);
@@ -75,6 +80,8 @@
public abstract Builder setMethod(@Nullable Method method);
+ public abstract Builder setOneOfTypes(Map<String, FieldValueTypeInformation> oneOfTypes);
+
public abstract Builder setElementType(@Nullable FieldValueTypeInformation elementType);
public abstract Builder setMapKeyType(@Nullable FieldValueTypeInformation mapKeyType);
@@ -84,6 +91,22 @@
abstract FieldValueTypeInformation build();
}
+ public static FieldValueTypeInformation forOneOf(
+ String name, boolean nullable, Map<String, FieldValueTypeInformation> oneOfTypes) {
+ final TypeDescriptor<OneOfType.Value> typeDescriptor = TypeDescriptor.of(OneOfType.Value.class);
+ return new AutoValue_FieldValueTypeInformation.Builder()
+ .setName(name)
+ .setNullable(nullable)
+ .setType(typeDescriptor)
+ .setRawType(typeDescriptor.getRawType())
+ .setField(null)
+ .setElementType(null)
+ .setMapKeyType(null)
+ .setMapValueType(null)
+ .setOneOfTypes(oneOfTypes)
+ .build();
+ }
+
public static FieldValueTypeInformation forField(Field field) {
TypeDescriptor type = TypeDescriptor.of(field.getGenericType());
return new AutoValue_FieldValueTypeInformation.Builder()
@@ -95,6 +118,7 @@
.setElementType(getIterableComponentType(field))
.setMapKeyType(getMapKeyType(field))
.setMapValueType(getMapValueType(field))
+ .setOneOfTypes(Collections.emptyMap())
.build();
}
@@ -119,6 +143,7 @@
.setElementType(getIterableComponentType(type))
.setMapKeyType(getMapKeyType(type))
.setMapValueType(getMapValueType(type))
+ .setOneOfTypes(Collections.emptyMap())
.build();
}
@@ -148,6 +173,7 @@
.setElementType(getIterableComponentType(type))
.setMapKeyType(getMapKeyType(type))
.setMapValueType(getMapValueType(type))
+ .setOneOfTypes(Collections.emptyMap())
.build();
}
@@ -175,6 +201,7 @@
.setElementType(getIterableComponentType(componentType))
.setMapKeyType(getMapKeyType(componentType))
.setMapValueType(getMapValueType(componentType))
+ .setOneOfTypes(Collections.emptyMap())
.build();
}
@@ -217,6 +244,7 @@
.setElementType(getIterableComponentType(mapType))
.setMapKeyType(getMapKeyType(mapType))
.setMapValueType(getMapValueType(mapType))
+ .setOneOfTypes(Collections.emptyMap())
.build();
}
}
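An illustrative sketch (not part of this diff) of the new forOneOf factory. The MyUnion class, its field names, and the choice of case names as map keys are assumptions made only for this example; exception handling and imports are omitted:

  // Hypothetical union holder used only to obtain Field objects.
  class MyUnion {
    Integer intValue;
    String stringValue;
  }

  Map<String, FieldValueTypeInformation> cases = new HashMap<>();
  // getDeclaredField throws a checked exception; handling is omitted in this sketch.
  cases.put("intValue",
      FieldValueTypeInformation.forField(MyUnion.class.getDeclaredField("intValue")));
  cases.put("stringValue",
      FieldValueTypeInformation.forField(MyUnion.class.getDeclaredField("stringValue")));
  FieldValueTypeInformation oneOfInfo =
      FieldValueTypeInformation.forOneOf("value", /* nullable= */ true, cases);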
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/FromRowUsingCreator.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/FromRowUsingCreator.java
index 61c0d05..499991f 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/FromRowUsingCreator.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/FromRowUsingCreator.java
@@ -28,6 +28,7 @@
import javax.annotation.Nullable;
import org.apache.beam.sdk.schemas.Schema.FieldType;
import org.apache.beam.sdk.schemas.Schema.TypeName;
+import org.apache.beam.sdk.schemas.logicaltypes.OneOfType;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.RowWithGetters;
@@ -80,13 +81,7 @@
FieldValueTypeInformation typeInformation = checkNotNull(typeInformations.get(i));
params[i] =
fromValue(
- type,
- row.getValue(i),
- typeInformation.getRawType(),
- typeInformation.getElementType(),
- typeInformation.getMapKeyType(),
- typeInformation.getMapValueType(),
- typeFactory);
+ type, row.getValue(i), typeInformation.getRawType(), typeInformation, typeFactory);
}
SchemaUserTypeCreator creator = schemaTypeCreatorFactory.create(clazz, schema);
@@ -99,10 +94,11 @@
FieldType type,
ValueT value,
Type fieldType,
- FieldValueTypeInformation elementType,
- FieldValueTypeInformation keyType,
- FieldValueTypeInformation valueType,
+ FieldValueTypeInformation fieldValueTypeInformation,
Factory<List<FieldValueTypeInformation>> typeFactory) {
+ FieldValueTypeInformation elementType = fieldValueTypeInformation.getElementType();
+ FieldValueTypeInformation keyType = fieldValueTypeInformation.getMapKeyType();
+ FieldValueTypeInformation valueType = fieldValueTypeInformation.getMapValueType();
if (value == null) {
return null;
}
@@ -127,6 +123,22 @@
valueType,
typeFactory);
} else {
+ if (type.getTypeName().isLogicalType()
+ && OneOfType.IDENTIFIER.equals(type.getLogicalType().getIdentifier())) {
+ OneOfType oneOfType = type.getLogicalType(OneOfType.class);
+ OneOfType.Value oneOfValue = oneOfType.toInputType((Row) value);
+ FieldValueTypeInformation oneOfFieldValueTypeInformation =
+ checkNotNull(
+ fieldValueTypeInformation.getOneOfTypes().get(oneOfValue.getCaseType().toString()));
+ Object fromValue =
+ fromValue(
+ oneOfValue.getFieldType(),
+ oneOfValue.getValue(),
+ oneOfFieldValueTypeInformation.getRawType(),
+ oneOfFieldValueTypeInformation,
+ typeFactory);
+ return (ValueT) oneOfType.createValue(oneOfValue.getCaseType(), fromValue);
+ }
return value;
}
}
@@ -156,9 +168,7 @@
elementType,
element,
elementTypeInformation.getType().getType(),
- elementTypeInformation.getElementType(),
- elementTypeInformation.getMapKeyType(),
- elementTypeInformation.getMapValueType(),
+ elementTypeInformation,
typeFactory));
}
@@ -175,9 +185,7 @@
elementType,
element,
elementTypeInformation.getType().getType(),
- elementTypeInformation.getElementType(),
- elementTypeInformation.getMapKeyType(),
- elementTypeInformation.getMapValueType(),
+ elementTypeInformation,
typeFactory));
}
@@ -196,18 +204,14 @@
keyType,
entry.getKey(),
keyTypeInformation.getType().getType(),
- keyTypeInformation.getElementType(),
- keyTypeInformation.getMapKeyType(),
- keyTypeInformation.getMapValueType(),
+ keyTypeInformation,
typeFactory);
Object value =
fromValue(
valueType,
entry.getValue(),
valueTypeInformation.getType().getType(),
- valueTypeInformation.getElementType(),
- valueTypeInformation.getMapKeyType(),
- valueTypeInformation.getMapValueType(),
+ valueTypeInformation,
typeFactory);
newMap.put(key, value);
}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/Schema.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/Schema.java
index c998037..fc6eb5e 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/Schema.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/Schema.java
@@ -559,8 +559,14 @@
abstract FieldType.Builder toBuilder();
+ public boolean isLogicalType(String logicalTypeIdentifier) {
+ return getTypeName().isLogicalType()
+ && getLogicalType().getIdentifier().equals(logicalTypeIdentifier);
+ }
+
/** Helper function for retrieving the concrete logical type subclass. */
- public <LogicalTypeT> LogicalTypeT getLogicalType(Class<LogicalTypeT> logicalTypeClass) {
+ public <LogicalTypeT extends LogicalType> LogicalTypeT getLogicalType(
+ Class<LogicalTypeT> logicalTypeClass) {
return logicalTypeClass.cast(getLogicalType());
}
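A small sketch (not part of this diff) of how the new isLogicalType(String) helper pairs with the narrowed getLogicalType(Class) signature; OneOfType is used only as an example of a logical type, and the row variable is assumed to exist:

  FieldType fieldType = row.getSchema().getField("value").getType();
  if (fieldType.isLogicalType(OneOfType.IDENTIFIER)) {
    OneOfType oneOf = fieldType.getLogicalType(OneOfType.class);
    // Work with the union, e.g. via oneOf.getOneOfSchema().
  }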
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaCoder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaCoder.java
index 9359c75..889925a 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaCoder.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaCoder.java
@@ -102,7 +102,8 @@
/**
* Returns a {@link SchemaCoder} for the specified class. If no schema is registered for this
- * class, then throws {@link NoSuchSchemaException}.
+ * class, then throws {@link NoSuchSchemaException}. The parameter functions to convert from and
+ * to Rows <b>must</b> implement the equals contract.
*/
public static <T> SchemaCoder<T> of(
Schema schema,
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java
index cea324a..6784712 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java
@@ -20,12 +20,19 @@
import java.util.Map;
import java.util.UUID;
import org.apache.beam.model.pipeline.v1.SchemaApi;
+import org.apache.beam.model.pipeline.v1.SchemaApi.ArrayTypeValue;
+import org.apache.beam.model.pipeline.v1.SchemaApi.FieldValue;
+import org.apache.beam.model.pipeline.v1.SchemaApi.IterableTypeValue;
+import org.apache.beam.model.pipeline.v1.SchemaApi.MapTypeEntry;
+import org.apache.beam.model.pipeline.v1.SchemaApi.MapTypeValue;
import org.apache.beam.sdk.schemas.Schema.Field;
import org.apache.beam.sdk.schemas.Schema.FieldType;
import org.apache.beam.sdk.schemas.Schema.LogicalType;
import org.apache.beam.sdk.schemas.Schema.TypeName;
import org.apache.beam.sdk.util.SerializableUtils;
+import org.apache.beam.sdk.values.Row;
import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.ByteString;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps;
/** Utility methods for translating schemas. */
@@ -35,7 +42,7 @@
private static final String URN_BEAM_LOGICAL_DECIMAL = "beam:logical_type:decimal:v1";
private static final String URN_BEAM_LOGICAL_JAVASDK = "beam:logical_type:javasdk:v1";
- public static SchemaApi.Schema schemaToProto(Schema schema) {
+ public static SchemaApi.Schema schemaToProto(Schema schema, boolean serializeLogicalType) {
String uuid = schema.getUUID() != null ? schema.getUUID().toString() : "";
SchemaApi.Schema.Builder builder = SchemaApi.Schema.newBuilder().setId(uuid);
for (Field field : schema.getFields()) {
@@ -43,62 +50,76 @@
fieldToProto(
field,
schema.indexOf(field.getName()),
- schema.getEncodingPositions().get(field.getName()));
+ schema.getEncodingPositions().get(field.getName()),
+ serializeLogicalType);
builder.addFields(protoField);
}
return builder.build();
}
- private static SchemaApi.Field fieldToProto(Field field, int fieldId, int position) {
+ private static SchemaApi.Field fieldToProto(
+ Field field, int fieldId, int position, boolean serializeLogicalType) {
return SchemaApi.Field.newBuilder()
.setName(field.getName())
.setDescription(field.getDescription())
- .setType(fieldTypeToProto(field.getType()))
+ .setType(fieldTypeToProto(field.getType(), serializeLogicalType))
.setId(fieldId)
.setEncodingPosition(position)
.build();
}
- private static SchemaApi.FieldType fieldTypeToProto(FieldType fieldType) {
+ private static SchemaApi.FieldType fieldTypeToProto(
+ FieldType fieldType, boolean serializeLogicalType) {
SchemaApi.FieldType.Builder builder = SchemaApi.FieldType.newBuilder();
switch (fieldType.getTypeName()) {
case ROW:
builder.setRowType(
- SchemaApi.RowType.newBuilder().setSchema(schemaToProto(fieldType.getRowSchema())));
+ SchemaApi.RowType.newBuilder()
+ .setSchema(schemaToProto(fieldType.getRowSchema(), serializeLogicalType)));
break;
case ARRAY:
builder.setArrayType(
SchemaApi.ArrayType.newBuilder()
- .setElementType(fieldTypeToProto(fieldType.getCollectionElementType())));
+ .setElementType(
+ fieldTypeToProto(fieldType.getCollectionElementType(), serializeLogicalType)));
break;
case ITERABLE:
builder.setIterableType(
SchemaApi.IterableType.newBuilder()
- .setElementType(fieldTypeToProto(fieldType.getCollectionElementType())));
+ .setElementType(
+ fieldTypeToProto(fieldType.getCollectionElementType(), serializeLogicalType)));
break;
case MAP:
builder.setMapType(
SchemaApi.MapType.newBuilder()
- .setKeyType(fieldTypeToProto(fieldType.getMapKeyType()))
- .setValueType(fieldTypeToProto(fieldType.getMapValueType()))
+ .setKeyType(fieldTypeToProto(fieldType.getMapKeyType(), serializeLogicalType))
+ .setValueType(fieldTypeToProto(fieldType.getMapValueType(), serializeLogicalType))
.build());
break;
case LOGICAL_TYPE:
LogicalType logicalType = fieldType.getLogicalType();
- builder.setLogicalType(
+ SchemaApi.LogicalType.Builder logicalTypeBuilder =
SchemaApi.LogicalType.newBuilder()
+ .setArgumentType(
+ fieldTypeToProto(logicalType.getArgumentType(), serializeLogicalType))
+ .setArgument(
+ rowFieldToProto(logicalType.getArgumentType(), logicalType.getArgument()))
+ .setRepresentation(
+ fieldTypeToProto(logicalType.getBaseType(), serializeLogicalType))
// TODO(BEAM-7855): "javasdk" types should only be a last resort. Types defined in
// Beam should have their own URN, and there should be a mechanism for users to
// register their own types by URN.
- .setUrn(URN_BEAM_LOGICAL_JAVASDK)
- .setPayload(
- ByteString.copyFrom(SerializableUtils.serializeToByteArray(logicalType)))
- .setRepresentation(fieldTypeToProto(logicalType.getBaseType()))
- .build());
+ .setUrn(URN_BEAM_LOGICAL_JAVASDK);
+ if (serializeLogicalType) {
+ logicalTypeBuilder =
+ logicalTypeBuilder.setPayload(
+ ByteString.copyFrom(SerializableUtils.serializeToByteArray(logicalType)));
+ }
+ builder.setLogicalType(logicalTypeBuilder.build());
break;
// Special-case for DATETIME and DECIMAL which are logical types in portable representation,
// but not yet in Java. (BEAM-7554)
@@ -106,14 +127,14 @@
builder.setLogicalType(
SchemaApi.LogicalType.newBuilder()
.setUrn(URN_BEAM_LOGICAL_DATETIME)
- .setRepresentation(fieldTypeToProto(FieldType.INT64))
+ .setRepresentation(fieldTypeToProto(FieldType.INT64, serializeLogicalType))
.build());
break;
case DECIMAL:
builder.setLogicalType(
SchemaApi.LogicalType.newBuilder()
.setUrn(URN_BEAM_LOGICAL_DECIMAL)
- .setRepresentation(fieldTypeToProto(FieldType.BYTES))
+ .setRepresentation(fieldTypeToProto(FieldType.BYTES, serializeLogicalType))
.build());
break;
case BYTE:
@@ -240,4 +261,94 @@
"Unexpected type_info: " + protoFieldType.getTypeInfoCase());
}
}
+
+ public static SchemaApi.Row rowToProto(Row row) {
+ SchemaApi.Row.Builder builder = SchemaApi.Row.newBuilder();
+ for (int i = 0; i < row.getFieldCount(); ++i) {
+ builder.addValues(rowFieldToProto(row.getSchema().getField(i).getType(), row.getValue(i)));
+ }
+ return builder.build();
+ }
+
+ private static SchemaApi.FieldValue rowFieldToProto(FieldType fieldType, Object value) {
+ FieldValue.Builder builder = FieldValue.newBuilder();
+ switch (fieldType.getTypeName()) {
+ case ARRAY:
+ return builder
+ .setArrayValue(
+ arrayValueToProto(fieldType.getCollectionElementType(), (Iterable) value))
+ .build();
+ case ITERABLE:
+ return builder
+ .setIterableValue(
+ iterableValueToProto(fieldType.getCollectionElementType(), (Iterable) value))
+ .build();
+ case MAP:
+ return builder
+ .setMapValue(
+ mapToProto(fieldType.getMapKeyType(), fieldType.getMapValueType(), (Map) value))
+ .build();
+ case ROW:
+ return builder.setRowValue(rowToProto((Row) value)).build();
+ case LOGICAL_TYPE:
+ default:
+ return builder.setAtomicValue(primitiveRowFieldToProto(fieldType, value)).build();
+ }
+ }
+
+ private static SchemaApi.ArrayTypeValue arrayValueToProto(
+ FieldType elementType, Iterable values) {
+ return ArrayTypeValue.newBuilder()
+ .addAllElement(Iterables.transform(values, e -> rowFieldToProto(elementType, e)))
+ .build();
+ }
+
+ private static SchemaApi.IterableTypeValue iterableValueToProto(
+ FieldType elementType, Iterable values) {
+ return IterableTypeValue.newBuilder()
+ .addAllElement(Iterables.transform(values, e -> rowFieldToProto(elementType, e)))
+ .build();
+ }
+
+ private static SchemaApi.MapTypeValue mapToProto(
+ FieldType keyType, FieldType valueType, Map<Object, Object> map) {
+ MapTypeValue.Builder builder = MapTypeValue.newBuilder();
+ for (Map.Entry entry : map.entrySet()) {
+ MapTypeEntry mapProtoEntry =
+ MapTypeEntry.newBuilder()
+ .setKey(rowFieldToProto(keyType, entry.getKey()))
+ .setValue(rowFieldToProto(valueType, entry.getValue()))
+ .build();
+ builder.addEntries(mapProtoEntry);
+ }
+ return builder.build();
+ }
+
+ private static SchemaApi.AtomicTypeValue primitiveRowFieldToProto(
+ FieldType fieldType, Object value) {
+ switch (fieldType.getTypeName()) {
+ case BYTE:
+ return SchemaApi.AtomicTypeValue.newBuilder().setByte((int) value).build();
+ case INT16:
+ return SchemaApi.AtomicTypeValue.newBuilder().setInt16((int) value).build();
+ case INT32:
+ return SchemaApi.AtomicTypeValue.newBuilder().setInt32((int) value).build();
+ case INT64:
+ return SchemaApi.AtomicTypeValue.newBuilder().setInt64((long) value).build();
+ case FLOAT:
+ return SchemaApi.AtomicTypeValue.newBuilder().setFloat((float) value).build();
+ case DOUBLE:
+ return SchemaApi.AtomicTypeValue.newBuilder().setDouble((double) value).build();
+ case STRING:
+ return SchemaApi.AtomicTypeValue.newBuilder().setString((String) value).build();
+ case BOOLEAN:
+ return SchemaApi.AtomicTypeValue.newBuilder().setBoolean((boolean) value).build();
+ case BYTES:
+ return SchemaApi.AtomicTypeValue.newBuilder()
+ .setBytes(ByteString.copyFrom((byte[]) value))
+ .build();
+ default:
+ throw new RuntimeException("FieldType unexpected " + fieldType.getTypeName());
+ }
+ }
}
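A brief sketch (not part of this diff) of the new row-to-proto path together with the updated schemaToProto signature; the schema and values are illustrative and imports are omitted:

  Schema schema = Schema.builder().addStringField("name").addInt32Field("count").build();
  Row row = Row.withSchema(schema).addValues("beam", 42).build();
  SchemaApi.Schema protoSchema =
      SchemaTranslation.schemaToProto(schema, /* serializeLogicalType= */ false);
  SchemaApi.Row protoRow = SchemaTranslation.rowToProto(row);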
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/OneOfType.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/OneOfType.java
index 214eeb5..dc9d9b0 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/OneOfType.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/OneOfType.java
@@ -22,7 +22,9 @@
import java.util.Arrays;
import java.util.List;
+import java.util.Map;
import java.util.stream.Collectors;
+import javax.annotation.Nullable;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.Field;
import org.apache.beam.sdk.schemas.Schema.FieldType;
@@ -44,15 +46,24 @@
private final byte[] schemaProtoRepresentation;
private OneOfType(List<Field> fields) {
+ this(fields, null);
+ }
+
+ private OneOfType(List<Field> fields, @Nullable Map<String, Integer> enumMap) {
List<Field> nullableFields =
fields.stream()
.map(f -> Field.nullable(f.getName(), f.getType()))
.collect(Collectors.toList());
- List<String> enumValues =
- nullableFields.stream().map(Field::getName).collect(Collectors.toList());
+ if (enumMap != null) {
+ nullableFields.stream().forEach(f -> checkArgument(enumMap.containsKey(f.getName())));
+ enumerationType = EnumerationType.create(enumMap);
+ } else {
+ List<String> enumValues =
+ nullableFields.stream().map(Field::getName).collect(Collectors.toList());
+ enumerationType = EnumerationType.create(enumValues);
+ }
oneOfSchema = Schema.builder().addFields(nullableFields).build();
- enumerationType = EnumerationType.create(enumValues);
- schemaProtoRepresentation = SchemaTranslation.schemaToProto(oneOfSchema).toByteArray();
+ schemaProtoRepresentation = SchemaTranslation.schemaToProto(oneOfSchema, false).toByteArray();
}
/** Create an {@link OneOfType} logical type. */
@@ -65,6 +76,14 @@
return new OneOfType(fields);
}
+ /**
+ * Create an {@link OneOfType} logical type. This method allows control over the integer values in
+ * the generated enum.
+ */
+ public static OneOfType create(List<Field> fields, Map<String, Integer> enumValues) {
+ return new OneOfType(fields, enumValues);
+ }
+
/** Returns the schema of the underlying {@link Row} that is used to represent the union. */
public Schema getOneOfSchema() {
return oneOfSchema;
@@ -158,15 +177,24 @@
return caseType;
}
- /** Returns the current value of the OneOf. */
- @SuppressWarnings("TypeParameterUnusedInFormals")
- public <T> T getValue() {
+ /** Returns the current value of the OneOf as the destination type. */
+ public <T> T getValue(Class<T> clazz) {
return (T) value;
}
+ /** Returns the current value of the OneOf. */
+ public Object getValue() {
+ return value;
+ }
+
/** Return the type of this union field. */
public FieldType getFieldType() {
return fieldType;
}
+
+ @Override
+ public String toString() {
+ return "caseType: " + caseType + " Value: " + value;
+ }
}
}
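An illustrative sketch (not part of this diff) of the new create overload and the typed getValue accessor. The field names, the enum value assignments, and the createValue(String, Object) call are assumptions for this example; imports are omitted:

  OneOfType oneOfType =
      OneOfType.create(
          Arrays.asList(
              Field.of("int_value", FieldType.INT32),
              Field.of("string_value", FieldType.STRING)),
          ImmutableMap.of("int_value", 1, "string_value", 2));
  OneOfType.Value value = oneOfType.createValue("int_value", 42);
  Integer i = value.getValue(Integer.class);  // typed accessor added in this change
  Object raw = value.getValue();              // untyped accessor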
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/CoGroup.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/CoGroup.java
index 9400f98..1631408 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/CoGroup.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/CoGroup.java
@@ -534,13 +534,14 @@
public void process(@Element KV<Row, CoGbkResult> kv, OutputReceiver<Row> o) {
Row key = kv.getKey();
CoGbkResult result = kv.getValue();
- List<Object> fields = Lists.newArrayListWithCapacity(sortedTags.size());
+ List<Object> fields = Lists.newArrayListWithCapacity(sortedTags.size() + 1);
+ fields.add(key);
for (int i = 0; i < sortedTags.size(); ++i) {
String tupleTag = tagToKeyedTag.get(i);
SerializableFunction<Object, Row> toRow = toRows.get(i);
fields.add(new Result(result.getAll(tupleTag), toRow));
}
- Row row = Row.withSchema(outputSchema).addValue(key).addValues(fields).build();
+ Row row = Row.withSchema(outputSchema).attachValues(fields).build();
o.output(row);
}
}
@@ -681,7 +682,7 @@
}
private Row buildOutputRow(List rows) {
- return Row.withSchema(outputSchema).addValues(rows).build();
+ return Row.withSchema(outputSchema).attachValues(Lists.newArrayList(rows)).build();
}
}
}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/Convert.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/Convert.java
index 5176def..5233b0c 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/Convert.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/Convert.java
@@ -40,7 +40,7 @@
* Convert a {@link PCollection}{@literal <InputT>} into a {@link PCollection}{@literal <Row>}.
*
* <p>The input {@link PCollection} must have a schema attached. The output collection will have
- * the same schema as the iput.
+ * the same schema as the input.
*/
public static <InputT> PTransform<PCollection<InputT>, PCollection<Row>> toRows() {
return to(Row.class);
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/Group.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/Group.java
index 27cd654..f0e891c 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/Group.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/Group.java
@@ -40,6 +40,7 @@
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TypeDescriptors;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists;
/**
* A generic grouping transform for schema {@link PCollection}s.
@@ -707,8 +708,7 @@
public void process(@Element KV<Row, Iterable<Row>> e, OutputReceiver<Row> o) {
o.output(
Row.withSchema(outputSchema)
- .addValue(e.getKey())
- .addIterable(e.getValue())
+ .attachValues(Lists.newArrayList(e.getKey(), e.getValue()))
.build());
}
}))
@@ -929,7 +929,8 @@
public void process(@Element KV<Row, Row> element, OutputReceiver<Row> o) {
o.output(
Row.withSchema(outputSchema)
- .addValues(element.getKey(), element.getValue())
+ .attachValues(
+ Lists.newArrayList(element.getKey(), element.getValue()))
.build());
}
}))
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroByteBuddyUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroByteBuddyUtils.java
index fd7f601..436da6c 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroByteBuddyUtils.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroByteBuddyUtils.java
@@ -54,7 +54,7 @@
static <T extends SpecificRecord> SchemaUserTypeCreator getCreator(
Class<T> clazz, Schema schema) {
return CACHED_CREATORS.computeIfAbsent(
- new ClassWithSchema(clazz, schema), c -> createCreator(clazz, schema));
+ ClassWithSchema.create(clazz, schema), c -> createCreator(clazz, schema));
}
private static <T> SchemaUserTypeCreator createCreator(Class<T> clazz, Schema schema) {
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroUtils.java
index 80c337e..3740de6 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroUtils.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroUtils.java
@@ -20,6 +20,9 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
import java.lang.reflect.Method;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
@@ -27,6 +30,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
@@ -422,7 +426,37 @@
*/
public static SerializableFunction<GenericRecord, Row> getGenericRecordToRowFunction(
@Nullable Schema schema) {
- return g -> toBeamRowStrict(g, schema);
+ return new GenericRecordToRowFn(schema);
+ }
+
+ private static class GenericRecordToRowFn implements SerializableFunction<GenericRecord, Row> {
+ private final Schema schema;
+
+ GenericRecordToRowFn(Schema schema) {
+ this.schema = schema;
+ }
+
+ @Override
+ public Row apply(GenericRecord input) {
+ return toBeamRowStrict(input, schema);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this == other) {
+ return true;
+ }
+ if (other == null || getClass() != other.getClass()) {
+ return false;
+ }
+ GenericRecordToRowFn that = (GenericRecordToRowFn) other;
+ return schema.equals(that.schema);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(schema);
+ }
}
/**
@@ -431,7 +465,50 @@
*/
public static SerializableFunction<Row, GenericRecord> getRowToGenericRecordFunction(
@Nullable org.apache.avro.Schema avroSchema) {
- return g -> toGenericRecord(g, avroSchema);
+ return new RowToGenericRecordFn(avroSchema);
+ }
+
+ private static class RowToGenericRecordFn implements SerializableFunction<Row, GenericRecord> {
+ private transient org.apache.avro.Schema avroSchema;
+
+ RowToGenericRecordFn(@Nullable org.apache.avro.Schema avroSchema) {
+ this.avroSchema = avroSchema;
+ }
+
+ @Override
+ public GenericRecord apply(Row input) {
+ return toGenericRecord(input, avroSchema);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this == other) {
+ return true;
+ }
+ if (other == null || getClass() != other.getClass()) {
+ return false;
+ }
+ RowToGenericRecordFn that = (RowToGenericRecordFn) other;
+ return avroSchema.equals(that.avroSchema);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(avroSchema);
+ }
+
+ private void writeObject(ObjectOutputStream out) throws IOException {
+ final String avroSchemaAsString = (avroSchema == null) ? null : avroSchema.toString();
+ out.writeObject(avroSchemaAsString);
+ }
+
+ private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
+ final String avroSchemaAsString = (String) in.readObject();
+ avroSchema =
+ (avroSchemaAsString == null)
+ ? null
+ : new org.apache.avro.Schema.Parser().parse(avroSchemaAsString);
+ }
}
/**
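The RowToGenericRecordFn above serializes its Avro schema by hand, presumably because the Avro Schema class in use is not Java-serializable. A minimal sketch (not part of this diff) of the same string round trip, with an illustrative record schema:

  org.apache.avro.Schema avroSchema =
      org.apache.avro.SchemaBuilder.record("Example")
          .fields().requiredString("name").endRecord();
  String json = avroSchema.toString();
  org.apache.avro.Schema restored = new org.apache.avro.Schema.Parser().parse(json);
  // restored is structurally equal to avroSchema, which also keeps the
  // function's equals()/hashCode() behavior consistent across serialization.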
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/ByteBuddyUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/ByteBuddyUtils.java
index 791dafb..c00d5d0 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/ByteBuddyUtils.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/ByteBuddyUtils.java
@@ -94,7 +94,6 @@
new ForLoadedType(ReadableInstant.class);
private static final ForLoadedType READABLE_PARTIAL_TYPE =
new ForLoadedType(ReadablePartial.class);
- private static final ForLoadedType OBJECT_TYPE = new ForLoadedType(Object.class);
private static final ForLoadedType INTEGER_TYPE = new ForLoadedType(Integer.class);
private static final ForLoadedType ENUM_TYPE = new ForLoadedType(Enum.class);
private static final ForLoadedType BYTE_BUDDY_UTILS_TYPE =
@@ -134,7 +133,7 @@
// Create a new FieldValueGetter subclass.
@SuppressWarnings("unchecked")
- static DynamicType.Builder<FieldValueGetter> subclassGetterInterface(
+ public static DynamicType.Builder<FieldValueGetter> subclassGetterInterface(
ByteBuddy byteBuddy, Type objectType, Type fieldType) {
TypeDescription.Generic getterGenericType =
TypeDescription.Generic.Builder.parameterizedType(
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/JavaBeanUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/JavaBeanUtils.java
index 759d77d..e25342b 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/JavaBeanUtils.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/JavaBeanUtils.java
@@ -102,7 +102,7 @@
public static List<FieldValueTypeInformation> getFieldTypes(
Class<?> clazz, Schema schema, FieldValueTypeSupplier fieldValueTypeSupplier) {
return CACHED_FIELD_TYPES.computeIfAbsent(
- new ClassWithSchema(clazz, schema), c -> fieldValueTypeSupplier.get(clazz, schema));
+ ClassWithSchema.create(clazz, schema), c -> fieldValueTypeSupplier.get(clazz, schema));
}
// The list of getters for a class is cached, so we only create the classes the first time
@@ -121,7 +121,7 @@
FieldValueTypeSupplier fieldValueTypeSupplier,
TypeConversionsFactory typeConversionsFactory) {
return CACHED_GETTERS.computeIfAbsent(
- new ClassWithSchema(clazz, schema),
+ ClassWithSchema.create(clazz, schema),
c -> {
List<FieldValueTypeInformation> types = fieldValueTypeSupplier.get(clazz, schema);
return types.stream()
@@ -130,7 +130,7 @@
});
}
- private static <T> FieldValueGetter createGetter(
+ public static <T> FieldValueGetter createGetter(
FieldValueTypeInformation typeInformation, TypeConversionsFactory typeConversionsFactory) {
DynamicType.Builder<FieldValueGetter> builder =
ByteBuddyUtils.subclassGetterInterface(
@@ -184,7 +184,7 @@
FieldValueTypeSupplier fieldValueTypeSupplier,
TypeConversionsFactory typeConversionsFactory) {
return CACHED_SETTERS.computeIfAbsent(
- new ClassWithSchema(clazz, schema),
+ ClassWithSchema.create(clazz, schema),
c -> {
List<FieldValueTypeInformation> types = fieldValueTypeSupplier.get(clazz, schema);
return types.stream()
@@ -193,14 +193,14 @@
});
}
- private static FieldValueSetter createSetter(
+ public static FieldValueSetter createSetter(
FieldValueTypeInformation typeInformation, TypeConversionsFactory typeConversionsFactory) {
DynamicType.Builder<FieldValueSetter> builder =
ByteBuddyUtils.subclassSetterInterface(
BYTE_BUDDY,
typeInformation.getMethod().getDeclaringClass(),
typeConversionsFactory.createTypeConversion(false).convert(typeInformation.getType()));
- builder = implementSetterMethods(builder, typeInformation.getMethod(), typeConversionsFactory);
+ builder = implementSetterMethods(builder, typeInformation, typeConversionsFactory);
try {
return builder
.make()
@@ -222,14 +222,13 @@
private static DynamicType.Builder<FieldValueSetter> implementSetterMethods(
DynamicType.Builder<FieldValueSetter> builder,
- Method method,
+ FieldValueTypeInformation fieldValueTypeInformation,
TypeConversionsFactory typeConversionsFactory) {
- FieldValueTypeInformation javaTypeInformation = FieldValueTypeInformation.forSetter(method);
return builder
.method(ElementMatchers.named("name"))
- .intercept(FixedValue.reference(javaTypeInformation.getName()))
+ .intercept(FixedValue.reference(fieldValueTypeInformation.getName()))
.method(ElementMatchers.named("set"))
- .intercept(new InvokeSetterInstruction(method, typeConversionsFactory));
+ .intercept(new InvokeSetterInstruction(fieldValueTypeInformation, typeConversionsFactory));
}
// The list of constructors for a class is cached, so we only create the classes the first time
@@ -244,7 +243,7 @@
FieldValueTypeSupplier fieldValueTypeSupplier,
TypeConversionsFactory typeConversionsFactory) {
return CACHED_CREATORS.computeIfAbsent(
- new ClassWithSchema(clazz, schema),
+ ClassWithSchema.create(clazz, schema),
c -> {
List<FieldValueTypeInformation> types = fieldValueTypeSupplier.get(clazz, schema);
return createConstructorCreator(
@@ -291,7 +290,7 @@
FieldValueTypeSupplier fieldValueTypeSupplier,
TypeConversionsFactory typeConversionsFactory) {
return CACHED_CREATORS.computeIfAbsent(
- new ClassWithSchema(clazz, schema),
+ ClassWithSchema.create(clazz, schema),
c -> {
List<FieldValueTypeInformation> types = fieldValueTypeSupplier.get(clazz, schema);
return createStaticCreator(clazz, creator, schema, types, typeConversionsFactory);
@@ -377,11 +376,13 @@
// Implements a method to write a public set out on an object.
private static class InvokeSetterInstruction implements Implementation {
// Setter method that will be invoked
- private Method method;
+ private FieldValueTypeInformation fieldValueTypeInformation;
private final TypeConversionsFactory typeConversionsFactory;
- InvokeSetterInstruction(Method method, TypeConversionsFactory typeConversionsFactory) {
- this.method = method;
+ InvokeSetterInstruction(
+ FieldValueTypeInformation fieldValueTypeInformation,
+ TypeConversionsFactory typeConversionsFactory) {
+ this.fieldValueTypeInformation = fieldValueTypeInformation;
this.typeConversionsFactory = typeConversionsFactory;
}
@@ -393,13 +394,13 @@
@Override
public ByteCodeAppender appender(final Target implementationTarget) {
return (methodVisitor, implementationContext, instrumentedMethod) -> {
- FieldValueTypeInformation javaTypeInformation = FieldValueTypeInformation.forSetter(method);
// this + method parameters.
int numLocals = 1 + instrumentedMethod.getParameters().size();
// The instruction to read the field.
StackManipulation readField = MethodVariableAccess.REFERENCE.loadFrom(2);
+ Method method = fieldValueTypeInformation.getMethod();
boolean setterMethodReturnsVoid = method.getReturnType().equals(Void.TYPE);
// Read the object onto the stack.
StackManipulation stackManipulation =
@@ -409,7 +410,7 @@
// Do any conversions necessary.
typeConversionsFactory
.createSetterConversions(readField)
- .convert(javaTypeInformation.getType()),
+ .convert(fieldValueTypeInformation.getType()),
// Now update the field and return void.
MethodInvocation.invoke(new ForLoadedMethod(method)));
if (!setterMethodReturnsVoid) {
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/POJOUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/POJOUtils.java
index a58ddf8..aa968b4 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/POJOUtils.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/POJOUtils.java
@@ -81,7 +81,7 @@
public static List<FieldValueTypeInformation> getFieldTypes(
Class<?> clazz, Schema schema, FieldValueTypeSupplier fieldValueTypeSupplier) {
return CACHED_FIELD_TYPES.computeIfAbsent(
- new ClassWithSchema(clazz, schema), c -> fieldValueTypeSupplier.get(clazz, schema));
+ ClassWithSchema.create(clazz, schema), c -> fieldValueTypeSupplier.get(clazz, schema));
}
// The list of getters for a class is cached, so we only create the classes the first time
@@ -96,7 +96,7 @@
TypeConversionsFactory typeConversionsFactory) {
// Return the getters ordered by their position in the schema.
return CACHED_GETTERS.computeIfAbsent(
- new ClassWithSchema(clazz, schema),
+ ClassWithSchema.create(clazz, schema),
c -> {
List<FieldValueTypeInformation> types = fieldValueTypeSupplier.get(clazz, schema);
List<FieldValueGetter> getters =
@@ -122,7 +122,7 @@
FieldValueTypeSupplier fieldValueTypeSupplier,
TypeConversionsFactory typeConversionsFactory) {
return CACHED_CREATORS.computeIfAbsent(
- new ClassWithSchema(clazz, schema),
+ ClassWithSchema.create(clazz, schema),
c -> {
List<FieldValueTypeInformation> types = fieldValueTypeSupplier.get(clazz, schema);
return createSetFieldCreator(clazz, schema, types, typeConversionsFactory);
@@ -169,7 +169,7 @@
FieldValueTypeSupplier fieldValueTypeSupplier,
TypeConversionsFactory typeConversionsFactory) {
return CACHED_CREATORS.computeIfAbsent(
- new ClassWithSchema(clazz, schema),
+ ClassWithSchema.create(clazz, schema),
c -> {
List<FieldValueTypeInformation> types = fieldValueTypeSupplier.get(clazz, schema);
return createConstructorCreator(
@@ -217,7 +217,7 @@
FieldValueTypeSupplier fieldValueTypeSupplier,
TypeConversionsFactory typeConversionsFactory) {
return CACHED_CREATORS.computeIfAbsent(
- new ClassWithSchema(clazz, schema),
+ ClassWithSchema.create(clazz, schema),
c -> {
List<FieldValueTypeInformation> types = fieldValueTypeSupplier.get(clazz, schema);
return createStaticCreator(clazz, creator, schema, types, typeConversionsFactory);
@@ -323,7 +323,7 @@
TypeConversionsFactory typeConversionsFactory) {
// Return the setters, ordered by their position in the schema.
return CACHED_SETTERS.computeIfAbsent(
- new ClassWithSchema(clazz, schema),
+ ClassWithSchema.create(clazz, schema),
c -> {
List<FieldValueTypeInformation> types = fieldValueTypeSupplier.get(clazz, schema);
return types.stream()
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/ReflectUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/ReflectUtils.java
index d56f0bd..08c494c 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/ReflectUtils.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/ReflectUtils.java
@@ -19,6 +19,7 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
+import com.google.auto.value.AutoValue;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
@@ -31,7 +32,6 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import java.util.Objects;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.apache.beam.sdk.schemas.Schema;
@@ -39,35 +39,21 @@
import org.apache.beam.sdk.values.TypeDescriptor;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Multimap;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Multimaps;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.Primitives;
/** A set of reflection helper methods. */
public class ReflectUtils {
/** Represents a class and a schema. */
- public static class ClassWithSchema {
- private final Class clazz;
- private final Schema schema;
+ @AutoValue
+ public abstract static class ClassWithSchema {
+ public abstract Class getClazz();
- public ClassWithSchema(Class clazz, Schema schema) {
- this.clazz = clazz;
- this.schema = schema;
- }
+ public abstract Schema getSchema();
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
- ClassWithSchema that = (ClassWithSchema) o;
- return Objects.equals(clazz, that.clazz) && Objects.equals(schema, that.schema);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(clazz, schema);
+ public static ClassWithSchema create(Class clazz, Schema schema) {
+ return new AutoValue_ReflectUtils_ClassWithSchema(clazz, schema);
}
}
@@ -94,6 +80,10 @@
});
}
+ public static Multimap<String, Method> getMethodsMap(Class clazz) {
+ return Multimaps.index(getMethods(clazz), Method::getName);
+ }
+
@Nullable
public static Constructor getAnnotatedConstructor(Class clazz) {
return Arrays.stream(clazz.getDeclaredConstructors())
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/state/Timer.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/state/Timer.java
index 4da1278..ba996b7 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/state/Timer.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/state/Timer.java
@@ -66,4 +66,10 @@
* period}.
*/
Timer align(Duration period);
+
+ /**
+ * Sets the event time timer's output timestamp. The output watermark will be held at this
+ * timestamp until the timer fires.
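+ *
+ * <p>Illustrative usage (a sketch, not part of this change): {@code
+ * timer.withOutputTimestamp(new Instant(5)).set(new Instant(10))} fires at event time 10 while
+ * holding the output watermark at 5.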
+ */
+ Timer withOutputTimestamp(Instant outputTime);
}
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/FlinkClassloading.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesTestStreamWithOutputTimestamp.java
similarity index 64%
copy from runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/FlinkClassloading.java
copy to sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesTestStreamWithOutputTimestamp.java
index a114f40..e498ac8 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/FlinkClassloading.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesTestStreamWithOutputTimestamp.java
@@ -15,16 +15,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.beam.runners.flink.translation.utils;
+package org.apache.beam.sdk.testing;
-import com.fasterxml.jackson.databind.type.TypeFactory;
-
-/** Utilities for dealing with classloading. */
-public class FlinkClassloading {
-
- public static void deleteStaticCaches() {
- // Clear cache to get rid of any references to the Flink Classloader
- // See https://jira.apache.org/jira/browse/BEAM-6460
- TypeFactory.defaultInstance().clearCache();
- }
-}
+/**
+ * Category tag for validation tests which use outputTimestamp. Tests tagged with {@link
+ * UsesTestStreamWithOutputTimestamp} should be run for runners which support outputTimestamp.
+ */
+public interface UsesTestStreamWithOutputTimestamp {}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/WindowedValue.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/WindowedValue.java
index 84b1253..55c1601 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/WindowedValue.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/WindowedValue.java
@@ -20,8 +20,12 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.Collection;
@@ -30,6 +34,7 @@
import java.util.List;
import java.util.Objects;
import java.util.Set;
+import org.apache.beam.sdk.coders.ByteArrayCoder;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.CoderException;
import org.apache.beam.sdk.coders.CollectionCoder;
@@ -505,6 +510,11 @@
return ValueOnlyWindowedValueCoder.of(valueCoder);
}
+ /** Returns the {@code ParamWindowedValueCoder} for the given valueCoder. */
+ public static <T> ParamWindowedValueCoder<T> getParamWindowedValueCoder(Coder<T> valueCoder) {
+ return ParamWindowedValueCoder.of(valueCoder);
+ }
+
/** Abstract class for {@code WindowedValue} coder. */
public abstract static class WindowedValueCoder<T> extends StructuredCoder<WindowedValue<T>> {
final Coder<T> valueCoder;
@@ -637,7 +647,11 @@
*
* <p>A {@code ValueOnlyWindowedValueCoder} only encodes and decodes the value. It drops timestamp
* and windows for encoding, and uses the default timestamp and windows for decoding.
+ *
+ * @deprecated Use ParamWindowedValueCoder instead; it is a general-purpose implementation of the
+ *     same concept that makes the timestamp, windows, and pane info configurable.
*/
+ @Deprecated
public static class ValueOnlyWindowedValueCoder<T> extends WindowedValueCoder<T> {
public static <T> ValueOnlyWindowedValueCoder<T> of(Coder<T> valueCoder) {
return new ValueOnlyWindowedValueCoder<>(valueCoder);
@@ -693,4 +707,181 @@
return Collections.singletonList(valueCoder);
}
}
+
+ /**
+ * A parameterized coder for {@code WindowedValue}.
+ *
+ * <p>A {@code ParamWindowedValueCoder} only encodes and decodes the value. It drops timestamp,
+ * windows, and pane info during encoding, and uses the supplied parameterized timestamp, windows
+ * and pane info values during decoding when reconstructing the windowed value.
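+ *
+ * <p>Illustrative sketch (not part of this change), assuming a {@code StringUtf8Coder} value
+ * coder; decoded values are re-windowed into the global window with the default timestamp and
+ * pane:
+ *
+ * <pre>{@code
+ * WindowedValue.ParamWindowedValueCoder<String> coder =
+ *     WindowedValue.ParamWindowedValueCoder.of(StringUtf8Coder.of());
+ * }</pre>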
+ */
+ public static class ParamWindowedValueCoder<T> extends FullWindowedValueCoder<T> {
+
+ private static final long serialVersionUID = 1L;
+
+ private transient Instant timestamp;
+ private transient Collection<? extends BoundedWindow> windows;
+ private transient PaneInfo pane;
+
+ private static final byte[] EMPTY_BYTES = new byte[0];
+
+ /**
+ * Returns the {@link ParamWindowedValueCoder} for the given valueCoder and windowCoder using
+ * the supplied parameterized timestamp, windows and pane info for {@link WindowedValue}.
+ */
+ public static <T> ParamWindowedValueCoder<T> of(
+ Coder<T> valueCoder,
+ Coder<? extends BoundedWindow> windowCoder,
+ Instant timestamp,
+ Collection<? extends BoundedWindow> windows,
+ PaneInfo pane) {
+ return new ParamWindowedValueCoder<>(valueCoder, windowCoder, timestamp, windows, pane);
+ }
+
+ /**
+ * Returns the {@link ParamWindowedValueCoder} for the given valueCoder and windowCoder using
+ * {@link BoundedWindow#TIMESTAMP_MIN_VALUE} as the timestamp, {@link #GLOBAL_WINDOWS} as the
+ * window and {@link PaneInfo#NO_FIRING} as the pane info for parameters.
+ */
+ public static <T> ParamWindowedValueCoder<T> of(
+ Coder<T> valueCoder, Coder<? extends BoundedWindow> windowCoder) {
+ return ParamWindowedValueCoder.of(
+ valueCoder,
+ windowCoder,
+ BoundedWindow.TIMESTAMP_MIN_VALUE,
+ GLOBAL_WINDOWS,
+ PaneInfo.NO_FIRING);
+ }
+
+ /**
+ * Returns the {@link ParamWindowedValueCoder} for the given valueCoder and {@link
+ * GlobalWindow.Coder#INSTANCE} using {@link BoundedWindow#TIMESTAMP_MIN_VALUE} as the
+ * timestamp, {@link #GLOBAL_WINDOWS} as the window and {@link PaneInfo#NO_FIRING} as the pane
+ * info for parameters.
+ */
+ public static <T> ParamWindowedValueCoder<T> of(Coder<T> valueCoder) {
+ return ParamWindowedValueCoder.of(valueCoder, GlobalWindow.Coder.INSTANCE);
+ }
+
+ ParamWindowedValueCoder(
+ Coder<T> valueCoder,
+ Coder<? extends BoundedWindow> windowCoder,
+ Instant timestamp,
+ Collection<? extends BoundedWindow> windows,
+ PaneInfo pane) {
+ super(valueCoder, windowCoder);
+ this.timestamp = timestamp;
+ this.windows = windows;
+ this.pane = pane;
+ }
+
+ @Override
+ public <NewT> WindowedValueCoder<NewT> withValueCoder(Coder<NewT> valueCoder) {
+ return new ParamWindowedValueCoder<>(valueCoder, getWindowCoder(), timestamp, windows, pane);
+ }
+
+ @Override
+ public void encode(WindowedValue<T> windowedElem, OutputStream outStream)
+ throws CoderException, IOException {
+ encode(windowedElem, outStream, Context.NESTED);
+ }
+
+ @Override
+ public void encode(WindowedValue<T> windowedElem, OutputStream outStream, Context context)
+ throws CoderException, IOException {
+ valueCoder.encode(windowedElem.getValue(), outStream, context);
+ }
+
+ @Override
+ public WindowedValue<T> decode(InputStream inStream) throws CoderException, IOException {
+ return decode(inStream, Context.NESTED);
+ }
+
+ @Override
+ public WindowedValue<T> decode(InputStream inStream, Context context)
+ throws CoderException, IOException {
+ T value = valueCoder.decode(inStream, context);
+ return WindowedValue.of(value, this.timestamp, this.windows, this.pane);
+ }
+
+ @Override
+ public void verifyDeterministic() throws NonDeterministicException {
+ verifyDeterministic(
+ this, "ParamWindowedValueCoder requires a deterministic valueCoder", valueCoder);
+ }
+
+ @Override
+ public void registerByteSizeObserver(WindowedValue<T> value, ElementByteSizeObserver observer)
+ throws Exception {
+ valueCoder.registerByteSizeObserver(value.getValue(), observer);
+ }
+
+ public Instant getTimestamp() {
+ return timestamp;
+ }
+
+ public Collection<? extends BoundedWindow> getWindows() {
+ return windows;
+ }
+
+ public PaneInfo getPane() {
+ return pane;
+ }
+
+ /** Returns the serialized payload that will be provided when deserializing this coder. */
+ public static byte[] getPayload(ParamWindowedValueCoder<?> from) {
+ // Use FullWindowedValueCoder to encode the constant members (timestamp, window, pane) in
+ // ParamWindowedValueCoder
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ WindowedValue<byte[]> windowedValue =
+ WindowedValue.of(EMPTY_BYTES, from.getTimestamp(), from.getWindows(), from.getPane());
+ WindowedValue.FullWindowedValueCoder<byte[]> windowedValueCoder =
+ WindowedValue.FullWindowedValueCoder.of(ByteArrayCoder.of(), from.getWindowCoder());
+ try {
+ windowedValueCoder.encode(windowedValue, baos);
+ } catch (IOException e) {
+ throw new RuntimeException(
+ "Unable to encode constant members of ParamWindowedValueCoder: ", e);
+ }
+ return baos.toByteArray();
+ }
+
+ /** Create a {@link Coder} from its component {@link Coder coders}. */
+ public static WindowedValue.ParamWindowedValueCoder<?> fromComponents(
+ List<Coder<?>> components, byte[] payload) {
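+ // By convention here, components.get(0) is the value coder and components.get(1) is the
+ // window coder.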
+ Coder<? extends BoundedWindow> windowCoder =
+ (Coder<? extends BoundedWindow>) components.get(1);
+ WindowedValue.FullWindowedValueCoder<byte[]> windowedValueCoder =
+ WindowedValue.FullWindowedValueCoder.of(ByteArrayCoder.of(), windowCoder);
+
+ try {
+ ByteArrayInputStream bais = new ByteArrayInputStream(payload);
+ WindowedValue<byte[]> windowedValue = windowedValueCoder.decode(bais);
+ return WindowedValue.ParamWindowedValueCoder.of(
+ components.get(0),
+ windowCoder,
+ windowedValue.getTimestamp(),
+ windowedValue.getWindows(),
+ windowedValue.getPane());
+ } catch (IOException e) {
+ throw new RuntimeException(
+ "Unable to decode constant members from payload for ParamWindowedValueCoder: ", e);
+ }
+ }
+
+ private void writeObject(ObjectOutputStream out) throws IOException {
+ out.defaultWriteObject();
+ out.writeObject(getPayload(this));
+ }
+
+ private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
+ in.defaultReadObject();
+ byte[] payload = (byte[]) in.readObject();
+ ParamWindowedValueCoder<?> paramWindowedValueCoder =
+ fromComponents(Arrays.asList(valueCoder, getWindowCoder()), payload);
+ this.timestamp = paramWindowedValueCoder.timestamp;
+ this.windows = paramWindowedValueCoder.windows;
+ this.pane = paramWindowedValueCoder.pane;
+ }
+ }
}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PCollectionList.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PCollectionList.java
index d4ea50e..2851cd8 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PCollectionList.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PCollectionList.java
@@ -115,7 +115,7 @@
pipeline,
ImmutableList.<TaggedPValue>builder()
.addAll(pcollections)
- .add(TaggedPValue.of(new TupleTag<T>(), pc))
+ .add(TaggedPValue.of(new TupleTag<T>(Integer.toString(pcollections.size())), pc))
.build());
}
@@ -130,11 +130,13 @@
public PCollectionList<T> and(Iterable<PCollection<T>> pcs) {
ImmutableList.Builder<TaggedPValue> builder = ImmutableList.builder();
builder.addAll(pcollections);
+ int nextIndex = pcollections.size();
for (PCollection<T> pc : pcs) {
if (pc.getPipeline() != pipeline) {
throw new IllegalArgumentException("PCollections come from different Pipelines");
}
- builder.add(TaggedPValue.of(new TupleTag<T>(), pc));
+ builder.add(TaggedPValue.of(new TupleTag<T>(Integer.toString(nextIndex)), pc));
+ nextIndex += 1;
}
return new PCollectionList<>(pipeline, builder.build());
}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/Row.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/Row.java
index 3945ba1..0cc74cb 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/Row.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/Row.java
@@ -214,9 +214,8 @@
* schema doesn't match.
*/
@Nullable
- @SuppressWarnings("TypeParameterUnusedInFormals")
- public <T> T getLogicalTypeValue(String fieldName) {
- return getLogicalTypeValue(getSchema().indexOf(fieldName));
+ public <T> T getLogicalTypeValue(String fieldName, Class<T> clazz) {
+ return getLogicalTypeValue(getSchema().indexOf(fieldName), clazz);
}
/**
@@ -360,8 +359,7 @@
* schema doesn't match.
*/
@Nullable
- @SuppressWarnings("TypeParameterUnusedInFormals")
- public <T> T getLogicalTypeValue(int idx) {
+ public <T> T getLogicalTypeValue(int idx, Class<T> clazz) {
LogicalType logicalType = checkNotNull(getSchema().getField(idx).getType().getLogicalType());
return (T) logicalType.toInputType(getValue(idx));
}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/RowWithGetters.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/RowWithGetters.java
index ebf59b9..0d68709 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/RowWithGetters.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/RowWithGetters.java
@@ -29,6 +29,7 @@
import org.apache.beam.sdk.schemas.Schema.Field;
import org.apache.beam.sdk.schemas.Schema.FieldType;
import org.apache.beam.sdk.schemas.Schema.TypeName;
+import org.apache.beam.sdk.schemas.logicaltypes.OneOfType;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Collections2;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists;
@@ -122,6 +123,15 @@
cacheKey, i -> getMapValue(type.getMapKeyType(), type.getMapValueType(), map))
: (T) getMapValue(type.getMapKeyType(), type.getMapValueType(), map);
} else {
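+ // A OneOfType value wraps a case enum and a field value; convert the wrapped value
+ // recursively, then re-wrap it in the logical type's base form.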
+ if (type.isLogicalType(OneOfType.IDENTIFIER)) {
+ OneOfType oneOfType = type.getLogicalType(OneOfType.class);
+ OneOfType.Value oneOfValue = (OneOfType.Value) fieldValue;
+ Object convertedOneOfField =
+ getValue(oneOfValue.getFieldType(), oneOfValue.getValue(), null);
+ return (T)
+ oneOfType.toBaseType(
+ oneOfType.createValue(oneOfValue.getCaseType(), convertedOneOfField));
+ }
return (T) fieldValue;
}
}
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/options/ExperimentalOptionsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/options/ExperimentalOptionsTest.java
index c60007a..eebf621 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/options/ExperimentalOptionsTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/options/ExperimentalOptionsTest.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.sdk.options;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -36,4 +37,14 @@
assertTrue(ExperimentalOptions.hasExperiment(options, "experimentB"));
assertFalse(ExperimentalOptions.hasExperiment(options, "experimentC"));
}
+
+ @Test
+ public void testExperimentGetValue() {
+ ExperimentalOptions options =
+ PipelineOptionsFactory.fromArgs(
+ "--experiments=experimentA=0,state_cache_size=1,experimentB=0")
+ .as(ExperimentalOptions.class);
+ assertEquals(
+ "1", ExperimentalOptions.getExperimentValue(options, ExperimentalOptions.STATE_CACHE_SIZE));
+ }
}
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/logicaltypes/LogicalTypesTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/logicaltypes/LogicalTypesTest.java
index 657af04..95a05a3 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/logicaltypes/LogicalTypesTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/logicaltypes/LogicalTypesTest.java
@@ -46,12 +46,12 @@
Row row1 = Row.withSchema(schema).addValue(enumeration.valueOf(1)).build();
Row row2 = Row.withSchema(schema).addValue(enumeration.valueOf("FIRST")).build();
assertEquals(row1, row2);
- assertEquals(1, row1.<EnumerationType.Value>getLogicalTypeValue(0).getValue());
+ assertEquals(1, row1.getLogicalTypeValue(0, EnumerationType.Value.class).getValue());
Row row3 = Row.withSchema(schema).addValue(enumeration.valueOf(2)).build();
Row row4 = Row.withSchema(schema).addValue(enumeration.valueOf("SECOND")).build();
assertEquals(row3, row4);
- assertEquals(2, row3.<EnumerationType.Value>getLogicalTypeValue(0).getValue());
+ assertEquals(2, row3.getLogicalTypeValue(0, EnumerationType.Value.class).getValue());
}
@Test
@@ -62,12 +62,12 @@
Row stringOneOf =
Row.withSchema(schema).addValue(oneOf.createValue("string", "stringValue")).build();
- Value union = stringOneOf.getLogicalTypeValue(0);
+ Value union = stringOneOf.getLogicalTypeValue(0, OneOfType.Value.class);
assertEquals("string", union.getCaseType().toString());
assertEquals("stringValue", union.getValue());
Row intOneOf = Row.withSchema(schema).addValue(oneOf.createValue("int32", 42)).build();
- union = intOneOf.getLogicalTypeValue(0);
+ union = intOneOf.getLogicalTypeValue(0, OneOfType.Value.class);
assertEquals("int32", union.getCaseType().toString());
assertEquals(42, (int) union.getValue());
}
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/utils/AvroUtilsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/utils/AvroUtilsTest.java
index 3679e21..cd27cb1 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/utils/AvroUtilsTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/utils/AvroUtilsTest.java
@@ -48,7 +48,9 @@
import org.apache.beam.sdk.schemas.Schema.FieldType;
import org.apache.beam.sdk.schemas.utils.AvroGenerators.RecordSchemaGenerator;
import org.apache.beam.sdk.schemas.utils.AvroUtils.TypeWithNullability;
+import org.apache.beam.sdk.testing.CoderProperties;
import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.util.SerializableUtils;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
@@ -501,6 +503,12 @@
}
@Test
+ public void testRowToGenericRecordFunction() {
+ SerializableUtils.ensureSerializable(AvroUtils.getRowToGenericRecordFunction(NULL_SCHEMA));
+ SerializableUtils.ensureSerializable(AvroUtils.getRowToGenericRecordFunction(null));
+ }
+
+ @Test
public void testGenericRecordToBeamRow() {
GenericRecord genericRecord = getGenericRecord();
Row row = AvroUtils.toBeamRowStrict(getGenericRecord(), null);
@@ -513,6 +521,12 @@
}
@Test
+ public void testGenericRecordToRowFunction() {
+ SerializableUtils.ensureSerializable(AvroUtils.getGenericRecordToRowFunction(Schema.of()));
+ SerializableUtils.ensureSerializable(AvroUtils.getGenericRecordToRowFunction(null));
+ }
+
+ @Test
public void testAvroSchemaCoders() {
Pipeline pipeline = Pipeline.create();
org.apache.avro.Schema schema =
@@ -533,6 +547,7 @@
assertFalse(records.hasSchema());
records.setCoder(AvroUtils.schemaCoder(schema));
assertTrue(records.hasSchema());
+ CoderProperties.coderSerializable(records.getCoder());
AvroGeneratedUser user = new AvroGeneratedUser("foo", 42, "green");
PCollection<AvroGeneratedUser> users =
@@ -540,6 +555,7 @@
assertFalse(users.hasSchema());
users.setCoder(AvroUtils.schemaCoder((AvroCoder<AvroGeneratedUser>) users.getCoder()));
assertTrue(users.hasSchema());
+ CoderProperties.coderSerializable(users.getCoder());
}
public static ContainsField containsField(Function<org.apache.avro.Schema, Boolean> predicate) {
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ParDoTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ParDoTest.java
index db57335..884b9a6 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ParDoTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ParDoTest.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.sdk.transforms;
+import static junit.framework.TestCase.assertTrue;
import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasKey;
import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasType;
@@ -93,6 +94,7 @@
import org.apache.beam.sdk.testing.UsesStatefulParDo;
import org.apache.beam.sdk.testing.UsesStrictTimerOrdering;
import org.apache.beam.sdk.testing.UsesTestStream;
+import org.apache.beam.sdk.testing.UsesTestStreamWithOutputTimestamp;
import org.apache.beam.sdk.testing.UsesTestStreamWithProcessingTime;
import org.apache.beam.sdk.testing.UsesTimersInParDo;
import org.apache.beam.sdk.testing.ValidatesRunner;
@@ -3729,6 +3731,83 @@
pipeline.run();
}
+ @Test
+ @Category({
+ ValidatesRunner.class,
+ UsesStatefulParDo.class,
+ UsesTimersInParDo.class,
+ UsesTestStreamWithOutputTimestamp.class
+ })
+ public void testOutputTimestamp() {
+ final String timerId = "bar";
+ DoFn<KV<String, Integer>, KV<String, Integer>> fn1 =
+ new DoFn<KV<String, Integer>, KV<String, Integer>>() {
+
+ @TimerId(timerId)
+ private final TimerSpec timer = TimerSpecs.timer(TimeDomain.EVENT_TIME);
+
+ @ProcessElement
+ public void processElement(
+ @TimerId(timerId) Timer timer, OutputReceiver<KV<String, Integer>> o) {
+ timer.withOutputTimestamp(new Instant(5)).set(new Instant(10));
+ // Output a message. This will cause the next DoFn to set a timer as well.
+ o.output(KV.of("foo", 100));
+ }
+
+ @OnTimer(timerId)
+ public void onTimer(OnTimerContext c, BoundedWindow w) {}
+ };
+
+ DoFn<KV<String, Integer>, Integer> fn2 =
+ new DoFn<KV<String, Integer>, Integer>() {
+
+ @TimerId(timerId)
+ private final TimerSpec timer = TimerSpecs.timer(TimeDomain.EVENT_TIME);
+
+ @StateId("timerFired")
+ final StateSpec<ValueState<Boolean>> timerFiredState = StateSpecs.value();
+
+ @ProcessElement
+ public void processElement(
+ @TimerId(timerId) Timer timer,
+ @StateId("timerFired") ValueState<Boolean> timerFiredState) {
+ Boolean timerFired = timerFiredState.read();
+ assertTrue(timerFired == null || !timerFired);
+ // Set a timer to 8. This is earlier than the previous DoFn's timer, but after the previous
+ // DoFn timer's watermark hold. This timer should not fire until the previous timer fires and
+ // removes the watermark hold.
+ timer.set(new Instant(8));
+ }
+
+ @OnTimer(timerId)
+ public void onTimer(
+ @StateId("timerFired") ValueState<Boolean> timerFiredState,
+ OutputReceiver<Integer> o) {
+ timerFiredState.write(true);
+ o.output(100);
+ }
+ };
+
+ TestStream<KV<String, Integer>> stream =
+ TestStream.create(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))
+ .advanceWatermarkTo(new Instant(0))
+ // Cause fn2 to set a timer.
+ .addElements(KV.of("key", 1))
+ // Normally this would cause fn2's timer to expire, but it shouldn't here because of
+ // the output timestamp.
+ .advanceWatermarkTo(new Instant(9))
+ // If the timer fired, then this would cause fn2 to fail with an assertion error.
+ .addElements(KV.of("key", 1))
+ .advanceWatermarkToInfinity();
+ PCollection<Integer> output =
+ pipeline.apply(stream).apply("first", ParDo.of(fn1)).apply("second", ParDo.of(fn2));
+
+ PAssert.that(output).containsInAnyOrder(100); // result output
+ pipeline.run();
+ }
+
private static class TwoTimerTest extends PTransform<PBegin, PDone> {
private static PTransform<PBegin, PDone> of(
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/util/CoderUtilsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/util/CoderUtilsTest.java
index e566ebf..a681fbe 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/util/CoderUtilsTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/util/CoderUtilsTest.java
@@ -17,8 +17,8 @@
*/
package org.apache.beam.sdk.util;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/util/WindowedValueTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/util/WindowedValueTest.java
index 206b548..821c216 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/util/WindowedValueTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/util/WindowedValueTest.java
@@ -78,6 +78,12 @@
}
@Test
+ public void testParamWindowedValueCoderIsSerializableWithWellKnownCoderType() {
+ CoderProperties.coderSerializable(
+ WindowedValue.getParamWindowedValueCoder(GlobalWindow.Coder.INSTANCE));
+ }
+
+ @Test
public void testValueOnlyWindowedValueCoderIsSerializableWithWellKnownCoderType() {
CoderProperties.coderSerializable(WindowedValue.getValueOnlyCoder(GlobalWindow.Coder.INSTANCE));
}
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/values/PCollectionListTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/values/PCollectionListTest.java
index 21b384b..b13a4db 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/values/PCollectionListTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/values/PCollectionListTest.java
@@ -131,4 +131,17 @@
tester.testEquals();
}
+
+ @Test
+ public void testTagNames() {
+ Pipeline p = TestPipeline.create();
+ PCollection<String> first = p.apply("first", Create.of("1"));
+ PCollection<String> second = p.apply("second", Create.of("2"));
+ PCollection<String> third = p.apply("third", Create.of("3"));
+
+ PCollectionList<String> list = PCollectionList.of(first).and(second).and(third);
+ assertThat(list.pcollections.get(0).getTag().id, equalTo("0"));
+ assertThat(list.pcollections.get(1).getTag().id, equalTo("1"));
+ assertThat(list.pcollections.get(2).getTag().id, equalTo("2"));
+ }
}
diff --git a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystemTest.java b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystemTest.java
index 3547411..da62796 100644
--- a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystemTest.java
+++ b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystemTest.java
@@ -20,7 +20,7 @@
import static org.hamcrest.Matchers.contains;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
-import static org.mockito.Matchers.anyString;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Matchers.isNull;
import static org.mockito.Mockito.when;
diff --git a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/RetryHttpRequestInitializerTest.java b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/RetryHttpRequestInitializerTest.java
index 551f7bc..13a9fd7 100644
--- a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/RetryHttpRequestInitializerTest.java
+++ b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/RetryHttpRequestInitializerTest.java
@@ -21,9 +21,9 @@
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.fail;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyInt;
-import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.atLeastOnce;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
diff --git a/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoByteBuddyUtils.java b/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoByteBuddyUtils.java
new file mode 100644
index 0000000..6827fb1
--- /dev/null
+++ b/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoByteBuddyUtils.java
@@ -0,0 +1,663 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.protobuf;
+
+import static org.apache.beam.sdk.extensions.protobuf.ProtoSchemaTranslator.getFieldNumber;
+
+import com.google.protobuf.BoolValue;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.BytesValue;
+import com.google.protobuf.DoubleValue;
+import com.google.protobuf.Duration;
+import com.google.protobuf.FloatValue;
+import com.google.protobuf.Int32Value;
+import com.google.protobuf.Int64Value;
+import com.google.protobuf.Internal.EnumLite;
+import com.google.protobuf.MessageLite;
+import com.google.protobuf.ProtocolMessageEnum;
+import com.google.protobuf.StringValue;
+import com.google.protobuf.Timestamp;
+import com.google.protobuf.UInt32Value;
+import com.google.protobuf.UInt64Value;
+import java.io.Serializable;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.lang.reflect.Type;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.DurationNanos;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.TimestampNanos;
+import org.apache.beam.sdk.schemas.FieldValueGetter;
+import org.apache.beam.sdk.schemas.FieldValueSetter;
+import org.apache.beam.sdk.schemas.FieldValueTypeInformation;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.Schema.Field;
+import org.apache.beam.sdk.schemas.Schema.FieldType;
+import org.apache.beam.sdk.schemas.Schema.TypeName;
+import org.apache.beam.sdk.schemas.SchemaUserTypeCreator;
+import org.apache.beam.sdk.schemas.logicaltypes.OneOfType;
+import org.apache.beam.sdk.schemas.logicaltypes.OneOfType.Value;
+import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.ConvertType;
+import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.ConvertValueForGetter;
+import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.ConvertValueForSetter;
+import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.InjectPackageStrategy;
+import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.TypeConversion;
+import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.TypeConversionsFactory;
+import org.apache.beam.sdk.schemas.utils.FieldValueTypeSupplier;
+import org.apache.beam.sdk.schemas.utils.JavaBeanUtils;
+import org.apache.beam.sdk.schemas.utils.ReflectUtils;
+import org.apache.beam.sdk.schemas.utils.ReflectUtils.ClassWithSchema;
+import org.apache.beam.sdk.util.common.ReflectHelpers;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TypeDescriptor;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.ByteBuddy;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.description.method.MethodDescription;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.description.type.TypeDescription;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.description.type.TypeDescription.ForLoadedType;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.dynamic.DynamicType;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.dynamic.loading.ClassLoadingStrategy;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.dynamic.scaffold.InstrumentedType;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.implementation.Implementation;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.implementation.bytecode.ByteCodeAppender;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.implementation.bytecode.StackManipulation;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.implementation.bytecode.StackManipulation.Compound;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.implementation.bytecode.assign.Assigner;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.implementation.bytecode.assign.Assigner.Typing;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.implementation.bytecode.assign.TypeCasting;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.implementation.bytecode.member.MethodInvocation;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.implementation.bytecode.member.MethodReturn;
+import org.apache.beam.vendor.bytebuddy.v1_9_3.net.bytebuddy.matcher.ElementMatchers;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.CaseFormat;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Multimap;
+
+public class ProtoByteBuddyUtils {
+ private static final ByteBuddy BYTE_BUDDY = new ByteBuddy();
+ private static final TypeDescriptor<ByteString> BYTE_STRING_TYPE_DESCRIPTOR =
+ TypeDescriptor.of(ByteString.class);
+ private static final TypeDescriptor<Timestamp> PROTO_TIMESTAMP_TYPE_DESCRIPTOR =
+ TypeDescriptor.of(Timestamp.class);
+ private static final TypeDescriptor<Duration> PROTO_DURATION_TYPE_DESCRIPTOR =
+ TypeDescriptor.of(Duration.class);
+ private static final TypeDescriptor<Int32Value> PROTO_INT32_VALUE_TYPE_DESCRIPTOR =
+ TypeDescriptor.of(Int32Value.class);
+ private static final TypeDescriptor<Int64Value> PROTO_INT64_VALUE_TYPE_DESCRIPTOR =
+ TypeDescriptor.of(Int64Value.class);
+ private static final TypeDescriptor<UInt32Value> PROTO_UINT32_VALUE_TYPE_DESCRIPTOR =
+ TypeDescriptor.of(UInt32Value.class);
+ private static final TypeDescriptor<UInt64Value> PROTO_UINT64_VALUE_TYPE_DESCRIPTOR =
+ TypeDescriptor.of(UInt64Value.class);
+ private static final TypeDescriptor<FloatValue> PROTO_FLOAT_VALUE_TYPE_DESCRIPTOR =
+ TypeDescriptor.of(FloatValue.class);
+ private static final TypeDescriptor<DoubleValue> PROTO_DOUBLE_VALUE_TYPE_DESCRIPTOR =
+ TypeDescriptor.of(DoubleValue.class);
+ private static final TypeDescriptor<BoolValue> PROTO_BOOL_VALUE_TYPE_DESCRIPTOR =
+ TypeDescriptor.of(BoolValue.class);
+ private static final TypeDescriptor<StringValue> PROTO_STRING_VALUE_TYPE_DESCRIPTOR =
+ TypeDescriptor.of(StringValue.class);
+ private static final TypeDescriptor<BytesValue> PROTO_BYTES_VALUE_TYPE_DESCRIPTOR =
+ TypeDescriptor.of(BytesValue.class);
+
+ private static final ForLoadedType BYTE_STRING_TYPE = new ForLoadedType(ByteString.class);
+ private static final ForLoadedType BYTE_ARRAY_TYPE = new ForLoadedType(byte[].class);
+ private static final ForLoadedType PROTO_ENUM_TYPE = new ForLoadedType(ProtocolMessageEnum.class);
+ private static final ForLoadedType INTEGER_TYPE = new ForLoadedType(Integer.class);
+
+ private static final Map<TypeDescriptor<?>, ForLoadedType> WRAPPER_LOADED_TYPES =
+ ImmutableMap.<TypeDescriptor<?>, ForLoadedType>builder()
+ .put(PROTO_INT32_VALUE_TYPE_DESCRIPTOR, new ForLoadedType(Int32Value.class))
+ .put(PROTO_INT64_VALUE_TYPE_DESCRIPTOR, new ForLoadedType(Int64Value.class))
+ .put(PROTO_UINT32_VALUE_TYPE_DESCRIPTOR, new ForLoadedType(UInt32Value.class))
+ .put(PROTO_UINT64_VALUE_TYPE_DESCRIPTOR, new ForLoadedType(UInt64Value.class))
+ .put(PROTO_FLOAT_VALUE_TYPE_DESCRIPTOR, new ForLoadedType(FloatValue.class))
+ .put(PROTO_DOUBLE_VALUE_TYPE_DESCRIPTOR, new ForLoadedType(DoubleValue.class))
+ .put(PROTO_BOOL_VALUE_TYPE_DESCRIPTOR, new ForLoadedType(BoolValue.class))
+ .put(PROTO_STRING_VALUE_TYPE_DESCRIPTOR, new ForLoadedType(StringValue.class))
+ .put(PROTO_BYTES_VALUE_TYPE_DESCRIPTOR, new ForLoadedType(BytesValue.class))
+ .build();
+
+ private static final Map<TypeName, String> PROTO_GETTER_SUFFIX =
+ ImmutableMap.of(
+ TypeName.ARRAY, "List",
+ TypeName.ITERABLE, "List",
+ TypeName.MAP, "Map");
+
+ private static final Map<TypeName, String> PROTO_SETTER_PREFIX =
+ ImmutableMap.of(
+ TypeName.ARRAY, "addAll",
+ TypeName.ITERABLE, "addAll",
+ TypeName.MAP, "putAll");
+ private static final String DEFAULT_PROTO_GETTER_PREFIX = "get";
+ private static final String DEFAULT_PROTO_SETTER_PREFIX = "set";
+
+ static String protoGetterName(String name, FieldType fieldType) {
+ final String camel = CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, name);
+ return DEFAULT_PROTO_GETTER_PREFIX
+ + camel
+ + PROTO_GETTER_SUFFIX.getOrDefault(fieldType.getTypeName(), "");
+ }
+
+ static String protoSetterName(String name, FieldType fieldType) {
+ final String camel = CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, name);
+ return protoSetterPrefix(fieldType) + camel;
+ }
+
+ static String protoSetterPrefix(FieldType fieldType) {
+ return PROTO_SETTER_PREFIX.getOrDefault(fieldType.getTypeName(), DEFAULT_PROTO_SETTER_PREFIX);
+ }
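+
+  // Illustrative examples (assumed field names, not part of this change): for a repeated proto
+  // field "phone_number" these yield "getPhoneNumberList" and "addAllPhoneNumber"; for a singular
+  // field "name" they yield "getName" and "setName".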
+
+ static class ProtoConvertType extends ConvertType {
+ ProtoConvertType(boolean returnRawValues) {
+ super(returnRawValues);
+ }
+
+ private static final Map<TypeDescriptor<?>, Class<?>> TYPE_OVERRIDES =
+ ImmutableMap.<TypeDescriptor<?>, Class<?>>builder()
+ .put(PROTO_TIMESTAMP_TYPE_DESCRIPTOR, Row.class)
+ .put(PROTO_DURATION_TYPE_DESCRIPTOR, Row.class)
+ .put(PROTO_INT32_VALUE_TYPE_DESCRIPTOR, Integer.class)
+ .put(PROTO_INT64_VALUE_TYPE_DESCRIPTOR, Long.class)
+ .put(PROTO_UINT32_VALUE_TYPE_DESCRIPTOR, Integer.class)
+ .put(PROTO_UINT64_VALUE_TYPE_DESCRIPTOR, Long.class)
+ .put(PROTO_FLOAT_VALUE_TYPE_DESCRIPTOR, Float.class)
+ .put(PROTO_DOUBLE_VALUE_TYPE_DESCRIPTOR, Double.class)
+ .put(PROTO_BOOL_VALUE_TYPE_DESCRIPTOR, Boolean.class)
+ .put(PROTO_STRING_VALUE_TYPE_DESCRIPTOR, String.class)
+ .put(PROTO_BYTES_VALUE_TYPE_DESCRIPTOR, byte[].class)
+ .build();
+
+ @Override
+ public Type convert(TypeDescriptor typeDescriptor) {
+ if (typeDescriptor.equals(BYTE_STRING_TYPE_DESCRIPTOR)
+ || typeDescriptor.isSubtypeOf(BYTE_STRING_TYPE_DESCRIPTOR)) {
+ return byte[].class;
+ } else if (typeDescriptor.isSubtypeOf(TypeDescriptor.of(ProtocolMessageEnum.class))) {
+ return Integer.class;
+ } else if (typeDescriptor.equals(PROTO_TIMESTAMP_TYPE_DESCRIPTOR)
+ || typeDescriptor.equals(PROTO_DURATION_TYPE_DESCRIPTOR)) {
+ return Row.class;
+ } else {
+ Type type = TYPE_OVERRIDES.get(typeDescriptor);
+ return (type != null) ? type : super.convert(typeDescriptor);
+ }
+ }
+ }
+
+ static class ProtoConvertValueForGetter extends ConvertValueForGetter {
+ ProtoConvertValueForGetter(StackManipulation readValue) {
+ super(readValue);
+ }
+
+ @Override
+ protected ProtoTypeConversionsFactory getFactory() {
+ return new ProtoTypeConversionsFactory();
+ }
+
+ @Override
+ public StackManipulation convert(TypeDescriptor type) {
+ if (type.equals(BYTE_STRING_TYPE_DESCRIPTOR)
+ || type.isSubtypeOf(BYTE_STRING_TYPE_DESCRIPTOR)) {
+ return new Compound(
+ readValue,
+ MethodInvocation.invoke(
+ BYTE_STRING_TYPE
+ .getDeclaredMethods()
+ .filter(ElementMatchers.named("toByteArray"))
+ .getOnly()));
+ } else if (type.isSubtypeOf(TypeDescriptor.of(ProtocolMessageEnum.class))) {
+ return new Compound(
+ readValue,
+ MethodInvocation.invoke(
+ PROTO_ENUM_TYPE
+ .getDeclaredMethods()
+ .filter(
+ ElementMatchers.named("getNumber").and(ElementMatchers.takesArguments(0)))
+ .getOnly()),
+ Assigner.DEFAULT.assign(
+ INTEGER_TYPE.asUnboxed().asGenericType(),
+ INTEGER_TYPE.asGenericType(),
+ Typing.STATIC));
+ } else if (type.equals(PROTO_TIMESTAMP_TYPE_DESCRIPTOR)) {
+ return new Compound(
+ readValue,
+ MethodInvocation.invoke(
+ new ForLoadedType(TimestampNanos.class)
+ .getDeclaredMethods()
+ .filter(ElementMatchers.named("toRow"))
+ .getOnly()));
+ } else if (type.equals(PROTO_DURATION_TYPE_DESCRIPTOR)) {
+ return new Compound(
+ readValue,
+ MethodInvocation.invoke(
+ new ForLoadedType(DurationNanos.class)
+ .getDeclaredMethods()
+ .filter(ElementMatchers.named("toRow"))
+ .getOnly()));
+ } else {
+ ForLoadedType wrapperType = WRAPPER_LOADED_TYPES.get(type);
+ if (wrapperType != null) {
+ MethodDescription.InDefinedShape getValueMethod =
+ wrapperType.getDeclaredMethods().filter(ElementMatchers.named("getValue")).getOnly();
+ TypeDescription.Generic returnType = getValueMethod.getReturnType();
+ StackManipulation stackManipulation =
+ new Compound(
+ readValue,
+ MethodInvocation.invoke(getValueMethod),
+ Assigner.DEFAULT.assign(
+ returnType, returnType.asErasure().asBoxed().asGenericType(), Typing.STATIC));
+ if (type.equals(PROTO_BYTES_VALUE_TYPE_DESCRIPTOR)) {
+ stackManipulation =
+ getFactory()
+ .createGetterConversions(stackManipulation)
+ .convert(BYTE_STRING_TYPE_DESCRIPTOR);
+ }
+ return stackManipulation;
+ }
+ return super.convert(type);
+ }
+ }
+ }
+
+ static class ProtoConvertValueForSetter extends ConvertValueForSetter {
+ ProtoConvertValueForSetter(StackManipulation readValue) {
+ super(readValue);
+ }
+
+ @Override
+ protected ProtoTypeConversionsFactory getFactory() {
+ return new ProtoTypeConversionsFactory();
+ }
+
+ @Override
+ public StackManipulation convert(TypeDescriptor type) {
+ if (type.isSubtypeOf(TypeDescriptor.of(ByteString.class))) {
+ return new Compound(
+ readValue,
+ TypeCasting.to(BYTE_ARRAY_TYPE),
+ MethodInvocation.invoke(
+ BYTE_STRING_TYPE
+ .getDeclaredMethods()
+ .filter(
+ ElementMatchers.named("copyFrom")
+ .and(ElementMatchers.takesArguments(BYTE_ARRAY_TYPE)))
+ .getOnly()));
+ } else if (type.isSubtypeOf(TypeDescriptor.of(ProtocolMessageEnum.class))) {
+ ForLoadedType loadedType = new ForLoadedType(type.getRawType());
+ // Convert the stored number back to the enum constant.
+ return new Compound(
+ readValue,
+ Assigner.DEFAULT.assign(
+ INTEGER_TYPE.asBoxed().asGenericType(),
+ INTEGER_TYPE.asUnboxed().asGenericType(),
+ Typing.STATIC),
+ MethodInvocation.invoke(
+ loadedType
+ .getDeclaredMethods()
+ .filter(
+ ElementMatchers.named("forNumber")
+ .and(ElementMatchers.isStatic().and(ElementMatchers.takesArguments(1))))
+ .getOnly()));
+ } else if (type.equals(PROTO_TIMESTAMP_TYPE_DESCRIPTOR)) {
+ return new Compound(
+ readValue,
+ MethodInvocation.invoke(
+ new ForLoadedType(TimestampNanos.class)
+ .getDeclaredMethods()
+ .filter(ElementMatchers.named("toTimestamp"))
+ .getOnly()));
+ } else if (type.equals(PROTO_DURATION_TYPE_DESCRIPTOR)) {
+ return new Compound(
+ readValue,
+ MethodInvocation.invoke(
+ new ForLoadedType(DurationNanos.class)
+ .getDeclaredMethods()
+ .filter(ElementMatchers.named("toDuration"))
+ .getOnly()));
+ } else {
+ ForLoadedType wrapperType = WRAPPER_LOADED_TYPES.get(type);
+ if (wrapperType != null) {
+ if (type.equals(PROTO_BYTES_VALUE_TYPE_DESCRIPTOR)) {
+ readValue =
+ getFactory()
+ .createSetterConversions(readValue)
+ .convert(TypeDescriptor.of(ByteString.class));
+ }
+ MethodDescription.InDefinedShape ofMethod =
+ wrapperType.getDeclaredMethods().filter(ElementMatchers.named("of")).getOnly();
+ TypeDescription.Generic argumentType = ofMethod.getParameters().get(0).getType();
+ return new Compound(
+ readValue,
+ Assigner.DEFAULT.assign(
+ argumentType.asErasure().asBoxed().asGenericType(), argumentType, Typing.STATIC),
+ MethodInvocation.invoke(ofMethod));
+ } else {
+ return super.convert(type);
+ }
+ }
+ }
+ }
+
+ static class ProtoTypeConversionsFactory implements TypeConversionsFactory {
+ @Override
+ public TypeConversion<Type> createTypeConversion(boolean returnRawTypes) {
+ return new ProtoConvertType(returnRawTypes);
+ }
+
+ @Override
+ public TypeConversion<StackManipulation> createGetterConversions(StackManipulation readValue) {
+ return new ProtoConvertValueForGetter(readValue);
+ }
+
+ @Override
+ public TypeConversion<StackManipulation> createSetterConversions(StackManipulation readValue) {
+ return new ProtoConvertValueForSetter(readValue);
+ }
+ }
+
+ // The list of getters for a class is cached, so we only create the classes the first time
+ // getGetters is called.
+ private static final Map<ClassWithSchema, List<FieldValueGetter>> CACHED_GETTERS =
+ Maps.newConcurrentMap();
+
+ /**
+ * Returns the list of {@link FieldValueGetter}s for the given class.
+ *
+ * <p>The returned list is ordered by the order of fields in the schema.
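+ *
+ * <p>A minimal usage sketch, assuming a generated proto message class {@code MyProto} and some
+ * {@link FieldValueTypeSupplier} instance (both names are illustrative only):
+ *
+ * <pre>{@code
+ * List<FieldValueGetter> getters =
+ *     ProtoByteBuddyUtils.getGetters(
+ *         MyProto.class,
+ *         ProtoSchemaTranslator.getSchema(MyProto.class),
+ *         fieldValueTypeSupplier,                 // a FieldValueTypeSupplier implementation
+ *         new ProtoTypeConversionsFactory());
+ * }</pre>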
+ */
+ public static List<FieldValueGetter> getGetters(
+ Class<?> clazz,
+ Schema schema,
+ FieldValueTypeSupplier fieldValueTypeSupplier,
+ TypeConversionsFactory typeConversionsFactory) {
+ Multimap<String, Method> methods = ReflectUtils.getMethodsMap(clazz);
+ return CACHED_GETTERS.computeIfAbsent(
+ ClassWithSchema.create(clazz, schema),
+ c -> {
+ List<FieldValueTypeInformation> types = fieldValueTypeSupplier.get(clazz, schema);
+ return types.stream()
+ .map(
+ t ->
+ createGetter(
+ t,
+ typeConversionsFactory,
+ clazz,
+ methods,
+ schema.getField(t.getName()),
+ fieldValueTypeSupplier))
+ .collect(Collectors.toList());
+ });
+ }
+
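+ /**
+ * A {@link FieldValueGetter} for proto oneof fields: reads the generated case enum of the oneof
+ * and dispatches to the getter registered for the field number that is currently set, returning
+ * null when no option is set.
+ */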
+ static class OneOfFieldValueGetter<ProtoT extends MessageLite>
+ implements FieldValueGetter<ProtoT, OneOfType.Value> {
+ private final String name;
+ private final Supplier<Method> getCaseMethod;
+ private final Map<Integer, FieldValueGetter<ProtoT, ?>> getterMethodMap;
+ private final OneOfType oneOfType;
+
+ public OneOfFieldValueGetter(
+ String name,
+ Supplier<Method> getCaseMethod,
+ Map<Integer, FieldValueGetter<ProtoT, ?>> getterMethodMap,
+ OneOfType oneOfType) {
+ this.name = name;
+ this.getCaseMethod = getCaseMethod;
+ this.getterMethodMap = getterMethodMap;
+ this.oneOfType = oneOfType;
+ }
+
+ @Nullable
+ @Override
+ public Value get(ProtoT object) {
+ try {
+ EnumLite caseValue = (EnumLite) getCaseMethod.get().invoke(object);
+ if (caseValue.getNumber() == 0) {
+ return null;
+ } else {
+ Object value = getterMethodMap.get(caseValue.getNumber()).get(object);
+ return oneOfType.createValue(
+ oneOfType.getCaseEnumType().valueOf(caseValue.getNumber()), value);
+ }
+ } catch (IllegalAccessException | InvocationTargetException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public String name() {
+ return name;
+ }
+ }
+
+ private static FieldValueGetter createGetter(
+ FieldValueTypeInformation fieldValueTypeInformation,
+ TypeConversionsFactory typeConversionsFactory,
+ Class clazz,
+ Multimap<String, Method> methods,
+ Field field,
+ FieldValueTypeSupplier fieldValueTypeSupplier) {
+ if (field.getType().isLogicalType(OneOfType.IDENTIFIER)) {
+ OneOfType oneOfType = field.getType().getLogicalType(OneOfType.class);
+
+ // The case accessor method in the proto is named getOneOfNameCase.
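+ // For example, a oneof declared as "special_oneof" would (illustratively) be read through a
+ // generated getSpecialOneofCase() accessor.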
+ Method caseMethod =
+ getProtoGetter(
+ methods,
+ field.getName() + "_case",
+ FieldType.logicalType(oneOfType.getCaseEnumType()));
+ Map<Integer, FieldValueGetter> oneOfGetters = Maps.newHashMap();
+ Map<String, FieldValueTypeInformation> oneOfFieldTypes =
+ fieldValueTypeSupplier.get(clazz, oneOfType.getOneOfSchema()).stream()
+ .collect(Collectors.toMap(FieldValueTypeInformation::getName, f -> f));
+ for (Field oneOfField : oneOfType.getOneOfSchema().getFields()) {
+ int protoFieldIndex = getFieldNumber(oneOfField.getType());
+ FieldValueGetter oneOfFieldGetter =
+ createGetter(
+ oneOfFieldTypes.get(oneOfField.getName()),
+ typeConversionsFactory,
+ clazz,
+ methods,
+ oneOfField,
+ fieldValueTypeSupplier);
+ oneOfGetters.put(protoFieldIndex, oneOfFieldGetter);
+ }
+ return new OneOfFieldValueGetter(
+ field.getName(),
+ (Supplier<Method> & Serializable) () -> caseMethod,
+ oneOfGetters,
+ oneOfType);
+ } else {
+ return JavaBeanUtils.createGetter(fieldValueTypeInformation, typeConversionsFactory);
+ }
+ }
+
+ private static Class getProtoGeneratedBuilder(Class<?> clazz) {
+ String builderClassName = clazz.getName() + "$Builder";
+ try {
+ return Class.forName(builderClassName);
+ } catch (ClassNotFoundException e) {
+ return null;
+ }
+ }
+
+ static Method getProtoSetter(Multimap<String, Method> methods, String name, FieldType fieldType) {
+ final TypeDescriptor<MessageLite.Builder> builderDescriptor =
+ TypeDescriptor.of(MessageLite.Builder.class);
+ return methods.get(protoSetterName(name, fieldType)).stream()
+ // Setter methods take only a single parameter.
+ .filter(m -> m.getParameterCount() == 1)
+ // For nested types, we don't use the version that takes a builder.
+ .filter(
+ m -> !TypeDescriptor.of(m.getGenericParameterTypes()[0]).isSubtypeOf(builderDescriptor))
+ .findAny()
+ .orElseThrow(IllegalArgumentException::new);
+ }
+
+ static Method getProtoGetter(Multimap<String, Method> methods, String name, FieldType fieldType) {
+ return methods.get(protoGetterName(name, fieldType)).stream()
+ .filter(m -> m.getParameterCount() == 0)
+ .findAny()
+ .orElseThrow(IllegalArgumentException::new);
+ }
+
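+ /**
+ * Returns a {@link SchemaUserTypeCreator} that constructs a proto message through its generated
+ * Builder class, or null if no nested Builder class can be found for the proto class.
+ */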
+ @Nullable
+ public static SchemaUserTypeCreator getBuilderCreator(
+ Class<?> protoClass, Schema schema, FieldValueTypeSupplier fieldValueTypeSupplier) {
+ Class<?> builderClass = getProtoGeneratedBuilder(protoClass);
+ if (builderClass == null) {
+ return null;
+ }
+
+ List<FieldValueSetter> setters = Lists.newArrayListWithCapacity(schema.getFieldCount());
+ Multimap<String, Method> methods = ReflectUtils.getMethodsMap(builderClass);
+ for (Field field : schema.getFields()) {
+ if (field.getType().isLogicalType(OneOfType.IDENTIFIER)) {
+ OneOfType oneOfType = field.getType().getLogicalType(OneOfType.class);
+ Map<Integer, Method> oneOfMethods = Maps.newHashMap();
+ for (Field oneOfField : oneOfType.getOneOfSchema().getFields()) {
+ Method method = getProtoSetter(methods, oneOfField.getName(), oneOfField.getType());
+ oneOfMethods.put(getFieldNumber(oneOfField.getType()), method);
+ }
+ setters.add(
+ new ProtoOneOfSetter(
+ (Function<Integer, Method> & Serializable) oneOfMethods::get, field.getName()));
+ } else {
+ Method method = getProtoSetter(methods, field.getName(), field.getType());
+ setters.add(
+ JavaBeanUtils.createSetter(
+ FieldValueTypeInformation.forSetter(method, protoSetterPrefix(field.getType())),
+ new ProtoTypeConversionsFactory()));
+ }
+ }
+ return createBuilderCreator(protoClass, builderClass, setters, schema);
+ }
+
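+ /**
+ * A {@link FieldValueSetter} for proto oneof fields: looks up the builder setter for the case
+ * carried by the {@link OneOfType.Value} and invokes it with the wrapped value.
+ */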
+ static class ProtoOneOfSetter<BuilderT extends MessageLite.Builder>
+ implements FieldValueSetter<BuilderT, OneOfType.Value> {
+ private final Function<Integer, Method> methods;
+ private final String name;
+
+ ProtoOneOfSetter(Function<Integer, Method> methods, String name) {
+ this.methods = methods;
+ this.name = name;
+ }
+
+ @Override
+ public void set(BuilderT builder, OneOfType.Value oneOfValue) {
+ Method method = methods.apply(oneOfValue.getCaseType().getValue());
+ try {
+ method.invoke(builder, oneOfValue.getValue());
+ } catch (IllegalAccessException | InvocationTargetException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public String name() {
+ return name;
+ }
+ }
+
+ static SchemaUserTypeCreator createBuilderCreator(
+ Class<?> protoClass, Class<?> builderClass, List<FieldValueSetter> setters, Schema schema) {
+ try {
+ DynamicType.Builder<Supplier> builder =
+ BYTE_BUDDY
+ .with(new InjectPackageStrategy(builderClass))
+ .subclass(Supplier.class)
+ .method(ElementMatchers.named("get"))
+ .intercept(new BuilderSupplier(protoClass));
+ Supplier supplier =
+ builder
+ .make()
+ .load(ReflectHelpers.findClassLoader(), ClassLoadingStrategy.Default.INJECTION)
+ .getLoaded()
+ .getDeclaredConstructor()
+ .newInstance();
+ return new ProtoCreatorFactory(supplier, setters);
+ } catch (InstantiationException
+ | IllegalAccessException
+ | NoSuchMethodException
+ | InvocationTargetException e) {
+ throw new RuntimeException(
+ "Unable to generate a creator for class " + builderClass + " with schema " + schema);
+ }
+ }
+
+ static class ProtoCreatorFactory implements SchemaUserTypeCreator {
+ private final Supplier<? extends MessageLite.Builder> builderCreator;
+ private final List<FieldValueSetter> setters;
+
+ public ProtoCreatorFactory(
+ Supplier<? extends MessageLite.Builder> builderCreator, List<FieldValueSetter> setters) {
+ this.builderCreator = builderCreator;
+ this.setters = setters;
+ }
+
+ @Override
+ public Object create(Object... params) {
+ MessageLite.Builder builder = builderCreator.get();
+ for (int i = 0; i < params.length; ++i) {
+ setters.get(i).set(builder, params[i]);
+ }
+ return builder.build();
+ }
+ }
+
+ static class BuilderSupplier implements Implementation {
+ private final Class<?> protoClass;
+
+ public BuilderSupplier(Class<?> protoClass) {
+ this.protoClass = protoClass;
+ }
+
+ @Override
+ public InstrumentedType prepare(InstrumentedType instrumentedType) {
+ return instrumentedType;
+ }
+
+ @Override
+ public ByteCodeAppender appender(final Target implementationTarget) {
+ ForLoadedType loadedProto = new ForLoadedType(protoClass);
+ return (methodVisitor, implementationContext, instrumentedMethod) -> {
+ // this + method parameters.
+ int numLocals = 1 + instrumentedMethod.getParameters().size();
+
+ // Create the builder object by calling ProtoClass.newBuilder().
+ StackManipulation stackManipulation =
+ new StackManipulation.Compound(
+ MethodInvocation.invoke(
+ loadedProto
+ .getDeclaredMethods()
+ .filter(
+ ElementMatchers.named("newBuilder")
+ .and(ElementMatchers.takesArguments(0)))
+ .getOnly()),
+ MethodReturn.REFERENCE);
+ StackManipulation.Size size = stackManipulation.apply(methodVisitor, implementationContext);
+ return new ByteCodeAppender.Size(size.getMaximalSize(), numLocals);
+ };
+ }
+ }
+}
diff --git a/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoMessageSchema.java b/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoMessageSchema.java
new file mode 100644
index 0000000..47a928c
--- /dev/null
+++ b/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoMessageSchema.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.protobuf;
+
+import static org.apache.beam.sdk.extensions.protobuf.ProtoByteBuddyUtils.getProtoGetter;
+
+import com.google.protobuf.DynamicMessage;
+import com.google.protobuf.Message;
+import java.lang.reflect.Method;
+import java.util.List;
+import java.util.Map;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.annotations.Experimental;
+import org.apache.beam.sdk.annotations.Experimental.Kind;
+import org.apache.beam.sdk.extensions.protobuf.ProtoByteBuddyUtils.ProtoTypeConversionsFactory;
+import org.apache.beam.sdk.schemas.FieldValueGetter;
+import org.apache.beam.sdk.schemas.FieldValueTypeInformation;
+import org.apache.beam.sdk.schemas.GetterBasedSchemaProvider;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.Schema.Field;
+import org.apache.beam.sdk.schemas.SchemaUserTypeCreator;
+import org.apache.beam.sdk.schemas.logicaltypes.OneOfType;
+import org.apache.beam.sdk.schemas.utils.FieldValueTypeSupplier;
+import org.apache.beam.sdk.schemas.utils.JavaBeanUtils;
+import org.apache.beam.sdk.schemas.utils.ReflectUtils;
+import org.apache.beam.sdk.values.TypeDescriptor;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Multimap;
+
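+/**
+ * A {@link GetterBasedSchemaProvider} for protocol buffer {@link Message} classes.
+ *
+ * <p>A minimal usage sketch, assuming a generated proto message class {@code MyProto} (the class
+ * name is illustrative only):
+ *
+ * <pre>{@code
+ * ProtoMessageSchema provider = new ProtoMessageSchema();
+ * Schema schema = provider.schemaFor(TypeDescriptor.of(MyProto.class));
+ * SerializableFunction<MyProto, Row> toRow = provider.toRowFunction(TypeDescriptor.of(MyProto.class));
+ * SerializableFunction<Row, MyProto> fromRow = provider.fromRowFunction(TypeDescriptor.of(MyProto.class));
+ * }</pre>
+ */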
+@Experimental(Kind.SCHEMAS)
+public class ProtoMessageSchema extends GetterBasedSchemaProvider {
+
+ private static final class ProtoClassFieldValueTypeSupplier implements FieldValueTypeSupplier {
+ @Override
+ public List<FieldValueTypeInformation> get(Class<?> clazz) {
+ throw new RuntimeException("Unexpected call.");
+ }
+
+ @Override
+ public List<FieldValueTypeInformation> get(Class<?> clazz, Schema schema) {
+ Multimap<String, Method> methods = ReflectUtils.getMethodsMap(clazz);
+ List<FieldValueTypeInformation> types =
+ Lists.newArrayListWithCapacity(schema.getFieldCount());
+ for (Field field : schema.getFields()) {
+ if (field.getType().isLogicalType(OneOfType.IDENTIFIER)) {
+ // This is a OneOf. Look for the getters for each OneOf option.
+ OneOfType oneOfType = field.getType().getLogicalType(OneOfType.class);
+ Map<String, FieldValueTypeInformation> oneOfTypes = Maps.newHashMap();
+ for (Field oneOfField : oneOfType.getOneOfSchema().getFields()) {
+ Method method = getProtoGetter(methods, oneOfField.getName(), oneOfField.getType());
+ oneOfTypes.put(
+ oneOfField.getName(),
+ FieldValueTypeInformation.forGetter(method).withName(field.getName()));
+ }
+ // Add an entry that encapsulates information about all possible getters.
+ types.add(
+ FieldValueTypeInformation.forOneOf(
+ field.getName(), field.getType().getNullable(), oneOfTypes)
+ .withName(field.getName()));
+ } else {
+ // This is a simple field. Add the getter.
+ Method method = getProtoGetter(methods, field.getName(), field.getType());
+ types.add(FieldValueTypeInformation.forGetter(method).withName(field.getName()));
+ }
+ }
+ return types;
+ }
+ }
+
+ @Nullable
+ @Override
+ public <T> Schema schemaFor(TypeDescriptor<T> typeDescriptor) {
+ checkForDynamicType(typeDescriptor);
+ return ProtoSchemaTranslator.getSchema((Class<Message>) typeDescriptor.getRawType());
+ }
+
+ @Override
+ public List<FieldValueGetter> fieldValueGetters(Class<?> targetClass, Schema schema) {
+ return ProtoByteBuddyUtils.getGetters(
+ targetClass,
+ schema,
+ new ProtoClassFieldValueTypeSupplier(),
+ new ProtoTypeConversionsFactory());
+ }
+
+ @Override
+ public List<FieldValueTypeInformation> fieldValueTypeInformations(
+ Class<?> targetClass, Schema schema) {
+ return JavaBeanUtils.getFieldTypes(targetClass, schema, new ProtoClassFieldValueTypeSupplier());
+ }
+
+ @Override
+ public SchemaUserTypeCreator schemaTypeCreator(Class<?> targetClass, Schema schema) {
+ SchemaUserTypeCreator creator =
+ ProtoByteBuddyUtils.getBuilderCreator(
+ targetClass, schema, new ProtoClassFieldValueTypeSupplier());
+ if (creator == null) {
+ throw new RuntimeException("Cannot create creator for " + targetClass);
+ }
+ return creator;
+ }
+
+ private <T> void checkForDynamicType(TypeDescriptor<T> typeDescriptor) {
+ if (typeDescriptor.getRawType().equals(DynamicMessage.class)) {
+ throw new RuntimeException(
+ "DynamicMessage is not allowed for the standard ProtoSchemaProvider, use ProtoDynamicMessageSchema instead.");
+ }
+ }
+}
diff --git a/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoSchemaLogicalTypes.java b/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoSchemaLogicalTypes.java
new file mode 100644
index 0000000..0d4a5a6
--- /dev/null
+++ b/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoSchemaLogicalTypes.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.protobuf;
+
+import com.google.protobuf.Duration;
+import com.google.protobuf.Timestamp;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.Schema.FieldType;
+import org.apache.beam.sdk.schemas.Schema.LogicalType;
+import org.apache.beam.sdk.schemas.logicaltypes.PassThroughLogicalType;
+import org.apache.beam.sdk.values.Row;
+
+/** A set of {@link LogicalType} classes to represent protocol buffer types. */
+public class ProtoSchemaLogicalTypes {
+ /** Base class for types representing timestamps or durations as nanoseconds. */
+ public abstract static class NanosType<T> implements LogicalType<T, Row> {
+ private final String identifier;
+
+ private static final Schema SCHEMA =
+ Schema.builder().addInt64Field("seconds").addInt32Field("nanos").build();
+
+ protected NanosType(String identifier) {
+ this.identifier = identifier;
+ }
+
+ @Override
+ public String getIdentifier() {
+ return identifier;
+ }
+
+ @Override
+ public FieldType getArgumentType() {
+ return FieldType.STRING;
+ }
+
+ @Override
+ public FieldType getBaseType() {
+ return FieldType.row(SCHEMA);
+ }
+ }
+
+ /** A timestamp represented as nanoseconds since the epoch. */
+ public static class TimestampNanos extends NanosType<Timestamp> {
+ public static final String IDENTIFIER = "ProtoTimestamp";
+
+ public TimestampNanos() {
+ super(IDENTIFIER);
+ }
+
+ @Override
+ public Row toBaseType(Timestamp input) {
+ return toRow(input);
+ }
+
+ @Override
+ public Timestamp toInputType(Row base) {
+ return toTimestamp(base);
+ }
+
+ public static Row toRow(Timestamp input) {
+ return Row.withSchema(NanosType.SCHEMA)
+ .addValues(input.getSeconds(), input.getNanos())
+ .build();
+ }
+
+ public static Timestamp toTimestamp(Row row) {
+ return Timestamp.newBuilder().setSeconds(row.getInt64(0)).setNanos(row.getInt32(1)).build();
+ }
+ }
+
+ /** A duration represented in nanoseconds. */
+ public static class DurationNanos extends NanosType<Duration> {
+ public static final String IDENTIFIER = "ProtoDuration";
+
+ public DurationNanos() {
+ super(IDENTIFIER);
+ }
+
+ @Override
+ public Row toBaseType(Duration input) {
+ return toRow(input);
+ }
+
+ @Override
+ public Duration toInputType(Row base) {
+ return toDuration(base);
+ }
+
+ public static Row toRow(Duration input) {
+ return Row.withSchema(NanosType.SCHEMA)
+ .addValues(input.getSeconds(), input.getNanos())
+ .build();
+ }
+
+ public static Duration toDuration(Row row) {
+ return Duration.newBuilder().setSeconds(row.getInt64(0)).setNanos(row.getInt32(1)).build();
+ }
+ }
+
+ /** A UInt32 type. */
+ public static class UInt32 extends PassThroughLogicalType<Integer> {
+ public static final String IDENTIFIER = "Uint32";
+
+ UInt32() {
+ super(IDENTIFIER, FieldType.STRING, "", FieldType.INT32);
+ }
+ }
+
+ /** A SInt32 type. */
+ public static class SInt32 extends PassThroughLogicalType<Integer> {
+ public static final String IDENTIFIER = "Sint32";
+
+ SInt32() {
+ super(IDENTIFIER, FieldType.STRING, "", FieldType.INT32);
+ }
+ }
+
+ /** A Fixed32 type. */
+ public static class Fixed32 extends PassThroughLogicalType<Integer> {
+ public static final String IDENTIFIER = "Fixed32";
+
+ Fixed32() {
+ super(IDENTIFIER, FieldType.STRING, "", FieldType.INT32);
+ }
+ }
+
+ /** A SFixed32 type. */
+ public static class SFixed32 extends PassThroughLogicalType<Integer> {
+ public static final String IDENTIFIER = "SFixed32";
+
+ SFixed32() {
+ super(IDENTIFIER, FieldType.STRING, "", FieldType.INT32);
+ }
+ }
+
+ /** A UInt64 type. */
+ public static class UInt64 extends PassThroughLogicalType<Long> {
+ public static final String IDENTIFIER = "Uint64";
+
+ UInt64() {
+ super(IDENTIFIER, FieldType.STRING, "", FieldType.INT64);
+ }
+ }
+
+ /** A SInt64 type. */
+ public static class SInt64 extends PassThroughLogicalType<Long> {
+ public static final String IDENTIFIER = "Sint64";
+
+ SInt64() {
+ super(IDENTIFIER, FieldType.STRING, "", FieldType.INT64);
+ }
+ }
+
+ /** A Fixed64 type. */
+ public static class Fixed64 extends PassThroughLogicalType<Long> {
+ public static final String IDENTIFIER = "Fixed64";
+
+ Fixed64() {
+ super(IDENTIFIER, FieldType.STRING, "", FieldType.INT64);
+ }
+ }
+
+ /** An SFixed64 type. */
+ public static class SFixed64 extends PassThroughLogicalType<Long> {
+ public static final String IDENTIFIER = "SFixed64";
+
+ SFixed64() {
+ super(IDENTIFIER, FieldType.STRING, "", FieldType.INT64);
+ }
+ }
+}
diff --git a/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoSchemaTranslator.java b/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoSchemaTranslator.java
new file mode 100644
index 0000000..d27f480
--- /dev/null
+++ b/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoSchemaTranslator.java
@@ -0,0 +1,280 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.protobuf;
+
+import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
+
+import com.google.protobuf.Descriptors;
+import com.google.protobuf.Descriptors.EnumValueDescriptor;
+import com.google.protobuf.Descriptors.FieldDescriptor;
+import com.google.protobuf.Descriptors.OneofDescriptor;
+import com.google.protobuf.Message;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.beam.sdk.annotations.Experimental;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.DurationNanos;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.Fixed32;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.Fixed64;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.SFixed32;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.SFixed64;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.SInt32;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.SInt64;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.TimestampNanos;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.UInt32;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.UInt64;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.Schema.Field;
+import org.apache.beam.sdk.schemas.Schema.FieldType;
+import org.apache.beam.sdk.schemas.logicaltypes.EnumerationType;
+import org.apache.beam.sdk.schemas.logicaltypes.OneOfType;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets;
+
+/**
+ * This class provides utilities for inferring a Beam schema from a protocol buffer.
+ *
+ * <p>The following proto primitive types map to the following Beam types:
+ *
+ * <ul>
+ * <li>INT32 maps to FieldType.INT32
+ * <li>INT64 maps to FieldType.INT64
+ * <li>FLOAT maps to FieldType.FLOAT
+ * <li>DOUBLE maps to FieldType.DOUBLE
+ * <li>BOOL maps to FieldType.BOOLEAN
+ * <li>STRING maps to FieldType.STRING
+ * <li>BYTES maps to FieldType.BYTES
+ * </ul>
+ *
+ * <p>The following proto numeric types do not have native Beam primitive types. LogicalType
+ * objects were created to represent these types. Normal numeric types are used as the base type
+ * of each of these logical types, so SQL queries should work as normal.
+ *
+ * <ul>
+ * <li>UINT32 maps to FieldType.logicalType(new ProtoSchemaLogicalTypes.UInt32()))
+ * <li>SINT32 maps to FieldType.logicalType(new ProtoSchemaLogicalTypes.SInt32()))
+ * <li>FIXED32 maps to FieldType.logicalType(new ProtoSchemaLogicalTypes.Fixed32()))
+ * <li>SFIXED32 maps to FieldType.logicalType(new ProtoSchemaLogicalTypes.SFixed32()))
+ * <li>UINT64 maps to FieldType.logicalType(new ProtoSchemaLogicalTypes.UInt64()))
+ * <li>SINT64 maps to FieldType.logicalType(new ProtoSchemaLogicalTypes.SInt64()))
+ * <li>FIXED64 maps to FieldType.logicalType(new ProtoSchemaLogicalTypes.Fixed64()))
+ * <li>SFIXED64 maps to FieldType.logicalType(new ProtoSchemaLogicalTypes.SFixed64()))
+ * </ul>
+ *
+ * <p>Protobuf maps are mapped to Beam FieldType.MAP types. Protobuf repeated fields are mapped to
+ * Beam FieldType.ARRAY types.
+ *
+ * <p>Beam schemas include the EnumerationType logical type to represent enumerations, and protobuf
+ * enumerations are translated to this logical type. The base representation type for this logical
+ * type is an INT32.
+ *
+ * <p>Beam schemas include the OneOfType logical type to represent unions, and protobuf oneOfs are
+ * translated to this logical type. The base representation type for this logical type is a subrow
+ * containing an optional field for each oneof option.
+ *
+ * <p>google.protobuf.Timestamp messages cannot be translated to FieldType.DATETIME types, as the
+ * proto type represents nanoseconds and Beam's native type does not currently support that. A new
+ * TimestampNanos logical type has been introduced to allow representing nanosecond timestamps, as
+ * well as a DurationNanos logical type to represent google.protobuf.Duration types.
+ *
+ * <p>Protobuf wrapper classes are translated to nullable types, as follows.
+ *
+ * <ul>
+ * <li>google.protobuf.Int32Value maps to a nullable FieldType.INT32
+ * <li>google.protobuf.Int64Value maps to a nullable FieldType.INT64
+ * <li>google.protobuf.UInt32Value maps to a nullable FieldType.logicalType(new UInt32())
+ * <li>google.protobuf.UInt64Value maps to a nullable Field.logicalType(new UInt64())
+ * <li>google.protobuf.FloatValue maps to a nullable FieldType.FLOAT
+ * <li>google.protobuf.DoubleValue maps to a nullable FieldType.DOUBLE
+ * <li>google.protobuf.BoolValue maps to a nullable FieldType.BOOLEAN
+ * <li>google.protobuf.StringValue maps to a nullable FieldType.STRING
+ * <li>google.protobuf.BytesValue maps to a nullable FieldType.BYTES
+ * </ul>
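+ *
+ * <p>A minimal usage sketch, assuming a generated proto message class {@code MyProto} (the class
+ * name is illustrative only):
+ *
+ * <pre>{@code
+ * Schema schema = ProtoSchemaTranslator.getSchema(MyProto.class);
+ * }</pre>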
+ */
+@Experimental(Experimental.Kind.SCHEMAS)
+public class ProtoSchemaTranslator {
+ /** This metadata tag is used to store the proto field number of a field. */
+ public static final String PROTO_NUMBER_METADATA_TAG = "PROTO_NUMBER";
+
+ /** Attach a proto field number to a type. */
+ public static FieldType withFieldNumber(FieldType fieldType, int index) {
+ return fieldType.withMetadata(PROTO_NUMBER_METADATA_TAG, Long.toString(index));
+ }
+
+ /** Return the proto field number for a type. */
+ public static int getFieldNumber(FieldType fieldType) {
+ return Integer.parseInt(fieldType.getMetadataString(PROTO_NUMBER_METADATA_TAG));
+ }
+
+ /** Return a Beam schema representing a proto class. */
+ public static Schema getSchema(Class<? extends Message> clazz) {
+ return getSchema(ProtobufUtil.getDescriptorForClass(clazz));
+ }
+
+ private static Schema getSchema(Descriptors.Descriptor descriptor) {
+ Set<Integer> oneOfFields = Sets.newHashSet();
+ List<Field> fields = Lists.newArrayListWithCapacity(descriptor.getFields().size());
+ for (OneofDescriptor oneofDescriptor : descriptor.getOneofs()) {
+ List<Field> subFields = Lists.newArrayListWithCapacity(oneofDescriptor.getFieldCount());
+ Map<String, Integer> enumIds = Maps.newHashMap();
+ for (FieldDescriptor fieldDescriptor : oneofDescriptor.getFields()) {
+ oneOfFields.add(fieldDescriptor.getNumber());
+ // Store proto field number in metadata.
+ FieldType fieldType =
+ withFieldNumber(
+ beamFieldTypeFromProtoField(fieldDescriptor), fieldDescriptor.getNumber());
+ subFields.add(Field.nullable(fieldDescriptor.getName(), fieldType));
+ checkArgument(
+ enumIds.putIfAbsent(fieldDescriptor.getName(), fieldDescriptor.getNumber()) == null);
+ }
+ FieldType oneOfType = FieldType.logicalType(OneOfType.create(subFields, enumIds));
+ fields.add(Field.of(oneofDescriptor.getName(), oneOfType));
+ }
+
+ for (Descriptors.FieldDescriptor fieldDescriptor : descriptor.getFields()) {
+ if (!oneOfFields.contains(fieldDescriptor.getNumber())) {
+ // Store proto field number in metadata.
+ FieldType fieldType =
+ withFieldNumber(
+ beamFieldTypeFromProtoField(fieldDescriptor), fieldDescriptor.getNumber());
+ fields.add(Field.of(fieldDescriptor.getName(), fieldType));
+ }
+ }
+ return Schema.builder().addFields(fields).build();
+ }
+
+ private static FieldType beamFieldTypeFromProtoField(
+ Descriptors.FieldDescriptor protoFieldDescriptor) {
+ FieldType fieldType = null;
+ if (protoFieldDescriptor.isMapField()) {
+ FieldDescriptor keyFieldDescriptor =
+ protoFieldDescriptor.getMessageType().findFieldByName("key");
+ FieldDescriptor valueFieldDescriptor =
+ protoFieldDescriptor.getMessageType().findFieldByName("value");
+ fieldType =
+ FieldType.map(
+ beamFieldTypeFromProtoField(keyFieldDescriptor),
+ beamFieldTypeFromProtoField(valueFieldDescriptor));
+ } else if (protoFieldDescriptor.isRepeated()) {
+ fieldType = FieldType.array(beamFieldTypeFromSingularProtoField(protoFieldDescriptor));
+ } else {
+ fieldType = beamFieldTypeFromSingularProtoField(protoFieldDescriptor);
+ }
+ return fieldType;
+ }
+
+ private static FieldType beamFieldTypeFromSingularProtoField(
+ Descriptors.FieldDescriptor protoFieldDescriptor) {
+ Descriptors.FieldDescriptor.Type fieldDescriptor = protoFieldDescriptor.getType();
+ FieldType fieldType;
+ switch (fieldDescriptor) {
+ case INT32:
+ fieldType = FieldType.INT32;
+ break;
+ case INT64:
+ fieldType = FieldType.INT64;
+ break;
+ case FLOAT:
+ fieldType = FieldType.FLOAT;
+ break;
+ case DOUBLE:
+ fieldType = FieldType.DOUBLE;
+ break;
+ case BOOL:
+ fieldType = FieldType.BOOLEAN;
+ break;
+ case STRING:
+ fieldType = FieldType.STRING;
+ break;
+ case BYTES:
+ fieldType = FieldType.BYTES;
+ break;
+ case UINT32:
+ fieldType = FieldType.logicalType(new UInt32());
+ break;
+ case SINT32:
+ fieldType = FieldType.logicalType(new SInt32());
+ break;
+ case FIXED32:
+ fieldType = FieldType.logicalType(new Fixed32());
+ break;
+ case SFIXED32:
+ fieldType = FieldType.logicalType(new SFixed32());
+ break;
+ case UINT64:
+ fieldType = FieldType.logicalType(new UInt64());
+ break;
+ case SINT64:
+ fieldType = FieldType.logicalType(new SInt64());
+ break;
+ case FIXED64:
+ fieldType = FieldType.logicalType(new Fixed64());
+ break;
+ case SFIXED64:
+ fieldType = FieldType.logicalType(new SFixed64());
+ break;
+
+ case ENUM:
+ Map<String, Integer> enumValues = Maps.newHashMap();
+ for (EnumValueDescriptor enumValue : protoFieldDescriptor.getEnumType().getValues()) {
+ if (enumValues.putIfAbsent(enumValue.getName(), enumValue.getNumber()) != null) {
+ throw new RuntimeException("Aliased enumerations not currently supported.");
+ }
+ }
+ fieldType = FieldType.logicalType(EnumerationType.create(enumValues));
+ break;
+ case MESSAGE:
+ case GROUP:
+ String fullName = protoFieldDescriptor.getMessageType().getFullName();
+ switch (fullName) {
+ case "google.protobuf.Timestamp":
+ fieldType = FieldType.logicalType(new TimestampNanos());
+ break;
+ case "google.protobuf.Int32Value":
+ case "google.protobuf.UInt32Value":
+ case "google.protobuf.Int64Value":
+ case "google.protobuf.UInt64Value":
+ case "google.protobuf.FloatValue":
+ case "google.protobuf.DoubleValue":
+ case "google.protobuf.StringValue":
+ case "google.protobuf.BoolValue":
+ case "google.protobuf.BytesValue":
+ fieldType =
+ beamFieldTypeFromSingularProtoField(
+ protoFieldDescriptor.getMessageType().findFieldByNumber(1))
+ .withNullable(true);
+ break;
+ case "google.protobuf.Duration":
+ fieldType = FieldType.logicalType(new DurationNanos());
+ break;
+ case "google.protobuf.Any":
+ throw new RuntimeException("Any not yet supported");
+ default:
+ fieldType = FieldType.row(getSchema(protoFieldDescriptor.getMessageType()));
+ }
+ break;
+ default:
+ throw new RuntimeException("Field type not matched.");
+ }
+ if (protoFieldDescriptor.isOptional()) {
+ fieldType = fieldType.withNullable(true);
+ }
+ return fieldType;
+ }
+}
diff --git a/sdks/java/extensions/protobuf/src/test/java/org/apache/beam/sdk/extensions/protobuf/ProtoMessageSchemaTest.java b/sdks/java/extensions/protobuf/src/test/java/org/apache/beam/sdk/extensions/protobuf/ProtoMessageSchemaTest.java
new file mode 100644
index 0000000..8952f9a
--- /dev/null
+++ b/sdks/java/extensions/protobuf/src/test/java/org/apache/beam/sdk/extensions/protobuf/ProtoMessageSchemaTest.java
@@ -0,0 +1,249 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.protobuf;
+
+import static org.apache.beam.sdk.extensions.protobuf.ProtoSchemaTranslator.withFieldNumber;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.MAP_PRIMITIVE_PROTO;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.MAP_PRIMITIVE_ROW;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.MAP_PRIMITIVE_SCHEMA;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.NESTED_PROTO;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.NESTED_ROW;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.NESTED_SCHEMA;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.ONEOF_PROTO_BOOL;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.ONEOF_PROTO_INT32;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.ONEOF_PROTO_PRIMITIVE;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.ONEOF_PROTO_STRING;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.ONEOF_ROW_BOOL;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.ONEOF_ROW_INT32;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.ONEOF_ROW_PRIMITIVE;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.ONEOF_ROW_STRING;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.ONEOF_SCHEMA;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.OUTER_ONEOF_PROTO;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.OUTER_ONEOF_ROW;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.OUTER_ONEOF_SCHEMA;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.PRIMITIVE_PROTO;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.PRIMITIVE_ROW;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.PRIMITIVE_SCHEMA;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.REPEATED_PROTO;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.REPEATED_ROW;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.REPEATED_SCHEMA;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.WKT_MESSAGE_PROTO;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.WKT_MESSAGE_ROW;
+import static org.apache.beam.sdk.extensions.protobuf.TestProtoSchemas.WKT_MESSAGE_SCHEMA;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.EnumMessage;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.EnumMessage.Enum;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.MapPrimitive;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.Nested;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.OneOf;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.OuterOneOf;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.Primitive;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.RepeatPrimitive;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.WktMessage;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.Schema.FieldType;
+import org.apache.beam.sdk.schemas.logicaltypes.EnumerationType;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TypeDescriptor;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class ProtoMessageSchemaTest {
+
+ @Test
+ public void testPrimitiveSchema() {
+ Schema schema = new ProtoMessageSchema().schemaFor(TypeDescriptor.of(Primitive.class));
+ assertEquals(PRIMITIVE_SCHEMA, schema);
+ }
+
+ @Test
+ public void testPrimitiveProtoToRow() {
+ SerializableFunction<Primitive, Row> toRow =
+ new ProtoMessageSchema().toRowFunction(TypeDescriptor.of(Primitive.class));
+ assertEquals(PRIMITIVE_ROW, toRow.apply(PRIMITIVE_PROTO));
+ }
+
+ @Test
+ public void testPrimitiveRowToProto() {
+ SerializableFunction<Row, Primitive> fromRow =
+ new ProtoMessageSchema().fromRowFunction(TypeDescriptor.of(Primitive.class));
+ assertEquals(PRIMITIVE_PROTO, fromRow.apply(PRIMITIVE_ROW));
+ }
+
+ @Test
+ public void testRepeatedSchema() {
+ Schema schema = new ProtoMessageSchema().schemaFor(TypeDescriptor.of(RepeatPrimitive.class));
+ assertEquals(REPEATED_SCHEMA, schema);
+ }
+
+ @Test
+ public void testRepeatedProtoToRow() {
+ SerializableFunction<RepeatPrimitive, Row> toRow =
+ new ProtoMessageSchema().toRowFunction(TypeDescriptor.of(RepeatPrimitive.class));
+ assertEquals(REPEATED_ROW, toRow.apply(REPEATED_PROTO));
+ }
+
+ @Test
+ public void testRepeatedRowToProto() {
+ SerializableFunction<Row, RepeatPrimitive> fromRow =
+ new ProtoMessageSchema().fromRowFunction(TypeDescriptor.of(RepeatPrimitive.class));
+ assertEquals(REPEATED_PROTO, fromRow.apply(REPEATED_ROW));
+ }
+
+ // Test map type
+ @Test
+ public void testMapSchema() {
+ Schema schema = new ProtoMessageSchema().schemaFor(TypeDescriptor.of(MapPrimitive.class));
+ assertEquals(MAP_PRIMITIVE_SCHEMA, schema);
+ }
+
+ @Test
+ public void testMapProtoToRow() {
+ SerializableFunction<MapPrimitive, Row> toRow =
+ new ProtoMessageSchema().toRowFunction(TypeDescriptor.of(MapPrimitive.class));
+ assertEquals(MAP_PRIMITIVE_ROW, toRow.apply(MAP_PRIMITIVE_PROTO));
+ }
+
+ @Test
+ public void testMapRowToProto() {
+ SerializableFunction<Row, MapPrimitive> fromRow =
+ new ProtoMessageSchema().fromRowFunction(TypeDescriptor.of(MapPrimitive.class));
+ assertEquals(MAP_PRIMITIVE_PROTO, fromRow.apply(MAP_PRIMITIVE_ROW));
+ }
+
+ @Test
+ public void testNestedSchema() {
+ Schema schema = new ProtoMessageSchema().schemaFor(TypeDescriptor.of(Nested.class));
+ assertEquals(NESTED_SCHEMA, schema);
+ }
+
+ @Test
+ public void testNestedProtoToRow() {
+ SerializableFunction<Nested, Row> toRow =
+ new ProtoMessageSchema().toRowFunction(TypeDescriptor.of(Nested.class));
+ assertEquals(NESTED_ROW, toRow.apply(NESTED_PROTO));
+ }
+
+ @Test
+ public void testNestedRowToProto() {
+ SerializableFunction<Row, Nested> fromRow =
+ new ProtoMessageSchema().fromRowFunction(TypeDescriptor.of(Nested.class));
+ assertEquals(NESTED_PROTO, fromRow.apply(NESTED_ROW));
+ }
+
+ @Test
+ public void testOneOfSchema() {
+ Schema schema = new ProtoMessageSchema().schemaFor(TypeDescriptor.of(OneOf.class));
+ assertEquals(ONEOF_SCHEMA, schema);
+ }
+
+ @Test
+ public void testOneOfProtoToRow() {
+ SerializableFunction<OneOf, Row> toRow =
+ new ProtoMessageSchema().toRowFunction(TypeDescriptor.of(OneOf.class));
+ assertEquals(ONEOF_ROW_INT32, toRow.apply(ONEOF_PROTO_INT32));
+ assertEquals(ONEOF_ROW_BOOL, toRow.apply(ONEOF_PROTO_BOOL));
+ assertEquals(ONEOF_ROW_STRING, toRow.apply(ONEOF_PROTO_STRING));
+ assertEquals(ONEOF_ROW_PRIMITIVE, toRow.apply(ONEOF_PROTO_PRIMITIVE));
+ }
+
+ @Test
+ public void testOneOfRowToProto() {
+ SerializableFunction<Row, OneOf> fromRow =
+ new ProtoMessageSchema().fromRowFunction(TypeDescriptor.of(OneOf.class));
+ assertEquals(ONEOF_PROTO_INT32, fromRow.apply(ONEOF_ROW_INT32));
+ assertEquals(ONEOF_PROTO_BOOL, fromRow.apply(ONEOF_ROW_BOOL));
+ assertEquals(ONEOF_PROTO_STRING, fromRow.apply(ONEOF_ROW_STRING));
+ assertEquals(ONEOF_PROTO_PRIMITIVE, fromRow.apply(ONEOF_ROW_PRIMITIVE));
+ }
+
+ @Test
+ public void testOuterOneOfSchema() {
+ Schema schema = new ProtoMessageSchema().schemaFor(TypeDescriptor.of(OuterOneOf.class));
+ assertEquals(OUTER_ONEOF_SCHEMA, schema);
+ }
+
+ @Test
+ public void testOuterOneOfProtoToRow() {
+ SerializableFunction<OuterOneOf, Row> toRow =
+ new ProtoMessageSchema().toRowFunction(TypeDescriptor.of(OuterOneOf.class));
+ assertEquals(OUTER_ONEOF_ROW, toRow.apply(OUTER_ONEOF_PROTO));
+ }
+
+ @Test
+ public void testOuterOneOfRowToProto() {
+ SerializableFunction<Row, OuterOneOf> fromRow =
+ new ProtoMessageSchema().fromRowFunction(TypeDescriptor.of(OuterOneOf.class));
+ assertEquals(OUTER_ONEOF_PROTO, fromRow.apply(OUTER_ONEOF_ROW));
+ }
+
+ private static final EnumerationType ENUM_TYPE =
+ EnumerationType.create(ImmutableMap.of("ZERO", 0, "TWO", 2, "THREE", 3));
+ private static final Schema ENUM_SCHEMA =
+ Schema.builder()
+ .addField("enum", withFieldNumber(FieldType.logicalType(ENUM_TYPE).withNullable(true), 1))
+ .build();
+ private static final Row ENUM_ROW =
+ Row.withSchema(ENUM_SCHEMA).addValues(ENUM_TYPE.valueOf("TWO")).build();
+ private static final EnumMessage ENUM_PROTO = EnumMessage.newBuilder().setEnum(Enum.TWO).build();
+
+ @Test
+ public void testEnumSchema() {
+ Schema schema = new ProtoMessageSchema().schemaFor(TypeDescriptor.of(EnumMessage.class));
+ assertEquals(ENUM_SCHEMA, schema);
+ }
+
+ @Test
+ public void testEnumProtoToRow() {
+ SerializableFunction<EnumMessage, Row> toRow =
+ new ProtoMessageSchema().toRowFunction(TypeDescriptor.of(EnumMessage.class));
+ assertEquals(ENUM_ROW, toRow.apply(ENUM_PROTO));
+ }
+
+ @Test
+ public void testEnumRowToProto() {
+ SerializableFunction<Row, EnumMessage> fromRow =
+ new ProtoMessageSchema().fromRowFunction(TypeDescriptor.of(EnumMessage.class));
+ assertEquals(ENUM_PROTO, fromRow.apply(ENUM_ROW));
+ }
+
+ @Test
+ public void testWktMessageSchema() {
+ Schema schema = new ProtoMessageSchema().schemaFor(TypeDescriptor.of(WktMessage.class));
+ assertEquals(WKT_MESSAGE_SCHEMA, schema);
+ }
+
+ @Test
+ public void testWktProtoToRow() {
+ SerializableFunction<WktMessage, Row> toRow =
+ new ProtoMessageSchema().toRowFunction(TypeDescriptor.of(WktMessage.class));
+ assertEquals(WKT_MESSAGE_ROW, toRow.apply(WKT_MESSAGE_PROTO));
+ }
+
+ @Test
+ public void testWktRowToProto() {
+ SerializableFunction<Row, WktMessage> fromRow =
+ new ProtoMessageSchema().fromRowFunction(TypeDescriptor.of(WktMessage.class));
+ assertEquals(WKT_MESSAGE_PROTO, fromRow.apply(WKT_MESSAGE_ROW));
+ }
+}
diff --git a/sdks/java/extensions/protobuf/src/test/java/org/apache/beam/sdk/extensions/protobuf/ProtoSchemaTranslatorTest.java b/sdks/java/extensions/protobuf/src/test/java/org/apache/beam/sdk/extensions/protobuf/ProtoSchemaTranslatorTest.java
new file mode 100644
index 0000000..34ceb10
--- /dev/null
+++ b/sdks/java/extensions/protobuf/src/test/java/org/apache/beam/sdk/extensions/protobuf/ProtoSchemaTranslatorTest.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.protobuf;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Collection of standard tests for Protobuf Schema support. */
+@RunWith(JUnit4.class)
+public class ProtoSchemaTranslatorTest {
+ @Test
+ public void testPrimitiveSchema() {
+ assertEquals(
+ TestProtoSchemas.PRIMITIVE_SCHEMA,
+ ProtoSchemaTranslator.getSchema(Proto3SchemaMessages.Primitive.class));
+ }
+
+ @Test
+ public void testRepeatedSchema() {
+ assertEquals(
+ TestProtoSchemas.REPEATED_SCHEMA,
+ ProtoSchemaTranslator.getSchema(Proto3SchemaMessages.RepeatPrimitive.class));
+ }
+
+ @Test
+ public void testMapPrimitiveSchema() {
+ assertEquals(
+ TestProtoSchemas.MAP_PRIMITIVE_SCHEMA,
+ ProtoSchemaTranslator.getSchema(Proto3SchemaMessages.MapPrimitive.class));
+ }
+
+ @Test
+ public void testNestedSchema() {
+ assertEquals(
+ TestProtoSchemas.NESTED_SCHEMA,
+ ProtoSchemaTranslator.getSchema(Proto3SchemaMessages.Nested.class));
+ }
+
+ @Test
+ public void testOneOfSchema() {
+ assertEquals(
+ TestProtoSchemas.ONEOF_SCHEMA,
+ ProtoSchemaTranslator.getSchema(Proto3SchemaMessages.OneOf.class));
+ }
+
+ @Test
+ public void testNestedOneOfSchema() {
+ assertEquals(
+ TestProtoSchemas.OUTER_ONEOF_SCHEMA,
+ ProtoSchemaTranslator.getSchema(Proto3SchemaMessages.OuterOneOf.class));
+ }
+
+ @Test
+ public void testWrapperMessagesSchema() {
+ assertEquals(
+ TestProtoSchemas.WKT_MESSAGE_SCHEMA,
+ ProtoSchemaTranslator.getSchema(Proto3SchemaMessages.WktMessage.class));
+ }
+}
diff --git a/sdks/java/extensions/protobuf/src/test/java/org/apache/beam/sdk/extensions/protobuf/TestProtoSchemas.java b/sdks/java/extensions/protobuf/src/test/java/org/apache/beam/sdk/extensions/protobuf/TestProtoSchemas.java
new file mode 100644
index 0000000..88892d8
--- /dev/null
+++ b/sdks/java/extensions/protobuf/src/test/java/org/apache/beam/sdk/extensions/protobuf/TestProtoSchemas.java
@@ -0,0 +1,397 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.protobuf;
+
+import static org.apache.beam.sdk.extensions.protobuf.ProtoSchemaTranslator.getFieldNumber;
+import static org.apache.beam.sdk.extensions.protobuf.ProtoSchemaTranslator.withFieldNumber;
+
+import com.google.protobuf.BoolValue;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.BytesValue;
+import com.google.protobuf.DoubleValue;
+import com.google.protobuf.Duration;
+import com.google.protobuf.FloatValue;
+import com.google.protobuf.Int32Value;
+import com.google.protobuf.Int64Value;
+import com.google.protobuf.StringValue;
+import com.google.protobuf.Timestamp;
+import com.google.protobuf.UInt32Value;
+import com.google.protobuf.UInt64Value;
+import java.time.Instant;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.MapPrimitive;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.Nested;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.OneOf;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.OuterOneOf;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.Primitive;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.RepeatPrimitive;
+import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.WktMessage;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.DurationNanos;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.Fixed32;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.Fixed64;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.SFixed32;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.SFixed64;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.SInt32;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.SInt64;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.TimestampNanos;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.UInt32;
+import org.apache.beam.sdk.extensions.protobuf.ProtoSchemaLogicalTypes.UInt64;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.Schema.Field;
+import org.apache.beam.sdk.schemas.Schema.FieldType;
+import org.apache.beam.sdk.schemas.logicaltypes.OneOfType;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
+
+class TestProtoSchemas {
+ // The schema we expect from the Primitive proto.
+ static final Schema PRIMITIVE_SCHEMA =
+ Schema.builder()
+ .addNullableField("primitive_double", withFieldNumber(FieldType.DOUBLE, 1))
+ .addNullableField("primitive_float", withFieldNumber(FieldType.FLOAT, 2))
+ .addNullableField("primitive_int32", withFieldNumber(FieldType.INT32, 3))
+ .addNullableField("primitive_int64", withFieldNumber(FieldType.INT64, 4))
+ .addNullableField(
+ "primitive_uint32", withFieldNumber(FieldType.logicalType(new UInt32()), 5))
+ .addNullableField(
+ "primitive_uint64", withFieldNumber(FieldType.logicalType(new UInt64()), 6))
+ .addNullableField(
+ "primitive_sint32", withFieldNumber(FieldType.logicalType(new SInt32()), 7))
+ .addNullableField(
+ "primitive_sint64", withFieldNumber(FieldType.logicalType(new SInt64()), 8))
+ .addNullableField(
+ "primitive_fixed32", withFieldNumber(FieldType.logicalType(new Fixed32()), 9))
+ .addNullableField(
+ "primitive_fixed64", withFieldNumber(FieldType.logicalType(new Fixed64()), 10))
+ .addNullableField(
+ "primitive_sfixed32", withFieldNumber(FieldType.logicalType(new SFixed32()), 11))
+ .addNullableField(
+ "primitive_sfixed64", withFieldNumber(FieldType.logicalType(new SFixed64()), 12))
+ .addNullableField("primitive_bool", withFieldNumber(FieldType.BOOLEAN, 13))
+ .addNullableField("primitive_string", withFieldNumber(FieldType.STRING, 14))
+ .addNullableField("primitive_bytes", withFieldNumber(FieldType.BYTES, 15))
+ .build();
+
+ // A sample instance of the row.
+ private static final byte[] BYTE_ARRAY = new byte[] {1, 2, 3, 4};
+ static final Row PRIMITIVE_ROW =
+ Row.withSchema(PRIMITIVE_SCHEMA)
+ .addValues(
+ 1.1, 2.2F, 32, 64L, 33, 65L, 123, 124L, 30, 62L, 31, 63L, true, "horsey", BYTE_ARRAY)
+ .build();
+
+ // A sample instance of the proto.
+ static final Primitive PRIMITIVE_PROTO =
+ Primitive.newBuilder()
+ .setPrimitiveDouble(1.1)
+ .setPrimitiveFloat(2.2F)
+ .setPrimitiveInt32(32)
+ .setPrimitiveInt64(64)
+ .setPrimitiveUint32(33)
+ .setPrimitiveUint64(65)
+ .setPrimitiveSint32(123)
+ .setPrimitiveSint64(124)
+ .setPrimitiveFixed32(30)
+ .setPrimitiveFixed64(62)
+ .setPrimitiveSfixed32(31)
+ .setPrimitiveSfixed64(63)
+ .setPrimitiveBool(true)
+ .setPrimitiveString("horsey")
+ .setPrimitiveBytes(ByteString.copyFrom(BYTE_ARRAY))
+ .build();
+
+ // The schema for the RepeatPrimitive proto.
+ static final Schema REPEATED_SCHEMA =
+ Schema.builder()
+ .addField("repeated_double", withFieldNumber(FieldType.array(FieldType.DOUBLE), 1))
+ .addField("repeated_float", withFieldNumber(FieldType.array(FieldType.FLOAT), 2))
+ .addField("repeated_int32", withFieldNumber(FieldType.array(FieldType.INT32), 3))
+ .addField("repeated_int64", withFieldNumber(FieldType.array(FieldType.INT64), 4))
+ .addField(
+ "repeated_uint32",
+ withFieldNumber(FieldType.array(FieldType.logicalType(new UInt32())), 5))
+ .addField(
+ "repeated_uint64",
+ withFieldNumber(FieldType.array(FieldType.logicalType(new UInt64())), 6))
+ .addField(
+ "repeated_sint32",
+ withFieldNumber(FieldType.array(FieldType.logicalType(new SInt32())), 7))
+ .addField(
+ "repeated_sint64",
+ withFieldNumber(FieldType.array(FieldType.logicalType(new SInt64())), 8))
+ .addField(
+ "repeated_fixed32",
+ withFieldNumber(FieldType.array(FieldType.logicalType(new Fixed32())), 9))
+ .addField(
+ "repeated_fixed64",
+ withFieldNumber(FieldType.array(FieldType.logicalType(new Fixed64())), 10))
+ .addField(
+ "repeated_sfixed32",
+ withFieldNumber(FieldType.array(FieldType.logicalType(new SFixed32())), 11))
+ .addField(
+ "repeated_sfixed64",
+ withFieldNumber(FieldType.array(FieldType.logicalType(new SFixed64())), 12))
+ .addField("repeated_bool", withFieldNumber(FieldType.array(FieldType.BOOLEAN), 13))
+ .addField("repeated_string", withFieldNumber(FieldType.array(FieldType.STRING), 14))
+ .addField("repeated_bytes", withFieldNumber(FieldType.array(FieldType.BYTES), 15))
+ .build();
+
+ // A sample instance of the row.
+ static final Row REPEATED_ROW =
+ Row.withSchema(REPEATED_SCHEMA)
+ .addArray(1.1, 1.1)
+ .addArray(2.2F, 2.2F)
+ .addArray(32, 32)
+ .addArray(64L, 64L)
+ .addArray(33, 33)
+ .addArray(65L, 65L)
+ .addArray(123, 123)
+ .addArray(124L, 124L)
+ .addArray(30, 30)
+ .addArray(62L, 62L)
+ .addArray(31, 31)
+ .addArray(63L, 63L)
+ .addArray(true, true)
+ .addArray("horsey", "horsey")
+ .addArray(BYTE_ARRAY, BYTE_ARRAY)
+ .build();
+
+ // A sample instance of the proto.
+ static final RepeatPrimitive REPEATED_PROTO =
+ RepeatPrimitive.newBuilder()
+ .addAllRepeatedDouble(ImmutableList.of(1.1, 1.1))
+ .addAllRepeatedFloat(ImmutableList.of(2.2F, 2.2F))
+ .addAllRepeatedInt32(ImmutableList.of(32, 32))
+ .addAllRepeatedInt64(ImmutableList.of(64L, 64L))
+ .addAllRepeatedUint32(ImmutableList.of(33, 33))
+ .addAllRepeatedUint64(ImmutableList.of(65L, 65L))
+ .addAllRepeatedSint32(ImmutableList.of(123, 123))
+ .addAllRepeatedSint64(ImmutableList.of(124L, 124L))
+ .addAllRepeatedFixed32(ImmutableList.of(30, 30))
+ .addAllRepeatedFixed64(ImmutableList.of(62L, 62L))
+ .addAllRepeatedSfixed32(ImmutableList.of(31, 31))
+ .addAllRepeatedSfixed64(ImmutableList.of(63L, 63L))
+ .addAllRepeatedBool(ImmutableList.of(true, true))
+ .addAllRepeatedString(ImmutableList.of("horsey", "horsey"))
+ .addAllRepeatedBytes(
+ ImmutableList.of(ByteString.copyFrom(BYTE_ARRAY), ByteString.copyFrom(BYTE_ARRAY)))
+ .build();
+
+ // The schema for the MapPrimitive proto.
+ static final Schema MAP_PRIMITIVE_SCHEMA =
+ Schema.builder()
+ .addField(
+ "string_string_map",
+ withFieldNumber(
+ FieldType.map(
+ FieldType.STRING.withNullable(true), FieldType.STRING.withNullable(true)),
+ 1))
+ .addField(
+ "string_int_map",
+ withFieldNumber(
+ FieldType.map(
+ FieldType.STRING.withNullable(true), FieldType.INT32.withNullable(true)),
+ 2))
+ .addField(
+ "int_string_map",
+ withFieldNumber(
+ FieldType.map(
+ FieldType.INT32.withNullable(true), FieldType.STRING.withNullable(true)),
+ 3))
+ .addField(
+ "string_bytes_map",
+ withFieldNumber(
+ FieldType.map(
+ FieldType.STRING.withNullable(true), FieldType.BYTES.withNullable(true)),
+ 4))
+ .build();
+
+ // A sample instance of the row.
+ static final Row MAP_PRIMITIVE_ROW =
+ Row.withSchema(MAP_PRIMITIVE_SCHEMA)
+ .addValue(ImmutableMap.of("k1", "v1", "k2", "v2"))
+ .addValue(ImmutableMap.of("k1", 1, "k2", 2))
+ .addValue(ImmutableMap.of(1, "v1", 2, "v2"))
+ .addValue(ImmutableMap.of("k1", BYTE_ARRAY, "k2", BYTE_ARRAY))
+ .build();
+
+ // A sample instance of the proto.
+ static final MapPrimitive MAP_PRIMITIVE_PROTO =
+ MapPrimitive.newBuilder()
+ .putAllStringStringMap(ImmutableMap.of("k1", "v1", "k2", "v2"))
+ .putAllStringIntMap(ImmutableMap.of("k1", 1, "k2", 2))
+ .putAllIntStringMap(ImmutableMap.of(1, "v1", 2, "v2"))
+ .putAllStringBytesMap(
+ ImmutableMap.of(
+ "k1", ByteString.copyFrom(BYTE_ARRAY), "k2", ByteString.copyFrom(BYTE_ARRAY)))
+ .build();
+
+ // The schema for the Nested proto.
+ static final Schema NESTED_SCHEMA =
+ Schema.builder()
+ .addField(
+ "nested", withFieldNumber(FieldType.row(PRIMITIVE_SCHEMA).withNullable(true), 1))
+ .addField(
+ "nested_list", withFieldNumber(FieldType.array(FieldType.row(PRIMITIVE_SCHEMA)), 2))
+ .addField(
+ "nested_map",
+ withFieldNumber(
+ FieldType.map(
+ FieldType.STRING.withNullable(true),
+ FieldType.row(PRIMITIVE_SCHEMA).withNullable(true)),
+ 3))
+ .build();
+
+ // A sample instance of the row.
+ static final Row NESTED_ROW =
+ Row.withSchema(NESTED_SCHEMA)
+ .addValue(PRIMITIVE_ROW)
+ .addArray(ImmutableList.of(PRIMITIVE_ROW, PRIMITIVE_ROW))
+ .addValue(ImmutableMap.of("k1", PRIMITIVE_ROW, "k2", PRIMITIVE_ROW))
+ .build();
+
+ // A sample instance of the proto.
+ static final Nested NESTED_PROTO =
+ Nested.newBuilder()
+ .setNested(PRIMITIVE_PROTO)
+ .addAllNestedList(ImmutableList.of(PRIMITIVE_PROTO, PRIMITIVE_PROTO))
+ .putAllNestedMap(ImmutableMap.of("k1", PRIMITIVE_PROTO, "k2", PRIMITIVE_PROTO))
+ .build();
+
+ // The schema for the OneOf proto.
+ private static final List<Field> ONEOF_FIELDS =
+ ImmutableList.of(
+ Field.of("oneof_int32", withFieldNumber(FieldType.INT32, 2)),
+ Field.of("oneof_bool", withFieldNumber(FieldType.BOOLEAN, 3)),
+ Field.of("oneof_string", withFieldNumber(FieldType.STRING, 4)),
+ Field.of("oneof_primitive", withFieldNumber(FieldType.row(PRIMITIVE_SCHEMA), 5)));
+ private static final Map<String, Integer> ONE_OF_ENUM_MAP =
+ ONEOF_FIELDS.stream()
+ .collect(Collectors.toMap(Field::getName, f -> getFieldNumber(f.getType())));
+ static final OneOfType ONE_OF_TYPE = OneOfType.create(ONEOF_FIELDS, ONE_OF_ENUM_MAP);
+ static final Schema ONEOF_SCHEMA =
+ Schema.builder()
+ .addField("special_oneof", FieldType.logicalType(ONE_OF_TYPE))
+ .addField("place1", withFieldNumber(FieldType.STRING.withNullable(true), 1))
+ .addField("place2", withFieldNumber(FieldType.INT32.withNullable(true), 6))
+ .build();
+
+ // Sample row instances for each OneOf case.
+ static final Row ONEOF_ROW_INT32 =
+ Row.withSchema(ONEOF_SCHEMA)
+ .addValues(ONE_OF_TYPE.createValue("oneof_int32", 1), "foo", 0)
+ .build();
+ static final Row ONEOF_ROW_BOOL =
+ Row.withSchema(ONEOF_SCHEMA)
+ .addValues(ONE_OF_TYPE.createValue("oneof_bool", true), "foo", 0)
+ .build();
+ static final Row ONEOF_ROW_STRING =
+ Row.withSchema(ONEOF_SCHEMA)
+ .addValues(ONE_OF_TYPE.createValue("oneof_string", "foo"), "foo", 0)
+ .build();
+ static final Row ONEOF_ROW_PRIMITIVE =
+ Row.withSchema(ONEOF_SCHEMA)
+ .addValues(ONE_OF_TYPE.createValue("oneof_primitive", PRIMITIVE_ROW), "foo", 0)
+ .build();
+
+ // Sample proto instances for each oneof case.
+ static final OneOf ONEOF_PROTO_INT32 =
+ OneOf.newBuilder().setOneofInt32(1).setPlace1("foo").setPlace2(0).build();
+ static final OneOf ONEOF_PROTO_BOOL =
+ OneOf.newBuilder().setOneofBool(true).setPlace1("foo").setPlace2(0).build();
+ static final OneOf ONEOF_PROTO_STRING =
+ OneOf.newBuilder().setOneofString("foo").setPlace1("foo").setPlace2(0).build();
+ static final OneOf ONEOF_PROTO_PRIMITIVE =
+ OneOf.newBuilder().setOneofPrimitive(PRIMITIVE_PROTO).setPlace1("foo").setPlace2(0).build();
+
+ // The schema for the OuterOneOf proto.
+ private static final List<Field> OUTER_ONEOF_FIELDS =
+ ImmutableList.of(
+ Field.of("oneof_oneof", withFieldNumber(FieldType.row(ONEOF_SCHEMA), 1)),
+ Field.of("oneof_int32", withFieldNumber(FieldType.INT32, 2)));
+ private static final Map<String, Integer> OUTER_ONE_OF_ENUM_MAP =
+ OUTER_ONEOF_FIELDS.stream()
+ .collect(Collectors.toMap(Field::getName, f -> getFieldNumber(f.getType())));
+ static final OneOfType OUTER_ONEOF_TYPE =
+ OneOfType.create(OUTER_ONEOF_FIELDS, OUTER_ONE_OF_ENUM_MAP);
+ static final Schema OUTER_ONEOF_SCHEMA =
+ Schema.builder().addField("outer_oneof", FieldType.logicalType(OUTER_ONEOF_TYPE)).build();
+
+ // A sample instance of the Row.
+ static final Row OUTER_ONEOF_ROW =
+ Row.withSchema(OUTER_ONEOF_SCHEMA)
+ .addValues(OUTER_ONEOF_TYPE.createValue("oneof_oneof", ONEOF_ROW_PRIMITIVE))
+ .build();
+
+ // A sample instance of the proto.
+ static final OuterOneOf OUTER_ONEOF_PROTO =
+ OuterOneOf.newBuilder().setOneofOneof(ONEOF_PROTO_PRIMITIVE).build();
+
+ static final Schema WKT_MESSAGE_SCHEMA =
+ Schema.builder()
+ .addNullableField("double", withFieldNumber(FieldType.DOUBLE, 1))
+ .addNullableField("float", withFieldNumber(FieldType.FLOAT, 2))
+ .addNullableField("int32", withFieldNumber(FieldType.INT32, 3))
+ .addNullableField("int64", withFieldNumber(FieldType.INT64, 4))
+ .addNullableField("uint32", withFieldNumber(FieldType.logicalType(new UInt32()), 5))
+ .addNullableField("uint64", withFieldNumber(FieldType.logicalType(new UInt64()), 6))
+ .addNullableField("bool", withFieldNumber(FieldType.BOOLEAN, 13))
+ .addNullableField("string", withFieldNumber(FieldType.STRING, 14))
+ .addNullableField("bytes", withFieldNumber(FieldType.BYTES, 15))
+ .addNullableField(
+ "timestamp", withFieldNumber(FieldType.logicalType(new TimestampNanos()), 16))
+ .addNullableField(
+ "duration", withFieldNumber(FieldType.logicalType(new DurationNanos()), 17))
+ .build();
+ // A sample instance of the row.
+ static final Instant JAVA_NOW = Instant.now();
+ static final Timestamp PROTO_NOW =
+ Timestamp.newBuilder()
+ .setSeconds(JAVA_NOW.getEpochSecond())
+ .setNanos(JAVA_NOW.getNano())
+ .build();
+ static final Duration PROTO_DURATION =
+ Duration.newBuilder()
+ .setSeconds(JAVA_NOW.getEpochSecond())
+ .setNanos(JAVA_NOW.getNano())
+ .build();
+ static final Row WKT_MESSAGE_ROW =
+ Row.withSchema(WKT_MESSAGE_SCHEMA)
+ .addValues(
+ 1.1, 2.2F, 32, 64L, 33, 65L, true, "horsey", BYTE_ARRAY, PROTO_NOW, PROTO_DURATION)
+ .build();
+
+ // A sample instance of the proto.
+ static final WktMessage WKT_MESSAGE_PROTO =
+ WktMessage.newBuilder()
+ .setDouble(DoubleValue.of(1.1))
+ .setFloat(FloatValue.of(2.2F))
+ .setInt32(Int32Value.of(32))
+ .setInt64(Int64Value.of(64))
+ .setUint32(UInt32Value.of(33))
+ .setUint64(UInt64Value.of(65))
+ .setBool(BoolValue.of(true))
+ .setString(StringValue.of("horsey"))
+ .setBytes(BytesValue.of(ByteString.copyFrom(BYTE_ARRAY)))
+ .setTimestamp(PROTO_NOW)
+ .setDuration(PROTO_DURATION)
+ .build();
+}
diff --git a/sdks/java/extensions/protobuf/src/test/proto/proto3_schema_messages.proto b/sdks/java/extensions/protobuf/src/test/proto/proto3_schema_messages.proto
new file mode 100644
index 0000000..6f6ec44
--- /dev/null
+++ b/sdks/java/extensions/protobuf/src/test/proto/proto3_schema_messages.proto
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Protocol Buffer messages used for testing Proto3 Schema implementation.
+ */
+
+syntax = "proto3";
+
+package proto3_schema_messages;
+
+import "google/protobuf/duration.proto";
+import "google/protobuf/timestamp.proto";
+import "google/protobuf/wrappers.proto";
+import "google/protobuf/descriptor.proto";
+
+option java_package = "org.apache.beam.sdk.extensions.protobuf";
+
+message Primitive {
+ double primitive_double = 1;
+ float primitive_float = 2;
+ int32 primitive_int32 = 3;
+ int64 primitive_int64 = 4;
+ uint32 primitive_uint32 = 5;
+ uint64 primitive_uint64 = 6;
+ sint32 primitive_sint32 = 7;
+ sint64 primitive_sint64 = 8;
+ fixed32 primitive_fixed32 = 9;
+ fixed64 primitive_fixed64 = 10;
+ sfixed32 primitive_sfixed32 = 11;
+ sfixed64 primitive_sfixed64 = 12;
+ bool primitive_bool = 13;
+ string primitive_string = 14;
+ bytes primitive_bytes = 15;
+}
+
+message RepeatPrimitive {
+ repeated double repeated_double = 1;
+ repeated float repeated_float = 2;
+ repeated int32 repeated_int32 = 3;
+ repeated int64 repeated_int64 = 4;
+ repeated uint32 repeated_uint32 = 5;
+ repeated uint64 repeated_uint64 = 6;
+ repeated sint32 repeated_sint32 = 7;
+ repeated sint64 repeated_sint64 = 8;
+ repeated fixed32 repeated_fixed32 = 9;
+ repeated fixed64 repeated_fixed64 = 10;
+ repeated sfixed32 repeated_sfixed32 = 11;
+ repeated sfixed64 repeated_sfixed64 = 12;
+ repeated bool repeated_bool = 13;
+ repeated string repeated_string = 14;
+ repeated bytes repeated_bytes = 15;
+}
+
+message MapPrimitive {
+ map<string, string> string_string_map = 1;
+ map<string, int32> string_int_map = 2;
+ map<int32, string> int_string_map = 3;
+ map<string, bytes> string_bytes_map = 4;
+}
+
+message Nested {
+ Primitive nested = 1;
+ repeated Primitive nested_list = 2;
+ map<string, Primitive> nested_map = 3;
+}
+
+message OneOf {
+ string place1 = 1;
+ oneof special_oneof {
+ int32 oneof_int32 = 2;
+ bool oneof_bool = 3;
+ string oneof_string = 4;
+ Primitive oneof_primitive = 5;
+ }
+ int32 place2 = 6;
+}
+
+message OuterOneOf {
+ oneof outer_oneof {
+ OneOf oneof_oneof = 1;
+ int32 oneof_int32 = 2;
+ }
+}
+
+message EnumMessage {
+ enum Enum {
+ ZERO = 0;
+ TWO = 2;
+ THREE = 3;
+ }
+ Enum enum = 1;
+}
+
+message WktMessage {
+ google.protobuf.DoubleValue double = 1;
+ google.protobuf.FloatValue float = 2;
+ google.protobuf.Int32Value int32 = 3;
+ google.protobuf.Int64Value int64 = 4;
+ google.protobuf.UInt32Value uint32 = 5;
+ google.protobuf.UInt64Value uint64 = 6;
+ google.protobuf.BoolValue bool = 13;
+ google.protobuf.StringValue string = 14;
+ google.protobuf.BytesValue bytes = 15;
+ google.protobuf.Timestamp timestamp = 16;
+ google.protobuf.Duration duration = 17;
+}
+
diff --git a/sdks/java/extensions/protobuf/src/test/resources/README.md b/sdks/java/extensions/protobuf/src/test/resources/README.md
new file mode 100644
index 0000000..79083f5
--- /dev/null
+++ b/sdks/java/extensions/protobuf/src/test/resources/README.md
@@ -0,0 +1,34 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+These steps recreate the proto descriptor set included in this resource directory.
+
+First, point `PROTO_INCLUDE` at the directory that contains the standard proto definitions:
+
+```bash
+export PROTO_INCLUDE=<proto_include_dir>
+```
+
+Then, from the Beam root folder, execute the following command to create the .pb file:
+
+```bash
+protoc \
+ -Isdks/java/extensions/protobuf/src/test/resources/ \
+ -I$PROTO_INCLUDE \
+ --descriptor_set_out=sdks/java/extensions/protobuf/src/test/resources/org/apache/beam/sdk/extensions/protobuf/test_option_v1.pb \
+ --include_imports \
+ sdks/java/extensions/protobuf/src/test/resources/test/option/v1/simple.proto
+```
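+
+For illustration only (the exact path depends on how protoc was installed), `PROTO_INCLUDE` typically points at the `include` directory bundled with a protoc release, which provides the well-known `google/protobuf/*.proto` imports used above:
+
+```bash
+# Hypothetical location; adjust to wherever your protoc distribution keeps its bundled .proto files.
+export PROTO_INCLUDE=$HOME/protoc-3.x/include
+```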
diff --git a/sdks/java/extensions/protobuf/src/test/resources/org/apache/beam/sdk/extensions/protobuf/test_option_v1.pb b/sdks/java/extensions/protobuf/src/test/resources/org/apache/beam/sdk/extensions/protobuf/test_option_v1.pb
new file mode 100644
index 0000000..4e97ad0
--- /dev/null
+++ b/sdks/java/extensions/protobuf/src/test/resources/org/apache/beam/sdk/extensions/protobuf/test_option_v1.pb
Binary files differ
diff --git a/sdks/java/extensions/protobuf/src/test/resources/test/option/v1/option.proto b/sdks/java/extensions/protobuf/src/test/resources/test/option/v1/option.proto
new file mode 100644
index 0000000..ca40119
--- /dev/null
+++ b/sdks/java/extensions/protobuf/src/test/resources/test/option/v1/option.proto
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+package test.option.v1;
+
+import "google/protobuf/descriptor.proto";
+
+extend google.protobuf.FileOptions {
+ double fileoption_double = 66666700;
+ float fileoption_float = 66666701;
+ int32 fileoption_int32 = 66666702;
+ int64 fileoption_int64 = 66666703;
+ uint32 fileoption_uint32 = 66666704;
+ uint64 fileoption_uint64 = 66666705;
+ sint32 fileoption_sint32 = 66666706;
+ sint64 fileoption_sint64 = 66666707;
+ fixed32 fileoption_fixed32 = 66666708;
+ fixed64 fileoption_fixed64 = 66666709;
+ sfixed32 fileoption_sfixed32 = 66666710;
+ sfixed64 fileoption_sfixed64 = 66666711;
+ bool fileoption_bool = 66666712;
+ string fileoption_string = 66666713;
+ bytes fileoption_bytes = 66666714;
+ OptionMessage fileoption_message = 66666715;
+ OptionEnum fileoption_enum = 66666716;
+}
+
+extend google.protobuf.MessageOptions {
+ double messageoption_double = 66666700;
+ float messageoption_float = 66666701;
+ int32 messageoption_int32 = 66666702;
+ int64 messageoption_int64 = 66666703;
+ uint32 messageoption_uint32 = 66666704;
+ uint64 messageoption_uint64 = 66666705;
+ sint32 messageoption_sint32 = 66666706;
+ sint64 messageoption_sint64 = 66666707;
+ fixed32 messageoption_fixed32 = 66666708;
+ fixed64 messageoption_fixed64 = 66666709;
+ sfixed32 messageoption_sfixed32 = 66666710;
+ sfixed64 messageoption_sfixed64 = 66666711;
+ bool messageoption_bool = 66666712;
+ string messageoption_string = 66666713;
+ bytes messageoption_bytes = 66666714;
+ OptionMessage messageoption_message = 66666715;
+ OptionEnum messageoption_enum = 66666716;
+
+ repeated double messageoption_repeated_double = 66666800;
+ repeated float messageoption_repeated_float = 66666801;
+ repeated int32 messageoption_repeated_int32 = 66666802;
+ repeated int64 messageoption_repeated_int64 = 66666803;
+ repeated uint32 messageoption_repeated_uint32 = 66666804;
+ repeated uint64 messageoption_repeated_uint64 = 66666805;
+ repeated sint32 messageoption_repeated_sint32 = 66666806;
+ repeated sint64 messageoption_repeated_sint64 = 66666807;
+ repeated fixed32 messageoption_repeated_fixed32 = 66666808;
+ repeated fixed64 messageoption_repeated_fixed64 = 66666809;
+ repeated sfixed32 messageoption_repeated_sfixed32 = 66666810;
+ repeated sfixed64 messageoption_repeated_sfixed64 = 66666811;
+ repeated bool messageoption_repeated_bool = 66666812;
+ repeated string messageoption_repeated_string = 66666813;
+ repeated bytes messageoption_repeated_bytes = 66666814;
+ repeated OptionMessage messageoption_repeated_message = 66666815;
+ repeated OptionEnum messageoption_repeated_enum = 66666816;
+}
+
+extend google.protobuf.FieldOptions {
+ double fieldoption_double = 66666700;
+ float fieldoption_float = 66666701;
+ int32 fieldoption_int32 = 66666702;
+ int64 fieldoption_int64 = 66666703;
+ uint32 fieldoption_uint32 = 66666704;
+ uint64 fieldoption_uint64 = 66666705;
+ sint32 fieldoption_sint32 = 66666706;
+ sint64 fieldoption_sint64 = 66666707;
+ fixed32 fieldoption_fixed32 = 66666708;
+ fixed64 fieldoption_fixed64 = 66666709;
+ sfixed32 fieldoption_sfixed32 = 66666710;
+ sfixed64 fieldoption_sfixed64 = 66666711;
+ bool fieldoption_bool = 66666712;
+ string fieldoption_string = 66666713;
+ bytes fieldoption_bytes = 66666714;
+ OptionMessage fieldoption_message = 66666715;
+ OptionEnum fieldoption_enum = 66666716;
+
+ repeated double fieldoption_repeated_double = 66666800;
+ repeated float fieldoption_repeated_float = 66666801;
+ repeated int32 fieldoption_repeated_int32 = 66666802;
+ repeated int64 fieldoption_repeated_int64 = 66666803;
+ repeated uint32 fieldoption_repeated_uint32 = 66666804;
+ repeated uint64 fieldoption_repeated_uint64 = 66666805;
+ repeated sint32 fieldoption_repeated_sint32 = 66666806;
+ repeated sint64 fieldoption_repeated_sint64 = 66666807;
+ repeated fixed32 fieldoption_repeated_fixed32 = 66666808;
+ repeated fixed64 fieldoption_repeated_fixed64 = 66666809;
+ repeated sfixed32 fieldoption_repeated_sfixed32 = 66666810;
+ repeated sfixed64 fieldoption_repeated_sfixed64 = 66666811;
+ repeated bool fieldoption_repeated_bool = 66666812;
+ repeated string fieldoption_repeated_string = 66666813;
+ repeated bytes fieldoption_repeated_bytes = 66666814;
+ repeated OptionMessage fieldoption_repeated_message = 66666815;
+ repeated OptionEnum fieldoption_repeated_enum = 66666816;
+}
+
+enum OptionEnum {
+ DEFAULT = 0;
+ ENUM1 = 1;
+ ENUM2 = 2;
+}
+
+message OptionMessage {
+ string string = 1;
+ repeated string repeated_string = 2;
+
+ int32 int32 = 3;
+ repeated int32 repeated_int32 = 4;
+
+ int64 int64 = 5;
+
+ OptionEnum test_enum = 6;
+}
\ No newline at end of file
diff --git a/sdks/java/extensions/protobuf/src/test/resources/test/option/v1/simple.proto b/sdks/java/extensions/protobuf/src/test/resources/test/option/v1/simple.proto
new file mode 100644
index 0000000..1750ddf
--- /dev/null
+++ b/sdks/java/extensions/protobuf/src/test/resources/test/option/v1/simple.proto
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+import "test/option/v1/option.proto";
+
+package test.option.v1;
+
+message MessageWithOptions {
+ string test_name = 1;
+ int32 test_index = 2;
+
+ int32 field_with_fieldoption_double = 700 [(test.option.v1.fieldoption_double) = 100.1];
+ int32 field_with_fieldoption_float = 701 [(test.option.v1.fieldoption_float) = 101.2];
+ int32 field_with_fieldoption_int32 = 702 [(test.option.v1.fieldoption_int32) = 102];
+ int32 field_with_fieldoption_int64 = 703 [(test.option.v1.fieldoption_int64) = 103];
+ int32 field_with_fieldoption_uint32 = 704 [(test.option.v1.fieldoption_uint32) = 104];
+ int32 field_with_fieldoption_uint64 = 705 [(test.option.v1.fieldoption_uint64) = 105];
+ int32 field_with_fieldoption_sint32 = 706 [(test.option.v1.fieldoption_sint32) = 106];
+ int32 field_with_fieldoption_sint64 = 707 [(test.option.v1.fieldoption_sint64) = 107];
+ int32 field_with_fieldoption_fixed32 = 708;
+ int32 field_with_fieldoption_fixed64 = 709;
+ int32 field_with_fieldoption_sfixed32 = 710;
+ int32 field_with_fieldoption_sfixed64 = 711;
+ int32 field_with_fieldoption_bool = 712 [(test.option.v1.fieldoption_bool) = true];
+ int32 field_with_fieldoption_string = 713 [(test.option.v1.fieldoption_string) = "Oh yeah"];
+ int32 field_with_fieldoption_bytes = 714;
+ int32 field_with_fieldoption_message = 715;
+ int32 field_with_fieldoption_enum = 716 [(test.option.v1.fieldoption_enum) = ENUM1];
+
+ int32 field_with_fieldoption_repeated_double = 800;
+ int32 field_with_fieldoption_repeated_float = 801;
+ int32 field_with_fieldoption_repeated_int32 = 802;
+ int32 field_with_fieldoption_repeated_int64 = 803;
+ int32 field_with_fieldoption_repeated_uint32 = 804;
+ int32 field_with_fieldoption_repeated_uint64 = 805;
+ int32 field_with_fieldoption_repeated_sint32 = 806;
+ int32 field_with_fieldoption_repeated_sint64 = 807;
+ int32 field_with_fieldoption_repeated_fixed32 = 808;
+ int32 field_with_fieldoption_repeated_fixed64 = 809;
+ int32 field_with_fieldoption_repeated_sfixed32 = 810;
+ int32 field_with_fieldoption_repeated_sfixed64 = 811;
+ int32 field_with_fieldoption_repeated_bool = 812;
+ int32 field_with_fieldoption_repeated_string = 813 [(test.option.v1.fieldoption_repeated_string) = "Oh yeah",
+ (test.option.v1.fieldoption_repeated_string) = "Oh no"];
+ int32 field_with_fieldoption_repeated_bytes = 814;
+ int32 field_with_fieldoption_repeated_message = 815;
+ int32 field_with_fieldoption_repeated_enum = 816;
+
+}
+
diff --git a/sdks/java/extensions/sketching/build.gradle b/sdks/java/extensions/sketching/build.gradle
index d923501..54cd4d2d 100644
--- a/sdks/java/extensions/sketching/build.gradle
+++ b/sdks/java/extensions/sketching/build.gradle
@@ -31,7 +31,6 @@
compile "com.tdunning:t-digest:$tdigest_version"
compile library.java.slf4j_api
testCompile library.java.avro
- testCompile library.java.commons_lang3
testCompile project(path: ":sdks:java:core", configuration: "shadowTest")
testCompile library.java.hamcrest_core
testCompile library.java.hamcrest_library
diff --git a/sdks/java/extensions/sql/build.gradle b/sdks/java/extensions/sql/build.gradle
index d7cc6b1..ee4db16 100644
--- a/sdks/java/extensions/sql/build.gradle
+++ b/sdks/java/extensions/sql/build.gradle
@@ -48,7 +48,6 @@
compile project(path: ":runners:direct-java", configuration: "shadow")
compile library.java.commons_codec
compile library.java.commons_csv
- compile library.java.commons_lang3
compile library.java.jackson_databind
compile library.java.joda_time
compile library.java.vendored_calcite_1_20_0
diff --git a/sdks/java/extensions/sql/perf-tests/build.gradle b/sdks/java/extensions/sql/perf-tests/build.gradle
new file mode 100644
index 0000000..7875a6b
--- /dev/null
+++ b/sdks/java/extensions/sql/perf-tests/build.gradle
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+plugins { id 'org.apache.beam.module' }
+applyJavaNature(automaticModuleName: 'org.apache.beam.sdk.extensions.sql.meta.provider')
+provideIntegrationTestingDependencies()
+enableJavaPerformanceTesting()
+
+description = "Apache Beam :: SDKs :: Java :: Extensions :: SQL :: IO Performance tests"
+ext.summary = "Performance tests for SQL IO sources"
+
+dependencies {
+ testCompile project(path: ":sdks:java:io:google-cloud-platform", configuration: "testRuntime")
+ testCompile project(path: ":sdks:java:extensions:sql", configuration: "testRuntime")
+}
+
+
diff --git a/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryIOPushDownIT.java b/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryIOPushDownIT.java
new file mode 100644
index 0000000..05ad30e
--- /dev/null
+++ b/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryIOPushDownIT.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.meta.provider.bigquery;
+
+import static org.apache.beam.sdk.extensions.sql.impl.planner.BeamRuleSets.getRuleSets;
+
+import com.google.cloud.Timestamp;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+import java.util.function.Function;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv;
+import org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode;
+import org.apache.beam.sdk.extensions.sql.impl.rel.BeamSqlRelUtils;
+import org.apache.beam.sdk.extensions.sql.impl.rule.BeamIOPushDownRule;
+import org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore;
+import org.apache.beam.sdk.io.common.IOITHelper;
+import org.apache.beam.sdk.io.common.IOTestPipelineOptions;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.testutils.NamedTestResult;
+import org.apache.beam.sdk.testutils.metrics.IOITMetrics;
+import org.apache.beam.sdk.testutils.metrics.MetricsReader;
+import org.apache.beam.sdk.testutils.metrics.TimeMonitor;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptRule;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.tools.RuleSet;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.tools.RuleSets;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class BigQueryIOPushDownIT {
+ private static final String READ_FROM_TABLE = "bigquery-public-data:hacker_news.full";
+ private static final String NAMESPACE = BigQueryIOPushDownIT.class.getName();
+ private static final String FIELDS_READ_METRIC = "fields_read";
+ private static final String READ_TIME_METRIC = "read_time";
+ private static final String CREATE_TABLE_STATEMENT =
+ "CREATE EXTERNAL TABLE HACKER_NEWS( \n"
+ + " title VARCHAR, \n"
+ + " url VARCHAR, \n"
+ + " text VARCHAR, \n"
+ + " dead BOOLEAN, \n"
+ + " `by` VARCHAR, \n"
+ + " score INTEGER, \n"
+ + " `time` INTEGER, \n"
+ + " `timestamp` TIMESTAMP, \n"
+ + " type VARCHAR, \n"
+ + " id INTEGER, \n"
+ + " parent INTEGER, \n"
+ + " descendants INTEGER, \n"
+ + " ranking INTEGER, \n"
+ + " deleted BOOLEAN \n"
+ + ") \n"
+ + "TYPE 'bigquery' \n"
+ + "LOCATION '"
+ + READ_FROM_TABLE
+ + "' \n"
+ + "TBLPROPERTIES '{ method: \"%s\" }'";
+ private static final String SELECT_STATEMENT =
+ "SELECT `by` as author, type, title, score from HACKER_NEWS where (type='story' or type='job') and score>2";
+
+ private static SQLBigQueryPerfTestOptions options;
+ private static String metricsBigQueryDataset;
+ private static String metricsBigQueryTable;
+ private Pipeline pipeline = Pipeline.create(options);
+ private BeamSqlEnv sqlEnv;
+
+ @BeforeClass
+ public static void setUp() {
+ options = IOITHelper.readIOTestPipelineOptions(SQLBigQueryPerfTestOptions.class);
+ metricsBigQueryDataset = options.getMetricsBigQueryDataset();
+ metricsBigQueryTable = options.getMetricsBigQueryTable();
+ }
+
+ @Before
+ public void before() {
+ sqlEnv = BeamSqlEnv.inMemory(new BigQueryPerfTableProvider(NAMESPACE, FIELDS_READ_METRIC));
+ }
+
+ @Test
+ public void readUsingDirectReadMethodPushDown() {
+ sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString()));
+
+ BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
+ PCollection<Row> output =
+ BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
+ .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));
+
+ PipelineResult result = pipeline.run();
+ result.waitUntilFinish();
+ collectAndPublishMetrics(result, "_directread_pushdown");
+ }
+
+ @Test
+ public void readUsingDirectReadMethod() {
+ List<RelOptRule> ruleList = new ArrayList<>();
+ for (RuleSet x : getRuleSets()) {
+ x.iterator().forEachRemaining(ruleList::add);
+ }
+ // Remove push-down rule
+ ruleList.remove(BeamIOPushDownRule.INSTANCE);
+
+ InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
+ inMemoryMetaStore.registerProvider(
+ new BigQueryPerfTableProvider(NAMESPACE, FIELDS_READ_METRIC));
+ sqlEnv =
+ BeamSqlEnv.builder(inMemoryMetaStore)
+ .setPipelineOptions(PipelineOptionsFactory.create())
+ .setRuleSets(new RuleSet[] {RuleSets.ofList(ruleList)})
+ .build();
+ sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString()));
+
+ BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
+ PCollection<Row> output =
+ BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
+ .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));
+
+ PipelineResult result = pipeline.run();
+ result.waitUntilFinish();
+ collectAndPublishMetrics(result, "_directread");
+ }
+
+ @Test
+ public void readUsingDefaultMethod() {
+ sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DEFAULT.toString()));
+
+ BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
+ PCollection<Row> output =
+ BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
+ .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));
+
+ PipelineResult result = pipeline.run();
+ result.waitUntilFinish();
+ collectAndPublishMetrics(result, "_default");
+ }
+
+ private void collectAndPublishMetrics(PipelineResult readResult, String postfix) {
+ String uuid = UUID.randomUUID().toString();
+ String timestamp = Timestamp.now().toString();
+
+ Set<Function<MetricsReader, NamedTestResult>> readSuppliers = getReadSuppliers(uuid, timestamp);
+ IOITMetrics readMetrics =
+ new IOITMetrics(readSuppliers, readResult, NAMESPACE, uuid, timestamp);
+ readMetrics.publish(metricsBigQueryDataset, metricsBigQueryTable + postfix);
+ }
+
+ private Set<Function<MetricsReader, NamedTestResult>> getReadSuppliers(
+ String uuid, String timestamp) {
+ Set<Function<MetricsReader, NamedTestResult>> suppliers = new HashSet<>();
+ suppliers.add(
+ reader -> {
+ long readStart = reader.getStartTimeMetric(READ_TIME_METRIC);
+ long readEnd = reader.getEndTimeMetric(READ_TIME_METRIC);
+ return NamedTestResult.create(
+ uuid, timestamp, READ_TIME_METRIC, (readEnd - readStart) / 1e3);
+ });
+ suppliers.add(
+ reader -> {
+ long fieldsRead = reader.getCounterMetric(FIELDS_READ_METRIC);
+ return NamedTestResult.create(uuid, timestamp, FIELDS_READ_METRIC, fieldsRead);
+ });
+ return suppliers;
+ }
+
+ /** Options for this io performance test. */
+ public interface SQLBigQueryPerfTestOptions extends IOTestPipelineOptions {
+ @Description("BQ dataset for the metrics data")
+ String getMetricsBigQueryDataset();
+
+ void setMetricsBigQueryDataset(String dataset);
+
+ @Description("BQ table for metrics data")
+ String getMetricsBigQueryTable();
+
+ void setMetricsBigQueryTable(String table);
+ }
+}
diff --git a/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryPerfTable.java b/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryPerfTable.java
new file mode 100644
index 0000000..98a1330
--- /dev/null
+++ b/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryPerfTable.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.meta.provider.bigquery;
+
+import java.util.List;
+import org.apache.beam.sdk.extensions.sql.meta.BeamSqlTableFilter;
+import org.apache.beam.sdk.extensions.sql.meta.Table;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryUtils.ConversionOptions;
+import org.apache.beam.sdk.metrics.Counter;
+import org.apache.beam.sdk.metrics.Metrics;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.values.PBegin;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.Row;
+
+public class BigQueryPerfTable extends BigQueryTable {
+ private final String namespace;
+ private final String metric;
+
+ BigQueryPerfTable(Table table, ConversionOptions options, String namespace, String metric) {
+ super(table, options);
+ this.namespace = namespace;
+ this.metric = metric;
+ }
+
+ @Override
+ public PCollection<Row> buildIOReader(PBegin begin) {
+ return super.buildIOReader(begin).apply(ParDo.of(new RowMonitor(namespace, metric)));
+ }
+
+ @Override
+ public PCollection<Row> buildIOReader(
+ PBegin begin, BeamSqlTableFilter filters, List<String> fieldNames) {
+ return super.buildIOReader(begin, filters, fieldNames)
+ .apply(ParDo.of(new RowMonitor(namespace, metric)));
+ }
+
+ /** Monitor that records the number of Fields in each Row read from an IO. */
+ private static class RowMonitor extends DoFn<Row, Row> {
+
+ private Counter totalRows;
+
+ RowMonitor(String namespace, String name) {
+ this.totalRows = Metrics.counter(namespace, name);
+ }
+
+ @ProcessElement
+ public void processElement(ProcessContext c) {
+ totalRows.inc(c.element().getFieldCount());
+ c.output(c.element());
+ }
+ }
+}
diff --git a/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryPerfTableProvider.java b/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryPerfTableProvider.java
new file mode 100644
index 0000000..9488d7e
--- /dev/null
+++ b/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryPerfTableProvider.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.meta.provider.bigquery;
+
+import org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable;
+import org.apache.beam.sdk.extensions.sql.meta.Table;
+
+/** A test table provider for BigQueryIOPushDownIT. */
+public class BigQueryPerfTableProvider extends BigQueryTableProvider {
+ private final String namespace;
+ private final String metric;
+
+ BigQueryPerfTableProvider(String namespace, String metric) {
+ this.namespace = namespace;
+ this.metric = metric;
+ }
+
+ @Override
+ public BeamSqlTable buildBeamSqlTable(Table table) {
+ return new BigQueryPerfTable(
+ table, getConversionOptions(table.getProperties()), namespace, metric);
+ }
+}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/planner/BeamRuleSets.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/planner/BeamRuleSets.java
index f30f9f3..14774b9 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/planner/BeamRuleSets.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/planner/BeamRuleSets.java
@@ -20,6 +20,7 @@
import java.util.List;
import org.apache.beam.sdk.annotations.Internal;
import org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode;
+import org.apache.beam.sdk.extensions.sql.impl.rule.BeamAggregateProjectMergeRule;
import org.apache.beam.sdk.extensions.sql.impl.rule.BeamAggregationRule;
import org.apache.beam.sdk.extensions.sql.impl.rule.BeamBasicAggregationRule;
import org.apache.beam.sdk.extensions.sql.impl.rule.BeamCalcRule;
@@ -41,7 +42,6 @@
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptRule;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.RelNode;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.rules.AggregateJoinTransposeRule;
-import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.rules.AggregateProjectMergeRule;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.rules.AggregateRemoveRule;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.rules.AggregateUnionAggregateRule;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.rules.CalcMergeRule;
@@ -96,7 +96,7 @@
ProjectSetOpTransposeRule.INSTANCE,
// aggregation and projection rules
- AggregateProjectMergeRule.INSTANCE,
+ BeamAggregateProjectMergeRule.INSTANCE,
// push a projection past a filter or vice versa
ProjectFilterTransposeRule.INSTANCE,
FilterProjectTransposeRule.INSTANCE,
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/AbstractBeamCalcRel.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/AbstractBeamCalcRel.java
new file mode 100644
index 0000000..61abbb6
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/AbstractBeamCalcRel.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.rel;
+
+import org.apache.beam.sdk.annotations.Internal;
+import org.apache.beam.sdk.extensions.sql.impl.planner.BeamCostModel;
+import org.apache.beam.sdk.extensions.sql.impl.planner.NodeStats;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptCluster;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptPlanner;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelTraitSet;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.RelNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.core.Calc;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexLocalRef;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexProgram;
+
+/** BeamRelNode to replace {@code Project} and {@code Filter} node. */
+@Internal
+public abstract class AbstractBeamCalcRel extends Calc implements BeamRelNode {
+
+ public AbstractBeamCalcRel(
+ RelOptCluster cluster, RelTraitSet traits, RelNode input, RexProgram program) {
+ super(cluster, traits, input, program);
+ }
+
+ public boolean isInputSortRelAndLimitOnly() {
+ return (input instanceof BeamSortRel) && ((BeamSortRel) input).isLimitOnly();
+ }
+
+ public int getLimitCountOfSortRel() {
+ if (input instanceof BeamSortRel) {
+ return ((BeamSortRel) input).getCount();
+ }
+
+ throw new RuntimeException("Could not get the limit count from a non BeamSortRel input.");
+ }
+
+ @Override
+ public NodeStats estimateNodeStats(RelMetadataQuery mq) {
+ NodeStats inputStat = BeamSqlRelUtils.getNodeStats(input, mq);
+ double selectivity = estimateFilterSelectivity(getInput(), program, mq);
+
+ return inputStat.multiply(selectivity);
+ }
+
+ private static double estimateFilterSelectivity(
+ RelNode child, RexProgram program, RelMetadataQuery mq) {
+ // Similar to calcite, if the calc node is representing filter operation we estimate the filter
+ // selectivity based on the number of equality conditions, number of inequality conditions, ....
+ RexLocalRef programCondition = program.getCondition();
+ RexNode condition;
+ if (programCondition == null) {
+ condition = null;
+ } else {
+ condition = program.expandLocalRef(programCondition);
+ }
+ // Currently this gets the selectivity based on Calcite's Selectivity Handler (RelMdSelectivity)
+ return mq.getSelectivity(child, condition);
+ }
+
+ @Override
+ public BeamCostModel beamComputeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
+ NodeStats inputStat = BeamSqlRelUtils.getNodeStats(this.input, mq);
+ return BeamCostModel.FACTORY.makeCost(inputStat.getRowCount(), inputStat.getRate());
+ }
+}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java
index e65d582..6c5e3cf 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java
@@ -33,9 +33,7 @@
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
-import org.apache.beam.sdk.extensions.sql.impl.planner.BeamCostModel;
import org.apache.beam.sdk.extensions.sql.impl.planner.BeamJavaTypeFactory;
-import org.apache.beam.sdk.extensions.sql.impl.planner.NodeStats;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.CharType;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.DateType;
@@ -65,15 +63,12 @@
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.linq4j.tree.ParameterExpression;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.linq4j.tree.Types;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptCluster;
-import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptPlanner;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptPredicateList;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelTraitSet;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.RelNode;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.core.Calc;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexBuilder;
-import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexLocalRef;
-import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexNode;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexProgram;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexSimplify;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexUtil;
@@ -90,8 +85,8 @@
import org.joda.time.DateTimeZone;
import org.joda.time.ReadableInstant;
-/** BeamRelNode to replace a {@code Project} node. */
-public class BeamCalcRel extends Calc implements BeamRelNode {
+/** BeamRelNode to replace {@code Project} and {@code Filter} node. */
+public class BeamCalcRel extends AbstractBeamCalcRel {
private static final ParameterExpression outputSchemaParam =
Expressions.parameter(Schema.class, "outputSchema");
@@ -147,7 +142,7 @@
final RelMetadataQuery mq = RelMetadataQuery.instance();
final RelOptPredicateList predicates = mq.getPulledUpPredicates(getInput());
final RexSimplify simplify = new RexSimplify(rexBuilder, predicates, RexUtil.EXECUTOR);
- final RexProgram program = BeamCalcRel.this.program.normalize(rexBuilder, simplify);
+ final RexProgram program = getProgram().normalize(rexBuilder, simplify);
Expression condition =
RexToLixTranslator.translateCondition(
@@ -210,47 +205,6 @@
}
}
- public int getLimitCountOfSortRel() {
- if (input instanceof BeamSortRel) {
- return ((BeamSortRel) input).getCount();
- }
-
- throw new RuntimeException("Could not get the limit count from a non BeamSortRel input.");
- }
-
- @Override
- public NodeStats estimateNodeStats(RelMetadataQuery mq) {
- NodeStats inputStat = BeamSqlRelUtils.getNodeStats(this.input, mq);
- double selectivity = estimateFilterSelectivity(getInput(), program, mq);
-
- return inputStat.multiply(selectivity);
- }
-
- private static double estimateFilterSelectivity(
- RelNode child, RexProgram program, RelMetadataQuery mq) {
- // Similar to calcite, if the calc node is representing filter operation we estimate the filter
- // selectivity based on the number of equality conditions, number of inequality conditions, ....
- RexLocalRef programCondition = program.getCondition();
- RexNode condition;
- if (programCondition == null) {
- condition = null;
- } else {
- condition = program.expandLocalRef(programCondition);
- }
- // Currently this gets the selectivity based on Calcite's Selectivity Handler (RelMdSelectivity)
- return mq.getSelectivity(child, condition);
- }
-
- @Override
- public BeamCostModel beamComputeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
- NodeStats inputStat = BeamSqlRelUtils.getNodeStats(this.input, mq);
- return BeamCostModel.FACTORY.makeCost(inputStat.getRowCount(), inputStat.getRate());
- }
-
- public boolean isInputSortRelAndLimitOnly() {
- return (input instanceof BeamSortRel) && ((BeamSortRel) input).isLimitOnly();
- }
-
/** {@code CalcFn} is the executor for a {@link BeamCalcRel} step. */
private static class CalcFn extends DoFn<Row, Row> {
private final String processElementBlock;
@@ -306,7 +260,7 @@
.put(TypeName.DOUBLE, Double.class)
.build();
- private Expression castOutput(Expression value, FieldType toType) {
+ private static Expression castOutput(Expression value, FieldType toType) {
if (value.getType() == Object.class || !(value.getType() instanceof Class)) {
// fast copy path, just pass object through
return value;
@@ -334,7 +288,7 @@
return value;
}
- private Expression castOutputTime(Expression value, FieldType toType) {
+ private static Expression castOutputTime(Expression value, FieldType toType) {
Expression valueDateTime = value;
// First, convert to millis
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamEnumerableConverter.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamEnumerableConverter.java
index 7e78945..4ec00cf 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamEnumerableConverter.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamEnumerableConverter.java
@@ -383,14 +383,15 @@
private static boolean isLimitQuery(BeamRelNode node) {
return (node instanceof BeamSortRel && ((BeamSortRel) node).isLimitOnly())
- || (node instanceof BeamCalcRel && ((BeamCalcRel) node).isInputSortRelAndLimitOnly());
+ || (node instanceof AbstractBeamCalcRel
+ && ((AbstractBeamCalcRel) node).isInputSortRelAndLimitOnly());
}
private static int getLimitCount(BeamRelNode node) {
if (node instanceof BeamSortRel) {
return ((BeamSortRel) node).getCount();
- } else if (node instanceof BeamCalcRel) {
- return ((BeamCalcRel) node).getLimitCountOfSortRel();
+ } else if (node instanceof AbstractBeamCalcRel) {
+ return ((AbstractBeamCalcRel) node).getLimitCountOfSortRel();
}
throw new RuntimeException(
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rule/BeamAggregateProjectMergeRule.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rule/BeamAggregateProjectMergeRule.java
new file mode 100644
index 0000000..b146b04
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rule/BeamAggregateProjectMergeRule.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.rule;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.RelNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.SingleRel;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.core.Aggregate;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.core.Filter;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.core.Project;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.core.RelFactories;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.rules.AggregateProjectMergeRule;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.tools.RelBuilderFactory;
+
+/**
+ * This rule is essentially a wrapper around Calcite's {@code AggregateProjectMergeRule}. When an
+ * underlying IO supports project push-down, it is more efficient not to merge the {@code Project}
+ * with an {@code Aggregate}, leaving that work to the {@code BeamIOPushDownRule}.
+ */
+public class BeamAggregateProjectMergeRule extends AggregateProjectMergeRule {
+ public static final AggregateProjectMergeRule INSTANCE =
+ new BeamAggregateProjectMergeRule(
+ Aggregate.class, Project.class, RelFactories.LOGICAL_BUILDER);
+
+ public BeamAggregateProjectMergeRule(
+ Class<? extends Aggregate> aggregateClass,
+ Class<? extends Project> projectClass,
+ RelBuilderFactory relBuilderFactory) {
+ super(aggregateClass, projectClass, relBuilderFactory);
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ final Project project = call.rel(1);
+ BeamIOSourceRel io = getUnderlyingIO(new HashSet<>(), project);
+
+ // Only perform AggregateProjectMergeRule when IO is not present or project push-down is not
+ // supported.
+ if (io == null || !io.getBeamSqlTable().supportsProjects().isSupported()) {
+ super.onMatch(call);
+ }
+ }
+
+ /**
+ * Following scenarios are possible:<br>
+ * 1) Aggregate <- Project <- IO.<br>
+ * 2) Aggregate <- Project <- Chain of Project/Filter <- IO.<br>
+ * 3) Aggregate <- Project <- Something else.<br>
+ * 4) Aggregate <- Project <- Chain of Project/Filter <- Something else.
+ *
+ * @param parent project that matched this rule.
+ * @return {@code BeamIOSourceRel} when it is present or null when some other {@code RelNode} is
+ * present.
+ */
+ private BeamIOSourceRel getUnderlyingIO(Set<RelNode> visitedNodes, SingleRel parent) {
+ // No need to look at the same node more than once.
+ if (visitedNodes.contains(parent)) {
+ return null;
+ }
+ visitedNodes.add(parent);
+ List<RelNode> nodes = ((RelSubset) parent.getInput()).getRelList();
+
+ for (RelNode node : nodes) {
+ if (node instanceof Filter || node instanceof Project) {
+ // Search node inputs for an IO.
+ BeamIOSourceRel child = getUnderlyingIO(visitedNodes, (SingleRel) node);
+ if (child != null) {
+ return child;
+ }
+ } else if (node instanceof BeamIOSourceRel) {
+ return (BeamIOSourceRel) node;
+ }
+ }
+
+ return null;
+ }
+}
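For orientation, here is a minimal sketch (not part of this change) of how the Beam-aware rule would be registered in place of Calcite's stock AggregateProjectMergeRule when assembling a planner rule set. RuleSets and the vendored Calcite packages are the ones already imported elsewhere in this PR; RuleSetSketch is a hypothetical holder class.

```java
import org.apache.beam.sdk.extensions.sql.impl.rule.BeamAggregateProjectMergeRule;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.tools.RuleSet;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.tools.RuleSets;

/** Hypothetical holder class, for illustration only. */
class RuleSetSketch {
  static RuleSet aggregateMergeRules() {
    // Registering the Beam wrapper instead of AggregateProjectMergeRule.INSTANCE keeps the
    // Project available for BeamIOPushDownRule whenever the underlying IO supports project
    // push-down; otherwise the wrapper falls back to the stock merge behavior.
    return RuleSets.ofList(BeamAggregateProjectMergeRule.INSTANCE);
  }
}
```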
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/udf/BuiltinStringFunctions.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/udf/BuiltinStringFunctions.java
index 1a90bf5..b7f9318 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/udf/BuiltinStringFunctions.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/udf/BuiltinStringFunctions.java
@@ -21,12 +21,12 @@
import com.google.auto.service.AutoService;
import java.util.Arrays;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.ArrayUtils;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.StringUtils;
import org.apache.beam.sdk.schemas.Schema.TypeName;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.linq4j.function.Strict;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.lang3.ArrayUtils;
-import org.apache.commons.lang3.StringUtils;
/** BuiltinStringFunctions. */
@AutoService(BeamBuiltinFunctionProvider.class)
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BeamSqlUnparseContext.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BeamSqlUnparseContext.java
new file mode 100644
index 0000000..60fd1bc
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BeamSqlUnparseContext.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.meta.provider.bigquery;
+
+import static org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.rel2sql.SqlImplementor.POS;
+
+import java.util.function.IntFunction;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.avatica.util.ByteString;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.rel2sql.SqlImplementor;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexLiteral;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexProgram;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlKind;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlLiteral;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlWriter;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.type.SqlTypeFamily;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.util.BitString;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.commons.lang.StringEscapeUtils;
+
+public class BeamSqlUnparseContext extends SqlImplementor.SimpleContext {
+
+ public BeamSqlUnparseContext(IntFunction<SqlNode> field) {
+ super(BeamBigQuerySqlDialect.DEFAULT, field);
+ }
+
+ @Override
+ public SqlNode toSql(RexProgram program, RexNode rex) {
+ if (rex.getKind().equals(SqlKind.LITERAL)) {
+ final RexLiteral literal = (RexLiteral) rex;
+ SqlTypeFamily family = literal.getTypeName().getFamily();
+ if (SqlTypeFamily.BINARY.equals(family)) {
+ ByteString byteString = literal.getValueAs(ByteString.class);
+ BitString bitString = BitString.createFromHexString(byteString.toString(16));
+ return new SqlByteStringLiteral(bitString, POS);
+ } else if (SqlTypeFamily.CHARACTER.equals(family)) {
+ String escaped = StringEscapeUtils.escapeJava(literal.getValueAs(String.class));
+ return SqlLiteral.createCharString(escaped, POS);
+ }
+ }
+
+ return super.toSql(program, rex);
+ }
+
+ private static class SqlByteStringLiteral extends SqlLiteral {
+
+ SqlByteStringLiteral(BitString bytes, SqlParserPos pos) {
+ super(bytes, SqlTypeName.BINARY, pos);
+ }
+
+ @Override
+ public SqlByteStringLiteral clone(SqlParserPos pos) {
+ return new SqlByteStringLiteral((BitString) this.value, pos);
+ }
+
+ @Override
+ public void unparse(SqlWriter writer, int leftPrec, int rightPrec) {
+ assert this.value instanceof BitString;
+
+ StringBuilder builder = new StringBuilder("B'");
+ for (byte b : ((BitString) this.value).getAsByteArray()) {
+ builder.append(String.format("\\x%02X", b));
+ }
+ builder.append("'");
+
+ writer.literal(builder.toString());
+ }
+ }
+}
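To make the escaping above concrete, here is a small stand-alone sketch (not part of the diff) that runs the same String.format loop SqlByteStringLiteral.unparse uses and shows the BigQuery-style byte-string literal it produces.

```java
public class ByteLiteralFormatSketch {
  public static void main(String[] args) {
    byte[] bytes = {0x0A, (byte) 0xFF};

    // Mirrors SqlByteStringLiteral.unparse: each byte becomes a two-digit upper-case hex escape.
    StringBuilder builder = new StringBuilder("B'");
    for (byte b : bytes) {
      builder.append(String.format("\\x%02X", b));
    }
    builder.append("'");

    System.out.println(builder); // prints B'\x0A\xFF'
  }
}
```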
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryTable.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryTable.java
index 789d8ec..71f4235 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryTable.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryTable.java
@@ -188,8 +188,7 @@
// TODO: BigQuerySqlDialectWithTypeTranslation can be replaced with BigQuerySqlDialect after
// updating vendor Calcite version.
- SqlImplementor.SimpleContext context =
- new SqlImplementor.SimpleContext(BeamBigQuerySqlDialect.DEFAULT, field);
+ SqlImplementor.Context context = new BeamSqlUnparseContext(field);
// Create a single SqlNode from a list of RexNodes
SqlNode andSqlNode = null;
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryTableProvider.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryTableProvider.java
index 9c4266b..b1646aa 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryTableProvider.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryTableProvider.java
@@ -19,6 +19,7 @@
import static org.apache.beam.vendor.calcite.v1_20_0.com.google.common.base.MoreObjects.firstNonNull;
+import com.alibaba.fastjson.JSONObject;
import com.google.auto.service.AutoService;
import org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable;
import org.apache.beam.sdk.extensions.sql.meta.Table;
@@ -52,13 +53,15 @@
@Override
public BeamSqlTable buildBeamSqlTable(Table table) {
- return new BigQueryTable(
- table,
- ConversionOptions.builder()
- .setTruncateTimestamps(
- firstNonNull(table.getProperties().getBoolean("truncateTimestamps"), false)
- ? TruncateTimestamps.TRUNCATE
- : TruncateTimestamps.REJECT)
- .build());
+ return new BigQueryTable(table, getConversionOptions(table.getProperties()));
+ }
+
+ protected static ConversionOptions getConversionOptions(JSONObject properties) {
+ return ConversionOptions.builder()
+ .setTruncateTimestamps(
+ firstNonNull(properties.getBoolean("truncateTimestamps"), false)
+ ? TruncateTimestamps.TRUNCATE
+ : TruncateTimestamps.REJECT)
+ .build();
}
}
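For reference, a sketch (not part of the diff) of the property handling the extracted getConversionOptions helper performs: a missing or false truncateTimestamps property maps to REJECT, true maps to TRUNCATE.

```java
import com.alibaba.fastjson.JSONObject;

public class TruncateTimestampsPropertySketch {
  public static void main(String[] args) {
    JSONObject properties = new JSONObject();
    properties.put("truncateTimestamps", true);

    // Mirrors firstNonNull(properties.getBoolean("truncateTimestamps"), false).
    Boolean configured = properties.getBoolean("truncateTimestamps");
    boolean truncate = configured != null ? configured : false;

    System.out.println(truncate ? "TRUNCATE" : "REJECT"); // prints TRUNCATE
  }
}
```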
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubIOJsonTable.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubIOJsonTable.java
index 9e639e6..535b72c 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubIOJsonTable.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubIOJsonTable.java
@@ -31,13 +31,11 @@
import org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.schemas.Schema;
-import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PBegin;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.POutput;
import org.apache.beam.sdk.values.Row;
-import org.apache.beam.sdk.values.TupleTagList;
/**
* <i>Experimental</i>
@@ -148,8 +146,14 @@
public PCollection<Row> buildIOReader(PBegin begin) {
PCollectionTuple rowsWithDlq =
begin
- .apply("readFromPubsub", readMessagesWithAttributes())
- .apply("parseMessageToRow", createParserParDo());
+ .apply("ReadFromPubsub", readMessagesWithAttributes())
+ .apply(
+ "PubsubMessageToRow",
+ PubsubMessageToRow.builder()
+ .messageSchema(getSchema())
+ .useDlq(config.useDlq())
+ .useFlatSchema(config.getUseFlatSchema())
+ .build());
rowsWithDlq.get(MAIN_TAG).setRowSchema(getSchema());
if (config.useDlq()) {
@@ -159,17 +163,6 @@
return rowsWithDlq.get(MAIN_TAG);
}
- private ParDo.MultiOutput<PubsubMessage, Row> createParserParDo() {
- return ParDo.of(
- PubsubMessageToRow.builder()
- .messageSchema(getSchema())
- .useDlq(config.useDlq())
- .useFlatSchema(config.getUseFlatSchema())
- .build())
- .withOutputTags(
- MAIN_TAG, config.useDlq() ? TupleTagList.of(DLQ_TAG) : TupleTagList.empty());
- }
-
private PubsubIO.Read<PubsubMessage> readMessagesWithAttributes() {
PubsubIO.Read<PubsubMessage> read =
PubsubIO.readMessagesWithAttributes().fromTopic(config.getTopic());
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubMessageToRow.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubMessageToRow.java
index 64d4bc3..bf80fe4 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubMessageToRow.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubMessageToRow.java
@@ -22,6 +22,7 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.auto.value.AutoValue;
+import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
@@ -33,26 +34,30 @@
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.TypeName;
import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.util.RowJson.RowJsonDeserializer;
import org.apache.beam.sdk.util.RowJson.RowJsonDeserializer.UnsupportedRowJsonException;
import org.apache.beam.sdk.util.RowJsonUtils;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.sdk.values.TupleTagList;
import org.joda.time.Instant;
-/** A {@link DoFn} to convert {@link PubsubMessage} with JSON payload to {@link Row}. */
+/** Read-side converter for a {@link PubsubMessage} with a JSON payload. */
@Internal
@Experimental
@AutoValue
-public abstract class PubsubMessageToRow extends DoFn<PubsubMessage, Row> {
+public abstract class PubsubMessageToRow
+ extends PTransform<PCollection<PubsubMessage>, PCollectionTuple> implements Serializable {
static final String TIMESTAMP_FIELD = "event_timestamp";
static final String ATTRIBUTES_FIELD = "attributes";
static final String PAYLOAD_FIELD = "payload";
static final TupleTag<PubsubMessage> DLQ_TAG = new TupleTag<PubsubMessage>() {};
static final TupleTag<Row> MAIN_TAG = new TupleTag<Row>() {};
- private transient volatile @Nullable ObjectMapper objectMapper;
-
/**
* Schema of the Pubsub message.
*
@@ -74,60 +79,110 @@
public abstract boolean useFlatSchema();
- private Schema payloadSchema() {
- if (!useFlatSchema()) {
- return messageSchema().getField(PAYLOAD_FIELD).getType().getRowSchema();
- } else {
- // The payload contains every field in the schema except event_timestamp
- return new Schema(
- messageSchema().getFields().stream()
- .filter(f -> !f.getName().equals(TIMESTAMP_FIELD))
- .collect(Collectors.toList()));
- }
- }
-
public static Builder builder() {
return new AutoValue_PubsubMessageToRow.Builder();
}
- @DoFn.ProcessElement
- public void processElement(ProcessContext context) {
- try {
- List<Object> values = getFieldValues(context);
- context.output(Row.withSchema(messageSchema()).addValues(values).build());
- } catch (UnsupportedRowJsonException jsonException) {
- if (useDlq()) {
- context.output(DLQ_TAG, context.element());
+ @Override
+ public PCollectionTuple expand(PCollection<PubsubMessage> input) {
+ PCollectionTuple rows =
+ input.apply(
+ ParDo.of(
+ useFlatSchema()
+                    ? new FlatSchemaPubsubMessageToRow(messageSchema(), useDlq())
+ : new NestedSchemaPubsubMessageToRow(messageSchema(), useDlq()))
+ .withOutputTags(
+ MAIN_TAG, useDlq() ? TupleTagList.of(DLQ_TAG) : TupleTagList.empty()));
+ return rows;
+ }
+
+ /**
+   * A {@link DoFn} to convert a flat-schema {@link PubsubMessage} with a JSON payload to {@link Row}.
+ */
+ @Internal
+  private static class FlatSchemaPubsubMessageToRow extends DoFn<PubsubMessage, Row> {
+
+ private final Schema messageSchema;
+
+ private final boolean useDlq;
+
+ private transient volatile @Nullable ObjectMapper objectMapper;
+
+    protected FlatSchemaPubsubMessageToRow(Schema messageSchema, boolean useDlq) {
+ this.messageSchema = messageSchema;
+ this.useDlq = useDlq;
+ }
+
+ /**
+     * Get the value for a field from a given payload, in the order fields are specified in the
+     * flat schema.
+ */
+ private Object getValueForFieldFlatSchema(Schema.Field field, Instant timestamp, Row payload) {
+ String fieldName = field.getName();
+ if (TIMESTAMP_FIELD.equals(fieldName)) {
+ return timestamp;
} else {
- throw new RuntimeException("Error parsing message", jsonException);
+ return payload.getValue(fieldName);
+ }
+ }
+
+ private Row parsePayload(PubsubMessage pubsubMessage) {
+ String payloadJson = new String(pubsubMessage.getPayload(), StandardCharsets.UTF_8);
+ // Construct flat payload schema.
+ Schema payloadSchema =
+ new Schema(
+ messageSchema.getFields().stream()
+ .filter(f -> !f.getName().equals(TIMESTAMP_FIELD))
+ .collect(Collectors.toList()));
+
+ if (objectMapper == null) {
+ objectMapper = newObjectMapperWith(RowJsonDeserializer.forSchema(payloadSchema));
+ }
+
+ return RowJsonUtils.jsonToRow(objectMapper, payloadJson);
+ }
+
+ @ProcessElement
+ public void processElement(ProcessContext context) {
+ try {
+ Row payload = parsePayload(context.element());
+ List<Object> values =
+ messageSchema.getFields().stream()
+ .map(field -> getValueForFieldFlatSchema(field, context.timestamp(), payload))
+ .collect(toList());
+ context.output(Row.withSchema(messageSchema).addValues(values).build());
+ } catch (UnsupportedRowJsonException jsonException) {
+ if (useDlq) {
+ context.output(DLQ_TAG, context.element());
+ } else {
+ throw new RuntimeException("Error parsing message", jsonException);
+ }
}
}
}
/**
- * Get values for fields in the same order they're specified in schema, including timestamp,
- * payload, and attributes.
+ * A {@link DoFn} to convert a nested schema {@link PubsubMessage} with JSON payload to {@link
+ * Row}.
*/
- private List<Object> getFieldValues(ProcessContext context) {
- Row payload = parsePayloadJsonRow(context.element());
- return messageSchema().getFields().stream()
- .map(
- field ->
- getValueForField(
- field, context.timestamp(), context.element().getAttributeMap(), payload))
- .collect(toList());
- }
+ @Internal
+ private static class NestedSchemaPubsubMessageToRow extends DoFn<PubsubMessage, Row> {
- private Object getValueForField(
- Schema.Field field, Instant timestamp, Map<String, String> attributeMap, Row payload) {
- // TODO(BEAM-8801): do this check once at construction time, rather than for every element.
- if (useFlatSchema()) {
- if (field.getName().equals(TIMESTAMP_FIELD)) {
- return timestamp;
- } else {
- return payload.getValue(field.getName());
- }
- } else {
+ private final Schema messageSchema;
+
+ private final boolean useDlq;
+
+ private transient volatile @Nullable ObjectMapper objectMapper;
+
+ protected NestedSchemaPubsubMessageToRow(Schema messageSchema, boolean useDlq) {
+ this.messageSchema = messageSchema;
+ this.useDlq = useDlq;
+ }
+
+    /** Get the value for a field in the order fields are specified in the nested schema. */
+ private Object getValueForFieldNestedSchema(
+ Schema.Field field, Instant timestamp, Map<String, String> attributeMap, Row payload) {
switch (field.getName()) {
case TIMESTAMP_FIELD:
return timestamp;
@@ -144,16 +199,42 @@
+ "'timestamp', 'attributes', and 'payload' fields");
}
}
- }
- private Row parsePayloadJsonRow(PubsubMessage pubsubMessage) {
- String payloadJson = new String(pubsubMessage.getPayload(), StandardCharsets.UTF_8);
+ private Row parsePayload(PubsubMessage pubsubMessage) {
+ String payloadJson = new String(pubsubMessage.getPayload(), StandardCharsets.UTF_8);
+ // Retrieve nested payload schema.
+ Schema payloadSchema = messageSchema.getField(PAYLOAD_FIELD).getType().getRowSchema();
- if (objectMapper == null) {
- objectMapper = newObjectMapperWith(RowJsonDeserializer.forSchema(payloadSchema()));
+ if (objectMapper == null) {
+ objectMapper = newObjectMapperWith(RowJsonDeserializer.forSchema(payloadSchema));
+ }
+
+ return RowJsonUtils.jsonToRow(objectMapper, payloadJson);
}
- return RowJsonUtils.jsonToRow(objectMapper, payloadJson);
+ @ProcessElement
+ public void processElement(ProcessContext context) {
+ try {
+ Row payload = parsePayload(context.element());
+ List<Object> values =
+ messageSchema.getFields().stream()
+ .map(
+ field ->
+ getValueForFieldNestedSchema(
+ field,
+ context.timestamp(),
+ context.element().getAttributeMap(),
+ payload))
+ .collect(toList());
+ context.output(Row.withSchema(messageSchema).addValues(values).build());
+ } catch (UnsupportedRowJsonException jsonException) {
+ if (useDlq) {
+ context.output(DLQ_TAG, context.element());
+ } else {
+ throw new RuntimeException("Error parsing message", jsonException);
+ }
+ }
+ }
}
@AutoValue.Builder
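A minimal usage sketch of the reworked transform (assumptions: messages is an existing PCollection<PubsubMessage>, and the code lives in the same package as PubsubMessageToRow since MAIN_TAG and DLQ_TAG are package-visible, as in the tests later in this PR):

```java
static PCollection<Row> toRows(PCollection<PubsubMessage> messages, Schema messageSchema) {
  PCollectionTuple converted =
      messages.apply(
          "PubsubMessageToRow",
          PubsubMessageToRow.builder()
              .messageSchema(messageSchema)
              .useDlq(true)
              .useFlatSchema(true)
              .build());

  // Messages whose JSON payload fails to parse are routed to the DLQ tag instead of failing.
  PCollection<PubsubMessage> failed = converted.get(PubsubMessageToRow.DLQ_TAG);

  return converted.get(PubsubMessageToRow.MAIN_TAG).setRowSchema(messageSchema);
}
```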
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rule/BeamAggregateProjectMergeRuleTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rule/BeamAggregateProjectMergeRuleTest.java
new file mode 100644
index 0000000..2bce48c
--- /dev/null
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rule/BeamAggregateProjectMergeRuleTest.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.rule;
+
+import static org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider.PUSH_DOWN_OPTION;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
+import static org.hamcrest.core.IsInstanceOf.instanceOf;
+
+import com.alibaba.fastjson.JSON;
+import org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv;
+import org.apache.beam.sdk.extensions.sql.impl.rel.BeamAggregationRel;
+import org.apache.beam.sdk.extensions.sql.impl.rel.BeamCalcRel;
+import org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel;
+import org.apache.beam.sdk.extensions.sql.impl.rel.BeamPushDownIOSourceRel;
+import org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode;
+import org.apache.beam.sdk.extensions.sql.meta.Table;
+import org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider;
+import org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider.PushDownOptions;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class BeamAggregateProjectMergeRuleTest {
+ private static final Schema BASIC_SCHEMA =
+ Schema.builder()
+ .addInt32Field("unused1")
+ .addInt32Field("id")
+ .addStringField("name")
+ .addInt32Field("unused2")
+ .build();
+ private BeamSqlEnv sqlEnv;
+
+ @Rule public TestPipeline pipeline = TestPipeline.create();
+
+ @Before
+ public void buildUp() {
+ TestTableProvider tableProvider = new TestTableProvider();
+ Table projectTable = getTable("TEST_PROJECT", PushDownOptions.PROJECT);
+ Table filterTable = getTable("TEST_FILTER", PushDownOptions.FILTER);
+ Table noneTable = getTable("TEST_NONE", PushDownOptions.NONE);
+ tableProvider.createTable(projectTable);
+ tableProvider.createTable(filterTable);
+ tableProvider.createTable(noneTable);
+ sqlEnv = BeamSqlEnv.inMemory(tableProvider);
+ }
+
+ @Test
+ public void testBeamAggregateProjectMergeRule_withProjectTable() {
+ // When an IO supports project push-down, Projects should be merged with an IO.
+ String sqlQuery = "select SUM(id) as id_sum from TEST_PROJECT group by name";
+ BeamRelNode beamRel = sqlEnv.parseQuery(sqlQuery);
+
+ BeamAggregationRel aggregate = (BeamAggregationRel) beamRel.getInput(0);
+ BeamIOSourceRel ioSourceRel = (BeamIOSourceRel) aggregate.getInput();
+
+ // Make sure project push-down took place.
+ assertThat(ioSourceRel, instanceOf(BeamPushDownIOSourceRel.class));
+ assertThat(ioSourceRel.getRowType().getFieldNames(), containsInAnyOrder("name", "id"));
+ }
+
+ @Test
+ public void testBeamAggregateProjectMergeRule_withProjectTable_withPredicate() {
+ // When an IO supports project push-down, Projects should be merged with an IO.
+ String sqlQuery = "select SUM(id) as id_sum from TEST_PROJECT where unused1=1 group by name";
+ BeamRelNode beamRel = sqlEnv.parseQuery(sqlQuery);
+
+ BeamAggregationRel aggregate = (BeamAggregationRel) beamRel.getInput(0);
+ BeamCalcRel calc = (BeamCalcRel) aggregate.getInput();
+ BeamIOSourceRel ioSourceRel = (BeamIOSourceRel) calc.getInput();
+
+ // Make sure project push-down took place.
+ assertThat(ioSourceRel, instanceOf(BeamPushDownIOSourceRel.class));
+ assertThat(
+ ioSourceRel.getRowType().getFieldNames(), containsInAnyOrder("name", "id", "unused1"));
+ }
+
+ @Test
+ public void testBeamAggregateProjectMergeRule_withFilterTable() {
+    // When an IO does not support project push-down, Projects should be merged with an aggregate.
+ String sqlQuery = "select SUM(id) as id_sum from TEST_FILTER group by name";
+ BeamRelNode beamRel = sqlEnv.parseQuery(sqlQuery);
+
+ BeamAggregationRel aggregate = (BeamAggregationRel) beamRel.getInput(0);
+ BeamIOSourceRel ioSourceRel = (BeamIOSourceRel) aggregate.getInput();
+
+ // Make sure project merged with an aggregate.
+ assertThat(aggregate.getRowType().getFieldNames(), containsInAnyOrder("id_sum", "name"));
+
+    // IO projects all fields.
+ assertThat(ioSourceRel, instanceOf(BeamIOSourceRel.class));
+ assertThat(
+ ioSourceRel.getRowType().getFieldNames(),
+ containsInAnyOrder("unused1", "name", "id", "unused2"));
+ }
+
+ @Test
+ public void testBeamAggregateProjectMergeRule_withNoneTable() {
+    // When an IO does not support project push-down, Projects should be merged with an aggregate.
+ String sqlQuery = "select SUM(id) as id_sum from TEST_NONE group by name";
+ BeamRelNode beamRel = sqlEnv.parseQuery(sqlQuery);
+
+ BeamAggregationRel aggregate = (BeamAggregationRel) beamRel.getInput(0);
+ BeamIOSourceRel ioSourceRel = (BeamIOSourceRel) aggregate.getInput();
+
+ // Make sure project merged with an aggregate.
+ assertThat(aggregate.getRowType().getFieldNames(), containsInAnyOrder("id_sum", "name"));
+
+    // IO projects all fields.
+ assertThat(ioSourceRel, instanceOf(BeamIOSourceRel.class));
+ assertThat(
+ ioSourceRel.getRowType().getFieldNames(),
+ containsInAnyOrder("unused1", "name", "id", "unused2"));
+ }
+
+ private static Table getTable(String name, PushDownOptions options) {
+ return Table.builder()
+ .name(name)
+ .comment(name + " table")
+ .schema(BASIC_SCHEMA)
+ .properties(
+ JSON.parseObject("{ " + PUSH_DOWN_OPTION + ": " + "\"" + options.toString() + "\" }"))
+ .type("test")
+ .build();
+ }
+}
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/udf/BeamSalUhfSpecialTypeAndValueTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/udf/BeamSalUhfSpecialTypeAndValueTest.java
index ad59c8e..1370c62 100644
--- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/udf/BeamSalUhfSpecialTypeAndValueTest.java
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/udf/BeamSalUhfSpecialTypeAndValueTest.java
@@ -19,6 +19,7 @@
import static java.nio.charset.StandardCharsets.UTF_8;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.ArrayUtils;
import org.apache.beam.sdk.extensions.sql.BeamSqlDslBase;
import org.apache.beam.sdk.extensions.sql.SqlTransform;
import org.apache.beam.sdk.schemas.Schema;
@@ -26,7 +27,6 @@
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;
-import org.apache.commons.lang3.ArrayUtils;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubMessageToRowTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubMessageToRowTest.java
index cc6ae5f..5a5e491 100644
--- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubMessageToRowTest.java
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubMessageToRowTest.java
@@ -36,16 +36,15 @@
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TimestampedValue;
-import org.apache.beam.sdk.values.TupleTagList;
import org.apache.beam.vendor.calcite.v1_20_0.com.google.common.collect.ImmutableMap;
import org.apache.beam.vendor.calcite.v1_20_0.com.google.common.collect.ImmutableSet;
import org.joda.time.DateTime;
import org.joda.time.Instant;
+import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
@@ -69,7 +68,7 @@
.addRowField("payload", payloadSchema)
.build();
- PCollection<Row> rows =
+ PCollectionTuple rows =
pipeline
.apply(
"create",
@@ -81,14 +80,13 @@
message(4, map("dttr", "vdl"), "{ \"name\" : null, \"id\" : null }")))
.apply(
"convert",
- ParDo.of(
- PubsubMessageToRow.builder()
- .messageSchema(messageSchema)
- .useDlq(false)
- .useFlatSchema(false)
- .build()));
+ PubsubMessageToRow.builder()
+ .messageSchema(messageSchema)
+ .useDlq(false)
+ .useFlatSchema(false)
+ .build());
- PAssert.that(rows)
+ PAssert.that(rows.get(MAIN_TAG))
.containsInAnyOrder(
Row.withSchema(messageSchema)
.addValues(ts(1), map("attr", "val"), row(payloadSchema, 3, "foo"))
@@ -131,13 +129,11 @@
message(4, map("bttr", "vbl"), "{ \"name\" : \"baz\", \"id\" : 5 }")))
.apply(
"convert",
- ParDo.of(
- PubsubMessageToRow.builder()
- .messageSchema(messageSchema)
- .useDlq(true)
- .useFlatSchema(false)
- .build())
- .withOutputTags(MAIN_TAG, TupleTagList.of(DLQ_TAG)));
+ PubsubMessageToRow.builder()
+ .messageSchema(messageSchema)
+ .useDlq(true)
+ .useFlatSchema(false)
+ .build());
PCollection<Row> rows = outputs.get(MAIN_TAG);
PCollection<PubsubMessage> dlqMessages = outputs.get(DLQ_TAG);
@@ -177,7 +173,7 @@
.addNullableField("name", FieldType.STRING)
.build();
- PCollection<Row> rows =
+ PCollectionTuple rows =
pipeline
.apply(
"create",
@@ -189,14 +185,13 @@
message(4, map("dttr", "vdl"), "{ \"name\" : null, \"id\" : null }")))
.apply(
"convert",
- ParDo.of(
- PubsubMessageToRow.builder()
- .messageSchema(messageSchema)
- .useDlq(false)
- .useFlatSchema(true)
- .build()));
+ PubsubMessageToRow.builder()
+ .messageSchema(messageSchema)
+ .useDlq(false)
+ .useFlatSchema(true)
+ .build());
- PAssert.that(rows)
+ PAssert.that(rows.get(MAIN_TAG))
.containsInAnyOrder(
Row.withSchema(messageSchema)
.addValues(ts(1), /* map("attr", "val"), */ 3, "foo")
@@ -237,17 +232,14 @@
message(4, map("bttr", "vbl"), "{ \"name\" : \"baz\", \"id\" : 5 }")))
.apply(
"convert",
- ParDo.of(
- PubsubMessageToRow.builder()
- .messageSchema(messageSchema)
- .useDlq(true)
- .useFlatSchema(true)
- .build())
- .withOutputTags(
- PubsubMessageToRow.MAIN_TAG, TupleTagList.of(PubsubMessageToRow.DLQ_TAG)));
+ PubsubMessageToRow.builder()
+ .messageSchema(messageSchema)
+ .useDlq(true)
+ .useFlatSchema(true)
+ .build());
- PCollection<Row> rows = outputs.get(PubsubMessageToRow.MAIN_TAG);
- PCollection<PubsubMessage> dlqMessages = outputs.get(PubsubMessageToRow.DLQ_TAG);
+ PCollection<Row> rows = outputs.get(MAIN_TAG);
+ PCollection<PubsubMessage> dlqMessages = outputs.get(DLQ_TAG);
PAssert.that(dlqMessages)
.satisfies(
@@ -275,6 +267,66 @@
pipeline.run();
}
+ @Test
+ public void testFlatSchemaMessageInvalidElement() {
+ Schema messageSchema =
+ Schema.builder()
+ .addDateTimeField("event_timestamp")
+ .addInt32Field("id")
+ .addStringField("name")
+ .build();
+
+ pipeline
+ .apply(
+ "create",
+ Create.timestamped(
+ message(1, map("attr", "val"), "{ \"id\" : 3, \"name\" : \"foo\" }"),
+ message(2, map("attr1", "val1"), "{ \"invalid1\" : \"sdfsd\" }")))
+ .apply(
+ "convert",
+ PubsubMessageToRow.builder()
+ .messageSchema(messageSchema)
+ .useDlq(false)
+ .useFlatSchema(true)
+ .build());
+
+ Exception exception = Assert.assertThrows(RuntimeException.class, () -> pipeline.run());
+ Assert.assertTrue(exception.getMessage().contains("Error parsing message"));
+ }
+
+ @Test
+ public void testNestedSchemaMessageInvalidElement() {
+ Schema payloadSchema =
+ Schema.builder()
+ .addNullableField("id", FieldType.INT32)
+ .addNullableField("name", FieldType.STRING)
+ .build();
+
+ Schema messageSchema =
+ Schema.builder()
+ .addDateTimeField("event_timestamp")
+ .addMapField("attributes", VARCHAR, VARCHAR)
+ .addRowField("payload", payloadSchema)
+ .build();
+
+ pipeline
+ .apply(
+ "create",
+ Create.timestamped(
+ message(1, map("attr", "val"), "{ \"id\" : 3, \"name\" : \"foo\" }"),
+ message(2, map("attr1", "val1"), "{ \"invalid1\" : \"sdfsd\" }")))
+ .apply(
+ "convert",
+ PubsubMessageToRow.builder()
+ .messageSchema(messageSchema)
+ .useDlq(false)
+ .useFlatSchema(false)
+ .build());
+
+ Exception exception = Assert.assertThrows(RuntimeException.class, () -> pipeline.run());
+ Assert.assertTrue(exception.getMessage().contains("Error parsing message"));
+ }
+
private Row row(Schema schema, Object... objects) {
return Row.withSchema(schema).addValues(objects).build();
}
diff --git a/sdks/java/extensions/sql/zetasql/build.gradle b/sdks/java/extensions/sql/zetasql/build.gradle
index 560b454..330209b 100644
--- a/sdks/java/extensions/sql/zetasql/build.gradle
+++ b/sdks/java/extensions/sql/zetasql/build.gradle
@@ -25,7 +25,7 @@
description = "Apache Beam :: SDKs :: Java :: Extensions :: SQL :: ZetaSQL"
ext.summary = "ZetaSQL to Calcite translator"
-def zetasql_version = "2019.10.1"
+def zetasql_version = "2019.12.1"
dependencies {
compile project(":sdks:java:core")
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java
new file mode 100644
index 0000000..330fb2d
--- /dev/null
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java
@@ -0,0 +1,212 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.zetasql;
+
+import com.google.zetasql.AnalyzerOptions;
+import com.google.zetasql.PreparedExpression;
+import com.google.zetasql.Value;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.IntFunction;
+import java.util.stream.Collectors;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.annotations.Internal;
+import org.apache.beam.sdk.extensions.sql.impl.rel.AbstractBeamCalcRel;
+import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils;
+import org.apache.beam.sdk.extensions.sql.meta.provider.bigquery.BeamBigQuerySqlDialect;
+import org.apache.beam.sdk.extensions.sql.meta.provider.bigquery.BeamSqlUnparseContext;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.Schema.Field;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionList;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptCluster;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelTraitSet;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.RelNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.core.Calc;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.rel2sql.SqlImplementor;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexProgram;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlDialect;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlIdentifier;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
+
+/**
+ * TODO[BEAM-8630]: This class is currently a prototype and is not used at runtime.
+ *
+ * <p>BeamRelNode to replace {@code Project} and {@code Filter} nodes based on the {@code ZetaSQL}
+ * expression evaluator.
+ */
+@Internal
+public class BeamZetaSqlCalcRel extends AbstractBeamCalcRel {
+
+ private static final SqlDialect DIALECT = BeamBigQuerySqlDialect.DEFAULT;
+ private final SqlImplementor.Context context;
+
+ public BeamZetaSqlCalcRel(
+ RelOptCluster cluster, RelTraitSet traits, RelNode input, RexProgram program) {
+ super(cluster, traits, input, program);
+ final IntFunction<SqlNode> fn =
+ i ->
+ new SqlIdentifier(
+ getProgram().getInputRowType().getFieldList().get(i).getName(), SqlParserPos.ZERO);
+ context = new BeamSqlUnparseContext(fn);
+ }
+
+ @Override
+ public Calc copy(RelTraitSet traitSet, RelNode input, RexProgram program) {
+ return new BeamZetaSqlCalcRel(getCluster(), traitSet, input, program);
+ }
+
+ @Override
+ public PTransform<PCollectionList<Row>, PCollection<Row>> buildPTransform() {
+ return new Transform();
+ }
+
+ private class Transform extends PTransform<PCollectionList<Row>, PCollection<Row>> {
+ @Override
+ public PCollection<Row> expand(PCollectionList<Row> pinput) {
+ Preconditions.checkArgument(
+ pinput.size() == 1,
+          "%s expected a single input PCollection, but received %s.",
+ BeamZetaSqlCalcRel.class.getSimpleName(),
+ pinput.size());
+ PCollection<Row> upstream = pinput.get(0);
+
+ final List<String> projects =
+ getProgram().getProjectList().stream()
+ .map(BeamZetaSqlCalcRel.this::unparseRexNode)
+ .collect(Collectors.toList());
+ final RexNode condition = getProgram().getCondition();
+
+ // TODO[BEAM-8630]: validate sql expressions at pipeline construction time
+ Schema outputSchema = CalciteUtils.toSchema(getRowType());
+ CalcFn calcFn =
+ new CalcFn(
+ projects,
+ condition == null ? null : unparseRexNode(condition),
+ upstream.getSchema(),
+ outputSchema);
+ return upstream.apply(ParDo.of(calcFn)).setRowSchema(outputSchema);
+ }
+ }
+
+ private String unparseRexNode(RexNode rex) {
+ return context.toSql(getProgram(), rex).toSqlString(DIALECT).getSql();
+ }
+
+ /**
+ * {@code CalcFn} is the executor for a {@link BeamZetaSqlCalcRel} step. The implementation is
+ * based on the {@code ZetaSQL} expression evaluator.
+ */
+ private static class CalcFn extends DoFn<Row, Row> {
+ private final List<String> projects;
+ @Nullable private final String condition;
+ private final Schema inputSchema;
+ private final Schema outputSchema;
+ private transient List<PreparedExpression> projectExps;
+ @Nullable private transient PreparedExpression conditionExp;
+
+ CalcFn(
+ List<String> projects,
+ @Nullable String condition,
+ Schema inputSchema,
+ Schema outputSchema) {
+ Preconditions.checkArgument(projects.size() == outputSchema.getFieldCount());
+ this.projects = ImmutableList.copyOf(projects);
+ this.condition = condition;
+ this.inputSchema = inputSchema;
+ this.outputSchema = outputSchema;
+ }
+
+ @Setup
+ public void setup() {
+ AnalyzerOptions options = SqlAnalyzer.initAnalyzerOptions();
+ for (Field field : inputSchema.getFields()) {
+ options.addExpressionColumn(
+ sanitize(field.getName()), ZetaSqlUtils.beamFieldTypeToZetaSqlType(field.getType()));
+ }
+
+ // TODO[BEAM-8630]: use a single PreparedExpression for all condition and projects
+ projectExps = new ArrayList<>();
+ for (String project : projects) {
+ PreparedExpression projectExp = new PreparedExpression(sanitize(project));
+ projectExp.prepare(options);
+ projectExps.add(projectExp);
+ }
+ if (condition != null) {
+ conditionExp = new PreparedExpression(sanitize(condition));
+ conditionExp.prepare(options);
+ }
+ }
+
+ @ProcessElement
+ public void processElement(ProcessContext c) {
+ Map<String, Value> columns = new HashMap<>();
+ Row row = c.element();
+ for (Field field : inputSchema.getFields()) {
+ columns.put(
+ sanitize(field.getName()),
+ ZetaSqlUtils.javaObjectToZetaSqlValue(row.getValue(field.getName()), field.getType()));
+ }
+
+ // TODO[BEAM-8630]: support parameters in expression evaluation
+ // The map is empty because parameters in the query string have already been substituted.
+ Map<String, Value> params = Collections.emptyMap();
+
+ if (conditionExp != null && !conditionExp.execute(columns, params).getBoolValue()) {
+ return;
+ }
+
+ Row.Builder output = Row.withSchema(outputSchema);
+ for (int i = 0; i < outputSchema.getFieldCount(); i++) {
+ // TODO[BEAM-8630]: performance optimization by bundling the gRPC calls
+ Value v = projectExps.get(i).execute(columns, params);
+ output.addValue(
+ ZetaSqlUtils.zetaSqlValueToJavaObject(v, outputSchema.getField(i).getType()));
+ }
+ c.output(output.build());
+ }
+
+ @Teardown
+ public void teardown() {
+ for (PreparedExpression projectExp : projectExps) {
+ projectExp.close();
+ }
+ if (conditionExp != null) {
+ conditionExp.close();
+ }
+ }
+
+ // Replaces "$" with "_" because "$" is not allowed in a valid ZetaSQL identifier
+ // (ZetaSQL identifier syntax: [A-Za-z_][A-Za-z_0-9]*)
+ // TODO[BEAM-8630]: check if this is sufficient and correct, or even better fix this in Calcite
+ private static String sanitize(String identifier) {
+ return identifier.replaceAll("\\$", "_");
+ }
+ }
+}
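CalcFn delegates all expression evaluation to ZetaSQL's PreparedExpression. Below is a stand-alone sketch of the same prepare/execute flow, using only the calls that appear in this diff; new AnalyzerOptions() stands in for SqlAnalyzer.initAnalyzerOptions(), and _f0 is a hypothetical sanitized column name (a Calcite $f0 after the "$" to "_" replacement).

```java
import com.google.zetasql.AnalyzerOptions;
import com.google.zetasql.PreparedExpression;
import com.google.zetasql.TypeFactory;
import com.google.zetasql.Value;
import com.google.zetasql.ZetaSQLType.TypeKind;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class ZetaSqlExpressionSketch {
  public static void main(String[] args) {
    AnalyzerOptions options = new AnalyzerOptions();
    // Register the input column under its sanitized name, as CalcFn.setup does.
    options.addExpressionColumn("_f0", TypeFactory.createSimpleType(TypeKind.TYPE_INT64));

    PreparedExpression expr = new PreparedExpression("_f0 + 1");
    expr.prepare(options);

    Map<String, Value> columns = new HashMap<>();
    columns.put("_f0", Value.createInt64Value(41L));

    // Parameters are empty, matching CalcFn.processElement.
    Value result = expr.execute(columns, Collections.emptyMap());
    System.out.println(result.getInt64Value()); // 42

    expr.close();
  }
}
```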
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRule.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRule.java
new file mode 100644
index 0000000..2e7ea0f
--- /dev/null
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRule.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.zetasql;
+
+import org.apache.beam.sdk.extensions.sql.impl.rel.BeamLogicalConvention;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.Convention;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptRule;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.RelNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.convert.ConverterRule;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.core.Calc;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.logical.LogicalCalc;
+
+/** A {@code ConverterRule} to replace {@link Calc} with {@link BeamZetaSqlCalcRel}. */
+public class BeamZetaSqlCalcRule extends ConverterRule {
+ public static final BeamZetaSqlCalcRule INSTANCE = new BeamZetaSqlCalcRule();
+
+ private BeamZetaSqlCalcRule() {
+ super(
+ LogicalCalc.class, Convention.NONE, BeamLogicalConvention.INSTANCE, "BeamZetaSqlCalcRule");
+ }
+
+ @Override
+ public boolean matches(RelOptRuleCall x) {
+ return true;
+ }
+
+ @Override
+ public RelNode convert(RelNode rel) {
+ final Calc calc = (Calc) rel;
+ final RelNode input = calc.getInput();
+
+ return new BeamZetaSqlCalcRel(
+ calc.getCluster(),
+ calc.getTraitSet().replace(BeamLogicalConvention.INSTANCE),
+ RelOptRule.convert(input, input.getTraitSet().replace(BeamLogicalConvention.INSTANCE)),
+ calc.getProgram());
+ }
+}
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLQueryPlanner.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLQueryPlanner.java
index 6ec56ae..1fc8ded 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLQueryPlanner.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLQueryPlanner.java
@@ -26,12 +26,14 @@
import org.apache.beam.sdk.extensions.sql.impl.QueryPlanner;
import org.apache.beam.sdk.extensions.sql.impl.SqlConversionException;
import org.apache.beam.sdk.extensions.sql.impl.planner.BeamCostModel;
+import org.apache.beam.sdk.extensions.sql.impl.planner.BeamRuleSets;
import org.apache.beam.sdk.extensions.sql.impl.planner.RelMdNodeStats;
import org.apache.beam.sdk.extensions.sql.impl.rel.BeamLogicalConvention;
import org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.config.CalciteConnectionConfig;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.jdbc.CalciteSchema;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.ConventionTraitDef;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptRule;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelTraitDef;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelTraitSet;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.prepare.CalciteCatalogReader;
@@ -39,6 +41,7 @@
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.rules.JoinCommuteRule;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.schema.SchemaPlus;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlNode;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlOperatorTable;
@@ -50,6 +53,7 @@
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.tools.Frameworks;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.tools.RelConversionException;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.tools.RuleSet;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.tools.RuleSets;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
/** ZetaSQLQueryPlanner. */
@@ -61,7 +65,36 @@
}
public ZetaSQLQueryPlanner(JdbcConnection jdbcConnection, RuleSet[] ruleSets) {
- plannerImpl = new ZetaSQLPlannerImpl(defaultConfig(jdbcConnection, ruleSets));
+ plannerImpl =
+ new ZetaSQLPlannerImpl(defaultConfig(jdbcConnection, modifyRuleSetsForZetaSql(ruleSets)));
+ }
+
+ public static RuleSet[] getZetaSqlRuleSets() {
+ return modifyRuleSetsForZetaSql(BeamRuleSets.getRuleSets());
+ }
+
+ private static RuleSet[] modifyRuleSetsForZetaSql(RuleSet[] ruleSets) {
+ RuleSet[] ret = new RuleSet[ruleSets.length];
+ for (int i = 0; i < ruleSets.length; i++) {
+ ImmutableList.Builder<RelOptRule> bd = ImmutableList.builder();
+ for (RelOptRule rule : ruleSets[i]) {
+ // TODO[BEAM-9075]: Fix join re-ordering for ZetaSQL planner. Currently join re-ordering
+ // requires the JoinCommuteRule, which doesn't work without struct flattening.
+ if (rule instanceof JoinCommuteRule) {
+ continue;
+ }
+ // TODO[BEAM-8630]: uncomment the next block once we have fully migrated to
+ // BeamZetaSqlCalcRel
+ // else if (rule instanceof BeamCalcRule) {
+ // bd.add(BeamZetaSqlCalcRule.INSTANCE);
+ // }
+ else {
+ bd.add(rule);
+ }
+ }
+ ret[i] = RuleSets.ofList(bd.build());
+ }
+ return ret;
}
@Override
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlUtils.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlUtils.java
new file mode 100644
index 0000000..d771857
--- /dev/null
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlUtils.java
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.zetasql;
+
+import com.google.protobuf.ByteString;
+import com.google.zetasql.ArrayType;
+import com.google.zetasql.StructType;
+import com.google.zetasql.StructType.StructField;
+import com.google.zetasql.Type;
+import com.google.zetasql.TypeFactory;
+import com.google.zetasql.Value;
+import com.google.zetasql.ZetaSQLType.TypeKind;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.apache.beam.sdk.annotations.Internal;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.Schema.Field;
+import org.apache.beam.sdk.schemas.Schema.FieldType;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.math.LongMath;
+import org.joda.time.Instant;
+
+/** Utility methods for ZetaSQL related operations. */
+@Internal
+public final class ZetaSqlUtils {
+
+ private static final long MICROS_PER_MILLI = 1000L;
+
+ private ZetaSqlUtils() {}
+
+ // Unsupported ZetaSQL types: INT32, UINT32, UINT64, FLOAT, ENUM, PROTO, GEOGRAPHY
+ // TODO[BEAM-8630]: support ZetaSQL types: DATE, TIME, DATETIME
+ public static Type beamFieldTypeToZetaSqlType(FieldType fieldType) {
+ switch (fieldType.getTypeName()) {
+ case INT64:
+ return TypeFactory.createSimpleType(TypeKind.TYPE_INT64);
+ case DECIMAL:
+ return TypeFactory.createSimpleType(TypeKind.TYPE_NUMERIC);
+ case DOUBLE:
+ return TypeFactory.createSimpleType(TypeKind.TYPE_DOUBLE);
+ case STRING:
+ return TypeFactory.createSimpleType(TypeKind.TYPE_STRING);
+ case DATETIME:
+ // TODO[BEAM-8630]: Mapping Timestamp to DATETIME results in some timezone/precision issues.
+ // Can we convert Timestamp to a LogicalType? Will it solve the problem?
+ return TypeFactory.createSimpleType(TypeKind.TYPE_TIMESTAMP);
+ case BOOLEAN:
+ return TypeFactory.createSimpleType(TypeKind.TYPE_BOOL);
+ case BYTES:
+ return TypeFactory.createSimpleType(TypeKind.TYPE_BYTES);
+ case ARRAY:
+ return createZetaSqlArrayTypeFromBeamElementFieldType(fieldType.getCollectionElementType());
+ case ROW:
+ return createZetaSqlStructTypeFromBeamSchema(fieldType.getRowSchema());
+ default:
+ throw new IllegalArgumentException(
+ "Unsupported Beam fieldType: " + fieldType.getTypeName());
+ }
+ }
+
+ private static ArrayType createZetaSqlArrayTypeFromBeamElementFieldType(
+ FieldType elementFieldType) {
+ return TypeFactory.createArrayType(beamFieldTypeToZetaSqlType(elementFieldType));
+ }
+
+ private static StructType createZetaSqlStructTypeFromBeamSchema(Schema schema) {
+ return TypeFactory.createStructType(
+ schema.getFields().stream()
+ .map(ZetaSqlUtils::beamFieldToZetaSqlStructField)
+ .collect(Collectors.toList()));
+ }
+
+ private static StructField beamFieldToZetaSqlStructField(Field field) {
+ return new StructField(field.getName(), beamFieldTypeToZetaSqlType(field.getType()));
+ }
+
+ public static Value javaObjectToZetaSqlValue(Object object, FieldType fieldType) {
+ if (object == null) {
+ return Value.createNullValue(beamFieldTypeToZetaSqlType(fieldType));
+ }
+ switch (fieldType.getTypeName()) {
+ case INT64:
+ return Value.createInt64Value((Long) object);
+ // TODO[BEAM-8630]: Value.createNumericValue() is broken due to a dependency issue
+ // case DECIMAL:
+ // return Value.createNumericValue((BigDecimal) object);
+ case DOUBLE:
+ return Value.createDoubleValue((Double) object);
+ case STRING:
+ return Value.createStringValue((String) object);
+ case DATETIME:
+ return jodaInstantToZetaSqlTimestampValue((Instant) object);
+ case BOOLEAN:
+ return Value.createBoolValue((Boolean) object);
+ case BYTES:
+ return Value.createBytesValue(ByteString.copyFrom((byte[]) object));
+ case ARRAY:
+ return javaListToZetaSqlArrayValue(
+ (List<Object>) object, fieldType.getCollectionElementType());
+ case ROW:
+ return beamRowToZetaSqlStructValue((Row) object, fieldType.getRowSchema());
+ default:
+ throw new IllegalArgumentException(
+ "Unsupported Beam fieldType: " + fieldType.getTypeName());
+ }
+ }
+
+ private static Value jodaInstantToZetaSqlTimestampValue(Instant instant) {
+ return javaLongToZetaSqlTimestampValue(instant.getMillis());
+ }
+
+ private static Value javaLongToZetaSqlTimestampValue(Long millis) {
+ return Value.createTimestampValueFromUnixMicros(
+ LongMath.checkedMultiply(millis, MICROS_PER_MILLI));
+ }
+
+ private static Value javaListToZetaSqlArrayValue(List<Object> elements, FieldType elementType) {
+ List<Value> values =
+ elements.stream()
+ .map(e -> javaObjectToZetaSqlValue(e, elementType))
+ .collect(Collectors.toList());
+ return Value.createArrayValue(
+ createZetaSqlArrayTypeFromBeamElementFieldType(elementType), values);
+ }
+
+ private static Value beamRowToZetaSqlStructValue(Row row, Schema schema) {
+ List<Value> values = new ArrayList<>(row.getFieldCount());
+
+ for (int i = 0; i < row.getFieldCount(); i++) {
+ values.add(javaObjectToZetaSqlValue(row.getValue(i), schema.getField(i).getType()));
+ }
+ return Value.createStructValue(createZetaSqlStructTypeFromBeamSchema(schema), values);
+ }
+
+ public static Object zetaSqlValueToJavaObject(Value value, FieldType fieldType) {
+ if (value.isNull()) {
+ return null;
+ }
+ switch (fieldType.getTypeName()) {
+ case INT64:
+ return value.getInt64Value();
+ case DECIMAL:
+ return value.getNumericValue();
+ case DOUBLE:
+        // ZetaSQL may return a DOUBLE whose fractional part is zero as a whole number (INT64).
+        // Cast it back to double when that happens.
+ if (value.getType().getKind().equals(TypeKind.TYPE_INT64)) {
+ return (double) value.getInt64Value();
+ }
+ return value.getDoubleValue();
+ case STRING:
+ return value.getStringValue();
+ case DATETIME:
+ return zetaSqlTimestampValueToJodaInstant(value);
+ case BOOLEAN:
+ return value.getBoolValue();
+ case BYTES:
+ return value.getBytesValue().toByteArray();
+ case ARRAY:
+ return zetaSqlArrayValueToJavaList(value, fieldType.getCollectionElementType());
+ case ROW:
+ return zetaSqlStructValueToBeamRow(value, fieldType.getRowSchema());
+ default:
+ throw new IllegalArgumentException(
+ "Unsupported Beam fieldType: " + fieldType.getTypeName());
+ }
+ }
+
+ private static Instant zetaSqlTimestampValueToJodaInstant(Value timestampValue) {
+ long millis = timestampValue.getTimestampUnixMicros() / MICROS_PER_MILLI;
+ return Instant.ofEpochMilli(millis);
+ }
+
+ private static List<Object> zetaSqlArrayValueToJavaList(Value arrayValue, FieldType elementType) {
+ return arrayValue.getElementList().stream()
+ .map(e -> zetaSqlValueToJavaObject(e, elementType))
+ .collect(Collectors.toList());
+ }
+
+ private static Row zetaSqlStructValueToBeamRow(Value structValue, Schema schema) {
+ List<Object> objects = new ArrayList<>(schema.getFieldCount());
+ List<Value> values = structValue.getFieldList();
+ for (int i = 0; i < values.size(); i++) {
+ objects.add(zetaSqlValueToJavaObject(values.get(i), schema.getField(i).getType()));
+ }
+ return Row.withSchema(schema).addValues(objects).build();
+ }
+}
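A short sketch of the round trip ZetaSqlUtils enables (assuming the class as added above; note the DATETIME case scales Joda millis to ZetaSQL unix micros and back):

```java
import com.google.zetasql.Value;
import org.apache.beam.sdk.extensions.sql.zetasql.ZetaSqlUtils;
import org.apache.beam.sdk.schemas.Schema.FieldType;
import org.joda.time.Instant;

public class ZetaSqlUtilsSketch {
  public static void main(String[] args) {
    // INT64 round trip.
    Value count = ZetaSqlUtils.javaObjectToZetaSqlValue(7L, FieldType.INT64);
    System.out.println(ZetaSqlUtils.zetaSqlValueToJavaObject(count, FieldType.INT64)); // 7

    // DATETIME round trip: millis * 1000 into a ZetaSQL TIMESTAMP value, and back.
    Instant instant = Instant.ofEpochMilli(1577836800000L);
    Value ts = ZetaSqlUtils.javaObjectToZetaSqlValue(instant, FieldType.DATETIME);
    System.out.println(ZetaSqlUtils.zetaSqlValueToJavaObject(ts, FieldType.DATETIME));
    // prints 2020-01-01T00:00:00.000Z
  }
}
```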
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/ExpressionConverter.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/ExpressionConverter.java
index 8b5c81c..e55481e 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/ExpressionConverter.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/ExpressionConverter.java
@@ -35,6 +35,7 @@
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.zetasql.ArrayType;
+import com.google.zetasql.EnumType;
import com.google.zetasql.Type;
import com.google.zetasql.Value;
import com.google.zetasql.ZetaSQLType.TypeKind;
@@ -560,7 +561,7 @@
ret = convertArrayValueToRexNode(type.asArray(), value);
break;
case TYPE_ENUM:
- ret = convertEnumToRexNode(type, value);
+ ret = convertEnumToRexNode(type.asEnum(), value);
break;
default:
// TODO: convert struct literal.
@@ -661,8 +662,8 @@
return rexBuilder().makeCall(SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR, operands);
}
- private RexNode convertEnumToRexNode(Type type, Value value) {
- if (type.typeName().equals("`zetasql.functions.DateTimestampPart`")) {
+ private RexNode convertEnumToRexNode(EnumType type, Value value) {
+ if ("zetasql.functions.DateTimestampPart".equals(type.getDescriptor().getFullName())) {
return convertTimeUnitRangeEnumToRexNode(type, value);
} else {
throw new RuntimeException(
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/SingleRowScanConverter.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/SingleRowScanConverter.java
index 4721b33..a16a443 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/SingleRowScanConverter.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/SingleRowScanConverter.java
@@ -18,9 +18,15 @@
package org.apache.beam.sdk.extensions.sql.zetasql.translation;
import com.google.zetasql.resolvedast.ResolvedNodes.ResolvedSingleRowScan;
+import java.math.BigDecimal;
import java.util.List;
+import org.apache.beam.vendor.calcite.v1_20_0.com.google.common.collect.ImmutableList;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptCluster;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.RelNode;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.logical.LogicalValues;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.type.RelDataType;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexLiteral;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.type.SqlTypeName;
/** Converts a single row value. */
class SingleRowScanConverter extends RelConverter<ResolvedSingleRowScan> {
@@ -36,6 +42,26 @@
@Override
public RelNode convert(ResolvedSingleRowScan zetaNode, List<RelNode> inputs) {
- return LogicalValues.createOneRow(getCluster());
+ return createOneRow(getCluster());
+ }
+
+ // This function creates a single dummy input row for queries that don't read from a table.
+ // For example: SELECT "hello"
+ // The code is copy-pasted from Calcite's LogicalValues.createOneRow() with a single line
+ // change: SqlTypeName.INTEGER replaced by SqlTypeName.BIGINT.
+ // We would call LogicalValues.createOneRow() directly, but it uses the type SqlTypeName.INTEGER,
+ // which corresponds to TypeKind.TYPE_INT32 in ZetaSQL, a type not supported in ZetaSQL
+ // PRODUCT_EXTERNAL mode. See
+ // https://github.com/google/zetasql/blob/c610a21ffdc110293c1c7bd255a2674ebc7ec7a8/java/com/google/zetasql/TypeFactory.java#L61
+ private static LogicalValues createOneRow(RelOptCluster cluster) {
+ final RelDataType rowType =
+ cluster.getTypeFactory().builder().add("ZERO", SqlTypeName.BIGINT).nullable(false).build();
+ final ImmutableList<ImmutableList<RexLiteral>> tuples =
+ ImmutableList.of(
+ ImmutableList.of(
+ cluster
+ .getRexBuilder()
+ .makeExactLiteral(BigDecimal.ZERO, rowType.getFieldList().get(0).getType())));
+ return LogicalValues.create(cluster, rowType, tuples);
}
}
diff --git a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLDialectSpecTest.java b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLDialectSpecTest.java
index 5cfd878..8b8b67d 100644
--- a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLDialectSpecTest.java
+++ b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLDialectSpecTest.java
@@ -59,7 +59,6 @@
import org.apache.beam.sdk.extensions.sql.impl.JdbcConnection;
import org.apache.beam.sdk.extensions.sql.impl.JdbcDriver;
import org.apache.beam.sdk.extensions.sql.impl.planner.BeamCostModel;
-import org.apache.beam.sdk.extensions.sql.impl.planner.BeamRuleSets;
import org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode;
import org.apache.beam.sdk.extensions.sql.impl.rel.BeamSqlRelUtils;
import org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable;
@@ -142,6 +141,73 @@
}
@Test
+ public void testByteLiterals() {
+ String sql = "SELECT b'abc'";
+
+ byte[] byteString = new byte[] {'a', 'b', 'c'};
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ final Schema schema = Schema.builder().addNullableField("ColA", FieldType.BYTES).build();
+
+ PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValues(byteString).build());
+
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ @Test
+ public void testByteString() {
+ String sql = "SELECT @p0 IS NULL AS ColA";
+
+ ByteString byteString = ByteString.copyFrom(new byte[] {0x62});
+
+ ImmutableMap<String, Value> params =
+ ImmutableMap.<String, Value>builder().put("p0", Value.createBytesValue(byteString)).build();
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql, params);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ final Schema schema = Schema.builder().addNullableField("ColA", FieldType.BOOLEAN).build();
+
+ PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValues(false).build());
+
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ @Test
+ public void testFloat() {
+ String sql = "SELECT 3.0";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ final Schema schema = Schema.builder().addNullableField("ColA", FieldType.DOUBLE).build();
+
+ PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValues(3.0).build());
+
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ @Test
+ public void testStringLiterals() {
+ String sql = "SELECT 'abc\\n'";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ final Schema schema = Schema.builder().addNullableField("ColA", FieldType.STRING).build();
+
+ PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValues("abc\n").build());
+
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ @Test
public void testEQ1() {
String sql = "SELECT @p0 = @p1 AS ColA";
@@ -1321,6 +1387,24 @@
}
@Test
+ public void testZetaSQLStructFieldAccessInnerJoin() {
+ String sql =
+ "SELECT A.rowCol.data FROM table_with_struct_two AS A INNER JOIN "
+ + "table_with_struct AS B "
+ + "ON A.rowCol.row_id = B.id";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+ final Schema schema = Schema.builder().addStringField("field1").build();
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(schema).addValue("data1").build(),
+ Row.withSchema(schema).addValue("data2").build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ @Test
public void testZetaSQLSelectFromTableWithArrayType() {
String sql = "SELECT array_col FROM table_with_array;";
@@ -3755,7 +3839,7 @@
.defaultSchema(defaultSchemaPlus)
.traitDefs(traitDefs)
.context(Contexts.of(contexts))
- .ruleSets(BeamRuleSets.getRuleSets())
+ .ruleSets(ZetaSQLQueryPlanner.getZetaSqlRuleSets())
.costFactory(BeamCostModel.FACTORY)
.typeSystem(jdbcConnection.getTypeFactory().getTypeSystem())
.build();
diff --git a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLPushDownTest.java b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLPushDownTest.java
index a75db39..a96b957 100644
--- a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLPushDownTest.java
+++ b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLPushDownTest.java
@@ -27,7 +27,6 @@
import org.apache.beam.sdk.extensions.sql.impl.JdbcConnection;
import org.apache.beam.sdk.extensions.sql.impl.JdbcDriver;
import org.apache.beam.sdk.extensions.sql.impl.planner.BeamCostModel;
-import org.apache.beam.sdk.extensions.sql.impl.planner.BeamRuleSets;
import org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel;
import org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode;
import org.apache.beam.sdk.extensions.sql.meta.Table;
@@ -187,7 +186,7 @@
.defaultSchema(defaultSchemaPlus)
.traitDefs(traitDefs)
.context(Contexts.of(contexts))
- .ruleSets(BeamRuleSets.getRuleSets())
+ .ruleSets(ZetaSQLQueryPlanner.getZetaSqlRuleSets())
.costFactory(BeamCostModel.FACTORY)
.typeSystem(jdbcConnection.getTypeFactory().getTypeSystem())
.build();
diff --git a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlUtilsTest.java b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlUtilsTest.java
new file mode 100644
index 0000000..a2da5c1
--- /dev/null
+++ b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlUtilsTest.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.zetasql;
+
+import static org.junit.Assert.assertEquals;
+
+import com.google.protobuf.ByteString;
+import com.google.zetasql.ArrayType;
+import com.google.zetasql.StructType;
+import com.google.zetasql.StructType.StructField;
+import com.google.zetasql.TypeFactory;
+import com.google.zetasql.Value;
+import com.google.zetasql.ZetaSQLType.TypeKind;
+import java.util.Arrays;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.Schema.FieldType;
+import org.apache.beam.sdk.values.Row;
+import org.joda.time.Instant;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for utility methods for ZetaSQL related operations. */
+@RunWith(JUnit4.class)
+public class ZetaSqlUtilsTest {
+
+ private static final Schema TEST_INNER_SCHEMA =
+ Schema.builder().addField("i1", FieldType.INT64).addField("i2", FieldType.STRING).build();
+
+ private static final Schema TEST_SCHEMA =
+ Schema.builder()
+ .addField("f1", FieldType.INT64)
+ // .addField("f2", FieldType.DECIMAL)
+ .addField("f3", FieldType.DOUBLE)
+ .addField("f4", FieldType.STRING)
+ .addField("f5", FieldType.DATETIME)
+ .addField("f6", FieldType.BOOLEAN)
+ .addField("f7", FieldType.BYTES)
+ .addArrayField("f8", FieldType.DOUBLE)
+ .addRowField("f9", TEST_INNER_SCHEMA)
+ .addNullableField("f10", FieldType.INT64)
+ .build();
+
+ private static final FieldType TEST_FIELD_TYPE = FieldType.row(TEST_SCHEMA);
+
+ private static final ArrayType TEST_INNER_ARRAY_TYPE =
+ TypeFactory.createArrayType(TypeFactory.createSimpleType(TypeKind.TYPE_DOUBLE));
+
+ private static final StructType TEST_INNER_STRUCT_TYPE =
+ TypeFactory.createStructType(
+ Arrays.asList(
+ new StructField("i1", TypeFactory.createSimpleType(TypeKind.TYPE_INT64)),
+ new StructField("i2", TypeFactory.createSimpleType(TypeKind.TYPE_STRING))));
+
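+ // ZetaSQL struct type that mirrors TEST_SCHEMA field for field (f2/NUMERIC is commented out
+ // pending BEAM-8630).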
+ private static final StructType TEST_TYPE =
+ TypeFactory.createStructType(
+ Arrays.asList(
+ new StructField("f1", TypeFactory.createSimpleType(TypeKind.TYPE_INT64)),
+ // new StructField("f2", TypeFactory.createSimpleType(TypeKind.TYPE_NUMERIC)),
+ new StructField("f3", TypeFactory.createSimpleType(TypeKind.TYPE_DOUBLE)),
+ new StructField("f4", TypeFactory.createSimpleType(TypeKind.TYPE_STRING)),
+ new StructField("f5", TypeFactory.createSimpleType(TypeKind.TYPE_TIMESTAMP)),
+ new StructField("f6", TypeFactory.createSimpleType(TypeKind.TYPE_BOOL)),
+ new StructField("f7", TypeFactory.createSimpleType(TypeKind.TYPE_BYTES)),
+ new StructField("f8", TEST_INNER_ARRAY_TYPE),
+ new StructField("f9", TEST_INNER_STRUCT_TYPE),
+ new StructField("f10", TypeFactory.createSimpleType(TypeKind.TYPE_INT64))));
+
+ private static final Row TEST_ROW =
+ Row.withSchema(TEST_SCHEMA)
+ .addValue(64L)
+ // .addValue(BigDecimal.valueOf(9999L))
+ .addValue(5.0)
+ .addValue("Hello")
+ .addValue(Instant.ofEpochMilli(12345678L))
+ .addValue(false)
+ .addValue(new byte[] {0x11, 0x22})
+ .addArray(3.0, 6.5)
+ .addValue(Row.withSchema(TEST_INNER_SCHEMA).addValues(0L, "world").build())
+ .addValue(null)
+ .build();
+
+ private static final Value TEST_VALUE =
+ Value.createStructValue(
+ TEST_TYPE,
+ Arrays.asList(
+ Value.createInt64Value(64L),
+ // TODO[BEAM-8630]: Value.createNumericValue() is broken due to a dependency issue
+ // Value.createNumericValue(BigDecimal.valueOf(9999L)),
+ Value.createDoubleValue(5.0),
+ Value.createStringValue("Hello"),
+ Value.createTimestampValueFromUnixMicros(12345678000L),
+ Value.createBoolValue(false),
+ Value.createBytesValue(ByteString.copyFrom(new byte[] {0x11, 0x22})),
+ Value.createArrayValue(
+ TEST_INNER_ARRAY_TYPE,
+ Arrays.asList(Value.createDoubleValue(3.0), Value.createDoubleValue(6.5))),
+ Value.createStructValue(
+ TEST_INNER_STRUCT_TYPE,
+ Arrays.asList(Value.createInt64Value(0L), Value.createStringValue("world"))),
+ Value.createNullValue(TypeFactory.createSimpleType(TypeKind.TYPE_INT64))));
+
+ @Test
+ public void testBeamFieldTypeToZetaSqlType() {
+ assertEquals(TEST_TYPE, ZetaSqlUtils.beamFieldTypeToZetaSqlType(TEST_FIELD_TYPE));
+ }
+
+ @Test
+ public void testJavaObjectToZetaSqlValue() {
+ assertEquals(TEST_VALUE, ZetaSqlUtils.javaObjectToZetaSqlValue(TEST_ROW, TEST_FIELD_TYPE));
+ }
+
+ @Test
+ public void testZetaSqlValueToJavaObject() {
+ assertEquals(TEST_ROW, ZetaSqlUtils.zetaSqlValueToJavaObject(TEST_VALUE, TEST_FIELD_TYPE));
+ }
+}
diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataBufferingOutboundObserver.java b/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataBufferingOutboundObserver.java
index 72ab5d6..bbc2916 100644
--- a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataBufferingOutboundObserver.java
+++ b/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataBufferingOutboundObserver.java
@@ -33,20 +33,26 @@
* {@link BeamFnApi.Elements} message when the buffer threshold is surpassed.
*
* <p>The default size-based buffer threshold can be overridden by specifying the experiment {@code
- * beam_fn_api_data_buffer_size_limit=<bytes>}
+ * data_buffer_size_limit=<bytes>}
*
* <p>The default time-based buffer threshold can be overridden by specifying the experiment {@code
- * beam_fn_api_data_buffer_time_limit=<milliseconds>}
+ * data_buffer_time_limit_ms=<milliseconds>}
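+ *
+ * <p>For example, {@code --experiments=data_buffer_size_limit=1000000} sets a roughly 1 MB
+ * size threshold, and {@code --experiments=data_buffer_time_limit_ms=1000} sets a one-second
+ * time-based flush threshold.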
*/
public interface BeamFnDataBufferingOutboundObserver<T> extends CloseableFnDataReceiver<T> {
// TODO: Consider moving this constant out of this interface
- /** @deprecated Use BEAM_FN_API_DATA_BUFFER_SIZE_LIMIT instead. */
+ /** @deprecated Use DATA_BUFFER_SIZE_LIMIT instead. */
@Deprecated String BEAM_FN_API_DATA_BUFFER_LIMIT = "beam_fn_api_data_buffer_limit=";
- String BEAM_FN_API_DATA_BUFFER_SIZE_LIMIT = "beam_fn_api_data_buffer_size_limit=";
+ /** @deprecated Use DATA_BUFFER_SIZE_LIMIT instead. */
+ @Deprecated String BEAM_FN_API_DATA_BUFFER_SIZE_LIMIT = "beam_fn_api_data_buffer_size_limit=";
+
+ String DATA_BUFFER_SIZE_LIMIT = "data_buffer_size_limit=";
@VisibleForTesting int DEFAULT_BUFFER_LIMIT_BYTES = 1_000_000;
- String BEAM_FN_API_DATA_BUFFER_TIME_LIMIT = "beam_fn_api_data_buffer_time_limit=";
+ /** @deprecated Use DATA_BUFFER_TIME_LIMIT_MS instead. */
+ @Deprecated String BEAM_FN_API_DATA_BUFFER_TIME_LIMIT = "beam_fn_api_data_buffer_time_limit=";
+
+ String DATA_BUFFER_TIME_LIMIT_MS = "data_buffer_time_limit_ms=";
long DEFAULT_BUFFER_LIMIT_TIME_MS = -1L;
static <T> BeamFnDataSizeBasedBufferingOutboundObserver<T> forLocation(
@@ -68,6 +74,9 @@
static int getSizeLimit(PipelineOptions options) {
List<String> experiments = options.as(ExperimentalOptions.class).getExperiments();
for (String experiment : experiments == null ? Collections.<String>emptyList() : experiments) {
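+ // Prefer the new experiment name; the deprecated beam_fn_api_* names below are still honored.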
+ if (experiment.startsWith(DATA_BUFFER_SIZE_LIMIT)) {
+ return Integer.parseInt(experiment.substring(DATA_BUFFER_SIZE_LIMIT.length()));
+ }
if (experiment.startsWith(BEAM_FN_API_DATA_BUFFER_SIZE_LIMIT)) {
return Integer.parseInt(experiment.substring(BEAM_FN_API_DATA_BUFFER_SIZE_LIMIT.length()));
}
@@ -81,6 +90,9 @@
static long getTimeLimit(PipelineOptions options) {
List<String> experiments = options.as(ExperimentalOptions.class).getExperiments();
for (String experiment : experiments == null ? Collections.<String>emptyList() : experiments) {
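+ // Prefer the new experiment name; the deprecated beam_fn_api_* name below is still honored.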
+ if (experiment.startsWith(DATA_BUFFER_TIME_LIMIT_MS)) {
+ return Long.parseLong(experiment.substring(DATA_BUFFER_TIME_LIMIT_MS.length()));
+ }
if (experiment.startsWith(BEAM_FN_API_DATA_BUFFER_TIME_LIMIT)) {
return Long.parseLong(experiment.substring(BEAM_FN_API_DATA_BUFFER_TIME_LIMIT.length()));
}
diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/OutboundObserverFactory.java b/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/OutboundObserverFactory.java
index dde456b..83f94f9 100644
--- a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/OutboundObserverFactory.java
+++ b/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/OutboundObserverFactory.java
@@ -28,7 +28,8 @@
public abstract class OutboundObserverFactory {
/**
* Create a buffering {@link OutboundObserverFactory} for client-side RPCs with the specified
- * {@link ExecutorService} and the default buffer size.
+ * {@link ExecutorService} and the default buffer size. All {@link StreamObserver}s created by
+ * this factory are thread safe.
*/
public static OutboundObserverFactory clientBuffered(ExecutorService executorService) {
return new Buffered(executorService, Buffered.DEFAULT_BUFFER_SIZE);
@@ -36,7 +37,8 @@
/**
* Create a buffering {@link OutboundObserverFactory} for client-side RPCs with the specified
- * {@link ExecutorService} and buffer size.
+ * {@link ExecutorService} and buffer size. All {@link StreamObserver}s created by this factory
+ * are thread safe.
*/
public static OutboundObserverFactory clientBuffered(
ExecutorService executorService, int bufferSize) {
@@ -45,8 +47,7 @@
/**
* Create the default {@link OutboundObserverFactory} for client-side RPCs, which uses basic
- * unbuffered flow control and adds synchronization to provide thread safety of access to the
- * returned observer.
+ * unbuffered flow control. All {@link StreamObserver}s created by this factory are thread safe.
*/
public static OutboundObserverFactory clientDirect() {
return new DirectClient();
diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataSizeBasedBufferingOutboundObserverTest.java b/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataSizeBasedBufferingOutboundObserverTest.java
index 0e53b26..ed2f700 100644
--- a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataSizeBasedBufferingOutboundObserverTest.java
+++ b/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataSizeBasedBufferingOutboundObserverTest.java
@@ -130,7 +130,7 @@
PipelineOptions options = PipelineOptionsFactory.create();
options
.as(ExperimentalOptions.class)
- .setExperiments(Arrays.asList("beam_fn_api_data_buffer_size_limit=100"));
+ .setExperiments(Arrays.asList("data_buffer_size_limit=100"));
CloseableFnDataReceiver<WindowedValue<byte[]>> consumer =
BeamFnDataBufferingOutboundObserver.forLocation(
options,
diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataTimeBasedBufferingOutboundObserverTest.java b/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataTimeBasedBufferingOutboundObserverTest.java
index f4effa8..eaf6290 100644
--- a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataTimeBasedBufferingOutboundObserverTest.java
+++ b/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataTimeBasedBufferingOutboundObserverTest.java
@@ -54,7 +54,7 @@
PipelineOptions options = PipelineOptionsFactory.create();
options
.as(ExperimentalOptions.class)
- .setExperiments(Arrays.asList("beam_fn_api_data_buffer_time_limit=1"));
+ .setExperiments(Arrays.asList("data_buffer_time_limit_ms=1"));
final CountDownLatch waitForFlush = new CountDownLatch(1);
CloseableFnDataReceiver<WindowedValue<byte[]>> consumer =
BeamFnDataBufferingOutboundObserver.forLocation(
@@ -80,7 +80,7 @@
PipelineOptions options = PipelineOptionsFactory.create();
options
.as(ExperimentalOptions.class)
- .setExperiments(Arrays.asList("beam_fn_api_data_buffer_time_limit=1"));
+ .setExperiments(Arrays.asList("data_buffer_time_limit_ms=1"));
BeamFnDataTimeBasedBufferingOutboundObserver<WindowedValue<byte[]>> consumer =
(BeamFnDataTimeBasedBufferingOutboundObserver<WindowedValue<byte[]>>)
BeamFnDataBufferingOutboundObserver.forLocation(
diff --git a/sdks/java/harness/build.gradle b/sdks/java/harness/build.gradle
index 51c65bb..42378ea 100644
--- a/sdks/java/harness/build.gradle
+++ b/sdks/java/harness/build.gradle
@@ -69,5 +69,6 @@
testCompile library.java.junit
testCompile library.java.mockito_core
testCompile project(path: ":sdks:java:core", configuration: "shadowTest")
+ testCompile project(":runners:core-construction-java")
shadowTestRuntimeClasspath library.java.slf4j_jdk14
}
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/CombineRunners.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/CombineRunners.java
index fbdb95d..5240e0c 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/CombineRunners.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/CombineRunners.java
@@ -157,7 +157,7 @@
CombineFn<InputT, AccumT, ?> combineFn =
(CombineFn)
SerializableUtils.deserializeFromByteArray(
- combinePayload.getCombineFn().getSpec().getPayload().toByteArray(), "CombineFn");
+ combinePayload.getCombineFn().getPayload().toByteArray(), "CombineFn");
Coder<AccumT> accumCoder =
(Coder<AccumT>) rehydratedComponents.getCoder(combinePayload.getAccumulatorCoderId());
@@ -190,7 +190,7 @@
CombineFn<?, AccumT, ?> combineFn =
(CombineFn)
SerializableUtils.deserializeFromByteArray(
- combinePayload.getCombineFn().getSpec().getPayload().toByteArray(), "CombineFn");
+ combinePayload.getCombineFn().getPayload().toByteArray(), "CombineFn");
return (KV<KeyT, Iterable<AccumT>> input) ->
KV.of(input.getKey(), combineFn.mergeAccumulators(input.getValue()));
@@ -203,7 +203,7 @@
CombineFn<?, AccumT, OutputT> combineFn =
(CombineFn)
SerializableUtils.deserializeFromByteArray(
- combinePayload.getCombineFn().getSpec().getPayload().toByteArray(), "CombineFn");
+ combinePayload.getCombineFn().getPayload().toByteArray(), "CombineFn");
return (KV<KeyT, AccumT> input) ->
KV.of(input.getKey(), combineFn.extractOutput(input.getValue()));
@@ -217,7 +217,7 @@
CombineFn<InputT, AccumT, OutputT> combineFn =
(CombineFn)
SerializableUtils.deserializeFromByteArray(
- combinePayload.getCombineFn().getSpec().getPayload().toByteArray(), "CombineFn");
+ combinePayload.getCombineFn().getPayload().toByteArray(), "CombineFn");
return (KV<KeyT, Iterable<InputT>> input) -> {
return KV.of(input.getKey(), combineFn.apply(input.getValue()));
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/DoFnPTransformRunnerFactory.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/DoFnPTransformRunnerFactory.java
deleted file mode 100644
index 433572b..0000000
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/DoFnPTransformRunnerFactory.java
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.fn.harness;
-
-import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
-
-import java.io.IOException;
-import java.util.Map;
-import java.util.function.Consumer;
-import java.util.function.Supplier;
-import org.apache.beam.fn.harness.control.BundleSplitListener;
-import org.apache.beam.fn.harness.data.BeamFnDataClient;
-import org.apache.beam.fn.harness.data.PCollectionConsumerRegistry;
-import org.apache.beam.fn.harness.data.PTransformFunctionRegistry;
-import org.apache.beam.fn.harness.state.BeamFnStateClient;
-import org.apache.beam.fn.harness.state.SideInputSpec;
-import org.apache.beam.model.pipeline.v1.RunnerApi;
-import org.apache.beam.model.pipeline.v1.RunnerApi.PCollection;
-import org.apache.beam.model.pipeline.v1.RunnerApi.PTransform;
-import org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload;
-import org.apache.beam.runners.core.construction.PCollectionViewTranslation;
-import org.apache.beam.runners.core.construction.ParDoTranslation;
-import org.apache.beam.runners.core.construction.RehydratedComponents;
-import org.apache.beam.runners.core.construction.Timer;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.KvCoder;
-import org.apache.beam.sdk.fn.data.FnDataReceiver;
-import org.apache.beam.sdk.function.ThrowingRunnable;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.schemas.SchemaCoder;
-import org.apache.beam.sdk.state.TimeDomain;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.Materializations;
-import org.apache.beam.sdk.transforms.reflect.DoFnSignature;
-import org.apache.beam.sdk.transforms.reflect.DoFnSignatures;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.beam.sdk.values.WindowingStrategy;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableListMultimap;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ListMultimap;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets;
-
-/** A {@link PTransformRunnerFactory} for transforms invoking a {@link DoFn}. */
-abstract class DoFnPTransformRunnerFactory<
- TransformInputT,
- FnInputT,
- OutputT,
- RunnerT extends DoFnPTransformRunnerFactory.DoFnPTransformRunner<TransformInputT>>
- implements PTransformRunnerFactory<RunnerT> {
- interface DoFnPTransformRunner<T> {
- void startBundle() throws Exception;
-
- void processElement(WindowedValue<T> input) throws Exception;
-
- void processTimer(
- String timerId, TimeDomain timeDomain, WindowedValue<KV<Object, Timer>> input);
-
- void finishBundle() throws Exception;
-
- void tearDown() throws Exception;
- }
-
- @Override
- public final RunnerT createRunnerForPTransform(
- PipelineOptions pipelineOptions,
- BeamFnDataClient beamFnDataClient,
- BeamFnStateClient beamFnStateClient,
- String pTransformId,
- PTransform pTransform,
- Supplier<String> processBundleInstructionId,
- Map<String, PCollection> pCollections,
- Map<String, RunnerApi.Coder> coders,
- Map<String, RunnerApi.WindowingStrategy> windowingStrategies,
- PCollectionConsumerRegistry pCollectionConsumerRegistry,
- PTransformFunctionRegistry startFunctionRegistry,
- PTransformFunctionRegistry finishFunctionRegistry,
- Consumer<ThrowingRunnable> tearDownFunctions,
- BundleSplitListener splitListener) {
- Context<FnInputT, OutputT> context =
- new Context<>(
- pipelineOptions,
- beamFnStateClient,
- pTransformId,
- pTransform,
- processBundleInstructionId,
- pCollections,
- coders,
- windowingStrategies,
- pCollectionConsumerRegistry,
- splitListener);
-
- RunnerT runner = createRunner(context);
-
- // Register the appropriate handlers.
- startFunctionRegistry.register(pTransformId, runner::startBundle);
- Iterable<String> mainInput =
- Sets.difference(
- pTransform.getInputsMap().keySet(),
- Sets.union(
- context.parDoPayload.getSideInputsMap().keySet(),
- context.parDoPayload.getTimerSpecsMap().keySet()));
- for (String localInputName : mainInput) {
- pCollectionConsumerRegistry.register(
- pTransform.getInputsOrThrow(localInputName),
- pTransformId,
- (FnDataReceiver) (FnDataReceiver<WindowedValue<TransformInputT>>) runner::processElement);
- }
-
- // Register as a consumer for each timer PCollection.
- for (String localName : context.parDoPayload.getTimerSpecsMap().keySet()) {
- TimeDomain timeDomain =
- DoFnSignatures.getTimerSpecOrThrow(
- context.doFnSignature.timerDeclarations().get(localName), context.doFn)
- .getTimeDomain();
- pCollectionConsumerRegistry.register(
- pTransform.getInputsOrThrow(localName),
- pTransformId,
- (FnDataReceiver)
- timer ->
- runner.processTimer(
- localName, timeDomain, (WindowedValue<KV<Object, Timer>>) timer));
- }
-
- finishFunctionRegistry.register(pTransformId, runner::finishBundle);
- tearDownFunctions.accept(runner::tearDown);
- return runner;
- }
-
- abstract RunnerT createRunner(Context<FnInputT, OutputT> context);
-
- static class Context<InputT, OutputT> {
- final PipelineOptions pipelineOptions;
- final BeamFnStateClient beamFnStateClient;
- final String ptransformId;
- final PTransform pTransform;
- final Supplier<String> processBundleInstructionId;
- final RehydratedComponents rehydratedComponents;
- final DoFn<InputT, OutputT> doFn;
- final DoFnSignature doFnSignature;
- final TupleTag<OutputT> mainOutputTag;
- final Coder<?> inputCoder;
- final SchemaCoder<InputT> schemaCoder;
- final Coder<?> keyCoder;
- final SchemaCoder<OutputT> mainOutputSchemaCoder;
- final Coder<? extends BoundedWindow> windowCoder;
- final WindowingStrategy<InputT, ?> windowingStrategy;
- final Map<TupleTag<?>, SideInputSpec> tagToSideInputSpecMap;
- Map<TupleTag<?>, Coder<?>> outputCoders;
- final ParDoPayload parDoPayload;
- final ListMultimap<String, FnDataReceiver<WindowedValue<?>>> localNameToConsumer;
- final BundleSplitListener splitListener;
-
- Context(
- PipelineOptions pipelineOptions,
- BeamFnStateClient beamFnStateClient,
- String ptransformId,
- PTransform pTransform,
- Supplier<String> processBundleInstructionId,
- Map<String, PCollection> pCollections,
- Map<String, RunnerApi.Coder> coders,
- Map<String, RunnerApi.WindowingStrategy> windowingStrategies,
- PCollectionConsumerRegistry pCollectionConsumerRegistry,
- BundleSplitListener splitListener) {
- this.pipelineOptions = pipelineOptions;
- this.beamFnStateClient = beamFnStateClient;
- this.ptransformId = ptransformId;
- this.pTransform = pTransform;
- this.processBundleInstructionId = processBundleInstructionId;
- ImmutableMap.Builder<TupleTag<?>, SideInputSpec> tagToSideInputSpecMapBuilder =
- ImmutableMap.builder();
- try {
- rehydratedComponents =
- RehydratedComponents.forComponents(
- RunnerApi.Components.newBuilder()
- .putAllCoders(coders)
- .putAllPcollections(pCollections)
- .putAllWindowingStrategies(windowingStrategies)
- .build())
- .withPipeline(Pipeline.create());
- parDoPayload = ParDoPayload.parseFrom(pTransform.getSpec().getPayload());
- doFn = (DoFn) ParDoTranslation.getDoFn(parDoPayload);
- doFnSignature = DoFnSignatures.signatureForDoFn(doFn);
- mainOutputTag = (TupleTag) ParDoTranslation.getMainOutputTag(parDoPayload);
- String mainInputTag =
- Iterables.getOnlyElement(
- Sets.difference(
- pTransform.getInputsMap().keySet(),
- Sets.union(
- parDoPayload.getSideInputsMap().keySet(),
- parDoPayload.getTimerSpecsMap().keySet())));
- PCollection mainInput = pCollections.get(pTransform.getInputsOrThrow(mainInputTag));
- inputCoder = rehydratedComponents.getCoder(mainInput.getCoderId());
- if (inputCoder instanceof KvCoder
- // TODO: Stop passing windowed value coders within PCollections.
- || (inputCoder instanceof WindowedValue.WindowedValueCoder
- && (((WindowedValueCoder) inputCoder).getValueCoder() instanceof KvCoder))) {
- this.keyCoder =
- inputCoder instanceof WindowedValueCoder
- ? ((KvCoder) ((WindowedValueCoder) inputCoder).getValueCoder()).getKeyCoder()
- : ((KvCoder) inputCoder).getKeyCoder();
- } else {
- this.keyCoder = null;
- }
- if (inputCoder instanceof SchemaCoder
- // TODO: Stop passing windowed value coders within PCollections.
- || (inputCoder instanceof WindowedValue.WindowedValueCoder
- && (((WindowedValueCoder) inputCoder).getValueCoder() instanceof SchemaCoder))) {
- this.schemaCoder =
- inputCoder instanceof WindowedValueCoder
- ? (SchemaCoder<InputT>) ((WindowedValueCoder) inputCoder).getValueCoder()
- : ((SchemaCoder<InputT>) inputCoder);
- } else {
- this.schemaCoder = null;
- }
-
- windowingStrategy =
- (WindowingStrategy)
- rehydratedComponents.getWindowingStrategy(mainInput.getWindowingStrategyId());
- windowCoder = windowingStrategy.getWindowFn().windowCoder();
-
- outputCoders = Maps.newHashMap();
- for (Map.Entry<String, String> entry : pTransform.getOutputsMap().entrySet()) {
- TupleTag<?> outputTag = new TupleTag<>(entry.getKey());
- RunnerApi.PCollection outputPCollection = pCollections.get(entry.getValue());
- Coder<?> outputCoder = rehydratedComponents.getCoder(outputPCollection.getCoderId());
- if (outputCoder instanceof WindowedValueCoder) {
- outputCoder = ((WindowedValueCoder) outputCoder).getValueCoder();
- }
- outputCoders.put(outputTag, outputCoder);
- }
- Coder<OutputT> outputCoder = (Coder<OutputT>) outputCoders.get(mainOutputTag);
- mainOutputSchemaCoder =
- (outputCoder instanceof SchemaCoder) ? (SchemaCoder<OutputT>) outputCoder : null;
-
- // Build the map from tag id to side input specification
- for (Map.Entry<String, RunnerApi.SideInput> entry :
- parDoPayload.getSideInputsMap().entrySet()) {
- String sideInputTag = entry.getKey();
- RunnerApi.SideInput sideInput = entry.getValue();
- checkArgument(
- Materializations.MULTIMAP_MATERIALIZATION_URN.equals(
- sideInput.getAccessPattern().getUrn()),
- "This SDK is only capable of dealing with %s materializations "
- + "but was asked to handle %s for PCollectionView with tag %s.",
- Materializations.MULTIMAP_MATERIALIZATION_URN,
- sideInput.getAccessPattern().getUrn(),
- sideInputTag);
-
- PCollection sideInputPCollection =
- pCollections.get(pTransform.getInputsOrThrow(sideInputTag));
- WindowingStrategy sideInputWindowingStrategy =
- rehydratedComponents.getWindowingStrategy(
- sideInputPCollection.getWindowingStrategyId());
- tagToSideInputSpecMapBuilder.put(
- new TupleTag<>(entry.getKey()),
- SideInputSpec.create(
- rehydratedComponents.getCoder(sideInputPCollection.getCoderId()),
- sideInputWindowingStrategy.getWindowFn().windowCoder(),
- PCollectionViewTranslation.viewFnFromProto(entry.getValue().getViewFn()),
- PCollectionViewTranslation.windowMappingFnFromProto(
- entry.getValue().getWindowMappingFn())));
- }
- } catch (IOException exn) {
- throw new IllegalArgumentException("Malformed ParDoPayload", exn);
- }
-
- ImmutableListMultimap.Builder<String, FnDataReceiver<WindowedValue<?>>>
- localNameToConsumerBuilder = ImmutableListMultimap.builder();
- for (Map.Entry<String, String> entry : pTransform.getOutputsMap().entrySet()) {
- localNameToConsumerBuilder.putAll(
- entry.getKey(), pCollectionConsumerRegistry.getMultiplexingConsumer(entry.getValue()));
- }
- localNameToConsumer = localNameToConsumerBuilder.build();
- tagToSideInputSpecMap = tagToSideInputSpecMapBuilder.build();
- this.splitListener = splitListener;
- }
- }
-}
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
index a44629f..deba809 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
@@ -22,20 +22,41 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState;
import com.google.auto.service.AutoService;
+import com.google.auto.value.AutoValue;
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
-import org.apache.beam.fn.harness.DoFnPTransformRunnerFactory.Context;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import java.util.function.Supplier;
+import org.apache.beam.fn.harness.control.BundleSplitListener;
+import org.apache.beam.fn.harness.data.BeamFnDataClient;
+import org.apache.beam.fn.harness.data.PCollectionConsumerRegistry;
+import org.apache.beam.fn.harness.data.PTransformFunctionRegistry;
+import org.apache.beam.fn.harness.state.BeamFnStateClient;
import org.apache.beam.fn.harness.state.FnApiStateAccessor;
+import org.apache.beam.fn.harness.state.SideInputSpec;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.BundleApplication;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.DelayedBundleApplication;
+import org.apache.beam.model.pipeline.v1.RunnerApi;
+import org.apache.beam.model.pipeline.v1.RunnerApi.PCollection;
+import org.apache.beam.model.pipeline.v1.RunnerApi.PTransform;
+import org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload;
import org.apache.beam.runners.core.DoFnRunner;
import org.apache.beam.runners.core.LateDataUtils;
+import org.apache.beam.runners.core.construction.PCollectionViewTranslation;
import org.apache.beam.runners.core.construction.PTransformTranslation;
import org.apache.beam.runners.core.construction.ParDoTranslation;
+import org.apache.beam.runners.core.construction.RehydratedComponents;
import org.apache.beam.runners.core.construction.Timer;
+import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.fn.data.FnDataReceiver;
+import org.apache.beam.sdk.function.ThrowingRunnable;
import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.schemas.SchemaCoder;
import org.apache.beam.sdk.state.State;
import org.apache.beam.sdk.state.StateSpec;
import org.apache.beam.sdk.state.TimeDomain;
@@ -44,23 +65,37 @@
import org.apache.beam.sdk.transforms.DoFn.OutputReceiver;
import org.apache.beam.sdk.transforms.DoFnOutputReceivers;
import org.apache.beam.sdk.transforms.DoFnSchemaInformation;
+import org.apache.beam.sdk.transforms.Materializations;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;
+import org.apache.beam.sdk.transforms.reflect.DoFnSignature;
import org.apache.beam.sdk.transforms.reflect.DoFnSignature.StateDeclaration;
import org.apache.beam.sdk.transforms.reflect.DoFnSignature.TimerDeclaration;
import org.apache.beam.sdk.transforms.reflect.DoFnSignatures;
import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
+import org.apache.beam.sdk.transforms.splittabledofn.Sizes.HasSize;
+import org.apache.beam.sdk.transforms.splittabledofn.SplitResult;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.UserCodeException;
import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.sdk.values.WindowingStrategy;
+import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.ByteString;
+import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.util.Durations;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.MoreObjects;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableListMultimap;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ListMultimap;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets;
import org.joda.time.DateTimeUtils;
import org.joda.time.Duration;
import org.joda.time.Instant;
@@ -70,23 +105,275 @@
* abstraction caused by StateInternals/TimerInternals since they model state and timer concepts
* differently.
*/
-public class FnApiDoFnRunner<InputT, OutputT>
- implements DoFnPTransformRunnerFactory.DoFnPTransformRunner<InputT> {
+public class FnApiDoFnRunner<InputT, RestrictionT, PositionT, OutputT> {
/** A registrar which provides a factory to handle Java {@link DoFn}s. */
@AutoService(PTransformRunnerFactory.Registrar.class)
public static class Registrar implements PTransformRunnerFactory.Registrar {
@Override
public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
- return ImmutableMap.of(PTransformTranslation.PAR_DO_TRANSFORM_URN, new Factory());
+ Factory factory = new Factory();
+ return ImmutableMap.<String, PTransformRunnerFactory>builder()
+ .put(PTransformTranslation.PAR_DO_TRANSFORM_URN, factory)
+ .put(PTransformTranslation.SPLITTABLE_PAIR_WITH_RESTRICTION_URN, factory)
+ .put(PTransformTranslation.SPLITTABLE_SPLIT_RESTRICTION_URN, factory)
+ .put(PTransformTranslation.SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN, factory)
+ .put(PTransformTranslation.SPLITTABLE_PROCESS_ELEMENTS_URN, factory)
+ .put(
+ PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN, factory)
+ .build();
}
}
- static class Factory<InputT, OutputT>
- extends DoFnPTransformRunnerFactory<
- InputT, InputT, OutputT, FnApiDoFnRunner<InputT, OutputT>> {
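+ /**
+ * Holds the deserialized {@link ParDoPayload} and the construction-time information (coders,
+ * windowing strategy, side input specs, and output consumers) needed to build a runner.
+ */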
+ static class Context<InputT, OutputT> {
+ final PipelineOptions pipelineOptions;
+ final BeamFnStateClient beamFnStateClient;
+ final String ptransformId;
+ final PTransform pTransform;
+ final Supplier<String> processBundleInstructionId;
+ final RehydratedComponents rehydratedComponents;
+ final DoFn<InputT, OutputT> doFn;
+ final DoFnSignature doFnSignature;
+ final TupleTag<OutputT> mainOutputTag;
+ final Coder<?> inputCoder;
+ final SchemaCoder<InputT> schemaCoder;
+ final Coder<?> keyCoder;
+ final SchemaCoder<OutputT> mainOutputSchemaCoder;
+ final Coder<? extends BoundedWindow> windowCoder;
+ final WindowingStrategy<InputT, ?> windowingStrategy;
+ final Map<TupleTag<?>, SideInputSpec> tagToSideInputSpecMap;
+ Map<TupleTag<?>, Coder<?>> outputCoders;
+ final ParDoPayload parDoPayload;
+ final ListMultimap<String, FnDataReceiver<WindowedValue<?>>> localNameToConsumer;
+ final BundleSplitListener splitListener;
+
+ Context(
+ PipelineOptions pipelineOptions,
+ BeamFnStateClient beamFnStateClient,
+ String ptransformId,
+ PTransform pTransform,
+ Supplier<String> processBundleInstructionId,
+ Map<String, PCollection> pCollections,
+ Map<String, RunnerApi.Coder> coders,
+ Map<String, RunnerApi.WindowingStrategy> windowingStrategies,
+ PCollectionConsumerRegistry pCollectionConsumerRegistry,
+ BundleSplitListener splitListener) {
+ this.pipelineOptions = pipelineOptions;
+ this.beamFnStateClient = beamFnStateClient;
+ this.ptransformId = ptransformId;
+ this.pTransform = pTransform;
+ this.processBundleInstructionId = processBundleInstructionId;
+ ImmutableMap.Builder<TupleTag<?>, SideInputSpec> tagToSideInputSpecMapBuilder =
+ ImmutableMap.builder();
+ try {
+ rehydratedComponents =
+ RehydratedComponents.forComponents(
+ RunnerApi.Components.newBuilder()
+ .putAllCoders(coders)
+ .putAllPcollections(pCollections)
+ .putAllWindowingStrategies(windowingStrategies)
+ .build())
+ .withPipeline(Pipeline.create());
+ parDoPayload = ParDoPayload.parseFrom(pTransform.getSpec().getPayload());
+ doFn = (DoFn) ParDoTranslation.getDoFn(parDoPayload);
+ doFnSignature = DoFnSignatures.signatureForDoFn(doFn);
+ switch (pTransform.getSpec().getUrn()) {
+ case PTransformTranslation.SPLITTABLE_PROCESS_ELEMENTS_URN:
+ case PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN:
+ case PTransformTranslation.PAR_DO_TRANSFORM_URN:
+ mainOutputTag = (TupleTag) ParDoTranslation.getMainOutputTag(parDoPayload);
+ break;
+ case PTransformTranslation.SPLITTABLE_PAIR_WITH_RESTRICTION_URN:
+ case PTransformTranslation.SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN:
+ case PTransformTranslation.SPLITTABLE_SPLIT_RESTRICTION_URN:
+ mainOutputTag =
+ new TupleTag(Iterables.getOnlyElement(pTransform.getOutputsMap().keySet()));
+ break;
+ default:
+ throw new IllegalStateException(
+ String.format("Unknown urn: %s", pTransform.getSpec().getUrn()));
+ }
+ String mainInputTag =
+ Iterables.getOnlyElement(
+ Sets.difference(
+ pTransform.getInputsMap().keySet(),
+ Sets.union(
+ parDoPayload.getSideInputsMap().keySet(),
+ parDoPayload.getTimerSpecsMap().keySet())));
+ PCollection mainInput = pCollections.get(pTransform.getInputsOrThrow(mainInputTag));
+ inputCoder = rehydratedComponents.getCoder(mainInput.getCoderId());
+ if (inputCoder instanceof KvCoder
+ // TODO: Stop passing windowed value coders within PCollections.
+ || (inputCoder instanceof WindowedValue.WindowedValueCoder
+ && (((WindowedValueCoder) inputCoder).getValueCoder() instanceof KvCoder))) {
+ this.keyCoder =
+ inputCoder instanceof WindowedValueCoder
+ ? ((KvCoder) ((WindowedValueCoder) inputCoder).getValueCoder()).getKeyCoder()
+ : ((KvCoder) inputCoder).getKeyCoder();
+ } else {
+ this.keyCoder = null;
+ }
+ if (inputCoder instanceof SchemaCoder
+ // TODO: Stop passing windowed value coders within PCollections.
+ || (inputCoder instanceof WindowedValue.WindowedValueCoder
+ && (((WindowedValueCoder) inputCoder).getValueCoder() instanceof SchemaCoder))) {
+ this.schemaCoder =
+ inputCoder instanceof WindowedValueCoder
+ ? (SchemaCoder<InputT>) ((WindowedValueCoder) inputCoder).getValueCoder()
+ : ((SchemaCoder<InputT>) inputCoder);
+ } else {
+ this.schemaCoder = null;
+ }
+
+ windowingStrategy =
+ (WindowingStrategy)
+ rehydratedComponents.getWindowingStrategy(mainInput.getWindowingStrategyId());
+ windowCoder = windowingStrategy.getWindowFn().windowCoder();
+
+ outputCoders = Maps.newHashMap();
+ for (Map.Entry<String, String> entry : pTransform.getOutputsMap().entrySet()) {
+ TupleTag<?> outputTag = new TupleTag<>(entry.getKey());
+ RunnerApi.PCollection outputPCollection = pCollections.get(entry.getValue());
+ Coder<?> outputCoder = rehydratedComponents.getCoder(outputPCollection.getCoderId());
+ if (outputCoder instanceof WindowedValueCoder) {
+ outputCoder = ((WindowedValueCoder) outputCoder).getValueCoder();
+ }
+ outputCoders.put(outputTag, outputCoder);
+ }
+ Coder<OutputT> outputCoder = (Coder<OutputT>) outputCoders.get(mainOutputTag);
+ mainOutputSchemaCoder =
+ (outputCoder instanceof SchemaCoder) ? (SchemaCoder<OutputT>) outputCoder : null;
+
+ // Build the map from tag id to side input specification
+ for (Map.Entry<String, RunnerApi.SideInput> entry :
+ parDoPayload.getSideInputsMap().entrySet()) {
+ String sideInputTag = entry.getKey();
+ RunnerApi.SideInput sideInput = entry.getValue();
+ checkArgument(
+ Materializations.MULTIMAP_MATERIALIZATION_URN.equals(
+ sideInput.getAccessPattern().getUrn()),
+ "This SDK is only capable of dealing with %s materializations "
+ + "but was asked to handle %s for PCollectionView with tag %s.",
+ Materializations.MULTIMAP_MATERIALIZATION_URN,
+ sideInput.getAccessPattern().getUrn(),
+ sideInputTag);
+
+ PCollection sideInputPCollection =
+ pCollections.get(pTransform.getInputsOrThrow(sideInputTag));
+ WindowingStrategy sideInputWindowingStrategy =
+ rehydratedComponents.getWindowingStrategy(
+ sideInputPCollection.getWindowingStrategyId());
+ tagToSideInputSpecMapBuilder.put(
+ new TupleTag<>(entry.getKey()),
+ SideInputSpec.create(
+ rehydratedComponents.getCoder(sideInputPCollection.getCoderId()),
+ sideInputWindowingStrategy.getWindowFn().windowCoder(),
+ PCollectionViewTranslation.viewFnFromProto(entry.getValue().getViewFn()),
+ PCollectionViewTranslation.windowMappingFnFromProto(
+ entry.getValue().getWindowMappingFn())));
+ }
+ } catch (IOException exn) {
+ throw new IllegalArgumentException("Malformed ParDoPayload", exn);
+ }
+
+ ImmutableListMultimap.Builder<String, FnDataReceiver<WindowedValue<?>>>
+ localNameToConsumerBuilder = ImmutableListMultimap.builder();
+ for (Map.Entry<String, String> entry : pTransform.getOutputsMap().entrySet()) {
+ localNameToConsumerBuilder.putAll(
+ entry.getKey(), pCollectionConsumerRegistry.getMultiplexingConsumer(entry.getValue()));
+ }
+ localNameToConsumer = localNameToConsumerBuilder.build();
+ tagToSideInputSpecMap = tagToSideInputSpecMapBuilder.build();
+ this.splitListener = splitListener;
+ }
+ }
+
+ static class Factory<InputT, RestrictionT, PositionT, OutputT>
+ implements PTransformRunnerFactory<
+ FnApiDoFnRunner<InputT, RestrictionT, PositionT, OutputT>> {
+
@Override
- public FnApiDoFnRunner<InputT, OutputT> createRunner(Context<InputT, OutputT> context) {
- return new FnApiDoFnRunner<>(context);
+ public final FnApiDoFnRunner<InputT, RestrictionT, PositionT, OutputT>
+ createRunnerForPTransform(
+ PipelineOptions pipelineOptions,
+ BeamFnDataClient beamFnDataClient,
+ BeamFnStateClient beamFnStateClient,
+ String pTransformId,
+ PTransform pTransform,
+ Supplier<String> processBundleInstructionId,
+ Map<String, PCollection> pCollections,
+ Map<String, RunnerApi.Coder> coders,
+ Map<String, RunnerApi.WindowingStrategy> windowingStrategies,
+ PCollectionConsumerRegistry pCollectionConsumerRegistry,
+ PTransformFunctionRegistry startFunctionRegistry,
+ PTransformFunctionRegistry finishFunctionRegistry,
+ Consumer<ThrowingRunnable> tearDownFunctions,
+ BundleSplitListener splitListener) {
+ Context<InputT, OutputT> context =
+ new Context<>(
+ pipelineOptions,
+ beamFnStateClient,
+ pTransformId,
+ pTransform,
+ processBundleInstructionId,
+ pCollections,
+ coders,
+ windowingStrategies,
+ pCollectionConsumerRegistry,
+ splitListener);
+
+ FnApiDoFnRunner<InputT, RestrictionT, PositionT, OutputT> runner =
+ new FnApiDoFnRunner<>(context);
+
+ // Register the appropriate handlers.
+ startFunctionRegistry.register(pTransformId, runner::startBundle);
+ String mainInput;
+ try {
+ mainInput = ParDoTranslation.getMainInputName(pTransform);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ FnDataReceiver<WindowedValue> mainInputConsumer;
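+ // Choose the element-processing entry point that matches this transform's URN
+ // (a plain ParDo or one of the splittable DoFn expansion stages).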
+ switch (pTransform.getSpec().getUrn()) {
+ case PTransformTranslation.PAR_DO_TRANSFORM_URN:
+ mainInputConsumer = runner::processElementForParDo;
+ break;
+ case PTransformTranslation.SPLITTABLE_PAIR_WITH_RESTRICTION_URN:
+ mainInputConsumer = runner::processElementForPairWithRestriction;
+ break;
+ case PTransformTranslation.SPLITTABLE_SPLIT_RESTRICTION_URN:
+ case PTransformTranslation.SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN:
+ mainInputConsumer = runner::processElementForSplitRestriction;
+ break;
+ case PTransformTranslation.SPLITTABLE_PROCESS_ELEMENTS_URN:
+ mainInputConsumer = runner::processElementForElementAndRestriction;
+ break;
+ case PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN:
+ mainInputConsumer = runner::processElementForSizedElementAndRestriction;
+ break;
+ default:
+ throw new IllegalStateException("Unknown urn: " + pTransform.getSpec().getUrn());
+ }
+ pCollectionConsumerRegistry.register(
+ pTransform.getInputsOrThrow(mainInput), pTransformId, (FnDataReceiver) mainInputConsumer);
+
+ // Register as a consumer for each timer PCollection.
+ for (String localName : context.parDoPayload.getTimerSpecsMap().keySet()) {
+ TimeDomain timeDomain =
+ DoFnSignatures.getTimerSpecOrThrow(
+ context.doFnSignature.timerDeclarations().get(localName), context.doFn)
+ .getTimeDomain();
+ pCollectionConsumerRegistry.register(
+ pTransform.getInputsOrThrow(localName),
+ pTransformId,
+ (FnDataReceiver)
+ timer ->
+ runner.processTimer(
+ localName, timeDomain, (WindowedValue<KV<Object, Timer>>) timer));
+ }
+
+ finishFunctionRegistry.register(pTransformId, runner::finishBundle);
+ tearDownFunctions.accept(runner::tearDown);
+ return runner;
}
}
@@ -94,32 +381,58 @@
private final Context<InputT, OutputT> context;
private final Collection<FnDataReceiver<WindowedValue<OutputT>>> mainOutputConsumers;
+ private final String mainInputId;
private FnApiStateAccessor stateAccessor;
private final DoFnInvoker<InputT, OutputT> doFnInvoker;
private final DoFn<InputT, OutputT>.StartBundleContext startBundleContext;
private final ProcessBundleContext processContext;
private final OnTimerContext onTimerContext;
private final DoFn<InputT, OutputT>.FinishBundleContext finishBundleContext;
+ /**
+ * Only set for {@link PTransformTranslation#SPLITTABLE_SPLIT_RESTRICTION_URN} and {@link
+ * PTransformTranslation#SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN} transforms. Can only be
+ * invoked from within {@code processElement...} methods.
+ */
+ private final OutputReceiver<RestrictionT> outputSplitRestrictionReceiver;
+ /**
+ * Only set for {@link PTransformTranslation#SPLITTABLE_PROCESS_ELEMENTS_URN} and {@link
+ * PTransformTranslation#SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN} transforms. Can
+ * only be invoked from within {@code processElement...} methods.
+ */
+ private final Function<SplitResult<RestrictionT>, WindowedSplitResult>
+ convertSplitResultToWindowedSplitResult;
- /** Only valid during {@link #processElement}, null otherwise. */
+ private final DoFnSchemaInformation doFnSchemaInformation;
+ private final Map<String, PCollectionView<?>> sideInputMapping;
+
+ // The member variables below are only valid for the lifetime of certain methods.
+ /** Only valid during {@code processElement...} methods, null otherwise. */
private WindowedValue<InputT> currentElement;
- /** Only valid during {@link #processElement} and {@link #processTimer}, null otherwise. */
+ /**
+ * Only valid during {@code processElement...} and {@link #processTimer} methods, null otherwise.
+ */
private BoundedWindow currentWindow;
+ /**
+ * Only valid during {@link #processElementForElementAndRestriction} and {@link
+ * #processElementForSizedElementAndRestriction}, null otherwise.
+ */
+ private RestrictionTracker<RestrictionT, PositionT> currentTracker;
+
/** Only valid during {@link #processTimer}, null otherwise. */
private WindowedValue<KV<Object, Timer>> currentTimer;
/** Only valid during {@link #processTimer}, null otherwise. */
private TimeDomain currentTimeDomain;
- private DoFnSchemaInformation doFnSchemaInformation;
-
- private Map<String, PCollectionView<?>> sideInputMapping;
-
FnApiDoFnRunner(Context<InputT, OutputT> context) {
this.context = context;
-
+ try {
+ this.mainInputId = ParDoTranslation.getMainInputName(context.pTransform);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
this.mainOutputConsumers =
(Collection<FnDataReceiver<WindowedValue<OutputT>>>)
(Collection) context.localNameToConsumer.get(context.mainOutputTag.getId());
@@ -162,9 +475,133 @@
outputTo(consumers, WindowedValue.of(output, timestamp, window, PaneInfo.NO_FIRING));
}
};
+ switch (context.pTransform.getSpec().getUrn()) {
+ case PTransformTranslation.SPLITTABLE_SPLIT_RESTRICTION_URN:
+ this.outputSplitRestrictionReceiver =
+ new OutputReceiver<RestrictionT>() {
+
+ @Override
+ public void output(RestrictionT output) {
+ outputTo(
+ mainOutputConsumers,
+ (WindowedValue<OutputT>)
+ currentElement.withValue(KV.of(currentElement.getValue(), output)));
+ }
+
+ @Override
+ public void outputWithTimestamp(RestrictionT output, Instant timestamp) {
+ outputTo(
+ mainOutputConsumers,
+ (WindowedValue<OutputT>)
+ WindowedValue.of(
+ KV.of(currentElement.getValue(), output),
+ timestamp,
+ currentWindow,
+ currentElement.getPane()));
+ }
+ };
+ break;
+ case PTransformTranslation.SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN:
+ this.outputSplitRestrictionReceiver =
+ new OutputReceiver<RestrictionT>() {
+
+ @Override
+ public void output(RestrictionT output) {
+ RestrictionTracker<RestrictionT, PositionT> outputTracker =
+ doFnInvoker.invokeNewTracker(output);
+ outputTo(
+ mainOutputConsumers,
+ (WindowedValue<OutputT>)
+ currentElement.withValue(
+ KV.of(
+ KV.of(currentElement.getValue(), output),
+ outputTracker instanceof HasSize
+ ? ((HasSize) outputTracker).getSize()
+ : 1.0)));
+ }
+
+ @Override
+ public void outputWithTimestamp(RestrictionT output, Instant timestamp) {
+ RestrictionTracker<RestrictionT, PositionT> outputTracker =
+ doFnInvoker.invokeNewTracker(output);
+ outputTo(
+ mainOutputConsumers,
+ (WindowedValue<OutputT>)
+ WindowedValue.of(
+ KV.of(
+ KV.of(currentElement.getValue(), output),
+ outputTracker instanceof HasSize
+ ? ((HasSize) outputTracker).getSize()
+ : 1.0),
+ timestamp,
+ currentWindow,
+ currentElement.getPane()));
+ }
+ };
+ break;
+ default:
+ this.outputSplitRestrictionReceiver =
+ new OutputReceiver<RestrictionT>() {
+ @Override
+ public void output(RestrictionT output) {
+ throw new IllegalStateException(
+ String.format(
+ "Unimplemented split output handler for %s.",
+ context.pTransform.getSpec().getUrn()));
+ }
+
+ @Override
+ public void outputWithTimestamp(RestrictionT output, Instant timestamp) {
+ throw new IllegalStateException(
+ String.format(
+ "Unimplemented split output handler for %s.",
+ context.pTransform.getSpec().getUrn()));
+ }
+ };
+ }
+ switch (context.pTransform.getSpec().getUrn()) {
+ case PTransformTranslation.SPLITTABLE_PROCESS_ELEMENTS_URN:
+ this.convertSplitResultToWindowedSplitResult =
+ (splitResult) ->
+ WindowedSplitResult.forRoots(
+ currentElement.withValue(
+ KV.of(currentElement.getValue(), splitResult.getPrimary())),
+ currentElement.withValue(
+ KV.of(currentElement.getValue(), splitResult.getResidual())));
+ break;
+ case PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN:
+ this.convertSplitResultToWindowedSplitResult =
+ (splitResult) -> {
+ RestrictionTracker<RestrictionT, PositionT> primaryTracker =
+ doFnInvoker.invokeNewTracker(splitResult.getPrimary());
+ RestrictionTracker<RestrictionT, PositionT> residualTracker =
+ doFnInvoker.invokeNewTracker(splitResult.getResidual());
+ return WindowedSplitResult.forRoots(
+ currentElement.withValue(
+ KV.of(
+ KV.of(currentElement.getValue(), splitResult.getPrimary()),
+ primaryTracker instanceof HasSize
+ ? ((HasSize) primaryTracker).getSize()
+ : 1.0)),
+ currentElement.withValue(
+ KV.of(
+ KV.of(currentElement.getValue(), splitResult.getResidual()),
+ residualTracker instanceof HasSize
+ ? ((HasSize) residualTracker).getSize()
+ : 1.0)));
+ };
+ break;
+ default:
+ this.convertSplitResultToWindowedSplitResult =
+ (splitResult) -> {
+ throw new IllegalStateException(
+ String.format(
+ "Unimplemented split conversion handler for %s.",
+ context.pTransform.getSpec().getUrn()));
+ };
+ }
}
- @Override
public void startBundle() {
this.stateAccessor =
new FnApiStateAccessor(
@@ -181,8 +618,7 @@
doFnInvoker.invokeStartBundle(startBundleContext);
}
- @Override
- public void processElement(WindowedValue<InputT> elem) {
+ public void processElementForParDo(WindowedValue<InputT> elem) {
currentElement = elem;
try {
Iterator<BoundedWindow> windowIterator =
@@ -197,7 +633,129 @@
}
}
- @Override
+ public void processElementForPairWithRestriction(WindowedValue<InputT> elem) {
+ currentElement = elem;
+ try {
+ Iterator<BoundedWindow> windowIterator =
+ (Iterator<BoundedWindow>) elem.getWindows().iterator();
+ while (windowIterator.hasNext()) {
+ currentWindow = windowIterator.next();
+ outputTo(
+ mainOutputConsumers,
+ (WindowedValue)
+ elem.withValue(
+ KV.of(
+ elem.getValue(),
+ doFnInvoker.invokeGetInitialRestriction(elem.getValue()))));
+ }
+ } finally {
+ currentElement = null;
+ currentWindow = null;
+ }
+ }
+
+ public void processElementForSplitRestriction(WindowedValue<KV<InputT, RestrictionT>> elem) {
+ currentElement = elem.withValue(elem.getValue().getKey());
+ try {
+ Iterator<BoundedWindow> windowIterator =
+ (Iterator<BoundedWindow>) elem.getWindows().iterator();
+ while (windowIterator.hasNext()) {
+ currentWindow = windowIterator.next();
+ doFnInvoker.invokeSplitRestriction(
+ elem.getValue().getKey(),
+ elem.getValue().getValue(),
+ this.outputSplitRestrictionReceiver);
+ }
+ } finally {
+ currentElement = null;
+ currentWindow = null;
+ }
+ }
+
+ /** Internal class to hold the primary and residual roots when converted to an input element. */
+ @AutoValue
+ abstract static class WindowedSplitResult {
+ public static WindowedSplitResult forRoots(
+ WindowedValue primaryRoot, WindowedValue residualRoot) {
+ return new AutoValue_FnApiDoFnRunner_WindowedSplitResult(primaryRoot, residualRoot);
+ }
+
+ public abstract WindowedValue getPrimaryRoot();
+
+ public abstract WindowedValue getResidualRoot();
+ }
+
+ public void processElementForSizedElementAndRestriction(
+ WindowedValue<KV<KV<InputT, RestrictionT>, Double>> elem) {
+ processElementForElementAndRestriction(elem.withValue(elem.getValue().getKey()));
+ }
+
+ public void processElementForElementAndRestriction(WindowedValue<KV<InputT, RestrictionT>> elem) {
+ currentElement = elem.withValue(elem.getValue().getKey());
+ try {
+ Iterator<BoundedWindow> windowIterator =
+ (Iterator<BoundedWindow>) elem.getWindows().iterator();
+ while (windowIterator.hasNext()) {
+ currentTracker = doFnInvoker.invokeNewTracker(elem.getValue().getValue());
+ currentWindow = windowIterator.next();
+ DoFn.ProcessContinuation continuation = doFnInvoker.invokeProcessElement(processContext);
+ // Ensure that all the work is done if the user tells us that they don't want to
+ // resume processing.
+ if (!continuation.shouldResume()) {
+ currentTracker.checkDone();
+ continue;
+ }
+
+ SplitResult<RestrictionT> result = currentTracker.trySplit(0);
+ // After the user has chosen to resume processing later, the Runner may have stolen
+ // the remainder of work through a split call so the above trySplit may fail. If so,
+ // the current restriction must be done.
+ if (result == null) {
+ currentTracker.checkDone();
+ continue;
+ }
+
+ // Otherwise we have a successful self checkpoint.
+ WindowedSplitResult windowedSplitResult =
+ convertSplitResultToWindowedSplitResult.apply(result);
+ ByteString.Output primaryBytes = ByteString.newOutput();
+ ByteString.Output residualBytes = ByteString.newOutput();
+ try {
+ Coder fullInputCoder =
+ WindowedValue.getFullCoder(context.inputCoder, context.windowCoder);
+ fullInputCoder.encode(windowedSplitResult.getPrimaryRoot(), primaryBytes);
+ fullInputCoder.encode(windowedSplitResult.getResidualRoot(), residualBytes);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ BundleApplication primaryApplication =
+ BundleApplication.newBuilder()
+ .setTransformId(context.ptransformId)
+ .setInputId(mainInputId)
+ .setElement(primaryBytes.toByteString())
+ .build();
+ BundleApplication residualApplication =
+ BundleApplication.newBuilder()
+ .setTransformId(context.ptransformId)
+ .setInputId(mainInputId)
+ .setElement(residualBytes.toByteString())
+ .build();
+ context.splitListener.split(
+ ImmutableList.of(primaryApplication),
+ ImmutableList.of(
+ DelayedBundleApplication.newBuilder()
+ .setApplication(residualApplication)
+ .setRequestedTimeDelay(
+ Durations.fromMillis(continuation.resumeDelay().getMillis()))
+ .build()));
+ }
+ } finally {
+ currentElement = null;
+ currentWindow = null;
+ currentTracker = null;
+ }
+ }
+
public void processTimer(
String timerId, TimeDomain timeDomain, WindowedValue<KV<Object, Timer>> timer) {
currentTimer = timer;
@@ -216,7 +774,6 @@
}
}
- @Override
public void finishBundle() {
doFnInvoker.invokeFinishBundle(finishBundleContext);
@@ -225,7 +782,6 @@
this.stateAccessor = null;
}
- @Override
public void tearDown() {
doFnInvoker.invokeTeardown();
}
@@ -338,6 +894,11 @@
return this;
}
+ @Override
+ public org.apache.beam.sdk.state.Timer withOutputTimestamp(Instant outputTime) {
+ throw new UnsupportedOperationException("TODO: Add support for timers");
+ }
+
/**
* For event time timers the target time should be prior to window GC time. So it returns
* min(time to set, GC Time of window).
@@ -449,7 +1010,7 @@
@Override
public RestrictionTracker<?, ?> restrictionTracker() {
- throw new UnsupportedOperationException("RestrictionTracker parameters are not supported.");
+ return currentTracker;
}
@Override
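A minimal sketch of the sizing fallback and the element shapes consumed by the runner methods registered above. KV, OffsetRange, OffsetRangeTracker and HasSize are the Beam types referenced elsewhere in this patch; the literal values and local variables are illustrative only, not part of the change:

    // Illustrative sketch, not part of the patch.
    RestrictionTracker<OffsetRange, Long> tracker = new OffsetRangeTracker(new OffsetRange(0, 5));
    // Mirrors the fallback above: trackers implementing HasSize report a size, all others default to 1.0.
    double size = tracker instanceof HasSize ? ((HasSize) tracker).getSize() : 1.0;
    // Element shapes per expanded SDF stage:
    KV<String, OffsetRange> paired = KV.of("5", new OffsetRange(0, 5));   // pair-with-restriction output
    KV<KV<String, OffsetRange>, Double> sized = KV.of(paired, size);      // sized element-and-restriction input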
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java
index 1aa5ba5..6ec1673 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java
@@ -198,6 +198,9 @@
handlers.put(
BeamFnApi.InstructionRequest.RequestCase.PROCESS_BUNDLE,
processBundleHandler::processBundle);
+ handlers.put(
+ BeamFnApi.InstructionRequest.RequestCase.PROCESS_BUNDLE_SPLIT,
+ processBundleHandler::split);
BeamFnControlClient control =
new BeamFnControlClient(
id, controlApiServiceDescriptor, channelFactory, outboundObserverFactory, handlers);
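The handler registered above routes InstructionRequests whose request case is PROCESS_BUNDLE_SPLIT to ProcessBundleHandler#split. A minimal sketch of such a request, using the BeamFnApi builders that appear elsewhere in this diff; the instruction ids are illustrative, and the inner id must name a bundle that is still being processed:

    BeamFnApi.InstructionRequest splitRequest =
        BeamFnApi.InstructionRequest.newBuilder()
            .setInstructionId("split-1")
            .setProcessBundleSplit(
                BeamFnApi.ProcessBundleSplitRequest.newBuilder().setInstructionId("bundle-1"))
            .build();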
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/SplittableProcessElementsRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/SplittableProcessElementsRunner.java
deleted file mode 100644
index 7670a9a..0000000
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/SplittableProcessElementsRunner.java
+++ /dev/null
@@ -1,284 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.fn.harness;
-
-import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
-
-import com.google.auto.service.AutoService;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Map;
-import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import org.apache.beam.fn.harness.DoFnPTransformRunnerFactory.Context;
-import org.apache.beam.fn.harness.state.FnApiStateAccessor;
-import org.apache.beam.model.fnexecution.v1.BeamFnApi.BundleApplication;
-import org.apache.beam.model.fnexecution.v1.BeamFnApi.DelayedBundleApplication;
-import org.apache.beam.runners.core.OutputAndTimeBoundedSplittableProcessElementInvoker;
-import org.apache.beam.runners.core.OutputWindowedValue;
-import org.apache.beam.runners.core.SplittableProcessElementInvoker;
-import org.apache.beam.runners.core.construction.PTransformTranslation;
-import org.apache.beam.runners.core.construction.Timer;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.fn.data.FnDataReceiver;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.state.TimeDomain;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
-import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;
-import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.UserCodeException;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.ByteString;
-import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.util.Timestamps;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-/** Runs the {@link PTransformTranslation#SPLITTABLE_PROCESS_ELEMENTS_URN} transform. */
-public class SplittableProcessElementsRunner<InputT, RestrictionT, OutputT>
- implements DoFnPTransformRunnerFactory.DoFnPTransformRunner<KV<InputT, RestrictionT>> {
- /** A registrar which provides a factory to handle Java {@link DoFn}s. */
- @AutoService(PTransformRunnerFactory.Registrar.class)
- public static class Registrar implements PTransformRunnerFactory.Registrar {
- @Override
- public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
- return ImmutableMap.of(PTransformTranslation.SPLITTABLE_PROCESS_ELEMENTS_URN, new Factory());
- }
- }
-
- static class Factory<InputT, RestrictionT, OutputT>
- extends DoFnPTransformRunnerFactory<
- KV<InputT, RestrictionT>,
- InputT,
- OutputT,
- SplittableProcessElementsRunner<InputT, RestrictionT, OutputT>> {
-
- @Override
- SplittableProcessElementsRunner<InputT, RestrictionT, OutputT> createRunner(
- Context<InputT, OutputT> context) {
- Coder<WindowedValue<KV<InputT, RestrictionT>>> windowedCoder =
- FullWindowedValueCoder.of(
- (Coder<KV<InputT, RestrictionT>>) context.inputCoder, context.windowCoder);
-
- return new SplittableProcessElementsRunner<>(
- context,
- windowedCoder,
- (Collection<FnDataReceiver<WindowedValue<OutputT>>>)
- (Collection) context.localNameToConsumer.get(context.mainOutputTag.getId()),
- Iterables.getOnlyElement(context.pTransform.getInputsMap().keySet()));
- }
- }
-
- //////////////////////////////////////////////////////////////////////////////////////////////////
-
- private final Context<InputT, OutputT> context;
- private final String mainInputId;
- private final Coder<WindowedValue<KV<InputT, RestrictionT>>> inputCoder;
- private final Collection<FnDataReceiver<WindowedValue<OutputT>>> mainOutputConsumers;
- private final DoFnInvoker<InputT, OutputT> doFnInvoker;
- private final ScheduledExecutorService executor;
-
- private FnApiStateAccessor stateAccessor;
-
- private final DoFn<InputT, OutputT>.StartBundleContext startBundleContext;
- private final DoFn<InputT, OutputT>.FinishBundleContext finishBundleContext;
-
- SplittableProcessElementsRunner(
- Context<InputT, OutputT> context,
- Coder<WindowedValue<KV<InputT, RestrictionT>>> inputCoder,
- Collection<FnDataReceiver<WindowedValue<OutputT>>> mainOutputConsumers,
- String mainInputId) {
- this.context = context;
- this.mainInputId = mainInputId;
- this.inputCoder = inputCoder;
- this.mainOutputConsumers = mainOutputConsumers;
- this.doFnInvoker = DoFnInvokers.invokerFor(context.doFn);
- this.doFnInvoker.invokeSetup();
- this.executor = Executors.newSingleThreadScheduledExecutor();
-
- this.startBundleContext =
- context.doFn.new StartBundleContext() {
- @Override
- public PipelineOptions getPipelineOptions() {
- return context.pipelineOptions;
- }
- };
- this.finishBundleContext =
- context.doFn.new FinishBundleContext() {
- @Override
- public PipelineOptions getPipelineOptions() {
- return context.pipelineOptions;
- }
-
- @Override
- public void output(OutputT output, Instant timestamp, BoundedWindow window) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public <T> void output(
- TupleTag<T> tag, T output, Instant timestamp, BoundedWindow window) {
- throw new UnsupportedOperationException();
- }
- };
- }
-
- @Override
- public void startBundle() {
- doFnInvoker.invokeStartBundle(startBundleContext);
- }
-
- @Override
- public void processElement(WindowedValue<KV<InputT, RestrictionT>> elem) {
- processElementTyped(elem);
- }
-
- private <PositionT> void processElementTyped(WindowedValue<KV<InputT, RestrictionT>> elem) {
- checkArgument(
- elem.getWindows().size() == 1,
- "SPLITTABLE_PROCESS_ELEMENTS expects its input to be in 1 window, but got %s windows",
- elem.getWindows().size());
- WindowedValue<InputT> element = elem.withValue(elem.getValue().getKey());
- BoundedWindow window = elem.getWindows().iterator().next();
- this.stateAccessor =
- new FnApiStateAccessor(
- context.pipelineOptions,
- context.ptransformId,
- context.processBundleInstructionId,
- context.tagToSideInputSpecMap,
- context.beamFnStateClient,
- context.keyCoder,
- (Coder<BoundedWindow>) context.windowCoder,
- () -> elem,
- () -> window);
- RestrictionTracker<RestrictionT, PositionT> tracker =
- doFnInvoker.invokeNewTracker(elem.getValue().getValue());
- OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, PositionT>
- processElementInvoker =
- new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
- context.doFn,
- context.pipelineOptions,
- new OutputWindowedValue<OutputT>() {
- @Override
- public void outputWindowedValue(
- OutputT output,
- Instant timestamp,
- Collection<? extends BoundedWindow> windows,
- PaneInfo pane) {
- outputTo(
- mainOutputConsumers, WindowedValue.of(output, timestamp, windows, pane));
- }
-
- @Override
- public <AdditionalOutputT> void outputWindowedValue(
- TupleTag<AdditionalOutputT> tag,
- AdditionalOutputT output,
- Instant timestamp,
- Collection<? extends BoundedWindow> windows,
- PaneInfo pane) {
- Collection<FnDataReceiver<WindowedValue<AdditionalOutputT>>> consumers =
- (Collection) context.localNameToConsumer.get(tag.getId());
- if (consumers == null) {
- throw new IllegalArgumentException(
- String.format("Unknown output tag %s", tag));
- }
- outputTo(consumers, WindowedValue.of(output, timestamp, windows, pane));
- }
- },
- stateAccessor,
- executor,
- 10000,
- Duration.standardSeconds(10));
- SplittableProcessElementInvoker<InputT, OutputT, RestrictionT, PositionT>.Result result =
- processElementInvoker.invokeProcessElement(doFnInvoker, element, tracker);
- this.stateAccessor = null;
-
- if (result.getContinuation().shouldResume()) {
- WindowedValue<KV<InputT, RestrictionT>> primary =
- element.withValue(KV.of(element.getValue(), tracker.currentRestriction()));
- WindowedValue<KV<InputT, RestrictionT>> residual =
- element.withValue(KV.of(element.getValue(), result.getResidualRestriction()));
- ByteString.Output primaryBytes = ByteString.newOutput();
- ByteString.Output residualBytes = ByteString.newOutput();
- try {
- inputCoder.encode(primary, primaryBytes);
- inputCoder.encode(residual, residualBytes);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- BundleApplication primaryApplication =
- BundleApplication.newBuilder()
- .setTransformId(context.ptransformId)
- .setInputId(mainInputId)
- .setElement(primaryBytes.toByteString())
- .build();
- BundleApplication residualApplication =
- BundleApplication.newBuilder()
- .setTransformId(context.ptransformId)
- .setInputId(mainInputId)
- .setElement(residualBytes.toByteString())
- .build();
- context.splitListener.split(
- ImmutableList.of(primaryApplication),
- ImmutableList.of(
- DelayedBundleApplication.newBuilder()
- .setApplication(residualApplication)
- .setRequestedExecutionTime(
- Timestamps.fromMillis(
- System.currentTimeMillis()
- + result.getContinuation().resumeDelay().getMillis()))
- .build()));
- }
- }
-
- @Override
- public void processTimer(
- String timerId, TimeDomain timeDomain, WindowedValue<KV<Object, Timer>> input) {
- throw new UnsupportedOperationException("Timers are unsupported in a SplittableDoFn.");
- }
-
- @Override
- public void finishBundle() {
- doFnInvoker.invokeFinishBundle(finishBundleContext);
- }
-
- @Override
- public void tearDown() {
- doFnInvoker.invokeTeardown();
- }
-
- /** Outputs the given element to the specified set of consumers wrapping any exceptions. */
- private <T> void outputTo(
- Collection<FnDataReceiver<WindowedValue<T>>> consumers, WindowedValue<T> output) {
- try {
- for (FnDataReceiver<WindowedValue<T>> consumer : consumers) {
- consumer.accept(output);
- }
- } catch (Throwable t) {
- throw UserCodeException.wrap(t);
- }
- }
-}
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/WindowMappingFnRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/WindowMappingFnRunner.java
index 7fd71da..bdebcb4 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/WindowMappingFnRunner.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/WindowMappingFnRunner.java
@@ -20,8 +20,8 @@
import com.google.auto.service.AutoService;
import java.io.IOException;
import java.util.Map;
+import org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.PTransform;
-import org.apache.beam.model.pipeline.v1.RunnerApi.SdkFunctionSpec;
import org.apache.beam.model.pipeline.v1.RunnerApi.StandardPTransforms;
import org.apache.beam.runners.core.construction.BeamUrns;
import org.apache.beam.runners.core.construction.PCollectionViewTranslation;
@@ -65,8 +65,7 @@
static <T, W1 extends BoundedWindow, W2 extends BoundedWindow>
ThrowingFunction<KV<T, W1>, KV<T, W2>> createMapFunctionForPTransform(
String ptransformId, PTransform pTransform) throws IOException {
- SdkFunctionSpec windowMappingFnPayload =
- SdkFunctionSpec.parseFrom(pTransform.getSpec().getPayload());
+ FunctionSpec windowMappingFnPayload = FunctionSpec.parseFrom(pTransform.getSpec().getPayload());
WindowMappingFn<W2> windowMappingFn =
(WindowMappingFn<W2>)
PCollectionViewTranslation.windowMappingFnFromProto(windowMappingFnPayload);
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/WindowMergingFnRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/WindowMergingFnRunner.java
index d97f8de..ec79163 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/WindowMergingFnRunner.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/WindowMergingFnRunner.java
@@ -75,8 +75,8 @@
ThrowingFunction<KV<T, Iterable<W>>, KV<T, KV<Iterable<W>, Iterable<KV<W, Iterable<W>>>>>>
createMapFunctionForPTransform(String ptransformId, PTransform ptransform)
throws IOException {
- RunnerApi.SdkFunctionSpec payload =
- RunnerApi.SdkFunctionSpec.parseFrom(ptransform.getSpec().getPayload());
+ RunnerApi.FunctionSpec payload =
+ RunnerApi.FunctionSpec.parseFrom(ptransform.getSpec().getPayload());
WindowFn<?, W> windowFn =
(WindowFn<?, W>) WindowingStrategyTranslation.windowFnFromProto(payload);
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
index 11222859..a258e01 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
@@ -21,11 +21,13 @@
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.ServiceLoader;
import java.util.Set;
+import java.util.WeakHashMap;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.Phaser;
@@ -77,12 +79,20 @@
import org.slf4j.LoggerFactory;
/**
- * Processes {@link BeamFnApi.ProcessBundleRequest}s by materializing the set of required runners
- * for each {@link RunnerApi.FunctionSpec}, wiring them together based upon the {@code input} and
- * {@code output} map definitions.
+ * Processes {@link BeamFnApi.ProcessBundleRequest}s and {@link
+ * BeamFnApi.ProcessBundleSplitRequest}s.
*
- * <p>Finally executes the DAG based graph by starting all runners in reverse topological order, and
- * finishing all runners in forward topological order.
+ * <p>{@link BeamFnApi.ProcessBundleRequest}s use a {@link BundleProcessorCache cache} to
+ * find/create a {@link BundleProcessor}. The creation of a {@link BundleProcessor} uses the
+ * associated {@link BeamFnApi.ProcessBundleDescriptor} definition, creating runners for each {@link
+ * RunnerApi.FunctionSpec} and wiring them together based upon the {@code input} and {@code output}
+ * map definitions. The {@link BundleProcessor} executes the resulting DAG by starting all runners
+ * in reverse topological order and finishing all runners in forward topological order.
+ *
+ * <p>{@link BeamFnApi.ProcessBundleSplitRequest}s find an {@code active} {@link BundleProcessor}
+ * associated with a currently processing {@link BeamFnApi.ProcessBundleRequest} and use it to
+ * perform a split request. See <a href="https://s.apache.org/beam-breaking-fusion">breaking the
+ * fusion barrier</a> for further details.
*/
public class ProcessBundleHandler {
@@ -231,6 +241,7 @@
BundleProcessor bundleProcessor =
bundleProcessorCache.get(
request.getProcessBundle().getProcessBundleDescriptorId(),
+ request.getInstructionId(),
() -> {
try {
return createBundleProcessor(
@@ -240,7 +251,6 @@
throw new RuntimeException(e);
}
});
- bundleProcessor.setInstructionId(request.getInstructionId());
PTransformFunctionRegistry startFunctionRegistry = bundleProcessor.getStartFunctionRegistry();
PTransformFunctionRegistry finishFunctionRegistry = bundleProcessor.getFinishFunctionRegistry();
Multimap<String, DelayedBundleApplication> allResiduals = bundleProcessor.getAllResiduals();
@@ -294,6 +304,19 @@
return BeamFnApi.InstructionResponse.newBuilder().setProcessBundle(response);
}
+ /** Splits an active bundle. */
+ public BeamFnApi.InstructionResponse.Builder split(BeamFnApi.InstructionRequest request) {
+ BundleProcessor bundleProcessor =
+ bundleProcessorCache.find(request.getProcessBundleSplit().getInstructionId());
+ if (bundleProcessor == null) {
+ throw new IllegalStateException(
+ String.format(
+ "Unable to find active bundle for instruction id %s.",
+ request.getProcessBundleSplit().getInstructionId()));
+ }
+ throw new UnsupportedOperationException("TODO: BEAM-3836, support splitting within SDK.");
+ }
+
/** Shutdown the bundles, running the tearDown() functions. */
public void shutdown() throws Exception {
bundleProcessorCache.shutdown();
@@ -406,9 +429,18 @@
public static class BundleProcessorCache {
private final Map<String, ConcurrentLinkedQueue<BundleProcessor>> cachedBundleProcessors;
+ private final Map<String, BundleProcessor> activeBundleProcessors;
+
BundleProcessorCache() {
this.cachedBundleProcessors = Maps.newConcurrentMap();
+ // We specifically use a weak hash map so that references will automatically go out of scope
+ // and not need to be freed explicitly from the cache.
+ this.activeBundleProcessors = Collections.synchronizedMap(new WeakHashMap<>());
}
Map<String, ConcurrentLinkedQueue<BundleProcessor>> getCachedBundleProcessors() {
@@ -417,26 +449,43 @@
/**
* Get a {@link BundleProcessor} from the cache if it's available. Otherwise, create one using
- * the specified bundleProcessorSupplier.
+ * the specified {@code bundleProcessorSupplier}. The {@link BundleProcessor} that is returned
+ * can be {@link #find found} using the specified {@code instructionId} while the bundle is active.
+ *
+ * <p>The caller is responsible for calling {@link #release} to return the bundle processor back
+ * to this cache if and only if the bundle processor successfully processed a bundle.
*/
BundleProcessor get(
- String bundleDescriptorId, Supplier<BundleProcessor> bundleProcessorSupplier) {
+ String bundleDescriptorId,
+ String instructionId,
+ Supplier<BundleProcessor> bundleProcessorSupplier) {
ConcurrentLinkedQueue<BundleProcessor> bundleProcessors =
cachedBundleProcessors.computeIfAbsent(
bundleDescriptorId, descriptorId -> new ConcurrentLinkedQueue<>());
BundleProcessor bundleProcessor = bundleProcessors.poll();
- if (bundleProcessor != null) {
- return bundleProcessor;
+ if (bundleProcessor == null) {
+ bundleProcessor = bundleProcessorSupplier.get();
}
- return bundleProcessorSupplier.get();
+ bundleProcessor.setInstructionId(instructionId);
+ activeBundleProcessors.put(instructionId, bundleProcessor);
+ return bundleProcessor;
+ }
+
+ /**
+ * Finds an active bundle processor for the specified {@code instructionId} or null if one could
+ * not be found.
+ */
+ BundleProcessor find(String instructionId) {
+ return activeBundleProcessors.get(instructionId);
}
/**
* Add a {@link BundleProcessor} to cache. The {@link BundleProcessor} will be reset before
- * being added to the cache.
+ * being added to the cache and will be marked as inactive.
*/
void release(String bundleDescriptorId, BundleProcessor bundleProcessor) {
+ activeBundleProcessors.remove(bundleProcessor.getInstructionId());
bundleProcessor.reset();
cachedBundleProcessors.get(bundleDescriptorId).add(bundleProcessor);
}
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/stream/HarnessStreamObserverFactories.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/stream/HarnessStreamObserverFactories.java
index 15b28f6..7f21991 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/stream/HarnessStreamObserverFactories.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/stream/HarnessStreamObserverFactories.java
@@ -29,6 +29,11 @@
* to use in the java SDK harness.
*/
public abstract class HarnessStreamObserverFactories {
+
+ /**
+ * Creates an {@link OutboundObserverFactory} for client-side RPCs. All {@link StreamObserver}s
+ * created by this factory are thread safe.
+ */
public static OutboundObserverFactory fromOptions(PipelineOptions options) {
List<String> experiments = options.as(ExperimentalOptions.class).getExperiments();
if (experiments != null && experiments.contains("beam_fn_api_buffered_stream")) {
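A minimal sketch of how the factory above is obtained; the buffered-observer path is taken only when the beam_fn_api_buffered_stream experiment is present, and the option values here are illustrative:

    PipelineOptions options =
        PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api_buffered_stream").create();
    OutboundObserverFactory observerFactory = HarnessStreamObserverFactories.fromOptions(options);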
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/FnApiDoFnRunnerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/FnApiDoFnRunnerTest.java
index 4533283..0f4b375 100644
--- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/FnApiDoFnRunnerTest.java
+++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/FnApiDoFnRunnerTest.java
@@ -25,6 +25,7 @@
import static org.hamcrest.Matchers.hasSize;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
@@ -34,16 +35,22 @@
import java.util.ArrayList;
import java.util.List;
import java.util.ServiceLoader;
+import org.apache.beam.fn.harness.control.BundleSplitListener;
import org.apache.beam.fn.harness.data.PCollectionConsumerRegistry;
import org.apache.beam.fn.harness.data.PTransformFunctionRegistry;
import org.apache.beam.fn.harness.state.FakeBeamFnStateClient;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.BundleApplication;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.DelayedBundleApplication;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey;
import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo;
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.model.pipeline.v1.RunnerApi.Environment;
import org.apache.beam.runners.core.construction.PTransformTranslation;
+import org.apache.beam.runners.core.construction.ParDoTranslation;
import org.apache.beam.runners.core.construction.PipelineTranslation;
import org.apache.beam.runners.core.construction.SdkComponents;
+import org.apache.beam.runners.core.construction.graph.ProtoOverrides;
+import org.apache.beam.runners.core.construction.graph.SplittableParDoExpander;
import org.apache.beam.runners.core.metrics.ExecutionStateTracker;
import org.apache.beam.runners.core.metrics.MetricUpdates.MetricUpdate;
import org.apache.beam.runners.core.metrics.MetricsContainerImpl;
@@ -54,6 +61,7 @@
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.fn.data.FnDataReceiver;
import org.apache.beam.sdk.function.ThrowingRunnable;
+import org.apache.beam.sdk.io.range.OffsetRange;
import org.apache.beam.sdk.metrics.Counter;
import org.apache.beam.sdk.metrics.MetricKey;
import org.apache.beam.sdk.metrics.MetricName;
@@ -76,6 +84,8 @@
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.View;
+import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker;
+import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
@@ -277,7 +287,6 @@
.put(bagUserStateKey("combine", "Y"), encode("Y1Y2"))
.build(),
fakeClient.getData());
- mainOutputValues.clear();
}
/** Produces a bag user {@link StateKey} for the test PTransform id in the global window. */
@@ -446,7 +455,6 @@
// Assert that state data did not change
assertEquals(stateData, fakeClient.getData());
- mainOutputValues.clear();
}
private static class TestSideInputIsAccessibleForDownstreamCallersDoFn
@@ -957,7 +965,6 @@
.put(bagUserStateKey("bag", "C"), encode("C0", "processing"))
.build(),
fakeClient.getData());
- mainOutputValues.clear();
}
private <T> WindowedValue<T> valueInWindow(T value, BoundedWindow window) {
@@ -1020,4 +1027,350 @@
}
fail("Expected registrar not found.");
}
+
+ static class TestSplittableDoFn extends DoFn<String, String> {
+ private final PCollectionView<String> singletonSideInput;
+
+ private TestSplittableDoFn(PCollectionView<String> singletonSideInput) {
+ this.singletonSideInput = singletonSideInput;
+ }
+
+ @ProcessElement
+ public ProcessContinuation processElement(
+ ProcessContext context, RestrictionTracker<OffsetRange, Long> tracker) {
+ int upperBound = Integer.parseInt(context.sideInput(singletonSideInput));
+ for (int i = 0; i < upperBound; ++i) {
+ if (tracker.tryClaim((long) i)) {
+ context.output(context.element() + ":" + i);
+ }
+ }
+ if (tracker.currentRestriction().getTo() > upperBound) {
+ return ProcessContinuation.resume().withResumeDelay(Duration.millis(42L));
+ } else {
+ return ProcessContinuation.stop();
+ }
+ }
+
+ @GetInitialRestriction
+ public OffsetRange restriction(String element) {
+ return new OffsetRange(0, Integer.parseInt(element));
+ }
+
+ @NewTracker
+ public RestrictionTracker<OffsetRange, Long> newTracker(OffsetRange restriction) {
+ return new OffsetRangeTracker(restriction);
+ }
+
+ @SplitRestriction
+ public void splitRange(
+ String element, OffsetRange range, OutputReceiver<OffsetRange> receiver) {
+ receiver.output(new OffsetRange(range.getFrom(), (range.getFrom() + range.getTo()) / 2));
+ receiver.output(new OffsetRange((range.getFrom() + range.getTo()) / 2, range.getTo()));
+ }
+ }
+
+ @Test
+ public void testProcessElementForSizedElementAndRestriction() throws Exception {
+ Pipeline p = Pipeline.create();
+ PCollection<String> valuePCollection = p.apply(Create.of("unused"));
+ PCollectionView<String> singletonSideInputView = valuePCollection.apply(View.asSingleton());
+ valuePCollection.apply(
+ TEST_TRANSFORM_ID,
+ ParDo.of(new TestSplittableDoFn(singletonSideInputView))
+ .withSideInputs(singletonSideInputView));
+
+ RunnerApi.Pipeline pProto =
+ ProtoOverrides.updateTransform(
+ PTransformTranslation.PAR_DO_TRANSFORM_URN,
+ PipelineTranslation.toProto(p, SdkComponents.create(p.getOptions()), true),
+ SplittableParDoExpander.createSizedReplacement());
+ String expandedTransformId =
+ Iterables.find(
+ pProto.getComponents().getTransformsMap().entrySet(),
+ entry ->
+ entry
+ .getValue()
+ .getSpec()
+ .getUrn()
+ .equals(
+ PTransformTranslation
+ .SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN)
+ && entry.getValue().getUniqueName().contains(TEST_TRANSFORM_ID))
+ .getKey();
+ RunnerApi.PTransform pTransform =
+ pProto.getComponents().getTransformsOrThrow(expandedTransformId);
+ String inputPCollectionId =
+ pTransform.getInputsOrThrow(ParDoTranslation.getMainInputName(pTransform));
+ String outputPCollectionId = pTransform.getOutputsOrThrow("output");
+
+ ImmutableMap<StateKey, ByteString> stateData =
+ ImmutableMap.of(
+ multimapSideInputKey(singletonSideInputView.getTagInternal().getId(), ByteString.EMPTY),
+ encode("3"));
+
+ FakeBeamFnStateClient fakeClient = new FakeBeamFnStateClient(stateData);
+
+ List<WindowedValue<String>> mainOutputValues = new ArrayList<>();
+ MetricsContainerStepMap metricsContainerRegistry = new MetricsContainerStepMap();
+ PCollectionConsumerRegistry consumers =
+ new PCollectionConsumerRegistry(
+ metricsContainerRegistry, mock(ExecutionStateTracker.class));
+ consumers.register(
+ outputPCollectionId,
+ TEST_TRANSFORM_ID,
+ (FnDataReceiver) (FnDataReceiver<WindowedValue<String>>) mainOutputValues::add);
+ PTransformFunctionRegistry startFunctionRegistry =
+ new PTransformFunctionRegistry(
+ mock(MetricsContainerStepMap.class), mock(ExecutionStateTracker.class), "start");
+ PTransformFunctionRegistry finishFunctionRegistry =
+ new PTransformFunctionRegistry(
+ mock(MetricsContainerStepMap.class), mock(ExecutionStateTracker.class), "finish");
+ List<ThrowingRunnable> teardownFunctions = new ArrayList<>();
+ List<BundleApplication> primarySplits = new ArrayList<>();
+ List<DelayedBundleApplication> residualSplits = new ArrayList<>();
+
+ new FnApiDoFnRunner.Factory<>()
+ .createRunnerForPTransform(
+ PipelineOptionsFactory.create(),
+ null /* beamFnDataClient */,
+ fakeClient,
+ TEST_TRANSFORM_ID,
+ pTransform,
+ Suppliers.ofInstance("57L")::get,
+ pProto.getComponents().getPcollectionsMap(),
+ pProto.getComponents().getCodersMap(),
+ pProto.getComponents().getWindowingStrategiesMap(),
+ consumers,
+ startFunctionRegistry,
+ finishFunctionRegistry,
+ teardownFunctions::add,
+ new BundleSplitListener() {
+ @Override
+ public void split(
+ List<BundleApplication> primaryRoots,
+ List<DelayedBundleApplication> residualRoots) {
+ primarySplits.addAll(primaryRoots);
+ residualSplits.addAll(residualRoots);
+ }
+ });
+
+ Iterables.getOnlyElement(startFunctionRegistry.getFunctions()).run();
+ mainOutputValues.clear();
+
+ assertThat(consumers.keySet(), containsInAnyOrder(inputPCollectionId, outputPCollectionId));
+
+ FnDataReceiver<WindowedValue<?>> mainInput =
+ consumers.getMultiplexingConsumer(inputPCollectionId);
+ mainInput.accept(valueInGlobalWindow(KV.of(KV.of("5", new OffsetRange(0, 5)), 5.0)));
+ BundleApplication primaryRoot = Iterables.getOnlyElement(primarySplits);
+ DelayedBundleApplication residualRoot = Iterables.getOnlyElement(residualSplits);
+ assertEquals(ParDoTranslation.getMainInputName(pTransform), primaryRoot.getInputId());
+ assertEquals(TEST_TRANSFORM_ID, primaryRoot.getTransformId());
+ assertEquals(
+ ParDoTranslation.getMainInputName(pTransform), residualRoot.getApplication().getInputId());
+ assertEquals(TEST_TRANSFORM_ID, residualRoot.getApplication().getTransformId());
+ primarySplits.clear();
+ residualSplits.clear();
+
+ mainInput.accept(valueInGlobalWindow(KV.of(KV.of("2", new OffsetRange(0, 2)), 2.0)));
+ assertThat(
+ mainOutputValues,
+ contains(
+ valueInGlobalWindow("5:0"),
+ valueInGlobalWindow("5:1"),
+ valueInGlobalWindow("5:2"),
+ valueInGlobalWindow("2:0"),
+ valueInGlobalWindow("2:1")));
+ assertTrue(primarySplits.isEmpty());
+ assertTrue(residualSplits.isEmpty());
+ mainOutputValues.clear();
+
+ Iterables.getOnlyElement(finishFunctionRegistry.getFunctions()).run();
+ assertThat(mainOutputValues, empty());
+
+ Iterables.getOnlyElement(teardownFunctions).run();
+ assertThat(mainOutputValues, empty());
+
+ // Assert that state data did not change
+ assertEquals(stateData, fakeClient.getData());
+ }
+
+ @Test
+ public void testProcessElementForPairWithRestriction() throws Exception {
+ Pipeline p = Pipeline.create();
+ PCollection<String> valuePCollection = p.apply(Create.of("unused"));
+ PCollectionView<String> singletonSideInputView = valuePCollection.apply(View.asSingleton());
+ valuePCollection.apply(
+ TEST_TRANSFORM_ID,
+ ParDo.of(new TestSplittableDoFn(singletonSideInputView))
+ .withSideInputs(singletonSideInputView));
+
+ RunnerApi.Pipeline pProto =
+ ProtoOverrides.updateTransform(
+ PTransformTranslation.PAR_DO_TRANSFORM_URN,
+ PipelineTranslation.toProto(p, SdkComponents.create(p.getOptions()), true),
+ SplittableParDoExpander.createSizedReplacement());
+ String expandedTransformId =
+ Iterables.find(
+ pProto.getComponents().getTransformsMap().entrySet(),
+ entry ->
+ entry
+ .getValue()
+ .getSpec()
+ .getUrn()
+ .equals(PTransformTranslation.SPLITTABLE_PAIR_WITH_RESTRICTION_URN)
+ && entry.getValue().getUniqueName().contains(TEST_TRANSFORM_ID))
+ .getKey();
+ RunnerApi.PTransform pTransform =
+ pProto.getComponents().getTransformsOrThrow(expandedTransformId);
+ String inputPCollectionId =
+ pTransform.getInputsOrThrow(ParDoTranslation.getMainInputName(pTransform));
+ String outputPCollectionId = Iterables.getOnlyElement(pTransform.getOutputsMap().values());
+
+ FakeBeamFnStateClient fakeClient = new FakeBeamFnStateClient(ImmutableMap.of());
+
+ List<WindowedValue<KV<String, OffsetRange>>> mainOutputValues = new ArrayList<>();
+ MetricsContainerStepMap metricsContainerRegistry = new MetricsContainerStepMap();
+ PCollectionConsumerRegistry consumers =
+ new PCollectionConsumerRegistry(
+ metricsContainerRegistry, mock(ExecutionStateTracker.class));
+ consumers.register(outputPCollectionId, TEST_TRANSFORM_ID, ((List) mainOutputValues)::add);
+ PTransformFunctionRegistry startFunctionRegistry =
+ new PTransformFunctionRegistry(
+ mock(MetricsContainerStepMap.class), mock(ExecutionStateTracker.class), "start");
+ PTransformFunctionRegistry finishFunctionRegistry =
+ new PTransformFunctionRegistry(
+ mock(MetricsContainerStepMap.class), mock(ExecutionStateTracker.class), "finish");
+ List<ThrowingRunnable> teardownFunctions = new ArrayList<>();
+
+ new FnApiDoFnRunner.Factory<>()
+ .createRunnerForPTransform(
+ PipelineOptionsFactory.create(),
+ null /* beamFnDataClient */,
+ fakeClient,
+ TEST_TRANSFORM_ID,
+ pTransform,
+ Suppliers.ofInstance("57L")::get,
+ pProto.getComponents().getPcollectionsMap(),
+ pProto.getComponents().getCodersMap(),
+ pProto.getComponents().getWindowingStrategiesMap(),
+ consumers,
+ startFunctionRegistry,
+ finishFunctionRegistry,
+ teardownFunctions::add,
+ null /* bundleSplitListener */);
+
+ Iterables.getOnlyElement(startFunctionRegistry.getFunctions()).run();
+ mainOutputValues.clear();
+
+ assertThat(consumers.keySet(), containsInAnyOrder(inputPCollectionId, outputPCollectionId));
+
+ FnDataReceiver<WindowedValue<?>> mainInput =
+ consumers.getMultiplexingConsumer(inputPCollectionId);
+ mainInput.accept(valueInGlobalWindow("5"));
+ mainInput.accept(valueInGlobalWindow("2"));
+ assertThat(
+ mainOutputValues,
+ contains(
+ valueInGlobalWindow(KV.of("5", new OffsetRange(0, 5))),
+ valueInGlobalWindow(KV.of("2", new OffsetRange(0, 2)))));
+ mainOutputValues.clear();
+
+ Iterables.getOnlyElement(finishFunctionRegistry.getFunctions()).run();
+ assertThat(mainOutputValues, empty());
+
+ Iterables.getOnlyElement(teardownFunctions).run();
+ assertThat(mainOutputValues, empty());
+ }
+
+ @Test
+ public void testProcessElementForSplitAndSizeRestriction() throws Exception {
+ Pipeline p = Pipeline.create();
+ PCollection<String> valuePCollection = p.apply(Create.of("unused"));
+ PCollectionView<String> singletonSideInputView = valuePCollection.apply(View.asSingleton());
+ valuePCollection.apply(
+ TEST_TRANSFORM_ID,
+ ParDo.of(new TestSplittableDoFn(singletonSideInputView))
+ .withSideInputs(singletonSideInputView));
+
+ RunnerApi.Pipeline pProto =
+ ProtoOverrides.updateTransform(
+ PTransformTranslation.PAR_DO_TRANSFORM_URN,
+ PipelineTranslation.toProto(p, SdkComponents.create(p.getOptions()), true),
+ SplittableParDoExpander.createSizedReplacement());
+ String expandedTransformId =
+ Iterables.find(
+ pProto.getComponents().getTransformsMap().entrySet(),
+ entry ->
+ entry
+ .getValue()
+ .getSpec()
+ .getUrn()
+ .equals(
+ PTransformTranslation.SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN)
+ && entry.getValue().getUniqueName().contains(TEST_TRANSFORM_ID))
+ .getKey();
+ RunnerApi.PTransform pTransform =
+ pProto.getComponents().getTransformsOrThrow(expandedTransformId);
+ String inputPCollectionId =
+ pTransform.getInputsOrThrow(ParDoTranslation.getMainInputName(pTransform));
+ String outputPCollectionId = Iterables.getOnlyElement(pTransform.getOutputsMap().values());
+
+ FakeBeamFnStateClient fakeClient = new FakeBeamFnStateClient(ImmutableMap.of());
+
+ List<WindowedValue<KV<KV<String, OffsetRange>, Double>>> mainOutputValues = new ArrayList<>();
+ MetricsContainerStepMap metricsContainerRegistry = new MetricsContainerStepMap();
+ PCollectionConsumerRegistry consumers =
+ new PCollectionConsumerRegistry(
+ metricsContainerRegistry, mock(ExecutionStateTracker.class));
+ consumers.register(outputPCollectionId, TEST_TRANSFORM_ID, ((List) mainOutputValues)::add);
+ PTransformFunctionRegistry startFunctionRegistry =
+ new PTransformFunctionRegistry(
+ mock(MetricsContainerStepMap.class), mock(ExecutionStateTracker.class), "start");
+ PTransformFunctionRegistry finishFunctionRegistry =
+ new PTransformFunctionRegistry(
+ mock(MetricsContainerStepMap.class), mock(ExecutionStateTracker.class), "finish");
+ List<ThrowingRunnable> teardownFunctions = new ArrayList<>();
+
+ new FnApiDoFnRunner.Factory<>()
+ .createRunnerForPTransform(
+ PipelineOptionsFactory.create(),
+ null /* beamFnDataClient */,
+ fakeClient,
+ TEST_TRANSFORM_ID,
+ pTransform,
+ Suppliers.ofInstance("57L")::get,
+ pProto.getComponents().getPcollectionsMap(),
+ pProto.getComponents().getCodersMap(),
+ pProto.getComponents().getWindowingStrategiesMap(),
+ consumers,
+ startFunctionRegistry,
+ finishFunctionRegistry,
+ teardownFunctions::add,
+ null /* bundleSplitListener */);
+
+ Iterables.getOnlyElement(startFunctionRegistry.getFunctions()).run();
+ mainOutputValues.clear();
+
+ assertThat(consumers.keySet(), containsInAnyOrder(inputPCollectionId, outputPCollectionId));
+
+ FnDataReceiver<WindowedValue<?>> mainInput =
+ consumers.getMultiplexingConsumer(inputPCollectionId);
+ mainInput.accept(valueInGlobalWindow(KV.of("5", new OffsetRange(0, 5))));
+ mainInput.accept(valueInGlobalWindow(KV.of("2", new OffsetRange(0, 2))));
+ assertThat(
+ mainOutputValues,
+ contains(
+ valueInGlobalWindow(KV.of(KV.of("5", new OffsetRange(0, 2)), 2.0)),
+ valueInGlobalWindow(KV.of(KV.of("5", new OffsetRange(2, 5)), 3.0)),
+ valueInGlobalWindow(KV.of(KV.of("2", new OffsetRange(0, 1)), 1.0)),
+ valueInGlobalWindow(KV.of(KV.of("2", new OffsetRange(1, 2)), 1.0))));
+ mainOutputValues.clear();
+
+ Iterables.getOnlyElement(finishFunctionRegistry.getFunctions()).run();
+ assertThat(mainOutputValues, empty());
+
+ Iterables.getOnlyElement(teardownFunctions).run();
+ assertThat(mainOutputValues, empty());
+ }
}
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java
index 1a460e6..a7a233d 100644
--- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java
+++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java
@@ -21,6 +21,8 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.equalTo;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
@@ -222,8 +224,11 @@
@Override
BundleProcessor get(
- String bundleDescriptorId, Supplier<BundleProcessor> bundleProcessorSupplier) {
- return new TestBundleProcessor(super.get(bundleDescriptorId, bundleProcessorSupplier));
+ String bundleDescriptorId,
+ String instructionId,
+ Supplier<BundleProcessor> bundleProcessorSupplier) {
+ return new TestBundleProcessor(
+ super.get(bundleDescriptorId, instructionId, bundleProcessorSupplier));
}
}
@@ -325,9 +330,7 @@
SerializableUtils.serializeToByteArray(doFnWithExecutionInformation)))
.build();
RunnerApi.ParDoPayload parDoPayload =
- RunnerApi.ParDoPayload.newBuilder()
- .setDoFn(RunnerApi.SdkFunctionSpec.newBuilder().setSpec(functionSpec))
- .build();
+ RunnerApi.ParDoPayload.newBuilder().setDoFn(functionSpec).build();
BeamFnApi.ProcessBundleDescriptor processBundleDescriptor =
BeamFnApi.ProcessBundleDescriptor.newBuilder()
.putTransforms(
@@ -356,11 +359,8 @@
WindowingStrategy.newBuilder()
.setWindowCoderId("window-strategy-coder")
.setWindowFn(
- RunnerApi.SdkFunctionSpec.newBuilder()
- .setSpec(
- RunnerApi.FunctionSpec.newBuilder()
- .setUrn("beam:windowfn:global_windows:v0.1"))
- .build())
+ RunnerApi.FunctionSpec.newBuilder()
+ .setUrn("beam:windowfn:global_windows:v0.1"))
.setOutputTime(RunnerApi.OutputTime.Enum.END_OF_WINDOW)
.setAccumulationMode(RunnerApi.AccumulationMode.Enum.ACCUMULATING)
.setTrigger(
@@ -488,6 +488,24 @@
}
@Test
+ public void testBundleProcessorIsFoundWhenActive() {
+ BundleProcessor bundleProcessor = mock(BundleProcessor.class);
+ when(bundleProcessor.getInstructionId()).thenReturn("known");
+ BundleProcessorCache cache = new BundleProcessorCache();
+
+ // Check that an unknown bundle processor is not found
+ assertNull(cache.find("unknown"));
+
+ // Once it is active, ensure the bundle processor is found
+ cache.get("descriptorId", "known", () -> bundleProcessor);
+ assertSame(bundleProcessor, cache.find("known"));
+
+ // After it is released, ensure the bundle processor is no longer found
+ cache.release("descriptorId", bundleProcessor);
+ assertNull(cache.find("known"));
+ }
+
+ @Test
public void testBundleProcessorReset() {
PTransformFunctionRegistry startFunctionRegistry = mock(PTransformFunctionRegistry.class);
PTransformFunctionRegistry finishFunctionRegistry = mock(PTransformFunctionRegistry.class);
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/BeamFnDataGrpcClientTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/BeamFnDataGrpcClientTest.java
index 672d41b..deb6218 100644
--- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/BeamFnDataGrpcClientTest.java
+++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/BeamFnDataGrpcClientTest.java
@@ -292,7 +292,7 @@
BeamFnDataGrpcClient clientFactory =
new BeamFnDataGrpcClient(
PipelineOptionsFactory.fromArgs(
- new String[] {"--experiments=beam_fn_api_data_buffer_size_limit=20"})
+ new String[] {"--experiments=data_buffer_size_limit=20"})
.create(),
(Endpoints.ApiServiceDescriptor descriptor) -> channel,
OutboundObserverFactory.trivial());
diff --git a/sdks/java/io/amazon-web-services/build.gradle b/sdks/java/io/amazon-web-services/build.gradle
index d7e4139..6948a58 100644
--- a/sdks/java/io/amazon-web-services/build.gradle
+++ b/sdks/java/io/amazon-web-services/build.gradle
@@ -38,7 +38,7 @@
compile library.java.jackson_annotations
compile library.java.jackson_databind
compile library.java.slf4j_api
- runtime 'commons-codec:commons-codec:1.9'
+ runtime library.java.commons_codec
runtime "org.apache.httpcomponents:httpclient:4.5.6"
testCompile project(path: ":sdks:java:core", configuration: "shadowTest")
testCompile project(path: ":sdks:java:io:common", configuration: "testRuntime")
diff --git a/sdks/java/io/bigquery-io-perf-tests/src/test/java/org/apache/beam/sdk/bigqueryioperftests/BigQueryIOIT.java b/sdks/java/io/bigquery-io-perf-tests/src/test/java/org/apache/beam/sdk/bigqueryioperftests/BigQueryIOIT.java
index 843ba78..a12dd41 100644
--- a/sdks/java/io/bigquery-io-perf-tests/src/test/java/org/apache/beam/sdk/bigqueryioperftests/BigQueryIOIT.java
+++ b/sdks/java/io/bigquery-io-perf-tests/src/test/java/org/apache/beam/sdk/bigqueryioperftests/BigQueryIOIT.java
@@ -163,7 +163,7 @@
BigQueryIO.Write.Method method = BigQueryIO.Write.Method.valueOf(options.getWriteMethod());
pipeline
.apply("Read from source", Read.from(new SyntheticBoundedSource(sourceOptions)))
- .apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, WRITE_TIME_METRIC_NAME)))
+ .apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, metricName)))
.apply("Map records", ParDo.of(new MapKVToV()))
.apply(
"Write to BQ",
diff --git a/sdks/java/io/cassandra/build.gradle b/sdks/java/io/cassandra/build.gradle
index 36dbede..6f02904 100644
--- a/sdks/java/io/cassandra/build.gradle
+++ b/sdks/java/io/cassandra/build.gradle
@@ -43,6 +43,7 @@
// for embedded cassandra
testCompile group: 'info.archinnov', name: 'achilles-junit', version: "$achilles_version"
+ testCompile library.java.commons_io
testCompile library.java.jackson_jaxb_annotations
testRuntimeOnly library.java.slf4j_jdk14
testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")
diff --git a/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java b/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java
index f3577a3..d2676ef 100644
--- a/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java
+++ b/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java
@@ -32,6 +32,7 @@
import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;
+import com.datastax.driver.core.exceptions.NoHostAvailableException;
import com.datastax.driver.core.querybuilder.QueryBuilder;
import com.datastax.driver.mapping.annotations.Column;
import com.datastax.driver.mapping.annotations.Computed;
@@ -39,8 +40,12 @@
import com.datastax.driver.mapping.annotations.Table;
import info.archinnov.achilles.embedded.CassandraEmbeddedServerBuilder;
import info.archinnov.achilles.embedded.CassandraShutDownHook;
+import java.io.File;
+import java.io.IOException;
import java.io.Serializable;
import java.math.BigInteger;
+import java.nio.file.Files;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
@@ -75,14 +80,12 @@
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.ListeningExecutorService;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.MoreExecutors;
import org.apache.cassandra.service.StorageServiceMBean;
+import org.apache.commons.io.FileUtils;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
-import org.junit.ClassRule;
-import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.slf4j.Logger;
@@ -90,7 +93,6 @@
/** Tests of {@link CassandraIO}. */
@RunWith(JUnit4.class)
-@Ignore("Ignore until https://issues.apache.org/jira/browse/BEAM-8025 is resolved")
public class CassandraIOTest implements Serializable {
private static final long NUM_ROWS = 20L;
private static final String CASSANDRA_KEYSPACE = "beam_ks";
@@ -100,13 +102,15 @@
private static final String STORAGE_SERVICE_MBEAN = "org.apache.cassandra.db:type=StorageService";
private static final float ACCEPTABLE_EMPTY_SPLITS_PERCENTAGE = 0.5f;
private static final int FLUSH_TIMEOUT = 30000;
+ private static final int JMX_CONF_TIMEOUT = 1000;
private static int jmxPort;
private static int cassandraPort;
private static Cluster cluster;
private static Session session;
- @ClassRule public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder();
+ private static final String TEMPORARY_FOLDER =
+ System.getProperty("java.io.tmpdir") + "/embedded-cassandra/";
@Rule public transient TestPipeline pipeline = TestPipeline.create();
private static CassandraShutDownHook shutdownHook;
@@ -114,13 +118,17 @@
public static void beforeClass() throws Exception {
jmxPort = NetworkTestHelper.getAvailableLocalPort();
shutdownHook = new CassandraShutDownHook();
- // randomized port at startup
- String data = TEMPORARY_FOLDER.newFolder("embedded-cassandra", "data").getPath();
- String commitLog = TEMPORARY_FOLDER.newFolder("embedded-cassandra", "commit-log").getPath();
- String cdcRaw = TEMPORARY_FOLDER.newFolder("embedded-cassandra", "cdc-raw").getPath();
- String hints = TEMPORARY_FOLDER.newFolder("embedded-cassandra", "hints").getPath();
- String savedCache = TEMPORARY_FOLDER.newFolder("embedded-cassandra", "saved-cache").getPath();
- cluster =
+ String data = TEMPORARY_FOLDER + "/data";
+ Files.createDirectories(Paths.get(data));
+ String commitLog = TEMPORARY_FOLDER + "/commit-log";
+ Files.createDirectories(Paths.get(commitLog));
+ String cdcRaw = TEMPORARY_FOLDER + "/cdc-raw";
+ Files.createDirectories(Paths.get(cdcRaw));
+ String hints = TEMPORARY_FOLDER + "/hints";
+ Files.createDirectories(Paths.get(hints));
+ String savedCache = TEMPORARY_FOLDER + "/saved-cache";
+ Files.createDirectories(Paths.get(savedCache));
+ CassandraEmbeddedServerBuilder builder =
CassandraEmbeddedServerBuilder.builder()
.withKeyspaceName(CASSANDRA_KEYSPACE)
.withDataFolder(data)
@@ -129,18 +137,40 @@
.withHintsFolder(hints)
.withSavedCachesFolder(savedCache)
.withShutdownHook(shutdownHook)
+ // randomized CQL port at startup
.withJMXPort(jmxPort)
- .buildNativeCluster();
+ .cleanDataFilesAtStartup(false);
+
+ // under load we can get a NoHostAvailableException at cluster creation,
+ // so retry creating it every second, up to 3 times.
+ cluster = buildCluster(builder);
cassandraPort = cluster.getConfiguration().getProtocolOptions().getPort();
session = CassandraIOTest.cluster.newSession();
-
insertData();
+ disableAutoCompaction();
+ }
+
+ private static Cluster buildCluster(CassandraEmbeddedServerBuilder builder) {
+ int tried = 0;
+ while (tried < 3) {
+ try {
+ return builder.buildNativeCluster();
+ } catch (NoHostAvailableException e) {
+ tried++;
+ try {
+ Thread.sleep(1000L);
+ } catch (InterruptedException e1) {
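+ // interrupted while pausing between retries; ignore and try the next attempt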
+ }
+ }
+ }
+ throw new RuntimeException("Unable to create embedded Cassandra cluster");
}
@AfterClass
- public static void afterClass() throws InterruptedException {
+ public static void afterClass() throws InterruptedException, IOException {
shutdownHook.shutDownNow();
+ FileUtils.deleteDirectory(new File(TEMPORARY_FOLDER));
}
private static void insertData() throws Exception {
@@ -211,6 +241,27 @@
Thread.sleep(FLUSH_TIMEOUT);
}
+ /**
+ * Disable auto compaction on the embedded Cassandra host to avoid a race condition while
+ * cleaning temporary files.
+ */
+ @SuppressWarnings("unused")
+ private static void disableAutoCompaction() throws Exception {
+ JMXServiceURL url =
+ new JMXServiceURL(
+ String.format(
+ "service:jmx:rmi://%s/jndi/rmi://%s:%s/jmxrmi",
+ CASSANDRA_HOST, CASSANDRA_HOST, jmxPort));
+ JMXConnector jmxConnector = JMXConnectorFactory.connect(url, null);
+ MBeanServerConnection mBeanServerConnection = jmxConnector.getMBeanServerConnection();
+ ObjectName objectName = new ObjectName(STORAGE_SERVICE_MBEAN);
+ StorageServiceMBean mBeanProxy =
+ JMX.newMBeanProxy(mBeanServerConnection, objectName, StorageServiceMBean.class);
+ mBeanProxy.disableAutoCompaction(CASSANDRA_KEYSPACE, CASSANDRA_TABLE);
+ jmxConnector.close();
+ Thread.sleep(JMX_CONF_TIMEOUT);
+ }
+
@Test
public void testEstimatedSizeBytes() throws Exception {
PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
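
The buildCluster() change above retries a flaky embedded-Cassandra startup a fixed number of times with a fixed pause. A generic version of that pattern looks roughly like the sketch below; the class and method names are ours, not part of the test.

    import java.util.concurrent.Callable;

    public final class StartupRetry {
      // Retry an action up to maxAttempts times, pausing between attempts; rethrow the last failure.
      public static <T> T withRetries(Callable<T> action, int maxAttempts, long pauseMillis)
          throws InterruptedException {
        Exception last = null;
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
          try {
            return action.call();
          } catch (Exception e) {
            last = e;
            Thread.sleep(pauseMillis);
          }
        }
        throw new RuntimeException("Gave up after " + maxAttempts + " attempts", last);
      }
    }

As in the test, a NoHostAvailableException-style startup failure simply triggers another attempt; a persistent error only surfaces after the final attempt.
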
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-2/build.gradle b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-2/build.gradle
index 0c4411d..3b64569 100644
--- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-2/build.gradle
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-2/build.gradle
@@ -42,7 +42,6 @@
testCompile "org.apache.logging.log4j:log4j-core:$log4j_version"
testCompile library.java.hamcrest_core
testCompile library.java.hamcrest_library
- testCompile library.java.commons_io_1x
testCompile library.java.junit
testCompile "org.elasticsearch.client:elasticsearch-rest-client:5.6.3"
testCompile "org.elasticsearch:elasticsearch:$elastic_search_version"
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-2/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-2/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
index 85419c6..89d0b28 100644
--- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-2/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-2/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
@@ -19,14 +19,13 @@
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.ConnectionConfiguration;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.ES_TYPE;
-import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.UPDATE_INDEX;
-import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.UPDATE_TYPE;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.getEsIndex;
import java.io.IOException;
import java.io.Serializable;
import org.apache.beam.sdk.io.common.NetworkTestHelper;
import org.apache.beam.sdk.testing.TestPipeline;
+import org.elasticsearch.client.Request;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.node.Node;
@@ -86,15 +85,16 @@
connectionConfiguration =
ConnectionConfiguration.create(
new String[] {"http://" + ES_IP + ":" + esHttpPort}, getEsIndex(), ES_TYPE)
- .withSocketAndRetryTimeout(120000)
+ .withSocketTimeout(120000)
.withConnectTimeout(5000);
restClient = connectionConfiguration.createClient();
elasticsearchIOTestCommon =
new ElasticsearchIOTestCommon(connectionConfiguration, restClient, false);
int waitingTime = 0;
int healthCheckFrequency = 500;
+ Request request = new Request("HEAD", "/");
while ((waitingTime < MAX_STARTUP_WAITING_TIME_MSEC)
- && restClient.performRequest("HEAD", "/").getStatusLine().getStatusCode() != 200) {
+ && restClient.performRequest(request).getStatusLine().getStatusCode() != 200) {
try {
Thread.sleep(healthCheckFrequency);
waitingTime += healthCheckFrequency;
@@ -202,18 +202,6 @@
}
@Test
- public void testWritePartialUpdateWithErrors() throws Exception {
- // cannot share elasticsearchIOTestCommon because tests run in parallel.
- ConnectionConfiguration connectionConfiguration =
- ConnectionConfiguration.create(
- new String[] {"http://" + ES_IP + ":" + esHttpPort}, UPDATE_INDEX, UPDATE_TYPE);
- ElasticsearchIOTestCommon elasticsearchIOTestCommonWithErrors =
- new ElasticsearchIOTestCommon(connectionConfiguration, restClient, false);
- elasticsearchIOTestCommonWithErrors.setPipeline(pipeline);
- elasticsearchIOTestCommonWithErrors.testWritePartialUpdateWithErrors();
- }
-
- @Test
public void testReadWithMetadata() throws Exception {
elasticsearchIOTestCommon.setPipeline(pipeline);
elasticsearchIOTestCommon.testReadWithMetadata();
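
The health-check change above is part of a broader migration, visible throughout this diff, from the deprecated performRequest(method, endpoint, ...) overloads to the Request object API of the low-level Elasticsearch REST client. A minimal sketch of that API, with a placeholder host, port, and body:

    import org.apache.http.HttpHost;
    import org.elasticsearch.client.Request;
    import org.elasticsearch.client.Response;
    import org.elasticsearch.client.RestClient;

    public class RequestApiSketch {
      public static void main(String[] args) throws Exception {
        try (RestClient restClient =
            RestClient.builder(new HttpHost("localhost", 9200, "http")).build()) {
          // Method and endpoint go into the Request; parameters and the body are attached to it.
          Request request = new Request("GET", "/_search");
          request.addParameter("size", "1");
          request.setJsonEntity("{\"query\": {\"match_all\": {}}}");
          Response response = restClient.performRequest(request);
          System.out.println(response.getStatusLine());
        }
      }
    }
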
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/build.gradle b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/build.gradle
index 2e13700..c1560ac 100644
--- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/build.gradle
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/build.gradle
@@ -61,7 +61,6 @@
testCompile "net.java.dev.jna:jna:$jna_version"
testCompile library.java.hamcrest_core
testCompile library.java.hamcrest_library
- testCompile library.java.commons_io_1x
testCompile library.java.junit
testCompile "org.elasticsearch.client:elasticsearch-rest-client:$elastic_search_version"
testRuntimeOnly library.java.slf4j_jdk14
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
index d809cfd..4f2fc28 100644
--- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
@@ -19,8 +19,6 @@
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.ConnectionConfiguration;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.ES_TYPE;
-import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.UPDATE_INDEX;
-import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.UPDATE_TYPE;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.getEsIndex;
import static org.elasticsearch.test.ESIntegTestCase.Scope.SUITE;
@@ -95,7 +93,7 @@
if (connectionConfiguration == null) {
connectionConfiguration =
ConnectionConfiguration.create(fillAddresses(), getEsIndex(), ES_TYPE)
- .withSocketAndRetryTimeout(120000)
+ .withSocketTimeout(120000)
.withConnectTimeout(5000);
elasticsearchIOTestCommon =
new ElasticsearchIOTestCommon(connectionConfiguration, getRestClient(), false);
@@ -202,17 +200,6 @@
}
@Test
- public void testWritePartialUpdateWithErrors() throws Exception {
- // cannot share elasticsearchIOTestCommon because tests run in parallel.
- ConnectionConfiguration connectionConfiguration =
- ConnectionConfiguration.create(fillAddresses(), UPDATE_INDEX, UPDATE_TYPE);
- ElasticsearchIOTestCommon elasticsearchIOTestCommonWithErrors =
- new ElasticsearchIOTestCommon(connectionConfiguration, getRestClient(), false);
- elasticsearchIOTestCommonWithErrors.setPipeline(pipeline);
- elasticsearchIOTestCommonWithErrors.testWritePartialUpdateWithErrors();
- }
-
- @Test
public void testReadWithMetadata() throws Exception {
elasticsearchIOTestCommon.setPipeline(pipeline);
elasticsearchIOTestCommon.testReadWithMetadata();
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-6/build.gradle b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-6/build.gradle
index b7bf6d0..6d7ae8a 100644
--- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-6/build.gradle
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-6/build.gradle
@@ -61,7 +61,6 @@
testCompile "net.java.dev.jna:jna:$jna_version"
testCompile library.java.hamcrest_core
testCompile library.java.hamcrest_library
- testCompile library.java.commons_io_1x
testCompile library.java.junit
testCompile "org.elasticsearch.client:elasticsearch-rest-client:$elastic_search_version"
testRuntimeOnly library.java.slf4j_jdk14
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-6/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-6/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
index 84696e5..279fa1e 100644
--- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-6/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-6/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
@@ -19,8 +19,6 @@
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.ConnectionConfiguration;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.ES_TYPE;
-import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.UPDATE_INDEX;
-import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.UPDATE_TYPE;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.getEsIndex;
import static org.elasticsearch.test.ESIntegTestCase.Scope.SUITE;
@@ -95,7 +93,7 @@
if (connectionConfiguration == null) {
connectionConfiguration =
ConnectionConfiguration.create(fillAddresses(), getEsIndex(), ES_TYPE)
- .withSocketAndRetryTimeout(120000)
+ .withSocketTimeout(120000)
.withConnectTimeout(5000);
elasticsearchIOTestCommon =
new ElasticsearchIOTestCommon(connectionConfiguration, getRestClient(), false);
@@ -196,17 +194,6 @@
}
@Test
- public void testWritePartialUpdateWithErrors() throws Exception {
- // cannot share elasticsearchIOTestCommon because tests run in parallel.
- ConnectionConfiguration connectionConfiguration =
- ConnectionConfiguration.create(fillAddresses(), UPDATE_INDEX, UPDATE_TYPE);
- ElasticsearchIOTestCommon elasticsearchIOTestCommonWithErrors =
- new ElasticsearchIOTestCommon(connectionConfiguration, getRestClient(), false);
- elasticsearchIOTestCommonWithErrors.setPipeline(pipeline);
- elasticsearchIOTestCommonWithErrors.testWritePartialUpdateWithErrors();
- }
-
- @Test
public void testReadWithMetadata() throws Exception {
elasticsearchIOTestCommon.setPipeline(pipeline);
elasticsearchIOTestCommon.testReadWithMetadata();
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/build.gradle b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/build.gradle
new file mode 100644
index 0000000..f731cd0
--- /dev/null
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/build.gradle
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+plugins { id 'org.apache.beam.module' }
+applyJavaNature(
+ publish: false,
+ archivesBaseName: 'beam-sdks-java-io-elasticsearch-tests-7'
+)
+provideIntegrationTestingDependencies()
+enableJavaPerformanceTesting()
+
+description = "Apache Beam :: SDKs :: Java :: IO :: Elasticsearch-Tests :: 7.x"
+ext.summary = "Tests of ElasticsearchIO on Elasticsearch 7.x"
+
+test {
+ // needed for ESIntegTestCase
+ systemProperty "tests.security.manager", "false"
+}
+
+def jna_version = "4.1.0"
+def log4j_version = "2.11.1"
+def elastic_search_version = "7.5.0"
+
+configurations.all {
+ resolutionStrategy {
+ // Make sure the log4j versions for api and core match instead of taking the default
+ // Gradle rule of using the latest.
+ force "org.apache.logging.log4j:log4j-api:$log4j_version"
+ force "org.apache.logging.log4j:log4j-core:$log4j_version"
+ }
+}
+
+dependencies {
+ testCompile project(path: ":sdks:java:io:elasticsearch-tests:elasticsearch-tests-common", configuration: "testRuntime")
+ testCompile "org.elasticsearch.test:framework:$elastic_search_version"
+ testCompile "org.elasticsearch.plugin:transport-netty4-client:$elastic_search_version"
+ testCompile "com.carrotsearch.randomizedtesting:randomizedtesting-runner:2.7.5"
+ testCompile "org.elasticsearch:elasticsearch:$elastic_search_version"
+
+ testCompile project(path: ":sdks:java:core", configuration: "shadow")
+ testCompile project(":sdks:java:io:elasticsearch")
+ testCompile project(path: ":sdks:java:io:common", configuration: "testRuntime")
+ testCompile "org.apache.logging.log4j:log4j-core:$log4j_version"
+ testCompile "org.apache.logging.log4j:log4j-api:$log4j_version"
+ testCompile library.java.slf4j_api
+ testCompile "net.java.dev.jna:jna:$jna_version"
+ testCompile library.java.hamcrest_core
+ testCompile library.java.hamcrest_library
+ testCompile library.java.junit
+ testCompile "org.elasticsearch.client:elasticsearch-rest-client:$elastic_search_version"
+ testRuntimeOnly library.java.slf4j_jdk14
+ testRuntimeOnly project(":runners:direct-java")
+}
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/src/test/contrib/create_elk_container.sh b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/src/test/contrib/create_elk_container.sh
new file mode 100755
index 0000000..7a2862d
--- /dev/null
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/src/test/contrib/create_elk_container.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+################################################################################
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+#Create an ELK (Elasticsearch Logstash Kibana) container for ES v7.x and compatible Logstash and Kibana versions,
+#bind them on host ports, allow shell access to the container and mount the current directory on /home/$USER inside the container
+
+docker create -p 5601:5601 -p 9200:9200 -p 5044:5044 -p 5000:5000 -p 9300:9300 -it -v $(pwd):/home/$USER/ --name elk-7.5.0 sebp/elk:740
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOIT.java b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOIT.java
new file mode 100644
index 0000000..42ae6d5
--- /dev/null
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOIT.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.elasticsearch;
+
+import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.ConnectionConfiguration;
+
+import org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOITCommon.ElasticsearchPipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.elasticsearch.client.RestClient;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * A test of {@link ElasticsearchIO} on an independent Elasticsearch v7.x instance.
+ *
+ * <p>This test requires a running instance of Elasticsearch, and the test dataset must exist in the
+ * database. See {@link ElasticsearchIOITCommon} for instructions to achieve this.
+ *
+ * <p>You can run this test by doing the following from the beam parent module directory with the
+ * correct server IP:
+ *
+ * <pre>
+ * ./gradlew integrationTest -p sdks/java/io/elasticsearch-tests/elasticsearch-tests-7
+ * -DintegrationTestPipelineOptions='[
+ * "--elasticsearchServer=1.2.3.4",
+ * "--elasticsearchHttpPort=9200"]'
+ * --tests org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOIT
+ * -DintegrationTestRunner=direct
+ * </pre>
+ *
+ * <p>It is likely that you will need to configure <code>thread_pool.write.queue_size: 250</code>
+ * (or higher) in the backend Elasticsearch server for this test to run.
+ */
+@RunWith(JUnit4.class)
+public class ElasticsearchIOIT {
+ private static RestClient restClient;
+ private static ElasticsearchPipelineOptions options;
+ private static ConnectionConfiguration readConnectionConfiguration;
+ private static ConnectionConfiguration writeConnectionConfiguration;
+ private static ConnectionConfiguration updateConnectionConfiguration;
+ private static ElasticsearchIOTestCommon elasticsearchIOTestCommon;
+
+ @Rule public TestPipeline pipeline = TestPipeline.create();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ PipelineOptionsFactory.register(ElasticsearchPipelineOptions.class);
+ options = TestPipeline.testingPipelineOptions().as(ElasticsearchPipelineOptions.class);
+ readConnectionConfiguration =
+ ElasticsearchIOITCommon.getConnectionConfiguration(
+ options, ElasticsearchIOITCommon.IndexMode.READ);
+ writeConnectionConfiguration =
+ ElasticsearchIOITCommon.getConnectionConfiguration(
+ options, ElasticsearchIOITCommon.IndexMode.WRITE);
+ updateConnectionConfiguration =
+ ElasticsearchIOITCommon.getConnectionConfiguration(
+ options, ElasticsearchIOITCommon.IndexMode.WRITE_PARTIAL);
+ restClient = readConnectionConfiguration.createClient();
+ elasticsearchIOTestCommon =
+ new ElasticsearchIOTestCommon(readConnectionConfiguration, restClient, true);
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ ElasticsearchIOTestUtils.deleteIndex(writeConnectionConfiguration, restClient);
+ ElasticsearchIOTestUtils.deleteIndex(updateConnectionConfiguration, restClient);
+ restClient.close();
+ }
+
+ @Test
+ public void testSplitsVolume() throws Exception {
+ elasticsearchIOTestCommon.testSplit(10_000);
+ }
+
+ @Test
+ public void testReadVolume() throws Exception {
+ elasticsearchIOTestCommon.setPipeline(pipeline);
+ elasticsearchIOTestCommon.testRead();
+ }
+
+ @Test
+ public void testWriteVolume() throws Exception {
+ // cannot share elasticsearchIOTestCommon because tests run in parallel.
+ ElasticsearchIOTestCommon elasticsearchIOTestCommonWrite =
+ new ElasticsearchIOTestCommon(writeConnectionConfiguration, restClient, true);
+ elasticsearchIOTestCommonWrite.setPipeline(pipeline);
+ elasticsearchIOTestCommonWrite.testWrite();
+ }
+
+ @Test
+ public void testSizesVolume() throws Exception {
+ elasticsearchIOTestCommon.testSizes();
+ }
+
+ /**
+ * This test verifies volume loading of Elasticsearch using explicit document IDs, routing each
+ * document to an index named after the scientist and to a type based on the modulo 2 of the
+ * scientist name. The goal of this IT is to help observe and verify that the overhead of adding the
+ * functions to parse the document and extract the ID is acceptable.
+ */
+ @Test
+ public void testWriteWithFullAddressingVolume() throws Exception {
+ // cannot share elasticsearchIOTestCommon because tests run in parallel.
+ ElasticsearchIOTestCommon elasticsearchIOTestCommonWrite =
+ new ElasticsearchIOTestCommon(writeConnectionConfiguration, restClient, true);
+ elasticsearchIOTestCommonWrite.setPipeline(pipeline);
+ elasticsearchIOTestCommonWrite.testWriteWithFullAddressing();
+ }
+
+ /**
+ * This test verifies volume partial updates of Elasticsearch. The test dataset index is cloned
+ * and then a new field is added to each document using a partial update. The test then asserts
+ * the updates were applied.
+ */
+ @Test
+ public void testWritePartialUpdate() throws Exception {
+ ElasticsearchIOTestUtils.copyIndex(
+ restClient,
+ readConnectionConfiguration.getIndex(),
+ updateConnectionConfiguration.getIndex());
+ // cannot share elasticsearchIOTestCommon because tests run in parallel.
+ ElasticsearchIOTestCommon elasticsearchIOTestCommonUpdate =
+ new ElasticsearchIOTestCommon(updateConnectionConfiguration, restClient, true);
+ elasticsearchIOTestCommonUpdate.setPipeline(pipeline);
+ elasticsearchIOTestCommonUpdate.testWritePartialUpdate();
+ }
+}
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
new file mode 100644
index 0000000..2ccbda3
--- /dev/null
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
@@ -0,0 +1,223 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.elasticsearch;
+
+import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.ConnectionConfiguration;
+import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.ES_TYPE;
+import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.getEsIndex;
+import static org.elasticsearch.test.ESIntegTestCase.Scope.SUITE;
+
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
+import java.io.IOException;
+import java.io.Serializable;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Collection;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.test.ESIntegTestCase;
+import org.elasticsearch.transport.Netty4Plugin;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+/*
+Cannot use @RunWith(JUnit4.class) with ESIntegTestCase
+Cannot have @BeforeClass @AfterClass with ESIntegTestCase
+*/
+
+/** Tests for {@link ElasticsearchIO} version 7. */
+@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
+// use cluster of 1 node that has data + master roles
+@ESIntegTestCase.ClusterScope(scope = SUITE, numDataNodes = 1, supportsDedicatedMasters = false)
+public class ElasticsearchIOTest extends ESIntegTestCase implements Serializable {
+
+ private ElasticsearchIOTestCommon elasticsearchIOTestCommon;
+ private ConnectionConfiguration connectionConfiguration;
+
+ private String[] fillAddresses() {
+ ArrayList<String> result = new ArrayList<>();
+ for (InetSocketAddress address : cluster().httpAddresses()) {
+ result.add(String.format("http://%s:%s", address.getHostString(), address.getPort()));
+ }
+ return result.toArray(new String[result.size()]);
+ }
+
+ @Override
+ protected boolean addMockHttpTransport() {
+ return false;
+ }
+
+ @Override
+ protected Settings nodeSettings(int nodeOrdinal) {
+ System.setProperty("es.set.netty.runtime.available.processors", "false");
+ return Settings.builder()
+ .put(super.nodeSettings(nodeOrdinal))
+ // had problems with some JDKs: embedded ES was too slow for bulk insertion
+ // and the queue of 50 filled up. No problem with a real ES instance (cf. testWrite integration test)
+ .put("thread_pool.write.queue_size", 400)
+ .build();
+ }
+
+ @Override
+ public Settings indexSettings() {
+ return Settings.builder()
+ .put(super.indexSettings())
+ // useful to have updated sizes for getEstimatedSize
+ .put("index.store.stats_refresh_interval", 0)
+ .build();
+ }
+
+ @Override
+ protected Collection<Class<? extends Plugin>> nodePlugins() {
+ ArrayList<Class<? extends Plugin>> plugins = new ArrayList<>();
+ plugins.add(Netty4Plugin.class);
+ return plugins;
+ }
+
+ @Before
+ public void setup() throws IOException {
+ if (connectionConfiguration == null) {
+ connectionConfiguration =
+ ConnectionConfiguration.create(fillAddresses(), getEsIndex(), ES_TYPE)
+ .withSocketTimeout(120000)
+ .withConnectTimeout(5000);
+ elasticsearchIOTestCommon =
+ new ElasticsearchIOTestCommon(connectionConfiguration, getRestClient(), false);
+ }
+ }
+
+ @Rule public TestPipeline pipeline = TestPipeline.create();
+
+ @Test
+ public void testSizes() throws Exception {
+ // need to create the index using the helper method (not create it at first insertion)
+ // for the indexSettings() to be run
+ createIndex(getEsIndex());
+ elasticsearchIOTestCommon.testSizes();
+ }
+
+ @Test
+ public void testRead() throws Exception {
+ // need to create the index using the helper method (not create it at first insertion)
+ // for the indexSettings() to be run
+ createIndex(getEsIndex());
+ elasticsearchIOTestCommon.setPipeline(pipeline);
+ elasticsearchIOTestCommon.testRead();
+ }
+
+ @Test
+ public void testReadWithQueryString() throws Exception {
+ // need to create the index using the helper method (not create it at first insertion)
+ // for the indexSettings() to be run
+ createIndex(getEsIndex());
+ elasticsearchIOTestCommon.setPipeline(pipeline);
+ elasticsearchIOTestCommon.testReadWithQueryString();
+ }
+
+ @Test
+ public void testReadWithQueryValueProvider() throws Exception {
+ // need to create the index using the helper method (not create it at first insertion)
+ // for the indexSettings() to be run
+ createIndex(getEsIndex());
+ elasticsearchIOTestCommon.setPipeline(pipeline);
+ elasticsearchIOTestCommon.testReadWithQueryValueProvider();
+ }
+
+ @Test
+ public void testWrite() throws Exception {
+ elasticsearchIOTestCommon.setPipeline(pipeline);
+ elasticsearchIOTestCommon.testWrite();
+ }
+
+ @Rule public ExpectedException expectedException = ExpectedException.none();
+
+ @Test
+ public void testWriteWithErrors() throws Exception {
+ elasticsearchIOTestCommon.setExpectedException(expectedException);
+ elasticsearchIOTestCommon.testWriteWithErrors();
+ }
+
+ @Test
+ public void testWriteWithMaxBatchSize() throws Exception {
+ elasticsearchIOTestCommon.testWriteWithMaxBatchSize();
+ }
+
+ @Test
+ public void testWriteWithMaxBatchSizeBytes() throws Exception {
+ elasticsearchIOTestCommon.testWriteWithMaxBatchSizeBytes();
+ }
+
+ @Test
+ public void testSplit() throws Exception {
+ // need to create the index using the helper method (not create it at first insertion)
+ // for the indexSettings() to be run
+ createIndex(getEsIndex());
+ elasticsearchIOTestCommon.testSplit(2_000);
+ }
+
+ @Test
+ public void testWriteWithIdFn() throws Exception {
+ elasticsearchIOTestCommon.setPipeline(pipeline);
+ elasticsearchIOTestCommon.testWriteWithIdFn();
+ }
+
+ @Test
+ public void testWriteWithIndexFn() throws Exception {
+ elasticsearchIOTestCommon.setPipeline(pipeline);
+ elasticsearchIOTestCommon.testWriteWithIndexFn();
+ }
+
+ @Test
+ public void testWriteFullAddressing() throws Exception {
+ elasticsearchIOTestCommon.setPipeline(pipeline);
+ elasticsearchIOTestCommon.testWriteWithFullAddressing();
+ }
+
+ @Test
+ public void testWritePartialUpdate() throws Exception {
+ elasticsearchIOTestCommon.setPipeline(pipeline);
+ elasticsearchIOTestCommon.testWritePartialUpdate();
+ }
+
+ @Test
+ public void testReadWithMetadata() throws Exception {
+ elasticsearchIOTestCommon.setPipeline(pipeline);
+ elasticsearchIOTestCommon.testReadWithMetadata();
+ }
+
+ @Test
+ public void testDefaultRetryPredicate() throws IOException {
+ elasticsearchIOTestCommon.testDefaultRetryPredicate(getRestClient());
+ }
+
+ @Test
+ public void testWriteRetry() throws Throwable {
+ elasticsearchIOTestCommon.setExpectedException(expectedException);
+ elasticsearchIOTestCommon.setPipeline(pipeline);
+ elasticsearchIOTestCommon.testWriteRetry();
+ }
+
+ @Test
+ public void testWriteRetryValidRequest() throws Throwable {
+ elasticsearchIOTestCommon.setPipeline(pipeline);
+ elasticsearchIOTestCommon.testWriteRetryValidRequest();
+ }
+}
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/src/test/java/org/elasticsearch/bootstrap/JarHell.java b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/src/test/java/org/elasticsearch/bootstrap/JarHell.java
new file mode 100644
index 0000000..be74371
--- /dev/null
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/src/test/java/org/elasticsearch/bootstrap/JarHell.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.elasticsearch.bootstrap;
+
+import java.util.function.Consumer;
+
+/**
+ * We need a real Elasticsearch instance to properly test the IO (split, slice API, scroll API,
+ * ...). Starting with ES 5, to embed Elasticsearch we are forced to use the Elasticsearch test
+ * framework. But this framework checks for class duplicates in the classpath, and the check cannot
+ * be deactivated. When the duplication comes from a dependency, it cannot be avoided. The
+ * Elasticsearch community does not provide a way of disabling the jar hell check, so we skip it
+ * with this hack. In this case the duplicate class is
+ * org.apache.maven.surefire.report.SafeThrowable (jar1: surefire-api-2.20.jar, jar2:
+ * surefire-junit47-2.20.jar).
+ */
+class JarHell {
+
+ @SuppressWarnings("EmptyMethod")
+ public static void checkJarHell(Consumer<String> output) {}
+}
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/build.gradle b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/build.gradle
index 53a1ff4..830abac 100644
--- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/build.gradle
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/build.gradle
@@ -26,15 +26,20 @@
ext.summary = "Common test classes for ElasticsearchIO"
def jna_version = "4.1.0"
-def log4j_version = "2.6.2"
+def log4j_version = "2.11.1"
+def elastic_search_version = "7.5.0"
+
+configurations.all {
+ resolutionStrategy {
+ // Make sure the log4j versions for api and core match instead of taking the default
+ // Gradle rule of using the latest.
+ force "org.apache.logging.log4j:log4j-api:$log4j_version"
+ force "org.apache.logging.log4j:log4j-core:$log4j_version"
+ }
+}
dependencies {
testCompile library.java.jackson_databind
- testCompile "org.apache.httpcomponents:httpasyncclient:4.1.4"
- testCompile "org.apache.httpcomponents:httpcore-nio:4.4.10"
- testCompile "org.apache.httpcomponents:httpcore:4.4.10"
- testCompile "org.apache.httpcomponents:httpclient:4.5.6"
-
testCompile project(path: ":sdks:java:core", configuration: "shadow")
testCompile project(":sdks:java:io:elasticsearch")
testCompile project(path: ":sdks:java:io:common", configuration: "testRuntime")
@@ -44,9 +49,8 @@
testCompile "org.apache.logging.log4j:log4j-core:$log4j_version"
testCompile library.java.hamcrest_core
testCompile library.java.hamcrest_library
- testCompile library.java.commons_io_1x
testCompile library.java.junit
- testCompile "org.elasticsearch.client:elasticsearch-rest-client:6.4.0"
+ testCompile "org.elasticsearch.client:elasticsearch-rest-client:$elastic_search_version"
testRuntimeOnly library.java.slf4j_jdk14
testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")
}
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTestCommon.java b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTestCommon.java
index 386a518..de1f8b0 100644
--- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTestCommon.java
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTestCommon.java
@@ -22,6 +22,7 @@
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Read;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.RetryConfiguration.DEFAULT_RETRY_PREDICATE;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write;
+import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.getBackendVersion;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.FAMOUS_SCIENTISTS;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.NUM_SCIENTISTS;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.countByMatch;
@@ -63,6 +64,7 @@
import org.apache.http.HttpEntity;
import org.apache.http.entity.ContentType;
import org.apache.http.nio.entity.NStringEntity;
+import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;
import org.hamcrest.CustomMatcher;
@@ -440,7 +442,11 @@
for (String scientist : FAMOUS_SCIENTISTS) {
String index = scientist.toLowerCase();
long count =
- refreshIndexAndGetCurrentNumDocs(restClient, index, connectionConfiguration.getType());
+ refreshIndexAndGetCurrentNumDocs(
+ restClient,
+ index,
+ connectionConfiguration.getType(),
+ getBackendVersion(connectionConfiguration));
assertEquals(scientist + " index holds incorrect count", docsPerScientist, count);
}
}
@@ -485,7 +491,11 @@
for (int i = 0; i < 2; i++) {
String type = "TYPE_" + i;
long count =
- refreshIndexAndGetCurrentNumDocs(restClient, connectionConfiguration.getIndex(), type);
+ refreshIndexAndGetCurrentNumDocs(
+ restClient,
+ connectionConfiguration.getIndex(),
+ type,
+ getBackendVersion(connectionConfiguration));
assertEquals(type + " holds incorrect count", adjustedNumDocs / 2, count);
}
}
@@ -514,7 +524,9 @@
String index = scientist.toLowerCase();
for (int i = 0; i < 2; i++) {
String type = "TYPE_" + scientist.hashCode() % 2;
- long count = refreshIndexAndGetCurrentNumDocs(restClient, index, type);
+ long count =
+ refreshIndexAndGetCurrentNumDocs(
+ restClient, index, type, getBackendVersion(connectionConfiguration));
assertEquals("Incorrect count for " + index + "/" + type, numDocs / NUM_SCIENTISTS, count);
}
}
@@ -563,41 +575,6 @@
assertEquals(numDocs / 2, countByMatch(connectionConfiguration, restClient, "group", "1"));
}
- /** Tests partial updates with errors by adding some invalid info to test set. */
- void testWritePartialUpdateWithErrors() throws Exception {
- // put a mapping to simulate error of insertion
- ElasticsearchIOTestUtils.setIndexMapping(connectionConfiguration, restClient);
-
- if (!useAsITests) {
- ElasticsearchIOTestUtils.insertTestDocuments(connectionConfiguration, numDocs, restClient);
- }
-
- // try to partial update a document with an incompatible date format for the age to generate
- // an update error
- List<String> data = new ArrayList<>();
- data.add("{\"id\" : 1, \"age\" : \"2018-08-10:00:00\"}");
-
- try {
- pipeline
- .apply(Create.of(data))
- .apply(
- ElasticsearchIO.write()
- .withConnectionConfiguration(connectionConfiguration)
- .withIdFn(new ExtractValueFn("id"))
- .withUsePartialUpdate(true));
- pipeline.run();
- } catch (Exception e) {
- boolean matches =
- e.getLocalizedMessage()
- .matches(
- "(?is).*Error writing to Elasticsearch, some elements could not be inserted:"
- + ".*Document id .+: failed to parse .*Caused by: .*"
- + ".*For input string: \"2018-08-10:00:00\".*");
-
- assertTrue(matches);
- }
- }
-
/**
* Function for checking if any string in iterable contains expected substring. Fails if no match
* is found.
@@ -627,13 +604,15 @@
void testDefaultRetryPredicate(RestClient restClient) throws IOException {
HttpEntity entity1 = new NStringEntity(BAD_REQUEST, ContentType.APPLICATION_JSON);
- Response response1 =
- restClient.performRequest("POST", "/_bulk", Collections.emptyMap(), entity1);
+ Request request = new Request("POST", "/_bulk");
+ request.addParameters(Collections.emptyMap());
+ request.setEntity(entity1);
+ Response response1 = restClient.performRequest(request);
assertTrue(CUSTOM_RETRY_PREDICATE.test(response1.getEntity()));
HttpEntity entity2 = new NStringEntity(OK_REQUEST, ContentType.APPLICATION_JSON);
- Response response2 =
- restClient.performRequest("POST", "/_bulk", Collections.emptyMap(), entity2);
+ request.setEntity(entity2);
+ Response response2 = restClient.performRequest(request);
assertFalse(DEFAULT_RETRY_PREDICATE.test(response2.getEntity()));
}
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTestUtils.java b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTestUtils.java
index ee9e47c..cc5a18e 100644
--- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTestUtils.java
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTestUtils.java
@@ -62,14 +62,16 @@
}
private static void closeIndex(RestClient restClient, String index) throws IOException {
- restClient.performRequest("POST", String.format("/%s/_close", index));
+ Request request = new Request("POST", String.format("/%s/_close", index));
+ restClient.performRequest(request);
}
private static void deleteIndex(RestClient restClient, String index) throws IOException {
try {
closeIndex(restClient, index);
- restClient.performRequest(
- "DELETE", String.format("/%s", index), Collections.singletonMap("refresh", "wait_for"));
+ Request request = new Request("DELETE", String.format("/%s", index));
+ request.addParameters(Collections.singletonMap("refresh", "wait_for"));
+ restClient.performRequest(request);
} catch (IOException e) {
// it is fine to ignore this exception as deleteIndex occurs in @before,
// so when the first test is run, the index does not exist yet
@@ -91,8 +93,10 @@
"{\"source\" : { \"index\" : \"%s\" }, \"dest\" : { \"index\" : \"%s\" } }",
source, target),
ContentType.APPLICATION_JSON);
- restClient.performRequest(
- "POST", "/_reindex", Collections.singletonMap("refresh", "wait_for"), entity);
+ Request request = new Request("POST", "/_reindex");
+ request.addParameters(Collections.singletonMap("refresh", "wait_for"));
+ request.setEntity(entity);
+ restClient.performRequest(request);
}
/** Inserts the given number of test documents into Elasticsearch. */
@@ -118,9 +122,10 @@
"/%s/%s/_bulk", connectionConfiguration.getIndex(), connectionConfiguration.getType());
HttpEntity requestBody =
new NStringEntity(bulkRequest.toString(), ContentType.APPLICATION_JSON);
- Response response =
- restClient.performRequest(
- "POST", endPoint, Collections.singletonMap("refresh", "wait_for"), requestBody);
+ Request request = new Request("POST", endPoint);
+ request.addParameters(Collections.singletonMap("refresh", "wait_for"));
+ request.setEntity(requestBody);
+ Response response = restClient.performRequest(request);
ElasticsearchIO.checkForErrors(
response.getEntity(), ElasticsearchIO.getBackendVersion(connectionConfiguration), false);
}
@@ -136,7 +141,10 @@
static long refreshIndexAndGetCurrentNumDocs(
ConnectionConfiguration connectionConfiguration, RestClient restClient) throws IOException {
return refreshIndexAndGetCurrentNumDocs(
- restClient, connectionConfiguration.getIndex(), connectionConfiguration.getType());
+ restClient,
+ connectionConfiguration.getIndex(),
+ connectionConfiguration.getType(),
+ getBackendVersion(connectionConfiguration));
}
/**
@@ -148,17 +156,23 @@
* @return The number of docs in the index
* @throws IOException On error communicating with Elasticsearch
*/
- static long refreshIndexAndGetCurrentNumDocs(RestClient restClient, String index, String type)
- throws IOException {
+ static long refreshIndexAndGetCurrentNumDocs(
+ RestClient restClient, String index, String type, int backendVersion) throws IOException {
long result = 0;
try {
String endPoint = String.format("/%s/_refresh", index);
- restClient.performRequest("POST", endPoint);
+ Request request = new Request("POST", endPoint);
+ restClient.performRequest(request);
endPoint = String.format("/%s/%s/_search", index, type);
- Response response = restClient.performRequest("GET", endPoint);
+ request = new Request("GET", endPoint);
+ Response response = restClient.performRequest(request);
JsonNode searchResult = ElasticsearchIO.parseResponse(response.getEntity());
- result = searchResult.path("hits").path("total").asLong();
+ if (backendVersion >= 7) {
+ result = searchResult.path("hits").path("total").path("value").asLong();
+ } else {
+ result = searchResult.path("hits").path("total").asLong();
+ }
} catch (IOException e) {
// it is fine to ignore the exceptions below because in testWriteWithBatchSize* sometimes
// we call upgrade before any doc has been written
@@ -199,7 +213,7 @@
* @param connectionConfiguration Specifies the index and type
* @param restClient To use to execute the call
* @param scientistName The scientist to query for
- * @return The cound of documents found
+ * @return The count of documents found
* @throws IOException On error talking to Elasticsearch
*/
static int countByScientistName(
@@ -239,24 +253,16 @@
"/%s/%s/_search",
connectionConfiguration.getIndex(), connectionConfiguration.getType());
HttpEntity httpEntity = new NStringEntity(requestBody, ContentType.APPLICATION_JSON);
- Response response =
- restClient.performRequest("GET", endPoint, Collections.emptyMap(), httpEntity);
- JsonNode searchResult = parseResponse(response.getEntity());
- return searchResult.path("hits").path("total").asInt();
- }
- public static void setIndexMapping(
- ConnectionConfiguration connectionConfiguration, RestClient restClient) throws IOException {
- String endpoint = String.format("/%s", connectionConfiguration.getIndex());
- String requestString =
- String.format(
- "{\"mappings\":{\"%s\":{\"properties\":{\"age\":{\"type\":\"long\"},"
- + " \"scientist\":{\"type\":\"%s\"}, \"id\":{\"type\":\"long\"}}}}}",
- connectionConfiguration.getType(),
- getBackendVersion(connectionConfiguration) == 2 ? "string" : "text");
- HttpEntity requestBody = new NStringEntity(requestString, ContentType.APPLICATION_JSON);
- Request request = new Request("PUT", endpoint);
- request.setEntity(requestBody);
- restClient.performRequest(request);
+ Request request = new Request("GET", endPoint);
+ request.addParameters(Collections.emptyMap());
+ request.setEntity(httpEntity);
+ Response response = restClient.performRequest(request);
+ JsonNode searchResult = parseResponse(response.getEntity());
+ if (getBackendVersion(connectionConfiguration) >= 7) {
+ return searchResult.path("hits").path("total").path("value").asInt();
+ } else {
+ return searchResult.path("hits").path("total").asInt();
+ }
}
}
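
The version check above reflects a breaking change in Elasticsearch 7: hits.total became an object with a value field instead of a bare number. A self-contained sketch of the same parsing logic, with illustrative JSON and names:

    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;

    public class HitsTotalSketch {
      // ES 7+: {"hits":{"total":{"value":20,"relation":"eq"}}}; earlier versions: {"hits":{"total":20}}
      static long totalHits(JsonNode searchResult, int backendVersion) {
        JsonNode total = searchResult.path("hits").path("total");
        return backendVersion >= 7 ? total.path("value").asLong() : total.asLong();
      }

      public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        JsonNode es7 = mapper.readTree("{\"hits\":{\"total\":{\"value\":20,\"relation\":\"eq\"}}}");
        JsonNode es6 = mapper.readTree("{\"hits\":{\"total\":20}}");
        System.out.println(totalHits(es7, 7)); // 20
        System.out.println(totalHits(es6, 6)); // 20
      }
    }
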
diff --git a/sdks/java/io/elasticsearch/build.gradle b/sdks/java/io/elasticsearch/build.gradle
index 6eca559..c8a3dc5 100644
--- a/sdks/java/io/elasticsearch/build.gradle
+++ b/sdks/java/io/elasticsearch/build.gradle
@@ -27,10 +27,6 @@
compile project(path: ":sdks:java:core", configuration: "shadow")
compile library.java.jackson_databind
compile library.java.jackson_annotations
- compile "org.elasticsearch.client:elasticsearch-rest-client:6.4.0"
- compile "org.apache.httpcomponents:httpasyncclient:4.1.4"
- compile "org.apache.httpcomponents:httpcore-nio:4.4.10"
- compile "org.apache.httpcomponents:httpcore:4.4.10"
- compile "org.apache.httpcomponents:httpclient:4.5.6"
+ compile "org.elasticsearch.client:elasticsearch-rest-client:7.5.0"
testCompile project(path: ":sdks:java:io:common", configuration: "testRuntime")
}
diff --git a/sdks/java/io/elasticsearch/src/main/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIO.java b/sdks/java/io/elasticsearch/src/main/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIO.java
index 59f6057..ee56e03 100644
--- a/sdks/java/io/elasticsearch/src/main/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIO.java
+++ b/sdks/java/io/elasticsearch/src/main/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIO.java
@@ -20,11 +20,12 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState;
-import com.fasterxml.jackson.annotation.JsonInclude;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import com.fasterxml.jackson.databind.module.SimpleModule;
+import com.fasterxml.jackson.databind.ser.std.StdSerializer;
import com.google.auto.value.AutoValue;
import java.io.File;
import java.io.FileInputStream;
@@ -146,9 +147,9 @@
* <p>When {withUsePartialUpdate()} is enabled, the input document must contain an id field and
* {@code withIdFn()} must be used to allow its extraction by the ElasticsearchIO.
*
- * <p>Optionally, {@code withSocketAndRetryTimeout()} can be used to override the default retry
- * timeout and socket timeout of 30000ms. {@code withConnectTimeout()} can be used to override the
- * default connect timeout of 1000ms.
+ * <p>Optionally, {@code withSocketTimeout()} can be used to override the default socket timeout
+ * of 30000ms. {@code withConnectTimeout()} can be used to override the default connect timeout of
+ * 1000ms.
*/
@Experimental(Experimental.Kind.SOURCE_SINK)
public class ElasticsearchIO {
@@ -203,7 +204,7 @@
} else {
if (backendVersion == 2) {
errorRootName = "create";
- } else if (backendVersion == 5 || backendVersion == 6) {
+ } else if (backendVersion >= 5) {
errorRootName = "index";
}
}
@@ -249,7 +250,7 @@
public abstract String getType();
@Nullable
- public abstract Integer getSocketAndRetryTimeout();
+ public abstract Integer getSocketTimeout();
@Nullable
public abstract Integer getConnectTimeout();
@@ -274,7 +275,7 @@
abstract Builder setType(String type);
- abstract Builder setSocketAndRetryTimeout(Integer maxRetryTimeout);
+ abstract Builder setSocketTimeout(Integer maxRetryTimeout);
abstract Builder setConnectTimeout(Integer connectTimeout);
@@ -374,13 +375,13 @@
* and the default socket timeout (30000ms) in the {@link RequestConfig} of the Elastic {@link
* RestClient}.
*
- * @param socketAndRetryTimeout the socket and retry timeout in millis.
+ * @param socketTimeout the socket timeout in millis.
* @return a {@link ConnectionConfiguration} describes a connection configuration to
* Elasticsearch.
*/
- public ConnectionConfiguration withSocketAndRetryTimeout(Integer socketAndRetryTimeout) {
- checkArgument(socketAndRetryTimeout != null, "socketAndRetryTimeout can not be null");
- return builder().setSocketAndRetryTimeout(socketAndRetryTimeout).build();
+ public ConnectionConfiguration withSocketTimeout(Integer socketTimeout) {
+ checkArgument(socketTimeout != null, "socketTimeout can not be null");
+ return builder().setSocketTimeout(socketTimeout).build();
}
/**
@@ -402,7 +403,7 @@
builder.add(DisplayData.item("type", getType()));
builder.addIfNotNull(DisplayData.item("username", getUsername()));
builder.addIfNotNull(DisplayData.item("keystore.path", getKeystorePath()));
- builder.addIfNotNull(DisplayData.item("socketAndRetryTimeout", getSocketAndRetryTimeout()));
+ builder.addIfNotNull(DisplayData.item("socketTimeout", getSocketTimeout()));
builder.addIfNotNull(DisplayData.item("connectTimeout", getConnectTimeout()));
builder.addIfNotNull(DisplayData.item("trustSelfSignedCerts", isTrustSelfSignedCerts()));
}
@@ -452,16 +453,12 @@
if (getConnectTimeout() != null) {
requestConfigBuilder.setConnectTimeout(getConnectTimeout());
}
- if (getSocketAndRetryTimeout() != null) {
- requestConfigBuilder.setSocketTimeout(getSocketAndRetryTimeout());
+ if (getSocketTimeout() != null) {
+ requestConfigBuilder.setSocketTimeout(getSocketTimeout());
}
return requestConfigBuilder;
}
});
- if (getSocketAndRetryTimeout() != null) {
- restClientBuilder.setMaxRetryTimeoutMillis(getSocketAndRetryTimeout());
- }
-
return restClientBuilder.build();
}
}
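
Pulled together, the renamed timeout setters are used the same way the test modules in this diff use them; a minimal sketch with a placeholder address, index, and type:

    import org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO;

    public class ConnectionConfigSketch {
      public static void main(String[] args) {
        // Placeholder address, index and type; real values come from the calling test or pipeline.
        ElasticsearchIO.ConnectionConfiguration connectionConfiguration =
            ElasticsearchIO.ConnectionConfiguration.create(
                    new String[] {"http://localhost:9200"}, "my_index", "_doc")
                .withSocketTimeout(120000)
                .withConnectTimeout(5000);
        System.out.println(connectionConfiguration);
      }
    }
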
@@ -670,7 +667,7 @@
new BoundedElasticsearchSource(spec, shardId, null, null, null, backendVersion));
}
checkArgument(!sources.isEmpty(), "No shard found");
- } else if (backendVersion == 5 || backendVersion == 6) {
+ } else if (backendVersion >= 5) {
long indexSize = getEstimatedSizeBytes(options);
float nbBundlesFloat = (float) indexSize / desiredBundleSizeBytes;
int nbBundles = (int) Math.ceil(nbBundlesFloat);
@@ -794,7 +791,9 @@
}
String endpoint = String.format("/%s/_stats", connectionConfiguration.getIndex());
try (RestClient restClient = connectionConfiguration.createClient()) {
- return parseResponse(restClient.performRequest("GET", endpoint, params).getEntity());
+ Request request = new Request("GET", endpoint);
+ request.addParameters(params);
+ return parseResponse(restClient.performRequest(request).getEntity());
}
}
}
@@ -820,9 +819,7 @@
if (query == null) {
query = "{\"query\": { \"match_all\": {} }}";
}
- if ((source.backendVersion == 5 || source.backendVersion == 6)
- && source.numSlices != null
- && source.numSlices > 1) {
+ if ((source.backendVersion >= 5) && source.numSlices != null && source.numSlices > 1) {
// if there is more than one slice, add the slice to the user query
String sliceQuery =
String.format("\"slice\": {\"id\": %s,\"max\": %s}", source.sliceId, source.numSlices);
@@ -842,7 +839,10 @@
}
}
HttpEntity queryEntity = new NStringEntity(query, ContentType.APPLICATION_JSON);
- Response response = restClient.performRequest("GET", endPoint, params, queryEntity);
+ Request request = new Request("GET", endPoint);
+ request.addParameters(params);
+ request.setEntity(queryEntity);
+ Response response = restClient.performRequest(request);
JsonNode searchResult = parseResponse(response.getEntity());
updateScrollId(searchResult);
return readNextBatchAndReturnFirstDocument(searchResult);
@@ -863,9 +863,10 @@
"{\"scroll\" : \"%s\",\"scroll_id\" : \"%s\"}",
source.spec.getScrollKeepalive(), scrollId);
HttpEntity scrollEntity = new NStringEntity(requestBody, ContentType.APPLICATION_JSON);
- Response response =
- restClient.performRequest(
- "GET", "/_search/scroll", Collections.emptyMap(), scrollEntity);
+ Request request = new Request("GET", "/_search/scroll");
+ request.addParameters(Collections.emptyMap());
+ request.setEntity(scrollEntity);
+ Response response = restClient.performRequest(request);
JsonNode searchResult = parseResponse(response.getEntity());
updateScrollId(searchResult);
return readNextBatchAndReturnFirstDocument(searchResult);
@@ -910,7 +911,10 @@
String requestBody = String.format("{\"scroll_id\" : [\"%s\"]}", scrollId);
HttpEntity entity = new NStringEntity(requestBody, ContentType.APPLICATION_JSON);
try {
- restClient.performRequest("DELETE", "/_search/scroll", Collections.emptyMap(), entity);
+ Request request = new Request("DELETE", "/_search/scroll");
+ request.addParameters(Collections.emptyMap());
+ request.setEntity(entity);
+ restClient.performRequest(request);
} finally {
if (restClient != null) {
restClient.close();
@@ -1241,19 +1245,10 @@
private long currentBatchSizeBytes;
// Encapsulates the elements which form the metadata for an Elasticsearch bulk operation
- @JsonPropertyOrder({"_index", "_type", "_id"})
- @JsonInclude(JsonInclude.Include.NON_NULL)
private static class DocumentMetadata implements Serializable {
- @JsonProperty("_index")
final String index;
-
- @JsonProperty("_type")
final String type;
-
- @JsonProperty("_id")
final String id;
-
- @JsonProperty("_retry_on_conflict")
final Integer retryOnConflict;
DocumentMetadata(String index, String type, String id, Integer retryOnConflict) {
@@ -1293,6 +1288,35 @@
currentBatchSizeBytes = 0;
}
+ private class DocumentMetadataSerializer extends StdSerializer<DocumentMetadata> {
+
+ private DocumentMetadataSerializer() {
+ super(DocumentMetadata.class);
+ }
+
+ @Override
+ public void serialize(
+ DocumentMetadata value, JsonGenerator gen, SerializerProvider provider)
+ throws IOException {
+ gen.writeStartObject();
+ if (value.index != null) {
+ gen.writeStringField("_index", value.index);
+ }
+ if (value.type != null) {
+ gen.writeStringField("_type", value.type);
+ }
+ if (value.id != null) {
+ gen.writeStringField("_id", value.id);
+ }
+ if (value.retryOnConflict != null && (backendVersion <= 6)) {
+ gen.writeNumberField("_retry_on_conflict", value.retryOnConflict);
+ }
+ if (value.retryOnConflict != null && backendVersion >= 7) {
+ gen.writeNumberField("retry_on_conflict", value.retryOnConflict);
+ }
+ gen.writeEndObject();
+ }
+ }
/**
* Extracts the components that comprise the document address from the document using the
* {@link FieldValueExtractFn} configured. This allows any or all of the index, type and
@@ -1317,8 +1341,10 @@
spec.getTypeFn() != null ? spec.getTypeFn().apply(parsedDocument) : null,
spec.getIdFn() != null ? spec.getIdFn().apply(parsedDocument) : null,
spec.getUsePartialUpdate() ? DEFAULT_RETRY_ON_CONFLICT : null);
+ SimpleModule module = new SimpleModule();
+ module.addSerializer(DocumentMetadata.class, new DocumentMetadataSerializer());
+ OBJECT_MAPPER.registerModule(module);
return OBJECT_MAPPER.writeValueAsString(metadata);
-
} else {
return "{}"; // use configuration and auto-generated document IDs
}
@@ -1378,7 +1404,10 @@
spec.getConnectionConfiguration().getType());
HttpEntity requestBody =
new NStringEntity(bulkRequest.toString(), ContentType.APPLICATION_JSON);
- response = restClient.performRequest("POST", endPoint, Collections.emptyMap(), requestBody);
+ Request request = new Request("POST", endPoint);
+ request.addParameters(Collections.emptyMap());
+ request.setEntity(requestBody);
+ response = restClient.performRequest(request);
responseEntity = new BufferedHttpEntity(response.getEntity());
if (spec.getRetryConfiguration() != null
&& spec.getRetryConfiguration().getRetryPredicate().test(responseEntity)) {
@@ -1399,7 +1428,10 @@
// while retry policy exists
while (BackOffUtils.next(sleeper, backoff)) {
LOG.warn(String.format(RETRY_ATTEMPT_LOG, ++attempt));
- response = restClient.performRequest(method, endpoint, params, requestBody);
+ Request request = new Request(method, endpoint);
+ request.addParameters(params);
+ request.setEntity(requestBody);
+ response = restClient.performRequest(request);
responseEntity = new BufferedHttpEntity(response.getEntity());
// if response has no 429 errors
if (!spec.getRetryConfiguration().getRetryPredicate().test(responseEntity)) {
@@ -1420,15 +1452,19 @@
static int getBackendVersion(ConnectionConfiguration connectionConfiguration) {
try (RestClient restClient = connectionConfiguration.createClient()) {
- Response response = restClient.performRequest("GET", "");
+ Request request = new Request("GET", "");
+ Response response = restClient.performRequest(request);
JsonNode jsonNode = parseResponse(response.getEntity());
int backendVersion =
Integer.parseInt(jsonNode.path("version").path("number").asText().substring(0, 1));
checkArgument(
- (backendVersion == 2 || backendVersion == 5 || backendVersion == 6),
+ (backendVersion == 2
+ || backendVersion == 5
+ || backendVersion == 6
+ || backendVersion == 7),
"The Elasticsearch version to connect to is %s.x. "
+ "This version of the ElasticsearchIO is only compatible with "
- + "Elasticsearch v6.x, v5.x and v2.x",
+ + "Elasticsearch v7.x, v6.x, v5.x and v2.x",
backendVersion);
return backendVersion;
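For reference, a minimal usage sketch of the renamed timeout setter, assuming the existing ConnectionConfiguration.create(addresses, index, type) factory and withConnectTimeout, both defined elsewhere in ElasticsearchIO and not shown in this diff:

    // Hosts, index and type below are placeholders.
    ElasticsearchIO.ConnectionConfiguration connectionConfiguration =
        ElasticsearchIO.ConnectionConfiguration.create(
                new String[] {"http://localhost:9200"}, "my-index", "_doc")
            .withConnectTimeout(5000)   // connect timeout in millis
            .withSocketTimeout(60000);  // socket timeout in millis, formerly withSocketAndRetryTimeout

The retry-timeout half of the old option goes away because the Elasticsearch 7 low-level RestClient no longer provides setMaxRetryTimeoutMillis, which is why that call is removed from createClient() above.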
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index 3bd9d8c..2301a5b 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -1643,6 +1643,7 @@
.setExtendedErrorInfo(false)
.setSkipInvalidRows(false)
.setIgnoreUnknownValues(false)
+ .setIgnoreInsertIds(false)
.setMaxFilesPerPartition(BatchLoads.DEFAULT_MAX_FILES_PER_PARTITION)
.setMaxBytesPerPartition(BatchLoads.DEFAULT_MAX_BYTES_PER_PARTITION)
.setOptimizeWrites(false)
@@ -1774,6 +1775,8 @@
abstract Boolean getIgnoreUnknownValues();
+ abstract Boolean getIgnoreInsertIds();
+
@Nullable
abstract String getKmsKey();
@@ -1846,6 +1849,8 @@
abstract Builder<T> setIgnoreUnknownValues(Boolean ignoreUnknownValues);
+ abstract Builder<T> setIgnoreInsertIds(Boolean ignoreInsertIds);
+
abstract Builder<T> setKmsKey(String kmsKey);
abstract Builder<T> setOptimizeWrites(Boolean optimizeWrites);
@@ -2241,6 +2246,15 @@
return toBuilder().setIgnoreUnknownValues(true).build();
}
+ /**
+ * Setting this option to true disables insertId based data deduplication offered by BigQuery.
+ * For more information, please see
+ * https://cloud.google.com/bigquery/streaming-data-into-bigquery#disabling_best_effort_de-duplication.
+ */
+ public Write<T> ignoreInsertIds() {
+ return toBuilder().setIgnoreInsertIds(true).build();
+ }
+
public Write<T> withKmsKey(String kmsKey) {
return toBuilder().setKmsKey(kmsKey).build();
}
@@ -2600,6 +2614,7 @@
.withExtendedErrorInfo(getExtendedErrorInfo())
.withSkipInvalidRows(getSkipInvalidRows())
.withIgnoreUnknownValues(getIgnoreUnknownValues())
+ .withIgnoreInsertIds(getIgnoreInsertIds())
.withKmsKey(getKmsKey());
return input.apply(streamingInserts);
} else {
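A minimal sketch of the new user-facing option; the table spec and the rows PCollection are placeholders, and the other Write settings shown already exist on BigQueryIO.Write:

    // rows is an existing PCollection<TableRow>.
    rows.apply(
        "WriteWithoutInsertIds",
        BigQueryIO.writeTableRows()
            .to("my-project:my_dataset.my_table")
            .ignoreInsertIds() // disables insertId-based best-effort de-duplication
            .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
            .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));

The flag only matters for the streaming-insert path; as the later hunks show, it is plumbed through StreamingInserts, StreamingWriteTables and StreamingWriteFn down to BigQueryServices.insertAll, where a true value suppresses the per-row insert ids.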
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
index ecd4a85..ce02423 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServices.java
@@ -158,7 +158,8 @@
List<ValueInSingleWindow<T>> failedInserts,
ErrorContainer<T> errorContainer,
boolean skipInvalidRows,
- boolean ignoreUnknownValues)
+ boolean ignoreUnknownValues,
+ boolean ignoreInsertIds)
throws IOException, InterruptedException;
/** Patch BigQuery {@link Table} description. */
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
index 147a862..bf2bd3c 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
@@ -713,7 +713,8 @@
List<ValueInSingleWindow<T>> failedInserts,
ErrorContainer<T> errorContainer,
boolean skipInvalidRows,
- boolean ignoreUnkownValues)
+ boolean ignoreUnkownValues,
+ boolean ignoreInsertIds)
throws IOException, InterruptedException {
checkNotNull(ref, "ref");
if (executor == null) {
@@ -733,7 +734,10 @@
// These lists contain the rows to publish. Initially they contain the entire list.
// If there are failures, they will contain only the failed rows to be retried.
List<ValueInSingleWindow<TableRow>> rowsToPublish = rowList;
- List<String> idsToPublish = insertIdList;
+ List<String> idsToPublish = null;
+ if (!ignoreInsertIds) {
+ idsToPublish = insertIdList;
+ }
while (true) {
List<ValueInSingleWindow<TableRow>> retryRows = new ArrayList<>();
List<String> retryIds = (idsToPublish != null) ? new ArrayList<>() : null;
@@ -741,7 +745,7 @@
int strideIndex = 0;
// Upload in batches.
List<TableDataInsertAllRequest.Rows> rows = new ArrayList<>();
- int dataSize = 0;
+ long dataSize = 0L;
List<Future<List<TableDataInsertAllResponse.InsertErrors>>> futures = new ArrayList<>();
List<Integer> strideIndices = new ArrayList<>();
@@ -755,7 +759,12 @@
out.setJson(row.getUnknownKeys());
rows.add(out);
- dataSize += row.toString().length();
+ try {
+ dataSize += TableRowJsonCoder.of().getEncodedElementByteSize(row);
+ } catch (Exception ex) {
+ throw new RuntimeException("Failed to convert the row to JSON", ex);
+ }
+
if (dataSize >= maxRowBatchSize
|| rows.size() >= maxRowsPerBatch
|| i == rowsToPublish.size() - 1) {
@@ -796,7 +805,7 @@
retTotalDataSize += dataSize;
- dataSize = 0;
+ dataSize = 0L;
strideIndex = i + 1;
rows = new ArrayList<>();
}
@@ -866,7 +875,8 @@
List<ValueInSingleWindow<T>> failedInserts,
ErrorContainer<T> errorContainer,
boolean skipInvalidRows,
- boolean ignoreUnknownValues)
+ boolean ignoreUnknownValues,
+ boolean ignoreInsertIds)
throws IOException, InterruptedException {
return insertAll(
ref,
@@ -878,7 +888,8 @@
failedInserts,
errorContainer,
skipInvalidRows,
- ignoreUnknownValues);
+ ignoreUnknownValues,
+ ignoreInsertIds);
}
@Override
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageSourceBase.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageSourceBase.java
index 2c7725c..3efeb6b 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageSourceBase.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageSourceBase.java
@@ -24,8 +24,8 @@
import com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions;
import com.google.cloud.bigquery.storage.v1beta1.Storage.CreateReadSessionRequest;
import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadSession;
+import com.google.cloud.bigquery.storage.v1beta1.Storage.ShardingStrategy;
import com.google.cloud.bigquery.storage.v1beta1.Storage.Stream;
-import com.google.protobuf.UnknownFieldSet;
import java.io.IOException;
import java.util.List;
import javax.annotation.Nullable;
@@ -118,12 +118,7 @@
.setParent("projects/" + bqOptions.getProject())
.setTableReference(BigQueryHelpers.toTableRefProto(targetTable.getTableReference()))
.setRequestedStreams(streamCount)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setShardingStrategy().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(7, UnknownFieldSet.Field.newBuilder().addVarint(2).build())
- .build());
+ .setShardingStrategy(ShardingStrategy.BALANCED);
if (selectedFieldsProvider != null || rowRestrictionProvider != null) {
TableReadOptions.Builder builder = TableReadOptions.newBuilder();
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageStreamSource.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageStreamSource.java
index 965e3b4..5f8825f 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageStreamSource.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageStreamSource.java
@@ -30,7 +30,6 @@
import com.google.cloud.bigquery.storage.v1beta1.Storage.SplitReadStreamResponse;
import com.google.cloud.bigquery.storage.v1beta1.Storage.Stream;
import com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition;
-import com.google.protobuf.UnknownFieldSet;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
@@ -52,7 +51,6 @@
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -292,16 +290,7 @@
SplitReadStreamRequest splitRequest =
SplitReadStreamRequest.newBuilder()
.setOriginalStream(source.stream)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setFraction().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(
- 2,
- UnknownFieldSet.Field.newBuilder()
- .addFixed32(java.lang.Float.floatToIntBits((float) fraction))
- .build())
- .build())
+ .setFraction((float) fraction)
.build();
SplitReadStreamResponse splitResponse = storageClient.splitReadStream(splitRequest);
@@ -390,16 +379,7 @@
}
private static float getFractionConsumed(ReadRowsResponse response) {
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use getFractionConsumed().
- List<Integer> fractionConsumedField =
- response.getStatus().getUnknownFields().getField(2).getFixed32List();
- if (fractionConsumedField.isEmpty()) {
- Metrics.counter(BigQueryStorageStreamReader.class, "fraction-consumed-not-set").inc();
- return 0f;
- }
-
- return Float.intBitsToFloat(Iterables.getOnlyElement(fractionConsumedField));
+ return response.getStatus().getFractionConsumed();
}
}
}
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
index 6dad989..d00adbb 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
@@ -38,6 +38,7 @@
private boolean extendedErrorInfo;
private final boolean skipInvalidRows;
private final boolean ignoreUnknownValues;
+ private final boolean ignoreInsertIds;
private final String kmsKey;
private final Coder<ElementT> elementCoder;
private final SerializableFunction<ElementT, TableRow> toTableRow;
@@ -56,6 +57,7 @@
false,
false,
false,
+ false,
elementCoder,
toTableRow,
null);
@@ -70,6 +72,7 @@
boolean extendedErrorInfo,
boolean skipInvalidRows,
boolean ignoreUnknownValues,
+ boolean ignoreInsertIds,
Coder<ElementT> elementCoder,
SerializableFunction<ElementT, TableRow> toTableRow,
String kmsKey) {
@@ -80,6 +83,7 @@
this.extendedErrorInfo = extendedErrorInfo;
this.skipInvalidRows = skipInvalidRows;
this.ignoreUnknownValues = ignoreUnknownValues;
+ this.ignoreInsertIds = ignoreInsertIds;
this.elementCoder = elementCoder;
this.toTableRow = toTableRow;
this.kmsKey = kmsKey;
@@ -96,6 +100,7 @@
extendedErrorInfo,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
elementCoder,
toTableRow,
kmsKey);
@@ -111,6 +116,7 @@
extendedErrorInfo,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
elementCoder,
toTableRow,
kmsKey);
@@ -125,6 +131,7 @@
extendedErrorInfo,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
elementCoder,
toTableRow,
kmsKey);
@@ -139,6 +146,22 @@
extendedErrorInfo,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
+ elementCoder,
+ toTableRow,
+ kmsKey);
+ }
+
+ StreamingInserts<DestinationT, ElementT> withIgnoreInsertIds(boolean ignoreInsertIds) {
+ return new StreamingInserts<>(
+ createDisposition,
+ dynamicDestinations,
+ bigQueryServices,
+ retryPolicy,
+ extendedErrorInfo,
+ skipInvalidRows,
+ ignoreUnknownValues,
+ ignoreInsertIds,
elementCoder,
toTableRow,
kmsKey);
@@ -153,6 +176,7 @@
extendedErrorInfo,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
elementCoder,
toTableRow,
kmsKey);
@@ -167,6 +191,7 @@
extendedErrorInfo,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
elementCoder,
toTableRow,
kmsKey);
@@ -188,6 +213,7 @@
.withExtendedErrorInfo(extendedErrorInfo)
.withSkipInvalidRows(skipInvalidRows)
.withIgnoreUnknownValues(ignoreUnknownValues)
+ .withIgnoreInsertIds(ignoreInsertIds)
.withElementCoder(elementCoder)
.withToTableRow(toTableRow));
}
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
index f56cf01..4e12018 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
@@ -49,6 +49,7 @@
private final ErrorContainer<ErrorT> errorContainer;
private final boolean skipInvalidRows;
private final boolean ignoreUnknownValues;
+ private final boolean ignoreInsertIds;
private final SerializableFunction<ElementT, TableRow> toTableRow;
/** JsonTableRows to accumulate BigQuery rows in order to batch writes. */
@@ -67,6 +68,7 @@
ErrorContainer<ErrorT> errorContainer,
boolean skipInvalidRows,
boolean ignoreUnknownValues,
+ boolean ignoreInsertIds,
SerializableFunction<ElementT, TableRow> toTableRow) {
this.bqServices = bqServices;
this.retryPolicy = retryPolicy;
@@ -74,6 +76,7 @@
this.errorContainer = errorContainer;
this.skipInvalidRows = skipInvalidRows;
this.ignoreUnknownValues = ignoreUnknownValues;
+ this.ignoreInsertIds = ignoreInsertIds;
this.toTableRow = toTableRow;
}
@@ -145,7 +148,8 @@
failedInserts,
errorContainer,
skipInvalidRows,
- ignoreUnknownValues);
+ ignoreUnknownValues,
+ ignoreInsertIds);
byteCounter.inc(totalBytes);
} catch (IOException e) {
throw new RuntimeException(e);
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteTables.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteTables.java
index ea2c020..81f097a 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteTables.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteTables.java
@@ -53,6 +53,7 @@
private static final String FAILED_INSERTS_TAG_ID = "failedInserts";
private final boolean skipInvalidRows;
private final boolean ignoreUnknownValues;
+ private final boolean ignoreInsertIds;
private final Coder<ElementT> elementCoder;
private final SerializableFunction<ElementT, TableRow> toTableRow;
@@ -63,6 +64,7 @@
false, // extendedErrorInfo
false, // skipInvalidRows
false, // ignoreUnknownValues
+ false, // ignoreInsertIds
null, // elementCoder
null); // toTableRow
}
@@ -73,6 +75,7 @@
boolean extendedErrorInfo,
boolean skipInvalidRows,
boolean ignoreUnknownValues,
+ boolean ignoreInsertIds,
Coder<ElementT> elementCoder,
SerializableFunction<ElementT, TableRow> toTableRow) {
this.bigQueryServices = bigQueryServices;
@@ -80,6 +83,7 @@
this.extendedErrorInfo = extendedErrorInfo;
this.skipInvalidRows = skipInvalidRows;
this.ignoreUnknownValues = ignoreUnknownValues;
+ this.ignoreInsertIds = ignoreInsertIds;
this.elementCoder = elementCoder;
this.toTableRow = toTableRow;
}
@@ -91,6 +95,7 @@
extendedErrorInfo,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
elementCoder,
toTableRow);
}
@@ -102,6 +107,7 @@
extendedErrorInfo,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
elementCoder,
toTableRow);
}
@@ -113,6 +119,7 @@
extendedErrorInfo,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
elementCoder,
toTableRow);
}
@@ -124,6 +131,7 @@
extendedErrorInfo,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
elementCoder,
toTableRow);
}
@@ -135,6 +143,19 @@
extendedErrorInfo,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
+ elementCoder,
+ toTableRow);
+ }
+
+ StreamingWriteTables<ElementT> withIgnoreInsertIds(boolean ignoreInsertIds) {
+ return new StreamingWriteTables<>(
+ bigQueryServices,
+ retryPolicy,
+ extendedErrorInfo,
+ skipInvalidRows,
+ ignoreUnknownValues,
+ ignoreInsertIds,
elementCoder,
toTableRow);
}
@@ -146,6 +167,7 @@
extendedErrorInfo,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
elementCoder,
toTableRow);
}
@@ -158,6 +180,7 @@
extendedErrorInfo,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
elementCoder,
toTableRow);
}
@@ -240,6 +263,7 @@
errorContainer,
skipInvalidRows,
ignoreUnknownValues,
+ ignoreInsertIds,
toTableRow))
.withOutputTags(mainOutputTag, TupleTagList.of(failedInsertsTag)));
PCollection<T> failedInserts = tuple.get(failedInsertsTag);
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowJsonCoder.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowJsonCoder.java
index a39a331..7e82eec 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowJsonCoder.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowJsonCoder.java
@@ -58,7 +58,7 @@
}
@Override
- protected long getEncodedElementByteSize(TableRow value) throws Exception {
+ public long getEncodedElementByteSize(TableRow value) throws Exception {
String strValue = MAPPER.writeValueAsString(value);
return StringUtf8Coder.of().getEncodedElementByteSize(strValue);
}
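With getEncodedElementByteSize now public, callers can estimate row sizes the same way the streaming-insert batching above does; a small sketch, assuming a TableRow built inline:

    TableRow row = new TableRow().set("name", "a").set("number", 1);
    long rowBytes;
    try {
      rowBytes = TableRowJsonCoder.of().getEncodedElementByteSize(row);
    } catch (Exception ex) {
      throw new RuntimeException("Failed to convert the row to JSON", ex);
    }
    // The JSON form is {"name":"a","number":1} (23 bytes) plus a 1-byte length prefix from
    // StringUtf8Coder, i.e. 24 bytes, matching the expectations adjusted in BigQueryIOReadTest below.

Switching BigQueryServicesImpl and TableContainer to this estimate (instead of row.toString().length()) keeps batch-size accounting consistent with the coder.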
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubClient.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubClient.java
index 07d6da6..6f0f54d 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubClient.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubClient.java
@@ -21,10 +21,12 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState;
import com.google.api.client.util.DateTime;
+import com.google.auto.value.AutoValue;
+import com.google.protobuf.ByteString;
+import com.google.pubsub.v1.PubsubMessage;
import java.io.Closeable;
import java.io.IOException;
import java.io.Serializable;
-import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ThreadLocalRandom;
@@ -298,59 +300,37 @@
* <p>NOTE: This class is {@link Serializable} only to support the {@link PubsubTestClient}. Java
* serialization is never used for non-test clients.
*/
- public static class OutgoingMessage implements Serializable {
- /** Underlying (encoded) element. */
- public final byte[] elementBytes;
+ @AutoValue
+ public abstract static class OutgoingMessage implements Serializable {
- public final Map<String, String> attributes;
+ /** Underlying Message. May not have publish timestamp set. */
+ public abstract PubsubMessage message();
/** Timestamp for element (ms since epoch). */
- public final long timestampMsSinceEpoch;
+ public abstract long timestampMsSinceEpoch();
/**
* If using an id attribute, the record id to associate with this record's metadata so the
* receiver can reject duplicates. Otherwise {@literal null}.
*/
- @Nullable public final String recordId;
+ @Nullable
+ public abstract String recordId();
- public OutgoingMessage(
- byte[] elementBytes,
- Map<String, String> attributes,
+ public static OutgoingMessage of(
+ PubsubMessage message, long timestampMsSinceEpoch, @Nullable String recordId) {
+ return new AutoValue_PubsubClient_OutgoingMessage(message, timestampMsSinceEpoch, recordId);
+ }
+
+ public static OutgoingMessage of(
+ org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage message,
long timestampMsSinceEpoch,
@Nullable String recordId) {
- this.elementBytes = elementBytes;
- this.attributes = attributes;
- this.timestampMsSinceEpoch = timestampMsSinceEpoch;
- this.recordId = recordId;
- }
-
- @Override
- public String toString() {
- return String.format(
- "OutgoingMessage(%db, %dms)", elementBytes.length, timestampMsSinceEpoch);
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
+ PubsubMessage.Builder builder =
+ PubsubMessage.newBuilder().setData(ByteString.copyFrom(message.getPayload()));
+ if (message.getAttributeMap() != null) {
+ builder.putAllAttributes(message.getAttributeMap());
}
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- OutgoingMessage that = (OutgoingMessage) o;
-
- return timestampMsSinceEpoch == that.timestampMsSinceEpoch
- && Arrays.equals(elementBytes, that.elementBytes)
- && Objects.equal(attributes, that.attributes)
- && Objects.equal(recordId, that.recordId);
- }
-
- @Override
- public int hashCode() {
- return Objects.hashCode(
- Arrays.hashCode(elementBytes), attributes, timestampMsSinceEpoch, recordId);
+ return of(builder.build(), timestampMsSinceEpoch, recordId);
}
}
@@ -360,86 +340,35 @@
* <p>NOTE: This class is {@link Serializable} only to support the {@link PubsubTestClient}. Java
* serialization is never used for non-test clients.
*/
- static class IncomingMessage implements Serializable {
- /** Underlying (encoded) element. */
- public final byte[] elementBytes;
+ @AutoValue
+ abstract static class IncomingMessage implements Serializable {
- public Map<String, String> attributes;
+ /** Underlying Message. */
+ public abstract PubsubMessage message();
/**
* Timestamp for element (ms since epoch). Either Pubsub's processing time, or the custom
* timestamp associated with the message.
*/
- public final long timestampMsSinceEpoch;
+ public abstract long timestampMsSinceEpoch();
/** Timestamp (in system time) at which we requested the message (ms since epoch). */
- public final long requestTimeMsSinceEpoch;
+ public abstract long requestTimeMsSinceEpoch();
/** Id to pass back to Pubsub to acknowledge receipt of this message. */
- public final String ackId;
+ public abstract String ackId();
/** Id to pass to the runner to distinguish this message from all others. */
- public final String recordId;
+ public abstract String recordId();
- public IncomingMessage(
- byte[] elementBytes,
- Map<String, String> attributes,
+ public static IncomingMessage of(
+ PubsubMessage message,
long timestampMsSinceEpoch,
long requestTimeMsSinceEpoch,
String ackId,
String recordId) {
- this.elementBytes = elementBytes;
- this.attributes = attributes;
- this.timestampMsSinceEpoch = timestampMsSinceEpoch;
- this.requestTimeMsSinceEpoch = requestTimeMsSinceEpoch;
- this.ackId = ackId;
- this.recordId = recordId;
- }
-
- public IncomingMessage withRequestTime(long requestTimeMsSinceEpoch) {
- return new IncomingMessage(
- elementBytes,
- attributes,
- timestampMsSinceEpoch,
- requestTimeMsSinceEpoch,
- ackId,
- recordId);
- }
-
- @Override
- public String toString() {
- return String.format(
- "IncomingMessage(%db, %dms)", elementBytes.length, timestampMsSinceEpoch);
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- IncomingMessage that = (IncomingMessage) o;
-
- return timestampMsSinceEpoch == that.timestampMsSinceEpoch
- && requestTimeMsSinceEpoch == that.requestTimeMsSinceEpoch
- && ackId.equals(that.ackId)
- && recordId.equals(that.recordId)
- && Arrays.equals(elementBytes, that.elementBytes)
- && Objects.equal(attributes, that.attributes);
- }
-
- @Override
- public int hashCode() {
- return Objects.hashCode(
- Arrays.hashCode(elementBytes),
- attributes,
- timestampMsSinceEpoch,
- requestTimeMsSinceEpoch,
- ackId,
- recordId);
+ return new AutoValue_PubsubClient_IncomingMessage(
+ message, timestampMsSinceEpoch, requestTimeMsSinceEpoch, ackId, recordId);
}
}
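After this refactor callers construct the protobuf message directly; a minimal construction sketch (payload, attribute and timestamp values are illustrative):

    com.google.pubsub.v1.PubsubMessage proto =
        com.google.pubsub.v1.PubsubMessage.newBuilder()
            .setData(com.google.protobuf.ByteString.copyFromUtf8("hello"))
            .putAttributes("key", "value")
            .build();
    // Timestamp is ms since epoch; the record id may be null when no id attribute is configured.
    OutgoingMessage outgoing = OutgoingMessage.of(proto, System.currentTimeMillis(), null);

The second of(...) overload above accepts the SDK-level org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage and performs the same conversion internally.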
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubGrpcClient.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubGrpcClient.java
index d15dadb..a3b6b8d 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubGrpcClient.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubGrpcClient.java
@@ -20,7 +20,6 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState;
import com.google.auth.Credentials;
-import com.google.protobuf.ByteString;
import com.google.protobuf.Timestamp;
import com.google.pubsub.v1.AcknowledgeRequest;
import com.google.pubsub.v1.DeleteSubscriptionRequest;
@@ -51,43 +50,68 @@
import io.grpc.netty.NegotiationType;
import io.grpc.netty.NettyChannelBuilder;
import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
-import org.apache.beam.sdk.extensions.gcp.options.GcpOptions;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Strings;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
-/**
- * A helper class for talking to Pubsub via grpc.
- *
- * <p>CAUTION: Currently uses the application default credentials and does not respect any
- * credentials-related arguments in {@link GcpOptions}.
- */
+/** A helper class for talking to Pubsub via grpc. */
public class PubsubGrpcClient extends PubsubClient {
- private static final String PUBSUB_ADDRESS = "pubsub.googleapis.com";
- private static final int PUBSUB_PORT = 443;
private static final int LIST_BATCH_SIZE = 1000;
private static final int DEFAULT_TIMEOUT_S = 15;
+ private static ManagedChannel channelForRootUrl(String urlString) throws IOException {
+ URL url;
+ try {
+ url = new URL(urlString);
+ } catch (MalformedURLException e) {
+ throw new IllegalArgumentException(
+ String.format("Could not parse pubsub root url \"%s\"", urlString), e);
+ }
+
+ int port = url.getPort();
+
+ if (port < 0) {
+ switch (url.getProtocol()) {
+ case "https":
+ port = 443;
+ break;
+ case "http":
+ port = 80;
+ break;
+ default:
+ throw new IllegalArgumentException(
+ String.format(
+ "Could not determine port for pubsub root url \"%s\". You must either specify the port or use the protocol \"https\" or \"http\"",
+ urlString));
+ }
+ }
+
+ return NettyChannelBuilder.forAddress(url.getHost(), port)
+ .negotiationType(NegotiationType.TLS)
+ .sslContext(GrpcSslContexts.forClient().ciphers(null).build())
+ .build();
+ }
+
private static class PubsubGrpcClientFactory implements PubsubClientFactory {
@Override
public PubsubClient newClient(
@Nullable String timestampAttribute, @Nullable String idAttribute, PubsubOptions options)
throws IOException {
- ManagedChannel channel =
- NettyChannelBuilder.forAddress(PUBSUB_ADDRESS, PUBSUB_PORT)
- .negotiationType(NegotiationType.TLS)
- .sslContext(GrpcSslContexts.forClient().ciphers(null).build())
- .build();
-
return new PubsubGrpcClient(
- timestampAttribute, idAttribute, DEFAULT_TIMEOUT_S, channel, options.getGcpCredential());
+ timestampAttribute,
+ idAttribute,
+ DEFAULT_TIMEOUT_S,
+ channelForRootUrl(options.getPubsubRootUrl()),
+ options.getGcpCredential());
}
@Override
@@ -188,21 +212,15 @@
public int publish(TopicPath topic, List<OutgoingMessage> outgoingMessages) throws IOException {
PublishRequest.Builder request = PublishRequest.newBuilder().setTopic(topic.getPath());
for (OutgoingMessage outgoingMessage : outgoingMessages) {
- PubsubMessage.Builder message =
- PubsubMessage.newBuilder().setData(ByteString.copyFrom(outgoingMessage.elementBytes));
-
- if (outgoingMessage.attributes != null) {
- message.putAllAttributes(outgoingMessage.attributes);
- }
+ PubsubMessage.Builder message = outgoingMessage.message().toBuilder();
if (timestampAttribute != null) {
- message
- .getMutableAttributes()
- .put(timestampAttribute, String.valueOf(outgoingMessage.timestampMsSinceEpoch));
+ message.putAttributes(
+ timestampAttribute, String.valueOf(outgoingMessage.timestampMsSinceEpoch()));
}
- if (idAttribute != null && !Strings.isNullOrEmpty(outgoingMessage.recordId)) {
- message.getMutableAttributes().put(idAttribute, outgoingMessage.recordId);
+ if (idAttribute != null && !Strings.isNullOrEmpty(outgoingMessage.recordId())) {
+ message.putAttributes(idAttribute, outgoingMessage.recordId());
}
request.addMessages(message);
@@ -234,9 +252,6 @@
PubsubMessage pubsubMessage = message.getMessage();
@Nullable Map<String, String> attributes = pubsubMessage.getAttributes();
- // Payload.
- byte[] elementBytes = pubsubMessage.getData().toByteArray();
-
// Timestamp.
String pubsubTimestampString = null;
Timestamp timestampProto = pubsubMessage.getPublishTime();
@@ -262,13 +277,8 @@
}
incomingMessages.add(
- new IncomingMessage(
- elementBytes,
- attributes,
- timestampMsSinceEpoch,
- requestTimeMsSinceEpoch,
- ackId,
- recordId));
+ IncomingMessage.of(
+ pubsubMessage, timestampMsSinceEpoch, requestTimeMsSinceEpoch, ackId, recordId));
}
return incomingMessages;
}
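For reference, the port-selection rule introduced in channelForRootUrl, restated as a self-contained helper (the method name is illustrative):

    static int portForPubsubRootUrl(String urlString) throws java.net.MalformedURLException {
      java.net.URL url = new java.net.URL(urlString);
      int port = url.getPort();
      if (port >= 0) {
        return port; // an explicit port always wins
      }
      switch (url.getProtocol()) {
        case "https":
          return 443;
        case "http":
          return 80;
        default:
          throw new IllegalArgumentException(
              String.format("Could not determine port for pubsub root url \"%s\"", urlString));
      }
    }

The https default of 443 matches the previously hard-coded pubsub.googleapis.com:443 endpoint; the factory now derives the channel from options.getPubsubRootUrl() instead.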
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIO.java
index da5266f..5f6d044 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIO.java
@@ -1303,7 +1303,14 @@
}
// NOTE: The record id is always null.
- output.add(new OutgoingMessage(payload, attributes, c.timestamp().getMillis(), null));
+ output.add(
+ OutgoingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFrom(payload))
+ .putAllAttributes(attributes)
+ .build(),
+ c.timestamp().getMillis(),
+ null));
currentOutputBytes += payload.length;
}
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClient.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClient.java
index 136b1d2..1ae5a55 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClient.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClient.java
@@ -39,8 +39,10 @@
import com.google.auth.Credentials;
import com.google.auth.http.HttpCredentialsAdapter;
import com.google.cloud.hadoop.util.ChainingHttpRequestInitializer;
+import com.google.protobuf.ByteString;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
@@ -123,8 +125,12 @@
public int publish(TopicPath topic, List<OutgoingMessage> outgoingMessages) throws IOException {
List<PubsubMessage> pubsubMessages = new ArrayList<>(outgoingMessages.size());
for (OutgoingMessage outgoingMessage : outgoingMessages) {
- PubsubMessage pubsubMessage = new PubsubMessage().encodeData(outgoingMessage.elementBytes);
+ PubsubMessage pubsubMessage =
+ new PubsubMessage().encodeData(outgoingMessage.message().getData().toByteArray());
pubsubMessage.setAttributes(getMessageAttributes(outgoingMessage));
+ if (!outgoingMessage.message().getOrderingKey().isEmpty()) {
+ pubsubMessage.put("orderingKey", outgoingMessage.message().getOrderingKey());
+ }
pubsubMessages.add(pubsubMessage);
}
PublishRequest request = new PublishRequest().setMessages(pubsubMessages);
@@ -135,16 +141,16 @@
private Map<String, String> getMessageAttributes(OutgoingMessage outgoingMessage) {
Map<String, String> attributes = null;
- if (outgoingMessage.attributes == null) {
+ if (outgoingMessage.message().getAttributesMap() == null) {
attributes = new TreeMap<>();
} else {
- attributes = new TreeMap<>(outgoingMessage.attributes);
+ attributes = new TreeMap<>(outgoingMessage.message().getAttributesMap());
}
if (timestampAttribute != null) {
- attributes.put(timestampAttribute, String.valueOf(outgoingMessage.timestampMsSinceEpoch));
+ attributes.put(timestampAttribute, String.valueOf(outgoingMessage.timestampMsSinceEpoch()));
}
- if (idAttribute != null && !Strings.isNullOrEmpty(outgoingMessage.recordId)) {
- attributes.put(idAttribute, outgoingMessage.recordId);
+ if (idAttribute != null && !Strings.isNullOrEmpty(outgoingMessage.recordId())) {
+ attributes.put(idAttribute, outgoingMessage.recordId());
}
return attributes;
}
@@ -166,7 +172,12 @@
List<IncomingMessage> incomingMessages = new ArrayList<>(response.getReceivedMessages().size());
for (ReceivedMessage message : response.getReceivedMessages()) {
PubsubMessage pubsubMessage = message.getMessage();
- @Nullable Map<String, String> attributes = pubsubMessage.getAttributes();
+ Map<String, String> attributes;
+ if (pubsubMessage.getAttributes() != null) {
+ attributes = pubsubMessage.getAttributes();
+ } else {
+ attributes = new HashMap<>();
+ }
// Payload.
byte[] elementBytes = pubsubMessage.getData() == null ? null : pubsubMessage.decodeData();
@@ -184,7 +195,7 @@
// Record id, if any.
@Nullable String recordId = null;
- if (idAttribute != null && attributes != null) {
+ if (idAttribute != null) {
recordId = attributes.get(idAttribute);
}
if (Strings.isNullOrEmpty(recordId)) {
@@ -192,10 +203,15 @@
recordId = pubsubMessage.getMessageId();
}
+ com.google.pubsub.v1.PubsubMessage.Builder protoMessage =
+ com.google.pubsub.v1.PubsubMessage.newBuilder();
+ protoMessage.setData(ByteString.copyFrom(elementBytes));
+ protoMessage.putAllAttributes(attributes);
+ protoMessage.setOrderingKey(
+ (String) pubsubMessage.getUnknownKeys().getOrDefault("orderingKey", ""));
incomingMessages.add(
- new IncomingMessage(
- elementBytes,
- attributes,
+ IncomingMessage.of(
+ protoMessage.build(),
timestampMsSinceEpoch,
requestTimeMsSinceEpoch,
ackId,
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubTestClient.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubTestClient.java
index 6b20b56..c3b915d 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubTestClient.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubTestClient.java
@@ -309,12 +309,17 @@
IncomingMessage incomingMessage = pendItr.next();
pendItr.remove();
IncomingMessage incomingMessageWithRequestTime =
- incomingMessage.withRequestTime(requestTimeMsSinceEpoch);
+ IncomingMessage.of(
+ incomingMessage.message(),
+ incomingMessage.timestampMsSinceEpoch(),
+ requestTimeMsSinceEpoch,
+ incomingMessage.ackId(),
+ incomingMessage.recordId());
incomingMessages.add(incomingMessageWithRequestTime);
STATE.pendingAckIncomingMessages.put(
- incomingMessageWithRequestTime.ackId, incomingMessageWithRequestTime);
+ incomingMessageWithRequestTime.ackId(), incomingMessageWithRequestTime);
STATE.ackDeadline.put(
- incomingMessageWithRequestTime.ackId,
+ incomingMessageWithRequestTime.ackId(),
requestTimeMsSinceEpoch + STATE.ackTimeoutSec * 1000);
if (incomingMessages.size() >= batchSize) {
break;
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSink.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSink.java
index 1258d0b..8be8c56 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSink.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSink.java
@@ -30,7 +30,6 @@
import javax.annotation.Nullable;
import org.apache.beam.sdk.coders.AtomicCoder;
import org.apache.beam.sdk.coders.BigEndianLongCoder;
-import org.apache.beam.sdk.coders.ByteArrayCoder;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.CoderException;
import org.apache.beam.sdk.coders.KvCoder;
@@ -38,6 +37,7 @@
import org.apache.beam.sdk.coders.NullableCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarIntCoder;
+import org.apache.beam.sdk.extensions.protobuf.ProtoCoder;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.OutgoingMessage;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.PubsubClientFactory;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.TopicPath;
@@ -101,19 +101,18 @@
@Override
public void encode(OutgoingMessage value, OutputStream outStream)
throws CoderException, IOException {
- ByteArrayCoder.of().encode(value.elementBytes, outStream);
- ATTRIBUTES_CODER.encode(value.attributes, outStream);
- BigEndianLongCoder.of().encode(value.timestampMsSinceEpoch, outStream);
- RECORD_ID_CODER.encode(value.recordId, outStream);
+ ProtoCoder.of(com.google.pubsub.v1.PubsubMessage.class).encode(value.message(), outStream);
+ BigEndianLongCoder.of().encode(value.timestampMsSinceEpoch(), outStream);
+ RECORD_ID_CODER.encode(value.recordId(), outStream);
}
@Override
public OutgoingMessage decode(InputStream inStream) throws CoderException, IOException {
- byte[] elementBytes = ByteArrayCoder.of().decode(inStream);
- Map<String, String> attributes = ATTRIBUTES_CODER.decode(inStream);
+ com.google.pubsub.v1.PubsubMessage message =
+ ProtoCoder.of(com.google.pubsub.v1.PubsubMessage.class).decode(inStream);
long timestampMsSinceEpoch = BigEndianLongCoder.of().decode(inStream);
@Nullable String recordId = RECORD_ID_CODER.decode(inStream);
- return new OutgoingMessage(elementBytes, attributes, timestampMsSinceEpoch, recordId);
+ return OutgoingMessage.of(message, timestampMsSinceEpoch, recordId);
}
}
@@ -154,7 +153,6 @@
elementCounter.inc();
PubsubMessage message = c.element();
byte[] elementBytes = message.getPayload();
- Map<String, String> attributes = message.getAttributeMap();
long timestampMsSinceEpoch = c.timestamp().getMillis();
@Nullable String recordId = null;
@@ -175,7 +173,7 @@
c.output(
KV.of(
ThreadLocalRandom.current().nextInt(numShards),
- new OutgoingMessage(elementBytes, attributes, timestampMsSinceEpoch, recordId)));
+ OutgoingMessage.of(message, timestampMsSinceEpoch, recordId)));
}
@Override
@@ -246,7 +244,8 @@
List<OutgoingMessage> pubsubMessages = new ArrayList<>(publishBatchSize);
int bytes = 0;
for (OutgoingMessage message : c.element().getValue()) {
- if (!pubsubMessages.isEmpty() && bytes + message.elementBytes.length > publishBatchBytes) {
+ if (!pubsubMessages.isEmpty()
+ && bytes + message.message().getData().size() > publishBatchBytes) {
// Break large (in bytes) batches into smaller.
// (We've already broken by batch size using the trigger below, though that may
// run slightly over the actual PUBLISH_BATCH_SIZE. We'll consider that ok since
@@ -257,7 +256,7 @@
bytes = 0;
}
pubsubMessages.add(message);
- bytes += message.elementBytes.length;
+ bytes += message.message().getData().size();
}
if (!pubsubMessages.isEmpty()) {
// BLOCKS until published.
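The coder above now delegates the message itself to ProtoCoder; a standalone round-trip sketch (inside a method that declares throws IOException; contents are illustrative):

    com.google.pubsub.v1.PubsubMessage original =
        com.google.pubsub.v1.PubsubMessage.newBuilder()
            .setData(com.google.protobuf.ByteString.copyFromUtf8("payload"))
            .build();
    java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
    ProtoCoder.of(com.google.pubsub.v1.PubsubMessage.class).encode(original, out);
    com.google.pubsub.v1.PubsubMessage decoded =
        ProtoCoder.of(com.google.pubsub.v1.PubsubMessage.class)
            .decode(new java.io.ByteArrayInputStream(out.toByteArray()));
    // decoded.equals(original) holds; the timestamp and record id are still encoded separately
    // with BigEndianLongCoder and RECORD_ID_CODER as before.

Batch-size accounting in the sink moves from elementBytes.length to message().getData().size(), so only payload bytes (not attributes) count toward publishBatchBytes, as before.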
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSource.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSource.java
index d8abfe1..230161c 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSource.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSource.java
@@ -727,18 +727,18 @@
// Capture the received messages.
for (PubsubClient.IncomingMessage incomingMessage : receivedMessages) {
notYetRead.add(incomingMessage);
- notYetReadBytes += incomingMessage.elementBytes.length;
+ notYetReadBytes += incomingMessage.message().getData().size();
inFlight.put(
- incomingMessage.ackId,
+ incomingMessage.ackId(),
new InFlightState(requestTimeMsSinceEpoch, deadlineMsSinceEpoch));
numReceived++;
numReceivedRecently.add(requestTimeMsSinceEpoch, 1L);
minReceivedTimestampMsSinceEpoch.add(
- requestTimeMsSinceEpoch, incomingMessage.timestampMsSinceEpoch);
+ requestTimeMsSinceEpoch, incomingMessage.timestampMsSinceEpoch());
maxReceivedTimestampMsSinceEpoch.add(
- requestTimeMsSinceEpoch, incomingMessage.timestampMsSinceEpoch);
+ requestTimeMsSinceEpoch, incomingMessage.timestampMsSinceEpoch());
minUnreadTimestampMsSinceEpoch.add(
- requestTimeMsSinceEpoch, incomingMessage.timestampMsSinceEpoch);
+ requestTimeMsSinceEpoch, incomingMessage.timestampMsSinceEpoch());
}
}
@@ -837,7 +837,7 @@
if (current != null) {
// Current is consumed. It can no longer contribute to holding back the watermark.
- minUnreadTimestampMsSinceEpoch.remove(current.requestTimeMsSinceEpoch);
+ minUnreadTimestampMsSinceEpoch.remove(current.requestTimeMsSinceEpoch());
current = null;
}
@@ -864,18 +864,18 @@
// Try again later.
return false;
}
- notYetReadBytes -= current.elementBytes.length;
+ notYetReadBytes -= current.message().getData().size();
checkState(notYetReadBytes >= 0);
long nowMsSinceEpoch = now();
- numReadBytes.add(nowMsSinceEpoch, current.elementBytes.length);
- minReadTimestampMsSinceEpoch.add(nowMsSinceEpoch, current.timestampMsSinceEpoch);
- if (current.timestampMsSinceEpoch < lastWatermarkMsSinceEpoch) {
+ numReadBytes.add(nowMsSinceEpoch, current.message().getData().size());
+ minReadTimestampMsSinceEpoch.add(nowMsSinceEpoch, current.timestampMsSinceEpoch());
+ if (current.timestampMsSinceEpoch() < lastWatermarkMsSinceEpoch) {
numLateMessages.add(nowMsSinceEpoch, 1L);
}
// Current message can be considered 'read' and will be persisted by the next
// checkpoint. So it is now safe to ACK back to Pubsub.
- safeToAckIds.add(current.ackId);
+ safeToAckIds.add(current.ackId());
return true;
}
@@ -884,7 +884,10 @@
if (current == null) {
throw new NoSuchElementException();
}
- return new PubsubMessage(current.elementBytes, current.attributes, current.recordId);
+ return new PubsubMessage(
+ current.message().getData().toByteArray(),
+ current.message().getAttributesMap(),
+ current.recordId());
}
@Override
@@ -892,7 +895,7 @@
if (current == null) {
throw new NoSuchElementException();
}
- return new Instant(current.timestampMsSinceEpoch);
+ return new Instant(current.timestampMsSinceEpoch());
}
@Override
@@ -900,7 +903,7 @@
if (current == null) {
throw new NoSuchElementException();
}
- return current.recordId.getBytes(StandardCharsets.UTF_8);
+ return current.recordId().getBytes(StandardCharsets.UTF_8);
}
/**
@@ -984,7 +987,7 @@
List<String> snapshotSafeToAckIds = Lists.newArrayList(safeToAckIds);
List<String> snapshotNotYetReadIds = new ArrayList<>(notYetRead.size());
for (PubsubClient.IncomingMessage incomingMessage : notYetRead) {
- snapshotNotYetReadIds.add(incomingMessage.ackId);
+ snapshotNotYetReadIds.add(incomingMessage.ackId());
}
if (outer.subscriptionPath == null) {
// need to include the subscription in case we resume, as it's not stored in the source.
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/TestPubsub.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/TestPubsub.java
index 1e75d43..e1e8711 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/TestPubsub.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/TestPubsub.java
@@ -22,12 +22,14 @@
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsInAnyOrder;
+import com.google.protobuf.ByteString;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeoutException;
import javax.annotation.Nullable;
+import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.IncomingMessage;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.ProjectPath;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.SubscriptionPath;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.TopicPath;
@@ -205,11 +207,16 @@
if (!messages.isEmpty()) {
pubsub.acknowledge(
subscriptionPath,
- messages.stream().map(msg -> msg.ackId).collect(ImmutableList.toImmutableList()));
+ messages.stream().map(IncomingMessage::ackId).collect(ImmutableList.toImmutableList()));
}
return messages.stream()
- .map(msg -> new PubsubMessage(msg.elementBytes, msg.attributes, msg.recordId))
+ .map(
+ msg ->
+ new PubsubMessage(
+ msg.message().getData().toByteArray(),
+ msg.message().getAttributesMap(),
+ msg.recordId()))
.collect(ImmutableList.toImmutableList());
}
@@ -292,7 +299,12 @@
}
private PubsubClient.OutgoingMessage toOutgoingMessage(PubsubMessage message) {
- return new PubsubClient.OutgoingMessage(
- message.getPayload(), message.getAttributeMap(), DateTime.now().getMillis(), null);
+ return PubsubClient.OutgoingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFrom(message.getPayload()))
+ .putAllAttributes(message.getAttributeMap())
+ .build(),
+ DateTime.now().getMillis(),
+ null);
}
}
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/TestPubsubSignal.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/TestPubsubSignal.java
index f4f8b18..de4a715 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/TestPubsubSignal.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/TestPubsubSignal.java
@@ -17,7 +17,6 @@
*/
package org.apache.beam.sdk.io.gcp.pubsub;
-import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.stream.Collectors.toList;
import static org.apache.beam.sdk.io.gcp.pubsub.TestPubsub.createTopicName;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState;
@@ -30,6 +29,7 @@
import java.util.concurrent.ThreadLocalRandom;
import javax.annotation.Nullable;
import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.IncomingMessage;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.SubscriptionPath;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.TopicPath;
import org.apache.beam.sdk.state.BagState;
@@ -251,7 +251,7 @@
try {
signal = pubsub.pull(DateTime.now().getMillis(), signalSubscriptionPath, 1, false);
pubsub.acknowledge(
- signalSubscriptionPath, signal.stream().map(m -> m.ackId).collect(toList()));
+ signalSubscriptionPath, signal.stream().map(IncomingMessage::ackId).collect(toList()));
break;
} catch (StatusRuntimeException e) {
if (!Status.DEADLINE_EXCEEDED.equals(e.getStatus())) {
@@ -271,7 +271,7 @@
signalSubscriptionPath, duration.getStandardSeconds()));
}
- return new String(signal.get(0).elementBytes, UTF_8);
+ return signal.get(0).message().getData().toStringUtf8();
}
private void sleep(long t) {
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/testing/FakeDatasetService.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/testing/FakeDatasetService.java
index 7916513..2ab1fdc 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/testing/FakeDatasetService.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/testing/FakeDatasetService.java
@@ -32,7 +32,6 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers;
@@ -90,6 +89,13 @@
}
}
+ public List<String> getAllIds(String projectId, String datasetId, String tableId)
+ throws InterruptedException, IOException {
+ synchronized (tables) {
+ return getTableContainer(projectId, datasetId, tableId).getIds();
+ }
+ }
+
private TableContainer getTableContainer(String projectId, String datasetId, String tableId)
throws InterruptedException, IOException {
synchronized (tables) {
@@ -215,7 +221,15 @@
PaneInfo.ON_TIME_AND_ONLY_FIRING));
}
return insertAll(
- ref, windowedRows, insertIdList, InsertRetryPolicy.alwaysRetry(), null, null, false, false);
+ ref,
+ windowedRows,
+ insertIdList,
+ InsertRetryPolicy.alwaysRetry(),
+ null,
+ null,
+ false,
+ false,
+ false);
}
@Override
@@ -227,17 +241,17 @@
List<ValueInSingleWindow<T>> failedInserts,
ErrorContainer<T> errorContainer,
boolean skipInvalidRows,
- boolean ignoreUnknownValues)
+ boolean ignoreUnknownValues,
+ boolean ignoreInsertIds)
throws IOException, InterruptedException {
Map<TableRow, List<TableDataInsertAllResponse.InsertErrors>> insertErrors = getInsertErrors();
synchronized (tables) {
+ if (ignoreInsertIds) {
+ insertIdList = null;
+ }
+
if (insertIdList != null) {
assertEquals(rowList.size(), insertIdList.size());
- } else {
- insertIdList = Lists.newArrayListWithExpectedSize(rowList.size());
- for (int i = 0; i < rowList.size(); ++i) {
- insertIdList.add(Integer.toString(ThreadLocalRandom.current().nextInt()));
- }
}
long dataSize = 0;
@@ -258,7 +272,11 @@
}
}
if (shouldInsert) {
- dataSize += tableContainer.addRow(row, insertIdList.get(i));
+ if (insertIdList == null) {
+ dataSize += tableContainer.addRow(row, null);
+ } else {
+ dataSize += tableContainer.addRow(row, insertIdList.get(i));
+ }
} else {
errorContainer.add(
failedInserts, allErrors.get(allErrors.size() - 1), ref, rowList.get(i));
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/testing/TableContainer.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/testing/TableContainer.java
index cc7ad4f..46c9f37 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/testing/TableContainer.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/testing/TableContainer.java
@@ -21,6 +21,7 @@
import com.google.api.services.bigquery.model.TableRow;
import java.util.ArrayList;
import java.util.List;
+import org.apache.beam.sdk.io.gcp.bigquery.TableRowJsonCoder;
/** Encapsulates a BigQuery Table and its contents. */
class TableContainer {
@@ -39,15 +40,17 @@
long addRow(TableRow row, String id) {
rows.add(row);
- ids.add(id);
- long rowSize = row.toString().length();
- Long tableSize = table.getNumBytes();
- if (tableSize == null) {
- table.setNumBytes(rowSize);
- } else {
- table.setNumBytes(tableSize + rowSize);
+ if (id != null) {
+ ids.add(id);
}
- return rowSize;
+ long tableSize = table.getNumBytes() == null ? 0L : table.getNumBytes();
+ try {
+ long rowSize = TableRowJsonCoder.of().getEncodedElementByteSize(row);
+ table.setNumBytes(tableSize + rowSize);
+ return rowSize;
+ } catch (Exception ex) {
+ throw new RuntimeException("Failed to convert the row to JSON", ex);
+ }
}
Table getTable() {
@@ -57,4 +60,8 @@
List<TableRow> getRows() {
return rows;
}
+
+ List<String> getIds() {
+ return ids;
+ }
}
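The new size accounting in addRow drives the updated expectations in the BigQuery read and util tests below. A self-contained sketch of the arithmetic, using the JSON strings quoted in the test comments (class and variable names are illustrative):

```java
import java.nio.charset.StandardCharsets;

public class RowSizeArithmetic {
  public static void main(String[] args) {
    // TableRowJsonCoder serializes a row to its JSON string and encodes it with
    // StringUtf8Coder: a varint length prefix (1 byte for short strings) plus the UTF-8 bytes.
    String readTestRow = "{\"name\":\"a\",\"number\":1}"; // 23 characters
    String utilTestRow = "{\"f\":[{\"v\":\"foo\"},{\"v\":1234}]}"; // 30 characters

    long readRowSize = 1 + readTestRow.getBytes(StandardCharsets.UTF_8).length; // 24
    long utilRowSize = 1 + utilTestRow.getBytes(StandardCharsets.UTF_8).length; // 31

    // Matches the updated assertions: 24 bytes per row in BigQueryIOReadTest and
    // 25 * 31 bytes in BigQueryUtilTest.
    System.out.println(readRowSize + " bytes, " + utilRowSize + " bytes");
  }
}
```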
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/GcpApiSurfaceTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/GcpApiSurfaceTest.java
index 50f6548..bc146ce 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/GcpApiSurfaceTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/GcpApiSurfaceTest.java
@@ -67,6 +67,7 @@
classesInPackage("com.google.cloud.bigtable.config"),
classesInPackage("com.google.cloud.bigtable.data"),
classesInPackage("com.google.spanner.v1"),
+ classesInPackage("com.google.pubsub.v1"),
Matchers.equalTo(com.google.api.gax.rpc.ApiException.class),
Matchers.<Class<?>>equalTo(com.google.api.gax.longrunning.OperationFuture.class),
Matchers.<Class<?>>equalTo(com.google.api.gax.longrunning.OperationSnapshot.class),
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOReadTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOReadTest.java
index 3edd6e3..a2e5006 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOReadTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOReadTest.java
@@ -627,7 +627,11 @@
.toSource(stepUuid, TableRowJsonCoder.of(), BigQueryIO.TableRowParser.INSTANCE);
PipelineOptions options = PipelineOptionsFactory.create();
- assertEquals(108, bqSource.getEstimatedSizeBytes(options));
+
+ // Each row should be 24 bytes (see StringUtf8Coder for details):
+ // 1 length byte plus the 23-byte JSON string {"name":"a","number":1}
+ long expectedSize = 24L * data.size();
+ assertEquals(expectedSize, bqSource.getEstimatedSizeBytes(options));
}
@Test
@@ -661,7 +665,12 @@
.toSource(stepUuid, TableRowJsonCoder.of(), BigQueryIO.TableRowParser.INSTANCE);
PipelineOptions options = PipelineOptionsFactory.create();
- assertEquals(118, bqSource.getEstimatedSizeBytes(options));
+
+ // Each row should be 24 bytes (see StringUtf8Coder for details):
+ // 1 length byte plus the 23-byte JSON string {"name":"a","number":1}
+ // An additional 10 bytes come from the estimated bytes of the table's Streamingbuffer
+ long expectedSize = 24L * data.size() + 10;
+ assertEquals(expectedSize, bqSource.getEstimatedSizeBytes(options));
}
@Test
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageQueryTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageQueryTest.java
index c208d17..b53ef9f 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageQueryTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageQueryTest.java
@@ -45,11 +45,11 @@
import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsRequest;
import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsResponse;
import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadSession;
+import com.google.cloud.bigquery.storage.v1beta1.Storage.ShardingStrategy;
import com.google.cloud.bigquery.storage.v1beta1.Storage.Stream;
import com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition;
import com.google.cloud.bigquery.storage.v1beta1.Storage.StreamStatus;
import com.google.protobuf.ByteString;
-import com.google.protobuf.UnknownFieldSet;
import java.io.ByteArrayOutputStream;
import java.util.Collection;
import java.util.List;
@@ -387,12 +387,7 @@
.setParent("projects/" + options.getProject())
.setTableReference(BigQueryHelpers.toTableRefProto(tempTableReference))
.setRequestedStreams(requestedStreamCount)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setShardingStrategy().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(7, UnknownFieldSet.Field.newBuilder().addVarint(2).build())
- .build())
+ .setShardingStrategy(ShardingStrategy.BALANCED)
.build();
ReadSession.Builder builder = ReadSession.newBuilder();
@@ -463,12 +458,7 @@
.setParent("projects/" + options.getProject())
.setTableReference(BigQueryHelpers.toTableRefProto(tempTableReference))
.setRequestedStreams(1024)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setShardingStrategy().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(7, UnknownFieldSet.Field.newBuilder().addVarint(2).build())
- .build())
+ .setShardingStrategy(ShardingStrategy.BALANCED)
.build();
ReadSession.Builder builder = ReadSession.newBuilder();
@@ -544,19 +534,7 @@
AvroRows.newBuilder()
.setSerializedBinaryRows(ByteString.copyFrom(outputStream.toByteArray()))
.setRowCount(genericRecords.size()))
- .setStatus(
- StreamStatus.newBuilder()
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setFractionConsumed().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(
- 2,
- UnknownFieldSet.Field.newBuilder()
- .addFixed32(
- java.lang.Float.floatToIntBits((float) fractionConsumed))
- .build())
- .build()))
+ .setStatus(StreamStatus.newBuilder().setFractionConsumed((float) fractionConsumed))
.build();
}
@@ -616,12 +594,7 @@
.setParent("projects/" + options.getProject())
.setTableReference(BigQueryHelpers.toTableRefProto(tempTableReference))
.setRequestedStreams(10)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setShardingStrategy().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(7, UnknownFieldSet.Field.newBuilder().addVarint(2).build())
- .build())
+ .setShardingStrategy(ShardingStrategy.BALANCED)
.build();
ReadSession emptyReadSession = ReadSession.newBuilder().build();
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageReadTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageReadTest.java
index fe8b4d9..8a0f448 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageReadTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageReadTest.java
@@ -45,13 +45,13 @@
import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsRequest;
import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsResponse;
import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadSession;
+import com.google.cloud.bigquery.storage.v1beta1.Storage.ShardingStrategy;
import com.google.cloud.bigquery.storage.v1beta1.Storage.SplitReadStreamRequest;
import com.google.cloud.bigquery.storage.v1beta1.Storage.SplitReadStreamResponse;
import com.google.cloud.bigquery.storage.v1beta1.Storage.Stream;
import com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition;
import com.google.cloud.bigquery.storage.v1beta1.Storage.StreamStatus;
import com.google.protobuf.ByteString;
-import com.google.protobuf.UnknownFieldSet;
import io.grpc.Status;
import io.grpc.Status.Code;
import io.grpc.StatusRuntimeException;
@@ -417,12 +417,7 @@
.setParent("projects/project-id")
.setTableReference(BigQueryHelpers.toTableRefProto(tableRef))
.setRequestedStreams(streamCount)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setShardingStrategy().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(7, UnknownFieldSet.Field.newBuilder().addVarint(2).build())
- .build())
+ .setShardingStrategy(ShardingStrategy.BALANCED)
.build();
ReadSession.Builder builder = ReadSession.newBuilder();
@@ -480,12 +475,7 @@
.setTableReference(BigQueryHelpers.toTableRefProto(tableRef))
.setRequestedStreams(10)
.setReadOptions(readOptions)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setShardingStrategy().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(7, UnknownFieldSet.Field.newBuilder().addVarint(2).build())
- .build())
+ .setShardingStrategy(ShardingStrategy.BALANCED)
.build();
ReadSession.Builder builder = ReadSession.newBuilder();
@@ -543,12 +533,7 @@
.setTableReference(BigQueryHelpers.toTableRefProto(tableRef))
.setRequestedStreams(10)
.setReadOptions(readOptions)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setShardingStrategy().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(7, UnknownFieldSet.Field.newBuilder().addVarint(2).build())
- .build())
+ .setShardingStrategy(ShardingStrategy.BALANCED)
.build();
ReadSession.Builder builder = ReadSession.newBuilder();
@@ -593,12 +578,7 @@
.setParent("projects/project-id")
.setTableReference(BigQueryHelpers.toTableRefProto(tableRef))
.setRequestedStreams(1024)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setShardingStrategy().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(7, UnknownFieldSet.Field.newBuilder().addVarint(2).build())
- .build())
+ .setShardingStrategy(ShardingStrategy.BALANCED)
.build();
ReadSession.Builder builder = ReadSession.newBuilder();
@@ -643,12 +623,7 @@
.setParent("projects/project-id")
.setTableReference(BigQueryHelpers.toTableRefProto(tableRef))
.setRequestedStreams(1024)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setShardingStrategy().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(7, UnknownFieldSet.Field.newBuilder().addVarint(2).build())
- .build())
+ .setShardingStrategy(ShardingStrategy.BALANCED)
.build();
ReadSession emptyReadSession = ReadSession.newBuilder().build();
@@ -734,19 +709,7 @@
AvroRows.newBuilder()
.setSerializedBinaryRows(ByteString.copyFrom(outputStream.toByteArray()))
.setRowCount(genericRecords.size()))
- .setStatus(
- StreamStatus.newBuilder()
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setFractionConsumed().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(
- 2,
- UnknownFieldSet.Field.newBuilder()
- .addFixed32(
- java.lang.Float.floatToIntBits((float) fractionConsumed))
- .build())
- .build()))
+ .setStatus(StreamStatus.newBuilder().setFractionConsumed((float) fractionConsumed))
.build();
}
@@ -949,16 +912,7 @@
when(fakeStorageClient.splitReadStream(
SplitReadStreamRequest.newBuilder()
.setOriginalStream(parentStream)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setFraction().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(
- 2,
- UnknownFieldSet.Field.newBuilder()
- .addFixed32(java.lang.Float.floatToIntBits(0.5f))
- .build())
- .build())
+ .setFraction(0.5f)
.build()))
.thenReturn(
SplitReadStreamResponse.newBuilder()
@@ -1052,16 +1006,7 @@
when(fakeStorageClient.splitReadStream(
SplitReadStreamRequest.newBuilder()
.setOriginalStream(parentStream)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setFraction().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(
- 2,
- UnknownFieldSet.Field.newBuilder()
- .addFixed32(java.lang.Float.floatToIntBits(0.5f))
- .build())
- .build())
+ .setFraction(0.5f)
.build()))
.thenReturn(
SplitReadStreamResponse.newBuilder()
@@ -1166,16 +1111,7 @@
when(fakeStorageClient.splitReadStream(
SplitReadStreamRequest.newBuilder()
.setOriginalStream(streams.get(0))
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setFraction().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(
- 2,
- UnknownFieldSet.Field.newBuilder()
- .addFixed32(java.lang.Float.floatToIntBits(0.83f))
- .build())
- .build())
+ .setFraction(0.83f)
.build()))
.thenReturn(
SplitReadStreamResponse.newBuilder()
@@ -1206,16 +1142,7 @@
when(fakeStorageClient.splitReadStream(
SplitReadStreamRequest.newBuilder()
.setOriginalStream(streams.get(1))
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setFraction().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(
- 2,
- UnknownFieldSet.Field.newBuilder()
- .addFixed32(java.lang.Float.floatToIntBits(0.75f))
- .build())
- .build())
+ .setFraction(0.75f)
.build()))
.thenReturn(
SplitReadStreamResponse.newBuilder()
@@ -1302,16 +1229,7 @@
when(fakeStorageClient.splitReadStream(
SplitReadStreamRequest.newBuilder()
.setOriginalStream(parentStream)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setFraction().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(
- 2,
- UnknownFieldSet.Field.newBuilder()
- .addFixed32(java.lang.Float.floatToIntBits(0.5f))
- .build())
- .build())
+ .setFraction(0.5f)
.build()))
.thenReturn(SplitReadStreamResponse.getDefaultInstance());
@@ -1379,16 +1297,7 @@
when(fakeStorageClient.splitReadStream(
SplitReadStreamRequest.newBuilder()
.setOriginalStream(parentStream)
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setFraction().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(
- 2,
- UnknownFieldSet.Field.newBuilder()
- .addFixed32(java.lang.Float.floatToIntBits(0.5f))
- .build())
- .build())
+ .setFraction(0.5f)
.build()))
.thenReturn(
SplitReadStreamResponse.newBuilder()
@@ -1472,12 +1381,7 @@
.setRequestedStreams(10)
.setReadOptions(
TableReadOptions.newBuilder().addSelectedFields("name").addSelectedFields("number"))
- // TODO(aryann): Once we rebuild the generated client code, we should change this to
- // use setShardingStrategy().
- .setUnknownFields(
- UnknownFieldSet.newBuilder()
- .addField(7, UnknownFieldSet.Field.newBuilder().addVarint(2).build())
- .build())
+ .setShardingStrategy(ShardingStrategy.BALANCED)
.build();
ReadSession readSession =
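The Storage API test changes above all follow one pattern: the regenerated v1beta1 stubs expose typed setters for fields that previously had to be injected through UnknownFieldSet. A condensed sketch of the builders as they are now used (values are illustrative):

```java
import com.google.cloud.bigquery.storage.v1beta1.Storage.CreateReadSessionRequest;
import com.google.cloud.bigquery.storage.v1beta1.Storage.ShardingStrategy;
import com.google.cloud.bigquery.storage.v1beta1.Storage.SplitReadStreamRequest;
import com.google.cloud.bigquery.storage.v1beta1.Storage.StreamStatus;

class TypedSettersSketch {
  static void build() {
    // Previously injected as UnknownFieldSet field 7, varint value 2.
    CreateReadSessionRequest createRequest =
        CreateReadSessionRequest.newBuilder()
            .setRequestedStreams(10)
            .setShardingStrategy(ShardingStrategy.BALANCED)
            .build();

    // Previously injected as UnknownFieldSet field 2, fixed32-encoded floats.
    SplitReadStreamRequest splitRequest =
        SplitReadStreamRequest.newBuilder().setFraction(0.5f).build();
    StreamStatus status = StreamStatus.newBuilder().setFractionConsumed(0.75f).build();
  }
}
```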
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java
index 99e3fba..624a62d 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java
@@ -684,6 +684,35 @@
p.run();
}
+ @Test
+ public void testWriteWithoutInsertId() throws Exception {
+ TableRow row1 = new TableRow().set("name", "a").set("number", 1);
+ TableRow row2 = new TableRow().set("name", "b").set("number", 2);
+ TableRow row3 = new TableRow().set("name", "c").set("number", 3);
+ p.apply(Create.of(row1, row2, row3).withCoder(TableRowJsonCoder.of()))
+ .apply(
+ BigQueryIO.writeTableRows()
+ .to("project-id:dataset-id.table-id")
+ .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
+ .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
+ .withSchema(
+ new TableSchema()
+ .setFields(
+ ImmutableList.of(
+ new TableFieldSchema().setName("name").setType("STRING"),
+ new TableFieldSchema().setName("number").setType("INTEGER"))))
+ .withTestServices(fakeBqServices)
+ .ignoreInsertIds()
+ .withoutValidation());
+ p.run();
+ assertThat(
+ fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
+ containsInAnyOrder(row1, row2, row3));
+ // Verify no insert id is added.
+ assertThat(
+ fakeDatasetService.getAllIds("project-id", "dataset-id", "table-id"), containsInAnyOrder());
+ }
+
@AutoValue
abstract static class InputRecord implements Serializable {
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
index fb88bb4..bac6c23 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
@@ -517,6 +517,7 @@
null,
null,
false,
+ false,
false);
verify(response, times(2)).getStatusCode();
verify(response, times(2)).getContent();
@@ -551,6 +552,7 @@
null,
null,
false,
+ false,
false);
verify(response, times(2)).getStatusCode();
verify(response, times(2)).getContent();
@@ -601,6 +603,7 @@
null,
null,
false,
+ false,
false);
verify(response, times(2)).getStatusCode();
verify(response, times(2)).getContent();
@@ -646,6 +649,7 @@
null,
null,
false,
+ false,
false);
fail();
} catch (IOException e) {
@@ -693,6 +697,7 @@
null,
null,
false,
+ false,
false);
verify(response, times(2)).getStatusCode();
verify(response, times(2)).getContent();
@@ -761,17 +766,18 @@
failedInserts,
ErrorContainer.TABLE_ROW_ERROR_CONTAINER,
false,
+ false,
false);
assertEquals(1, failedInserts.size());
expectedLogs.verifyInfo("Retrying 1 failed inserts to BigQuery");
}
/**
- * Tests that {@link DatasetServiceImpl#insertAll} respects the skipInvalidRows and
- * ignoreUnknownValues parameters.
+ * Tests that {@link DatasetServiceImpl#insertAll} respects the skipInvalidRows,
+ * ignoreUnknownValues and ignoreInsertIds parameters.
*/
@Test
- public void testSkipInvalidRowsIgnoreUnknownValuesStreaming()
+ public void testSkipInvalidRowsIgnoreUnknownIgnoreInsertIdsValuesStreaming()
throws InterruptedException, IOException {
TableReference ref =
new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
@@ -790,7 +796,7 @@
DatasetServiceImpl dataService =
new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
- // First, test with both flags disabled
+ // First, test with all flags disabled
dataService.insertAll(
ref,
rows,
@@ -801,6 +807,7 @@
Lists.newArrayList(),
ErrorContainer.TABLE_ROW_ERROR_CONTAINER,
false,
+ false,
false);
TableDataInsertAllRequest parsedRequest =
@@ -809,7 +816,7 @@
assertFalse(parsedRequest.getSkipInvalidRows());
assertFalse(parsedRequest.getIgnoreUnknownValues());
- // Then with both enabled
+ // Then with all flags enabled
dataService.insertAll(
ref,
rows,
@@ -820,12 +827,15 @@
Lists.newArrayList(),
ErrorContainer.TABLE_ROW_ERROR_CONTAINER,
true,
+ true,
true);
parsedRequest = fromString(request.getContentAsString(), TableDataInsertAllRequest.class);
assertTrue(parsedRequest.getSkipInvalidRows());
assertTrue(parsedRequest.getIgnoreUnknownValues());
+ assertNull(parsedRequest.getRows().get(0).getInsertId());
+ assertNull(parsedRequest.getRows().get(1).getInsertId());
}
/** A helper to convert a string response back to a {@link GenericJson} subclass. */
@@ -1002,6 +1012,7 @@
failedInserts,
ErrorContainer.TABLE_ROW_ERROR_CONTAINER,
false,
+ false,
false);
assertThat(failedInserts, is(rows));
@@ -1056,6 +1067,7 @@
failedInserts,
ErrorContainer.BIG_QUERY_INSERT_ERROR_ERROR_CONTAINER,
false,
+ false,
false);
assertThat(failedInserts, is(expected));
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java
index 12adc2e..98b8bc8 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilTest.java
@@ -18,8 +18,8 @@
package org.apache.beam.sdk.io.gcp.bigquery;
import static org.junit.Assert.assertEquals;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.atLeastOnce;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
@@ -205,11 +205,11 @@
try {
totalBytes =
datasetService.insertAll(
- ref, rows, ids, InsertRetryPolicy.alwaysRetry(), null, null, false, false);
+ ref, rows, ids, InsertRetryPolicy.alwaysRetry(), null, null, false, false, false);
} finally {
verifyInsertAll(5);
- // Each of the 25 rows is 23 bytes: "{f=[{v=foo}, {v=1234}]}"
- assertEquals("Incorrect byte count", 25L * 23L, totalBytes);
+ // Each of the 25 rows is 31 bytes: 1 length byte plus the 30-byte JSON '{"f":[{"v":"foo"},{"v":1234}]}'
+ assertEquals("Incorrect byte count", 25L * 31L, totalBytes);
}
}
}
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubGrpcClientTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubGrpcClientTest.java
index 7c53170..4dd719b 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubGrpcClientTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubGrpcClientTest.java
@@ -142,11 +142,11 @@
List<IncomingMessage> acutalMessages = client.pull(REQ_TIME, SUBSCRIPTION, 10, true);
assertEquals(1, acutalMessages.size());
IncomingMessage actualMessage = acutalMessages.get(0);
- assertEquals(ACK_ID, actualMessage.ackId);
- assertEquals(DATA, new String(actualMessage.elementBytes, StandardCharsets.UTF_8));
- assertEquals(RECORD_ID, actualMessage.recordId);
- assertEquals(REQ_TIME, actualMessage.requestTimeMsSinceEpoch);
- assertEquals(MESSAGE_TIME, actualMessage.timestampMsSinceEpoch);
+ assertEquals(ACK_ID, actualMessage.ackId());
+ assertEquals(DATA, actualMessage.message().getData().toStringUtf8());
+ assertEquals(RECORD_ID, actualMessage.recordId());
+ assertEquals(REQ_TIME, actualMessage.requestTimeMsSinceEpoch());
+ assertEquals(MESSAGE_TIME, actualMessage.timestampMsSinceEpoch());
assertEquals(expectedRequest, Iterables.getOnlyElement(requestsReceived));
} finally {
server.shutdownNow();
@@ -187,8 +187,13 @@
InProcessServerBuilder.forName(channelName).addService(publisherImplBase).build().start();
try {
OutgoingMessage actualMessage =
- new OutgoingMessage(
- DATA.getBytes(StandardCharsets.UTF_8), ATTRIBUTES, MESSAGE_TIME, RECORD_ID);
+ OutgoingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFromUtf8(DATA))
+ .putAllAttributes(ATTRIBUTES)
+ .build(),
+ MESSAGE_TIME,
+ RECORD_ID);
int n = client.publish(TOPIC, ImmutableList.of(actualMessage));
assertEquals(1, n);
assertEquals(expectedRequest, Iterables.getOnlyElement(requestsReceived));
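The Pub/Sub test changes in this and the following files reflect the same API migration: IncomingMessage and OutgoingMessage are now AutoValue types wrapping a com.google.pubsub.v1.PubsubMessage, built via of(...), with the old public fields replaced by accessor methods. A minimal sketch, assuming the fixture constants used by these tests (DATA, ATTRIBUTES, MESSAGE_TIME, RECORD_ID):

```java
// Construction: the payload and attributes live on the wrapped proto message.
OutgoingMessage outgoing =
    OutgoingMessage.of(
        com.google.pubsub.v1.PubsubMessage.newBuilder()
            .setData(ByteString.copyFromUtf8(DATA))
            .putAllAttributes(ATTRIBUTES)
            .build(),
        MESSAGE_TIME,
        RECORD_ID);

// Access: fields such as elementBytes and recordId become accessor methods.
String payload = outgoing.message().getData().toStringUtf8();
long timestampMs = outgoing.timestampMsSinceEpoch();
String recordId = outgoing.recordId();
```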
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIOTest.java
index 65b89a7..0dc910f 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIOTest.java
@@ -29,6 +29,7 @@
import static org.junit.Assert.assertThat;
import com.google.api.client.util.Clock;
+import com.google.protobuf.ByteString;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
@@ -391,9 +392,10 @@
})
.map(
ba ->
- new IncomingMessage(
- ba,
- null,
+ IncomingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFrom(ba))
+ .build(),
1234L,
0,
UUID.randomUUID().toString(),
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClientTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClientTest.java
index f7fc0f3..aad9729 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClientTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClientTest.java
@@ -34,6 +34,7 @@
import com.google.api.services.pubsub.model.ReceivedMessage;
import com.google.api.services.pubsub.model.Subscription;
import com.google.api.services.pubsub.model.Topic;
+import com.google.protobuf.ByteString;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
@@ -73,6 +74,7 @@
private static final String DATA = "testData";
private static final String RECORD_ID = "testRecordId";
private static final String ACK_ID = "testAckId";
+ private static final String ORDERING_KEY = "testOrderingKey";
@Before
public void setup() {
@@ -98,7 +100,8 @@
.setPublishTime(String.valueOf(PUB_TIME))
.setAttributes(
ImmutableMap.of(
- TIMESTAMP_ATTRIBUTE, String.valueOf(MESSAGE_TIME), ID_ATTRIBUTE, RECORD_ID));
+ TIMESTAMP_ATTRIBUTE, String.valueOf(MESSAGE_TIME), ID_ATTRIBUTE, RECORD_ID))
+ .set("orderingKey", ORDERING_KEY);
ReceivedMessage expectedReceivedMessage =
new ReceivedMessage().setMessage(expectedPubsubMessage).setAckId(ACK_ID);
PullResponse expectedResponse =
@@ -113,11 +116,42 @@
List<IncomingMessage> acutalMessages = client.pull(REQ_TIME, SUBSCRIPTION, 10, true);
assertEquals(1, acutalMessages.size());
IncomingMessage actualMessage = acutalMessages.get(0);
- assertEquals(ACK_ID, actualMessage.ackId);
- assertEquals(DATA, new String(actualMessage.elementBytes, StandardCharsets.UTF_8));
- assertEquals(RECORD_ID, actualMessage.recordId);
- assertEquals(REQ_TIME, actualMessage.requestTimeMsSinceEpoch);
- assertEquals(MESSAGE_TIME, actualMessage.timestampMsSinceEpoch);
+ assertEquals(ACK_ID, actualMessage.ackId());
+ assertEquals(DATA, actualMessage.message().getData().toStringUtf8());
+ assertEquals(RECORD_ID, actualMessage.recordId());
+ assertEquals(REQ_TIME, actualMessage.requestTimeMsSinceEpoch());
+ assertEquals(MESSAGE_TIME, actualMessage.timestampMsSinceEpoch());
+ assertEquals(ORDERING_KEY, actualMessage.message().getOrderingKey());
+ }
+
+ @Test
+ public void pullOneMessageEmptyAttributes() throws IOException {
+ client = new PubsubJsonClient(null, null, mockPubsub);
+ String expectedSubscription = SUBSCRIPTION.getPath();
+ PullRequest expectedRequest = new PullRequest().setReturnImmediately(true).setMaxMessages(10);
+ PubsubMessage expectedPubsubMessage =
+ new PubsubMessage()
+ .setMessageId(MESSAGE_ID)
+ .encodeData(DATA.getBytes(StandardCharsets.UTF_8))
+ .setPublishTime(String.valueOf(PUB_TIME));
+ ReceivedMessage expectedReceivedMessage =
+ new ReceivedMessage().setMessage(expectedPubsubMessage).setAckId(ACK_ID);
+ PullResponse expectedResponse =
+ new PullResponse().setReceivedMessages(ImmutableList.of(expectedReceivedMessage));
+ when((Object)
+ (mockPubsub
+ .projects()
+ .subscriptions()
+ .pull(expectedSubscription, expectedRequest)
+ .execute()))
+ .thenReturn(expectedResponse);
+ List<IncomingMessage> actualMessages = client.pull(REQ_TIME, SUBSCRIPTION, 10, true);
+ assertEquals(1, actualMessages.size());
+ IncomingMessage actualMessage = actualMessages.get(0);
+ assertEquals(ACK_ID, actualMessage.ackId());
+ assertEquals(DATA, actualMessage.message().getData().toStringUtf8());
+ assertEquals(REQ_TIME, actualMessage.requestTimeMsSinceEpoch());
+ assertEquals(PUB_TIME, actualMessage.timestampMsSinceEpoch());
}
@Test
@@ -146,7 +180,7 @@
List<IncomingMessage> acutalMessages = client.pull(REQ_TIME, SUBSCRIPTION, 10, true);
assertEquals(1, acutalMessages.size());
IncomingMessage actualMessage = acutalMessages.get(0);
- assertArrayEquals(new byte[0], actualMessage.elementBytes);
+ assertArrayEquals(new byte[0], actualMessage.message().getData().toByteArray());
}
@Test
@@ -160,7 +194,8 @@
.put(TIMESTAMP_ATTRIBUTE, String.valueOf(MESSAGE_TIME))
.put(ID_ATTRIBUTE, RECORD_ID)
.put("k", "v")
- .build());
+ .build())
+ .set("orderingKey", ORDERING_KEY);
PublishRequest expectedRequest =
new PublishRequest().setMessages(ImmutableList.of(expectedPubsubMessage));
PublishResponse expectedResponse =
@@ -171,7 +206,14 @@
Map<String, String> attrs = new HashMap<>();
attrs.put("k", "v");
OutgoingMessage actualMessage =
- new OutgoingMessage(DATA.getBytes(StandardCharsets.UTF_8), attrs, MESSAGE_TIME, RECORD_ID);
+ OutgoingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFromUtf8(DATA))
+ .putAllAttributes(attrs)
+ .setOrderingKey(ORDERING_KEY)
+ .build(),
+ MESSAGE_TIME,
+ RECORD_ID);
int n = client.publish(TOPIC, ImmutableList.of(actualMessage));
assertEquals(1, n);
}
@@ -195,8 +237,12 @@
(mockPubsub.projects().topics().publish(expectedTopic, expectedRequest).execute()))
.thenReturn(expectedResponse);
OutgoingMessage actualMessage =
- new OutgoingMessage(
- DATA.getBytes(StandardCharsets.UTF_8), ImmutableMap.of(), MESSAGE_TIME, RECORD_ID);
+ OutgoingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFromUtf8(DATA))
+ .build(),
+ MESSAGE_TIME,
+ RECORD_ID);
int n = client.publish(TOPIC, ImmutableList.of(actualMessage));
assertEquals(1, n);
}
@@ -222,7 +268,13 @@
Map<String, String> attrs = new HashMap<>();
attrs.put("k", "v");
OutgoingMessage actualMessage =
- new OutgoingMessage(DATA.getBytes(StandardCharsets.UTF_8), attrs, MESSAGE_TIME, RECORD_ID);
+ OutgoingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFromUtf8(DATA))
+ .putAllAttributes(attrs)
+ .build(),
+ MESSAGE_TIME,
+ RECORD_ID);
int n = client.publish(TOPIC, ImmutableList.of(actualMessage));
assertEquals(1, n);
}
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubTestClientTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubTestClientTest.java
index 2b698f0..6b920e8 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubTestClientTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubTestClientTest.java
@@ -20,8 +20,9 @@
import static org.junit.Assert.assertEquals;
import com.google.api.client.util.Clock;
+import com.google.protobuf.ByteString;
+import com.google.pubsub.v1.PubsubMessage;
import java.io.IOException;
-import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.IncomingMessage;
@@ -54,9 +55,8 @@
final AtomicLong now = new AtomicLong();
Clock clock = now::get;
IncomingMessage expectedIncomingMessage =
- new IncomingMessage(
- DATA.getBytes(StandardCharsets.UTF_8),
- null,
+ IncomingMessage.of(
+ PubsubMessage.newBuilder().setData(ByteString.copyFromUtf8(DATA)).build(),
MESSAGE_TIME,
REQ_TIME,
ACK_ID,
@@ -75,7 +75,14 @@
client.advance();
incomingMessages = client.pull(now.get(), SUBSCRIPTION, 1, true);
assertEquals(1, incomingMessages.size());
- assertEquals(expectedIncomingMessage.withRequestTime(now.get()), incomingMessages.get(0));
+ assertEquals(
+ IncomingMessage.of(
+ expectedIncomingMessage.message(),
+ expectedIncomingMessage.timestampMsSinceEpoch(),
+ now.get(),
+ expectedIncomingMessage.ackId(),
+ expectedIncomingMessage.recordId()),
+ incomingMessages.get(0));
now.addAndGet(10 * 1000);
client.advance();
// Extend ack
@@ -85,7 +92,14 @@
client.advance();
incomingMessages = client.pull(now.get(), SUBSCRIPTION, 1, true);
assertEquals(1, incomingMessages.size());
- assertEquals(expectedIncomingMessage.withRequestTime(now.get()), incomingMessages.get(0));
+ assertEquals(
+ IncomingMessage.of(
+ expectedIncomingMessage.message(),
+ expectedIncomingMessage.timestampMsSinceEpoch(),
+ now.get(),
+ expectedIncomingMessage.ackId(),
+ expectedIncomingMessage.recordId()),
+ incomingMessages.get(0));
// Extend ack
client.modifyAckDeadline(SUBSCRIPTION, ImmutableList.of(ACK_ID), 20);
// Ack
@@ -99,7 +113,10 @@
@Test
public void publishOneMessage() throws IOException {
OutgoingMessage expectedOutgoingMessage =
- new OutgoingMessage(DATA.getBytes(StandardCharsets.UTF_8), null, MESSAGE_TIME, MESSAGE_ID);
+ OutgoingMessage.of(
+ PubsubMessage.newBuilder().setData(ByteString.copyFromUtf8(DATA)).build(),
+ MESSAGE_TIME,
+ MESSAGE_ID);
try (PubsubTestClientFactory factory =
PubsubTestClient.createFactoryForPublish(
TOPIC, Sets.newHashSet(expectedOutgoingMessage), ImmutableList.of())) {
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSinkTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSinkTest.java
index f588e05..f8cd86e 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSinkTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSinkTest.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.sdk.io.gcp.pubsub;
+import com.google.protobuf.ByteString;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
@@ -83,8 +84,12 @@
@Test
public void saneCoder() throws Exception {
OutgoingMessage message =
- new OutgoingMessage(
- DATA.getBytes(StandardCharsets.UTF_8), ImmutableMap.of(), TIMESTAMP, getRecordId(DATA));
+ OutgoingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFromUtf8(DATA))
+ .build(),
+ TIMESTAMP,
+ getRecordId(DATA));
CoderProperties.coderDecodeEncodeEqual(PubsubUnboundedSink.CODER, message);
CoderProperties.coderSerializable(PubsubUnboundedSink.CODER);
}
@@ -93,8 +98,13 @@
public void sendOneMessage() throws IOException {
List<OutgoingMessage> outgoing =
ImmutableList.of(
- new OutgoingMessage(
- DATA.getBytes(StandardCharsets.UTF_8), ATTRIBUTES, TIMESTAMP, getRecordId(DATA)));
+ OutgoingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFromUtf8(DATA))
+ .putAllAttributes(ATTRIBUTES)
+ .build(),
+ TIMESTAMP,
+ getRecordId(DATA)));
int batchSize = 1;
int batchBytes = 1;
try (PubsubTestClientFactory factory =
@@ -121,9 +131,10 @@
public void sendOneMessageWithoutAttributes() throws IOException {
List<OutgoingMessage> outgoing =
ImmutableList.of(
- new OutgoingMessage(
- DATA.getBytes(StandardCharsets.UTF_8),
- null /* attributes */,
+ OutgoingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFromUtf8(DATA))
+ .build(),
TIMESTAMP,
getRecordId(DATA)));
try (PubsubTestClientFactory factory =
@@ -157,9 +168,10 @@
for (int i = 0; i < batchSize * 10; i++) {
String str = String.valueOf(i);
outgoing.add(
- new OutgoingMessage(
- str.getBytes(StandardCharsets.UTF_8),
- ImmutableMap.of(),
+ OutgoingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFromUtf8(str))
+ .build(),
TIMESTAMP,
getRecordId(str)));
data.add(str);
@@ -198,9 +210,10 @@
}
String str = sb.toString();
outgoing.add(
- new OutgoingMessage(
- str.getBytes(StandardCharsets.UTF_8),
- ImmutableMap.of(),
+ OutgoingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFromUtf8(str))
+ .build(),
TIMESTAMP,
getRecordId(str)));
data.add(str);
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSourceTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSourceTest.java
index b2dacf0..43ecbdc 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSourceTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSourceTest.java
@@ -31,6 +31,7 @@
import static org.junit.Assert.assertTrue;
import com.google.api.client.util.Clock;
+import com.google.protobuf.ByteString;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
@@ -100,8 +101,14 @@
private void setupOneMessage() {
setupOneMessage(
ImmutableList.of(
- new IncomingMessage(
- DATA.getBytes(StandardCharsets.UTF_8), null, TIMESTAMP, 0, ACK_ID, RECORD_ID)));
+ IncomingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFromUtf8(DATA))
+ .build(),
+ TIMESTAMP,
+ 0,
+ ACK_ID,
+ RECORD_ID)));
}
@After
@@ -219,8 +226,14 @@
String data = String.format("data_%d", i);
String ackid = String.format("ackid_%d", i);
incoming.add(
- new IncomingMessage(
- data.getBytes(StandardCharsets.UTF_8), null, TIMESTAMP, 0, ackid, RECORD_ID));
+ IncomingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFromUtf8(data))
+ .build(),
+ TIMESTAMP,
+ 0,
+ ackid,
+ RECORD_ID));
}
setupOneMessage(incoming);
PubsubReader reader = primSource.createReader(p.getOptions(), null);
@@ -279,9 +292,10 @@
String recid = String.format("recordid_%d", messageNum);
String ackId = String.format("ackid_%d", messageNum);
incoming.add(
- new IncomingMessage(
- data.getBytes(StandardCharsets.UTF_8),
- null,
+ IncomingMessage.of(
+ com.google.pubsub.v1.PubsubMessage.newBuilder()
+ .setData(ByteString.copyFromUtf8(data))
+ .build(),
messageNumToTimestamp(messageNum),
0,
ackId,
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/testing/BigqueryClientTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/testing/BigqueryClientTest.java
index fb443f5..da8f944 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/testing/BigqueryClientTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/testing/BigqueryClientTest.java
@@ -17,8 +17,8 @@
*/
package org.apache.beam.sdk.io.gcp.testing;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.atLeast;
import static org.mockito.Mockito.spy;
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/testing/BigqueryMatcherTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/testing/BigqueryMatcherTest.java
index 89ebdb5..6bf6f92 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/testing/BigqueryMatcherTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/testing/BigqueryMatcherTest.java
@@ -18,7 +18,7 @@
package org.apache.beam.sdk.io.gcp.testing;
import static org.hamcrest.MatcherAssert.assertThat;
-import static org.mockito.Matchers.anyString;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;
diff --git a/sdks/java/io/hadoop-common/build.gradle b/sdks/java/io/hadoop-common/build.gradle
index 08f60c6..c4d6cad 100644
--- a/sdks/java/io/hadoop-common/build.gradle
+++ b/sdks/java/io/hadoop-common/build.gradle
@@ -27,7 +27,6 @@
provided library.java.hadoop_client
provided library.java.hadoop_common
provided library.java.hadoop_mapreduce_client_core
- testCompile library.java.commons_lang3
testCompile library.java.hamcrest_core
testCompile library.java.hamcrest_library
testCompile library.java.junit
diff --git a/sdks/java/io/hadoop-common/src/test/java/org/apache/beam/sdk/io/hadoop/SerializableConfigurationTest.java b/sdks/java/io/hadoop-common/src/test/java/org/apache/beam/sdk/io/hadoop/SerializableConfigurationTest.java
index f85cf6b..07b5b7f 100644
--- a/sdks/java/io/hadoop-common/src/test/java/org/apache/beam/sdk/io/hadoop/SerializableConfigurationTest.java
+++ b/sdks/java/io/hadoop-common/src/test/java/org/apache/beam/sdk/io/hadoop/SerializableConfigurationTest.java
@@ -20,7 +20,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
-import org.apache.commons.lang3.SerializationUtils;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.SerializationUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.junit.Rule;
diff --git a/sdks/java/io/hadoop-format/build.gradle b/sdks/java/io/hadoop-format/build.gradle
index 1e0cf94..8adf06e 100644
--- a/sdks/java/io/hadoop-format/build.gradle
+++ b/sdks/java/io/hadoop-format/build.gradle
@@ -75,9 +75,10 @@
}
// elasticsearch-hadoop 5.0.0 uses commons-httpclient's URIException
testCompile "commons-httpclient:commons-httpclient:3.1"
+ testCompile library.java.commons_io
testCompile library.java.cassandra_driver_core
testCompile library.java.cassandra_driver_mapping
- testCompile "org.apache.cassandra:cassandra-all:3.11.3"
+ testCompile "org.apache.cassandra:cassandra-all:3.11.5"
testCompile library.java.postgres
testCompile "org.apache.logging.log4j:log4j-core:$log4j_version"
testCompile library.java.junit
@@ -85,7 +86,6 @@
testCompile library.java.hamcrest_library
testRuntimeOnly library.java.slf4j_jdk14
testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")
- compile library.java.commons_io_2x
delegate.add("sparkRunner", project(":sdks:java:io:hadoop-format"))
delegate.add("sparkRunner", project(path: ":sdks:java:io:hadoop-format", configuration: "testRuntime"))
diff --git a/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOIT.java b/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOIT.java
index 9933aee..1a693ba 100644
--- a/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOIT.java
+++ b/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOIT.java
@@ -220,28 +220,34 @@
private Set<Function<MetricsReader, NamedTestResult>> getWriteSuppliers(
String uuid, String timestamp) {
Set<Function<MetricsReader, NamedTestResult>> suppliers = new HashSet<>();
+ suppliers.add(getTimeMetric(uuid, timestamp, "write_time"));
suppliers.add(
- reader -> {
- long writeStart = reader.getStartTimeMetric("write_time");
- long writeEnd = reader.getEndTimeMetric("write_time");
- return NamedTestResult.create(
- uuid, timestamp, "write_time", (writeEnd - writeStart) / 1e3);
- });
+ reader ->
+ NamedTestResult.create(
+ uuid,
+ timestamp,
+ "data_size",
+ DatabaseTestHelper.getPostgresTableSize(dataSource, tableName)
+ .orElseThrow(() -> new IllegalStateException("Unable to fetch table size"))));
return suppliers;
}
private Set<Function<MetricsReader, NamedTestResult>> getReadSuppliers(
String uuid, String timestamp) {
Set<Function<MetricsReader, NamedTestResult>> suppliers = new HashSet<>();
- suppliers.add(
- reader -> {
- long readStart = reader.getStartTimeMetric("read_time");
- long readEnd = reader.getEndTimeMetric("read_time");
- return NamedTestResult.create(uuid, timestamp, "read_time", (readEnd - readStart) / 1e3);
- });
+ suppliers.add(getTimeMetric(uuid, timestamp, "read_time"));
return suppliers;
}
+ private Function<MetricsReader, NamedTestResult> getTimeMetric(
+ final String uuid, final String timestamp, final String metricName) {
+ return reader -> {
+ long startTime = reader.getStartTimeMetric(metricName);
+ long endTime = reader.getEndTimeMetric(metricName);
+ return NamedTestResult.create(uuid, timestamp, metricName, (endTime - startTime) / 1e3);
+ };
+ }
+
/**
* Uses the input {@link TestRow} values as seeds to produce new {@link KV}s for {@link
* HadoopFormatIO}.
diff --git a/sdks/java/io/hbase/build.gradle b/sdks/java/io/hbase/build.gradle
index e2dd902..d33e0ca 100644
--- a/sdks/java/io/hbase/build.gradle
+++ b/sdks/java/io/hbase/build.gradle
@@ -44,7 +44,6 @@
compile "org.apache.hbase:hbase-shaded-client:$hbase_version"
testCompile project(path: ":sdks:java:io:common", configuration: "testRuntime")
testCompile project(path: ":sdks:java:core", configuration: "shadowTest")
- testCompile library.java.commons_lang3
testCompile library.java.junit
testCompile library.java.hamcrest_core
testCompile library.java.hamcrest_library
diff --git a/sdks/java/io/hbase/src/test/java/org/apache/beam/sdk/io/hbase/HBaseIOTest.java b/sdks/java/io/hbase/src/test/java/org/apache/beam/sdk/io/hbase/HBaseIOTest.java
index 5f29185..eb8cec7 100644
--- a/sdks/java/io/hbase/src/test/java/org/apache/beam/sdk/io/hbase/HBaseIOTest.java
+++ b/sdks/java/io/hbase/src/test/java/org/apache/beam/sdk/io/hbase/HBaseIOTest.java
@@ -31,6 +31,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.StringUtils;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.hbase.HBaseIO.HBaseSource;
@@ -43,7 +44,6 @@
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.values.PCollection;
-import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
diff --git a/sdks/java/io/hbase/src/test/java/org/apache/beam/sdk/io/hbase/SerializableScanTest.java b/sdks/java/io/hbase/src/test/java/org/apache/beam/sdk/io/hbase/SerializableScanTest.java
index 0085ea1..a5258cb 100644
--- a/sdks/java/io/hbase/src/test/java/org/apache/beam/sdk/io/hbase/SerializableScanTest.java
+++ b/sdks/java/io/hbase/src/test/java/org/apache/beam/sdk/io/hbase/SerializableScanTest.java
@@ -21,7 +21,7 @@
import static org.junit.Assert.assertNotNull;
import java.nio.charset.StandardCharsets;
-import org.apache.commons.lang3.SerializationUtils;
+import org.apache.beam.repackaged.core.org.apache.commons.lang3.SerializationUtils;
import org.apache.hadoop.hbase.client.Scan;
import org.junit.Rule;
import org.junit.Test;
diff --git a/sdks/java/io/hcatalog/build.gradle b/sdks/java/io/hcatalog/build.gradle
index e32277d..e0b43a2 100644
--- a/sdks/java/io/hcatalog/build.gradle
+++ b/sdks/java/io/hcatalog/build.gradle
@@ -59,7 +59,7 @@
exclude group: "com.google.protobuf", module: "protobuf-java"
}
testCompile project(":sdks:java:io:common").sourceSets.test.output
- testCompile library.java.commons_io_2x
+ testCompile library.java.commons_io
testCompile library.java.junit
testCompile library.java.hamcrest_core
testCompile library.java.hamcrest_library
diff --git a/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcIOTest.java b/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcIOTest.java
index 046c061..72985cb 100644
--- a/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcIOTest.java
+++ b/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcIOTest.java
@@ -21,8 +21,8 @@
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecord.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecord.java
index 37cf836..2c0ad51 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecord.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecord.java
@@ -17,7 +17,7 @@
*/
package org.apache.beam.sdk.io.kinesis;
-import static org.apache.commons.lang.builder.HashCodeBuilder.reflectionHashCode;
+import static org.apache.commons.lang3.builder.HashCodeBuilder.reflectionHashCode;
import com.amazonaws.services.kinesis.clientlibrary.types.ExtendedSequenceNumber;
import com.amazonaws.services.kinesis.clientlibrary.types.UserRecord;
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java
index 10de1df..5abe605 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java
@@ -23,7 +23,7 @@
import static com.amazonaws.services.kinesis.model.ShardIteratorType.AT_SEQUENCE_NUMBER;
import static com.amazonaws.services.kinesis.model.ShardIteratorType.AT_TIMESTAMP;
import static org.assertj.core.api.Assertions.assertThat;
-import static org.mockito.Matchers.anyString;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Matchers.isNull;
import static org.mockito.Mockito.mock;
diff --git a/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/MongoDBIOIT.java b/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/MongoDBIOIT.java
index 39af3af..121f538 100644
--- a/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/MongoDBIOIT.java
+++ b/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/MongoDBIOIT.java
@@ -21,7 +21,8 @@
import static org.apache.beam.sdk.io.common.IOITHelper.getHashForRecordCount;
import com.google.cloud.Timestamp;
-import com.mongodb.MongoClient;
+import com.mongodb.client.MongoClient;
+import com.mongodb.client.MongoClients;
import java.util.Date;
import java.util.HashSet;
import java.util.Map;
@@ -48,6 +49,7 @@
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
import org.bson.Document;
+import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Rule;
@@ -80,6 +82,11 @@
private static final String NAMESPACE = MongoDBIOIT.class.getName();
private static String bigQueryDataset;
private static String bigQueryTable;
+ private static String mongoUrl;
+ private static MongoClient mongoClient;
+
+ private double initialCollectionSize;
+ private double finalCollectionSize;
/** MongoDBIOIT options. */
public interface MongoDBPipelineOptions extends IOTestPipelineOptions {
@@ -121,6 +128,15 @@
collection = String.format("test_%s", new Date().getTime());
bigQueryDataset = options.getBigQueryDataset();
bigQueryTable = options.getBigQueryTable();
+ mongoUrl =
+ String.format("mongodb://%s:%s", options.getMongoDBHostName(), options.getMongoDBPort());
+ mongoClient = MongoClients.create(mongoUrl);
+ }
+
+ @After
+ public void cleanUp() {
+ initialCollectionSize = -1d;
+ finalCollectionSize = -1d;
}
@AfterClass
@@ -128,16 +144,13 @@
executeWithRetry(MongoDBIOIT::dropDatabase);
}
- public static void dropDatabase() throws Exception {
- new MongoClient(options.getMongoDBHostName())
- .getDatabase(options.getMongoDBDatabaseName())
- .drop();
+ public static void dropDatabase() {
+ mongoClient.getDatabase(options.getMongoDBDatabaseName()).drop();
}
@Test
public void testWriteAndRead() {
- final String mongoUrl =
- String.format("mongodb://%s:%s", options.getMongoDBHostName(), options.getMongoDBPort());
+ initialCollectionSize = getCollectionSizeInBytes(collection);
writePipeline
.apply("Generate sequence", GenerateSequence.from(0).to(options.getNumberOfRecords()))
@@ -152,6 +165,8 @@
PipelineResult writeResult = writePipeline.run();
writeResult.waitUntilFinish();
+ finalCollectionSize = getCollectionSizeInBytes(collection);
+
PCollection<String> consolidatedHashcode =
readPipeline
.apply(
@@ -172,6 +187,15 @@
collectAndPublishMetrics(writeResult, readResult);
}
+ private double getCollectionSizeInBytes(final String collectionName) {
+ return mongoClient.getDatabase(options.getMongoDBDatabaseName())
+ .runCommand(new Document("collStats", collectionName)).entrySet().stream()
+ .filter(entry -> entry.getKey().equals("size"))
+ .map(entry -> Double.parseDouble(String.valueOf(entry.getValue())))
+ .findFirst()
+ .orElseThrow(() -> new IllegalStateException("Unable to retrieve collection stats"));
+ }
+
private void collectAndPublishMetrics(PipelineResult writeResult, PipelineResult readResult) {
String uuid = UUID.randomUUID().toString();
String timestamp = Timestamp.now().toString();
@@ -188,30 +212,37 @@
}
private Set<Function<MetricsReader, NamedTestResult>> getWriteSuppliers(
- String uuid, String timestamp) {
- Set<Function<MetricsReader, NamedTestResult>> suppliers = new HashSet<>();
+ final String uuid, final String timestamp) {
+ final Set<Function<MetricsReader, NamedTestResult>> suppliers = new HashSet<>();
+ suppliers.add(getTimeMetric(uuid, timestamp, "write_time"));
suppliers.add(
- reader -> {
- long writeStart = reader.getStartTimeMetric("write_time");
- long writeEnd = reader.getEndTimeMetric("write_time");
- return NamedTestResult.create(
- uuid, timestamp, "write_time", (writeEnd - writeStart) / 1e3);
- });
+ reader -> NamedTestResult.create(uuid, timestamp, "data_size", getWrittenDataSize()));
return suppliers;
}
+ private double getWrittenDataSize() {
+ if (initialCollectionSize == -1d || finalCollectionSize == -1d) {
+ throw new IllegalStateException("Collection size not fetched");
+ }
+ return finalCollectionSize - initialCollectionSize;
+ }
+
private Set<Function<MetricsReader, NamedTestResult>> getReadSuppliers(
String uuid, String timestamp) {
Set<Function<MetricsReader, NamedTestResult>> suppliers = new HashSet<>();
- suppliers.add(
- reader -> {
- long readStart = reader.getStartTimeMetric("read_time");
- long readEnd = reader.getEndTimeMetric("read_time");
- return NamedTestResult.create(uuid, timestamp, "read_time", (readEnd - readStart) / 1e3);
- });
+ suppliers.add(getTimeMetric(uuid, timestamp, "read_time"));
return suppliers;
}
+ private Function<MetricsReader, NamedTestResult> getTimeMetric(
+ final String uuid, final String timestamp, final String metricName) {
+ return reader -> {
+ long writeStart = reader.getStartTimeMetric(metricName);
+ long writeEnd = reader.getEndTimeMetric(metricName);
+ return NamedTestResult.create(uuid, timestamp, metricName, (writeEnd - writeStart) / 1e3);
+ };
+ }
+
private static class LongToDocumentFn extends SimpleFunction<Long, Document> {
@Override
public Document apply(Long input) {
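The new data_size metric is derived from the collection size MongoDB reports before and after the write. A small sketch of the underlying collStats lookup (connection string, database, and collection names are assumed):

```java
import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import org.bson.Document;

class CollectionSizeSketch {
  static double collectionSizeInBytes() {
    MongoClient client = MongoClients.create("mongodb://localhost:27017");
    Document stats =
        client
            .getDatabase("beam_test_db")
            .runCommand(new Document("collStats", "test_collection"));
    // "size" is the total uncompressed size, in bytes, of the documents in the collection;
    // the IT records it before and after the write and publishes the difference as data_size.
    return Double.parseDouble(String.valueOf(stats.get("size")));
  }
}
```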
diff --git a/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java b/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java
index 726acf6..bef77e4 100644
--- a/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java
+++ b/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java
@@ -28,6 +28,7 @@
import java.nio.channels.WritableByteChannel;
import javax.annotation.Nullable;
import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.annotations.Experimental;
import org.apache.beam.sdk.coders.AvroCoder;
@@ -73,6 +74,11 @@
*
* <p>As {@link Read} is based on {@link FileIO}, it supports any filesystem (hdfs, ...).
*
+ * <p>When using schemas created via reflection, it may be useful to generate {@link GenericRecord}
+ * instances rather than instances of the class associated with the schema. {@link Read} and {@link
+ * ReadFiles} provide {@link ParquetIO.Read#withAvroDataModel(GenericData)}, allowing callers to
+ * set the data model associated with the {@link AvroParquetReader}.
+ *
* <p>For more advanced use cases, like reading each file in a {@link PCollection} of {@link
* FileIO.ReadableFile}, use the {@link ReadFiles} transform.
*
@@ -144,6 +150,9 @@
@Nullable
abstract Schema getSchema();
+ @Nullable
+ abstract GenericData getAvroDataModel();
+
abstract Builder toBuilder();
@AutoValue.Builder
@@ -152,6 +161,8 @@
abstract Builder setSchema(Schema schema);
+ abstract Builder setAvroDataModel(GenericData model);
+
abstract Read build();
}
@@ -165,6 +176,13 @@
return from(ValueProvider.StaticValueProvider.of(filepattern));
}
+ /**
+ * Define the Avro data model; see {@link AvroParquetReader.Builder#withDataModel(GenericData)}.
+ */
+ public Read withAvroDataModel(GenericData model) {
+ return toBuilder().setAvroDataModel(model).build();
+ }
+
@Override
public PCollection<GenericRecord> expand(PBegin input) {
checkNotNull(getFilepattern(), "Filepattern cannot be null.");
@@ -173,7 +191,7 @@
.apply("Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of()))
.apply(FileIO.matchAll())
.apply(FileIO.readMatches())
- .apply(readFiles(getSchema()));
+ .apply(readFiles(getSchema()).withAvroDataModel(getAvroDataModel()));
}
@Override
@@ -192,21 +210,43 @@
@Nullable
abstract Schema getSchema();
+ @Nullable
+ abstract GenericData getAvroDataModel();
+
+ abstract Builder toBuilder();
+
@AutoValue.Builder
abstract static class Builder {
abstract Builder setSchema(Schema schema);
+ abstract Builder setAvroDataModel(GenericData model);
+
abstract ReadFiles build();
}
+ /**
+ * Define the Avro data model; see {@link AvroParquetReader.Builder#withDataModel(GenericData)}.
+ */
+ public ReadFiles withAvroDataModel(GenericData model) {
+ return toBuilder().setAvroDataModel(model).build();
+ }
+
@Override
public PCollection<GenericRecord> expand(PCollection<FileIO.ReadableFile> input) {
checkNotNull(getSchema(), "Schema can not be null");
- return input.apply(ParDo.of(new ReadFn())).setCoder(AvroCoder.of(getSchema()));
+ return input
+ .apply(ParDo.of(new ReadFn(getAvroDataModel())))
+ .setCoder(AvroCoder.of(getSchema()));
}
static class ReadFn extends DoFn<FileIO.ReadableFile, GenericRecord> {
+ private Class<? extends GenericData> modelClass;
+
+ ReadFn(GenericData model) {
+ this.modelClass = model != null ? model.getClass() : null;
+ }
+
@ProcessElement
public void processElement(ProcessContext processContext) throws Exception {
FileIO.ReadableFile file = processContext.element();
@@ -218,9 +258,14 @@
SeekableByteChannel seekableByteChannel = file.openSeekable();
- try (ParquetReader<GenericRecord> reader =
- AvroParquetReader.<GenericRecord>builder(new BeamParquetInputFile(seekableByteChannel))
- .build()) {
+ AvroParquetReader.Builder builder =
+ AvroParquetReader.<GenericRecord>builder(new BeamParquetInputFile(seekableByteChannel));
+ if (modelClass != null) {
+ // all GenericData implementations have a static get method
+ builder = builder.withDataModel((GenericData) modelClass.getMethod("get").invoke(null));
+ }
+
+ try (ParquetReader<GenericRecord> reader = builder.build()) {
GenericRecord read;
while ((read = reader.read()) != null) {
processContext.output(read);
diff --git a/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java b/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java
index ed80f90..6840260 100644
--- a/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java
+++ b/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java
@@ -23,8 +23,10 @@
import java.util.ArrayList;
import java.util.List;
import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
+import org.apache.avro.reflect.ReflectData;
import org.apache.beam.sdk.coders.AvroCoder;
import org.apache.beam.sdk.io.FileIO;
import org.apache.beam.sdk.testing.PAssert;
@@ -126,4 +128,57 @@
Assert.assertThat(displayData, hasDisplayItem("filePattern", "foo.parquet"));
}
+
+ public static class TestRecord {
+ String name;
+
+ public TestRecord(String name) {
+ this.name = name;
+ }
+ }
+
+ @Test(expected = org.apache.beam.sdk.Pipeline.PipelineExecutionException.class)
+ public void testWriteAndReadUsingReflectDataSchemaWithoutDataModelThrowsException() {
+ Schema testRecordSchema = ReflectData.get().getSchema(TestRecord.class);
+
+ List<GenericRecord> records = generateGenericRecords(1000);
+ mainPipeline
+ .apply(Create.of(records).withCoder(AvroCoder.of(testRecordSchema)))
+ .apply(
+ FileIO.<GenericRecord>write()
+ .via(ParquetIO.sink(testRecordSchema))
+ .to(temporaryFolder.getRoot().getAbsolutePath()));
+ mainPipeline.run().waitUntilFinish();
+
+ PCollection<GenericRecord> readBack =
+ readPipeline.apply(
+ ParquetIO.read(testRecordSchema)
+ .from(temporaryFolder.getRoot().getAbsolutePath() + "/*"));
+
+ PAssert.that(readBack).containsInAnyOrder(records);
+ readPipeline.run().waitUntilFinish();
+ }
+
+ @Test
+ public void testWriteAndReadUsingReflectDataSchemaWithDataModel() {
+ Schema testRecordSchema = ReflectData.get().getSchema(TestRecord.class);
+
+ List<GenericRecord> records = generateGenericRecords(1000);
+ mainPipeline
+ .apply(Create.of(records).withCoder(AvroCoder.of(testRecordSchema)))
+ .apply(
+ FileIO.<GenericRecord>write()
+ .via(ParquetIO.sink(testRecordSchema))
+ .to(temporaryFolder.getRoot().getAbsolutePath()));
+ mainPipeline.run().waitUntilFinish();
+
+ PCollection<GenericRecord> readBack =
+ readPipeline.apply(
+ ParquetIO.read(testRecordSchema)
+ .withAvroDataModel(GenericData.get())
+ .from(temporaryFolder.getRoot().getAbsolutePath() + "/*"));
+
+ PAssert.that(readBack).containsInAnyOrder(records);
+ readPipeline.run().waitUntilFinish();
+ }
}
diff --git a/sdks/java/testing/nexmark/build.gradle b/sdks/java/testing/nexmark/build.gradle
index c294c8e..290a62c 100644
--- a/sdks/java/testing/nexmark/build.gradle
+++ b/sdks/java/testing/nexmark/build.gradle
@@ -67,7 +67,6 @@
compile library.java.avro
compile library.java.joda_time
compile library.java.slf4j_api
- compile library.java.commons_lang3
compile library.java.kafka_clients
provided library.java.junit
provided library.java.hamcrest_core
diff --git a/sdks/java/testing/test-utils/build.gradle b/sdks/java/testing/test-utils/build.gradle
index 45b007d..c184d9c 100644
--- a/sdks/java/testing/test-utils/build.gradle
+++ b/sdks/java/testing/test-utils/build.gradle
@@ -29,6 +29,8 @@
compile project(path: ":sdks:java:core", configuration: "shadow")
compile library.java.vendored_guava_26_0_jre
compile library.java.google_cloud_bigquery
+ // google_cloud_bigquery is not declaring google_http_client_jackson dependency correctly
+ compile library.java.google_http_client_jackson
compile project(":sdks:java:extensions:google-cloud-platform-core")
testCompile library.java.junit
diff --git a/sdks/python/.pylintrc b/sdks/python/.pylintrc
index bb404d1..fb226b7 100644
--- a/sdks/python/.pylintrc
+++ b/sdks/python/.pylintrc
@@ -143,6 +143,7 @@
unnecessary-lambda,
unnecessary-pass,
unneeded-not,
+ unsubscriptable-object,
unused-argument,
unused-wildcard-import,
useless-object-inheritance,
@@ -178,6 +179,7 @@
ignore-long-lines=(?x)
(^\s*(import|from)\s
|^\s*(\#\ )?<?(https?|ftp):\/\/[^\s\/$.?#].[^\s]*>?$
+ |^.*\#\ type\:
)
[VARIABLES]
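
The two .pylintrc additions above accommodate the comment-style type annotations introduced throughout this change: unsubscriptable-object tends to raise false positives around typing constructs, and the extra ignore-long-lines alternative exempts any line carrying a "# type:" comment from the line-length check. A minimal illustration of the kind of line the new pattern exempts (the function and its parameters are invented for the example):

from typing import Callable, Iterable, Optional

def make_state_backed_reader(read_state=None, write_state=None):
  # type: (Optional[Callable[[bytes], Iterable]], Optional[Callable[[Iterable], bytes]]) -> None
  # The trailing type comment runs past 80 columns; the new ignore-long-lines
  # pattern keeps pylint from flagging it as an over-long line.
  pass
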
diff --git a/sdks/python/apache_beam/coders/avro_record.py b/sdks/python/apache_beam/coders/avro_record.py
index a5b8b60..c551c90 100644
--- a/sdks/python/apache_beam/coders/avro_record.py
+++ b/sdks/python/apache_beam/coders/avro_record.py
@@ -17,6 +17,8 @@
"""AvroRecord for AvroGenericCoder."""
+# pytype: skip-file
+
from __future__ import absolute_import
__all__ = ['AvroRecord']
diff --git a/sdks/python/apache_beam/coders/coder_impl.pxd b/sdks/python/apache_beam/coders/coder_impl.pxd
index e4b2832..6659078 100644
--- a/sdks/python/apache_beam/coders/coder_impl.pxd
+++ b/sdks/python/apache_beam/coders/coder_impl.pxd
@@ -211,5 +211,12 @@
cpdef encode_to_stream(self, value, OutputStream stream, bint nested)
+cdef class ParamWindowedValueCoderImpl(WindowedValueCoderImpl):
+ """A coder for windowed values with constant timestamp, windows and pane info."""
+ cdef readonly libc.stdint.int64_t _timestamp
+ cdef readonly object _windows
+ cdef readonly windowed_value.PaneInfo _pane_info
+
+
cdef class LengthPrefixCoderImpl(StreamCoderImpl):
cdef CoderImpl _value_coder
diff --git a/sdks/python/apache_beam/coders/coder_impl.py b/sdks/python/apache_beam/coders/coder_impl.py
index 561d36d..fb76875 100644
--- a/sdks/python/apache_beam/coders/coder_impl.py
+++ b/sdks/python/apache_beam/coders/coder_impl.py
@@ -31,6 +31,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -38,6 +40,15 @@
from builtins import chr
from builtins import object
from io import BytesIO
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Callable
+from typing import Iterable
+from typing import Iterator
+from typing import List
+from typing import Optional
+from typing import Sequence
+from typing import Tuple
from fastavro import parse_schema
from fastavro import schemaless_reader
@@ -52,8 +63,31 @@
from apache_beam.utils.timestamp import MIN_TIMESTAMP
from apache_beam.utils.timestamp import Timestamp
-# pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports
+if TYPE_CHECKING:
+ from apache_beam.transforms.window import IntervalWindow
+
try:
+ from . import stream # pylint: disable=unused-import
+except ImportError:
+ SLOW_STREAM = True
+else:
+ SLOW_STREAM = False
+
+if TYPE_CHECKING or SLOW_STREAM:
+ from .slow_stream import InputStream as create_InputStream
+ from .slow_stream import OutputStream as create_OutputStream
+ from .slow_stream import ByteCountingOutputStream
+ from .slow_stream import get_varint_size
+
+ if False: # pylint: disable=using-constant-test
+ # This clause is interpreted by the compiler.
+ from cython import compiled as is_compiled
+ else:
+ is_compiled = False
+ fits_in_64_bits = lambda x: -(1 << 63) <= x <= (1 << 63) - 1
+
+else:
+ # pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports
from .stream import InputStream as create_InputStream
from .stream import OutputStream as create_OutputStream
from .stream import ByteCountingOutputStream
@@ -63,64 +97,65 @@
globals()['create_InputStream'] = create_InputStream
globals()['create_OutputStream'] = create_OutputStream
globals()['ByteCountingOutputStream'] = ByteCountingOutputStream
-except ImportError:
- from .slow_stream import InputStream as create_InputStream
- from .slow_stream import OutputStream as create_OutputStream
- from .slow_stream import ByteCountingOutputStream
- from .slow_stream import get_varint_size
- if False: # pylint: disable=using-constant-test
- # This clause is interpreted by the compiler.
- from cython import compiled as is_compiled
- else:
- is_compiled = False
- fits_in_64_bits = lambda x: -(1 << 63) <= x <= (1 << 63) - 1
-# pylint: enable=wrong-import-order, wrong-import-position, ungrouped-imports
+ # pylint: enable=wrong-import-order, wrong-import-position, ungrouped-imports
_TIME_SHIFT = 1 << 63
MIN_TIMESTAMP_micros = MIN_TIMESTAMP.micros
MAX_TIMESTAMP_micros = MAX_TIMESTAMP.micros
+IterableStateReader = Callable[[bytes, 'CoderImpl'], Iterable]
+IterableStateWriter = Callable[[Iterable, 'CoderImpl'], bytes]
+Observables = List[Tuple[observable.ObservableMixin, 'CoderImpl']]
class CoderImpl(object):
"""For internal use only; no backwards-compatibility guarantees."""
def encode_to_stream(self, value, stream, nested):
+ # type: (Any, create_OutputStream, bool) -> None
"""Reads object from potentially-nested encoding in stream."""
raise NotImplementedError
def decode_from_stream(self, stream, nested):
+ # type: (create_InputStream, bool) -> Any
"""Reads object from potentially-nested encoding in stream."""
raise NotImplementedError
def encode(self, value):
+ # type: (Any) -> bytes
"""Encodes an object to an unnested string."""
raise NotImplementedError
def decode(self, encoded):
+ # type: (bytes) -> Any
"""Decodes an object to an unnested string."""
raise NotImplementedError
def encode_all(self, values):
+ # type: (Iterable[Any]) -> bytes
out = create_OutputStream()
for value in values:
self.encode_to_stream(value, out, True)
return out.get()
def decode_all(self, encoded):
+ # type: (bytes) -> Iterator[Any]
input_stream = create_InputStream(encoded)
while input_stream.size() > 0:
yield self.decode_from_stream(input_stream, True)
def encode_nested(self, value):
+ # type: (Any) -> bytes
out = create_OutputStream()
self.encode_to_stream(value, out, True)
return out.get()
def decode_nested(self, encoded):
+ # type: (bytes) -> Any
return self.decode_from_stream(create_InputStream(encoded), True)
def estimate_size(self, value, nested=False):
+ # type: (Any, bool) -> int
"""Estimates the encoded size of the given value, in bytes."""
out = ByteCountingOutputStream()
self.encode_to_stream(value, out, nested)
@@ -133,6 +168,7 @@
return varint_size + inner_size
def get_estimated_size_and_observables(self, value, nested=False):
+ # type: (Any, bool) -> Tuple[int, Observables]
"""Returns estimated size of value along with any nested observables.
The list of nested observables is returned as a list of 2-tuples of
@@ -157,10 +193,12 @@
Subclass of CoderImpl implementing stream methods using encode/decode."""
def encode_to_stream(self, value, stream, nested):
+ # type: (Any, create_OutputStream, bool) -> None
"""Reads object from potentially-nested encoding in stream."""
stream.write(self.encode(value), nested)
def decode_from_stream(self, stream, nested):
+ # type: (create_InputStream, bool) -> Any
"""Reads object from potentially-nested encoding in stream."""
return self.decode(stream.read_all(nested))
@@ -171,14 +209,17 @@
Subclass of CoderImpl implementing encode/decode using stream methods."""
def encode(self, value):
+ # type: (Any) -> bytes
out = create_OutputStream()
self.encode_to_stream(value, out, False)
return out.get()
def decode(self, encoded):
+ # type: (bytes) -> Any
return self.decode_from_stream(create_InputStream(encoded), False)
def estimate_size(self, value, nested=False):
+ # type: (Any, bool) -> int
"""Estimates the encoded size of the given value, in bytes."""
out = ByteCountingOutputStream()
self.encode_to_stream(value, out, nested)
@@ -203,9 +244,11 @@
return len(self.encode(value))
def encode_to_stream(self, value, stream, nested):
+ # type: (Any, create_OutputStream, bool) -> None
return stream.write(self._encoder(value), nested)
def decode_from_stream(self, stream, nested):
+ # type: (create_InputStream, bool) -> Any
return self._decoder(stream.read_all(nested))
def encode(self, value):
@@ -215,9 +258,11 @@
return self._decoder(encoded)
def estimate_size(self, value, nested=False):
+ # type: (Any, bool) -> int
return self._get_nested_size(self._size_estimator(value), nested)
def get_estimated_size_and_observables(self, value, nested=False):
+ # type: (Any, bool) -> Tuple[int, Observables]
# TODO(robertwb): Remove this once all coders are correct.
if isinstance(value, observable.ObservableMixin):
# CallbackCoderImpl can presumably encode the elements too.
@@ -252,10 +297,12 @@
value, type(value), self._step_label))
def encode_to_stream(self, value, stream, nested):
+ # type: (Any, create_OutputStream, bool) -> None
self._check_safe(value)
return self._underlying_coder.encode_to_stream(value, stream, nested)
def decode_from_stream(self, stream, nested):
+ # type: (create_InputStream, bool) -> Any
return self._underlying_coder.decode_from_stream(stream, nested)
def encode(self, value):
@@ -266,9 +313,11 @@
return self._underlying_coder.decode(encoded)
def estimate_size(self, value, nested=False):
+ # type: (Any, bool) -> int
return self._underlying_coder.estimate_size(value, nested)
def get_estimated_size_and_observables(self, value, nested=False):
+ # type: (Any, bool) -> Tuple[int, Observables]
return self._underlying_coder.get_estimated_size_and_observables(
value, nested)
@@ -328,6 +377,7 @@
_ITERABLE_LIKE_TYPES.add(t)
def get_estimated_size_and_observables(self, value, nested=False):
+ # type: (Any, bool) -> Tuple[int, Observables]
if isinstance(value, observable.ObservableMixin):
# FastPrimitivesCoderImpl can presumably encode the elements too.
return 1, [(value, self)]
@@ -337,6 +387,7 @@
return out.get_count(), []
def encode_to_stream(self, value, stream, nested):
+ # type: (Any, create_OutputStream, bool) -> None
t = type(value)
if value is None:
stream.write_byte(NONE_TYPE)
@@ -349,7 +400,7 @@
# when value does not fit into int64.
int_value = value
# If Cython is not used, we must do a (slower) check ourselves.
- if not is_compiled:
+ if not TYPE_CHECKING and not is_compiled:
if not fits_in_64_bits(value):
raise OverflowError()
stream.write_byte(INT_TYPE)
@@ -391,6 +442,7 @@
self.fallback_coder_impl.encode_to_stream(value, stream, nested)
def decode_from_stream(self, stream, nested):
+ # type: (create_InputStream, bool) -> Any
t = stream.read_byte()
if t == NONE_TYPE:
return None
@@ -433,9 +485,11 @@
A coder for bytes/str objects."""
def encode_to_stream(self, value, out, nested):
+ # type: (bytes, create_OutputStream, bool) -> None
out.write(value, nested)
def decode_from_stream(self, in_stream, nested):
+ # type: (create_InputStream, bool) -> bytes
return in_stream.read_all(nested)
def encode(self, value):
@@ -482,17 +536,21 @@
"""For internal use only; no backwards-compatibility guarantees."""
def encode_to_stream(self, value, out, nested):
+ # type: (float, create_OutputStream, bool) -> None
out.write_bigendian_double(value)
def decode_from_stream(self, in_stream, nested):
+ # type: (create_InputStream, bool) -> float
return in_stream.read_bigendian_double()
def estimate_size(self, unused_value, nested=False):
+ # type: (Any, bool) -> int
# A double is encoded as 8 bytes, regardless of nesting.
return 8
-IntervalWindow = None
+if not TYPE_CHECKING:
+ IntervalWindow = None
class IntervalWindowCoderImpl(StreamCoderImpl):
@@ -508,6 +566,7 @@
return value + _TIME_SHIFT
def encode_to_stream(self, value, out, nested):
+ # type: (IntervalWindow, create_OutputStream, bool) -> None
typed_value = value
span_millis = (typed_value._end_micros // 1000
- typed_value._start_micros // 1000)
@@ -516,10 +575,13 @@
out.write_var_int64(span_millis)
def decode_from_stream(self, in_, nested):
- global IntervalWindow
- if IntervalWindow is None:
- from apache_beam.transforms.window import IntervalWindow
- typed_value = IntervalWindow(None, None)
+ # type: (create_InputStream, bool) -> IntervalWindow
+ if not TYPE_CHECKING:
+ global IntervalWindow
+ if IntervalWindow is None:
+ from apache_beam.transforms.window import IntervalWindow
+ # instantiating with None is not part of the public interface
+ typed_value = IntervalWindow(None, None) # type: ignore[arg-type]
typed_value._end_micros = (
1000 * self._to_normal_time(in_.read_bigendian_uint64()))
typed_value._start_micros = (
@@ -527,6 +589,7 @@
return typed_value
def estimate_size(self, value, nested=False):
+ # type: (Any, bool) -> int
# An IntervalWindow is context-insensitive, with a timestamp (8 bytes)
# and a varint timespan.
typed_value = value
@@ -545,6 +608,7 @@
"""
def encode_to_stream(self, value, out, nested):
+ # type: (Timestamp, create_OutputStream, bool) -> None
millis = value.micros // 1000
if millis >= 0:
millis = millis - _TIME_SHIFT
@@ -553,6 +617,7 @@
out.write_bigendian_int64(millis)
def decode_from_stream(self, in_stream, nested):
+ # type: (create_InputStream, bool) -> Timestamp
millis = in_stream.read_bigendian_int64()
if millis < 0:
millis = millis + _TIME_SHIFT
@@ -573,10 +638,12 @@
self._payload_coder_impl = payload_coder_impl
def encode_to_stream(self, value, out, nested):
+ # type: (dict, create_OutputStream, bool) -> None
self._timestamp_coder_impl.encode_to_stream(value['timestamp'], out, True)
self._payload_coder_impl.encode_to_stream(value.get('payload'), out, True)
def decode_from_stream(self, in_stream, nested):
+ # type: (create_InputStream, bool) -> dict
# TODO(robertwb): Consider using a concrete class rather than a dict here.
return dict(
timestamp=self._timestamp_coder_impl.decode_from_stream(
@@ -593,9 +660,11 @@
A coder for long/int objects."""
def encode_to_stream(self, value, out, nested):
+ # type: (int, create_OutputStream, bool) -> None
out.write_var_int64(value)
def decode_from_stream(self, in_stream, nested):
+ # type: (create_InputStream, bool) -> int
return in_stream.read_var_int64()
def encode(self, value):
@@ -612,6 +681,7 @@
return StreamCoderImpl.decode(self, encoded)
def estimate_size(self, value, nested=False):
+ # type: (Any, bool) -> int
# Note that VarInts are encoded the same way regardless of nesting.
return get_varint_size(value)
@@ -625,9 +695,11 @@
self._value = value
def encode_to_stream(self, value, stream, nested):
+ # type: (Any, create_OutputStream, bool) -> None
pass
def decode_from_stream(self, stream, nested):
+ # type: (create_InputStream, bool) -> Any
return self._value
def encode(self, value):
@@ -638,6 +710,7 @@
return self._value
def estimate_size(self, value, nested=False):
+ # type: (Any, bool) -> int
return 0
@@ -658,6 +731,7 @@
raise NotImplementedError
def encode_to_stream(self, value, out, nested):
+ # type: (Any, create_OutputStream, bool) -> None
values = self._extract_components(value)
if len(self._coder_impls) != len(values):
raise ValueError(
@@ -668,12 +742,14 @@
nested or i + 1 < len(self._coder_impls))
def decode_from_stream(self, in_stream, nested):
+ # type: (create_InputStream, bool) -> Any
return self._construct_from_components(
[c.decode_from_stream(in_stream,
nested or i + 1 < len(self._coder_impls))
for i, c in enumerate(self._coder_impls)])
def estimate_size(self, value, nested=False):
+ # type: (Any, bool) -> int
"""Estimates the encoded size of the given value, in bytes."""
# TODO(ccy): This ignores sizes of observable components.
estimated_size, _ = (
@@ -681,10 +757,11 @@
return estimated_size
def get_estimated_size_and_observables(self, value, nested=False):
+ # type: (Any, bool) -> Tuple[int, Observables]
"""Returns estimated size of value along with any nested observables."""
values = self._extract_components(value)
estimated_size = 0
- observables = []
+ observables = [] # type: Observables
for i in range(0, len(self._coder_impls)):
c = self._coder_impls[i] # type cast
child_size, child_observables = (
@@ -724,10 +801,12 @@
class _ConcatSequence(object):
def __init__(self, head, tail):
+ # type: (Iterable[Any], Iterable[Any]) -> None
self._head = head
self._tail = tail
def __iter__(self):
+ # type: () -> Iterator[Any]
for elem in self._head:
yield elem
for elem in self._tail:
@@ -782,8 +861,12 @@
# Default buffer size of 64kB of handling iterables of unknown length.
_DEFAULT_BUFFER_SIZE = 64 * 1024
- def __init__(self, elem_coder,
- read_state=None, write_state=None, write_state_threshold=0):
+ def __init__(self,
+ elem_coder, # type: CoderImpl
+ read_state=None, # type: Optional[IterableStateReader]
+ write_state=None, # type: Optional[IterableStateWriter]
+ write_state_threshold=0 # type: int
+ ):
self._elem_coder = elem_coder
self._read_state = read_state
self._write_state = write_state
@@ -793,6 +876,7 @@
raise NotImplementedError
def encode_to_stream(self, value, out, nested):
+ # type: (Sequence, create_OutputStream, bool) -> None
# Compatible with Java's IterableLikeCoder.
if hasattr(value, '__len__') and self._write_state is None:
out.write_bigendian_int32(len(value))
@@ -838,11 +922,12 @@
out.write_var_int64(0)
def decode_from_stream(self, in_stream, nested):
+ # type: (create_InputStream, bool) -> Sequence
size = in_stream.read_bigendian_int32()
if size >= 0:
elements = [self._elem_coder.decode_from_stream(in_stream, True)
- for _ in range(size)]
+ for _ in range(size)] # type: Iterable[Any]
else:
elements = []
count = in_stream.read_var_int64()
@@ -863,6 +948,7 @@
return self._construct_from_sequence(elements)
def estimate_size(self, value, nested=False):
+ # type: (Any, bool) -> int
"""Estimates the encoded size of the given value, in bytes."""
# TODO(ccy): This ignores element sizes.
estimated_size, _ = (
@@ -870,6 +956,7 @@
return estimated_size
def get_estimated_size_and_observables(self, value, nested=False):
+ # type: (Any, bool) -> Tuple[int, Observables]
"""Returns estimated size of value along with any nested observables."""
estimated_size = 0
# Size of 32-bit integer storing number of elements.
@@ -877,7 +964,7 @@
if isinstance(value, observable.ObservableMixin):
return estimated_size, [(value, self._elem_coder)]
- observables = []
+ observables = [] # type: Observables
for elem in value:
child_size, child_observables = (
self._elem_coder.get_estimated_size_and_observables(
@@ -948,6 +1035,7 @@
return PaneInfoEncoding.TWO_INDICES
def encode_to_stream(self, value, out, nested):
+ # type: (windowed_value.PaneInfo, create_OutputStream, bool) -> None
pane_info = value # cast
encoding_type = self._choose_encoding(pane_info)
out.write_byte(pane_info._encoded_byte | (encoding_type << 4))
@@ -962,6 +1050,7 @@
raise NotImplementedError('Invalid PaneInfoEncoding: %s' % encoding_type)
def decode_from_stream(self, in_stream, nested):
+ # type: (create_InputStream, bool) -> windowed_value.PaneInfo
encoded_first_byte = in_stream.read_byte()
base = windowed_value._BYTE_TO_PANE_INFO[encoded_first_byte & 0xF]
assert base is not None
@@ -983,6 +1072,7 @@
base.is_first, base.is_last, base.timing, index, nonspeculative_index)
def estimate_size(self, value, nested=False):
+ # type: (Any, bool) -> int
"""Estimates the encoded size of the given value, in bytes."""
size = 1
encoding_type = self._choose_encoding(value)
@@ -1019,6 +1109,7 @@
self._pane_info_coder = PaneInfoCoderImpl()
def encode_to_stream(self, value, out, nested):
+ # type: (windowed_value.WindowedValue, create_OutputStream, bool) -> None
wv = value # type cast
# Avoid creation of Timestamp object.
restore_sign = -1 if wv.timestamp_micros < 0 else 1
@@ -1041,6 +1132,7 @@
self._value_coder.encode_to_stream(wv.value, out, nested)
def decode_from_stream(self, in_stream, nested):
+ # type: (create_InputStream, bool) -> windowed_value.WindowedValue
timestamp = self._to_normal_time(in_stream.read_bigendian_uint64())
# Restore MIN/MAX timestamps to their actual values as encoding incurs loss
# of precision while converting to millis.
@@ -1067,13 +1159,14 @@
pane_info)
def get_estimated_size_and_observables(self, value, nested=False):
+ # type: (Any, bool) -> Tuple[int, Observables]
"""Returns estimated size of value along with any nested observables."""
if isinstance(value, observable.ObservableMixin):
# Should never be here.
# TODO(robertwb): Remove when coders are set correctly.
return 0, [(value, self._value_coder)]
estimated_size = 0
- observables = []
+ observables = [] # type: Observables
value_estimated_size, value_observables = (
self._value_coder.get_estimated_size_and_observables(
value.value, nested=nested))
@@ -1088,23 +1181,76 @@
return estimated_size, observables
+class ParamWindowedValueCoderImpl(WindowedValueCoderImpl):
+ """For internal use only; no backwards-compatibility guarantees.
+
+ A coder for windowed values with constant timestamp, windows and
+ pane info. The coder drops timestamp, windows and pane info during
+ encoding, and uses the supplied parameterized timestamp, windows
+ and pane info values during decoding when reconstructing the windowed
+ value."""
+
+ def __init__(self, value_coder, window_coder, payload):
+ super(ParamWindowedValueCoderImpl, self).__init__(
+ value_coder, TimestampCoderImpl(), window_coder)
+ self._timestamp, self._windows, self._pane_info = self._from_proto(
+ payload, window_coder)
+
+ def _from_proto(self, payload, window_coder):
+ windowed_value_coder = WindowedValueCoderImpl(
+ BytesCoderImpl(), TimestampCoderImpl(), window_coder)
+ wv = windowed_value_coder.decode(payload)
+ return wv.timestamp_micros, wv.windows, wv.pane_info
+
+ def encode_to_stream(self, value, out, nested):
+ wv = value # type cast
+ self._value_coder.encode_to_stream(wv.value, out, nested)
+
+ def decode_from_stream(self, in_stream, nested):
+ value = self._value_coder.decode_from_stream(in_stream, nested)
+ return windowed_value.create(
+ value,
+ self._timestamp,
+ self._windows,
+ self._pane_info)
+
+ def get_estimated_size_and_observables(self, value, nested=False):
+ """Returns estimated size of value along with any nested observables."""
+ if isinstance(value, observable.ObservableMixin):
+ # Should never be here.
+ # TODO(robertwb): Remove when coders are set correctly.
+ return 0, [(value, self._value_coder)]
+ estimated_size = 0
+ observables = []
+ value_estimated_size, value_observables = (
+ self._value_coder.get_estimated_size_and_observables(
+ value.value, nested=nested))
+ estimated_size += value_estimated_size
+ observables += value_observables
+ return estimated_size, observables
+
+
class LengthPrefixCoderImpl(StreamCoderImpl):
"""For internal use only; no backwards-compatibility guarantees.
Coder which prefixes the length of the encoded object in the stream."""
def __init__(self, value_coder):
+ # type: (CoderImpl) -> None
self._value_coder = value_coder
def encode_to_stream(self, value, out, nested):
+ # type: (Any, create_OutputStream, bool) -> None
encoded_value = self._value_coder.encode(value)
out.write_var_int64(len(encoded_value))
out.write(encoded_value)
def decode_from_stream(self, in_stream, nested):
+ # type: (create_InputStream, bool) -> Any
value_length = in_stream.read_var_int64()
return self._value_coder.decode(in_stream.read(value_length))
def estimate_size(self, value, nested=False):
+ # type: (Any, bool) -> int
value_size = self._value_coder.estimate_size(value)
return get_varint_size(value_size) + value_size
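
The reorganized import block near the top of coder_impl.py keeps the existing behaviour (use the compiled .stream module when available, otherwise fall back to .slow_stream) while routing static type checkers through the pure-Python branch so that the new annotations always resolve. A self-contained sketch of the same pattern, using ujson and json purely as stand-ins for the compiled and pure-Python modules:

from typing import TYPE_CHECKING

try:
  import ujson as json_impl  # optional compiled implementation, may be absent
except ImportError:
  SLOW_JSON = True
else:
  SLOW_JSON = False

if TYPE_CHECKING or SLOW_JSON:
  # Type checkers always analyze this branch, so annotations written against
  # the pure-Python module resolve even when the fast one is installed.
  import json as json_impl

print(json_impl.dumps({'have_fast_json': not SLOW_JSON}))
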
diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py
index 449e959..bff6dd0 100644
--- a/sdks/python/apache_beam/coders/coders.py
+++ b/sdks/python/apache_beam/coders/coders.py
@@ -19,12 +19,25 @@
Only those coders listed in __all__ are part of the public API of this module.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import base64
import sys
-import typing
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import Iterable
+from typing import List
+from typing import Optional
+from typing import Sequence
+from typing import Tuple
+from typing import Type
+from typing import TypeVar
+from typing import overload
import google.protobuf.wrappers_pb2
from future.moves import pickle
@@ -38,6 +51,11 @@
from apache_beam.typehints import typehints
from apache_beam.utils import proto_utils
+if TYPE_CHECKING:
+ from google.protobuf import message # pylint: disable=ungrouped-imports
+ from apache_beam.coders.typecoders import CoderRegistry
+ from apache_beam.runners.pipeline_context import PipelineContext
+
# pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports
try:
from .stream import get_varint_size
@@ -64,9 +82,18 @@
'FastPrimitivesCoder', 'FloatCoder', 'IterableCoder', 'PickleCoder',
'ProtoCoder', 'SingletonCoder', 'StrUtf8Coder', 'TimestampCoder',
'TupleCoder', 'TupleSequenceCoder', 'VarIntCoder',
- 'WindowedValueCoder'
+ 'WindowedValueCoder', 'ParamWindowedValueCoder'
]
+T = TypeVar('T')
+CoderT = TypeVar('CoderT', bound='Coder')
+ProtoCoderT = TypeVar('ProtoCoderT', bound='ProtoCoder')
+ConstructorFn = Callable[
+ [Optional[Any],
+ List['Coder'],
+ 'PipelineContext'],
+ Any]
+
def serialize_coder(coder):
from apache_beam.internal import pickler
@@ -84,6 +111,7 @@
"""Base class for coders."""
def encode(self, value):
+ # type: (Any) -> bytes
"""Encodes the given object into a byte string."""
raise NotImplementedError('Encode not implemented: %s.' % self)
@@ -100,6 +128,7 @@
return self.get_impl().decode_nested(encoded)
def is_deterministic(self):
+ # type: () -> bool
"""Whether this coder is guaranteed to encode values deterministically.
A deterministic coder is required for key coders in GroupByKey operations
@@ -152,6 +181,7 @@
# ===========================================================================
def _create_impl(self):
+ # type: () -> coder_impl.CoderImpl
"""Creates a CoderImpl to do the actual encoding and decoding.
"""
return coder_impl.CallbackCoderImpl(self.encode, self.decode,
@@ -182,25 +212,30 @@
@classmethod
def from_type_hint(cls, unused_typehint, unused_registry):
+ # type: (Type[CoderT], Any, CoderRegistry) -> CoderT
# If not overridden, just construct the coder without arguments.
return cls()
def is_kv_coder(self):
+ # type: () -> bool
return False
def key_coder(self):
+ # type: () -> Coder
if self.is_kv_coder():
raise NotImplementedError('key_coder: %s' % self)
else:
raise ValueError('Not a KV coder: %s.' % self)
def value_coder(self):
+ # type: () -> Coder
if self.is_kv_coder():
raise NotImplementedError('value_coder: %s' % self)
else:
raise ValueError('Not a KV coder: %s.' % self)
def _get_component_coders(self):
+ # type: () -> Sequence[Coder]
"""For internal use only; no backwards-compatibility guarantees.
Returns the internal component coders of this coder."""
@@ -248,7 +283,26 @@
def __hash__(self):
return hash(type(self))
- _known_urns = {}
+ _known_urns = {} # type: Dict[str, Tuple[type, ConstructorFn]]
+
+ @classmethod
+ @overload
+ def register_urn(cls,
+ urn, # type: str
+ parameter_type, # type: Optional[Type[T]]
+ ):
+ # type: (...) -> Callable[[Callable[[T, List[Coder], PipelineContext], Any]], Callable[[T, List[Coder], PipelineContext], Any]]
+ pass
+
+ @classmethod
+ @overload
+ def register_urn(cls,
+ urn, # type: str
+ parameter_type, # type: Optional[Type[T]]
+ fn # type: Callable[[T, List[Coder], PipelineContext], Any]
+ ):
+ # type: (...) -> None
+ pass
@classmethod
def register_urn(cls, urn, parameter_type, fn=None):
@@ -274,6 +328,7 @@
return register
def to_runner_api(self, context):
+ # type: (PipelineContext) -> beam_runner_api_pb2.Coder
urn, typed_param, components = self.to_runner_api_parameter(context)
return beam_runner_api_pb2.Coder(
spec=beam_runner_api_pb2.FunctionSpec(
@@ -285,6 +340,7 @@
@classmethod
def from_runner_api(cls, coder_proto, context):
+ # type: (Type[CoderT], beam_runner_api_pb2.Coder, PipelineContext) -> CoderT
"""Converts from an FunctionSpec to a Fn object.
Prefer registering a urn with its parameter type and constructor.
@@ -299,10 +355,11 @@
context)
except Exception:
if context.allow_proto_holders:
- return RunnerAPICoderHolder(coder_proto)
+ return RunnerAPICoderHolder(coder_proto) # type: ignore # too ambiguous
raise
def to_runner_api_parameter(self, context):
+ # type: (Optional[PipelineContext]) -> Tuple[str, Any, Sequence[Coder]]
return (
python_urns.PICKLED_CODER,
google.protobuf.wrappers_pb2.BytesValue(value=serialize_coder(self)),
@@ -310,6 +367,7 @@
@staticmethod
def register_structured_urn(urn, cls):
+ # type: (str, Type[Coder]) -> None
"""Register a coder that's completely defined by its urn and its
component(s), if any, which are passed to construct the instance.
"""
@@ -403,6 +461,7 @@
return coder_impl.BytesCoderImpl()
def is_deterministic(self):
+ # type: () -> bool
return True
def to_type_hint(self):
@@ -450,6 +509,7 @@
return coder_impl.VarIntCoderImpl()
def is_deterministic(self):
+ # type: () -> bool
return True
def to_type_hint(self):
@@ -477,6 +537,7 @@
return coder_impl.FloatCoderImpl()
def is_deterministic(self):
+ # type: () -> bool
return True
def to_type_hint(self):
@@ -499,6 +560,7 @@
return coder_impl.TimestampCoderImpl()
def is_deterministic(self):
+ # type: () -> bool
return True
def __eq__(self, other):
@@ -513,15 +575,18 @@
For internal use."""
def __init__(self, payload_coder):
+ # type: (Coder) -> None
self._payload_coder = payload_coder
def _get_component_coders(self):
+ # type: () -> List[Coder]
return [self._payload_coder]
def _create_impl(self):
return coder_impl.TimerCoderImpl(self._payload_coder.get_impl())
def is_deterministic(self):
+ # type: () -> bool
return self._payload_coder.is_deterministic()
def __eq__(self, other):
@@ -546,6 +611,7 @@
return coder_impl.SingletonCoderImpl(self._value)
def is_deterministic(self):
+ # type: () -> bool
return True
def __eq__(self, other):
@@ -577,6 +643,7 @@
"""Base class for pickling coders."""
def is_deterministic(self):
+ # type: () -> bool
# Note that the default coder, the PickleCoder, is not deterministic (for
# example, the ordering of pickled entries in maps may vary across
# executions), and so is not in general suitable for usage as a key coder in
@@ -602,6 +669,7 @@
# we can't always infer the return values of lambdas in ParDo operations, the
# result of which may be used in a GroupByKey.
def is_kv_coder(self):
+ # type: () -> bool
return True
def key_coder(self):
@@ -630,7 +698,7 @@
return DeterministicFastPrimitivesCoder(self, step_label)
def to_type_hint(self):
- return typing.Any
+ return Any
class DillCoder(_PickleCoderBase):
@@ -652,9 +720,11 @@
self._underlying_coder.get_impl(), self._step_label)
def is_deterministic(self):
+ # type: () -> bool
return True
def is_kv_coder(self):
+ # type: () -> bool
return True
def key_coder(self):
@@ -664,7 +734,7 @@
return self
def to_type_hint(self):
- return typing.Any
+ return Any
class FastPrimitivesCoder(FastCoder):
@@ -673,6 +743,7 @@
For unknown types, falls back to another coder (e.g. PickleCoder).
"""
def __init__(self, fallback_coder=PickleCoder()):
+ # type: (Coder) -> None
self._fallback_coder = fallback_coder
def _create_impl(self):
@@ -680,6 +751,7 @@
self._fallback_coder.get_impl())
def is_deterministic(self):
+ # type: () -> bool
return self._fallback_coder.is_deterministic()
def as_deterministic_coder(self, step_label, error_message=None):
@@ -689,7 +761,7 @@
return DeterministicFastPrimitivesCoder(self, step_label)
def to_type_hint(self):
- return typing.Any
+ return Any
def as_cloud_object(self, coders_context=None, is_pair_like=True):
value = super(FastCoder, self).as_cloud_object(coders_context)
@@ -710,6 +782,7 @@
# since we can't always infer the return values of lambdas in ParDo
# operations, the result of which may be used in a GroupByKey.
def is_kv_coder(self):
+ # type: () -> bool
return True
def key_coder(self):
@@ -737,6 +810,7 @@
return pickle.loads(base64.b64decode(encoded))
def is_deterministic(self):
+ # type: () -> bool
# Note that the Base64PickleCoder is not deterministic. See the
# corresponding comments for PickleCoder above.
return False
@@ -771,12 +845,14 @@
"""
def __init__(self, proto_message_type):
+ # type: (google.protobuf.message.Message) -> None
self.proto_message_type = proto_message_type
def _create_impl(self):
return coder_impl.ProtoCoderImpl(self.proto_message_type)
def is_deterministic(self):
+ # type: () -> bool
# TODO(vikasrk): A proto message can be deterministic if it does not contain
# a Map.
return False
@@ -813,6 +889,7 @@
return coder_impl.DeterministicProtoCoderImpl(self.proto_message_type)
def is_deterministic(self):
+ # type: () -> bool
return True
def as_deterministic_coder(self, step_label, error_message=None):
@@ -857,12 +934,14 @@
"""Coder of tuple objects."""
def __init__(self, components):
+ # type: (Iterable[Coder]) -> None
self._coders = tuple(components)
def _create_impl(self):
return coder_impl.TupleCoderImpl([c.get_impl() for c in self._coders])
def is_deterministic(self):
+ # type: () -> bool
return all(c.is_deterministic() for c in self._coders)
def as_deterministic_coder(self, step_label, error_message=None):
@@ -877,6 +956,7 @@
@staticmethod
def from_type_hint(typehint, registry):
+ # type: (typehints.TupleConstraint, CoderRegistry) -> TupleCoder
return TupleCoder([registry.get_coder(t) for t in typehint.tuple_types])
def as_cloud_object(self, coders_context=None):
@@ -895,20 +975,25 @@
return super(TupleCoder, self).as_cloud_object(coders_context)
def _get_component_coders(self):
+ # type: () -> Tuple[Coder, ...]
return self.coders()
def coders(self):
+ # type: () -> Tuple[Coder, ...]
return self._coders
def is_kv_coder(self):
+ # type: () -> bool
return len(self._coders) == 2
def key_coder(self):
+ # type: () -> Coder
if len(self._coders) != 2:
raise ValueError('TupleCoder does not have exactly 2 components.')
return self._coders[0]
def value_coder(self):
+ # type: () -> Coder
if len(self._coders) != 2:
raise ValueError('TupleCoder does not have exactly 2 components.')
return self._coders[1]
@@ -938,6 +1023,7 @@
"""Coder of homogeneous tuple objects."""
def __init__(self, elem_coder):
+ # type: (Coder) -> None
self._elem_coder = elem_coder
def value_coder(self):
@@ -947,6 +1033,7 @@
return coder_impl.TupleSequenceCoderImpl(self._elem_coder.get_impl())
def is_deterministic(self):
+ # type: () -> bool
return self._elem_coder.is_deterministic()
def as_deterministic_coder(self, step_label, error_message=None):
@@ -958,9 +1045,11 @@
@staticmethod
def from_type_hint(typehint, registry):
+ # type: (Any, CoderRegistry) -> TupleSequenceCoder
return TupleSequenceCoder(registry.get_coder(typehint.inner_type))
def _get_component_coders(self):
+ # type: () -> Tuple[Coder, ...]
return (self._elem_coder,)
def __repr__(self):
@@ -978,12 +1067,14 @@
"""Coder of iterables of homogeneous objects."""
def __init__(self, elem_coder):
+ # type: (Coder) -> None
self._elem_coder = elem_coder
def _create_impl(self):
return coder_impl.IterableCoderImpl(self._elem_coder.get_impl())
def is_deterministic(self):
+ # type: () -> bool
return self._elem_coder.is_deterministic()
def as_deterministic_coder(self, step_label, error_message=None):
@@ -1012,9 +1103,11 @@
@staticmethod
def from_type_hint(typehint, registry):
+ # type: (Any, CoderRegistry) -> IterableCoder
return IterableCoder(registry.get_coder(typehint.inner_type))
def _get_component_coders(self):
+ # type: () -> Tuple[Coder, ...]
return (self._elem_coder,)
def __repr__(self):
@@ -1055,6 +1148,7 @@
return coder_impl.IntervalWindowCoderImpl()
def is_deterministic(self):
+ # type: () -> bool
return True
def as_cloud_object(self, coders_context=None):
@@ -1077,6 +1171,7 @@
"""Coder for windowed values."""
def __init__(self, wrapped_value_coder, window_coder=None):
+ # type: (Coder, Optional[Coder]) -> None
if not window_coder:
window_coder = PickleCoder()
self.wrapped_value_coder = wrapped_value_coder
@@ -1090,6 +1185,7 @@
self.window_coder.get_impl())
def is_deterministic(self):
+ # type: () -> bool
return all(c.is_deterministic() for c in [self.wrapped_value_coder,
self.timestamp_coder,
self.window_coder])
@@ -1107,15 +1203,19 @@
}
def _get_component_coders(self):
+ # type: () -> List[Coder]
return [self.wrapped_value_coder, self.window_coder]
def is_kv_coder(self):
+ # type: () -> bool
return self.wrapped_value_coder.is_kv_coder()
def key_coder(self):
+ # type: () -> Coder
return self.wrapped_value_coder.key_coder()
def value_coder(self):
+ # type: () -> Coder
return self.wrapped_value_coder.value_coder()
def __repr__(self):
@@ -1136,18 +1236,64 @@
common_urns.coders.WINDOWED_VALUE.urn, WindowedValueCoder)
+class ParamWindowedValueCoder(WindowedValueCoder):
+ """A coder used for parameterized windowed values."""
+
+ def __init__(self, payload, components):
+ super(ParamWindowedValueCoder, self).__init__(components[0], components[1])
+ self.payload = payload
+
+ def _create_impl(self):
+ return coder_impl.ParamWindowedValueCoderImpl(
+ self.wrapped_value_coder.get_impl(),
+ self.window_coder.get_impl(),
+ self.payload)
+
+ def is_deterministic(self):
+ return self.wrapped_value_coder.is_deterministic()
+
+ def as_cloud_object(self, coders_context=None):
+ raise NotImplementedError(
+ "as_cloud_object not supported for ParamWindowedValueCoder")
+
+ def __repr__(self):
+ return 'ParamWindowedValueCoder[%s]' % self.wrapped_value_coder
+
+ def __eq__(self, other):
+ return (type(self) == type(other)
+ and self.wrapped_value_coder == other.wrapped_value_coder
+ and self.window_coder == other.window_coder
+ and self.payload == other.payload)
+
+ def __hash__(self):
+ return hash((self.wrapped_value_coder,
+ self.window_coder,
+ self.payload))
+
+ @Coder.register_urn(common_urns.coders.PARAM_WINDOWED_VALUE.urn, bytes)
+ def from_runner_api_parameter(payload, components, unused_context):
+ return ParamWindowedValueCoder(payload, components)
+
+ def to_runner_api_parameter(self, context):
+ return (common_urns.coders.PARAM_WINDOWED_VALUE.urn,
+ self.payload,
+ (self.wrapped_value_coder, self.window_coder))
+
+
class LengthPrefixCoder(FastCoder):
"""For internal use only; no backwards-compatibility guarantees.
Coder which prefixes the length of the encoded object in the stream."""
def __init__(self, value_coder):
+ # type: (Coder) -> None
self._value_coder = value_coder
def _create_impl(self):
return coder_impl.LengthPrefixCoderImpl(self._value_coder.get_impl())
def is_deterministic(self):
+ # type: () -> bool
return self._value_coder.is_deterministic()
def estimate_size(self, value):
@@ -1167,6 +1313,7 @@
}
def _get_component_coders(self):
+ # type: () -> Tuple[Coder, ...]
return (self._value_coder,)
def __repr__(self):
@@ -1187,9 +1334,9 @@
class StateBackedIterableCoder(FastCoder):
def __init__(
self,
- element_coder,
- read_state=None,
- write_state=None,
+ element_coder, # type: Coder
+ read_state=None, # type: Optional[coder_impl.IterableStateReader]
+ write_state=None, # type: Optional[coder_impl.IterableStateWriter]
write_state_threshold=1):
self._element_coder = element_coder
self._read_state = read_state
@@ -1204,9 +1351,11 @@
self._write_state_threshold)
def is_deterministic(self):
+ # type: () -> bool
return False
def _get_component_coders(self):
+ # type: () -> Tuple[Coder, ...]
return (self._element_coder,)
def __repr__(self):
@@ -1221,6 +1370,7 @@
return hash((type(self), self._element_coder, self._write_state_threshold))
def to_runner_api_parameter(self, context):
+ # type: (Optional[PipelineContext]) -> Tuple[str, Any, Sequence[Coder]]
return (
common_urns.coders.STATE_BACKED_ITERABLE.urn,
str(self._write_state_threshold).encode('ascii'),
@@ -1254,4 +1404,4 @@
return self._proto
def to_type_hint(self):
- return typing.Any
+ return Any
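
The new ParamWindowedValueCoder is the public counterpart of ParamWindowedValueCoderImpl: encoding writes only the element value, and decoding reattaches the constant timestamp, windows and pane info carried in the coder's payload. A minimal round-trip sketch, closely following the test added to coders_test_common.py below (assumes an installed apache_beam):

from apache_beam.coders import coders
from apache_beam.transforms import window
from apache_beam.transforms.window import IntervalWindow
from apache_beam.utils import windowed_value
from apache_beam.utils.windowed_value import PaneInfo

# The payload is a windowed value whose metadata becomes the coder's constants.
template = windowed_value.create(
    b'', 1000 * 1000, (IntervalWindow(11, 21),), PaneInfo(True, False, 1, 2, 3))
payload = coders.WindowedValueCoder(
    coders.BytesCoder(), coders.IntervalWindowCoder()).encode(template)

coder = coders.ParamWindowedValueCoder(
    payload, [coders.VarIntCoder(), coders.IntervalWindowCoder()])

# Only the value is written; the window metadata is dropped on encode...
assert coder.encode(window.GlobalWindows.windowed_value(1)) == b'\x01'

# ...and restored from the payload on decode.
decoded = coder.decode(b'\x01')
assert decoded.value == 1
assert decoded.windows == (IntervalWindow(11, 21),)
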
diff --git a/sdks/python/apache_beam/coders/coders_test.py b/sdks/python/apache_beam/coders/coders_test.py
index 9b39962..a151a4a 100644
--- a/sdks/python/apache_beam/coders/coders_test.py
+++ b/sdks/python/apache_beam/coders/coders_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import base64
diff --git a/sdks/python/apache_beam/coders/coders_test_common.py b/sdks/python/apache_beam/coders/coders_test_common.py
index 122cbfd..f57407b 100644
--- a/sdks/python/apache_beam/coders/coders_test_common.py
+++ b/sdks/python/apache_beam/coders/coders_test_common.py
@@ -16,6 +16,8 @@
#
"""Tests common to all coder implementations."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -382,6 +384,62 @@
(windowed_value.WindowedValue(1.5, 0, ()),
windowed_value.WindowedValue("abc", 10, ('window',))))
+ def test_param_windowed_value_coder(self):
+ from apache_beam.transforms.window import IntervalWindow
+ from apache_beam.utils.windowed_value import PaneInfo
+ wv = windowed_value.create(
+ b'',
+ # Milliseconds to microseconds
+ 1000 * 1000,
+ (IntervalWindow(11, 21),),
+ PaneInfo(True, False, 1, 2, 3))
+ windowed_value_coder = coders.WindowedValueCoder(
+ coders.BytesCoder(), coders.IntervalWindowCoder())
+ payload = windowed_value_coder.encode(wv)
+ coder = coders.ParamWindowedValueCoder(
+ payload, [coders.VarIntCoder(), coders.IntervalWindowCoder()])
+
+ # Test binary representation
+ self.assertEqual(b'\x01',
+ coder.encode(window.GlobalWindows.windowed_value(1)))
+
+ # Test unnested
+ self.check_coder(
+ coders.ParamWindowedValueCoder(
+ payload, [coders.VarIntCoder(), coders.IntervalWindowCoder()]),
+ windowed_value.WindowedValue(
+ 3,
+ 1,
+ (window.IntervalWindow(11, 21),),
+ PaneInfo(True, False, 1, 2, 3)),
+ windowed_value.WindowedValue(
+ 1,
+ 1,
+ (window.IntervalWindow(11, 21),),
+ PaneInfo(True, False, 1, 2, 3)))
+
+ # Test nested
+ self.check_coder(
+ coders.TupleCoder((
+ coders.ParamWindowedValueCoder(
+ payload, [
+ coders.FloatCoder(),
+ coders.IntervalWindowCoder()]),
+ coders.ParamWindowedValueCoder(
+ payload, [
+ coders.StrUtf8Coder(),
+ coders.IntervalWindowCoder()]))),
+ (windowed_value.WindowedValue(
+ 1.5,
+ 1,
+ (window.IntervalWindow(11, 21),),
+ PaneInfo(True, False, 1, 2, 3)),
+ windowed_value.WindowedValue(
+ "abc",
+ 1,
+ (window.IntervalWindow(11, 21),),
+ PaneInfo(True, False, 1, 2, 3))))
+
def test_proto_coder(self):
# For instructions on how these test proto messages were generated,
# see coders_test.py
diff --git a/sdks/python/apache_beam/coders/fast_coders_test.py b/sdks/python/apache_beam/coders/fast_coders_test.py
index 6247a60..c2f795c 100644
--- a/sdks/python/apache_beam/coders/fast_coders_test.py
+++ b/sdks/python/apache_beam/coders/fast_coders_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for compiled implementation of coder impls."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/coders/observable.py b/sdks/python/apache_beam/coders/observable.py
index 3d0a7fc..b744f5e 100644
--- a/sdks/python/apache_beam/coders/observable.py
+++ b/sdks/python/apache_beam/coders/observable.py
@@ -20,6 +20,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
diff --git a/sdks/python/apache_beam/coders/observable_test.py b/sdks/python/apache_beam/coders/observable_test.py
index a56a320..7bf5ab8 100644
--- a/sdks/python/apache_beam/coders/observable_test.py
+++ b/sdks/python/apache_beam/coders/observable_test.py
@@ -16,10 +16,14 @@
#
"""Tests for the Observable mixin class."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
import unittest
+from typing import List
+from typing import Optional
from apache_beam.coders import observable
@@ -27,7 +31,7 @@
class ObservableMixinTest(unittest.TestCase):
observed_count = 0
observed_sum = 0
- observed_keys = []
+ observed_keys = [] # type: List[Optional[str]]
def observer(self, value, key=None):
self.observed_count += 1
diff --git a/sdks/python/apache_beam/coders/row_coder.py b/sdks/python/apache_beam/coders/row_coder.py
index a259f36..73886c1 100644
--- a/sdks/python/apache_beam/coders/row_coder.py
+++ b/sdks/python/apache_beam/coders/row_coder.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import itertools
diff --git a/sdks/python/apache_beam/coders/row_coder_test.py b/sdks/python/apache_beam/coders/row_coder_test.py
index dbdc5fc..76088f6 100644
--- a/sdks/python/apache_beam/coders/row_coder_test.py
+++ b/sdks/python/apache_beam/coders/row_coder_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/coders/slow_coders_test.py b/sdks/python/apache_beam/coders/slow_coders_test.py
index 2ddc46e..9d3da5d 100644
--- a/sdks/python/apache_beam/coders/slow_coders_test.py
+++ b/sdks/python/apache_beam/coders/slow_coders_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for uncompiled implementation of coder impls."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/coders/slow_stream.py b/sdks/python/apache_beam/coders/slow_stream.py
index 3b740c7..d8501d4 100644
--- a/sdks/python/apache_beam/coders/slow_stream.py
+++ b/sdks/python/apache_beam/coders/slow_stream.py
@@ -19,12 +19,15 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import struct
import sys
from builtins import chr
from builtins import object
+from typing import List
class OutputStream(object):
@@ -33,10 +36,11 @@
A pure Python implementation of stream.OutputStream."""
def __init__(self):
- self.data = []
+ self.data = [] # type: List[bytes]
self.byte_count = 0
def write(self, b, nested=False):
+ # type: (bytes, bool) -> None
assert isinstance(b, bytes)
if nested:
self.write_var_int64(len(b))
@@ -48,6 +52,7 @@
self.byte_count += 1
def write_var_int64(self, v):
+ # type: (int) -> None
if v < 0:
v += 1 << 64
if v <= 0:
@@ -74,12 +79,15 @@
self.write(struct.pack('>d', v))
def get(self):
+ # type: () -> bytes
return b''.join(self.data)
def size(self):
+ # type: () -> int
return self.byte_count
def _clear(self):
+ # type: () -> None
self.data = []
self.byte_count = 0
@@ -95,6 +103,7 @@
self.count = 0
def write(self, byte_array, nested=False):
+ # type: (bytes, bool) -> None
blen = len(byte_array)
if nested:
self.write_var_int64(blen)
@@ -119,6 +128,7 @@
A pure Python implementation of stream.InputStream."""
def __init__(self, data):
+ # type: (bytes) -> None
self.data = data
self.pos = 0
@@ -139,17 +149,22 @@
return len(self.data) - self.pos
def read(self, size):
+ # type: (int) -> bytes
self.pos += size
return self.data[self.pos - size : self.pos]
def read_all(self, nested):
+ # type: (bool) -> bytes
return self.read(self.read_var_int64() if nested else self.size())
def read_byte_py2(self):
+ # type: () -> int
self.pos += 1
- return ord(self.data[self.pos - 1])
+ # mypy tests against python 3.x, where this is an error:
+ return ord(self.data[self.pos - 1]) # type: ignore[arg-type]
def read_byte_py3(self):
+ # type: () -> int
self.pos += 1
return self.data[self.pos - 1]
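
For reference, the pure-Python streams annotated above can be exercised directly; the following round trip shows the varint framing that the new type comments describe (assumes an installed apache_beam):

from apache_beam.coders.slow_stream import InputStream, OutputStream

out = OutputStream()
out.write_var_int64(300)       # variable-length integer
out.write(b'payload', True)    # nested=True adds a varint length prefix
data = out.get()

inp = InputStream(data)
assert inp.read_var_int64() == 300
assert inp.read_all(True) == b'payload'
assert inp.size() == 0         # everything was consumed
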
diff --git a/sdks/python/apache_beam/coders/standard_coders_test.py b/sdks/python/apache_beam/coders/standard_coders_test.py
index 5ffbeea..bd8ea16 100644
--- a/sdks/python/apache_beam/coders/standard_coders_test.py
+++ b/sdks/python/apache_beam/coders/standard_coders_test.py
@@ -17,6 +17,8 @@
"""Unit tests for coders that must be consistent across all Beam SDKs.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
@@ -27,6 +29,8 @@
import sys
import unittest
from builtins import map
+from typing import Dict
+from typing import Tuple
import yaml
@@ -39,6 +43,8 @@
from apache_beam.typehints import schemas
from apache_beam.utils import windowed_value
from apache_beam.utils.timestamp import Timestamp
+from apache_beam.utils.windowed_value import PaneInfo
+from apache_beam.utils.windowed_value import PaneInfoTiming
STANDARD_CODERS_YAML = os.path.normpath(os.path.join(
os.path.dirname(__file__), '../portability/api/standard_coders.yaml'))
@@ -123,6 +129,16 @@
lambda x, value_parser, window_parser: windowed_value.create(
value_parser(x['value']), x['timestamp'] * 1000,
tuple([window_parser(w) for w in x['windows']])),
+ 'beam:coder:param_windowed_value:v1':
+ lambda x, value_parser, window_parser: windowed_value.create(
+ value_parser(x['value']), x['timestamp'] * 1000,
+ tuple([window_parser(w) for w in x['windows']]),
+ PaneInfo(
+ x['pane']['is_first'],
+ x['pane']['is_last'],
+ PaneInfoTiming.from_string(x['pane']['timing']),
+ x['pane']['index'],
+ x['pane']['on_time_index'])),
'beam:coder:timer:v1':
lambda x, payload_parser: dict(
payload=payload_parser(x['payload']),
@@ -191,7 +207,7 @@
# Used when --fix is passed.
fix = False
- to_fix = {}
+ to_fix = {} # type: Dict[Tuple[int, bytes], bytes]
@classmethod
def tearDownClass(cls):
diff --git a/sdks/python/apache_beam/coders/stream_test.py b/sdks/python/apache_beam/coders/stream_test.py
index e627ebb..d82a816 100644
--- a/sdks/python/apache_beam/coders/stream_test.py
+++ b/sdks/python/apache_beam/coders/stream_test.py
@@ -16,6 +16,8 @@
#
"""Tests for the stream implementations."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/coders/typecoders.py b/sdks/python/apache_beam/coders/typecoders.py
index 6f6f322..cd694af 100644
--- a/sdks/python/apache_beam/coders/typecoders.py
+++ b/sdks/python/apache_beam/coders/typecoders.py
@@ -63,9 +63,17 @@
See apache_beam.typehints.decorators module for more details.
"""
+
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
+from typing import Any
+from typing import Dict
+from typing import Iterable
+from typing import List
+from typing import Type
from past.builtins import unicode
@@ -79,8 +87,8 @@
"""A coder registry for typehint/coder associations."""
def __init__(self, fallback_coder=None):
- self._coders = {}
- self.custom_types = []
+ self._coders = {} # type: Dict[Any, Type[coders.Coder]]
+ self.custom_types = [] # type: List[Any]
self.register_standard_coders(fallback_coder)
def register_standard_coders(self, fallback_coder):
@@ -97,9 +105,11 @@
self._fallback_coder = fallback_coder or FirstOf(default_fallback_coders)
def _register_coder_internal(self, typehint_type, typehint_coder_class):
+ # type: (Any, Type[coders.Coder]) -> None
self._coders[typehint_type] = typehint_coder_class
def register_coder(self, typehint_type, typehint_coder_class):
+ # type: (Any, Type[coders.Coder]) -> None
if not isinstance(typehint_coder_class, type):
raise TypeError('Coder registration requires a coder class object. '
'Received %r instead.' % typehint_coder_class)
@@ -108,6 +118,7 @@
self._register_coder_internal(typehint_type, typehint_coder_class)
def get_coder(self, typehint):
+ # type: (Any) -> coders.Coder
coder = self._coders.get(
typehint.__class__ if isinstance(typehint, typehints.TypeConstraint)
else typehint, None)
@@ -164,6 +175,7 @@
A class used to get the first matching coder from a list of coders."""
def __init__(self, coders):
+ # type: (Iterable[Type[coders.Coder]]) -> None
self._coders = coders
def from_type_hint(self, typehint, registry):
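
The registry methods annotated above are the ones user code calls to associate a custom type with a coder. A small sketch of that flow (Point and PointCoder are invented for the example; assumes an installed apache_beam):

from apache_beam import coders

class Point(object):
  def __init__(self, x, y):
    self.x, self.y = x, y

class PointCoder(coders.Coder):
  # A deterministic, illustrative coder for the hypothetical Point type.
  def encode(self, value):
    return b'%d,%d' % (value.x, value.y)

  def decode(self, encoded):
    x, y = encoded.split(b',')
    return Point(int(x), int(y))

  def is_deterministic(self):
    return True

# register_coder takes the type and the coder class; get_coder returns an instance.
coders.registry.register_coder(Point, PointCoder)
assert isinstance(coders.registry.get_coder(Point), PointCoder)
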
diff --git a/sdks/python/apache_beam/coders/typecoders_test.py b/sdks/python/apache_beam/coders/typecoders_test.py
index 52e32fb..3bc8aef 100644
--- a/sdks/python/apache_beam/coders/typecoders_test.py
+++ b/sdks/python/apache_beam/coders/typecoders_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for the typecoders module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/error.py b/sdks/python/apache_beam/error.py
index 47fec08..3165842 100644
--- a/sdks/python/apache_beam/error.py
+++ b/sdks/python/apache_beam/error.py
@@ -17,6 +17,8 @@
"""Python Dataflow error classes."""
+# pytype: skip-file
+
from __future__ import absolute_import
diff --git a/sdks/python/apache_beam/examples/avro_bitcoin.py b/sdks/python/apache_beam/examples/avro_bitcoin.py
index f6ab89e..917c8c4 100644
--- a/sdks/python/apache_beam/examples/avro_bitcoin.py
+++ b/sdks/python/apache_beam/examples/avro_bitcoin.py
@@ -24,6 +24,8 @@
--compress --fastavro --output fastavro-compressed
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/complete/autocomplete.py b/sdks/python/apache_beam/examples/complete/autocomplete.py
index 03b8500..6be41ad 100644
--- a/sdks/python/apache_beam/examples/complete/autocomplete.py
+++ b/sdks/python/apache_beam/examples/complete/autocomplete.py
@@ -17,6 +17,8 @@
"""A workflow emitting the top k most common words for each prefix."""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/complete/autocomplete_test.py b/sdks/python/apache_beam/examples/complete/autocomplete_test.py
index 29d8203..863ed51 100644
--- a/sdks/python/apache_beam/examples/complete/autocomplete_test.py
+++ b/sdks/python/apache_beam/examples/complete/autocomplete_test.py
@@ -17,6 +17,8 @@
"""Test for the autocomplete example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/examples/complete/distribopt.py b/sdks/python/apache_beam/examples/complete/distribopt.py
index 42c7b83..20c0a8c 100644
--- a/sdks/python/apache_beam/examples/complete/distribopt.py
+++ b/sdks/python/apache_beam/examples/complete/distribopt.py
@@ -49,6 +49,8 @@
- Selecting the mapping with the lowest cost.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/examples/complete/distribopt_test.py b/sdks/python/apache_beam/examples/complete/distribopt_test.py
index ffdbd99..bce76a1 100644
--- a/sdks/python/apache_beam/examples/complete/distribopt_test.py
+++ b/sdks/python/apache_beam/examples/complete/distribopt_test.py
@@ -17,6 +17,8 @@
"""Test for the distrib_optimization example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/complete/estimate_pi.py b/sdks/python/apache_beam/examples/complete/estimate_pi.py
index aa41d02..9870075 100644
--- a/sdks/python/apache_beam/examples/complete/estimate_pi.py
+++ b/sdks/python/apache_beam/examples/complete/estimate_pi.py
@@ -24,6 +24,8 @@
we multiply our counts ratio by four to estimate π.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/examples/complete/estimate_pi_test.py b/sdks/python/apache_beam/examples/complete/estimate_pi_test.py
index 78376b6..2ea4da2 100644
--- a/sdks/python/apache_beam/examples/complete/estimate_pi_test.py
+++ b/sdks/python/apache_beam/examples/complete/estimate_pi_test.py
@@ -17,6 +17,8 @@
"""Test for the estimate_pi example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/complete/game/game_stats.py b/sdks/python/apache_beam/examples/complete/game/game_stats.py
index 8f446e6..13be705 100644
--- a/sdks/python/apache_beam/examples/complete/game/game_stats.py
+++ b/sdks/python/apache_beam/examples/complete/game/game_stats.py
@@ -70,6 +70,8 @@
--temp_location gs://$BUCKET/user_score/temp
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/complete/game/game_stats_it_test.py b/sdks/python/apache_beam/examples/complete/game/game_stats_it_test.py
index 70dafb0..96e57b1 100644
--- a/sdks/python/apache_beam/examples/complete/game/game_stats_it_test.py
+++ b/sdks/python/apache_beam/examples/complete/game/game_stats_it_test.py
@@ -30,6 +30,8 @@
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/complete/game/game_stats_test.py b/sdks/python/apache_beam/examples/complete/game/game_stats_test.py
index 209f0cf..e3058d8 100644
--- a/sdks/python/apache_beam/examples/complete/game/game_stats_test.py
+++ b/sdks/python/apache_beam/examples/complete/game/game_stats_test.py
@@ -17,6 +17,8 @@
"""Test for the game_stats example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/complete/game/hourly_team_score.py b/sdks/python/apache_beam/examples/complete/game/hourly_team_score.py
index e0a5c47..caa7a90 100644
--- a/sdks/python/apache_beam/examples/complete/game/hourly_team_score.py
+++ b/sdks/python/apache_beam/examples/complete/game/hourly_team_score.py
@@ -64,6 +64,8 @@
--temp_location gs://$BUCKET/user_score/temp
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/complete/game/hourly_team_score_it_test.py b/sdks/python/apache_beam/examples/complete/game/hourly_team_score_it_test.py
index 8d86f18..4499d66 100644
--- a/sdks/python/apache_beam/examples/complete/game/hourly_team_score_it_test.py
+++ b/sdks/python/apache_beam/examples/complete/game/hourly_team_score_it_test.py
@@ -30,6 +30,8 @@
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/complete/game/hourly_team_score_test.py b/sdks/python/apache_beam/examples/complete/game/hourly_team_score_test.py
index 8c2497a..79db0bc 100644
--- a/sdks/python/apache_beam/examples/complete/game/hourly_team_score_test.py
+++ b/sdks/python/apache_beam/examples/complete/game/hourly_team_score_test.py
@@ -17,6 +17,8 @@
"""Test for the user_score example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/complete/game/leader_board.py b/sdks/python/apache_beam/examples/complete/game/leader_board.py
index 2288d16..c297532 100644
--- a/sdks/python/apache_beam/examples/complete/game/leader_board.py
+++ b/sdks/python/apache_beam/examples/complete/game/leader_board.py
@@ -78,6 +78,8 @@
--temp_location gs://$BUCKET/user_score/temp
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/complete/game/leader_board_it_test.py b/sdks/python/apache_beam/examples/complete/game/leader_board_it_test.py
index af2f2e6..d718f70 100644
--- a/sdks/python/apache_beam/examples/complete/game/leader_board_it_test.py
+++ b/sdks/python/apache_beam/examples/complete/game/leader_board_it_test.py
@@ -30,6 +30,8 @@
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/complete/game/leader_board_test.py b/sdks/python/apache_beam/examples/complete/game/leader_board_test.py
index 3aad052..08ac76b 100644
--- a/sdks/python/apache_beam/examples/complete/game/leader_board_test.py
+++ b/sdks/python/apache_beam/examples/complete/game/leader_board_test.py
@@ -17,6 +17,8 @@
"""Test for the leader_board example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/complete/game/user_score.py b/sdks/python/apache_beam/examples/complete/game/user_score.py
index 74b47ba..962104b 100644
--- a/sdks/python/apache_beam/examples/complete/game/user_score.py
+++ b/sdks/python/apache_beam/examples/complete/game/user_score.py
@@ -53,6 +53,8 @@
--temp_location gs://$BUCKET/user_score/temp
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/examples/complete/game/user_score_it_test.py b/sdks/python/apache_beam/examples/complete/game/user_score_it_test.py
index 5e5ba97..00b6bfe 100644
--- a/sdks/python/apache_beam/examples/complete/game/user_score_it_test.py
+++ b/sdks/python/apache_beam/examples/complete/game/user_score_it_test.py
@@ -30,6 +30,8 @@
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/complete/game/user_score_test.py b/sdks/python/apache_beam/examples/complete/game/user_score_test.py
index f41006f..3dbe6df 100644
--- a/sdks/python/apache_beam/examples/complete/game/user_score_test.py
+++ b/sdks/python/apache_beam/examples/complete/game/user_score_test.py
@@ -17,6 +17,8 @@
"""Test for the user_score example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset.py b/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset.py
index f861e48..9b55708 100644
--- a/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset.py
+++ b/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset.py
@@ -20,6 +20,8 @@
We use the quadratic polynomial f(z) = z*z + c, with c = -.62772 +.42193i
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test.py b/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test.py
index a7bc0f9..1b0b41a 100644
--- a/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test.py
+++ b/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test.py
@@ -17,6 +17,8 @@
"""Test for the juliaset example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/complete/juliaset/juliaset_main.py b/sdks/python/apache_beam/examples/complete/juliaset/juliaset_main.py
index 9d72772..32361cf 100644
--- a/sdks/python/apache_beam/examples/complete/juliaset/juliaset_main.py
+++ b/sdks/python/apache_beam/examples/complete/juliaset/juliaset_main.py
@@ -47,6 +47,8 @@
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/complete/juliaset/setup.py b/sdks/python/apache_beam/examples/complete/juliaset/setup.py
index 8cb6d4c..ab4306a 100644
--- a/sdks/python/apache_beam/examples/complete/juliaset/setup.py
+++ b/sdks/python/apache_beam/examples/complete/juliaset/setup.py
@@ -25,14 +25,15 @@
when running the workflow for remote execution.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
import subprocess
-from distutils.command.build import build as _build
+from distutils.command.build import build as _build # type: ignore
-# TODO: (BEAM-8411): re-enable lint check.
-import setuptools # pylint: disable-all
+import setuptools
# This class handles the pip install mechanism.
diff --git a/sdks/python/apache_beam/examples/complete/tfidf.py b/sdks/python/apache_beam/examples/complete/tfidf.py
index 77ee4c1..6069416 100644
--- a/sdks/python/apache_beam/examples/complete/tfidf.py
+++ b/sdks/python/apache_beam/examples/complete/tfidf.py
@@ -21,6 +21,8 @@
http://en.wikipedia.org/wiki/Tf-idf
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/examples/complete/tfidf_test.py b/sdks/python/apache_beam/examples/complete/tfidf_test.py
index 4b19269..5a9b456 100644
--- a/sdks/python/apache_beam/examples/complete/tfidf_test.py
+++ b/sdks/python/apache_beam/examples/complete/tfidf_test.py
@@ -17,6 +17,8 @@
"""Test for the TF-IDF example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/complete/top_wikipedia_sessions.py b/sdks/python/apache_beam/examples/complete/top_wikipedia_sessions.py
index 6b04a00..4e65525 100644
--- a/sdks/python/apache_beam/examples/complete/top_wikipedia_sessions.py
+++ b/sdks/python/apache_beam/examples/complete/top_wikipedia_sessions.py
@@ -39,6 +39,8 @@
be overridden with --input.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/complete/top_wikipedia_sessions_test.py b/sdks/python/apache_beam/examples/complete/top_wikipedia_sessions_test.py
index fcef981..8befc86 100644
--- a/sdks/python/apache_beam/examples/complete/top_wikipedia_sessions_test.py
+++ b/sdks/python/apache_beam/examples/complete/top_wikipedia_sessions_test.py
@@ -17,6 +17,8 @@
"""Test for the top wikipedia sessions example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import json
diff --git a/sdks/python/apache_beam/examples/cookbook/bigquery_schema.py b/sdks/python/apache_beam/examples/cookbook/bigquery_schema.py
index c7b1ccd..7d3ab49 100644
--- a/sdks/python/apache_beam/examples/cookbook/bigquery_schema.py
+++ b/sdks/python/apache_beam/examples/cookbook/bigquery_schema.py
@@ -22,6 +22,8 @@
nested and repeated fields.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/cookbook/bigquery_side_input.py b/sdks/python/apache_beam/examples/cookbook/bigquery_side_input.py
index fb7ee42..8abb8f4 100644
--- a/sdks/python/apache_beam/examples/cookbook/bigquery_side_input.py
+++ b/sdks/python/apache_beam/examples/cookbook/bigquery_side_input.py
@@ -27,6 +27,8 @@
a word that should be ignored when forming groups.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/cookbook/bigquery_side_input_test.py b/sdks/python/apache_beam/examples/cookbook/bigquery_side_input_test.py
index 031eeb3..97bcbd6 100644
--- a/sdks/python/apache_beam/examples/cookbook/bigquery_side_input_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/bigquery_side_input_test.py
@@ -17,6 +17,8 @@
"""Test for the BigQuery side input example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes.py b/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes.py
index c7c837b..5ba995f 100644
--- a/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes.py
+++ b/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes.py
@@ -32,6 +32,8 @@
represents table rows as plain Python dictionaries.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes_it_test.py b/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes_it_test.py
index f7eb93b..4beb214 100644
--- a/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes_it_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes_it_test.py
@@ -17,6 +17,8 @@
"""End-to-end test for Bigquery tornadoes example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes_test.py b/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes_test.py
index cad34d8..581ef56 100644
--- a/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes_test.py
@@ -17,6 +17,8 @@
"""Test for the BigQuery tornadoes example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/cookbook/bigtableio_it_test.py b/sdks/python/apache_beam/examples/cookbook/bigtableio_it_test.py
index ca5c4a5..5c06295 100644
--- a/sdks/python/apache_beam/examples/cookbook/bigtableio_it_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/bigtableio_it_test.py
@@ -16,6 +16,8 @@
#
"""Unittest for GCP Bigtable testing."""
+# pytype: skip-file
+
from __future__ import absolute_import
import datetime
@@ -24,6 +26,8 @@
import string
import unittest
import uuid
+from typing import TYPE_CHECKING
+from typing import List
import pytz
@@ -47,8 +51,10 @@
_microseconds_from_datetime = lambda label_stamp: label_stamp
_datetime_from_microseconds = lambda micro: micro
+if TYPE_CHECKING:
+ import google.cloud.bigtable.instance
-EXISTING_INSTANCES = []
+EXISTING_INSTANCES = [] # type: List[google.cloud.bigtable.instance.Instance]
LABEL_KEY = u'python-bigtable-beam'
label_stamp = datetime.datetime.utcnow().replace(tzinfo=UTC)
label_stamp_micros = _microseconds_from_datetime(label_stamp)
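
The TYPE_CHECKING guard above lets the type comment name google.cloud.bigtable's Instance class without adding a runtime import; a generic sketch of that pattern (names are illustrative):

from typing import TYPE_CHECKING
from typing import List

if TYPE_CHECKING:
  # Seen only by the type checker; no runtime dependency is introduced.
  import google.cloud.bigtable.instance

INSTANCES = []  # type: List[google.cloud.bigtable.instance.Instance]

def track(instance):
  # type: (google.cloud.bigtable.instance.Instance) -> None
  INSTANCES.append(instance)
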
diff --git a/sdks/python/apache_beam/examples/cookbook/coders.py b/sdks/python/apache_beam/examples/cookbook/coders.py
index b0f2a2b..68845cf 100644
--- a/sdks/python/apache_beam/examples/cookbook/coders.py
+++ b/sdks/python/apache_beam/examples/cookbook/coders.py
@@ -28,6 +28,8 @@
[TEAM_NAME, POINTS]
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/cookbook/coders_test.py b/sdks/python/apache_beam/examples/cookbook/coders_test.py
index 7a3b7f8..b706123 100644
--- a/sdks/python/apache_beam/examples/cookbook/coders_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/coders_test.py
@@ -17,6 +17,8 @@
"""Test for the coders example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/cookbook/combiners_test.py b/sdks/python/apache_beam/examples/cookbook/combiners_test.py
index 15714c0..587978f 100644
--- a/sdks/python/apache_beam/examples/cookbook/combiners_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/combiners_test.py
@@ -23,6 +23,8 @@
checked directly on the last PCollection produced.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/cookbook/custom_ptransform.py b/sdks/python/apache_beam/examples/cookbook/custom_ptransform.py
index db86003..4d0a64c 100644
--- a/sdks/python/apache_beam/examples/cookbook/custom_ptransform.py
+++ b/sdks/python/apache_beam/examples/cookbook/custom_ptransform.py
@@ -20,6 +20,8 @@
These example show the different ways you can write custom PTransforms.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/cookbook/custom_ptransform_test.py b/sdks/python/apache_beam/examples/cookbook/custom_ptransform_test.py
index 7620dae..c56a31a 100644
--- a/sdks/python/apache_beam/examples/cookbook/custom_ptransform_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/custom_ptransform_test.py
@@ -17,6 +17,8 @@
"""Tests for the various custom Count implementation examples."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py b/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py
index 3ee39b7..8b110b5 100644
--- a/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py
+++ b/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py
@@ -61,6 +61,8 @@
https://github.com/googleapis/googleapis/tree/master/google/datastore/v1
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/cookbook/datastore_wordcount_it_test.py b/sdks/python/apache_beam/examples/cookbook/datastore_wordcount_it_test.py
index 5a4fea9..08305bc 100644
--- a/sdks/python/apache_beam/examples/cookbook/datastore_wordcount_it_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/datastore_wordcount_it_test.py
@@ -17,6 +17,8 @@
"""End-to-end test for Datastore Wordcount example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/cookbook/filters.py b/sdks/python/apache_beam/examples/cookbook/filters.py
index d1b0201..06ec8a2 100644
--- a/sdks/python/apache_beam/examples/cookbook/filters.py
+++ b/sdks/python/apache_beam/examples/cookbook/filters.py
@@ -24,6 +24,8 @@
as well as global aggregates computed during pipeline execution.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/cookbook/filters_test.py b/sdks/python/apache_beam/examples/cookbook/filters_test.py
index 5187a2f..c0d8e12 100644
--- a/sdks/python/apache_beam/examples/cookbook/filters_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/filters_test.py
@@ -17,6 +17,8 @@
"""Test for the filters example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/cookbook/group_with_coder.py b/sdks/python/apache_beam/examples/cookbook/group_with_coder.py
index 2202b8c..171eada 100644
--- a/sdks/python/apache_beam/examples/cookbook/group_with_coder.py
+++ b/sdks/python/apache_beam/examples/cookbook/group_with_coder.py
@@ -25,6 +25,8 @@
and score.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/cookbook/group_with_coder_test.py b/sdks/python/apache_beam/examples/cookbook/group_with_coder_test.py
index 6f1b796..f73726d 100644
--- a/sdks/python/apache_beam/examples/cookbook/group_with_coder_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/group_with_coder_test.py
@@ -17,6 +17,8 @@
"""Test for the custom coders example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -28,7 +30,7 @@
# Patch group_with_coder.PlayerCoder.decode(). To test that the PlayerCoder was
# used, we do not strip the prepended 'x:' string when decoding a Player object.
-group_with_coder.PlayerCoder.decode = lambda self, s: group_with_coder.Player(
+group_with_coder.PlayerCoder.decode = lambda self, s: group_with_coder.Player( # type: ignore[assignment]
s.decode('utf-8'))
diff --git a/sdks/python/apache_beam/examples/cookbook/mergecontacts.py b/sdks/python/apache_beam/examples/cookbook/mergecontacts.py
index 9bd6a96..7d427c4 100644
--- a/sdks/python/apache_beam/examples/cookbook/mergecontacts.py
+++ b/sdks/python/apache_beam/examples/cookbook/mergecontacts.py
@@ -29,6 +29,8 @@
Non-linear pipelines (i.e., pipelines with branches)
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/cookbook/mergecontacts_test.py b/sdks/python/apache_beam/examples/cookbook/mergecontacts_test.py
index 23f22bc..8d9e85d 100644
--- a/sdks/python/apache_beam/examples/cookbook/mergecontacts_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/mergecontacts_test.py
@@ -17,6 +17,8 @@
"""Test for the mergecontacts example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py
index 7896027..e8cdc7f 100644
--- a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py
+++ b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py
@@ -49,6 +49,8 @@
--output gs://YOUR_OUTPUT_PREFIX
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo_test.py b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo_test.py
index afe350d..01a5768 100644
--- a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo_test.py
+++ b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo_test.py
@@ -17,6 +17,8 @@
"""Test for the multiple_output_pardo example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/fastavro_it_test.py b/sdks/python/apache_beam/examples/fastavro_it_test.py
index 48dfe11..5b1a3c5 100644
--- a/sdks/python/apache_beam/examples/fastavro_it_test.py
+++ b/sdks/python/apache_beam/examples/fastavro_it_test.py
@@ -42,6 +42,8 @@
"
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/examples/flink/flink_streaming_impulse.py b/sdks/python/apache_beam/examples/flink/flink_streaming_impulse.py
index 0cfaf5d..24ca510 100644
--- a/sdks/python/apache_beam/examples/flink/flink_streaming_impulse.py
+++ b/sdks/python/apache_beam/examples/flink/flink_streaming_impulse.py
@@ -20,6 +20,8 @@
This can only be used with the Flink portable runner.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/snippets/snippets.py b/sdks/python/apache_beam/examples/snippets/snippets.py
index 69f514a..76521c5de 100644
--- a/sdks/python/apache_beam/examples/snippets/snippets.py
+++ b/sdks/python/apache_beam/examples/snippets/snippets.py
@@ -29,6 +29,8 @@
prefix the PATH_TO_HTML where they are included followed by a descriptive
string. The tags can contain only letters, digits and _.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -174,33 +176,34 @@
def model_pcollection(argv):
"""Creating a PCollection from data in local memory."""
+ # [START model_pcollection]
+ import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
- class MyOptions(PipelineOptions):
-
- @classmethod
- def _add_argparse_args(cls, parser):
- parser.add_argument('--output',
- dest='output',
- required=True,
- help='Output file to write results to.')
-
+ # argv = None # if None, uses sys.argv
pipeline_options = PipelineOptions(argv)
- my_options = pipeline_options.view_as(MyOptions)
-
- # [START model_pcollection]
- with beam.Pipeline(options=pipeline_options) as p:
-
- lines = (p
- | beam.Create([
- 'To be, or not to be: that is the question: ',
- 'Whether \'tis nobler in the mind to suffer ',
- 'The slings and arrows of outrageous fortune, ',
- 'Or to take arms against a sea of troubles, ']))
+ with beam.Pipeline(options=pipeline_options) as pipeline:
+ lines = (
+ pipeline
+ | beam.Create([
+ 'To be, or not to be: that is the question: ',
+ "Whether 'tis nobler in the mind to suffer ",
+ 'The slings and arrows of outrageous fortune, ',
+ 'Or to take arms against a sea of troubles, ',
+ ])
+ )
# [END model_pcollection]
- (lines
- | beam.io.WriteToText(my_options.output))
+ class MyOptions(PipelineOptions):
+ @classmethod
+ def _add_argparse_args(cls, parser):
+ parser.add_argument('--output',
+ dest='output',
+ required=True,
+ help='Output file to write results to.')
+
+ my_options = pipeline_options.view_as(MyOptions)
+ lines | beam.io.WriteToText(my_options.output)
def pipeline_options_remote(argv):
diff --git a/sdks/python/apache_beam/examples/snippets/snippets_test.py b/sdks/python/apache_beam/examples/snippets/snippets_test.py
index f0f53e2..1bcaaca 100644
--- a/sdks/python/apache_beam/examples/snippets/snippets_test.py
+++ b/sdks/python/apache_beam/examples/snippets/snippets_test.py
@@ -17,6 +17,8 @@
#
"""Tests for all code snippets used in public docs."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -958,6 +960,7 @@
| 'pair_with_one' >> beam.Map(lambda x: (x, 1))
| WindowInto(FixedWindows(15),
trigger=trigger,
+ allowed_lateness=20,
accumulation_mode=AccumulationMode.DISCARDING)
| 'group' >> beam.GroupByKey()
| 'count' >> beam.Map(
@@ -1014,6 +1017,7 @@
FixedWindows(1 * 60),
trigger=AfterWatermark(
late=AfterProcessingTime(10 * 60)),
+ allowed_lateness=10,
accumulation_mode=AccumulationMode.DISCARDING)
# [END model_composite_triggers]
| 'group' >> beam.GroupByKey()
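
Both test fixes above pass allowed_lateness explicitly alongside a trigger; a self-contained sketch of the same WindowInto configuration (the durations are illustrative seconds, not values from these tests):

import apache_beam as beam
from apache_beam.transforms.trigger import AccumulationMode
from apache_beam.transforms.trigger import AfterProcessingTime
from apache_beam.transforms.trigger import AfterWatermark
from apache_beam.transforms.window import FixedWindows

with beam.Pipeline() as pipeline:
  counts = (
      pipeline
      | beam.Create([('user', 1), ('user', 1), ('other', 1)])
      | beam.WindowInto(
          FixedWindows(60),
          trigger=AfterWatermark(late=AfterProcessingTime(10 * 60)),
          allowed_lateness=10 * 60,
          accumulation_mode=AccumulationMode.DISCARDING)
      | beam.GroupByKey()
      | beam.Map(print))
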
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/cogroupbykey.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/cogroupbykey.py
index c507e03..c1ddf8d 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/cogroupbykey.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/cogroupbykey.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/cogroupbykey_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/cogroupbykey_test.py
index ff86628..fc1b0a7 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/cogroupbykey_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/cogroupbykey_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineglobally.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineglobally.py
index f9c097a..a198f19 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineglobally.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineglobally.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineglobally_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineglobally_test.py
index 5990d2e..e849377 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineglobally_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineglobally_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineperkey.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineperkey.py
index 2fba8e4..fc864f6 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineperkey.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineperkey.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineperkey_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineperkey_test.py
index e5fc2ac..ec23167 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineperkey_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineperkey_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combinevalues.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combinevalues.py
index 92fc6c6..7c52113 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combinevalues.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combinevalues.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combinevalues_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combinevalues_test.py
index 97d0b3d..3e233c9 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combinevalues_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combinevalues_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/count.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/count.py
index 22b1140..ab1600a 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/count.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/count.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/count_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/count_test.py
index 123d5fb..fff2406 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/count_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/count_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/distinct.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/distinct.py
index 930fdbe..8062e8a 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/distinct.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/distinct.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/distinct_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/distinct_test.py
index ea1a7b2..70d1341 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/distinct_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/distinct_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupbykey.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupbykey.py
index 83e4f87..72c5437 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupbykey.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupbykey.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupbykey_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupbykey_test.py
index 4d8283a..3ea278c 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupbykey_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupbykey_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/latest.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/latest.py
new file mode 100644
index 0000000..fdc228c
--- /dev/null
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/latest.py
@@ -0,0 +1,86 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+from __future__ import print_function
+
+
+def latest_globally(test=None):
+ # [START latest_globally]
+ import apache_beam as beam
+ import time
+
+ def to_unix_time(time_str, format='%Y-%m-%d %H:%M:%S'):
+ return time.mktime(time.strptime(time_str, format))
+
+ with beam.Pipeline() as pipeline:
+ latest_element = (
+ pipeline
+ | 'Create crops' >> beam.Create([
+ {'item': '🥬', 'harvest': '2020-02-24 00:00:00'},
+ {'item': '🍓', 'harvest': '2020-06-16 00:00:00'},
+ {'item': '🥕', 'harvest': '2020-07-17 00:00:00'},
+ {'item': '🍆', 'harvest': '2020-10-26 00:00:00'},
+ {'item': '🍅', 'harvest': '2020-10-01 00:00:00'},
+ ])
+ | 'With timestamps' >> beam.Map(
+ lambda crop: beam.window.TimestampedValue(
+ crop['item'], to_unix_time(crop['harvest'])))
+ | 'Get latest element' >> beam.combiners.Latest.Globally()
+ | beam.Map(print)
+ )
+ # [END latest_globally]
+ if test:
+ test(latest_element)
+
+
+def latest_per_key(test=None):
+ # [START latest_per_key]
+ import apache_beam as beam
+ import time
+
+ def to_unix_time(time_str, format='%Y-%m-%d %H:%M:%S'):
+ return time.mktime(time.strptime(time_str, format))
+
+ with beam.Pipeline() as pipeline:
+ latest_elements_per_key = (
+ pipeline
+ | 'Create crops' >> beam.Create([
+ ('spring', {'item': '🥕', 'harvest': '2020-06-28 00:00:00'}),
+ ('spring', {'item': '🍓', 'harvest': '2020-06-16 00:00:00'}),
+ ('summer', {'item': '🥕', 'harvest': '2020-07-17 00:00:00'}),
+ ('summer', {'item': '🍓', 'harvest': '2020-08-26 00:00:00'}),
+ ('summer', {'item': '🍆', 'harvest': '2020-09-04 00:00:00'}),
+ ('summer', {'item': '🥬', 'harvest': '2020-09-18 00:00:00'}),
+ ('summer', {'item': '🍅', 'harvest': '2020-09-22 00:00:00'}),
+ ('autumn', {'item': '🍅', 'harvest': '2020-10-01 00:00:00'}),
+ ('autumn', {'item': '🥬', 'harvest': '2020-10-20 00:00:00'}),
+ ('autumn', {'item': '🍆', 'harvest': '2020-10-26 00:00:00'}),
+ ('winter', {'item': '🥬', 'harvest': '2020-02-24 00:00:00'}),
+ ])
+ | 'With timestamps' >> beam.Map(
+ lambda pair: beam.window.TimestampedValue(
+ (pair[0], pair[1]['item']), to_unix_time(pair[1]['harvest'])))
+ | 'Get latest elements per key' >> beam.combiners.Latest.PerKey()
+ | beam.Map(print)
+ )
+ # [END latest_per_key]
+ if test:
+ test(latest_elements_per_key)
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/latest_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/latest_test.py
new file mode 100644
index 0000000..6f9bffd
--- /dev/null
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/latest_test.py
@@ -0,0 +1,63 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+from __future__ import print_function
+
+import unittest
+
+import mock
+
+from apache_beam.examples.snippets.util import assert_matches_stdout
+from apache_beam.testing.test_pipeline import TestPipeline
+
+from . import latest
+
+
+def check_latest_element(actual):
+ expected = '''[START latest_element]
+🍆
+[END latest_element]'''.splitlines()[1:-1]
+ assert_matches_stdout(actual, expected)
+
+
+def check_latest_elements_per_key(actual):
+ expected = '''[START latest_elements_per_key]
+('spring', '🥕')
+('summer', '🍅')
+('autumn', '🍆')
+('winter', '🥬')
+[END latest_elements_per_key]'''.splitlines()[1:-1]
+ assert_matches_stdout(actual, expected)
+
+
+@mock.patch('apache_beam.Pipeline', TestPipeline)
+@mock.patch(
+ 'apache_beam.examples.snippets.transforms.aggregation.latest.print', str)
+class LatestTest(unittest.TestCase):
+ def test_latest_globally(self):
+ latest.latest_globally(check_latest_element)
+
+ def test_latest_per_key(self):
+ latest.latest_per_key(check_latest_elements_per_key)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/max.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/max.py
new file mode 100644
index 0000000..37f3481
--- /dev/null
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/max.py
@@ -0,0 +1,62 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+from __future__ import print_function
+
+
+def max_globally(test=None):
+ # [START max_globally]
+ import apache_beam as beam
+
+ with beam.Pipeline() as pipeline:
+ max_element = (
+ pipeline
+ | 'Create numbers' >> beam.Create([3, 4, 1, 2])
+ | 'Get max value' >> beam.CombineGlobally(
+ lambda elements: max(elements or [None]))
+ | beam.Map(print)
+ )
+ # [END max_globally]
+ if test:
+ test(max_element)
+
+
+def max_per_key(test=None):
+ # [START max_per_key]
+ import apache_beam as beam
+
+ with beam.Pipeline() as pipeline:
+ elements_with_max_value_per_key = (
+ pipeline
+ | 'Create produce' >> beam.Create([
+ ('🥕', 3),
+ ('🥕', 2),
+ ('🍆', 1),
+ ('🍅', 4),
+ ('🍅', 5),
+ ('🍅', 3),
+ ])
+ | 'Get max value per key' >> beam.CombinePerKey(max)
+ | beam.Map(print)
+ )
+ # [END max_per_key]
+ if test:
+ test(elements_with_max_value_per_key)
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/max_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/max_test.py
new file mode 100644
index 0000000..af43781
--- /dev/null
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/max_test.py
@@ -0,0 +1,62 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+from __future__ import print_function
+
+import unittest
+
+import mock
+
+from apache_beam.examples.snippets.util import assert_matches_stdout
+from apache_beam.testing.test_pipeline import TestPipeline
+
+from . import max as beam_max
+
+
+def check_max_element(actual):
+ expected = '''[START max_element]
+4
+[END max_element]'''.splitlines()[1:-1]
+ assert_matches_stdout(actual, expected)
+
+
+def check_elements_with_max_value_per_key(actual):
+ expected = '''[START elements_with_max_value_per_key]
+('🥕', 3)
+('🍆', 1)
+('🍅', 5)
+[END elements_with_max_value_per_key]'''.splitlines()[1:-1]
+ assert_matches_stdout(actual, expected)
+
+
+@mock.patch('apache_beam.Pipeline', TestPipeline)
+@mock.patch(
+ 'apache_beam.examples.snippets.transforms.aggregation.max.print', str)
+class MaxTest(unittest.TestCase):
+ def test_max_globally(self):
+ beam_max.max_globally(check_max_element)
+
+ def test_max_per_key(self):
+ beam_max.max_per_key(check_elements_with_max_value_per_key)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean.py
index 36fa5b5..bda260d 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean_test.py
index 38b27a0..796bbbf 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample.py
index d5abc37..a81ab67 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample_test.py
index 22cd656..bb337ee 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter.py
index 44b11b8..ddb58bd 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_test.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_test.py
index 724b1b9..10ad79d 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap.py
index 50ffe7a..ef2856c 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_test.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_test.py
index 5c326e9..9e67e74 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/keys.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/keys.py
index 01c9d6b..2a975be 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/keys.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/keys.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/keys_test.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/keys_test.py
index e4a843b..9455836 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/keys_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/keys_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/kvswap.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/kvswap.py
index 2107fd5..e6646bf 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/kvswap.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/kvswap.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/kvswap_test.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/kvswap_test.py
index 83f211d..c3eed27 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/kvswap_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/kvswap_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map.py
index 9defd47..19d30d7 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_test.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_test.py
index eb77675..2f34a62 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/pardo.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/pardo.py
index 4ecd74d..8e39556 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/pardo.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/pardo.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/pardo_test.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/pardo_test.py
index cbf4903..ae8515d 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/pardo_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/pardo_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/partition.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/partition.py
index 5633607..9820bbf 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/partition.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/partition.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/partition_test.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/partition_test.py
index 4f98ab1..db81891 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/partition_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/partition_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/regex.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/regex.py
index b39b534..6f9e230 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/regex.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/regex.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/regex_test.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/regex_test.py
index 9df9f62..d783b1f 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/regex_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/regex_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/tostring.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/tostring.py
index 1d0b7dd..3275433 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/tostring.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/tostring.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/tostring_test.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/tostring_test.py
index 04939a7..e142278 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/tostring_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/tostring_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/values.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/values.py
index 8504ff4..a1ee8b2 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/values.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/values.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/values_test.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/values_test.py
index 7a3b8f3..6c2caaa 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/values_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/values_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/withtimestamps.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/withtimestamps.py
index 79a9c44..bf4a4d4 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/withtimestamps.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/withtimestamps.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/withtimestamps_test.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/withtimestamps_test.py
index ad8c31b..191d114 100644
--- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/withtimestamps_test.py
+++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/withtimestamps_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/examples/snippets/util.py b/sdks/python/apache_beam/examples/snippets/util.py
index 60c2c7e..a14cd36 100644
--- a/sdks/python/apache_beam/examples/snippets/util.py
+++ b/sdks/python/apache_beam/examples/snippets/util.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import ast
diff --git a/sdks/python/apache_beam/examples/snippets/util_test.py b/sdks/python/apache_beam/examples/snippets/util_test.py
index fcf3955..756f857 100644
--- a/sdks/python/apache_beam/examples/snippets/util_test.py
+++ b/sdks/python/apache_beam/examples/snippets/util_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/examples/streaming_wordcount.py b/sdks/python/apache_beam/examples/streaming_wordcount.py
index f0db06a..461e073 100644
--- a/sdks/python/apache_beam/examples/streaming_wordcount.py
+++ b/sdks/python/apache_beam/examples/streaming_wordcount.py
@@ -18,6 +18,8 @@
"""A streaming word-counting workflow.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/streaming_wordcount_debugging.py b/sdks/python/apache_beam/examples/streaming_wordcount_debugging.py
index edaedb5..db5304d 100644
--- a/sdks/python/apache_beam/examples/streaming_wordcount_debugging.py
+++ b/sdks/python/apache_beam/examples/streaming_wordcount_debugging.py
@@ -32,6 +32,8 @@
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/streaming_wordcount_it_test.py b/sdks/python/apache_beam/examples/streaming_wordcount_it_test.py
index d87d0f4..96a6331 100644
--- a/sdks/python/apache_beam/examples/streaming_wordcount_it_test.py
+++ b/sdks/python/apache_beam/examples/streaming_wordcount_it_test.py
@@ -17,6 +17,8 @@
"""End-to-end test for the streaming wordcount example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/windowed_wordcount.py b/sdks/python/apache_beam/examples/windowed_wordcount.py
index 5eb05c0..c59c83b 100644
--- a/sdks/python/apache_beam/examples/windowed_wordcount.py
+++ b/sdks/python/apache_beam/examples/windowed_wordcount.py
@@ -21,6 +21,8 @@
and is not yet available for use.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/wordcount.py b/sdks/python/apache_beam/examples/wordcount.py
index a8f17e3..6fd53b1 100644
--- a/sdks/python/apache_beam/examples/wordcount.py
+++ b/sdks/python/apache_beam/examples/wordcount.py
@@ -17,6 +17,8 @@
"""A word-counting workflow."""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/wordcount_debugging.py b/sdks/python/apache_beam/examples/wordcount_debugging.py
index 389bdd6..bd35bfbb 100644
--- a/sdks/python/apache_beam/examples/wordcount_debugging.py
+++ b/sdks/python/apache_beam/examples/wordcount_debugging.py
@@ -39,6 +39,8 @@
--output gs://YOUR_OUTPUT_PREFIX
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/wordcount_debugging_test.py b/sdks/python/apache_beam/examples/wordcount_debugging_test.py
index 124b680..51f150a 100644
--- a/sdks/python/apache_beam/examples/wordcount_debugging_test.py
+++ b/sdks/python/apache_beam/examples/wordcount_debugging_test.py
@@ -17,6 +17,8 @@
"""Test for the debugging wordcount example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/wordcount_it_test.py b/sdks/python/apache_beam/examples/wordcount_it_test.py
index 8a9b2c5..bf12ba3 100644
--- a/sdks/python/apache_beam/examples/wordcount_it_test.py
+++ b/sdks/python/apache_beam/examples/wordcount_it_test.py
@@ -17,6 +17,8 @@
"""End-to-end test for the wordcount example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/examples/wordcount_minimal.py b/sdks/python/apache_beam/examples/wordcount_minimal.py
index 2bfb6ec..92982bd 100644
--- a/sdks/python/apache_beam/examples/wordcount_minimal.py
+++ b/sdks/python/apache_beam/examples/wordcount_minimal.py
@@ -44,6 +44,8 @@
pipeline. You can see the results in your output bucket in the GCS browser.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/examples/wordcount_minimal_test.py b/sdks/python/apache_beam/examples/wordcount_minimal_test.py
index 9a772d5..a3912a1 100644
--- a/sdks/python/apache_beam/examples/wordcount_minimal_test.py
+++ b/sdks/python/apache_beam/examples/wordcount_minimal_test.py
@@ -17,6 +17,8 @@
"""Test for the minimal wordcount example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
diff --git a/sdks/python/apache_beam/examples/wordcount_test.py b/sdks/python/apache_beam/examples/wordcount_test.py
index 84b14f2..0f140d4 100644
--- a/sdks/python/apache_beam/examples/wordcount_test.py
+++ b/sdks/python/apache_beam/examples/wordcount_test.py
@@ -18,6 +18,8 @@
"""Test for the wordcount example."""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
diff --git a/sdks/python/apache_beam/examples/wordcount_xlang.py b/sdks/python/apache_beam/examples/wordcount_xlang.py
index fe1994d..d8c8745 100644
--- a/sdks/python/apache_beam/examples/wordcount_xlang.py
+++ b/sdks/python/apache_beam/examples/wordcount_xlang.py
@@ -17,6 +17,8 @@
"""A cross-language word-counting workflow."""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/internal/gcp/auth.py b/sdks/python/apache_beam/internal/gcp/auth.py
index 8a94acf..5f248b3 100644
--- a/sdks/python/apache_beam/internal/gcp/auth.py
+++ b/sdks/python/apache_beam/internal/gcp/auth.py
@@ -17,6 +17,8 @@
"""Dataflow credentials and authentication."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/internal/gcp/json_value.py b/sdks/python/apache_beam/internal/gcp/json_value.py
index c02b639..469da80 100644
--- a/sdks/python/apache_beam/internal/gcp/json_value.py
+++ b/sdks/python/apache_beam/internal/gcp/json_value.py
@@ -17,6 +17,8 @@
"""JSON conversion utility functions."""
+# pytype: skip-file
+
from __future__ import absolute_import
from past.builtins import long
diff --git a/sdks/python/apache_beam/internal/gcp/json_value_test.py b/sdks/python/apache_beam/internal/gcp/json_value_test.py
index 5605d41..d473c30 100644
--- a/sdks/python/apache_beam/internal/gcp/json_value_test.py
+++ b/sdks/python/apache_beam/internal/gcp/json_value_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the json_value module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/internal/http_client.py b/sdks/python/apache_beam/internal/http_client.py
index 1263687..c86c78f 100644
--- a/sdks/python/apache_beam/internal/http_client.py
+++ b/sdks/python/apache_beam/internal/http_client.py
@@ -19,6 +19,8 @@
For internal use only. No backwards compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/internal/http_client_test.py b/sdks/python/apache_beam/internal/http_client_test.py
index c3c0f83..98fc3f2 100644
--- a/sdks/python/apache_beam/internal/http_client_test.py
+++ b/sdks/python/apache_beam/internal/http_client_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for the http_client module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import os
diff --git a/sdks/python/apache_beam/internal/module_test.py b/sdks/python/apache_beam/internal/module_test.py
index e2fee34..45f5792 100644
--- a/sdks/python/apache_beam/internal/module_test.py
+++ b/sdks/python/apache_beam/internal/module_test.py
@@ -17,11 +17,14 @@
"""Module used to define functions and classes used by the coder unit tests."""
+# pytype: skip-file
+
from __future__ import absolute_import
import re
import sys
from builtins import object
+from typing import Type
class TopClass(object):
@@ -70,7 +73,7 @@
class RecursiveClass(object):
"""A class that contains a reference to itself."""
- SELF_TYPE = None
+ SELF_TYPE = None # type: Type[RecursiveClass]
def __init__(self, datum):
self.datum = 'RecursiveClass:%s' % datum
diff --git a/sdks/python/apache_beam/internal/pickler.py b/sdks/python/apache_beam/internal/pickler.py
index 8083091..072add2 100644
--- a/sdks/python/apache_beam/internal/pickler.py
+++ b/sdks/python/apache_beam/internal/pickler.py
@@ -28,6 +28,8 @@
the coders.*PickleCoder classes should be used instead.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import base64
@@ -37,6 +39,9 @@
import traceback
import types
import zlib
+from typing import Any
+from typing import Dict
+from typing import Tuple
import dill
@@ -72,6 +77,7 @@
def _is_nested_class(cls):
"""Returns true if argument is a class object that appears to be nested."""
return (isinstance(cls, type)
+ and cls.__module__ is not None
and cls.__module__ != 'builtins' # Python 3
and cls.__module__ != '__builtin__' # Python 2
and cls.__name__ not in sys.modules[cls.__module__].__dict__)
@@ -174,7 +180,7 @@
# Pickle module dictionaries (commonly found in lambda's globals)
# by referencing their module.
old_save_module_dict = dill.dill.save_module_dict
- known_module_dicts = {}
+ known_module_dicts = {} # type: Dict[int, Tuple[types.ModuleType, Dict[str, Any]]]
@dill.dill.register(dict)
def new_save_module_dict(pickler, obj):
@@ -244,6 +250,7 @@
# pickler.loads() being used for data, which results in an unnecessary base64
# encoding. This should be cleaned up.
def dumps(o, enable_trace=True):
+ # type: (...) -> bytes
"""For internal use only; no backwards-compatibility guarantees."""
with pickle_lock_unless_py2:
try:
diff --git a/sdks/python/apache_beam/internal/pickler_test.py b/sdks/python/apache_beam/internal/pickler_test.py
index e18c726..e70c28c 100644
--- a/sdks/python/apache_beam/internal/pickler_test.py
+++ b/sdks/python/apache_beam/internal/pickler_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the pickler module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import sys
diff --git a/sdks/python/apache_beam/internal/util.py b/sdks/python/apache_beam/internal/util.py
index 499214f..a161440 100644
--- a/sdks/python/apache_beam/internal/util.py
+++ b/sdks/python/apache_beam/internal/util.py
@@ -20,6 +20,8 @@
For internal use only. No backwards compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -27,6 +29,16 @@
import weakref
from builtins import object
from multiprocessing.pool import ThreadPool
+from typing import Any
+from typing import Dict
+from typing import Iterable
+from typing import List
+from typing import Tuple
+from typing import Type
+from typing import TypeVar
+from typing import Union
+
+T = TypeVar('T')
class ArgumentPlaceholder(object):
@@ -62,7 +74,11 @@
return hash(type(self))
-def remove_objects_from_args(args, kwargs, pvalue_class):
+def remove_objects_from_args(args, # type: Iterable[Any]
+ kwargs, # type: Dict[str, Any]
+ pvalue_class # type: Union[Type[T], Tuple[Type[T], ...]]
+ ):
+ # type: (...) -> Tuple[List[Any], Dict[str, Any], List[T]]
"""For internal use only; no backwards-compatibility guarantees.
Replaces all objects of a given type in args/kwargs with a placeholder.
diff --git a/sdks/python/apache_beam/internal/util_test.py b/sdks/python/apache_beam/internal/util_test.py
index c3ae191..27f1362 100644
--- a/sdks/python/apache_beam/internal/util_test.py
+++ b/sdks/python/apache_beam/internal/util_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for the util module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/io/avroio.py b/sdks/python/apache_beam/io/avroio.py
index b1cf6bd..6f68494 100644
--- a/sdks/python/apache_beam/io/avroio.py
+++ b/sdks/python/apache_beam/io/avroio.py
@@ -41,6 +41,8 @@
that can be used to write a given ``PCollection`` of Python objects to an
Avro file.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import io
diff --git a/sdks/python/apache_beam/io/avroio_test.py b/sdks/python/apache_beam/io/avroio_test.py
index 5ea9b9b..147be62 100644
--- a/sdks/python/apache_beam/io/avroio_test.py
+++ b/sdks/python/apache_beam/io/avroio_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -24,6 +26,7 @@
import tempfile
import unittest
from builtins import range
+from typing import List
import sys
# patches unittest.TestCase to be python3 compatible
@@ -90,7 +93,7 @@
class AvroBase(object):
- _temp_files = []
+ _temp_files = [] # type: List[str]
def __init__(self, methodName='runTest'):
super(AvroBase, self).__init__(methodName)
diff --git a/sdks/python/apache_beam/io/aws/__init__.py b/sdks/python/apache_beam/io/aws/__init__.py
new file mode 100644
index 0000000..6569e3f
--- /dev/null
+++ b/sdks/python/apache_beam/io/aws/__init__.py
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import absolute_import
diff --git a/sdks/python/apache_beam/io/aws/clients/__init__.py b/sdks/python/apache_beam/io/aws/clients/__init__.py
new file mode 100644
index 0000000..f4f43cb
--- /dev/null
+++ b/sdks/python/apache_beam/io/aws/clients/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from __future__ import absolute_import
diff --git a/sdks/python/apache_beam/io/aws/clients/s3/__init__.py b/sdks/python/apache_beam/io/aws/clients/s3/__init__.py
new file mode 100644
index 0000000..f4f43cb
--- /dev/null
+++ b/sdks/python/apache_beam/io/aws/clients/s3/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from __future__ import absolute_import
diff --git a/sdks/python/apache_beam/io/aws/clients/s3/boto3_client.py b/sdks/python/apache_beam/io/aws/clients/s3/boto3_client.py
new file mode 100644
index 0000000..9750c9b
--- /dev/null
+++ b/sdks/python/apache_beam/io/aws/clients/s3/boto3_client.py
@@ -0,0 +1,242 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+from apache_beam.io.aws.clients.s3 import messages
+
+try:
+ # pylint: disable=wrong-import-order, wrong-import-position
+ # pylint: disable=ungrouped-imports
+ import boto3
+
+except ImportError:
+ boto3 = None
+
+
+class Client(object):
+ """
+ Wrapper for the boto3 S3 client library
+ """
+
+ def __init__(self):
+ assert boto3 is not None, 'Missing boto3 requirement'
+ self.client = boto3.client('s3')
+
+ def get_object_metadata(self, request):
+ r"""Retrieves an object's metadata.
+
+ Args:
+ request: (GetRequest) input message
+
+ Returns:
+ (Item) The response message.
+ """
+ kwargs = {'Bucket': request.bucket, 'Key': request.object}
+
+ try:
+ boto_response = self.client.head_object(**kwargs)
+ except Exception as e:
+ message = e.response['Error']['Message']
+ code = e.response['ResponseMetadata']['HTTPStatusCode']
+ raise messages.S3ClientError(message, code)
+
+ item = messages.Item(boto_response['ETag'],
+ request.object,
+ boto_response['LastModified'],
+ boto_response['ContentLength'],
+ boto_response['ContentType'])
+
+ return item
+
+ def get_range(self, request, start, end):
+ r"""Retrieves an object's contents.
+
+ Args:
+ request: (GetRequest) request
+ start: (int) start of the byte range to read, inclusive
+ end: (int) end of the byte range to read, exclusive
+ Returns:
+ (bytes) The response message.
+ """
+ try:
+ boto_response = self.client.get_object(Bucket=request.bucket,
+ Key=request.object,
+ Range='bytes={}-{}'.format(
+ start,
+ end - 1))
+ except Exception as e:
+ message = e.response['Error']['Message']
+ code = e.response['ResponseMetadata']['HTTPStatusCode']
+ raise messages.S3ClientError(message, code)
+
+ return boto_response['Body'].read() # A bytes object
+
+ def list(self, request):
+ r"""Retrieves a list of objects matching the criteria.
+
+ Args:
+ request: (ListRequest) input message
+ Returns:
+ (ListResponse) The response message.
+ """
+ kwargs = {'Bucket': request.bucket,
+ 'Prefix': request.prefix}
+
+ if request.continuation_token is not None:
+ kwargs['ContinuationToken'] = request.continuation_token
+
+ try:
+ boto_response = self.client.list_objects_v2(**kwargs)
+ except Exception as e:
+ message = e.response['Error']['Message']
+ code = e.response['ResponseMetadata']['HTTPStatusCode']
+ raise messages.S3ClientError(message, code)
+
+ if boto_response['KeyCount'] == 0:
+ message = 'Tried to list nonexistent S3 path: s3://%s/%s' % (
+ request.bucket, request.prefix)
+ raise messages.S3ClientError(message, 404)
+
+ items = [messages.Item(etag=content['ETag'],
+ key=content['Key'],
+ last_modified=content['LastModified'],
+ size=content['Size'])
+ for content in boto_response['Contents']]
+
+ try:
+ next_token = boto_response['NextContinuationToken']
+ except KeyError:
+ next_token = None
+
+ response = messages.ListResponse(items, next_token)
+ return response
+
+ def create_multipart_upload(self, request):
+ r"""Initates a multipart upload to S3 for a given object
+
+ Args:
+ request: (UploadRequest) input message
+ Returns:
+ (UploadResponse) The response message.
+ """
+ try:
+ boto_response = self.client.create_multipart_upload(
+ Bucket=request.bucket,
+ Key=request.object,
+ ContentType=request.mime_type
+ )
+ response = messages.UploadResponse(boto_response['UploadId'])
+ except Exception as e:
+ message = e.response['Error']['Message']
+ code = e.response['ResponseMetadata']['HTTPStatusCode']
+ raise messages.S3ClientError(message, code)
+ return response
+
+ def upload_part(self, request):
+ r"""Uploads part of a file to S3 during a multipart upload
+
+ Args:
+ request: (UploadPartRequest) input message
+ Returns:
+ (UploadPartResponse) The response message.
+ """
+ try:
+ boto_response = self.client.upload_part(Body=request.bytes,
+ Bucket=request.bucket,
+ Key=request.object,
+ PartNumber=request.part_number,
+ UploadId=request.upload_id)
+ response = messages.UploadPartResponse(boto_response['ETag'],
+ request.part_number)
+ return response
+ except Exception as e:
+ message = e.response['Error']['Message']
+ code = e.response['ResponseMetadata']['HTTPStatusCode']
+ raise messages.S3ClientError(message, code)
+
+ def complete_multipart_upload(self, request):
+ r"""Completes a multipart upload to S3
+
+ Args:
+ request: (CompleteMultipartUploadRequest) input message
+ Returns:
+ (None) Returns nothing; raises S3ClientError on failure.
+ """
+ parts = {'Parts': request.parts}
+ try:
+ self.client.complete_multipart_upload(Bucket=request.bucket,
+ Key=request.object,
+ UploadId=request.upload_id,
+ MultipartUpload=parts)
+ except Exception as e:
+ message = e.response['Error']['Message']
+ code = e.response['ResponseMetadata']['HTTPStatusCode']
+ raise messages.S3ClientError(message, code)
+
+ def delete(self, request):
+ r"""Deletes given object from bucket
+ Args:
+ request: (DeleteRequest) input message
+ Returns:
+ (None) Returns nothing; raises S3ClientError on failure.
+ """
+ try:
+ self.client.delete_object(Bucket=request.bucket,
+ Key=request.object)
+
+ except Exception as e:
+ message = e.response['Error']['Message']
+ code = e.response['ResponseMetadata']['HTTPStatusCode']
+ raise messages.S3ClientError(message, code)
+
+ def delete_batch(self, request):
+
+ aws_request = {
+ 'Bucket': request.bucket,
+ 'Delete': {
+ 'Objects': [{'Key': object} for object in request.objects]
+ }
+ }
+
+ try:
+ aws_response = self.client.delete_objects(**aws_request)
+ except Exception as e:
+ message = e.response['Error']['Message']
+ code = int(e.response['ResponseMetadata']['HTTPStatusCode'])
+ raise messages.S3ClientError(message, code)
+
+ deleted = [obj['Key'] for obj in aws_response.get('Deleted', [])]
+
+ failed = [obj['Key'] for obj in aws_response.get('Errors', [])]
+
+ errors = [messages.S3ClientError(obj['Message'], obj['Code'])
+ for obj in aws_response.get('Errors', [])]
+
+ return messages.DeleteBatchResponse(deleted, failed, errors)
+
+ def copy(self, request):
+ try:
+ copy_src = {
+ 'Bucket': request.src_bucket,
+ 'Key': request.src_key
+ }
+ self.client.copy(copy_src, request.dest_bucket, request.dest_key)
+ except Exception as e:
+ message = e.response['Error']['Message']
+ code = e.response['ResponseMetadata']['HTTPStatusCode']
+ raise messages.S3ClientError(message, code)
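+
+
+# Illustrative usage sketch (not part of this module's API; the bucket and key
+# names below are hypothetical). It shows how the wrapper methods above and the
+# request/response types from `messages` compose for a multipart upload:
+#
+#   client = Client()
+#   upload = client.create_multipart_upload(
+#       messages.UploadRequest('my-bucket', 'my-key', 'text/plain'))
+#   part = client.upload_part(messages.UploadPartRequest(
+#       'my-bucket', 'my-key', upload.upload_id, 1, b'some bytes'))
+#   client.complete_multipart_upload(messages.CompleteMultipartUploadRequest(
+#       'my-bucket', 'my-key', upload.upload_id,
+#       [{'ETag': part.etag, 'PartNumber': part.part_number}]))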
diff --git a/sdks/python/apache_beam/io/aws/clients/s3/client_test.py b/sdks/python/apache_beam/io/aws/clients/s3/client_test.py
new file mode 100644
index 0000000..434d173
--- /dev/null
+++ b/sdks/python/apache_beam/io/aws/clients/s3/client_test.py
@@ -0,0 +1,256 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# pytype: skip-file
+
+import logging
+import os
+import unittest
+
+from apache_beam.io.aws import s3io
+from apache_beam.io.aws.clients.s3 import fake_client
+from apache_beam.io.aws.clients.s3 import messages
+
+
+class ClientErrorTest(unittest.TestCase):
+
+ def setUp(self):
+
+ # These tests can be run locally against a mock S3 client, or as integration
+ # tests against the real S3 client.
+ self.USE_MOCK = True
+
+ # If you're running integration tests with S3, set this variable to be an
+ # s3 path that you have access to where test data can be written. If you're
+ # just running tests against the mock, this can be any s3 path. It should
+ # end with a '/'.
+ self.TEST_DATA_PATH = 's3://random-data-sets/beam_tests/'
+
+ self.test_bucket, self.test_path = s3io.parse_s3_path(self.TEST_DATA_PATH)
+
+ if self.USE_MOCK:
+ self.client = fake_client.FakeS3Client()
+ test_data_bucket, _ = s3io.parse_s3_path(self.TEST_DATA_PATH)
+ self.client.known_buckets.add(test_data_bucket)
+ self.aws = s3io.S3IO(self.client)
+ else:
+ self.aws = s3io.S3IO()
+
+
+ def test_get_object_metadata(self):
+
+ # Test nonexistent object
+ object = self.test_path + 'nonexistent_file_doesnt_exist'
+ request = messages.GetRequest(self.test_bucket, object)
+ self.assertRaises(messages.S3ClientError,
+ self.client.get_object_metadata,
+ request)
+
+ try:
+ self.client.get_object_metadata(request)
+ except Exception as e:
+ self.assertIsInstance(e, messages.S3ClientError)
+ self.assertEqual(e.code, 404)
+
+ def test_get_range_nonexistent(self):
+
+ # Test nonexistent object
+ object = self.test_path + 'nonexistent_file_doesnt_exist'
+ request = messages.GetRequest(self.test_bucket, object)
+ self.assertRaises(messages.S3ClientError,
+ self.client.get_range,
+ request, 0, 10)
+
+ try:
+ self.client.get_range(request, 0, 10)
+ except Exception as e:
+ self.assertIsInstance(e, messages.S3ClientError)
+ self.assertEqual(e.code, 404)
+
+ def test_get_range_bad_start_end(self):
+
+ file_name = self.TEST_DATA_PATH + 'get_range'
+ contents = os.urandom(1024)
+
+ with self.aws.open(file_name, 'w') as f:
+ f.write(contents)
+ bucket, object = s3io.parse_s3_path(file_name)
+
+ response = self.client.get_range(messages.GetRequest(bucket, object),
+ -10, 20)
+ self.assertEqual(response, contents)
+
+ response = self.client.get_range(messages.GetRequest(bucket, object),
+ 20, 10)
+ self.assertEqual(response, contents)
+
+ # Clean up
+ self.aws.delete(file_name)
+
+
+ def test_upload_part_nonexistent_upload_id(self):
+
+ object = self.test_path + 'upload_part'
+ upload_id = 'not-an-id-12345'
+ part_number = 1
+ contents = os.urandom(1024)
+
+ request = messages.UploadPartRequest(self.test_bucket,
+ object,
+ upload_id,
+ part_number,
+ contents)
+
+ self.assertRaises(messages.S3ClientError,
+ self.client.upload_part,
+ request)
+
+ try:
+ self.client.upload_part(request)
+ except Exception as e:
+ self.assertIsInstance(e, messages.S3ClientError)
+ self.assertEqual(e.code, 404)
+
+
+ def test_copy_nonexistent(self):
+
+ src_key = self.test_path + 'not_a_real_file_does_not_exist'
+ dest_key = self.test_path + 'destination_file_location'
+
+ request = messages.CopyRequest(self.test_bucket,
+ src_key,
+ self.test_bucket,
+ dest_key)
+
+ with self.assertRaises(messages.S3ClientError) as e:
+ self.client.copy(request)
+
+ self.assertEqual(e.exception.code, 404)
+
+
+
+ def test_upload_part_bad_number(self):
+
+ object = self.test_path + 'upload_part'
+ contents = os.urandom(1024)
+
+ request = messages.UploadRequest(self.test_bucket, object, None)
+ response = self.client.create_multipart_upload(request)
+ upload_id = response.upload_id
+
+ part_number = 0.5
+ request = messages.UploadPartRequest(self.test_bucket,
+ object,
+ upload_id,
+ part_number,
+ contents)
+
+ self.assertRaises(messages.S3ClientError,
+ self.client.upload_part,
+ request)
+
+ try:
+ response = self.client.upload_part(request)
+ except Exception as e:
+ self.assertIsInstance(e, messages.S3ClientError)
+ self.assertEqual(e.code, 400)
+
+ def test_complete_multipart_upload_too_small(self):
+
+ object = self.test_path + 'upload_part'
+ request = messages.UploadRequest(self.test_bucket, object, None)
+ response = self.client.create_multipart_upload(request)
+ upload_id = response.upload_id
+
+ part_number = 1
+ contents_1 = os.urandom(1024)
+ request_1 = messages.UploadPartRequest(self.test_bucket,
+ object,
+ upload_id,
+ part_number,
+ contents_1)
+ response_1 = self.client.upload_part(request_1)
+
+
+ part_number = 2
+ contents_2 = os.urandom(1024)
+ request_2 = messages.UploadPartRequest(self.test_bucket,
+ object,
+ upload_id,
+ part_number,
+ contents_2)
+ response_2 = self.client.upload_part(request_2)
+
+ parts = [
+ {'PartNumber': 1, 'ETag': response_1.etag},
+ {'PartNumber': 2, 'ETag': response_2.etag}
+ ]
+ complete_request = messages.CompleteMultipartUploadRequest(self.test_bucket,
+ object,
+ upload_id,
+ parts)
+
+ try:
+ self.client.complete_multipart_upload(complete_request)
+ except Exception as e:
+ self.assertIsInstance(e, messages.S3ClientError)
+ self.assertEqual(e.code, 400)
+
+ def test_complete_multipart_upload_too_many(self):
+
+ object = self.test_path + 'upload_part'
+ request = messages.UploadRequest(self.test_bucket, object, None)
+ response = self.client.create_multipart_upload(request)
+ upload_id = response.upload_id
+
+ part_number = 1
+ contents_1 = os.urandom(5 * 1024)
+ request_1 = messages.UploadPartRequest(self.test_bucket,
+ object,
+ upload_id,
+ part_number,
+ contents_1)
+ response_1 = self.client.upload_part(request_1)
+
+
+ part_number = 2
+ contents_2 = os.urandom(1024)
+ request_2 = messages.UploadPartRequest(self.test_bucket,
+ object,
+ upload_id,
+ part_number,
+ contents_2)
+ response_2 = self.client.upload_part(request_2)
+
+ parts = [
+ {'PartNumber': 1, 'ETag': response_1.etag},
+ {'PartNumber': 2, 'ETag': response_2.etag},
+ {'PartNumber': 3, 'ETag': 'fake-etag'},
+ ]
+ complete_request = messages.CompleteMultipartUploadRequest(self.test_bucket,
+ object,
+ upload_id,
+ parts)
+
+ try:
+ self.client.complete_multipart_upload(complete_request)
+ except Exception as e:
+ self.assertIsInstance(e, messages.S3ClientError)
+ self.assertEqual(e.code, 400)
+
+if __name__ == '__main__':
+ logging.getLogger().setLevel(logging.INFO)
+ unittest.main()
diff --git a/sdks/python/apache_beam/io/aws/clients/s3/fake_client.py b/sdks/python/apache_beam/io/aws/clients/s3/fake_client.py
new file mode 100644
index 0000000..597c0e3
--- /dev/null
+++ b/sdks/python/apache_beam/io/aws/clients/s3/fake_client.py
@@ -0,0 +1,238 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+import datetime
+import time
+
+from apache_beam.io.aws.clients.s3 import messages
+
+
+class FakeFile(object):
+
+ def __init__(self, bucket, key, contents, etag=None):
+ self.bucket = bucket
+ self.key = key
+ self.contents = contents
+
+ self.last_modified = time.time()
+
+ if not etag:
+ self.etag = '"%s-1"' % ('x' * 32)
+ else:
+ self.etag = etag
+
+ def get_metadata(self):
+ last_modified_datetime = None
+ if self.last_modified:
+ last_modified_datetime = datetime.datetime.utcfromtimestamp(
+ self.last_modified)
+
+ return messages.Item(self.etag,
+ self.key,
+ last_modified_datetime,
+ len(self.contents),
+ mime_type=None)
+
+
+class FakeS3Client(object):
+ def __init__(self):
+ self.files = {}
+ self.list_continuation_tokens = {}
+ self.multipart_uploads = {}
+
+ # boto3 has different behavior when running some operations against a bucket
+ # that exists vs. against one that doesn't. To emulate that behavior, the
+ # mock client keeps a set of bucket names that it knows "exist".
+ self.known_buckets = set()
+
+ def add_file(self, f):
+ self.files[(f.bucket, f.key)] = f
+ if f.bucket not in self.known_buckets:
+ self.known_buckets.add(f.bucket)
+
+ def get_file(self, bucket, obj):
+ try:
+ return self.files[bucket, obj]
+ except KeyError:
+ raise messages.S3ClientError('Not Found', 404)
+
+ def delete_file(self, bucket, obj):
+ del self.files[(bucket, obj)]
+
+ def get_object_metadata(self, request):
+ r"""Retrieves an object's metadata.
+
+ Args:
+ request: (GetRequest) input message
+
+ Returns:
+ (Item) The response message.
+ """
+ # TODO: Do we want to mock out a lack of credentials?
+ file_ = self.get_file(request.bucket, request.object)
+ return file_.get_metadata()
+
+ def list(self, request):
+ bucket = request.bucket
+ prefix = request.prefix or ''
+ matching_files = []
+
+ for file_bucket, file_name in sorted(iter(self.files)):
+ if bucket == file_bucket and file_name.startswith(prefix):
+ file_object = self.get_file(file_bucket, file_name).get_metadata()
+ matching_files.append(file_object)
+
+ if not matching_files:
+ message = 'Tried to list nonexistent S3 path: s3://%s/%s' % (
+ bucket, prefix)
+ raise messages.S3ClientError(message, 404)
+
+ # Handle pagination.
+ items_per_page = 5
+ if not request.continuation_token:
+ range_start = 0
+ else:
+ if request.continuation_token not in self.list_continuation_tokens:
+ raise ValueError('Invalid page token.')
+ range_start = self.list_continuation_tokens[request.continuation_token]
+ del self.list_continuation_tokens[request.continuation_token]
+
+ result = messages.ListResponse(
+ items=matching_files[range_start:range_start + items_per_page])
+
+ if range_start + items_per_page < len(matching_files):
+ next_range_start = range_start + items_per_page
+ next_continuation_token = '_page_token_%s_%s_%d' % (bucket, prefix,
+ next_range_start)
+ self.list_continuation_tokens[next_continuation_token] = next_range_start
+ result.next_token = next_continuation_token
+
+ return result
+
+ def get_range(self, request, start, end):
+ r"""Retrieves an object.
+
+ Args:
+ request: (GetRequest) request
+ Returns:
+ (bytes) The response message.
+ """
+
+ file_ = self.get_file(request.bucket, request.object)
+
+ # Replicates S3's behavior, per the spec here:
+ # https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
+ if start < 0 or end <= start:
+ return file_.contents
+
+ return file_.contents[start:end]
+
+ def delete(self, request):
+ if request.bucket not in self.known_buckets:
+ raise messages.S3ClientError('The specified bucket does not exist', 404)
+
+ if (request.bucket, request.object) in self.files:
+ self.delete_file(request.bucket, request.object)
+ else:
+ # S3 doesn't raise an error if you try to delete a nonexistent file from
+ # an extant bucket
+ return
+
+ def delete_batch(self, request):
+
+ deleted, failed, errors = [], [], []
+ for object in request.objects:
+ try:
+ delete_request = messages.DeleteRequest(request.bucket, object)
+ self.delete(delete_request)
+ deleted.append(object)
+ except messages.S3ClientError as e:
+ failed.append(object)
+ errors.append(e)
+
+ return messages.DeleteBatchResponse(deleted, failed, errors)
+
+ def copy(self, request):
+
+ src_file = self.get_file(request.src_bucket, request.src_key)
+ dest_file = FakeFile(request.dest_bucket,
+ request.dest_key,
+ src_file.contents)
+ self.add_file(dest_file)
+
+ def create_multipart_upload(self, request):
+ # Derive an upload id from the bucket and key and track the upload's
+ # parts internally
+ upload_id = request.bucket + request.object
+ self.multipart_uploads[upload_id] = {}
+ return messages.UploadResponse(upload_id)
+
+ def upload_part(self, request):
+ # Save off bytes passed to internal data store
+ upload_id, part_number = request.upload_id, request.part_number
+
+ if part_number < 0 or not isinstance(part_number, int):
+ raise messages.S3ClientError('Param validation failed on part number',
+ 400)
+
+ if upload_id not in self.multipart_uploads:
+ raise messages.S3ClientError('The specified upload does not exist', 404)
+
+ self.multipart_uploads[upload_id][part_number] = request.bytes
+
+ etag = '"%s"' % ('x' * 32)
+ return messages.UploadPartResponse(etag, part_number)
+
+ def complete_multipart_upload(self, request):
+ MIN_PART_SIZE = 5 * 2**10 # 5 KiB
+
+ parts_received = self.multipart_uploads[request.upload_id]
+
+ # Check that we got all the parts that they intended to send
+ part_numbers_to_confirm = set(part['PartNumber'] for part in request.parts)
+
+ # Make sure all the expected parts are present
+ if part_numbers_to_confirm != set(parts_received.keys()):
+ raise messages.S3ClientError(
+ 'One or more of the specified parts could not be found', 400)
+
+ # Sort by part number
+ sorted_parts = sorted(parts_received.items(), key=lambda pair: pair[0])
+ sorted_bytes = [bytes_ for (_, bytes_) in sorted_parts]
+
+ # Make sure that the parts aren't too small (except the last part)
+ part_sizes = [len(bytes_) for bytes_ in sorted_bytes]
+ if any(size < MIN_PART_SIZE for size in part_sizes[:-1]):
+ e_message = """
+ All parts but the last must be larger than %d bytes
+ """ % MIN_PART_SIZE
+ raise messages.S3ClientError(e_message, 400)
+
+ # String together all bytes for the given upload
+ final_contents = b''.join(sorted_bytes)
+
+ # Create FakeFile object
+ num_parts = len(parts_received)
+ etag = '"%s-%d"' % ('x' * 32, num_parts)
+ file_ = FakeFile(request.bucket, request.object, final_contents, etag=etag)
+
+ # Store FakeFile in self.files
+ self.add_file(file_)
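+
+
+# Illustrative sketch (the bucket and key names are hypothetical): tests seed
+# the fake with FakeFile objects and then drive it through the same message
+# types as the boto3-backed client.
+#
+#   client = FakeS3Client()
+#   client.add_file(FakeFile('my-bucket', 'my-key', b'0123456789'))
+#   data = client.get_range(messages.GetRequest('my-bucket', 'my-key'), 0, 4)
+#   assert data == b'0123'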
diff --git a/sdks/python/apache_beam/io/aws/clients/s3/messages.py b/sdks/python/apache_beam/io/aws/clients/s3/messages.py
new file mode 100644
index 0000000..40e548d
--- /dev/null
+++ b/sdks/python/apache_beam/io/aws/clients/s3/messages.py
@@ -0,0 +1,169 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+
+class GetRequest():
+ """
+ S3 request object for `Get` command
+ """
+
+ def __init__(self, bucket, object):
+ self.bucket = bucket
+ self.object = object
+
+
+class UploadResponse():
+ """
+ S3 response object for `StartUpload` command
+ """
+
+ def __init__(self, upload_id):
+ self.upload_id = upload_id
+
+
+class UploadRequest():
+ """
+ S3 request object for `StartUpload` command
+ """
+
+ def __init__(self, bucket, object, mime_type):
+ self.bucket = bucket
+ self.object = object
+ self.mime_type = mime_type
+
+
+class UploadPartRequest():
+ """
+ S3 request object for `UploadPart` command
+ """
+
+ def __init__(self, bucket, object, upload_id, part_number, bytes):
+ self.bucket = bucket
+ self.object = object
+ self.upload_id = upload_id
+ self.part_number = part_number
+ self.bytes = bytes
+ # self.mime_type = mime_type
+
+
+class UploadPartResponse():
+ """
+ S3 response object for `UploadPart` command
+ """
+
+ def __init__(self, etag, part_number):
+ self.etag = etag
+ self.part_number = part_number
+
+
+class CompleteMultipartUploadRequest():
+ """
+ S3 request object for `CompleteMultipartUpload` command
+ """
+
+ def __init__(self, bucket, object, upload_id, parts):
+ # parts is a list of objects of the form
+ # {'ETag': response.etag, 'PartNumber': response.part_number}
+ self.bucket = bucket
+ self.object = object
+ self.upload_id = upload_id
+ self.parts = parts
+ # self.mime_type = mime_type
+
+
+class ListRequest():
+ """
+ S3 request object for `List` command
+ """
+
+ def __init__(self, bucket, prefix, continuation_token=None):
+ self.bucket = bucket
+ self.prefix = prefix
+ self.continuation_token = continuation_token
+
+
+class ListResponse():
+ """
+ S3 response object for `List` command
+ """
+
+ def __init__(self, items, next_token=None):
+ self.items = items
+ self.next_token = next_token
+
+
+class Item():
+ """
+ An item in S3
+ """
+
+ def __init__(self, etag, key, last_modified, size, mime_type=None):
+ self.etag = etag
+ self.key = key
+ self.last_modified = last_modified
+ self.size = size
+ self.mime_type = mime_type
+
+
+class DeleteRequest():
+ """
+ S3 request object for `Delete` command
+ """
+
+ def __init__(self, bucket, object):
+ self.bucket = bucket
+ self.object = object
+
+
+class DeleteBatchRequest():
+
+ def __init__(self, bucket, objects):
+ # `objects` is a list of strings corresponding to the keys to be deleted
+ # in the bucket
+ self.bucket = bucket
+ self.objects = objects
+
+
+class DeleteBatchResponse():
+
+ def __init__(self, deleted, failed, errors):
+ # `deleted` is a list of strings corresponding to the keys that were deleted
+ # `failed` is a list of strings corresponding to the keys that caused errors
+ # `errors` is a list of S3ClientErrors, aligned with the order of `failed`
+ self.deleted = deleted
+ self.failed = failed
+ self.errors = errors
+
+
+class CopyRequest():
+
+ def __init__(self, src_bucket, src_key, dest_bucket, dest_key):
+ self.src_bucket = src_bucket
+ self.src_key = src_key
+ self.dest_bucket = dest_bucket
+ self.dest_key = dest_key
+
+
+class S3ClientError(Exception):
+
+ def __init__(self, message=None, code=None):
+ self.message = message
+ self.code = code
diff --git a/sdks/python/apache_beam/io/aws/s3filesystem.py b/sdks/python/apache_beam/io/aws/s3filesystem.py
new file mode 100644
index 0000000..684345a
--- /dev/null
+++ b/sdks/python/apache_beam/io/aws/s3filesystem.py
@@ -0,0 +1,277 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""S3 file system implementation for accessing files on AWS S3."""
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+from future.utils import iteritems
+
+from apache_beam.io.aws import s3io
+from apache_beam.io.filesystem import BeamIOError
+from apache_beam.io.filesystem import CompressedFile
+from apache_beam.io.filesystem import CompressionTypes
+from apache_beam.io.filesystem import FileMetadata
+from apache_beam.io.filesystem import FileSystem
+
+__all__ = ['S3FileSystem']
+
+
+class S3FileSystem(FileSystem):
+ """An S3 `FileSystem` implementation for accessing files on AWS S3
+ """
+
+ CHUNK_SIZE = s3io.MAX_BATCH_OPERATION_SIZE
+ S3_PREFIX = 's3://'
+
+ @classmethod
+ def scheme(cls):
+ """URI scheme for the FileSystem
+ """
+ return 's3'
+
+ def join(self, basepath, *paths):
+ """Join two or more pathname components for the filesystem
+
+ Args:
+ basepath: string path of the first component of the path
+ paths: path components to be added
+
+ Returns: full path after combining all of the components
+ """
+ if not basepath.startswith(S3FileSystem.S3_PREFIX):
+ raise ValueError('Basepath %r must be S3 path.' % basepath)
+
+ path = basepath
+ for p in paths:
+ path = path.rstrip('/') + '/' + p.lstrip('/')
+ return path
+
+ def split(self, path):
+ """Splits the given path into two parts.
+
+ Splits the path into a pair (head, tail) such that tail contains the last
+ component of the path and head contains everything up to that.
+
+ Head will include the S3 prefix ('s3://').
+
+ Args:
+ path: path as a string
+ Returns:
+ a pair of path components as strings.
+ """
+ path = path.strip()
+ if not path.startswith(S3FileSystem.S3_PREFIX):
+ raise ValueError('Path %r must be S3 path.' % path)
+
+ prefix_len = len(S3FileSystem.S3_PREFIX)
+ last_sep = path[prefix_len:].rfind('/')
+ if last_sep >= 0:
+ last_sep += prefix_len
+
+ if last_sep > 0:
+ return (path[:last_sep], path[last_sep + 1:])
+ elif last_sep < 0:
+ return (path, '')
+ else:
+ raise ValueError('Invalid path: %s' % path)
+
+ def mkdirs(self, path):
+ """Recursively create directories for the provided path.
+
+ Args:
+ path: string path of the directory structure that should be created
+
+ Raises:
+ IOError if leaf directory already exists.
+ """
+ pass
+
+ def has_dirs(self):
+ """Whether this FileSystem supports directories."""
+ return False
+
+ def _list(self, dir_or_prefix):
+ """List files in a location.
+
+ Listing is non-recursive, for filesystems that support directories.
+
+ Args:
+ dir_or_prefix: (string) A directory or location prefix (for filesystems
+ that don't have directories).
+
+ Returns:
+ Generator of ``FileMetadata`` objects.
+
+ Raises:
+ ``BeamIOError`` if listing fails, but not if no files were found.
+ """
+ try:
+ for path, size in iteritems(s3io.S3IO().list_prefix(dir_or_prefix)):
+ yield FileMetadata(path, size)
+ except Exception as e: # pylint: disable=broad-except
+ raise BeamIOError("List operation failed", {dir_or_prefix: e})
+
+ def _path_open(self, path, mode, mime_type='application/octet-stream',
+ compression_type=CompressionTypes.AUTO):
+ """Helper functions to open a file in the provided mode.
+ """
+ compression_type = FileSystem._get_compression_type(path, compression_type)
+ mime_type = CompressionTypes.mime_type(compression_type, mime_type)
+ raw_file = s3io.S3IO().open(path, mode, mime_type=mime_type)
+ if compression_type == CompressionTypes.UNCOMPRESSED:
+ return raw_file
+ return CompressedFile(raw_file, compression_type=compression_type)
+
+ def create(self, path, mime_type='application/octet-stream',
+ compression_type=CompressionTypes.AUTO):
+ """Returns a write channel for the given file path.
+
+ Args:
+ path: string path of the file object to be written to the system
+ mime_type: MIME type to specify the type of content in the file object
+ compression_type: Type of compression to be used for this object
+
+ Returns: file handle with a close function for the user to use
+ """
+ return self._path_open(path, 'wb', mime_type, compression_type)
+
+ def open(self, path, mime_type='application/octet-stream',
+ compression_type=CompressionTypes.AUTO):
+ """Returns a read channel for the given file path.
+
+ Args:
+ path: string path of the file object to be written to the system
+ mime_type: MIME type to specify the type of content in the file object
+ compression_type: Type of compression to be used for this object
+
+ Returns: file handle with a close function for the user to use
+ """
+ return self._path_open(path, 'rb', mime_type, compression_type)
+
+ def copy(self, source_file_names, destination_file_names):
+ """Recursively copy the file tree from the source to the destination
+
+ Args:
+ source_file_names: list of source file objects that need to be copied
+ destination_file_names: list of destinations for the copied objects
+
+ Raises:
+ ``BeamIOError`` if any of the copy operations fail
+ """
+ if not len(source_file_names) == len(destination_file_names):
+ message = 'Unable to copy unequal number of sources and destinations'
+ raise BeamIOError(message)
+ src_dest_pairs = list(zip(source_file_names, destination_file_names))
+ return s3io.S3IO().copy_paths(src_dest_pairs)
+
+ def rename(self, source_file_names, destination_file_names):
+ """Rename the files at the source list to the destination list.
+ Source and destination lists should be of the same size.
+
+ Args:
+ source_file_names: List of file paths that need to be moved
+ destination_file_names: List of destination paths for the files
+
+ Raises:
+ ``BeamIOError`` if any of the rename operations fail
+ """
+ if not len(source_file_names) == len(destination_file_names):
+ message = 'Unable to rename unequal number of sources and destinations'
+ raise BeamIOError(message)
+ src_dest_pairs = list(zip(source_file_names, destination_file_names))
+ results = s3io.S3IO().rename_files(src_dest_pairs)
+ exceptions = {(src, dest): error for (src, dest, error) in results
+ if error is not None}
+ if exceptions:
+ raise BeamIOError("Rename operation failed", exceptions)
+
+ def exists(self, path):
+ """Check if the provided path exists on the FileSystem.
+
+ Args:
+ path: string path that needs to be checked.
+
+ Returns: boolean flag indicating if path exists
+ """
+ try:
+ return s3io.S3IO().exists(path)
+ except Exception as e: # pylint: disable=broad-except
+ raise BeamIOError("exists() operation failed", {path: e})
+
+ def size(self, path):
+ """Get size of path on the FileSystem.
+
+ Args:
+ path: string path in question.
+
+ Returns: int size of path according to the FileSystem.
+
+ Raises:
+ ``BeamIOError`` if path doesn't exist.
+ """
+ try:
+ return s3io.S3IO().size(path)
+ except Exception as e: # pylint: disable=broad-except
+ raise BeamIOError("size() operation failed", {path: e})
+
+ def last_updated(self, path):
+ """Get UNIX Epoch time in seconds on the FileSystem.
+
+ Args:
+ path: string path of file.
+
+ Returns: float UNIX Epoch time
+
+ Raises:
+ ``BeamIOError`` if path doesn't exist.
+ """
+ try:
+ return s3io.S3IO().last_updated(path)
+ except Exception as e: # pylint: disable=broad-except
+ raise BeamIOError("last_updated operation failed", {path: e})
+
+ def checksum(self, path):
+ """Fetch checksum metadata of a file on the
+ :class:`~apache_beam.io.filesystem.FileSystem`.
+
+ Args:
+ path: string path of a file.
+
+ Returns: string containing checksum
+
+ Raises:
+ ``BeamIOError`` if path isn't a file or doesn't exist.
+ """
+ try:
+ return s3io.S3IO().checksum(path)
+ except Exception as e: # pylint: disable=broad-except
+ raise BeamIOError("Checksum operation failed", {path: e})
+
+ def delete(self, paths):
+ """Deletes files or directories at the provided paths.
+ Directories will be deleted recursively.
+
+ Args:
+ paths: list of paths that give the file objects to be deleted
+ """
+ results = s3io.S3IO().delete_paths(paths)
+ exceptions = {path: error for (path, error) in results
+ if error is not None}
+ if exceptions:
+ raise BeamIOError("Delete operation failed", exceptions)
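+
+
+# Illustrative usage sketch (hedged; the bucket, path and pipeline options are
+# hypothetical, and PipelineOptions would come from
+# apache_beam.options.pipeline_options). Normally the filesystem is resolved by
+# scheme through apache_beam.io.filesystems.FileSystems, but it can also be
+# used directly:
+#
+#   fs = S3FileSystem(pipeline_options=PipelineOptions())
+#   with fs.create('s3://my-bucket/out.txt') as f:
+#     f.write(b'hello')
+#   assert fs.exists('s3://my-bucket/out.txt')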
diff --git a/sdks/python/apache_beam/io/aws/s3filesystem_test.py b/sdks/python/apache_beam/io/aws/s3filesystem_test.py
new file mode 100644
index 0000000..2ffac3b
--- /dev/null
+++ b/sdks/python/apache_beam/io/aws/s3filesystem_test.py
@@ -0,0 +1,269 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Unit tests for the S3 File System"""
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+import logging
+import unittest
+
+import mock
+
+from apache_beam.io.aws.clients.s3 import messages
+from apache_beam.io.filesystem import BeamIOError
+from apache_beam.io.filesystem import FileMetadata
+from apache_beam.options.pipeline_options import PipelineOptions
+
+# Protect against environments where boto3 library is not available.
+# pylint: disable=wrong-import-order, wrong-import-position
+try:
+ from apache_beam.io.aws import s3filesystem
+except ImportError:
+ s3filesystem = None
+# pylint: enable=wrong-import-order, wrong-import-position
+
+
+@unittest.skipIf(s3filesystem is None, 'AWS dependencies are not installed')
+class S3FileSystemTest(unittest.TestCase):
+
+ def setUp(self):
+ pipeline_options = PipelineOptions()
+ self.fs = s3filesystem.S3FileSystem(pipeline_options=pipeline_options)
+
+ def test_scheme(self):
+ self.assertEqual(self.fs.scheme(), 's3')
+ self.assertEqual(s3filesystem.S3FileSystem.scheme(), 's3')
+
+ def test_join(self):
+ self.assertEqual('s3://bucket/path/to/file',
+ self.fs.join('s3://bucket/path', 'to', 'file'))
+ self.assertEqual('s3://bucket/path/to/file',
+ self.fs.join('s3://bucket/path', 'to/file'))
+ self.assertEqual('s3://bucket/path/to/file',
+ self.fs.join('s3://bucket/path', '/to/file'))
+ self.assertEqual('s3://bucket/path/to/file',
+ self.fs.join('s3://bucket/path/', 'to', 'file'))
+ self.assertEqual('s3://bucket/path/to/file',
+ self.fs.join('s3://bucket/path/', 'to/file'))
+ self.assertEqual('s3://bucket/path/to/file',
+ self.fs.join('s3://bucket/path/', '/to/file'))
+ with self.assertRaises(ValueError):
+ self.fs.join('/bucket/path/', '/to/file')
+
+ def test_split(self):
+ self.assertEqual(('s3://foo/bar', 'baz'),
+ self.fs.split('s3://foo/bar/baz'))
+ self.assertEqual(('s3://foo', ''),
+ self.fs.split('s3://foo/'))
+ self.assertEqual(('s3://foo', ''),
+ self.fs.split('s3://foo'))
+
+ with self.assertRaises(ValueError):
+ self.fs.split('/no/s3/prefix')
+
+ @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
+ def test_match_multiples(self, unused_mock_arg):
+ # Prepare mocks.
+ s3io_mock = mock.MagicMock()
+ s3filesystem.s3io.S3IO = lambda: s3io_mock
+ s3io_mock.list_prefix.return_value = {
+ 's3://bucket/file1': 1,
+ 's3://bucket/file2': 2
+ }
+ expected_results = set([
+ FileMetadata('s3://bucket/file1', 1),
+ FileMetadata('s3://bucket/file2', 2)
+ ])
+ match_result = self.fs.match(['s3://bucket/'])[0]
+
+ self.assertEqual(
+ set(match_result.metadata_list),
+ expected_results)
+ s3io_mock.list_prefix.assert_called_once_with('s3://bucket/')
+
+ @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
+ def test_match_multiples_limit(self, unused_mock_arg):
+ # Prepare mocks.
+ s3io_mock = mock.MagicMock()
+ limit = 1
+ s3filesystem.s3io.S3IO = lambda: s3io_mock
+ s3io_mock.list_prefix.return_value = {
+ 's3://bucket/file1': 1
+ }
+ expected_results = set([
+ FileMetadata('s3://bucket/file1', 1)
+ ])
+ match_result = self.fs.match(['s3://bucket/'], [limit])[0]
+ self.assertEqual(
+ set(match_result.metadata_list),
+ expected_results)
+ self.assertEqual(
+ len(match_result.metadata_list),
+ limit)
+ s3io_mock.list_prefix.assert_called_once_with('s3://bucket/')
+
+ @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
+ def test_match_multiples_error(self, unused_mock_arg):
+ # Prepare mocks.
+ s3io_mock = mock.MagicMock()
+ s3filesystem.s3io.S3IO = lambda: s3io_mock
+ exception = IOError('Failed')
+ s3io_mock.list_prefix.side_effect = exception
+
+ with self.assertRaises(BeamIOError) as error:
+ self.fs.match(['s3://bucket/'])
+
+ self.assertIn('Match operation failed', str(error.exception))
+ s3io_mock.list_prefix.assert_called_once_with('s3://bucket/')
+
+ @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
+ def test_match_multiple_patterns(self, unused_mock_arg):
+ # Prepare mocks.
+ s3io_mock = mock.MagicMock()
+ s3filesystem.s3io.S3IO = lambda: s3io_mock
+ s3io_mock.list_prefix.side_effect = [
+ {'s3://bucket/file1': 1},
+ {'s3://bucket/file2': 2},
+ ]
+ expected_results = [
+ [FileMetadata('s3://bucket/file1', 1)],
+ [FileMetadata('s3://bucket/file2', 2)]
+ ]
+ result = self.fs.match(['s3://bucket/file1*', 's3://bucket/file2*'])
+ self.assertEqual(
+ [mr.metadata_list for mr in result],
+ expected_results)
+
+ @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
+ def test_create(self, unused_mock_arg):
+ # Prepare mocks.
+ s3io_mock = mock.MagicMock()
+ s3filesystem.s3io.S3IO = lambda: s3io_mock
+ # Issue file create
+ _ = self.fs.create('s3://bucket/from1', 'application/octet-stream')
+
+ s3io_mock.open.assert_called_once_with(
+ 's3://bucket/from1', 'wb', mime_type='application/octet-stream')
+
+ @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
+ def test_open(self, unused_mock_arg):
+ # Prepare mocks.
+ s3io_mock = mock.MagicMock()
+ s3filesystem.s3io.S3IO = lambda: s3io_mock
+ # Issue file open
+ _ = self.fs.open('s3://bucket/from1', 'application/octet-stream')
+
+ s3io_mock.open.assert_called_once_with(
+ 's3://bucket/from1', 'rb', mime_type='application/octet-stream')
+
+ @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
+ def test_copy_file(self, unused_mock_arg):
+ # Prepare mocks.
+ s3io_mock = mock.MagicMock()
+ s3filesystem.s3io.S3IO = lambda: s3io_mock
+
+ sources = ['s3://bucket/from1', 's3://bucket/from2']
+ destinations = ['s3://bucket/to1', 's3://bucket/to2']
+
+ # Issue file copy
+ self.fs.copy(sources, destinations)
+
+ src_dest_pairs = list(zip(sources, destinations))
+ s3io_mock.copy_paths.assert_called_once_with(src_dest_pairs)
+
+ @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
+ def test_copy_file_error(self, unused_mock_arg):
+ # Prepare mocks.
+ s3io_mock = mock.MagicMock()
+ s3filesystem.s3io.S3IO = lambda: s3io_mock
+
+ sources = ['s3://bucket/from1', 's3://bucket/from2', 's3://bucket/from3']
+ destinations = ['s3://bucket/to1', 's3://bucket/to2']
+
+ # Issue file copy
+ with self.assertRaises(BeamIOError):
+ self.fs.copy(sources, destinations)
+
+ @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
+ def test_delete(self, unused_mock_arg):
+ # Prepare mocks.
+ s3io_mock = mock.MagicMock()
+ s3filesystem.s3io.S3IO = lambda: s3io_mock
+ s3io_mock.size.return_value = 0
+ files = [
+ 's3://bucket/from1',
+ 's3://bucket/from2',
+ 's3://bucket/from3',
+ ]
+
+ # Issue batch delete.
+ self.fs.delete(files)
+ s3io_mock.delete_paths.assert_called_once_with(files)
+
+ @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
+ def test_delete_error(self, unused_mock_arg):
+ # Prepare mocks.
+ s3io_mock = mock.MagicMock()
+ s3filesystem.s3io.S3IO = lambda: s3io_mock
+
+ problematic_directory = 's3://nonexistent-bucket/tree/'
+ exception = messages.S3ClientError('Not found', 404)
+
+ s3io_mock.delete_paths.return_value = {
+ problematic_directory: exception,
+ 's3://bucket/object1': None,
+ 's3://bucket/object2': None
+ }
+ s3io_mock.size.return_value = 0
+ files = [
+ problematic_directory,
+ 's3://bucket/object1',
+ 's3://bucket/object2',
+ ]
+ expected_results = {problematic_directory: exception}
+
+ # Issue batch delete.
+ with self.assertRaises(BeamIOError) as error:
+ self.fs.delete(files)
+ self.assertIn('Delete operation failed', str(error.exception))
+ self.assertEqual(error.exception.exception_details, expected_results)
+ s3io_mock.delete_paths.assert_called()
+
+ @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
+ def test_rename(self, unused_mock_arg):
+ # Prepare mocks.
+ s3io_mock = mock.MagicMock()
+ s3filesystem.s3io.S3IO = lambda: s3io_mock
+
+ sources = ['s3://bucket/from1', 's3://bucket/from2']
+ destinations = ['s3://bucket/to1', 's3://bucket/to2']
+
+ # Issue file rename
+ self.fs.rename(sources, destinations)
+
+ src_dest_pairs = list(zip(sources, destinations))
+ s3io_mock.rename_files.assert_called_once_with(src_dest_pairs)
+
+
+if __name__ == '__main__':
+ logging.getLogger().setLevel(logging.INFO)
+ unittest.main()
diff --git a/sdks/python/apache_beam/io/aws/s3io.py b/sdks/python/apache_beam/io/aws/s3io.py
new file mode 100644
index 0000000..53f057a
--- /dev/null
+++ b/sdks/python/apache_beam/io/aws/s3io.py
@@ -0,0 +1,608 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""AWS S3 client
+"""
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+import errno
+import io
+import logging
+import re
+import time
+import traceback
+from builtins import object
+
+from apache_beam.io.aws.clients.s3 import messages
+from apache_beam.io.filesystemio import Downloader
+from apache_beam.io.filesystemio import DownloaderStream
+from apache_beam.io.filesystemio import Uploader
+from apache_beam.io.filesystemio import UploaderStream
+from apache_beam.utils import retry
+
+try:
+ # pylint: disable=wrong-import-order, wrong-import-position
+ # pylint: disable=ungrouped-imports
+ from apache_beam.io.aws.clients.s3 import boto3_client
+ BOTO3_INSTALLED = True
+except ImportError:
+ BOTO3_INSTALLED = False
+
+MAX_BATCH_OPERATION_SIZE = 100
+
+
+def parse_s3_path(s3_path, object_optional=False):
+ """Return the bucket and object names of the given s3:// path."""
+ match = re.match('^s3://([^/]+)/(.*)$', s3_path)
+ if match is None or (match.group(2) == '' and not object_optional):
+ raise ValueError('S3 path must be in the form s3://<bucket>/<object>.')
+ return match.group(1), match.group(2)
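+# Illustrative behavior sketch for parse_s3_path (comments only; the paths
+# below are hypothetical examples, not part of the module):
+#
+#   parse_s3_path('s3://my-bucket/path/to/obj')  # -> ('my-bucket', 'path/to/obj')
+#   parse_s3_path('s3://my-bucket/', object_optional=True)  # -> ('my-bucket', '')
+#   parse_s3_path('/local/path')  # raises ValueError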
+
+
+class S3IO(object):
+ """S3 I/O client."""
+
+ def __init__(self, client=None):
+ if client is not None:
+ self.client = client
+ elif BOTO3_INSTALLED:
+ self.client = boto3_client.Client()
+ else:
+ message = 'AWS dependencies are not installed, and no alternative ' \
+ 'client was provided to S3IO.'
+ raise RuntimeError(message)
+
+ def open(self,
+ filename,
+ mode='r',
+ read_buffer_size=16*1024*1024,
+ mime_type='application/octet-stream'):
+ """Open an S3 file path for reading or writing.
+
+ Args:
+ filename (str): S3 file path in the form ``s3://<bucket>/<object>``.
+ mode (str): ``'r'`` for reading or ``'w'`` for writing.
+ read_buffer_size (int): Buffer size to use during read operations.
+ mime_type (str): Mime type to set for write operations.
+
+ Returns:
+ S3 file object.
+
+ Raises:
+ ~exceptions.ValueError: Invalid open file mode.
+ """
+ if mode == 'r' or mode == 'rb':
+ downloader = S3Downloader(self.client, filename,
+ buffer_size=read_buffer_size)
+ return io.BufferedReader(DownloaderStream(downloader, mode=mode),
+ buffer_size=read_buffer_size)
+ elif mode == 'w' or mode == 'wb':
+ uploader = S3Uploader(self.client, filename, mime_type)
+ return io.BufferedWriter(UploaderStream(uploader, mode=mode),
+ buffer_size=128 * 1024)
+ else:
+ raise ValueError('Invalid file open mode: %s.' % mode)
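+
+ # Usage sketch (illustrative only; assumes valid AWS credentials and an
+ # existing bucket named 'my-bucket'):
+ #
+ # with S3IO().open('s3://my-bucket/key', 'w') as f:
+ # f.write(b'payload')
+ # with S3IO().open('s3://my-bucket/key', 'r') as f:
+ # data = f.read()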
+
+ @retry.with_exponential_backoff(
+ retry_filter=retry.retry_on_server_errors_and_timeout_filter)
+ def list_prefix(self, path):
+ """Lists files matching the prefix.
+
+ Args:
+ path: S3 file path pattern in the form s3://<bucket>/[name].
+
+ Returns:
+ Dictionary of file name -> size.
+ """
+ bucket, prefix = parse_s3_path(path, object_optional=True)
+ request = messages.ListRequest(bucket=bucket, prefix=prefix)
+
+ file_sizes = {}
+ counter = 0
+ start_time = time.time()
+
+ logging.info("Starting the size estimation of the input")
+
+ while True:
+ response = self.client.list(request)
+ for item in response.items:
+ file_name = 's3://%s/%s' % (bucket, item.key)
+ file_sizes[file_name] = item.size
+ counter += 1
+ if counter % 10000 == 0:
+ logging.info("Finished computing size of: %s files", len(file_sizes))
+ if response.next_token:
+ request.continuation_token = response.next_token
+ else:
+ break
+
+ logging.info("Finished listing %s files in %s seconds.",
+ counter, time.time() - start_time)
+
+ return file_sizes
+
+ @retry.with_exponential_backoff(
+ retry_filter=retry.retry_on_server_errors_and_timeout_filter)
+ def checksum(self, path):
+ """Looks up the checksum of an S3 object.
+
+ Args:
+ path: S3 file path in the form s3://<bucket>/<name>.
+
+ Returns: The S3 ETag of the object, which serves as its checksum.
+ """
+ bucket, object_path = parse_s3_path(path)
+ request = messages.GetRequest(bucket, object_path)
+ item = self.client.get_object_metadata(request)
+ return item.etag
+
+ @retry.with_exponential_backoff(
+ retry_filter=retry.retry_on_server_errors_and_timeout_filter)
+ def copy(self, src, dest):
+ """Copies a single S3 file object from src to dest.
+
+ Args:
+ src: S3 file path pattern in the form s3://<bucket>/<name>.
+ dest: S3 file path pattern in the form s3://<bucket>/<name>.
+
+ Raises:
+ TimeoutError on timeout.
+ """
+ src_bucket, src_key = parse_s3_path(src)
+ dest_bucket, dest_key = parse_s3_path(dest)
+ request = messages.CopyRequest(src_bucket, src_key, dest_bucket, dest_key)
+ self.client.copy(request)
+
+ # We intentionally do not decorate this method with a retry, since the
+ # underlying copy and delete operations are already idempotent operations
+ # protected by retry decorators.
+ def copy_paths(self, src_dest_pairs):
+ """Copies the given S3 objects from src to dest. This can handle directory
+ or file paths.
+
+ Args:
+ src_dest_pairs: list of (src, dest) tuples of s3://<bucket>/<name> file
+ paths to copy from src to dest
+ Returns: List of tuples of (src, dest, exception), where exception is None
+ if the operation succeeded or the relevant exception if it failed.
+ Directory pairs expand to one result tuple per copied object.
+ """
+ if not src_dest_pairs: return []
+
+ results = []
+
+ for src_path, dest_path in src_dest_pairs:
+
+ # Copy a directory with self.copy_tree
+ if src_path.endswith('/') and dest_path.endswith('/'):
+ try:
+ results += self.copy_tree(src_path, dest_path)
+ except messages.S3ClientError as err:
+ results.append((src_path, dest_path, err))
+
+ # Copy individual files with self.copy
+ elif not src_path.endswith('/') and not dest_path.endswith('/'):
+ src_bucket, src_key = parse_s3_path(src_path)
+ dest_bucket, dest_key = parse_s3_path(dest_path)
+ request = messages.CopyRequest(src_bucket,
+ src_key,
+ dest_bucket,
+ dest_key)
+
+ try:
+ self.client.copy(request)
+ results.append((src_path, dest_path, None))
+ except messages.S3ClientError as e:
+ results.append((src_path, dest_path, e))
+
+ # Mismatched paths (one directory, one non-directory) get an error result
+ else:
+ err = messages.S3ClientError(
+ "Can't copy mismatched paths (one directory, one non-directory):" +
+ ' %s, %s' % (src_path, dest_path),
+ 400)
+ results.append((src_path, dest_path, err))
+
+ return results
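+
+ # Shape sketch for copy_paths (illustrative; paths are hypothetical):
+ #
+ # pairs = [('s3://b/a.txt', 's3://b/a_copy.txt'),
+ # ('s3://b/dir/', 's3://b/dir_copy/')]
+ # S3IO().copy_paths(pairs)
+ # # -> [(src, dest, None), ...] on success; the directory pair expands to
+ # # one tuple per object found under 's3://b/dir/'.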
+
+ # We intentionally do not decorate this method with a retry, since the
+ # underlying copy and delete operations are already idempotent operations
+ # protected by retry decorators.
+ def copy_tree(self, src, dest):
+ """Renames the given S3 directory and it's contents recursively
+ from src to dest.
+
+ Args:
+ src: S3 file path pattern in the form s3://<bucket>/<name>/.
+ dest: S3 file path pattern in the form s3://<bucket>/<name>/.
+
+ Returns:
+ List of tuples of (src, dest, exception) where exception is None if the
+ operation succeeded or the relevant exception if the operation failed.
+ """
+ assert src.endswith('/')
+ assert dest.endswith('/')
+
+ results = []
+ for entry in self.list_prefix(src):
+ rel_path = entry[len(src):]
+ try:
+ self.copy(entry, dest + rel_path)
+ results.append((entry, dest + rel_path, None))
+ except messages.S3ClientError as e:
+ results.append((entry, dest + rel_path, e))
+
+ return results
+
+ @retry.with_exponential_backoff(
+ retry_filter=retry.retry_on_server_errors_and_timeout_filter)
+ def delete(self, path):
+ """Deletes a single S3 file object from src to dest.
+
+ Args:
+ src: S3 file path pattern in the form s3://<bucket>/<name>/.
+ dest: S3 file path pattern in the form s3://<bucket>/<name>/.
+
+ Returns:
+ List of tuples of (src, dest, exception) in the same order as the
+ src_dest_pairs argument, where exception is None if the operation
+ succeeded or the relevant exception if the operation failed.
+ """
+ bucket, object_path = parse_s3_path(path)
+ request = messages.DeleteRequest(bucket, object_path)
+
+ try:
+ self.client.delete(request)
+ except messages.S3ClientError as e:
+ if e.code == 404:
+ return # Same behavior as GCS - don't surface a 404 error
+ else:
+ logging.error('HTTP error while deleting file %s: %s', path, e)
+ raise e
+
+ # We intentionally do not decorate this method with a retry, since the
+ # underlying copy and delete operations are already idempotent operations
+ # protected by retry decorators.
+ def delete_paths(self, paths):
+ """Deletes the given S3 objects from src to dest. This can handle directory
+ or file paths.
+
+ Args:
+ src: S3 file path pattern in the form s3://<bucket>/<name>/.
+ dest: S3 file path pattern in the form s3://<bucket>/<name>/.
+
+ Returns:
+ List of tuples of (src, dest, exception) in the same order as the
+ src_dest_pairs argument, where exception is None if the operation
+ succeeded or the relevant exception if the operation failed.
+ """
+ directories, not_directories = [], []
+ for path in paths:
+ if path.endswith('/'): directories.append(path)
+ else: not_directories.append(path)
+
+ results = {}
+
+ for directory in directories:
+ dir_result = dict(self.delete_tree(directory))
+ results.update(dir_result)
+
+ not_directory_results = dict(self.delete_files(not_directories))
+ results.update(not_directory_results)
+
+ return results
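+
+ # Shape sketch for delete_paths (illustrative; paths are hypothetical):
+ #
+ # S3IO().delete_paths(['s3://b/file.txt', 's3://b/dir/'])
+ # # -> {'s3://b/file.txt': None, 's3://b/dir/obj1': None, ...}; values are
+ # # None on success or the exception that caused the failure.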
+
+ # We intentionally do not decorate this method with a retry, since the
+ # underlying copy and delete operations are already idempotent operations
+ # protected by retry decorators.
+ def delete_files(self, paths, max_batch_size=1000):
+ """Deletes the given S3 file object from src to dest.
+
+ Args:
+ paths: List of S3 file paths in the form s3://<bucket>/<name>
+ max_batch_size: Largest number of keys to send to the client to be deleted
+ simultaneously
+
+ Returns: List of tuples of (path, exception) in the same order as the paths
+ argument, where exception is None if the operation succeeded or
+ the relevant exception if the operation failed.
+ """
+ if not paths: return []
+
+ # Sort paths into bucket: [keys]
+ buckets, keys = zip(*[parse_s3_path(path) for path in paths])
+ grouped_keys = {bucket: [] for bucket in buckets}
+ for bucket, key in zip(buckets, keys): grouped_keys[bucket].append(key)
+
+ # For each bucket, delete minibatches of keys
+ results = {}
+ for bucket, keys in grouped_keys.items():
+ for i in range(0, len(keys), max_batch_size):
+ minibatch_keys = keys[i : i + max_batch_size]
+ results.update(self._delete_minibatch(bucket, minibatch_keys))
+
+ # Organize final results
+ final_results = [(path, results[parse_s3_path(path)]) for path in paths]
+
+ return final_results
+
+ @retry.with_exponential_backoff(
+ retry_filter=retry.retry_on_server_errors_and_timeout_filter)
+ def _delete_minibatch(self, bucket, keys):
+ """A helper method. Boto3 allows batch deletions
+ for files within the same bucket.
+
+ Args:
+ bucket: String bucket name
+ keys: List of keys to be deleted in the bucket
+
+ Returns: dict of the form {(bucket, key): error}, where error is None if the
+ operation succeeded
+ """
+ request = messages.DeleteBatchRequest(bucket, keys)
+ results = {}
+ try:
+ response = self.client.delete_batch(request)
+
+ for key in response.deleted:
+ results[(bucket, key)] = None
+
+ for key, error in zip(response.failed, response.errors):
+ results[(bucket, key)] = error
+
+ except messages.S3ClientError as e:
+ for key in keys:
+ results[(bucket, key)] = e
+
+ return results
+
+ # We intentionally do not decorate this method with a retry, since the
+ # underlying copy and delete operations are already idempotent operations
+ # protected by retry decorators.
+ def delete_tree(self, root):
+ """Deletes all objects under the given S3 directory.
+
+ Args:
+ root: S3 root path in the form s3://<bucket>/<name>/ (ending with a "/")
+
+ Returns: List of tuples of (path, exception), where each path is an object
+ under the given root. exception is None if the operation succeeded
+ or the relevant exception if the operation failed.
+ """
+ assert root.endswith('/')
+
+ paths = self.list_prefix(root)
+ return self.delete_files(paths)
+
+ @retry.with_exponential_backoff(
+ retry_filter=retry.retry_on_server_errors_and_timeout_filter)
+ def size(self, path):
+ """Returns the size of a single S3 object.
+
+ This method does not perform glob expansion. Hence the given path must be
+ for a single S3 object.
+
+ Returns: size of the S3 object in bytes.
+ """
+ bucket, object_path = parse_s3_path(path)
+ request = messages.GetRequest(bucket, object_path)
+ item = self.client.get_object_metadata(request)
+ return item.size
+
+ # We intentionally do not decorate this method with a retry, since the
+ # underlying copy and delete operations are already idempotent operations
+ # protected by retry decorators.
+ def rename(self, src, dest):
+ """Renames the given S3 object from src to dest.
+
+ Args:
+ src: S3 file path pattern in the form s3://<bucket>/<name>.
+ dest: S3 file path pattern in the form s3://<bucket>/<name>.
+ """
+ self.copy(src, dest)
+ self.delete(src)
+
+ @retry.with_exponential_backoff(
+ retry_filter=retry.retry_on_server_errors_and_timeout_filter)
+ def last_updated(self, path):
+ """Returns the last updated epoch time of a single S3 object.
+
+ This method does not perform glob expansion. Hence the given path must be
+ for a single S3 object.
+
+ Returns: last updated time of the S3 object in seconds.
+ """
+ bucket, object_path = parse_s3_path(path)
+ request = messages.GetRequest(bucket, object_path)
+ datetime = self.client.get_object_metadata(request).last_modified
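+ # last_modified is a datetime (assumed UTC); mktime() treats the tuple as
+ # local time, so subtract time.timezone to recover the UTC epoch value and
+ # add the microseconds back for sub-second precision.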
+ return (time.mktime(datetime.timetuple()) - time.timezone
+ + datetime.microsecond / 1000000.0)
+
+ def exists(self, path):
+ """Returns whether the given S3 object exists.
+
+ Args:
+ path: S3 file path pattern in the form s3://<bucket>/<name>.
+ """
+ bucket, object_path = parse_s3_path(path)
+ request = messages.GetRequest(bucket, object_path)
+ try:
+ self.client.get_object_metadata(request)
+ return True
+ except messages.S3ClientError as e:
+ if e.code == 404:
+ # HTTP 404 indicates that the file did not exist
+ return False
+ else:
+ # We re-raise all other exceptions
+ raise
+
+ def rename_files(self, src_dest_pairs):
+ """Renames the given S3 objects from src to dest.
+
+ Args:
+ src_dest_pairs: list of (src, dest) tuples of s3://<bucket>/<name> file
+ paths to rename from src to dest
+ Returns: List of tuples of (src, dest, exception) in the same order as the
+ src_dest_pairs argument, where exception is None if the operation
+ succeeded or the relevant exception if the operation failed.
+ """
+ if not src_dest_pairs: return []
+
+ # Renaming directories is not supported; reject any directory paths
+ for src, dest in src_dest_pairs:
+ if src.endswith('/') or dest.endswith('/'):
+ raise ValueError('Cannot rename a directory')
+
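+ # Copy everything first; only sources whose copy succeeded are deleted, so
+ # a failed copy leaves the original object in place.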
+ copy_results = self.copy_paths(src_dest_pairs)
+ paths_to_delete = [src for (src, _, err) in copy_results if err is None]
+ delete_results = self.delete_files(paths_to_delete)
+
+ delete_results_dict = {src: err for (src, err) in delete_results}
+ rename_results = []
+ for src, dest, err in copy_results:
+ if err is not None: rename_results.append((src, dest, err))
+ elif delete_results_dict[src] is not None:
+ rename_results.append((src, dest, delete_results_dict[src]))
+ else: rename_results.append((src, dest, None))
+
+ return rename_results
+
+
+class S3Downloader(Downloader):
+ def __init__(self, client, path, buffer_size):
+ self._client = client
+ self._path = path
+ self._bucket, self._name = parse_s3_path(path)
+ self._buffer_size = buffer_size
+
+ # Get object state.
+ self._get_request = (messages.GetRequest(
+ bucket=self._bucket,
+ object=self._name))
+
+ try:
+ metadata = self._get_object_metadata(self._get_request)
+
+ except messages.S3ClientError as e:
+ if e.code == 404:
+ raise IOError(errno.ENOENT, 'Not found: %s' % self._path)
+ else:
+ logging.error('HTTP error while requesting file %s: %s', self._path, e)
+ raise
+
+ self._size = metadata.size
+
+ @retry.with_exponential_backoff(
+ retry_filter=retry.retry_on_server_errors_and_timeout_filter)
+ def _get_object_metadata(self, get_request):
+ return self._client.get_object_metadata(get_request)
+
+ @property
+ def size(self):
+ return self._size
+
+ def get_range(self, start, end):
+ return self._client.get_range(self._get_request, start, end)
+
+
+class S3Uploader(Uploader):
+ def __init__(self, client, path, mime_type='application/octet-stream'):
+ self._client = client
+ self._path = path
+ self._bucket, self._name = parse_s3_path(path)
+ self._mime_type = mime_type
+
+ self.part_number = 1
+ self.buffer = b''
+
+ self.last_error = None
+
+ self.upload_id = None
+
+ self.parts = []
+
+ self._start_upload()
+
+ # There is retry logic in the underlying transfer library but we should make
+ # it more explicit so we can control the retry parameters.
+ @retry.no_retries # Using no_retries marks this as an integration point.
+ def _start_upload(self):
+ # The uploader by default transfers data in chunks of 1024 * 1024 bytes at
+ # a time, buffering writes until that size is reached.
+ try:
+ request = messages.UploadRequest(self._bucket,
+ self._name,
+ self._mime_type)
+ response = self._client.create_multipart_upload(request)
+ self.upload_id = response.upload_id
+ except Exception as e: # pylint: disable=broad-except
+ logging.error('Error in _start_upload while inserting file %s: %s',
+ self._path, traceback.format_exc())
+ self.last_error = e
+ raise e
+
+ def put(self, data):
+
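+ # S3 multipart uploads require every part except the last to be at least
+ # 5 MiB, and no part may be larger than 5 GiB, so buffer writes until a
+ # part-sized chunk is available.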
+ MIN_WRITE_SIZE = 5 * 1024 * 1024
+ MAX_WRITE_SIZE = 5 * 1024 * 1024 * 1024
+
+ # TODO: Byte strings might not be the most performant way to handle this
+ self.buffer += data.tobytes()
+
+ while len(self.buffer) >= MIN_WRITE_SIZE:
+ # Take the first chunk off the buffer and write it to S3
+ chunk = self.buffer[:MAX_WRITE_SIZE]
+ self._write_to_s3(chunk)
+ # Remove the written chunk from the buffer
+ self.buffer = self.buffer[MAX_WRITE_SIZE:]
+
+ def _write_to_s3(self, data):
+
+ try:
+ request = messages.UploadPartRequest(self._bucket,
+ self._name,
+ self.upload_id,
+ self.part_number,
+ data)
+ response = self._client.upload_part(request)
+ self.parts.append({'ETag': response.etag,
+ 'PartNumber': response.part_number})
+ self.part_number = self.part_number + 1
+ except messages.S3ClientError as e:
+ self.last_error = e
+ if e.code == 404:
+ raise IOError(errno.ENOENT, 'Not found: %s' % self._path)
+ else:
+ logging.error('HTTP error while requesting file %s: %s', self._path, e)
+ raise
+
+ def finish(self):
+
+ self._write_to_s3(self.buffer)
+
+ if self.last_error is not None:
+ raise self.last_error # pylint: disable=raising-bad-type
+
+ request = messages.CompleteMultipartUploadRequest(self._bucket,
+ self._name,
+ self.upload_id,
+ self.parts)
+ self._client.complete_multipart_upload(request)
diff --git a/sdks/python/apache_beam/io/aws/s3io_test.py b/sdks/python/apache_beam/io/aws/s3io_test.py
new file mode 100644
index 0000000..040871d
--- /dev/null
+++ b/sdks/python/apache_beam/io/aws/s3io_test.py
@@ -0,0 +1,785 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Tests for S3 client."""
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+import logging
+import os
+import random
+import time
+import unittest
+
+from apache_beam.io.aws import s3io
+from apache_beam.io.aws.clients.s3 import fake_client
+from apache_beam.io.aws.clients.s3 import messages
+
+
+class TestS3PathParser(unittest.TestCase):
+
+ BAD_S3_PATHS = [
+ 's3://',
+ 's3://bucket',
+ 's3:///name',
+ 's3:///',
+ 's3:/blah/bucket/name',
+ ]
+
+ def test_s3_path(self):
+ self.assertEqual(
+ s3io.parse_s3_path('s3://bucket/name'), ('bucket', 'name'))
+ self.assertEqual(
+ s3io.parse_s3_path('s3://bucket/name/sub'), ('bucket', 'name/sub'))
+
+ def test_bad_s3_path(self):
+ for path in self.BAD_S3_PATHS:
+ self.assertRaises(ValueError, s3io.parse_s3_path, path)
+ self.assertRaises(ValueError, s3io.parse_s3_path, 's3://bucket/')
+
+ def test_s3_path_object_optional(self):
+ self.assertEqual(
+ s3io.parse_s3_path('s3://bucket/name', object_optional=True),
+ ('bucket', 'name'))
+ self.assertEqual(
+ s3io.parse_s3_path('s3://bucket/', object_optional=True),
+ ('bucket', ''))
+
+ def test_bad_s3_path_object_optional(self):
+ for path in self.BAD_S3_PATHS:
+ self.assertRaises(ValueError, s3io.parse_s3_path, path, True)
+
+
+class TestS3IO(unittest.TestCase):
+
+ def _insert_random_file(self, client, path, size):
+ bucket, name = s3io.parse_s3_path(path)
+ contents = os.urandom(size)
+ fakeFile = fake_client.FakeFile(bucket, name, contents)
+
+ if self.USE_MOCK:
+ self.client.add_file(fakeFile)
+
+ else:
+ f = self.aws.open(path, 'w')
+ f.write(contents)
+ f.close()
+
+ return fakeFile
+
+ def setUp(self):
+
+ # These tests can be run locally against a mock S3 client, or as integration
+ # tests against the real S3 client.
+ self.USE_MOCK = True
+
+ # If you're running integration tests with S3, set this variable to be an
+ # s3 path that you have access to where test data can be written. If you're
+ # just running tests against the mock, this can be any s3 path. It should
+ # end with a '/'.
+ self.TEST_DATA_PATH = 's3://random-data-sets/beam_tests/'
+
+ if self.USE_MOCK:
+ self.client = fake_client.FakeS3Client()
+ test_data_bucket, _ = s3io.parse_s3_path(self.TEST_DATA_PATH)
+ self.client.known_buckets.add(test_data_bucket)
+ self.aws = s3io.S3IO(self.client)
+
+ else:
+ self.aws = s3io.S3IO()
+ self.client = self.aws.client
+
+ def test_size(self):
+ file_name = self.TEST_DATA_PATH + 'dummy_file'
+ file_size = 1234
+
+ self._insert_random_file(self.client, file_name, file_size)
+ self.assertTrue(self.aws.exists(file_name))
+ self.assertEqual(1234, self.aws.size(file_name))
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ def test_last_updated(self):
+ file_name = self.TEST_DATA_PATH + 'dummy_file'
+ file_size = 1234
+
+ self._insert_random_file(self.client, file_name, file_size)
+ self.assertTrue(self.aws.exists(file_name))
+
+ tolerance = 5 * 60 # 5 mins
+ low_bound, high_bound = time.time() - tolerance, time.time() + tolerance
+ result = self.aws.last_updated(file_name)
+ self.assertTrue(low_bound <= result <= high_bound)
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ def test_checksum(self):
+
+ file_name = self.TEST_DATA_PATH + 'checksum'
+ file_size = 1024
+ file_ = self._insert_random_file(self.client, file_name, file_size)
+
+ original_etag = self.aws.checksum(file_name)
+
+ self.aws.delete(file_name)
+
+ with self.aws.open(file_name, 'w') as f:
+ f.write(file_.contents)
+
+ rewritten_etag = self.aws.checksum(file_name)
+
+ self.assertEqual(original_etag, rewritten_etag)
+ self.assertEqual(len(original_etag), 36)
+ self.assertTrue(original_etag.endswith('-1"'))
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ def test_copy(self):
+ src_file_name = self.TEST_DATA_PATH + 'source'
+ dest_file_name = self.TEST_DATA_PATH + 'dest'
+ file_size = 1024
+ self._insert_random_file(self.client, src_file_name, file_size)
+
+ self.assertTrue(src_file_name in
+ self.aws.list_prefix(self.TEST_DATA_PATH))
+ self.assertFalse(dest_file_name in
+ self.aws.list_prefix(self.TEST_DATA_PATH))
+
+ self.aws.copy(src_file_name, dest_file_name)
+
+ self.assertTrue(src_file_name in
+ self.aws.list_prefix(self.TEST_DATA_PATH))
+ self.assertTrue(dest_file_name in
+ self.aws.list_prefix(self.TEST_DATA_PATH))
+
+ # Clean up
+ self.aws.delete_files([src_file_name, dest_file_name])
+
+ # Test copy of non-existent files.
+ with self.assertRaises(messages.S3ClientError) as err:
+ self.aws.copy(self.TEST_DATA_PATH + 'non-existent',
+ self.TEST_DATA_PATH + 'non-existent-destination')
+
+ self.assertTrue('Not Found' in err.exception.message)
+
+ def test_copy_paths(self):
+ from_name_pattern = self.TEST_DATA_PATH + 'copy_me_%d'
+ to_name_pattern = self.TEST_DATA_PATH + 'destination_%d'
+ file_size = 1024
+ num_files = 10
+
+ src_dest_pairs = [(from_name_pattern % i, to_name_pattern % i)
+ for i in range(num_files)]
+
+ result = self.aws.copy_paths(src_dest_pairs)
+
+ self.assertTrue(result)
+ for i, (src, dest, exception) in enumerate(result):
+ self.assertEqual(src, from_name_pattern % i)
+ self.assertEqual(dest, to_name_pattern % i)
+ self.assertTrue(isinstance(exception, messages.S3ClientError))
+ self.assertEqual(exception.code, 404)
+ self.assertFalse(self.aws.exists(from_name_pattern % i))
+ self.assertFalse(self.aws.exists(to_name_pattern % i))
+
+ # Insert some files.
+ for i in range(num_files):
+ self._insert_random_file(self.client, from_name_pattern % i, file_size)
+
+ # Check files inserted properly.
+ for i in range(num_files):
+ self.assertTrue(self.aws.exists(from_name_pattern % i))
+
+ # Execute batch copy.
+ result = self.aws.copy_paths(src_dest_pairs)
+
+ # Check files copied properly.
+ for i in range(num_files):
+ self.assertTrue(self.aws.exists(from_name_pattern % i))
+ self.assertTrue(self.aws.exists(to_name_pattern % i))
+
+ # Check results
+ for i, (src, dest, exception) in enumerate(result):
+ self.assertEqual(src_dest_pairs[i], (src, dest))
+ self.assertEqual(exception, None)
+
+ # Clean up
+ all_files = set().union(*[set(pair) for pair in src_dest_pairs])
+ self.aws.delete_files(all_files)
+
+ def test_copy_paths_error(self):
+ n_real_files = 3
+
+ # Create some files
+ from_path = self.TEST_DATA_PATH + 'copy_paths/'
+ files = [from_path + '%d' % i for i in range(n_real_files)]
+ to_path = self.TEST_DATA_PATH + 'destination/'
+ destinations = [to_path + '%d' % i for i in range(n_real_files)]
+ for file_ in files: self._insert_random_file(self.client, file_, 1024)
+
+ # Add nonexistent files to the sources and destinations
+ sources = files + [
+ from_path + 'X',
+ from_path + 'fake_directory_1/',
+ from_path + 'fake_directory_2/'
+ ]
+ destinations += [
+ to_path + 'X',
+ to_path + 'fake_directory_1/',
+ to_path + 'fake_directory_2'
+ ]
+
+ result = self.aws.copy_paths(list(zip(sources, destinations)))
+ self.assertEqual(len(result), len(sources))
+
+ for _, _, err in result[:n_real_files]:
+ self.assertTrue(err is None)
+
+ for _, _, err in result[n_real_files:]:
+ self.assertIsInstance(err, messages.S3ClientError)
+
+ self.assertEqual(result[-3][2].code, 404)
+ self.assertEqual(result[-2][2].code, 404)
+ self.assertEqual(result[-1][2].code, 400)
+
+ # Clean up
+ self.aws.delete_files(files)
+ self.aws.delete_files(destinations)
+
+
+ def test_copy_tree(self):
+ src_dir_name = self.TEST_DATA_PATH + 'source/'
+ dest_dir_name = self.TEST_DATA_PATH + 'dest/'
+ file_size = 1024
+ paths = ['a', 'b/c', 'b/d']
+ for path in paths:
+ src_file_name = src_dir_name + path
+ dest_file_name = dest_dir_name + path
+ self._insert_random_file(self.client, src_file_name, file_size)
+ self.assertTrue(
+ src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ self.assertFalse(
+ dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+
+ results = self.aws.copy_tree(src_dir_name, dest_dir_name)
+
+ for src_file_name, dest_file_name, err in results:
+
+ self.assertTrue(src_dir_name in src_file_name)
+ self.assertTrue(dest_dir_name in dest_file_name)
+ self.assertIsNone(err)
+
+ self.assertTrue(
+ src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ self.assertTrue(
+ dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+
+ # Clean up
+ for path in paths:
+ src_file_name = src_dir_name + path
+ dest_file_name = dest_dir_name + path
+ self.aws.delete_files([src_file_name, dest_file_name])
+
+ def test_rename(self):
+ src_file_name = self.TEST_DATA_PATH + 'source'
+ dest_file_name = self.TEST_DATA_PATH + 'dest'
+ file_size = 1024
+
+ self._insert_random_file(self.client, src_file_name, file_size)
+
+ self.assertTrue(
+ src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ self.assertFalse(
+ dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+
+ self.aws.rename(src_file_name, dest_file_name)
+
+ self.assertFalse(
+ src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+ self.assertTrue(
+ dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
+
+ # Clean up
+ self.aws.delete_files([src_file_name, dest_file_name])
+
+ def test_rename_files(self):
+ from_name_pattern = self.TEST_DATA_PATH + 'to_rename_%d'
+ to_name_pattern = self.TEST_DATA_PATH + 'been_renamed_%d'
+ file_size = 1024
+ num_files = 10
+
+ src_dest_pairs = [(from_name_pattern % i, to_name_pattern % i)
+ for i in range(num_files)]
+
+ result = self.aws.rename_files(src_dest_pairs)
+
+ self.assertTrue(result)
+ for i, (src, dest, exception) in enumerate(result):
+ self.assertEqual(src, from_name_pattern % i)
+ self.assertEqual(dest, to_name_pattern % i)
+ self.assertTrue(isinstance(exception, messages.S3ClientError))
+ self.assertEqual(exception.code, 404)
+ self.assertFalse(self.aws.exists(from_name_pattern % i))
+ self.assertFalse(self.aws.exists(to_name_pattern % i))
+
+ # Insert some files.
+ for i in range(num_files):
+ self._insert_random_file(self.client, from_name_pattern % i, file_size)
+
+ # Check files inserted properly.
+ for i in range(num_files):
+ self.assertTrue(self.aws.exists(from_name_pattern % i))
+ self.assertFalse(self.aws.exists(to_name_pattern % i))
+
+ # Execute batch rename.
+ self.aws.rename_files(src_dest_pairs)
+
+ # Check files were renamed properly.
+ for i in range(num_files):
+ self.assertFalse(self.aws.exists(from_name_pattern % i))
+ self.assertTrue(self.aws.exists(to_name_pattern % i))
+
+ # Clean up
+ all_files = set().union(*[set(pair) for pair in src_dest_pairs])
+ self.aws.delete_files(all_files)
+
+ def test_rename_files_with_errors(self):
+ real_prefix = self.TEST_DATA_PATH + 'rename_batch_%s'
+ fake_prefix = 's3://fake-bucket-68ae4b0ef7b9/rename_batch_%s'
+ src_dest_pairs = [(prefix % 'src', prefix % 'dest')
+ for prefix in (real_prefix, fake_prefix)]
+
+ # Create the file in the real bucket
+ self._insert_random_file(self.client, real_prefix % 'src', 1024)
+
+ # Execute batch rename
+ result = self.aws.rename_files(src_dest_pairs)
+
+ # First is the file in the real bucket, which shouldn't throw an error
+ self.assertEqual(result[0][0], src_dest_pairs[0][0])
+ self.assertEqual(result[0][1], src_dest_pairs[0][1])
+ self.assertIsNone(result[0][2])
+
+ # Second is the file in the fake bucket, which should throw a 404
+ self.assertEqual(result[1][0], src_dest_pairs[1][0])
+ self.assertEqual(result[1][1], src_dest_pairs[1][1])
+ self.assertEqual(result[1][2].code, 404)
+
+ # Clean up
+ self.aws.delete(real_prefix % 'dest')
+
+ def test_rename_files_with_errors_directory(self):
+
+ # Make file
+ dir_name = self.TEST_DATA_PATH + 'rename_dir/'
+ file_name = dir_name + 'file'
+ self._insert_random_file(self.client, file_name, 1024)
+
+ self.assertTrue(self.aws.exists(file_name))
+
+ with self.assertRaises(ValueError):
+ self.aws.rename_files([(file_name, self.TEST_DATA_PATH + 'dir_dest/')])
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ def test_delete_paths(self):
+ # Make files
+ prefix = self.TEST_DATA_PATH + 'delete_paths/'
+ file_names = [prefix + 'a', prefix + 'b/c']
+ for file_name in file_names:
+ self._insert_random_file(self.client, file_name, 1024)
+
+ self.assertTrue(self.aws.exists(file_names[0]))
+ self.assertTrue(self.aws.exists(file_names[1]))
+
+ # Delete paths
+ paths = [prefix + 'a', prefix + 'b/']
+ self.aws.delete_paths(paths)
+
+ self.assertFalse(self.aws.exists(file_names[0]))
+ self.assertFalse(self.aws.exists(file_names[1]))
+
+ def test_delete(self):
+ file_name = self.TEST_DATA_PATH + 'delete_file'
+ file_size = 1024
+
+ # Test deletion of non-existent file (shouldn't raise any error)
+ self.aws.delete(file_name)
+
+ # Create the file and check that it was created
+ self._insert_random_file(self.aws.client, file_name, file_size)
+ files = self.aws.list_prefix(self.TEST_DATA_PATH)
+ self.assertTrue(file_name in files)
+
+ # Delete the file and check that it was deleted
+ self.aws.delete(file_name)
+ self.assertFalse(self.aws.exists(file_name))
+
+ def test_delete_files(self, *unused_args):
+ file_name_pattern = self.TEST_DATA_PATH + 'delete_batch/%d'
+ file_size = 1024
+ num_files = 5
+
+ # Test deletion of non-existent files.
+ result = self.aws.delete_files(
+ [file_name_pattern % i for i in range(num_files)])
+ self.assertTrue(result)
+ for i, (file_name, exception) in enumerate(result):
+ self.assertEqual(file_name, file_name_pattern % i)
+ self.assertEqual(exception, None)
+ self.assertFalse(self.aws.exists(file_name_pattern % i))
+
+ # Insert some files.
+ for i in range(num_files):
+ self._insert_random_file(self.client, file_name_pattern % i, file_size)
+
+ # Check files inserted properly.
+ for i in range(num_files):
+ self.assertTrue(self.aws.exists(file_name_pattern % i))
+
+ # Execute batch delete.
+ self.aws.delete_files([file_name_pattern % i for i in range(num_files)])
+
+ # Check files deleted properly.
+ for i in range(num_files):
+ self.assertFalse(self.aws.exists(file_name_pattern % i))
+
+ def test_delete_files_with_errors(self, *unused_args):
+ real_file = self.TEST_DATA_PATH + 'delete_batch/file'
+ fake_file = 's3://fake-bucket-68ae4b0ef7b9/delete_batch/file'
+ filenames = [real_file, fake_file]
+
+ result = self.aws.delete_files(filenames)
+
+ # First is the file in the real bucket, which shouldn't throw an error
+ self.assertEqual(result[0][0], filenames[0])
+ self.assertIsNone(result[0][1])
+
+ # Second is the file in the fake bucket, which should throw a 404
+ self.assertEqual(result[1][0], filenames[1])
+ self.assertEqual(result[1][1].code, 404)
+
+ def test_delete_tree(self):
+
+ root_path = self.TEST_DATA_PATH + 'delete_tree/'
+ leaf_paths = ['a', 'b/c', 'b/d', 'b/d/e']
+ paths = [root_path + leaf for leaf in leaf_paths]
+
+ # Create file tree
+ file_size = 1024
+ for path in paths:
+ self._insert_random_file(self.client, path, file_size)
+
+ # Check that the files exist
+ for path in paths:
+ self.assertTrue(self.aws.exists(path))
+
+ # Delete the tree
+ self.aws.delete_tree(root_path)
+
+ # Check that the files have been deleted
+ for path in paths:
+ self.assertFalse(self.aws.exists(path))
+
+ def test_exists(self):
+ file_name = self.TEST_DATA_PATH + 'exists'
+ file_size = 1024
+
+ self.assertFalse(self.aws.exists(file_name))
+
+ self._insert_random_file(self.aws.client, file_name, file_size)
+
+ self.assertTrue(self.aws.exists(file_name))
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ self.assertFalse(self.aws.exists(file_name))
+
+ def test_file_mode(self):
+ file_name = self.TEST_DATA_PATH + 'jerry/pigpen/bobby'
+ with self.aws.open(file_name, 'w') as f:
+ assert f.mode == 'w'
+ with self.aws.open(file_name, 'r') as f:
+ assert f.mode == 'r'
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ def test_full_file_read(self):
+ file_name = self.TEST_DATA_PATH + 'jerry/pigpen/phil'
+ file_size = 1024
+
+ f = self._insert_random_file(self.aws.client, file_name, file_size)
+ contents = f.contents
+
+ f = self.aws.open(file_name)
+ self.assertEqual(f.mode, 'r')
+ f.seek(0, os.SEEK_END)
+ self.assertEqual(f.tell(), file_size)
+ self.assertEqual(f.read(), b'')
+ f.seek(0)
+ self.assertEqual(f.read(), contents)
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ def test_file_write(self):
+ file_name = self.TEST_DATA_PATH + 'write_file'
+ file_size = 8 * 1024 * 1024 + 2000
+ contents = os.urandom(file_size)
+ f = self.aws.open(file_name, 'w')
+ self.assertEqual(f.mode, 'w')
+ f.write(contents[0:1000])
+ f.write(contents[1000:1024 * 1024])
+ f.write(contents[1024 * 1024:])
+ f.close()
+ new_f = self.aws.open(file_name, 'r')
+ new_f_contents = new_f.read()
+ self.assertEqual(
+ new_f_contents, contents)
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ def test_file_mime_type(self):
+ if self.USE_MOCK:
+ self.skipTest("The boto3_client mock doesn't support mime_types")
+
+ mime_type = 'example/example'
+ file_name = self.TEST_DATA_PATH + 'write_file'
+ f = self.aws.open(file_name, 'w', mime_type=mime_type)
+ f.write(b'a string of binary text')
+ f.close()
+
+ bucket, key = s3io.parse_s3_path(file_name)
+ metadata = self.client.get_object_metadata(messages.GetRequest(bucket, key))
+
+ self.assertEqual(mime_type, metadata.mime_type)
+
+ # Clean up
+ self.aws.delete(file_name)
+
+
+ def test_file_random_seek(self):
+ file_name = self.TEST_DATA_PATH + 'write_seek_file'
+ file_size = 5 * 1024 * 1024 - 100
+ contents = os.urandom(file_size)
+ with self.aws.open(file_name, 'w') as wf:
+ wf.write(contents)
+
+ f = self.aws.open(file_name)
+ random.seed(0)
+
+ for _ in range(0, 10):
+ a = random.randint(0, file_size - 1)
+ b = random.randint(0, file_size - 1)
+ start, end = min(a, b), max(a, b)
+ f.seek(start)
+
+ self.assertEqual(f.tell(), start)
+
+ self.assertEqual(
+ f.read(end - start + 1), contents[start:end + 1]
+ )
+ self.assertEqual(f.tell(), end + 1)
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ def test_file_flush(self):
+ file_name = self.TEST_DATA_PATH + 'flush_file'
+ file_size = 5 * 1024 * 1024 + 2000
+ contents = os.urandom(file_size)
+ f = self.aws.open(file_name, 'w')
+ self.assertEqual(f.mode, 'w')
+ f.write(contents[0:1000])
+ f.flush()
+ f.write(contents[1000:1024 * 1024])
+ f.flush()
+ f.flush() # Should be a NOOP.
+ f.write(contents[1024 * 1024:])
+ f.close() # This should already call the equivalent of flush() in its body
+ new_f = self.aws.open(file_name, 'r')
+ new_f_contents = new_f.read()
+ self.assertEqual(
+ new_f_contents, contents)
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ def test_file_iterator(self):
+ file_name = self.TEST_DATA_PATH + 'iterate_file'
+ lines = []
+ line_count = 10
+ for _ in range(line_count):
+ line_length = random.randint(100, 500)
+ line = os.urandom(line_length).replace(b'\n', b' ') + b'\n'
+ lines.append(line)
+
+ contents = b''.join(lines)
+
+ with self.aws.open(file_name, 'w') as wf:
+ wf.write(contents)
+
+ f = self.aws.open(file_name)
+
+ read_lines = 0
+ for line in f:
+ read_lines += 1
+
+ self.assertEqual(read_lines, line_count)
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ def test_file_read_line(self):
+ file_name = self.TEST_DATA_PATH + 'read_line_file'
+ lines = []
+
+ # Set a small buffer size to exercise refilling the buffer.
+ # First line is carefully crafted so the newline falls as the last character
+ # of the buffer to exercise this code path.
+ read_buffer_size = 1099
+ lines.append(b'x' * 1023 + b'\n')
+
+ for _ in range(1, 1000):
+ line_length = random.randint(100, 500)
+ line = os.urandom(line_length).replace(b'\n', b' ') + b'\n'
+ lines.append(line)
+ contents = b''.join(lines)
+
+ file_size = len(contents)
+
+ with self.aws.open(file_name, 'wb') as wf:
+ wf.write(contents)
+
+ f = self.aws.open(file_name, 'rb', read_buffer_size=read_buffer_size)
+
+ # Test read of first two lines.
+ f.seek(0)
+ self.assertEqual(f.readline(), lines[0])
+ self.assertEqual(f.tell(), len(lines[0]))
+ self.assertEqual(f.readline(), lines[1])
+
+ # Test read at line boundary.
+ f.seek(file_size - len(lines[-1]) - 1)
+ self.assertEqual(f.readline(), b'\n')
+
+ # Test read at end of file.
+ f.seek(file_size)
+ self.assertEqual(f.readline(), b'')
+
+ # Test reads at random positions.
+ random.seed(0)
+ for _ in range(0, 10):
+ start = random.randint(0, file_size - 1)
+ line_index = 0
+ # Find line corresponding to start index.
+ chars_left = start
+ while True:
+ next_line_length = len(lines[line_index])
+ if chars_left - next_line_length < 0:
+ break
+ chars_left -= next_line_length
+ line_index += 1
+ f.seek(start)
+ self.assertEqual(f.readline(), lines[line_index][chars_left:])
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ def test_file_close(self):
+ file_name = self.TEST_DATA_PATH + 'close_file'
+ file_size = 5 * 1024 * 1024 + 2000
+ contents = os.urandom(file_size)
+ f = self.aws.open(file_name, 'w')
+ self.assertEqual(f.mode, 'w')
+ f.write(contents)
+ f.close()
+ f.close() # This should not crash.
+
+ with self.aws.open(file_name, 'r') as f:
+ read_contents = f.read()
+
+ self.assertEqual(
+ read_contents, contents)
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ def test_context_manager(self):
+ # Test writing with a context manager.
+ file_name = self.TEST_DATA_PATH + 'context_manager_file'
+ file_size = 1024
+ contents = os.urandom(file_size)
+ with self.aws.open(file_name, 'w') as f:
+ f.write(contents)
+
+ with self.aws.open(file_name, 'r') as f:
+ self.assertEqual(f.read(), contents)
+
+ # Clean up
+ self.aws.delete(file_name)
+
+ def test_list_prefix(self):
+
+ objects = [
+ ('jerry/pigpen/phil', 5),
+ ('jerry/pigpen/bobby', 3),
+ ('jerry/billy/bobby', 4),
+ ]
+
+ for (object_name, size) in objects:
+ file_name = self.TEST_DATA_PATH + object_name
+ self._insert_random_file(self.aws.client, file_name, size)
+
+ test_cases = [
+ (self.TEST_DATA_PATH + 'j', [
+ ('jerry/pigpen/phil', 5),
+ ('jerry/pigpen/bobby', 3),
+ ('jerry/billy/bobby', 4),
+ ]),
+ (self.TEST_DATA_PATH + 'jerry/', [
+ ('jerry/pigpen/phil', 5),
+ ('jerry/pigpen/bobby', 3),
+ ('jerry/billy/bobby', 4),
+ ]),
+ (self.TEST_DATA_PATH + 'jerry/pigpen/phil', [
+ ('jerry/pigpen/phil', 5),
+ ]),
+ ]
+
+ for file_pattern, expected_object_names in test_cases:
+ expected_file_names = [(self.TEST_DATA_PATH + object_name, size)
+ for (object_name, size) in expected_object_names]
+ self.assertEqual(
+ set(self.aws.list_prefix(file_pattern).items()),
+ set(expected_file_names))
+
+ # Clean up
+ for (object_name, size) in objects:
+ self.aws.delete(self.TEST_DATA_PATH + object_name)
+
+
+if __name__ == '__main__':
+ logging.getLogger().setLevel(logging.INFO)
+ unittest.main()
diff --git a/sdks/python/apache_beam/io/concat_source.py b/sdks/python/apache_beam/io/concat_source.py
index ddf3a77..4446e60 100644
--- a/sdks/python/apache_beam/io/concat_source.py
+++ b/sdks/python/apache_beam/io/concat_source.py
@@ -19,6 +19,8 @@
Concat Source, which reads the union of several other sources.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/concat_source_test.py b/sdks/python/apache_beam/io/concat_source_test.py
index 8a5b0fa..eea44e0 100644
--- a/sdks/python/apache_beam/io/concat_source_test.py
+++ b/sdks/python/apache_beam/io/concat_source_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for the sources framework."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -37,7 +39,7 @@
class RangeSource(iobase.BoundedSource):
- __hash__ = None
+ __hash__ = None # type: ignore[assignment]
def __init__(self, start, end, split_freq=1):
assert start <= end
diff --git a/sdks/python/apache_beam/io/external/gcp/pubsub.py b/sdks/python/apache_beam/io/external/gcp/pubsub.py
index f0988ed..d417b42 100644
--- a/sdks/python/apache_beam/io/external/gcp/pubsub.py
+++ b/sdks/python/apache_beam/io/external/gcp/pubsub.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import typing
diff --git a/sdks/python/apache_beam/io/external/generate_sequence.py b/sdks/python/apache_beam/io/external/generate_sequence.py
index a17ec7b..47f9297 100644
--- a/sdks/python/apache_beam/io/external/generate_sequence.py
+++ b/sdks/python/apache_beam/io/external/generate_sequence.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from apache_beam.transforms.external import ExternalTransform
diff --git a/sdks/python/apache_beam/io/external/generate_sequence_test.py b/sdks/python/apache_beam/io/external/generate_sequence_test.py
index 95d6fbc..652e47b 100644
--- a/sdks/python/apache_beam/io/external/generate_sequence_test.py
+++ b/sdks/python/apache_beam/io/external/generate_sequence_test.py
@@ -17,6 +17,8 @@
"""Unit tests for cross-language generate sequence."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/io/external/kafka.py b/sdks/python/apache_beam/io/external/kafka.py
index 04d91a7..44bce78 100644
--- a/sdks/python/apache_beam/io/external/kafka.py
+++ b/sdks/python/apache_beam/io/external/kafka.py
@@ -35,6 +35,8 @@
- https://beam.apache.org/roadmap/portability/
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import typing
diff --git a/sdks/python/apache_beam/io/external/xlang_parquetio_test.py b/sdks/python/apache_beam/io/external/xlang_parquetio_test.py
index 434bb3b..ed49a58 100644
--- a/sdks/python/apache_beam/io/external/xlang_parquetio_test.py
+++ b/sdks/python/apache_beam/io/external/xlang_parquetio_test.py
@@ -17,6 +17,8 @@
"""Unit tests for cross-language parquet io read/write."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/io/filebasedsink.py b/sdks/python/apache_beam/io/filebasedsink.py
index 76143dd..e9dfa05 100644
--- a/sdks/python/apache_beam/io/filebasedsink.py
+++ b/sdks/python/apache_beam/io/filebasedsink.py
@@ -17,6 +17,8 @@
"""File-based sink."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -63,7 +65,7 @@
# Max number of threads to be used for renaming.
_MAX_RENAME_THREADS = 64
- __hash__ = None
+ __hash__ = None # type: ignore[assignment]
def __init__(self,
file_path_prefix,
diff --git a/sdks/python/apache_beam/io/filebasedsink_test.py b/sdks/python/apache_beam/io/filebasedsink_test.py
index 4c5ef6b..6bfebd5 100644
--- a/sdks/python/apache_beam/io/filebasedsink_test.py
+++ b/sdks/python/apache_beam/io/filebasedsink_test.py
@@ -18,6 +18,8 @@
"""Unit tests for file sinks."""
+# pytype: skip-file
+
from __future__ import absolute_import
import glob
diff --git a/sdks/python/apache_beam/io/filebasedsource.py b/sdks/python/apache_beam/io/filebasedsource.py
index ec16b06..422b15b 100644
--- a/sdks/python/apache_beam/io/filebasedsource.py
+++ b/sdks/python/apache_beam/io/filebasedsource.py
@@ -26,8 +26,12 @@
:class:`~apache_beam.io._AvroSource`.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
+from typing import Callable
+
from past.builtins import long
from past.builtins import unicode
@@ -71,7 +75,7 @@
file_pattern (str): the file glob to read a string or a
:class:`~apache_beam.options.value_provider.ValueProvider`
(placeholder to inject a runtime value).
- min_bundle_size (str): minimum size of bundles that should be generated
+ min_bundle_size (int): minimum size of bundles that should be generated
when performing initial splitting on this source.
compression_type (str): Used to handle compressed output files.
Typical value is :attr:`CompressionTypes.AUTO
@@ -128,6 +132,7 @@
@check_accessible(['_pattern'])
def _get_concat_source(self):
+ # type: () -> concat_source.ConcatSource
if self._concat_source is None:
pattern = self._pattern.get()
@@ -358,6 +363,7 @@
class _ReadRange(DoFn):
def __init__(self, source_from_file):
+ # type: (Callable[[str], iobase.BoundedSource]) -> None
self._source_from_file = source_from_file
def process(self, element, *args, **kwargs):
@@ -380,9 +386,13 @@
read a PCollection of files.
"""
- def __init__(
- self, splittable, compression_type, desired_bundle_size, min_bundle_size,
- source_from_file):
+ def __init__(self,
+ splittable, # type: bool
+ compression_type,
+ desired_bundle_size, # type: int
+ min_bundle_size, # type: int
+ source_from_file, # type: Callable[[str], iobase.BoundedSource]
+ ):
"""
Args:
splittable: If False, files won't be split into sub-ranges. If True,
diff --git a/sdks/python/apache_beam/io/filebasedsource_test.py b/sdks/python/apache_beam/io/filebasedsource_test.py
index e777e9f..2c5bd98 100644
--- a/sdks/python/apache_beam/io/filebasedsource_test.py
+++ b/sdks/python/apache_beam/io/filebasedsource_test.py
@@ -14,6 +14,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/fileio.py b/sdks/python/apache_beam/io/fileio.py
index 14c35bc..ce8f0bc 100644
--- a/sdks/python/apache_beam/io/fileio.py
+++ b/sdks/python/apache_beam/io/fileio.py
@@ -88,12 +88,21 @@
No backward compatibility guarantees. Everything in this module is experimental.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
import logging
import random
import uuid
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import BinaryIO # pylint: disable=unused-import
+from typing import Callable
+from typing import DefaultDict
+from typing import Dict
+from typing import Tuple
from past.builtins import unicode
@@ -107,6 +116,9 @@
from apache_beam.transforms.window import GlobalWindow
from apache_beam.utils.annotations import experimental
+if TYPE_CHECKING:
+ from apache_beam.transforms.window import BoundedWindow
+
__all__ = ['EmptyMatchTreatment',
'MatchFiles',
'MatchAll',
@@ -272,6 +284,7 @@
"""
def open(self, fh):
+ # type: (BinaryIO) -> None
raise NotImplementedError
def write(self, record):
@@ -454,6 +467,7 @@
@staticmethod
def _get_sink_fn(input_sink):
+ # type: (...) -> Callable[[Any], FileSink]
if isinstance(input_sink, FileSink):
return lambda x: input_sink
elif callable(input_sink):
@@ -463,6 +477,7 @@
@staticmethod
def _get_destination_fn(destination):
+ # type: (...) -> Callable[[Any], str]
if isinstance(destination, ValueProvider):
return lambda elm: destination.get()
elif callable(destination):
@@ -603,7 +618,11 @@
class _WriteShardedRecordsFn(beam.DoFn):
- def __init__(self, base_path, sink_fn, shards):
+ def __init__(self,
+ base_path,
+ sink_fn, # type: Callable[[Any], FileSink]
+ shards # type: int
+ ):
self.base_path = base_path
self.sink_fn = sink_fn
self.shards = shards
@@ -641,13 +660,16 @@
class _AppendShardedDestination(beam.DoFn):
- def __init__(self, destination, shards):
+ def __init__(self,
+ destination, # type: Callable[[Any], str]
+ shards # type: int
+ ):
self.destination_fn = destination
self.shards = shards
# We start the shards for a single destination at an arbitrary point.
self._shard_counter = collections.defaultdict(
- lambda: random.randrange(self.shards))
+ lambda: random.randrange(self.shards)) # type: DefaultDict[str, int]
def _next_shard_for_destination(self, destination):
self._shard_counter[destination] = (
@@ -667,6 +689,9 @@
SPILLED_RECORDS = 'spilled_records'
WRITTEN_FILES = 'written_files'
+ _writers_and_sinks = None # type: Dict[Tuple[str, BoundedWindow], Tuple[BinaryIO, FileSink]]
+ _file_names = None # type: Dict[Tuple[str, BoundedWindow], str]
+
def __init__(self,
base_path,
destination_fn,
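`BoundedWindow` is now imported only under `typing.TYPE_CHECKING`, so the checker can resolve the name while the runtime pays no import cost and no import cycle is introduced. A standalone sketch of the pattern, using `decimal.Decimal` as a stand-in for the guarded import:

```python
from typing import TYPE_CHECKING, Dict, Tuple

if TYPE_CHECKING:
  # Imported only while type checking; never executed at runtime, so it adds
  # no runtime dependency and cannot create an import cycle.
  from decimal import Decimal  # stand-in for a heavyweight import


class RunningTotals(object):
  # Type comments are never evaluated at runtime, so a TYPE_CHECKING-only
  # name such as Decimal can appear in them directly, just as BoundedWindow
  # does in the class attributes above.
  _totals = None  # type: Dict[Tuple[str, Decimal], int]

  def __init__(self):
    self._totals = {}
```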
diff --git a/sdks/python/apache_beam/io/fileio_test.py b/sdks/python/apache_beam/io/fileio_test.py
index b724910..946fac0 100644
--- a/sdks/python/apache_beam/io/fileio_test.py
+++ b/sdks/python/apache_beam/io/fileio_test.py
@@ -17,6 +17,8 @@
"""Tests for transforms defined in apache_beam.io.fileio."""
+# pytype: skip-file
+
from __future__ import absolute_import
import csv
@@ -513,6 +515,7 @@
# TODO(BEAM-3759): Add many firings per window after getting PaneInfo.
ts.advance_processing_time(5)
ts.advance_watermark_to(timestamp)
+ ts.advance_watermark_to_infinity()
def no_colon_file_naming(*args):
file_name = fileio.destination_prefix_naming()(*args)
@@ -572,7 +575,8 @@
.add_elements([next(input), next(input)])
.advance_watermark_to(30)
.add_elements([next(input), next(input)])
- .advance_watermark_to(40))
+ .advance_watermark_to(40)
+ .advance_watermark_to_infinity())
def no_colon_file_naming(*args):
file_name = fileio.destination_prefix_naming()(*args)
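Both streaming tests now finish their `TestStream` with `advance_watermark_to_infinity()`, which closes the final window so the windowed write actually fires. A minimal sketch of the same idea outside these tests, assuming the DirectRunner:

```python
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import StandardOptions
from apache_beam.testing.test_stream import TestStream
from apache_beam.transforms import window

options = PipelineOptions()
options.view_as(StandardOptions).streaming = True

stream = (TestStream()
          .advance_watermark_to(0)
          .add_elements(['a', 'b'])
          .advance_watermark_to(20)
          .add_elements(['c'])
          .advance_watermark_to_infinity())  # closes the final window

with beam.Pipeline(options=options) as p:
  _ = (p
       | stream
       | beam.WindowInto(window.FixedWindows(10))
       | beam.Map(lambda x: (x, 1))
       | beam.CombinePerKey(sum)
       | beam.Map(print))
```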
diff --git a/sdks/python/apache_beam/io/filesystem.py b/sdks/python/apache_beam/io/filesystem.py
index c2bc312..6643443 100644
--- a/sdks/python/apache_beam/io/filesystem.py
+++ b/sdks/python/apache_beam/io/filesystem.py
@@ -21,6 +21,8 @@
LocalFileSystem, which gets unix-style paths in the form /foo/bar.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -35,6 +37,8 @@
import zlib
from builtins import object
from builtins import zip
+from typing import BinaryIO # pylint: disable=unused-import
+from typing import Tuple
from future.utils import with_metaclass
from past.builtins import long
@@ -445,7 +449,7 @@
self.exception_details = exception_details
-class FileSystem(with_metaclass(abc.ABCMeta, BeamPlugin)):
+class FileSystem(with_metaclass(abc.ABCMeta, BeamPlugin)): # type: ignore[misc]
"""A class that defines the functions that can be performed on a filesystem.
All methods are abstract and they are for file system providers to
@@ -478,6 +482,7 @@
@abc.abstractmethod
def join(self, basepath, *paths):
+ # type: (str, *str) -> str
"""Join two or more pathname components for the filesystem
Args:
@@ -490,6 +495,7 @@
@abc.abstractmethod
def split(self, path):
+ # type: (str) -> Tuple[str, str]
"""Splits the given path into two parts.
Splits the path into a pair (head, tail) such that tail contains the last
@@ -717,6 +723,7 @@
@abc.abstractmethod
def create(self, path, mime_type='application/octet-stream',
compression_type=CompressionTypes.AUTO):
+ # type: (...) -> BinaryIO
"""Returns a write channel for the given file path.
Args:
@@ -731,6 +738,7 @@
@abc.abstractmethod
def open(self, path, mime_type='application/octet-stream',
compression_type=CompressionTypes.AUTO):
+ # type: (...) -> BinaryIO
"""Returns a read channel for the given file path.
Args:
@@ -771,6 +779,7 @@
@abc.abstractmethod
def exists(self, path):
+ # type: (str) -> bool
"""Check if the provided path exists on the FileSystem.
Args:
@@ -782,6 +791,7 @@
@abc.abstractmethod
def size(self, path):
+ # type: (str) -> int
"""Get size in bytes of a file on the FileSystem.
Args:
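The abstract `FileSystem` methods now carry type comments: paths in and out as `str`, read/write channels as `BinaryIO`. A toy, in-memory stand-in that mirrors those signatures (not a real Beam `FileSystem` subclass, which has many more abstract methods):

```python
import io
import posixpath
from typing import BinaryIO, Dict, Tuple


class InMemoryFiles(object):
  """Toy stand-in mirroring the annotated FileSystem signatures."""

  def __init__(self):
    self._blobs = {}  # type: Dict[str, io.BytesIO]

  def join(self, basepath, *paths):
    # type: (str, *str) -> str
    return posixpath.join(basepath, *paths)

  def split(self, path):
    # type: (str) -> Tuple[str, str]
    return posixpath.split(path)

  def create(self, path):
    # type: (str) -> BinaryIO
    buf = io.BytesIO()
    self._blobs[path] = buf  # toy write channel; a real FS stages an upload
    return buf

  def open(self, path):
    # type: (str) -> BinaryIO
    return io.BytesIO(self._blobs[path].getvalue())

  def exists(self, path):
    # type: (str) -> bool
    return path in self._blobs

  def size(self, path):
    # type: (str) -> int
    return len(self._blobs[path].getvalue())
```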
diff --git a/sdks/python/apache_beam/io/filesystem_test.py b/sdks/python/apache_beam/io/filesystem_test.py
index fbc094f..6880163 100644
--- a/sdks/python/apache_beam/io/filesystem_test.py
+++ b/sdks/python/apache_beam/io/filesystem_test.py
@@ -17,6 +17,8 @@
#
"""Unit tests for filesystem module."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/filesystemio.py b/sdks/python/apache_beam/io/filesystemio.py
index b2d74c1..6197046 100644
--- a/sdks/python/apache_beam/io/filesystemio.py
+++ b/sdks/python/apache_beam/io/filesystemio.py
@@ -16,6 +16,8 @@
#
"""Utilities for ``FileSystem`` implementations."""
+# pytype: skip-file
+
from __future__ import absolute_import
import abc
@@ -29,7 +31,7 @@
'PipeStream']
-class Downloader(with_metaclass(abc.ABCMeta, object)):
+class Downloader(with_metaclass(abc.ABCMeta, object)): # type: ignore[misc]
"""Download interface for a single file.
Implementations should support random access reads.
@@ -55,7 +57,7 @@
"""
-class Uploader(with_metaclass(abc.ABCMeta, object)):
+class Uploader(with_metaclass(abc.ABCMeta, object)): # type: ignore[misc]
"""Upload interface for a single file."""
@abc.abstractmethod
diff --git a/sdks/python/apache_beam/io/filesystemio_test.py b/sdks/python/apache_beam/io/filesystemio_test.py
index 7797eb8..90a397e 100644
--- a/sdks/python/apache_beam/io/filesystemio_test.py
+++ b/sdks/python/apache_beam/io/filesystemio_test.py
@@ -16,6 +16,8 @@
#
"""Tests for filesystemio."""
+# pytype: skip-file
+
from __future__ import absolute_import
import io
diff --git a/sdks/python/apache_beam/io/filesystems.py b/sdks/python/apache_beam/io/filesystems.py
index d8b3a4a..e3a072d 100644
--- a/sdks/python/apache_beam/io/filesystems.py
+++ b/sdks/python/apache_beam/io/filesystems.py
@@ -17,10 +17,13 @@
"""FileSystems interface class for accessing the correct filesystem"""
+# pytype: skip-file
+
from __future__ import absolute_import
import re
from builtins import object
+from typing import BinaryIO # pylint: disable=unused-import
from past.builtins import unicode
@@ -50,6 +53,11 @@
except ImportError:
pass
+try:
+ from apache_beam.io.aws.s3filesystem import S3FileSystem
+except ImportError:
+ pass
+
# pylint: enable=wrong-import-position, unused-import
@@ -82,6 +90,7 @@
@staticmethod
def get_filesystem(path):
+ # type: (str) -> FileSystems
"""Get the correct filesystem for the specified path
"""
try:
@@ -89,7 +98,10 @@
systems = [fs for fs in FileSystem.get_all_subclasses()
if fs.scheme() == path_scheme]
if len(systems) == 0:
- raise ValueError('Unable to get the Filesystem for path %s' % path)
+ raise ValueError(
+ 'Unable to get filesystem from specified path, please use the '
+ 'correct path or ensure the required dependency is installed, '
+ 'e.g., pip install apache_beam[gcp]. Path specified: %s' % path)
elif len(systems) == 1:
# Pipeline options could come either from the Pipeline itself (using
# direct runner), or via RuntimeValueProvider (other runners).
@@ -105,6 +117,7 @@
@staticmethod
def join(basepath, *paths):
+ # type: (str, *str) -> str
"""Join two or more pathname components for the filesystem
Args:
@@ -189,6 +202,7 @@
@staticmethod
def create(path, mime_type='application/octet-stream',
compression_type=CompressionTypes.AUTO):
+ # type: (...) -> BinaryIO
"""Returns a write channel for the given file path.
Args:
@@ -205,6 +219,7 @@
@staticmethod
def open(path, mime_type='application/octet-stream',
compression_type=CompressionTypes.AUTO):
+ # type: (...) -> BinaryIO
"""Returns a read channel for the given file path.
Args:
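`get_filesystem` still resolves the filesystem from the path's scheme; the change here is the friendlier error when no registered filesystem matches, which usually means an optional extra is missing. A quick sketch (local paths always resolve; `gs://` or `s3://` only resolve with `apache_beam[gcp]` or `[aws]` installed):

```python
from apache_beam.io.filesystems import FileSystems

# Local paths always resolve to LocalFileSystem.
print(type(FileSystems.get_filesystem('/tmp/data.txt')).__name__)

try:
  FileSystems.get_filesystem('unknown://bucket/key')
except ValueError as e:
  # The improved message points at the likely fix (installing the missing
  # extra) instead of only echoing the path.
  print(e)
```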
diff --git a/sdks/python/apache_beam/io/filesystems_test.py b/sdks/python/apache_beam/io/filesystems_test.py
index 17cec46..d2133b0 100644
--- a/sdks/python/apache_beam/io/filesystems_test.py
+++ b/sdks/python/apache_beam/io/filesystems_test.py
@@ -18,6 +18,8 @@
"""Unit tests for LocalFileSystem."""
+# pytype: skip-file
+
from __future__ import absolute_import
import filecmp
diff --git a/sdks/python/apache_beam/io/flink/flink_streaming_impulse_source.py b/sdks/python/apache_beam/io/flink/flink_streaming_impulse_source.py
index 1edf743..05b2f2b 100644
--- a/sdks/python/apache_beam/io/flink/flink_streaming_impulse_source.py
+++ b/sdks/python/apache_beam/io/flink/flink_streaming_impulse_source.py
@@ -20,9 +20,13 @@
This can only be used with the flink runner.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import json
+from typing import Any
+from typing import Dict
from apache_beam import PTransform
from apache_beam import Windowing
@@ -33,7 +37,7 @@
class FlinkStreamingImpulseSource(PTransform):
URN = "flink:transform:streaming_impulse:v1"
- config = {}
+ config = {} # type: Dict[str, Any]
def expand(self, pbegin):
assert isinstance(pbegin, pvalue.PBegin), (
diff --git a/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py b/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
index d357946..91c100e 100644
--- a/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
+++ b/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
@@ -18,6 +18,8 @@
Integration test for Google Cloud BigQuery.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import base64
diff --git a/sdks/python/apache_beam/io/gcp/big_query_query_to_table_pipeline.py b/sdks/python/apache_beam/io/gcp/big_query_query_to_table_pipeline.py
index fec9f6b..6034aaa 100644
--- a/sdks/python/apache_beam/io/gcp/big_query_query_to_table_pipeline.py
+++ b/sdks/python/apache_beam/io/gcp/big_query_query_to_table_pipeline.py
@@ -21,6 +21,8 @@
big query table at the end of the pipeline.
"""
+# pytype: skip-file
+
# pylint: disable=wrong-import-order, wrong-import-position
from __future__ import absolute_import
diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py
index 61c93ff..78cd98d 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery.py
@@ -229,9 +229,12 @@
returned as base64-encoded bytes.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
+import decimal
import itertools
import json
import logging
@@ -248,12 +251,20 @@
from apache_beam import pvalue
from apache_beam.internal.gcp.json_value import from_json_value
from apache_beam.internal.gcp.json_value import to_json_value
+from apache_beam.io.filesystems import CompressionTypes
+from apache_beam.io.filesystems import FileSystems
from apache_beam.io.gcp import bigquery_tools
from apache_beam.io.gcp.internal.clients import bigquery
+from apache_beam.io.iobase import BoundedSource
+from apache_beam.io.iobase import RangeTracker
+from apache_beam.io.iobase import SourceBundle
+from apache_beam.io.textio import _TextSource as TextSource
from apache_beam.options import value_provider as vp
from apache_beam.options.pipeline_options import DebugOptions
from apache_beam.options.pipeline_options import GoogleCloudOptions
from apache_beam.options.pipeline_options import StandardOptions
+from apache_beam.options.value_provider import StaticValueProvider
+from apache_beam.options.value_provider import ValueProvider
from apache_beam.runners.dataflow.native_io import iobase as dataflow_io
from apache_beam.transforms import DoFn
from apache_beam.transforms import ParDo
@@ -262,6 +273,7 @@
from apache_beam.transforms.window import GlobalWindows
from apache_beam.utils import retry
from apache_beam.utils.annotations import deprecated
+from apache_beam.utils.annotations import experimental
__all__ = [
'TableRowJsonCoder',
@@ -269,6 +281,7 @@
'BigQuerySource',
'BigQuerySink',
'WriteToBigQuery',
+ '_ReadFromBigQuery',
'SCHEMA_AUTODETECT',
]
@@ -499,6 +512,194 @@
kms_key=self.kms_key)
+FieldSchema = collections.namedtuple('FieldSchema', 'fields mode name type')
+
+
+def _to_bool(value):
+ return value == 'true'
+
+
+def _to_decimal(value):
+ return decimal.Decimal(value)
+
+
+def _to_bytes(value):
+ """Converts value from str to bytes on Python 3.x. Does nothing on
+ Python 2.7."""
+ return value.encode('utf-8')
+
+
+class _JsonToDictCoder(coders.Coder):
+ """A coder for a JSON string to a Python dict."""
+
+ def __init__(self, table_schema):
+ self.fields = self._convert_to_tuple(table_schema.fields)
+ self._converters = {
+ 'INTEGER': int,
+ 'INT64': int,
+ 'FLOAT': float,
+ 'BOOLEAN': _to_bool,
+ 'NUMERIC': _to_decimal,
+ 'BYTES': _to_bytes,
+ }
+
+ @classmethod
+ def _convert_to_tuple(cls, table_field_schemas):
+ """Recursively converts the list of TableFieldSchema instances to the
+ list of tuples to prevent errors when pickling and unpickling
+ TableFieldSchema instances.
+ """
+ if not table_field_schemas:
+ return []
+
+ return [FieldSchema(cls._convert_to_tuple(x.fields), x.mode, x.name,
+ x.type)
+ for x in table_field_schemas]
+
+ def decode(self, value):
+ value = json.loads(value.decode('utf-8'))
+ return self._decode_with_schema(value, self.fields)
+
+ def _decode_with_schema(self, value, schema_fields):
+ for field in schema_fields:
+ if field.name not in value:
+ # The field exists in the schema but not in this row; its value was
+ # most likely null, since the extract-to-JSON job does not preserve
+ # null fields.
+ value[field.name] = None
+ continue
+
+ if field.type == 'RECORD':
+ value[field.name] = self._decode_with_schema(value[field.name],
+ field.fields)
+ else:
+ try:
+ converter = self._converters[field.type]
+ value[field.name] = converter(value[field.name])
+ except KeyError:
+ # No need to do any conversion
+ pass
+ return value
+
+ def is_deterministic(self):
+ return True
+
+ def to_type_hint(self):
+ return dict
+
+
+class _CustomBigQuerySource(BoundedSource):
+ def __init__(self, gcs_location=None, table=None, dataset=None,
+ project=None, query=None, validate=False, coder=None,
+ use_standard_sql=False, flatten_results=True, kms_key=None):
+ if table is not None and query is not None:
+ raise ValueError('Both a BigQuery table and a query were specified.'
+ ' Please specify only one of these.')
+ elif table is None and query is None:
+ raise ValueError('A BigQuery table or a query must be specified')
+ elif table is not None:
+ self.table_reference = bigquery_tools.parse_table_reference(
+ table, dataset, project)
+ self.query = None
+ self.use_legacy_sql = True
+ else:
+ self.query = query
+ # TODO(BEAM-1082): Change the internal flag to be standard_sql
+ self.use_legacy_sql = not use_standard_sql
+ self.table_reference = None
+
+ self.gcs_location = gcs_location
+ self.project = project
+ self.validate = validate
+ self.flatten_results = flatten_results
+ self.coder = coder or _JsonToDictCoder
+ self.kms_key = kms_key
+ self.split_result = None
+
+ def estimate_size(self):
+ bq = bigquery_tools.BigQueryWrapper()
+ if self.table_reference is not None:
+ table = bq.get_table(self.table_reference.projectId,
+ self.table_reference.datasetId,
+ self.table_reference.tableId)
+ return int(table.numBytes)
+ else:
+ job = bq._start_query_job(self.project, self.query,
+ self.use_legacy_sql, self.flatten_results,
+ job_id=uuid.uuid4().hex, dry_run=True,
+ kms_key=self.kms_key)
+ size = int(job.statistics.totalBytesProcessed)
+ return size
+
+ def split(self, desired_bundle_size, start_position=None, stop_position=None):
+ if self.split_result is None:
+ bq = bigquery_tools.BigQueryWrapper()
+
+ if self.query is not None:
+ self._setup_temporary_dataset(bq)
+ self.table_reference = self._execute_query(bq)
+
+ schema, metadata_list = self._export_files(bq)
+ self.split_result = [TextSource(metadata.path, 0,
+ CompressionTypes.UNCOMPRESSED, True,
+ self.coder(schema))
+ for metadata in metadata_list]
+
+ if self.query is not None:
+ bq.clean_up_temporary_dataset(self.project)
+
+ for source in self.split_result:
+ yield SourceBundle(0, source, None, None)
+
+ def get_range_tracker(self, start_position, stop_position):
+ class CustomBigQuerySourceRangeTracker(RangeTracker):
+ """A RangeTracker that always returns positions as None."""
+
+ def start_position(self):
+ return None
+
+ def stop_position(self):
+ return None
+
+ return CustomBigQuerySourceRangeTracker()
+
+ def read(self, range_tracker):
+ raise NotImplementedError('BigQuery source must be split before being read')
+
+ def _setup_temporary_dataset(self, bq):
+ location = bq.get_query_location(self.project, self.query,
+ self.use_legacy_sql)
+ bq.create_temporary_dataset(self.project, location)
+
+ def _execute_query(self, bq):
+ job = bq._start_query_job(self.project, self.query,
+ self.use_legacy_sql, self.flatten_results,
+ job_id=uuid.uuid4().hex, kms_key=self.kms_key)
+ job_ref = job.jobReference
+ bq.wait_for_bq_job(job_ref)
+ return bq._get_temp_table(self.project)
+
+ def _export_files(self, bq):
+ """Runs a BigQuery export job.
+
+ Returns:
+ A tuple of the table's bigquery.TableSchema and the list of
+ FileMetadata instances for the exported files.
+ """
+ job_id = uuid.uuid4().hex
+ job_ref = bq.perform_extract_job([self.gcs_location], job_id,
+ self.table_reference,
+ bigquery_tools.ExportFileFormat.JSON,
+ include_header=False)
+ bq.wait_for_bq_job(job_ref)
+ metadata_list = FileSystems.match([self.gcs_location])[0].metadata_list
+
+ table = bq.get_table(self.table_reference.projectId,
+ self.table_reference.datasetId,
+ self.table_reference.tableId)
+
+ return table.schema, metadata_list
+
+
@deprecated(since='2.11.0', current="WriteToBigQuery")
class BigQuerySink(dataflow_io.NativeSink):
"""A sink based on a BigQuery table.
@@ -1274,3 +1475,139 @@
tableSpec)
res['table'] = DisplayDataItem(tableSpec, label='Table')
return res
+
+
+class _PassThroughThenCleanup(PTransform):
+ """A PTransform that invokes a DoFn after the input PCollection has been
+ processed.
+ """
+ def __init__(self, cleanup_dofn):
+ self.cleanup_dofn = cleanup_dofn
+
+ def expand(self, input):
+ class PassThrough(beam.DoFn):
+ def process(self, element):
+ yield element
+
+ output = input | beam.ParDo(PassThrough()).with_outputs('cleanup_signal',
+ main='main')
+ main_output = output['main']
+ cleanup_signal = output['cleanup_signal']
+
+ _ = (input.pipeline
+ | beam.Create([None])
+ | beam.ParDo(self.cleanup_dofn, beam.pvalue.AsSingleton(
+ cleanup_signal)))
+
+ return main_output
+
+
+@experimental()
+class _ReadFromBigQuery(PTransform):
+ """Read data from BigQuery.
+
+ This PTransform uses a BigQuery export job to take a snapshot of the table
+ on GCS, and then reads from each produced JSON file.
+
+ Note that this source currently does not work with the DirectRunner.
+
+ Args:
+ table (str, callable, ValueProvider): The ID of the table, or a callable
+ that returns it. The ID must contain only letters ``a-z``, ``A-Z``,
+ numbers ``0-9``, or underscores ``_``. If dataset argument is
+ :data:`None` then the table argument must contain the entire table
+ reference specified as: ``'DATASET.TABLE'``
+ or ``'PROJECT:DATASET.TABLE'``. If it's a callable, it must receive one
+ argument representing an element to be written to BigQuery, and return
+ a TableReference, or a string table name as specified above.
+ dataset (str): The ID of the dataset containing this table or
+ :data:`None` if the table reference is specified entirely by the table
+ argument.
+ project (str): The ID of the project containing this table.
+ query (str): A query to be used instead of arguments table, dataset, and
+ project.
+ validate (bool): If :data:`True`, various checks will be done when source
+ gets initialized (e.g., is table present?). This should be
+ :data:`True` for most scenarios in order to catch errors as early as
+ possible (pipeline construction instead of pipeline execution). It
+ should be :data:`False` if the table is created during pipeline
+ execution by a previous step.
+ coder (~apache_beam.coders.coders.Coder): The coder for the table
+ rows. If :data:`None`, then the default coder is
+ _JsonToDictCoder, which will interpret every row as a JSON
+ serialized dictionary.
+ use_standard_sql (bool): Specifies whether to use BigQuery's standard SQL
+ dialect for this query. The default value is :data:`False`.
+ If set to :data:`True`, the query will use BigQuery's updated SQL
+ dialect with improved standards compliance.
+ This parameter is ignored for table inputs.
+ flatten_results (bool): Flattens all nested and repeated fields in the
+ query results. The default value is :data:`True`.
+ kms_key (str): Experimental. Optional Cloud KMS key name for use when
+ creating new temporary tables.
+ gcs_location (str, ValueProvider): The Google Cloud Storage location where
+ the extracted table should be written, given as a string or
+ a :class:`~apache_beam.options.value_provider.ValueProvider`. If
+ :data:`None`, then the temp_location pipeline option is used.
+ """
+ def __init__(self, gcs_location=None, validate=False, *args, **kwargs):
+ if gcs_location:
+ if not isinstance(gcs_location, (str, unicode, ValueProvider)):
+ raise TypeError('%s: gcs_location must be of type string'
+ ' or ValueProvider; got %r instead'
+ % (self.__class__.__name__, type(gcs_location)))
+
+ if isinstance(gcs_location, (str, unicode)):
+ gcs_location = StaticValueProvider(str, gcs_location)
+ self.gcs_location = gcs_location
+ self.validate = validate
+
+ self._args = args
+ self._kwargs = kwargs
+
+ def _get_destination_uri(self, temp_location):
+ """Returns the fully qualified Google Cloud Storage URI where the
+ extracted table should be written.
+ """
+ file_pattern = 'bigquery-table-dump-*.json'
+
+ if self.gcs_location is not None:
+ gcs_base = self.gcs_location.get()
+ elif temp_location is not None:
+ gcs_base = temp_location
+ logging.debug("gcs_location is empty, using temp_location instead")
+ else:
+ raise ValueError('{} requires a GCS location to be provided'
+ .format(self.__class__.__name__))
+ if self.validate:
+ self._validate_gcs_location(gcs_base)
+
+ job_id = uuid.uuid4().hex
+ return FileSystems.join(gcs_base, job_id, file_pattern)
+
+ @staticmethod
+ def _validate_gcs_location(gcs_location):
+ if not gcs_location.startswith('gs://'):
+ raise ValueError('Invalid GCS location: {}'.format(gcs_location))
+
+ def expand(self, pcoll):
+ class RemoveJsonFiles(beam.DoFn):
+ def __init__(self, gcs_location):
+ self._gcs_location = gcs_location
+
+ def process(self, unused_element, signal):
+ match_result = FileSystems.match([self._gcs_location])[0].metadata_list
+ logging.debug("%s: matched %s files", self.__class__.__name__,
+ len(match_result))
+ paths = [x.path for x in match_result]
+ FileSystems.delete(paths)
+
+ temp_location = pcoll.pipeline.options.view_as(
+ GoogleCloudOptions).temp_location
+ gcs_location = self._get_destination_uri(temp_location)
+
+ return (pcoll
+ | beam.io.Read(_CustomBigQuerySource(gcs_location=gcs_location,
+ validate=self.validate,
+ *self._args, **self._kwargs))
+ | _PassThroughThenCleanup(RemoveJsonFiles(gcs_location)))
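A hedged sketch of how the new experimental transform might be wired into a pipeline, based purely on the docstring and code above: it exports the query result to JSON files under `gcs_location` (falling back to `temp_location`), reads them back as dicts via `_JsonToDictCoder`, and cleans the files up afterwards. All project, dataset and bucket names below are placeholders, and per the docstring the source is not expected to work on the DirectRunner:

```python
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions

options = PipelineOptions(
    project='my-project',                 # placeholder
    temp_location='gs://my-bucket/tmp',   # used if gcs_location is unset
    region='us-central1',
    runner='DataflowRunner')

with beam.Pipeline(options=options) as p:
  _ = (p
       | 'ReadViaExport' >> beam.io._ReadFromBigQuery(
           query='SELECT number, str FROM `my-project.my_dataset.my_table`',
           use_standard_sql=True,
           project='my-project',
           gcs_location='gs://my-bucket/bq-export')
       # Each element is a dict produced by _JsonToDictCoder from the JSON
       # files that the export job writes under gcs_location.
       | 'Log' >> beam.Map(print))
```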
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py
index 06525cd..fdce60b 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py
@@ -26,13 +26,14 @@
NOTHING IN THIS FILE HAS BACKWARDS COMPATIBILITY GUARANTEES.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import datetime
import hashlib
import logging
import random
-import time
import uuid
from future.utils import iteritems
@@ -493,11 +494,8 @@
Experimental; no backwards compatibility guarantees.
"""
- ALL_DONE = object()
- FAILED = object()
- WAITING = object()
- def __init__(self, test_client):
+ def __init__(self, test_client=None):
self.test_client = test_client
def start_bundle(self):
@@ -505,34 +503,10 @@
def process(self, element, dest_ids_list):
job_references = [elm[1] for elm in dest_ids_list]
-
- while True:
- status = self._check_job_states(job_references)
- if status == WaitForBQJobs.FAILED:
- raise Exception(
- 'BigQuery jobs failed. BQ error: %s', self._latest_error)
- elif status == WaitForBQJobs.ALL_DONE:
- return dest_ids_list # Pass the list of destination-jobs downstream
- time.sleep(10)
-
- def _check_job_states(self, job_references):
for ref in job_references:
- job = self.bq_wrapper.get_job(ref.projectId,
- ref.jobId,
- ref.location)
+ self.bq_wrapper.wait_for_bq_job(ref, sleep_duration_sec=10)
- _LOGGER.info("Job status: %s", job.status)
- if job.status.state == 'DONE' and job.status.errorResult:
- _LOGGER.warning("Job %s seems to have failed. Error Result: %s",
- ref.jobId, job.status.errorResult)
- self._latest_error = job.status
- return WaitForBQJobs.FAILED
- elif job.status.state == 'DONE':
- continue
- else:
- return WaitForBQJobs.WAITING
-
- return WaitForBQJobs.ALL_DONE
+ return dest_ids_list # Pass the list of destination-jobs downstream
class DeleteTablesFn(beam.DoFn):
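With the in-DoFn polling loop and the `ALL_DONE`/`FAILED`/`WAITING` sentinels removed, the DoFn now simply delegates per-job waiting to `BigQueryWrapper.wait_for_bq_job`. A hedged reconstruction of the whole simplified DoFn for illustration (the `start_bundle` body is not shown in the hunk above, so constructing the wrapper from `test_client` there is an assumption):

```python
import apache_beam as beam
from apache_beam.io.gcp import bigquery_tools


class WaitForBQJobsSketch(beam.DoFn):
  """Hedged reconstruction of the simplified DoFn, for illustration only."""

  def __init__(self, test_client=None):
    self.test_client = test_client

  def start_bundle(self):
    # Assumption: the unchanged start_bundle builds the wrapper like this.
    self.bq_wrapper = bigquery_tools.BigQueryWrapper(client=self.test_client)

  def process(self, element, dest_ids_list):
    job_references = [elm[1] for elm in dest_ids_list]
    for ref in job_references:
      # wait_for_bq_job (added in bigquery_tools.py below) polls each job and
      # raises if it fails or the retry budget is exhausted.
      self.bq_wrapper.wait_for_bq_job(ref, sleep_duration_sec=10)
    return dest_ids_list  # pass the destination/job pairs downstream
```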
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py b/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py
index 92e37c6..eb673a6 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py
@@ -17,6 +17,8 @@
"""Unit tests for BigQuery file loads utilities."""
+# pytype: skip-file
+
from __future__ import absolute_import
import json
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_io_read_it_test.py b/sdks/python/apache_beam/io/gcp/bigquery_io_read_it_test.py
index 72e1bb7..b9a5cf4 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_io_read_it_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_io_read_it_test.py
@@ -21,6 +21,8 @@
Can be configured to simulate slow reading for a given number of rows.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_io_read_pipeline.py b/sdks/python/apache_beam/io/gcp/bigquery_io_read_pipeline.py
index 142182a..81cbab4 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_io_read_pipeline.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_io_read_pipeline.py
@@ -20,6 +20,8 @@
Can be configured to simulate slow reading for a given number of rows.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_read_it_test.py b/sdks/python/apache_beam/io/gcp/bigquery_read_it_test.py
index ff63eda..1b37d49 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_read_it_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_read_it_test.py
@@ -18,6 +18,8 @@
#
"""Unit tests for BigQuery sources and sinks."""
+# pytype: skip-file
+
from __future__ import absolute_import
import base64
@@ -26,6 +28,7 @@
import time
import unittest
from decimal import Decimal
+from functools import wraps
from future.utils import iteritems
from nose.plugins.attrib import attr
@@ -49,35 +52,73 @@
_LOGGER = logging.getLogger(__name__)
+def skip(runners):
+ if not isinstance(runners, list):
+ runners = [runners]
+
+ def inner(fn):
+ @wraps(fn)
+ def wrapped(self):
+ if self.runner_name in runners:
+ self.skipTest('This test doesn\'t work on these runners: {}'.format(
+ runners))
+ else:
+ return fn(self)
+ return wrapped
+ return inner
+
+
class BigQueryReadIntegrationTests(unittest.TestCase):
BIG_QUERY_DATASET_ID = 'python_read_table_'
- def setUp(self):
- self.test_pipeline = TestPipeline(is_integration_test=True)
- self.runner_name = type(self.test_pipeline.runner).__name__
- self.project = self.test_pipeline.get_option('project')
+ @classmethod
+ def setUpClass(cls):
+ cls.test_pipeline = TestPipeline(is_integration_test=True)
+ cls.args = cls.test_pipeline.get_full_options_as_args()
+ cls.runner_name = type(cls.test_pipeline.runner).__name__
+ cls.project = cls.test_pipeline.get_option('project')
- self.bigquery_client = BigQueryWrapper()
- self.dataset_id = '%s%s%d' % (self.BIG_QUERY_DATASET_ID,
- str(int(time.time())),
- random.randint(0, 10000))
- self.bigquery_client.get_or_create_dataset(self.project, self.dataset_id)
+ cls.bigquery_client = BigQueryWrapper()
+ cls.dataset_id = '%s%s%d' % (cls.BIG_QUERY_DATASET_ID,
+ str(int(time.time())),
+ random.randint(0, 10000))
+ cls.bigquery_client.get_or_create_dataset(cls.project, cls.dataset_id)
_LOGGER.info("Created dataset %s in project %s",
- self.dataset_id, self.project)
+ cls.dataset_id, cls.project)
- def tearDown(self):
+ @classmethod
+ def tearDownClass(cls):
request = bigquery.BigqueryDatasetsDeleteRequest(
- projectId=self.project, datasetId=self.dataset_id,
+ projectId=cls.project, datasetId=cls.dataset_id,
deleteContents=True)
try:
_LOGGER.info("Deleting dataset %s in project %s",
- self.dataset_id, self.project)
- self.bigquery_client.client.datasets.Delete(request)
+ cls.dataset_id, cls.project)
+ cls.bigquery_client.client.datasets.Delete(request)
except HttpError:
_LOGGER.debug('Failed to clean up dataset %s in project %s',
- self.dataset_id, self.project)
+ cls.dataset_id, cls.project)
- def create_table(self, tablename):
+
+class ReadTests(BigQueryReadIntegrationTests):
+ TABLE_DATA = [
+ {'number': 1, 'str': 'abc'},
+ {'number': 2, 'str': 'def'},
+ {'number': 3, 'str': u'你好'},
+ {'number': 4, 'str': u'привет'}
+ ]
+
+ @classmethod
+ def setUpClass(cls):
+ super(ReadTests, cls).setUpClass()
+ cls.table_name = 'python_write_table'
+ cls.create_table(cls.table_name)
+
+ table_id = '{}.{}'.format(cls.dataset_id, cls.table_name)
+ cls.query = 'SELECT number, str FROM `%s`' % table_id
+
+ @classmethod
+ def create_table(cls, table_name):
table_schema = bigquery.TableSchema()
table_field = bigquery.TableFieldSchema()
table_field.name = 'number'
@@ -89,23 +130,45 @@
table_schema.fields.append(table_field)
table = bigquery.Table(
tableReference=bigquery.TableReference(
- projectId=self.project,
- datasetId=self.dataset_id,
- tableId=tablename),
+ projectId=cls.project,
+ datasetId=cls.dataset_id,
+ tableId=table_name),
schema=table_schema)
request = bigquery.BigqueryTablesInsertRequest(
- projectId=self.project, datasetId=self.dataset_id, table=table)
- self.bigquery_client.client.tables.Insert(request)
- table_data = [
- {'number': 1, 'str': 'abc'},
- {'number': 2, 'str': 'def'},
- {'number': 3, 'str': u'你好'},
- {'number': 4, 'str': u'привет'}
- ]
- self.bigquery_client.insert_rows(
- self.project, self.dataset_id, tablename, table_data)
+ projectId=cls.project, datasetId=cls.dataset_id, table=table)
+ cls.bigquery_client.client.tables.Insert(request)
+ cls.bigquery_client.insert_rows(
+ cls.project, cls.dataset_id, table_name, cls.TABLE_DATA)
- def create_table_new_types(self, table_name):
+ @skip(['PortableRunner', 'FlinkRunner'])
+ @attr('IT')
+ def test_native_source(self):
+ with beam.Pipeline(argv=self.args) as p:
+ result = (p | 'read' >> beam.io.Read(beam.io.BigQuerySource(
+ query=self.query, use_standard_sql=True)))
+ assert_that(result, equal_to(self.TABLE_DATA))
+
+ @attr('IT')
+ def test_iobase_source(self):
+ with beam.Pipeline(argv=self.args) as p:
+ result = (p | 'read' >> beam.io._ReadFromBigQuery(
+ query=self.query, use_standard_sql=True, project=self.project))
+ assert_that(result, equal_to(self.TABLE_DATA))
+
+
+class ReadNewTypesTests(BigQueryReadIntegrationTests):
+ @classmethod
+ def setUpClass(cls):
+ super(ReadNewTypesTests, cls).setUpClass()
+ cls.table_name = 'python_new_types'
+ cls.create_table(cls.table_name)
+
+ table_id = '{}.{}'.format(cls.dataset_id, cls.table_name)
+ cls.query = 'SELECT float, numeric, bytes, date, time, datetime,' \
+ 'timestamp, geo FROM `%s`' % table_id
+
+ @classmethod
+ def create_table(cls, table_name):
table_schema = bigquery.TableSchema()
table_field = bigquery.TableFieldSchema()
table_field.name = 'float'
@@ -141,13 +204,13 @@
table_schema.fields.append(table_field)
table = bigquery.Table(
tableReference=bigquery.TableReference(
- projectId=self.project,
- datasetId=self.dataset_id,
+ projectId=cls.project,
+ datasetId=cls.dataset_id,
tableId=table_name),
schema=table_schema)
request = bigquery.BigqueryTablesInsertRequest(
- projectId=self.project, datasetId=self.dataset_id, table=table)
- self.bigquery_client.client.tables.Insert(request)
+ projectId=cls.project, datasetId=cls.dataset_id, table=table)
+ cls.bigquery_client.client.tables.Insert(request)
row_data = {
'float': 0.33, 'numeric': Decimal('10'), 'bytes':
base64.b64encode(b'\xab\xac').decode('utf-8'), 'date': '3000-12-31',
@@ -160,34 +223,10 @@
for key, value in iteritems(row_data):
table_data.append({key: value})
- self.bigquery_client.insert_rows(
- self.project, self.dataset_id, table_name, table_data)
+ cls.bigquery_client.insert_rows(
+ cls.project, cls.dataset_id, table_name, table_data)
- @attr('IT')
- def test_big_query_read(self):
- table_name = 'python_write_table'
- self.create_table(table_name)
- table_id = '{}.{}'.format(self.dataset_id, table_name)
-
- args = self.test_pipeline.get_full_options_as_args()
-
- with beam.Pipeline(argv=args) as p:
- result = (p | 'read' >> beam.io.Read(beam.io.BigQuerySource(
- query='SELECT number, str FROM `%s`' % table_id,
- use_standard_sql=True)))
- assert_that(result, equal_to([{'number': 1, 'str': 'abc'},
- {'number': 2, 'str': 'def'},
- {'number': 3, 'str': u'你好'},
- {'number': 4, 'str': u'привет'}]))
-
- @attr('IT')
- def test_big_query_read_new_types(self):
- table_name = 'python_new_types'
- self.create_table_new_types(table_name)
- table_id = '{}.{}'.format(self.dataset_id, table_name)
-
- args = self.test_pipeline.get_full_options_as_args()
-
+ def get_expected_data(self):
expected_row = {
'float': 0.33, 'numeric': Decimal('10'), 'bytes':
base64.b64encode(b'\xab\xac'), 'date': '3000-12-31',
@@ -203,12 +242,22 @@
row[key] = value
expected_data.append(row)
- with beam.Pipeline(argv=args) as p:
+ return expected_data
+
+ @skip(['PortableRunner', 'FlinkRunner'])
+ @attr('IT')
+ def test_native_source(self):
+ with beam.Pipeline(argv=self.args) as p:
result = (p | 'read' >> beam.io.Read(beam.io.BigQuerySource(
- query='SELECT float, numeric, bytes, date, time, datetime,'
- 'timestamp, geo FROM `%s`' % table_id,
- use_standard_sql=True)))
- assert_that(result, equal_to(expected_data))
+ query=self.query, use_standard_sql=True)))
+ assert_that(result, equal_to(self.get_expected_data()))
+
+ @attr('IT')
+ def test_iobase_source(self):
+ with beam.Pipeline(argv=self.args) as p:
+ result = (p | 'read' >> beam.io._ReadFromBigQuery(
+ query=self.query, use_standard_sql=True, project=self.project))
+ assert_that(result, equal_to(self.get_expected_data()))
if __name__ == '__main__':
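The new module-level `skip` decorator gives each integration test a per-runner opt-out instead of a blanket skip. A standalone sketch of the same pattern (the class and runner names are illustrative only):

```python
import unittest
from functools import wraps


def skip(runners):
  if not isinstance(runners, list):
    runners = [runners]

  def inner(fn):
    @wraps(fn)
    def wrapped(self):
      if self.runner_name in runners:
        self.skipTest('Not supported on: {}'.format(runners))
      else:
        return fn(self)
    return wrapped
  return inner


class ExampleIT(unittest.TestCase):
  runner_name = 'PortableRunner'  # would normally come from TestPipeline

  @skip(['PortableRunner', 'FlinkRunner'])
  def test_skipped_on_portable(self):
    self.fail('never reached on the configured runner')


if __name__ == '__main__':
  unittest.main()
```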
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_read_perf_test.py b/sdks/python/apache_beam/io/gcp/bigquery_read_perf_test.py
index 5b48287..5d1fc98 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_read_perf_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_read_perf_test.py
@@ -49,6 +49,8 @@
--tests apache_beam.io.gcp.bigquery_read_perf_test
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import base64
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_test.py b/sdks/python/apache_beam/io/gcp/bigquery_test.py
index ac62774..7564edb 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_test.py
@@ -16,12 +16,15 @@
#
"""Unit tests for BigQuery sources and sinks."""
+# pytype: skip-file
+
from __future__ import absolute_import
import decimal
import json
import logging
import os
+import pickle
import random
import re
import time
@@ -35,11 +38,13 @@
from nose.plugins.attrib import attr
import apache_beam as beam
+from apache_beam.internal import pickler
from apache_beam.internal.gcp.json_value import to_json_value
from apache_beam.io.filebasedsink_test import _TestCaseWithTempDirCleanUp
from apache_beam.io.gcp import bigquery_tools
from apache_beam.io.gcp.bigquery import TableRowJsonCoder
from apache_beam.io.gcp.bigquery import WriteToBigQuery
+from apache_beam.io.gcp.bigquery import _JsonToDictCoder
from apache_beam.io.gcp.bigquery import _StreamToBigQuery
from apache_beam.io.gcp.bigquery_file_loads_test import _ELEMENTS
from apache_beam.io.gcp.bigquery_tools import JSON_COMPLIANCE_ERROR
@@ -51,6 +56,7 @@
from apache_beam.io.gcp.tests.bigquery_matcher import BigqueryFullResultStreamingMatcher
from apache_beam.io.gcp.tests.bigquery_matcher import BigQueryTableMatcher
from apache_beam.options import value_provider
+from apache_beam.options.pipeline_options import GoogleCloudOptions
from apache_beam.options.pipeline_options import StandardOptions
from apache_beam.runners.dataflow.test_dataflow_runner import TestDataflowRunner
from apache_beam.runners.runner import PipelineState
@@ -245,6 +251,105 @@
@unittest.skipIf(HttpError is None, 'GCP dependencies are not installed')
+class TestJsonToDictCoder(unittest.TestCase):
+
+ @staticmethod
+ def _make_schema(fields):
+ def _fill_schema(fields):
+ for field in fields:
+ table_field = bigquery.TableFieldSchema()
+ table_field.name, table_field.type, nested_fields = field
+ if nested_fields:
+ table_field.fields = list(_fill_schema(nested_fields))
+ yield table_field
+
+ schema = bigquery.TableSchema()
+ schema.fields = list(_fill_schema(fields))
+ return schema
+
+ def test_coder_is_picklable(self):
+ try:
+ schema = self._make_schema([
+ ('record', 'RECORD', [
+ ('float', 'FLOAT', []),
+ ]),
+ ('integer', 'INTEGER', []),
+ ])
+ coder = _JsonToDictCoder(schema)
+ pickler.loads(pickler.dumps(coder))
+ except pickle.PicklingError:
+ self.fail('{} is not picklable'.format(coder.__class__.__name__))
+
+ def test_values_are_converted(self):
+ input_row = b'{"float": "10.5", "string": "abc"}'
+ expected_row = {'float': 10.5, 'string': 'abc'}
+ schema = self._make_schema([
+ ('float', 'FLOAT', []),
+ ('string', 'STRING', [])
+ ])
+ coder = _JsonToDictCoder(schema)
+
+ actual = coder.decode(input_row)
+ self.assertEqual(expected_row, actual)
+
+ def test_null_fields_are_preserved(self):
+ input_row = b'{"float": "10.5"}'
+ expected_row = {'float': 10.5, 'string': None}
+ schema = self._make_schema([
+ ('float', 'FLOAT', []),
+ ('string', 'STRING', [])
+ ])
+ coder = _JsonToDictCoder(schema)
+
+ actual = coder.decode(input_row)
+ self.assertEqual(expected_row, actual)
+
+ def test_record_field_is_properly_converted(self):
+ input_row = b'{"record": {"float": "55.5"}, "integer": 10}'
+ expected_row = {'record': {'float': 55.5}, 'integer': 10}
+ schema = self._make_schema([
+ ('record', 'RECORD', [
+ ('float', 'FLOAT', []),
+ ]),
+ ('integer', 'INTEGER', []),
+ ])
+ coder = _JsonToDictCoder(schema)
+
+ actual = coder.decode(input_row)
+ self.assertEqual(expected_row, actual)
+
+
+@unittest.skipIf(HttpError is None, 'GCP dependencies are not installed')
+class TestReadFromBigQuery(unittest.TestCase):
+
+ def test_exception_is_raised_when_gcs_location_cannot_be_specified(self):
+ with self.assertRaises(ValueError):
+ p = beam.Pipeline()
+ _ = p | beam.io._ReadFromBigQuery(project='project', dataset='dataset',
+ table='table')
+
+ @mock.patch('apache_beam.io.gcp.bigquery_tools.BigQueryWrapper')
+ def test_fallback_to_temp_location(self, BigQueryWrapper):
+ pipeline_options = beam.pipeline.PipelineOptions()
+ pipeline_options.view_as(GoogleCloudOptions).temp_location = 'gs://bucket'
+ try:
+ p = beam.Pipeline(options=pipeline_options)
+ _ = p | beam.io._ReadFromBigQuery(project='project', dataset='dataset',
+ table='table')
+ except ValueError:
+ self.fail('ValueError was raised unexpectedly')
+
+ def test_gcs_location_validation_works_properly(self):
+ with self.assertRaises(ValueError) as context:
+ p = beam.Pipeline()
+ _ = p | beam.io._ReadFromBigQuery(project='project', dataset='dataset',
+ table='table', validate=True,
+ gcs_location='fs://bad_location')
+ self.assertEqual('Invalid GCS location: fs://bad_location',
+ str(context.exception))
+
+
+@unittest.skipIf(HttpError is None, 'GCP dependencies are not installed')
class TestBigQuerySink(unittest.TestCase):
def test_table_spec_display_data(self):
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
index dfd477b..b74c967 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
@@ -25,6 +25,8 @@
NOTHING IN THIS FILE HAS BACKWARDS COMPATIBILITY GUARANTEES.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import datetime
@@ -69,6 +71,19 @@
JSON_COMPLIANCE_ERROR = 'NAN, INF and -INF values are not JSON compliant.'
+class ExportFileFormat(object):
+ CSV = 'CSV'
+ JSON = 'NEWLINE_DELIMITED_JSON'
+ AVRO = 'AVRO'
+
+
+class ExportCompression(object):
+ GZIP = 'GZIP'
+ DEFLATE = 'DEFLATE'
+ SNAPPY = 'SNAPPY'
+ NONE = 'NONE'
+
+
def default_encoder(obj):
if isinstance(obj, decimal.Decimal):
return str(obj)
@@ -359,7 +374,7 @@
num_retries=MAX_RETRIES,
retry_filter=retry.retry_on_server_errors_and_timeout_filter)
def _start_query_job(self, project_id, query, use_legacy_sql, flatten_results,
- job_id, dry_run=False):
+ job_id, dry_run=False, kms_key=None):
reference = bigquery.JobReference(jobId=job_id, projectId=project_id)
request = bigquery.BigqueryJobsInsertRequest(
projectId=project_id,
@@ -369,13 +384,46 @@
query=bigquery.JobConfigurationQuery(
query=query,
useLegacySql=use_legacy_sql,
- allowLargeResults=True,
- destinationTable=self._get_temp_table(project_id),
- flattenResults=flatten_results)),
+ allowLargeResults=not dry_run,
+ destinationTable=self._get_temp_table(project_id) if not
+ dry_run else None,
+ flattenResults=flatten_results,
+ destinationEncryptionConfiguration=bigquery
+ .EncryptionConfiguration(kmsKeyName=kms_key))),
jobReference=reference))
response = self.client.jobs.Insert(request)
- return response.jobReference.jobId, response.jobReference.location
+ return response
+
+ def wait_for_bq_job(self, job_reference, sleep_duration_sec=5,
+ max_retries=60):
+ """Poll job until it is DONE.
+
+ Args:
+ job_reference: bigquery.JobReference instance.
+ sleep_duration_sec: Specifies the delay in seconds between retries.
+ max_retries: The total number of times to retry. If set to 0,
+ the function waits forever.
+
+ Raises:
+ `RuntimeError`: If the job is FAILED or the number of retries has been
+ reached.
+ """
+ retry = 0
+ while True:
+ retry += 1
+ job = self.get_job(job_reference.projectId, job_reference.jobId,
+ job_reference.location)
+ logging.info('Job status: %s', job.status.state)
+ if job.status.state == 'DONE' and job.status.errorResult:
+ raise RuntimeError('BigQuery job {} failed. Error Result: {}'.format(
+ job_reference.jobId, job.status.errorResult))
+ elif job.status.state == 'DONE':
+ return True
+ else:
+ time.sleep(sleep_duration_sec)
+ if max_retries != 0 and retry >= max_retries:
+ raise RuntimeError('The maximum number of retries has been reached')
@retry.with_exponential_backoff(
num_retries=MAX_RETRIES,
@@ -601,6 +649,37 @@
@retry.with_exponential_backoff(
num_retries=MAX_RETRIES,
retry_filter=retry.retry_on_server_errors_and_timeout_filter)
+ def perform_extract_job(self, destination, job_id, table_reference,
+ destination_format, include_header=True,
+ compression=ExportCompression.NONE):
+ """Starts a job to export data from BigQuery.
+
+ Returns:
+ bigquery.JobReference with the information about the job that was started.
+ """
+ job_reference = bigquery.JobReference(jobId=job_id,
+ projectId=table_reference.projectId)
+ request = bigquery.BigqueryJobsInsertRequest(
+ projectId=table_reference.projectId,
+ job=bigquery.Job(
+ configuration=bigquery.JobConfiguration(
+ extract=bigquery.JobConfigurationExtract(
+ destinationUris=destination,
+ sourceTable=table_reference,
+ printHeader=include_header,
+ destinationFormat=destination_format,
+ compression=compression,
+ )
+ ),
+ jobReference=job_reference,
+ )
+ )
+ response = self.client.jobs.Insert(request)
+ return response.jobReference
+
+ @retry.with_exponential_backoff(
+ num_retries=MAX_RETRIES,
+ retry_filter=retry.retry_on_server_errors_and_timeout_filter)
def get_or_create_table(
self, project_id, dataset_id, table_id, schema,
create_disposition, write_disposition, additional_create_parameters=None):
@@ -700,10 +779,12 @@
def run_query(self, project_id, query, use_legacy_sql, flatten_results,
dry_run=False):
- job_id, location = self._start_query_job(project_id, query,
- use_legacy_sql, flatten_results,
- job_id=uuid.uuid4().hex,
- dry_run=dry_run)
+ job = self._start_query_job(project_id, query, use_legacy_sql,
+ flatten_results, job_id=uuid.uuid4().hex,
+ dry_run=dry_run)
+ job_id = job.jobReference.jobId
+ location = job.jobReference.location
+
if dry_run:
# If this was a dry run then the fact that we get here means the
# query has no errors. The start_query_job would raise an error otherwise.
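A hedged sketch of how the new wrapper pieces compose for an export-based read, mirroring `_CustomBigQuerySource._export_files` above; every name below is a placeholder, and GCP credentials plus the `apache_beam[gcp]` extra are assumed to be available:

```python
import uuid

from apache_beam.io.filesystems import FileSystems
from apache_beam.io.gcp import bigquery_tools

bq = bigquery_tools.BigQueryWrapper()
table_ref = bigquery_tools.parse_table_reference(
    'my_dataset.my_table', project='my-project')       # placeholders

gcs_pattern = 'gs://my-bucket/bq-export/%s/part-*.json' % uuid.uuid4().hex

# Start the export job and block until it reaches DONE (or raise on failure).
job_ref = bq.perform_extract_job(
    [gcs_pattern], uuid.uuid4().hex, table_ref,
    bigquery_tools.ExportFileFormat.JSON, include_header=False)
bq.wait_for_bq_job(job_ref, sleep_duration_sec=10, max_retries=60)

# The produced files can then be matched and read back.
exported = FileSystems.match([gcs_pattern])[0].metadata_list
print([m.path for m in exported])
```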
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
index 7b04570..e8eaaa0 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import datetime
@@ -196,6 +198,35 @@
]), False, False)
self.assertEqual(new_table, 'table_id')
+ def test_wait_for_job_returns_true_when_job_is_done(self):
+ def make_response(state):
+ m = mock.Mock()
+ m.status.errorResult = None
+ m.status.state = state
+ return m
+
+ client, job_ref = mock.Mock(), mock.Mock()
+ wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(client)
+ # Return 'DONE' the second time get_job is called.
+ wrapper.get_job = mock.Mock(side_effect=[make_response('RUNNING'),
+ make_response('DONE')])
+
+ result = wrapper.wait_for_bq_job(job_ref, sleep_duration_sec=0,
+ max_retries=5)
+ self.assertTrue(result)
+
+ def test_wait_for_job_retries_fail(self):
+ client, response, job_ref = mock.Mock(), mock.Mock(), mock.Mock()
+ response.status.state = 'RUNNING'
+ wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(client)
+ # Return 'RUNNING' response forever.
+ wrapper.get_job = lambda *args: response
+
+ with self.assertRaises(RuntimeError) as context:
+ wrapper.wait_for_bq_job(job_ref, sleep_duration_sec=0, max_retries=5)
+ self.assertEqual('The maximum number of retries has been reached',
+ str(context.exception))
+
@unittest.skipIf(HttpError is None, 'GCP dependencies are not installed')
class TestBigQueryReader(unittest.TestCase):
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py
index ae56e35..759e319 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py
@@ -18,6 +18,8 @@
#
"""Unit tests for BigQuery sources and sinks."""
+# pytype: skip-file
+
from __future__ import absolute_import
import base64
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_write_perf_test.py b/sdks/python/apache_beam/io/gcp/bigquery_write_perf_test.py
index c8dd304..e6ced26 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_write_perf_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_write_perf_test.py
@@ -48,6 +48,8 @@
This setup will result in a table of 1MB size.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import base64
diff --git a/sdks/python/apache_beam/io/gcp/bigtableio.py b/sdks/python/apache_beam/io/gcp/bigtableio.py
index ccb10c5..928e6d8 100644
--- a/sdks/python/apache_beam/io/gcp/bigtableio.py
+++ b/sdks/python/apache_beam/io/gcp/bigtableio.py
@@ -35,6 +35,8 @@
instance_id,
table_id))
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import apache_beam as beam
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/adaptive_throttler.py b/sdks/python/apache_beam/io/gcp/datastore/v1/adaptive_throttler.py
index f6c65a5..9b38545 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/adaptive_throttler.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/adaptive_throttler.py
@@ -19,6 +19,8 @@
#
# For internal use only; no backwards-compatibility guarantees.
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/adaptive_throttler_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1/adaptive_throttler_test.py
index e3ccb92..9d7fdfd 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/adaptive_throttler_test.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/adaptive_throttler_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio.py b/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio.py
index 9af7674..fa3c869 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio.py
@@ -22,6 +22,8 @@
``apache_beam.io.gcp.datastore.v1new.datastoreio`` will replace it in the
next Beam major release.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio_test.py
index ca9a129..6c77204 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio_test.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/fake_datastore.py b/sdks/python/apache_beam/io/gcp/datastore/v1/fake_datastore.py
index 054df9d..535af10 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/fake_datastore.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/fake_datastore.py
@@ -20,6 +20,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import uuid
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
index 4ea2898..dbb39e2 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
@@ -20,6 +20,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import errno
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/helper_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1/helper_test.py
index e71255a..31e94b3 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/helper_test.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/helper_test.py
@@ -16,6 +16,8 @@
#
"""Tests for datastore helper."""
+# pytype: skip-file
+
from __future__ import absolute_import
import errno
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/query_splitter.py b/sdks/python/apache_beam/io/gcp/datastore/v1/query_splitter.py
index ef3c1e4a..f660324 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/query_splitter.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/query_splitter.py
@@ -16,6 +16,8 @@
#
"""Implements a Cloud Datastore query splitter."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -37,7 +39,7 @@
PropertyFilter.GREATER_THAN,
PropertyFilter.GREATER_THAN_OR_EQUAL]
except ImportError:
- UNSUPPORTED_OPERATORS = None
+ UNSUPPORTED_OPERATORS = None # type: ignore
# pylint: enable=wrong-import-order, wrong-import-position
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/query_splitter_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1/query_splitter_test.py
index 8d376e0..1791d8d 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/query_splitter_test.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/query_splitter_test.py
@@ -17,10 +17,13 @@
"""Cloud Datastore query splitter test."""
+# pytype: skip-file
+
from __future__ import absolute_import
import sys
import unittest
+from typing import Type
# patches unittest.TestCase to be python3 compatible
import future.tests.base # pylint: disable=unused-import
@@ -37,7 +40,7 @@
from google.cloud.proto.datastore.v1.query_pb2 import PropertyFilter
except (ImportError, TypeError):
datastore_pb2 = None
- query_splitter = None
+ query_splitter = None # type: ignore
# pylint: enable=wrong-import-order, wrong-import-position
@@ -65,7 +68,7 @@
test_filter.property_filter.op = PropertyFilter.GREATER_THAN
return query
- split_error = ValueError
+ split_error = ValueError # type: Type[Exception]
query_splitter = query_splitter
def test_get_splits_query_with_multiple_kinds(self):
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/util.py b/sdks/python/apache_beam/io/gcp/datastore/v1/util.py
index 5595a5d..5d6b597 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/util.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/util.py
@@ -19,6 +19,8 @@
#
# For internal use only; no backwards-compatibility guarantees.
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/util_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1/util_test.py
index 4669106..738c5b4 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/util_test.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/util_test.py
@@ -16,6 +16,8 @@
#
"""Tests for util.py."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastore_write_it_pipeline.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastore_write_it_pipeline.py
index efe80c8..ea17f3f 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastore_write_it_pipeline.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastore_write_it_pipeline.py
@@ -27,6 +27,8 @@
5. Query the written Entities, verify no results.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastore_write_it_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastore_write_it_test.py
index b8a290c..69aea0e 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastore_write_it_test.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastore_write_it_test.py
@@ -25,6 +25,8 @@
results in the pipeline.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -42,7 +44,7 @@
from apache_beam.io.gcp.datastore.v1new import datastore_write_it_pipeline
# TODO(BEAM-4543): Remove TypeError once googledatastore dependency is removed.
except (ImportError, TypeError):
- datastore_write_it_pipeline = None
+ datastore_write_it_pipeline = None # type: ignore
class DatastoreWriteIT(unittest.TestCase):
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio.py
index a70ea95..633ed52 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio.py
@@ -27,6 +27,8 @@
This module is experimental, no backwards compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py
index 28065bb..9eb0db0 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py
@@ -17,6 +17,8 @@
"""Unit tests for datastoreio."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -51,7 +53,7 @@
# TODO(BEAM-4543): Remove TypeError once googledatastore dependency is removed.
except (ImportError, TypeError):
client = None
- DatastoreioTestBase = unittest.TestCase
+ DatastoreioTestBase = unittest.TestCase # type: ignore
class FakeMutation(object):
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/helper.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/helper.py
index 2bce903..2dcf7fa 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1new/helper.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/helper.py
@@ -21,11 +21,15 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import os
import uuid
from builtins import range
+from typing import List
+from typing import Union
from google.api_core import exceptions
from google.cloud import environment_vars
@@ -61,7 +65,7 @@
def create_entities(count, id_or_name=False):
"""Creates a list of entities with random keys."""
if id_or_name:
- ids_or_names = [uuid.uuid4().int & ((1 << 63) - 1) for _ in range(count)]
+ ids_or_names = [uuid.uuid4().int & ((1 << 63) - 1) for _ in range(count)] # type: List[Union[str, int]]
else:
ids_or_names = [str(uuid.uuid4()) for _ in range(count)]
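The widened type comment on `ids_or_names` above is needed because mypy otherwise infers the element type from the first branch alone. A standalone sketch of the same situation (names are illustrative, not from Beam):

from typing import List
from typing import Union


def make_ids(use_ints):
  # type: (bool) -> List[Union[str, int]]
  if use_ints:
    # Without the comment, mypy infers List[int] here and then rejects the
    # List[str] assignment in the else branch.
    ids = [1, 2, 3]  # type: List[Union[str, int]]
  else:
    ids = ['a', 'b', 'c']
  return ids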
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/query_splitter.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/query_splitter.py
index 6db4ca4..4f4dc0d 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1new/query_splitter.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/query_splitter.py
@@ -20,6 +20,8 @@
For internal use only. No backwards compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/query_splitter_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/query_splitter_test.py
index df09490..6f13f7f 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1new/query_splitter_test.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/query_splitter_test.py
@@ -17,6 +17,8 @@
"""Cloud Datastore query splitter test."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
@@ -38,9 +40,9 @@
# TODO(BEAM-4543): Remove TypeError once googledatastore dependency is removed.
except (ImportError, TypeError):
- query_splitter = None
- SplitNotPossibleError = None
- QuerySplitterTestBase = unittest.TestCase
+ query_splitter = None # type: ignore
+ SplitNotPossibleError = None # type: ignore
+ QuerySplitterTestBase = unittest.TestCase # type: ignore
@unittest.skipIf(query_splitter is None, 'GCP dependencies are not installed')
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/types.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/types.py
index a664ec7..e8d950e 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1new/types.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/types.py
@@ -21,6 +21,8 @@
This module is experimental, no backwards compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import copy
@@ -179,7 +181,7 @@
return self.parent is None and other.parent is None
- __hash__ = None
+ __hash__ = None # type: ignore[assignment]
def __repr__(self):
return '<%s(%s, parent=%s, project=%s, namespace=%s)>' % (
@@ -252,7 +254,7 @@
self.exclude_from_indexes == other.exclude_from_indexes and
self.properties == other.properties)
- __hash__ = None
+ __hash__ = None # type: ignore[assignment]
def __repr__(self):
return "<%s(key=%s, exclude_from_indexes=%s) properties=%s>" % (
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/types_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/types_test.py
index c3bf8ef..021caec 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1new/types_test.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/types_test.py
@@ -17,6 +17,8 @@
"""Unit tests for types module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import datetime
diff --git a/sdks/python/apache_beam/io/gcp/datastore_write_it_pipeline.py b/sdks/python/apache_beam/io/gcp/datastore_write_it_pipeline.py
index 2d0be8f..9950fa1 100644
--- a/sdks/python/apache_beam/io/gcp/datastore_write_it_pipeline.py
+++ b/sdks/python/apache_beam/io/gcp/datastore_write_it_pipeline.py
@@ -27,6 +27,8 @@
5. Query the written Entities, verify no results.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/io/gcp/datastore_write_it_test.py b/sdks/python/apache_beam/io/gcp/datastore_write_it_test.py
index 827cdfd..0a32ea1 100644
--- a/sdks/python/apache_beam/io/gcp/datastore_write_it_test.py
+++ b/sdks/python/apache_beam/io/gcp/datastore_write_it_test.py
@@ -25,6 +25,8 @@
results in the pipeline.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -43,7 +45,7 @@
try:
from apache_beam.io.gcp import datastore_write_it_pipeline
except TypeError:
- datastore_write_it_pipeline = None
+ datastore_write_it_pipeline = None # type: ignore
@unittest.skipIf(sys.version_info[0] == 3 and
diff --git a/sdks/python/apache_beam/io/gcp/gcsfilesystem.py b/sdks/python/apache_beam/io/gcp/gcsfilesystem.py
index a6081c8..d6dc838 100644
--- a/sdks/python/apache_beam/io/gcp/gcsfilesystem.py
+++ b/sdks/python/apache_beam/io/gcp/gcsfilesystem.py
@@ -16,9 +16,12 @@
#
"""GCS file system implementation for accessing files on GCS."""
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import zip
+from typing import BinaryIO # pylint: disable=unused-import
from future.utils import iteritems
@@ -139,6 +142,7 @@
def create(self, path, mime_type='application/octet-stream',
compression_type=CompressionTypes.AUTO):
+ # type: (...) -> BinaryIO
"""Returns a write channel for the given file path.
Args:
@@ -152,6 +156,7 @@
def open(self, path, mime_type='application/octet-stream',
compression_type=CompressionTypes.AUTO):
+ # type: (...) -> BinaryIO
"""Returns a read channel for the given file path.
Args:
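The `# type: (...) -> BinaryIO` comments added to `create()` and `open()` here use the Python 2-compatible comment form of PEP 484 annotations, which mypy reads in place of inline annotations. A small sketch of the style, independent of the FileSystem API (the function is hypothetical):

from typing import BinaryIO  # pylint: disable=unused-import


def open_for_read(path, buffer_size=65536):
  # type: (str, int) -> BinaryIO
  """The signature lives entirely in the comment; at runtime it is inert,
  but mypy checks callers against it."""
  return open(path, 'rb', buffering=buffer_size)

The `pylint: disable=unused-import` mirrors the diff: the name is referenced only inside type comments, which pylint does not count as a use.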
diff --git a/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py b/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py
index e181d7c..31d324e 100644
--- a/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py
+++ b/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py
@@ -18,6 +18,8 @@
"""Unit tests for GCS File System."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -37,7 +39,7 @@
try:
from apache_beam.io.gcp import gcsfilesystem
except ImportError:
- gcsfilesystem = None
+ gcsfilesystem = None # type: ignore
# pylint: enable=wrong-import-order, wrong-import-position
diff --git a/sdks/python/apache_beam/io/gcp/gcsio.py b/sdks/python/apache_beam/io/gcp/gcsio.py
index c1e0314..180ca41 100644
--- a/sdks/python/apache_beam/io/gcp/gcsio.py
+++ b/sdks/python/apache_beam/io/gcp/gcsio.py
@@ -20,6 +20,8 @@
https://github.com/GoogleCloudPlatform/appengine-gcs-client.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import errno
diff --git a/sdks/python/apache_beam/io/gcp/gcsio_integration_test.py b/sdks/python/apache_beam/io/gcp/gcsio_integration_test.py
index d4e387b..9ba8f61 100644
--- a/sdks/python/apache_beam/io/gcp/gcsio_integration_test.py
+++ b/sdks/python/apache_beam/io/gcp/gcsio_integration_test.py
@@ -36,6 +36,8 @@
-DkmsKeyName=KMS_KEY_NAME
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -50,7 +52,7 @@
try:
from apache_beam.io.gcp import gcsio
except ImportError:
- gcsio = None
+ gcsio = None # type: ignore
@unittest.skipIf(gcsio is None, 'GCP dependencies are not installed')
diff --git a/sdks/python/apache_beam/io/gcp/gcsio_overrides.py b/sdks/python/apache_beam/io/gcp/gcsio_overrides.py
index 1be587d..72fde36 100644
--- a/sdks/python/apache_beam/io/gcp/gcsio_overrides.py
+++ b/sdks/python/apache_beam/io/gcp/gcsio_overrides.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/io/gcp/gcsio_test.py b/sdks/python/apache_beam/io/gcp/gcsio_test.py
index 5dcfbd8..fa71dd4 100644
--- a/sdks/python/apache_beam/io/gcp/gcsio_test.py
+++ b/sdks/python/apache_beam/io/gcp/gcsio_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
"""Tests for Google Cloud Storage client."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/gcp/internal/clients/bigquery/__init__.py b/sdks/python/apache_beam/io/gcp/internal/clients/bigquery/__init__.py
index e5d35e8..3630d04 100644
--- a/sdks/python/apache_beam/io/gcp/internal/clients/bigquery/__init__.py
+++ b/sdks/python/apache_beam/io/gcp/internal/clients/bigquery/__init__.py
@@ -32,4 +32,4 @@
pass
# pylint: enable=wrong-import-order, wrong-import-position
-__path__ = pkgutil.extend_path(__path__, __name__)
+__path__ = pkgutil.extend_path(__path__, __name__) # type: ignore
diff --git a/sdks/python/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py b/sdks/python/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py
index 25d3e52..5cae1c1 100644
--- a/sdks/python/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py
+++ b/sdks/python/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py
@@ -17,6 +17,7 @@
"""Generated client library for bigquery version v2."""
# NOTE: This file is autogenerated and should not be edited by hand.
+
from __future__ import absolute_import
from apitools.base.py import base_api
diff --git a/sdks/python/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_messages.py b/sdks/python/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_messages.py
index 61c64e5..6929149 100644
--- a/sdks/python/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_messages.py
+++ b/sdks/python/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_messages.py
@@ -20,6 +20,7 @@
A data platform for customers to create, manage, share and query data.
"""
# NOTE: This file is autogenerated and should not be edited by hand.
+
from __future__ import absolute_import
from apitools.base.protorpclite import message_types as _message_types
diff --git a/sdks/python/apache_beam/io/gcp/internal/clients/storage/__init__.py b/sdks/python/apache_beam/io/gcp/internal/clients/storage/__init__.py
index c263323..22011f8 100644
--- a/sdks/python/apache_beam/io/gcp/internal/clients/storage/__init__.py
+++ b/sdks/python/apache_beam/io/gcp/internal/clients/storage/__init__.py
@@ -32,4 +32,4 @@
pass
# pylint: enable=wrong-import-order, wrong-import-position
-__path__ = pkgutil.extend_path(__path__, __name__)
+__path__ = pkgutil.extend_path(__path__, __name__) # type: ignore
diff --git a/sdks/python/apache_beam/io/gcp/pubsub.py b/sdks/python/apache_beam/io/gcp/pubsub.py
index 0711b70..0496707 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub.py
@@ -22,10 +22,14 @@
This API is currently under development and is subject to change.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import re
from builtins import object
+from typing import Any
+from typing import Optional
from future.utils import iteritems
from past.builtins import unicode
@@ -87,6 +91,7 @@
@staticmethod
def _from_proto_str(proto_msg):
+ # type: (bytes) -> PubsubMessage
"""Construct from serialized form of ``PubsubMessage``.
Args:
@@ -121,6 +126,7 @@
@staticmethod
def _from_message(msg):
+ # type: (Any) -> PubsubMessage
"""Construct from ``google.cloud.pubsub_v1.subscriber.message.Message``.
https://googleapis.github.io/google-cloud-python/latest/pubsub/subscriber/api/message.html
@@ -134,8 +140,14 @@
"""A ``PTransform`` for reading from Cloud Pub/Sub."""
# Implementation note: This ``PTransform`` is overridden by DirectRunner.
- def __init__(self, topic=None, subscription=None, id_label=None,
- with_attributes=False, timestamp_attribute=None):
+ def __init__(self,
+ topic=None, # type: Optional[str]
+ subscription=None, # type: Optional[str]
+ id_label=None, # type: Optional[str]
+ with_attributes=False, # type: bool
+ timestamp_attribute=None # type: Optional[str]
+ ):
+ # type: (...) -> None
"""Initializes ``ReadFromPubSub``.
Args:
@@ -242,8 +254,13 @@
"""A ``PTransform`` for writing messages to Cloud Pub/Sub."""
# Implementation note: This ``PTransform`` is overridden by DirectRunner.
- def __init__(self, topic, with_attributes=False, id_label=None,
- timestamp_attribute=None):
+ def __init__(self,
+ topic, # type: str
+ with_attributes=False, # type: bool
+ id_label=None, # type: Optional[str]
+ timestamp_attribute=None # type: Optional[str]
+ ):
+ # type: (...) -> None
"""Initializes ``WriteToPubSub``.
Args:
@@ -267,6 +284,7 @@
@staticmethod
def to_proto_str(element):
+ # type: (PubsubMessage) -> bytes
if not isinstance(element, PubsubMessage):
raise TypeError('Unexpected element. Type: %s (expected: PubsubMessage), '
'value: %r' % (type(element), element))
@@ -327,8 +345,13 @@
fetches ``PubsubMessage`` protobufs.
"""
- def __init__(self, topic=None, subscription=None, id_label=None,
- with_attributes=False, timestamp_attribute=None):
+ def __init__(self,
+ topic=None, # type: Optional[str]
+ subscription=None, # type: Optional[str]
+ id_label=None, # type: Optional[str]
+ with_attributes=False, # type: bool
+ timestamp_attribute=None # type: Optional[str]
+ ):
self.coder = coders.BytesCoder()
self.full_topic = topic
self.full_subscription = subscription
@@ -385,7 +408,12 @@
This ``NativeSource`` is overridden by a native Pubsub implementation.
"""
- def __init__(self, topic, id_label, with_attributes, timestamp_attribute):
+ def __init__(self,
+ topic, # type: str
+ id_label, # type: Optional[str]
+ with_attributes, # type: bool
+ timestamp_attribute # type: Optional[str]
+ ):
self.coder = coders.BytesCoder()
self.full_topic = topic
self.id_label = id_label
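For multi-line signatures such as `ReadFromPubSub.__init__` above, the diff uses PEP 484's per-argument comment form: each parameter carries its own `# type:` comment and the return type is given once as `# type: (...) -> None`. A condensed sketch of the convention, with a hypothetical class:

from typing import Optional


class ReaderConfig(object):
  def __init__(self,
               topic=None,  # type: Optional[str]
               subscription=None,  # type: Optional[str]
               with_attributes=False,  # type: bool
               timestamp_attribute=None  # type: Optional[str]
              ):
    # type: (...) -> None
    self.topic = topic
    self.subscription = subscription
    self.with_attributes = with_attributes
    self.timestamp_attribute = timestamp_attribute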
diff --git a/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py b/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py
index 2c43786..6910d24 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py
@@ -17,6 +17,8 @@
"""
Integration test for Google Cloud Pub/Sub.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/io/gcp/pubsub_it_pipeline.py b/sdks/python/apache_beam/io/gcp/pubsub_it_pipeline.py
index 8a8c8b4..37e2e71 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub_it_pipeline.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub_it_pipeline.py
@@ -18,6 +18,8 @@
Test pipeline for use by pubsub_integration_test.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/io/gcp/pubsub_test.py b/sdks/python/apache_beam/io/gcp/pubsub_test.py
index 22c11b1..8912a3f 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub_test.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub_test.py
@@ -18,6 +18,8 @@
"""Unit tests for PubSub sources and sinks."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -326,7 +328,7 @@
transform_evaluator.TransformEvaluatorRegistry._test_evaluators_overrides = {
- _DirectReadFromPubSub: TestPubSubReadEvaluator,
+ _DirectReadFromPubSub: TestPubSubReadEvaluator, # type: ignore[dict-item]
}
diff --git a/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py b/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py
index 2e50763..57e9d83 100644
--- a/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py
+++ b/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py
@@ -17,8 +17,11 @@
"""Bigquery data verifier for end-to-end test."""
+# pytype: skip-file
+
from __future__ import absolute_import
+import concurrent
import logging
import sys
import time
@@ -48,9 +51,11 @@
_LOGGER = logging.getLogger(__name__)
-def retry_on_http_and_value_error(exception):
+def retry_on_http_timeout_and_value_error(exception):
"""Filter allowing retries on Bigquery errors and value error."""
- return isinstance(exception, (GoogleCloudError, ValueError))
+ return isinstance(exception, (GoogleCloudError,
+ ValueError,
+ concurrent.futures.TimeoutError))
class BigqueryMatcher(BaseMatcher):
@@ -94,7 +99,7 @@
@retry.with_exponential_backoff(
num_retries=MAX_RETRIES,
- retry_filter=retry_on_http_and_value_error)
+ retry_filter=retry_on_http_timeout_and_value_error)
def _query_with_retry(self):
"""Run Bigquery query with retry if got error http response"""
_LOGGER.info('Attempting to perform query %s to BQ', self.query)
@@ -204,7 +209,7 @@
@retry.with_exponential_backoff(
num_retries=MAX_RETRIES,
- retry_filter=retry_on_http_and_value_error)
+ retry_filter=retry_on_http_timeout_and_value_error)
def _get_table_with_retry(self, bigquery_wrapper):
return bigquery_wrapper.get_table(self.project, self.dataset, self.table)
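The rename above reflects that the filter now also retries `concurrent.futures.TimeoutError`, which BigQuery query jobs raise when `result(timeout=...)` expires. A hedged sketch of how such a filter plugs into Beam's backoff decorator; the query function, client and timeout value are illustrative, only the decorator signature comes from the diff:

import concurrent.futures

from google.cloud.exceptions import GoogleCloudError

from apache_beam.utils import retry


def retry_on_error_or_timeout(exception):
  """Retry on BigQuery errors, value errors and query timeouts."""
  return isinstance(exception, (GoogleCloudError,
                                ValueError,
                                concurrent.futures.TimeoutError))


@retry.with_exponential_backoff(num_retries=4,
                                retry_filter=retry_on_error_or_timeout)
def run_query(client, query):
  # Assumes `client` is a google.cloud.bigquery.Client; result() raises
  # concurrent.futures.TimeoutError if the job does not finish in time.
  return [row.values() for row in client.query(query).result(timeout=60)]

Without the timeout entry in the filter, a slow query would fail the matcher immediately instead of being retried with backoff.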
diff --git a/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher_test.py b/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher_test.py
index f6f4394..cfd562f 100644
--- a/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher_test.py
+++ b/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher_test.py
@@ -17,6 +17,8 @@
"""Unit test for Bigquery verifier"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher.py b/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher.py
index 53ebae4..0b9622c 100644
--- a/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher.py
+++ b/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher.py
@@ -17,6 +17,8 @@
"""PubSub verifier used for end-to-end test."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher_test.py b/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher_test.py
index 6107b35..f8adc8b 100644
--- a/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher_test.py
+++ b/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher_test.py
@@ -17,6 +17,8 @@
"""Unit test for PubSub verifier."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -135,9 +137,9 @@
hc_assert_that(self.mock_presult, self.pubsub_matcher)
self.assertEqual(mock_sub.pull.call_count, 1)
self.assertCountEqual([b'c', b'd'], self.pubsub_matcher.messages)
- self.assertTrue(
- '\nExpected: Expected 1 messages.\n but: Got 2 messages.'
- in str(error.exception.args[0]))
+ self.assertIn(
+ '\nExpected: Expected 1 messages.\n but: Got 2 messages.',
+ str(error.exception.args[0]))
self.assertEqual(mock_sub.pull.call_count, 1)
self.assertEqual(mock_sub.acknowledge.call_count, 1)
@@ -161,9 +163,9 @@
with self.assertRaises(AssertionError) as error:
hc_assert_that(self.mock_presult, self.pubsub_matcher)
self.assertEqual(mock_sub.pull.call_count, 1)
- self.assertTrue(
- '\nExpected: Expected 1 messages.\n but: Got 2 messages.'
- in str(error.exception.args[0]))
+ self.assertIn(
+ '\nExpected: Expected 1 messages.\n but: Got 2 messages.',
+ str(error.exception.args[0]))
def test_message_count_matcher_above_fail(self, mock_get_sub, unused_mock):
self.init_counter_matcher(expected_msg_len=1)
diff --git a/sdks/python/apache_beam/io/gcp/tests/utils.py b/sdks/python/apache_beam/io/gcp/tests/utils.py
index dbf8ac9..0664a00 100644
--- a/sdks/python/apache_beam/io/gcp/tests/utils.py
+++ b/sdks/python/apache_beam/io/gcp/tests/utils.py
@@ -18,6 +18,8 @@
"""Utility methods for testing on GCP."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/io/gcp/tests/utils_test.py b/sdks/python/apache_beam/io/gcp/tests/utils_test.py
index db547d9..de69f96 100644
--- a/sdks/python/apache_beam/io/gcp/tests/utils_test.py
+++ b/sdks/python/apache_beam/io/gcp/tests/utils_test.py
@@ -17,6 +17,8 @@
"""Unittest for GCP testing utils."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/io/hadoopfilesystem.py b/sdks/python/apache_beam/io/hadoopfilesystem.py
index 0abdbaf..dde807b 100644
--- a/sdks/python/apache_beam/io/hadoopfilesystem.py
+++ b/sdks/python/apache_beam/io/hadoopfilesystem.py
@@ -18,6 +18,8 @@
""":class:`~apache_beam.io.filesystem.FileSystem` implementation for accessing
Hadoop Distributed File System files."""
+# pytype: skip-file
+
from __future__ import absolute_import
import io
@@ -25,6 +27,7 @@
import posixpath
import re
from builtins import zip
+from typing import BinaryIO # pylint: disable=unused-import
import hdfs
@@ -209,6 +212,7 @@
def create(self, url, mime_type='application/octet-stream',
compression_type=CompressionTypes.AUTO):
+ # type: (...) -> BinaryIO
"""
Returns:
A Python File-like object.
@@ -226,6 +230,7 @@
def open(self, url, mime_type='application/octet-stream',
compression_type=CompressionTypes.AUTO):
+ # type: (...) -> BinaryIO
"""
Returns:
A Python File-like object.
@@ -316,6 +321,7 @@
raise BeamIOError('Rename operation failed', exceptions)
def exists(self, url):
+ # type: (str) -> bool
"""Checks existence of url in HDFS.
Args:
diff --git a/sdks/python/apache_beam/io/hadoopfilesystem_test.py b/sdks/python/apache_beam/io/hadoopfilesystem_test.py
index 42d1e2d..5c1414e 100644
--- a/sdks/python/apache_beam/io/hadoopfilesystem_test.py
+++ b/sdks/python/apache_beam/io/hadoopfilesystem_test.py
@@ -17,6 +17,8 @@
"""Unit tests for :class:`HadoopFileSystem`."""
+# pytype: skip-file
+
from __future__ import absolute_import
import io
@@ -38,7 +40,7 @@
class FakeFile(io.BytesIO):
"""File object for FakeHdfs"""
- __hash__ = None
+ __hash__ = None # type: ignore[assignment]
def __init__(self, path, mode='', type='FILE'):
io.BytesIO.__init__(self)
diff --git a/sdks/python/apache_beam/io/iobase.py b/sdks/python/apache_beam/io/iobase.py
index 0a1b211..affb6c1 100644
--- a/sdks/python/apache_beam/io/iobase.py
+++ b/sdks/python/apache_beam/io/iobase.py
@@ -29,6 +29,8 @@
the sink.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -40,6 +42,11 @@
from builtins import object
from builtins import range
from collections import namedtuple
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Iterator
+from typing import Optional
+from typing import Tuple
from apache_beam import coders
from apache_beam import pvalue
@@ -57,6 +64,11 @@
from apache_beam.utils import urns
from apache_beam.utils.windowed_value import WindowedValue
+if TYPE_CHECKING:
+ from apache_beam.io import restriction_trackers
+ from apache_beam.runners.pipeline_context import PipelineContext
+ from apache_beam.utils.timestamp import Timestamp
+
__all__ = ['BoundedSource', 'RangeTracker', 'Read', 'RestrictionTracker',
'Sink', 'Write', 'Writer']
@@ -90,6 +102,10 @@
"""
urns.RunnerApiFn.register_pickle_urn(python_urns.PICKLED_SOURCE)
+ def is_bounded(self):
+ # type: () -> bool
+ raise NotImplementedError
+
class BoundedSource(SourceBase):
"""A source that reads a finite amount of input records.
@@ -128,6 +144,7 @@
"""
def estimate_size(self):
+ # type: () -> Optional[int]
"""Estimates the size of source in bytes.
An estimate of the total size (in bytes) of the data that would be read
@@ -140,7 +157,12 @@
"""
raise NotImplementedError
- def split(self, desired_bundle_size, start_position=None, stop_position=None):
+ def split(self,
+ desired_bundle_size, # type: int
+ start_position=None, # type: Optional[Any]
+ stop_position=None, # type: Optional[Any]
+ ):
+ # type: (...) -> Iterator[SourceBundle]
"""Splits the source into a set of bundles.
Bundles should be approximately of size ``desired_bundle_size`` bytes.
@@ -157,7 +179,11 @@
"""
raise NotImplementedError
- def get_range_tracker(self, start_position, stop_position):
+ def get_range_tracker(self,
+ start_position, # type: Optional[Any]
+ stop_position, # type: Optional[Any]
+ ):
+ # type: (...) -> RangeTracker
"""Returns a RangeTracker for a given position range.
Framework may invoke ``read()`` method with the RangeTracker object returned
@@ -841,6 +867,7 @@
"""A transform that reads a PCollection."""
def __init__(self, source):
+ # type: (SourceBase) -> None
"""Initializes a Read transform.
Args:
@@ -867,9 +894,11 @@
is_bounded=self.source.is_bounded())
def get_windowing(self, unused_inputs):
+ # type: (...) -> core.Windowing
return core.Windowing(window.GlobalWindows())
def _infer_output_coder(self, input_type=None, input_coder=None):
+ # type: (...) -> Optional[coders.Coder]
if isinstance(self.source, BoundedSource):
return self.source.default_output_coder()
else:
@@ -881,6 +910,7 @@
'source_dd': self.source}
def to_runner_api_parameter(self, context):
+ # type: (PipelineContext) -> Tuple[str, beam_runner_api_pb2.ReadPayload]
return (common_urns.deprecated_primitives.READ.urn,
beam_runner_api_pb2.ReadPayload(
source=self.source.to_runner_api(context),
@@ -890,6 +920,7 @@
@staticmethod
def from_runner_api_parameter(parameter, context):
+ # type: (beam_runner_api_pb2.ReadPayload, PipelineContext) -> Read
return Read(SourceBase.from_runner_api(parameter.source, context))
@@ -960,6 +991,7 @@
"""Implements the writing of custom sinks."""
def __init__(self, sink):
+ # type: (Sink) -> None
super(WriteImpl, self).__init__()
self.sink = sink
@@ -1072,6 +1104,7 @@
class _RoundRobinKeyFn(core.DoFn):
def __init__(self, count):
+ # type: (int) -> None
self.count = count
def start_bundle(self):
@@ -1117,6 +1150,7 @@
raise NotImplementedError
def current_progress(self):
+ # type: () -> RestrictionProgress
"""Returns a RestrictionProgress object representing the current progress.
This API is recommended to be implemented. The runner can do a better job
@@ -1273,6 +1307,7 @@
return self._restriction_tracker.try_split(fraction_of_remainder)
def deferred_status(self):
+ # type: () -> Optional[Tuple[Any, Timestamp]]
"""Returns deferred work which is produced by ``defer_remainder()``.
When there is a self-checkpoint performed, the system needs to fulfill the
@@ -1449,6 +1484,7 @@
self.restriction = restriction
def current_progress(self):
+ # type: () -> RestrictionProgress
return RestrictionProgress(
fraction=self.restriction.range_tracker().fraction_consumed())
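The signatures annotated above (`estimate_size`, `split`, `get_range_tracker`) spell out the contract every `BoundedSource` implements. A compact sketch of that contract, modeled on the counting source from the Beam documentation; it is illustrative and not part of this change:

from apache_beam.io import iobase
from apache_beam.io.range_trackers import OffsetRangeTracker


class CountingSource(iobase.BoundedSource):
  """Toy bounded source emitting the integers [0, count)."""

  def __init__(self, count):
    self._count = count

  def estimate_size(self):
    # type: () -> int
    return self._count

  def get_range_tracker(self, start_position, stop_position):
    # type: (...) -> OffsetRangeTracker
    if start_position is None:
      start_position = 0
    if stop_position is None:
      stop_position = self._count
    return OffsetRangeTracker(start_position, stop_position)

  def split(self, desired_bundle_size, start_position=None,
            stop_position=None):
    # Yields SourceBundles of roughly desired_bundle_size elements each.
    if start_position is None:
      start_position = 0
    if stop_position is None:
      stop_position = self._count
    bundle_start = start_position
    while bundle_start < stop_position:
      bundle_stop = min(bundle_start + desired_bundle_size, stop_position)
      yield iobase.SourceBundle(
          weight=bundle_stop - bundle_start,
          source=self,
          start_position=bundle_start,
          stop_position=bundle_stop)
      bundle_start = bundle_stop

  def read(self, range_tracker):
    for i in range(range_tracker.start_position(),
                   range_tracker.stop_position()):
      if not range_tracker.try_claim(i):
        return
      yield i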
diff --git a/sdks/python/apache_beam/io/iobase_test.py b/sdks/python/apache_beam/io/iobase_test.py
index a574d20..886ade2 100644
--- a/sdks/python/apache_beam/io/iobase_test.py
+++ b/sdks/python/apache_beam/io/iobase_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the SDFRestrictionProvider module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import time
diff --git a/sdks/python/apache_beam/io/localfilesystem.py b/sdks/python/apache_beam/io/localfilesystem.py
index 20748a2..99e894e 100644
--- a/sdks/python/apache_beam/io/localfilesystem.py
+++ b/sdks/python/apache_beam/io/localfilesystem.py
@@ -16,11 +16,14 @@
#
"""Local File system implementation for accessing files on disk."""
+# pytype: skip-file
+
from __future__ import absolute_import
import os
import shutil
from builtins import zip
+from typing import BinaryIO # pylint: disable=unused-import
from apache_beam.io.filesystem import BeamIOError
from apache_beam.io.filesystem import CompressedFile
@@ -139,6 +142,7 @@
def create(self, path, mime_type='application/octet-stream',
compression_type=CompressionTypes.AUTO):
+ # type: (...) -> BinaryIO
"""Returns a write channel for the given file path.
Args:
@@ -152,6 +156,7 @@
def open(self, path, mime_type='application/octet-stream',
compression_type=CompressionTypes.AUTO):
+ # type: (...) -> BinaryIO
"""Returns a read channel for the given file path.
Args:
diff --git a/sdks/python/apache_beam/io/localfilesystem_test.py b/sdks/python/apache_beam/io/localfilesystem_test.py
index 20fa593..c22c657 100644
--- a/sdks/python/apache_beam/io/localfilesystem_test.py
+++ b/sdks/python/apache_beam/io/localfilesystem_test.py
@@ -18,6 +18,8 @@
"""Unit tests for LocalFileSystem."""
+# pytype: skip-file
+
from __future__ import absolute_import
import filecmp
diff --git a/sdks/python/apache_beam/io/mongodbio.py b/sdks/python/apache_beam/io/mongodbio.py
index fba5b43..8ec8df4 100644
--- a/sdks/python/apache_beam/io/mongodbio.py
+++ b/sdks/python/apache_beam/io/mongodbio.py
@@ -51,6 +51,8 @@
No backward compatibility guarantees. Everything in this module is experimental.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/mongodbio_it_test.py b/sdks/python/apache_beam/io/mongodbio_it_test.py
index b315562..c8fe2e0 100644
--- a/sdks/python/apache_beam/io/mongodbio_it_test.py
+++ b/sdks/python/apache_beam/io/mongodbio_it_test.py
@@ -15,12 +15,16 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
import logging
import time
+from pymongo import MongoClient
+
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.testing.test_pipeline import TestPipeline
@@ -29,6 +33,16 @@
_LOGGER = logging.getLogger(__name__)
+class GenerateDocs(beam.DoFn):
+
+ def process(self, num_docs, *args, **kwargs):
+ for i in range(num_docs):
+ yield {
+ 'number': i,
+ 'number_mod_2': i % 2,
+ 'number_mod_3': i % 3
+ }
+
def run(argv=None):
default_db = 'beam_mongodbio_it_db'
@@ -50,6 +64,7 @@
type=int)
parser.add_argument('--batch_size',
default=10000,
+ type=int,
help=('batch size for writing to mongodb'))
known_args, pipeline_args = parser.parse_known_args(argv)
@@ -57,17 +72,13 @@
with TestPipeline(options=PipelineOptions(pipeline_args)) as p:
start_time = time.time()
_LOGGER.info('Writing %d documents to mongodb' % known_args.num_documents)
- docs = [{
- 'number': x,
- 'number_mod_2': x % 2,
- 'number_mod_3': x % 3
- } for x in range(known_args.num_documents)]
- _ = p | 'Create documents' >> beam.Create(docs) \
- | 'WriteToMongoDB' >> beam.io.WriteToMongoDB(known_args.mongo_uri,
- known_args.mongo_db,
- known_args.mongo_coll,
- known_args.batch_size)
+ _ = (p | beam.Create([known_args.num_documents])
+ | 'Create documents' >> beam.ParDo(GenerateDocs())
+ | 'WriteToMongoDB' >> beam.io.WriteToMongoDB(known_args.mongo_uri,
+ known_args.mongo_db,
+ known_args.mongo_coll,
+ known_args.batch_size))
elapsed = time.time() - start_time
_LOGGER.info('Writing %d documents to mongodb finished in %.3f seconds' %
(known_args.num_documents, elapsed))
@@ -77,18 +88,20 @@
start_time = time.time()
_LOGGER.info('Reading from mongodb %s:%s' %
(known_args.mongo_db, known_args.mongo_coll))
- r = p | 'ReadFromMongoDB' >> \
- beam.io.ReadFromMongoDB(known_args.mongo_uri,
- known_args.mongo_db,
- known_args.mongo_coll,
- projection=['number']) \
- | 'Map' >> beam.Map(lambda doc: doc['number'])
+ r = (p | 'ReadFromMongoDB' >> beam.io.ReadFromMongoDB(known_args.mongo_uri,
+ known_args.mongo_db,
+ known_args.mongo_coll,
+ projection=['number'])
+ | 'Map' >> beam.Map(lambda doc: doc['number'])
+ | 'Combine' >> beam.CombineGlobally(sum))
assert_that(
- r, equal_to([number for number in range(known_args.num_documents)]))
+ r, equal_to([sum(range(known_args.num_documents))]))
elapsed = time.time() - start_time
_LOGGER.info('Read %d documents from mongodb finished in %.3f seconds' %
(known_args.num_documents, elapsed))
+ with MongoClient(host=known_args.mongo_uri) as client:
+ client.drop_database(known_args.mongo_db)
if __name__ == "__main__":
diff --git a/sdks/python/apache_beam/io/mongodbio_test.py b/sdks/python/apache_beam/io/mongodbio_test.py
index 72ae0ad..0c9c56a 100644
--- a/sdks/python/apache_beam/io/mongodbio_test.py
+++ b/sdks/python/apache_beam/io/mongodbio_test.py
@@ -14,6 +14,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/parquetio.py b/sdks/python/apache_beam/io/parquetio.py
index a4e894cd..062b46f 100644
--- a/sdks/python/apache_beam/io/parquetio.py
+++ b/sdks/python/apache_beam/io/parquetio.py
@@ -27,6 +27,8 @@
that can be used to write a given ``PCollection`` of Python objects to a
Parquet file.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from functools import partial
diff --git a/sdks/python/apache_beam/io/parquetio_it_test.py b/sdks/python/apache_beam/io/parquetio_it_test.py
index 0b58f47..cc85cd2 100644
--- a/sdks/python/apache_beam/io/parquetio_it_test.py
+++ b/sdks/python/apache_beam/io/parquetio_it_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/parquetio_test.py b/sdks/python/apache_beam/io/parquetio_test.py
index 719bf55..195d52d 100644
--- a/sdks/python/apache_beam/io/parquetio_test.py
+++ b/sdks/python/apache_beam/io/parquetio_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import json
diff --git a/sdks/python/apache_beam/io/range_trackers.py b/sdks/python/apache_beam/io/range_trackers.py
index d4845fb..4deb5f4 100644
--- a/sdks/python/apache_beam/io/range_trackers.py
+++ b/sdks/python/apache_beam/io/range_trackers.py
@@ -17,6 +17,8 @@
"""iobase.RangeTracker implementations provided with Apache Beam.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/range_trackers_test.py b/sdks/python/apache_beam/io/range_trackers_test.py
index e80401f..782807d 100644
--- a/sdks/python/apache_beam/io/range_trackers_test.py
+++ b/sdks/python/apache_beam/io/range_trackers_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for the range_trackers module."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/restriction_trackers.py b/sdks/python/apache_beam/io/restriction_trackers.py
index 20bb5c1..36b2214 100644
--- a/sdks/python/apache_beam/io/restriction_trackers.py
+++ b/sdks/python/apache_beam/io/restriction_trackers.py
@@ -16,10 +16,13 @@
#
"""`iobase.RestrictionTracker` implementations provided with Apache Beam."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from builtins import object
+from typing import Tuple
from apache_beam.io.iobase import RestrictionProgress
from apache_beam.io.iobase import RestrictionTracker
@@ -66,6 +69,7 @@
current_split_start = current_split_stop
def split_at(self, split_pos):
+ # type: (...) -> Tuple[OffsetRange, OffsetRange]
return OffsetRange(self.start, split_pos), OffsetRange(split_pos, self.stop)
def new_tracker(self):
@@ -82,6 +86,7 @@
"""
def __init__(self, offset_range):
+ # type: (OffsetRange) -> None
assert isinstance(offset_range, OffsetRange)
self._range = offset_range
self._current_position = None
@@ -101,6 +106,7 @@
return self._range
def current_progress(self):
+ # type: () -> RestrictionProgress
if self._current_position is None:
fraction = 0.0
elif self._range.stop == self._range.start:
diff --git a/sdks/python/apache_beam/io/restriction_trackers_test.py b/sdks/python/apache_beam/io/restriction_trackers_test.py
index 4a57d98..aaebb09 100644
--- a/sdks/python/apache_beam/io/restriction_trackers_test.py
+++ b/sdks/python/apache_beam/io/restriction_trackers_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the range_trackers module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/io/source_test_utils.py b/sdks/python/apache_beam/io/source_test_utils.py
index 7291786..3afc650 100644
--- a/sdks/python/apache_beam/io/source_test_utils.py
+++ b/sdks/python/apache_beam/io/source_test_utils.py
@@ -43,6 +43,8 @@
* apache_beam.io.source_test_utils_test.py
* apache_beam.io.avroio_test.py
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/source_test_utils_test.py b/sdks/python/apache_beam/io/source_test_utils_test.py
index a8c3d82..3ee00be 100644
--- a/sdks/python/apache_beam/io/source_test_utils_test.py
+++ b/sdks/python/apache_beam/io/source_test_utils_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/io/sources_test.py b/sdks/python/apache_beam/io/sources_test.py
index 8908681..e210a5b 100644
--- a/sdks/python/apache_beam/io/sources_test.py
+++ b/sdks/python/apache_beam/io/sources_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for the sources framework."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/io/textio.py b/sdks/python/apache_beam/io/textio.py
index 3b426cc..0514992 100644
--- a/sdks/python/apache_beam/io/textio.py
+++ b/sdks/python/apache_beam/io/textio.py
@@ -18,12 +18,15 @@
"""A source and a sink for reading from and writing to text files."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
from builtins import object
from builtins import range
from functools import partial
+from typing import Optional
from past.builtins import long
@@ -95,7 +98,7 @@
min_bundle_size,
compression_type,
strip_trailing_newlines,
- coder,
+ coder, # type: coders.Coder
buffer_size=DEFAULT_READ_BUFFER_SIZE,
validate=True,
skip_header_lines=0,
@@ -341,7 +344,7 @@
append_trailing_newlines=True,
num_shards=0,
shard_name_template=None,
- coder=coders.ToStringCoder(),
+ coder=coders.ToStringCoder(), # type: coders.Coder
compression_type=CompressionTypes.AUTO,
header=None):
"""Initialize a _TextSink.
@@ -443,7 +446,7 @@
desired_bundle_size=DEFAULT_DESIRED_BUNDLE_SIZE,
compression_type=CompressionTypes.AUTO,
strip_trailing_newlines=True,
- coder=coders.StrUtf8Coder(),
+ coder=coders.StrUtf8Coder(), # type: coders.Coder
skip_header_lines=0,
**kwargs):
"""Initialize the ``ReadAllFromText`` transform.
@@ -504,7 +507,7 @@
min_bundle_size=0,
compression_type=CompressionTypes.AUTO,
strip_trailing_newlines=True,
- coder=coders.StrUtf8Coder(),
+ coder=coders.StrUtf8Coder(), # type: coders.Coder
validate=True,
skip_header_lines=0,
**kwargs):
@@ -559,12 +562,12 @@
def __init__(
self,
- file_path_prefix,
+ file_path_prefix, # type: str
file_name_suffix='',
append_trailing_newlines=True,
num_shards=0,
- shard_name_template=None,
- coder=coders.ToStringCoder(),
+ shard_name_template=None, # type: Optional[str]
+ coder=coders.ToStringCoder(), # type: coders.Coder
compression_type=CompressionTypes.AUTO,
header=None):
r"""Initialize a :class:`WriteToText` transform.
diff --git a/sdks/python/apache_beam/io/textio_test.py b/sdks/python/apache_beam/io/textio_test.py
index ad336c5..0761e13 100644
--- a/sdks/python/apache_beam/io/textio_test.py
+++ b/sdks/python/apache_beam/io/textio_test.py
@@ -16,6 +16,8 @@
#
"""Tests for textio module."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/io/tfrecordio.py b/sdks/python/apache_beam/io/tfrecordio.py
index ab7d2f5..734c598 100644
--- a/sdks/python/apache_beam/io/tfrecordio.py
+++ b/sdks/python/apache_beam/io/tfrecordio.py
@@ -16,6 +16,8 @@
#
"""TFRecord sources and sinks."""
+# pytype: skip-file
+
from __future__ import absolute_import
import codecs
@@ -64,7 +66,7 @@
return _default_crc32c_fn.fn(value)
-_default_crc32c_fn.fn = None
+_default_crc32c_fn.fn = None # type: ignore
class _TFRecordUtil(object):
diff --git a/sdks/python/apache_beam/io/tfrecordio_test.py b/sdks/python/apache_beam/io/tfrecordio_test.py
index dfb154a..9ed525b 100644
--- a/sdks/python/apache_beam/io/tfrecordio_test.py
+++ b/sdks/python/apache_beam/io/tfrecordio_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import binascii
diff --git a/sdks/python/apache_beam/io/utils.py b/sdks/python/apache_beam/io/utils.py
index d5912ae..8b12fea 100644
--- a/sdks/python/apache_beam/io/utils.py
+++ b/sdks/python/apache_beam/io/utils.py
@@ -19,6 +19,8 @@
on transforms.ptransform_test.test_read_metrics.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import range
diff --git a/sdks/python/apache_beam/io/utils_test.py b/sdks/python/apache_beam/io/utils_test.py
index 94003cc..99b4b49 100644
--- a/sdks/python/apache_beam/io/utils_test.py
+++ b/sdks/python/apache_beam/io/utils_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/io/vcfio.py b/sdks/python/apache_beam/io/vcfio.py
index aed3579..c6e5502 100644
--- a/sdks/python/apache_beam/io/vcfio.py
+++ b/sdks/python/apache_beam/io/vcfio.py
@@ -20,6 +20,8 @@
The 4.2 spec is available at https://samtools.github.io/hts-specs/VCFv4.2.pdf.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -82,7 +84,7 @@
Each object corresponds to a single record in a VCF file.
"""
- __hash__ = None
+ __hash__ = None # type: ignore[assignment]
def __init__(self,
reference_name=None,
@@ -202,7 +204,7 @@
variant. It may include associated information such as quality and phasing.
"""
- __hash__ = None
+ __hash__ = None # type: ignore[assignment]
def __init__(self, name=None, genotype=None, phaseset=None, info=None):
"""Initialize the :class:`VariantCall` object.
diff --git a/sdks/python/apache_beam/io/vcfio_test.py b/sdks/python/apache_beam/io/vcfio_test.py
index 9a4b793..0c820ab 100644
--- a/sdks/python/apache_beam/io/vcfio_test.py
+++ b/sdks/python/apache_beam/io/vcfio_test.py
@@ -17,6 +17,8 @@
"""Tests for vcfio module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/metrics/cells.py b/sdks/python/apache_beam/metrics/cells.py
index 103be44..0b6caa6 100644
--- a/sdks/python/apache_beam/metrics/cells.py
+++ b/sdks/python/apache_beam/metrics/cells.py
@@ -21,6 +21,8 @@
context.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -363,11 +365,13 @@
return GaugeData(value, timestamp=timestamp)
def to_runner_api(self):
+ # type: () -> beam_fn_api_pb2.Metrics.User.GaugeData
return beam_fn_api_pb2.Metrics.User.GaugeData(
value=self.value, timestamp=proto_utils.to_Timestamp(self.timestamp))
@staticmethod
def from_runner_api(proto):
+ # type: (beam_fn_api_pb2.Metrics.User.GaugeData) -> GaugeData
return GaugeData(proto.value,
timestamp=proto_utils.from_Timestamp(proto.timestamp))
@@ -440,11 +444,13 @@
return DistributionData(value, 1, value, value)
def to_runner_api(self):
+ # type: () -> beam_fn_api_pb2.Metrics.User.DistributionData
return beam_fn_api_pb2.Metrics.User.DistributionData(
count=self.count, sum=self.sum, min=self.min, max=self.max)
@staticmethod
def from_runner_api(proto):
+ # type: (beam_fn_api_pb2.Metrics.User.DistributionData) -> DistributionData
return DistributionData(proto.sum, proto.count, proto.min, proto.max)
def to_runner_api_monitoring_info(self):
diff --git a/sdks/python/apache_beam/metrics/cells_test.py b/sdks/python/apache_beam/metrics/cells_test.py
index d50cc9c..ca59695 100644
--- a/sdks/python/apache_beam/metrics/cells_test.py
+++ b/sdks/python/apache_beam/metrics/cells_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import threading
diff --git a/sdks/python/apache_beam/metrics/execution.py b/sdks/python/apache_beam/metrics/execution.py
index 6918914..ac08407 100644
--- a/sdks/python/apache_beam/metrics/execution.py
+++ b/sdks/python/apache_beam/metrics/execution.py
@@ -30,6 +30,8 @@
unit-of-commit (bundle).
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
diff --git a/sdks/python/apache_beam/metrics/execution_test.py b/sdks/python/apache_beam/metrics/execution_test.py
index fc363a4..ebaa106 100644
--- a/sdks/python/apache_beam/metrics/execution_test.py
+++ b/sdks/python/apache_beam/metrics/execution_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/metrics/metric.py b/sdks/python/apache_beam/metrics/metric.py
index 8bbe191..edefae5 100644
--- a/sdks/python/apache_beam/metrics/metric.py
+++ b/sdks/python/apache_beam/metrics/metric.py
@@ -24,6 +24,8 @@
- Metrics - This class lets pipeline and transform writers create and access
metric objects such as counters, distributions, etc.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import inspect
diff --git a/sdks/python/apache_beam/metrics/metric_test.py b/sdks/python/apache_beam/metrics/metric_test.py
index cb18dc7..ce5bc2e 100644
--- a/sdks/python/apache_beam/metrics/metric_test.py
+++ b/sdks/python/apache_beam/metrics/metric_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/metrics/metricbase.py b/sdks/python/apache_beam/metrics/metricbase.py
index 420bfef..af78771 100644
--- a/sdks/python/apache_beam/metrics/metricbase.py
+++ b/sdks/python/apache_beam/metrics/metricbase.py
@@ -32,6 +32,8 @@
- MetricName - Namespace and name used to refer to a Metric.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
diff --git a/sdks/python/apache_beam/metrics/monitoring_infos.py b/sdks/python/apache_beam/metrics/monitoring_infos.py
index b75ef3a..fac301c 100644
--- a/sdks/python/apache_beam/metrics/monitoring_infos.py
+++ b/sdks/python/apache_beam/metrics/monitoring_infos.py
@@ -18,11 +18,16 @@
# cython: language_level=3
# cython: profile=True
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
import time
from functools import reduce
+from typing import FrozenSet
+from typing import Hashable
+from typing import List
from google.protobuf import timestamp_pb2
@@ -129,6 +134,7 @@
def int64_user_counter(namespace, name, metric, ptransform=None, tag=None):
+ # type: (...) -> metrics_pb2.MonitoringInfo
"""Return the counter monitoring info for the specifed URN, metric and labels.
Args:
@@ -151,6 +157,7 @@
def int64_counter(urn, metric, ptransform=None, tag=None):
+ # type: (...) -> metrics_pb2.MonitoringInfo
"""Return the counter monitoring info for the specifed URN, metric and labels.
Args:
@@ -187,6 +194,7 @@
def int64_distribution(urn, metric, ptransform=None, tag=None):
+ # type: (...) -> metrics_pb2.MonitoringInfo
"""Return a distribution monitoring info for the URN, metric and labels.
Args:
@@ -201,6 +209,7 @@
def int64_user_gauge(namespace, name, metric, ptransform=None, tag=None):
+ # type: (...) -> metrics_pb2.MonitoringInfo
"""Return the gauge monitoring info for the URN, metric and labels.
Args:
@@ -237,6 +246,7 @@
def create_monitoring_info(urn, type_urn, metric_proto, labels=None):
+ # type: (...) -> metrics_pb2.MonitoringInfo
"""Return the gauge monitoring info for the URN, type, metric and labels.
Args:
@@ -327,11 +337,12 @@
def to_key(monitoring_info_proto):
+ # type: (metrics_pb2.MonitoringInfo) -> FrozenSet[Hashable]
"""Returns a key based on the URN and labels.
This is useful in maps to prevent reporting the same MonitoringInfo twice.
"""
- key_items = list(monitoring_info_proto.labels.items())
+ key_items = list(monitoring_info_proto.labels.items()) # type: List[Hashable]
key_items.append(monitoring_info_proto.urn)
return frozenset(key_items)
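`to_key` above builds a hashable identity for a MonitoringInfo from its labels plus URN, so sets and maps can drop duplicate reports. A small sketch of the deduplication it enables; the URN and label values are illustrative:

def to_key(urn, labels):
  # Mirrors monitoring_infos.to_key: frozenset of label items plus the URN.
  key_items = list(labels.items())
  key_items.append(urn)
  return frozenset(key_items)


reported = [
    ('beam:metric:element_count:v1', {'PTRANSFORM': 'Map(parse)'}),
    ('beam:metric:element_count:v1', {'PTRANSFORM': 'Map(parse)'}),
    ('beam:metric:element_count:v1', {'PTRANSFORM': 'GroupByKey'}),
]
seen = set()
deduped = []
for urn, labels in reported:
  key = to_key(urn, labels)
  if key not in seen:
    seen.add(key)
    deduped.append((urn, labels))
assert len(deduped) == 2  # the duplicate Map(parse) report is dropped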
diff --git a/sdks/python/apache_beam/metrics/monitoring_infos_test.py b/sdks/python/apache_beam/metrics/monitoring_infos_test.py
index 466969a..696d629 100644
--- a/sdks/python/apache_beam/metrics/monitoring_infos_test.py
+++ b/sdks/python/apache_beam/metrics/monitoring_infos_test.py
@@ -14,6 +14,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py
index cc403c3..3d6293e 100644
--- a/sdks/python/apache_beam/options/pipeline_options.py
+++ b/sdks/python/apache_beam/options/pipeline_options.py
@@ -17,6 +17,8 @@
"""Pipeline options obtained from command line parsing."""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
@@ -26,6 +28,13 @@
import subprocess
from builtins import list
from builtins import object
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Type
+from typing import TypeVar
from apache_beam.options.value_provider import RuntimeValueProvider
from apache_beam.options.value_provider import StaticValueProvider
@@ -47,6 +56,8 @@
'TestOptions',
]
+PipelineOptionsT = TypeVar('PipelineOptionsT', bound='PipelineOptions')
+
_LOGGER = logging.getLogger(__name__)
@@ -161,6 +172,7 @@
the options.
"""
def __init__(self, flags=None, **kwargs):
+ # type: (Optional[List[str]], **Any) -> None
"""Initialize an options class.
The initializer will traverse all subclasses, add all their argparse
@@ -175,6 +187,9 @@
**kwargs: Add overrides for arguments passed in flags.
"""
+ # Initializing logging configuration in case the user did not set it up.
+ logging.basicConfig()
+
# self._flags stores a list of not yet parsed arguments, typically,
# command-line flags. This list is shared across different views.
# See: view_as().
@@ -187,7 +202,7 @@
if cls == PipelineOptions:
break
elif '_add_argparse_args' in cls.__dict__:
- cls._add_argparse_args(parser)
+ cls._add_argparse_args(parser) # type: ignore
# The _visible_options attribute will contain options that were recognized
# by the parser.
@@ -211,6 +226,7 @@
@classmethod
def _add_argparse_args(cls, parser):
+ # type: (_BeamArgumentParser) -> None
# Override this in subclasses to provide options.
pass
@@ -237,7 +253,11 @@
return cls(flags)
- def get_all_options(self, drop_default=False, add_extra_args_fn=None):
+ def get_all_options(self,
+ drop_default=False,
+ add_extra_args_fn=None # type: Optional[Callable[[_BeamArgumentParser], None]]
+ ):
+ # type: (...) -> Dict[str, Any]
"""Returns a dictionary of all defined arguments.
Returns a dictionary of all defined arguments (arguments that are defined in
@@ -283,6 +303,7 @@
return self.get_all_options(True)
def view_as(self, cls):
+ # type: (Type[PipelineOptionsT]) -> PipelineOptionsT
"""Returns a view of current object as provided PipelineOption subclass.
Example Usage::
@@ -321,10 +342,12 @@
return view
def _visible_option_list(self):
+ # type: () -> List[str]
return sorted(option
for option in dir(self._visible_options) if option[0] != '_')
def __dir__(self):
+ # type: () -> List[str]
return sorted(dir(type(self)) + list(self.__dict__) +
self._visible_option_list())
@@ -417,6 +440,12 @@
type=int,
default=1,
help='number of parallel running workers.')
+ parser.add_argument(
+ '--direct_running_mode',
+ default='in_memory',
+ choices=['in_memory', 'multi_threading', 'multi_processing'],
+ help='Workers running environment.'
+ )
class GoogleCloudOptions(PipelineOptions):
@@ -972,6 +1001,28 @@
' cluster address. Requires Python 3.6+.')
+class SparkRunnerOptions(PipelineOptions):
+ @classmethod
+ def _add_argparse_args(cls, parser):
+ parser.add_argument('--spark_master_url',
+ default='local[4]',
+ help='Spark master URL (spark://HOST:PORT). '
+ 'Use "local" (single-threaded) or "local[*]" '
+ '(multi-threaded) to start a local cluster for '
+ 'the execution.')
+ parser.add_argument('--spark_job_server_jar',
+ help='Path or URL to a Beam Spark jobserver jar.')
+ parser.add_argument('--spark_submit_uber_jar',
+ default=False,
+ action='store_true',
+ help='Create and upload an uber jar to the Spark REST'
+ ' endpoint, rather than starting up a job server.'
+ ' Requires Python 3.6+.')
+ parser.add_argument('--spark_rest_url',
+ help='URL for the Spark REST endpoint. '
+ 'Only required when using spark_submit_uber_jar.')
+
+
class TestOptions(PipelineOptions):
@classmethod
@@ -1034,7 +1085,7 @@
Can also be used as a decorator.
"""
- overrides = []
+ overrides = [] # type: List[Dict[str, Any]]
def __init__(self, **options):
self.options = options
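With `view_as` now typed as `(Type[PipelineOptionsT]) -> PipelineOptionsT`, a type checker knows the returned view exposes the attributes of the requested subclass. A short usage sketch combining that with the `--direct_running_mode` flag added above; the custom options class and values are illustrative, and `DirectOptions` is assumed to be the existing subclass that already holds `--direct_num_workers`:

from apache_beam.options.pipeline_options import DirectOptions
from apache_beam.options.pipeline_options import PipelineOptions


class MyOptions(PipelineOptions):
  @classmethod
  def _add_argparse_args(cls, parser):
    parser.add_argument('--input_path', help='Path to the input file.')


options = PipelineOptions([
    '--direct_running_mode', 'multi_threading',
    '--direct_num_workers', '2',
    '--input_path', '/tmp/input.txt',
])
# Each view_as() call returns a typed view over the same underlying flags.
assert options.view_as(DirectOptions).direct_running_mode == 'multi_threading'
assert options.view_as(MyOptions).input_path == '/tmp/input.txt'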
diff --git a/sdks/python/apache_beam/options/pipeline_options_test.py b/sdks/python/apache_beam/options/pipeline_options_test.py
index fdedfc4..50b6d32 100644
--- a/sdks/python/apache_beam/options/pipeline_options_test.py
+++ b/sdks/python/apache_beam/options/pipeline_options_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the pipeline options module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -54,6 +56,20 @@
'mock_option': None,
'mock_multi_option': None},
'display_data': [DisplayDataItemMatcher('direct_num_workers', 5)]},
+ {'flags': ['--direct_running_mode', 'multi_threading'],
+ 'expected': {'direct_running_mode': 'multi_threading',
+ 'mock_flag': False,
+ 'mock_option': None,
+ 'mock_multi_option': None},
+ 'display_data': [DisplayDataItemMatcher('direct_running_mode',
+ 'multi_threading')]},
+ {'flags': ['--direct_running_mode', 'multi_processing'],
+ 'expected': {'direct_running_mode': 'multi_processing',
+ 'mock_flag': False,
+ 'mock_option': None,
+ 'mock_multi_option': None},
+ 'display_data': [DisplayDataItemMatcher('direct_running_mode',
+ 'multi_processing')]},
{
'flags': [
'--profile_cpu', '--profile_location', 'gs://bucket/', 'ignored'],
diff --git a/sdks/python/apache_beam/options/pipeline_options_validator.py b/sdks/python/apache_beam/options/pipeline_options_validator.py
index 33c35b3..900a617 100644
--- a/sdks/python/apache_beam/options/pipeline_options_validator.py
+++ b/sdks/python/apache_beam/options/pipeline_options_validator.py
@@ -19,6 +19,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import re
diff --git a/sdks/python/apache_beam/options/pipeline_options_validator_test.py b/sdks/python/apache_beam/options/pipeline_options_validator_test.py
index f380f9e..e7b474d 100644
--- a/sdks/python/apache_beam/options/pipeline_options_validator_test.py
+++ b/sdks/python/apache_beam/options/pipeline_options_validator_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the pipeline options validator module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/options/value_provider.py b/sdks/python/apache_beam/options/value_provider.py
index ca6536d..4ff5ad7 100644
--- a/sdks/python/apache_beam/options/value_provider.py
+++ b/sdks/python/apache_beam/options/value_provider.py
@@ -19,10 +19,13 @@
and dynamically provided values.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
from functools import wraps
+from typing import Set
from apache_beam import error
@@ -79,7 +82,7 @@
class RuntimeValueProvider(ValueProvider):
runtime_options = None
- experiments = set()
+ experiments = set() # type: Set[str]
def __init__(self, option_name, value_type, default_value):
self.option_name = option_name
diff --git a/sdks/python/apache_beam/options/value_provider_test.py b/sdks/python/apache_beam/options/value_provider_test.py
index 8f530a0..6c81bb6 100644
--- a/sdks/python/apache_beam/options/value_provider_test.py
+++ b/sdks/python/apache_beam/options/value_provider_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the ValueProvider class."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -34,6 +36,11 @@
# <file name acronym>_non_vp_arg<number> for non-value-provider arguments.
# The number will grow per file as tests are added.
class ValueProviderTests(unittest.TestCase):
+ def setUp(self):
+ # Reset runtime options, since the is_accessible assertions require them to
+ # be uninitialized.
+ RuntimeValueProvider.set_runtime_options(None)
+
def test_static_value_provider_keyword_argument(self):
class UserDefinedOptions(PipelineOptions):
@classmethod
@@ -196,9 +203,6 @@
self.assertTrue(isinstance(RuntimeValueProvider.experiments, set))
self.assertTrue('feature_1' in RuntimeValueProvider.experiments)
self.assertTrue('feature_2' in RuntimeValueProvider.experiments)
- # Clean up runtime_options after this test case finish, otherwise, it'll
- # affect other cases since runtime_options is static attr
- RuntimeValueProvider.set_runtime_options(None)
def test_experiments_options_setup(self):
options = PipelineOptions(['--experiments', 'a', '--experiments', 'b,c'])
diff --git a/sdks/python/apache_beam/pipeline.py b/sdks/python/apache_beam/pipeline.py
index bc52e72..1dbafd9 100644
--- a/sdks/python/apache_beam/pipeline.py
+++ b/sdks/python/apache_beam/pipeline.py
@@ -44,6 +44,8 @@
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import abc
@@ -54,6 +56,15 @@
import tempfile
from builtins import object
from builtins import zip
+from typing import TYPE_CHECKING
+from typing import Dict
+from typing import Iterable
+from typing import List
+from typing import Optional
+from typing import Sequence
+from typing import Set
+from typing import Tuple
+from typing import Union
from future.utils import with_metaclass
@@ -76,9 +87,15 @@
from apache_beam.utils.annotations import deprecated
from apache_beam.utils.interactive_utils import alter_label_if_ipython
+if TYPE_CHECKING:
+ from apache_beam.portability.api import beam_runner_api_pb2
+ from apache_beam.runners.pipeline_context import PipelineContext
+ from apache_beam.runners.runner import PipelineResult
+
__all__ = ['Pipeline', 'PTransformOverride']
+
class Pipeline(object):
"""A pipeline object that manages a DAG of
:class:`~apache_beam.pvalue.PValue` s and their
@@ -94,7 +111,18 @@
(e.g. ``input | "label" >> my_transform``).
"""
+ # TODO: BEAM-9001 - set environment ID in all transforms and allow runners to
+ # override.
+ @classmethod
+ def sdk_transforms_with_environment(cls):
+ from apache_beam.runners.portability import fn_api_runner_transforms
+ sets = [fn_api_runner_transforms.PAR_DO_URNS,
+ fn_api_runner_transforms.COMBINE_URNS,
+ frozenset([common_urns.primitives.ASSIGN_WINDOWS.urn])]
+ return frozenset().union(*sets)
+
def __init__(self, runner=None, options=None, argv=None):
+ # type: (Optional[Union[str, PipelineRunner]], Optional[PipelineOptions], Optional[List[str]]) -> None
"""Initialize a pipeline object.
Args:
@@ -115,6 +143,9 @@
~exceptions.ValueError: if either the runner or options argument is not
of the expected type.
"""
+ # Initializing logging configuration in case the user did not set it up.
+ logging.basicConfig()
+
if options is not None:
if isinstance(options, PipelineOptions):
self._options = options
@@ -169,9 +200,9 @@
# Set of transform labels (full labels) applied to the pipeline.
# If a transform is applied and the full label is already in the set
# then the transform will have to be cloned with a new label.
- self.applied_labels = set()
+ self.applied_labels = set() # type: Set[str]
- @property
+ @property # type: ignore[misc] # decorated property not supported
@deprecated(since='First stable release',
extra_message='References to <pipeline>.options'
' will not be supported')
@@ -179,14 +210,17 @@
return self._options
def _current_transform(self):
+ # type: () -> AppliedPTransform
"""Returns the transform currently on the top of the stack."""
return self.transforms_stack[-1]
def _root_transform(self):
+ # type: () -> AppliedPTransform
"""Returns the root transform of the transform stack."""
return self.transforms_stack[0]
def _remove_labels_recursively(self, applied_transform):
+ # type: (AppliedPTransform) -> None
for part in applied_transform.parts:
if part.full_label in self.applied_labels:
self.applied_labels.remove(part.full_label)
@@ -206,6 +240,7 @@
""""A visitor that replaces the matching PTransforms."""
def __init__(self, pipeline):
+ # type: (Pipeline) -> None
self.pipeline = pipeline
def _replace_if_needed(self, original_transform_node):
@@ -293,9 +328,11 @@
self.pipeline.transforms_stack.pop()
def enter_composite_transform(self, transform_node):
+ # type: (AppliedPTransform) -> None
self._replace_if_needed(transform_node)
def visit_transform(self, transform_node):
+ # type: (AppliedPTransform) -> None
self._replace_if_needed(transform_node)
self.visit(TransformUpdater(self))
@@ -312,12 +349,15 @@
"""
def __init__(self, pipeline):
+ # type: (Pipeline) -> None
self.pipeline = pipeline
def enter_composite_transform(self, transform_node):
+ # type: (AppliedPTransform) -> None
self.visit_transform(transform_node)
def visit_transform(self, transform_node):
+ # type: (AppliedPTransform) -> None
replace_output = False
for tag in transform_node.outputs:
if transform_node.outputs[tag] in output_map:
@@ -383,6 +423,7 @@
self.visit(ReplacementValidator())
def replace_all(self, replacements):
+ # type: (Iterable[PTransformOverride]) -> None
""" Dynamically replaces PTransforms in the currently populated hierarchy.
Currently this only works for replacements where input and output types
@@ -407,6 +448,7 @@
self._check_replacement(override)
def run(self, test_runner_api=True):
+ # type: (...) -> PipelineResult
"""Runs the pipeline. Returns whatever our runner returns after running."""
# When possible, invoke a round trip through the runner API.
@@ -437,6 +479,7 @@
self.run().wait_until_finish()
def visit(self, visitor):
+ # type: (PipelineVisitor) -> None
"""Visits depth-first every node of a pipeline's DAG.
Runner-internal implementation detail; no backwards-compatibility guarantees
@@ -454,7 +497,7 @@
belong to this pipeline instance.
"""
- visited = set()
+ visited = set() # type: Set[pvalue.PValue]
self._root_transform().visit(visitor, self, visited)
def apply(self, transform, pvalueish=None, label=None):
@@ -632,9 +675,13 @@
self.visit(Visitor())
return Visitor.ok
- def to_runner_api(
- self, return_context=False, context=None, use_fake_coders=False,
- default_environment=None):
+ def to_runner_api(self,
+ return_context=False,
+ context=None, # type: Optional[PipelineContext]
+ use_fake_coders=False,
+ default_environment=None # type: Optional[beam_runner_api_pb2.Environment]
+ ):
+ # type: (...) -> beam_runner_api_pb2.Pipeline
"""For internal use only; no backwards-compatibility guarantees."""
from apache_beam.runners import pipeline_context
from apache_beam.portability.api import beam_runner_api_pb2
@@ -687,13 +734,18 @@
proto.components.transforms[root_transform_id].unique_name = (
root_transform_id)
if return_context:
- return proto, context
+ return proto, context # type: ignore # too complicated for now
else:
return proto
@staticmethod
- def from_runner_api(proto, runner, options, return_context=False,
- allow_proto_holders=False):
+ def from_runner_api(proto, # type: beam_runner_api_pb2.Pipeline
+ runner, # type: PipelineRunner
+ options, # type: PipelineOptions
+ return_context=False,
+ allow_proto_holders=False
+ ):
+ # type: (...) -> Pipeline
"""For internal use only; no backwards-compatibility guarantees."""
p = Pipeline(runner=runner, options=options)
from apache_beam.runners import pipeline_context
@@ -721,7 +773,7 @@
transform.inputs = (pvalue.PBegin(p),)
if return_context:
- return p, context
+ return p, context # type: ignore # too complicated for now
else:
return p
@@ -734,6 +786,7 @@
"""
def visit_value(self, value, producer_node):
+ # type: (pvalue.PValue, AppliedPTransform) -> None
"""Callback for visiting a PValue in the pipeline DAG.
Args:
@@ -744,14 +797,17 @@
pass
def visit_transform(self, transform_node):
+ # type: (AppliedPTransform) -> None
"""Callback for visiting a transform leaf node in the pipeline DAG."""
pass
def enter_composite_transform(self, transform_node):
+ # type: (AppliedPTransform) -> None
"""Callback for entering traversal of a composite transform node."""
pass
def leave_composite_transform(self, transform_node):
+ # type: (AppliedPTransform) -> None
"""Callback for leaving traversal of a composite transform node."""
pass
@@ -763,7 +819,13 @@
(used internally by Pipeline for bookkeeping purposes).
"""
- def __init__(self, parent, transform, full_label, inputs):
+ def __init__(self,
+ parent,
+ transform, # type: ptransform.PTransform
+ full_label, # type: str
+ inputs, # type: Optional[Sequence[Union[pvalue.PBegin, pvalue.PCollection]]]
+ environment_id=None # type: Optional[str]
+ ):
self.parent = parent
self.transform = transform
# Note that we want the PipelineVisitor classes to use the full_label,
@@ -773,15 +835,21 @@
# any interference. This is particularly useful for composite transforms.
self.full_label = full_label
self.inputs = inputs or ()
- self.side_inputs = () if transform is None else tuple(transform.side_inputs)
- self.outputs = {}
- self.parts = []
+
+ self.side_inputs = () if transform is None else tuple(transform.side_inputs) # type: Tuple[pvalue.AsSideInput, ...]
+ self.outputs = {} # type: Dict[Union[str, int, None], pvalue.PValue]
+ self.parts = [] # type: List[AppliedPTransform]
+ self.environment_id = environment_id if environment_id else None # type: Optional[str]
def __repr__(self):
return "%s(%s, %s)" % (self.__class__.__name__, self.full_label,
type(self.transform).__name__)
- def replace_output(self, output, tag=None):
+ def replace_output(self,
+ output, # type: Union[pvalue.PValue, pvalue.DoOutputsTuple]
+ tag=None # type: Union[str, int, None]
+ ):
+ # type: (...) -> None
"""Replaces the output defined by the given tag with the given output.
Args:
@@ -798,7 +866,11 @@
else:
raise TypeError("Unexpected output type: %s" % output)
- def add_output(self, output, tag=None):
+ def add_output(self,
+ output, # type: Union[pvalue.DoOutputsTuple, pvalue.PValue]
+ tag=None # type: Union[str, int, None]
+ ):
+ # type: (...) -> None
if isinstance(output, pvalue.DoOutputsTuple):
self.add_output(output[output._main_tag])
elif isinstance(output, pvalue.PValue):
@@ -814,10 +886,12 @@
raise TypeError("Unexpected output type: %s" % output)
def add_part(self, part):
+ # type: (AppliedPTransform) -> None
assert isinstance(part, AppliedPTransform)
self.parts.append(part)
def is_composite(self):
+ # type: () -> bool
"""Returns whether this is a composite transform.
A composite transform has parts (inner transforms) or isn't the
@@ -827,7 +901,12 @@
return bool(self.parts) or all(
pval.producer is not self for pval in self.outputs.values())
- def visit(self, visitor, pipeline, visited):
+ def visit(self,
+ visitor, # type: PipelineVisitor
+ pipeline, # type: Pipeline
+ visited # type: Set[pvalue.PValue]
+ ):
+ # type: (...) -> None
"""Visits all nodes reachable from the current node."""
for pval in self.inputs:
@@ -876,6 +955,7 @@
visitor.visit_value(v, self)
def named_inputs(self):
+ # type: () -> Dict[str, pvalue.PCollection]
# TODO(BEAM-1833): Push names up into the sdk construction.
main_inputs = {str(ix): input
for ix, input in enumerate(self.inputs)
@@ -885,10 +965,12 @@
return dict(main_inputs, **side_inputs)
def named_outputs(self):
+ # type: () -> Dict[str, pvalue.PCollection]
return {str(tag): output for tag, output in self.outputs.items()
if isinstance(output, pvalue.PCollection)}
def to_runner_api(self, context):
+ # type: (PipelineContext) -> beam_runner_api_pb2.PTransform
# External transforms require more splicing than just setting the spec.
from apache_beam.transforms import external
if isinstance(self.transform, external.ExternalTransform):
@@ -896,27 +978,41 @@
from apache_beam.portability.api import beam_runner_api_pb2
- def transform_to_runner_api(transform, context):
+ def transform_to_runner_api(transform, # type: Optional[ptransform.PTransform]
+ context # type: PipelineContext
+ ):
+ # type: (...) -> Optional[beam_runner_api_pb2.FunctionSpec]
if transform is None:
return None
else:
return transform.to_runner_api(context, has_parts=bool(self.parts))
# Iterate over inputs and outputs by sorted key order, so that ids are
# consistently generated for multiple runs of the same pipeline.
+ transform_spec = transform_to_runner_api(self.transform, context)
+ environment_id = self.environment_id
+ transform_urn = transform_spec.urn if transform_spec else None
+ if (not environment_id and transform_urn and
+ (transform_urn in Pipeline.sdk_transforms_with_environment())):
+ environment_id = context.default_environment_id()
+
return beam_runner_api_pb2.PTransform(
unique_name=self.full_label,
- spec=transform_to_runner_api(self.transform, context),
+ spec=transform_spec,
subtransforms=[context.transforms.get_id(part, label=part.full_label)
for part in self.parts],
inputs={tag: context.pcollections.get_id(pc)
for tag, pc in sorted(self.named_inputs().items())},
outputs={str(tag): context.pcollections.get_id(out)
for tag, out in sorted(self.named_outputs().items())},
+ environment_id=environment_id,
# TODO(BEAM-366): Add display_data.
display_data=None)
@staticmethod
- def from_runner_api(proto, context):
+ def from_runner_api(proto, # type: beam_runner_api_pb2.PTransform
+ context # type: PipelineContext
+ ):
+ # type: (...) -> AppliedPTransform
def is_side_input(tag):
# As per named_inputs() above.
return tag.startswith('side')
@@ -933,7 +1029,8 @@
parent=None,
transform=ptransform.PTransform.from_runner_api(proto.spec, context),
full_label=proto.unique_name,
- inputs=main_inputs)
+ inputs=main_inputs,
+ environment_id=proto.environment_id)
if result.transform and result.transform.side_inputs:
for si, pcoll in zip(result.transform.side_inputs, side_inputs):
si.pvalue = pcoll
@@ -959,7 +1056,7 @@
return result
-class PTransformOverride(with_metaclass(abc.ABCMeta, object)):
+class PTransformOverride(with_metaclass(abc.ABCMeta, object)): # type: ignore[misc]
"""For internal use only; no backwards-compatibility guarantees.
Gives a matcher and replacements for matching PTransforms.
@@ -970,6 +1067,7 @@
@abc.abstractmethod
def matches(self, applied_ptransform):
+ # type: (AppliedPTransform) -> bool
"""Determines whether the given AppliedPTransform matches.
Note that the matching will happen *after* Runner API proto translation.
@@ -989,6 +1087,7 @@
@abc.abstractmethod
def get_replacement_transform(self, ptransform):
+ # type: (AppliedPTransform) -> AppliedPTransform
"""Provides a runner specific override for a given PTransform.
Args:
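The environment handling added to AppliedPTransform.to_runner_api above amounts to a small defaulting rule: a transform proto keeps an explicitly set environment id, otherwise it receives the context's default environment id only when its URN is one of the SDK-executed primitives returned by Pipeline.sdk_transforms_with_environment(). A standalone sketch of that rule, with hypothetical names:

def choose_environment_id(explicit_id, transform_urn, sdk_urns, default_id):
  """Returns the environment id to attach to a PTransform proto."""
  if explicit_id:
    return explicit_id
  if transform_urn and transform_urn in sdk_urns:
    return default_id
  return None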
diff --git a/sdks/python/apache_beam/pipeline_test.py b/sdks/python/apache_beam/pipeline_test.py
index 0e11a9a..48d3b0d 100644
--- a/sdks/python/apache_beam/pipeline_test.py
+++ b/sdks/python/apache_beam/pipeline_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the Pipeline class."""
+# pytype: skip-file
+
from __future__ import absolute_import
import copy
diff --git a/sdks/python/apache_beam/portability/common_urns.py b/sdks/python/apache_beam/portability/common_urns.py
index c1164e3..9c43570 100644
--- a/sdks/python/apache_beam/portability/common_urns.py
+++ b/sdks/python/apache_beam/portability/common_urns.py
@@ -17,6 +17,8 @@
""" Accessors for URNs of common Beam entities. """
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
diff --git a/sdks/python/apache_beam/pvalue.py b/sdks/python/apache_beam/pvalue.py
index e246f32..7e5e7c5 100644
--- a/sdks/python/apache_beam/pvalue.py
+++ b/sdks/python/apache_beam/pvalue.py
@@ -24,13 +24,23 @@
produced when the pipeline gets executed.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
import itertools
-import typing
from builtins import hex
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Dict
+from typing import Generic
+from typing import Iterator
+from typing import Optional
+from typing import Sequence
+from typing import TypeVar
+from typing import Union
from past.builtins import unicode
@@ -40,6 +50,14 @@
from apache_beam.portability import python_urns
from apache_beam.portability.api import beam_runner_api_pb2
+if TYPE_CHECKING:
+ from apache_beam.transforms import sideinputs
+ from apache_beam.transforms.core import ParDo
+ from apache_beam.transforms.core import Windowing
+ from apache_beam.pipeline import AppliedPTransform
+ from apache_beam.pipeline import Pipeline
+ from apache_beam.runners.pipeline_context import PipelineContext
+
__all__ = [
'PCollection',
'TaggedOutput',
@@ -50,6 +68,8 @@
'EmptySideInput',
]
+T = TypeVar('T')
+
class PValue(object):
"""Base class for PCollection.
@@ -63,8 +83,13 @@
(3) Has a value which is meaningful if the transform was executed.
"""
- def __init__(self, pipeline, tag=None, element_type=None, windowing=None,
- is_bounded=True):
+ def __init__(self,
+ pipeline, # type: Pipeline
+ tag=None, # type: Optional[str]
+ element_type=None, # type: Optional[type]
+ windowing=None, # type: Optional[Windowing]
+ is_bounded=True,
+ ):
"""Initializes a PValue with all arguments hidden behind keyword arguments.
Args:
@@ -78,7 +103,7 @@
# The AppliedPTransform instance for the application of the PTransform
# generating this PValue. The field gets initialized when a transform
# gets applied.
- self.producer = None
+ self.producer = None # type: Optional[AppliedPTransform]
self.is_bounded = is_bounded
if windowing:
self._windowing = windowing
@@ -113,7 +138,7 @@
return self.pipeline.apply(ptransform, self)
-class PCollection(PValue, typing.Generic[typing.TypeVar('T')]):
+class PCollection(PValue, Generic[T]):
"""A multiple values (potentially huge) container.
Dataflow users should not construct PCollection objects directly in their
@@ -133,6 +158,7 @@
@property
def windowing(self):
+ # type: () -> Windowing
if not hasattr(self, '_windowing'):
self._windowing = self.producer.transform.get_windowing(
self.producer.inputs)
@@ -146,6 +172,7 @@
@staticmethod
def from_(pcoll):
+ # type: (PValue) -> PCollection
"""Create a PCollection, using another PCollection as a starting point.
Transfers relevant attributes.
@@ -153,6 +180,7 @@
return PCollection(pcoll.pipeline, is_bounded=pcoll.is_bounded)
def to_runner_api(self, context):
+ # type: (PipelineContext) -> beam_runner_api_pb2.PCollection
return beam_runner_api_pb2.PCollection(
unique_name=self._unique_name(),
coder_id=context.coder_id_from_element_type(self.element_type),
@@ -163,6 +191,7 @@
self.windowing))
def _unique_name(self):
+ # type: () -> str
if self.producer:
return '%d%s.%s' % (
len(self.producer.full_label), self.producer.full_label, self.tag)
@@ -171,6 +200,7 @@
@staticmethod
def from_runner_api(proto, context):
+ # type: (beam_runner_api_pb2.PCollection, PipelineContext) -> PCollection
# Producer and tag will be filled in later, the key point is that the
# same object is returned for the same pcollection id.
return PCollection(
@@ -204,7 +234,12 @@
class DoOutputsTuple(object):
"""An object grouping the multiple outputs of a ParDo or FlatMap transform."""
- def __init__(self, pipeline, transform, tags, main_tag):
+ def __init__(self,
+ pipeline, # type: Pipeline
+ transform, # type: ParDo
+ tags, # type: Sequence[str]
+ main_tag # type: Optional[str]
+ ):
self._pipeline = pipeline
self._tags = tags
self._main_tag = main_tag
@@ -212,9 +247,9 @@
# The ApplyPTransform instance for the application of the multi FlatMap
# generating this value. The field gets initialized when a transform
# gets applied.
- self.producer = None
+ self.producer = None # type: Optional[AppliedPTransform]
# Dictionary of PCollections already associated with tags.
- self._pcolls = {}
+ self._pcolls = {} # type: Dict[Optional[str], PValue]
def __str__(self):
return '<%s>' % self._str_internal()
@@ -227,6 +262,7 @@
self.__class__.__name__, self._main_tag, self._tags, self._transform)
def __iter__(self):
+ # type: () -> Iterator[PValue]
"""Iterates over tags returning for each call a (tag, pvalue) pair."""
if self._main_tag is not None:
yield self[self._main_tag]
@@ -234,13 +270,15 @@
yield self[tag]
def __getattr__(self, tag):
+ # type: (str) -> PValue
# Special methods which may be accessed before the object is
# fully constructed (e.g. in unpickling).
if tag[:2] == tag[-2:] == '__':
- return object.__getattr__(self, tag)
+ return object.__getattr__(self, tag) # type: ignore
return self[tag]
def __getitem__(self, tag):
+ # type: (Union[int, str, None]) -> PValue
# Accept int tags so that we can look at Partition tags with the
# same ints that we used in the partition function.
# TODO(gildea): Consider requiring string-based tags everywhere.
@@ -258,9 +296,10 @@
if tag in self._pcolls:
return self._pcolls[tag]
+ assert self.producer is not None
if tag is not None:
self._transform.output_tags.add(tag)
- pcoll = PCollection(self._pipeline, tag=tag, element_type=typehints.Any)
+ pcoll = PCollection(self._pipeline, tag=tag, element_type=typehints.Any) # type: PValue
# Transfer the producer from the DoOutputsTuple to the resulting
# PCollection.
pcoll.producer = self.producer.parts[0]
@@ -286,6 +325,7 @@
"""
def __init__(self, tag, value):
+ # type: (str, Any) -> None
if not isinstance(tag, (str, unicode)):
raise TypeError(
'Attempting to create a TaggedOutput with non-string tag %s' % (tag,))
@@ -305,6 +345,7 @@
"""
def __init__(self, pcoll):
+ # type: (PCollection) -> None
from apache_beam.transforms import sideinputs
self.pvalue = pcoll
self._window_mapping_fn = sideinputs.default_window_mapping_fn(
@@ -327,6 +368,7 @@
# TODO(robertwb): Get rid of _from_runtime_iterable and _view_options
# in favor of _side_input_data().
def _side_input_data(self):
+ # type: () -> SideInputData
view_options = self._view_options()
from_runtime_iterable = type(self)._from_runtime_iterable
return SideInputData(
@@ -335,19 +377,28 @@
lambda iterable: from_runtime_iterable(iterable, view_options))
def to_runner_api(self, context):
+ # type: (PipelineContext) -> beam_runner_api_pb2.SideInput
return self._side_input_data().to_runner_api(context)
@staticmethod
- def from_runner_api(proto, context):
+ def from_runner_api(proto, # type: beam_runner_api_pb2.SideInput
+ context # type: PipelineContext
+ ):
+ # type: (...) -> _UnpickledSideInput
return _UnpickledSideInput(
SideInputData.from_runner_api(proto, context))
+ @staticmethod
+ def _from_runtime_iterable(it, options):
+ raise NotImplementedError
+
def requires_keyed_input(self):
return False
class _UnpickledSideInput(AsSideInput):
def __init__(self, side_input_data):
+ # type: (SideInputData) -> None
self._data = side_input_data
self._window_mapping_fn = side_input_data.window_mapping_fn
@@ -368,35 +419,37 @@
class SideInputData(object):
"""All of the data about a side input except for the bound PCollection."""
- def __init__(self, access_pattern, window_mapping_fn, view_fn):
+ def __init__(self,
+ access_pattern, # type: str
+ window_mapping_fn, # type: sideinputs.WindowMappingFn
+ view_fn
+ ):
self.access_pattern = access_pattern
self.window_mapping_fn = window_mapping_fn
self.view_fn = view_fn
def to_runner_api(self, context):
+ # type: (PipelineContext) -> beam_runner_api_pb2.SideInput
return beam_runner_api_pb2.SideInput(
access_pattern=beam_runner_api_pb2.FunctionSpec(
urn=self.access_pattern),
- view_fn=beam_runner_api_pb2.SdkFunctionSpec(
- environment_id=context.default_environment_id(),
- spec=beam_runner_api_pb2.FunctionSpec(
- urn=python_urns.PICKLED_VIEWFN,
- payload=pickler.dumps(self.view_fn))),
- window_mapping_fn=beam_runner_api_pb2.SdkFunctionSpec(
- environment_id=context.default_environment_id(),
- spec=beam_runner_api_pb2.FunctionSpec(
- urn=python_urns.PICKLED_WINDOW_MAPPING_FN,
- payload=pickler.dumps(self.window_mapping_fn))))
+ view_fn=beam_runner_api_pb2.FunctionSpec(
+ urn=python_urns.PICKLED_VIEWFN,
+ payload=pickler.dumps(self.view_fn)),
+ window_mapping_fn=beam_runner_api_pb2.FunctionSpec(
+ urn=python_urns.PICKLED_WINDOW_MAPPING_FN,
+ payload=pickler.dumps(self.window_mapping_fn)))
@staticmethod
def from_runner_api(proto, unused_context):
- assert proto.view_fn.spec.urn == python_urns.PICKLED_VIEWFN
- assert (proto.window_mapping_fn.spec.urn ==
+ # type: (beam_runner_api_pb2.SideInput, PipelineContext) -> SideInputData
+ assert proto.view_fn.urn == python_urns.PICKLED_VIEWFN
+ assert (proto.window_mapping_fn.urn ==
python_urns.PICKLED_WINDOW_MAPPING_FN)
return SideInputData(
proto.access_pattern.urn,
- pickler.loads(proto.window_mapping_fn.spec.payload),
- pickler.loads(proto.view_fn.spec.payload))
+ pickler.loads(proto.window_mapping_fn.payload),
+ pickler.loads(proto.view_fn.payload))
class AsSingleton(AsSideInput):
@@ -418,6 +471,7 @@
_NO_DEFAULT = object()
def __init__(self, pcoll, default_value=_NO_DEFAULT):
+ # type: (PCollection, Any) -> None
super(AsSingleton, self).__init__(pcoll)
self.default_value = default_value
@@ -469,6 +523,7 @@
return it
def _side_input_data(self):
+ # type: () -> SideInputData
return SideInputData(
common_urns.side_inputs.ITERABLE.urn,
self._window_mapping_fn,
@@ -499,6 +554,7 @@
return list(it)
def _side_input_data(self):
+ # type: () -> SideInputData
return SideInputData(
common_urns.side_inputs.ITERABLE.urn,
self._window_mapping_fn,
@@ -526,6 +582,7 @@
return dict(it)
def _side_input_data(self):
+ # type: () -> SideInputData
return SideInputData(
common_urns.side_inputs.ITERABLE.urn,
self._window_mapping_fn,
@@ -552,6 +609,7 @@
return result
def _side_input_data(self):
+ # type: () -> SideInputData
return SideInputData(
common_urns.side_inputs.MULTIMAP.urn,
self._window_mapping_fn,
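The change from `typing.Generic[typing.TypeVar('T')]` to a module-level `T = TypeVar('T')` plus `Generic[T]` matters because type checkers generally require a TypeVar to be bound to a name before it is used in a base class. A small sketch of the resulting shape, with illustrative names:

from typing import Generic, TypeVar

T = TypeVar('T')


class Box(Generic[T]):
  def __init__(self, value):
    # type: (T) -> None
    self.value = value

  def get(self):
    # type: () -> T
    return self.value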
diff --git a/sdks/python/apache_beam/pvalue_test.py b/sdks/python/apache_beam/pvalue_test.py
index b3f02e3..de689a0 100644
--- a/sdks/python/apache_beam/pvalue_test.py
+++ b/sdks/python/apache_beam/pvalue_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the PValue and PCollection classes."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/runners/common.py b/sdks/python/apache_beam/runners/common.py
index c045231..220e3d3 100644
--- a/sdks/python/apache_beam/runners/common.py
+++ b/sdks/python/apache_beam/runners/common.py
@@ -21,12 +21,22 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import traceback
from builtins import next
from builtins import object
from builtins import zip
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Dict
+from typing import Iterable
+from typing import List
+from typing import Mapping
+from typing import Optional
+from typing import Tuple
from future.utils import raise_with_traceback
from past.builtins import unicode
@@ -46,11 +56,17 @@
from apache_beam.utils.timestamp import Timestamp
from apache_beam.utils.windowed_value import WindowedValue
+if TYPE_CHECKING:
+ from apache_beam.io import iobase
+ from apache_beam.transforms import sideinputs
+ from apache_beam.transforms.core import TimerSpec
+
class NameContext(object):
"""Holds the name information for a step."""
def __init__(self, step_name, transform_id=None):
+ # type: (str, Optional[str]) -> None
"""Creates a new step NameContext.
Args:
@@ -130,6 +146,7 @@
"""
def receive(self, windowed_value):
+ # type: (WindowedValue) -> None
raise NotImplementedError
@@ -160,11 +177,11 @@
self.method_value = getattr(obj_to_invoke, method_name)
self.has_userstate_arguments = False
- self.state_args_to_replace = {}
- self.timer_args_to_replace = {}
- self.timestamp_arg_name = None
- self.window_arg_name = None
- self.key_arg_name = None
+ self.state_args_to_replace = {} # type: Dict[str, core.StateSpec]
+ self.timer_args_to_replace = {} # type: Dict[str, core.TimerSpec]
+ self.timestamp_arg_name = None # type: Optional[str]
+ self.window_arg_name = None # type: Optional[str]
+ self.key_arg_name = None # type: Optional[str]
self.restriction_provider = None
self.restriction_provider_arg_name = None
self.watermark_estimator = None
@@ -229,6 +246,7 @@
"""
def __init__(self, do_fn):
+ # type: (core.DoFn) -> None
# We add a property here for all methods defined by Beam DoFn features.
assert isinstance(do_fn, core.DoFn)
@@ -258,7 +276,7 @@
# Handle stateful DoFns.
self._is_stateful_dofn = userstate.is_stateful_dofn(do_fn)
- self.timer_methods = {}
+ self.timer_methods = {} # type: Dict[TimerSpec, MethodWrapper]
if self._is_stateful_dofn:
# Populate timer firing methods, keyed by TimerSpec.
_, all_timer_specs = userstate.get_dofn_specs(do_fn)
@@ -267,6 +285,7 @@
self.timer_methods[timer_spec] = MethodWrapper(do_fn, method.__name__)
def get_restriction_provider(self):
+ # type: () -> RestrictionProvider
return self.process_method.restriction_provider
def get_watermark_estimator(self):
@@ -301,12 +320,15 @@
userstate.validate_stateful_dofn(self.do_fn)
def is_splittable_dofn(self):
+ # type: () -> bool
return self.get_restriction_provider() is not None
def is_stateful_dofn(self):
+ # type: () -> bool
return self._is_stateful_dofn
def has_timers(self):
+ # type: () -> bool
_, all_timer_specs = userstate.get_dofn_specs(self.do_fn)
return bool(all_timer_specs)
@@ -317,7 +339,11 @@
A DoFnInvoker describes a particular way for invoking methods of a DoFn
represented by a given DoFnSignature."""
- def __init__(self, output_processor, signature):
+ def __init__(self,
+ output_processor, # type: Optional[_OutputProcessor]
+ signature # type: DoFnSignature
+ ):
+ # type: (...) -> None
"""
Initializes `DoFnInvoker`
@@ -327,17 +353,21 @@
"""
self.output_processor = output_processor
self.signature = signature
- self.user_state_context = None
- self.bundle_finalizer_param = None
+ self.user_state_context = None # type: Optional[userstate.UserStateContext]
+ self.bundle_finalizer_param = None # type: Optional[core._BundleFinalizerParam]
@staticmethod
def create_invoker(
- signature,
- output_processor=None,
- context=None, side_inputs=None, input_args=None, input_kwargs=None,
+ signature, # type: DoFnSignature
+ output_processor=None, # type: Optional[_OutputProcessor]
+ context=None, # type: Optional[DoFnContext]
+ side_inputs=None, # type: Optional[List[sideinputs.SideInputMap]]
+ input_args=None, input_kwargs=None,
process_invocation=True,
- user_state_context=None,
- bundle_finalizer_param=None):
+ user_state_context=None, # type: Optional[userstate.UserStateContext]
+ bundle_finalizer_param=None # type: Optional[core._BundleFinalizerParam]
+ ):
+ # type: (...) -> DoFnInvoker
""" Creates a new DoFnInvoker based on given arguments.
Args:
@@ -375,9 +405,14 @@
signature, context, side_inputs, input_args, input_kwargs,
user_state_context, bundle_finalizer_param)
- def invoke_process(self, windowed_value, restriction_tracker=None,
- output_processor=None,
- additional_args=None, additional_kwargs=None):
+ def invoke_process(self,
+ windowed_value, # type: WindowedValue
+ restriction_tracker=None, # type: Optional[iobase.RestrictionTracker]
+ output_processor=None, # type: Optional[OutputProcessor]
+ additional_args=None,
+ additional_kwargs=None
+ ):
+ # type: (...) -> Optional[Tuple[WindowedValue, Timestamp]]
"""Invokes the DoFn.process() function.
Args:
@@ -393,23 +428,27 @@
raise NotImplementedError
def invoke_setup(self):
+ # type: () -> None
"""Invokes the DoFn.setup() method
"""
self.signature.setup_lifecycle_method.method_value()
def invoke_start_bundle(self):
+ # type: () -> None
"""Invokes the DoFn.start_bundle() method.
"""
self.output_processor.start_bundle_outputs(
self.signature.start_bundle_method.method_value())
def invoke_finish_bundle(self):
+ # type: () -> None
"""Invokes the DoFn.finish_bundle() method.
"""
self.output_processor.finish_bundle_outputs(
self.signature.finish_bundle_method.method_value())
def invoke_teardown(self):
+ # type: () -> None
"""Invokes the DoFn.teardown() method
"""
self.signature.teardown_lifecycle_method.method_value()
@@ -436,13 +475,22 @@
class SimpleInvoker(DoFnInvoker):
"""An invoker that processes elements ignoring windowing information."""
- def __init__(self, output_processor, signature):
+ def __init__(self,
+ output_processor, # type: Optional[_OutputProcessor]
+ signature # type: DoFnSignature
+ ):
+ # type: (...) -> None
super(SimpleInvoker, self).__init__(output_processor, signature)
self.process_method = signature.process_method.method_value
- def invoke_process(self, windowed_value, restriction_tracker=None,
- output_processor=None,
- additional_args=None, additional_kwargs=None):
+ def invoke_process(self,
+ windowed_value, # type: WindowedValue
+ restriction_tracker=None, # type: Optional[iobase.RestrictionTracker]
+ output_processor=None, # type: Optional[OutputProcessor]
+ additional_args=None,
+ additional_kwargs=None
+ ):
+ # type: (...) -> None
if not output_processor:
output_processor = self.output_processor
output_processor.process_outputs(
@@ -452,9 +500,16 @@
class PerWindowInvoker(DoFnInvoker):
"""An invoker that processes elements considering windowing information."""
- def __init__(self, output_processor, signature, context,
- side_inputs, input_args, input_kwargs, user_state_context,
- bundle_finalizer_param):
+ def __init__(self,
+ output_processor, # type: Optional[_OutputProcessor]
+ signature, # type: DoFnSignature
+ context, # type: DoFnContext
+ side_inputs, # type: Iterable[sideinputs.SideInputMap]
+ input_args,
+ input_kwargs,
+ user_state_context, # type: Optional[userstate.UserStateContext]
+ bundle_finalizer_param # type: Optional[core._BundleFinalizerParam]
+ ):
super(PerWindowInvoker, self).__init__(output_processor, signature)
self.side_inputs = side_inputs
self.context = context
@@ -550,9 +605,14 @@
self.args_for_process = args_with_placeholders
self.kwargs_for_process = input_kwargs
- def invoke_process(self, windowed_value, restriction_tracker=None,
- output_processor=None,
- additional_args=None, additional_kwargs=None):
+ def invoke_process(self,
+ windowed_value, # type: WindowedValue
+ restriction_tracker=None,
+ output_processor=None, # type: Optional[OutputProcessor]
+ additional_args=None,
+ additional_kwargs=None
+ ):
+ # type: (...) -> Optional[Tuple[WindowedValue, Timestamp]]
if not additional_args:
additional_args = []
if not additional_kwargs:
@@ -610,9 +670,13 @@
self._invoke_process_per_window(
windowed_value, additional_args, additional_kwargs, output_processor)
- def _invoke_process_per_window(
- self, windowed_value, additional_args,
- additional_kwargs, output_processor):
+ def _invoke_process_per_window(self,
+ windowed_value, # type: WindowedValue
+ additional_args,
+ additional_kwargs,
+ output_processor # type: OutputProcessor
+ ):
+ # type: (...) -> Optional[Tuple[WindowedValue, Timestamp]]
if self.has_windowed_inputs:
window, = windowed_value.windows
side_inputs = [si[window] for si in self.side_inputs]
@@ -727,6 +791,7 @@
(element, residual), residual_size)), current_watermark), None))
def current_element_progress(self):
+ # type: () -> Optional[iobase.RestrictionProgress]
restriction_tracker = self.threadsafe_restriction_tracker
if restriction_tracker:
return restriction_tracker.current_progress()
@@ -739,18 +804,19 @@
"""
def __init__(self,
- fn,
+ fn, # type: core.DoFn
args,
kwargs,
- side_inputs,
+ side_inputs, # type: Iterable[sideinputs.SideInputMap]
windowing,
- tagged_receivers=None,
- step_name=None,
+ tagged_receivers=None, # type: Mapping[Optional[str], Receiver]
+ step_name=None, # type: Optional[str]
logging_context=None,
state=None,
scoped_metrics_container=None,
operation_name=None,
- user_state_context=None):
+ user_state_context=None # type: Optional[userstate.UserStateContext]
+ ):
"""Initializes a DoFnRunner.
Args:
@@ -806,15 +872,18 @@
bundle_finalizer_param=self.bundle_finalizer_param)
def receive(self, windowed_value):
+ # type: (WindowedValue) -> None
self.process(windowed_value)
def process(self, windowed_value):
+ # type: (WindowedValue) -> Optional[Tuple[WindowedValue, Timestamp]]
try:
return self.do_fn_invoker.invoke_process(windowed_value)
except BaseException as exn:
self._reraise_augmented(exn)
def process_with_sized_restriction(self, windowed_value):
+ # type: (WindowedValue) -> Optional[Tuple[WindowedValue, Timestamp]]
(element, restriction), _ = windowed_value.value
return self.do_fn_invoker.invoke_process(
windowed_value.with_value(element),
@@ -825,6 +894,7 @@
return self.do_fn_invoker.try_split(fraction)
def current_element_progress(self):
+ # type: () -> Optional[iobase.RestrictionProgress]
return self.do_fn_invoker.current_element_progress()
def process_user_timer(self, timer_spec, key, window, timestamp):
@@ -885,6 +955,7 @@
class OutputProcessor(object):
def process_outputs(self, windowed_input_element, results):
+ # type: (WindowedValue, Iterable[Any]) -> None
raise NotImplementedError
@@ -893,8 +964,8 @@
def __init__(self,
window_fn,
- main_receivers,
- tagged_receivers,
+ main_receivers, # type: Receiver
+ tagged_receivers, # type: Mapping[Optional[str], Receiver]
per_element_output_counter):
"""Initializes ``_OutputProcessor``.
@@ -911,6 +982,7 @@
self.per_element_output_counter = per_element_output_counter
def process_outputs(self, windowed_input_element, results):
+ # type: (WindowedValue, Iterable[Any]) -> None
"""Dispatch the result of process computation to the appropriate receivers.
A value wrapped in a TaggedOutput object will be unwrapped and
@@ -1041,6 +1113,7 @@
self.set_element(element)
def set_element(self, windowed_value):
+ # type: (Optional[WindowedValue]) -> None
self.windowed_value = windowed_value
@property
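The multi-line signatures above use mypy's per-argument comment form: each parameter carries its own trailing type comment and the whole signature is closed by a single `# type: (...) -> ...` line. A minimal, hypothetical example of the layout:

from typing import Mapping, Optional


def run_step(name,          # type: str
             receivers,     # type: Mapping[Optional[str], list]
             step_id=None   # type: Optional[str]
            ):
  # type: (...) -> None
  """Exists only to show the annotation layout; does no work."""
  del name, receivers, step_id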
diff --git a/sdks/python/apache_beam/runners/common_test.py b/sdks/python/apache_beam/runners/common_test.py
index 9377708..62e7147 100644
--- a/sdks/python/apache_beam/runners/common_test.py
+++ b/sdks/python/apache_beam/runners/common_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_metrics_pipeline.py b/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_metrics_pipeline.py
index f0d6df9..eb8963e 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_metrics_pipeline.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_metrics_pipeline.py
@@ -17,6 +17,8 @@
"""A word-counting workflow."""
+# pytype: skip-file
+
from __future__ import absolute_import
import time
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_metrics_pipeline_test.py b/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_metrics_pipeline_test.py
index d1afbcf..acbb5ae 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_metrics_pipeline_test.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_metrics_pipeline_test.py
@@ -17,6 +17,8 @@
"""A word-counting workflow."""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_streaming_metrics_pipeline.py b/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_streaming_metrics_pipeline.py
index b0e75e6..aff56cf 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_streaming_metrics_pipeline.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_streaming_metrics_pipeline.py
@@ -17,6 +17,8 @@
"""A word-counting workflow."""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_streaming_metrics_pipeline_test.py b/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_streaming_metrics_pipeline_test.py
index 2a351e2..3cfe244 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_streaming_metrics_pipeline_test.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_streaming_metrics_pipeline_test.py
@@ -17,6 +17,8 @@
"""A word-counting workflow."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_metrics.py b/sdks/python/apache_beam/runners/dataflow/dataflow_metrics.py
index d518bb0..16a14ef 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_metrics.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_metrics.py
@@ -21,6 +21,8 @@
service.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_metrics_test.py b/sdks/python/apache_beam/runners/dataflow/dataflow_metrics_test.py
index 9899176..db1f5a7 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_metrics_test.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_metrics_test.py
@@ -19,6 +19,8 @@
the DataflowMetrics class.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import types
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
index 75aedd1..db35635 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
@@ -20,6 +20,8 @@
The runner will create a JSON description of the job graph and then submit it
to the Dataflow Service for remote execution by a worker.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -65,13 +67,12 @@
from apache_beam.utils.interactive_utils import is_in_notebook
from apache_beam.utils.plugin import BeamPlugin
-try: # Python 3
+if sys.version_info[0] > 2:
unquote_to_bytes = urllib.parse.unquote_to_bytes
quote = urllib.parse.quote
-except AttributeError: # Python 2
- # pylint: disable=deprecated-urllib-function
- unquote_to_bytes = urllib.unquote
- quote = urllib.quote
+else:
+ unquote_to_bytes = urllib.unquote # pylint: disable=deprecated-urllib-function
+ quote = urllib.quote # pylint: disable=deprecated-urllib-function
__all__ = ['DataflowRunner']
@@ -493,8 +494,8 @@
# inputs, hence we enforce that here.
pipeline.visit(self.flatten_input_visitor())
- # The superclass's run will trigger a traversal of all reachable nodes.
- super(DataflowRunner, self).run_pipeline(pipeline, options)
+ # Trigger a traversal of all reachable nodes.
+ self.visit_transforms(pipeline, options)
test_options = options.view_as(TestOptions)
# If it is a dry run, return without submitting the job.
@@ -1239,7 +1240,7 @@
# TestStream source doesn't do any decoding of elements,
# so we won't set test_stream_payload.coder_id.
output_coder = transform._infer_output_coder() # pylint: disable=protected-access
- for event in transform.events:
+ for event in transform._events:
new_event = test_stream_payload.events.add()
if isinstance(event, ElementEvent):
for tv in event.timestamped_values:
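The urllib change above swaps a try/except AttributeError probe for an explicit interpreter-version check, which static analyzers can follow. A sketch of the same pattern in isolation:

import sys

if sys.version_info[0] > 2:
  import urllib.parse
  quote = urllib.parse.quote
  unquote_to_bytes = urllib.parse.unquote_to_bytes
else:
  import urllib
  quote = urllib.quote  # pylint: disable=deprecated-urllib-function
  unquote_to_bytes = urllib.unquote  # pylint: disable=deprecated-urllib-function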
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py
index 58c722c..74d6e57 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the DataflowRunner class."""
+# pytype: skip-file
+
from __future__ import absolute_import
import json
@@ -60,7 +62,7 @@
try:
from apache_beam.runners.dataflow.internal import apiclient
except ImportError:
- apiclient = None
+ apiclient = None # type: ignore
# pylint: enable=wrong-import-order, wrong-import-position
# SpecialParDo and SpecialDoFn are used in test_remote_runner_display_data.
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
index da37813..200dbcc 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
@@ -19,6 +19,8 @@
Dataflow client utility functions."""
+# pytype: skip-file
+
from __future__ import absolute_import
import codecs
@@ -479,8 +481,9 @@
@retry.no_retries # Using no_retries marks this as an integration point.
def _gcs_file_copy(self, from_path, to_path):
to_folder, to_name = os.path.split(to_path)
+ total_size = os.path.getsize(from_path)
with open(from_path, 'rb') as f:
- self.stage_file(to_folder, to_name, f)
+ self.stage_file(to_folder, to_name, f, total_size=total_size)
def _stage_resources(self, options):
google_cloud_options = options.view_as(GoogleCloudOptions)
@@ -497,7 +500,7 @@
return resources
def stage_file(self, gcs_or_local_path, file_name, stream,
- mime_type='application/octet-stream'):
+ mime_type='application/octet-stream', total_size=None):
"""Stages a file at a GCS or local path with stream-supplied contents."""
if not gcs_or_local_path.startswith('gs://'):
local_path = FileSystems.join(gcs_or_local_path, file_name)
@@ -512,7 +515,7 @@
bucket=bucket, name=name)
start_time = time.time()
_LOGGER.info('Starting GCS upload to %s...', gcs_location)
- upload = storage.Upload(stream, mime_type)
+ upload = storage.Upload(stream, mime_type, total_size)
try:
response = self._storage_client.objects.Insert(request, upload=upload)
except exceptions.HttpError as e:
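The apiclient change above computes the local file size up front and threads it through stage_file, so the GCS upload is created with a known total size rather than an unknown-length stream. A hedged sketch of the calling side (the helper name and the `client` object are illustrative; `stage_file` is the method shown in the diff):

import os


def stage_local_file(client, to_folder, to_name, from_path):
  # Pass the file size along so the upload layer knows the total length.
  total_size = os.path.getsize(from_path)
  with open(from_path, 'rb') as f:
    client.stage_file(to_folder, to_name, f, total_size=total_size)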
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
index f9a82dc..cbaed0b 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for the apiclient module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import sys
@@ -36,7 +38,7 @@
try:
from apache_beam.runners.dataflow.internal import apiclient
except ImportError:
- apiclient = None
+ apiclient = None # type: ignore
# pylint: enable=wrong-import-order, wrong-import-position
FAKE_PIPELINE_URL = "gs://invalid-bucket/anywhere"
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/__init__.py b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/__init__.py
index ce260c5..0100df6 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/__init__.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/__init__.py
@@ -32,4 +32,4 @@
pass
# pylint: enable=wrong-import-order, wrong-import-position
-__path__ = pkgutil.extend_path(__path__, __name__)
+__path__ = pkgutil.extend_path(__path__, __name__) # type: ignore
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/message_matchers.py b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/message_matchers.py
index 8389e62..6749418 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/message_matchers.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/message_matchers.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from future.utils import iteritems
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/message_matchers_test.py b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/message_matchers_test.py
index 3e6b6d7..3d18013 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/message_matchers_test.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/message_matchers_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/names.py b/sdks/python/apache_beam/runners/dataflow/internal/names.py
index 111259d..7bc0295 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/names.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/names.py
@@ -20,6 +20,8 @@
# All constants are for internal use only; no backwards-compatibility
# guarantees.
+# pytype: skip-file
+
from __future__ import absolute_import
# Standard file names used for staging files.
@@ -38,10 +40,10 @@
# Update this version to the next version whenever there is a change that will
# require changes to legacy Dataflow worker execution environment.
-BEAM_CONTAINER_VERSION = 'beam-master-20191112'
+BEAM_CONTAINER_VERSION = 'beam-master-20191220'
# Update this version to the next version whenever there is a change that
# requires changes to SDK harness container or SDK harness launcher.
-BEAM_FNAPI_CONTAINER_VERSION = 'beam-master-20191112'
+BEAM_FNAPI_CONTAINER_VERSION = 'beam-master-20191220'
# TODO(BEAM-5939): Remove these shared names once Dataflow worker is updated.
PICKLED_MAIN_SESSION_FILE = 'pickled_main_session'
diff --git a/sdks/python/apache_beam/runners/dataflow/native_io/iobase.py b/sdks/python/apache_beam/runners/dataflow/native_io/iobase.py
index 619ed54..ab3039a 100644
--- a/sdks/python/apache_beam/runners/dataflow/native_io/iobase.py
+++ b/sdks/python/apache_beam/runners/dataflow/native_io/iobase.py
@@ -20,6 +20,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/runners/dataflow/native_io/iobase_test.py b/sdks/python/apache_beam/runners/dataflow/native_io/iobase_test.py
index a0a6541..f3f2e75 100644
--- a/sdks/python/apache_beam/runners/dataflow/native_io/iobase_test.py
+++ b/sdks/python/apache_beam/runners/dataflow/native_io/iobase_test.py
@@ -17,6 +17,8 @@
"""Tests corresponding to Dataflow's iobase module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/runners/dataflow/ptransform_overrides.py b/sdks/python/apache_beam/runners/dataflow/ptransform_overrides.py
index e3e76a5..c6753ea 100644
--- a/sdks/python/apache_beam/runners/dataflow/ptransform_overrides.py
+++ b/sdks/python/apache_beam/runners/dataflow/ptransform_overrides.py
@@ -17,6 +17,8 @@
"""Ptransform overrides for DataflowRunner."""
+# pytype: skip-file
+
from __future__ import absolute_import
from apache_beam.pipeline import PTransformOverride
diff --git a/sdks/python/apache_beam/runners/dataflow/template_runner_test.py b/sdks/python/apache_beam/runners/dataflow/template_runner_test.py
index a3988bc..e6d0d66 100644
--- a/sdks/python/apache_beam/runners/dataflow/template_runner_test.py
+++ b/sdks/python/apache_beam/runners/dataflow/template_runner_test.py
@@ -17,6 +17,8 @@
"""Unit tests for templated pipelines."""
+# pytype: skip-file
+
from __future__ import absolute_import
import json
@@ -33,7 +35,7 @@
try:
from apache_beam.runners.dataflow.internal import apiclient
except ImportError:
- apiclient = None
+ apiclient = None # type: ignore
# pylint: enable=wrong-import-order, wrong-import-position
diff --git a/sdks/python/apache_beam/runners/dataflow/test_dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/test_dataflow_runner.py
index 2e92bef..ffffe4e 100644
--- a/sdks/python/apache_beam/runners/dataflow/test_dataflow_runner.py
+++ b/sdks/python/apache_beam/runners/dataflow/test_dataflow_runner.py
@@ -17,6 +17,8 @@
"""Wrapper of Beam runners that's built for running and verifying e2e tests."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/direct/bundle_factory.py b/sdks/python/apache_beam/runners/direct/bundle_factory.py
index 382cf52..c7677f4 100644
--- a/sdks/python/apache_beam/runners/direct/bundle_factory.py
+++ b/sdks/python/apache_beam/runners/direct/bundle_factory.py
@@ -17,9 +17,15 @@
"""A factory that creates UncommittedBundles."""
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
+from typing import Iterable
+from typing import Iterator
+from typing import List
+from typing import Union
from apache_beam import pvalue
from apache_beam.runners import common
@@ -38,12 +44,15 @@
"""
def __init__(self, stacked):
+ # type: (bool) -> None
self._stacked = stacked
def create_bundle(self, output_pcollection):
+ # type: (Union[pvalue.PBegin, pvalue.PCollection]) -> _Bundle
return _Bundle(output_pcollection, self._stacked)
def create_empty_committed_bundle(self, output_pcollection):
+ # type: (Union[pvalue.PBegin, pvalue.PCollection]) -> _Bundle
bundle = self.create_bundle(output_pcollection)
bundle.commit(None)
return bundle
@@ -107,6 +116,7 @@
self._appended_values.append(value)
def windowed_values(self):
+ # type: () -> Iterator[WindowedValue]
# yield first windowed_value as is, then iterate through
# _appended_values to yield WindowedValue on the fly.
yield self._initial_windowed_value
@@ -114,14 +124,16 @@
yield self._initial_windowed_value.with_value(v)
def __init__(self, pcollection, stacked=True):
+ # type: (Union[pvalue.PBegin, pvalue.PCollection], bool) -> None
assert isinstance(pcollection, (pvalue.PBegin, pvalue.PCollection))
self._pcollection = pcollection
- self._elements = []
+ self._elements = [] # type: List[Union[WindowedValue, _Bundle._StackedWindowedValues]]
self._stacked = stacked
self._committed = False
self._tag = None # optional tag information for this bundle
def get_elements_iterable(self, make_copy=False):
+ # type: (bool) -> Iterable[WindowedValue]
"""Returns iterable elements.
Args:
@@ -193,6 +205,7 @@
self.add(element)
def receive(self, element):
+ # type: (WindowedValue) -> None
self.add(element)
def commit(self, synchronized_processing_time):
diff --git a/sdks/python/apache_beam/runners/direct/clock.py b/sdks/python/apache_beam/runners/direct/clock.py
index 6dbf8b2..54b5701 100644
--- a/sdks/python/apache_beam/runners/direct/clock.py
+++ b/sdks/python/apache_beam/runners/direct/clock.py
@@ -19,6 +19,8 @@
For internal use only. No backwards compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import time
diff --git a/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor.py b/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor.py
index d625d3c..eaa2f59 100644
--- a/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor.py
+++ b/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor.py
@@ -17,11 +17,21 @@
"""ConsumerTrackingPipelineVisitor, a PipelineVisitor object."""
+# pytype: skip-file
+
from __future__ import absolute_import
+from typing import TYPE_CHECKING
+from typing import Dict
+from typing import List
+from typing import Set
+
from apache_beam import pvalue
from apache_beam.pipeline import PipelineVisitor
+if TYPE_CHECKING:
+ from apache_beam.pipeline import AppliedPTransform
+
class ConsumerTrackingPipelineVisitor(PipelineVisitor):
"""For internal use only; no backwards-compatibility guarantees.
@@ -34,14 +44,15 @@
"""
def __init__(self):
- self.value_to_consumers = {} # Map from PValue to [AppliedPTransform].
- self.root_transforms = set() # set of (root) AppliedPTransforms.
- self.views = [] # list of side inputs.
- self.step_names = {} # Map from AppliedPTransform to String.
+ self.value_to_consumers = {} # type: Dict[pvalue.PValue, List[AppliedPTransform]]
+ self.root_transforms = set() # type: Set[AppliedPTransform]
+ self.views = [] # type: List[pvalue.AsSideInput]
+ self.step_names = {} # type: Dict[AppliedPTransform, str]
self._num_transforms = 0
def visit_transform(self, applied_ptransform):
+ # type: (AppliedPTransform) -> None
inputs = list(applied_ptransform.inputs)
if inputs:
for input_value in inputs:
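The TYPE_CHECKING guard used above keeps the AppliedPTransform import out of the runtime import graph (avoiding a circular import) while still letting the type comments name it. A minimal sketch of the idiom, with an illustrative class:

from typing import TYPE_CHECKING, List

if TYPE_CHECKING:
  # Only evaluated by type checkers, never at runtime.
  from apache_beam.pipeline import AppliedPTransform


class TransformCollector(object):
  def __init__(self):
    self.seen = []  # type: List[AppliedPTransform]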
diff --git a/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor_test.py b/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor_test.py
index 6d21e55..3961fd4 100644
--- a/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor_test.py
+++ b/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor_test.py
@@ -16,6 +16,8 @@
#
"""Tests for consumer_tracking_pipeline_visitor."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/runners/direct/direct_metrics.py b/sdks/python/apache_beam/runners/direct/direct_metrics.py
index 91ee03d..accf642 100644
--- a/sdks/python/apache_beam/runners/direct/direct_metrics.py
+++ b/sdks/python/apache_beam/runners/direct/direct_metrics.py
@@ -20,6 +20,8 @@
responding to queries of current metrics, but also of keeping the common
state consistent.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import threading
diff --git a/sdks/python/apache_beam/runners/direct/direct_metrics_test.py b/sdks/python/apache_beam/runners/direct/direct_metrics_test.py
index 3ce42c1..f40a508 100644
--- a/sdks/python/apache_beam/runners/direct/direct_metrics_test.py
+++ b/sdks/python/apache_beam/runners/direct/direct_metrics_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/runners/direct/direct_runner.py b/sdks/python/apache_beam/runners/direct/direct_runner.py
index 59ed0aa..d584f6b 100644
--- a/sdks/python/apache_beam/runners/direct/direct_runner.py
+++ b/sdks/python/apache_beam/runners/direct/direct_runner.py
@@ -21,6 +21,8 @@
graph of transformations belonging to a pipeline on the local machine.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import itertools
@@ -75,12 +77,53 @@
def is_fnapi_compatible(self):
return BundleBasedDirectRunner.is_fnapi_compatible()
+ def apply_TestStream(self, transform, pbegin, options):
+ """Expands the TestStream into the DirectRunner implementation.
+
+ Takes the TestStream transform and creates a _TestStream -> multiplexer ->
+ _WatermarkController.
+ """
+
+ from apache_beam.runners.direct.test_stream_impl import _TestStream
+ from apache_beam.runners.direct.test_stream_impl import _WatermarkController
+ from apache_beam import pvalue
+ assert isinstance(pbegin, pvalue.PBegin)
+
+ # If there is only one tag there is no need to add the multiplexer.
+ if len(transform.output_tags) == 1:
+ return (pbegin
+ | _TestStream(transform.output_tags, events=transform._events)
+ | _WatermarkController())
+
+ # This multiplexes the events into the multiple output PCollections.
+ def mux(event):
+ if event.tag:
+ yield pvalue.TaggedOutput(event.tag, event)
+ else:
+ yield event
+ mux_output = (pbegin
+ | _TestStream(transform.output_tags, events=transform._events)
+ | 'TestStream Multiplexer' >> beam.ParDo(mux).with_outputs())
+
+ # Apply a way to control the watermark per output. It is necessary to
+ # have an individual _WatermarkController per PCollection because the
+ # calculation of the input watermark of a transform is based on the event
+ # timestamp of the elements flowing through it. This means it is impossible
+ # to control the output watermarks of the individual PCollections based
+ # solely on the event timestamps.
+ outputs = {}
+ for tag in transform.output_tags:
+ label = '_WatermarkController[{}]'.format(tag)
+ outputs[tag] = (mux_output[tag] | label >> _WatermarkController())
+
+ return outputs
+
def run_pipeline(self, pipeline, options):
from apache_beam.pipeline import PipelineVisitor
from apache_beam.runners.dataflow.native_io.iobase import NativeSource
from apache_beam.runners.dataflow.native_io.iobase import _NativeWrite
- from apache_beam.testing.test_stream import _TestStream
+ from apache_beam.runners.direct.test_stream_impl import _TestStream
class _FnApiRunnerSupportVisitor(PipelineVisitor):
"""Visitor determining if a Pipeline can be run on the FnApiRunner."""
@@ -250,6 +293,7 @@
def _infer_output_coder(self, unused_input_type=None,
unused_input_coder=None):
+ # type: (...) -> typing.Optional[coders.Coder]
return coders.BytesCoder()
def get_windowing(self, inputs):
@@ -359,7 +403,7 @@
from apache_beam.runners.direct.executor import Executor
from apache_beam.runners.direct.transform_evaluator import \
TransformEvaluatorRegistry
- from apache_beam.testing.test_stream import _TestStream
+ from apache_beam.runners.direct.test_stream_impl import _TestStream
# Performing configured PTransform overrides.
pipeline.replace_all(_get_transform_overrides(options))
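The `apply_TestStream` expansion above relies on Beam's multi-output `ParDo`: a single stream of tagged events is routed into one PCollection per tag via `pvalue.TaggedOutput`. Below is a self-contained sketch of that routing pattern on a bounded `Create` source; the `Event` class and the tag names are hypothetical, not the internal TestStream events.

```python
import apache_beam as beam
from apache_beam import pvalue


class Event(object):
  # Hypothetical stand-in for a tagged event; `tag` selects the output.
  def __init__(self, tag, value):
    self.tag = tag
    self.value = value


def mux(event):
  # Route tagged events to the matching output PCollection; untagged
  # events go to the main output, mirroring the mux() closure above.
  if event.tag:
    yield pvalue.TaggedOutput(event.tag, event.value)
  else:
    yield event.value


with beam.Pipeline() as p:
  events = p | beam.Create([Event(None, 1), Event('late', 2), Event(None, 3)])
  outputs = events | 'Multiplexer' >> beam.ParDo(mux).with_outputs(
      'late', main='on_time')
  outputs.on_time | 'PrintOnTime' >> beam.Map(print)
  outputs['late'] | 'PrintLate' >> beam.Map(print)
```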
diff --git a/sdks/python/apache_beam/runners/direct/direct_runner_test.py b/sdks/python/apache_beam/runners/direct/direct_runner_test.py
index 95df81d..50164b9 100644
--- a/sdks/python/apache_beam/runners/direct/direct_runner_test.py
+++ b/sdks/python/apache_beam/runners/direct/direct_runner_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import threading
diff --git a/sdks/python/apache_beam/runners/direct/direct_userstate.py b/sdks/python/apache_beam/runners/direct/direct_userstate.py
index 42afaa3..dab4b71 100644
--- a/sdks/python/apache_beam/runners/direct/direct_userstate.py
+++ b/sdks/python/apache_beam/runners/direct/direct_userstate.py
@@ -16,6 +16,8 @@
#
"""Support for user state in the BundleBasedDirectRunner."""
+# pytype: skip-file
+
from __future__ import absolute_import
import itertools
diff --git a/sdks/python/apache_beam/runners/direct/evaluation_context.py b/sdks/python/apache_beam/runners/direct/evaluation_context.py
index 54397b8..c004ce5 100644
--- a/sdks/python/apache_beam/runners/direct/evaluation_context.py
+++ b/sdks/python/apache_beam/runners/direct/evaluation_context.py
@@ -17,11 +17,22 @@
"""EvaluationContext tracks global state, triggers and watermarks."""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
import threading
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import DefaultDict
+from typing import Dict
+from typing import Iterable
+from typing import List
+from typing import Optional
+from typing import Tuple
+from typing import Union
from apache_beam.runners.direct.direct_metrics import DirectMetrics
from apache_beam.runners.direct.executor import TransformExecutor
@@ -30,6 +41,15 @@
from apache_beam.transforms.trigger import InMemoryUnmergedState
from apache_beam.utils import counters
+if TYPE_CHECKING:
+ from apache_beam import pvalue
+ from apache_beam.pipeline import AppliedPTransform
+ from apache_beam.runners.direct.bundle_factory import BundleFactory, _Bundle
+ from apache_beam.runners.direct.util import TimerFiring
+ from apache_beam.runners.direct.util import TransformResult
+ from apache_beam.runners.direct.watermark_manager import _TransformWatermarks
+ from apache_beam.utils.timestamp import Timestamp
+
class _ExecutionContext(object):
"""Contains the context for the execution of a single PTransform.
@@ -37,7 +57,10 @@
It holds the watermarks for that transform, as well as keyed states.
"""
- def __init__(self, watermarks, keyed_states):
+ def __init__(self,
+ watermarks, # type: _TransformWatermarks
+ keyed_states
+ ):
self.watermarks = watermarks
self.keyed_states = keyed_states
@@ -75,10 +98,11 @@
"""
def __init__(self, side_inputs):
+ # type: (Iterable[pvalue.AsSideInput]) -> None
self._lock = threading.Lock()
- self._views = {}
- self._transform_to_side_inputs = collections.defaultdict(list)
- self._side_input_to_blocked_tasks = collections.defaultdict(list)
+ self._views = {} # type: Dict[pvalue.AsSideInput, _SideInputView]
+ self._transform_to_side_inputs = collections.defaultdict(list) # type: DefaultDict[Optional[AppliedPTransform], List[pvalue.AsSideInput]]
+ self._side_input_to_blocked_tasks = collections.defaultdict(list) # type: ignore # unused?
for side in side_inputs:
self._views[side] = _SideInputView(side)
@@ -89,7 +113,12 @@
if self._views else '[]')
return '_SideInputsContainer(_views=%s)' % views_string
- def get_value_or_block_until_ready(self, side_input, task, block_until):
+ def get_value_or_block_until_ready(self,
+ side_input,
+ task, # type: TransformExecutor
+ block_until # type: Timestamp
+ ):
+ # type: (...) -> Any
"""Returns the value of a view whose task is unblocked or blocks its task.
It gets the value of a view whose watermark has been updated and
@@ -121,6 +150,7 @@
def update_watermarks_for_transform_and_unblock_tasks(self,
ptransform,
watermark):
+ # type: (...) -> List[Tuple[TransformExecutor, Timestamp]]
"""Updates _SideInputsContainer after a watermark update and unbloks tasks.
It traverses the list of side inputs per PTransform and calls
@@ -143,6 +173,7 @@
def _update_watermarks_for_side_input_and_unblock_tasks(self,
side_input,
watermark):
+ # type: (...) -> List[Tuple[TransformExecutor, Timestamp]]
"""Helps update _SideInputsContainer after a watermark update.
For each view of the side input, it updates the value of the watermark
@@ -210,15 +241,22 @@
global watermarks, and executing any callbacks that can be executed.
"""
- def __init__(self, pipeline_options, bundle_factory, root_transforms,
- value_to_consumers, step_names, views, clock):
+ def __init__(self,
+ pipeline_options,
+ bundle_factory, # type: BundleFactory
+ root_transforms,
+ value_to_consumers,
+ step_names,
+ views, # type: Iterable[pvalue.AsSideInput]
+ clock
+ ):
self.pipeline_options = pipeline_options
self._bundle_factory = bundle_factory
self._root_transforms = root_transforms
self._value_to_consumers = value_to_consumers
self._step_names = step_names
self.views = views
- self._pcollection_to_views = collections.defaultdict(list)
+ self._pcollection_to_views = collections.defaultdict(list) # type: DefaultDict[pvalue.PCollection, List[pvalue.AsSideInput]]
for view in views:
self._pcollection_to_views[view.pvalue].append(view)
self._transform_keyed_states = self._initialize_keyed_states(
@@ -227,7 +265,7 @@
self._watermark_manager = WatermarkManager(
clock, root_transforms, value_to_consumers,
self._transform_keyed_states)
- self._pending_unblocked_tasks = []
+ self._pending_unblocked_tasks = [] # type: List[Tuple[TransformExecutor, Timestamp]]
self._counter_factory = counters.CounterFactory()
self._metrics = DirectMetrics()
@@ -251,10 +289,15 @@
return self._metrics
def is_root_transform(self, applied_ptransform):
+ # type: (AppliedPTransform) -> bool
return applied_ptransform in self._root_transforms
- def handle_result(
- self, completed_bundle, completed_timers, result):
+ def handle_result(self,
+ completed_bundle, # type: _Bundle
+ completed_timers,
+ result # type: TransformResult
+ ):
"""Handle the provided result produced after evaluating the input bundle.
Handle the provided TransformResult, produced after evaluating
@@ -303,7 +346,10 @@
existing_keyed_state[k] = v
return committed_bundles
- def _update_side_inputs_container(self, committed_bundles, result):
+ def _update_side_inputs_container(self,
+ committed_bundles, # type: Iterable[_Bundle]
+ result # type: TransformResult
+ ):
"""Update the side inputs container if we are outputting into a side input.
Look at the result, and if it's outputting into a PCollection that we have
@@ -330,7 +376,11 @@
executor_service.submit(task)
self._pending_unblocked_tasks = []
- def _commit_bundles(self, uncommitted_bundles, unprocessed_bundles):
+ def _commit_bundles(self,
+ uncommitted_bundles, # type: Iterable[_Bundle]
+ unprocessed_bundles # type: Iterable[_Bundle]
+ ):
+ # type: (...) -> Tuple[Tuple[_Bundle, ...], Tuple[_Bundle, ...]]
"""Commits bundles and returns a immutable set of committed bundles."""
for in_progress_bundle in uncommitted_bundles:
producing_applied_ptransform = in_progress_bundle.pcollection.producer
@@ -343,23 +393,28 @@
return tuple(uncommitted_bundles), tuple(unprocessed_bundles)
def get_execution_context(self, applied_ptransform):
+ # type: (AppliedPTransform) -> _ExecutionContext
return _ExecutionContext(
self._watermark_manager.get_watermarks(applied_ptransform),
self._transform_keyed_states[applied_ptransform])
def create_bundle(self, output_pcollection):
+ # type: (Union[pvalue.PBegin, pvalue.PCollection]) -> _Bundle
"""Create an uncommitted bundle for the specified PCollection."""
return self._bundle_factory.create_bundle(output_pcollection)
def create_empty_committed_bundle(self, output_pcollection):
+ # type: (pvalue.PCollection) -> _Bundle
"""Create empty bundle useful for triggering evaluation."""
return self._bundle_factory.create_empty_committed_bundle(
output_pcollection)
def extract_all_timers(self):
+ # type: () -> Tuple[List[Tuple[AppliedPTransform, List[TimerFiring]]], bool]
return self._watermark_manager.extract_all_timers()
def is_done(self, transform=None):
+ # type: (Optional[AppliedPTransform]) -> bool
"""Checks completion of a step or the pipeline.
Args:
@@ -378,6 +433,7 @@
return True
def _is_transform_done(self, transform):
+ # type: (AppliedPTransform) -> bool
tw = self._watermark_manager.get_watermarks(transform)
return tw.output_watermark == WatermarkManager.WATERMARK_POS_INF
diff --git a/sdks/python/apache_beam/runners/direct/executor.py b/sdks/python/apache_beam/runners/direct/executor.py
index e69cd79..e1a4692 100644
--- a/sdks/python/apache_beam/runners/direct/executor.py
+++ b/sdks/python/apache_beam/runners/direct/executor.py
@@ -17,6 +17,8 @@
"""An executor that schedules and executes applied ptransforms."""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
@@ -27,6 +29,12 @@
import traceback
from builtins import object
from builtins import range
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Dict
+from typing import FrozenSet
+from typing import Optional
+from typing import Set
from weakref import WeakValueDictionary
from future.moves import queue
@@ -37,6 +45,12 @@
from apache_beam.transforms import sideinputs
from apache_beam.utils import counters
+if TYPE_CHECKING:
+ from apache_beam import pvalue
+ from apache_beam.runners.direct.bundle_factory import _Bundle
+ from apache_beam.runners.direct.evaluation_context import EvaluationContext
+ from apache_beam.runners.direct.transform_evaluator import TransformEvaluatorRegistry
+
_LOGGER = logging.getLogger(__name__)
@@ -58,7 +72,10 @@
# Amount to block waiting for getting an item from the queue in seconds.
TIMEOUT = 5
- def __init__(self, queue, index):
+ def __init__(self,
+ queue, # type: queue.Queue[_ExecutorService.CallableTask]
+ index
+ ):
super(_ExecutorService._ExecutorServiceWorker, self).__init__()
self.queue = queue
self._index = index
@@ -79,6 +96,7 @@
self._index, name, 'executing' if task else 'idle')
def _get_task_or_none(self):
+ # type: () -> Optional[_ExecutorService.CallableTask]
try:
# Do not block indefinitely, otherwise we may not act for a requested
# shutdown.
@@ -105,12 +123,13 @@
self.shutdown_requested = True
def __init__(self, num_workers):
- self.queue = queue.Queue()
+ self.queue = queue.Queue() # type: queue.Queue[_ExecutorService.CallableTask]
self.workers = [_ExecutorService._ExecutorServiceWorker(
self.queue, i) for i in range(num_workers)]
self.shutdown_requested = False
def submit(self, task):
+ # type: (_ExecutorService.CallableTask) -> None
assert isinstance(task, _ExecutorService.CallableTask)
if not self.shutdown_requested:
self.queue.put(task)
@@ -138,7 +157,10 @@
class _TransformEvaluationState(object):
- def __init__(self, executor_service, scheduled):
+ def __init__(self,
+ executor_service,
+ scheduled # type: Set[TransformExecutor]
+ ):
self.executor_service = executor_service
self.scheduled = scheduled
@@ -206,16 +228,19 @@
"""
def __init__(self, executor_service):
+ # type: (_ExecutorService) -> None
self._executor_service = executor_service
- self._scheduled = set()
+ self._scheduled = set() # type: Set[TransformExecutor]
self._parallel = _ParallelEvaluationState(
self._executor_service, self._scheduled)
- self._serial_cache = WeakValueDictionary()
+ self._serial_cache = WeakValueDictionary() # type: WeakValueDictionary[Any, _SerialEvaluationState]
def parallel(self):
+ # type: () -> _ParallelEvaluationState
return self._parallel
def serial(self, step):
+ # type: (Any) -> _SerialEvaluationState
cached = self._serial_cache.get(step)
if not cached:
cached = _SerialEvaluationState(self._executor_service, self._scheduled)
@@ -224,6 +249,7 @@
@property
def executors(self):
+ # type: () -> FrozenSet[TransformExecutor]
return frozenset(self._scheduled)
@@ -235,7 +261,11 @@
or for a source transform.
"""
- def __init__(self, evaluation_context, all_updates, timer_firings=None):
+ def __init__(self,
+ evaluation_context, # type: EvaluationContext
+ all_updates,
+ timer_firings=None
+ ):
self._evaluation_context = evaluation_context
self._all_updates = all_updates
self._timer_firings = timer_firings or []
@@ -273,9 +303,15 @@
_MAX_RETRY_PER_BUNDLE = 4
- def __init__(self, transform_evaluator_registry, evaluation_context,
- input_bundle, fired_timers, applied_ptransform,
- completion_callback, transform_evaluation_state):
+ def __init__(self,
+ transform_evaluator_registry, # type: TransformEvaluatorRegistry
+ evaluation_context, # type: EvaluationContext
+ input_bundle, # type: _Bundle
+ fired_timers,
+ applied_ptransform,
+ completion_callback,
+ transform_evaluation_state # type: _TransformEvaluationState
+ ):
self._transform_evaluator_registry = transform_evaluator_registry
self._evaluation_context = evaluation_context
self._input_bundle = input_bundle
@@ -291,7 +327,7 @@
self._applied_ptransform = applied_ptransform
self._completion_callback = completion_callback
self._transform_evaluation_state = transform_evaluation_state
- self._side_input_values = {}
+ self._side_input_values = {} # type: Dict[pvalue.AsSideInput, Any]
self.blocked = False
self._call_count = 0
self._retry_count = 0
@@ -410,8 +446,11 @@
NUM_WORKERS = 1
- def __init__(self, value_to_consumers, transform_evaluator_registry,
- evaluation_context):
+ def __init__(self,
+ value_to_consumers,
+ transform_evaluator_registry,
+ evaluation_context # type: EvaluationContext
+ ):
self.executor_service = _ExecutorService(
_ExecutorServiceParallelExecutor.NUM_WORKERS)
self.transform_executor_services = _TransformExecutorServices(
@@ -454,6 +493,7 @@
self.executor_service.shutdown()
def schedule_consumers(self, committed_bundle):
+ # type: (_Bundle) -> None
if committed_bundle.pcollection in self.value_to_consumers:
consumers = self.value_to_consumers[committed_bundle.pcollection]
for applied_ptransform in consumers:
@@ -464,8 +504,12 @@
unprocessed_bundle):
self.node_to_pending_bundles[applied_ptransform].append(unprocessed_bundle)
- def schedule_consumption(self, consumer_applied_ptransform, committed_bundle,
- fired_timers, on_complete):
+ def schedule_consumption(self,
+ consumer_applied_ptransform,
+ committed_bundle, # type: _Bundle
+ fired_timers,
+ on_complete
+ ):
"""Schedules evaluation of the given bundle with the transform."""
assert consumer_applied_ptransform
assert committed_bundle
@@ -473,7 +517,7 @@
if self.transform_evaluator_registry.should_execute_serially(
consumer_applied_ptransform):
transform_executor_service = self.transform_executor_services.serial(
- consumer_applied_ptransform)
+ consumer_applied_ptransform) # type: _TransformEvaluationState
else:
transform_executor_service = self.transform_executor_services.parallel()
@@ -550,6 +594,7 @@
"""MonitorTask continuously runs to ensure that pipeline makes progress."""
def __init__(self, executor):
+ # type: (_ExecutorServiceParallelExecutor) -> None
self._executor = executor
@property
@@ -587,6 +632,7 @@
self._executor.executor_service.submit(self)
def _should_shutdown(self):
+ # type: () -> bool
"""Checks whether the pipeline is completed and should be shut down.
If there is anything in the queue of tasks to do or
@@ -648,6 +694,7 @@
return bool(transform_fired_timers)
def _is_executing(self):
+ # type: () -> bool
"""Checks whether the job is still executing.
Returns:
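The `_ExecutorServiceWorker` above polls its task queue with a bounded `TIMEOUT` rather than blocking forever, so a requested shutdown is noticed within a few seconds. A small stand-alone sketch of that polling loop (names are illustrative, not Beam's internals):

```python
# Sketch of the bounded-wait polling pattern described above.
import threading

try:
  import queue           # Python 3
except ImportError:
  import Queue as queue  # Python 2

TIMEOUT = 5  # seconds to wait for a task before re-checking for shutdown


class Worker(threading.Thread):
  def __init__(self, tasks):
    super(Worker, self).__init__()
    self.tasks = tasks
    self.shutdown_requested = False
    self.daemon = True

  def run(self):
    while not self.shutdown_requested:
      try:
        task = self.tasks.get(timeout=TIMEOUT)
      except queue.Empty:
        continue  # nothing to do; loop back and re-check the shutdown flag
      task()


tasks = queue.Queue()
worker = Worker(tasks)
worker.start()
tasks.put(lambda: print('hello from the worker'))
worker.shutdown_requested = True
```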
diff --git a/sdks/python/apache_beam/runners/direct/helper_transforms.py b/sdks/python/apache_beam/runners/direct/helper_transforms.py
index 2cdff58..da16d94 100644
--- a/sdks/python/apache_beam/runners/direct/helper_transforms.py
+++ b/sdks/python/apache_beam/runners/direct/helper_transforms.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
diff --git a/sdks/python/apache_beam/runners/direct/sdf_direct_runner.py b/sdks/python/apache_beam/runners/direct/sdf_direct_runner.py
index 84b0218..e7234de 100644
--- a/sdks/python/apache_beam/runners/direct/sdf_direct_runner.py
+++ b/sdks/python/apache_beam/runners/direct/sdf_direct_runner.py
@@ -18,12 +18,16 @@
"""This module contains Splittable DoFn logic that is specific to DirectRunner.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import uuid
from builtins import object
from threading import Lock
from threading import Timer
+from typing import Any
+from typing import Iterable
import apache_beam as beam
from apache_beam import TimeDomain
@@ -499,4 +503,5 @@
self.output_iter = None
def process_outputs(self, windowed_input_element, output_iter):
+ # type: (WindowedValue, Iterable[Any]) -> None
self.output_iter = output_iter
diff --git a/sdks/python/apache_beam/runners/direct/sdf_direct_runner_test.py b/sdks/python/apache_beam/runners/direct/sdf_direct_runner_test.py
index d9d68cc..40c761e 100644
--- a/sdks/python/apache_beam/runners/direct/sdf_direct_runner_test.py
+++ b/sdks/python/apache_beam/runners/direct/sdf_direct_runner_test.py
@@ -17,6 +17,8 @@
"""Unit tests for SDF implementation for DirectRunner."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/runners/direct/test_direct_runner.py b/sdks/python/apache_beam/runners/direct/test_direct_runner.py
index 04dbe50..81f5870 100644
--- a/sdks/python/apache_beam/runners/direct/test_direct_runner.py
+++ b/sdks/python/apache_beam/runners/direct/test_direct_runner.py
@@ -17,6 +17,8 @@
"""Wrapper of Beam runners that's built for running and verifying e2e tests."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/direct/test_stream_impl.py b/sdks/python/apache_beam/runners/direct/test_stream_impl.py
new file mode 100644
index 0000000..c1930fa
--- /dev/null
+++ b/sdks/python/apache_beam/runners/direct/test_stream_impl.py
@@ -0,0 +1,175 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""The TestStream implementation for the DirectRunner
+
+The DirectRunner implements TestStream as the _TestStream class which is used
+to store the events in memory, the _WatermarkController which is used to set the
+watermark and emit events, and the multiplexer which sends events to the correct
+tagged PCollection.
+"""
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+from apache_beam import coders
+from apache_beam import pvalue
+from apache_beam.testing.test_stream import WatermarkEvent
+from apache_beam.transforms import PTransform
+from apache_beam.transforms import core
+from apache_beam.transforms import window
+from apache_beam.utils import timestamp
+
+
+class _WatermarkController(PTransform):
+ """A runner-overridable PTransform Primitive to control the watermark.
+
+ Expected implementation behavior:
+ - If the instance receives a WatermarkEvent, it sets its output watermark to
+ the specified value then drops the event.
+ - If the instance receives an ElementEvent, it emits all specified elements
+ to the Global Window with the event time set to the element's timestamp.
+ """
+ def get_windowing(self, _):
+ return core.Windowing(window.GlobalWindows())
+
+ def expand(self, pcoll):
+ return pvalue.PCollection.from_(pcoll)
+
+
+class _TestStream(PTransform):
+ """Test stream that generates events on an unbounded PCollection of elements.
+
+ Each event emits elements, advances the watermark, or advances the processing
+ time. After all of the specified elements are emitted, the TestStream ceases
+ to produce output.
+
+ Expected implementation behavior:
+ - If the instance receives a WatermarkEvent with the WATERMARK_CONTROL_TAG
+ then the instance sets its own watermark hold at the specified value and
+ drops the event.
+ - If the instance receives any other WatermarkEvent or ElementEvent, it
+ passes it to the consumer.
+ """
+
+ # This tag is used on WatermarkEvents to control the watermark at the root
+ # TestStream.
+ WATERMARK_CONTROL_TAG = '_TestStream_Watermark'
+
+ def __init__(self, output_tags, coder=coders.FastPrimitivesCoder(),
+ events=None):
+ assert coder is not None
+ self.coder = coder
+ self._raw_events = events
+ self._events = self._add_watermark_advancements(output_tags, events)
+
+ def _watermark_starts(self, output_tags):
+ """Sentinel values to hold the watermark of outputs to -inf.
+
+ The output watermarks of the output PCollections (fake unbounded sources) in
+ a TestStream are controlled by watermark holds. This sets the hold of each
+ output PCollection so that the individual holds can be controlled by the
+ given events.
+ """
+ return [WatermarkEvent(timestamp.MIN_TIMESTAMP, tag) for tag in output_tags]
+
+ def _watermark_stops(self, output_tags):
+ """Sentinel values to close the watermark of outputs."""
+ return [WatermarkEvent(timestamp.MAX_TIMESTAMP, tag) for tag in output_tags]
+
+ def _test_stream_start(self):
+ """Sentinel value to move the watermark hold of the TestStream to +inf.
+
+ This sets a hold to +inf such that the individual holds of the output
+ PCollections are allowed to modify their individual output watermarks with
+ their holds. This is because the calculation of the output watermark is a
+ min over all input watermarks.
+ """
+ return [WatermarkEvent(timestamp.MAX_TIMESTAMP - timestamp.TIME_GRANULARITY,
+ _TestStream.WATERMARK_CONTROL_TAG)]
+
+ def _test_stream_stop(self):
+ """Sentinel value to close the watermark of the TestStream."""
+ return [WatermarkEvent(timestamp.MAX_TIMESTAMP,
+ _TestStream.WATERMARK_CONTROL_TAG)]
+
+ def _test_stream_init(self):
+ """Sentinel value to hold the watermark of the TestStream to -inf.
+
+ This sets a hold to ensure that the output watermarks of the output
+ PCollections do not advance to +inf before their watermark holds are set.
+ """
+ return [WatermarkEvent(timestamp.MIN_TIMESTAMP,
+ _TestStream.WATERMARK_CONTROL_TAG)]
+
+ def _set_up(self, output_tags):
+ return (self._test_stream_init()
+ + self._watermark_starts(output_tags)
+ + self._test_stream_start())
+
+ def _tear_down(self, output_tags):
+ return self._watermark_stops(output_tags) + self._test_stream_stop()
+
+ def _add_watermark_advancements(self, output_tags, events):
+ """Adds watermark advancements to the given events.
+
+ The following watermark advancements could instead be done on the runner
+ side. However, that would make the logic on the runner side much more
+ complicated than it needs to be.
+
+ In order for watermarks to be properly advanced in a TestStream, a specific
+ sequence of watermark holds must be sent:
+
+ 1. Hold the root watermark at -inf (this prevents the pipeline from
+ immediately returning).
+ 2. Hold the watermarks at the WatermarkControllers at -inf (this prevents
+ the pipeline from immediately returning).
+ 3. Advance the root watermark to +inf - 1 (this allows the downstream
+ WatermarkControllers to control their watermarks via holds).
+ 4. Advance watermarks as normal.
+ 5. Advance WatermarkController watermarks to +inf
+ 6. Advance root watermark to +inf.
+ """
+ if not events:
+ return []
+
+ return self._set_up(output_tags) + events + self._tear_down(output_tags)
+
+ def get_windowing(self, unused_inputs):
+ return core.Windowing(window.GlobalWindows())
+
+ def expand(self, pcoll):
+ return pvalue.PCollection(pcoll.pipeline, is_bounded=False)
+
+ def _infer_output_coder(self, input_type=None, input_coder=None):
+ return self.coder
+
+ def _events_from_script(self, index):
+ yield self._events[index]
+
+ def events(self, index):
+ return self._events_from_script(index)
+
+ def begin(self):
+ return 0
+
+ def end(self, index):
+ return index >= len(self._events)
+
+ def next(self, index):
+ return index + 1
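The watermark-advancement sequence documented in `_add_watermark_advancements` can be illustrated with a toy version that wraps user events in the six documented steps. The sketch below uses a hypothetical `WatermarkEvent` namedtuple and integer sentinel timestamps instead of `apache_beam.utils.timestamp`:

```python
# Toy model of the event ordering produced around the user's events.
from collections import namedtuple

WatermarkEvent = namedtuple('WatermarkEvent', ['timestamp', 'tag'])

MIN_TS, MAX_TS, GRANULARITY = -(2 ** 63), 2 ** 63 - 1, 1  # illustrative sentinels
ROOT_TAG = '_TestStream_Watermark'


def add_watermark_advancements(output_tags, events):
  if not events:
    return []
  set_up = ([WatermarkEvent(MIN_TS, ROOT_TAG)] +                 # 1. hold root at -inf
            [WatermarkEvent(MIN_TS, t) for t in output_tags] +   # 2. hold each output at -inf
            [WatermarkEvent(MAX_TS - GRANULARITY, ROOT_TAG)])    # 3. advance root to +inf - 1
  tear_down = ([WatermarkEvent(MAX_TS, t) for t in output_tags] +  # 5. outputs to +inf
               [WatermarkEvent(MAX_TS, ROOT_TAG)])                 # 6. root to +inf
  return set_up + events + tear_down                              # 4. user events in between


print(add_watermark_advancements(['main'], ['<element event>']))
```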
diff --git a/sdks/python/apache_beam/runners/direct/transform_evaluator.py b/sdks/python/apache_beam/runners/direct/transform_evaluator.py
index d790465..0f47b1c 100644
--- a/sdks/python/apache_beam/runners/direct/transform_evaluator.py
+++ b/sdks/python/apache_beam/runners/direct/transform_evaluator.py
@@ -17,6 +17,8 @@
"""An evaluator of a specific application of a transform."""
+# pytype: skip-file
+
from __future__ import absolute_import
import atexit
@@ -24,8 +26,14 @@
import logging
import random
import time
-import typing
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Tuple
+from typing import Type
+from typing import Union
from future.utils import iteritems
@@ -44,14 +52,14 @@
from apache_beam.runners.direct.sdf_direct_runner import ProcessElements
from apache_beam.runners.direct.sdf_direct_runner import ProcessFn
from apache_beam.runners.direct.sdf_direct_runner import SDFProcessElementInvoker
+from apache_beam.runners.direct.test_stream_impl import _TestStream
+from apache_beam.runners.direct.test_stream_impl import _WatermarkController
from apache_beam.runners.direct.util import KeyedWorkItem
from apache_beam.runners.direct.util import TransformResult
from apache_beam.runners.direct.watermark_manager import WatermarkManager
from apache_beam.testing.test_stream import ElementEvent
from apache_beam.testing.test_stream import ProcessingTimeEvent
from apache_beam.testing.test_stream import WatermarkEvent
-from apache_beam.testing.test_stream import _TestStream
-from apache_beam.testing.test_stream import _WatermarkController
from apache_beam.transforms import core
from apache_beam.transforms.trigger import InMemoryUnmergedState
from apache_beam.transforms.trigger import TimeDomain
@@ -68,6 +76,12 @@
from apache_beam.utils.timestamp import MIN_TIMESTAMP
from apache_beam.utils.timestamp import Timestamp
+if TYPE_CHECKING:
+ from apache_beam.io.gcp.pubsub import _PubSubSource
+ from apache_beam.io.gcp.pubsub import PubsubMessage
+ from apache_beam.pipeline import AppliedPTransform
+ from apache_beam.runners.direct.evaluation_context import EvaluationContext
+
_LOGGER = logging.getLogger(__name__)
@@ -77,9 +91,10 @@
Creates instances of TransformEvaluator for the application of a transform.
"""
- _test_evaluators_overrides = {}
+ _test_evaluators_overrides = {} # type: Dict[Type[core.PTransform], Type[_TransformEvaluator]]
def __init__(self, evaluation_context):
+ # type: (EvaluationContext) -> None
assert evaluation_context
self._evaluation_context = evaluation_context
self._evaluators = {
@@ -95,7 +110,7 @@
_TestStream: _TestStreamEvaluator,
ProcessElements: _ProcessElementsEvaluator,
_WatermarkController: _WatermarkControllerEvaluator,
- }
+ } # type: Dict[Type[core.PTransform], Type[_TransformEvaluator]]
self._evaluators.update(self._test_evaluators_overrides)
self._root_bundle_providers = {
core.PTransform: DefaultRootBundleProvider,
@@ -208,8 +223,12 @@
class _TransformEvaluator(object):
"""An evaluator of a specific application of a transform."""
- def __init__(self, evaluation_context, applied_ptransform,
- input_committed_bundle, side_inputs):
+ def __init__(self,
+ evaluation_context, # type: EvaluationContext
+ applied_ptransform, # type: AppliedPTransform
+ input_committed_bundle,
+ side_inputs
+ ):
self._evaluation_context = evaluation_context
self._applied_ptransform = applied_ptransform
self._input_committed_bundle = input_committed_bundle
@@ -288,6 +307,7 @@
raise NotImplementedError('%s do not process elements.' % type(self))
def finish_bundle(self):
+ # type: () -> TransformResult
"""Finishes the bundle and produces output."""
pass
@@ -466,7 +486,7 @@
# A mapping of transform to _PubSubSubscriptionWrapper.
# TODO(BEAM-7750): Prevents garbage collection of pipeline instances.
- _subscription_cache = {}
+ _subscription_cache = {} # type: Dict[AppliedPTransform, str]
def __init__(self, evaluation_context, applied_ptransform,
input_committed_bundle, side_inputs):
@@ -475,7 +495,7 @@
evaluation_context, applied_ptransform, input_committed_bundle,
side_inputs)
- self.source = self._applied_ptransform.transform._source
+ self.source = self._applied_ptransform.transform._source # type: _PubSubSource
if self.source.id_label:
raise NotImplementedError(
'DirectRunner: id_label is not supported for PubSub reads')
@@ -510,6 +530,7 @@
pass
def _read_from_pubsub(self, timestamp_attribute):
+ # type: (...) -> List[Tuple[Timestamp, PubsubMessage]]
from apache_beam.io.gcp.pubsub import PubsubMessage
from google.cloud import pubsub
@@ -549,6 +570,7 @@
return results
def finish_bundle(self):
+ # type: () -> TransformResult
data = self._read_from_pubsub(self.source.timestamp_attribute)
if data:
output_pcollection = list(self._outputs)[0]
@@ -565,7 +587,7 @@
else:
bundles = []
if self._applied_ptransform.inputs:
- input_pvalue = self._applied_ptransform.inputs[0]
+ input_pvalue = self._applied_ptransform.inputs[0] # type: Union[pvalue.PBegin, pvalue.PCollection]
else:
input_pvalue = pvalue.PBegin(self._applied_ptransform.transform.pipeline)
unprocessed_bundle = self._evaluation_context.create_bundle(
@@ -622,6 +644,7 @@
"""Ignores undeclared outputs, default execution mode."""
def receive(self, element):
+ # type: (WindowedValue) -> None
pass
class _InMemoryReceiver(common.Receiver):
@@ -632,6 +655,7 @@
self._tag = tag
def receive(self, element):
+ # type: (WindowedValue) -> None
self._target[self._tag].append(element)
def __missing__(self, key):
@@ -643,9 +667,13 @@
class _ParDoEvaluator(_TransformEvaluator):
"""TransformEvaluator for ParDo transform."""
- def __init__(self, evaluation_context, applied_ptransform,
- input_committed_bundle, side_inputs,
- perform_dofn_pickle_test=True):
+ def __init__(self,
+ evaluation_context, # type: EvaluationContext
+ applied_ptransform, # type: AppliedPTransform
+ input_committed_bundle,
+ side_inputs,
+ perform_dofn_pickle_test=True
+ ):
super(_ParDoEvaluator, self).__init__(
evaluation_context, applied_ptransform, input_committed_bundle,
side_inputs)
@@ -677,11 +705,11 @@
self.user_timer_map = {}
if is_stateful_dofn(dofn):
kv_type_hint = self._applied_ptransform.inputs[0].element_type
- if kv_type_hint and kv_type_hint != typing.Any:
+ if kv_type_hint and kv_type_hint != Any:
coder = coders.registry.get_coder(kv_type_hint)
self.key_coder = coder.key_coder()
else:
- self.key_coder = coders.registry.get_coder(typing.Any)
+ self.key_coder = coders.registry.get_coder(Any)
self.user_state_context = DirectUserStateContext(
self._step_context, dofn, self.key_coder)
@@ -837,7 +865,7 @@
# The input type of a GroupByKey will be Tuple[Any, Any] or more specific.
kv_type_hint = self._applied_ptransform.inputs[0].element_type
key_type_hint = (kv_type_hint.tuple_types[0] if kv_type_hint
- else typing.Any)
+ else Any)
self.key_coder = coders.registry.get_coder(key_type_hint)
def process_element(self, element):
@@ -893,7 +921,7 @@
# GroupAlsoByWindow will be Tuple[Any, Iter[Any]] or more specific.
kv_type_hint = self._applied_ptransform.outputs[None].element_type
key_type_hint = (kv_type_hint.tuple_types[0] if kv_type_hint
- else typing.Any)
+ else Any)
self.key_coder = coders.registry.get_coder(key_type_hint)
def process_element(self, element):
diff --git a/sdks/python/apache_beam/runners/direct/util.py b/sdks/python/apache_beam/runners/direct/util.py
index 57650ac..478a981 100644
--- a/sdks/python/apache_beam/runners/direct/util.py
+++ b/sdks/python/apache_beam/runners/direct/util.py
@@ -20,6 +20,8 @@
For internal use only. No backwards compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
diff --git a/sdks/python/apache_beam/runners/direct/watermark_manager.py b/sdks/python/apache_beam/runners/direct/watermark_manager.py
index 23431f1..8856ee5 100644
--- a/sdks/python/apache_beam/runners/direct/watermark_manager.py
+++ b/sdks/python/apache_beam/runners/direct/watermark_manager.py
@@ -17,10 +17,18 @@
"""Manages watermarks of PCollections and AppliedPTransforms."""
+# pytype: skip-file
+
from __future__ import absolute_import
import threading
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Dict
+from typing import Iterable
+from typing import List
+from typing import Set
+from typing import Tuple
from apache_beam import pipeline
from apache_beam import pvalue
@@ -29,6 +37,11 @@
from apache_beam.utils.timestamp import MIN_TIMESTAMP
from apache_beam.utils.timestamp import TIME_GRANULARITY
+if TYPE_CHECKING:
+ from apache_beam.pipeline import AppliedPTransform
+ from apache_beam.runners.direct.bundle_factory import _Bundle
+ from apache_beam.utils.timestamp import Timestamp
+
class WatermarkManager(object):
"""For internal use only; no backwards-compatibility guarantees.
@@ -45,7 +58,7 @@
self._value_to_consumers = value_to_consumers
self._transform_keyed_states = transform_keyed_states
# AppliedPTransform -> TransformWatermarks
- self._transform_to_watermarks = {}
+ self._transform_to_watermarks = {} # type: Dict[AppliedPTransform, _TransformWatermarks]
for root_transform in root_transforms:
self._transform_to_watermarks[root_transform] = _TransformWatermarks(
@@ -61,6 +74,7 @@
self._update_input_transform_watermarks(consumer)
def _update_input_transform_watermarks(self, applied_ptransform):
+ # type: (AppliedPTransform) -> None
assert isinstance(applied_ptransform, pipeline.AppliedPTransform)
input_transform_watermarks = []
for input_pvalue in applied_ptransform.inputs:
@@ -73,6 +87,7 @@
input_transform_watermarks)
def get_watermarks(self, applied_ptransform):
+ # type: (AppliedPTransform) -> _TransformWatermarks
"""Gets the input and output watermarks for an AppliedPTransform.
If the applied_ptransform has not processed any elements, return a
@@ -93,9 +108,15 @@
return self._transform_to_watermarks[applied_ptransform]
- def update_watermarks(self, completed_committed_bundle, applied_ptransform,
- completed_timers, outputs, unprocessed_bundles,
- keyed_earliest_holds, side_inputs_container):
+ def update_watermarks(self,
+ completed_committed_bundle, # type: _Bundle
+ applied_ptransform, # type: AppliedPTransform
+ completed_timers,
+ outputs,
+ unprocessed_bundles,
+ keyed_earliest_holds,
+ side_inputs_container
+ ):
assert isinstance(applied_ptransform, pipeline.AppliedPTransform)
self._update_pending(
completed_committed_bundle, applied_ptransform, completed_timers,
@@ -104,9 +125,13 @@
tw.hold(keyed_earliest_holds)
return self._refresh_watermarks(applied_ptransform, side_inputs_container)
- def _update_pending(self, input_committed_bundle, applied_ptransform,
- completed_timers, output_committed_bundles,
- unprocessed_bundles):
+ def _update_pending(self,
+ input_committed_bundle,
+ applied_ptransform, # type: AppliedPTransform
+ completed_timers,
+ output_committed_bundles, # type: Iterable[_Bundle]
+ unprocessed_bundles # type: Iterable[_Bundle]
+ ):
"""Updated list of pending bundles for the given AppliedPTransform."""
# Update pending elements. Filter out empty bundles. They do not impact
@@ -153,9 +178,10 @@
return unblocked_tasks
def extract_all_timers(self):
+ # type: () -> Tuple[List[Tuple[AppliedPTransform, List[TimerFiring]]], bool]
"""Extracts fired timers for all transforms
and reports if there are any timers set."""
- all_timers = []
+ all_timers = [] # type: List[Tuple[AppliedPTransform, List[TimerFiring]]]
has_realtime_timer = False
for applied_ptransform, tw in self._transform_to_watermarks.items():
fired_timers, had_realtime_timer = tw.extract_transform_timers()
@@ -175,17 +201,19 @@
def __init__(self, clock, keyed_states, transform):
self._clock = clock
self._keyed_states = keyed_states
- self._input_transform_watermarks = []
+ self._input_transform_watermarks = [] # type: List[_TransformWatermarks]
self._input_watermark = WatermarkManager.WATERMARK_NEG_INF
self._output_watermark = WatermarkManager.WATERMARK_NEG_INF
self._keyed_earliest_holds = {}
- self._pending = set() # Scheduled bundles targeted for this transform.
+ # Scheduled bundles targeted for this transform.
+ self._pending = set() # type: Set[_Bundle]
self._fired_timers = set()
self._lock = threading.Lock()
self._label = str(transform)
def update_input_transform_watermarks(self, input_transform_watermarks):
+ # type: (List[_TransformWatermarks]) -> None
with self._lock:
self._input_transform_watermarks = input_transform_watermarks
@@ -196,11 +224,13 @@
@property
def input_watermark(self):
+ # type: () -> Timestamp
with self._lock:
return self._input_watermark
@property
def output_watermark(self):
+ # type: () -> Timestamp
with self._lock:
return self._output_watermark
@@ -213,10 +243,12 @@
del self._keyed_earliest_holds[key]
def add_pending(self, pending):
+ # type: (_Bundle) -> None
with self._lock:
self._pending.add(pending)
def remove_pending(self, completed):
+ # type: (_Bundle) -> None
with self._lock:
# Ignore repeated removes. This will happen if a transform has a repeated
# input.
@@ -224,6 +256,7 @@
self._pending.remove(completed)
def refresh(self):
+ # type: () -> bool
"""Refresh the watermark for a given transform.
This method looks at the watermark coming from all input PTransforms, and
@@ -272,6 +305,7 @@
return self._clock.time()
def extract_transform_timers(self):
+ # type: () -> Tuple[List[TimerFiring], bool]
"""Extracts fired timers and reports of any timers set per transform."""
with self._lock:
fired_timers = []
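The watermark bookkeeping above follows the rule referenced earlier in this PR: a transform's input watermark is a min over its producers' output watermarks, and holds keep the output watermark from advancing past pending work. A toy sketch of that rule (values and helper names are illustrative only, not the WatermarkManager API):

```python
# Toy model of the refresh rule: input watermark = min over upstream output
# watermarks; the output watermark additionally cannot pass any active hold.
def input_watermark(upstream_output_watermarks):
  return min(upstream_output_watermarks)


def output_watermark(input_wm, holds):
  # Holds (e.g. from pending bundles or timers) cap the output watermark.
  return min([input_wm] + list(holds))


upstream = [12, 30]                          # two producers' output watermarks
iw = input_watermark(upstream)               # 12
print(iw, output_watermark(iw, holds=[5]))   # 12 5
```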
diff --git a/sdks/python/apache_beam/runners/interactive/README.md b/sdks/python/apache_beam/runners/interactive/README.md
index bdcb85d..ffab947 100644
--- a/sdks/python/apache_beam/runners/interactive/README.md
+++ b/sdks/python/apache_beam/runners/interactive/README.md
@@ -219,38 +219,15 @@
You can choose to run Interactive Beam on Flink with the following settings.
-* Install [docker](https://www.docker.com/).
-
-* Build the SDK container and start the local FlinkService.
-
- ```bash
- $ ./gradlew -p sdks/python/container/py35 docker # Optionally replace py35 with the Python version of your choice
- $ ./gradlew :runners:flink:1.9:job-server:runShadow # Blocking
- ```
-
-* Run `$ jupyter notebook` in another terminal.
-
* Use
- [`portable_runner.PortableRunner()`](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/runners/portability/portable_runner.py)
- as the underlying runner, while providing a
- [`pipeline_options.PortableOptions()`](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/options/pipeline_options.py)
- to the pipeline as follows.
+ [`flink_runner.FlinkRunner()`](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/runners/portability/flink_runner.py)
+ as the underlying runner.
```python
- options = pipeline_options.PipelineOptions()
- options.view_as(pipeline_options.PortableOptions).job_endpoint = 'localhost:8099'
- options.view_as(pipeline_options.SetupOptions).sdk_location = 'container'
- options.view_as(pipeline_options.DebugOptions).experiments = 'beam_fn_api'
-
- cache_dir = 'gs://bucket-name/dir'
- underlying_runner = portable_runner.PortableRunner()
- runner = interactive_runner.InteractiveRunner(underlying_runner=underlying_runner, cache_dir=cache_dir)
- p = beam.Pipeline(runner=runner, options=options)
+ p = beam.Pipeline(interactive_runner.InteractiveRunner(underlying_runner=flink_runner.FlinkRunner()))
```
-**Note**: Python Flink Runner (combination of PortableRunner and FlinkService)
-is being actively developed now, so these setups and commands are subject to
-changes. This guide and
+**Note**: This guide and
[Interactive Beam Running on Flink.ipynb](examples/Interactive%20Beam%20Running%20on%20Flink.ipynb)
capture the status of the world when it's last updated.
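For readers following along, the snippet above assumes the usual Interactive Beam imports; a fuller sketch (module paths taken from the links in this README) would look like:

```python
import apache_beam as beam
from apache_beam.runners.interactive import interactive_runner
from apache_beam.runners.portability import flink_runner

p = beam.Pipeline(
    interactive_runner.InteractiveRunner(
        underlying_runner=flink_runner.FlinkRunner()))
```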
diff --git a/sdks/python/apache_beam/runners/interactive/background_caching_job.py b/sdks/python/apache_beam/runners/interactive/background_caching_job.py
new file mode 100644
index 0000000..d46317a
--- /dev/null
+++ b/sdks/python/apache_beam/runners/interactive/background_caching_job.py
@@ -0,0 +1,159 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Module to build and run background caching job.
+
+For internal use only; no backwards-compatibility guarantees.
+
+A background caching job is a job that caches events for all unbounded sources
+of a given pipeline. With Interactive Beam, one such job is started when a
+pipeline run happens (which produces a main job in contrast to the background
+caching job) and meets the following conditions:
+
+ #. The pipeline contains unbounded sources.
+ #. No such background job is running.
+ #. No such background job has completed successfully and the cached events are
+ still valid (invalidated when unbounded sources change in the pipeline).
+
+Once started, the background caching job runs asynchronously until it hits some
+cache size limit. Meanwhile, the main job and future main jobs from the pipeline
+will run using the deterministic replay-able cached events until they are
+invalidated.
+"""
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+import apache_beam as beam
+from apache_beam import runners
+from apache_beam.runners.interactive import interactive_environment as ie
+
+
+def attempt_to_run_background_caching_job(runner, user_pipeline, options=None):
+ """Attempts to run a background caching job for a user-defined pipeline.
+
+ The pipeline result is automatically tracked by Interactive Beam in case
+ future cancellation/cleanup is needed.
+ """
+ if is_background_caching_job_needed(user_pipeline):
+ # Cancel non-terminal jobs if there is any before starting a new one.
+ attempt_to_cancel_background_caching_job(user_pipeline)
+ # Evict all caches if there is any.
+ ie.current_env().cleanup()
+ # TODO(BEAM-8335): refactor background caching job logic from
+ # pipeline_instrument module to this module and aggregate tests.
+ from apache_beam.runners.interactive import pipeline_instrument as instr
+ runner_pipeline = beam.pipeline.Pipeline.from_runner_api(
+ user_pipeline.to_runner_api(use_fake_coders=True),
+ runner,
+ options)
+ background_caching_job_result = beam.pipeline.Pipeline.from_runner_api(
+ instr.pin(runner_pipeline).background_caching_pipeline_proto(),
+ runner,
+ options).run()
+ ie.current_env().set_pipeline_result(user_pipeline,
+ background_caching_job_result,
+ is_main_job=False)
+
+
+def is_background_caching_job_needed(user_pipeline):
+ """Determines if a background caching job needs to be started."""
+ background_caching_job_result = ie.current_env().pipeline_result(
+ user_pipeline, is_main_job=False)
+ # Checks if the pipeline contains any source that needs to be cached.
+ return (has_source_to_cache(user_pipeline) and
+ # Checks if it's the first time running a job from the pipeline.
+ (not background_caching_job_result or
+ # Or checks if the previous job neither succeeded nor is still running.
+ background_caching_job_result.state not in (
+ # DONE means a previous job has completed successfully and the
+ # cached events are still valid.
+ runners.runner.PipelineState.DONE,
+ # RUNNING means a previous job has been started and is still
+ # running.
+ runners.runner.PipelineState.RUNNING) or
+ # Or checks if we can invalidate the previous job.
+ is_source_to_cache_changed(user_pipeline)))
+
+
+def has_source_to_cache(user_pipeline):
+ """Determines if a user-defined pipeline contains any source that need to be
+ cached."""
+ from apache_beam.runners.interactive import pipeline_instrument as instr
+ # TODO(BEAM-8335): we temporarily only cache replaceable unbounded sources.
+ # Add logic for other cacheable sources here when they are available.
+ return instr.has_unbounded_sources(user_pipeline)
+
+
+def attempt_to_cancel_background_caching_job(user_pipeline):
+ """Attempts to cancel background caching job for a user-defined pipeline.
+
+ If no background caching job needs to be cancelled, this is a no-op.
+ Otherwise, the job is cancelled.
+ """
+ background_caching_job_result = ie.current_env().pipeline_result(
+ user_pipeline, is_main_job=False)
+ if (background_caching_job_result and
+ not ie.current_env().is_terminated(user_pipeline, is_main_job=False)):
+ background_caching_job_result.cancel()
+
+
+def is_source_to_cache_changed(user_pipeline):
+ """Determines if there is any change in the sources that need to be cached
+ used by the user-defined pipeline.
+
+ Due to the expensiveness of computations and for the simplicity of usage, this
+ function is not idempotent because Interactive Beam automatically discards
+ previously tracked signature of transforms and tracks the current signature of
+ transforms for the user-defined pipeline if there is any change.
+
+ When it's True, there is addition/deletion/mutation of source transforms that
+ requires a new background caching job.
+ """
+ # Defaults to an empty set if the user_pipeline is seen for the first time,
+ # which we can treat as adding transforms.
+ recorded_signature = ie.current_env().get_cached_source_signature(
+ user_pipeline)
+ current_signature = extract_source_to_cache_signature(user_pipeline)
+ is_changed = not current_signature.issubset(recorded_signature)
+ # Computing the source signature (extract_source_to_cache_signature) is
+ # expensive, so the recorded signature is only updated when a change is
+ # detected.
+ if is_changed:
+ ie.current_env().set_cached_source_signature(user_pipeline,
+ current_signature)
+ return is_changed
+
+
+def extract_source_to_cache_signature(user_pipeline):
+ """Extracts a set of signature for sources that need to be cached in the
+ user-defined pipeline.
+
+ A signature is a str representation of urn and payload of a source.
+ """
+ from apache_beam.runners.interactive import pipeline_instrument as instr
+ # TODO(BEAM-8335): we temporarily only cache replaceable unbounded sources.
+ # Add logic for other cacheable sources here when they are available.
+ unbounded_sources_as_applied_transforms = instr.unbounded_sources(
+ user_pipeline)
+ unbounded_sources_as_ptransforms = set(
+ map(lambda x: x.transform, unbounded_sources_as_applied_transforms))
+ context, _ = user_pipeline.to_runner_api(
+ return_context=True, use_fake_coders=True)
+ signature = set(map(lambda transform: str(transform.to_runner_api(context)),
+ unbounded_sources_as_ptransforms))
+ return signature
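`is_source_to_cache_changed` reduces each cacheable source to a string signature and triggers a new background caching job whenever the current signature set is not a subset of the recorded one. A tiny sketch of that set comparison (the signature strings below are made up):

```python
# Hypothetical signature strings; real ones are str(transform.to_runner_api(...)).
recorded = {'pubsub_read:projects/test-project/subscriptions/foo'}
current = {'pubsub_read:projects/test-project/subscriptions/foo',
           'pubsub_read:projects/test-project/subscriptions/bar'}  # a source was added

is_changed = not current.issubset(recorded)
print(is_changed)  # True -> start (or restart) the background caching job
```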
diff --git a/sdks/python/apache_beam/runners/interactive/background_caching_job_test.py b/sdks/python/apache_beam/runners/interactive/background_caching_job_test.py
new file mode 100644
index 0000000..409d338
--- /dev/null
+++ b/sdks/python/apache_beam/runners/interactive/background_caching_job_test.py
@@ -0,0 +1,250 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Tests for apache_beam.runners.interactive.background_caching_job."""
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+import unittest
+
+import apache_beam as beam
+from apache_beam.pipeline import PipelineVisitor
+from apache_beam.runners import runner
+from apache_beam.runners.interactive import background_caching_job as bcj
+from apache_beam.runners.interactive import interactive_beam as ib
+from apache_beam.runners.interactive import interactive_environment as ie
+from apache_beam.runners.interactive import interactive_runner
+from apache_beam.runners.interactive.testing.mock_ipython import mock_get_ipython
+from apache_beam.testing.test_stream import TestStream
+from apache_beam.transforms.window import TimestampedValue
+
+# TODO(BEAM-8288): clean up the work-around of nose tests using Python2 without
+# unittest.mock module.
+try:
+ from unittest.mock import patch
+except ImportError:
+ from mock import patch
+
+_FOO_PUBSUB_SUB = 'projects/test-project/subscriptions/foo'
+_BAR_PUBSUB_SUB = 'projects/test-project/subscriptions/bar'
+
+
+def _build_a_test_stream_pipeline():
+ test_stream = (TestStream()
+ .advance_watermark_to(0)
+ .add_elements([TimestampedValue('a', 1)])
+ .advance_processing_time(5)
+ .advance_watermark_to_infinity())
+ p = beam.Pipeline(runner=interactive_runner.InteractiveRunner())
+ events = p | test_stream # pylint: disable=possibly-unused-variable
+ ib.watch(locals())
+ return p
+
+
+def _build_an_empty_stream_pipeline():
+ from apache_beam.options.pipeline_options import PipelineOptions
+ from apache_beam.options.pipeline_options import StandardOptions
+ pipeline_options = PipelineOptions()
+ pipeline_options.view_as(StandardOptions).streaming = True
+ p = beam.Pipeline(interactive_runner.InteractiveRunner(),
+ options=pipeline_options)
+ ib.watch({'pipeline': p})
+ return p
+
+
+@unittest.skipIf(not ie.current_env().is_interactive_ready,
+ '[interactive] dependency is not installed.')
+class BackgroundCachingJobTest(unittest.TestCase):
+
+ def tearDown(self):
+ ie.new_env()
+
+ # TODO(BEAM-8335): remove the patches when there are appropriate test sources
+ # that meet the boundedness checks.
+ @patch('apache_beam.runners.interactive.pipeline_instrument'
+ '.has_unbounded_sources', lambda x: True)
+ def test_background_caching_job_starts_when_none_such_job_exists(self):
+ p = _build_a_test_stream_pipeline()
+ p.run()
+ self.assertIsNotNone(
+ ie.current_env().pipeline_result(p, is_main_job=False))
+
+ @patch('apache_beam.runners.interactive.pipeline_instrument'
+ '.has_unbounded_sources', lambda x: False)
+ def test_background_caching_job_not_start_for_batch_pipeline(self):
+ p = _build_a_test_stream_pipeline()
+ p.run()
+ self.assertIsNone(
+ ie.current_env().pipeline_result(p, is_main_job=False))
+
+ @patch('apache_beam.runners.interactive.pipeline_instrument'
+ '.has_unbounded_sources', lambda x: True)
+ def test_background_caching_job_not_start_when_such_job_exists(self):
+ p = _build_a_test_stream_pipeline()
+ a_running_result = runner.PipelineResult(runner.PipelineState.RUNNING)
+ ie.current_env().set_pipeline_result(p, a_running_result, is_main_job=False)
+ main_job_result = p.run()
+ # No background caching job is started so result is still the running one.
+ self.assertIs(a_running_result,
+ ie.current_env().pipeline_result(p, is_main_job=False))
+ # A new main job is started so result of the main job is set.
+ self.assertIs(main_job_result,
+ ie.current_env().pipeline_result(p))
+
+ @patch('apache_beam.runners.interactive.pipeline_instrument'
+ '.has_unbounded_sources', lambda x: True)
+ def test_background_caching_job_not_start_when_such_job_is_done(self):
+ p = _build_a_test_stream_pipeline()
+ a_done_result = runner.PipelineResult(runner.PipelineState.DONE)
+ ie.current_env().set_pipeline_result(p, a_done_result, is_main_job=False)
+ main_job_result = p.run()
+ # No background caching job is started so result is still the running one.
+ self.assertIs(a_done_result,
+ ie.current_env().pipeline_result(p, is_main_job=False))
+ # A new main job is started so result of the main job is set.
+ self.assertIs(main_job_result,
+ ie.current_env().pipeline_result(p))
+
+ @patch('IPython.get_ipython', new_callable=mock_get_ipython)
+ def test_source_to_cache_changed_when_pipeline_is_first_time_seen(self, cell):
+ with cell: # Cell 1
+ pipeline = _build_an_empty_stream_pipeline()
+
+ with cell: # Cell 2
+ read_foo = pipeline | 'Read' >> beam.io.ReadFromPubSub(
+ subscription=_FOO_PUBSUB_SUB)
+ ib.watch({'read_foo': read_foo})
+
+ self.assertTrue(bcj.is_source_to_cache_changed(pipeline))
+
+ @patch('IPython.get_ipython', new_callable=mock_get_ipython)
+ def test_source_to_cache_changed_when_new_source_is_added(self, cell):
+ with cell: # Cell 1
+ pipeline = _build_an_empty_stream_pipeline()
+ read_foo = pipeline | 'Read' >> beam.io.ReadFromPubSub(
+ subscription=_FOO_PUBSUB_SUB)
+ ib.watch({'read_foo': read_foo})
+
+ # Sets the signature for current pipeline state.
+ ie.current_env().set_cached_source_signature(
+ pipeline, bcj.extract_source_to_cache_signature(pipeline))
+
+ with cell: # Cell 2
+ read_bar = pipeline | 'Read' >> beam.io.ReadFromPubSub(
+ subscription=_BAR_PUBSUB_SUB)
+ ib.watch({'read_bar': read_bar})
+
+ self.assertTrue(bcj.is_source_to_cache_changed(pipeline))
+
+ @patch('IPython.get_ipython', new_callable=mock_get_ipython)
+ def test_source_to_cache_changed_when_source_is_altered(self, cell):
+ with cell: # Cell 1
+ pipeline = _build_an_empty_stream_pipeline()
+ transform = beam.io.ReadFromPubSub(subscription=_FOO_PUBSUB_SUB)
+ read_foo = pipeline | 'Read' >> transform
+ ib.watch({'read_foo': read_foo})
+
+ # Sets the signature for current pipeline state.
+ ie.current_env().set_cached_source_signature(
+ pipeline, bcj.extract_source_to_cache_signature(pipeline))
+
+ with cell: # Cell 2
+ from apache_beam.io.gcp.pubsub import _PubSubSource
+ # Alter the transform.
+ transform._source = _PubSubSource(subscription=_BAR_PUBSUB_SUB)
+
+ self.assertTrue(bcj.is_source_to_cache_changed(pipeline))
+
+ @patch('IPython.get_ipython', new_callable=mock_get_ipython)
+ def test_source_to_cache_not_changed_for_same_source(self, cell):
+ with cell: # Cell 1
+ pipeline = _build_an_empty_stream_pipeline()
+ transform = beam.io.ReadFromPubSub(subscription=_FOO_PUBSUB_SUB)
+
+ with cell: # Cell 2
+ read_foo_1 = pipeline | 'Read' >> transform
+ ib.watch({'read_foo_1': read_foo_1})
+
+ # Sets the signature for the current pipeline state.
+ ie.current_env().set_cached_source_signature(
+ pipeline, bcj.extract_source_to_cache_signature(pipeline))
+
+ with cell: # Cell 3
+ # Apply exactly the same transform, using the same instance.
+ read_foo_2 = pipeline | 'Read' >> transform
+ ib.watch({'read_foo_2': read_foo_2})
+
+ self.assertFalse(bcj.is_source_to_cache_changed(pipeline))
+
+ with cell: # Cell 4
+ # Apply the same transform but as a different instance.
+ # The signature representing the urn and payload is still the same, so it
+ # is not treated as a new unbounded source.
+ read_foo_3 = pipeline | 'Read' >> beam.io.ReadFromPubSub(
+ subscription=_FOO_PUBSUB_SUB)
+ ib.watch({'read_foo_3': read_foo_3})
+
+ self.assertFalse(bcj.is_source_to_cache_changed(pipeline))
+
+ @patch('IPython.get_ipython', new_callable=mock_get_ipython)
+ def test_source_to_cache_not_changed_when_source_is_removed(self, cell):
+ with cell: # Cell 1
+ pipeline = _build_an_empty_stream_pipeline()
+ foo_transform = beam.io.ReadFromPubSub(subscription=_FOO_PUBSUB_SUB)
+ bar_transform = beam.io.ReadFromPubSub(subscription=_BAR_PUBSUB_SUB)
+
+ with cell: # Cell 2
+ read_foo = pipeline | 'Read' >> foo_transform
+ ib.watch({'read_foo': read_foo})
+
+ signature_with_only_foo = bcj.extract_source_to_cache_signature(pipeline)
+
+ with cell: # Cell 3
+ read_bar = pipeline | 'Read' >> bar_transform
+ ib.watch({'read_bar': read_bar})
+
+ self.assertTrue(bcj.is_source_to_cache_changed(pipeline))
+ signature_with_foo_bar = ie.current_env().get_cached_source_signature(
+ pipeline)
+ self.assertNotEqual(signature_with_only_foo, signature_with_foo_bar)
+
+ class BarPruneVisitor(PipelineVisitor):
+
+ def enter_composite_transform(self, transform_node):
+ pruned_parts = list(transform_node.parts)
+ for part in transform_node.parts:
+ if part.transform is bar_transform:
+ pruned_parts.remove(part)
+ transform_node.parts = tuple(pruned_parts)
+ self.visit_transform(transform_node)
+
+ def visit_transform(self, transform_node):
+ if transform_node.transform is bar_transform:
+ transform_node.parent = None
+
+ v = BarPruneVisitor()
+ pipeline.visit(v)
+
+ signature_after_pruning_bar = bcj.extract_source_to_cache_signature(
+ pipeline)
+ self.assertEqual(signature_with_only_foo, signature_after_pruning_bar)
+ self.assertFalse(bcj.is_source_to_cache_changed(pipeline))
+
+
+if __name__ == '__main__':
+ unittest.main()
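# A minimal, self-contained sketch of the signature idea the tests above rely
# on (not the Beam implementation): a source-to-cache signature is treated as
# a set of (urn, payload) pairs, so re-applying an identical unbounded read
# leaves it unchanged while reading from a new subscription changes it. The
# urn and payload values below are placeholders, not real Beam identifiers.
def _source_signature(reads):
  return frozenset(reads)

_foo = ('pubsub_read_urn', b'subscription: foo')
_bar = ('pubsub_read_urn', b'subscription: bar')
assert _source_signature([_foo]) == _source_signature([_foo, _foo])
assert _source_signature([_foo]) != _source_signature([_foo, _bar])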
diff --git a/sdks/python/apache_beam/runners/interactive/cache_manager.py b/sdks/python/apache_beam/runners/interactive/cache_manager.py
index 20d84e3..1531e6d 100644
--- a/sdks/python/apache_beam/runners/interactive/cache_manager.py
+++ b/sdks/python/apache_beam/runners/interactive/cache_manager.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -22,6 +24,7 @@
import collections
import datetime
import os
+import sys
import tempfile
import urllib
@@ -32,13 +35,12 @@
from apache_beam.io import tfrecordio
from apache_beam.transforms import combiners
-try: # Python 3
+if sys.version_info[0] > 2:
unquote_to_bytes = urllib.parse.unquote_to_bytes
quote = urllib.parse.quote
-except AttributeError: # Python 2
- # pylint: disable=deprecated-urllib-function
- unquote_to_bytes = urllib.unquote
- quote = urllib.quote
+else:
+ unquote_to_bytes = urllib.unquote # pylint: disable=deprecated-urllib-function
+ quote = urllib.quote # pylint: disable=deprecated-urllib-function
class CacheManager(object):
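# A quick, hedged illustration of the helpers selected above (Python 3 branch
# shown); the byte key is a made-up example of how arbitrary bytes can be
# percent-encoded into a text-safe label and recovered losslessly.
from urllib.parse import quote, unquote_to_bytes

encoded = quote(b'\x00\xff key')              # -> '%00%FF%20key'
assert unquote_to_bytes(encoded) == b'\x00\xff key'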
diff --git a/sdks/python/apache_beam/runners/interactive/cache_manager_test.py b/sdks/python/apache_beam/runners/interactive/cache_manager_test.py
index 3ad81b8..6ab51b4 100644
--- a/sdks/python/apache_beam/runners/interactive/cache_manager_test.py
+++ b/sdks/python/apache_beam/runners/interactive/cache_manager_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -41,7 +43,7 @@
tested with InteractiveRunner as a part of integration tests instead.
"""
- cache_format = None
+ cache_format = None # type: str
def setUp(self):
self.test_dir = tempfile.mkdtemp()
diff --git a/sdks/python/apache_beam/runners/interactive/caching/streaming_cache.py b/sdks/python/apache_beam/runners/interactive/caching/streaming_cache.py
index 2348c78..d401cd0 100644
--- a/sdks/python/apache_beam/runners/interactive/caching/streaming_cache.py
+++ b/sdks/python/apache_beam/runners/interactive/caching/streaming_cache.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from apache_beam.portability.api.beam_runner_api_pb2 import TestStreamPayload
diff --git a/sdks/python/apache_beam/runners/interactive/caching/streaming_cache_test.py b/sdks/python/apache_beam/runners/interactive/caching/streaming_cache_test.py
index 5b65992..1fb2d7c 100644
--- a/sdks/python/apache_beam/runners/interactive/caching/streaming_cache_test.py
+++ b/sdks/python/apache_beam/runners/interactive/caching/streaming_cache_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/runners/interactive/display/display_manager.py b/sdks/python/apache_beam/runners/interactive/display/display_manager.py
index c6ead9d..5cd27f1 100644
--- a/sdks/python/apache_beam/runners/interactive/display/display_manager.py
+++ b/sdks/python/apache_beam/runners/interactive/display/display_manager.py
@@ -20,6 +20,8 @@
This module is experimental. No backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -27,6 +29,7 @@
import collections
import threading
import time
+from typing import TYPE_CHECKING
from apache_beam.runners.interactive.display import interactive_pipeline_graph
@@ -37,11 +40,12 @@
# _display_progress defines how outputs are printed on the frontend.
_display_progress = ip_display
- def _formatter(string, pp, cycle): # pylint: disable=unused-argument
- pp.text(string)
- if get_ipython():
- plain = get_ipython().display_formatter.formatters['text/plain'] # pylint: disable=undefined-variable
- plain.for_type(str, _formatter)
+ if not TYPE_CHECKING:
+ def _formatter(string, pp, cycle): # pylint: disable=unused-argument
+ pp.text(string)
+ if get_ipython():
+ plain = get_ipython().display_formatter.formatters['text/plain'] # pylint: disable=undefined-variable
+ plain.for_type(str, _formatter)
except ImportError:
IPython = None
diff --git a/sdks/python/apache_beam/runners/interactive/display/interactive_pipeline_graph.py b/sdks/python/apache_beam/runners/interactive/display/interactive_pipeline_graph.py
index c482561..aec3b36 100644
--- a/sdks/python/apache_beam/runners/interactive/display/interactive_pipeline_graph.py
+++ b/sdks/python/apache_beam/runners/interactive/display/interactive_pipeline_graph.py
@@ -20,6 +20,8 @@
This module is experimental. No backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization.py b/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization.py
index bdd34ca..9f98f7c 100644
--- a/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization.py
+++ b/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization.py
@@ -20,6 +20,8 @@
For internal use only; no backwards-compatibility guarantees.
Only works with Python 3.5+.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import base64
diff --git a/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization_test.py b/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization_test.py
index 8eefec7..28411e2 100644
--- a/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization_test.py
+++ b/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization_test.py
@@ -16,23 +16,26 @@
#
"""Tests for apache_beam.runners.interactive.display.pcoll_visualization."""
+# pytype: skip-file
+
from __future__ import absolute_import
import sys
-import time
import unittest
import apache_beam as beam
from apache_beam.runners import runner
+from apache_beam.runners.interactive import interactive_beam as ib
from apache_beam.runners.interactive import interactive_environment as ie
+from apache_beam.runners.interactive import interactive_runner as ir
from apache_beam.runners.interactive.display import pcoll_visualization as pv
# TODO(BEAM-8288): clean up the work-around of nose tests using Python2 without
# unittest.mock module.
try:
- from unittest.mock import patch
+ from unittest.mock import patch, ANY
except ImportError:
- from mock import patch
+ from mock import patch, ANY
try:
import timeloop
@@ -47,6 +50,7 @@
class PCollectionVisualizationTest(unittest.TestCase):
def setUp(self):
+ ie.new_env()
# Allow unit test to run outside of ipython kernel since we don't test the
# frontend rendering in unit tests.
pv._pcoll_visualization_ready = True
@@ -54,9 +58,11 @@
# ipython kernel by forcefully setting notebook check to True.
ie.current_env()._is_in_notebook = True
- self._p = beam.Pipeline()
+ self._p = beam.Pipeline(ir.InteractiveRunner())
# pylint: disable=range-builtin-not-iterating
- self._pcoll = self._p | 'Create' >> beam.Create(range(1000))
+ self._pcoll = self._p | 'Create' >> beam.Create(range(5))
+ ib.watch(self)
+ self._p.run()
def test_raise_error_for_non_pcoll_input(self):
class Foo(object):
@@ -74,76 +80,61 @@
self.assertNotEqual(pv_1._overview_display_id, pv_2._overview_display_id)
self.assertNotEqual(pv_1._df_display_id, pv_2._df_display_id)
- @patch('apache_beam.runners.interactive.display.pcoll_visualization'
- '.PCollectionVisualization._to_element_list', lambda x: [1, 2, 3])
def test_one_shot_visualization_not_return_handle(self):
self.assertIsNone(pv.visualize(self._pcoll))
- def _mock_to_element_list(self):
- yield [1, 2, 3]
- yield [1, 2, 3, 4]
- yield [1, 2, 3, 4, 5]
- yield [1, 2, 3, 4, 5, 6]
- yield [1, 2, 3, 4, 5, 6, 7]
- yield [1, 2, 3, 4, 5, 6, 7, 8]
-
- @patch('apache_beam.runners.interactive.display.pcoll_visualization'
- '.PCollectionVisualization._to_element_list', _mock_to_element_list)
def test_dynamic_plotting_return_handle(self):
h = pv.visualize(self._pcoll, dynamic_plotting_interval=1)
self.assertIsInstance(h, timeloop.Timeloop)
h.stop()
@patch('apache_beam.runners.interactive.display.pcoll_visualization'
- '.PCollectionVisualization._to_element_list', _mock_to_element_list)
+ '.PCollectionVisualization._display_dive')
@patch('apache_beam.runners.interactive.display.pcoll_visualization'
- '.PCollectionVisualization.display_facets')
- def test_dynamic_plotting_update_same_display(self,
- mocked_display_facets):
- fake_pipeline_result = runner.PipelineResult(runner.PipelineState.RUNNING)
- ie.current_env().set_pipeline_result(self._p, fake_pipeline_result)
- # Starts async dynamic plotting that never ends in this test.
- h = pv.visualize(self._pcoll, dynamic_plotting_interval=0.001)
- # Blocking so the above async task can execute some iterations.
- time.sleep(1)
- # The first iteration doesn't provide updating_pv to display_facets.
- _, first_kwargs = mocked_display_facets.call_args_list[0]
- self.assertEqual(first_kwargs, {})
- # The following iterations use the same updating_pv to display_facets and so
- # on.
- _, second_kwargs = mocked_display_facets.call_args_list[1]
- updating_pv = second_kwargs['updating_pv']
- for call in mocked_display_facets.call_args_list[2:]:
- _, kwargs = call
- self.assertIs(kwargs['updating_pv'], updating_pv)
- h.stop()
+ '.PCollectionVisualization._display_overview')
+ @patch('apache_beam.runners.interactive.display.pcoll_visualization'
+ '.PCollectionVisualization._display_dataframe')
+ def test_dynamic_plotting_updates_same_display(self,
+ mocked_display_dataframe,
+ mocked_display_overview,
+ mocked_display_dive):
+ original_pcollection_visualization = pv.PCollectionVisualization(
+ self._pcoll)
+ # Dynamic plotting always creates a new PCollectionVisualization.
+ new_pcollection_visualization = pv.PCollectionVisualization(self._pcoll)
+ # The display uses ANY data available at the moment display is invoked, and
+ # updates web elements with ids fetched from the given updating_pv.
+ new_pcollection_visualization.display_facets(
+ updating_pv=original_pcollection_visualization)
+ mocked_display_dataframe.assert_called_once_with(
+ ANY, original_pcollection_visualization._df_display_id)
+ mocked_display_overview.assert_called_once_with(
+ ANY, original_pcollection_visualization._overview_display_id)
+ mocked_display_dive.assert_called_once_with(
+ ANY, original_pcollection_visualization._dive_display_id)
- @patch('apache_beam.runners.interactive.display.pcoll_visualization'
- '.PCollectionVisualization._to_element_list', _mock_to_element_list)
- @patch('timeloop.Timeloop.stop')
- def test_auto_stop_dynamic_plotting_when_job_is_terminated(
- self,
- mocked_timeloop):
+ def test_auto_stop_dynamic_plotting_when_job_is_terminated(self):
fake_pipeline_result = runner.PipelineResult(runner.PipelineState.RUNNING)
- ie.current_env().set_pipeline_result(self._p, fake_pipeline_result)
- # Starts non-stopping async dynamic plotting until the job is terminated.
- pv.visualize(self._pcoll, dynamic_plotting_interval=0.001)
- # Blocking so the above async task can execute some iterations.
- time.sleep(1)
- mocked_timeloop.assert_not_called()
+ ie.current_env().set_pipeline_result(
+ self._p,
+ fake_pipeline_result,
+ is_main_job=True)
+ # When the job is running, dynamic plotting is not stopped.
+ self.assertFalse(ie.current_env().is_terminated(self._p))
+
fake_pipeline_result = runner.PipelineResult(runner.PipelineState.DONE)
- ie.current_env().set_pipeline_result(self._p, fake_pipeline_result)
- # Blocking so the above async task can execute some iterations.
- time.sleep(1)
- # "assert_called" is new in Python 3.6.
- mocked_timeloop.assert_called()
+ ie.current_env().set_pipeline_result(
+ self._p,
+ fake_pipeline_result,
+ is_main_job=True)
+ # When the job is done, dynamic plotting is stopped.
+ self.assertTrue(ie.current_env().is_terminated(self._p))
- @patch('apache_beam.runners.interactive.display.pcoll_visualization'
- '.PCollectionVisualization._to_element_list', lambda x: [1, 2, 3])
@patch('pandas.DataFrame.sample')
def test_display_plain_text_when_kernel_has_no_frontend(self,
_mocked_sample):
- ie.new_env() # Resets the notebook check. Should be False in unit tests.
+ # Resets the notebook check to False.
+ ie.current_env()._is_in_notebook = False
self.assertIsNone(pv.visualize(self._pcoll))
_mocked_sample.assert_called_once()
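# Sketch of the update pattern the dynamic-plotting test above asserts on
# (names as in that test; nothing new is introduced here): each tick builds a
# fresh PCollectionVisualization but renders into the display ids owned by
# the original one, so existing notebook outputs are updated in place rather
# than appended.
#
#   original = pv.PCollectionVisualization(pcoll)   # owns the display ids
#   refresh = pv.PCollectionVisualization(pcoll)    # built on every tick
#   refresh.display_facets(updating_pv=original)    # re-renders into those ids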
diff --git a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py
index 4d9a46e..b2d9b3a 100644
--- a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py
+++ b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py
@@ -20,6 +20,8 @@
This module is experimental. No backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -27,6 +29,12 @@
import collections
import logging
import threading
+from typing import DefaultDict
+from typing import Dict
+from typing import Iterator
+from typing import List
+from typing import Tuple
+from typing import Union
import pydot
@@ -44,7 +52,7 @@
"""Creates a DOT representing the pipeline. Thread-safe. Runner agnostic."""
def __init__(self,
- pipeline,
+ pipeline, # type: Union[beam_runner_api_pb2.Pipeline, beam.Pipeline]
default_vertex_attrs={'shape': 'box'},
default_edge_attrs=None,
render_option=None):
@@ -67,7 +75,7 @@
rendered. See display.pipeline_graph_renderer for available options.
"""
self._lock = threading.Lock()
- self._graph = None
+ self._graph = None # type: pydot.Dot
self._pipeline_instrument = None
if isinstance(pipeline, beam.Pipeline):
self._pipeline_instrument = inst.PipelineInstrument(pipeline)
@@ -85,9 +93,9 @@
type(pipeline)))
# A dict from PCollection ID to a list of its consuming Transform IDs
- self._consumers = collections.defaultdict(list)
+ self._consumers = collections.defaultdict(list) # type: DefaultDict[str, List[str]]
# A dict from PCollection ID to its producing Transform ID
- self._producers = {}
+ self._producers = {} # type: Dict[str, str]
for transform_id, transform_proto in self._top_level_transforms():
for pcoll_id in transform_proto.inputs.values():
@@ -110,6 +118,7 @@
self._renderer = pipeline_graph_renderer.get_renderer(render_option)
def get_dot(self):
+ # type: () -> str
return self._get_graph().to_string()
def display_graph(self):
@@ -124,6 +133,7 @@
'pipeline graph.')
def _top_level_transforms(self):
+ # type: () -> Iterator[Tuple[str, beam_runner_api_pb2.PTransform]]
"""Yields all top level PTransforms (subtransforms of the root PTransform).
Yields: (str, PTransform proto) ID, proto pair of top level PTransforms.
diff --git a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph_renderer.py b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph_renderer.py
index 2df5c61..5bc637e 100644
--- a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph_renderer.py
+++ b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph_renderer.py
@@ -20,6 +20,8 @@
This module is experimental. No backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -27,25 +29,33 @@
import abc
import os
import subprocess
+from typing import TYPE_CHECKING
+from typing import Optional
+from typing import Type
from future.utils import with_metaclass
from apache_beam.utils.plugin import BeamPlugin
+if TYPE_CHECKING:
+ from apache_beam.runners.interactive.display.pipeline_graph import PipelineGraph
-class PipelineGraphRenderer(with_metaclass(abc.ABCMeta, BeamPlugin)):
+
+class PipelineGraphRenderer(with_metaclass(abc.ABCMeta, BeamPlugin)): # type: ignore[misc]
"""Abstract class for renderers, who decide how pipeline graphs are rendered.
"""
@classmethod
@abc.abstractmethod
def option(cls):
+ # type: () -> str
"""The corresponding rendering option for the renderer.
"""
raise NotImplementedError
@abc.abstractmethod
def render_pipeline_graph(self, pipeline_graph):
+ # type: (PipelineGraph) -> str
"""Renders the pipeline graph in HTML-compatible format.
Args:
@@ -63,9 +73,11 @@
@classmethod
def option(cls):
+ # type: () -> str
return 'mute'
def render_pipeline_graph(self, pipeline_graph):
+ # type: (PipelineGraph) -> str
return ''
@@ -75,9 +87,11 @@
@classmethod
def option(cls):
+ # type: () -> str
return 'text'
def render_pipeline_graph(self, pipeline_graph):
+ # type: (PipelineGraph) -> str
return pipeline_graph.get_dot()
@@ -91,13 +105,16 @@
@classmethod
def option(cls):
+ # type: () -> str
return 'graph'
def render_pipeline_graph(self, pipeline_graph):
+ # type: (PipelineGraph) -> str
return pipeline_graph._get_graph().create_svg().decode("utf-8") # pylint: disable=protected-access
def get_renderer(option=None):
+ # type: (Optional[str]) -> Type[PipelineGraphRenderer]
"""Get an instance of PipelineGraphRenderer given rendering option.
Args:
diff --git a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph_test.py b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph_test.py
index e73dbd6..780e70c 100644
--- a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph_test.py
+++ b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph_test.py
@@ -16,6 +16,8 @@
#
"""Tests for apache_beam.runners.interactive.display.pipeline_graph."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
@@ -91,24 +93,24 @@
'}\n'),
pipeline_graph.PipelineGraph(p).get_dot())
- @patch('IPython.get_ipython', mock_get_ipython)
- def test_get_dot_within_notebook(self):
+ @patch('IPython.get_ipython', new_callable=mock_get_ipython)
+ def test_get_dot_within_notebook(self, cell):
# Assume a mocked ipython kernel and notebook frontend have been set up.
ie.current_env()._is_in_ipython = True
ie.current_env()._is_in_notebook = True
- with mock_get_ipython(): # Cell 1
+ with cell: # Cell 1
p = beam.Pipeline(ir.InteractiveRunner())
# Immediately track this local pipeline so that ipython prompts when
# applying transforms will be tracked and used for labels.
ib.watch(locals())
- with mock_get_ipython(): # Cell 2
+ with cell: # Cell 2
init_pcoll = p | 'Init' >> beam.Create(range(10))
- with mock_get_ipython(): # Cell 3
+ with cell: # Cell 3
squares = init_pcoll | 'Square' >> beam.Map(lambda x: x * x)
- with mock_get_ipython(): # Cell 4
+ with cell: # Cell 4
cubes = init_pcoll | 'Cube' >> beam.Map(lambda x: x ** 3)
# Tracks all PCollections defined so far.
diff --git a/sdks/python/apache_beam/runners/interactive/examples/Interactive Beam Example.ipynb b/sdks/python/apache_beam/runners/interactive/examples/Interactive Beam Example.ipynb
index b461a26..bc54a99 100644
--- a/sdks/python/apache_beam/runners/interactive/examples/Interactive Beam Example.ipynb
+++ b/sdks/python/apache_beam/runners/interactive/examples/Interactive Beam Example.ipynb
@@ -28,7 +28,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -38,100 +38,9 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
- "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
- " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
- "<!-- Generated by graphviz version 2.43.0 (0)\n",
- " -->\n",
- "<!-- Title: G Pages: 1 -->\n",
- "<svg width=\"208pt\" height=\"349pt\"\n",
- " viewBox=\"0.00 0.00 208.43 349.26\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
- "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 345.26)\">\n",
- "<title>G</title>\n",
- "<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-345.26 204.43,-345.26 204.43,4 -4,4\"/>\n",
- "<!-- Cell 2: Create -->\n",
- "<g id=\"node1\" class=\"node\">\n",
- "<title>Cell 2: Create</title>\n",
- "<polygon fill=\"none\" stroke=\"blue\" points=\"148.8,-341.26 55.3,-341.26 55.3,-305.26 148.8,-305.26 148.8,-341.26\"/>\n",
- "<text text-anchor=\"middle\" x=\"102.05\" y=\"-319.06\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">Cell 2: Create</text>\n",
- "</g>\n",
- "<!-- init_pcoll -->\n",
- "<g id=\"node2\" class=\"node\">\n",
- "<title>init_pcoll</title>\n",
- "<ellipse fill=\"none\" stroke=\"blue\" cx=\"102.05\" cy=\"-225.26\" rx=\"44.01\" ry=\"44.01\"/>\n",
- "<text text-anchor=\"middle\" x=\"102.05\" y=\"-221.06\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">init_pcoll</text>\n",
- "</g>\n",
- "<!-- Cell 2: Create->init_pcoll -->\n",
- "<g id=\"edge1\" class=\"edge\">\n",
- "<title>Cell 2: Create->init_pcoll</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M102.05,-305.1C102.05,-297.81 102.05,-288.88 102.05,-279.68\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"105.55,-279.65 102.05,-269.65 98.55,-279.65 105.55,-279.65\"/>\n",
- "</g>\n",
- "<!-- Cell 2: Square -->\n",
- "<g id=\"node3\" class=\"node\">\n",
- "<title>Cell 2: Square</title>\n",
- "<polygon fill=\"none\" stroke=\"blue\" points=\"96.15,-145.26 -0.05,-145.26 -0.05,-109.26 96.15,-109.26 96.15,-145.26\"/>\n",
- "<text text-anchor=\"middle\" x=\"48.05\" y=\"-123.06\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">Cell 2: Square</text>\n",
- "</g>\n",
- "<!-- init_pcoll->Cell 2: Square -->\n",
- "<g id=\"edge2\" class=\"edge\">\n",
- "<title>init_pcoll->Cell 2: Square</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M80.86,-186.59C74.77,-175.77 68.27,-164.2 62.67,-154.25\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"65.57,-152.28 57.62,-145.28 59.47,-155.71 65.57,-152.28\"/>\n",
- "</g>\n",
- "<!-- Cell 2: Cube -->\n",
- "<g id=\"node5\" class=\"node\">\n",
- "<title>Cell 2: Cube</title>\n",
- "<polygon fill=\"none\" stroke=\"blue\" points=\"200.32,-145.26 113.78,-145.26 113.78,-109.26 200.32,-109.26 200.32,-145.26\"/>\n",
- "<text text-anchor=\"middle\" x=\"157.05\" y=\"-123.06\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">Cell 2: Cube</text>\n",
- "</g>\n",
- "<!-- init_pcoll->Cell 2: Cube -->\n",
- "<g id=\"edge3\" class=\"edge\">\n",
- "<title>init_pcoll->Cell 2: Cube</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M123.63,-186.59C129.83,-175.77 136.46,-164.2 142.16,-154.25\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"145.36,-155.69 147.3,-145.28 139.29,-152.21 145.36,-155.69\"/>\n",
- "</g>\n",
- "<!-- squares -->\n",
- "<g id=\"node4\" class=\"node\">\n",
- "<title>squares</title>\n",
- "<ellipse fill=\"none\" stroke=\"blue\" cx=\"48.05\" cy=\"-36.63\" rx=\"36.76\" ry=\"36.76\"/>\n",
- "<text text-anchor=\"middle\" x=\"48.05\" y=\"-32.43\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">squares</text>\n",
- "</g>\n",
- "<!-- Cell 2: Square->squares -->\n",
- "<g id=\"edge4\" class=\"edge\">\n",
- "<title>Cell 2: Square->squares</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M48.05,-109.17C48.05,-101.79 48.05,-92.76 48.05,-83.63\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"51.55,-83.37 48.05,-73.37 44.55,-83.37 51.55,-83.37\"/>\n",
- "</g>\n",
- "<!-- cubes -->\n",
- "<g id=\"node6\" class=\"node\">\n",
- "<title>cubes</title>\n",
- "<ellipse fill=\"none\" stroke=\"blue\" cx=\"157.05\" cy=\"-36.63\" rx=\"30.49\" ry=\"30.49\"/>\n",
- "<text text-anchor=\"middle\" x=\"157.05\" y=\"-32.43\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">cubes</text>\n",
- "</g>\n",
- "<!-- Cell 2: Cube->cubes -->\n",
- "<g id=\"edge5\" class=\"edge\">\n",
- "<title>Cell 2: Cube->cubes</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M157.05,-109.17C157.05,-100 157.05,-88.3 157.05,-77.03\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"160.55,-76.88 157.05,-66.88 153.55,-76.88 160.55,-76.88\"/>\n",
- "</g>\n",
- "</g>\n",
- "</svg>\n"
- ],
- "text/plain": [
- "<IPython.core.display.HTML object>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"p = beam.Pipeline(interactive_runner.InteractiveRunner())\n",
"init_pcoll = p | beam.Create(range(10))\n",
@@ -143,36 +52,9 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: matplotlib in /Users/ningk/workspace/p3_ib_venv/lib/python3.7/site-packages (3.1.1)\n",
- "Requirement already satisfied: cycler>=0.10 in /Users/ningk/workspace/p3_ib_venv/lib/python3.7/site-packages (from matplotlib) (0.10.0)\n",
- "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/ningk/workspace/p3_ib_venv/lib/python3.7/site-packages (from matplotlib) (1.1.0)\n",
- "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Users/ningk/workspace/p3_ib_venv/lib/python3.7/site-packages (from matplotlib) (2.4.2)\n",
- "Requirement already satisfied: python-dateutil>=2.1 in /Users/ningk/workspace/p3_ib_venv/lib/python3.7/site-packages (from matplotlib) (2.8.0)\n",
- "Requirement already satisfied: numpy>=1.11 in /Users/ningk/workspace/p3_ib_venv/lib/python3.7/site-packages (from matplotlib) (1.17.3)\n",
- "Requirement already satisfied: six in /Users/ningk/workspace/p3_ib_venv/lib/python3.7/site-packages (from cycler>=0.10->matplotlib) (1.12.0)\n",
- "Requirement already satisfied: setuptools in /Users/ningk/workspace/p3_ib_venv/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib) (41.6.0)\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAafElEQVR4nO3df3DU9b3v8ec7AcxZ9IpAjqMEEhwRFWoEouLVHlGutwhOsePR6ixKHaa5tXqoV+uRnnTu7Z0hMzLTasv0HDo7lVO47LVYjg60Oh4VdOpvGvAnoDUKgSBKRI2VlPIj7/vH9xvYYCCbZHe/y3dfj5nMfr/v73d339kJr/3w2e9+v+buiIhIvJRF3YCIiOSewl1EJIYU7iIiMaRwFxGJIYW7iEgMDYq6AYCRI0d6TU1N1G2IiJxQNmzY8Im7V/a0rSjCvaamhqampqjbEBE5oZhZy7G2aVpGRCSGFO4iIjGkcBcRiaGimHPvyYEDB2htbWXfvn1Rt1L0KioqqKqqYvDgwVG3IiJFomjDvbW1lVNOOYWamhrMLOp2ipa7s2fPHlpbWxk7dmzU7YhIkSjaaZl9+/YxYsQIBXsvzIwRI0bofzgi0k3RhjugYM+SXieRE0M6DTU1UFYW3KbT+Xuuop2WERGJk3Qa6uuhoyNYb2kJ1gGSydw/X1GP3EVE4qKh4Uiwd+noCOr5oHAvEocOHYq6BRHJo+3b+1YfqPiEex4ms/bu3cusWbOora1l4sSJrFy5kieffJJzzz2XyZMnM3/+fK699loAfvKTn/DTn/708H0nTpzItm3bALjuuuuYMmUKEyZMIJVKHd7n5JNP5p577qG2tpaXX36ZDRs2cMUVVzBlyhS+8Y1vsGvXLgAWL17M+eefzwUXXMBNN9004N9LRApvzJi+1QcqHnPueZrMevLJJznzzDN5/PHHAWhvb2fixImsW7eOs88+m29/+9tZPc7SpUsZPnw4f/3rX7nooou4/vrrGTFiBHv37uWSSy7hZz/7GQcOHOCKK65g9erVVFZWsnLlShoaGli6dCn3338/W7du5aSTTuLzzz/v9+8jItFpbOweUwCJRFDPh3iM3PM0mfW1r32Np59+mvvuu4/nn3+erVu3MnbsWMaNG4eZMWfOnKweZ/HixdTW1jJ16lR27NjBe++9B0B5eTnXX389AO+++y5vv/02V199NRdeeCELFy6ktbUVgAsuuIBkMsmKFSsYNCge78cipSaZhFQKqqvBLLhNpfLzYSpkMXI3s/HAyozSWcD/ApaH9RpgG3Cju39mwXF5vwBmAh3Ad9x9Y27bPkqeJrPOOeccNm7cyBNPPMGPf/xjpk+ffsx9Bw0aRGdn5+H1ruPOn3vuOZ555hlefvllEokE06ZNO7ytoqKC8vJyIPgy0oQJE3j55Ze/8tiPP/44f/zjH/n9739PY2Mjb731lkJe5ASUTOYvzI/W68jd3d919wvd/UJgCkFgPwYsANa6+zhgbbgOcA0wLvypB5bko/Fu8jSZ9eGHH5JIJJgzZw733nsvL730Etu2beP9998H4OGHHz68b01NDRs3Bu9hGzduZOvWrUAwlXPaaaeRSCR45513eOWVV3p8rvHjx9PW1nY43A8cOMCmTZvo7Oxkx44dXHnllSxatIj29na+/PLLAf1eIhJ/fR3+TQfed/cWM5sNTAvry4DngPuA2cByd3fgFTMbZmZnuPuuHPX8VXmazHrrrbe49957KSsrY/DgwSxZsoRPPvmEWbNmkUgk+PrXv85f/vIXAK6//nqWL1/OhAkTuOSSSzjnnHMAmDFjBr/61a8477zzGD9+PFOnTu3xuYYMGcKqVauYP38+7e3tHDx4kLvuuotzzjmHOXPm0N7ejrszf/58hg0bNqDfS0RKgLtn/QMsBe4Mlz/PqFvXOvAH4PKMbWuBuh4eqx5oAprGjBnjR9u8efNXase1YoV7dbW7WXC7YkXf7t8Pzz77rM+aNSvvz5ONPr9eInLCA5r8GHmd9cjdzIYA3wR+1MMbhJuZ9/FNJQWkAOrq6vp03x4VcjJLRKTI9WVa5hpgo7t/HK5/3DXdYmZnALvD+k5gdMb9qsJa7EybNo1p06ZF3YaIyFf05VDIm4GHM9bXAHPD5bnA6oz6rRaYCrR7PufbRUTkK7IauZvZUOBq4H9klO8HHjGzeUALcGNYf4LgMMhmgiNrbstZtyIikpWswt3d9wIjjqrtITh65uh9HbgjJ92JiEi/xOMbqiIi0o3CPUeee+65wycRExGJmsJdRCSGYhPu+bp81fLly7nggguora3llltu4Tvf+Q6rVq06vP3kk08+vPzFF18wa9Ysxo8fz/e+973D55p56qmnuPTSS5k8eTI33HDD4dMHLFiw4PCpfH/4wx/mpmEREWJyyt98Xb5q06ZNLFy4kJdeeomRI0fy6aefcvfddx9z//Xr17N582aqq6uZMWMGjz76KNOmTWPhwoU888wzDB06lEWLFvHAAw9wxx138Nhjj/HOO+9gZjqVr4jkVCzC/Xhn/B1IuK9bt44bbriBkSNHAjB8+PDj7n/xxRdz1llnAXDzzTfzwgsvUFFRwebNm7nssssA2L9/P5deeimnnnoqFRUVzJs3j2uvvVbz9SKSU7EI90Jevirz1L6dnZ3s37//8LbgbMd0W3d3rr766m5nkOyyfv161q5dy6pVq/jlL3/JunXrct+wiJSkWMy55+vyVVdddRW/+93v2LNnDwCffvopNTU1bNiwAYA1a9Zw4MCBw/uvX7+erVu30tnZycqVK7n88suZOnUqL774Is3NzUBw6b4///nPfPnll7S3tzNz5kwefPBB3njjjYE1KyKSIRYj93xdvmrChAk0NDRwxRVXUF5ezqRJk1i0aBGzZ8+mtraWGTNmMHTo0MP7X3TRRdx55500Nzdz5ZVX8q1vfYuysjJ+85vfcPPNN/O3v/0NgIULF3LKKacwe/Zs9u3bh7vzwAMPDKxZEZEMFnyhNFp1dXXe1NTUrbZlyxbOO++8rB8jnQ7m2LdvD0bsjY2ldZLIvr5eInLiM7MN7l7X07ZYjNxBZ/wVEckUizl3ERHprqjDvRimjE4Eep1E5GhFG+4VFRXs2bNHwdULd2fPnj1UVFRE3YqIFJGinXOvqqqitbWVtra2qFspehUVFVRVVUXdhogUkaIN98GDBzN27Nio2xAROSEV7bSMiIj0n8JdRCSGsgp3MxtmZqvM7B0z22Jml5rZcDN72szeC29PC/c1M1tsZs1m9qaZTc7vryAiIkfLduT+C+BJdz8XqAW2AAuAte4+DlgbrgNcA4wLf+qBJTntWEREetVruJvZqcA/AA8BuPt+d/8cmA0sC3dbBlwXLs8GlnvgFWCYmZ2R885FROSYshm5jwXagH83s9fM7NdmNhQ43d13hft8BJweLo8CdmTcvzWsdWNm9WbWZGZNOtxRRCS3sgn3QcBkYIm7TwL2cmQKBgAPvmnUp28buXvK3evcva6ysrIvdxURkV5
kE+6tQKu7vxquryII+4+7plvC293h9p3A6Iz7V4U1EREpkF7D3d0/AnaY2fiwNB3YDKwB5oa1ucDqcHkNcGt41MxUoD1j+kZERAog22+o/hOQNrMhwAfAbQRvDI+Y2TygBbgx3PcJYCbQDHSE+4qISAFlFe7u/jrQ0wnhp/ewrwN3DLAvEREZAH1DVUQkhhTuIiIxpHAXEYkhhbuISAwp3EVEYkjhLiISQwp3EZEYUriLiMSQwl1EJIYU7iIiMaRwFxGJIYW7iEgMKdxFRGJI4S4iEkMKdxGRGFK4i0hJSKehpgbKyoLbdDrqjvIr2ysxiYicsNJpqK+Hjo5gvaUlWAdIJqPrK580cheR2GtoOBLsXTo6gnpcZRXuZrbNzN4ys9fNrCmsDTezp83svfD2tLBuZrbYzJrN7E0zm5zPX0BEpDfbt/etHgd9Gblf6e4XunvXtVQXAGvdfRywNlwHuAYYF/7UA0ty1ayISH+MGdO3ehwMZFpmNrAsXF4GXJdRX+6BV4BhZnbGAJ5HRGRAGhshkeheSySCelxlG+4OPGVmG8ws/BiC0919V7j8EXB6uDwK2JFx39aw1o2Z1ZtZk5k1tbW19aN1EZHsJJOQSkF1NZgFt6lUfD9MheyPlrnc3Xea2d8DT5vZO5kb3d3NzPvyxO6eAlIAdXV1fbqviEhfJZPxDvOjZTVyd/ed4e1u4DHgYuDjrumW8HZ3uPtOYHTG3avCmoiIFEiv4W5mQ83slK5l4L8DbwNrgLnhbnOB1eHyGuDW8KiZqUB7xvSNiIgUQDbTMqcDj5lZ1/7/z92fNLM/AY+Y2TygBbgx3P8JYCbQDHQAt+W8axEROa5ew93dPwBqe6jvAab3UHfgjpx0JyIi/aJvqIqIxJDCXUQkhhTuIiIxpHAXEYkhhbuISAwp3EVEYkjhLiISQwp3EZEYUriLiMSQwl1EJIYU7iIiMaRwFxGJIYW7iEgMKdxFRGJI4S4iEkMKdxGRGFK4i4jEkMJdRCSGsg53Mys3s9fM7A/h+lgze9XMms1spZkNCesnhevN4faa/LQuIiLH0peR+w+ALRnri4AH3f1s4DNgXlifB3wW1h8M9xMRkQLKKtzNrAqYBfw6XDfgKmBVuMsy4LpweXa4Trh9eri/iIgUSLYj958D/wx0husjgM/d/WC43gqMCpdHATsAwu3t4f7dmFm9mTWZWVNbW1s/2xcRkZ70Gu5mdi2w29035PKJ3T3l7nXuXldZWZnLhxYRKXmDstjnMuCbZjYTqAD+C/ALYJiZDQpH51XAznD/ncBooNXMBgGnAnty3rmIiBxTryN3d/+Ru1e5ew1wE7DO3ZPAs8A/hrvNBVaHy2vCdcLt69zdc9q1iIgc10COc78PuNvMmgnm1B8K6w8BI8L63cCCgbUoIiJ9lc20zGHu/hzwXLj8AXBxD/vsA27IQW8iItJP+oaqiEgMKdxFRGJI4S4iEkMKdxGRGFK4i4jEkMJdRCSGFO4iIjGkcBeRvEqnoaYGysqC23Q66o5KQ5++xCQi0hfpNNTXQ0dHsN7SEqwDJJPR9VUKNHIXkbxpaDgS7F06OoK65JfCXUTyZvv2vtUldxTuIpI3Y8b0rS65o3AXkbxpbIREonstkQjqkl8KdxHJm2QSUimorgaz4DaV0oephaCjZUQkr5JJhXkUNHIXEYkhhbuISAwp3EVEYqjXcDezCjNbb2ZvmNkmM/s/YX2smb1qZs1mttLMhoT1k8L15nB7TX5/BREROVo2I/e/AVe5ey1wITDDzKYCi4AH3f1s4DNgXrj/POCzsP5guJ+IiBRQr+HugS/D1cHhjwNXAavC+jLgunB5drhOuH26mVnOOhYRkV5lNeduZuVm9jqwG3gaeB/43N0Phru0AqPC5VHADoBwezswIpdNi4jI8WUV7u5+yN0vBKqAi4FzB/rEZlZvZk1m1tTW1jbQhxMRkQx9OlrG3T8HngUuBYaZWdeXoKqAneHyTmA0QLj9VGBPD4+Vcvc6d6+rrKzsZ/siItKTbI6WqTSzYeHy3wFXA1sIQv4fw93mAqvD5TXhOuH2de7uuWxaRESOL5vTD5wBLDOzcoI3g0fc/Q9mthn4rZktBF4DHgr3fwj4v2bWDHwK3JSHvkVE5Dh6DXd3fxOY1EP9A4L596Pr+4AbctKdiIj0i76hKiISQwp3EZEYUriLiMSQwl1EJIYU7iIiMaRwFxGJIYW7iEgMKdxFRGJI4S4iEkMKdxGRGFK4i4jEkMJdRCSGFO4iIjGkcBcRiSGFu0iMpdNQUwNlZcFtOh11R1Io2VysQ0ROQOk01NdDR0ew3tISrAMkk9H1JYWhkbtITDU0HAn2Lh0dQV3iT+EuElPbt/etLvGSzQWyR5vZs2a22cw2mdkPwvpwM3vazN4Lb08L62Zmi82s2czeNLPJ+f4lROSrxozpW13iJZuR+0HgHnc/H5gK3GFm5wMLgLXuPg5YG64DXAOMC3/qgSU571pEetXYCIlE91oiEdQl/noNd3ff5e4bw+W/AFuAUcBsYFm42zLgunB5NrDcA68Aw8zsjJx3LiLHlUxCKgXV1WAW3KZS+jC1VPTpaBkzqwEmAa8Cp7v7rnDTR8Dp4fIoYEfG3VrD2i5EpKCSSYV5qcr6A1UzOxn4D+Aud/8ic5u7O+B9eWIzqzezJjNramtr68tdRUSkF1mFu5kNJgj2tLs/GpY/7ppuCW93h/WdwOiMu1eFtW7cPeXude5eV1lZ2d/+RUSkB9kcLWPAQ8AWd38gY9MaYG64PBdYnVG/NTxqZirQnjF9IyIiBZDNnPtlwC3AW2b2elj7F+B+4BEzmwe0ADeG254AZgLNQAdwW047FhGRXvUa7u7+AmDH2Dy9h/0duGOAfYmIyADoG6oiIjGkcBcRiSGFu4hIDCncRURiSOEuIhJDCncRkRhSuIuIxJDCXUQkhhTuIiIxpHAXEYkhhbuISAwp3EVEYkjhLpIn6TTU1EBZWXCbTkfdkZSSPl1mT0Syk05DfT10dATrLS3BOuiyd1IYGrmL5EFDw5Fg79LREdRFCkHhLpIH27f3rS6Sawp3kTwYM6ZvdZFcU7iL5EFjIyQS3WuJRFAXKYRsLpC91Mx2m9nbGbXhZva0mb0X3p4W1s3MFptZs5m9aWaT89m8SLFKJiGVgupqMAtuUyl9mCqFk83I/TfAjKNqC4C17j4OWBuuA1wDjAt/6oEluWlT5MSTTMK2bdDZGdwq2KWQeg13d/8j8OlR5dnAsnB5GXBdRn25B14BhpnZGblqVkREstPfOffT3X1XuPwRcHq4PArYkbFfa1gTEZECGvAHqu7ugPf1fmZWb2ZNZtbU1tY20DZERCRDf8P9467plvB2d1jfCYzO2K8qrH2Fu6fcvc7d6yorK/vZhoiI9KS/4b4GmBsuzwVWZ9RvDY+amQq0Z0zfiIiUtgKecKjXc8uY2cPANGCkmbUC/xu4H3jEzOYBLcCN4e5PADOBZqADuC0PPYuInHgKfMIhC6bMo1VXV+dNTU1RtyEikj81NUGgH626OjhWth/MbI
O71/W0Td9QFREphAKfcEjhLrGkc6nLV0T9R1HgEw4p3CV2uqY2W1rA/cjUpgK+hBXDH0WBTzikOXeJnTxMbcqJrlj+KNLp4KT+27cHI/bGxgF9mHq8OXeFu8ROWVkwODuaWXCeFylBMf2j0AeqUlJ0LvUiE/VcN5TkH4XCXWJH51IvIsUw1w0l+UehcJfY0bnUi0ixXEy2BP8oNOcuIvkT07nuYqE5d5FSFfV8dwnOdRcLhbtIXBXDfHcJznUXC4W75FzUg0UJFcN8dwnOdRcLhbvkVDEMFotG1O9yBT6XyTHpYrKRULhLThXDYLEoFMO7nOa7S5rCXXKqWAaLkY+ai+FdTvPdJU3hLjlVFIPFYhg1F8O7nOa7S5rCPWaiHrA2NkJiyMFutcSQg4UdLBbDqLko3uXQfHcJU7jHSDEMWJOkSfl3qWYbRifVbCPl3yVJiY2aNSUiUXP3nP8AM4B3Ca6luqC3/adMmeInuhW3P+/V5TvcOOTV5Tt8xe3PF7yH6mr3INa7/1RXl1gTxdCDu/uKFcFzmgW3K1YU9vkl9oAmP1YOH2tDf3+AcuB94CxgCPAGcP7x7jPQcI86WFfc/rwn+LJbjiT4suB9GJ09ZprRWcAmrOdgNStcDytWuCcS3Z8/kVC4SuwcL9zzMS1zMdDs7h+4+37gt8DsPDwPAOnvv0D9kkm0HKrCKaPlUBX1SyaR/v4L+XrKr2hI1dDB0G61DobSkKopWA8AY8p39qmenyaKYK5ZHySK5CXcRwE7MtZbw1peFEOwbj90Zp/q+dJ46D4S7O1WS7CXxkP3FbCJIplr1geJUuIi+0DVzOrNrMnMmtra2vr9OMUQrGPKP+xTPV+S1S+S4qgPM/kuyeoXC9iERs0ixSAf4b4TGJ2xXhXWunH3lLvXuXtdZWVlv5+sGIK1sX5bzyPm+m0F6yFopJFkYjXbGEsn5WxjLMnEao2aRUpQPsL9T8A4MxtrZkOAm4A1eXgeoDiCNflvl5O6/TWqy1uDEXN5K6nbXyP5b5cXrIegEY2aRSSQl4t1mNlM4OcER84sdffjDh0HerGO9PdfoCFVw/ZDZzKm/EMa67cVPlhFRArseBfr0JWYREROULoSk4hIiVG4i4jEkMJdRCSGFO4iIjGkcBcRiaGiOFrGzNqAlhw81Ejgkxw8ThzotQjodQjodTgiTq9Ftbv3+C3Qogj3XDGzpmMdFlRq9FoE9DoE9DocUSqvhaZlRERiSOEuIhJDcQv3VNQNFBG9FgG9DgG9DkeUxGsRqzl3EREJxG3kLiIiKNxFRGIpNuFuZjPM7F0zazazBVH3EwUzG21mz5rZZjPbZGY/iLqnKJlZuZm9ZmZ/iLqXKJnZMDNbZWbvmNkWM7s06p6iYGb/M/x38baZPWxmFVH3lE+xCHczKwf+FbgGOB+42czOj7arSBwE7nH384GpwB0l+jp0+QGwJeomisAvgCfd/VyglhJ8TcxsFDAfqHP3iQTXmrgp2q7yKxbhDlwMNLv7B+6+H/gtMDvingrO3Xe5+8Zw+S8E/4jzdnHyYmZmVcAs4NdR9xIlMzsV+AfgIQB33+/un0fbVWQGAX9nZoOABFDYixwXWFzCfRSwI2O9lRINtS5mVgNMAl6NtpPI/Bz4Z6Az6kYiNhZoA/49nKL6tZkNjbqpQnP3ncBPge3ALqDd3Z+Ktqv8iku4SwYzOxn4D+Aud/8i6n4KzcyuBXa7+4aoeykCg4DJwBJ3nwTsBUruMykzO43gf/NjgTOBoWY2J9qu8isu4b4TGJ2xXhXWSo6ZDSYI9rS7Pxp1PxG5DPimmW0jmKK7ysxWRNtSZFqBVnfv+h/cKoKwLzX/Ddjq7m3ufgB4FPivEfeUV3EJ9z8B48xsrJkNIfigZE3EPRWcmRnB3OoWd38g6n6i4u4/cvcqd68h+FtY5+6xHqUdi7t/BOwws/FhaTqwOcKWorIdmGpmifDfyXRi/sHyoKgbyAV3P2hmdwL/SfAp+FJ33xRxW1G4DLgFeMvMXg9r/+LuT0TYk0Tvn4B0OPD5ALgt4n4Kzt1fNbNVwEaCo8peI+anIdDpB0REYigu0zIiIpJB4S4iEkMKdxGRGFK4i4jEkMJdRCSGFO4iIjGkcBcRiaH/DwETy1MYnSGJAAAAAElFTkSuQmCC\n",
- "text/plain": [
- "<Figure size 432x288 with 1 Axes>"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"init_list = list(range(10))\n",
"squares_list = list(result.get(squares))\n",
@@ -193,7 +75,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -216,7 +98,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {
"scrolled": true
},
@@ -228,159 +110,15 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
- "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
- " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
- "<!-- Generated by graphviz version 2.43.0 (0)\n",
- " -->\n",
- "<!-- Title: G Pages: 1 -->\n",
- "<svg width=\"308pt\" height=\"587pt\"\n",
- " viewBox=\"0.00 0.00 307.55 587.46\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
- "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 583.46)\">\n",
- "<title>G</title>\n",
- "<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-583.46 303.55,-583.46 303.55,4 -4,4\"/>\n",
- "<!-- Cell 2: Create -->\n",
- "<g id=\"node1\" class=\"node\">\n",
- "<title>Cell 2: Create</title>\n",
- "<polygon fill=\"none\" stroke=\"blue\" points=\"210.36,-579.46 116.85,-579.46 116.85,-543.46 210.36,-543.46 210.36,-579.46\"/>\n",
- "<text text-anchor=\"middle\" x=\"163.61\" y=\"-557.26\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">Cell 2: Create</text>\n",
- "</g>\n",
- "<!-- init_pcoll -->\n",
- "<g id=\"node2\" class=\"node\">\n",
- "<title>init_pcoll</title>\n",
- "<ellipse fill=\"none\" stroke=\"blue\" cx=\"163.61\" cy=\"-463.46\" rx=\"44.01\" ry=\"44.01\"/>\n",
- "<text text-anchor=\"middle\" x=\"163.61\" y=\"-459.26\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">init_pcoll</text>\n",
- "</g>\n",
- "<!-- Cell 2: Create->init_pcoll -->\n",
- "<g id=\"edge1\" class=\"edge\">\n",
- "<title>Cell 2: Create->init_pcoll</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M163.61,-543.3C163.61,-536.01 163.61,-527.08 163.61,-517.88\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"167.11,-517.85 163.61,-507.85 160.11,-517.85 167.11,-517.85\"/>\n",
- "</g>\n",
- "<!-- Cell 2: Square -->\n",
- "<g id=\"node3\" class=\"node\">\n",
- "<title>Cell 2: Square</title>\n",
- "<polygon fill=\"none\" stroke=\"blue\" points=\"157.7,-383.45 61.51,-383.45 61.51,-347.45 157.7,-347.45 157.7,-383.45\"/>\n",
- "<text text-anchor=\"middle\" x=\"109.61\" y=\"-361.25\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">Cell 2: Square</text>\n",
- "</g>\n",
- "<!-- init_pcoll->Cell 2: Square -->\n",
- "<g id=\"edge2\" class=\"edge\">\n",
- "<title>init_pcoll->Cell 2: Square</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M142.42,-424.79C136.33,-413.96 129.82,-402.4 124.23,-392.44\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"127.13,-390.47 119.18,-383.47 121.03,-393.9 127.13,-390.47\"/>\n",
- "</g>\n",
- "<!-- Cell 2: Cube -->\n",
- "<g id=\"node5\" class=\"node\">\n",
- "<title>Cell 2: Cube</title>\n",
- "<polygon fill=\"none\" stroke=\"blue\" points=\"267.88,-383.45 181.33,-383.45 181.33,-347.45 267.88,-347.45 267.88,-383.45\"/>\n",
- "<text text-anchor=\"middle\" x=\"224.61\" y=\"-361.25\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">Cell 2: Cube</text>\n",
- "</g>\n",
- "<!-- init_pcoll->Cell 2: Cube -->\n",
- "<g id=\"edge3\" class=\"edge\">\n",
- "<title>init_pcoll->Cell 2: Cube</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M186.84,-425.89C193.98,-414.66 201.68,-402.54 208.25,-392.2\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"211.28,-393.95 213.69,-383.63 205.38,-390.19 211.28,-393.95\"/>\n",
- "</g>\n",
- "<!-- squares -->\n",
- "<g id=\"node4\" class=\"node\">\n",
- "<title>squares</title>\n",
- "<ellipse fill=\"none\" stroke=\"blue\" cx=\"97.61\" cy=\"-274.82\" rx=\"36.76\" ry=\"36.76\"/>\n",
- "<text text-anchor=\"middle\" x=\"97.61\" y=\"-270.62\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">squares</text>\n",
- "</g>\n",
- "<!-- Cell 2: Square->squares -->\n",
- "<g id=\"edge4\" class=\"edge\">\n",
- "<title>Cell 2: Square->squares</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M107.29,-347.36C106.27,-339.83 105.02,-330.58 103.76,-321.26\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"107.21,-320.64 102.4,-311.2 100.27,-321.58 107.21,-320.64\"/>\n",
- "</g>\n",
- "<!-- Cell 5: Average Square -->\n",
- "<g id=\"node7\" class=\"node\">\n",
- "<title>Cell 5: Average Square</title>\n",
- "<polygon fill=\"none\" stroke=\"blue\" points=\"145.32,-202.2 -0.11,-202.2 -0.11,-166.2 145.32,-166.2 145.32,-202.2\"/>\n",
- "<text text-anchor=\"middle\" x=\"72.61\" y=\"-180\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">Cell 5: Average Square</text>\n",
- "</g>\n",
- "<!-- squares->Cell 5: Average Square -->\n",
- "<g id=\"edge5\" class=\"edge\">\n",
- "<title>squares->Cell 5: Average Square</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M87.87,-239.3C85.33,-230.32 82.64,-220.77 80.23,-212.23\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"83.55,-211.11 77.47,-202.43 76.81,-213.01 83.55,-211.11\"/>\n",
- "</g>\n",
- "<!-- cubes -->\n",
- "<g id=\"node6\" class=\"node\">\n",
- "<title>cubes</title>\n",
- "<ellipse fill=\"none\" stroke=\"blue\" cx=\"227.61\" cy=\"-274.82\" rx=\"30.49\" ry=\"30.49\"/>\n",
- "<text text-anchor=\"middle\" x=\"227.61\" y=\"-270.62\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">cubes</text>\n",
- "</g>\n",
- "<!-- Cell 2: Cube->cubes -->\n",
- "<g id=\"edge6\" class=\"edge\">\n",
- "<title>Cell 2: Cube->cubes</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M225.18,-347.36C225.5,-338.2 225.89,-326.5 226.27,-315.22\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"229.78,-315.19 226.62,-305.07 222.78,-314.95 229.78,-315.19\"/>\n",
- "</g>\n",
- "<!-- Cell 5: Average Cube -->\n",
- "<g id=\"node9\" class=\"node\">\n",
- "<title>Cell 5: Average Cube</title>\n",
- "<polygon fill=\"none\" stroke=\"blue\" points=\"299.5,-202.2 163.72,-202.2 163.72,-166.2 299.5,-166.2 299.5,-202.2\"/>\n",
- "<text text-anchor=\"middle\" x=\"231.61\" y=\"-180\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">Cell 5: Average Cube</text>\n",
- "</g>\n",
- "<!-- cubes->Cell 5: Average Cube -->\n",
- "<g id=\"edge7\" class=\"edge\">\n",
- "<title>cubes->Cell 5: Average Cube</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M228.94,-244.31C229.4,-234.11 229.91,-222.72 230.37,-212.68\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"233.87,-212.59 230.83,-202.44 226.88,-212.27 233.87,-212.59\"/>\n",
- "</g>\n",
- "<!-- average_square -->\n",
- "<g id=\"node8\" class=\"node\">\n",
- "<title>average_square</title>\n",
- "<ellipse fill=\"none\" stroke=\"blue\" cx=\"72.61\" cy=\"-65.1\" rx=\"65.2\" ry=\"65.2\"/>\n",
- "<text text-anchor=\"middle\" x=\"72.61\" y=\"-60.9\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">average_square</text>\n",
- "</g>\n",
- "<!-- Cell 5: Average Square->average_square -->\n",
- "<g id=\"edge8\" class=\"edge\">\n",
- "<title>Cell 5: Average Square->average_square</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M72.61,-166.17C72.61,-158.97 72.61,-150.04 72.61,-140.5\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"76.11,-140.39 72.61,-130.39 69.11,-140.39 76.11,-140.39\"/>\n",
- "</g>\n",
- "<!-- average_cube -->\n",
- "<g id=\"node10\" class=\"node\">\n",
- "<title>average_cube</title>\n",
- "<ellipse fill=\"none\" stroke=\"blue\" cx=\"231.61\" cy=\"-65.1\" rx=\"58.92\" ry=\"58.92\"/>\n",
- "<text text-anchor=\"middle\" x=\"231.61\" y=\"-60.9\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">average_cube</text>\n",
- "</g>\n",
- "<!-- Cell 5: Average Cube->average_cube -->\n",
- "<g id=\"edge9\" class=\"edge\">\n",
- "<title>Cell 5: Average Cube->average_cube</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M231.61,-166.17C231.61,-157.35 231.61,-145.94 231.61,-133.99\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"235.11,-133.88 231.61,-123.88 228.11,-133.88 235.11,-133.88\"/>\n",
- "</g>\n",
- "</g>\n",
- "</svg>\n"
- ],
- "text/plain": [
- "<IPython.core.display.HTML object>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"result = p.run()"
]
}
],
"metadata": {
- "kernelspec": {
- "display_name": "Python3 (ib_venv)",
- "language": "python",
- "name": "p3_ib_venv"
- },
"language_info": {
"codemirror_mode": {
"name": "ipython",
@@ -391,7 +129,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.4"
+ "version": "3.7.5rc1"
}
},
"nbformat": 4,
diff --git a/sdks/python/apache_beam/runners/interactive/examples/Interactive Beam Running on Flink.ipynb b/sdks/python/apache_beam/runners/interactive/examples/Interactive Beam Running on Flink.ipynb
index c30c81b..5afec24 100644
--- a/sdks/python/apache_beam/runners/interactive/examples/Interactive Beam Running on Flink.ipynb
+++ b/sdks/python/apache_beam/runners/interactive/examples/Interactive Beam Running on Flink.ipynb
@@ -28,171 +28,24 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import apache_beam as beam\n",
"from apache_beam.runners.interactive import interactive_runner\n",
- "from apache_beam.runners.portability import portable_runner\n",
- "from apache_beam.options import pipeline_options"
+ "from apache_beam.runners.portability import flink_runner\n",
+ "\n",
+ "p = beam.Pipeline(interactive_runner.InteractiveRunner(underlying_runner=flink_runner.FlinkRunner()))"
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "options = pipeline_options.PipelineOptions()\n",
- "options.view_as(pipeline_options.PortableOptions).job_endpoint = 'localhost:8099'\n",
- "options.view_as(pipeline_options.SetupOptions).sdk_location = 'container'\n",
- "options.view_as(pipeline_options.DebugOptions).experiments = 'beam_fn_api'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "cache_dir = \"gs://bucket-name/path/to/dir\"\n",
- "underlying_runner = portable_runner.PortableRunner()\n",
- "runner = interactive_runner.InteractiveRunner(underlying_runner=underlying_runner, cache_dir=cache_dir)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
- "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
- " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
- "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
- " -->\n",
- "<!-- Title: G Pages: 1 -->\n",
- "<svg width=\"194pt\" height=\"218pt\"\n",
- " viewBox=\"0.00 0.00 193.60 218.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
- "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 214)\">\n",
- "<title>G</title>\n",
- "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-214 189.596,-214 189.596,4 -4,4\"/>\n",
- "<!-- leaf7581 -->\n",
- "<!-- Create -->\n",
- "<g id=\"node2\" class=\"node\"><title>Create</title>\n",
- "<ellipse fill=\"none\" stroke=\"blue\" cx=\"96.5963\" cy=\"-192\" rx=\"33.5952\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"96.5963\" y=\"-188.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">Create</text>\n",
- "</g>\n",
- "<!-- Square -->\n",
- "<g id=\"node3\" class=\"node\"><title>Square</title>\n",
- "<ellipse fill=\"none\" stroke=\"blue\" cx=\"50.5963\" cy=\"-105\" rx=\"35.194\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"50.5963\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">Square</text>\n",
- "</g>\n",
- "<!-- Create->Square -->\n",
- "<g id=\"edge4\" class=\"edge\"><title>Create->Square</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M83.5132,-175.21C79.0649,-169.393 74.2897,-162.607 70.5963,-156 66.4908,-148.655 62.8,-140.295 59.7398,-132.541\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"63.0108,-131.296 56.2171,-123.166 56.4581,-133.758 63.0108,-131.296\"/>\n",
- "<g id=\"a_edge4-label\"><a xlink:title=\"{1, 3, 5, 0, 6, 7, 4, 8, 2, 9}\">\n",
- "<text text-anchor=\"middle\" x=\"90.5963\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\">{1, ...}</text>\n",
- "</a>\n",
- "</g>\n",
- "</g>\n",
- "<!-- Cube -->\n",
- "<g id=\"node5\" class=\"node\"><title>Cube</title>\n",
- "<ellipse fill=\"none\" stroke=\"blue\" cx=\"137.596\" cy=\"-105\" rx=\"29.4969\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"137.596\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">Cube</text>\n",
- "</g>\n",
- "<!-- Create->Cube -->\n",
- "<g id=\"edge1\" class=\"edge\"><title>Create->Cube</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M104.697,-174.207C110.575,-162.021 118.631,-145.318 125.301,-131.491\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"128.46,-132.997 129.652,-122.469 122.155,-129.956 128.46,-132.997\"/>\n",
- "<g id=\"a_edge1-label\"><a xlink:title=\"{1, 3, 5, 0, 6, 7, 4, 8, 2, 9}\">\n",
- "<text text-anchor=\"middle\" x=\"138.596\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\">{1, ...}</text>\n",
- "</a>\n",
- "</g>\n",
- "</g>\n",
- "<!-- leaf7582 -->\n",
- "<!-- Square->leaf7582 -->\n",
- "<g id=\"edge2\" class=\"edge\"><title>Square->leaf7582</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M48.775,-86.799C47.5429,-75.1626 45.8896,-59.5479 44.4802,-46.2368\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"47.9485,-45.7513 43.4149,-36.1754 40.9874,-46.4884 47.9485,-45.7513\"/>\n",
- "<g id=\"a_edge2-label\"><a xlink:title=\"{16, 25, 64, 36, 81, 49, 4, 0, 1, 9}\">\n",
- "<text text-anchor=\"middle\" x=\"69.5963\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">{16, ...}</text>\n",
- "</a>\n",
- "</g>\n",
- "</g>\n",
- "<!-- Cube->leaf7581 -->\n",
- "<g id=\"edge3\" class=\"edge\"><title>Cube->leaf7581</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M138.608,-86.799C139.293,-75.1626 140.211,-59.5479 140.994,-46.2368\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"144.493,-46.3637 141.586,-36.1754 137.505,-45.9526 144.493,-46.3637\"/>\n",
- "<g id=\"a_edge3-label\"><a xlink:title=\"{27, 512, 216, 125, 729, 64, 343, 1, 8, 0}\">\n",
- "<text text-anchor=\"middle\" x=\"162.596\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">{27, ...}</text>\n",
- "</a>\n",
- "</g>\n",
- "</g>\n",
- "</g>\n",
- "</svg>\n"
- ],
- "text/plain": [
- "<IPython.core.display.HTML object>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "Running..."
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "Using 0 cached PCollections\n",
- "Executing 8 of 3 transforms."
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "Cube produced {27, 512, 216, 125, 729, ...}"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "Square produced {16, 25, 64, 36, 81, ...}"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "Create produced {1, 3, 5, 0, 6, ...}"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "p = beam.Pipeline(runner=runner, options=options)\n",
- "init_pcoll = p | beam.Create(range(10))\n",
+ "init_pcoll = p | beam.Create(range(10))\n",
"squares = init_pcoll | 'Square' >> beam.Map(lambda x: x*x)\n",
"cubes = init_pcoll | 'Cube' >> beam.Map(lambda x: x**3)\n",
"result = p.run()\n",
@@ -201,7 +54,16 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "result.get(squares)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -224,196 +86,39 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
- "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
- " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
- "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
- " -->\n",
- "<!-- Title: G Pages: 1 -->\n",
- "<svg width=\"284pt\" height=\"305pt\"\n",
- " viewBox=\"0.00 0.00 284.34 305.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
- "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 301)\">\n",
- "<title>G</title>\n",
- "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-301 280.339,-301 280.339,4 -4,4\"/>\n",
- "<!-- Square -->\n",
- "<g id=\"node1\" class=\"node\"><title>Square</title>\n",
- "<ellipse fill=\"none\" stroke=\"grey\" cx=\"107.594\" cy=\"-192\" rx=\"35.194\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"107.594\" y=\"-188.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"gray\">Square</text>\n",
- "</g>\n",
- "<!-- Average Square -->\n",
- "<g id=\"node3\" class=\"node\"><title>Average Square</title>\n",
- "<ellipse fill=\"none\" stroke=\"blue\" cx=\"67.594\" cy=\"-105\" rx=\"67.6881\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"67.594\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">Average Square</text>\n",
- "</g>\n",
- "<!-- Square->Average Square -->\n",
- "<g id=\"edge1\" class=\"edge\"><title>Square->Average Square</title>\n",
- "<path fill=\"none\" stroke=\"red\" d=\"M99.6913,-174.207C94.085,-162.293 86.4474,-146.063 80.0289,-132.424\"/>\n",
- "<polygon fill=\"red\" stroke=\"red\" points=\"83.0421,-130.607 75.6172,-123.049 76.7084,-133.588 83.0421,-130.607\"/>\n",
- "<g id=\"a_edge1-label\"><a xlink:title=\"{16, 25, 64, 36, 81, 49, 4, 0, 1, 9}\">\n",
- "<text text-anchor=\"middle\" x=\"113.594\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\">{16, ...}</text>\n",
- "</a>\n",
- "</g>\n",
- "</g>\n",
- "<!-- Create -->\n",
- "<g id=\"node2\" class=\"node\"><title>Create</title>\n",
- "<ellipse fill=\"none\" stroke=\"grey\" cx=\"152.594\" cy=\"-279\" rx=\"33.5952\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"152.594\" y=\"-275.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"gray\">Create</text>\n",
- "</g>\n",
- "<!-- Create->Square -->\n",
- "<g id=\"edge3\" class=\"edge\"><title>Create->Square</title>\n",
- "<path fill=\"none\" stroke=\"grey\" d=\"M139.447,-262.245C134.993,-256.432 130.23,-249.639 126.594,-243 122.624,-235.751 119.118,-227.499 116.233,-219.818\"/>\n",
- "<polygon fill=\"grey\" stroke=\"grey\" points=\"119.456,-218.434 112.812,-210.181 112.859,-220.776 119.456,-218.434\"/>\n",
- "<g id=\"a_edge3-label\"><a xlink:title=\"{1, 3, 5, 0, 6, 7, 4, 8, 2, 9}\">\n",
- "<text text-anchor=\"middle\" x=\"146.594\" y=\"-231.8\" font-family=\"Times,serif\" font-size=\"14.00\">{1, ...}</text>\n",
- "</a>\n",
- "</g>\n",
- "</g>\n",
- "<!-- Cube -->\n",
- "<g id=\"node6\" class=\"node\"><title>Cube</title>\n",
- "<ellipse fill=\"none\" stroke=\"grey\" cx=\"193.594\" cy=\"-192\" rx=\"29.4969\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"193.594\" y=\"-188.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"gray\">Cube</text>\n",
- "</g>\n",
- "<!-- Create->Cube -->\n",
- "<g id=\"edge6\" class=\"edge\"><title>Create->Cube</title>\n",
- "<path fill=\"none\" stroke=\"grey\" d=\"M160.694,-261.207C166.572,-249.021 174.629,-232.318 181.298,-218.491\"/>\n",
- "<polygon fill=\"grey\" stroke=\"grey\" points=\"184.458,-219.997 185.65,-209.469 178.153,-216.956 184.458,-219.997\"/>\n",
- "<g id=\"a_edge6-label\"><a xlink:title=\"{1, 3, 5, 0, 6, 7, 4, 8, 2, 9}\">\n",
- "<text text-anchor=\"middle\" x=\"194.594\" y=\"-231.8\" font-family=\"Times,serif\" font-size=\"14.00\">{1, ...}</text>\n",
- "</a>\n",
- "</g>\n",
- "</g>\n",
- "<!-- leaf7567 -->\n",
- "<!-- Average Square->leaf7567 -->\n",
- "<g id=\"edge2\" class=\"edge\"><title>Average Square->leaf7567</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M67.594,-86.799C67.594,-75.1626 67.594,-59.5479 67.594,-46.2368\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"71.0941,-46.1754 67.594,-36.1754 64.0941,-46.1755 71.0941,-46.1754\"/>\n",
- "<g id=\"a_edge2-label\"><a xlink:title=\"{28.5}\">\n",
- "<text text-anchor=\"middle\" x=\"86.594\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">{28.5}</text>\n",
- "</a>\n",
- "</g>\n",
- "</g>\n",
- "<!-- leaf7570 -->\n",
- "<!-- Average Cube -->\n",
- "<g id=\"node7\" class=\"node\"><title>Average Cube</title>\n",
- "<ellipse fill=\"none\" stroke=\"blue\" cx=\"214.594\" cy=\"-105\" rx=\"61.99\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"214.594\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"blue\">Average Cube</text>\n",
- "</g>\n",
- "<!-- Cube->Average Cube -->\n",
- "<g id=\"edge4\" class=\"edge\"><title>Cube->Average Cube</title>\n",
- "<path fill=\"none\" stroke=\"red\" d=\"M197.844,-173.799C200.719,-162.163 204.576,-146.548 207.865,-133.237\"/>\n",
- "<polygon fill=\"red\" stroke=\"red\" points=\"211.35,-133.723 210.351,-123.175 204.554,-132.044 211.35,-133.723\"/>\n",
- "<g id=\"a_edge4-label\"><a xlink:title=\"{27, 512, 216, 125, 729, 64, 343, 1, 8, 0}\">\n",
- "<text text-anchor=\"middle\" x=\"227.594\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\">{27, ...}</text>\n",
- "</a>\n",
- "</g>\n",
- "</g>\n",
- "<!-- Average Cube->leaf7570 -->\n",
- "<g id=\"edge5\" class=\"edge\"><title>Average Cube->leaf7570</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M214.594,-86.799C214.594,-75.1626 214.594,-59.5479 214.594,-46.2368\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"218.094,-46.1754 214.594,-36.1754 211.094,-46.1755 218.094,-46.1754\"/>\n",
- "<g id=\"a_edge5-label\"><a xlink:title=\"{202.5}\">\n",
- "<text text-anchor=\"middle\" x=\"237.094\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">{202.5}</text>\n",
- "</a>\n",
- "</g>\n",
- "</g>\n",
- "</g>\n",
- "</svg>\n"
- ],
- "text/plain": [
- "<IPython.core.display.HTML object>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "Running..."
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "Using 2 cached PCollections\n",
- "Executing 8 of 5 transforms."
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "Average Cube produced {202.5}"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "Cube produced {27, 512, 216, 125, 729, ...}"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "Square produced {16, 25, 64, 36, 81, ...}"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "Average Square produced {28.5}"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"average_square = squares | 'Average Square' >> beam.CombineGlobally(AverageFn())\n",
"average_cube = cubes | 'Average Cube' >> beam.CombineGlobally(AverageFn())\n",
"result = p.run()"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "result.get(average_square)"
+ ]
}
],
"metadata": {
- "kernelspec": {
- "display_name": "Python (beam_venv)",
- "language": "python",
- "name": "beam_venv_kernel"
- },
"language_info": {
"codemirror_mode": {
"name": "ipython",
- "version": 2
+ "version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.13"
+ "pygments_lexer": "ipython3",
+ "version": "3.7.5rc1"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
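
The notebook above now clears stored outputs and relies on result.get(...) to materialize PCollection contents on demand. Below is a minimal, self-contained sketch of that flow outside the notebook; the AverageFn combiner is an assumed stand-in for the one defined in an earlier notebook cell not shown here.

import apache_beam as beam
from apache_beam.runners.direct import direct_runner
from apache_beam.runners.interactive import interactive_beam as ib
from apache_beam.runners.interactive import interactive_runner


class AverageFn(beam.CombineFn):
  # Assumed stand-in for the AverageFn defined earlier in the notebook.
  def create_accumulator(self):
    return (0.0, 0)

  def add_input(self, accumulator, value):
    total, count = accumulator
    return total + value, count + 1

  def merge_accumulators(self, accumulators):
    totals, counts = zip(*accumulators)
    return sum(totals), sum(counts)

  def extract_output(self, accumulator):
    total, count = accumulator
    return total / count if count else float('NaN')


p = beam.Pipeline(
    interactive_runner.InteractiveRunner(direct_runner.DirectRunner()))
init_pcoll = p | beam.Create(range(10))
squares = init_pcoll | 'Square' >> beam.Map(lambda x: x * x)
average_square = squares | 'Average Square' >> beam.CombineGlobally(AverageFn())
# Watching the local scope lets the interactive runner cache these PCollections.
ib.watch(locals())
result = p.run()
# Materializes the cached contents of a watched PCollection.
print(result.get(average_square))
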
diff --git a/sdks/python/apache_beam/runners/interactive/interactive_beam.py b/sdks/python/apache_beam/runners/interactive/interactive_beam.py
index a7a7584..3769dd7 100644
--- a/sdks/python/apache_beam/runners/interactive/interactive_beam.py
+++ b/sdks/python/apache_beam/runners/interactive/interactive_beam.py
@@ -28,6 +28,9 @@
Note: If you want backward-compatibility, only invoke interfaces provided by
this module in your notebook or application code.
"""
+
+# pytype: skip-file
+
from __future__ import absolute_import
from apache_beam.runners.interactive import interactive_environment as ie
diff --git a/sdks/python/apache_beam/runners/interactive/interactive_beam_test.py b/sdks/python/apache_beam/runners/interactive/interactive_beam_test.py
index 7660b1a..720f443 100644
--- a/sdks/python/apache_beam/runners/interactive/interactive_beam_test.py
+++ b/sdks/python/apache_beam/runners/interactive/interactive_beam_test.py
@@ -16,6 +16,8 @@
#
"""Tests for apache_beam.runners.interactive.interactive_beam."""
+# pytype: skip-file
+
from __future__ import absolute_import
import importlib
diff --git a/sdks/python/apache_beam/runners/interactive/interactive_environment.py b/sdks/python/apache_beam/runners/interactive/interactive_environment.py
index 39d1445..d5f850d 100644
--- a/sdks/python/apache_beam/runners/interactive/interactive_environment.py
+++ b/sdks/python/apache_beam/runners/interactive/interactive_environment.py
@@ -22,6 +22,8 @@
External Interactive Beam users please use interactive_beam module in
application code or notebook.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import atexit
@@ -79,11 +81,17 @@
self._watching_set = set()
# Holds variables list of (Dict[str, object]).
self._watching_dict_list = []
- # Holds results of pipeline runs as Dict[Pipeline, PipelineResult].
+ # Holds results of main jobs as Dict[Pipeline, PipelineResult].
# Each key is a pipeline instance defined by the end user. The
# InteractiveRunner is responsible for populating this dictionary
# implicitly.
- self._pipeline_results = {}
+ self._main_pipeline_results = {}
+ # Holds results of background caching jobs as
+ # Dict[Pipeline, PipelineResult]. Each key is a pipeline instance defined by
+ # the end user. The InteractiveRunner is responsible for populating this
+ # dictionary implicitly when a background caching job is started.
+ self._background_caching_pipeline_results = {}
+ self._cached_source_signature = {}
self._tracked_user_pipelines = set()
# Always watch __main__ module.
self.watch('__main__')
@@ -199,31 +207,48 @@
"""Gets the cache manager held by current Interactive Environment."""
return self._cache_manager
- def set_pipeline_result(self, pipeline, result):
- """Sets the pipeline run result. Adds one if absent. Otherwise, replace."""
+ def set_pipeline_result(self, pipeline, result, is_main_job):
+ """Sets the pipeline run result. Adds one if absent. Otherwise, replace.
+
+ When is_main_job is True, set the result for the main job; otherwise, set
+ the result for the background caching job.
+ """
assert issubclass(type(pipeline), beam.Pipeline), (
'pipeline must be an instance of apache_beam.Pipeline or its subclass')
assert issubclass(type(result), runner.PipelineResult), (
'result must be an instance of '
'apache_beam.runners.runner.PipelineResult or its subclass')
- self._pipeline_results[pipeline] = result
+ if is_main_job:
+ self._main_pipeline_results[pipeline] = result
+ else:
+ self._background_caching_pipeline_results[pipeline] = result
- def evict_pipeline_result(self, pipeline):
+ def evict_pipeline_result(self, pipeline, is_main_job=True):
"""Evicts the tracking of given pipeline run. Noop if absent."""
- return self._pipeline_results.pop(pipeline, None)
+ if is_main_job:
+ return self._main_pipeline_results.pop(pipeline, None)
+ return self._background_caching_pipeline_results.pop(pipeline, None)
- def pipeline_result(self, pipeline):
+ def pipeline_result(self, pipeline, is_main_job=True):
"""Gets the pipeline run result. None if absent."""
- return self._pipeline_results.get(pipeline, None)
+ if is_main_job:
+ return self._main_pipeline_results.get(pipeline, None)
+ return self._background_caching_pipeline_results.get(pipeline, None)
- def is_terminated(self, pipeline):
+ def is_terminated(self, pipeline, is_main_job=True):
"""Queries if the most recent job (by executing the given pipeline) state
is in a terminal state. True if absent."""
- result = self.pipeline_result(pipeline)
+ result = self.pipeline_result(pipeline, is_main_job=is_main_job)
if result:
return runner.PipelineState.is_terminal(result.state)
return True
+ def set_cached_source_signature(self, pipeline, signature):
+ self._cached_source_signature[pipeline] = signature
+
+ def get_cached_source_signature(self, pipeline):
+ return self._cached_source_signature.get(pipeline, set())
+
def track_user_pipelines(self):
"""Record references to all user-defined pipeline instances watched in
current environment.
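
The environment now keeps main-job and background-caching-job results in separate dictionaries keyed by the same user pipeline. A small sketch of how calling code is expected to use the is_main_job flag; the PipelineResult instances here are illustrative placeholders.

import apache_beam as beam
from apache_beam.runners import runner
from apache_beam.runners.interactive import interactive_environment as ie

env = ie.current_env()
p = beam.Pipeline()

# Record the result of the user-triggered (main) run.
main_result = runner.PipelineResult(runner.PipelineState.RUNNING)
env.set_pipeline_result(p, main_result, is_main_job=True)

# Record the result of the background caching job for the same pipeline.
caching_result = runner.PipelineResult(runner.PipelineState.RUNNING)
env.set_pipeline_result(p, caching_result, is_main_job=False)

# Lookups are independent per job kind; is_main_job defaults to True.
assert env.pipeline_result(p) is main_result
assert env.pipeline_result(p, is_main_job=False) is caching_result
assert not env.is_terminated(p)  # RUNNING is not a terminal state.
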
diff --git a/sdks/python/apache_beam/runners/interactive/interactive_environment_test.py b/sdks/python/apache_beam/runners/interactive/interactive_environment_test.py
index 76c29b8..85f546d 100644
--- a/sdks/python/apache_beam/runners/interactive/interactive_environment_test.py
+++ b/sdks/python/apache_beam/runners/interactive/interactive_environment_test.py
@@ -16,6 +16,8 @@
#
"""Tests for apache_beam.runners.interactive.interactive_environment."""
+# pytype: skip-file
+
from __future__ import absolute_import
import importlib
@@ -109,7 +111,8 @@
with self.assertRaises(AssertionError) as ctx:
ie.current_env().set_pipeline_result(NotPipeline(),
runner.PipelineResult(
- runner.PipelineState.RUNNING))
+ runner.PipelineState.RUNNING),
+ is_main_job=True)
self.assertTrue('pipeline must be an instance of apache_beam.Pipeline '
'or its subclass' in ctx.exception)
@@ -118,7 +121,10 @@
pass
with self.assertRaises(AssertionError) as ctx:
- ie.current_env().set_pipeline_result(self._p, NotResult())
+ ie.current_env().set_pipeline_result(
+ self._p,
+ NotResult(),
+ is_main_job=True)
self.assertTrue('result must be an instance of '
'apache_beam.runners.runner.PipelineResult or its '
'subclass' in ctx.exception)
@@ -132,7 +138,10 @@
pipeline = PipelineSubClass()
pipeline_result = PipelineResultSubClass(runner.PipelineState.RUNNING)
- ie.current_env().set_pipeline_result(pipeline, pipeline_result)
+ ie.current_env().set_pipeline_result(
+ pipeline,
+ pipeline_result,
+ is_main_job=True)
self.assertIs(ie.current_env().pipeline_result(pipeline), pipeline_result)
def test_determine_terminal_state(self):
@@ -141,8 +150,10 @@
runner.PipelineState.CANCELLED,
runner.PipelineState.UPDATED,
runner.PipelineState.DRAINED):
- ie.current_env().set_pipeline_result(self._p, runner.PipelineResult(
- state))
+ ie.current_env().set_pipeline_result(
+ self._p,
+ runner.PipelineResult(state),
+ is_main_job=True)
self.assertTrue(ie.current_env().is_terminated(self._p))
for state in (runner.PipelineState.UNKNOWN,
runner.PipelineState.STARTING,
@@ -152,13 +163,18 @@
runner.PipelineState.PENDING,
runner.PipelineState.CANCELLING,
runner.PipelineState.UNRECOGNIZED):
- ie.current_env().set_pipeline_result(self._p, runner.PipelineResult(
- state))
+ ie.current_env().set_pipeline_result(
+ self._p,
+ runner.PipelineResult(state),
+ is_main_job=True)
self.assertFalse(ie.current_env().is_terminated(self._p))
def test_evict_pipeline_result(self):
pipeline_result = runner.PipelineResult(runner.PipelineState.DONE)
- ie.current_env().set_pipeline_result(self._p, pipeline_result)
+ ie.current_env().set_pipeline_result(
+ self._p,
+ pipeline_result,
+ is_main_job=True)
self.assertIs(ie.current_env().evict_pipeline_result(self._p),
pipeline_result)
self.assertIs(ie.current_env().pipeline_result(self._p), None)
diff --git a/sdks/python/apache_beam/runners/interactive/interactive_runner.py b/sdks/python/apache_beam/runners/interactive/interactive_runner.py
index b0222c3..5e05cb5 100644
--- a/sdks/python/apache_beam/runners/interactive/interactive_runner.py
+++ b/sdks/python/apache_beam/runners/interactive/interactive_runner.py
@@ -20,6 +20,8 @@
This module is experimental. No backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -32,6 +34,7 @@
from apache_beam.runners.interactive import cache_manager as cache
from apache_beam.runners.interactive import interactive_environment as ie
from apache_beam.runners.interactive import pipeline_instrument as inst
+from apache_beam.runners.interactive import background_caching_job
from apache_beam.runners.interactive.display import pipeline_graph
# size of PCollection samples cached.
@@ -126,6 +129,17 @@
def run_pipeline(self, pipeline, options):
pipeline_instrument = inst.pin(pipeline, options)
+ # The user_pipeline analyzed might be None if the given pipeline has nothing
+ # to be cached and tracing back to the user-defined pipeline is impossible.
+ # When it's None, there is nothing to cache, so no background caching job
+ # is started, and there is no background caching job result to track since
+ # no such job ever ran.
+ user_pipeline = pipeline_instrument.user_pipeline
+ if user_pipeline:
+ # Should use the underlying runner and run asynchronously.
+ background_caching_job.attempt_to_run_background_caching_job(
+ self._underlying_runner, user_pipeline, options)
+
pipeline_to_execute = beam.pipeline.Pipeline.from_runner_api(
pipeline_instrument.instrumented_pipeline_proto(),
self._underlying_runner,
@@ -137,10 +151,19 @@
render_option=self._render_option)
a_pipeline_graph.display_graph()
- result = pipeline_to_execute.run()
- result.wait_until_finish()
+ main_job_result = PipelineResult(pipeline_to_execute.run(),
+ pipeline_instrument)
+ # In addition to setting the result here, setting it redundantly from outer
+ # scopes is also recommended, since the user_pipeline might not be
+ # available from within this scope.
+ if user_pipeline:
+ ie.current_env().set_pipeline_result(
+ user_pipeline,
+ main_job_result,
+ is_main_job=True)
+ main_job_result.wait_until_finish()
- return PipelineResult(result, pipeline_instrument)
+ return main_job_result
class PipelineResult(beam.runners.runner.PipelineResult):
@@ -161,8 +184,7 @@
self._pipeline_instrument = pipeline_instrument
def wait_until_finish(self):
- # PipelineResult is not constructed until pipeline execution is finished.
- return
+ self._underlying_result.wait_until_finish()
def get(self, pcoll):
key = self._pipeline_instrument.cache_key(pcoll)
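
With the change above, wait_until_finish() delegates to the underlying runner's result instead of returning immediately, and the main job result is registered with the interactive environment under the discovered user pipeline. A rough usage sketch, assuming the pipeline and its PCollections are watched:

import apache_beam as beam
from apache_beam.runners.direct import direct_runner
from apache_beam.runners.interactive import interactive_beam as ib
from apache_beam.runners.interactive import interactive_environment as ie
from apache_beam.runners.interactive import interactive_runner

p = beam.Pipeline(
    interactive_runner.InteractiveRunner(direct_runner.DirectRunner()))
ib.watch({'p': p})
pcoll = p | beam.Create([1, 2, 3]) | beam.Map(lambda x: x + 1)
ib.watch(locals())

result = p.run()
# Now blocks until the underlying (main) job finishes rather than no-opping.
result.wait_until_finish()

# The same result object is expected to be retrievable from the environment,
# since run_pipeline registers it for the discovered user pipeline.
print(ie.current_env().pipeline_result(p) is result)
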
diff --git a/sdks/python/apache_beam/runners/interactive/interactive_runner_test.py b/sdks/python/apache_beam/runners/interactive/interactive_runner_test.py
index 36ebce8..9ef8b13 100644
--- a/sdks/python/apache_beam/runners/interactive/interactive_runner_test.py
+++ b/sdks/python/apache_beam/runners/interactive/interactive_runner_test.py
@@ -20,6 +20,8 @@
This module is experimental. No backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -29,11 +31,11 @@
import apache_beam as beam
from apache_beam.runners.direct import direct_runner
from apache_beam.runners.interactive import interactive_beam as ib
+from apache_beam.runners.interactive import interactive_environment as ie
from apache_beam.runners.interactive import interactive_runner
def print_with_message(msg):
-
def printer(elem):
print(msg, elem)
return elem
@@ -43,15 +45,20 @@
class InteractiveRunnerTest(unittest.TestCase):
+ def setUp(self):
+ ie.new_env()
+
def test_basic(self):
p = beam.Pipeline(
runner=interactive_runner.InteractiveRunner(
direct_runner.DirectRunner()))
+ ib.watch({'p': p})
p.run().wait_until_finish()
pc0 = (
p | 'read' >> beam.Create([1, 2, 3])
| 'Print1.1' >> beam.Map(print_with_message('Run1.1')))
pc = pc0 | 'Print1.2' >> beam.Map(print_with_message('Run1.2'))
+ ib.watch(locals())
p.run().wait_until_finish()
_ = pc | 'Print2' >> beam.Map(print_with_message('Run2'))
p.run().wait_until_finish()
@@ -59,7 +66,6 @@
p.run().wait_until_finish()
def test_wordcount(self):
-
class WordExtractingDoFn(beam.DoFn):
def process(self, element):
diff --git a/sdks/python/apache_beam/runners/interactive/pipeline_analyzer.py b/sdks/python/apache_beam/runners/interactive/pipeline_analyzer.py
index ab56921..bc72465 100644
--- a/sdks/python/apache_beam/runners/interactive/pipeline_analyzer.py
+++ b/sdks/python/apache_beam/runners/interactive/pipeline_analyzer.py
@@ -20,6 +20,8 @@
This module is experimental. No backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/interactive/pipeline_analyzer_test.py b/sdks/python/apache_beam/runners/interactive/pipeline_analyzer_test.py
index b0433ff..9aaeaba 100644
--- a/sdks/python/apache_beam/runners/interactive/pipeline_analyzer_test.py
+++ b/sdks/python/apache_beam/runners/interactive/pipeline_analyzer_test.py
@@ -20,6 +20,8 @@
This module is experimental. No backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py b/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py
index b9255e4..01224c8 100644
--- a/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py
+++ b/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py
@@ -21,6 +21,8 @@
This module accesses the current interactive environment and analyzes the given
pipeline to transform it into a one-shot pipeline with interactivity.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import apache_beam as beam
@@ -100,6 +102,11 @@
# (Dict[str, AppliedPTransform]).
self._cached_pcoll_read = {}
+ # Reference to the user defined pipeline instance based on the given
+ # pipeline. The class never mutates it.
+ # Note: the original pipeline is not the user pipeline.
+ self._user_pipeline = None
+
def instrumented_pipeline_proto(self):
"""Always returns a new instance of portable instrumented proto."""
return self._pipeline.to_runner_api(use_fake_coders=True)
@@ -258,6 +265,20 @@
"""Returns a snapshot of the pipeline before instrumentation."""
return self._pipeline_snap
+ @property
+ def user_pipeline(self):
+ """Returns a reference to the pipeline instance defined by the user. If a
+ pipeline has no cacheable PCollection and the user pipeline cannot be
+ found, return None indicating there is nothing to be cached in the user
+ pipeline.
+
+ The pipeline given for instrumenting and mutated in this class is not
+ necessarily the pipeline instance defined by the user. From the watched
+ scopes, this class figures out what the user pipeline instance is.
+ This metadata can be used for tracking pipeline results.
+ """
+ return self._user_pipeline
+
def instrument(self):
"""Instruments original pipeline with cache.
@@ -330,6 +351,10 @@
cacheable_key = self._pin._cacheable_key(pcoll)
if (cacheable_key in self._pin.cacheables and
self._pin.cacheables[cacheable_key]['pcoll'] != pcoll):
+ if not self._pin._user_pipeline:
+ # Retrieve a reference to the user defined pipeline instance.
+ self._pin._user_pipeline = self._pin.cacheables[cacheable_key][
+ 'pcoll'].pipeline
self._pin.cacheables[cacheable_key]['pcoll'] = pcoll
v = PreprocessVisitor(self)
diff --git a/sdks/python/apache_beam/runners/interactive/pipeline_instrument_test.py b/sdks/python/apache_beam/runners/interactive/pipeline_instrument_test.py
index 09b646e..eab2172 100644
--- a/sdks/python/apache_beam/runners/interactive/pipeline_instrument_test.py
+++ b/sdks/python/apache_beam/runners/interactive/pipeline_instrument_test.py
@@ -16,6 +16,8 @@
#
"""Tests for apache_beam.runners.interactive.pipeline_instrument."""
+# pytype: skip-file
+
from __future__ import absolute_import
import tempfile
@@ -273,6 +275,23 @@
p_origin.visit(v)
assert_pipeline_equal(self, p_origin, p_copy)
+ def test_find_out_correct_user_pipeline(self):
+ # This is the user pipeline instance we care about in the watched scope.
+ user_pipeline, _, _ = self._example_pipeline()
+ # This is a new runner pipeline instance with the same pipeline graph as
+ # the one the user_pipeline represents.
+ runner_pipeline = beam.pipeline.Pipeline.from_runner_api(
+ user_pipeline.to_runner_api(use_fake_coders=True),
+ user_pipeline.runner,
+ options=None)
+ # This is a totally irrelevant user pipeline in the watched scope.
+ irrelevant_user_pipeline = beam.Pipeline(
+ interactive_runner.InteractiveRunner())
+ ib.watch({'irrelevant_user_pipeline': irrelevant_user_pipeline})
+ # Build instrument from the runner pipeline.
+ pipeline_instrument = instr.pin(runner_pipeline)
+ self.assertIs(pipeline_instrument.user_pipeline, user_pipeline)
+
if __name__ == '__main__':
unittest.main()
diff --git a/sdks/python/apache_beam/runners/interactive/testing/mock_ipython.py b/sdks/python/apache_beam/runners/interactive/testing/mock_ipython.py
index ee3acd8..fc11e4d 100644
--- a/sdks/python/apache_beam/runners/interactive/testing/mock_ipython.py
+++ b/sdks/python/apache_beam/runners/interactive/testing/mock_ipython.py
@@ -15,9 +15,6 @@
# limitations under the License.
#
-# Mocked object returned by invoking get_ipython() in an ipython environment.
-_mocked_get_ipython = None
-
def mock_get_ipython():
"""Mock an ipython environment w/o setting up real ipython kernel.
@@ -27,17 +24,20 @@
Examples::
- # Usage, before each test function, append:
- @patch('IPython.get_ipython', mock_get_ipython)
+ # Usage, before each test function, prepend:
+ @patch('IPython.get_ipython', new_callable=mock_get_ipython)
- # Group lines of code into a cell:
- with mock_get_ipython():
+ # In the test function's signature, add an argument for the patch, e.g.:
+ def some_test(self, cell):
+
+ # Group lines of code into a cell using the argument:
+ with cell:
# arbitrary python code
# ...
# arbitrary python code
# Next cell with prompt increased by one:
- with mock_get_ipython(): # Auto-incremental
+ with cell: # Auto-incremental
# arbitrary python code
# ...
# arbitrary python code
@@ -48,6 +48,9 @@
def __init__(self):
self._execution_count = 0
+ def __call__(self):
+ return self
+
@property
def execution_count(self):
"""Execution count always starts from 1 and is constant within a cell."""
@@ -61,7 +64,4 @@
"""Marks exiting of a cell/prompt."""
pass
- global _mocked_get_ipython
- if not _mocked_get_ipython:
- _mocked_get_ipython = MockedGetIpython()
- return _mocked_get_ipython
+ return MockedGetIpython()
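
The updated docstring above changes how tests consume the mock: the patched object is passed into the test function and reused as a context manager per simulated cell. A minimal sketch of a test written against that pattern (it assumes IPython is importable in the test environment):

import unittest
from unittest.mock import patch

from apache_beam.runners.interactive.testing.mock_ipython import mock_get_ipython


class MockIPythonUsageTest(unittest.TestCase):

  @patch('IPython.get_ipython', new_callable=mock_get_ipython)
  def test_prompt_increments_per_cell(self, cell):
    # Each `with cell:` block simulates one notebook cell; the prompt
    # (execution_count) is constant inside a cell and advances by one
    # between cells.
    with cell:
      first = cell.execution_count
    with cell:
      second = cell.execution_count
    self.assertEqual(second, first + 1)


if __name__ == '__main__':
  unittest.main()
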
diff --git a/sdks/python/apache_beam/runners/job/manager.py b/sdks/python/apache_beam/runners/job/manager.py
index 991d2fe..ae8eb94 100644
--- a/sdks/python/apache_beam/runners/job/manager.py
+++ b/sdks/python/apache_beam/runners/job/manager.py
@@ -18,6 +18,8 @@
"""A object to control to the Job API Co-Process
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/runners/job/utils.py b/sdks/python/apache_beam/runners/job/utils.py
index 3e34751..1a90adb 100644
--- a/sdks/python/apache_beam/runners/job/utils.py
+++ b/sdks/python/apache_beam/runners/job/utils.py
@@ -18,6 +18,8 @@
"""Utility functions for efficiently processing with the job API
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import json
@@ -27,8 +29,10 @@
def dict_to_struct(dict_obj):
+ # type: (dict) -> struct_pb2.Struct
return json_format.ParseDict(dict_obj, struct_pb2.Struct())
def struct_to_dict(struct_obj):
+ # type: (struct_pb2.Struct) -> dict
return json.loads(json_format.MessageToJson(struct_obj))
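
A quick round-trip example for the two helpers typed above; note that numeric values come back as floats because protobuf Struct stores all numbers as doubles.

from apache_beam.runners.job import utils

options = {'job_name': 'wordcount', 'streaming': False, 'parallelism': 4}
struct = utils.dict_to_struct(options)   # dict -> struct_pb2.Struct
restored = utils.struct_to_dict(struct)  # struct_pb2.Struct -> dict
print(restored)  # {'job_name': 'wordcount', 'streaming': False, 'parallelism': 4.0}
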
diff --git a/sdks/python/apache_beam/runners/pipeline_context.py b/sdks/python/apache_beam/runners/pipeline_context.py
index e4328df..b3c4d5b 100644
--- a/sdks/python/apache_beam/runners/pipeline_context.py
+++ b/sdks/python/apache_beam/runners/pipeline_context.py
@@ -20,9 +20,17 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Dict
+from typing import Mapping
+from typing import Optional
+from typing import Union
from apache_beam import coders
from apache_beam import pipeline
@@ -34,6 +42,11 @@
from apache_beam.transforms import environments
from apache_beam.typehints import native_type_compatibility
+if TYPE_CHECKING:
+ from google.protobuf import message # pylint: disable=ungrouped-imports
+ from apache_beam.coders.coder_impl import IterableStateReader
+ from apache_beam.coders.coder_impl import IterableStateWriter
+
class _PipelineContextMap(object):
"""This is a bi-directional map between objects and ids.
@@ -41,16 +54,22 @@
Under the hood it encodes and decodes these objects into runner API
representations.
"""
- def __init__(self, context, obj_type, namespace, proto_map=None):
+ def __init__(self,
+ context,
+ obj_type,
+ namespace, # type: str
+ proto_map=None # type: Optional[Mapping[str, message.Message]]
+ ):
self._pipeline_context = context
self._obj_type = obj_type
self._namespace = namespace
- self._obj_to_id = {}
- self._id_to_obj = {}
+ self._obj_to_id = {} # type: Dict[Any, str]
+ self._id_to_obj = {} # type: Dict[str, Any]
self._id_to_proto = dict(proto_map) if proto_map else {}
self._counter = 0
def _unique_ref(self, obj=None, label=None):
+ # type: (Optional[Any], Optional[str]) -> str
self._counter += 1
return "%s_%s_%s_%d" % (
self._namespace,
@@ -59,10 +78,12 @@
self._counter)
def populate_map(self, proto_map):
+ # type: (Mapping[str, message.Message]) -> None
for id, proto in self._id_to_proto.items():
proto_map[id].CopyFrom(proto)
def get_id(self, obj, label=None):
+ # type: (Any, Optional[str]) -> str
if obj not in self._obj_to_id:
id = self._unique_ref(obj, label)
self._id_to_obj[id] = obj
@@ -71,15 +92,18 @@
return self._obj_to_id[obj]
def get_proto(self, obj, label=None):
+ # type: (Any, Optional[str]) -> message.Message
return self._id_to_proto[self.get_id(obj, label)]
def get_by_id(self, id):
+ # type: (str) -> Any
if id not in self._id_to_obj:
self._id_to_obj[id] = self._obj_type.from_runner_api(
self._id_to_proto[id], self._pipeline_context)
return self._id_to_obj[id]
def get_by_proto(self, maybe_new_proto, label=None, deduplicate=False):
+ # type: (message.Message, Optional[str], bool) -> str
if deduplicate:
for id, proto in self._id_to_proto.items():
if proto == maybe_new_proto:
@@ -87,18 +111,22 @@
return self.put_proto(self._unique_ref(label), maybe_new_proto)
def get_id_to_proto_map(self):
+ # type: () -> Dict[str, message.Message]
return self._id_to_proto
def put_proto(self, id, proto):
+ # type: (str, message.Message) -> str
if id in self._id_to_proto:
raise ValueError("Id '%s' is already taken." % id)
self._id_to_proto[id] = proto
return id
def __getitem__(self, id):
+ # type: (str) -> Any
return self.get_by_id(id)
def __contains__(self, id):
+ # type: (str) -> bool
return id in self._id_to_proto
@@ -116,10 +144,15 @@
'environments': environments.Environment,
}
- def __init__(
- self, proto=None, default_environment=None, use_fake_coders=False,
- iterable_state_read=None, iterable_state_write=None,
- namespace='ref', allow_proto_holders=False):
+ def __init__(self,
+ proto=None, # type: Optional[Union[beam_runner_api_pb2.Components, beam_fn_api_pb2.ProcessBundleDescriptor]]
+ default_environment=None, # type: Optional[environments.Environment]
+ use_fake_coders=False,
+ iterable_state_read=None, # type: Optional[IterableStateReader]
+ iterable_state_write=None, # type: Optional[IterableStateWriter]
+ namespace='ref',
+ allow_proto_holders=False
+ ):
if isinstance(proto, beam_fn_api_pb2.ProcessBundleDescriptor):
proto = beam_runner_api_pb2.Components(
coders=dict(proto.coders.items()),
@@ -144,12 +177,14 @@
# as well as performing a round-trip through protos.
# TODO(BEAM-2717): Remove once this is no longer needed.
def coder_id_from_element_type(self, element_type):
+ # type: (Any) -> str
if self.use_fake_coders:
return pickler.dumps(element_type)
else:
return self.coders.get_id(coders.registry.get_coder(element_type))
def element_type_from_coder_id(self, coder_id):
+ # type: (str) -> Any
if self.use_fake_coders or coder_id not in self.coders:
return pickler.loads(coder_id)
else:
@@ -158,13 +193,16 @@
@staticmethod
def from_runner_api(proto):
+ # type: (beam_runner_api_pb2.Components) -> PipelineContext
return PipelineContext(proto)
def to_runner_api(self):
+ # type: () -> beam_runner_api_pb2.Components
context_proto = beam_runner_api_pb2.Components()
for name in self._COMPONENT_TYPES:
getattr(self, name).populate_map(getattr(context_proto, name))
return context_proto
def default_environment_id(self):
+ # type: () -> Optional[str]
return self._default_environment_id
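
The newly typed _PipelineContextMap is a bi-directional object/id map. A small sketch of the round trip through the public component maps on PipelineContext; StrUtf8Coder is just an arbitrary coder to register.

from apache_beam import coders
from apache_beam.runners import pipeline_context

context = pipeline_context.PipelineContext()
coder = coders.StrUtf8Coder()

coder_id = context.coders.get_id(coder)          # object -> stable string id
same_coder = context.coders.get_by_id(coder_id)  # id -> the registered object
assert same_coder is coder
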
diff --git a/sdks/python/apache_beam/runners/pipeline_context_test.py b/sdks/python/apache_beam/runners/pipeline_context_test.py
index 6f1ec74..697ea6d 100644
--- a/sdks/python/apache_beam/runners/pipeline_context_test.py
+++ b/sdks/python/apache_beam/runners/pipeline_context_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the windowing classes."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/runners/portability/abstract_job_service.py b/sdks/python/apache_beam/runners/portability/abstract_job_service.py
index b86d0f5..50532f2 100644
--- a/sdks/python/apache_beam/runners/portability/abstract_job_service.py
+++ b/sdks/python/apache_beam/runners/portability/abstract_job_service.py
@@ -14,19 +14,40 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import itertools
+import json
import logging
+import shutil
+import tempfile
import uuid
+import zipfile
from builtins import object
+from concurrent import futures
+from typing import TYPE_CHECKING
+from typing import Dict
+from typing import Iterator
+from typing import Optional
+from typing import Union
+import grpc
+from google.protobuf import json_format
from google.protobuf import timestamp_pb2
+from apache_beam.portability.api import beam_artifact_api_pb2_grpc
from apache_beam.portability.api import beam_job_api_pb2
from apache_beam.portability.api import beam_job_api_pb2_grpc
+from apache_beam.portability.api import endpoints_pb2
+from apache_beam.runners.portability import artifact_service
from apache_beam.utils.timestamp import Timestamp
+if TYPE_CHECKING:
+ from google.protobuf import struct_pb2 # pylint: disable=ungrouped-imports
+ from apache_beam.portability.api import beam_runner_api_pb2
+
_LOGGER = logging.getLogger(__name__)
@@ -50,13 +71,24 @@
Servicer for the Beam Job API.
"""
def __init__(self):
- self._jobs = {}
+ self._jobs = {} # type: Dict[str, AbstractBeamJob]
- def create_beam_job(self, preparation_id, job_name, pipeline, options):
+ def create_beam_job(self,
+ preparation_id, # type: str
+ job_name, # type: str
+ pipeline, # type: beam_runner_api_pb2.Pipeline
+ options # type: struct_pb2.Struct
+ ):
+ # type: (...) -> AbstractBeamJob
"""Returns an instance of AbstractBeamJob specific to this servicer."""
raise NotImplementedError(type(self))
- def Prepare(self, request, context=None, timeout=None):
+ def Prepare(self,
+ request, # type: beam_job_api_pb2.PrepareJobRequest
+ context=None,
+ timeout=None
+ ):
+ # type: (...) -> beam_job_api_pb2.PrepareJobResponse
_LOGGER.debug('Got Prepare request.')
preparation_id = '%s-%s' % (request.job_name, uuid.uuid4())
self._jobs[preparation_id] = self.create_beam_job(
@@ -72,31 +104,56 @@
preparation_id].artifact_staging_endpoint(),
staging_session_token=preparation_id)
- def Run(self, request, context=None, timeout=None):
+ def Run(self,
+ request, # type: beam_job_api_pb2.RunJobRequest
+ context=None,
+ timeout=None
+ ):
+ # type: (...) -> beam_job_api_pb2.RunJobResponse
# For now, just use the preparation id as the job id.
job_id = request.preparation_id
_LOGGER.info("Running job '%s'", job_id)
self._jobs[job_id].run()
return beam_job_api_pb2.RunJobResponse(job_id=job_id)
- def GetJobs(self, request, context=None, timeout=None):
+ def GetJobs(self,
+ request, # type: beam_job_api_pb2.GetJobsRequest
+ context=None,
+ timeout=None
+ ):
+ # type: (...) -> beam_job_api_pb2.GetJobsResponse
return beam_job_api_pb2.GetJobsResponse(
job_info=[job.to_runner_api() for job in self._jobs.values()])
- def GetState(self, request, context=None):
+ def GetState(self,
+ request, # type: beam_job_api_pb2.GetJobStateRequest
+ context=None
+ ):
+ # type: (...) -> beam_job_api_pb2.GetJobStateResponse
return beam_job_api_pb2.JobStateEvent(
state=self._jobs[request.job_id].get_state())
- def GetPipeline(self, request, context=None, timeout=None):
+ def GetPipeline(self,
+ request, # type: beam_job_api_pb2.GetJobPipelineRequest
+ context=None,
+ timeout=None
+ ):
+ # type: (...) -> beam_job_api_pb2.GetJobPipelineResponse
return beam_job_api_pb2.GetJobPipelineResponse(
pipeline=self._jobs[request.job_id].get_pipeline())
- def Cancel(self, request, context=None, timeout=None):
+ def Cancel(self,
+ request, # type: beam_job_api_pb2.CancelJobRequest
+ context=None,
+ timeout=None
+ ):
+ # type: (...) -> beam_job_api_pb2.CancelJobResponse
self._jobs[request.job_id].cancel()
return beam_job_api_pb2.CancelJobResponse(
state=self._jobs[request.job_id].get_state())
def GetStateStream(self, request, context=None, timeout=None):
+ # type: (...) -> Iterator[beam_job_api_pb2.GetJobStateResponse]
"""Yields state transitions since the stream started.
"""
if request.job_id not in self._jobs:
@@ -107,6 +164,7 @@
yield make_state_event(state, timestamp)
def GetMessageStream(self, request, context=None, timeout=None):
+ # type: (...) -> Iterator[beam_job_api_pb2.JobMessagesResponse]
"""Yields messages since the stream started.
"""
if request.job_id not in self._jobs:
@@ -122,13 +180,19 @@
yield resp
def DescribePipelineOptions(self, request, context=None, timeout=None):
+ # type: (...) -> beam_job_api_pb2.DescribePipelineOptionsResponse
return beam_job_api_pb2.DescribePipelineOptionsResponse()
class AbstractBeamJob(object):
"""Abstract baseclass for managing a single Beam job."""
- def __init__(self, job_id, job_name, pipeline, options):
+ def __init__(self,
+ job_id, # type: str
+ job_name, # type: str
+ pipeline, # type: beam_runner_api_pb2.Pipeline
+ options # type: struct_pb2.Struct
+ ):
self._job_id = job_id
self._job_name = job_name
self._pipeline_proto = pipeline
@@ -136,12 +200,31 @@
self._state_history = [(beam_job_api_pb2.JobState.STOPPED,
Timestamp.now())]
- def _to_implement(self):
+ def prepare(self):
+ # type: () -> None
+ """Called immediately after this class is instantiated"""
raise NotImplementedError(self)
- prepare = run = cancel = _to_implement
- artifact_staging_endpoint = _to_implement
- get_state_stream = get_message_stream = _to_implement
+ def run(self):
+ # type: () -> None
+ raise NotImplementedError(self)
+
+ def cancel(self):
+ # type: () -> Optional[beam_job_api_pb2.JobState.Enum]
+ raise NotImplementedError(self)
+
+ def artifact_staging_endpoint(self):
+ # type: () -> Optional[endpoints_pb2.ApiServiceDescriptor]
+ raise NotImplementedError(self)
+
+ def get_state_stream(self):
+ # type: () -> Iterator[Optional[beam_job_api_pb2.JobState.Enum]]
+ raise NotImplementedError(self)
+
+ def get_message_stream(self):
+ # type: () -> Iterator[Union[int, Optional[beam_job_api_pb2.JobMessage]]]
+ raise NotImplementedError(self)
+
@property
def state(self):
@@ -173,6 +256,7 @@
return itertools.chain(self._state_history[:], state_stream)
def get_pipeline(self):
+ # type: () -> beam_runner_api_pb2.Pipeline
return self._pipeline_proto
@staticmethod
@@ -181,8 +265,76 @@
return state in portable_runner.TERMINAL_STATES
def to_runner_api(self):
+ # type: () -> beam_job_api_pb2.JobInfo
return beam_job_api_pb2.JobInfo(
job_id=self._job_id,
job_name=self._job_name,
pipeline_options=self._pipeline_options,
state=self.state)
+
+
+class UberJarBeamJob(AbstractBeamJob):
+ """Abstract baseclass for creating a Beam job. The resulting job will be
+ packaged and run in an executable uber jar."""
+
+ # These must agree with those defined in PortablePipelineJarUtils.java.
+ PIPELINE_FOLDER = 'BEAM-PIPELINE'
+ PIPELINE_MANIFEST = PIPELINE_FOLDER + '/pipeline-manifest.json'
+
+ # We only stage a single pipeline in the jar.
+ PIPELINE_NAME = 'pipeline'
+ PIPELINE_PATH = '/'.join(
+ [PIPELINE_FOLDER, PIPELINE_NAME, "pipeline.json"])
+ PIPELINE_OPTIONS_PATH = '/'.join(
+ [PIPELINE_FOLDER, PIPELINE_NAME, 'pipeline-options.json'])
+ ARTIFACT_MANIFEST_PATH = '/'.join(
+ [PIPELINE_FOLDER, PIPELINE_NAME, 'artifact-manifest.json'])
+ ARTIFACT_FOLDER = '/'.join([PIPELINE_FOLDER, PIPELINE_NAME, 'artifacts'])
+
+ def __init__(
+ self, executable_jar, job_id, job_name, pipeline, options,
+ artifact_port=0):
+ super(UberJarBeamJob, self).__init__(job_id, job_name, pipeline, options)
+ self._executable_jar = executable_jar
+ self._jar_uploaded = False
+ self._artifact_port = artifact_port
+
+ def prepare(self):
+ # Copy the executable jar, injecting the pipeline and options as resources.
+ with tempfile.NamedTemporaryFile(suffix='.jar') as tout:
+ self._jar = tout.name
+ shutil.copy(self._executable_jar, self._jar)
+ with zipfile.ZipFile(self._jar, 'a', compression=zipfile.ZIP_DEFLATED) as z:
+ with z.open(self.PIPELINE_PATH, 'w') as fout:
+ fout.write(json_format.MessageToJson(
+ self._pipeline_proto).encode('utf-8'))
+ with z.open(self.PIPELINE_OPTIONS_PATH, 'w') as fout:
+ fout.write(json_format.MessageToJson(
+ self._pipeline_options).encode('utf-8'))
+ with z.open(self.PIPELINE_MANIFEST, 'w') as fout:
+ fout.write(json.dumps(
+ {'defaultJobName': self.PIPELINE_NAME}).encode('utf-8'))
+ self._start_artifact_service(self._jar, self._artifact_port)
+
+ def _start_artifact_service(self, jar, requested_port):
+ self._artifact_staging_service = artifact_service.ZipFileArtifactService(
+ jar, self.ARTIFACT_FOLDER)
+ self._artifact_staging_server = grpc.server(futures.ThreadPoolExecutor())
+ port = self._artifact_staging_server.add_insecure_port(
+ '[::]:%s' % requested_port)
+ beam_artifact_api_pb2_grpc.add_ArtifactStagingServiceServicer_to_server(
+ self._artifact_staging_service, self._artifact_staging_server)
+ self._artifact_staging_endpoint = endpoints_pb2.ApiServiceDescriptor(
+ url='localhost:%d' % port)
+ self._artifact_staging_server.start()
+ _LOGGER.info('Artifact server started on port %s', port)
+ return port
+
+ def _stop_artifact_service(self):
+ self._artifact_staging_server.stop(1)
+ self._artifact_staging_service.close()
+ self._artifact_manifest_location = (
+ self._artifact_staging_service.retrieval_token(self._job_id))
+
+ def artifact_staging_endpoint(self):
+ return self._artifact_staging_endpoint
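
The prepare() step above stages everything into a copy of the executable jar by appending zip entries. A standalone sketch of that injection step, with placeholder constants and payloads; the real job serializes protos with json_format rather than plain dicts.

import json
import shutil
import tempfile
import zipfile

PIPELINE_FOLDER = 'BEAM-PIPELINE'
PIPELINE_NAME = 'pipeline'
PIPELINE_PATH = '/'.join([PIPELINE_FOLDER, PIPELINE_NAME, 'pipeline.json'])
PIPELINE_MANIFEST = PIPELINE_FOLDER + '/pipeline-manifest.json'


def inject_pipeline(source_jar, pipeline_dict):
  """Copies source_jar and appends the pipeline payloads as zip resources."""
  with tempfile.NamedTemporaryFile(suffix='.jar', delete=False) as tout:
    staged_jar = tout.name
  shutil.copy(source_jar, staged_jar)
  # Mode 'a' keeps the original jar entries and adds the new resources.
  with zipfile.ZipFile(staged_jar, 'a', compression=zipfile.ZIP_DEFLATED) as z:
    with z.open(PIPELINE_PATH, 'w') as fout:
      fout.write(json.dumps(pipeline_dict).encode('utf-8'))
    with z.open(PIPELINE_MANIFEST, 'w') as fout:
      fout.write(
          json.dumps({'defaultJobName': PIPELINE_NAME}).encode('utf-8'))
  return staged_jar
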
diff --git a/sdks/python/apache_beam/runners/portability/artifact_service.py b/sdks/python/apache_beam/runners/portability/artifact_service.py
index 1ba9602..17f9b70 100644
--- a/sdks/python/apache_beam/runners/portability/artifact_service.py
+++ b/sdks/python/apache_beam/runners/portability/artifact_service.py
@@ -18,6 +18,8 @@
The staging service here can be backed by any beam filesystem.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -26,6 +28,7 @@
import sys
import threading
import zipfile
+from typing import Iterator
from google.protobuf import json_format
@@ -48,39 +51,49 @@
return hashlib.sha256(string.encode('utf-8')).hexdigest()
def _join(self, *args):
+ # type: (*str) -> str
raise NotImplementedError(type(self))
def _dirname(self, path):
+ # type: (str) -> str
raise NotImplementedError(type(self))
def _temp_path(self, path):
+ # type: (str) -> str
return path + '.tmp'
def _open(self, path, mode):
raise NotImplementedError(type(self))
def _rename(self, src, dest):
+ # type: (str, str) -> None
raise NotImplementedError(type(self))
def _delete(self, path):
+ # type: (str) -> None
raise NotImplementedError(type(self))
def _artifact_path(self, retrieval_token, name):
+ # type: (str, str) -> str
return self._join(self._dirname(retrieval_token), self._sha256(name))
def _manifest_path(self, retrieval_token):
+ # type: (str) -> str
return retrieval_token
def _get_manifest_proxy(self, retrieval_token):
+ # type: (str) -> beam_artifact_api_pb2.ProxyManifest
with self._open(self._manifest_path(retrieval_token), 'r') as fin:
return json_format.Parse(
fin.read().decode('utf-8'), beam_artifact_api_pb2.ProxyManifest())
def retrieval_token(self, staging_session_token):
+ # type: (str) -> str
return self._join(
self._root, self._sha256(staging_session_token), 'MANIFEST')
def PutArtifact(self, request_iterator, context=None):
+ # type: (...) -> beam_artifact_api_pb2.PutArtifactResponse
first = True
for request in request_iterator:
if first:
@@ -104,7 +117,10 @@
self._rename(temp_path, artifact_path)
return beam_artifact_api_pb2.PutArtifactResponse()
- def CommitManifest(self, request, context=None):
+ def CommitManifest(self,
+ request, # type: beam_artifact_api_pb2.CommitManifestRequest
+ context=None):
+ # type: (...) -> beam_artifact_api_pb2.CommitManifestResponse
retrieval_token = self.retrieval_token(request.staging_session_token)
proxy_manifest = beam_artifact_api_pb2.ProxyManifest(
manifest=request.manifest,
@@ -118,11 +134,17 @@
return beam_artifact_api_pb2.CommitManifestResponse(
retrieval_token=retrieval_token)
- def GetManifest(self, request, context=None):
+ def GetManifest(self,
+ request, # type: beam_artifact_api_pb2.GetManifestRequest
+ context=None):
+ # type: (...) -> beam_artifact_api_pb2.GetManifestResponse
return beam_artifact_api_pb2.GetManifestResponse(
manifest=self._get_manifest_proxy(request.retrieval_token).manifest)
- def GetArtifact(self, request, context=None):
+ def GetArtifact(self,
+ request, # type: beam_artifact_api_pb2.GetArtifactRequest
+ context=None):
+ # type: (...) -> Iterator[beam_artifact_api_pb2.ArtifactChunk]
for artifact in self._get_manifest_proxy(request.retrieval_token).location:
if artifact.name == request.name:
with self._open(artifact.uri, 'r') as fin:
@@ -156,18 +178,23 @@
self._lock = threading.Lock()
def _join(self, *args):
+ # type: (*str) -> str
return '/'.join(args)
def _dirname(self, path):
+ # type: (str) -> str
return path.rsplit('/', 1)[0]
def _temp_path(self, path):
+ # type: (str) -> str
return path # ZipFile offers no move operation.
def _rename(self, src, dest):
+ # type: (str, str) -> None
assert src == dest
def _delete(self, path):
+ # type: (str) -> None
# ZipFile offers no delete operation: https://bugs.python.org/issue6818
pass
@@ -209,15 +236,19 @@
class BeamFilesystemArtifactService(AbstractArtifactService):
def _join(self, *args):
+ # type: (*str) -> str
return filesystems.FileSystems.join(*args)
def _dirname(self, path):
+ # type: (str) -> str
return filesystems.FileSystems.split(path)[0]
def _rename(self, src, dest):
+ # type: (str, str) -> None
filesystems.FileSystems.rename([src], [dest])
def _delete(self, path):
+ # type: (str) -> None
filesystems.FileSystems.delete([path])
def _open(self, path, mode='r'):
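
The path helpers typed above all compose the same layout: a staging-session token hashes to a directory that holds a MANIFEST plus one file per artifact, keyed by the hash of the artifact name. A standalone illustration with placeholder values:

import hashlib


def sha256(s):
  return hashlib.sha256(s.encode('utf-8')).hexdigest()


root = '/tmp/staging'                 # placeholder staging root
staging_session_token = 'session-42'  # placeholder token
artifact_name = 'dependencies.tar.gz'

# retrieval_token: <root>/<sha256(staging token)>/MANIFEST
retrieval_token = '/'.join([root, sha256(staging_session_token), 'MANIFEST'])
# artifact path: sibling of the manifest, keyed by the hash of its name.
artifact_path = '/'.join(
    [retrieval_token.rsplit('/', 1)[0], sha256(artifact_name)])

print(retrieval_token)
print(artifact_path)
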
diff --git a/sdks/python/apache_beam/runners/portability/artifact_service_test.py b/sdks/python/apache_beam/runners/portability/artifact_service_test.py
index 6efb60d..9c59a6b 100644
--- a/sdks/python/apache_beam/runners/portability/artifact_service_test.py
+++ b/sdks/python/apache_beam/runners/portability/artifact_service_test.py
@@ -15,6 +15,8 @@
#
"""Test cases for :module:`artifact_service_client`."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/portability/expansion_service.py b/sdks/python/apache_beam/runners/portability/expansion_service.py
index 55a526d..d47e59e 100644
--- a/sdks/python/apache_beam/runners/portability/expansion_service.py
+++ b/sdks/python/apache_beam/runners/portability/expansion_service.py
@@ -17,6 +17,8 @@
"""A PipelineExpansion service.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/portability/expansion_service_test.py b/sdks/python/apache_beam/runners/portability/expansion_service_test.py
index 8426311..2919d2f 100644
--- a/sdks/python/apache_beam/runners/portability/expansion_service_test.py
+++ b/sdks/python/apache_beam/runners/portability/expansion_service_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/runners/portability/flink_runner.py b/sdks/python/apache_beam/runners/portability/flink_runner.py
index 4941cec..8096a3a 100644
--- a/sdks/python/apache_beam/runners/portability/flink_runner.py
+++ b/sdks/python/apache_beam/runners/portability/flink_runner.py
@@ -17,6 +17,8 @@
"""A runner for executing portable pipelines on Flink."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/portability/flink_runner_test.py b/sdks/python/apache_beam/runners/portability/flink_runner_test.py
index 5102290..7d750df 100644
--- a/sdks/python/apache_beam/runners/portability/flink_runner_test.py
+++ b/sdks/python/apache_beam/runners/portability/flink_runner_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/portability/flink_uber_jar_job_server.py b/sdks/python/apache_beam/runners/portability/flink_uber_jar_job_server.py
index b318971..5f0ae78 100644
--- a/sdks/python/apache_beam/runners/portability/flink_uber_jar_job_server.py
+++ b/sdks/python/apache_beam/runners/portability/flink_uber_jar_job_server.py
@@ -17,28 +17,22 @@
"""A job server submitting portable pipelines as uber jars to Flink."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
-import json
import logging
import os
-import shutil
import tempfile
import time
import zipfile
-from concurrent import futures
-import grpc
import requests
-from google.protobuf import json_format
from apache_beam.options import pipeline_options
-from apache_beam.portability.api import beam_artifact_api_pb2_grpc
from apache_beam.portability.api import beam_job_api_pb2
-from apache_beam.portability.api import endpoints_pb2
from apache_beam.runners.portability import abstract_job_service
-from apache_beam.runners.portability import artifact_service
from apache_beam.runners.portability import job_server
_LOGGER = logging.getLogger(__name__)
@@ -89,77 +83,24 @@
artifact_port=self._artifact_port)
-class FlinkBeamJob(abstract_job_service.AbstractBeamJob):
+class FlinkBeamJob(abstract_job_service.UberJarBeamJob):
"""Runs a single Beam job on Flink by staging all contents into a Jar
and uploading it via the Flink Rest API."""
- # These must agree with those defined in PortablePipelineJarUtils.java.
- PIPELINE_FOLDER = 'BEAM-PIPELINE'
- PIPELINE_MANIFEST = PIPELINE_FOLDER + '/pipeline-manifest.json'
-
- # We only stage a single pipeline in the jar.
- PIPELINE_NAME = 'pipeline'
- PIPELINE_PATH = '/'.join(
- [PIPELINE_FOLDER, PIPELINE_NAME, "pipeline.json"])
- PIPELINE_OPTIONS_PATH = '/'.join(
- [PIPELINE_FOLDER, PIPELINE_NAME, 'pipeline-options.json'])
- ARTIFACT_MANIFEST_PATH = '/'.join(
- [PIPELINE_FOLDER, PIPELINE_NAME, 'artifact-manifest.json'])
- ARTIFACT_FOLDER = '/'.join([PIPELINE_FOLDER, PIPELINE_NAME, 'artifacts'])
-
def __init__(
self, master_url, executable_jar, job_id, job_name, pipeline, options,
artifact_port=0):
- super(FlinkBeamJob, self).__init__(job_id, job_name, pipeline, options)
+ super(FlinkBeamJob, self).__init__(
+ executable_jar, job_id, job_name, pipeline, options,
+ artifact_port=artifact_port)
self._master_url = master_url
- self._executable_jar = executable_jar
- self._jar_uploaded = False
- self._artifact_port = artifact_port
-
- def prepare(self):
- # Copy the executable jar, injecting the pipeline and options as resources.
- with tempfile.NamedTemporaryFile(suffix='.jar') as tout:
- self._jar = tout.name
- shutil.copy(self._executable_jar, self._jar)
- with zipfile.ZipFile(self._jar, 'a', compression=zipfile.ZIP_DEFLATED) as z:
- with z.open(self.PIPELINE_PATH, 'w') as fout:
- fout.write(json_format.MessageToJson(
- self._pipeline_proto).encode('utf-8'))
- with z.open(self.PIPELINE_OPTIONS_PATH, 'w') as fout:
- fout.write(json_format.MessageToJson(
- self._pipeline_options).encode('utf-8'))
- with z.open(self.PIPELINE_MANIFEST, 'w') as fout:
- fout.write(json.dumps(
- {'defaultJobName': self.PIPELINE_NAME}).encode('utf-8'))
- self._start_artifact_service(self._jar, self._artifact_port)
-
- def _start_artifact_service(self, jar, requested_port):
- self._artifact_staging_service = artifact_service.ZipFileArtifactService(
- jar, self.ARTIFACT_FOLDER)
- self._artifact_staging_server = grpc.server(futures.ThreadPoolExecutor())
- port = self._artifact_staging_server.add_insecure_port(
- '[::]:%s' % requested_port)
- beam_artifact_api_pb2_grpc.add_ArtifactStagingServiceServicer_to_server(
- self._artifact_staging_service, self._artifact_staging_server)
- self._artifact_staging_endpoint = endpoints_pb2.ApiServiceDescriptor(
- url='localhost:%d' % port)
- self._artifact_staging_server.start()
- _LOGGER.info('Artifact server started on port %s', port)
- return port
-
- def _stop_artifact_service(self):
- self._artifact_staging_server.stop(1)
- self._artifact_staging_service.close()
- self._artifact_manifest_location = (
- self._artifact_staging_service.retrieval_token(self._job_id))
-
- def artifact_staging_endpoint(self):
- return self._artifact_staging_endpoint
def request(self, method, path, expected_status=200, **kwargs):
- response = method('%s/%s' % (self._master_url, path), **kwargs)
+ url = '%s/%s' % (self._master_url, path)
+ response = method(url, **kwargs)
if response.status_code != expected_status:
- raise RuntimeError(response.text)
+ raise RuntimeError("Request to %s failed with status %d: %s" %
+ (url, response.status_code, response.text))
if response.text:
return response.json()
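
The reworked request() helper now names the URL and status code in the error instead of only echoing the response body. A standalone sketch of the same pattern against an assumed Flink REST endpoint:

import requests


def rest_request(master_url, method, path, expected_status=200, **kwargs):
  url = '%s/%s' % (master_url, path)
  response = method(url, **kwargs)
  if response.status_code != expected_status:
    # Include the failing URL and status code, not just the response text.
    raise RuntimeError('Request to %s failed with status %d: %s' %
                       (url, response.status_code, response.text))
  if response.text:
    return response.json()


# Hypothetical usage against a local Flink REST API:
# jars = rest_request('http://localhost:8081', requests.get, 'v1/jars')
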
diff --git a/sdks/python/apache_beam/runners/portability/flink_uber_jar_job_server_test.py b/sdks/python/apache_beam/runners/portability/flink_uber_jar_job_server_test.py
index 2c24cbd..ca5bfef 100644
--- a/sdks/python/apache_beam/runners/portability/flink_uber_jar_job_server_test.py
+++ b/sdks/python/apache_beam/runners/portability/flink_uber_jar_job_server_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py
index 71343e5..6100efa 100644
--- a/sdks/python/apache_beam/runners/portability/fn_api_runner.py
+++ b/sdks/python/apache_beam/runners/portability/fn_api_runner.py
@@ -17,6 +17,8 @@
"""A PipelineRunner using the SDK harness.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
@@ -33,6 +35,21 @@
import time
import uuid
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Callable
+from typing import DefaultDict
+from typing import Dict
+from typing import Iterable
+from typing import Iterator
+from typing import List
+from typing import Mapping
+from typing import Optional
+from typing import Sequence
+from typing import Tuple
+from typing import Type
+from typing import TypeVar
+from typing import Union
import grpc
@@ -79,6 +96,27 @@
from apache_beam.utils import windowed_value
from apache_beam.utils.thread_pool_executor import UnboundedThreadPoolExecutor
+if TYPE_CHECKING:
+ from google.protobuf import message # pylint: disable=ungrouped-imports
+ from apache_beam.pipeline import Pipeline
+ from apache_beam.coders.coder_impl import CoderImpl
+ from apache_beam.coders.coder_impl import WindowedValueCoderImpl
+ from apache_beam.portability.api import metrics_pb2
+ from apache_beam.transforms.window import BoundedWindow
+
+T = TypeVar('T')
+ConstructorFn = Callable[
+ [Union['message.Message', bytes],
+ 'FnApiRunner.StateServicer',
+ Optional['ExtendedProvisionInfo'],
+ 'GrpcServer'],
+ 'WorkerHandler']
+DataSideInput = Dict[Tuple[str, str],
+ Tuple[bytes, beam_runner_api_pb2.FunctionSpec]]
+DataOutput = Dict[str, bytes]
+BundleProcessResult = Tuple[beam_fn_api_pb2.InstructionResponse,
+ List[beam_fn_api_pb2.ProcessBundleSplitResponse]]
+
# This module is experimental. No backwards-compatibility guarantees.
ENCODED_IMPULSE_VALUE = beam.coders.WindowedValueCoder(
@@ -91,6 +129,9 @@
# The cache is disabled in production for other runners.
STATE_CACHE_SIZE = 100
+# Time-based flush is enabled in the fn_api_runner by default.
+DATA_BUFFER_TIME_LIMIT_MS = 1000
+
_LOGGER = logging.getLogger(__name__)
@@ -100,9 +141,9 @@
_lock = threading.Lock()
def __init__(self):
- self._push_queue = queue.Queue()
- self._input = None
- self._futures_by_id = dict()
+ self._push_queue = queue.Queue() # type: queue.Queue[beam_fn_api_pb2.InstructionRequest]
+ self._input = None # type: Optional[Iterable[beam_fn_api_pb2.InstructionResponse]]
+ self._futures_by_id = dict() # type: Dict[str, ControlFuture]
self._read_thread = threading.Thread(
name='beam_control_read', target=self._read)
self._state = BeamFnControlServicer.UNSTARTED_STATE
@@ -112,6 +153,7 @@
self._futures_by_id.pop(data.instruction_id).set(data)
def push(self, req):
+ # type: (...) -> Optional[ControlFuture]
if req == BeamFnControlServicer._DONE_MARKER:
self._push_queue.put(req)
return None
@@ -125,9 +167,11 @@
return future
def get_req(self):
+ # type: () -> beam_fn_api_pb2.InstructionRequest
return self._push_queue.get()
def set_input(self, input):
+ # type: (Iterable[beam_fn_api_pb2.InstructionResponse]) -> None
with ControlConnection._lock:
if self._input:
raise RuntimeError('input is already set.')
@@ -136,6 +180,7 @@
self._state = BeamFnControlServicer.STARTED_STATE
def close(self):
+ # type: () -> None
with ControlConnection._lock:
if self._state == BeamFnControlServicer.STARTED_STATE:
self.push(BeamFnControlServicer._DONE_MARKER)
@@ -161,13 +206,18 @@
self._req_sent = collections.defaultdict(int)
self._req_worker_mapping = {}
self._log_req = logging.getLogger().getEffectiveLevel() <= logging.DEBUG
- self._connections_by_worker_id = collections.defaultdict(ControlConnection)
+ self._connections_by_worker_id = collections.defaultdict(ControlConnection) # type: DefaultDict[str, ControlConnection]
def get_conn_by_worker_id(self, worker_id):
+ # type: (str) -> ControlConnection
with self._lock:
return self._connections_by_worker_id[worker_id]
- def Control(self, iterator, context):
+ def Control(self,
+ iterator, # type: Iterable[beam_fn_api_pb2.InstructionResponse]
+ context
+ ):
+ # type: (...) -> Iterator[beam_fn_api_pb2.InstructionRequest]
with self._lock:
if self._state == self.DONE_STATE:
return
@@ -202,20 +252,27 @@
class _ListBuffer(list):
"""Used to support parititioning of a list."""
def partition(self, n):
+ # type: (int) -> List[List[bytes]]
return [self[k::n] for k in range(n)]
class _GroupingBuffer(object):
"""Used to accumulate groupded (shuffled) results."""
- def __init__(self, pre_grouped_coder, post_grouped_coder, windowing):
+ def __init__(self,
+ pre_grouped_coder, # type: coders.Coder
+ post_grouped_coder, # type: coders.Coder
+ windowing
+ ):
+ # type: (...) -> None
self._key_coder = pre_grouped_coder.key_coder()
self._pre_grouped_coder = pre_grouped_coder
self._post_grouped_coder = post_grouped_coder
- self._table = collections.defaultdict(list)
+ self._table = collections.defaultdict(list) # type: Optional[DefaultDict[bytes, List[Any]]]
self._windowing = windowing
- self._grouped_output = None
+ self._grouped_output = None # type: Optional[List[List[bytes]]]
def append(self, elements_data):
+ # type: (bytes) -> None
if self._grouped_output:
raise RuntimeError('Grouping table append after read.')
input_stream = create_InputStream(elements_data)
@@ -232,6 +289,7 @@
else windowed_key_value.with_value(value))
def partition(self, n):
+ # type: (int) -> List[List[bytes]]
""" It is used to partition _GroupingBuffer to N parts. Once it is
partitioned, it would not be re-partitioned with diff N. Re-partition
is not supported now.
@@ -272,6 +330,7 @@
return self._grouped_output
def __iter__(self):
+ # type: () -> Iterator[bytes]
""" Since partition() returns a list of lists, add this __iter__ to return
a list to simplify code when we need to iterate through ALL elements of
_GroupingBuffer.
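The partition() methods annotated above slice buffered elements round-robin into n parts so that each worker receives roughly the same number of elements. A small standalone sketch of that slicing (the helper name is hypothetical; only the slicing trick is taken from the diff):

def partition(elements, n):
  # type: (list, int) -> list
  """Split elements into n parts by taking every n-th item."""
  return [elements[k::n] for k in range(n)]


assert partition([b'a', b'b', b'c', b'd', b'e'], 2) == [
    [b'a', b'c', b'e'], [b'b', b'd']]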
@@ -281,12 +340,16 @@
class _WindowGroupingBuffer(object):
"""Used to partition windowed side inputs."""
- def __init__(self, access_pattern, coder):
+ def __init__(self,
+ access_pattern,
+ coder # type: coders.WindowedValueCoder
+ ):
+ # type: (...) -> None
# Here's where we would use a different type of partitioning
# (e.g. also by key) for a different access pattern.
if access_pattern.urn == common_urns.side_inputs.ITERABLE.urn:
self._kv_extractor = lambda value: ('', value)
- self._key_coder = coders.SingletonCoder('')
+ self._key_coder = coders.SingletonCoder('') # type: coders.Coder
self._value_coder = coder.wrapped_value_coder
elif access_pattern.urn == common_urns.side_inputs.MULTIMAP.urn:
self._kv_extractor = lambda value: value
@@ -298,9 +361,10 @@
"Unknown access pattern: '%s'" % access_pattern.urn)
self._windowed_value_coder = coder
self._window_coder = coder.window_coder
- self._values_by_window = collections.defaultdict(list)
+ self._values_by_window = collections.defaultdict(list) # type: DefaultDict[Tuple[str, BoundedWindow], List[Any]]
def append(self, elements_data):
+ # type: (bytes) -> None
input_stream = create_InputStream(elements_data)
while input_stream.size() > 0:
windowed_value = self._windowed_value_coder.get_impl(
@@ -310,6 +374,7 @@
self._values_by_window[key, window].append(value)
def encoded_items(self):
+ # type: () -> Iterator[Tuple[bytes, bytes, bytes]]
value_coder_impl = self._value_coder.get_impl()
key_coder_impl = self._key_coder.get_impl()
for (key, window), values in self._values_by_window.items():
@@ -325,11 +390,12 @@
def __init__(
self,
- default_environment=None,
+ default_environment=None, # type: Optional[beam_runner_api_pb2.Environment]
bundle_repeat=0,
use_state_iterables=False,
- provision_info=None,
+ provision_info=None, # type: Optional[ExtendedProvisionInfo]
progress_request_frequency=None):
+ # type: (...) -> None
"""Creates a new Fn API Runner.
Args:
@@ -350,7 +416,7 @@
self._bundle_repeat = bundle_repeat
self._num_workers = 1
self._progress_frequency = progress_request_frequency
- self._profiler_factory = None
+ self._profiler_factory = None # type: Optional[Callable[..., profiler.Profile]]
self._use_state_iterables = use_state_iterables
self._provision_info = provision_info or ExtendedProvisionInfo(
beam_provision_api_pb2.ProvisionInfo(
@@ -362,7 +428,11 @@
self._last_uid += 1
return str(self._last_uid)
- def run_pipeline(self, pipeline, options):
+ def run_pipeline(self,
+ pipeline, # type: Pipeline
+ options # type: pipeline_options.PipelineOptions
+ ):
+ # type: (...) -> RunnerResult
RuntimeValueProvider.set_runtime_options({})
# Setup "beam_fn_api" experiment options if lacked.
@@ -382,6 +452,19 @@
pipeline_options.DirectOptions).direct_runner_bundle_repeat
self._num_workers = options.view_as(
pipeline_options.DirectOptions).direct_num_workers or self._num_workers
+
+ # Set the direct workers' running mode if it is defined in the pipeline options.
+ running_mode = \
+ options.view_as(pipeline_options.DirectOptions).direct_running_mode
+ if running_mode == 'multi_threading':
+ self._default_environment = environments.EmbeddedPythonGrpcEnvironment()
+ elif running_mode == 'multi_processing':
+ command_string = '%s -m apache_beam.runners.worker.sdk_worker_main' \
+ % sys.executable
+ self._default_environment = environments.SubprocessSDKEnvironment(
+ command_string=command_string
+ )
+
self._profiler_factory = profiler.Profile.factory_from_options(
options.view_as(pipeline_options.ProfilingOptions))
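The new block above selects the worker environment from DirectOptions: 'multi_threading' maps to the embedded gRPC environment and 'multi_processing' to a subprocess SDK worker. A usage sketch (not part of the patch), based on the options exercised by the test changes later in this diff:

from apache_beam.options.pipeline_options import PipelineOptions

options = PipelineOptions(
    direct_num_workers=2,
    direct_running_mode='multi_threading')  # or 'multi_processing'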
@@ -390,6 +473,7 @@
return self._latest_run_result
def run_via_runner_api(self, pipeline_proto):
+ # type: (beam_runner_api_pb2.Pipeline) -> RunnerResult
stage_context, stages = self.create_stages(pipeline_proto)
# TODO(pabloem, BEAM-7514): Create a watermark manager (that has access to
# the teststream (if any), and all the stages).
@@ -434,7 +518,10 @@
# Empty context.
yield
- def create_stages(self, pipeline_proto):
+ def create_stages(self,
+ pipeline_proto # type: beam_runner_api_pb2.Pipeline
+ ):
+ # type: (...) -> Tuple[fn_api_runner_transforms.TransformContext, List[fn_api_runner_transforms.Stage]]
return fn_api_runner_transforms.create_and_optimize_stages(
copy.deepcopy(pipeline_proto),
phases=[fn_api_runner_transforms.annotate_downstream_side_inputs,
@@ -454,7 +541,11 @@
common_urns.primitives.GROUP_BY_KEY.urn]),
use_state_iterables=self._use_state_iterables)
- def run_stages(self, stage_context, stages):
+ def run_stages(self,
+ stage_context, # type: fn_api_runner_transforms.TransformContext
+ stages # type: List[fn_api_runner_transforms.Stage]
+ ):
+ # type: (...) -> RunnerResult
"""Run a list of topologically-sorted stages in batch mode.
Args:
@@ -468,7 +559,7 @@
try:
with self.maybe_profile():
- pcoll_buffers = collections.defaultdict(_ListBuffer)
+ pcoll_buffers = collections.defaultdict(_ListBuffer) # type: DefaultDict[bytes, _ListBuffer]
for stage in stages:
stage_results = self._run_stage(
worker_handler_manager.get_worker_handlers,
@@ -485,11 +576,11 @@
runner.PipelineState.DONE, monitoring_infos_by_stage, metrics_by_stage)
def _store_side_inputs_in_state(self,
- worker_handler,
- context,
- pipeline_components,
- data_side_input,
- pcoll_buffers,
+ worker_handler, # type: WorkerHandler
+ context, # type: pipeline_context.PipelineContext
+ pipeline_components, # type: beam_runner_api_pb2.Components
+ data_side_input, # type: DataSideInput
+ pcoll_buffers, # type: Mapping[bytes, _ListBuffer]
safe_coders):
for (transform_id, tag), (buffer_id, si) in data_side_input.items():
_, pcoll_id = split_buffer_id(buffer_id)
@@ -521,9 +612,18 @@
"Unknown access pattern: '%s'" % si.urn)
def _run_bundle_multiple_times_for_testing(
- self, worker_handler_list, process_bundle_descriptor, data_input,
- data_output, get_input_coder_callable, cache_token_generator):
-
+ self,
+ worker_handler_list, # type: Sequence[WorkerHandler]
+ process_bundle_descriptor,
+ data_input,
+ data_output, # type: DataOutput
+ get_input_coder_callable,
+ cache_token_generator
+ ):
+ # type: (...) -> None
+ """
+ If bundle_repeat > 0, replay every bundle for profiling and debugging.
+ """
# all workers share state, so use any worker_handler.
worker_handler = worker_handler_list[0]
for k in range(self._bundle_repeat):
@@ -540,12 +640,14 @@
finally:
worker_handler.state.restore()
- def _collect_written_timers_and_add_to_deferred_inputs(self,
- context,
- pipeline_components,
- stage,
- get_buffer_callable,
- deferred_inputs):
+ def _collect_written_timers_and_add_to_deferred_inputs(
+ self,
+ context, # type: pipeline_context.PipelineContext
+ pipeline_components, # type: beam_runner_api_pb2.Components
+ stage, # type: fn_api_runner_transforms.Stage
+ get_buffer_callable,
+ deferred_inputs # type: DefaultDict[str, _ListBuffer]
+ ):
for transform_id, timer_writes in stage.timer_pcollections:
@@ -575,9 +677,15 @@
written_timers[:] = []
def _add_residuals_and_channel_splits_to_deferred_inputs(
- self, splits, get_input_coder_callable,
- input_for_callable, last_sent, deferred_inputs):
- prev_stops = {}
+ self,
+ splits, # type: List[beam_fn_api_pb2.ProcessBundleSplitResponse]
+ get_input_coder_callable,
+ input_for_callable,
+ last_sent,
+ deferred_inputs # type: DefaultDict[str, _ListBuffer]
+ ):
+
+ prev_stops = {} # type: Dict[str, int]
for split in splits:
for delayed_application in split.residual_roots:
deferred_inputs[
@@ -613,12 +721,16 @@
@staticmethod
def _extract_stage_data_endpoints(
- stage, pipeline_components, data_api_service_descriptor, pcoll_buffers):
+ stage, # type: fn_api_runner_transforms.Stage
+ pipeline_components, # type: beam_runner_api_pb2.Components
+ data_api_service_descriptor,
+ pcoll_buffers # type: DefaultDict[bytes, _ListBuffer]
+ ):
# Returns maps of transform names to PCollection identifiers.
# Also mutates IO stages to point to the data ApiServiceDescriptor.
data_input = {}
- data_side_input = {}
- data_output = {}
+ data_side_input = {} # type: DataSideInput
+ data_output = {} # type: Dict[Tuple[str, str], bytes]
for transform in stage.transforms:
if transform.spec.urn in (bundle_processor.DATA_INPUT_URN,
bundle_processor.DATA_OUTPUT_URN):
@@ -652,16 +764,18 @@
return data_input, data_side_input, data_output
def _run_stage(self,
- worker_handler_factory,
- pipeline_components,
- stage,
- pcoll_buffers,
- safe_coders):
+ worker_handler_factory, # type: Callable[[Optional[str], int], List[WorkerHandler]]
+ pipeline_components, # type: beam_runner_api_pb2.Components
+ stage, # type: fn_api_runner_transforms.Stage
+ pcoll_buffers, # type: DefaultDict[bytes, _ListBuffer]
+ safe_coders
+ ):
+ # type: (...) -> beam_fn_api_pb2.InstructionResponse
"""Run an individual stage.
Args:
- worker_handler_factory: A ``callable`` that takes in an environment, and
- returns a ``WorkerHandler`` class.
+ worker_handler_factory: A ``callable`` that takes in an environment id
+ and a number of workers, and returns a list of ``WorkerHandler``s.
pipeline_components (beam_runner_api_pb2.Components): TODO
stage (fn_api_runner_transforms.Stage)
pcoll_buffers (collections.defaultdict of str: list): Mapping of
@@ -670,6 +784,7 @@
safe_coders (dict): TODO
"""
def iterable_state_write(values, element_coder_impl):
+ # type: (...) -> bytes
token = unique_name(None, 'iter').encode('ascii')
out = create_OutputStream()
for element in values:
@@ -774,6 +889,7 @@
result, splits = bundle_manager.process_bundle(data_input, data_output)
def input_for(transform_id, input_id):
+ # type: (str, str) -> str
input_pcoll = process_bundle_descriptor.transforms[
transform_id].inputs[input_id]
for read_id, proto in process_bundle_descriptor.transforms.items():
@@ -787,7 +903,7 @@
last_sent = data_input
while True:
- deferred_inputs = collections.defaultdict(_ListBuffer)
+ deferred_inputs = collections.defaultdict(_ListBuffer) # type: DefaultDict[str, _ListBuffer]
self._collect_written_timers_and_add_to_deferred_inputs(
context, pipeline_components, stage, get_buffer, deferred_inputs)
@@ -831,10 +947,12 @@
return result
@staticmethod
- def _extract_endpoints(stage,
- pipeline_components,
- data_api_service_descriptor,
- pcoll_buffers):
+ def _extract_endpoints(stage, # type: fn_api_runner_transforms.Stage
+ pipeline_components, # type: beam_runner_api_pb2.Components
+ data_api_service_descriptor, # type: Optional[endpoints_pb2.ApiServiceDescriptor]
+ pcoll_buffers # type: DefaultDict[bytes, _ListBuffer]
+ ):
+ # type: (...) -> Tuple[Dict[str, _ListBuffer], DataSideInput, DataOutput]
"""Returns maps of transform names to PCollection identifiers.
Also mutates IO stages to point to the data ApiServiceDescriptor.
@@ -853,9 +971,9 @@
PCollection buffer; `data_output` is a dictionary mapping
(transform_name, output_name) to a PCollection ID.
"""
- data_input = {}
- data_side_input = {}
- data_output = {}
+ data_input = {} # type: Dict[str, _ListBuffer]
+ data_side_input = {} # type: DataSideInput
+ data_output = {} # type: DataOutput
for transform in stage.transforms:
if transform.spec.urn in (bundle_processor.DATA_INPUT_URN,
bundle_processor.DATA_OUTPUT_URN):
@@ -904,6 +1022,7 @@
self._underlying, self._overlay, key)
def __delitem__(self, key):
+ # type: (bytes) -> None
self._overlay[key] = []
def commit(self):
@@ -917,19 +1036,21 @@
self._key = key
def __iter__(self):
+ # type: () -> Iterator[bytes]
if self._key in self._overlay:
return iter(self._overlay[self._key])
else:
return iter(self._underlying[self._key])
def append(self, item):
+ # type: (bytes) -> None
if self._key not in self._overlay:
self._overlay[self._key] = list(self._underlying[self._key])
self._overlay[self._key].append(item)
def __init__(self):
self._lock = threading.Lock()
- self._state = collections.defaultdict(list)
+ self._state = collections.defaultdict(list) # type: DefaultDict[bytes, List[bytes]]
self._checkpoint = None
self._use_continuation_tokens = False
self._continuations = {}
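The annotated methods above implement a copy-on-write view over state: reads fall through to the underlying list until the first append copies the list into the overlay. A self-contained version of that behavior with a usage check (class and variable names are illustrative):

class CopyOnWriteList(object):
  """Reads come from the underlying list until the first local write."""

  def __init__(self, underlying, overlay, key):
    self._underlying = underlying
    self._overlay = overlay
    self._key = key

  def __iter__(self):
    if self._key in self._overlay:
      return iter(self._overlay[self._key])
    return iter(self._underlying[self._key])

  def append(self, item):
    if self._key not in self._overlay:
      self._overlay[self._key] = list(self._underlying[self._key])
    self._overlay[self._key].append(item)


underlying, overlay = {b'k': [b'a']}, {}
view = CopyOnWriteList(underlying, overlay, b'k')
view.append(b'b')
assert list(view) == [b'a', b'b'] and underlying[b'k'] == [b'a']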
@@ -952,7 +1073,11 @@
def process_instruction_id(self, unused_instruction_id):
yield
- def get_raw(self, state_key, continuation_token=None):
+ def get_raw(self,
+ state_key, # type: beam_fn_api_pb2.StateKey
+ continuation_token=None # type: Optional[bytes]
+ ):
+ # type: (...) -> Tuple[bytes, Optional[bytes]]
with self._lock:
full_state = self._state[self._to_key(state_key)]
if self._use_continuation_tokens:
@@ -973,12 +1098,17 @@
assert not continuation_token
return b''.join(full_state), None
- def append_raw(self, state_key, data):
+ def append_raw(self,
+ state_key, # type: beam_fn_api_pb2.StateKey
+ data # type: bytes
+ ):
+ # type: (...) -> _Future
with self._lock:
self._state[self._to_key(state_key)].append(data)
return _Future.done()
def clear(self, state_key):
+ # type: (beam_fn_api_pb2.StateKey) -> _Future
with self._lock:
try:
del self._state[self._to_key(state_key)]
@@ -992,13 +1122,19 @@
@staticmethod
def _to_key(state_key):
+ # type: (beam_fn_api_pb2.StateKey) -> bytes
return state_key.SerializeToString()
class GrpcStateServicer(beam_fn_api_pb2_grpc.BeamFnStateServicer):
def __init__(self, state):
+ # type: (FnApiRunner.StateServicer) -> None
self._state = state
- def State(self, request_stream, context=None):
+ def State(self,
+ request_stream, # type: Iterable[beam_fn_api_pb2.StateRequest]
+ context=None
+ ):
+ # type: (...) -> Iterator[beam_fn_api_pb2.StateResponse]
# Note that this eagerly mutates state, assuming any failures are fatal.
# Thus it is safe to ignore instruction_id.
for request in request_stream:
@@ -1027,13 +1163,16 @@
"""A singleton cache for a StateServicer."""
def __init__(self, state_handler):
+ # type: (sdk_worker.StateHandler) -> None
self._state_handler = state_handler
def create_state_handler(self, api_service_descriptor):
+ # type: (endpoints_pb2.ApiServiceDescriptor) -> sdk_worker.StateHandler
"""Returns the singleton state handler."""
return self._state_handler
def close(self):
+ # type: (...) -> None
"""Does nothing."""
pass
@@ -1086,12 +1225,17 @@
it.
"""
- _registered_environments = {}
+ _registered_environments = {} # type: Dict[str, Tuple[ConstructorFn, type]]
_worker_id_counter = -1
_lock = threading.Lock()
- def __init__(
- self, control_handler, data_plane_handler, state, provision_info):
+ def __init__(self,
+ control_handler,
+ data_plane_handler,
+ state, # type: FnApiRunner.StateServicer
+ provision_info # type: Optional[ExtendedProvisionInfo]
+ ):
+ # type: (...) -> None
"""Initialize a WorkerHandler.
Args:
@@ -1110,32 +1254,48 @@
self.worker_id = 'worker_%s' % WorkerHandler._worker_id_counter
def close(self):
+ # type: () -> None
self.stop_worker()
def start_worker(self):
+ # type: () -> None
raise NotImplementedError
def stop_worker(self):
+ # type: () -> None
raise NotImplementedError
def data_api_service_descriptor(self):
+ # type: () -> Optional[endpoints_pb2.ApiServiceDescriptor]
raise NotImplementedError
def state_api_service_descriptor(self):
+ # type: () -> Optional[endpoints_pb2.ApiServiceDescriptor]
raise NotImplementedError
def logging_api_service_descriptor(self):
+ # type: () -> Optional[endpoints_pb2.ApiServiceDescriptor]
raise NotImplementedError
@classmethod
- def register_environment(cls, urn, payload_type):
+ def register_environment(cls,
+ urn, # type: str
+ payload_type # type: Optional[Type[T]]
+ ):
+ # type: (...) -> Callable[[Callable[[T, FnApiRunner.StateServicer, Optional[ExtendedProvisionInfo], GrpcServer], WorkerHandler]], Callable[[T, FnApiRunner.StateServicer, Optional[ExtendedProvisionInfo], GrpcServer], WorkerHandler]]
def wrapper(constructor):
cls._registered_environments[urn] = constructor, payload_type
return constructor
return wrapper
@classmethod
- def create(cls, environment, state, provision_info, grpc_server):
+ def create(cls,
+ environment, # type: beam_runner_api_pb2.Environment
+ state, # type: FnApiRunner.StateServicer
+ provision_info, # type: Optional[ExtendedProvisionInfo]
+ grpc_server # type: GrpcServer
+ ):
+ # type: (...) -> WorkerHandler
constructor, payload_type = cls._registered_environments[environment.urn]
return constructor(
proto_utils.parse_Bytes(environment.payload, payload_type),
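register_environment/create above form a URN-keyed registry: a decorator records a constructor (plus its payload type) per environment URN, and the factory later looks the constructor up by the URN of the pipeline's environment. A stripped-down sketch of the same pattern (all names illustrative):

_registry = {}  # type: dict


def register(urn):
  def wrapper(constructor):
    _registry[urn] = constructor
    return constructor
  return wrapper


def create(urn, payload):
  return _registry[urn](payload)


@register('example:urn:embedded')
class EmbeddedHandler(object):
  def __init__(self, payload):
    self.payload = payload


handler = create('example:urn:embedded', b'payload')
assert isinstance(handler, EmbeddedHandler)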
@@ -1148,11 +1308,16 @@
class EmbeddedWorkerHandler(WorkerHandler):
"""An in-memory worker_handler for fn API control, state and data planes."""
- def __init__(self, unused_payload, state, provision_info,
- unused_grpc_server=None):
+ def __init__(self,
+ unused_payload, # type: None
+ state,
+ provision_info, # type: Optional[ExtendedProvisionInfo]
+ unused_grpc_server=None
+ ):
+ # type: (...) -> None
super(EmbeddedWorkerHandler, self).__init__(
self, data_plane.InMemoryDataChannel(), state, provision_info)
- self.control_conn = self
+ self.control_conn = self # type: ignore # need Protocol to describe this
self.data_conn = self.data_plane_handler
state_cache = StateCache(STATE_CACHE_SIZE)
self.bundle_processor_cache = sdk_worker.BundleProcessorCache(
@@ -1174,21 +1339,27 @@
return ControlFuture(request.instruction_id, response)
def start_worker(self):
+ # type: () -> None
pass
def stop_worker(self):
+ # type: () -> None
self.bundle_processor_cache.shutdown()
def done(self):
+ # type: () -> None
pass
def data_api_service_descriptor(self):
+ # type: () -> None
return None
def state_api_service_descriptor(self):
+ # type: () -> None
return None
def logging_api_service_descriptor(self):
+ # type: () -> None
return None
@@ -1216,9 +1387,11 @@
beam_provision_api_pb2_grpc.ProvisionServiceServicer):
def __init__(self, info):
+ # type: (Optional[beam_provision_api_pb2.ProvisionInfo]) -> None
self._info = info
def GetProvisionInfo(self, request, context=None):
+ # type: (...) -> beam_provision_api_pb2.GetProvisionInfoResponse
return beam_provision_api_pb2.GetProvisionInfoResponse(
info=self._info)
@@ -1238,7 +1411,11 @@
_DEFAULT_SHUTDOWN_TIMEOUT_SECS = 5
- def __init__(self, state, provision_info):
+ def __init__(self,
+ state, # type: FnApiRunner.StateServicer
+ provision_info, # type: Optional[ExtendedProvisionInfo]
+ ):
+ # type: (...) -> None
self.state = state
self.provision_info = provision_info
self.control_server = grpc.server(UnboundedThreadPoolExecutor())
@@ -1277,13 +1454,15 @@
if self.provision_info.artifact_staging_dir:
service = artifact_service.BeamFilesystemArtifactService(
- self.provision_info.artifact_staging_dir)
+ self.provision_info.artifact_staging_dir
+ ) # type: beam_artifact_api_pb2_grpc.ArtifactRetrievalServiceServicer
else:
service = EmptyArtifactRetrievalService()
beam_artifact_api_pb2_grpc.add_ArtifactRetrievalServiceServicer_to_server(
service, self.control_server)
- self.data_plane_handler = data_plane.BeamFnDataServicer()
+ self.data_plane_handler = data_plane.BeamFnDataServicer(
+ DATA_BUFFER_TIME_LIMIT_MS)
beam_fn_api_pb2_grpc.add_BeamFnDataServicer_to_server(
self.data_plane_handler, self.data_server)
@@ -1323,7 +1502,12 @@
class GrpcWorkerHandler(WorkerHandler):
"""An grpc based worker_handler for fn API control, state and data planes."""
- def __init__(self, state, provision_info, grpc_server):
+ def __init__(self,
+ state, # type: FnApiRunner.StateServicer
+ provision_info, # type: Optional[ExtendedProvisionInfo]
+ grpc_server # type: GrpcServer
+ ):
+ # type: (...) -> None
self._grpc_server = grpc_server
super(GrpcWorkerHandler, self).__init__(
self._grpc_server.control_handler, self._grpc_server.data_plane_handler,
@@ -1338,14 +1522,17 @@
self.worker_id)
def data_api_service_descriptor(self):
+ # type: () -> endpoints_pb2.ApiServiceDescriptor
return endpoints_pb2.ApiServiceDescriptor(
url=self.port_from_worker(self._grpc_server.data_port))
def state_api_service_descriptor(self):
+ # type: () -> endpoints_pb2.ApiServiceDescriptor
return endpoints_pb2.ApiServiceDescriptor(
url=self.port_from_worker(self._grpc_server.state_port))
def logging_api_service_descriptor(self):
+ # type: () -> endpoints_pb2.ApiServiceDescriptor
return endpoints_pb2.ApiServiceDescriptor(
url=self.port_from_worker(self._grpc_server.logging_port))
@@ -1364,26 +1551,37 @@
@WorkerHandler.register_environment(
common_urns.environments.EXTERNAL.urn, beam_runner_api_pb2.ExternalPayload)
class ExternalWorkerHandler(GrpcWorkerHandler):
- def __init__(self, external_payload, state, provision_info, grpc_server):
+ def __init__(self,
+ external_payload, # type: beam_runner_api_pb2.ExternalPayload
+ state, # type: FnApiRunner.StateServicer
+ provision_info, # type: Optional[ExtendedProvisionInfo]
+ grpc_server # type: GrpcServer
+ ):
+ # type: (...) -> None
super(ExternalWorkerHandler, self).__init__(state, provision_info,
grpc_server)
self._external_payload = external_payload
def start_worker(self):
+ # type: () -> None
stub = beam_fn_api_pb2_grpc.BeamFnExternalWorkerPoolStub(
GRPCChannelFactory.insecure_channel(
self._external_payload.endpoint.url))
+ control_descriptor = endpoints_pb2.ApiServiceDescriptor(
+ url=self.control_address)
response = stub.StartWorker(
beam_fn_api_pb2.StartWorkerRequest(
worker_id=self.worker_id,
- control_endpoint=endpoints_pb2.ApiServiceDescriptor(
- url=self.control_address),
+ control_endpoint=control_descriptor,
+ artifact_endpoint=control_descriptor,
+ provision_endpoint=control_descriptor,
logging_endpoint=self.logging_api_service_descriptor(),
params=self._external_payload.params))
if response.error:
raise RuntimeError("Error starting worker: %s" % response.error)
def stop_worker(self):
+ # type: () -> None
pass
def host_from_worker(self):
@@ -1396,18 +1594,29 @@
@WorkerHandler.register_environment(python_urns.EMBEDDED_PYTHON_GRPC, bytes)
class EmbeddedGrpcWorkerHandler(GrpcWorkerHandler):
- def __init__(self, payload, state, provision_info, grpc_server):
+ def __init__(self,
+ payload, # type: bytes
+ state, # type: FnApiRunner.StateServicer
+ provision_info, # type: Optional[ExtendedProvisionInfo]
+ grpc_server # type: GrpcServer
+ ):
+ # type: (...) -> None
super(EmbeddedGrpcWorkerHandler, self).__init__(state, provision_info,
grpc_server)
- if payload:
- state_cache_size = payload.decode('ascii')
- self._state_cache_size = int(state_cache_size)
- else:
- self._state_cache_size = STATE_CACHE_SIZE
+
+ from apache_beam.transforms.environments import EmbeddedPythonGrpcEnvironment
+ config = EmbeddedPythonGrpcEnvironment.parse_config(
+ payload.decode('utf-8'))
+ self._state_cache_size = config.get('state_cache_size') or STATE_CACHE_SIZE
+ self._data_buffer_time_limit_ms = \
+ config.get('data_buffer_time_limit_ms') or DATA_BUFFER_TIME_LIMIT_MS
def start_worker(self):
+ # type: () -> None
self.worker = sdk_worker.SdkHarness(
- self.control_address, state_cache_size=self._state_cache_size,
+ self.control_address,
+ state_cache_size=self._state_cache_size,
+ data_buffer_time_limit_ms=self._data_buffer_time_limit_ms,
worker_id=self.worker_id)
self.worker_thread = threading.Thread(
name='run_worker', target=self.worker.run)
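The handler above now reads both state_cache_size and data_buffer_time_limit_ms from the environment's config payload. As a usage sketch, mirroring the constructor call that the updated tests later in this diff use:

from apache_beam.transforms import environments

env = environments.EmbeddedPythonGrpcEnvironment(
    state_cache_size=0,
    data_buffer_time_limit_ms=0)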
@@ -1415,6 +1624,7 @@
self.worker_thread.start()
def stop_worker(self):
+ # type: () -> None
self.worker_thread.join()
@@ -1425,12 +1635,19 @@
@WorkerHandler.register_environment(python_urns.SUBPROCESS_SDK, bytes)
class SubprocessSdkWorkerHandler(GrpcWorkerHandler):
- def __init__(self, worker_command_line, state, provision_info, grpc_server):
+ def __init__(self,
+ worker_command_line, # type: bytes
+ state, # type: FnApiRunner.StateServicer
+ provision_info, # type: Optional[ExtendedProvisionInfo]
+ grpc_server # type: GrpcServer
+ ):
+ # type: (...) -> None
super(SubprocessSdkWorkerHandler, self).__init__(state, provision_info,
grpc_server)
self._worker_command_line = worker_command_line
def start_worker(self):
+ # type: () -> None
from apache_beam.runners.portability import local_job_service
self.worker = local_job_service.SubprocessSdkWorker(
self._worker_command_line, self.control_address, self.worker_id)
@@ -1439,17 +1656,24 @@
self.worker_thread.start()
def stop_worker(self):
+ # type: () -> None
self.worker_thread.join()
@WorkerHandler.register_environment(common_urns.environments.DOCKER.urn,
beam_runner_api_pb2.DockerPayload)
class DockerSdkWorkerHandler(GrpcWorkerHandler):
- def __init__(self, payload, state, provision_info, grpc_server):
+ def __init__(self,
+ payload, # type: beam_runner_api_pb2.DockerPayload
+ state, # type: FnApiRunner.StateServicer
+ provision_info, # type: Optional[ExtendedProvisionInfo]
+ grpc_server # type: GrpcServer
+ ):
+ # type: (...) -> None
super(DockerSdkWorkerHandler, self).__init__(state, provision_info,
grpc_server)
self._container_image = payload.container_image
- self._container_id = None
+ self._container_id = None # type: Optional[bytes]
def host_from_worker(self):
if sys.platform == "darwin":
@@ -1459,6 +1683,7 @@
return super(DockerSdkWorkerHandler, self).host_from_worker()
def start_worker(self):
+ # type: () -> None
with SUBPROCESS_LOCK:
try:
subprocess.check_call(['docker', 'pull', self._container_image])
@@ -1500,6 +1725,7 @@
time.sleep(1)
def stop_worker(self):
+ # type: () -> None
if self._container_id:
with SUBPROCESS_LOCK:
subprocess.call([
@@ -1509,14 +1735,27 @@
class WorkerHandlerManager(object):
- def __init__(self, environments, job_provision_info):
+ """
+ Manages creation of ``WorkerHandler``s.
+
+ Caches ``WorkerHandler``s based on environment id.
+ """
+ def __init__(self,
+ environments, # type: Mapping[str, beam_runner_api_pb2.Environment]
+ job_provision_info # type: Optional[ExtendedProvisionInfo]
+ ):
+ # type: (...) -> None
self._environments = environments
self._job_provision_info = job_provision_info
- self._cached_handlers = collections.defaultdict(list)
+ self._cached_handlers = collections.defaultdict(list) # type: DefaultDict[str, List[WorkerHandler]]
self._state = FnApiRunner.StateServicer() # rename?
- self._grpc_server = None
+ self._grpc_server = None # type: Optional[GrpcServer]
- def get_worker_handlers(self, environment_id, num_workers):
+ def get_worker_handlers(self,
+ environment_id, # type: Optional[str]
+ num_workers # type: int
+ ):
+ # type: (...) -> List[WorkerHandler]
if environment_id is None:
# Any environment will do, pick one arbitrarily.
environment_id = next(iter(self._environments.keys()))
@@ -1555,7 +1794,10 @@
class ExtendedProvisionInfo(object):
- def __init__(self, provision_info=None, artifact_staging_dir=None):
+ def __init__(self,
+ provision_info=None, # type: Optional[beam_provision_api_pb2.ProvisionInfo]
+ artifact_staging_dir=None
+ ):
self.provision_info = (
provision_info or beam_provision_api_pb2.ProvisionInfo())
self.artifact_staging_dir = artifact_staging_dir
@@ -1597,10 +1839,15 @@
_uid_counter = 0
_lock = threading.Lock()
- def __init__(
- self, worker_handler_list, get_buffer, get_input_coder_impl,
- bundle_descriptor, progress_frequency=None, skip_registration=False,
- cache_token_generator=FnApiRunner.get_cache_token_generator()):
+ def __init__(self,
+ worker_handler_list, # type: Sequence[WorkerHandler]
+ get_buffer, # type: Callable[[bytes], list]
+ get_input_coder_impl, # type: Callable[[str], CoderImpl]
+ bundle_descriptor, # type: beam_fn_api_pb2.ProcessBundleDescriptor
+ progress_frequency=None,
+ skip_registration=False,
+ cache_token_generator=FnApiRunner.get_cache_token_generator()
+ ):
"""Set up a bundle manager.
Args:
@@ -1617,13 +1864,14 @@
self._bundle_descriptor = bundle_descriptor
self._registered = skip_registration
self._progress_frequency = progress_frequency
- self._worker_handler = None
+ self._worker_handler = None # type: Optional[WorkerHandler]
self._cache_token_generator = cache_token_generator
def _send_input_to_worker(self,
- process_bundle_id,
- read_transform_id,
- byte_streams):
+ process_bundle_id, # type: str
+ read_transform_id, # type: str
+ byte_streams
+ ):
data_out = self._worker_handler.data_conn.output_stream(
process_bundle_id, read_transform_id)
for byte_stream in byte_streams:
@@ -1631,6 +1879,7 @@
data_out.close()
def _register_bundle_descriptor(self):
+ # type: () -> Optional[ControlFuture]
if self._registered:
registration_future = None
else:
@@ -1659,9 +1908,10 @@
def _generate_splits_for_testing(self,
split_manager,
- inputs,
+ inputs, # type: Mapping[str, _ListBuffer]
process_bundle_id):
- split_results = []
+ # type: (...) -> List[beam_fn_api_pb2.ProcessBundleSplitResponse]
+ split_results = [] # type: List[beam_fn_api_pb2.ProcessBundleSplitResponse]
read_transform_id, buffer_data = only_element(inputs.items())
byte_stream = b''.join(buffer_data)
@@ -1696,7 +1946,7 @@
estimated_input_elements=num_elements)
}))
split_response = self._worker_handler.control_conn.push(
- split_request).get()
+ split_request).get() # type: beam_fn_api_pb2.InstructionResponse
for t in (0.05, 0.1, 0.2):
waiting = ('Instruction not running', 'not yet scheduled')
if any(msg in split_response.error for msg in waiting):
@@ -1717,7 +1967,11 @@
break
return split_results
- def process_bundle(self, inputs, expected_outputs):
+ def process_bundle(self,
+ inputs, # type: Mapping[str, _ListBuffer]
+ expected_outputs # type: DataOutput
+ ):
+ # type: (...) -> BundleProcessResult
# Unique id for the instruction processing this bundle.
with BundleManager._lock:
BundleManager._uid_counter += 1
@@ -1746,7 +2000,7 @@
cache_tokens=[next(self._cache_token_generator)]))
result_future = self._worker_handler.control_conn.push(process_bundle_req)
- split_results = []
+ split_results = [] # type: List[beam_fn_api_pb2.ProcessBundleSplitResponse]
with ProgressRequester(
self._worker_handler, process_bundle_id, self._progress_frequency):
@@ -1766,7 +2020,7 @@
expected_outputs[output.transform_id]).append(output.data)
_LOGGER.debug('Wait for the bundle %s to finish.' % process_bundle_id)
- result = result_future.get()
+ result = result_future.get() # type: beam_fn_api_pb2.InstructionResponse
if result.error:
raise RuntimeError(result.error)
@@ -1785,23 +2039,34 @@
class ParallelBundleManager(BundleManager):
def __init__(
- self, worker_handler_list, get_buffer, get_input_coder_impl,
- bundle_descriptor, progress_frequency=None, skip_registration=False,
- cache_token_generator=None, **kwargs):
+ self,
+ worker_handler_list, # type: Sequence[WorkerHandler]
+ get_buffer, # type: Callable[[bytes], list]
+ get_input_coder_impl, # type: Callable[[str], CoderImpl]
+ bundle_descriptor, # type: beam_fn_api_pb2.ProcessBundleDescriptor
+ progress_frequency=None,
+ skip_registration=False,
+ cache_token_generator=None,
+ **kwargs):
+ # type: (...) -> None
super(ParallelBundleManager, self).__init__(
worker_handler_list, get_buffer, get_input_coder_impl,
bundle_descriptor, progress_frequency, skip_registration,
cache_token_generator=cache_token_generator)
self._num_workers = kwargs.pop('num_workers', 1)
- def process_bundle(self, inputs, expected_outputs):
- part_inputs = [{} for _ in range(self._num_workers)]
+ def process_bundle(self,
+ inputs, # type: Mapping[str, _ListBuffer]
+ expected_outputs # type: DataOutput
+ ):
+ # type: (...) -> BundleProcessResult
+ part_inputs = [{} for _ in range(self._num_workers)] # type: List[Dict[str, List[bytes]]]
for name, input in inputs.items():
for ix, part in enumerate(input.partition(self._num_workers)):
part_inputs[ix][name] = part
- merged_result = None
- split_result_list = []
+ merged_result = None # type: Optional[beam_fn_api_pb2.InstructionResponse]
+ split_result_list = [] # type: List[beam_fn_api_pb2.ProcessBundleSplitResponse]
with UnboundedThreadPoolExecutor() as executor:
for result, split_result in executor.map(lambda part: BundleManager(
self._worker_handler_list, self._get_buffer,
@@ -1831,7 +2096,13 @@
A callback can be passed to call with progress updates.
"""
- def __init__(self, worker_handler, instruction_id, frequency, callback=None):
+ def __init__(self,
+ worker_handler, # type: WorkerHandler
+ instruction_id,
+ frequency,
+ callback=None
+ ):
+ # type: (...) -> None
super(ProgressRequester, self).__init__()
self._worker_handler = worker_handler
self._instruction_id = instruction_id
@@ -1929,6 +2200,7 @@
self.GAUGES: gauges}
def monitoring_infos(self):
+ # type: () -> List[metrics_pb2.MonitoringInfo]
return [item for sublist in self._monitoring_infos.values() for item in
sublist]
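monitoring_infos() above flattens a dict of per-kind lists with a nested comprehension; an equivalent standalone sketch (hypothetical names):

def flatten_values(infos_by_kind):
  # type: (dict) -> list
  """Flatten a dict of lists into one list, like monitoring_infos() above."""
  return [item for sublist in infos_by_kind.values() for item in sublist]


assert sorted(flatten_values({'counters': [1, 2], 'gauges': [3]})) == [1, 2, 3]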
diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py
index ef09b1f..365fa4e 100644
--- a/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py
+++ b/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
@@ -31,12 +33,14 @@
import unittest
import uuid
from builtins import range
+from typing import Dict
# patches unittest.TestCase to be python3 compatible
import future.tests.base # pylint: disable=unused-import
import hamcrest # pylint: disable=ungrouped-imports
from hamcrest.core.matcher import Matcher
from hamcrest.core.string_description import StringDescription
+from nose.plugins.attrib import attr
from tenacity import retry
from tenacity import stop_after_attempt
@@ -596,7 +600,8 @@
with self.create_pipeline() as p:
big = (p
| beam.Create(['a', 'a', 'b'])
- | beam.Map(lambda x: (x, x * data_plane._DEFAULT_FLUSH_THRESHOLD)))
+ | beam.Map(lambda x: (
+ x, x * data_plane._DEFAULT_SIZE_FLUSH_THRESHOLD)))
side_input_res = (
big
@@ -1152,7 +1157,8 @@
return beam.Pipeline(
runner=fn_api_runner.FnApiRunner(
default_environment=environments.EmbeddedPythonGrpcEnvironment(
- state_cache_size=0)))
+ state_cache_size=0,
+ data_buffer_time_limit_ms=0)))
class FnApiRunnerTestWithMultiWorkers(FnApiRunnerTest):
@@ -1179,10 +1185,10 @@
class FnApiRunnerTestWithGrpcAndMultiWorkers(FnApiRunnerTest):
def create_pipeline(self):
- pipeline_options = PipelineOptions(direct_num_workers=2)
+ pipeline_options = PipelineOptions(direct_num_workers=2,
+ direct_running_mode='multi_threading')
p = beam.Pipeline(
- runner=fn_api_runner.FnApiRunner(
- default_environment=environments.EmbeddedPythonGrpcEnvironment()),
+ runner=fn_api_runner.FnApiRunner(),
options=pipeline_options)
#TODO(BEAM-8444): Fix these tests.
p.options.view_as(DebugOptions).experiments.remove('beam_fn_api')
@@ -1485,7 +1491,7 @@
return _unpickle_element_counter, (name,)
-_pickled_element_counters = {}
+_pickled_element_counters = {} # type: Dict[str, ElementCounter]
def _unpickle_element_counter(name):
@@ -1539,10 +1545,10 @@
class FnApiRunnerSplitTestWithMultiWorkers(FnApiRunnerSplitTest):
def create_pipeline(self):
- pipeline_options = PipelineOptions(direct_num_workers=2)
+ pipeline_options = PipelineOptions(direct_num_workers=2,
+ direct_running_mode='multi_threading')
p = beam.Pipeline(
- runner=fn_api_runner.FnApiRunner(
- default_environment=environments.EmbeddedPythonGrpcEnvironment()),
+ runner=fn_api_runner.FnApiRunner(),
options=pipeline_options)
#TODO(BEAM-8444): Fix these tests.
p.options.view_as(DebugOptions).experiments.remove('beam_fn_api')
@@ -1585,6 +1591,46 @@
'.*There has been a processing lull of over.*',
'Unable to find a lull logged for this job.')
+class StateBackedTestElementType(object):
+ live_element_count = 0
+
+ def __init__(self, num_elements, unused):
+ self.num_elements = num_elements
+ StateBackedTestElementType.live_element_count += 1
+ # Because a state-backed iterable is used, we expect only a few instances
+ # to be alive at any given time.
+ if StateBackedTestElementType.live_element_count > 5:
+ raise RuntimeError('Too many live instances.')
+
+ def __del__(self):
+ StateBackedTestElementType.live_element_count -= 1
+
+ def __reduce__(self):
+ return (self.__class__, (self.num_elements, 'x' * self.num_elements))
+
+@attr('ValidatesRunner')
+class FnApiBasedStateBackedCoderTest(unittest.TestCase):
+
+ def create_pipeline(self):
+ return beam.Pipeline(
+ runner=fn_api_runner.FnApiRunner(use_state_iterables=True))
+
+ def test_gbk_many_values(self):
+ with self.create_pipeline() as p:
+ # The number of integers could be a knob to test against
+ # different runners' default settings on page size.
+ VALUES_PER_ELEMENT = 300
+ NUM_OF_ELEMENTS = 200
+
+ r = (p
+ | beam.Create([None])
+ | beam.FlatMap(
+ lambda x: ((1, StateBackedTestElementType(VALUES_PER_ELEMENT, _))
+ for _ in range(NUM_OF_ELEMENTS)))
+ | beam.GroupByKey()
+ | beam.MapTuple(lambda _, vs: sum(e.num_elements for e in vs)))
+
+ assert_that(r, equal_to([VALUES_PER_ELEMENT * NUM_OF_ELEMENTS]))
if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
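The new test relies on __reduce__-based pickling: each element is reconstructed from (class, args) as it flows through the state-backed iterable, which is what keeps the live-instance count low. A minimal sketch of that mechanism (names hypothetical):

import pickle


class Payload(object):
  """Re-creatable from (class, args), like the test element type above."""

  def __init__(self, n, padding):
    self.n = n

  def __reduce__(self):
    return (self.__class__, (self.n, 'x' * self.n))


restored = pickle.loads(pickle.dumps(Payload(3, 'xxx')))
assert restored.n == 3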
diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner_transforms.py b/sdks/python/apache_beam/runners/portability/fn_api_runner_transforms.py
index 931f199..97c6cfa 100644
--- a/sdks/python/apache_beam/runners/portability/fn_api_runner_transforms.py
+++ b/sdks/python/apache_beam/runners/portability/fn_api_runner_transforms.py
@@ -17,6 +17,8 @@
"""Pipeline transformations for the FnApiRunner.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
@@ -24,6 +26,17 @@
import functools
import logging
from builtins import object
+from typing import Container
+from typing import DefaultDict
+from typing import Dict
+from typing import FrozenSet
+from typing import Iterable
+from typing import Iterator
+from typing import List
+from typing import Optional
+from typing import Set
+from typing import Tuple
+from typing import TypeVar
from past.builtins import unicode
@@ -34,6 +47,8 @@
from apache_beam.runners.worker import bundle_processor
from apache_beam.utils import proto_utils
+T = TypeVar('T')
+
# This module is experimental. No backwards-compatibility guarantees.
_LOGGER = logging.getLogger(__name__)
@@ -45,10 +60,12 @@
common_urns.primitives.PAR_DO.urn, # After SDF expansion.
])
+
COMBINE_URNS = frozenset([
common_urns.composites.COMBINE_PER_KEY.urn,
])
+
PAR_DO_URNS = frozenset([
common_urns.primitives.PAR_DO.urn,
common_urns.sdf_components.PAIR_WITH_RESTRICTION.urn,
@@ -58,19 +75,26 @@
common_urns.sdf_components.PROCESS_ELEMENTS.urn,
])
+
IMPULSE_BUFFER = b'impulse'
class Stage(object):
"""A set of Transforms that can be sent to the worker for processing."""
- def __init__(self, name, transforms,
- downstream_side_inputs=None, must_follow=frozenset(),
- parent=None, environment=None, forced_root=False):
+ def __init__(self,
+ name, # type: str
+ transforms, # type: List[beam_runner_api_pb2.PTransform]
+ downstream_side_inputs=None, # type: Optional[FrozenSet[str]]
+ must_follow=frozenset(), # type: FrozenSet[Stage]
+ parent=None, # type: Optional[Stage]
+ environment=None, # type: Optional[str]
+ forced_root=False
+ ):
self.name = name
self.transforms = transforms
self.downstream_side_inputs = downstream_side_inputs
self.must_follow = must_follow
- self.timer_pcollections = []
+ self.timer_pcollections = [] # type: List[Tuple[str, str]]
self.parent = parent
if environment is None:
environment = functools.reduce(
@@ -95,19 +119,15 @@
@staticmethod
def _extract_environment(transform):
- if transform.spec.urn in PAR_DO_URNS:
- pardo_payload = proto_utils.parse_Bytes(
- transform.spec.payload, beam_runner_api_pb2.ParDoPayload)
- return pardo_payload.do_fn.environment_id
- elif transform.spec.urn in COMBINE_URNS:
- combine_payload = proto_utils.parse_Bytes(
- transform.spec.payload, beam_runner_api_pb2.CombinePayload)
- return combine_payload.combine_fn.environment_id
- else:
- return None
+ # type: (beam_runner_api_pb2.PTransform) -> Optional[str]
+ environment = transform.environment_id
+ if environment:
+ return environment
@staticmethod
def _merge_environments(env1, env2):
+ # type: (Optional[str], Optional[str]) -> Optional[str]
if env1 is None:
return env2
elif env2 is None:
@@ -120,6 +140,7 @@
return env1
def can_fuse(self, consumer, context):
+ # type: (Stage, TransformContext) -> bool
try:
self._merge_environments(self.environment, consumer.environment)
except ValueError:
@@ -136,6 +157,7 @@
and no_overlap(self.downstream_side_inputs, consumer.side_inputs()))
def fuse(self, other):
+ # type: (Stage) -> Stage
return Stage(
"(%s)+(%s)" % (self.name, other.name),
self.transforms + other.transforms,
@@ -147,10 +169,12 @@
forced_root=self.forced_root or other.forced_root)
def is_runner_urn(self, context):
+ # type: (TransformContext) -> bool
return any(transform.spec.urn in context.known_runner_urns
for transform in self.transforms)
def side_inputs(self):
+ # type: () -> Iterator[str]
for transform in self.transforms:
if transform.spec.urn in PAR_DO_URNS:
payload = proto_utils.parse_Bytes(
@@ -171,7 +195,8 @@
return True
def deduplicate_read(self):
- seen_pcolls = set()
+ # type: () -> None
+ seen_pcolls = set() # type: Set[str]
new_transforms = []
for transform in self.transforms:
if transform.spec.urn == bundle_processor.DATA_INPUT_URN:
@@ -182,8 +207,12 @@
new_transforms.append(transform)
self.transforms = new_transforms
- def executable_stage_transform(
- self, known_runner_urns, all_consumers, components):
+ def executable_stage_transform(self,
+ known_runner_urns, # type: FrozenSet[str]
+ all_consumers,
+ components # type: beam_runner_api_pb2.Components
+ ):
+ # type: (...) -> beam_runner_api_pb2.PTransform
if (len(self.transforms) == 1
and self.transforms[0].spec.urn in known_runner_urns):
return self.transforms[0]
@@ -210,7 +239,7 @@
# Only keep the transforms in this stage.
# Also gather up payload data as we iterate over the transforms.
stage_components.transforms.clear()
- main_inputs = set()
+ main_inputs = set() # type: Set[str]
side_inputs = []
user_states = []
timers = []
@@ -265,7 +294,7 @@
payload=payload.SerializeToString()),
inputs=named_inputs,
outputs={'output_%d' % ix: pcoll
- for ix, pcoll in enumerate(external_outputs)})
+ for ix, pcoll in enumerate(external_outputs)},)
def memoize_on_instance(f):
@@ -290,15 +319,24 @@
_KNOWN_CODER_URNS = set(
value.urn for value in common_urns.coders.__dict__.values())
- def __init__(self, components, known_runner_urns, use_state_iterables=False):
+ def __init__(self,
+ components, # type: beam_runner_api_pb2.Components
+ known_runner_urns, # type: FrozenSet[str]
+ use_state_iterables=False
+ ):
self.components = components
self.known_runner_urns = known_runner_urns
self.use_state_iterables = use_state_iterables
- self.bytes_coder_id = self.add_or_get_coder_id(
- coders.BytesCoder().to_runner_api(None), 'bytes_coder')
+ # ok to pass None for context because BytesCoder has no components
+ coder_proto = coders.BytesCoder().to_runner_api(None) # type: ignore[arg-type]
+ self.bytes_coder_id = self.add_or_get_coder_id(coder_proto, 'bytes_coder')
self.safe_coders = {self.bytes_coder_id: self.bytes_coder_id}
- def add_or_get_coder_id(self, coder_proto, coder_prefix='coder'):
+ def add_or_get_coder_id(self,
+ coder_proto, # type: beam_runner_api_pb2.Coder
+ coder_prefix='coder'
+ ):
+ # type: (...) -> str
for coder_id, coder in self.components.coders.items():
if coder == coder_proto:
return coder_id
@@ -308,6 +346,7 @@
@memoize_on_instance
def with_state_iterables(self, coder_id):
+ # type: (str) -> str
coder = self.components.coders[coder_id]
if coder.spec.urn == common_urns.coders.ITERABLE.urn:
new_coder_id = unique_name(
@@ -335,6 +374,7 @@
@memoize_on_instance
def length_prefixed_coder(self, coder_id):
+ # type: (str) -> str
if coder_id in self.safe_coders:
return coder_id
length_prefixed_id, safe_id = self.length_prefixed_and_safe_coder(coder_id)
@@ -343,6 +383,7 @@
@memoize_on_instance
def length_prefixed_and_safe_coder(self, coder_id):
+ # type: (str) -> Tuple[str, str]
coder = self.components.coders[coder_id]
if coder.spec.urn == common_urns.coders.LENGTH_PREFIX.urn:
return coder_id, self.bytes_coder_id
@@ -380,6 +421,7 @@
return new_coder_id, self.bytes_coder_id
def length_prefix_pcoll_coders(self, pcoll_id):
+ # type: (str) -> None
self.components.pcollections[pcoll_id].coder_id = (
self.length_prefixed_coder(
self.components.pcollections[pcoll_id].coder_id))
@@ -387,6 +429,7 @@
def leaf_transform_stages(
root_ids, components, parent=None, known_composites=KNOWN_COMPOSITES):
+ # type: (...) -> Iterator[Stage]
for root_id in root_ids:
root = components.transforms[root_id]
if root.spec.urn in known_composites:
@@ -401,8 +444,12 @@
yield stage
-def pipeline_from_stages(
- pipeline_proto, stages, known_runner_urns, partial):
+def pipeline_from_stages(pipeline_proto, # type: beam_runner_api_pb2.Pipeline
+ stages, # type: Iterable[Stage]
+ known_runner_urns, # type: FrozenSet[str]
+ partial # type: bool
+ ):
+ # type: (...) -> beam_runner_api_pb2.Pipeline
# In case it was a generator that mutates components as it
# produces outputs (as is the case with most transformations).
@@ -434,7 +481,7 @@
add_parent(parent, parents.get(parent))
components.transforms[parent].subtransforms.append(child)
- all_consumers = collections.defaultdict(set)
+ all_consumers = collections.defaultdict(set) # type: DefaultDict[str, Set[int]]
for stage in stages:
for transform in stage.transforms:
for pcoll in transform.inputs.values():
@@ -456,10 +503,12 @@
return new_proto
-def create_and_optimize_stages(pipeline_proto,
+def create_and_optimize_stages(pipeline_proto, # type: beam_runner_api_pb2.Pipeline
phases,
- known_runner_urns,
- use_state_iterables=False):
+ known_runner_urns, # type: FrozenSet[str]
+ use_state_iterables=False
+ ):
+ # type: (...) -> Tuple[TransformContext, List[Stage]]
"""Create a set of stages given a pipeline proto, and set of optimizations.
Args:
@@ -497,9 +546,9 @@
def optimize_pipeline(
- pipeline_proto,
+ pipeline_proto, # type: beam_runner_api_pb2.Pipeline
phases,
- known_runner_urns,
+ known_runner_urns, # type: FrozenSet[str]
partial=False,
**kwargs):
unused_context, stages = create_and_optimize_stages(
@@ -515,6 +564,7 @@
def annotate_downstream_side_inputs(stages, pipeline_context):
+ # type: (Iterable[Stage], TransformContext) -> Iterable[Stage]
"""Annotate each stage with fusion-prohibiting information.
Each stage is annotated with the (transitive) set of pcollections that
@@ -529,7 +579,8 @@
This representation is also amenable to simple recomputation on fusion.
"""
- consumers = collections.defaultdict(list)
+ consumers = collections.defaultdict(list) # type: DefaultDict[str, List[Stage]]
+
all_side_inputs = set()
for stage in stages:
for transform in stage.transforms:
@@ -539,11 +590,12 @@
all_side_inputs.add(si)
all_side_inputs = frozenset(all_side_inputs)
- downstream_side_inputs_by_stage = {}
+ downstream_side_inputs_by_stage = {} # type: Dict[Stage, FrozenSet[str]]
def compute_downstream_side_inputs(stage):
+ # type: (Stage) -> FrozenSet[str]
if stage not in downstream_side_inputs_by_stage:
- downstream_side_inputs = frozenset()
+ downstream_side_inputs = frozenset() # type: FrozenSet[str]
for transform in stage.transforms:
for output in transform.outputs.values():
if output in all_side_inputs:
@@ -562,6 +614,7 @@
def annotate_stateful_dofns_as_roots(stages, pipeline_context):
+ # type: (Iterable[Stage], TransformContext) -> Iterable[Stage]
for stage in stages:
for transform in stage.transforms:
if transform.spec.urn == common_urns.primitives.PAR_DO.urn:
@@ -573,6 +626,7 @@
def fix_side_input_pcoll_coders(stages, pipeline_context):
+ # type: (Iterable[Stage], TransformContext) -> Iterable[Stage]
"""Length prefix side input PCollection coders.
"""
for stage in stages:
@@ -582,6 +636,7 @@
def lift_combiners(stages, context):
+ # type: (List[Stage], TransformContext) -> Iterator[Stage]
"""Expands CombinePerKey into pre- and post-grouping stages.
... -> CombinePerKey -> ...
@@ -625,15 +680,10 @@
context.components.pcollections[
only_element(list(combine_per_key_transform.inputs.values()))
].windowing_strategy_id]
- if windowing.output_time != beam_runner_api_pb2.OutputTime.END_OF_WINDOW:
- # This depends on the spec of PartialGroupByKey.
- return False
- elif not is_compatible_with_combiner_lifting(windowing.trigger):
- return False
- else:
- return True
+ return is_compatible_with_combiner_lifting(windowing.trigger)
def make_stage(base_stage, transform):
+ # type: (Stage, beam_runner_api_pb2.PTransform) -> Stage
return Stage(
transform.unique_name,
[transform],
@@ -714,7 +764,8 @@
.COMBINE_PER_KEY_PRECOMBINE.urn,
payload=transform.spec.payload),
inputs=transform.inputs,
- outputs={'out': precombined_pcoll_id}))
+ outputs={'out': precombined_pcoll_id},
+ environment_id=transform.environment_id))
yield make_stage(
stage,
@@ -734,7 +785,8 @@
.COMBINE_PER_KEY_MERGE_ACCUMULATORS.urn,
payload=transform.spec.payload),
inputs={'in': grouped_pcoll_id},
- outputs={'out': merged_pcoll_id}))
+ outputs={'out': merged_pcoll_id},
+ environment_id=transform.environment_id))
yield make_stage(
stage,
@@ -745,7 +797,8 @@
.COMBINE_PER_KEY_EXTRACT_OUTPUTS.urn,
payload=transform.spec.payload),
inputs={'in': merged_pcoll_id},
- outputs=transform.outputs))
+ outputs=transform.outputs,
+ environment_id=transform.environment_id))
def unlifted_stages(stage):
transform = stage.transforms[0]
@@ -764,6 +817,7 @@
def expand_sdf(stages, context):
+ # type: (Iterable[Stage], TransformContext) -> Iterator[Stage]
"""Transforms splitable DoFns into pair+split+read."""
for stage in stages:
assert len(stage.transforms) == 1
@@ -804,6 +858,7 @@
return new_id
def make_stage(base_stage, transform_id, extra_must_follow=()):
+ # type: (Stage, str, Iterable[Stage]) -> Stage
transform = context.components.transforms[transform_id]
return Stage(
transform.unique_name,
@@ -833,7 +888,9 @@
component_coder_ids=[
paired_coder_id,
context.add_or_get_coder_id(
- coders.FloatCoder().to_runner_api(None),
+ # context can be None here only because FloatCoder does
+ # not have components
+ coders.FloatCoder().to_runner_api(None), # type: ignore
'doubles_coder')
]))
@@ -905,6 +962,7 @@
def expand_gbk(stages, pipeline_context):
+ # type: (Iterable[Stage], TransformContext) -> Iterator[Stage]
"""Transforms each GBK into a write followed by a read.
"""
for stage in stages:
@@ -954,6 +1012,7 @@
def fix_flatten_coders(stages, pipeline_context):
+ # type: (Iterable[Stage], TransformContext) -> Iterator[Stage]
"""Ensures that the inputs of Flatten have the same coders as the output.
"""
pcollections = pipeline_context.components.pcollections
@@ -980,7 +1039,8 @@
inputs={local_in: pcoll_in},
outputs={'out': transcoded_pcollection},
spec=beam_runner_api_pb2.FunctionSpec(
- urn=bundle_processor.IDENTITY_DOFN_URN))],
+ urn=bundle_processor.IDENTITY_DOFN_URN),
+ environment_id=transform.environment_id)],
downstream_side_inputs=frozenset(),
must_follow=stage.must_follow)
pcollections[transcoded_pcollection].CopyFrom(
@@ -994,6 +1054,7 @@
def sink_flattens(stages, pipeline_context):
+ # type: (Iterable[Stage], TransformContext) -> Iterator[Stage]
"""Sink flattens and remove them from the graph.
A flatten that cannot be sunk/fused away becomes multiple writes (to the
@@ -1006,7 +1067,7 @@
if transform.spec.urn == common_urns.primitives.FLATTEN.urn:
# This is used later to correlate the read and writes.
buffer_id = create_buffer_id(transform.unique_name)
- flatten_writes = []
+ flatten_writes = [] # type: List[Stage]
for local_in, pcoll_in in transform.inputs.items():
flatten_write = Stage(
transform.unique_name + '/Write/' + local_in,
@@ -1125,6 +1186,7 @@
def read_to_impulse(stages, pipeline_context):
+ # type: (Iterable[Stage], TransformContext) -> Iterator[Stage]
"""Translates Read operations into Impulse operations."""
for stage in stages:
# First map Reads, if any, to Impulse + triggered read op.
@@ -1162,6 +1224,7 @@
def impulse_to_input(stages, pipeline_context):
+ # type: (Iterable[Stage], TransformContext) -> Iterator[Stage]
"""Translates Impulse operations into GRPC reads."""
for stage in stages:
for transform in list(stage.transforms):
@@ -1178,6 +1241,7 @@
def extract_impulse_stages(stages, pipeline_context):
+ # type: (Iterable[Stage], TransformContext) -> Iterator[Stage]
"""Splits fused Impulse operations into their own stage."""
for stage in stages:
for transform in list(stage.transforms):
@@ -1195,6 +1259,7 @@
def remove_data_plane_ops(stages, pipeline_context):
+ # type: (Iterable[Stage], TransformContext) -> Iterator[Stage]
for stage in stages:
for transform in list(stage.transforms):
if transform.spec.urn in (bundle_processor.DATA_INPUT_URN,
@@ -1206,6 +1271,7 @@
def inject_timer_pcollections(stages, pipeline_context):
+ # type: (Iterable[Stage], TransformContext) -> Iterator[Stage]
"""Create PCollections for fired timers and to-be-set timers.
At execution time, fired timers and timers-to-set are represented as
@@ -1279,10 +1345,11 @@
def sort_stages(stages, pipeline_context):
+ # type: (Iterable[Stage], TransformContext) -> List[Stage]
"""Order stages suitable for sequential execution.
"""
all_stages = set(stages)
- seen = set()
+ seen = set() # type: Set[Stage]
ordered = []
def process(stage):
@@ -1299,6 +1366,7 @@
def window_pcollection_coders(stages, pipeline_context):
+ # type: (Iterable[Stage], TransformContext) -> Iterable[Stage]
"""Wrap all PCollection coders as windowed value coders.
This is required as some SDK workers require windowed coders for their
@@ -1342,6 +1410,7 @@
def unique_name(existing, prefix):
+ # type: (Optional[Container[str]], str) -> str
if existing is None:
global _global_counter
_global_counter += 1
@@ -1358,14 +1427,17 @@
def only_element(iterable):
+ # type: (Iterable[T]) -> T
element, = iterable
return element
def create_buffer_id(name, kind='materialize'):
+ # type: (str, str) -> bytes
return ('%s:%s' % (kind, name)).encode('utf-8')
def split_buffer_id(buffer_id):
+ # type: (bytes) -> Tuple[str, str]
"""A buffer id is "kind:pcollection_id". Split into (kind, pcoll_id). """
return buffer_id.decode('utf-8').split(':', 1)
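For reference, the buffer id helpers annotated above round-trip as follows (a standalone copy for illustration; the pcollection name is hypothetical, and the copy wraps the split result in a tuple purely for the assertion):

def create_buffer_id(name, kind='materialize'):
  # type: (str, str) -> bytes
  return ('%s:%s' % (kind, name)).encode('utf-8')


def split_buffer_id(buffer_id):
  # type: (bytes) -> tuple
  return tuple(buffer_id.decode('utf-8').split(':', 1))


buffer_id = create_buffer_id('pcoll_1')
assert buffer_id == b'materialize:pcoll_1'
assert split_buffer_id(buffer_id) == ('materialize', 'pcoll_1')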
diff --git a/sdks/python/apache_beam/runners/portability/job_server.py b/sdks/python/apache_beam/runners/portability/job_server.py
index e25e5a3..66d1558 100644
--- a/sdks/python/apache_beam/runners/portability/job_server.py
+++ b/sdks/python/apache_beam/runners/portability/job_server.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import atexit
@@ -52,6 +54,7 @@
self._timeout = timeout
def start(self):
+ # type: () -> beam_job_api_pb2_grpc.JobServiceStub
channel = grpc.insecure_channel(self._endpoint)
grpc.channel_ready_future(channel).result(timeout=self._timeout)
return beam_job_api_pb2_grpc.JobServiceStub(channel)
@@ -62,6 +65,7 @@
class EmbeddedJobServer(JobServer):
def start(self):
+ # type: () -> local_job_service.LocalJobServicer
return local_job_service.LocalJobServicer()
def stop(self):
diff --git a/sdks/python/apache_beam/runners/portability/local_job_service.py b/sdks/python/apache_beam/runners/portability/local_job_service.py
index 2bbafbb..7fecdc7 100644
--- a/sdks/python/apache_beam/runners/portability/local_job_service.py
+++ b/sdks/python/apache_beam/runners/portability/local_job_service.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -26,9 +28,12 @@
import time
import traceback
from builtins import object
+from typing import TYPE_CHECKING
+from typing import List
+from typing import Optional
import grpc
-from google.protobuf import text_format
+from google.protobuf import text_format # type: ignore # not in typeshed
from apache_beam.metrics import monitoring_infos
from apache_beam.portability.api import beam_artifact_api_pb2
@@ -43,6 +48,10 @@
from apache_beam.runners.portability import fn_api_runner
from apache_beam.utils.thread_pool_executor import UnboundedThreadPoolExecutor
+if TYPE_CHECKING:
+ from google.protobuf import struct_pb2 # pylint: disable=ungrouped-imports
+ from apache_beam.portability.api import beam_runner_api_pb2
+
_LOGGER = logging.getLogger(__name__)
@@ -71,9 +80,15 @@
self._staging_dir = staging_dir or tempfile.mkdtemp()
self._artifact_service = artifact_service.BeamFilesystemArtifactService(
self._staging_dir)
- self._artifact_staging_endpoint = None
+ self._artifact_staging_endpoint = None # type: Optional[endpoints_pb2.ApiServiceDescriptor]
- def create_beam_job(self, preparation_id, job_name, pipeline, options):
+ def create_beam_job(self,
+ preparation_id, # type: str
+ job_name, # type: str
+ pipeline, # type: beam_runner_api_pb2.Pipeline
+ options # type: struct_pb2.Struct
+ ):
+ # type: (...) -> BeamJob
# TODO(angoenka): Pass an appropriate staging_session_token. The token can
# be obtained in PutArtifactResponse from JobService
if not self._artifact_staging_endpoint:
@@ -160,7 +175,11 @@
"""Manages a SDK worker implemented as a subprocess communicating over grpc.
"""
- def __init__(self, worker_command_line, control_address, worker_id=None):
+ def __init__(self,
+ worker_command_line, # type: bytes
+ control_address,
+ worker_id=None
+ ):
self._worker_command_line = worker_command_line
self._control_address = control_address
self._worker_id = worker_id
@@ -210,17 +229,18 @@
"""
def __init__(self,
- job_id,
+ job_id, # type: str
pipeline,
options,
- provision_info,
- artifact_staging_endpoint):
+ provision_info, # type: fn_api_runner.ExtendedProvisionInfo
+ artifact_staging_endpoint # type: Optional[endpoints_pb2.ApiServiceDescriptor]
+ ):
super(BeamJob, self).__init__(
job_id, provision_info.provision_info.job_name, pipeline, options)
self._provision_info = provision_info
self._artifact_staging_endpoint = artifact_staging_endpoint
- self._state_queues = []
- self._log_queues = []
+ self._state_queues = [] # type: List[queue.Queue]
+ self._log_queues = [] # type: List[queue.Queue]
self.daemon = True
self.result = None
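
The TYPE_CHECKING guard added above keeps proto imports out of the runtime import path; they exist only to back the type comments. A small illustrative sketch (the function below is hypothetical):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
  from apache_beam.portability.api import beam_runner_api_pb2

def pipeline_proto_size(pipeline):
  # type: (beam_runner_api_pb2.Pipeline) -> int
  # ByteSize() is available on any protobuf message.
  return pipeline.ByteSize()
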
diff --git a/sdks/python/apache_beam/runners/portability/local_job_service_main.py b/sdks/python/apache_beam/runners/portability/local_job_service_main.py
index aa33263..ac5a178 100644
--- a/sdks/python/apache_beam/runners/portability/local_job_service_main.py
+++ b/sdks/python/apache_beam/runners/portability/local_job_service_main.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/runners/portability/local_job_service_test.py b/sdks/python/apache_beam/runners/portability/local_job_service_test.py
index 475d2fe..4ff382f 100644
--- a/sdks/python/apache_beam/runners/portability/local_job_service_test.py
+++ b/sdks/python/apache_beam/runners/portability/local_job_service_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/portability/portable_metrics.py b/sdks/python/apache_beam/runners/portability/portable_metrics.py
index 0c88b73..956d4d2 100644
--- a/sdks/python/apache_beam/runners/portability/portable_metrics.py
+++ b/sdks/python/apache_beam/runners/portability/portable_metrics.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/runners/portability/portable_runner.py b/sdks/python/apache_beam/runners/portability/portable_runner.py
index c22654d..0715311 100644
--- a/sdks/python/apache_beam/runners/portability/portable_runner.py
+++ b/sdks/python/apache_beam/runners/portability/portable_runner.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import functools
@@ -22,6 +24,8 @@
import logging
import threading
import time
+from typing import TYPE_CHECKING
+from typing import Optional
import grpc
@@ -43,6 +47,10 @@
from apache_beam.runners.worker import worker_pool_main
from apache_beam.transforms import environments
+if TYPE_CHECKING:
+ from apache_beam.options.pipeline_options import PipelineOptions
+ from apache_beam.pipeline import Pipeline
+
__all__ = ['PortableRunner']
MESSAGE_LOG_LEVELS = {
@@ -76,10 +84,11 @@
running and managing the job lies with the job service used.
"""
def __init__(self):
- self._dockerized_job_server = None
+ self._dockerized_job_server = None # type: Optional[job_server.JobServer]
@staticmethod
def _create_environment(options):
+ # type: (PipelineOptions) -> beam_runner_api_pb2.Environment
portable_options = options.view_as(PortableOptions)
# Do not set a Runner. Otherwise this can cause problems in Java's
# PipelineOptions, i.e. ClassNotFoundException, if the corresponding Runner
@@ -106,6 +115,7 @@
return env_class.from_options(portable_options)
def default_job_server(self, portable_options):
+ # type: (...) -> job_server.JobServer
# TODO Provide a way to specify a container Docker URL
# https://issues.apache.org/jira/browse/BEAM-6328
if not self._dockerized_job_server:
@@ -126,6 +136,7 @@
return server.start()
def run_pipeline(self, pipeline, options):
+ # type: (Pipeline, PipelineOptions) -> PipelineResult
portable_options = options.view_as(PortableOptions)
# TODO: https://issues.apache.org/jira/browse/BEAM-5525
@@ -142,6 +153,8 @@
portable_options.environment_config, server = (
worker_pool_main.BeamFnExternalWorkerPoolServicer.start(
state_cache_size=sdk_worker_main._get_state_cache_size(options),
+ data_buffer_time_limit_ms=
+ sdk_worker_main._get_data_buffer_time_limit_ms(options),
use_process=use_loopback_process_worker))
cleanup_callbacks = [functools.partial(server.stop, 1)]
else:
@@ -209,6 +222,7 @@
# fetch runner options from job service
# retries in case the channel is not ready
def send_options_request(max_retries=5):
+ # type: (int) -> beam_job_api_pb2.DescribePipelineOptionsResponse
num_retries = 0
while True:
try:
@@ -407,6 +421,7 @@
def wait_until_finish(self):
def read_messages():
+ previous_state = -1
for message in self._message_stream:
if message.HasField('message_response'):
logging.log(
@@ -414,10 +429,12 @@
"%s",
message.message_response.message_text)
else:
- _LOGGER.info(
- "Job state changed to %s",
- self._runner_api_state_to_pipeline_state(
- message.state_response.state))
+ current_state = message.state_response.state
+ if current_state != previous_state:
+ _LOGGER.info(
+ "Job state changed to %s",
+ self._runner_api_state_to_pipeline_state(current_state))
+ previous_state = current_state
self._messages.append(message)
t = threading.Thread(target=read_messages, name='wait_until_finish_read')
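
For reference, a hedged sketch of the PortableOptions consumed by _create_environment() and run_pipeline() above; the LOOPBACK value is only an illustrative choice:

from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import PortableOptions

options = PipelineOptions(['--environment_type=LOOPBACK'])
portable_options = options.view_as(PortableOptions)
print(portable_options.environment_type)  # 'LOOPBACK'
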
diff --git a/sdks/python/apache_beam/runners/portability/portable_runner_test.py b/sdks/python/apache_beam/runners/portability/portable_runner_test.py
index f8b6cf8..49b4231 100644
--- a/sdks/python/apache_beam/runners/portability/portable_runner_test.py
+++ b/sdks/python/apache_beam/runners/portability/portable_runner_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
@@ -185,6 +187,9 @@
python_urns.EMBEDDED_PYTHON)
# Enable caching (disabled by default)
options.view_as(DebugOptions).add_experiment('state_cache_size=100')
+ # Enable time-based data buffer (disabled by default)
+ options.view_as(DebugOptions).add_experiment(
+ 'data_buffer_time_limit_ms=1000')
return options
def create_pipeline(self):
@@ -240,6 +245,8 @@
options = super(PortableRunnerOptimized, self).create_options()
options.view_as(DebugOptions).add_experiment('pre_optimize=all')
options.view_as(DebugOptions).add_experiment('state_cache_size=100')
+ options.view_as(DebugOptions).add_experiment(
+ 'data_buffer_time_limit_ms=1000')
return options
@@ -249,7 +256,7 @@
def setUpClass(cls):
cls._worker_address, cls._worker_server = (
worker_pool_main.BeamFnExternalWorkerPoolServicer.start(
- state_cache_size=100))
+ state_cache_size=100, data_buffer_time_limit_ms=1000))
@classmethod
def tearDownClass(cls):
@@ -274,6 +281,9 @@
sys.executable.encode('ascii')).decode('utf-8')
# Enable caching (disabled by default)
options.view_as(DebugOptions).add_experiment('state_cache_size=100')
+ # Enable time-based data buffer (disabled by default)
+ options.view_as(DebugOptions).add_experiment(
+ 'data_buffer_time_limit_ms=1000')
return options
@classmethod
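
A short sketch of how the experiments exercised by these tests are enabled on any PipelineOptions instance, mirroring the add_experiment calls above:

from apache_beam.options.pipeline_options import DebugOptions
from apache_beam.options.pipeline_options import PipelineOptions

options = PipelineOptions()
options.view_as(DebugOptions).add_experiment('state_cache_size=100')
options.view_as(DebugOptions).add_experiment('data_buffer_time_limit_ms=1000')
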
diff --git a/sdks/python/apache_beam/runners/portability/portable_stager.py b/sdks/python/apache_beam/runners/portability/portable_stager.py
index 09ff18f..b9447fe 100644
--- a/sdks/python/apache_beam/runners/portability/portable_stager.py
+++ b/sdks/python/apache_beam/runners/portability/portable_stager.py
@@ -16,12 +16,16 @@
"""A :class:`FileHandler` to work with :class:`ArtifactStagingServiceStub`.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import os
+from typing import Iterator
+from typing import List
from apache_beam.portability.api import beam_artifact_api_pb2
from apache_beam.portability.api import beam_artifact_api_pb2_grpc
@@ -54,9 +58,10 @@
self._artifact_staging_stub = beam_artifact_api_pb2_grpc.\
ArtifactStagingServiceStub(channel=artifact_service_channel)
self._staging_session_token = staging_session_token
- self._artifacts = []
+ self._artifacts = [] # type: List[beam_artifact_api_pb2.ArtifactMetadata]
def stage_artifact(self, local_path_to_artifact, artifact_name):
+ # type: (str, str) -> None
"""Stage a file to ArtifactStagingService.
Args:
@@ -69,6 +74,7 @@
.format(local_path_to_artifact))
def artifact_request_generator():
+ # type: () -> Iterator[beam_artifact_api_pb2.PutArtifactRequest]
artifact_metadata = beam_artifact_api_pb2.ArtifactMetadata(
name=artifact_name,
sha256=_get_file_hash(local_path_to_artifact),
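
stage_artifact() above records each artifact's sha256 in its ArtifactMetadata. A plausible implementation of the hash helper it references, assuming a local file path (the actual helper may differ):

import hashlib

def _get_file_hash(path):
  # type: (str) -> str
  hasher = hashlib.sha256()
  with open(path, 'rb') as f:
    for chunk in iter(lambda: f.read(1 << 20), b''):
      hasher.update(chunk)
  return hasher.hexdigest()
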
diff --git a/sdks/python/apache_beam/runners/portability/portable_stager_test.py b/sdks/python/apache_beam/runners/portability/portable_stager_test.py
index fd86819..77ffda5 100644
--- a/sdks/python/apache_beam/runners/portability/portable_stager_test.py
+++ b/sdks/python/apache_beam/runners/portability/portable_stager_test.py
@@ -15,6 +15,8 @@
#
"""Test cases for :module:`artifact_service_client`."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/portability/spark_runner.py b/sdks/python/apache_beam/runners/portability/spark_runner.py
index 8c3939e..608d2b6 100644
--- a/sdks/python/apache_beam/runners/portability/spark_runner.py
+++ b/sdks/python/apache_beam/runners/portability/spark_runner.py
@@ -17,14 +17,18 @@
"""A runner for executing portable pipelines on Spark."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
import re
+import sys
from apache_beam.options import pipeline_options
from apache_beam.runners.portability import job_server
from apache_beam.runners.portability import portable_runner
+from apache_beam.runners.portability import spark_uber_jar_job_server
# https://spark.apache.org/docs/latest/submitting-applications.html#master-urls
LOCAL_MASTER_PATTERN = r'^local(\[.+\])?$'
@@ -32,7 +36,7 @@
class SparkRunner(portable_runner.PortableRunner):
def run_pipeline(self, pipeline, options):
- spark_options = options.view_as(SparkRunnerOptions)
+ spark_options = options.view_as(pipeline_options.SparkRunnerOptions)
portable_options = options.view_as(pipeline_options.PortableOptions)
if (re.match(LOCAL_MASTER_PATTERN, spark_options.spark_master_url)
and not portable_options.environment_type
@@ -41,27 +45,23 @@
return super(SparkRunner, self).run_pipeline(pipeline, options)
def default_job_server(self, options):
- # TODO(BEAM-8139) submit a Spark jar to a cluster
+ spark_options = options.view_as(pipeline_options.SparkRunnerOptions)
+ if spark_options.spark_submit_uber_jar:
+ if sys.version_info < (3, 6):
+ raise ValueError(
+ 'spark_submit_uber_jar requires Python 3.6+, current version %s'
+ % sys.version)
+ if not spark_options.spark_rest_url:
+ raise ValueError('Option spark_rest_url must be set.')
+ return spark_uber_jar_job_server.SparkUberJarJobServer(
+ spark_options.spark_rest_url, options)
return job_server.StopOnExitJobServer(SparkJarJobServer(options))
-class SparkRunnerOptions(pipeline_options.PipelineOptions):
- @classmethod
- def _add_argparse_args(cls, parser):
- parser.add_argument('--spark_master_url',
- default='local[4]',
- help='Spark master URL (spark://HOST:PORT). '
- 'Use "local" (single-threaded) or "local[*]" '
- '(multi-threaded) to start a local cluster for '
- 'the execution.')
- parser.add_argument('--spark_job_server_jar',
- help='Path or URL to a Beam Spark jobserver jar.')
-
-
class SparkJarJobServer(job_server.JavaJarJobServer):
def __init__(self, options):
super(SparkJarJobServer, self).__init__(options)
- options = options.view_as(SparkRunnerOptions)
+ options = options.view_as(pipeline_options.SparkRunnerOptions)
self._jar = options.spark_job_server_jar
self._master_url = options.spark_master_url
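
A hedged sketch of how a user opts into the new uber-jar submission path, based on the SparkRunnerOptions read above; the REST URL is an assumed example, not taken from this change:

from apache_beam.options.pipeline_options import PipelineOptions

options = PipelineOptions([
    '--runner=SparkRunner',
    '--spark_submit_uber_jar',
    '--spark_rest_url=http://spark-master:6066',  # assumed cluster address
])
# With these options, SparkRunner.default_job_server() returns a
# SparkUberJarJobServer pointed at the given REST endpoint (Python 3.6+ only).
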
diff --git a/sdks/python/apache_beam/runners/portability/spark_runner_test.py b/sdks/python/apache_beam/runners/portability/spark_runner_test.py
index 1ac5e6c..fa7e795 100644
--- a/sdks/python/apache_beam/runners/portability/spark_runner_test.py
+++ b/sdks/python/apache_beam/runners/portability/spark_runner_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server.py b/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server.py
new file mode 100644
index 0000000..146ab44
--- /dev/null
+++ b/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server.py
@@ -0,0 +1,257 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""A job server submitting portable pipelines as uber jars to Spark."""
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+from __future__ import print_function
+
+import itertools
+import logging
+import tempfile
+import time
+import zipfile
+
+import requests
+
+from apache_beam.options import pipeline_options
+from apache_beam.portability.api import beam_job_api_pb2
+from apache_beam.runners.portability import abstract_job_service
+from apache_beam.runners.portability import job_server
+from apache_beam.utils.timestamp import Timestamp
+
+_LOGGER = logging.getLogger(__name__)
+
+
+class SparkUberJarJobServer(abstract_job_service.AbstractJobServiceServicer):
+ """A Job server which submits a self-contained Jar to a Spark cluster.
+
+ The jar contains the Beam pipeline definition, dependencies, and
+ the pipeline artifacts.
+ """
+
+ def __init__(self, rest_url, options):
+ super(SparkUberJarJobServer, self).__init__()
+ self._rest_url = rest_url
+ self._executable_jar = (options.view_as(pipeline_options.SparkRunnerOptions)
+ .spark_job_server_jar)
+ self._artifact_port = (options.view_as(pipeline_options.JobServerOptions)
+ .artifact_port)
+ self._temp_dir = tempfile.mkdtemp(prefix='apache-beam-spark')
+
+ def start(self):
+ return self
+
+ def stop(self):
+ pass
+
+ def executable_jar(self):
+ url = (self._executable_jar or
+ job_server.JavaJarJobServer.path_to_beam_jar(
+ 'runners:spark:job-server:shadowJar'))
+ return job_server.JavaJarJobServer.local_jar(url)
+
+ def create_beam_job(self, job_id, job_name, pipeline, options):
+ return SparkBeamJob(
+ self._rest_url,
+ self.executable_jar(),
+ job_id,
+ job_name,
+ pipeline,
+ options,
+ artifact_port=self._artifact_port)
+
+
+class SparkBeamJob(abstract_job_service.UberJarBeamJob):
+ """Runs a single Beam job on Spark by staging all contents into a Jar
+ and uploading it via the Spark Rest API.
+
+ Note that the Spark Rest API is not enabled by default. It must be enabled by
+ setting the configuration property spark.master.rest.enabled to true."""
+
+ def __init__(
+ self, rest_url, executable_jar, job_id, job_name, pipeline, options,
+ artifact_port=0):
+ super(SparkBeamJob, self).__init__(
+ executable_jar, job_id, job_name, pipeline, options,
+ artifact_port=artifact_port)
+ self._rest_url = rest_url
+ # Message history is a superset of state history.
+ self._message_history = self._state_history[:]
+
+ def request(self, method, path, expected_status=200, **kwargs):
+ url = '%s/%s' % (self._rest_url, path)
+ response = method(url, **kwargs)
+ if response.status_code != expected_status:
+ raise RuntimeError("Request to %s failed with status %d: %s" %
+ (url, response.status_code, response.text))
+ if response.text:
+ return response.json()
+
+ def get(self, path, **kwargs):
+ return self.request(requests.get, path, **kwargs)
+
+ def post(self, path, **kwargs):
+ return self.request(requests.post, path, **kwargs)
+
+ def delete(self, path, **kwargs):
+ return self.request(requests.delete, path, **kwargs)
+
+ def _get_server_spark_version(self):
+ # Spark REST API doesn't seem to offer a dedicated endpoint for getting the
+ # version, but it does include the version in all responses, even errors.
+ return self.get('', expected_status=400)['serverSparkVersion']
+
+ def _get_client_spark_version_from_properties(self, jar):
+ """Parse Spark version from spark-version-info.properties file in the jar.
+ https://github.com/apache/spark/blob/dddfeca175bdce5294debe00d4a993daef92ca60/build/spark-build-info#L30
+ """
+ with zipfile.ZipFile(jar, 'a', compression=zipfile.ZIP_DEFLATED) as z:
+ with z.open('spark-version-info.properties') as fin:
+ for line in fin.read().decode('utf-8').splitlines():
+ split = list(map(lambda s: s.strip(), line.split('=')))
+ if len(split) == 2 and split[0] == 'version' and split[1] != '':
+ return split[1]
+ raise ValueError(
+ 'Property "version" not found in spark-version-info.properties.')
+
+ def _get_client_spark_version(self, jar):
+ try:
+ return self._get_client_spark_version_from_properties(jar)
+ except Exception as e:
+ _LOGGER.debug(e)
+ server_version = self._get_server_spark_version()
+ _LOGGER.warning('Unable to parse Spark version from '
+ 'spark-version-info.properties. Defaulting to %s' %
+ server_version)
+ return server_version
+
+ def _create_submission_request(self, jar, job_name):
+ jar_url = "file:%s" % jar
+ return {
+ "action": "CreateSubmissionRequest",
+ "appArgs": [],
+ "appResource": jar_url,
+ "clientSparkVersion": self._get_client_spark_version(jar),
+ "environmentVariables": {},
+ "mainClass": "org.apache.beam.runners.spark.SparkPipelineRunner",
+ "sparkProperties": {
+ "spark.jars": jar_url,
+ "spark.app.name": job_name,
+ "spark.submit.deployMode": "cluster",
+ }
+ }
+
+ def run(self):
+ self._stop_artifact_service()
+ # Move the artifact manifest to the expected location.
+ with zipfile.ZipFile(self._jar, 'a', compression=zipfile.ZIP_DEFLATED) as z:
+ with z.open(self._artifact_manifest_location) as fin:
+ manifest_contents = fin.read()
+ with z.open(self.ARTIFACT_MANIFEST_PATH, 'w') as fout:
+ fout.write(manifest_contents)
+
+ # Upload the jar and start the job.
+ self._spark_submission_id = self.post(
+ 'v1/submissions/create',
+ json=self._create_submission_request(self._jar, self._job_name)
+ )['submissionId']
+ _LOGGER.info('Submitted Spark job with ID %s' % self._spark_submission_id)
+
+ def cancel(self):
+ self.post('v1/submissions/kill/%s' % self._spark_submission_id)
+
+ @staticmethod
+ def _get_beam_state(spark_response):
+ return {
+ 'SUBMITTED': beam_job_api_pb2.JobState.STARTING,
+ 'RUNNING': beam_job_api_pb2.JobState.RUNNING,
+ 'FINISHED': beam_job_api_pb2.JobState.DONE,
+ 'RELAUNCHING': beam_job_api_pb2.JobState.RUNNING,
+ 'UNKNOWN': beam_job_api_pb2.JobState.UNSPECIFIED,
+ 'KILLED': beam_job_api_pb2.JobState.CANCELLED,
+ 'FAILED': beam_job_api_pb2.JobState.FAILED,
+ 'ERROR': beam_job_api_pb2.JobState.FAILED,
+ }.get(spark_response['driverState'], beam_job_api_pb2.JobState.UNSPECIFIED)
+
+ def _get_spark_status(self):
+ return self.get('v1/submissions/status/%s' % self._spark_submission_id)
+
+ def get_state(self):
+ response = self._get_spark_status()
+ state = self._get_beam_state(response)
+ timestamp = self.set_state(state)
+ if timestamp is None:
+ # State has not changed since last check. Use previous timestamp.
+ return super(SparkBeamJob, self).get_state()
+ else:
+ return state, timestamp
+
+ def _with_message_history(self, message_stream):
+ return itertools.chain(self._message_history[:], message_stream)
+
+ def _get_message_iter(self):
+ """Returns an iterator of messages from the Spark server.
+ Note that while message history is de-duped, this function's returned
+ iterator may contain duplicate values."""
+ sleep_secs = 1.0
+ message_ix = 0
+ while True:
+ response = self._get_spark_status()
+ state = self._get_beam_state(response)
+ timestamp = Timestamp.now()
+ message = None
+ if 'message' in response:
+ importance = (
+ beam_job_api_pb2.JobMessage.MessageImportance.JOB_MESSAGE_ERROR if
+ state == beam_job_api_pb2.JobState.FAILED else
+ beam_job_api_pb2.JobMessage.MessageImportance.JOB_MESSAGE_BASIC)
+ message = beam_job_api_pb2.JobMessage(
+ message_id='message%d' % message_ix,
+ time=str(int(timestamp)),
+ importance=importance,
+ message_text=response['message'])
+ yield message
+ message_ix += 1
+ # TODO(BEAM-8983) In the event of a failure, query
+ # additional info from Spark master and/or workers.
+ check_timestamp = self.set_state(state)
+ if check_timestamp is not None:
+ if message:
+ self._message_history.append(message)
+ self._message_history.append((state, check_timestamp))
+ yield state, timestamp
+ sleep_secs = min(60, sleep_secs * 1.2)
+ time.sleep(sleep_secs)
+
+ def get_state_stream(self):
+ for msg in self._with_message_history(self._get_message_iter()):
+ if isinstance(msg, tuple):
+ state, timestamp = msg
+ yield state, timestamp
+ if self.is_terminal_state(state):
+ break
+
+ def get_message_stream(self):
+ for msg in self._with_message_history(self._get_message_iter()):
+ yield msg
+ if isinstance(msg, tuple):
+ state, _ = msg
+ if self.is_terminal_state(state):
+ break
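
For orientation, a hedged sketch of the status poll that _get_spark_status() performs, written out directly with requests; the master address and submission id are assumptions:

import requests

resp = requests.get(
    'http://spark-master:6066/v1/submissions/status/submission-id')
resp.raise_for_status()
print(resp.json().get('driverState'))  # e.g. SUBMITTED, RUNNING, FINISHED
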
diff --git a/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server_test.py b/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server_test.py
new file mode 100644
index 0000000..e999480
--- /dev/null
+++ b/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server_test.py
@@ -0,0 +1,211 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# pytype: skip-file
+
+from __future__ import absolute_import
+from __future__ import print_function
+
+import contextlib
+import logging
+import os
+import sys
+import tempfile
+import unittest
+import zipfile
+
+import freezegun
+import grpc
+import requests_mock
+
+from apache_beam.options import pipeline_options
+from apache_beam.portability.api import beam_artifact_api_pb2
+from apache_beam.portability.api import beam_artifact_api_pb2_grpc
+from apache_beam.portability.api import beam_job_api_pb2
+from apache_beam.portability.api import beam_runner_api_pb2
+from apache_beam.runners.portability import spark_uber_jar_job_server
+
+
+@contextlib.contextmanager
+def temp_name(*args, **kwargs):
+ with tempfile.NamedTemporaryFile(*args, **kwargs) as t:
+ name = t.name
+ yield name
+ if os.path.exists(name):
+ os.unlink(name)
+
+def spark_job():
+ return spark_uber_jar_job_server.SparkBeamJob(
+ 'http://host:6066', '', '', '', '', '',
+ pipeline_options.SparkRunnerOptions())
+
+
+@unittest.skipIf(sys.version_info < (3, 6), "Requires Python 3.6+")
+class SparkUberJarJobServerTest(unittest.TestCase):
+
+ @requests_mock.mock()
+ def test_get_server_spark_version(self, http_mock):
+ http_mock.get('http://host:6066', json={
+ "action": "ErrorResponse",
+ "message": "Missing protocol version. Please submit requests through "
+ "http://[host]:[port]/v1/submissions/...",
+ "serverSparkVersion": "1.2.3"
+ }, status_code=400)
+ self.assertEqual(spark_job()._get_server_spark_version(), "1.2.3")
+
+ def test_get_client_spark_version_from_properties(self):
+ with temp_name(suffix='fake.jar') as fake_jar:
+ with zipfile.ZipFile(fake_jar, 'w') as zip:
+ with zip.open('spark-version-info.properties', 'w') as fout:
+ fout.write(b'version=4.5.6')
+ self.assertEqual(spark_job().
+ _get_client_spark_version_from_properties(fake_jar),
+ "4.5.6")
+
+ def test_get_client_spark_version_from_properties_no_properties_file(self):
+ with self.assertRaises(KeyError):
+ with temp_name(suffix='fake.jar') as fake_jar:
+ with zipfile.ZipFile(fake_jar, 'w') as zip:
+ # Write some other file to the jar.
+ with zip.open('FakeClass.class', 'w') as fout:
+ fout.write(b'[original_contents]')
+ spark_job()._get_client_spark_version_from_properties(fake_jar)
+
+ def test_get_client_spark_version_from_properties_missing_version(self):
+ with self.assertRaises(ValueError):
+ with temp_name(suffix='fake.jar') as fake_jar:
+ with zipfile.ZipFile(fake_jar, 'w') as zip:
+ with zip.open('spark-version-info.properties', 'w') as fout:
+ fout.write(b'version=')
+ spark_job()._get_client_spark_version_from_properties(fake_jar)
+
+ @requests_mock.mock()
+ @freezegun.freeze_time("1970-01-01")
+ def test_end_to_end(self, http_mock):
+ submission_id = "submission-id"
+ worker_host_port = "workerhost:12345"
+ worker_id = "worker-id"
+ server_spark_version = "1.2.3"
+
+ def spark_submission_status_response(state):
+ return {
+ 'json': {
+ "action": "SubmissionStatusResponse",
+ "driverState": state,
+ "serverSparkVersion": server_spark_version,
+ "submissionId": submission_id,
+ "success": "true",
+ "workerHostPort": worker_host_port,
+ "workerId": worker_id
+ }
+ }
+
+ with temp_name(suffix='fake.jar') as fake_jar:
+ with zipfile.ZipFile(fake_jar, 'w') as zip:
+ with zip.open('spark-version-info.properties', 'w') as fout:
+ fout.write(b'version=4.5.6')
+
+ options = pipeline_options.SparkRunnerOptions()
+ options.spark_job_server_jar = fake_jar
+ job_server = spark_uber_jar_job_server.SparkUberJarJobServer(
+ 'http://host:6066', options)
+
+ # Prepare the job.
+ prepare_response = job_server.Prepare(
+ beam_job_api_pb2.PrepareJobRequest(
+ job_name='job',
+ pipeline=beam_runner_api_pb2.Pipeline()))
+ channel = grpc.insecure_channel(
+ prepare_response.artifact_staging_endpoint.url)
+ retrieval_token = beam_artifact_api_pb2_grpc.ArtifactStagingServiceStub(
+ channel).CommitManifest(
+ beam_artifact_api_pb2.CommitManifestRequest(
+ staging_session_token=prepare_response.staging_session_token,
+ manifest=beam_artifact_api_pb2.Manifest())
+ ).retrieval_token
+ channel.close()
+
+ # Now actually run the job.
+ http_mock.post(
+ 'http://host:6066/v1/submissions/create',
+ json={
+ "action": "CreateSubmissionResponse",
+ "message": "Driver successfully submitted as submission-id",
+ "serverSparkVersion": "1.2.3",
+ "submissionId": "submission-id",
+ "success": "true"
+ })
+ job_server.Run(
+ beam_job_api_pb2.RunJobRequest(
+ preparation_id=prepare_response.preparation_id,
+ retrieval_token=retrieval_token))
+
+ # Check the status until the job is "done" and get all error messages.
+ http_mock.get(
+ 'http://host:6066/v1/submissions/status/submission-id',
+ [spark_submission_status_response('RUNNING'),
+ spark_submission_status_response('RUNNING'),
+ {
+ 'json': {
+ "action": "SubmissionStatusResponse",
+ "driverState": "ERROR",
+ "message": "oops",
+ "serverSparkVersion": "1.2.3",
+ "submissionId": submission_id,
+ "success": "true",
+ "workerHostPort": worker_host_port,
+ "workerId": worker_id
+ }}])
+
+ state_stream = job_server.GetStateStream(
+ beam_job_api_pb2.GetJobStateRequest(
+ job_id=prepare_response.preparation_id))
+
+ self.assertEqual(
+ [s.state for s in state_stream],
+ [beam_job_api_pb2.JobState.STOPPED,
+ beam_job_api_pb2.JobState.RUNNING,
+ beam_job_api_pb2.JobState.RUNNING,
+ beam_job_api_pb2.JobState.FAILED])
+
+ message_stream = job_server.GetMessageStream(
+ beam_job_api_pb2.JobMessagesRequest(
+ job_id=prepare_response.preparation_id))
+
+ def get_item(x):
+ if x.HasField('message_response'):
+ return x.message_response
+ else:
+ return x.state_response.state
+
+ self.assertEqual(
+ [get_item(m) for m in message_stream],
+ [
+ beam_job_api_pb2.JobState.STOPPED,
+ beam_job_api_pb2.JobState.RUNNING,
+ beam_job_api_pb2.JobMessage(
+ message_id='message0',
+ time='0',
+ importance=beam_job_api_pb2.JobMessage.MessageImportance
+ .JOB_MESSAGE_ERROR,
+ message_text="oops"),
+ beam_job_api_pb2.JobState.FAILED,
+ ])
+
+
+if __name__ == '__main__':
+ logging.getLogger().setLevel(logging.INFO)
+ unittest.main()
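
A small sketch of the requests_mock pattern these tests rely on: HTTP calls made with requests inside the mock's scope are intercepted, so no Spark cluster is needed (values here are illustrative):

import requests
import requests_mock

with requests_mock.Mocker() as http_mock:
  http_mock.get('http://host:6066',
                json={'serverSparkVersion': '1.2.3'},
                status_code=400)
  resp = requests.get('http://host:6066')
  assert resp.json()['serverSparkVersion'] == '1.2.3'
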
diff --git a/sdks/python/apache_beam/runners/portability/stager.py b/sdks/python/apache_beam/runners/portability/stager.py
index f884102..0f989dc 100644
--- a/sdks/python/apache_beam/runners/portability/stager.py
+++ b/sdks/python/apache_beam/runners/portability/stager.py
@@ -44,6 +44,8 @@
TODO(silviuc): Should we allow several setup packages?
TODO(silviuc): We should allow customizing the exact command for setup build.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import glob
@@ -52,6 +54,8 @@
import shutil
import sys
import tempfile
+from typing import List
+from typing import Optional
import pkg_resources
@@ -59,6 +63,7 @@
from apache_beam.internal.http_client import get_new_http
from apache_beam.io.filesystems import FileSystems
from apache_beam.options.pipeline_options import DebugOptions
+from apache_beam.options.pipeline_options import PipelineOptions # pylint: disable=unused-import
from apache_beam.options.pipeline_options import SetupOptions
from apache_beam.options.pipeline_options import WorkerOptions
# TODO(angoenka): Remove reference to dataflow internal names
@@ -94,6 +99,7 @@
"""
def stage_artifact(self, local_path_to_artifact, artifact_name):
+ # type: (str, str) -> None
""" Stages the artifact to Stager._staging_location and adds artifact_name
to the manifest of artifacts that have been staged."""
raise NotImplementedError
@@ -109,11 +115,12 @@
return names.BEAM_PACKAGE_NAME
def stage_job_resources(self,
- options,
- build_setup_args=None,
- temp_dir=None,
- populate_requirements_cache=None,
- staging_location=None):
+ options, # type: PipelineOptions
+ build_setup_args=None, # type: Optional[List[str]]
+ temp_dir=None, # type: Optional[str]
+ populate_requirements_cache=None, # type: Optional[str]
+ staging_location=None # type: Optional[str]
+ ):
"""For internal use only; no backwards-compatibility guarantees.
Creates (if needed) and stages job resources to staging_location.
@@ -141,7 +148,7 @@
while trying to create the resources (e.g., build a setup package).
"""
temp_dir = temp_dir or tempfile.mkdtemp()
- resources = []
+ resources = [] # type: List[str]
setup_options = options.view_as(SetupOptions)
# Make sure that all required options are specified.
@@ -322,6 +329,7 @@
return path.find('://') != -1
def _stage_jar_packages(self, jar_packages, staging_location, temp_dir):
+ # type: (...) -> List[str]
"""Stages a list of local jar packages for Java SDK Harness.
:param jar_packages: Ordered list of local paths to jar packages to be
@@ -334,9 +342,9 @@
RuntimeError: If files specified are not found or do not have expected
name patterns.
"""
- resources = []
+ resources = [] # type: List[str]
staging_temp_dir = tempfile.mkdtemp(dir=temp_dir)
- local_packages = []
+ local_packages = [] # type: List[str]
for package in jar_packages:
if not os.path.basename(package).endswith('.jar'):
raise RuntimeError(
@@ -372,6 +380,7 @@
return resources
def _stage_extra_packages(self, extra_packages, staging_location, temp_dir):
+ # type: (...) -> List[str]
"""Stages a list of local extra packages.
Args:
@@ -390,9 +399,9 @@
RuntimeError: If files specified are not found or do not have expected
name patterns.
"""
- resources = []
+ resources = [] # type: List[str]
staging_temp_dir = tempfile.mkdtemp(dir=temp_dir)
- local_packages = []
+ local_packages = [] # type: List[str]
for package in extra_packages:
if not (os.path.basename(package).endswith('.tar') or
os.path.basename(package).endswith('.tar.gz') or
@@ -490,7 +499,11 @@
processes.check_output(cmd_args, stderr=processes.STDOUT)
@staticmethod
- def _build_setup_package(setup_file, temp_dir, build_setup_args=None):
+ def _build_setup_package(setup_file, # type: str
+ temp_dir, # type: str
+ build_setup_args=None # type: Optional[List[str]]
+ ):
+ # type: (...) -> str
saved_current_directory = os.getcwd()
try:
os.chdir(os.path.dirname(setup_file))
@@ -511,6 +524,7 @@
@staticmethod
def _desired_sdk_filename_in_staging_location(sdk_location):
+ # type: (...) -> str
"""Returns the name that SDK file should have in the staging location.
Args:
sdk_location: Full path to SDK file.
@@ -525,6 +539,7 @@
return DATAFLOW_SDK_TARBALL_FILE
def _stage_beam_sdk(self, sdk_remote_location, staging_location, temp_dir):
+ # type: (...) -> List[str]
"""Stages a Beam SDK file with the appropriate version.
Args:
diff --git a/sdks/python/apache_beam/runners/portability/stager_test.py b/sdks/python/apache_beam/runners/portability/stager_test.py
index 9cf91cb..180ba8a 100644
--- a/sdks/python/apache_beam/runners/portability/stager_test.py
+++ b/sdks/python/apache_beam/runners/portability/stager_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for the stager module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -24,6 +26,7 @@
import sys
import tempfile
import unittest
+from typing import List
import mock
import pytest
@@ -69,7 +72,7 @@
def is_remote_path(self, path):
return path.startswith('/tmp/remote/')
- remote_copied_files = []
+ remote_copied_files = [] # type: List[str]
def file_copy(self, from_path, to_path):
if self.is_remote_path(from_path):
diff --git a/sdks/python/apache_beam/runners/runner.py b/sdks/python/apache_beam/runners/runner.py
index 7e1778d..bb74e62 100644
--- a/sdks/python/apache_beam/runners/runner.py
+++ b/sdks/python/apache_beam/runners/runner.py
@@ -17,6 +17,8 @@
"""PipelineRunner, an abstract base runner object."""
+# pytype: skip-file
+
from __future__ import absolute_import
import importlib
@@ -26,6 +28,16 @@
import shutil
import tempfile
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Optional
+
+if TYPE_CHECKING:
+ from apache_beam import pvalue
+ from apache_beam import PTransform
+ from apache_beam.options.pipeline_options import PipelineOptions
+ from apache_beam.pipeline import AppliedPTransform
+ from apache_beam.pipeline import Pipeline
+ from apache_beam.pipeline import PipelineVisitor
__all__ = ['PipelineRunner', 'PipelineState', 'PipelineResult']
@@ -57,6 +69,7 @@
def create_runner(runner_name):
+ # type: (str) -> PipelineRunner
"""For internal use only; no backwards-compatibility guarantees.
Creates a runner instance from a runner class name.
@@ -113,7 +126,11 @@
materialized values in order to reduce footprint.
"""
- def run(self, transform, options=None):
+ def run(self,
+ transform, # type: PTransform
+ options=None # type: Optional[PipelineOptions]
+ ):
+ # type: (...) -> PipelineResult
"""Run the given transform or callable with this runner.
Blocks until the pipeline is complete. See also `PipelineRunner.run_async`.
@@ -122,7 +139,11 @@
result.wait_until_finish()
return result
- def run_async(self, transform, options=None):
+ def run_async(self,
+ transform, # type: PTransform
+ options=None # type: Optional[PipelineOptions]
+ ):
+ # type: (...) -> PipelineResult
"""Run the given transform or callable with this runner.
May return immediately, executing the pipeline in the background.
@@ -141,31 +162,22 @@
transform(PBegin(p))
return p.run()
- def run_pipeline(self, pipeline, options):
+ def run_pipeline(self,
+ pipeline, # type: Pipeline
+ options # type: PipelineOptions
+ ):
+ # type: (...) -> PipelineResult
"""Execute the entire pipeline or the sub-DAG reachable from a node.
Runners should override this method.
"""
+ raise NotImplementedError
- # Imported here to avoid circular dependencies.
- # pylint: disable=wrong-import-order, wrong-import-position
- from apache_beam.pipeline import PipelineVisitor
-
- class RunVisitor(PipelineVisitor):
-
- def __init__(self, runner):
- self.runner = runner
-
- def visit_transform(self, transform_node):
- try:
- self.runner.run_transform(transform_node, options)
- except:
- _LOGGER.error('Error while visiting %s', transform_node.full_label)
- raise
-
- pipeline.visit(RunVisitor(self))
-
- def apply(self, transform, input, options):
+ def apply(self,
+ transform, # type: PTransform
+ input, # type: pvalue.PCollection
+ options # type: PipelineOptions
+ ):
"""Runner callback for a pipeline.apply call.
Args:
@@ -184,11 +196,38 @@
raise NotImplementedError(
'Execution of [%s] not implemented in runner %s.' % (transform, self))
+ def visit_transforms(self,
+ pipeline, # type: Pipeline
+ options # type: PipelineOptions
+ ):
+ # type: (...) -> None
+ # Imported here to avoid circular dependencies.
+ # pylint: disable=wrong-import-order, wrong-import-position
+ from apache_beam.pipeline import PipelineVisitor
+
+ class RunVisitor(PipelineVisitor):
+
+ def __init__(self, runner):
+ # type: (PipelineRunner) -> None
+ self.runner = runner
+
+ def visit_transform(self, transform_node):
+ try:
+ self.runner.run_transform(transform_node, options)
+ except:
+ _LOGGER.error('Error while visiting %s', transform_node.full_label)
+ raise
+
+ pipeline.visit(RunVisitor(self))
+
def apply_PTransform(self, transform, input, options):
# The base case of apply is to call the transform's expand.
return transform.expand(input)
- def run_transform(self, transform_node, options):
+ def run_transform(self,
+ transform_node, # type: AppliedPTransform
+ options # type: PipelineOptions
+ ):
"""Runner callback for a pipeline.run call.
Args:
@@ -306,6 +345,7 @@
return self.to_cache_key(pobj.real_producer, pobj.tag)
+# FIXME: replace with PipelineState(str, enum.Enum)
class PipelineState(object):
"""State of the Pipeline, as returned by :attr:`PipelineResult.state`.
diff --git a/sdks/python/apache_beam/runners/runner_test.py b/sdks/python/apache_beam/runners/runner_test.py
index 914fa12..b26134f 100644
--- a/sdks/python/apache_beam/runners/runner_test.py
+++ b/sdks/python/apache_beam/runners/runner_test.py
@@ -22,6 +22,8 @@
caching and clearing values that are not tested elsewhere.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/runners/worker/bundle_processor.py b/sdks/python/apache_beam/runners/worker/bundle_processor.py
index fd2528d..052582c 100644
--- a/sdks/python/apache_beam/runners/worker/bundle_processor.py
+++ b/sdks/python/apache_beam/runners/worker/bundle_processor.py
@@ -17,6 +17,8 @@
"""SDK harness for executing Python Fns via the Fn API."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -30,6 +32,23 @@
import threading
from builtins import next
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Callable
+from typing import Container
+from typing import DefaultDict
+from typing import Dict
+from typing import Iterable
+from typing import Iterator
+from typing import List
+from typing import Mapping
+from typing import Optional
+from typing import Set
+from typing import Tuple
+from typing import Type
+from typing import TypeVar
+from typing import Union
+from typing import cast
from future.utils import itervalues
from google.protobuf import duration_pb2
@@ -58,8 +77,25 @@
from apache_beam.utils import timestamp
from apache_beam.utils import windowed_value
-# This module is experimental. No backwards-compatibility guarantees.
+if TYPE_CHECKING:
+ from google.protobuf import message # pylint: disable=ungrouped-imports
+ from apache_beam import pvalue
+ from apache_beam.portability.api import metrics_pb2
+ from apache_beam.runners.worker import data_plane
+ from apache_beam.runners.worker import sdk_worker
+ from apache_beam.transforms import window
+ from apache_beam.utils.timestamp import Timestamp
+# This module is experimental. No backwards-compatibility guarantees.
+T = TypeVar('T')
+ConstructorFn = Callable[
+ ['BeamTransformFactory',
+ Any,
+ beam_runner_api_pb2.PTransform,
+ Union['message.Message', bytes],
+ Dict[str, List[operations.Operation]]],
+ operations.Operation]
+OperationT = TypeVar('OperationT', bound=operations.Operation)
DATA_INPUT_URN = 'beam:source:runner:0.1'
DATA_OUTPUT_URN = 'beam:sink:runner:0.1'
@@ -76,8 +112,17 @@
class RunnerIOOperation(operations.Operation):
"""Common baseclass for runner harness IO operations."""
- def __init__(self, name_context, step_name, consumers, counter_factory,
- state_sampler, windowed_coder, transform_id, data_channel):
+ def __init__(self,
+ name_context, # type: Union[str, common.NameContext]
+ step_name,
+ consumers, # type: Mapping[Any, Iterable[operations.Operation]]
+ counter_factory,
+ state_sampler,
+ windowed_coder, # type: coders.Coder
+ transform_id, # type: str
+ data_channel # type: data_plane.DataChannel
+ ):
+ # type: (...) -> None
super(RunnerIOOperation, self).__init__(
name_context, None, counter_factory, state_sampler)
self.windowed_coder = windowed_coder
@@ -96,14 +141,17 @@
"""
def set_output_stream(self, output_stream):
+ # type: (data_plane.ClosableOutputStream) -> None
self.output_stream = output_stream
def process(self, windowed_value):
+ # type: (windowed_value.WindowedValue) -> None
self.windowed_coder_impl.encode_to_stream(
windowed_value, self.output_stream, True)
self.output_stream.maybe_flush()
def finish(self):
+ # type: () -> None
self.output_stream.close()
super(DataOutputOperation, self).finish()
@@ -111,8 +159,17 @@
class DataInputOperation(RunnerIOOperation):
"""A source-like operation that gathers input from the runner."""
- def __init__(self, operation_name, step_name, consumers, counter_factory,
- state_sampler, windowed_coder, transform_id, data_channel):
+ def __init__(self,
+ operation_name, # type: str
+ step_name,
+ consumers, # type: Mapping[Any, Iterable[operations.Operation]]
+ counter_factory,
+ state_sampler,
+ windowed_coder, # type: coders.Coder
+ transform_id,
+ data_channel # type: data_plane.GrpcClientDataChannel
+ ):
+ # type: (...) -> None
super(DataInputOperation, self).__init__(
operation_name, step_name, consumers, counter_factory, state_sampler,
windowed_coder, transform_id=transform_id, data_channel=data_channel)
@@ -125,6 +182,7 @@
self.started = False
def start(self):
+ # type: () -> None
super(DataInputOperation, self).start()
with self.splitting_lock:
self.index = -1
@@ -132,9 +190,11 @@
self.started = True
def process(self, windowed_value):
+ # type: (windowed_value.WindowedValue) -> None
self.output(windowed_value)
def process_encoded(self, encoded_windowed_values):
+ # type: (bytes) -> None
input_stream = coder_impl.create_InputStream(encoded_windowed_values)
while input_stream.size() > 0:
with self.splitting_lock:
@@ -188,6 +248,7 @@
return self.stop - 1, None, None, self.stop
def progress_metrics(self):
+ # type: () -> beam_fn_api_pb2.Metrics.PTransform
with self.splitting_lock:
metrics = super(DataInputOperation, self).progress_metrics()
current_element_progress = self.receivers[0].current_element_progress()
@@ -197,13 +258,19 @@
return metrics
def finish(self):
+ # type: () -> None
with self.splitting_lock:
self.started = False
class _StateBackedIterable(object):
- def __init__(self, state_handler, state_key, coder_or_impl,
- is_cached=False):
+ def __init__(self,
+ state_handler,
+ state_key, # type: beam_fn_api_pb2.StateKey
+ coder_or_impl, # type: Union[coders.Coder, coder_impl.CoderImpl]
+ is_cached=False
+ ):
+ # type: (...) -> None
self._state_handler = state_handler
self._state_key = state_key
if isinstance(coder_or_impl, coders.Coder):
@@ -213,6 +280,7 @@
self._is_cached = is_cached
def __iter__(self):
+ # type: () -> Iterator[Any]
return self._state_handler.blocking_get(
self._state_key, self._coder_impl, is_cached=self._is_cached)
@@ -225,7 +293,14 @@
class StateBackedSideInputMap(object):
- def __init__(self, state_handler, transform_id, tag, side_input_data, coder):
+ def __init__(self,
+ state_handler,
+ transform_id, # type: str
+ tag, # type: Optional[str]
+ side_input_data, # type: pvalue.SideInputData
+ coder # type: WindowedValueCoder
+ ):
+ # type: (...) -> None
self._state_handler = state_handler
self._transform_id = transform_id
self._tag = tag
@@ -233,7 +308,7 @@
self._element_coder = coder.wrapped_value_coder
self._target_window_coder = coder.window_coder
# TODO(robertwb): Limit the cache size.
- self._cache = {}
+ self._cache = {} # type: Dict[window.BoundedWindow, Any]
def __getitem__(self, window):
target_window = self._side_input_data.window_mapping_fn(window)
@@ -286,10 +361,12 @@
return self._cache[target_window]
def is_globally_windowed(self):
+ # type: () -> bool
return (self._side_input_data.window_mapping_fn
== sideinputs._global_window_mapping_fn)
def reset(self):
+ # type: () -> None
# TODO(BEAM-5428): Cross-bundle caching respecting cache tokens.
self._cache = {}
@@ -309,9 +386,11 @@
return merged_accumulator
def read(self):
+ # type: () -> Iterable[Any]
return self._combinefn.extract_output(self._read_accumulator())
def add(self, value):
+ # type: (Any) -> None
# Prefer blind writes, but don't let them grow unboundedly.
# This should be tuned to be much lower, but for now exercise
# both paths well.
@@ -324,10 +403,11 @@
self._combinefn.add_input(accumulator, value))
def clear(self):
+ # type: () -> None
self._underlying_bag_state.clear()
- def _commit(self):
- self._underlying_bag_state._commit()
+ def commit(self):
+ self._underlying_bag_state.commit()
class _ConcatIterable(object):
@@ -336,10 +416,12 @@
Unlike itertools.chain, this allows reiteration.
"""
def __init__(self, first, second):
+ # type: (Iterable[Any], Iterable[Any]) -> None
self.first = first
self.second = second
def __iter__(self):
+ # type: () -> Iterator[Any]
for elem in self.first:
yield elem
for elem in self.second:
@@ -351,28 +433,37 @@
class SynchronousBagRuntimeState(userstate.BagRuntimeState):
- def __init__(self, state_handler, state_key, value_coder):
+ def __init__(self,
+ state_handler,
+ state_key, # type: beam_fn_api_pb2.StateKey
+ value_coder # type: coders.Coder
+ ):
+ # type: (...) -> None
self._state_handler = state_handler
self._state_key = state_key
self._value_coder = value_coder
self._cleared = False
- self._added_elements = []
+ self._added_elements = [] # type: List[Any]
def read(self):
+ # type: () -> Iterable[Any]
return _ConcatIterable(
- [] if self._cleared else _StateBackedIterable(
+ [] if self._cleared
+ else cast('Iterable[Any]', _StateBackedIterable(
self._state_handler, self._state_key, self._value_coder,
- is_cached=True),
+ is_cached=True)),
self._added_elements)
def add(self, value):
+ # type: (Any) -> None
self._added_elements.append(value)
def clear(self):
+ # type: () -> None
self._cleared = True
self._added_elements = []
- def _commit(self):
+ def commit(self):
to_await = None
if self._cleared:
to_await = self._state_handler.clear(self._state_key, is_cached=True)
@@ -389,12 +480,17 @@
class SynchronousSetRuntimeState(userstate.SetRuntimeState):
- def __init__(self, state_handler, state_key, value_coder):
+ def __init__(self,
+ state_handler,
+ state_key, # type: beam_fn_api_pb2.StateKey
+ value_coder # type: coders.Coder
+ ):
+ # type: (...) -> None
self._state_handler = state_handler
self._state_key = state_key
self._value_coder = value_coder
self._cleared = False
- self._added_elements = set()
+ self._added_elements = set() # type: Set[Any]
def _compact_data(self, rewrite=True):
accumulator = set(_ConcatIterable(
@@ -418,9 +514,11 @@
return accumulator
def read(self):
+ # type: () -> Set[Any]
return self._compact_data(rewrite=False)
def add(self, value):
+ # type: (Any) -> None
if self._cleared:
# This is a good time to explicitly clear.
self._state_handler.clear(self._state_key, is_cached=True)
@@ -431,10 +529,12 @@
self._compact_data()
def clear(self):
+ # type: () -> None
self._cleared = True
self._added_elements = set()
- def _commit(self):
+ def commit(self):
+ # type: () -> None
to_await = None
if self._cleared:
to_await = self._state_handler.clear(self._state_key, is_cached=True)
@@ -450,7 +550,11 @@
class OutputTimer(object):
- def __init__(self, key, window, receiver):
+ def __init__(self,
+ key,
+ window, # type: windowed_value.BoundedWindow
+ receiver # type: operations.ConsumerSet
+ ):
self._key = key
self._window = window
self._receiver = receiver
@@ -462,6 +566,7 @@
(self._key, dict(timestamp=ts)), ts, (self._window,)))
def clear(self):
+ # type: () -> None
dummy_millis = int(common_urns.constants.MAX_TIMESTAMP_MILLIS.constant) + 1
clear_ts = timestamp.Timestamp(micros=dummy_millis * 1000)
self._receiver.receive(
@@ -472,8 +577,14 @@
class FnApiUserStateContext(userstate.UserStateContext):
"""Interface for state and timers from SDK to Fn API servicer of state.."""
- def __init__(
- self, state_handler, transform_id, key_coder, window_coder, timer_specs):
+ def __init__(self,
+ state_handler,
+ transform_id, # type: str
+ key_coder, # type: coders.Coder
+ window_coder, # type: coders.Coder
+ timer_specs # type: Mapping[str, beam_runner_api_pb2.TimerSpec]
+ ):
+ # type: (...) -> None
"""Initialize a ``FnApiUserStateContext``.
Args:
@@ -489,16 +600,22 @@
self._key_coder = key_coder
self._window_coder = window_coder
self._timer_specs = timer_specs
- self._timer_receivers = None
- self._all_states = {}
+ self._timer_receivers = None # type: Optional[Dict[str, operations.ConsumerSet]]
+ self._all_states = {} # type: Dict[tuple, userstate.AccumulatingRuntimeState]
def update_timer_receivers(self, receivers):
+ # type: (operations._TaggedReceivers) -> None
"""TODO"""
self._timer_receivers = {}
for tag in self._timer_specs:
self._timer_receivers[tag] = receivers.pop(tag)
- def get_timer(self, timer_spec, key, window):
+ def get_timer(self,
+ timer_spec,
+ key,
+ window # type: windowed_value.BoundedWindow
+ ):
+ # type: (...) -> OutputTimer
return OutputTimer(
key, window, self._timer_receivers[timer_spec.name])
@@ -508,7 +625,12 @@
state_handle = self._all_states[args] = self._create_state(*args)
return state_handle
- def _create_state(self, state_spec, key, window):
+ def _create_state(self,
+ state_spec, # type: userstate.StateSpec
+ key,
+ window # type: windowed_value.BoundedWindow
+ ):
+ # type: (...) -> userstate.AccumulatingRuntimeState
if isinstance(state_spec,
(userstate.BagStateSpec, userstate.CombiningValueStateSpec)):
bag_state = SynchronousBagRuntimeState(
@@ -540,10 +662,12 @@
raise NotImplementedError(state_spec)
def commit(self):
+ # type: () -> None
for state in self._all_states.values():
- state._commit()
+ state.commit()
def reset(self):
+ # type: () -> None
# TODO(BEAM-5428): Implement cross-bundle state caching.
self._all_states = {}
@@ -561,6 +685,7 @@
def only_element(iterable):
+ # type: (Iterable[T]) -> T
element, = iterable
return element
@@ -568,8 +693,12 @@
class BundleProcessor(object):
""" A class for processing bundles of elements. """
- def __init__(
- self, process_bundle_descriptor, state_handler, data_channel_factory):
+ def __init__(self,
+ process_bundle_descriptor, # type: beam_fn_api_pb2.ProcessBundleDescriptor
+ state_handler, # type: Union[FnApiRunner.StateServicer, GrpcStateHandler]
+ data_channel_factory # type: data_plane.DataChannelFactory
+ ):
+ # type: (...) -> None
"""Initialize a bundle processor.
Args:
@@ -592,7 +721,10 @@
op.setup()
self.splitting_lock = threading.Lock()
- def create_execution_tree(self, descriptor):
+ def create_execution_tree(self,
+ descriptor # type: beam_fn_api_pb2.ProcessBundleDescriptor
+ ):
+ # type: (...) -> collections.OrderedDict[str, operations.Operation]
transform_factory = BeamTransformFactory(
descriptor, self.data_channel_factory, self.counter_factory,
self.state_sampler, self.state_handler)
@@ -603,7 +735,7 @@
transform_proto.spec.payload,
beam_runner_api_pb2.ParDoPayload).side_inputs
- pcoll_consumers = collections.defaultdict(list)
+ pcoll_consumers = collections.defaultdict(list) # type: DefaultDict[str, List[str]]
for transform_id, transform_proto in descriptor.transforms.items():
for tag, pcoll_id in transform_proto.inputs.items():
if not is_side_input(transform_proto, tag):
@@ -611,6 +743,7 @@
@memoize
def get_operation(transform_id):
+ # type: (str) -> operations.Operation
transform_consumers = {
tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
for tag, pcoll_id
@@ -622,6 +755,7 @@
# Operations must be started (hence returned) in order.
@memoize
def topological_height(transform_id):
+ # type: (str) -> int
return 1 + max(
[0] +
[topological_height(consumer)
@@ -634,6 +768,7 @@
descriptor.transforms, key=topological_height, reverse=True)])
def reset(self):
+ # type: () -> None
self.counter_factory.reset()
self.state_sampler.reset()
# Side input caches.
@@ -641,6 +776,7 @@
op.reset()
def process_bundle(self, instruction_id):
+ # type: (str) -> Tuple[List[beam_fn_api_pb2.DelayedBundleApplication], bool]
expected_inputs = []
for op in self.ops.values():
if isinstance(op, DataOutputOperation):
@@ -662,7 +798,7 @@
op.start()
# Inject inputs from data plane.
- data_channels = collections.defaultdict(list)
+ data_channels = collections.defaultdict(list) # type: DefaultDict[data_plane.GrpcClientDataChannel, List[str]]
input_op_by_transform_id = {}
for input_op in expected_inputs:
data_channels[input_op.data_channel].append(input_op.transform_id)
@@ -690,14 +826,17 @@
self.state_sampler.stop_if_still_running()
def finalize_bundle(self):
+ # type: () -> beam_fn_api_pb2.FinalizeBundleResponse
for op in self.ops.values():
op.finalize_bundle()
return beam_fn_api_pb2.FinalizeBundleResponse()
def requires_finalization(self):
+ # type: () -> bool
return any(op.needs_finalization() for op in self.ops.values())
def try_split(self, bundle_split_request):
+ # type: (...) -> beam_fn_api_pb2.ProcessBundleSplitResponse
split_response = beam_fn_api_pb2.ProcessBundleSplitResponse()
with self.splitting_lock:
for op in self.ops.values():
@@ -724,7 +863,11 @@
return split_response
- def delayed_bundle_application(self, op, deferred_remainder):
+ def delayed_bundle_application(self,
+ op, # type: operations.DoOperation
+ deferred_remainder # type: Tuple[windowed_value.WindowedValue, Timestamp]
+ ):
+ # type: (...) -> beam_fn_api_pb2.DelayedBundleApplication
# TODO(SDF): For non-root nodes, need main_input_coder + residual_coder.
((element_and_restriction, output_watermark),
deferred_watermark) = deferred_remainder
@@ -760,6 +903,7 @@
element=main_input_coder.get_impl().encode_nested(element))
def metrics(self):
+ # type: () -> beam_fn_api_pb2.Metrics
# DEPRECATED
return beam_fn_api_pb2.Metrics(
# TODO(robertwb): Rename to progress?
@@ -791,6 +935,7 @@
return metrics
def monitoring_infos(self):
+ # type: () -> List[metrics_pb2.MonitoringInfo]
"""Returns the list of MonitoringInfos collected processing this bundle."""
# Construct a new dict first to remove duplicates.
all_monitoring_infos_dict = {}
@@ -839,6 +984,7 @@
return infos_list
def _fix_output_tags_monitoring_info(self, transform_id, monitoring_info):
+ # type: (str, metrics_pb2.MonitoringInfo) -> metrics_pb2.MonitoringInfo
actual_output_tags = list(
self.process_bundle_descriptor.transforms[transform_id].outputs.keys())
if ('TAG' in monitoring_info.labels and
@@ -848,19 +994,25 @@
return monitoring_info
def shutdown(self):
+ # type: () -> None
for op in self.ops.values():
op.teardown()
class ExecutionContext(object):
def __init__(self):
- self.delayed_applications = []
+ self.delayed_applications = [] # type: List[Tuple[operations.DoOperation, Tuple[windowed_value.WindowedValue, Timestamp]]]
class BeamTransformFactory(object):
"""Factory for turning transform_protos into executable operations."""
- def __init__(self, descriptor, data_channel_factory, counter_factory,
- state_sampler, state_handler):
+ def __init__(self,
+ descriptor, # type: beam_fn_api_pb2.ProcessBundleDescriptor
+ data_channel_factory, # type: data_plane.DataChannelFactory
+ counter_factory,
+ state_sampler, # type: statesampler.StateSampler
+ state_handler
+ ):
self.descriptor = descriptor
self.data_channel_factory = data_channel_factory
self.counter_factory = counter_factory
@@ -875,16 +1027,24 @@
runner=beam_fn_api_pb2.StateKey.Runner(key=token)),
element_coder_impl))
- _known_urns = {}
+ _known_urns = {} # type: Dict[str, Tuple[ConstructorFn, Union[Type[message.Message], Type[bytes], None]]]
@classmethod
- def register_urn(cls, urn, parameter_type):
+ def register_urn(cls,
+ urn, # type: str
+ parameter_type # type: Optional[Type[T]]
+ ):
+ # type: (...) -> Callable[[Callable[[BeamTransformFactory, str, beam_runner_api_pb2.PTransform, T, Dict[str, List[operations.Operation]]], operations.Operation]], Callable[[BeamTransformFactory, str, beam_runner_api_pb2.PTransform, T, Dict[str, List[operations.Operation]]], operations.Operation]]
def wrapper(func):
cls._known_urns[urn] = func, parameter_type
return func
return wrapper
- def create_operation(self, transform_id, consumers):
+ def create_operation(self,
+ transform_id, # type: str
+ consumers # type: Dict[str, List[operations.Operation]]
+ ):
+ # type: (...) -> operations.Operation
transform_proto = self.descriptor.transforms[transform_id]
if not transform_proto.unique_name:
_LOGGER.debug("No unique name set for transform %s" % transform_id)
@@ -895,6 +1055,7 @@
return creator(self, transform_id, transform_proto, payload, consumers)
def get_coder(self, coder_id):
+ # type: (str) -> coders.Coder
if coder_id not in self.descriptor.coders:
raise KeyError("No such coder: %s" % coder_id)
coder_proto = self.descriptor.coders[coder_id]
@@ -906,6 +1067,7 @@
json.loads(coder_proto.spec.payload.decode('utf-8')))
def get_windowed_coder(self, pcoll_id):
+ # type: (str) -> coders.Coder
coder = self.get_coder(self.descriptor.pcollections[pcoll_id].coder_id)
# TODO(robertwb): Remove this condition once all runners are consistent.
if not isinstance(coder, WindowedValueCoder):
@@ -917,26 +1079,38 @@
return coder
def get_output_coders(self, transform_proto):
+ # type: (beam_runner_api_pb2.PTransform) -> Dict[str, coders.Coder]
return {
tag: self.get_windowed_coder(pcoll_id)
for tag, pcoll_id in transform_proto.outputs.items()
}
def get_only_output_coder(self, transform_proto):
+ # type: (beam_runner_api_pb2.PTransform) -> coders.Coder
return only_element(self.get_output_coders(transform_proto).values())
def get_input_coders(self, transform_proto):
+ # type: (beam_runner_api_pb2.PTransform) -> Dict[str, coders.Coder]
return {
tag: self.get_windowed_coder(pcoll_id)
for tag, pcoll_id in transform_proto.inputs.items()
}
def get_only_input_coder(self, transform_proto):
+ # type: (beam_runner_api_pb2.PTransform) -> coders.Coder
return only_element(list(self.get_input_coders(transform_proto).values()))
+ def get_input_windowing(self, transform_proto):
+ pcoll_id = only_element(transform_proto.inputs.values())
+ windowing_strategy_id = self.descriptor.pcollections[
+ pcoll_id].windowing_strategy_id
+ return self.context.windowing_strategies.get_by_id(windowing_strategy_id)
+
# TODO(robertwb): Update all operations to take these in the constructor.
@staticmethod
- def augment_oldstyle_op(op, step_name, consumers, tag_list=None):
+ def augment_oldstyle_op(op, # type: OperationT
+ step_name, consumers, tag_list=None):
+ # type: (...) -> OperationT
op.step_name = step_name
for tag, op_consumers in consumers.items():
for consumer in op_consumers:
@@ -950,6 +1124,7 @@
self._do_op = do_op
def process(self, windowed_value):
+ # type: (windowed_value.WindowedValue) -> None
self._do_op.process_timer(self._timer_tag, windowed_value)
@@ -1029,7 +1204,14 @@
@BeamTransformFactory.register_urn(
common_urns.deprecated_primitives.READ.urn, beam_runner_api_pb2.ReadPayload)
-def create(factory, transform_id, transform_proto, parameter, consumers):
+def create_deprecated_read(
+ factory, # type: BeamTransformFactory
+ transform_id, # type: str
+ transform_proto, # type: beam_runner_api_pb2.PTransform
+ parameter, # type: beam_runner_api_pb2.ReadPayload
+ consumers # type: Dict[str, List[operations.Operation]]
+):
+ # type: (...) -> operations.ReadOperation
source = iobase.SourceBase.from_runner_api(parameter.source, factory.context)
spec = operation_specs.WorkerRead(
iobase.SourceBundle(1.0, source, None, None),
@@ -1106,8 +1288,8 @@
common_urns.sdf_components.PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS.urn,
beam_runner_api_pb2.ParDoPayload)
def create(factory, transform_id, transform_proto, parameter, consumers):
- assert parameter.do_fn.spec.urn == python_urns.PICKLED_DOFN_INFO
- serialized_fn = parameter.do_fn.spec.payload
+ assert parameter.do_fn.urn == python_urns.PICKLED_DOFN_INFO
+ serialized_fn = parameter.do_fn.payload
return _create_pardo_operation(
factory, transform_id, transform_proto, consumers,
serialized_fn, parameter,
@@ -1118,7 +1300,7 @@
proxy_dofn,
factory, transform_id, transform_proto, parameter, consumers):
- dofn_data = pickler.loads(parameter.do_fn.spec.payload)
+ dofn_data = pickler.loads(parameter.do_fn.payload)
dofn = dofn_data[0]
restriction_provider = common.DoFnSignature(dofn).get_restriction_provider()
serialized_fn = pickler.dumps(
@@ -1131,16 +1313,22 @@
@BeamTransformFactory.register_urn(
common_urns.primitives.PAR_DO.urn, beam_runner_api_pb2.ParDoPayload)
def create(factory, transform_id, transform_proto, parameter, consumers):
- assert parameter.do_fn.spec.urn == python_urns.PICKLED_DOFN_INFO
- serialized_fn = parameter.do_fn.spec.payload
+ assert parameter.do_fn.urn == python_urns.PICKLED_DOFN_INFO
+ serialized_fn = parameter.do_fn.payload
return _create_pardo_operation(
factory, transform_id, transform_proto, consumers,
serialized_fn, parameter)
def _create_pardo_operation(
- factory, transform_id, transform_proto, consumers,
- serialized_fn, pardo_proto=None, operation_cls=operations.DoOperation):
+ factory,
+ transform_id, # type: str
+ transform_proto, # type: beam_runner_api_pb2.PTransform
+ consumers,
+ serialized_fn,
+ pardo_proto=None, # type: Optional[beam_runner_api_pb2.ParDoPayload]
+ operation_cls=operations.DoOperation
+):
if pardo_proto and pardo_proto.side_inputs:
input_tags_to_coders = factory.get_input_coders(transform_proto)
@@ -1179,7 +1367,8 @@
# Windowing not set.
if pardo_proto:
other_input_tags = set.union(
- set(pardo_proto.side_inputs), set(pardo_proto.timer_specs))
+ set(pardo_proto.side_inputs),
+ set(pardo_proto.timer_specs)) # type: Container[str]
else:
other_input_tags = ()
pcoll_id, = [pcoll for tag, pcoll in transform_proto.inputs.items()
@@ -1188,9 +1377,10 @@
factory.descriptor.pcollections[pcoll_id].windowing_strategy_id)
serialized_fn = pickler.dumps(dofn_data[:-1] + (windowing,))
+ timer_inputs = None # type: Optional[Dict[str, str]]
if pardo_proto and (pardo_proto.timer_specs or pardo_proto.state_specs
or pardo_proto.splittable):
- main_input_coder = None
+ main_input_coder = None # type: Optional[WindowedValueCoder]
timer_inputs = {}
for tag, pcoll_id in transform_proto.inputs.items():
if tag in pardo_proto.timer_specs:
@@ -1210,12 +1400,11 @@
transform_id,
main_input_coder.key_coder(),
main_input_coder.window_coder,
- timer_specs=pardo_proto.timer_specs)
+ timer_specs=pardo_proto.timer_specs) # type: Optional[FnApiUserStateContext]
else:
user_state_context = None
else:
user_state_context = None
- timer_inputs = None
output_coders = factory.get_output_coders(transform_proto)
spec = operation_specs.WorkerDoFn(
@@ -1244,8 +1433,12 @@
return result
-def _create_simple_pardo_operation(
- factory, transform_id, transform_proto, consumers, dofn):
+def _create_simple_pardo_operation(factory, # type: BeamTransformFactory
+ transform_id,
+ transform_proto,
+ consumers,
+ dofn, # type: beam.DoFn
+ ):
serialized_fn = pickler.dumps((dofn, (), {}, [], None))
return _create_pardo_operation(
factory, transform_id, transform_proto, consumers, serialized_fn)
@@ -1300,7 +1493,8 @@
None,
[factory.get_only_output_coder(transform_proto)]),
factory.counter_factory,
- factory.state_sampler),
+ factory.state_sampler,
+ factory.get_input_windowing(transform_proto)),
transform_proto.unique_name,
consumers)
@@ -1331,6 +1525,7 @@
def _create_combine_phase_operation(
factory, transform_id, transform_proto, payload, consumers, phase):
+ # type: (...) -> operations.CombineOperation
serialized_combine_fn = pickler.dumps(
(beam.CombineFn.from_runner_api(payload.combine_fn, factory.context),
[], {}))
@@ -1364,10 +1559,10 @@
@BeamTransformFactory.register_urn(
common_urns.primitives.MAP_WINDOWS.urn,
- beam_runner_api_pb2.SdkFunctionSpec)
+ beam_runner_api_pb2.FunctionSpec)
def create(factory, transform_id, transform_proto, mapping_fn_spec, consumers):
- assert mapping_fn_spec.spec.urn == python_urns.PICKLED_WINDOW_MAPPING_FN
- window_mapping_fn = pickler.loads(mapping_fn_spec.spec.payload)
+ assert mapping_fn_spec.urn == python_urns.PICKLED_WINDOW_MAPPING_FN
+ window_mapping_fn = pickler.loads(mapping_fn_spec.payload)
class MapWindows(beam.DoFn):
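The bundle_processor changes above thread type comments through BeamTransformFactory and rename the URN-registered creators (for example create_deprecated_read). As a minimal, self-contained sketch of that decorator-registry pattern, the snippet below uses a hypothetical TransformRegistry class and an example URN; it only illustrates the register-then-look-up flow and is not Beam's actual API.

# A minimal, standalone sketch of the URN-keyed creator registry pattern.
# TransformRegistry and the URN below are hypothetical, for illustration only.
class TransformRegistry(object):
  _known_urns = {}

  @classmethod
  def register_urn(cls, urn):
    def wrapper(func):
      # Store the creator function under its URN for later lookup.
      cls._known_urns[urn] = func
      return func
    return wrapper

  def create_operation(self, urn, payload):
    # Look up the creator registered for this URN and invoke it.
    creator = self._known_urns[urn]
    return creator(self, payload)


@TransformRegistry.register_urn('example:read:v1')
def create_example_read(factory, payload):
  return 'ReadOperation(%r)' % (payload,)


if __name__ == '__main__':
  registry = TransformRegistry()
  print(registry.create_operation('example:read:v1', b'source-spec'))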
diff --git a/sdks/python/apache_beam/runners/worker/channel_factory.py b/sdks/python/apache_beam/runners/worker/channel_factory.py
index d0823fa..29d1504 100644
--- a/sdks/python/apache_beam/runners/worker/channel_factory.py
+++ b/sdks/python/apache_beam/runners/worker/channel_factory.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
"""Factory to create grpc channel."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/worker/data_plane.py b/sdks/python/apache_beam/runners/worker/data_plane.py
index 3690b9e..d2fddf6 100644
--- a/sdks/python/apache_beam/runners/worker/data_plane.py
+++ b/sdks/python/apache_beam/runners/worker/data_plane.py
@@ -17,6 +17,8 @@
"""Implementation of ``DataChannel``s to communicate across the data plane."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -27,8 +29,17 @@
import queue
import sys
import threading
+import time
from builtins import object
from builtins import range
+from typing import TYPE_CHECKING
+from typing import Callable
+from typing import DefaultDict
+from typing import Dict
+from typing import Iterable
+from typing import Iterator
+from typing import List
+from typing import Optional
import grpc
from future.utils import raise_
@@ -40,37 +51,141 @@
from apache_beam.runners.worker.channel_factory import GRPCChannelFactory
from apache_beam.runners.worker.worker_id_interceptor import WorkerIdInterceptor
+if TYPE_CHECKING:
+ # TODO: remove from TYPE_CHECKING scope when we drop support for python < 3.6
+ from typing import Collection
+
# This module is experimental. No backwards-compatibility guarantees.
_LOGGER = logging.getLogger(__name__)
-_DEFAULT_FLUSH_THRESHOLD = 10 << 20 # 10MB
+_DEFAULT_SIZE_FLUSH_THRESHOLD = 10 << 20 # 10MB
+_DEFAULT_TIME_FLUSH_THRESHOLD_MS = 0 # disable time-based flush by default
-class ClosableOutputStream(type(coder_impl.create_OutputStream())):
+if TYPE_CHECKING:
+ import apache_beam.coders.slow_stream
+ OutputStream = apache_beam.coders.slow_stream.OutputStream
+else:
+ OutputStream = type(coder_impl.create_OutputStream())
+
+
+class ClosableOutputStream(OutputStream):
"""A Outputstream for use with CoderImpls that has a close() method."""
- def __init__(self,
- close_callback=None,
- flush_callback=None,
- flush_threshold=_DEFAULT_FLUSH_THRESHOLD):
+ def __init__(self, close_callback=None):
super(ClosableOutputStream, self).__init__()
self._close_callback = close_callback
- self._flush_callback = flush_callback
- self._flush_threshold = flush_threshold
-
- # This must be called explicitly to avoid flushing partial elements.
- def maybe_flush(self):
- if self._flush_callback and self.size() > self._flush_threshold:
- self._flush_callback(self.get())
- self._clear()
def close(self):
if self._close_callback:
self._close_callback(self.get())
+ @staticmethod
+ def create(close_callback,
+ flush_callback,
+ data_buffer_time_limit_ms):
+ if data_buffer_time_limit_ms > 0:
+ return TimeBasedBufferingClosableOutputStream(
+ close_callback,
+ flush_callback=flush_callback,
+ time_flush_threshold_ms=data_buffer_time_limit_ms)
+ else:
+ return SizeBasedBufferingClosableOutputStream(
+ close_callback, flush_callback=flush_callback)
-class DataChannel(with_metaclass(abc.ABCMeta, object)):
+
+class SizeBasedBufferingClosableOutputStream(ClosableOutputStream):
+ """A size-based buffering OutputStream."""
+
+ def __init__(self,
+ close_callback=None, # type: Optional[Callable[[bytes], None]]
+ flush_callback=None, # type: Optional[Callable[[bytes], None]]
+ size_flush_threshold=_DEFAULT_SIZE_FLUSH_THRESHOLD):
+ super(SizeBasedBufferingClosableOutputStream, self).__init__(close_callback)
+ self._flush_callback = flush_callback
+ self._size_flush_threshold = size_flush_threshold
+
+ # This must be called explicitly to avoid flushing partial elements.
+ def maybe_flush(self):
+ if self.size() > self._size_flush_threshold:
+ self.flush()
+
+ def flush(self):
+ if self._flush_callback:
+ self._flush_callback(self.get())
+ self._clear()
+
+
+class TimeBasedBufferingClosableOutputStream(
+ SizeBasedBufferingClosableOutputStream):
+ """A buffering OutputStream with both time-based and size-based."""
+
+ def __init__(self,
+ close_callback=None,
+ flush_callback=None,
+ size_flush_threshold=_DEFAULT_SIZE_FLUSH_THRESHOLD,
+ time_flush_threshold_ms=_DEFAULT_TIME_FLUSH_THRESHOLD_MS):
+ super(TimeBasedBufferingClosableOutputStream, self).__init__(
+ close_callback, flush_callback, size_flush_threshold)
+ assert time_flush_threshold_ms > 0
+ self._time_flush_threshold_ms = time_flush_threshold_ms
+ self._flush_lock = threading.Lock()
+ self._schedule_lock = threading.Lock()
+ self._closed = False
+ self._schedule_periodic_flush()
+
+ def flush(self):
+ with self._flush_lock:
+ super(TimeBasedBufferingClosableOutputStream, self).flush()
+
+ def close(self):
+ with self._schedule_lock:
+ self._closed = True
+ if self._periodic_flusher:
+ self._periodic_flusher.cancel()
+ self._periodic_flusher = None
+ super(TimeBasedBufferingClosableOutputStream, self).close()
+
+ def _schedule_periodic_flush(self):
+ def _flush():
+ with self._schedule_lock:
+ if not self._closed:
+ self.flush()
+
+ self._periodic_flusher = PeriodicThread(
+ self._time_flush_threshold_ms / 1000.0, _flush)
+ self._periodic_flusher.daemon = True
+ self._periodic_flusher.start()
+
+
+class PeriodicThread(threading.Thread):
+ """Call a function periodically with the specified number of seconds"""
+
+ def __init__(self,
+ interval,
+ function,
+ args=None,
+ kwargs=None):
+ threading.Thread.__init__(self)
+ self._interval = interval
+ self._function = function
+ self._args = args if args is not None else []
+ self._kwargs = kwargs if kwargs is not None else {}
+ self._finished = threading.Event()
+
+ def run(self):
+ next_call = time.time() + self._interval
+ while not self._finished.wait(next_call - time.time()):
+ next_call = next_call + self._interval
+ self._function(*self._args, **self._kwargs)
+
+ def cancel(self):
+ """Stop the thread if it hasn't finished yet."""
+ self._finished.set()
+
+
+class DataChannel(with_metaclass(abc.ABCMeta, object)): # type: ignore[misc]
"""Represents a channel for reading and writing data over the data plane.
Read from this channel with the input_elements method::
@@ -91,8 +206,12 @@
"""
@abc.abstractmethod
- def input_elements(
- self, instruction_id, expected_transforms, abort_callback=None):
+ def input_elements(self,
+ instruction_id, # type: str
+ expected_transforms, # type: Collection[str]
+ abort_callback=None # type: Optional[Callable[[], bool]]
+ ):
+ # type: (...) -> Iterator[beam_fn_api_pb2.Elements.Data]
"""Returns an iterable of all Element.Data bundles for instruction_id.
This iterable terminates only once the full set of data has been received
@@ -107,7 +226,11 @@
raise NotImplementedError(type(self))
@abc.abstractmethod
- def output_stream(self, instruction_id, transform_id):
+ def output_stream(self,
+ instruction_id, # type: str
+ transform_id # type: str
+ ):
+ # type: (...) -> ClosableOutputStream
"""Returns an output stream writing elements to transform_id.
Args:
@@ -118,6 +241,7 @@
@abc.abstractmethod
def close(self):
+ # type: () -> None
"""Closes this channel, indicating that all data has been written.
Data can continue to be read.
@@ -135,15 +259,23 @@
The inverse() method returns the other side of a instance.
"""
- def __init__(self, inverse=None):
- self._inputs = []
- self._inverse = inverse or InMemoryDataChannel(self)
+ def __init__(self, inverse=None, data_buffer_time_limit_ms=0):
+ # type: (Optional[InMemoryDataChannel], Optional[int]) -> None
+ self._inputs = [] # type: List[beam_fn_api_pb2.Elements.Data]
+ self._data_buffer_time_limit_ms = data_buffer_time_limit_ms
+ self._inverse = inverse or InMemoryDataChannel(
+ self, data_buffer_time_limit_ms=data_buffer_time_limit_ms)
def inverse(self):
+ # type: () -> InMemoryDataChannel
return self._inverse
- def input_elements(self, instruction_id, unused_expected_transforms=None,
- abort_callback=None):
+ def input_elements(self,
+ instruction_id, # type: str
+ unused_expected_transforms=None, # type: Optional[Collection[str]]
+ abort_callback=None # type: Optional[Callable[[], bool]]
+ ):
+ # type: (...) -> Iterator[beam_fn_api_pb2.Elements.Data]
other_inputs = []
for data in self._inputs:
if data.instruction_id == instruction_id:
@@ -154,14 +286,17 @@
self._inputs = other_inputs
def output_stream(self, instruction_id, transform_id):
+ # type: (str, str) -> ClosableOutputStream
def add_to_inverse_output(data):
self._inverse._inputs.append( # pylint: disable=protected-access
beam_fn_api_pb2.Elements.Data(
instruction_id=instruction_id,
transform_id=transform_id,
data=data))
- return ClosableOutputStream(
- add_to_inverse_output, flush_callback=add_to_inverse_output)
+ return ClosableOutputStream.create(
+ add_to_inverse_output,
+ add_to_inverse_output,
+ self._data_buffer_time_limit_ms)
def close(self):
pass
@@ -172,9 +307,11 @@
_WRITES_FINISHED = object()
- def __init__(self):
- self._to_send = queue.Queue()
- self._received = collections.defaultdict(lambda: queue.Queue(maxsize=5))
+ def __init__(self, data_buffer_time_limit_ms=0):
+ # type: (Optional[int]) -> None
+ self._data_buffer_time_limit_ms = data_buffer_time_limit_ms
+ self._to_send = queue.Queue() # type: queue.Queue[beam_fn_api_pb2.Elements.Data]
+ self._received = collections.defaultdict(lambda: queue.Queue(maxsize=5)) # type: DefaultDict[str, queue.Queue[beam_fn_api_pb2.Elements.Data]]
self._receive_lock = threading.Lock()
self._reads_finished = threading.Event()
self._closed = False
@@ -188,15 +325,21 @@
self._reads_finished.wait(timeout)
def _receiving_queue(self, instruction_id):
+ # type: (str) -> queue.Queue[beam_fn_api_pb2.Elements.Data]
with self._receive_lock:
return self._received[instruction_id]
def _clean_receiving_queue(self, instruction_id):
+ # type: (str) -> None
with self._receive_lock:
self._received.pop(instruction_id)
- def input_elements(self, instruction_id, expected_transforms,
- abort_callback=None):
+ def input_elements(self,
+ instruction_id, # type: str
+ expected_transforms, # type: Collection[str]
+ abort_callback=None # type: Optional[Callable[[], bool]]
+ ):
+ # type: (...) -> Iterator[beam_fn_api_pb2.Elements.Data]
"""
Generator to retrieve elements for an instruction_id
input_elements should be called only once for an instruction_id
@@ -206,7 +349,7 @@
expected_transforms(collection): expected transforms
"""
received = self._receiving_queue(instruction_id)
- done_transforms = []
+ done_transforms = [] # type: List[str]
abort_callback = abort_callback or (lambda: False)
try:
while len(done_transforms) < len(expected_transforms):
@@ -232,7 +375,9 @@
self._clean_receiving_queue(instruction_id)
def output_stream(self, instruction_id, transform_id):
+ # type: (str, str) -> ClosableOutputStream
def add_to_send_queue(data):
+ # type: (bytes) -> None
if data:
self._to_send.put(
beam_fn_api_pb2.Elements.Data(
@@ -241,6 +386,7 @@
data=data))
def close_callback(data):
+ # type: (bytes) -> None
add_to_send_queue(data)
# End of stream marker.
self._to_send.put(
@@ -248,10 +394,14 @@
instruction_id=instruction_id,
transform_id=transform_id,
data=b''))
- return ClosableOutputStream(
- close_callback, flush_callback=add_to_send_queue)
+
+ return ClosableOutputStream.create(
+ close_callback,
+ add_to_send_queue,
+ self._data_buffer_time_limit_ms)
def _write_outputs(self):
+ # type: () -> Iterator[beam_fn_api_pb2.Elements]
done = False
while not done:
data = [self._to_send.get()]
@@ -268,6 +418,7 @@
yield beam_fn_api_pb2.Elements(data=data)
def _read_inputs(self, elements_iterator):
+ # type: (Iterable[beam_fn_api_pb2.Elements]) -> None
try:
for elements in elements_iterator:
for data in elements.data:
@@ -282,6 +433,7 @@
self._reads_finished.set()
def set_inputs(self, elements_iterator):
+ # type: (Iterable[beam_fn_api_pb2.Elements]) -> None
reader = threading.Thread(
target=lambda: self._read_inputs(elements_iterator),
name='read_grpc_client_inputs')
@@ -292,41 +444,54 @@
class GrpcClientDataChannel(_GrpcDataChannel):
"""A DataChannel wrapping the client side of a BeamFnData connection."""
- def __init__(self, data_stub):
- super(GrpcClientDataChannel, self).__init__()
+ def __init__(self,
+ data_stub, # type: beam_fn_api_pb2_grpc.BeamFnDataStub
+ data_buffer_time_limit_ms=0 # type: Optional[int]
+ ):
+ # type: (...) -> None
+ super(GrpcClientDataChannel, self).__init__(data_buffer_time_limit_ms)
self.set_inputs(data_stub.Data(self._write_outputs()))
class BeamFnDataServicer(beam_fn_api_pb2_grpc.BeamFnDataServicer):
"""Implementation of BeamFnDataServicer for any number of clients"""
- def __init__(self):
+ def __init__(self,
+ data_buffer_time_limit_ms=0 # type: Optional[int]
+ ):
self._lock = threading.Lock()
self._connections_by_worker_id = collections.defaultdict(
- _GrpcDataChannel)
+ lambda: _GrpcDataChannel(data_buffer_time_limit_ms)) # type: DefaultDict[str, _GrpcDataChannel]
def get_conn_by_worker_id(self, worker_id):
+ # type: (str) -> _GrpcDataChannel
with self._lock:
return self._connections_by_worker_id[worker_id]
- def Data(self, elements_iterator, context):
- worker_id = dict(context.invocation_metadata()).get('worker_id')
+ def Data(self,
+ elements_iterator, # type: Iterable[beam_fn_api_pb2.Elements]
+ context
+ ):
+ # type: (...) -> Iterator[beam_fn_api_pb2.Elements]
+ worker_id = dict(context.invocation_metadata())['worker_id']
data_conn = self.get_conn_by_worker_id(worker_id)
data_conn.set_inputs(elements_iterator)
for elements in data_conn._write_outputs():
yield elements
-class DataChannelFactory(with_metaclass(abc.ABCMeta, object)):
+class DataChannelFactory(with_metaclass(abc.ABCMeta, object)): # type: ignore[misc]
"""An abstract factory for creating ``DataChannel``."""
@abc.abstractmethod
def create_data_channel(self, remote_grpc_port):
+ # type: (beam_fn_api_pb2.RemoteGrpcPort) -> GrpcClientDataChannel
"""Returns a ``DataChannel`` from the given RemoteGrpcPort."""
raise NotImplementedError(type(self))
@abc.abstractmethod
def close(self):
+ # type: () -> None
"""Close all channels that this factory owns."""
raise NotImplementedError(type(self))
@@ -337,16 +502,23 @@
Caches the created channels by ``data descriptor url``.
"""
- def __init__(self, credentials=None, worker_id=None):
- self._data_channel_cache = {}
+ def __init__(self,
+ credentials=None,
+ worker_id=None, # type: Optional[str]
+ data_buffer_time_limit_ms=0 # type: Optional[int]
+ ):
+ # type: (...) -> None
+ self._data_channel_cache = {} # type: Dict[str, GrpcClientDataChannel]
self._lock = threading.Lock()
self._credentials = None
self._worker_id = worker_id
+ self._data_buffer_time_limit_ms = data_buffer_time_limit_ms
if credentials is not None:
_LOGGER.info('Using secure channel creds.')
self._credentials = credentials
def create_data_channel(self, remote_grpc_port):
+ # type: (beam_fn_api_pb2.RemoteGrpcPort) -> GrpcClientDataChannel
url = remote_grpc_port.api_service_descriptor.url
if url not in self._data_channel_cache:
with self._lock:
@@ -368,11 +540,13 @@
grpc_channel = grpc.intercept_channel(
grpc_channel, WorkerIdInterceptor(self._worker_id))
self._data_channel_cache[url] = GrpcClientDataChannel(
- beam_fn_api_pb2_grpc.BeamFnDataStub(grpc_channel))
+ beam_fn_api_pb2_grpc.BeamFnDataStub(grpc_channel),
+ self._data_buffer_time_limit_ms)
return self._data_channel_cache[url]
def close(self):
+ # type: () -> None
_LOGGER.info('Closing all cached grpc data channels.')
for _, channel in self._data_channel_cache.items():
channel.close()
@@ -383,10 +557,13 @@
"""A singleton factory for ``InMemoryDataChannel``."""
def __init__(self, in_memory_data_channel):
+ # type: (GrpcClientDataChannel) -> None
self._in_memory_data_channel = in_memory_data_channel
def create_data_channel(self, unused_remote_grpc_port):
+ # type: (beam_fn_api_pb2.RemoteGrpcPort) -> GrpcClientDataChannel
return self._in_memory_data_channel
def close(self):
+ # type: () -> None
pass
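The data_plane changes above replace the single flush threshold with ClosableOutputStream.create, which returns a size-based stream by default and a time-based one when data_buffer_time_limit_ms > 0, backed by a PeriodicThread. The standalone sketch below (a hypothetical BufferedWriter using only the standard library) illustrates that size-or-time flushing idea under those assumptions; it is not the Beam implementation.

import threading

class BufferedWriter(object):
  """Flushes a byte buffer when it grows past a size limit or on a timer."""

  def __init__(self, sink, size_threshold=1024, time_limit_ms=0):
    self._sink = sink  # a callable that accepts bytes
    self._buffer = bytearray()
    self._lock = threading.Lock()
    self._size_threshold = size_threshold
    self._timer = None
    if time_limit_ms > 0:
      # Time-based flushing: reschedule a daemon timer after every flush.
      interval = time_limit_ms / 1000.0

      def _tick():
        self.flush()
        self._timer = threading.Timer(interval, _tick)
        self._timer.daemon = True
        self._timer.start()

      self._timer = threading.Timer(interval, _tick)
      self._timer.daemon = True
      self._timer.start()

  def write(self, data):
    with self._lock:
      self._buffer.extend(data)
      should_flush = len(self._buffer) > self._size_threshold
    if should_flush:
      self.flush()

  def flush(self):
    with self._lock:
      if self._buffer:
        self._sink(bytes(self._buffer))
        del self._buffer[:]

  def close(self):
    if self._timer is not None:
      self._timer.cancel()
    self.flush()


if __name__ == '__main__':
  def report(chunk):
    print('flushed %d bytes' % len(chunk))

  # Size-based only (time_limit_ms=0), mirroring the default configuration.
  writer = BufferedWriter(sink=report, size_threshold=10, time_limit_ms=0)
  writer.write(b'hello world, this exceeds the threshold')
  writer.close()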
diff --git a/sdks/python/apache_beam/runners/worker/data_plane_test.py b/sdks/python/apache_beam/runners/worker/data_plane_test.py
index 900532b..64b4ae7 100644
--- a/sdks/python/apache_beam/runners/worker/data_plane_test.py
+++ b/sdks/python/apache_beam/runners/worker/data_plane_test.py
@@ -17,10 +17,13 @@
"""Tests for apache_beam.runners.worker.data_plane."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import itertools
import logging
import sys
import threading
@@ -62,7 +65,18 @@
@timeout(5)
def test_grpc_data_channel(self):
- data_servicer = data_plane.BeamFnDataServicer()
+ self._grpc_data_channel_test()
+
+ @timeout(5)
+ def test_time_based_flush_grpc_data_channel(self):
+ self._grpc_data_channel_test(True)
+
+ def _grpc_data_channel_test(self, time_based_flush=False):
+ if time_based_flush:
+ data_servicer = data_plane.BeamFnDataServicer(
+ data_buffer_time_limit_ms=100)
+ else:
+ data_servicer = data_plane.BeamFnDataServicer()
worker_id = 'worker_0'
data_channel_service = \
data_servicer.get_conn_by_worker_id(worker_id)
@@ -78,10 +92,15 @@
grpc_channel = grpc.intercept_channel(
grpc_channel, WorkerIdInterceptor(worker_id))
data_channel_stub = beam_fn_api_pb2_grpc.BeamFnDataStub(grpc_channel)
- data_channel_client = data_plane.GrpcClientDataChannel(data_channel_stub)
+ if time_based_flush:
+ data_channel_client = data_plane.GrpcClientDataChannel(
+ data_channel_stub, data_buffer_time_limit_ms=100)
+ else:
+ data_channel_client = data_plane.GrpcClientDataChannel(data_channel_stub)
try:
- self._data_channel_test(data_channel_service, data_channel_client)
+ self._data_channel_test(
+ data_channel_service, data_channel_client, time_based_flush)
finally:
data_channel_client.close()
data_channel_service.close()
@@ -92,22 +111,25 @@
channel = data_plane.InMemoryDataChannel()
self._data_channel_test(channel, channel.inverse())
- def _data_channel_test(self, server, client):
- self._data_channel_test_one_direction(server, client)
- self._data_channel_test_one_direction(client, server)
+ def _data_channel_test(self, server, client, time_based_flush=False):
+ self._data_channel_test_one_direction(server, client, time_based_flush)
+ self._data_channel_test_one_direction(client, server, time_based_flush)
- def _data_channel_test_one_direction(self, from_channel, to_channel):
+ def _data_channel_test_one_direction(
+ self, from_channel, to_channel, time_based_flush):
def send(instruction_id, transform_id, data):
stream = from_channel.output_stream(instruction_id, transform_id)
stream.write(data)
- stream.close()
+ if not time_based_flush:
+ stream.close()
transform_1 = '1'
transform_2 = '2'
# Single write.
send('0', transform_1, b'abc')
self.assertEqual(
- list(to_channel.input_elements('0', [transform_1])),
+ list(itertools.islice(
+ to_channel.input_elements('0', [transform_1]), 1)),
[beam_fn_api_pb2.Elements.Data(
instruction_id='0',
transform_id=transform_1,
@@ -117,14 +139,16 @@
send('1', transform_1, b'abc')
send('2', transform_1, b'def')
self.assertEqual(
- list(to_channel.input_elements('1', [transform_1])),
+ list(itertools.islice(
+ to_channel.input_elements('1', [transform_1]), 1)),
[beam_fn_api_pb2.Elements.Data(
instruction_id='1',
transform_id=transform_1,
data=b'abc')])
send('2', transform_2, b'ghi')
self.assertEqual(
- list(to_channel.input_elements('2', [transform_1, transform_2])),
+ list(itertools.islice(
+ to_channel.input_elements('2', [transform_1, transform_2]), 2)),
[beam_fn_api_pb2.Elements.Data(
instruction_id='2',
transform_id=transform_1,
diff --git a/sdks/python/apache_beam/runners/worker/log_handler.py b/sdks/python/apache_beam/runners/worker/log_handler.py
index 12f162b..f1df332 100644
--- a/sdks/python/apache_beam/runners/worker/log_handler.py
+++ b/sdks/python/apache_beam/runners/worker/log_handler.py
@@ -16,6 +16,8 @@
#
"""Beam fn API log handler."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
@@ -64,7 +66,7 @@
self._alive = True
self._dropped_logs = 0
- self._log_entry_queue = queue.Queue(maxsize=self._QUEUE_SIZE)
+ self._log_entry_queue = queue.Queue(maxsize=self._QUEUE_SIZE) # type: queue.Queue[beam_fn_api_pb2.LogEntry]
ch = GRPCChannelFactory.insecure_channel(log_service_descriptor.url)
# Make sure the channel is ready to avoid [BEAM-4649]
@@ -92,6 +94,7 @@
if python_level <= level)
def emit(self, record):
+ # type: (logging.LogRecord) -> None
log_entry = beam_fn_api_pb2.LogEntry()
log_entry.severity = self.map_log_level(record.levelno)
log_entry.message = self.format(record)
@@ -161,7 +164,9 @@
# This case is unlikely and the chance of reconnection and successful
# transmission of logs is also very low as the process is terminating.
# I choose not to handle this case to avoid unnecessary code complexity.
- while self._alive:
+
+ alive = True # Force at least one connection attempt.
+ while alive:
# Loop for reconnection.
log_control_iterator = self.connect()
if self._dropped_logs > 0:
@@ -180,3 +185,4 @@
file=sys.stderr)
# Wait a bit before trying a reconnect
time.sleep(0.5) # 0.5 seconds
+ alive = self._alive
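The log_handler change above turns the reconnection loop into a do-while: the stop flag is read only after the body has run once, so at least one connection attempt is made even if the handler is already shutting down. A tiny sketch of that pattern, with hypothetical helper names:

def run_until_stopped(attempt, is_alive):
  alive = True  # force at least one iteration
  while alive:
    attempt()
    alive = is_alive()  # re-check the flag only after the attempt


if __name__ == '__main__':
  attempts = []
  run_until_stopped(lambda: attempts.append('connect'), lambda: False)
  print(attempts)  # ['connect'] -- one attempt even though is_alive() is False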
diff --git a/sdks/python/apache_beam/runners/worker/log_handler_test.py b/sdks/python/apache_beam/runners/worker/log_handler_test.py
index c79ccf9..1fa5fc7 100644
--- a/sdks/python/apache_beam/runners/worker/log_handler_test.py
+++ b/sdks/python/apache_beam/runners/worker/log_handler_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/runners/worker/logger.py b/sdks/python/apache_beam/runners/worker/logger.py
index ae9cdd3..be67e47 100644
--- a/sdks/python/apache_beam/runners/worker/logger.py
+++ b/sdks/python/apache_beam/runners/worker/logger.py
@@ -19,12 +19,16 @@
"""Python worker logging."""
+# pytype: skip-file
+
from __future__ import absolute_import
import json
import logging
import threading
import traceback
+from typing import Any
+from typing import Dict
from apache_beam.runners.worker import statesampler
@@ -115,7 +119,7 @@
Python thread object. Nevertheless, having this value makes it possible to
filter log statements from only one specific thread.
"""
- output = {}
+ output = {} # type: Dict[str, Any]
output['timestamp'] = {
'seconds': int(record.created),
'nanos': int(record.msecs * 1000000)}
diff --git a/sdks/python/apache_beam/runners/worker/logger_test.py b/sdks/python/apache_beam/runners/worker/logger_test.py
index c131775..e04e1b5 100644
--- a/sdks/python/apache_beam/runners/worker/logger_test.py
+++ b/sdks/python/apache_beam/runners/worker/logger_test.py
@@ -17,6 +17,8 @@
"""Tests for worker logging utilities."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import unicode_literals
diff --git a/sdks/python/apache_beam/runners/worker/opcounters.py b/sdks/python/apache_beam/runners/worker/opcounters.py
index ae36a6b..9a1a30a 100644
--- a/sdks/python/apache_beam/runners/worker/opcounters.py
+++ b/sdks/python/apache_beam/runners/worker/opcounters.py
@@ -20,6 +20,8 @@
"""Counters collect the progress of the Worker for reporting to the service."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -27,11 +29,17 @@
import random
from builtins import hex
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Optional
from apache_beam.utils import counters
from apache_beam.utils.counters import Counter
from apache_beam.utils.counters import CounterName
+if TYPE_CHECKING:
+ from apache_beam.utils import windowed_value
+ from apache_beam.runners.worker.statesampler import StateSampler
+
# This module is experimental. No backwards-compatibility guarantees.
@@ -122,8 +130,12 @@
not be the only step that spends time reading from this side input.
"""
- def __init__(self, counter_factory, state_sampler, declaring_step,
- input_index):
+ def __init__(self,
+ counter_factory,
+ state_sampler, # type: StateSampler
+ declaring_step,
+ input_index
+ ):
"""Create a side input read counter.
Args:
@@ -177,7 +189,12 @@
class OperationCounters(object):
"""The set of basic counters to attach to an Operation."""
- def __init__(self, counter_factory, step_name, coder, output_index):
+ def __init__(self,
+ counter_factory,
+ step_name, # type: str
+ coder,
+ output_index
+ ):
self._counter_factory = counter_factory
self.element_counter = counter_factory.get_counter(
'%s-out%s-ElementCount' % (step_name, output_index), Counter.SUM)
@@ -185,12 +202,13 @@
'%s-out%s-MeanByteCount' % (step_name, output_index),
Counter.BEAM_DISTRIBUTION)
self.coder_impl = coder.get_impl() if coder else None
- self.active_accumulator = None
- self.current_size = None
+ self.active_accumulator = None # type: Optional[SumAccumulator]
+ self.current_size = None # type: Optional[int]
self._sample_counter = 0
self._next_sample = 0
def update_from(self, windowed_value):
+ # type: (windowed_value.WindowedValue) -> None
"""Add one value to this counter."""
if self._should_sample():
self.do_sample(windowed_value)
@@ -210,6 +228,7 @@
return _observable_callback_inner
def do_sample(self, windowed_value):
+ # type: (windowed_value.WindowedValue) -> None
size, observables = (
self.coder_impl.get_estimated_size_and_observables(windowed_value))
if not observables:
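The opcounters changes above import StateSampler and windowed_value only under typing.TYPE_CHECKING, so the imports are visible to type checkers but never executed at runtime (avoiding import cycles and startup cost). A minimal sketch of that pattern, using decimal.Decimal purely as a stand-in annotation target:

from typing import TYPE_CHECKING

if TYPE_CHECKING:
  # Only evaluated by static type checkers such as mypy, never at runtime.
  from decimal import Decimal


def scale(value, factor):
  # type: (Decimal, int) -> Decimal
  # The Decimal name is only referenced inside the type comment, so the
  # runtime import is never needed.
  return value * factor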
diff --git a/sdks/python/apache_beam/runners/worker/opcounters_test.py b/sdks/python/apache_beam/runners/worker/opcounters_test.py
index 13e78b2..62e8b5c 100644
--- a/sdks/python/apache_beam/runners/worker/opcounters_test.py
+++ b/sdks/python/apache_beam/runners/worker/opcounters_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/runners/worker/operation_specs.py b/sdks/python/apache_beam/runners/worker/operation_specs.py
index 464558d..1f53d5a 100644
--- a/sdks/python/apache_beam/runners/worker/operation_specs.py
+++ b/sdks/python/apache_beam/runners/worker/operation_specs.py
@@ -21,6 +21,8 @@
source, write to a sink, parallel do, etc.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
diff --git a/sdks/python/apache_beam/runners/worker/operations.pxd b/sdks/python/apache_beam/runners/worker/operations.pxd
index 68da490..672fc4c 100644
--- a/sdks/python/apache_beam/runners/worker/operations.pxd
+++ b/sdks/python/apache_beam/runners/worker/operations.pxd
@@ -111,11 +111,13 @@
cdef public object combine_fn
cdef public object combine_fn_add_input
cdef public object combine_fn_compact
+ cdef public bint is_default_windowing
+ cdef public object timestamp_combiner
cdef dict table
cdef long max_keys
cdef long key_count
- cpdef output_key(self, tuple wkey, value)
+ cpdef output_key(self, wkey, value, timestamp)
cdef class FlattenOperation(Operation):
diff --git a/sdks/python/apache_beam/runners/worker/operations.py b/sdks/python/apache_beam/runners/worker/operations.py
index aafbb68..6bb8bf1 100644
--- a/sdks/python/apache_beam/runners/worker/operations.py
+++ b/sdks/python/apache_beam/runners/worker/operations.py
@@ -20,6 +20,8 @@
"""Worker operations executor."""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
@@ -29,6 +31,15 @@
from builtins import filter
from builtins import object
from builtins import zip
+from typing import TYPE_CHECKING
+from typing import DefaultDict
+from typing import Dict
+from typing import FrozenSet
+from typing import Hashable
+from typing import Iterator
+from typing import List
+from typing import Optional
+from typing import Union
from apache_beam import pvalue
from apache_beam.internal import pickler
@@ -47,11 +58,16 @@
from apache_beam.transforms import combiners
from apache_beam.transforms import core
from apache_beam.transforms import userstate
+from apache_beam.transforms import window
from apache_beam.transforms.combiners import PhasedCombineFnExecutor
from apache_beam.transforms.combiners import curry_combine_fn
from apache_beam.transforms.window import GlobalWindows
from apache_beam.utils.windowed_value import WindowedValue
+if TYPE_CHECKING:
+ from apache_beam.runners.worker.bundle_processor import ExecutionContext
+ from apache_beam.runners.worker.statesampler import StateSampler
+
# Allow some "pure mode" declarations.
try:
import cython
@@ -79,7 +95,13 @@
ConsumerSet are attached to the outputting Operation.
"""
@staticmethod
- def create(counter_factory, step_name, output_index, consumers, coder):
+ def create(counter_factory,
+ step_name, # type: str
+ output_index,
+ consumers, # type: List[Operation]
+ coder
+ ):
+ # type: (...) -> ConsumerSet
if len(consumers) == 1:
return SingletonConsumerSet(
counter_factory, step_name, output_index, consumers, coder)
@@ -87,8 +109,13 @@
return ConsumerSet(
counter_factory, step_name, output_index, consumers, coder)
- def __init__(
- self, counter_factory, step_name, output_index, consumers, coder):
+ def __init__(self,
+ counter_factory,
+ step_name, # type: str
+ output_index,
+ consumers, # type: List[Operation]
+ coder
+ ):
self.consumers = consumers
self.opcounter = opcounters.OperationCounters(
counter_factory, step_name, coder, output_index)
@@ -98,6 +125,7 @@
self.coder = coder
def receive(self, windowed_value):
+ # type: (WindowedValue) -> None
self.update_counters_start(windowed_value)
for consumer in self.consumers:
cython.cast(Operation, consumer).process(windowed_value)
@@ -112,6 +140,7 @@
return None
def current_element_progress(self):
+ # type: () -> Optional[iobase.RestrictionProgress]
"""Returns the progress of the current element.
This progress should be an instance of
@@ -122,9 +151,11 @@
return None
def update_counters_start(self, windowed_value):
+ # type: (WindowedValue) -> None
self.opcounter.update_from(windowed_value)
def update_counters_finish(self):
+ # type: () -> None
self.opcounter.update_collect()
def __repr__(self):
@@ -142,6 +173,7 @@
self.consumer = consumers[0]
def receive(self, windowed_value):
+ # type: (WindowedValue) -> None
self.update_counters_start(windowed_value)
self.consumer.process(windowed_value)
self.update_counters_finish()
@@ -160,7 +192,12 @@
one or more receiver operations that will take that as input.
"""
- def __init__(self, name_context, spec, counter_factory, state_sampler):
+ def __init__(self,
+ name_context, # type: Union[str, common.NameContext]
+ spec,
+ counter_factory,
+ state_sampler # type: StateSampler
+ ):
"""Initializes a worker operation instance.
Args:
@@ -180,8 +217,8 @@
self.spec = spec
self.counter_factory = counter_factory
- self.execution_context = None
- self.consumers = collections.defaultdict(list)
+ self.execution_context = None # type: Optional[ExecutionContext]
+ self.consumers = collections.defaultdict(list) # type: DefaultDict[int, List[Operation]]
# These are overwritten in the legacy harness.
self.metrics_container = MetricsContainer(self.name_context.metrics_name())
@@ -195,12 +232,13 @@
self.name_context, 'finish', metrics_container=self.metrics_container)
# TODO(ccy): the '-abort' state can be added when the abort is supported in
# Operations.
- self.receivers = []
+ self.receivers = [] # type: List[ConsumerSet]
# Legacy workers cannot call setup() until after setting additional state
# on the operation.
self.setup_done = False
def setup(self):
+ # type: () -> None
"""Set up operation.
This must be called before any other methods of the operation."""
@@ -221,16 +259,19 @@
self.setup_done = True
def start(self):
+ # type: () -> None
"""Start operation."""
if not self.setup_done:
# For legacy workers.
self.setup()
def process(self, o):
+ # type: (WindowedValue) -> None
"""Process element in operation."""
pass
def finalize_bundle(self):
+ # type: () -> None
pass
def needs_finalization(self):
@@ -243,26 +284,32 @@
return None
def finish(self):
+ # type: () -> None
"""Finish operation."""
pass
def teardown(self):
+ # type: () -> None
"""Tear down operation.
No other methods of this operation should be called after this."""
pass
def reset(self):
+ # type: () -> None
self.metrics_container.reset()
def output(self, windowed_value, output_index=0):
+ # type: (WindowedValue, int) -> None
cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value)
def add_receiver(self, operation, output_index=0):
+ # type: (Operation, int) -> None
"""Adds a receiver operation for the specified output."""
self.consumers[output_index].append(operation)
def progress_metrics(self):
+ # type: () -> beam_fn_api_pb2.Metrics.PTransform
return beam_fn_api_pb2.Metrics.PTransform(
processed_elements=beam_fn_api_pb2.Metrics.PTransform.ProcessedElements(
measured=beam_fn_api_pb2.Metrics.PTransform.Measured(
@@ -282,6 +329,7 @@
user=self.metrics_container.to_runner_api())
def monitoring_infos(self, transform_id):
+ # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
"""Returns the list of MonitoringInfos collected by this operation."""
all_monitoring_infos = self.execution_time_monitoring_infos(transform_id)
all_monitoring_infos.update(
@@ -331,6 +379,7 @@
return self.metrics_container.to_runner_api_monitoring_infos(transform_id)
def execution_time_monitoring_infos(self, transform_id):
+ # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
total_time_spent_msecs = (
self.scoped_start_state.sampled_msecs_int()
+ self.scoped_process_state.sampled_msecs_int()
@@ -428,6 +477,7 @@
next(iter(consumers.values())), output_coder)]
def process(self, unused_impulse):
+ # type: (WindowedValue) -> None
with self.scoped_process_state:
range_tracker = self.source.get_range_tracker(None, None)
for value in self.source.read(range_tracker):
@@ -442,6 +492,7 @@
"""A write operation that will write to an in-memory sink."""
def process(self, o):
+ # type: (WindowedValue) -> None
with self.scoped_process_state:
if self.debug_logging_enabled:
_LOGGER.debug('Processing [%s] in %s', o, self)
@@ -464,17 +515,24 @@
class DoOperation(Operation):
"""A Do operation that will execute a custom DoFn for each input element."""
- def __init__(
- self, name, spec, counter_factory, sampler, side_input_maps=None,
- user_state_context=None, timer_inputs=None):
+ def __init__(self,
+ name, # type: common.NameContext
+ spec, # operation_specs.WorkerDoFn # need to fix this type
+ counter_factory,
+ sampler,
+ side_input_maps=None,
+ user_state_context=None,
+ timer_inputs=None
+ ):
super(DoOperation, self).__init__(name, spec, counter_factory, sampler)
self.side_input_maps = side_input_maps
self.user_state_context = user_state_context
- self.tagged_receivers = None
+ self.tagged_receivers = None # type: Optional[_TaggedReceivers]
# A mapping of timer tags to the input "PCollections" they come in on.
self.timer_inputs = timer_inputs or {}
def _read_side_inputs(self, tags_and_types):
+ # type: (...) -> Iterator[apache_sideinputs.SideInputMap]
"""Generator reading side inputs in the order prescribed by tags_and_types.
Args:
@@ -535,6 +593,7 @@
view_class, view_options, sideinputs.EmulatedIterable(iterator_fn))
def setup(self):
+ # type: () -> None
with self.scoped_start_state:
super(DoOperation, self).setup()
@@ -554,7 +613,7 @@
output_tag_prefix = PropertyNames.OUT + '_'
for index, tag in enumerate(self.spec.output_tags):
if tag == PropertyNames.OUT:
- original_tag = None
+ original_tag = None # type: Optional[str]
elif tag.startswith(output_tag_prefix):
original_tag = tag[len(output_tag_prefix):]
else:
@@ -588,11 +647,13 @@
else DoFnRunnerReceiver(self.dofn_runner))
def start(self):
+ # type: () -> None
with self.scoped_start_state:
super(DoOperation, self).start()
self.dofn_runner.start()
def process(self, o):
+ # type: (WindowedValue) -> None
with self.scoped_process_state:
delayed_application = self.dofn_receiver.receive(o)
if delayed_application:
@@ -600,9 +661,11 @@
(self, delayed_application))
def finalize_bundle(self):
+ # type: () -> None
self.dofn_receiver.finalize()
def needs_finalization(self):
+ # type: () -> bool
return self.dofn_receiver.bundle_finalizer_param.has_callbacks()
def process_timer(self, tag, windowed_timer):
@@ -612,16 +675,19 @@
timer_spec, key, windowed_timer.windows[0], timer_data['timestamp'])
def finish(self):
+ # type: () -> None
with self.scoped_finish_state:
self.dofn_runner.finish()
if self.user_state_context:
self.user_state_context.commit()
def teardown(self):
+ # type: () -> None
with self.scoped_finish_state:
self.dofn_runner.teardown()
def reset(self):
+ # type: () -> None
super(DoOperation, self).reset()
for side_input_map in self.side_input_maps:
side_input_map.reset()
@@ -630,6 +696,7 @@
self.dofn_receiver.bundle_finalizer_param.reset()
def progress_metrics(self):
+ # type: () -> beam_fn_api_pb2.Metrics.PTransform
metrics = super(DoOperation, self).progress_metrics()
if self.tagged_receivers:
metrics.processed_elements.measured.output_element_counts.clear()
@@ -639,6 +706,7 @@
return metrics
def monitoring_infos(self, transform_id):
+ # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
infos = super(DoOperation, self).monitoring_infos(transform_id)
if self.tagged_receivers:
for tag, receiver in self.tagged_receivers.items():
@@ -679,6 +747,7 @@
self.element_start_output_bytes = None
def process(self, o):
+ # type: (WindowedValue) -> None
with self.scoped_process_state:
try:
with self.lock:
@@ -710,6 +779,7 @@
self._total_output_bytes() - self.element_start_output_bytes)
def progress_metrics(self):
+ # type: () -> beam_fn_api_pb2.Metrics.PTransform
with self.lock:
metrics = super(SdfProcessSizedElements, self).progress_metrics()
current_element_progress = self.current_element_progress()
@@ -736,6 +806,7 @@
self.dofn_runner = dofn_runner
def receive(self, windowed_value):
+ # type: (WindowedValue) -> None
self.dofn_runner.process(windowed_value)
@@ -753,6 +824,7 @@
PhasedCombineFnExecutor(self.spec.phase, fn, args, kwargs))
def process(self, o):
+ # type: (WindowedValue) -> None
with self.scoped_process_state:
if self.debug_logging_enabled:
_LOGGER.debug('Processing [%s] in %s', o, self)
@@ -789,6 +861,7 @@
self.max_size = 10 * 1000
def process(self, o):
+ # type: (WindowedValue) -> None
with self.scoped_process_state:
# TODO(robertwb): Structural (hashable) values.
key = o.value[0], tuple(o.windows)
@@ -816,7 +889,8 @@
class PGBKCVOperation(Operation):
- def __init__(self, name_context, spec, counter_factory, state_sampler):
+ def __init__(
+ self, name_context, spec, counter_factory, state_sampler, windowing=None):
super(PGBKCVOperation, self).__init__(
name_context, spec, counter_factory, state_sampler)
# Combiners do not accept deferred side-inputs (the ignored fourth
@@ -832,6 +906,15 @@
self.combine_fn_compact = None
else:
self.combine_fn_compact = self.combine_fn.compact
+ if windowing:
+ self.is_default_windowing = windowing.is_default()
+ tsc_type = windowing.timestamp_combiner
+ self.timestamp_combiner = (
+ None if tsc_type == window.TimestampCombiner.OUTPUT_AT_EOW
+ else window.TimestampCombiner.get_impl(tsc_type, windowing.windowfn))
+ else:
+ self.is_default_windowing = False # unknown
+ self.timestamp_combiner = None
# Optimization for the (known tiny accumulator, often wide keyspace)
# combine functions.
# TODO(b/36567833): Bound by in-memory size rather than key count.
@@ -846,12 +929,13 @@
self.table = {}
def process(self, wkv):
+ # type: (WindowedValue) -> None
with self.scoped_process_state:
key, value = wkv.value
# pylint: disable=unidiomatic-typecheck
# Optimization for the global window case.
- if len(wkv.windows) == 1 and type(wkv.windows[0]) is _global_window_type:
- wkey = 0, key
+ if self.is_default_windowing:
+ wkey = key # type: Hashable
else:
wkey = tuple(wkv.windows), key
entry = self.table.get(wkey, None)
@@ -862,7 +946,7 @@
# TODO(robertwb): Use an LRU cache?
for old_wkey, old_wvalue in self.table.items():
old_wkeys.append(old_wkey) # Can't mutate while iterating.
- self.output_key(old_wkey, old_wvalue[0])
+ self.output_key(old_wkey, old_wvalue[0], old_wvalue[1])
self.key_count -= 1
if self.key_count <= target:
break
@@ -871,26 +955,33 @@
self.key_count += 1
# We save the accumulator as a one element list so we can efficiently
# mutate when new values are added without searching the cache again.
- entry = self.table[wkey] = [self.combine_fn.create_accumulator()]
+ entry = self.table[wkey] = [self.combine_fn.create_accumulator(), None]
+ if not self.is_default_windowing:
+ # Conditional as the timestamp attribute is lazily initialized.
+ entry[1] = wkv.timestamp
entry[0] = self.combine_fn_add_input(entry[0], value)
+ if not self.is_default_windowing and self.timestamp_combiner:
+ entry[1] = self.timestamp_combiner.combine(entry[1], wkv.timestamp)
def finish(self):
for wkey, value in self.table.items():
- self.output_key(wkey, value[0])
+ self.output_key(wkey, value[0], value[1])
self.table = {}
self.key_count = 0
- def output_key(self, wkey, accumulator):
- windows, key = wkey
+ def output_key(self, wkey, accumulator, timestamp):
if self.combine_fn_compact is None:
value = accumulator
else:
value = self.combine_fn_compact(accumulator)
- if windows == 0:
- self.output(_globally_windowed_value.with_value((key, value)))
+
+ if self.is_default_windowing:
+ self.output(_globally_windowed_value.with_value((wkey, value)))
else:
- self.output(
- WindowedValue((key, value), windows[0].max_timestamp(), windows))
+ windows, key = wkey
+ if self.timestamp_combiner is None:
+ timestamp = windows[0].max_timestamp()
+ self.output(WindowedValue((key, value), timestamp, windows))
class FlattenOperation(Operation):
@@ -901,6 +992,7 @@
"""
def process(self, o):
+ # type: (WindowedValue) -> None
with self.scoped_process_state:
if self.debug_logging_enabled:
_LOGGER.debug('Processing [%s] in %s', o, self)
@@ -910,6 +1002,7 @@
def create_operation(name_context, spec, counter_factory, step_name=None,
state_sampler=None, test_shuffle_source=None,
test_shuffle_sink=None, is_streaming=False):
+ # type: (...) -> Operation
"""Create Operation object for given operation specification."""
# TODO(pabloem): Document arguments to this function call.
@@ -919,7 +1012,7 @@
if isinstance(spec, operation_specs.WorkerRead):
if isinstance(spec.source, iobase.SourceBundle):
op = ReadOperation(
- name_context, spec, counter_factory, state_sampler)
+ name_context, spec, counter_factory, state_sampler) # type: Operation
else:
from dataflow_worker.native_operations import NativeReadOperation
op = NativeReadOperation(
@@ -1001,12 +1094,13 @@
self._map_task = map_task
self._counter_factory = counter_factory
- self._ops = []
+ self._ops = [] # type: List[Operation]
self._state_sampler = state_sampler
self._test_shuffle_source = test_shuffle_source
self._test_shuffle_sink = test_shuffle_sink
def operations(self):
+ # type: () -> List[Operation]
return self._ops[:]
def execute(self):
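The PGBKCVOperation changes above key the table by the raw key under default windowing and otherwise track a per-key timestamp via the windowing strategy's TimestampCombiner, falling back to the end of the window when no combiner is configured. The standalone sketch below (hypothetical helper functions, not Beam's classes) illustrates that output-timestamp rule:

def output_timestamp(window_end, combined_timestamp, timestamp_combiner):
  if timestamp_combiner is None:
    # No combiner configured: emit at the end of the window (OUTPUT_AT_EOW).
    return window_end
  return combined_timestamp


def combine_earliest(current, new):
  # One possible combiner: keep the earliest element timestamp seen so far.
  return new if current is None else min(current, new)


if __name__ == '__main__':
  ts = None
  for element_ts in (17, 5, 9):
    ts = combine_earliest(ts, element_ts)
  print(output_timestamp(20, ts, combine_earliest))  # 5 (earliest element)
  print(output_timestamp(20, ts, None))              # 20 (end of window)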
diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py
index 00a0ac2..cdad130 100644
--- a/sdks/python/apache_beam/runners/worker/sdk_worker.py
+++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py
@@ -16,6 +16,8 @@
#
"""SDK harness for executing Python Fns via the Fn API."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -29,6 +31,15 @@
import threading
import traceback
from builtins import object
+from concurrent import futures
+from typing import TYPE_CHECKING
+from typing import Callable
+from typing import DefaultDict
+from typing import Dict
+from typing import Iterator
+from typing import List
+from typing import Optional
+from typing import Tuple
import grpc
from future.utils import raise_
@@ -45,6 +56,10 @@
from apache_beam.runners.worker.worker_id_interceptor import WorkerIdInterceptor
from apache_beam.utils.thread_pool_executor import UnboundedThreadPoolExecutor
+if TYPE_CHECKING:
+ from apache_beam.portability.api import endpoints_pb2
+ from apache_beam.utils.profiler import Profile
+
_LOGGER = logging.getLogger(__name__)
# This SDK harness will (by default) log a "lull" in processing if it sees no
@@ -56,13 +71,16 @@
class SdkHarness(object):
REQUEST_METHOD_PREFIX = '_request_'
- def __init__(
- self, control_address,
- credentials=None,
- worker_id=None,
- # Caching is disabled by default
- state_cache_size=0,
- profiler_factory=None):
+ def __init__(self,
+ control_address, # type: str
+ credentials=None,
+ worker_id=None, # type: Optional[str]
+ # Caching is disabled by default
+ state_cache_size=0,
+ # time-based data buffering is disabled by default
+ data_buffer_time_limit_ms=0,
+ profiler_factory=None # type: Optional[Callable[..., Profile]]
+ ):
self._alive = True
self._worker_index = 0
self._worker_id = worker_id
@@ -81,18 +99,22 @@
self._control_channel = grpc.intercept_channel(
self._control_channel, WorkerIdInterceptor(self._worker_id))
self._data_channel_factory = data_plane.GrpcClientDataChannelFactory(
- credentials, self._worker_id)
+ credentials, self._worker_id, data_buffer_time_limit_ms)
self._state_handler_factory = GrpcStateHandlerFactory(self._state_cache,
credentials)
self._profiler_factory = profiler_factory
- self._fns = {}
+ self._fns = {} # type: Dict[str, beam_fn_api_pb2.ProcessBundleDescriptor]
# BundleProcessor cache across all workers.
self._bundle_processor_cache = BundleProcessorCache(
state_handler_factory=self._state_handler_factory,
data_channel_factory=self._data_channel_factory,
fns=self._fns)
+
+ # TODO(BEAM-8998) use common UnboundedThreadPoolExecutor to process bundle
+ # progress once dataflow runner's excessive progress polling is removed.
+ self._report_progress_executor = futures.ThreadPoolExecutor(max_workers=1)
self._worker_thread_pool = UnboundedThreadPoolExecutor()
- self._responses = queue.Queue()
+ self._responses = queue.Queue() # type: queue.Queue[beam_fn_api_pb2.InstructionResponse]
_LOGGER.info('Initializing SDKHarness with unbounded number of workers.')
def run(self):
@@ -100,6 +122,7 @@
no_more_work = object()
def get_responses():
+ # type: () -> Iterator[beam_fn_api_pb2.InstructionResponse]
while True:
response = self._responses.get()
if response is no_more_work:
@@ -132,7 +155,11 @@
self._bundle_processor_cache.shutdown()
_LOGGER.info('Done consuming work.')
- def _execute(self, task, request):
+ def _execute(self,
+ task, # type: Callable[[], beam_fn_api_pb2.InstructionResponse]
+ request # type: beam_fn_api_pb2.InstructionRequest
+ ):
+ # type: (...) -> None
with statesampler.instruction_id(request.instruction_id):
try:
response = task()
@@ -147,20 +174,25 @@
self._responses.put(response)
def _request_register(self, request):
+ # type: (beam_fn_api_pb2.InstructionRequest) -> None
# registration request is handled synchronously
self._execute(
lambda: self.create_worker().do_instruction(request), request)
def _request_process_bundle(self, request):
+ # type: (beam_fn_api_pb2.InstructionRequest) -> None
self._request_execute(request)
def _request_process_bundle_split(self, request):
+ # type: (beam_fn_api_pb2.InstructionRequest) -> None
self._request_process_bundle_action(request)
def _request_process_bundle_progress(self, request):
+ # type: (beam_fn_api_pb2.InstructionRequest) -> None
self._request_process_bundle_action(request)
def _request_process_bundle_action(self, request):
+ # type: (beam_fn_api_pb2.InstructionRequest) -> None
def task():
instruction_id = getattr(
@@ -176,9 +208,10 @@
'Unknown process bundle instruction {}').format(
instruction_id)), request)
- self._worker_thread_pool.submit(task)
+ self._report_progress_executor.submit(task)
def _request_finalize_bundle(self, request):
+ # type: (beam_fn_api_pb2.InstructionRequest) -> None
self._request_execute(request)
def _request_execute(self, request):
@@ -219,18 +252,29 @@
performing processing.
"""
- def __init__(self, state_handler_factory, data_channel_factory, fns):
+ def __init__(self,
+ state_handler_factory, # type: StateHandlerFactory
+ data_channel_factory, # type: data_plane.DataChannelFactory
+ fns # type: Dict[str, beam_fn_api_pb2.ProcessBundleDescriptor]
+ ):
self.fns = fns
self.state_handler_factory = state_handler_factory
self.data_channel_factory = data_channel_factory
- self.active_bundle_processors = {}
- self.cached_bundle_processors = collections.defaultdict(list)
+ self.active_bundle_processors = {} # type: Dict[str, Tuple[str, bundle_processor.BundleProcessor]]
+ self.cached_bundle_processors = collections.defaultdict(list) # type: DefaultDict[str, List[bundle_processor.BundleProcessor]]
def register(self, bundle_descriptor):
+ # type: (beam_fn_api_pb2.ProcessBundleDescriptor) -> None
"""Register a ``beam_fn_api_pb2.ProcessBundleDescriptor`` by its id."""
self.fns[bundle_descriptor.id] = bundle_descriptor
def get(self, instruction_id, bundle_descriptor_id):
+ # type: (str, str) -> bundle_processor.BundleProcessor
+ """
+ Return the requested ``BundleProcessor``, creating it if necessary.
+
+ Moves the ``BundleProcessor`` from the inactive to the active cache.
+ """
try:
# pop() is threadsafe
processor = self.cached_bundle_processors[bundle_descriptor_id].pop()
@@ -245,18 +289,36 @@
return processor
def lookup(self, instruction_id):
+ # type: (str) -> Optional[bundle_processor.BundleProcessor]
+ """
+ Return the requested ``BundleProcessor`` from the cache.
+ """
return self.active_bundle_processors.get(instruction_id, (None, None))[-1]
def discard(self, instruction_id):
+ # type: (str) -> None
+ """
+ Remove the ``BundleProcessor`` from the cache.
+ """
self.active_bundle_processors[instruction_id][1].shutdown()
del self.active_bundle_processors[instruction_id]
def release(self, instruction_id):
+ # type: (str) -> None
+ """
+ Release the requested ``BundleProcessor``.
+
+ Resets the ``BundleProcessor`` and moves it from the active to the
+ inactive cache.
+ """
descriptor_id, processor = self.active_bundle_processors.pop(instruction_id)
processor.reset()
self.cached_bundle_processors[descriptor_id].append(processor)
def shutdown(self):
+ """
+ Shut down all ``BundleProcessor``s in the cache.
+ """
for instruction_id in self.active_bundle_processors:
self.active_bundle_processors[instruction_id][1].shutdown()
del self.active_bundle_processors[instruction_id]
@@ -268,10 +330,11 @@
class SdkWorker(object):
def __init__(self,
- bundle_processor_cache,
+ bundle_processor_cache, # type: BundleProcessorCache
state_cache_metrics_fn=list,
- profiler_factory=None,
- log_lull_timeout_ns=None):
+ profiler_factory=None, # type: Optional[Callable[..., Profile]]
+ log_lull_timeout_ns=None,
+ ):
self.bundle_processor_cache = bundle_processor_cache
self.state_cache_metrics_fn = state_cache_metrics_fn
self.profiler_factory = profiler_factory
@@ -279,6 +342,7 @@
or DEFAULT_LOG_LULL_TIMEOUT_NS)
def do_instruction(self, request):
+ # type: (beam_fn_api_pb2.InstructionRequest) -> beam_fn_api_pb2.InstructionResponse
request_type = request.WhichOneof('request')
if request_type:
# E.g. if register is set, this will call self.register(request.register))
@@ -287,7 +351,11 @@
else:
raise NotImplementedError
- def register(self, request, instruction_id):
+ def register(self,
+ request, # type: beam_fn_api_pb2.RegisterRequest
+ instruction_id # type: str
+ ):
+ # type: (...) -> beam_fn_api_pb2.InstructionResponse
"""Registers a set of ``beam_fn_api_pb2.ProcessBundleDescriptor``s.
This set of ``beam_fn_api_pb2.ProcessBundleDescriptor`` come as part of a
@@ -301,7 +369,11 @@
instruction_id=instruction_id,
register=beam_fn_api_pb2.RegisterResponse())
- def process_bundle(self, request, instruction_id):
+ def process_bundle(self,
+ request, # type: beam_fn_api_pb2.ProcessBundleRequest
+ instruction_id # type: str
+ ):
+ # type: (...) -> beam_fn_api_pb2.InstructionResponse
bundle_processor = self.bundle_processor_cache.get(
instruction_id, request.process_bundle_descriptor_id)
try:
@@ -328,7 +400,11 @@
self.bundle_processor_cache.discard(instruction_id)
raise
- def process_bundle_split(self, request, instruction_id):
+ def process_bundle_split(self,
+ request, # type: beam_fn_api_pb2.ProcessBundleSplitRequest
+ instruction_id # type: str
+ ):
+ # type: (...) -> beam_fn_api_pb2.InstructionResponse
processor = self.bundle_processor_cache.lookup(
request.instruction_id)
if processor:
@@ -364,7 +440,11 @@
_LOGGER.warning(
'%s%s. Traceback:\n%s', state_lull_log, step_name_log, stack_trace)
- def process_bundle_progress(self, request, instruction_id):
+ def process_bundle_progress(self,
+ request, # type: beam_fn_api_pb2.ProcessBundleProgressRequest
+ instruction_id # type: str
+ ):
+ # type: (...) -> beam_fn_api_pb2.InstructionResponse
# It is an error to get progress for a not-in-flight bundle.
processor = self.bundle_processor_cache.lookup(request.instruction_id)
if processor:
@@ -375,7 +455,11 @@
metrics=processor.metrics() if processor else None,
monitoring_infos=processor.monitoring_infos() if processor else []))
- def finalize_bundle(self, request, instruction_id):
+ def finalize_bundle(self,
+ request, # type: beam_fn_api_pb2.FinalizeBundleRequest
+ instruction_id # type: str
+ ):
+ # type: (...) -> beam_fn_api_pb2.InstructionResponse
processor = self.bundle_processor_cache.lookup(
request.instruction_id)
if processor:
@@ -427,13 +511,14 @@
"""
def __init__(self, state_cache, credentials=None):
- self._state_handler_cache = {}
+ self._state_handler_cache = {} # type: Dict[str, GrpcStateHandler]
self._lock = threading.Lock()
self._throwing_state_handler = ThrowingStateHandler()
self._credentials = credentials
self._state_cache = state_cache
def create_state_handler(self, api_service_descriptor):
+ # type: (endpoints_pb2.ApiServiceDescriptor) -> GrpcStateHandler
if not api_service_descriptor:
return self._throwing_state_handler
url = api_service_descriptor.url
@@ -495,10 +580,11 @@
_DONE = object()
def __init__(self, state_stub):
+ # type: (beam_fn_api_pb2_grpc.BeamFnStateStub) -> None
self._lock = threading.Lock()
self._state_stub = state_stub
- self._requests = queue.Queue()
- self._responses_by_id = {}
+ self._requests = queue.Queue() # type: queue.Queue[beam_fn_api_pb2.StateRequest]
+ self._responses_by_id = {} # type: Dict[str, _Future]
self._last_id = 0
self._exc_info = None
self._context = threading.local()
@@ -547,7 +633,11 @@
self._done = True
self._requests.put(self._DONE)
- def get_raw(self, state_key, continuation_token=None):
+ def get_raw(self,
+ state_key, # type: beam_fn_api_pb2.StateKey
+ continuation_token=None # type: Optional[bytes]
+ ):
+ # type: (...) -> Tuple[bytes, Optional[bytes]]
response = self._blocking_request(
beam_fn_api_pb2.StateRequest(
state_key=state_key,
@@ -555,19 +645,25 @@
continuation_token=continuation_token)))
return response.get.data, response.get.continuation_token
- def append_raw(self, state_key, data):
+ def append_raw(self,
+ state_key, # type: Optional[beam_fn_api_pb2.StateKey]
+ data # type: bytes
+ ):
+ # type: (...) -> _Future
return self._request(
beam_fn_api_pb2.StateRequest(
state_key=state_key,
append=beam_fn_api_pb2.StateAppendRequest(data=data)))
def clear(self, state_key):
+ # type: (Optional[beam_fn_api_pb2.StateKey]) -> _Future
return self._request(
beam_fn_api_pb2.StateRequest(
state_key=state_key,
clear=beam_fn_api_pb2.StateClearRequest()))
def _request(self, request):
+ # type: (beam_fn_api_pb2.StateRequest) -> _Future
request.id = self._next_id()
request.instruction_id = self._context.process_instruction_id
# Adding a new item to a dictionary is atomic in cPython
@@ -591,6 +687,7 @@
return response
def _next_id(self):
+ # type: () -> str
with self._lock:
# Use a lock here because this GrpcStateHandler is shared across all
# requests which have the same process bundle descriptor. State requests
@@ -653,7 +750,13 @@
materialized)
return iter(cached_value)
- def extend(self, state_key, coder, elements, is_cached=False):
+ def extend(self,
+ state_key, # type: beam_fn_api_pb2.StateKey
+ coder, # type: coder_impl.CoderImpl
+ elements, # type: Iterable[Any]
+ is_cached=False
+ ):
+ # type: (...) -> _Future
if self._should_be_cached(is_cached):
# Update the cache
cache_key = self._convert_to_cache_key(state_key)
@@ -671,6 +774,7 @@
return self._underlying.clear(state_key)
def done(self):
+ # type: () -> None
self._underlying.done()
def _materialize_iter(self, state_key, coder):
@@ -719,8 +823,9 @@
@classmethod
def done(cls):
+ # type: () -> _Future
if not hasattr(cls, 'DONE'):
done_future = _Future()
done_future.set(None)
- cls.DONE = done_future
- return cls.DONE
+ cls.DONE = done_future # type: ignore[attr-defined]
+ return cls.DONE # type: ignore[attr-defined]
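As an aside on the annotation style used throughout this change: because the Python SDK still targets Python 2 at this point, the hints are written as PEP 484 comment annotations rather than inline syntax. A minimal, self-contained sketch of that convention (the function and names here are hypothetical, not part of the SDK):

```python
from typing import Dict, Optional


def lookup(cache,         # type: Dict[str, int]
           key,           # type: str
           default=None   # type: Optional[int]
          ):
  # type: (...) -> Optional[int]
  """Illustrates the comment-style (PEP 484) annotations added in this diff."""
  return cache.get(key, default)


print(lookup({'a': 1}, 'a'))  # prints 1
```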
diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_main.py b/sdks/python/apache_beam/runners/worker/sdk_worker_main.py
index 13abed6..3569e13 100644
--- a/sdks/python/apache_beam/runners/worker/sdk_worker_main.py
+++ b/sdks/python/apache_beam/runners/worker/sdk_worker_main.py
@@ -16,6 +16,8 @@
#
"""SDK Fn Harness entry point."""
+# pytype: skip-file
+
from __future__ import absolute_import
import http.server
@@ -28,7 +30,7 @@
import traceback
from builtins import object
-from google.protobuf import text_format
+from google.protobuf import text_format # type: ignore # not in typeshed
from apache_beam.internal import pickler
from apache_beam.options.pipeline_options import DebugOptions
@@ -54,7 +56,8 @@
for t in threading.enumerate():
lines.append('--- Thread #%s name: %s ---\n' % (t.ident, t.name))
- lines.append(''.join(traceback.format_stack(frames[t.ident])))
+ if t.ident in frames:
+ lines.append(''.join(traceback.format_stack(frames[t.ident])))
return lines
@@ -151,6 +154,8 @@
control_address=service_descriptor.url,
worker_id=_worker_id,
state_cache_size=_get_state_cache_size(sdk_pipeline_options),
+ data_buffer_time_limit_ms=_get_data_buffer_time_limit_ms(
+ sdk_pipeline_options),
profiler_factory=profiler.Profile.factory_from_options(
sdk_pipeline_options.view_as(ProfilingOptions))
).run()
@@ -200,6 +205,29 @@
return 0
+def _get_data_buffer_time_limit_ms(pipeline_options):
+ """Defines the time limt of the outbound data buffering.
+
+ Note: data_buffer_time_limit_ms is an experimental flag and might
+ not be available in future releases.
+
+ Returns:
+ an int indicating the time limit in milliseconds of the outbound
+ data buffering. Default is 0 (disabled).
+ """
+ experiments = pipeline_options.view_as(DebugOptions).experiments
+ experiments = experiments if experiments else []
+
+ for experiment in experiments:
+ # There should be only one match, so return from the loop.
+ if re.match(r'data_buffer_time_limit_ms=', experiment):
+ return int(
+ re.match(
+ r'data_buffer_time_limit_ms=(?P<data_buffer_time_limit_ms>.*)',
+ experiment).group('data_buffer_time_limit_ms'))
+ return 0
+
+
def _load_main_session(semi_persistent_directory):
"""Loads a pickled main session from the path specified."""
if semi_persistent_directory:
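For context on how the new experiment is consumed, the helper above scans the `experiments` list from `DebugOptions` for an entry of the form `data_buffer_time_limit_ms=<value>`. A standalone sketch of equivalent parsing (the flag value below is hypothetical, and `\d+` is used here only to keep the sketch strict):

```python
import re


def parse_data_buffer_time_limit_ms(experiments):
  """Standalone sketch of the parsing done by _get_data_buffer_time_limit_ms."""
  for experiment in experiments or []:
    match = re.match(r'data_buffer_time_limit_ms=(\d+)', experiment)
    if match:
      return int(match.group(1))
  return 0


# Hypothetical invocation: --experiments=data_buffer_time_limit_ms=1000
assert parse_data_buffer_time_limit_ms(['data_buffer_time_limit_ms=1000']) == 1000
assert parse_data_buffer_time_limit_ms(None) == 0
```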
diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_main_test.py b/sdks/python/apache_beam/runners/worker/sdk_worker_main_test.py
index cae65a2..19285f9 100644
--- a/sdks/python/apache_beam/runners/worker/sdk_worker_main_test.py
+++ b/sdks/python/apache_beam/runners/worker/sdk_worker_main_test.py
@@ -16,6 +16,8 @@
#
"""Tests for apache_beam.runners.worker.sdk_worker_main."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py
index e14b3f4..0aa102d 100644
--- a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py
+++ b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py
@@ -16,6 +16,8 @@
#
"""Tests for apache_beam.runners.worker.sdk_worker."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/runners/worker/sideinputs.py b/sdks/python/apache_beam/runners/worker/sideinputs.py
index 7c1d649..7d84323 100644
--- a/sdks/python/apache_beam/runners/worker/sideinputs.py
+++ b/sdks/python/apache_beam/runners/worker/sideinputs.py
@@ -17,6 +17,8 @@
"""Utilities for handling side inputs."""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
diff --git a/sdks/python/apache_beam/runners/worker/sideinputs_test.py b/sdks/python/apache_beam/runners/worker/sideinputs_test.py
index 4a8f7c8..e7491f7 100644
--- a/sdks/python/apache_beam/runners/worker/sideinputs_test.py
+++ b/sdks/python/apache_beam/runners/worker/sideinputs_test.py
@@ -17,6 +17,8 @@
"""Tests for side input utilities."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/runners/worker/statecache.py b/sdks/python/apache_beam/runners/worker/statecache.py
index 0c96f3f..e8fd2ae 100644
--- a/sdks/python/apache_beam/runners/worker/statecache.py
+++ b/sdks/python/apache_beam/runners/worker/statecache.py
@@ -16,6 +16,8 @@
#
"""A module for caching state reads/writes in Beam applications."""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
diff --git a/sdks/python/apache_beam/runners/worker/statecache_test.py b/sdks/python/apache_beam/runners/worker/statecache_test.py
index 00ae852..730d149 100644
--- a/sdks/python/apache_beam/runners/worker/statecache_test.py
+++ b/sdks/python/apache_beam/runners/worker/statecache_test.py
@@ -16,6 +16,8 @@
#
"""Tests for state caching."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/runners/worker/statesampler.py b/sdks/python/apache_beam/runners/worker/statesampler.py
index e57815e..a9de8b1 100644
--- a/sdks/python/apache_beam/runners/worker/statesampler.py
+++ b/sdks/python/apache_beam/runners/worker/statesampler.py
@@ -17,11 +17,17 @@
# This module is experimental. No backwards-compatibility guarantees.
+# pytype: skip-file
+
from __future__ import absolute_import
import contextlib
import threading
from collections import namedtuple
+from typing import TYPE_CHECKING
+from typing import Dict
+from typing import Optional
+from typing import Union
from apache_beam.runners import common
from apache_beam.utils.counters import Counter
@@ -29,12 +35,14 @@
from apache_beam.utils.counters import CounterName
try:
- from apache_beam.runners.worker import statesampler_fast as statesampler_impl
+ from apache_beam.runners.worker import statesampler_fast as statesampler_impl # type: ignore
FAST_SAMPLER = True
except ImportError:
from apache_beam.runners.worker import statesampler_slow as statesampler_impl
FAST_SAMPLER = False
+if TYPE_CHECKING:
+ from apache_beam.metrics.execution import MetricsContainer
_STATE_SAMPLERS = threading.local()
@@ -88,37 +96,43 @@
class StateSampler(statesampler_impl.StateSampler):
- def __init__(self, prefix, counter_factory,
+ def __init__(self,
+ prefix, # type: str
+ counter_factory,
sampling_period_ms=DEFAULT_SAMPLING_PERIOD_MS):
- self.states_by_name = {}
self._prefix = prefix
self._counter_factory = counter_factory
- self._states_by_name = {}
+ self._states_by_name = {} # type: Dict[CounterName, statesampler_impl.ScopedState]
self.sampling_period_ms = sampling_period_ms
- self.tracked_thread = None
+ self.tracked_thread = None # type: Optional[threading.Thread]
self.finished = False
self.started = False
super(StateSampler, self).__init__(sampling_period_ms)
@property
def stage_name(self):
+ # type: () -> str
return self._prefix
def stop(self):
+ # type: () -> None
set_current_tracker(None)
super(StateSampler, self).stop()
def stop_if_still_running(self):
+ # type: () -> None
if self.started and not self.finished:
self.stop()
def start(self):
+ # type: () -> None
self.tracked_thread = threading.current_thread()
set_current_tracker(self)
super(StateSampler, self).start()
self.started = True
def get_info(self):
+ # type: () -> StateSamplerInfo
"""Returns StateSamplerInfo with transition statistics."""
return StateSamplerInfo(
self.current_state().name,
@@ -127,10 +141,12 @@
self.tracked_thread)
def scoped_state(self,
- name_context,
- state_name,
+ name_context, # type: Union[str, common.NameContext]
+ state_name, # type: str
io_target=None,
- metrics_container=None):
+ metrics_container=None # type: Optional[MetricsContainer]
+ ):
+ # type: (...) -> statesampler_impl.ScopedState
"""Returns a ScopedState object associated to a Step and a State.
Args:
@@ -163,6 +179,7 @@
return self._states_by_name[counter_name]
def commit_counters(self):
+ # type: () -> None
"""Updates output counters with latest state statistics."""
for state in self._states_by_name.values():
state_msecs = int(1e-6 * state.nsecs)
diff --git a/sdks/python/apache_beam/runners/worker/statesampler_slow.py b/sdks/python/apache_beam/runners/worker/statesampler_slow.py
index fb2592c..fb3cbf6 100644
--- a/sdks/python/apache_beam/runners/worker/statesampler_slow.py
+++ b/sdks/python/apache_beam/runners/worker/statesampler_slow.py
@@ -17,9 +17,12 @@
# This module is experimental. No backwards-compatibility guarantees.
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
+from typing import Optional
from apache_beam.runners import common
from apache_beam.utils import counters
@@ -35,6 +38,7 @@
self.time_since_transition = 0
def current_state(self):
+ # type: () -> ScopedState
"""Returns the current execution state.
This operation is not thread safe, and should only be called from the
@@ -42,10 +46,11 @@
return self._state_stack[-1]
def _scoped_state(self,
- counter_name,
- name_context,
+ counter_name, # type: counters.CounterName
+ name_context, # type: common.NameContext
output_counter,
metrics_container=None):
+ # type: (...) -> ScopedState
assert isinstance(name_context, common.NameContext)
return ScopedState(
self, counter_name, name_context, output_counter, metrics_container)
@@ -55,29 +60,38 @@
typed_metric_name).update(value)
def _enter_state(self, state):
+ # type: (ScopedState) -> None
self.state_transition_count += 1
self._state_stack.append(state)
def _exit_state(self):
+ # type: () -> None
self.state_transition_count += 1
self._state_stack.pop()
def start(self):
+ # type: () -> None
# Sampling not yet supported. Only state tracking at the moment.
pass
def stop(self):
+ # type: () -> None
pass
def reset(self):
+ # type: () -> None
for state in self._states_by_name.values():
state.nsecs = 0
class ScopedState(object):
- def __init__(self, sampler, name, step_name_context,
- counter=None, metrics_container=None):
+ def __init__(self,
+ sampler, # type: StateSampler
+ name, # type: counters.CounterName
+ step_name_context, # type: Optional[common.NameContext]
+ counter=None,
+ metrics_container=None):
self.state_sampler = sampler
self.name = name
self.name_context = step_name_context
@@ -86,9 +100,11 @@
self.metrics_container = metrics_container
def sampled_seconds(self):
+ # type: () -> float
return 1e-9 * self.nsecs
def sampled_msecs_int(self):
+ # type: () -> int
return int(1e-6 * self.nsecs)
def __repr__(self):
diff --git a/sdks/python/apache_beam/runners/worker/statesampler_test.py b/sdks/python/apache_beam/runners/worker/statesampler_test.py
index ed51ae1..3b5caed 100644
--- a/sdks/python/apache_beam/runners/worker/statesampler_test.py
+++ b/sdks/python/apache_beam/runners/worker/statesampler_test.py
@@ -16,6 +16,8 @@
#
"""Tests for state sampler."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/runners/worker/worker_id_interceptor.py b/sdks/python/apache_beam/runners/worker/worker_id_interceptor.py
index 6c9a605..1ba11b6 100644
--- a/sdks/python/apache_beam/runners/worker/worker_id_interceptor.py
+++ b/sdks/python/apache_beam/runners/worker/worker_id_interceptor.py
@@ -15,12 +15,15 @@
# limitations under the License.
#
"""Client Interceptor to inject worker_id"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import os
+from typing import Optional
import grpc
@@ -41,6 +44,7 @@
_worker_id = os.environ.get('WORKER_ID')
def __init__(self, worker_id=None):
+ # type: (Optional[str]) -> None
if worker_id:
self._worker_id = worker_id
diff --git a/sdks/python/apache_beam/runners/worker/worker_id_interceptor_test.py b/sdks/python/apache_beam/runners/worker/worker_id_interceptor_test.py
index 411e309..e23a50d 100644
--- a/sdks/python/apache_beam/runners/worker/worker_id_interceptor_test.py
+++ b/sdks/python/apache_beam/runners/worker/worker_id_interceptor_test.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
"""Test for WorkerIdInterceptor"""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
diff --git a/sdks/python/apache_beam/runners/worker/worker_pool_main.py b/sdks/python/apache_beam/runners/worker/worker_pool_main.py
index a68190d..baa1f3a 100644
--- a/sdks/python/apache_beam/runners/worker/worker_pool_main.py
+++ b/sdks/python/apache_beam/runners/worker/worker_pool_main.py
@@ -26,6 +26,8 @@
This entry point is used by the Python SDK container in worker pool mode.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
@@ -35,6 +37,9 @@
import sys
import threading
import time
+from typing import Dict
+from typing import Optional
+from typing import Tuple
import grpc
@@ -51,22 +56,32 @@
def __init__(self,
use_process=False,
- container_executable=None,
- state_cache_size=0):
+ container_executable=None, # type: Optional[str]
+ state_cache_size=0,
+ data_buffer_time_limit_ms=0
+ ):
self._use_process = use_process
self._container_executable = container_executable
self._state_cache_size = state_cache_size
- self._worker_processes = {}
+ self._data_buffer_time_limit_ms = data_buffer_time_limit_ms
+ self._worker_processes = {} # type: Dict[str, subprocess.Popen]
@classmethod
- def start(cls, use_process=False, port=0,
- state_cache_size=0, container_executable=None):
+ def start(cls,
+ use_process=False,
+ port=0,
+ state_cache_size=0,
+ data_buffer_time_limit_ms=-1,
+ container_executable=None # type: Optional[str]
+ ):
+ # type: (...) -> Tuple[str, grpc.Server]
worker_server = grpc.server(UnboundedThreadPoolExecutor())
worker_address = 'localhost:%s' % worker_server.add_insecure_port(
'[::]:%s' % port)
worker_pool = cls(use_process=use_process,
container_executable=container_executable,
- state_cache_size=state_cache_size)
+ state_cache_size=state_cache_size,
+ data_buffer_time_limit_ms=data_buffer_time_limit_ms)
beam_fn_api_pb2_grpc.add_BeamFnExternalWorkerPoolServicer_to_server(
worker_pool,
worker_server)
@@ -80,7 +95,11 @@
return worker_address, worker_server
- def StartWorker(self, start_worker_request, unused_context):
+ def StartWorker(self,
+ start_worker_request, # type: beam_fn_api_pb2.StartWorkerRequest
+ unused_context
+ ):
+ # type: (...) -> beam_fn_api_pb2.StartWorkerResponse
try:
if self._use_process:
command = ['python', '-c',
@@ -90,11 +109,13 @@
'"%s",'
'worker_id="%s",'
'state_cache_size=%d'
+ ',data_buffer_time_limit_ms=%d'
')'
'.run()' % (
start_worker_request.control_endpoint.url,
start_worker_request.worker_id,
- self._state_cache_size)]
+ self._state_cache_size,
+ self._data_buffer_time_limit_ms)]
if self._container_executable:
# command as per container spec
# the executable is responsible to handle concurrency
@@ -119,7 +140,8 @@
worker = sdk_worker.SdkHarness(
start_worker_request.control_endpoint.url,
worker_id=start_worker_request.worker_id,
- state_cache_size=self._state_cache_size)
+ state_cache_size=self._state_cache_size,
+ data_buffer_time_limit_ms=self._data_buffer_time_limit_ms)
worker_thread = threading.Thread(
name='run_worker_%s' % start_worker_request.worker_id,
target=worker.run)
@@ -130,7 +152,11 @@
except Exception as exn:
return beam_fn_api_pb2.StartWorkerResponse(error=str(exn))
- def StopWorker(self, stop_worker_request, unused_context):
+ def StopWorker(self,
+ stop_worker_request, # type: beam_fn_api_pb2.StopWorkerRequest
+ unused_context
+ ):
+ # type: (...) -> beam_fn_api_pb2.StopWorkerResponse
# applicable for process mode to ensure process cleanup
# thread based workers terminate automatically
worker_process = self._worker_processes.pop(stop_worker_request.worker_id,
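To show how the new buffering knob travels through the worker pool, here is a usage sketch based on the `start` signature above; the values are hypothetical and the servicer class name is assumed to be the one registered in this module:

```python
from apache_beam.runners.worker import worker_pool_main

# Starts the gRPC worker-pool service; each SdkHarness it launches receives
# the same data_buffer_time_limit_ms, as wired up in StartWorker above.
address, server = worker_pool_main.BeamFnExternalWorkerPoolServicer.start(
    use_process=False,
    port=0,                          # let gRPC pick a free port
    state_cache_size=100,            # hypothetical cache size
    data_buffer_time_limit_ms=1000)  # hypothetical buffering limit
```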
diff --git a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/preprocess.py b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/preprocess.py
index e36ffb7..b0078f5 100644
--- a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/preprocess.py
+++ b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/preprocess.py
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Preprocessor applying tf.transform to the chicago_taxi data."""
+# pytype: skip-file
+
from __future__ import absolute_import, division, print_function
import argparse
diff --git a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/process_tfma.py b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/process_tfma.py
index 11fc335..cc9cc2a 100644
--- a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/process_tfma.py
+++ b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/process_tfma.py
@@ -13,6 +13,8 @@
# limitations under the License.
"""Runs a batch job for performing Tensorflow Model Analysis."""
+# pytype: skip-file
+
from __future__ import absolute_import, division, print_function
import argparse
diff --git a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/setup.py b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/setup.py
index 4b3b8f5..22f6150 100644
--- a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/setup.py
+++ b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/setup.py
@@ -12,13 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Setup dependencies for local and cloud deployment."""
+# pytype: skip-file
+
from __future__ import absolute_import
import setuptools
-# LINT.IfChange
TF_VERSION = '1.14.0'
-# LINT.ThenChange(train_mlengine.sh, start_model_server_mlengine.sh)
if __name__ == '__main__':
setuptools.setup(
diff --git a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/tfdv_analyze_and_validate.py b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/tfdv_analyze_and_validate.py
index 966b08b..32728df 100644
--- a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/tfdv_analyze_and_validate.py
+++ b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/tfdv_analyze_and_validate.py
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compute stats, infer schema, and validate stats for chicago taxi example."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -29,7 +31,7 @@
from apache_beam.testing.load_tests.load_test_metrics_utils import MeasureTime
from apache_beam.testing.load_tests.load_test_metrics_utils import MetricsReader
-from google.protobuf import text_format
+from google.protobuf import text_format # type: ignore # typeshed out of date
from trainer import taxi
diff --git a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/trainer/model.py b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/trainer/model.py
index f04d0a8..a726dfd 100644
--- a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/trainer/model.py
+++ b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/trainer/model.py
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Defines the model used to predict who will tip in the Chicago Taxi demo."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/trainer/task.py b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/trainer/task.py
index 7b80056..aef7d3c 100644
--- a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/trainer/task.py
+++ b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/trainer/task.py
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trainer for the chicago_taxi demo."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/trainer/taxi.py b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/trainer/taxi.py
index 5bf3191..540f0ca 100644
--- a/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/trainer/taxi.py
+++ b/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/trainer/taxi.py
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility and schema methods for the chicago_taxi sample."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -20,7 +22,7 @@
from tensorflow_transform.tf_metadata import dataset_schema
from tensorflow_transform.tf_metadata import schema_utils
-from google.protobuf import text_format
+from google.protobuf import text_format # type: ignore # typeshed out of date
from tensorflow.python.lib.io import file_io
from tensorflow_metadata.proto.v0 import schema_pb2
diff --git a/sdks/python/apache_beam/testing/benchmarks/nexmark/nexmark_launcher.py b/sdks/python/apache_beam/testing/benchmarks/nexmark/nexmark_launcher.py
index 2779cb4..9977cc6 100644
--- a/sdks/python/apache_beam/testing/benchmarks/nexmark/nexmark_launcher.py
+++ b/sdks/python/apache_beam/testing/benchmarks/nexmark/nexmark_launcher.py
@@ -56,6 +56,8 @@
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/testing/benchmarks/nexmark/nexmark_util.py b/sdks/python/apache_beam/testing/benchmarks/nexmark/nexmark_util.py
index 916faa4..c55f937 100644
--- a/sdks/python/apache_beam/testing/benchmarks/nexmark/nexmark_util.py
+++ b/sdks/python/apache_beam/testing/benchmarks/nexmark/nexmark_util.py
@@ -32,6 +32,8 @@
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query0.py b/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query0.py
index 3df848d..5c56178 100644
--- a/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query0.py
+++ b/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query0.py
@@ -25,6 +25,8 @@
to verify the infrastructure.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import apache_beam as beam
diff --git a/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query1.py b/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query1.py
index 24b8579..88df169 100644
--- a/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query1.py
+++ b/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query1.py
@@ -23,6 +23,8 @@
This query converts bid prices from dollars to euros.
It illustrates a simple map.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import apache_beam as beam
diff --git a/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query2.py b/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query2.py
index 33ee3f3..fe45503 100644
--- a/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query2.py
+++ b/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query2.py
@@ -24,6 +24,8 @@
It illustrates a simple filter.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import apache_beam as beam
diff --git a/sdks/python/apache_beam/testing/data/trigger_transcripts.yaml b/sdks/python/apache_beam/testing/data/trigger_transcripts.yaml
index cac0c74..b2d4e9a 100644
--- a/sdks/python/apache_beam/testing/data/trigger_transcripts.yaml
+++ b/sdks/python/apache_beam/testing/data/trigger_transcripts.yaml
@@ -30,6 +30,7 @@
window_fn: FixedWindows(10)
trigger_fn: Default
timestamp_combiner: OUTPUT_AT_EOW
+allowed_lateness: 100
transcript:
- input: [1, 2, 3, 10, 11, 25]
- watermark: 100
@@ -42,6 +43,26 @@
- {window: [0, 9], values: [1, 2, 3, 7], timestamp: 9, late: true}
---
+name: fixed_drop_late_data_after_allowed_lateness
+window_fn: FixedWindows(10)
+trigger_fn: AfterWatermark(early=AfterCount(3), late=AfterCount(1))
+timestamp_combiner: OUTPUT_AT_EOW
+allowed_lateness: 20
+accumulation_mode: accumulating
+transcript:
+ - input: [1, 2, 10, 11, 80, 81]
+ - watermark: 100
+ - expect:
+ - {window: [0, 9], values: [1, 2], timestamp: 9, final: false}
+ - {window: [10, 19], values: [10, 11], timestamp: 19}
+ - {window: [80, 89], values: [80, 81], timestamp: 89, late: false}
+ - input: [7, 8] # no output
+ - input: [17, 18] # no output
+ - input: [82]
+ - expect:
+ - {window: [80, 89], values: [80, 81, 82], timestamp: 89, late: true}
+
+---
name: timestamp_combiner_earliest
window_fn: FixedWindows(10)
trigger_fn: Default
@@ -118,6 +139,7 @@
- SwitchingDirectRunner
window_fn: Sessions(10)
trigger_fn: AfterWatermark(early=AfterCount(2), late=AfterCount(3))
+allowed_lateness: 100
timestamp_combiner: OUTPUT_AT_EOW
transcript:
- input: [1, 2, 3]
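The new `allowed_lateness` transcript entries encode when late input is dropped outright: once the watermark has passed the end of a window by more than the allowed lateness, further elements for that window produce no output. A small Python sketch of that rule, using the numbers from the `fixed_drop_late_data_after_allowed_lateness` case above:

```python
def is_dropped(window_end, watermark, allowed_lateness):
  """An element is discarded once the watermark exceeds window_end + lateness."""
  return watermark > window_end + allowed_lateness


# Window [0, 9] with watermark at 100 and allowed_lateness 20:
# the late inputs 7 and 8 are dropped (the transcript shows no output).
assert is_dropped(window_end=9, watermark=100, allowed_lateness=20)

# Window [80, 89] is still within the allowed lateness, so 82 triggers a
# late firing.
assert not is_dropped(window_end=89, watermark=100, allowed_lateness=20)
```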
diff --git a/sdks/python/apache_beam/testing/datatype_inference.py b/sdks/python/apache_beam/testing/datatype_inference.py
index b1a689c..fbf3380 100644
--- a/sdks/python/apache_beam/testing/datatype_inference.py
+++ b/sdks/python/apache_beam/testing/datatype_inference.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import array
diff --git a/sdks/python/apache_beam/testing/datatype_inference_test.py b/sdks/python/apache_beam/testing/datatype_inference_test.py
index 131eafb..0cbd6f9 100644
--- a/sdks/python/apache_beam/testing/datatype_inference_test.py
+++ b/sdks/python/apache_beam/testing/datatype_inference_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/testing/extra_assertions.py b/sdks/python/apache_beam/testing/extra_assertions.py
index 53a9eeb..b67f814 100644
--- a/sdks/python/apache_beam/testing/extra_assertions.py
+++ b/sdks/python/apache_beam/testing/extra_assertions.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import sys
diff --git a/sdks/python/apache_beam/testing/extra_assertions_test.py b/sdks/python/apache_beam/testing/extra_assertions_test.py
index 8948f4e..8b078af 100644
--- a/sdks/python/apache_beam/testing/extra_assertions_test.py
+++ b/sdks/python/apache_beam/testing/extra_assertions_test.py
@@ -15,6 +15,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/testing/load_tests/build.gradle b/sdks/python/apache_beam/testing/load_tests/build.gradle
index 8eba17f..543d8c4 100644
--- a/sdks/python/apache_beam/testing/load_tests/build.gradle
+++ b/sdks/python/apache_beam/testing/load_tests/build.gradle
@@ -24,8 +24,6 @@
distTarBall project(path: ":sdks:python", configuration: "distTarBall")
}
-pythonVersion = '2.7'
-
description = "Apache Beam :: SDKs :: Python :: Load Tests"
def mainClassProperty = "loadTest.mainClass"
@@ -59,4 +57,4 @@
def parseOptions(String option) {
option.replace('\"', '\\"')
-}
\ No newline at end of file
+}
diff --git a/sdks/python/apache_beam/testing/load_tests/co_group_by_key_test.py b/sdks/python/apache_beam/testing/load_tests/co_group_by_key_test.py
index 57425a2..51b5d98 100644
--- a/sdks/python/apache_beam/testing/load_tests/co_group_by_key_test.py
+++ b/sdks/python/apache_beam/testing/load_tests/co_group_by_key_test.py
@@ -142,6 +142,8 @@
-Prunner=TestDataflowRunner :sdks:python:apache_beam:testing:load-tests:run
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import json
diff --git a/sdks/python/apache_beam/testing/load_tests/combine_test.py b/sdks/python/apache_beam/testing/load_tests/combine_test.py
index 9c2f2d0..068e95e 100644
--- a/sdks/python/apache_beam/testing/load_tests/combine_test.py
+++ b/sdks/python/apache_beam/testing/load_tests/combine_test.py
@@ -117,6 +117,8 @@
TestDataflowRunner :sdks:python:apache_beam:testing:load-tests:run
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/testing/load_tests/group_by_key_test.py b/sdks/python/apache_beam/testing/load_tests/group_by_key_test.py
index d19e7f7..bc15bbb 100644
--- a/sdks/python/apache_beam/testing/load_tests/group_by_key_test.py
+++ b/sdks/python/apache_beam/testing/load_tests/group_by_key_test.py
@@ -118,6 +118,8 @@
-Prunner=TestDataflowRunner :sdks:python:apache_beam:testing:load-tests:run
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/testing/load_tests/load_test.py b/sdks/python/apache_beam/testing/load_tests/load_test.py
index 71aa3a8..0a7eb37 100644
--- a/sdks/python/apache_beam/testing/load_tests/load_test.py
+++ b/sdks/python/apache_beam/testing/load_tests/load_test.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+# pytype: skip-file
+
from __future__ import absolute_import
import json
diff --git a/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py b/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py
index 798f401..d843e24 100644
--- a/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py
+++ b/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py
@@ -27,11 +27,14 @@
* total_bytes_count
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
import time
import uuid
+from typing import List
import apache_beam as beam
from apache_beam.metrics import Metrics
@@ -170,7 +173,7 @@
A :class:`MetricsReader` retrieves metrics from pipeline result,
prepares it for publishers and setup publishers.
"""
- publishers = []
+ publishers = [] # type: List[ConsoleMetricsPublisher]
def __init__(self, project_name=None, bq_table=None, bq_dataset=None,
publish_to_bq=False, filters=None):
diff --git a/sdks/python/apache_beam/testing/load_tests/pardo_test.py b/sdks/python/apache_beam/testing/load_tests/pardo_test.py
index 7c05422..ce9c264 100644
--- a/sdks/python/apache_beam/testing/load_tests/pardo_test.py
+++ b/sdks/python/apache_beam/testing/load_tests/pardo_test.py
@@ -117,6 +117,8 @@
-Prunner=TestDataflowRunner :sdks:python:apache_beam:testing:load-tests:run
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/testing/load_tests/sideinput_test.py b/sdks/python/apache_beam/testing/load_tests/sideinput_test.py
index 4143f83..2e0dc189 100644
--- a/sdks/python/apache_beam/testing/load_tests/sideinput_test.py
+++ b/sdks/python/apache_beam/testing/load_tests/sideinput_test.py
@@ -112,6 +112,8 @@
-Prunner=TestDataflowRunner :sdks:python:apache_beam:testing:load-tests:run
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/testing/load_tests/streaming/group_by_key_streaming_pipeline.py b/sdks/python/apache_beam/testing/load_tests/streaming/group_by_key_streaming_pipeline.py
index 3fc277e..88ccaed 100644
--- a/sdks/python/apache_beam/testing/load_tests/streaming/group_by_key_streaming_pipeline.py
+++ b/sdks/python/apache_beam/testing/load_tests/streaming/group_by_key_streaming_pipeline.py
@@ -25,6 +25,8 @@
Values have to be reparsed again to bytes
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/testing/load_tests/streaming/group_by_key_streaming_test.py b/sdks/python/apache_beam/testing/load_tests/streaming/group_by_key_streaming_test.py
index 6042e68..56ad43f 100644
--- a/sdks/python/apache_beam/testing/load_tests/streaming/group_by_key_streaming_test.py
+++ b/sdks/python/apache_beam/testing/load_tests/streaming/group_by_key_streaming_test.py
@@ -34,6 +34,8 @@
* --metrics_table=gbk_stream
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/testing/metric_result_matchers.py b/sdks/python/apache_beam/testing/metric_result_matchers.py
index 126ba3d..15dc6ff 100644
--- a/sdks/python/apache_beam/testing/metric_result_matchers.py
+++ b/sdks/python/apache_beam/testing/metric_result_matchers.py
@@ -41,6 +41,8 @@
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from hamcrest import equal_to
diff --git a/sdks/python/apache_beam/testing/metric_result_matchers_test.py b/sdks/python/apache_beam/testing/metric_result_matchers_test.py
index dc674ac..6f6a131 100644
--- a/sdks/python/apache_beam/testing/metric_result_matchers_test.py
+++ b/sdks/python/apache_beam/testing/metric_result_matchers_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the metric_result_matchers."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/testing/pipeline_verifiers.py b/sdks/python/apache_beam/testing/pipeline_verifiers.py
index cf99541..3e286a4 100644
--- a/sdks/python/apache_beam/testing/pipeline_verifiers.py
+++ b/sdks/python/apache_beam/testing/pipeline_verifiers.py
@@ -22,6 +22,8 @@
`hamcrest.core.base_matcher.BaseMatcher` and override _matches.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/testing/pipeline_verifiers_test.py b/sdks/python/apache_beam/testing/pipeline_verifiers_test.py
index ec17ef6..f7b2f07 100644
--- a/sdks/python/apache_beam/testing/pipeline_verifiers_test.py
+++ b/sdks/python/apache_beam/testing/pipeline_verifiers_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the test pipeline verifiers"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -42,7 +44,7 @@
from apache_beam.io.gcp.gcsfilesystem import GCSFileSystem
except ImportError:
HttpError = None
- GCSFileSystem = None
+ GCSFileSystem = None # type: ignore
class PipelineVerifiersTest(unittest.TestCase):
diff --git a/sdks/python/apache_beam/testing/synthetic_pipeline.py b/sdks/python/apache_beam/testing/synthetic_pipeline.py
index fbef112..653281d 100644
--- a/sdks/python/apache_beam/testing/synthetic_pipeline.py
+++ b/sdks/python/apache_beam/testing/synthetic_pipeline.py
@@ -32,6 +32,8 @@
data for the pipeline.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/testing/synthetic_pipeline_test.py b/sdks/python/apache_beam/testing/synthetic_pipeline_test.py
index 18d4bdd..b74e650 100644
--- a/sdks/python/apache_beam/testing/synthetic_pipeline_test.py
+++ b/sdks/python/apache_beam/testing/synthetic_pipeline_test.py
@@ -17,6 +17,8 @@
"""Tests for apache_beam.testing.synthetic_pipeline."""
+# pytype: skip-file
+
from __future__ import absolute_import
import glob
diff --git a/sdks/python/apache_beam/testing/test_pipeline.py b/sdks/python/apache_beam/testing/test_pipeline.py
index 7a2d575..a34af95 100644
--- a/sdks/python/apache_beam/testing/test_pipeline.py
+++ b/sdks/python/apache_beam/testing/test_pipeline.py
@@ -17,6 +17,8 @@
"""Test Pipeline, a wrapper of Pipeline for test purpose"""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
diff --git a/sdks/python/apache_beam/testing/test_pipeline_test.py b/sdks/python/apache_beam/testing/test_pipeline_test.py
index 8efd8c6..8cd4c88 100644
--- a/sdks/python/apache_beam/testing/test_pipeline_test.py
+++ b/sdks/python/apache_beam/testing/test_pipeline_test.py
@@ -17,6 +17,8 @@
"""Unit test for the TestPipeline class"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/testing/test_stream.py b/sdks/python/apache_beam/testing/test_stream.py
index f84008b..7f026df 100644
--- a/sdks/python/apache_beam/testing/test_stream.py
+++ b/sdks/python/apache_beam/testing/test_stream.py
@@ -19,6 +19,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from abc import ABCMeta
@@ -28,13 +30,12 @@
from future.utils import with_metaclass
-import apache_beam as beam
from apache_beam import coders
-from apache_beam import core
from apache_beam import pvalue
from apache_beam.portability import common_urns
from apache_beam.portability.api import beam_runner_api_pb2
from apache_beam.transforms import PTransform
+from apache_beam.transforms import core
from apache_beam.transforms import window
from apache_beam.transforms.window import TimestampedValue
from apache_beam.utils import timestamp
@@ -50,7 +51,7 @@
@total_ordering
-class Event(with_metaclass(ABCMeta, object)):
+class Event(with_metaclass(ABCMeta, object)): # type: ignore[misc]
"""Test stream event to be emitted during execution of a TestStream."""
@abstractmethod
@@ -171,47 +172,24 @@
output.
"""
- def __init__(self, coder=coders.FastPrimitivesCoder(), events=()):
+ def __init__(self, coder=coders.FastPrimitivesCoder(), events=None):
super(TestStream, self).__init__()
assert coder is not None
self.coder = coder
self.watermarks = {None: timestamp.MIN_TIMESTAMP}
- self._events = list(events)
+ self._events = [] if events is None else list(events)
self.output_tags = set()
def get_windowing(self, unused_inputs):
return core.Windowing(window.GlobalWindows())
+ def _infer_output_coder(self, input_type=None, input_coder=None):
+ return self.coder
+
def expand(self, pbegin):
assert isinstance(pbegin, pvalue.PBegin)
self.pipeline = pbegin.pipeline
-
- # This multiplexing the multiple output PCollections.
- def mux(event):
- if event.tag:
- yield pvalue.TaggedOutput(event.tag, event)
- else:
- yield event
- mux_output = (pbegin
- | _TestStream(self.output_tags, events=self._events)
- | 'TestStream Multiplexer' >> beam.ParDo(mux).with_outputs())
-
- # Apply a way to control the watermark per output. It is necessary to
- # have an individual _WatermarkController per PCollection because the
- # calculation of the input watermark of a transform is based on the event
- # timestamp of the elements flowing through it. Meaning, it is impossible
- # to control the output watermarks of the individual PCollections solely
- # on the event timestamps.
- outputs = {}
- for tag in self.output_tags:
- label = '_WatermarkController[{}]'.format(tag)
- outputs[tag] = (mux_output[tag] | label >> _WatermarkController())
-
- # Downstream consumers expect a PCollection if there is only a single
- # output.
- if len(outputs) == 1:
- return list(outputs.values())[0]
- return outputs
+ return pvalue.PCollection(self.pipeline, is_bounded=False)
def _add(self, event):
if isinstance(event, ElementEvent):
@@ -303,141 +281,3 @@
return TestStream(
coder=coder,
events=[Event.from_runner_api(e, coder) for e in payload.events])
-
-
-class _WatermarkController(PTransform):
- """A runner-overridable PTransform Primitive to control the watermark.
-
- Expected implementation behavior:
- - If the instance recieves a WatermarkEvent, it sets its output watermark to
- the specified value then drops the event.
- - If the instance receives an ElementEvent, it emits all specified elements
- to the Global Window with the event time set to the element's timestamp.
- """
- def get_windowing(self, _):
- return core.Windowing(window.GlobalWindows())
-
- def expand(self, pcoll):
- return pvalue.PCollection.from_(pcoll)
-
-
-class _TestStream(PTransform):
- """Test stream that generates events on an unbounded PCollection of elements.
-
- Each event emits elements, advances the watermark or advances the processing
- time. After all of the specified elements are emitted, ceases to produce
- output.
-
- Expected implementation behavior:
- - If the instance receives a WatermarkEvent with the WATERMARK_CONTROL_TAG
- then the instance sets its own watermark hold at the specified value and
- drops the event.
- - If the instance receives any other WatermarkEvent or ElementEvent, it
- passes it to the consumer.
- """
-
- # This tag is used on WatermarkEvents to control the watermark at the root
- # TestStream.
- WATERMARK_CONTROL_TAG = '_TestStream_Watermark'
-
- def __init__(self, output_tags, coder=coders.FastPrimitivesCoder(),
- events=None):
- assert coder is not None
- self.coder = coder
- self._events = self._add_watermark_advancements(output_tags, events)
-
- def _watermark_starts(self, output_tags):
- """Sentinel values to hold the watermark of outputs to -inf.
-
- The output watermarks of the output PCollections (fake unbounded sources) in
- a TestStream are controlled by watermark holds. This sets the hold of each
- output PCollection so that the individual holds can be controlled by the
- given events.
- """
- return [WatermarkEvent(timestamp.MIN_TIMESTAMP, tag) for tag in output_tags]
-
- def _watermark_stops(self, output_tags):
- """Sentinel values to close the watermark of outputs."""
- return [WatermarkEvent(timestamp.MAX_TIMESTAMP, tag) for tag in output_tags]
-
- def _test_stream_start(self):
- """Sentinel value to move the watermark hold of the TestStream to +inf.
-
- This sets a hold to +inf such that the individual holds of the output
- PCollections are allowed to modify their individial output watermarks with
- their holds. This is because the calculation of the output watermark is a
- min over all input watermarks.
- """
- return [WatermarkEvent(timestamp.MAX_TIMESTAMP - timestamp.TIME_GRANULARITY,
- _TestStream.WATERMARK_CONTROL_TAG)]
-
- def _test_stream_stop(self):
- """Sentinel value to close the watermark of the TestStream."""
- return [WatermarkEvent(timestamp.MAX_TIMESTAMP,
- _TestStream.WATERMARK_CONTROL_TAG)]
-
- def _test_stream_init(self):
- """Sentinel value to hold the watermark of the TestStream to -inf.
-
- This sets a hold to ensure that the output watermarks of the output
- PCollections do not advance to +inf before their watermark holds are set.
- """
- return [WatermarkEvent(timestamp.MIN_TIMESTAMP,
- _TestStream.WATERMARK_CONTROL_TAG)]
-
- def _set_up(self, output_tags):
- return (self._test_stream_init()
- + self._watermark_starts(output_tags)
- + self._test_stream_start())
-
- def _tear_down(self, output_tags):
- return self._watermark_stops(output_tags) + self._test_stream_stop()
-
- def _add_watermark_advancements(self, output_tags, events):
- """Adds watermark advancements to the given events.
-
- The following watermark advancements can be done on the runner side.
- However, it makes the logic on the runner side much more complicated than
- it needs to be.
-
- In order for watermarks to be properly advanced in a TestStream, a specific
- sequence of watermark holds must be sent:
-
- 1. Hold the root watermark at -inf (this prevents the pipeline from
- immediately returning).
- 2. Hold the watermarks at the WatermarkControllerss at -inf (this prevents
- the pipeline from immediately returning).
- 3. Advance the root watermark to +inf - 1 (this allows the downstream
- WatermarkControllers to control their watermarks via holds).
- 4. Advance watermarks as normal.
- 5. Advance WatermarkController watermarks to +inf
- 6. Advance root watermark to +inf.
- """
- if not events:
- return []
-
- return self._set_up(output_tags) + events + self._tear_down(output_tags)
-
- def get_windowing(self, unused_inputs):
- return core.Windowing(window.GlobalWindows())
-
- def expand(self, pcoll):
- return pvalue.PCollection(pcoll.pipeline, is_bounded=False)
-
- def _infer_output_coder(self, input_type=None, input_coder=None):
- return self.coder
-
- def _events_from_script(self, index):
- yield self._events[index]
-
- def events(self, index):
- return self._events_from_script(index)
-
- def begin(self):
- return 0
-
- def end(self, index):
- return index >= len(self._events)
-
- def next(self, index):
- return index + 1
diff --git a/sdks/python/apache_beam/testing/test_stream_it_test.py b/sdks/python/apache_beam/testing/test_stream_it_test.py
new file mode 100644
index 0000000..1ef6ecd
--- /dev/null
+++ b/sdks/python/apache_beam/testing/test_stream_it_test.py
@@ -0,0 +1,239 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Integration tests for the test_stream module."""
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+import unittest
+from functools import wraps
+
+from nose.plugins.attrib import attr
+
+import apache_beam as beam
+from apache_beam.options.pipeline_options import StandardOptions
+from apache_beam.testing.test_pipeline import TestPipeline
+from apache_beam.testing.test_stream import TestStream
+from apache_beam.testing.util import assert_that
+from apache_beam.testing.util import equal_to
+from apache_beam.testing.util import equal_to_per_window
+from apache_beam.transforms import trigger
+from apache_beam.transforms import window
+from apache_beam.transforms.window import FixedWindows
+from apache_beam.transforms.window import TimestampedValue
+from apache_beam.utils import timestamp
+from apache_beam.utils.timestamp import Timestamp
+
+
+def supported(runners):
+ if not isinstance(runners, list):
+ runners = [runners]
+
+ def inner(fn):
+ @wraps(fn)
+ def wrapped(self):
+ if self.runner_name not in runners:
+      self.skipTest('The "{}" runner does not support the TestStream transform. '
+ 'Supported runners: {}'.format(self.runner_name, runners))
+ else:
+ return fn(self)
+ return wrapped
+ return inner
+
+
+class TestStreamIntegrationTests(unittest.TestCase):
+
+ @classmethod
+ def setUpClass(cls):
+ cls.test_pipeline = TestPipeline(is_integration_test=True)
+ cls.args = cls.test_pipeline.get_full_options_as_args()
+ cls.runner_name = type(cls.test_pipeline.runner).__name__
+ cls.project = cls.test_pipeline.get_option('project')
+
+ @supported(['DirectRunner', 'SwitchingDirectRunner'])
+ @attr('IT')
+ def test_basic_execution(self):
+ test_stream = (TestStream()
+ .advance_watermark_to(10)
+ .add_elements(['a', 'b', 'c'])
+ .advance_watermark_to(20)
+ .add_elements(['d'])
+ .add_elements(['e'])
+ .advance_processing_time(10)
+ .advance_watermark_to(300)
+ .add_elements([TimestampedValue('late', 12)])
+ .add_elements([TimestampedValue('last', 310)])
+ .advance_watermark_to_infinity())
+
+ class RecordFn(beam.DoFn):
+
+ def process(self, element=beam.DoFn.ElementParam,
+ timestamp=beam.DoFn.TimestampParam):
+ yield (element, timestamp)
+
+ with beam.Pipeline(argv=self.args) as p:
+ my_record_fn = RecordFn()
+ records = p | test_stream | beam.ParDo(my_record_fn)
+
+ assert_that(records, equal_to([
+ ('a', timestamp.Timestamp(10)),
+ ('b', timestamp.Timestamp(10)),
+ ('c', timestamp.Timestamp(10)),
+ ('d', timestamp.Timestamp(20)),
+ ('e', timestamp.Timestamp(20)),
+ ('late', timestamp.Timestamp(12)),
+ ('last', timestamp.Timestamp(310)),]))
+
+ @supported(['DirectRunner', 'SwitchingDirectRunner'])
+ @attr('IT')
+ def test_multiple_outputs(self):
+ """Tests that the TestStream supports emitting to multiple PCollections."""
+ letters_elements = [
+ TimestampedValue('a', 6),
+ TimestampedValue('b', 7),
+ TimestampedValue('c', 8),
+ ]
+ numbers_elements = [
+ TimestampedValue('1', 11),
+ TimestampedValue('2', 12),
+ TimestampedValue('3', 13),
+ ]
+ test_stream = (TestStream()
+ .advance_watermark_to(5, tag='letters')
+ .add_elements(letters_elements, tag='letters')
+ .advance_watermark_to(10, tag='numbers')
+ .add_elements(numbers_elements, tag='numbers'))
+
+ class RecordFn(beam.DoFn):
+ def process(self, element=beam.DoFn.ElementParam,
+ timestamp=beam.DoFn.TimestampParam):
+ yield (element, timestamp)
+
+ options = StandardOptions(streaming=True)
+ p = TestPipeline(is_integration_test=True, options=options)
+
+ main = p | test_stream
+ letters = main['letters'] | 'record letters' >> beam.ParDo(RecordFn())
+ numbers = main['numbers'] | 'record numbers' >> beam.ParDo(RecordFn())
+
+ assert_that(letters, equal_to([
+ ('a', Timestamp(6)),
+ ('b', Timestamp(7)),
+ ('c', Timestamp(8))]), label='assert letters')
+
+ assert_that(numbers, equal_to([
+ ('1', Timestamp(11)),
+ ('2', Timestamp(12)),
+ ('3', Timestamp(13))]), label='assert numbers')
+
+ p.run()
+
+ @supported(['DirectRunner', 'SwitchingDirectRunner'])
+ @attr('IT')
+ def test_multiple_outputs_with_watermark_advancement(self):
+ """Tests that the TestStream can independently control output watermarks."""
+
+ # Purposely set the watermark of numbers to 20 then letters to 5 to test
+ # that the watermark advancement is per PCollection.
+ #
+ # This creates two PCollections, (a, b, c) and (1, 2, 3). These will be
+ # emitted at different times so that they will have different windows. The
+ # watermark advancement is checked by checking their windows. If the
+ # watermark does not advance, then the windows will be [-inf, -inf). If the
+    # windows do not advance separately, then the PCollections will both be
+    # windowed in [15, 30).
+ letters_elements = [
+ TimestampedValue('a', 6),
+ TimestampedValue('b', 7),
+ TimestampedValue('c', 8),
+ ]
+ numbers_elements = [
+ TimestampedValue('1', 21),
+ TimestampedValue('2', 22),
+ TimestampedValue('3', 23),
+ ]
+ test_stream = (TestStream()
+ .advance_watermark_to(0, tag='letters')
+ .advance_watermark_to(0, tag='numbers')
+ .advance_watermark_to(20, tag='numbers')
+ .advance_watermark_to(5, tag='letters')
+ .add_elements(letters_elements, tag='letters')
+ .advance_watermark_to(10, tag='letters')
+ .add_elements(numbers_elements, tag='numbers')
+ .advance_watermark_to(30, tag='numbers'))
+
+ options = StandardOptions(streaming=True)
+ p = TestPipeline(is_integration_test=True, options=options)
+
+ main = p | test_stream
+
+ # Use an AfterWatermark trigger with an early firing to test that the
+ # watermark is advancing properly and that the element is being emitted in
+ # the correct window.
+ letters = (main['letters']
+ | 'letter windows' >> beam.WindowInto(
+ FixedWindows(15),
+ trigger=trigger.AfterWatermark(early=trigger.AfterCount(1)),
+ accumulation_mode=trigger.AccumulationMode.DISCARDING)
+ | 'letter with key' >> beam.Map(lambda x: ('k', x))
+ | 'letter gbk' >> beam.GroupByKey())
+
+ numbers = (main['numbers']
+ | 'number windows' >> beam.WindowInto(
+ FixedWindows(15),
+ trigger=trigger.AfterWatermark(early=trigger.AfterCount(1)),
+ accumulation_mode=trigger.AccumulationMode.DISCARDING)
+ | 'number with key' >> beam.Map(lambda x: ('k', x))
+ | 'number gbk' >> beam.GroupByKey())
+
+ # The letters were emitted when the watermark was at 5, thus we expect to
+ # see the elements in the [0, 15) window. We used an early trigger to make
+ # sure that the ON_TIME empty pane was also emitted with a TestStream.
+    # This pane has no data because the early trigger causes the elements to
+    # fire before the end of the window and because the accumulation mode
+    # discards any data after the trigger fires.
+ expected_letters = {
+ window.IntervalWindow(0, 15): [
+ ('k', ['a', 'b', 'c']),
+ ('k', []),
+ ],
+ }
+
+ # Same here, except the numbers were emitted at watermark = 20, thus they
+ # are in the [15, 30) window.
+ expected_numbers = {
+ window.IntervalWindow(15, 30): [
+ ('k', ['1', '2', '3']),
+ ('k', []),
+ ],
+ }
+ assert_that(
+ letters,
+ equal_to_per_window(expected_letters),
+ label='letters assert per window')
+ assert_that(
+ numbers,
+ equal_to_per_window(expected_numbers),
+ label='numbers assert per window')
+
+ p.run()
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/sdks/python/apache_beam/testing/test_stream_service.py b/sdks/python/apache_beam/testing/test_stream_service.py
new file mode 100644
index 0000000..28ec9ca
--- /dev/null
+++ b/sdks/python/apache_beam/testing/test_stream_service.py
@@ -0,0 +1,56 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+from concurrent.futures import ThreadPoolExecutor
+
+import grpc
+
+from apache_beam.portability.api import beam_runner_api_pb2_grpc
+from apache_beam.portability.api.beam_runner_api_pb2_grpc import TestStreamServiceServicer
+
+
+class TestStreamServiceController(TestStreamServiceServicer):
+ def __init__(self, events, endpoint=None):
+ self._server = grpc.server(ThreadPoolExecutor(max_workers=10))
+
+ if endpoint:
+ self.endpoint = endpoint
+ self._server.add_insecure_port(self.endpoint)
+ else:
+ port = self._server.add_insecure_port('[::]:0')
+ self.endpoint = '[::]:{}'.format(port)
+
+ beam_runner_api_pb2_grpc.add_TestStreamServiceServicer_to_server(
+ self, self._server)
+ self._events = events
+
+ def start(self):
+ self._server.start()
+
+ def stop(self):
+ self._server.stop(0)
+ self._server.wait_for_termination()
+
+ def Events(self, request, context):
+ """Streams back all of the events from the streaming cache."""
+
+ for e in self._events:
+ yield e
diff --git a/sdks/python/apache_beam/testing/test_stream_service_test.py b/sdks/python/apache_beam/testing/test_stream_service_test.py
new file mode 100644
index 0000000..fd0f897
--- /dev/null
+++ b/sdks/python/apache_beam/testing/test_stream_service_test.py
@@ -0,0 +1,98 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+import unittest
+
+import grpc
+
+from apache_beam.portability.api import beam_runner_api_pb2
+from apache_beam.portability.api import beam_runner_api_pb2_grpc
+from apache_beam.portability.api.beam_interactive_api_pb2 import TestStreamFileHeader
+from apache_beam.portability.api.beam_interactive_api_pb2 import TestStreamFileRecord
+from apache_beam.portability.api.beam_runner_api_pb2 import TestStreamPayload
+from apache_beam.testing.test_stream_service import TestStreamServiceController
+
+# Nose automatically detects tests if they match a regex. Here, it mistakes
+# these protos for tests. For more info see the Nose docs at:
+# https://nose.readthedocs.io/en/latest/writing_tests.html
+TestStreamPayload.__test__ = False
+TestStreamFileHeader.__test__ = False
+TestStreamFileRecord.__test__ = False
+
+
+class TestStreamServiceTest(unittest.TestCase):
+ def events(self):
+ events = []
+ for i in range(10):
+ e = TestStreamPayload.Event()
+ e.element_event.elements.append(
+ TestStreamPayload.TimestampedElement(timestamp=i))
+ events.append(e)
+ return events
+
+ def setUp(self):
+ self.controller = TestStreamServiceController(self.events())
+ self.controller.start()
+
+ channel = grpc.insecure_channel(self.controller.endpoint)
+ self.stub = beam_runner_api_pb2_grpc.TestStreamServiceStub(channel)
+
+ def tearDown(self):
+ self.controller.stop()
+
+ def test_normal_run(self):
+ r = self.stub.Events(beam_runner_api_pb2.EventsRequest())
+ events = [e for e in r]
+ expected_events = [e for e in self.events()]
+
+ self.assertEqual(events, expected_events)
+
+ def test_multiple_sessions(self):
+ resp_a = self.stub.Events(beam_runner_api_pb2.EventsRequest())
+ resp_b = self.stub.Events(beam_runner_api_pb2.EventsRequest())
+
+ events_a = []
+ events_b = []
+
+ done = False
+ while not done:
+ a_is_done = False
+ b_is_done = False
+ try:
+ events_a.append(next(resp_a))
+ except StopIteration:
+ a_is_done = True
+
+ try:
+ events_b.append(next(resp_b))
+ except StopIteration:
+ b_is_done = True
+
+ done = a_is_done and b_is_done
+
+ expected_events = [e for e in self.events()]
+
+ self.assertEqual(events_a, expected_events)
+ self.assertEqual(events_b, expected_events)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/sdks/python/apache_beam/testing/test_stream_test.py b/sdks/python/apache_beam/testing/test_stream_test.py
index 26b54bd..0aefbcb 100644
--- a/sdks/python/apache_beam/testing/test_stream_test.py
+++ b/sdks/python/apache_beam/testing/test_stream_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the test_stream module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
@@ -275,7 +277,7 @@
p = TestPipeline(options=options)
records = (p
| test_stream
- | beam.WindowInto(FixedWindows(15))
+ | beam.WindowInto(FixedWindows(15), allowed_lateness=300)
| beam.Map(lambda x: ('k', x))
| beam.GroupByKey())
diff --git a/sdks/python/apache_beam/testing/test_utils.py b/sdks/python/apache_beam/testing/test_utils.py
index f9aa128..1c40324 100644
--- a/sdks/python/apache_beam/testing/test_utils.py
+++ b/sdks/python/apache_beam/testing/test_utils.py
@@ -20,6 +20,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import hashlib
diff --git a/sdks/python/apache_beam/testing/test_utils_test.py b/sdks/python/apache_beam/testing/test_utils_test.py
index 2b16c30c..e68d26f 100644
--- a/sdks/python/apache_beam/testing/test_utils_test.py
+++ b/sdks/python/apache_beam/testing/test_utils_test.py
@@ -17,6 +17,8 @@
"""Unittest for testing utilities,"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/testing/util.py b/sdks/python/apache_beam/testing/util.py
index 5b6bc85..b41ae71 100644
--- a/sdks/python/apache_beam/testing/util.py
+++ b/sdks/python/apache_beam/testing/util.py
@@ -17,6 +17,8 @@
"""Utilities for testing Beam pipelines."""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
@@ -174,15 +176,19 @@
# 2) As a fallback if we encounter a TypeError in python 3. this method
# works on collections that have different types.
except (BeamAssertException, TypeError):
+ unexpected = []
for element in actual:
try:
expected_list.remove(element)
except ValueError:
- raise BeamAssertException(
- 'Failed assert: %r == %r' % (expected, actual))
- if expected_list:
- raise BeamAssertException(
- 'Failed assert: %r == %r' % (expected, actual))
+ unexpected.append(element)
+ if unexpected or expected_list:
+ msg = 'Failed assert: %r == %r' % (expected, actual)
+ if unexpected:
+ msg = msg + ', unexpected elements %r' % unexpected
+ if expected_list:
+ msg = msg + ', missing elements %r' % expected_list
+ raise BeamAssertException(msg)
return _equal
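The change above makes equal_to report which elements were unexpected and which were missing instead of a bare "Failed assert". Below is a minimal, self-contained sketch of that matching logic on plain lists (not Beam's public API; the function name is illustrative only):

```python
def describe_mismatch(expected, actual):
  # Mirrors the patched fallback path of equal_to: remove each actual element
  # from a copy of the expected list, collecting anything without a match.
  expected_list = list(expected)
  unexpected = []
  for element in actual:
    try:
      expected_list.remove(element)
    except ValueError:
      unexpected.append(element)
  if unexpected or expected_list:
    msg = 'Failed assert: %r == %r' % (expected, actual)
    if unexpected:
      msg += ', unexpected elements %r' % unexpected
    if expected_list:
      msg += ', missing elements %r' % expected_list
    return msg
  return None


print(describe_mismatch(['a', 'b', 'd'], ['a', 'b', 'c']))
# Failed assert: ['a', 'b', 'd'] == ['a', 'b', 'c'],
# unexpected elements ['c'], missing elements ['d']
```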
diff --git a/sdks/python/apache_beam/testing/util_test.py b/sdks/python/apache_beam/testing/util_test.py
index 72c9205..6716b05 100644
--- a/sdks/python/apache_beam/testing/util_test.py
+++ b/sdks/python/apache_beam/testing/util_test.py
@@ -17,6 +17,8 @@
"""Unit tests for testing utilities."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
@@ -42,6 +44,12 @@
class UtilTest(unittest.TestCase):
+ def setUp(self):
+ try: # Python 3
+ _ = self.assertRaisesRegex
+ except AttributeError: # Python 2
+ self.assertRaisesRegex = self.assertRaisesRegexp
+
def test_assert_that_passes(self):
with TestPipeline() as p:
assert_that(p | Create([1, 2, 3]), equal_to([1, 2, 3]))
@@ -67,6 +75,27 @@
with TestPipeline() as p:
assert_that(p | Create([1, 10, 100]), equal_to([1, 2, 3]))
+ def test_assert_missing(self):
+ with self.assertRaisesRegex(BeamAssertException,
+ r"missing elements \['c'\]"):
+ with TestPipeline() as p:
+ assert_that(p | Create(['a', 'b']), equal_to(['a', 'b', 'c']))
+
+ def test_assert_unexpected(self):
+ with self.assertRaisesRegex(BeamAssertException,
+ r"unexpected elements \['c', 'd'\]|"
+ r"unexpected elements \['d', 'c'\]"):
+ with TestPipeline() as p:
+ assert_that(p | Create(['a', 'b', 'c', 'd']), equal_to(['a', 'b']))
+
+ def test_assert_missing_and_unexpected(self):
+ with self.assertRaisesRegex(
+ BeamAssertException,
+ r"unexpected elements \['c'\].*missing elements \['d'\]"):
+ with TestPipeline() as p:
+ assert_that(p | Create(['a', 'b', 'c']),
+ equal_to(['a', 'b', 'd']))
+
def test_reified_value_passes(self):
expected = [TestWindowedValue(v, MIN_TIMESTAMP, [GlobalWindow()])
for v in [1, 2, 3]]
diff --git a/sdks/python/apache_beam/tools/coders_microbenchmark.py b/sdks/python/apache_beam/tools/coders_microbenchmark.py
index edaa3ea..707e447 100644
--- a/sdks/python/apache_beam/tools/coders_microbenchmark.py
+++ b/sdks/python/apache_beam/tools/coders_microbenchmark.py
@@ -28,6 +28,8 @@
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/tools/distribution_counter_microbenchmark.py b/sdks/python/apache_beam/tools/distribution_counter_microbenchmark.py
index 06035d5..5889853 100644
--- a/sdks/python/apache_beam/tools/distribution_counter_microbenchmark.py
+++ b/sdks/python/apache_beam/tools/distribution_counter_microbenchmark.py
@@ -23,6 +23,8 @@
python -m apache_beam.tools.distribution_counter_microbenchmark
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/tools/fn_api_runner_microbenchmark.py b/sdks/python/apache_beam/tools/fn_api_runner_microbenchmark.py
index 538f65f..d8506b2 100644
--- a/sdks/python/apache_beam/tools/fn_api_runner_microbenchmark.py
+++ b/sdks/python/apache_beam/tools/fn_api_runner_microbenchmark.py
@@ -54,6 +54,8 @@
R^2 0.95189
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/tools/map_fn_microbenchmark.py b/sdks/python/apache_beam/tools/map_fn_microbenchmark.py
index 6b4a143..191b335 100644
--- a/sdks/python/apache_beam/tools/map_fn_microbenchmark.py
+++ b/sdks/python/apache_beam/tools/map_fn_microbenchmark.py
@@ -30,6 +30,8 @@
python -m apache_beam.tools.map_fn_microbenchmark
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/tools/microbenchmarks_test.py b/sdks/python/apache_beam/tools/microbenchmarks_test.py
index 74949f6..850ef33 100644
--- a/sdks/python/apache_beam/tools/microbenchmarks_test.py
+++ b/sdks/python/apache_beam/tools/microbenchmarks_test.py
@@ -17,6 +17,8 @@
"""Unit tests for microbenchmarks code."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/tools/sideinput_microbenchmark.py b/sdks/python/apache_beam/tools/sideinput_microbenchmark.py
index 2a46aee..37cff3e 100644
--- a/sdks/python/apache_beam/tools/sideinput_microbenchmark.py
+++ b/sdks/python/apache_beam/tools/sideinput_microbenchmark.py
@@ -22,6 +22,8 @@
python -m apache_beam.tools.sideinput_microbenchmark
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/tools/utils.py b/sdks/python/apache_beam/tools/utils.py
index 41253a8..838e347 100644
--- a/sdks/python/apache_beam/tools/utils.py
+++ b/sdks/python/apache_beam/tools/utils.py
@@ -17,6 +17,8 @@
"""Utility functions for all microbenchmarks."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
diff --git a/sdks/python/apache_beam/transforms/combiners.py b/sdks/python/apache_beam/transforms/combiners.py
index 49f15d3..0aedaf7 100644
--- a/sdks/python/apache_beam/transforms/combiners.py
+++ b/sdks/python/apache_beam/transforms/combiners.py
@@ -17,6 +17,8 @@
"""A library of basic combiner PTransform subclasses."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -62,7 +64,7 @@
T = TypeVar('T')
K = TypeVar('K')
V = TypeVar('V')
-TimestampType = Union[int, long, float, Timestamp, Duration]
+TimestampType = Union[int, float, Timestamp, Duration]
class Mean(object):
@@ -219,10 +221,12 @@
self._py2__init__(n, None, **kwargs)
# Python 3 sort does not accept a comparison operator, and nor do we.
+ # FIXME: mypy would handle this better if we placed the _py*__init__ funcs
+ # inside the if/else block below:
if sys.version_info[0] < 3:
__init__ = _py2__init__
else:
- __init__ = _py3__init__
+ __init__ = _py3__init__ # type: ignore
def default_label(self):
return 'Top(%d)' % self._n
@@ -309,7 +313,7 @@
if sys.version_info[0] < 3:
__init__ = _py2__init__
else:
- __init__ = _py3__init__
+ __init__ = _py3__init__ # type: ignore
def default_label(self):
return 'TopPerKey(%d)' % self._n
@@ -882,7 +886,7 @@
def expand(self, pcoll):
return (pcoll
| core.ParDo(self.add_timestamp)
- .with_output_types(Tuple[T, TimestampType])
+ .with_output_types(Tuple[T, TimestampType]) # type: ignore[misc]
| core.CombineGlobally(LatestCombineFn()))
@with_input_types(Tuple[K, V])
@@ -899,11 +903,11 @@
def expand(self, pcoll):
return (pcoll
| core.ParDo(self.add_timestamp)
- .with_output_types(Tuple[K, Tuple[T, TimestampType]])
+ .with_output_types(Tuple[K, Tuple[T, TimestampType]]) # type: ignore[misc]
| core.CombinePerKey(LatestCombineFn()))
-@with_input_types(Tuple[T, TimestampType])
+@with_input_types(Tuple[T, TimestampType]) # type: ignore[misc]
@with_output_types(T)
class LatestCombineFn(core.CombineFn):
"""CombineFn to get the element with the latest timestamp
diff --git a/sdks/python/apache_beam/transforms/combiners_test.py b/sdks/python/apache_beam/transforms/combiners_test.py
index 7a20fb3..94f336e 100644
--- a/sdks/python/apache_beam/transforms/combiners_test.py
+++ b/sdks/python/apache_beam/transforms/combiners_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for our libraries of combine PTransforms."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -399,6 +401,90 @@
| beam.CombineGlobally(combine.MeanCombineFn()).with_fanout(11))
assert_that(result, equal_to([49.5]))
+ def test_MeanCombineFn_combine(self):
+ with TestPipeline() as p:
+ input = (p
+ | beam.Create([('a', 1),
+ ('a', 1),
+ ('a', 4),
+ ('b', 1),
+ ('b', 13)]))
+ # The mean of all values regardless of key.
+ global_mean = (input
+ | beam.Values()
+ | beam.CombineGlobally(combine.MeanCombineFn()))
+
+ # The (key, mean) pairs for all keys.
+ mean_per_key = (input | beam.CombinePerKey(combine.MeanCombineFn()))
+
+ expected_mean_per_key = [('a', 2), ('b', 7)]
+ assert_that(global_mean, equal_to([4]), label='global mean')
+ assert_that(mean_per_key, equal_to(expected_mean_per_key),
+ label='mean per key')
+
+ def test_MeanCombineFn_combine_empty(self):
+    # The mean of an empty PCollection is float('nan'), which cannot be
+    # compared directly, so each element is formatted with str() first.
+
+ with TestPipeline() as p:
+ input = (p | beam.Create([]))
+
+ # Compute the mean of all values in the PCollection,
+      # then format the mean. Since the PCollection is empty,
+      # the mean is float('nan'), which is formatted as the string 'nan'.
+ global_mean = (input
+ | beam.Values()
+ | beam.CombineGlobally(combine.MeanCombineFn())
+ | beam.Map(str))
+
+ mean_per_key = (input | beam.CombinePerKey(combine.MeanCombineFn()))
+
+      # We can't compare one float('nan') with another float('nan'),
+      # but we can compare one 'nan' string with another 'nan' string.
+ assert_that(global_mean, equal_to(['nan']), label='global mean')
+ assert_that(mean_per_key, equal_to([]), label='mean per key')
+
+ def test_sessions_combine(self):
+ with TestPipeline() as p:
+ input = (
+ p
+ | beam.Create([('c', 1), ('c', 9), ('c', 12), ('d', 2), ('d', 4)])
+ | beam.MapTuple(lambda k, v: window.TimestampedValue((k, v), v))
+ | beam.WindowInto(window.Sessions(4)))
+
+ global_sum = (input
+ | beam.Values()
+ | beam.CombineGlobally(sum).without_defaults())
+ sum_per_key = input | beam.CombinePerKey(sum)
+
+ # The first window has 3 elements: ('c', 1), ('d', 2), ('d', 4).
+ # The second window has 2 elements: ('c', 9), ('c', 12).
+ assert_that(global_sum, equal_to([7, 21]), label='global sum')
+ assert_that(sum_per_key, equal_to([('c', 1), ('c', 21), ('d', 6)]),
+ label='sum per key')
+
+ def test_fixed_windows_combine(self):
+ with TestPipeline() as p:
+ input = (
+ p
+ | beam.Create([('c', 1), ('c', 2), ('c', 10),
+ ('d', 5), ('d', 8), ('d', 9)])
+ | beam.MapTuple(lambda k, v: window.TimestampedValue((k, v), v))
+ | beam.WindowInto(window.FixedWindows(4)))
+
+ global_sum = (input
+ | beam.Values()
+ | beam.CombineGlobally(sum).without_defaults())
+ sum_per_key = input | beam.CombinePerKey(sum)
+
+ # The first window has 2 elements: ('c', 1), ('c', 2).
+      # The second window has 1 element: ('d', 5).
+ # The third window has 3 elements: ('c', 10), ('d', 8), ('d', 9).
+ assert_that(global_sum, equal_to([3, 5, 27]), label='global sum')
+ assert_that(sum_per_key,
+ equal_to([('c', 3), ('c', 10), ('d', 5), ('d', 17)]),
+ label='sum per key')
+
class LatestTest(unittest.TestCase):
diff --git a/sdks/python/apache_beam/transforms/core.py b/sdks/python/apache_beam/transforms/core.py
index 3169d53..5410b5b 100644
--- a/sdks/python/apache_beam/transforms/core.py
+++ b/sdks/python/apache_beam/transforms/core.py
@@ -17,6 +17,8 @@
"""Core PTransform subclasses, such as FlatMap, GroupByKey, and Map."""
+# pytype: skip-file
+
from __future__ import absolute_import
import copy
@@ -64,13 +66,23 @@
from apache_beam.typehints.typehints import is_consistent_with
from apache_beam.utils import timestamp
from apache_beam.utils import urns
+from apache_beam.utils.timestamp import Duration
+
+if typing.TYPE_CHECKING:
+ from google.protobuf import message # pylint: disable=ungrouped-imports
+ from apache_beam.io import iobase
+ from apache_beam.pipeline import Pipeline
+ from apache_beam.runners.pipeline_context import PipelineContext
+ from apache_beam.transforms import create_source
+ from apache_beam.transforms.trigger import AccumulationMode
+ from apache_beam.transforms.trigger import DefaultTrigger
+ from apache_beam.transforms.trigger import TriggerFn
try:
import funcsigs # Python 2 only.
except ImportError:
funcsigs = None
-
__all__ = [
'DoFn',
'CombineFn',
@@ -243,6 +255,7 @@
"""
def create_tracker(self, restriction):
+ # type: (...) -> iobase.RestrictionTracker
"""Produces a new ``RestrictionTracker`` for the given restriction.
This API is required to be implemented.
@@ -308,6 +321,7 @@
def get_function_arguments(obj, func):
+ # type: (...) -> typing.Tuple[typing.List[str], typing.List[typing.Any]]
"""Return the function arguments based on the name provided. If they have
a _inspect_function attached to the class then use that otherwise default
to the modified version of python inspect library.
@@ -324,6 +338,7 @@
def get_function_args_defaults(f):
+ # type: (...) -> typing.Tuple[typing.List[str], typing.List[typing.Any]]
"""Returns the function arguments of a given function.
Returns:
@@ -458,6 +473,7 @@
"""Restriction Provider DoFn parameter."""
def __init__(self, restriction_provider):
+ # type: (RestrictionProvider) -> None
if not isinstance(restriction_provider, RestrictionProvider):
raise ValueError(
'DoFn.RestrictionParam expected RestrictionProvider object.')
@@ -470,6 +486,7 @@
"""State DoFn parameter."""
def __init__(self, state_spec):
+ # type: (StateSpec) -> None
if not isinstance(state_spec, StateSpec):
raise ValueError("DoFn.StateParam expected StateSpec object.")
self.state_spec = state_spec
@@ -480,6 +497,7 @@
"""Timer DoFn parameter."""
def __init__(self, timer_spec):
+ # type: (TimerSpec) -> None
if not isinstance(timer_spec, TimerSpec):
raise ValueError("DoFn.TimerParam expected TimerSpec object.")
self.timer_spec = timer_spec
@@ -595,7 +613,7 @@
**kwargs: other keyword arguments.
Returns:
- An Iterable of output elements.
+ An Iterable of output elements or None.
"""
raise NotImplementedError
@@ -911,6 +929,7 @@
@staticmethod
def maybe_from_callable(fn, has_side_inputs=True):
+ # type: (typing.Union[CombineFn, typing.Callable], bool) -> CombineFn
if isinstance(fn, CombineFn):
return fn
elif callable(fn) and not has_side_inputs:
@@ -1096,6 +1115,7 @@
return self.__class__.__name__
def partition_for(self, element, num_partitions, *args, **kwargs):
+ # type: (T, int, *typing.Any, **typing.Any) -> int
"""Specify which partition will receive this element.
Args:
@@ -1135,6 +1155,7 @@
self._fn = fn
def partition_for(self, element, num_partitions, *args, **kwargs):
+ # type: (T, int, *typing.Any, **typing.Any) -> int
return self._fn(element, num_partitions, *args, **kwargs)
@@ -1174,7 +1195,7 @@
super(ParDo, self).__init__(fn, *args, **kwargs)
# TODO(robertwb): Change all uses of the dofn attribute to use fn instead.
self.dofn = self.fn
- self.output_tags = set()
+ self.output_tags = set() # type: typing.Set[str]
if not isinstance(self.fn, DoFn):
raise TypeError('ParDo must be called with a DoFn instance.')
@@ -1269,6 +1290,7 @@
return self.fn, self.args, self.kwargs, si_tags_and_types, windowing
def to_runner_api_parameter(self, context):
+ # type: (PipelineContext) -> typing.Tuple[str, message.Message]
assert isinstance(self, ParDo), \
"expected instance of ParDo, but got %s" % self.__class__
picked_pardo_fn_data = pickler.dumps(self._pardo_fn_data())
@@ -1278,17 +1300,15 @@
if is_splittable:
restriction_coder = (
DoFnSignature(self.fn).get_restriction_provider().restriction_coder())
- restriction_coder_id = context.coders.get_id(restriction_coder)
+ restriction_coder_id = context.coders.get_id(restriction_coder) # type: typing.Optional[str]
else:
restriction_coder_id = None
return (
common_urns.primitives.PAR_DO.urn,
beam_runner_api_pb2.ParDoPayload(
- do_fn=beam_runner_api_pb2.SdkFunctionSpec(
- environment_id=context.default_environment_id(),
- spec=beam_runner_api_pb2.FunctionSpec(
- urn=python_urns.PICKLED_DOFN_INFO,
- payload=picked_pardo_fn_data)),
+ do_fn=beam_runner_api_pb2.FunctionSpec(
+ urn=python_urns.PICKLED_DOFN_INFO,
+ payload=picked_pardo_fn_data),
splittable=is_splittable,
restriction_coder_id=restriction_coder_id,
state_specs={spec.name: spec.to_runner_api(context)
@@ -1307,9 +1327,9 @@
@PTransform.register_urn(
common_urns.primitives.PAR_DO.urn, beam_runner_api_pb2.ParDoPayload)
def from_runner_api_parameter(pardo_payload, context):
- assert pardo_payload.do_fn.spec.urn == python_urns.PICKLED_DOFN_INFO
+ assert pardo_payload.do_fn.urn == python_urns.PICKLED_DOFN_INFO
fn, args, kwargs, si_tags_and_types, windowing = pickler.loads(
- pardo_payload.do_fn.spec.payload)
+ pardo_payload.do_fn.payload)
if si_tags_and_types:
raise NotImplementedError('explicit side input data')
elif windowing:
@@ -1865,7 +1885,10 @@
hints.set_output_types(typehints.Tuple[K, main_output_type])
return hints
- def to_runner_api_parameter(self, context):
+ def to_runner_api_parameter(self,
+ context # type: PipelineContext
+ ):
+ # type: (...) -> typing.Tuple[str, beam_runner_api_pb2.CombinePayload]
if self.args or self.kwargs:
from apache_beam.transforms.combiners import curry_combine_fn
combine_fn = curry_combine_fn(self.fn, self.args, self.kwargs)
@@ -1928,7 +1951,11 @@
class CombineValuesDoFn(DoFn):
"""DoFn for performing per-key Combine transforms."""
- def __init__(self, input_pcoll_type, combinefn, runtime_type_check):
+ def __init__(self,
+ input_pcoll_type,
+ combinefn, # type: CombineFn
+ runtime_type_check, # type: bool
+ ):
super(CombineValuesDoFn, self).__init__()
self.combinefn = combinefn
self.runtime_type_check = runtime_type_check
@@ -1980,7 +2007,11 @@
class _CombinePerKeyWithHotKeyFanout(PTransform):
- def __init__(self, combine_fn, fanout):
+ def __init__(self,
+ combine_fn, # type: CombineFn
+ fanout, # type: typing.Union[int, typing.Callable[[typing.Any], int]]
+ ):
+ # type: (...) -> None
self._combine_fn = combine_fn
self._fanout_fn = (
(lambda key: fanout) if isinstance(fanout, int) else fanout)
@@ -2087,7 +2118,7 @@
def infer_output_type(self, input_type):
key_type, value_type = trivial_inference.key_value_types(input_type)
return typehints.Iterable[
- typehints.KV[key_type, typehints.WindowedValue[value_type]]]
+ typehints.KV[key_type, typehints.WindowedValue[value_type]]] # type: ignore[misc]
def expand(self, pcoll):
# This code path is only used in the local direct runner. For Dataflow
@@ -2104,11 +2135,11 @@
'GroupByKey operation "%s"' % self.label)
reify_output_type = typehints.KV[
- key_type, typehints.WindowedValue[value_type]]
+ key_type, typehints.WindowedValue[value_type]] # type: ignore[misc]
gbk_input_type = (
typehints.KV[
key_type,
- typehints.Iterable[typehints.WindowedValue[value_type]]])
+ typehints.Iterable[typehints.WindowedValue[value_type]]]) # type: ignore[misc]
gbk_output_type = typehints.KV[
key_type, typehints.Iterable[value_type]]
@@ -2134,6 +2165,7 @@
return typehints.KV[key_type, typehints.Iterable[value_type]]
def to_runner_api_parameter(self, unused_context):
+ # type: (PipelineContext) -> typing.Tuple[str, None]
return common_urns.primitives.GROUP_BY_KEY.urn, None
@PTransform.register_urn(common_urns.primitives.GROUP_BY_KEY.urn, None)
@@ -2237,8 +2269,26 @@
class Windowing(object):
- def __init__(self, windowfn, triggerfn=None, accumulation_mode=None,
- timestamp_combiner=None):
+ def __init__(self,
+ windowfn, # type: WindowFn
+ triggerfn=None, # type: typing.Optional[TriggerFn]
+ accumulation_mode=None, # type: typing.Optional[beam_runner_api_pb2.AccumulationMode]
+ timestamp_combiner=None, # type: typing.Optional[beam_runner_api_pb2.OutputTime]
+ allowed_lateness=0, # type: typing.Union[int, float]
+ ):
+ """Class representing the window strategy.
+
+ Args:
+ windowfn: Window assign function.
+ triggerfn: Trigger function.
+      accumulation_mode: an AccumulationMode, controls what to do with data
+ when a trigger fires multiple times.
+ timestamp_combiner: a TimestampCombiner, determines how output
+ timestamps of grouping operations are assigned.
+      allowed_lateness: Maximum delay in seconds after the end of the window
+        during which late data may still be processed without being
+        discarded.
+ """
global AccumulationMode, DefaultTrigger # pylint: disable=global-variable-not-assigned
# pylint: disable=wrong-import-order, wrong-import-position
from apache_beam.transforms.trigger import AccumulationMode, DefaultTrigger
@@ -2258,13 +2308,15 @@
self.windowfn = windowfn
self.triggerfn = triggerfn
self.accumulation_mode = accumulation_mode
+ self.allowed_lateness = Duration.of(allowed_lateness)
self.timestamp_combiner = (
timestamp_combiner or TimestampCombiner.OUTPUT_AT_EOW)
self._is_default = (
self.windowfn == GlobalWindows() and
self.triggerfn == DefaultTrigger() and
self.accumulation_mode == AccumulationMode.DISCARDING and
- self.timestamp_combiner == TimestampCombiner.OUTPUT_AT_EOW)
+ self.timestamp_combiner == TimestampCombiner.OUTPUT_AT_EOW and
+ self.allowed_lateness == 0)
def __repr__(self):
return "Windowing(%s, %s, %s, %s)" % (self.windowfn, self.triggerfn,
@@ -2279,7 +2331,8 @@
self.windowfn == other.windowfn
and self.triggerfn == other.triggerfn
and self.accumulation_mode == other.accumulation_mode
- and self.timestamp_combiner == other.timestamp_combiner)
+ and self.timestamp_combiner == other.timestamp_combiner
+ and self.allowed_lateness == other.allowed_lateness)
return False
def __ne__(self, other):
@@ -2287,13 +2340,15 @@
return not self == other
def __hash__(self):
- return hash((self.windowfn, self.accumulation_mode,
+ return hash((self.windowfn, self.triggerfn, self.accumulation_mode,
+ self.allowed_lateness,
self.timestamp_combiner))
def is_default(self):
return self._is_default
def to_runner_api(self, context):
+ # type: (PipelineContext) -> beam_runner_api_pb2.WindowingStrategy
return beam_runner_api_pb2.WindowingStrategy(
window_fn=self.windowfn.to_runner_api(context),
# TODO(robertwb): Prohibit implicit multi-level merging.
@@ -2308,7 +2363,8 @@
# TODO(robertwb): Support EMIT_IF_NONEMPTY
closing_behavior=beam_runner_api_pb2.ClosingBehavior.EMIT_ALWAYS,
OnTimeBehavior=beam_runner_api_pb2.OnTimeBehavior.FIRE_ALWAYS,
- allowed_lateness=0)
+ allowed_lateness=self.allowed_lateness.micros // 1000,
+ environment_id=context.default_environment_id())
@staticmethod
def from_runner_api(proto, context):
@@ -2318,7 +2374,8 @@
windowfn=WindowFn.from_runner_api(proto.window_fn, context),
triggerfn=TriggerFn.from_runner_api(proto.trigger, context),
accumulation_mode=proto.accumulation_mode,
- timestamp_combiner=proto.output_time)
+ timestamp_combiner=proto.output_time,
+ allowed_lateness=Duration(micros=proto.allowed_lateness * 1000))
@typehints.with_input_types(T)
@@ -2336,6 +2393,7 @@
"""A DoFn that applies a WindowInto operation."""
def __init__(self, windowing):
+ # type: (Windowing) -> None
self.windowing = windowing
def process(self, element, timestamp=DoFn.TimestampParam,
@@ -2346,10 +2404,11 @@
yield WindowedValue(element, context.timestamp, new_windows)
def __init__(self,
- windowfn,
- trigger=None,
+ windowfn, # type: typing.Union[Windowing, WindowFn]
+ trigger=None, # type: typing.Optional[TriggerFn]
accumulation_mode=None,
- timestamp_combiner=None):
+ timestamp_combiner=None,
+ allowed_lateness=0):
"""Initializes a WindowInto transform.
Args:
@@ -2371,10 +2430,12 @@
timestamp_combiner = timestamp_combiner or windowing.timestamp_combiner
self.windowing = Windowing(
- windowfn, trigger, accumulation_mode, timestamp_combiner)
+ windowfn, trigger, accumulation_mode, timestamp_combiner,
+ allowed_lateness)
super(WindowInto, self).__init__(self.WindowIntoFn(self.windowing))
def get_windowing(self, unused_inputs):
+ # type: (typing.Any) -> Windowing
return self.windowing
def infer_output_type(self, input_type):
@@ -2390,6 +2451,7 @@
return super(WindowInto, self).expand(pcoll)
def to_runner_api_parameter(self, context):
+ # type: (PipelineContext) -> typing.Tuple[str, message.Message]
return (
common_urns.primitives.ASSIGN_WINDOWS.urn,
self.windowing.to_runner_api(context))
@@ -2436,7 +2498,7 @@
def __init__(self, **kwargs):
super(Flatten, self).__init__()
- self.pipeline = kwargs.pop('pipeline', None)
+ self.pipeline = kwargs.pop('pipeline', None) # type: typing.Optional[Pipeline]
if kwargs:
raise ValueError('Unexpected keyword arguments: %s' % list(kwargs))
@@ -2458,12 +2520,14 @@
return result
def get_windowing(self, inputs):
+ # type: (typing.Any) -> Windowing
if not inputs:
# TODO(robertwb): Return something compatible with every windowing?
return Windowing(GlobalWindows())
return super(Flatten, self).get_windowing(inputs)
def to_runner_api_parameter(self, context):
+ # type: (PipelineContext) -> typing.Tuple[str, None]
return common_urns.primitives.FLATTEN.urn, None
@staticmethod
@@ -2494,6 +2558,7 @@
self.reshuffle = reshuffle
def to_runner_api_parameter(self, context):
+ # type: (PipelineContext) -> typing.Tuple[str, bytes]
# Required as this is identified by type in PTransformOverrides.
# TODO(BEAM-3812): Use an actual URN here.
return self.to_runner_api_pickled(context)
@@ -2538,6 +2603,7 @@
return iobase.Read(source).with_output_types(self.get_output_type())
def get_windowing(self, unused_inputs):
+ # type: (typing.Any) -> Windowing
return Windowing(GlobalWindows())
@staticmethod
@@ -2546,6 +2612,7 @@
@staticmethod
def _create_source(serialized_values, coder):
+ # type: (typing.Any, typing.Any) -> create_source._CreateSource
from apache_beam.transforms.create_source import _CreateSource
return _CreateSource(serialized_values, coder)
@@ -2562,12 +2629,14 @@
return pvalue.PCollection(pbegin.pipeline)
def get_windowing(self, inputs):
+ # type: (typing.Any) -> Windowing
return Windowing(GlobalWindows())
def infer_output_type(self, unused_input_type):
return bytes
def to_runner_api_parameter(self, unused_context):
+ # type: (PipelineContext) -> typing.Tuple[str, None]
return common_urns.primitives.IMPULSE.urn, None
@PTransform.register_urn(common_urns.primitives.IMPULSE.urn, None)
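With the allowed_lateness argument threaded through Windowing and WindowInto above, a pipeline can keep windows open for late data. A minimal sketch under stated assumptions (illustrative element values, default runner; this mirrors the updated usage in test_stream_test.py):

```python
import apache_beam as beam
from apache_beam.transforms.window import FixedWindows

with beam.Pipeline() as p:
  grouped = (
      p
      | beam.Create([('k', 1), ('k', 2), ('k', 20)])
      # allowed_lateness is given in seconds; internally it is converted to a
      # Duration and serialized in milliseconds in the WindowingStrategy proto.
      | 'window' >> beam.WindowInto(FixedWindows(15), allowed_lateness=300)
      | beam.GroupByKey())
```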
diff --git a/sdks/python/apache_beam/transforms/core_test.py b/sdks/python/apache_beam/transforms/core_test.py
index 1a27bd2..3791252 100644
--- a/sdks/python/apache_beam/transforms/core_test.py
+++ b/sdks/python/apache_beam/transforms/core_test.py
@@ -17,6 +17,8 @@
"""Unit tests for core module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/transforms/create_source.py b/sdks/python/apache_beam/transforms/create_source.py
index aa26ceb..b0188c6 100644
--- a/sdks/python/apache_beam/transforms/create_source.py
+++ b/sdks/python/apache_beam/transforms/create_source.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/transforms/create_test.py b/sdks/python/apache_beam/transforms/create_test.py
index 915056f..08c5c24 100644
--- a/sdks/python/apache_beam/transforms/create_test.py
+++ b/sdks/python/apache_beam/transforms/create_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for the Create and _CreateSource classes."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/transforms/cy_combiners.py b/sdks/python/apache_beam/transforms/cy_combiners.py
index 139b8a3..33914f9 100644
--- a/sdks/python/apache_beam/transforms/cy_combiners.py
+++ b/sdks/python/apache_beam/transforms/cy_combiners.py
@@ -22,6 +22,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/transforms/dataflow_distribution_counter_test.py b/sdks/python/apache_beam/transforms/dataflow_distribution_counter_test.py
index bedad4b..f072cd1 100644
--- a/sdks/python/apache_beam/transforms/dataflow_distribution_counter_test.py
+++ b/sdks/python/apache_beam/transforms/dataflow_distribution_counter_test.py
@@ -14,6 +14,8 @@
otherwise, test on pure python module
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/transforms/display.py b/sdks/python/apache_beam/transforms/display.py
index bcbf68e..0f9fa53 100644
--- a/sdks/python/apache_beam/transforms/display.py
+++ b/sdks/python/apache_beam/transforms/display.py
@@ -36,6 +36,8 @@
and communicate it to the API.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import calendar
@@ -44,9 +46,14 @@
from builtins import object
from datetime import datetime
from datetime import timedelta
+from typing import TYPE_CHECKING
+from typing import List
from past.builtins import unicode
+if TYPE_CHECKING:
+ from apache_beam.options.pipeline_options import PipelineOptions
+
__all__ = ['HasDisplayData', 'DisplayDataItem', 'DisplayData']
@@ -57,6 +64,7 @@
"""
def display_data(self):
+ # type: () -> dict
""" Returns the display data associated to a pipeline component.
It should be reimplemented in pipeline components that wish to have
@@ -80,6 +88,7 @@
return {}
def _namespace(self):
+ # type: () -> str
return '{}.{}'.format(self.__module__, self.__class__.__name__)
@@ -87,9 +96,13 @@
""" Static display data associated with a pipeline component.
"""
- def __init__(self, namespace, display_data_dict):
+ def __init__(self,
+ namespace, # type: str
+ display_data_dict # type: dict
+ ):
+ # type: (...) -> None
self.namespace = namespace
- self.items = []
+ self.items = [] # type: List[DisplayDataItem]
self._populate_items(display_data_dict)
def _populate_items(self, display_data_dict):
@@ -191,6 +204,7 @@
self._drop_if_default = False
def drop_if_none(self):
+ # type: () -> DisplayDataItem
""" The item should be dropped if its value is None.
Returns:
@@ -200,6 +214,7 @@
return self
def drop_if_default(self, default):
+ # type: (...) -> DisplayDataItem
""" The item should be dropped if its value is equal to its default.
Returns:
@@ -210,6 +225,7 @@
return self
def should_drop(self):
+ # type: () -> bool
""" Return True if the item should be dropped, or False if it should not
be dropped. This depends on the drop_if_none, and drop_if_default calls.
@@ -223,6 +239,7 @@
return False
def is_valid(self):
+ # type: () -> None
""" Checks that all the necessary fields of the :class:`DisplayDataItem`
are filled in. It checks that neither key, namespace, value or type are
:data:`None`.
@@ -261,6 +278,7 @@
return res
def get_dict(self):
+ # type: () -> dict
""" Returns the internal-API dictionary representing the
:class:`DisplayDataItem`.
diff --git a/sdks/python/apache_beam/transforms/display_test.py b/sdks/python/apache_beam/transforms/display_test.py
index bdaade6..6ead789 100644
--- a/sdks/python/apache_beam/transforms/display_test.py
+++ b/sdks/python/apache_beam/transforms/display_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the DisplayData API."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/transforms/dofn_lifecycle_test.py b/sdks/python/apache_beam/transforms/dofn_lifecycle_test.py
index fd3eee6..0f2cc4c 100644
--- a/sdks/python/apache_beam/transforms/dofn_lifecycle_test.py
+++ b/sdks/python/apache_beam/transforms/dofn_lifecycle_test.py
@@ -16,6 +16,8 @@
#
"""UnitTests for DoFn lifecycle and bundle methods"""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/transforms/environments.py b/sdks/python/apache_beam/transforms/environments.py
index 6f67266..f3b3c22 100644
--- a/sdks/python/apache_beam/transforms/environments.py
+++ b/sdks/python/apache_beam/transforms/environments.py
@@ -19,6 +19,8 @@
For internal use only. No backwards compatibility guarantees."""
+# pytype: skip-file
+
from __future__ import absolute_import
import json
@@ -39,6 +41,11 @@
'SubprocessSDKEnvironment', 'RunnerAPIEnvironmentHolder']
+def looks_like_json(s):
+ import re
+ return re.match(r'\s*\{.*\}\s*$', s)
+
+
class Environment(object):
"""Abstract base class for environments.
@@ -263,10 +270,6 @@
@classmethod
def from_options(cls, options):
- def looks_like_json(environment_config):
- import re
- return re.match(r'\s*\{.*\}\s*$', environment_config)
-
if looks_like_json(options.environment_config):
config = json.loads(options.environment_config)
url = config.get('url')
@@ -308,50 +311,77 @@
@Environment.register_urn(python_urns.EMBEDDED_PYTHON_GRPC, bytes)
class EmbeddedPythonGrpcEnvironment(Environment):
- def __init__(self, state_cache_size=None):
+ def __init__(self, state_cache_size=None, data_buffer_time_limit_ms=None):
self.state_cache_size = state_cache_size
+ self.data_buffer_time_limit_ms = data_buffer_time_limit_ms
def __eq__(self, other):
return self.__class__ == other.__class__ \
- and self.state_cache_size == other.state_cache_size
+ and self.state_cache_size == other.state_cache_size \
+ and self.data_buffer_time_limit_ms == other.data_buffer_time_limit_ms
def __ne__(self, other):
# TODO(BEAM-5949): Needed for Python 2 compatibility.
return not self == other
def __hash__(self):
- return hash((self.__class__, self.state_cache_size))
+ return hash((self.__class__, self.state_cache_size,
+ self.data_buffer_time_limit_ms))
def __repr__(self):
repr_parts = []
if not self.state_cache_size is None:
repr_parts.append('state_cache_size=%d' % self.state_cache_size)
+ if not self.data_buffer_time_limit_ms is None:
+ repr_parts.append(
+ 'data_buffer_time_limit_ms=%d' % self.data_buffer_time_limit_ms)
return 'EmbeddedPythonGrpcEnvironment(%s)' % ','.join(repr_parts)
def to_runner_api_parameter(self, context):
- if self.state_cache_size is None:
- payload = b''
- else:
- payload = b'%d' % self.state_cache_size
+ params = {}
+ if self.state_cache_size is not None:
+ params['state_cache_size'] = self.state_cache_size
+ if self.data_buffer_time_limit_ms is not None:
+ params['data_buffer_time_limit_ms'] = self.data_buffer_time_limit_ms
+ payload = json.dumps(params).encode('utf-8')
return python_urns.EMBEDDED_PYTHON_GRPC, payload
@staticmethod
def from_runner_api_parameter(payload, context):
if payload:
- state_cache_size = payload.decode('utf-8')
+ config = EmbeddedPythonGrpcEnvironment.parse_config(
+ payload.decode('utf-8'))
return EmbeddedPythonGrpcEnvironment(
- state_cache_size=int(state_cache_size))
+ state_cache_size=config.get('state_cache_size'),
+ data_buffer_time_limit_ms=config.get('data_buffer_time_limit_ms'))
else:
return EmbeddedPythonGrpcEnvironment()
@classmethod
def from_options(cls, options):
if options.environment_config:
- state_cache_size = options.environment_config
- return cls(state_cache_size=state_cache_size)
+ config = EmbeddedPythonGrpcEnvironment.parse_config(
+ options.environment_config)
+ return cls(state_cache_size=config.get('state_cache_size'),
+ data_buffer_time_limit_ms=config.get(
+ 'data_buffer_time_limit_ms'))
else:
return cls()
+ @staticmethod
+ def parse_config(s):
+ if looks_like_json(s):
+ config_dict = json.loads(s)
+ if 'state_cache_size' in config_dict:
+ config_dict['state_cache_size'] = int(config_dict['state_cache_size'])
+
+ if 'data_buffer_time_limit_ms' in config_dict:
+ config_dict['data_buffer_time_limit_ms'] = \
+ int(config_dict['data_buffer_time_limit_ms'])
+ return config_dict
+ else:
+ return {'state_cache_size': int(s)}
+
@Environment.register_urn(python_urns.SUBPROCESS_SDK, bytes)
class SubprocessSDKEnvironment(Environment):
@@ -371,7 +401,7 @@
return hash((self.__class__, self.command_string))
def __repr__(self):
- return 'SubprocessSDKEnvironment(command_string=%s)' % self.container_string
+ return 'SubprocessSDKEnvironment(command_string=%s)' % self.command_string
def to_runner_api_parameter(self, context):
return python_urns.SUBPROCESS_SDK, self.command_string.encode('utf-8')
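After this change, environment_config for the embedded gRPC environment may be either a JSON object or a bare integer string. A small hedged sketch of both forms accepted by the new parse_config (values are illustrative):

```python
from apache_beam.transforms.environments import EmbeddedPythonGrpcEnvironment

# JSON form: either or both keys may be present; values are coerced to int.
print(EmbeddedPythonGrpcEnvironment.parse_config(
    '{"state_cache_size": 100, "data_buffer_time_limit_ms": 1000}'))
# {'state_cache_size': 100, 'data_buffer_time_limit_ms': 1000}

# Legacy form: a bare integer is interpreted as the state cache size.
print(EmbeddedPythonGrpcEnvironment.parse_config('100'))
# {'state_cache_size': 100}
```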
diff --git a/sdks/python/apache_beam/transforms/environments_test.py b/sdks/python/apache_beam/transforms/environments_test.py
index 46868e8..b79ca79 100644
--- a/sdks/python/apache_beam/transforms/environments_test.py
+++ b/sdks/python/apache_beam/transforms/environments_test.py
@@ -18,6 +18,8 @@
"""Unit tests for the transform.environments classes."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -46,7 +48,8 @@
ExternalEnvironment('localhost:8080', params={'k1': 'v1'}),
EmbeddedPythonEnvironment(),
EmbeddedPythonGrpcEnvironment(),
- EmbeddedPythonGrpcEnvironment(state_cache_size=0),
+ EmbeddedPythonGrpcEnvironment(
+ state_cache_size=0, data_buffer_time_limit_ms=0),
SubprocessSDKEnvironment(command_string=u'foö')):
context = pipeline_context.PipelineContext()
self.assertEqual(
diff --git a/sdks/python/apache_beam/transforms/external.py b/sdks/python/apache_beam/transforms/external.py
index f1f0c15..882d1ea 100644
--- a/sdks/python/apache_beam/transforms/external.py
+++ b/sdks/python/apache_beam/transforms/external.py
@@ -19,12 +19,15 @@
No backward compatibility guarantees. Everything in this module is experimental.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
import contextlib
import copy
import threading
+from typing import Dict
from apache_beam import pvalue
from apache_beam.coders import registry
@@ -246,6 +249,8 @@
else payload)
self._expansion_service = expansion_service
self._namespace = self._fresh_namespace()
+ self._inputs = {} # type: Dict[str, pvalue.PCollection]
+ self._output = {} # type: Dict[str, pvalue.PCollection]
def __post_init__(self, expansion_service):
"""
@@ -275,10 +280,12 @@
@classmethod
def _fresh_namespace(cls):
+ # type: () -> str
ExternalTransform._namespace_counter += 1
return '%s_%d' % (cls.get_local_namespace(), cls._namespace_counter)
def expand(self, pvalueish):
+ # type: (pvalue.PCollection) -> pvalue.PCollection
if isinstance(pvalueish, pvalue.PBegin):
self._inputs = {}
elif isinstance(pvalueish, (list, tuple)):
@@ -310,7 +317,7 @@
components = context.to_runner_api()
request = beam_expansion_api_pb2.ExpansionRequest(
components=components,
- namespace=self._namespace,
+ namespace=self._namespace, # type: ignore # mypy thinks self._namespace is threading.local
transform=transform_proto)
if isinstance(self._expansion_service, str):
@@ -403,7 +410,8 @@
inputs={tag: pcoll_renames.get(pcoll, pcoll)
for tag, pcoll in proto.inputs.items()},
outputs={tag: pcoll_renames.get(pcoll, pcoll)
- for tag, pcoll in proto.outputs.items()})
+ for tag, pcoll in proto.outputs.items()},
+ environment_id=proto.environment_id)
context.transforms.put_proto(id, new_proto)
return beam_runner_api_pb2.PTransform(
@@ -413,7 +421,8 @@
inputs=self._expanded_transform.inputs,
outputs={
tag: pcoll_renames.get(pcoll, pcoll)
- for tag, pcoll in self._expanded_transform.outputs.items()})
+ for tag, pcoll in self._expanded_transform.outputs.items()},
+ environment_id=self._expanded_transform.environment_id)
class JavaJarExpansionService(object):
diff --git a/sdks/python/apache_beam/transforms/external_test.py b/sdks/python/apache_beam/transforms/external_test.py
index 7ae2bf5..2674b01 100644
--- a/sdks/python/apache_beam/transforms/external_test.py
+++ b/sdks/python/apache_beam/transforms/external_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the transform.external classes."""
+# pytype: skip-file
+
from __future__ import absolute_import
import argparse
@@ -56,7 +58,7 @@
try:
from apache_beam.runners.dataflow.internal import apiclient
except ImportError:
- apiclient = None
+ apiclient = None # type: ignore
# pylint: enable=wrong-import-order, wrong-import-position
@@ -227,8 +229,8 @@
class ExternalTransformTest(unittest.TestCase):
# This will be overwritten if set via a flag.
- expansion_service_jar = None
- expansion_service_port = None
+ expansion_service_jar = None # type: str
+ expansion_service_port = None # type: int
class _RunWithExpansion(object):
diff --git a/sdks/python/apache_beam/transforms/external_test_it.py b/sdks/python/apache_beam/transforms/external_test_it.py
index 97f857c..d597886 100644
--- a/sdks/python/apache_beam/transforms/external_test_it.py
+++ b/sdks/python/apache_beam/transforms/external_test_it.py
@@ -17,6 +17,8 @@
"""Integration tests for cross-language transform expansion."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/transforms/external_test_py3.py b/sdks/python/apache_beam/transforms/external_test_py3.py
index c2e7f87..3f7b2a4 100644
--- a/sdks/python/apache_beam/transforms/external_test_py3.py
+++ b/sdks/python/apache_beam/transforms/external_test_py3.py
@@ -17,6 +17,8 @@
"""Unit tests for the transform.external classes."""
+# pytype: skip-file
+
from __future__ import absolute_import
import typing
diff --git a/sdks/python/apache_beam/transforms/external_test_py37.py b/sdks/python/apache_beam/transforms/external_test_py37.py
index e01f532..8399b8b 100644
--- a/sdks/python/apache_beam/transforms/external_test_py37.py
+++ b/sdks/python/apache_beam/transforms/external_test_py37.py
@@ -17,6 +17,8 @@
"""Unit tests for the transform.external classes."""
+# pytype: skip-file
+
from __future__ import absolute_import
import dataclasses
diff --git a/sdks/python/apache_beam/transforms/ptransform.py b/sdks/python/apache_beam/transforms/ptransform.py
index 0c9459f..66ee5f3 100644
--- a/sdks/python/apache_beam/transforms/ptransform.py
+++ b/sdks/python/apache_beam/transforms/ptransform.py
@@ -34,6 +34,8 @@
FlatMap processing functions.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import contextlib
@@ -48,6 +50,17 @@
from builtins import zip
from functools import reduce
from functools import wraps
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import Optional
+from typing import Sequence
+from typing import Tuple
+from typing import Type
+from typing import TypeVar
+from typing import Union
+from typing import overload
from google.protobuf import message
@@ -68,6 +81,13 @@
from apache_beam.typehints.typehints import validate_composite_type_param
from apache_beam.utils import proto_utils
+if TYPE_CHECKING:
+ from apache_beam import coders
+ from apache_beam.pipeline import Pipeline
+ from apache_beam.runners.pipeline_context import PipelineContext
+ from apache_beam.transforms.core import Windowing
+ from apache_beam.portability.api import beam_runner_api_pb2
+
__all__ = [
'PTransform',
'ptransform_fn',
@@ -75,6 +95,13 @@
]
+T = TypeVar('T')
+PTransformT = TypeVar('PTransformT', bound='PTransform')
+ConstructorFn = Callable[
+ [Optional[Any], 'PipelineContext'],
+ Any]
+
+
class _PValueishTransform(object):
"""Visitor for PValueish objects.
@@ -307,27 +334,31 @@
with input as an argument.
"""
# By default, transforms don't have any side inputs.
- side_inputs = ()
+ side_inputs = () # type: Sequence[pvalue.AsSideInput]
# Used for nullary transforms.
- pipeline = None
+ pipeline = None # type: Optional[Pipeline]
# Default is unset.
- _user_label = None
+ _user_label = None # type: Optional[str]
def __init__(self, label=None):
+ # type: (Optional[str]) -> None
super(PTransform, self).__init__()
- self.label = label
+ self.label = label # type: ignore # https://github.com/python/mypy/issues/3004
@property
def label(self):
+ # type: () -> str
return self._user_label or self.default_label()
@label.setter
def label(self, value):
+ # type: (Optional[str]) -> None
self._user_label = value
def default_label(self):
+ # type: () -> str
return self.__class__.__name__
def with_input_types(self, input_type_hint):
@@ -409,6 +440,7 @@
pvalue_.element_type))
def _infer_output_coder(self, input_type=None, input_coder=None):
+ # type: (...) -> Optional[coders.Coder]
"""Returns the output coder to use for output of this transform.
Note: this API is experimental and is subject to change; please do not rely
@@ -454,12 +486,14 @@
' side_inputs=%s' % str(self.side_inputs) if self.side_inputs else '')
def _check_pcollection(self, pcoll):
+ # type: (pvalue.PCollection) -> None
if not isinstance(pcoll, pvalue.PCollection):
raise error.TransformError('Expecting a PCollection argument.')
if not pcoll.pipeline:
raise error.TransformError('PCollection not part of a pipeline.')
def get_windowing(self, inputs):
+ # type: (Any) -> Windowing
"""Returns the window function to be associated with transform's output.
By default most transforms just return the windowing function associated
@@ -556,7 +590,45 @@
else:
return input_dict
- _known_urns = {}
+ _known_urns = {} # type: Dict[str, Tuple[Optional[type], ConstructorFn]]
+
+ @classmethod
+ @overload
+ def register_urn(cls,
+ urn, # type: str
+ parameter_type, # type: Type[T]
+ ):
+ # type: (...) -> Callable[[Union[type, Callable[[T, PipelineContext], Any]]], Callable[[T, PipelineContext], Any]]
+ pass
+
+ @classmethod
+ @overload
+ def register_urn(cls,
+ urn, # type: str
+ parameter_type, # type: None
+ ):
+ # type: (...) -> Callable[[Union[type, Callable[[bytes, PipelineContext], Any]]], Callable[[bytes, PipelineContext], Any]]
+ pass
+
+ @classmethod
+ @overload
+ def register_urn(cls,
+ urn, # type: str
+ parameter_type, # type: Type[T]
+ constructor # type: Callable[[T, PipelineContext], Any]
+ ):
+ # type: (...) -> None
+ pass
+
+ @classmethod
+ @overload
+ def register_urn(cls,
+ urn, # type: str
+ parameter_type, # type: None
+ constructor # type: Callable[[bytes, PipelineContext], Any]
+ ):
+ # type: (...) -> None
+ pass
@classmethod
def register_urn(cls, urn, parameter_type, constructor=None):
@@ -589,6 +661,7 @@
return register
def to_runner_api(self, context, has_parts=False):
+ # type: (PipelineContext, bool) -> beam_runner_api_pb2.FunctionSpec
from apache_beam.portability.api import beam_runner_api_pb2
urn, typed_param = self.to_runner_api_parameter(context)
if urn == python_urns.GENERIC_COMPOSITE_TRANSFORM and not has_parts:
@@ -602,7 +675,11 @@
else typed_param)
@classmethod
- def from_runner_api(cls, proto, context):
+ def from_runner_api(cls,
+ proto, # type: Optional[beam_runner_api_pb2.FunctionSpec]
+ context # type: PipelineContext
+ ):
+ # type: (...) -> Optional[PTransform]
if proto is None or not proto.urn:
return None
parameter_type, constructor = cls._known_urns[proto.urn]
@@ -619,12 +696,16 @@
return RunnerAPIPTransformHolder(proto, context)
raise
- def to_runner_api_parameter(self, unused_context):
+ def to_runner_api_parameter(self,
+ unused_context # type: PipelineContext
+ ):
+ # type: (...) -> Tuple[str, Optional[Union[message.Message, bytes, str]]]
# The payload here is just to ease debugging.
return (python_urns.GENERIC_COMPOSITE_TRANSFORM,
getattr(self, '_fn_api_payload', str(self)))
def to_runner_api_pickled(self, unused_context):
+ # type: (PipelineContext) -> Tuple[str, bytes]
return (python_urns.PICKLED_TRANSFORM,
pickler.dumps(self))
@@ -647,6 +728,7 @@
class _ChainedPTransform(PTransform):
def __init__(self, *parts):
+ # type: (*PTransform) -> None
super(_ChainedPTransform, self).__init__(label=self._chain_label(parts))
self._parts = parts
@@ -678,6 +760,7 @@
"""
def __init__(self, fn, *args, **kwargs):
+ # type: (WithTypeHints, *Any, **Any) -> None
if isinstance(fn, type) and issubclass(fn, WithTypeHints):
# Don't treat Fn class objects as callables.
raise ValueError('Use %s() not %s.' % (fn.__name__, fn.__name__))
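Note on the register_urn overloads added above: they encode the two decorator forms the method already supports, one where parameter_type is a protobuf message class (the constructor receives the decoded message) and one where it is None (the constructor receives the raw bytes payload). A minimal sketch of the second form, using a hypothetical URN and transform that are not part of this patch:

    from apache_beam.transforms.ptransform import PTransform

    class MyComposite(PTransform):
        # Hypothetical composite, for illustration only.
        def expand(self, pcoll):
            return pcoll

    @PTransform.register_urn('example:my_composite:v1', None)  # hypothetical URN
    def _my_composite_from_payload(unused_payload, unused_context):
        # parameter_type is None, so the payload arrives as raw bytes.
        return MyComposite()

The overloads only let mypy infer the constructor's payload type from the parameter_type argument; runtime behavior is unchanged.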
diff --git a/sdks/python/apache_beam/transforms/ptransform_test.py b/sdks/python/apache_beam/transforms/ptransform_test.py
index ffb245c..f85a2b9 100644
--- a/sdks/python/apache_beam/transforms/ptransform_test.py
+++ b/sdks/python/apache_beam/transforms/ptransform_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the PTransform and descendants."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -347,14 +349,14 @@
pipeline = TestPipeline()
result = (pipeline
- | 'Start' >> beam.Create([x for x in range(3)])
+ | 'Start' >> beam.Create([1])
| beam.ParDo(MyDoFn())
| WindowInto(windowfn)
| 'create tuple' >> beam.Map(
lambda v, t=beam.DoFn.TimestampParam, w=beam.DoFn.WindowParam:
(v, t, w.start, w.end)))
- expected_process = [('process'+ str(x), Timestamp(5), Timestamp(4),
- Timestamp(6)) for x in range(3)]
+ expected_process = [('process1', Timestamp(5), Timestamp(4),
+ Timestamp(6))]
expected_finish = [('finish', Timestamp(1), Timestamp(0), Timestamp(2))]
assert_that(result, equal_to(expected_process + expected_finish))
@@ -592,7 +594,8 @@
assert_that(result, equal_to(input))
pipeline.run()
- @attr('ValidatesRunner')
+ # TODO(BEAM-9002): Does not work in streaming mode on Dataflow.
+ @attr('ValidatesRunner', 'sickbay-streaming')
def test_flatten_same_pcollections(self):
pipeline = TestPipeline()
pc = pipeline | beam.Create(['a', 'b'])
diff --git a/sdks/python/apache_beam/transforms/py_dataflow_distribution_counter.py b/sdks/python/apache_beam/transforms/py_dataflow_distribution_counter.py
index 980abab..4905b0b 100644
--- a/sdks/python/apache_beam/transforms/py_dataflow_distribution_counter.py
+++ b/sdks/python/apache_beam/transforms/py_dataflow_distribution_counter.py
@@ -17,6 +17,8 @@
"""For internal use only; no backwards-compatibility guarantees."""
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
diff --git a/sdks/python/apache_beam/transforms/sideinputs.py b/sdks/python/apache_beam/transforms/sideinputs.py
index 21fc919..8e57ede 100644
--- a/sdks/python/apache_beam/transforms/sideinputs.py
+++ b/sdks/python/apache_beam/transforms/sideinputs.py
@@ -24,23 +24,36 @@
AsSingleton, AsIter, AsList and AsDict in apache_beam.pvalue.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Callable
+from typing import Dict
from apache_beam.transforms import window
+if TYPE_CHECKING:
+ from apache_beam import pvalue
+
+WindowMappingFn = Callable[[window.BoundedWindow], window.BoundedWindow]
# Top-level function so we can identify it later.
def _global_window_mapping_fn(w, global_window=window.GlobalWindow()):
+ # type: (...) -> window.GlobalWindow
return global_window
def default_window_mapping_fn(target_window_fn):
+ # type: (window.WindowFn) -> WindowMappingFn
if target_window_fn == window.GlobalWindows():
return _global_window_mapping_fn
def map_via_end(source_window):
+ # type: (window.BoundedWindow) -> window.BoundedWindow
return list(target_window_fn.assign(
window.WindowFn.AssignContext(source_window.max_timestamp())))[-1]
@@ -50,15 +63,20 @@
class SideInputMap(object):
"""Represents a mapping of windows to side input values."""
- def __init__(self, view_class, view_options, iterable):
+ def __init__(self,
+ view_class, # type: pvalue.AsSideInput
+ view_options,
+ iterable
+ ):
self._window_mapping_fn = view_options.get(
'window_mapping_fn', _global_window_mapping_fn)
self._view_class = view_class
self._view_options = view_options
self._iterable = iterable
- self._cache = {}
+ self._cache = {} # type: Dict[window.BoundedWindow, Any]
def __getitem__(self, window):
+ # type: (window.BoundedWindow) -> Any
if window not in self._cache:
target_window = self._window_mapping_fn(window)
self._cache[window] = self._view_class._from_runtime_iterable(
@@ -66,6 +84,7 @@
return self._cache[window]
def is_globally_windowed(self):
+ # type: () -> bool
return self._window_mapping_fn == _global_window_mapping_fn
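The WindowMappingFn alias introduced above names the callables produced by default_window_mapping_fn. As a quick illustration of the mapping it performs (the values follow from the code above and are not asserted by this patch): a main-input window is mapped to the side-input window that contains its maximum timestamp.

    from apache_beam.transforms import window
    from apache_beam.transforms.sideinputs import default_window_mapping_fn

    mapping_fn = default_window_mapping_fn(window.FixedWindows(5))
    # The main-input window [0, 10) ends just before timestamp 10, so it maps
    # to the side-input window [5, 10).
    target = mapping_fn(window.IntervalWindow(0, 10))
    assert target == window.IntervalWindow(5, 10)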
diff --git a/sdks/python/apache_beam/transforms/sideinputs_test.py b/sdks/python/apache_beam/transforms/sideinputs_test.py
index f9c9ae9..3aa87e8 100644
--- a/sdks/python/apache_beam/transforms/sideinputs_test.py
+++ b/sdks/python/apache_beam/transforms/sideinputs_test.py
@@ -17,6 +17,8 @@
"""Unit tests for side inputs."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
@@ -25,10 +27,16 @@
from nose.plugins.attrib import attr
import apache_beam as beam
+from apache_beam.options.pipeline_options import StandardOptions
from apache_beam.testing.test_pipeline import TestPipeline
+from apache_beam.testing.test_stream import TestStream
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to
+from apache_beam.testing.util import equal_to_per_window
+from apache_beam.transforms import Map
+from apache_beam.transforms import trigger
from apache_beam.transforms import window
+from apache_beam.utils.timestamp import Timestamp
class SideInputsTest(unittest.TestCase):
@@ -309,6 +317,65 @@
assert_that(results, equal_to(['a', 'b']))
pipeline.run()
+ @attr('ValidatesRunner')
+ def test_multi_triggered_gbk_side_input(self):
+ """Test a GBK sideinput, with multiple triggering."""
+ options = StandardOptions(streaming=True)
+ p = TestPipeline(options=options)
+
+ test_stream = (p
+ | 'Mixed TestStream' >> TestStream()
+ .advance_watermark_to(3, tag='main')
+ .add_elements(['a1'], tag='main')
+ .advance_watermark_to(8, tag='main')
+ .add_elements(['a2'], tag='main')
+ .add_elements([window.TimestampedValue(('k', 100), 2)],
+ tag='side')
+ .add_elements([window.TimestampedValue(('k', 400), 7)],
+ tag='side')
+ .advance_watermark_to_infinity(tag='main')
+ .advance_watermark_to_infinity(tag='side'))
+
+ main_data = (test_stream['main']
+ | 'Main windowInto' >> beam.WindowInto(
+ window.FixedWindows(5),
+ accumulation_mode=trigger.AccumulationMode.DISCARDING))
+
+ side_data = (test_stream['side']
+ | 'Side windowInto' >> beam.WindowInto(
+ window.FixedWindows(5),
+ trigger=trigger.AfterWatermark(
+ early=trigger.AfterCount(1)),
+ accumulation_mode=trigger.AccumulationMode.DISCARDING)
+ | beam.CombinePerKey(sum)
+ | 'Values' >> Map(lambda k_vs: k_vs[1]))
+
+ class RecordFn(beam.DoFn):
+ def process(self,
+ elm=beam.DoFn.ElementParam,
+ ts=beam.DoFn.TimestampParam,
+ side=beam.DoFn.SideInputParam):
+ yield (elm, ts, side)
+
+ records = (main_data
+ | beam.ParDo(RecordFn(), beam.pvalue.AsList(side_data)))
+
+ expected_window_to_elements = {
+ window.IntervalWindow(0, 5): [
+ ('a1', Timestamp(3), [100, 0]),
+ ],
+ window.IntervalWindow(5, 10): [
+ ('a2', Timestamp(8), [400, 0])
+ ],
+ }
+
+ assert_that(
+ records,
+ equal_to_per_window(expected_window_to_elements),
+ use_global_window=False,
+ label='assert per window')
+
+ p.run()
if __name__ == '__main__':
logging.getLogger().setLevel(logging.DEBUG)
diff --git a/sdks/python/apache_beam/transforms/stats.py b/sdks/python/apache_beam/transforms/stats.py
index 5550e48..501d63c 100644
--- a/sdks/python/apache_beam/transforms/stats.py
+++ b/sdks/python/apache_beam/transforms/stats.py
@@ -17,6 +17,8 @@
"""This module has all statistic related transforms."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/transforms/stats_test.py b/sdks/python/apache_beam/transforms/stats_test.py
index 14027fd..550c3f5 100644
--- a/sdks/python/apache_beam/transforms/stats_test.py
+++ b/sdks/python/apache_beam/transforms/stats_test.py
@@ -16,6 +16,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/transforms/timeutil.py b/sdks/python/apache_beam/transforms/timeutil.py
index 88305cb..aaa313d 100644
--- a/sdks/python/apache_beam/transforms/timeutil.py
+++ b/sdks/python/apache_beam/transforms/timeutil.py
@@ -17,6 +17,8 @@
"""Timestamp utilities."""
+# pytype: skip-file
+
from __future__ import absolute_import
from abc import ABCMeta
@@ -59,7 +61,7 @@
return TimeDomain._RUNNER_API_MAPPING[domain]
-class TimestampCombinerImpl(with_metaclass(ABCMeta, object)):
+class TimestampCombinerImpl(with_metaclass(ABCMeta, object)): # type: ignore[misc]
"""Implementation of TimestampCombiner."""
@abstractmethod
@@ -86,7 +88,7 @@
return self.combine_all(merging_timestamps)
-class DependsOnlyOnWindow(with_metaclass(ABCMeta, TimestampCombinerImpl)):
+class DependsOnlyOnWindow(with_metaclass(ABCMeta, TimestampCombinerImpl)): # type: ignore[misc]
"""TimestampCombinerImpl that only depends on the window."""
def merge(self, result_window, unused_merging_timestamps):
diff --git a/sdks/python/apache_beam/transforms/transforms_keyword_only_args_test_py3.py b/sdks/python/apache_beam/transforms/transforms_keyword_only_args_test_py3.py
index 6a3c311..661d6ac 100644
--- a/sdks/python/apache_beam/transforms/transforms_keyword_only_args_test_py3.py
+++ b/sdks/python/apache_beam/transforms/transforms_keyword_only_args_test_py3.py
@@ -17,6 +17,8 @@
"""Unit tests for side inputs."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/transforms/trigger.py b/sdks/python/apache_beam/transforms/trigger.py
index 2a76c2f..d69f056 100644
--- a/sdks/python/apache_beam/transforms/trigger.py
+++ b/sdks/python/apache_beam/transforms/trigger.py
@@ -20,6 +20,8 @@
Triggers control when, in processing time, windows get emitted.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
@@ -78,7 +80,7 @@
# RETRACTING = 3
-class _StateTag(with_metaclass(ABCMeta, object)):
+class _StateTag(with_metaclass(ABCMeta, object)): # type: ignore[misc]
"""An identifier used to store and retrieve typed, combinable state.
The given tag must be unique for this step."""
@@ -164,7 +166,7 @@
# pylint: disable=unused-argument
# TODO(robertwb): Provisional API, Java likely to change as well.
-class TriggerFn(with_metaclass(ABCMeta, object)):
+class TriggerFn(with_metaclass(ABCMeta, object)): # type: ignore[misc]
"""A TriggerFn determines when windows (panes) are emitted.
See https://beam.apache.org/documentation/programming-guide/#triggers
@@ -584,7 +586,7 @@
return self.underlying.has_ontime_pane()
-class _ParallelTriggerFn(with_metaclass(ABCMeta, TriggerFn)):
+class _ParallelTriggerFn(with_metaclass(ABCMeta, TriggerFn)): # type: ignore[misc]
def __init__(self, *triggers):
self.triggers = triggers
@@ -824,7 +826,7 @@
# pylint: disable=unused-argument
-class SimpleState(with_metaclass(ABCMeta, object)):
+class SimpleState(with_metaclass(ABCMeta, object)): # type: ignore[misc]
"""Basic state storage interface used for triggering.
Only timers must hold the watermark (by their timestamp).
@@ -999,7 +1001,7 @@
return driver
-class TriggerDriver(with_metaclass(ABCMeta, object)):
+class TriggerDriver(with_metaclass(ABCMeta, object)): # type: ignore[misc]
"""Breaks a series of bundle and timer firings into window (pane)s."""
@abstractmethod
@@ -1128,6 +1130,7 @@
def __init__(self, windowing, clock):
self.clock = clock
+ self.allowed_lateness = windowing.allowed_lateness
self.window_fn = windowing.windowfn
self.timestamp_combiner_impl = TimestampCombiner.get_impl(
windowing.timestamp_combiner, self.window_fn)
@@ -1147,6 +1150,9 @@
windows_to_elements = collections.defaultdict(list)
for wv in windowed_values:
for window in wv.windows:
+ # ignore expired windows
+ if input_watermark > window.end + self.allowed_lateness:
+ continue
windows_to_elements[window].append((wv.value, wv.timestamp))
# First handle merging.
@@ -1241,7 +1247,6 @@
nonspeculative_index = state.get_state(
window, self.NONSPECULATIVE_INDEX)
state.add_state(window, self.NONSPECULATIVE_INDEX, 1)
- windowed_value.PaneInfoTiming.LATE
_LOGGER.warning('Watermark moved backwards in time '
'or late data moved window end forward.')
else:
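The expired-window check added to process_elements above drops elements whose window closed longer ago than the allowed lateness. A small worked example of the comparison, assuming FixedWindows(10) and an allowed lateness of 5 seconds (illustrative values, not from this patch):

    from apache_beam.utils.timestamp import Duration, Timestamp

    window_end = Timestamp(10)              # window [0, 10)
    allowed_lateness = Duration(seconds=5)
    # Watermark at 16 is past end + lateness (15), so the element is skipped.
    assert Timestamp(16) > window_end + allowed_lateness
    # Watermark at 14 is not, so the element is still processed as late data.
    assert not Timestamp(14) > window_end + allowed_lateness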
diff --git a/sdks/python/apache_beam/transforms/trigger_test.py b/sdks/python/apache_beam/transforms/trigger_test.py
index 58b29e0..8899cf8 100644
--- a/sdks/python/apache_beam/transforms/trigger_test.py
+++ b/sdks/python/apache_beam/transforms/trigger_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the triggering classes."""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
@@ -36,6 +38,7 @@
from apache_beam import coders
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import StandardOptions
+from apache_beam.portability import common_urns
from apache_beam.runners import pipeline_context
from apache_beam.runners.direct.clock import TestClock
from apache_beam.testing.test_pipeline import TestPipeline
@@ -66,6 +69,7 @@
from apache_beam.transforms.window import WindowFn
from apache_beam.utils.timestamp import MAX_TIMESTAMP
from apache_beam.utils.timestamp import MIN_TIMESTAMP
+from apache_beam.utils.timestamp import Duration
from apache_beam.utils.windowed_value import PaneInfoTiming
@@ -118,8 +122,11 @@
bundles, late_bundles,
expected_panes):
actual_panes = collections.defaultdict(list)
+ allowed_lateness = Duration(micros=int(
+ common_urns.constants.MAX_TIMESTAMP_MILLIS.constant)*1000)
driver = GeneralTriggerDriver(
- Windowing(window_fn, trigger_fn, accumulation_mode), TestClock())
+ Windowing(window_fn, trigger_fn, accumulation_mode,
+ allowed_lateness=allowed_lateness), TestClock())
state = InMemoryUnmergedState()
for bundle in bundles:
@@ -422,19 +429,6 @@
class TriggerPipelineTest(unittest.TestCase):
- def setUp(self):
- # Use state on the TestCase class, since other references would be pickled
- # into a closure and not have the desired side effects.
- TriggerPipelineTest.all_records = []
-
- def record_dofn(self):
- class RecordDoFn(beam.DoFn):
-
- def process(self, element):
- TriggerPipelineTest.all_records.append(element)
-
- return RecordDoFn()
-
def test_after_count(self):
with TestPipeline() as p:
def construct_timestamped(k_t):
@@ -471,29 +465,28 @@
if i % 5 == 0:
ts.advance_watermark_to(i)
ts.advance_processing_time(5)
+ ts.advance_watermark_to_infinity()
options = PipelineOptions()
options.view_as(StandardOptions).streaming = True
with TestPipeline(options=options) as p:
- _ = (p
- | ts
- | beam.WindowInto(
- FixedWindows(10),
- accumulation_mode=trigger.AccumulationMode.ACCUMULATING,
- trigger=AfterWatermark(
- early=AfterAll(
- AfterCount(1), AfterProcessingTime(5))
- ))
- | beam.GroupByKey()
- | beam.FlatMap(lambda x: x[1])
- | beam.ParDo(self.record_dofn()))
+ records = (p
+ | ts
+ | beam.WindowInto(
+ FixedWindows(10),
+ accumulation_mode=trigger.AccumulationMode.ACCUMULATING,
+ trigger=AfterWatermark(
+ early=AfterAll(
+ AfterCount(1), AfterProcessingTime(5))
+ ))
+ | beam.GroupByKey()
+ | beam.FlatMap(lambda x: x[1]))
# The trigger should fire twice. Once after 5 seconds, and once after 10.
# The firings should accumulate the output.
first_firing = [str(i) for i in elements if i <= 5]
second_firing = [str(i) for i in elements]
- self.assertListEqual(first_firing + second_firing,
- TriggerPipelineTest.all_records)
+ assert_that(records, equal_to(first_firing + second_firing))
class TranscriptTest(unittest.TestCase):
@@ -604,6 +597,7 @@
timestamp_combiner = getattr(
TimestampCombiner,
spec.get('timestamp_combiner', 'OUTPUT_AT_EOW').upper())
+ allowed_lateness = spec.get('allowed_lateness', 0.000)
def only_element(xs):
x, = list(xs)
@@ -613,7 +607,7 @@
self._execute(
window_fn, trigger_fn, accumulation_mode, timestamp_combiner,
- transcript, spec)
+ allowed_lateness, transcript, spec)
def _windowed_value_info(windowed_value):
@@ -690,11 +684,11 @@
def _execute(
self, window_fn, trigger_fn, accumulation_mode, timestamp_combiner,
- transcript, unused_spec):
+ allowed_lateness, transcript, unused_spec):
driver = GeneralTriggerDriver(
- Windowing(window_fn, trigger_fn, accumulation_mode, timestamp_combiner),
- TestClock())
+ Windowing(window_fn, trigger_fn, accumulation_mode,
+ timestamp_combiner, allowed_lateness), TestClock())
state = InMemoryUnmergedState()
output = []
watermark = MIN_TIMESTAMP
@@ -722,7 +716,8 @@
for t in params]
output = [
_windowed_value_info(wv)
- for wv in driver.process_elements(state, bundle, watermark)]
+ for wv in driver.process_elements(state, bundle, watermark,
+ watermark)]
fire_timers()
elif action == 'watermark':
@@ -756,7 +751,7 @@
def _execute(
self, window_fn, trigger_fn, accumulation_mode, timestamp_combiner,
- transcript, spec):
+ allowed_lateness, transcript, spec):
runner_name = TestPipeline().runner.__class__.__name__
if runner_name in spec.get('broken_on', ()):
@@ -816,6 +811,7 @@
else:
raise ValueError('Unexpected action: %s' % action)
test_stream.add_elements([json.dumps(('expect', []))])
+ test_stream.advance_watermark_to_infinity()
read_test_stream = test_stream | beam.Map(json.loads)
@@ -894,7 +890,8 @@
window_fn,
trigger=trigger_fn,
accumulation_mode=accumulation_mode,
- timestamp_combiner=timestamp_combiner)
+ timestamp_combiner=timestamp_combiner,
+ allowed_lateness=allowed_lateness)
| aggregation
| beam.MapTuple(_windowed_value_info_map_fn)
# Place outputs back into the global window to allow flattening
@@ -934,7 +931,7 @@
def _execute(
self, window_fn, trigger_fn, accumulation_mode, timestamp_combiner,
- transcript, spec):
+ allowed_lateness, transcript, spec):
if timestamp_combiner == TimestampCombiner.OUTPUT_AT_EARLIEST_TRANSFORMED:
self.skipTest(
'Non-fnapi timestamp combiner: %s' % spec.get('timestamp_combiner'))
@@ -984,7 +981,8 @@
window_fn,
trigger=trigger_fn,
accumulation_mode=accumulation_mode,
- timestamp_combiner=timestamp_combiner))
+ timestamp_combiner=timestamp_combiner,
+ allowed_lateness=allowed_lateness))
grouped = input_pc | 'Grouped' >> (
beam.GroupByKey()
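These test changes thread allowed_lateness from the transcript spec into both Windowing and WindowInto. For reference, the pipeline-level form exercised above looks roughly like this sketch (window size, trigger, and lateness are illustrative values, not defaults from this patch):

    import apache_beam as beam
    from apache_beam.transforms import trigger, window

    with beam.Pipeline() as p:
        grouped = (
            p
            | beam.Create([('k', 1), ('k', 2)])
            | beam.WindowInto(
                window.FixedWindows(10),
                trigger=trigger.AfterWatermark(late=trigger.AfterCount(1)),
                accumulation_mode=trigger.AccumulationMode.ACCUMULATING,
                allowed_lateness=30)  # seconds tolerated after the window closes
            | beam.GroupByKey())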
diff --git a/sdks/python/apache_beam/transforms/userstate.py b/sdks/python/apache_beam/transforms/userstate.py
index 4d7126e..dd2b296 100644
--- a/sdks/python/apache_beam/transforms/userstate.py
+++ b/sdks/python/apache_beam/transforms/userstate.py
@@ -20,16 +20,32 @@
Experimental; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import types
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Callable
+from typing import Iterable
+from typing import Optional
+from typing import Set
+from typing import Tuple
+from typing import TypeVar
from apache_beam.coders import Coder
from apache_beam.coders import coders
from apache_beam.portability.api import beam_runner_api_pb2
from apache_beam.transforms.timeutil import TimeDomain
+if TYPE_CHECKING:
+ from apache_beam.runners.pipeline_context import PipelineContext
+ from apache_beam.transforms.core import CombineFn
+
+CallableT = TypeVar('CallableT', bound=Callable)
+
class StateSpec(object):
"""Specification for a user DoFn state cell."""
@@ -48,12 +64,14 @@
"""Specification for a user DoFn bag state cell."""
def __init__(self, name, coder):
+ # type: (str, Coder) -> None
assert isinstance(name, str)
assert isinstance(coder, Coder)
self.name = name
self.coder = coder
def to_runner_api(self, context):
+ # type: (PipelineContext) -> beam_runner_api_pb2.StateSpec
return beam_runner_api_pb2.StateSpec(
bag_spec=beam_runner_api_pb2.BagStateSpec(
element_coder_id=context.coders.get_id(self.coder)))
@@ -63,6 +81,7 @@
"""Specification for a user DoFn Set State cell"""
def __init__(self, name, coder):
+ # type: (str, Coder) -> None
if not isinstance(name, str):
raise TypeError("SetState name is not a string")
if not isinstance(coder, Coder):
@@ -80,6 +99,7 @@
"""Specification for a user DoFn combining value state cell."""
def __init__(self, name, coder=None, combine_fn=None):
+ # type: (str, Optional[Coder], Any) -> None
"""Initialize the specification for CombiningValue state.
CombiningValueStateSpec(name, combine_fn) -> Coder-inferred combining value
@@ -118,6 +138,7 @@
self.coder = coder
def to_runner_api(self, context):
+ # type: (PipelineContext) -> beam_runner_api_pb2.StateSpec
return beam_runner_api_pb2.StateSpec(
combining_spec=beam_runner_api_pb2.CombiningStateSpec(
combine_fn=self.combine_fn.to_runner_api(context),
@@ -138,6 +159,7 @@
return '%s(%s)' % (self.__class__.__name__, self.name)
def to_runner_api(self, context):
+ # type: (PipelineContext) -> beam_runner_api_pb2.TimerSpec
return beam_runner_api_pb2.TimerSpec(
time_domain=TimeDomain.to_runner_api(self.time_domain),
timer_coder_id=context.coders.get_id(
@@ -145,6 +167,7 @@
def on_timer(timer_spec):
+ # type: (TimerSpec) -> Callable[[CallableT], CallableT]
"""Decorator for timer firing DoFn method.
This decorator allows a user to specify an on_timer processing method
@@ -174,6 +197,7 @@
def get_dofn_specs(dofn):
+ # type: (...) -> Tuple[Set[StateSpec], Set[TimerSpec]]
"""Gets the state and timer specs for a DoFn, if any.
Args:
@@ -274,12 +298,19 @@
class AccumulatingRuntimeState(RuntimeState):
def read(self):
+ # type: () -> Iterable[Any]
raise NotImplementedError(type(self))
def add(self, value):
+ # type: (Any) -> None
raise NotImplementedError(type(self))
def clear(self):
+ # type: () -> None
+ raise NotImplementedError(type(self))
+
+ def commit(self):
+ # type: () -> None
raise NotImplementedError(type(self))
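The type comments above document the public surface of the state and timer specs. A compact sketch of how they are used in a stateful DoFn (keyed input assumed; the DoFn itself is illustrative, not part of this patch):

    import apache_beam as beam
    from apache_beam.coders import VarIntCoder
    from apache_beam.transforms.timeutil import TimeDomain
    from apache_beam.transforms.userstate import BagStateSpec, TimerSpec, on_timer

    class BufferingDoFn(beam.DoFn):
        BUFFER = BagStateSpec('buffer', VarIntCoder())
        FLUSH = TimerSpec('flush', TimeDomain.WATERMARK)

        def process(self, element,
                    w=beam.DoFn.WindowParam,
                    buffer=beam.DoFn.StateParam(BUFFER),
                    flush=beam.DoFn.TimerParam(FLUSH)):
            _, value = element
            buffer.add(value)    # buffer values per key and window
            flush.set(w.end)     # fire when the watermark passes the window end

        @on_timer(FLUSH)
        def flush_buffer(self, buffer=beam.DoFn.StateParam(BUFFER)):
            yield sum(buffer.read())
            buffer.clear()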
diff --git a/sdks/python/apache_beam/transforms/userstate_test.py b/sdks/python/apache_beam/transforms/userstate_test.py
index 601a1d4..8c1ace0 100644
--- a/sdks/python/apache_beam/transforms/userstate_test.py
+++ b/sdks/python/apache_beam/transforms/userstate_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for the Beam State and Timer API interfaces."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/transforms/util.py b/sdks/python/apache_beam/transforms/util.py
index ef4404a..a29ad68 100644
--- a/sdks/python/apache_beam/transforms/util.py
+++ b/sdks/python/apache_beam/transforms/util.py
@@ -18,6 +18,8 @@
"""Simple utility PTransforms.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -27,12 +29,18 @@
import re
import sys
import time
-import typing
import warnings
from builtins import filter
from builtins import object
from builtins import range
from builtins import zip
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Iterable
+from typing import List
+from typing import Tuple
+from typing import TypeVar
+from typing import Union
from future.utils import itervalues
from past.builtins import long
@@ -67,6 +75,10 @@
from apache_beam.utils.annotations import deprecated
from apache_beam.utils.annotations import experimental
+if TYPE_CHECKING:
+ from apache_beam import pvalue
+ from apache_beam.runners.pipeline_context import PipelineContext
+
__all__ = [
'BatchElements',
'CoGroupByKey',
@@ -83,9 +95,9 @@
'GroupIntoBatches'
]
-K = typing.TypeVar('K')
-V = typing.TypeVar('V')
-T = typing.TypeVar('T')
+K = TypeVar('K')
+V = TypeVar('V')
+T = TypeVar('T')
class CoGroupByKey(PTransform):
@@ -512,7 +524,7 @@
@typehints.with_input_types(T)
-@typehints.with_output_types(typing.List[T])
+@typehints.with_output_types(List[T])
class BatchElements(PTransform):
"""A Transform that batches elements for amortized processing.
@@ -605,8 +617,8 @@
return self._window_coder
-@typehints.with_input_types(typing.Tuple[K, V])
-@typehints.with_output_types(typing.Tuple[K, V])
+@typehints.with_input_types(Tuple[K, V])
+@typehints.with_output_types(Tuple[K, V])
class ReshufflePerKey(PTransform):
"""PTransform that returns a PCollection equivalent to its input,
but operationally provides some of the side effects of a GroupByKey,
@@ -650,7 +662,7 @@
key, windowed_values = element
return [wv.with_value((key, wv.value)) for wv in windowed_values]
- ungrouped = pcoll | Map(reify_timestamps).with_output_types(typing.Any)
+ ungrouped = pcoll | Map(reify_timestamps).with_output_types(Any)
# TODO(BEAM-8104) Using global window as one of the standard window.
# This is to mitigate the Dataflow Java Runner Harness limitation to
@@ -662,7 +674,7 @@
timestamp_combiner=TimestampCombiner.OUTPUT_AT_EARLIEST)
result = (ungrouped
| GroupByKey()
- | FlatMap(restore_timestamps).with_output_types(typing.Any))
+ | FlatMap(restore_timestamps).with_output_types(Any))
result._windowing = windowing_saved
return result
@@ -682,10 +694,11 @@
"""
def expand(self, pcoll):
+ # type: (pvalue.PValue) -> pvalue.PCollection
if sys.version_info >= (3,):
- KeyedT = typing.Tuple[int, T]
+ KeyedT = Tuple[int, T]
else:
- KeyedT = typing.Tuple[long, T] # pylint: disable=long-builtin
+ KeyedT = Tuple[long, T] # pylint: disable=long-builtin
return (pcoll
| 'AddRandomKeys' >> Map(lambda t: (random.getrandbits(32), t))
.with_input_types(T).with_output_types(KeyedT)
@@ -694,6 +707,7 @@
.with_input_types(KeyedT).with_output_types(T))
def to_runner_api_parameter(self, unused_context):
+ # type: (PipelineContext) -> Tuple[str, None]
return common_urns.composites.RESHUFFLE.urn, None
@PTransform.register_urn(common_urns.composites.RESHUFFLE.urn, None)
@@ -714,7 +728,7 @@
@experimental()
-@typehints.with_input_types(typing.Tuple[K, V])
+@typehints.with_input_types(Tuple[K, V])
class GroupIntoBatches(PTransform):
"""PTransform that batches the input into desired batch size. Elements are
buffered until they are equal to batch size provided in the argument at which
@@ -794,11 +808,11 @@
self.delimiter = delimiter or ","
def expand(self, pcoll):
- input_type = typing.Tuple[typing.Any, typing.Any]
+ input_type = Tuple[Any, Any]
output_type = str
return (pcoll | ('%s:KeyValueToString' % self.label >> (Map(
lambda x: "{}{}{}".format(x[0], self.delimiter, x[1])))
- .with_input_types(input_type)
+ .with_input_types(input_type) # type: ignore[misc]
.with_output_types(output_type)))
class Element(PTransform):
@@ -824,7 +838,7 @@
self.delimiter = delimiter or ","
def expand(self, pcoll):
- input_type = typing.Iterable[typing.Any]
+ input_type = Iterable[Any]
output_type = str
return (pcoll | ('%s:IterablesToString' % self.label >> (
Map(lambda x: self.delimiter.join(str(_x) for _x in x)))
@@ -864,8 +878,8 @@
def expand(self, pcoll):
return pcoll | ParDo(self.add_window_info)
- @typehints.with_input_types(typing.Tuple[K, V])
- @typehints.with_output_types(typing.Tuple[K, V])
+ @typehints.with_input_types(Tuple[K, V])
+ @typehints.with_output_types(Tuple[K, V])
class TimestampInValue(PTransform):
"""PTransform to wrap the Value in a KV pair in a TimestampedValue with
the element's associated timestamp."""
@@ -878,8 +892,8 @@
def expand(self, pcoll):
return pcoll | ParDo(self.add_timestamp_info)
- @typehints.with_input_types(typing.Tuple[K, V])
- @typehints.with_output_types(typing.Tuple[K, V])
+ @typehints.with_input_types(Tuple[K, V])
+ @typehints.with_output_types(Tuple[K, V])
class WindowInValue(PTransform):
"""PTransform to convert the Value in a KV pair into a tuple of
(value, timestamp, window), with the whole element being wrapped inside a
@@ -938,7 +952,7 @@
@staticmethod
@typehints.with_input_types(str)
- @typehints.with_output_types(typing.List[str])
+ @typehints.with_output_types(List[str])
@ptransform_fn
def all_matches(pcoll, regex):
"""
@@ -959,7 +973,7 @@
@staticmethod
@typehints.with_input_types(str)
- @typehints.with_output_types(typing.Tuple[str, str])
+ @typehints.with_output_types(Tuple[str, str])
@ptransform_fn
def matches_kv(pcoll, regex, keyGroup, valueGroup=0):
"""
@@ -1004,8 +1018,7 @@
@staticmethod
@typehints.with_input_types(str)
- @typehints.with_output_types(typing.Union[typing.List[str],
- typing.Tuple[str, str]])
+ @typehints.with_output_types(Union[List[str], Tuple[str, str]])
@ptransform_fn
def find_all(pcoll, regex, group=0, outputEmpty=True):
"""
@@ -1033,7 +1046,7 @@
@staticmethod
@typehints.with_input_types(str)
- @typehints.with_output_types(typing.Tuple[str, str])
+ @typehints.with_output_types(Tuple[str, str])
@ptransform_fn
def find_kv(pcoll, regex, keyGroup, valueGroup=0):
"""
@@ -1090,7 +1103,7 @@
@staticmethod
@typehints.with_input_types(str)
- @typehints.with_output_types(typing.List[str])
+ @typehints.with_output_types(List[str])
@ptransform_fn
def split(pcoll, regex, outputEmpty=False):
"""
diff --git a/sdks/python/apache_beam/transforms/util_test.py b/sdks/python/apache_beam/transforms/util_test.py
index 58cf243..d7290ce 100644
--- a/sdks/python/apache_beam/transforms/util_test.py
+++ b/sdks/python/apache_beam/transforms/util_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the transform.util classes."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -483,7 +485,8 @@
label='after reshuffle')
pipeline.run()
- @attr('ValidatesRunner')
+ # TODO(BEAM-9003): Does not work in streaming mode on Dataflow.
+ @attr('ValidatesRunner', 'sickbay-streaming')
def test_reshuffle_preserves_timestamps(self):
with TestPipeline() as pipeline:
@@ -606,7 +609,7 @@
.advance_watermark_to(start_time +
GroupIntoBatchesTest.NUM_ELEMENTS)
.advance_watermark_to_infinity())
- pipeline = TestPipeline()
+ pipeline = TestPipeline(options=StandardOptions(streaming=True))
# window duration is 6 and batch size is 5, so output batch size should be
# 5 (flush because of batchSize reached)
expected_0 = 5
diff --git a/sdks/python/apache_beam/transforms/window.py b/sdks/python/apache_beam/transforms/window.py
index cfbbae1..b79ed20 100644
--- a/sdks/python/apache_beam/transforms/window.py
+++ b/sdks/python/apache_beam/transforms/window.py
@@ -47,12 +47,17 @@
WindowFn.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import abc
from builtins import object
from builtins import range
from functools import total_ordering
+from typing import Any
+from typing import Iterable
+from typing import List
from future.utils import with_metaclass
from google.protobuf import duration_pb2
@@ -69,7 +74,9 @@
from apache_beam.utils import windowed_value
from apache_beam.utils.timestamp import MIN_TIMESTAMP
from apache_beam.utils.timestamp import Duration
+from apache_beam.utils.timestamp import DurationTypes # pylint: disable=unused-import
from apache_beam.utils.timestamp import Timestamp
+from apache_beam.utils.timestamp import TimestampTypes # pylint: disable=unused-import
from apache_beam.utils.windowed_value import WindowedValue
__all__ = [
@@ -112,19 +119,24 @@
raise ValueError('Invalid TimestampCombiner: %s.' % timestamp_combiner)
-class WindowFn(with_metaclass(abc.ABCMeta, urns.RunnerApiFn)):
+class WindowFn(with_metaclass(abc.ABCMeta, urns.RunnerApiFn)): # type: ignore[misc]
"""An abstract windowing function defining a basic assign and merge."""
class AssignContext(object):
"""Context passed to WindowFn.assign()."""
- def __init__(self, timestamp, element=None, window=None):
+ def __init__(self,
+ timestamp, # type: TimestampTypes
+ element=None,
+ window=None
+ ):
self.timestamp = Timestamp.of(timestamp)
self.element = element
self.window = window
@abc.abstractmethod
def assign(self, assign_context):
+ # type: (AssignContext) -> Iterable[BoundedWindow]
"""Associates windows to an element.
Arguments:
@@ -139,6 +151,7 @@
"""Context passed to WindowFn.merge() to perform merging, if any."""
def __init__(self, windows):
+ # type: (Iterable[BoundedWindow]) -> None
self.windows = list(windows)
def merge(self, to_be_merged, merge_result):
@@ -146,6 +159,7 @@
@abc.abstractmethod
def merge(self, merge_context):
+ # type: (WindowFn.MergeContext) -> None
"""Returns a window that is the result of merging a set of windows."""
raise NotImplementedError
@@ -187,7 +201,18 @@
"""
def __init__(self, end):
- self.end = Timestamp.of(end)
+ # type: (TimestampTypes) -> None
+ self._end = Timestamp.of(end)
+
+ @property
+ def start(self):
+ # type: () -> Timestamp
+ raise NotImplementedError
+
+ @property
+ def end(self):
+ # type: () -> Timestamp
+ return self._end
def max_timestamp(self):
return self.end.predecessor()
@@ -257,6 +282,7 @@
"""
def __init__(self, value, timestamp):
+ # type: (Any, TimestampTypes) -> None
self.value = value
self.timestamp = Timestamp.of(timestamp)
@@ -290,7 +316,6 @@
def __init__(self):
super(GlobalWindow, self).__init__(GlobalWindow._getTimestampFromProto())
- self.start = MIN_TIMESTAMP
def __repr__(self):
return 'GlobalWindow'
@@ -305,6 +330,11 @@
def __ne__(self, other):
return not self == other
+ @property
+ def start(self):
+ # type: () -> Timestamp
+ return MIN_TIMESTAMP
+
@staticmethod
def _getTimestampFromProto():
ts_millis = int(
@@ -318,6 +348,7 @@
return False
def merge(self, merge_context):
+ # type: (WindowFn.MergeContext) -> None
pass # No merging.
@@ -371,7 +402,10 @@
range.
"""
- def __init__(self, size, offset=0):
+ def __init__(self,
+ size, # type: DurationTypes
+ offset=0 # type: TimestampTypes
+ ):
"""Initialize a ``FixedWindows`` function for a given size and offset.
Args:
@@ -436,7 +470,11 @@
in range [0, period). If it is not it will be normalized to this range.
"""
- def __init__(self, size, period, offset=0):
+ def __init__(self,
+ size, # type: DurationTypes
+ period, # type: DurationTypes
+ offset=0, # type: TimestampTypes
+ ):
if size <= 0:
raise ValueError('The size parameter must be strictly positive.')
self.size = Duration.of(size)
@@ -497,6 +535,7 @@
"""
def __init__(self, gap_size):
+ # type: (DurationTypes) -> None
if gap_size <= 0:
raise ValueError('The size parameter must be strictly positive.')
self.gap_size = Duration.of(gap_size)
@@ -509,7 +548,8 @@
return coders.IntervalWindowCoder()
def merge(self, merge_context):
- to_merge = []
+ # type: (WindowFn.MergeContext) -> None
+ to_merge = [] # type: List[BoundedWindow]
end = MIN_TIMESTAMP
for w in sorted(merge_context.windows, key=lambda w: w.start):
if to_merge:
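With end now a read-only property on BoundedWindow, and GlobalWindow exposing start as a property instead of setting it in __init__, the window classes behave exactly as before; the change is about typing only. A quick sketch of the assign path whose signatures gained type comments above (timestamps are illustrative):

    from apache_beam.transforms import window
    from apache_beam.utils.timestamp import Timestamp

    fn = window.FixedWindows(size=60)
    ctx = window.WindowFn.AssignContext(Timestamp(125))
    # Timestamp 125 falls into the fixed window [120, 180).
    assert fn.assign(ctx) == [window.IntervalWindow(120, 180)]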
diff --git a/sdks/python/apache_beam/transforms/window_test.py b/sdks/python/apache_beam/transforms/window_test.py
index 30430cc..0c88e21 100644
--- a/sdks/python/apache_beam/transforms/window_test.py
+++ b/sdks/python/apache_beam/transforms/window_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for the windowing classes."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
diff --git a/sdks/python/apache_beam/transforms/write_ptransform_test.py b/sdks/python/apache_beam/transforms/write_ptransform_test.py
index a8f56fd..5d3e7a9 100644
--- a/sdks/python/apache_beam/transforms/write_ptransform_test.py
+++ b/sdks/python/apache_beam/transforms/write_ptransform_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for the write transform."""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/typehints/decorators.py b/sdks/python/apache_beam/typehints/decorators.py
index d3c073b..81ca4b2 100644
--- a/sdks/python/apache_beam/typehints/decorators.py
+++ b/sdks/python/apache_beam/typehints/decorators.py
@@ -83,6 +83,8 @@
defined, or before importing a module containing type-hinted functions.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import inspect
@@ -92,9 +94,16 @@
from builtins import next
from builtins import object
from builtins import zip
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import Optional
+from typing import Tuple
+from typing import TypeVar
from apache_beam.typehints import native_type_compatibility
from apache_beam.typehints import typehints
+from apache_beam.typehints.native_type_compatibility import convert_to_beam_type
from apache_beam.typehints.typehints import CompositeTypeHintError
from apache_beam.typehints.typehints import SimpleTypeHintError
from apache_beam.typehints.typehints import check_constraint
@@ -113,6 +122,9 @@
'TypeCheckError',
]
+T = TypeVar('T')
+WithTypeHintsT = TypeVar('WithTypeHintsT', bound='WithTypeHints') # pylint: disable=invalid-name
+
# This is missing in the builtin types module. str.upper is arbitrary, any
# method on a C-implemented type will do.
# pylint: disable=invalid-name
@@ -128,7 +140,7 @@
_original_getfullargspec = inspect.getfullargspec
_use_full_argspec = True
except AttributeError: # Python 2
- _original_getfullargspec = inspect.getargspec
+ _original_getfullargspec = inspect.getargspec # type: ignore
_use_full_argspec = False
@@ -219,7 +231,10 @@
"""
__slots__ = ('input_types', 'output_types')
- def __init__(self, input_types=None, output_types=None):
+ def __init__(self,
+ input_types=None, # type: Optional[Tuple[Tuple[Any, ...], Dict[str, Any]]]
+ output_types=None # type: Optional[Tuple[Tuple[Any, ...], Dict[str, Any]]]
+ ):
self.input_types = input_types
self.output_types = output_types
@@ -254,16 +269,16 @@
input_args.append(typehints.Any)
else:
if param.kind in [param.KEYWORD_ONLY, param.VAR_KEYWORD]:
- input_kwargs[param.name] = param.annotation
+ input_kwargs[param.name] = convert_to_beam_type(param.annotation)
else:
assert param.kind in [param.POSITIONAL_ONLY,
param.POSITIONAL_OR_KEYWORD,
param.VAR_POSITIONAL], \
'Unsupported Parameter kind: %s' % param.kind
- input_args.append(param.annotation)
+ input_args.append(convert_to_beam_type(param.annotation))
output_args = []
if signature.return_annotation != signature.empty:
- output_args.append(signature.return_annotation)
+ output_args.append(convert_to_beam_type(signature.return_annotation))
else:
output_args.append(typehints.Any)
@@ -312,9 +327,11 @@
return res
def copy(self):
+ # type: () -> IOTypeHints
return IOTypeHints(self.input_types, self.output_types)
def with_defaults(self, hints):
+ # type: (Optional[IOTypeHints]) -> IOTypeHints
if not hints:
return self
if self._has_input_types():
@@ -365,6 +382,7 @@
self._type_hints = IOTypeHints()
def _get_or_create_type_hints(self):
+ # type: () -> IOTypeHints
# __init__ may have not been called
try:
# Only return an instance bound to self (see BEAM-8629).
@@ -389,12 +407,14 @@
return None
def with_input_types(self, *arg_hints, **kwarg_hints):
+ # type: (WithTypeHintsT, *Any, **Any) -> WithTypeHintsT
arg_hints = native_type_compatibility.convert_to_beam_types(arg_hints)
kwarg_hints = native_type_compatibility.convert_to_beam_types(kwarg_hints)
self._get_or_create_type_hints().set_input_types(*arg_hints, **kwarg_hints)
return self
def with_output_types(self, *arg_hints, **kwarg_hints):
+ # type: (WithTypeHintsT, *Any, **Any) -> WithTypeHintsT
arg_hints = native_type_compatibility.convert_to_beam_types(arg_hints)
kwarg_hints = native_type_compatibility.convert_to_beam_types(kwarg_hints)
self._get_or_create_type_hints().set_output_types(*arg_hints, **kwarg_hints)
@@ -591,6 +611,7 @@
def get_type_hints(fn):
+ # type: (Any) -> IOTypeHints
"""Gets the type hint associated with an arbitrary object fn.
Always returns a valid IOTypeHints object, creating one if necessary.
@@ -605,13 +626,14 @@
hints = IOTypeHints()
# Python 3.7 introduces annotations for _MethodDescriptorTypes.
if isinstance(fn, _MethodDescriptorType) and sys.version_info < (3, 7):
- hints.set_input_types(fn.__objclass__)
+ hints.set_input_types(fn.__objclass__) # type: ignore
return hints
return fn._type_hints
# pylint: enable=protected-access
def with_input_types(*positional_hints, **keyword_hints):
+ # type: (*Any, **Any) -> Callable[[T], T]
"""A decorator that type-checks defined type-hints with passed func arguments.
All type-hinted arguments can be specified using positional arguments,
@@ -694,6 +716,7 @@
def with_output_types(*return_type_hint, **kwargs):
+ # type: (*Any, **Any) -> Callable[[T], T]
"""A decorator that type-checks defined type-hints for return values(s).
This decorator will type-check the return value(s) of the decorated function.
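The substantive change in decorators.py is that annotations picked up from a callable's signature are now run through convert_to_beam_type, so typing-module hints become Beam type hints. Mirroring the new test added below, a small sketch (note the tests enable from_callable explicitly while the feature is being rolled out):

    import typing
    from apache_beam.typehints import decorators, typehints

    decorators._enable_from_callable = True

    def to_kv(s: str) -> typing.Tuple[str, int]:
        return (s, len(s))

    th = decorators.IOTypeHints.from_callable(to_kv)
    # The typing.Tuple return annotation is converted to a Beam type hint.
    assert th.output_types == ((typehints.Tuple[str, int],), {})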
diff --git a/sdks/python/apache_beam/typehints/decorators_test.py b/sdks/python/apache_beam/typehints/decorators_test.py
index 21f62f3..1edbae3 100644
--- a/sdks/python/apache_beam/typehints/decorators_test.py
+++ b/sdks/python/apache_beam/typehints/decorators_test.py
@@ -17,6 +17,8 @@
"""Tests for decorators module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import sys
@@ -26,6 +28,7 @@
from apache_beam.typehints import List
from apache_beam.typehints import WithTypeHints
from apache_beam.typehints import decorators
+from apache_beam.typehints import typehints
decorators._enable_from_callable = True
@@ -72,6 +75,41 @@
{'__unknown__keywords': decorators._ANY_VAR_KEYWORD}))
self.assertEqual(th.output_types, ((Any,), {}))
+ def test_strip_iterable_not_simple_output_noop(self):
+ th = decorators.IOTypeHints(output_types=((int, str), {}))
+ th.strip_iterable()
+ self.assertEqual(((int, str), {}), th.output_types)
+
+ def _test_strip_iterable(self, before, expected_after):
+ after = decorators.IOTypeHints(
+ output_types=((before,), {})).strip_iterable()
+ self.assertEqual(((expected_after, ), {}), after.output_types)
+
+ def _test_strip_iterable_fail(self, before):
+ with self.assertRaisesRegex(ValueError, r'not iterable'):
+ self._test_strip_iterable(before, None)
+
+ def test_strip_iterable(self):
+ # TODO(BEAM-8492): Uncomment once #9895 is merged.
+ # self._test_strip_iterable(None, None)
+ self._test_strip_iterable(typehints.Any, typehints.Any)
+ self._test_strip_iterable(typehints.Iterable[str], str)
+ self._test_strip_iterable(typehints.List[str], str)
+ self._test_strip_iterable(typehints.Iterator[str], str)
+ self._test_strip_iterable(typehints.Generator[str], str)
+ self._test_strip_iterable(typehints.Tuple[str], str)
+ self._test_strip_iterable(typehints.Tuple[str, int],
+ typehints.Union[str, int])
+ self._test_strip_iterable(typehints.Tuple[str, ...], str)
+ self._test_strip_iterable(typehints.KV[str, int],
+ typehints.Union[str, int])
+ self._test_strip_iterable(typehints.Set[str], str)
+
+ self._test_strip_iterable_fail(typehints.Union[str, int])
+ self._test_strip_iterable_fail(typehints.Optional[str])
+ self._test_strip_iterable_fail(typehints.WindowedValue[str])
+ self._test_strip_iterable_fail(typehints.Dict[str, int])
+
class WithTypeHintsTest(unittest.TestCase):
def test_get_type_hints_no_settings(self):
@@ -133,8 +171,8 @@
pass # intentionally avoiding super call
# These should be equal, but not the same object lest mutating the instance
# mutates the class.
- self.assertFalse(
- Subclass()._get_or_create_type_hints() is Subclass._type_hints)
+ self.assertIsNot(
+ Subclass()._get_or_create_type_hints(), Subclass._type_hints)
self.assertEqual(
Subclass().get_type_hints(), Subclass._type_hints)
self.assertNotEqual(
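The new strip_iterable tests pin down how a hinted iterable output is unwrapped to its element type (used when a DoFn's declared output is a generator or list of elements). In the shape the tests use, roughly (assuming strip_iterable returns the resulting IOTypeHints, as the tests above do):

    from apache_beam.typehints import decorators, typehints

    th = decorators.IOTypeHints(output_types=((typehints.Iterable[str],), {}))
    stripped = th.strip_iterable()
    assert stripped.output_types == ((str,), {})
    # Non-iterable hints such as Union[str, int] raise ValueError instead.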
diff --git a/sdks/python/apache_beam/typehints/decorators_test_py3.py b/sdks/python/apache_beam/typehints/decorators_test_py3.py
index d48f845..647a4fa 100644
--- a/sdks/python/apache_beam/typehints/decorators_test_py3.py
+++ b/sdks/python/apache_beam/typehints/decorators_test_py3.py
@@ -17,8 +17,11 @@
"""Tests for decorators module with Python 3 syntax not supported by 2.7."""
+# pytype: skip-file
+
from __future__ import absolute_import
+import typing
import unittest
# patches unittest.TestCase to be python3 compatible
@@ -33,18 +36,20 @@
decorators._enable_from_callable = True
T = TypeVariable('T')
+# Name is 'T' so it converts to a beam type with the same name.
+T_typing = typing.TypeVar('T')
class IOTypeHintsTest(unittest.TestCase):
def test_from_callable(self):
def fn(a: int, b: str = None, *args: Tuple[T], foo: List[int],
- **kwargs: Dict[str, str]) -> Tuple:
+ **kwargs: Dict[str, str]) -> Tuple[Any, ...]:
return a, b, args, foo, kwargs
th = decorators.IOTypeHints.from_callable(fn)
self.assertEqual(th.input_types, (
(int, str, Tuple[T]), {'foo': List[int], 'kwargs': Dict[str, str]}))
- self.assertEqual(th.output_types, ((Tuple,), {}))
+ self.assertEqual(th.output_types, ((Tuple[Any, ...],), {}))
def test_from_callable_partial_annotations(self):
def fn(a: int, b=None, *args, foo: List[int], **kwargs):
@@ -77,9 +82,22 @@
self.assertEqual(th.input_types, ((T,), {}))
self.assertEqual(th.output_types, ((None,), {}))
+ def test_from_callable_convert_to_beam_types(self):
+ def fn(a: typing.List[int],
+ b: str = None,
+ *args: typing.Tuple[T_typing],
+ foo: typing.List[int],
+ **kwargs: typing.Dict[str, str]) -> typing.Tuple[typing.Any, ...]:
+ return a, b, args, foo, kwargs
+ th = decorators.IOTypeHints.from_callable(fn)
+ self.assertEqual(th.input_types, (
+ (List[int], str, Tuple[T]),
+ {'foo': List[int], 'kwargs': Dict[str, str]}))
+ self.assertEqual(th.output_types, ((Tuple[Any, ...],), {}))
+
def test_getcallargs_forhints(self):
def fn(a: int, b: str = None, *args: Tuple[T], foo: List[int],
- **kwargs: Dict[str, str]) -> Tuple:
+ **kwargs: Dict[str, str]) -> Tuple[Any, ...]:
return a, b, args, foo, kwargs
callargs = decorators.getcallargs_forhints(fn, float, foo=List[str])
self.assertDictEqual(callargs,
@@ -91,7 +109,7 @@
def test_getcallargs_forhints_default_arg(self):
# Default args are not necessarily types, so they should be ignored.
- def fn(a=List[int], b=None, *args, foo=(), **kwargs) -> Tuple:
+ def fn(a=List[int], b=None, *args, foo=(), **kwargs) -> Tuple[Any, ...]:
return a, b, args, foo, kwargs
callargs = decorators.getcallargs_forhints(fn)
self.assertDictEqual(callargs,
diff --git a/sdks/python/apache_beam/typehints/native_type_compatibility.py b/sdks/python/apache_beam/typehints/native_type_compatibility.py
index d73a1cf..d7e45a1 100644
--- a/sdks/python/apache_beam/typehints/native_type_compatibility.py
+++ b/sdks/python/apache_beam/typehints/native_type_compatibility.py
@@ -17,9 +17,12 @@
"""Module to convert Python's native typing types to Beam types."""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
+import logging
import sys
import typing
from builtins import next
@@ -27,6 +30,8 @@
from apache_beam.typehints import typehints
+_LOGGER = logging.getLogger(__name__)
+
# Describes an entry in the type map in convert_to_beam_type.
# match is a function that takes a user type and returns whether the conversion
# should trigger.
@@ -44,7 +49,10 @@
# __union_params__ argument respectively.
if (3, 0, 0) <= sys.version_info[0:3] < (3, 5, 3):
if getattr(typ, '__tuple_params__', None) is not None:
- return typ.__tuple_params__
+ if typ.__tuple_use_ellipsis__:
+ return typ.__tuple_params__ + (Ellipsis,)
+ else:
+ return typ.__tuple_params__
elif getattr(typ, '__union_params__', None) is not None:
return typ.__union_params__
return None
@@ -176,14 +184,15 @@
# Mapping from typing.TypeVar/typehints.TypeVariable ids to an object of the
# other type. Bidirectional mapping preserves typing.TypeVar instances.
-_type_var_cache = {}
+_type_var_cache = {} # type: typing.Dict[int, typehints.TypeVariable]
def convert_to_beam_type(typ):
"""Convert a given typing type to a Beam type.
Args:
- typ (type): typing type.
+ typ (`typing.Union[type, str]`): typing type or string literal representing
+ a type.
Returns:
type: The given type converted to a Beam type as far as we can do the
@@ -203,6 +212,11 @@
_type_var_cache[id(typ)] = new_type_variable
_type_var_cache[id(new_type_variable)] = typ
return _type_var_cache[id(typ)]
+ elif isinstance(typ, str):
+ # Special case for forward references.
+ # TODO(BEAM-8487): Currently unhandled.
+ _LOGGER.info('Converting string literal type hint to Any: "%s"', typ)
+ return typehints.Any
elif getattr(typ, '__module__', None) != 'typing':
# Only translate types from the typing module.
return typ
@@ -251,8 +265,9 @@
# Find the first matching entry.
matched_entry = next((entry for entry in type_map if entry.match(typ)), None)
if not matched_entry:
- # No match: return original type.
- return typ
+ # Please add missing type support if you see this message.
+ _LOGGER.info('Using Any for unsupported type: %s', typ)
+ return typehints.Any
if matched_entry.arity == -1:
arity = _len_arg(typ)
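Two fallbacks are added to convert_to_beam_type: string-literal forward references (BEAM-8487) and otherwise unsupported typing constructs now degrade to Any with an informational log instead of passing through unchanged. Mirroring the new test below:

    import typing
    from apache_beam.typehints import typehints
    from apache_beam.typehints.native_type_compatibility import convert_to_beam_type

    assert convert_to_beam_type(typing.List[int]) == typehints.List[int]
    # Forward references are not resolved yet; they fall back to Any.
    assert convert_to_beam_type('typing.List[int]') == typehints.Any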
diff --git a/sdks/python/apache_beam/typehints/native_type_compatibility_test.py b/sdks/python/apache_beam/typehints/native_type_compatibility_test.py
index bca9d50..969f9c7 100644
--- a/sdks/python/apache_beam/typehints/native_type_compatibility_test.py
+++ b/sdks/python/apache_beam/typehints/native_type_compatibility_test.py
@@ -17,6 +17,8 @@
"""Test for Beam type compatibility library."""
+# pytype: skip-file
+
from __future__ import absolute_import
import sys
@@ -69,7 +71,7 @@
('arbitrary-length tuple', typing.Tuple[int, ...],
typehints.Tuple[int, ...])
if sys.version_info >= (3, 5, 4) else None,
- ('flat alias', _TestFlatAlias, typehints.Tuple[bytes, float]),
+ ('flat alias', _TestFlatAlias, typehints.Tuple[bytes, float]), # type: ignore[misc]
('nested alias', _TestNestedAlias,
typehints.List[typehints.Tuple[bytes, float]]),
('complex dict',
@@ -102,6 +104,11 @@
typehints.Iterator[int],
convert_to_beam_type(typing.Generator[int, None, None]))
+ def test_string_literal_converted_to_any(self):
+ self.assertEqual(
+ typehints.Any,
+ convert_to_beam_type('typing.List[int]'))
+
def test_convert_nested_to_beam_type(self):
self.assertEqual(
typehints.List[typing.Any],
diff --git a/sdks/python/apache_beam/typehints/opcodes.py b/sdks/python/apache_beam/typehints/opcodes.py
index 6aa5de2..8061470 100644
--- a/sdks/python/apache_beam/typehints/opcodes.py
+++ b/sdks/python/apache_beam/typehints/opcodes.py
@@ -26,6 +26,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import inspect
diff --git a/sdks/python/apache_beam/typehints/schemas.py b/sdks/python/apache_beam/typehints/schemas.py
index 812cbe1..eae83cd 100644
--- a/sdks/python/apache_beam/typehints/schemas.py
+++ b/sdks/python/apache_beam/typehints/schemas.py
@@ -41,6 +41,8 @@
ByteString <-----> BYTES
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import sys
diff --git a/sdks/python/apache_beam/typehints/schemas_test.py b/sdks/python/apache_beam/typehints/schemas_test.py
index 9dd1bc2..b8916d8 100644
--- a/sdks/python/apache_beam/typehints/schemas_test.py
+++ b/sdks/python/apache_beam/typehints/schemas_test.py
@@ -16,6 +16,8 @@
#
"""Tests for schemas."""
+# pytype: skip-file
+
from __future__ import absolute_import
import itertools
diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py
index c67cb7b..c5502d0 100644
--- a/sdks/python/apache_beam/typehints/trivial_inference.py
+++ b/sdks/python/apache_beam/typehints/trivial_inference.py
@@ -19,6 +19,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
@@ -40,7 +42,7 @@
try: # Python 2
import __builtin__ as builtins
except ImportError: # Python 3
- import builtins
+ import builtins # type: ignore
# pylint: enable=wrong-import-order, wrong-import-position, ungrouped-imports
diff --git a/sdks/python/apache_beam/typehints/trivial_inference_test.py b/sdks/python/apache_beam/typehints/trivial_inference_test.py
index ff0949b..4fdca82 100644
--- a/sdks/python/apache_beam/typehints/trivial_inference_test.py
+++ b/sdks/python/apache_beam/typehints/trivial_inference_test.py
@@ -17,6 +17,8 @@
"""Tests for apache_beam.typehints.trivial_inference."""
+# pytype: skip-file
+
from __future__ import absolute_import
import sys
diff --git a/sdks/python/apache_beam/typehints/trivial_inference_test_py3.py b/sdks/python/apache_beam/typehints/trivial_inference_test_py3.py
index 291e52e..9c368a7 100644
--- a/sdks/python/apache_beam/typehints/trivial_inference_test_py3.py
+++ b/sdks/python/apache_beam/typehints/trivial_inference_test_py3.py
@@ -18,6 +18,8 @@
"""Tests for apache_beam.typehints.trivial_inference that use Python 3 syntax.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/typehints/typecheck.py b/sdks/python/apache_beam/typehints/typecheck.py
index e9187f0..09b0cb2 100644
--- a/sdks/python/apache_beam/typehints/typecheck.py
+++ b/sdks/python/apache_beam/typehints/typecheck.py
@@ -20,6 +20,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
diff --git a/sdks/python/apache_beam/typehints/typed_pipeline_test.py b/sdks/python/apache_beam/typehints/typed_pipeline_test.py
index 9726064..fafd386 100644
--- a/sdks/python/apache_beam/typehints/typed_pipeline_test.py
+++ b/sdks/python/apache_beam/typehints/typed_pipeline_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the type-hint objects and decorators."""
+# pytype: skip-file
+
from __future__ import absolute_import
import sys
diff --git a/sdks/python/apache_beam/typehints/typed_pipeline_test_py3.py b/sdks/python/apache_beam/typehints/typed_pipeline_test_py3.py
index e49b11c..81fa5c1 100644
--- a/sdks/python/apache_beam/typehints/typed_pipeline_test_py3.py
+++ b/sdks/python/apache_beam/typehints/typed_pipeline_test_py3.py
@@ -18,6 +18,8 @@
"""Unit tests for type-hint objects and decorators - Python 3 syntax specific.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
@@ -147,7 +149,7 @@
r'requires.*str.*got.*int.*side_input'):
_ = [1, 2, 3] | beam.ParDo(my_do_fn, side_input=1)
- def test_type_dofn_var_kwargs(self):
+ def test_typed_dofn_var_kwargs(self):
class MyDoFn(beam.DoFn):
def process(self, element: int, **side_inputs: typehints.Dict[str, str]) \
-> typehints.Generator[typehints.Optional[int]]:
@@ -161,6 +163,21 @@
r'requires.*str.*got.*int.*side_inputs'):
_ = [1, 2, 3] | beam.ParDo(my_do_fn, a=1)
+ def test_typed_callable_string_literals(self):
+ def do_fn(element: 'int') -> 'typehints.List[str]':
+ return [[str(element)] * 2]
+
+ result = [1, 2] | beam.ParDo(do_fn)
+ self.assertEqual([['1', '1'], ['2', '2']], sorted(result))
+
+ def test_typed_dofn_string_literals(self):
+ class MyDoFn(beam.DoFn):
+ def process(self, element: 'int') -> 'typehints.List[str]':
+ return [[str(element)] * 2]
+
+ result = [1, 2] | beam.ParDo(MyDoFn())
+ self.assertEqual([['1', '1'], ['2', '2']], sorted(result))
+
class AnnotationsTest(unittest.TestCase):
diff --git a/sdks/python/apache_beam/typehints/typehints.py b/sdks/python/apache_beam/typehints/typehints.py
index b64e020..78bff46 100644
--- a/sdks/python/apache_beam/typehints/typehints.py
+++ b/sdks/python/apache_beam/typehints/typehints.py
@@ -63,6 +63,8 @@
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import collections
@@ -70,6 +72,7 @@
import logging
import sys
import types
+import typing
from builtins import next
from builtins import zip
@@ -1030,7 +1033,7 @@
IteratorTypeConstraint = IteratorHint.IteratorTypeConstraint
-class WindowedTypeConstraint(with_metaclass(GetitemConstructor,
+class WindowedTypeConstraint(with_metaclass(GetitemConstructor, # type: ignore[misc]
TypeConstraint)):
"""A type constraint for WindowedValue objects.
@@ -1114,7 +1117,7 @@
# There is a circular dependency between defining this mapping
# and using it in normalize(). Initialize it here and populate
# it below.
-_KNOWN_PRIMITIVE_TYPES = {}
+_KNOWN_PRIMITIVE_TYPES = {} # type: typing.Dict[type, typing.Any]
def normalize(x, none_as_type=False):
@@ -1174,7 +1177,7 @@
"""Obtains the type of elements yielded by an iterable.
Note that "iterable" here means: can be iterated over in a for loop, excluding
- strings.
+ strings and dicts.
Args:
type_hint: (TypeConstraint) The iterable in question. Must be normalize()-d.
@@ -1190,7 +1193,10 @@
if is_consistent_with(type_hint, Iterator[Any]):
return type_hint.yielded_type
if is_consistent_with(type_hint, Tuple[Any, ...]):
- return Union[type_hint.tuple_types]
+ if isinstance(type_hint, TupleConstraint):
+ return Union[type_hint.tuple_types]
+ else: # TupleSequenceConstraint
+ return type_hint.inner_type
if is_consistent_with(type_hint, Iterable[Any]):
return type_hint.inner_type
raise ValueError('%s is not iterable' % type_hint)
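The hunk above makes the element-type helper in typehints.py treat a homogeneous Tuple[int, ...] (a TupleSequenceConstraint) differently from a fixed-arity tuple hint. A rough sketch of the distinction, using only attributes that appear in the diff (tuple_types for fixed tuples, inner_type for the Ellipsis form); outputs are illustrative:

from apache_beam.typehints import typehints

fixed = typehints.Tuple[int, str]        # fixed-arity tuple hint
homogeneous = typehints.Tuple[int, ...]  # homogeneous tuple hint

print(typehints.Union[fixed.tuple_types])  # Union[int, str]
print(homogeneous.inner_type)              # int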
diff --git a/sdks/python/apache_beam/typehints/typehints_test.py b/sdks/python/apache_beam/typehints/typehints_test.py
index 9b73a7f..38c0118 100644
--- a/sdks/python/apache_beam/typehints/typehints_test.py
+++ b/sdks/python/apache_beam/typehints/typehints_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the type-hint objects and decorators."""
+# pytype: skip-file
+
from __future__ import absolute_import
import functools
diff --git a/sdks/python/apache_beam/typehints/typehints_test_py3.py b/sdks/python/apache_beam/typehints/typehints_test_py3.py
index 01df57c..56042ff 100644
--- a/sdks/python/apache_beam/typehints/typehints_test_py3.py
+++ b/sdks/python/apache_beam/typehints/typehints_test_py3.py
@@ -18,6 +18,8 @@
"""Unit tests for the type-hint objects and decorators with Python 3 syntax not
supported by 2.7."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import print_function
diff --git a/sdks/python/apache_beam/utils/annotations.py b/sdks/python/apache_beam/utils/annotations.py
index af44fac..b60fb18 100644
--- a/sdks/python/apache_beam/utils/annotations.py
+++ b/sdks/python/apache_beam/utils/annotations.py
@@ -81,6 +81,8 @@
print(exp_multiply(5,6))
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import warnings
diff --git a/sdks/python/apache_beam/utils/annotations_test.py b/sdks/python/apache_beam/utils/annotations_test.py
index b9bd774..58d0f40 100644
--- a/sdks/python/apache_beam/utils/annotations_test.py
+++ b/sdks/python/apache_beam/utils/annotations_test.py
@@ -14,6 +14,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/utils/counters.py b/sdks/python/apache_beam/utils/counters.py
index dcb5683..51d1ab0 100644
--- a/sdks/python/apache_beam/utils/counters.py
+++ b/sdks/python/apache_beam/utils/counters.py
@@ -23,15 +23,22 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import threading
from builtins import hex
from builtins import object
from collections import namedtuple
+from typing import TYPE_CHECKING
+from typing import Dict
from apache_beam.transforms import cy_combiners
+if TYPE_CHECKING:
+ from apache_beam.transforms import core
+
# Information identifying the IO being measured by a counter.
#
# A CounterName with IOTarget helps identify the IO being measured by a
@@ -45,6 +52,7 @@
def side_input_id(step_name, input_index):
+ # type: (str, int) -> IOTargetName
"""Create an IOTargetName that identifies the reading of a side input.
Given a step "s4" that receives two side inputs, then the CounterName
@@ -60,6 +68,7 @@
def shuffle_id(step_name):
+ # type: (str) -> IOTargetName
"""Create an IOTargetName that identifies a GBK step.
Given a step "s6" that is downstream from a GBK "s5", then "s6" will read
@@ -141,6 +150,7 @@
DATAFLOW_DISTRIBUTION = cy_combiners.DataflowDistributionCounterFn()
def __init__(self, name, combine_fn):
+ # type: (CounterName, core.CombineFn) -> None
"""Creates a Counter object.
Args:
@@ -177,6 +187,7 @@
"""Counter optimized for a mutating accumulator that holds all the logic."""
def __init__(self, name, combine_fn):
+ # type: (CounterName, cy_combiners.AccumulatorCombineFn) -> None
assert isinstance(combine_fn, cy_combiners.AccumulatorCombineFn)
super(AccumulatorCombineFnCounter, self).__init__(name, combine_fn)
self.reset()
@@ -193,12 +204,13 @@
"""Keeps track of unique counters."""
def __init__(self):
- self.counters = {}
+ self.counters = {} # type: Dict[CounterName, Counter]
# Lock to be acquired when accessing the counters map.
self._lock = threading.Lock()
def get_counter(self, name, combine_fn):
+ # type: (CounterName, core.CombineFn) -> Counter
"""Returns a counter with the requested name.
Passing in the same name will return the same counter; the
diff --git a/sdks/python/apache_beam/utils/counters_test.py b/sdks/python/apache_beam/utils/counters_test.py
index d868861..be1ecf9 100644
--- a/sdks/python/apache_beam/utils/counters_test.py
+++ b/sdks/python/apache_beam/utils/counters_test.py
@@ -17,6 +17,8 @@
"""Unit tests for counters and counter names."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/utils/interactive_utils.py b/sdks/python/apache_beam/utils/interactive_utils.py
index ac4e5d8..2163e94 100644
--- a/sdks/python/apache_beam/utils/interactive_utils.py
+++ b/sdks/python/apache_beam/utils/interactive_utils.py
@@ -19,6 +19,8 @@
For experimental usage only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/utils/plugin.py b/sdks/python/apache_beam/utils/plugin.py
index 1425874..7a92489 100644
--- a/sdks/python/apache_beam/utils/plugin.py
+++ b/sdks/python/apache_beam/utils/plugin.py
@@ -20,6 +20,8 @@
For experimental usage only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
diff --git a/sdks/python/apache_beam/utils/processes.py b/sdks/python/apache_beam/utils/processes.py
index cfd82bd..d1214d3 100644
--- a/sdks/python/apache_beam/utils/processes.py
+++ b/sdks/python/apache_beam/utils/processes.py
@@ -20,6 +20,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import platform
diff --git a/sdks/python/apache_beam/utils/processes_test.py b/sdks/python/apache_beam/utils/processes_test.py
index 354a076..491bfeb 100644
--- a/sdks/python/apache_beam/utils/processes_test.py
+++ b/sdks/python/apache_beam/utils/processes_test.py
@@ -16,6 +16,8 @@
#
"""Unit tests for the processes module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import subprocess
diff --git a/sdks/python/apache_beam/utils/profiler.py b/sdks/python/apache_beam/utils/profiler.py
index c6f7295..1b42104 100644
--- a/sdks/python/apache_beam/utils/profiler.py
+++ b/sdks/python/apache_beam/utils/profiler.py
@@ -20,6 +20,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import cProfile # pylint: disable=bad-python3-import
@@ -33,6 +35,8 @@
import warnings
from builtins import object
from threading import Timer
+from typing import Callable
+from typing import Optional
from apache_beam.io import filesystems
@@ -97,6 +101,7 @@
@staticmethod
def factory_from_options(options):
+ # type: (...) -> Optional[Callable[..., Profile]]
if options.profile_cpu:
def create_profiler(profile_id, **kwargs):
if random.random() < options.profile_sample_rate:
diff --git a/sdks/python/apache_beam/utils/proto_utils.py b/sdks/python/apache_beam/utils/proto_utils.py
index 79464c2..da9ca58 100644
--- a/sdks/python/apache_beam/utils/proto_utils.py
+++ b/sdks/python/apache_beam/utils/proto_utils.py
@@ -17,13 +17,35 @@
"""For internal use only; no backwards-compatibility guarantees."""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
+from typing import Type
+from typing import TypeVar
+from typing import Union
+from typing import overload
+
from google.protobuf import any_pb2
+from google.protobuf import message
from google.protobuf import struct_pb2
from google.protobuf import timestamp_pb2
+MessageT = TypeVar('MessageT', bound=message.Message)
+
+
+@overload
+def pack_Any(msg):
+ # type: (message.Message) -> any_pb2.Any
+ pass
+
+
+@overload
+def pack_Any(msg):
+ # type: (None) -> None
+ pass
+
def pack_Any(msg):
"""Creates a protobuf Any with msg as its content.
@@ -38,6 +60,18 @@
return result
+@overload
+def unpack_Any(any_msg, msg_class):
+ # type: (any_pb2.Any, Type[MessageT]) -> MessageT
+ pass
+
+
+@overload
+def unpack_Any(any_msg, msg_class):
+ # type: (any_pb2.Any, None) -> None
+ pass
+
+
def unpack_Any(any_msg, msg_class):
"""Unpacks any_msg into msg_class.
@@ -50,6 +84,18 @@
return msg
+@overload
+def parse_Bytes(serialized_bytes, msg_class):
+ # type: (bytes, Type[MessageT]) -> MessageT
+ pass
+
+
+@overload
+def parse_Bytes(serialized_bytes, msg_class):
+ # type: (bytes, Union[Type[bytes], None]) -> bytes
+ pass
+
+
def parse_Bytes(serialized_bytes, msg_class):
"""Parses the String of bytes into msg_class.
@@ -62,6 +108,7 @@
def pack_Struct(**kwargs):
+ # type: (...) -> struct_pb2.Struct
"""Returns a struct containing the values indicated by kwargs.
"""
msg = struct_pb2.Struct()
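The @overload stubs above use comment-style annotations (Python 2 compatible) and exist only for type checkers; the single undecorated definition that follows each group is what runs. A minimal sketch of the same pattern applied to a hypothetical helper (not Beam code):

from typing import Type, TypeVar, overload

from google.protobuf import message

MessageT = TypeVar('MessageT', bound=message.Message)


@overload
def parse_bytes_example(data, msg_class):
  # type: (bytes, Type[MessageT]) -> MessageT
  pass


@overload
def parse_bytes_example(data, msg_class):
  # type: (bytes, None) -> bytes
  pass


def parse_bytes_example(data, msg_class):
  # Runtime behaviour: return the raw bytes when no message class is given,
  # otherwise parse them into an instance of that class.
  if msg_class is None:
    return data
  msg = msg_class()
  msg.ParseFromString(data)
  return msg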
diff --git a/sdks/python/apache_beam/utils/retry.py b/sdks/python/apache_beam/utils/retry.py
index e34e364..d62c3cf 100644
--- a/sdks/python/apache_beam/utils/retry.py
+++ b/sdks/python/apache_beam/utils/retry.py
@@ -25,6 +25,8 @@
needed right now use a @retry.no_retries decorator.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
import functools
@@ -46,8 +48,15 @@
# TODO(sourabhbajaj): Remove the GCP specific error code to a submodule
try:
from apitools.base.py.exceptions import HttpError
-except ImportError:
+except ImportError as e:
HttpError = None
+
+# Protect against environments where AWS tools are not available.
+# pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports
+try:
+ from apache_beam.io.aws.clients.s3.messages import S3ClientError
+except ImportError:
+ S3ClientError = None
# pylint: enable=wrong-import-order, wrong-import-position
@@ -104,6 +113,8 @@
"""Filter allowing retries on server errors and non-HttpErrors."""
if (HttpError is not None) and isinstance(exception, HttpError):
return exception.status_code >= 500
+ if (S3ClientError is not None) and isinstance(exception, S3ClientError):
+ return exception.code >= 500
return not isinstance(exception, PermanentException)
@@ -120,6 +131,9 @@
if HttpError is not None and isinstance(exception, HttpError):
if exception.status_code == 408: # 408 Request Timeout
return True
+ if S3ClientError is not None and isinstance(exception, S3ClientError):
+ if exception.code == 408: # 408 Request Timeout
+ return True
return retry_on_server_errors_filter(exception)
@@ -131,6 +145,9 @@
if HttpError is not None and isinstance(exception, HttpError):
if exception.status_code == 403:
return True
+ if S3ClientError is not None and isinstance(exception, S3ClientError):
+ if exception.code == 403:
+ return True
return retry_on_server_errors_and_timeout_filter(exception)
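With this change the shared filters treat S3ClientError the same way as HttpError: retry on 5xx plus the dedicated 408/403 cases. A hedged sketch of how such a filter plugs into the module's decorator; with_exponential_backoff and PermanentException already exist in apache_beam.utils.retry, while MyTransientError is hypothetical:

from apache_beam.utils import retry


class MyTransientError(Exception):
  """Hypothetical error carrying an HTTP-like status code."""

  def __init__(self, code):
    super(MyTransientError, self).__init__(code)
    self.code = code


def retry_on_my_transient_errors(exception):
  # Mirrors retry_on_server_errors_filter: retry 5xx, never retry a
  # PermanentException, retry everything else.
  if isinstance(exception, MyTransientError):
    return exception.code >= 500
  return not isinstance(exception, retry.PermanentException)


@retry.with_exponential_backoff(
    num_retries=3, retry_filter=retry_on_my_transient_errors)
def flaky_call():
  pass  # e.g. an S3 or GCS request that may raise MyTransientError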
diff --git a/sdks/python/apache_beam/utils/retry_test.py b/sdks/python/apache_beam/utils/retry_test.py
index 7f471fb..8053b1d 100644
--- a/sdks/python/apache_beam/utils/retry_test.py
+++ b/sdks/python/apache_beam/utils/retry_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the retry module."""
+# pytype: skip-file
+
from __future__ import absolute_import
import unittest
diff --git a/sdks/python/apache_beam/utils/subprocess_server.py b/sdks/python/apache_beam/utils/subprocess_server.py
index fd55f18..b7c8030 100644
--- a/sdks/python/apache_beam/utils/subprocess_server.py
+++ b/sdks/python/apache_beam/utils/subprocess_server.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import logging
diff --git a/sdks/python/apache_beam/utils/thread_pool_executor.py b/sdks/python/apache_beam/utils/thread_pool_executor.py
index aba8f5ad..903d9f7 100644
--- a/sdks/python/apache_beam/utils/thread_pool_executor.py
+++ b/sdks/python/apache_beam/utils/thread_pool_executor.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+# pytype: skip-file
+
from __future__ import absolute_import
import sys
diff --git a/sdks/python/apache_beam/utils/thread_pool_executor_test.py b/sdks/python/apache_beam/utils/thread_pool_executor_test.py
index c82d0f9..d9bbae4 100644
--- a/sdks/python/apache_beam/utils/thread_pool_executor_test.py
+++ b/sdks/python/apache_beam/utils/thread_pool_executor_test.py
@@ -17,6 +17,8 @@
"""Unit tests for UnboundedThreadPoolExecutor."""
+# pytype: skip-file
+
from __future__ import absolute_import
import itertools
diff --git a/sdks/python/apache_beam/utils/timestamp.py b/sdks/python/apache_beam/utils/timestamp.py
index 429f074..babe5dc 100644
--- a/sdks/python/apache_beam/utils/timestamp.py
+++ b/sdks/python/apache_beam/utils/timestamp.py
@@ -20,6 +20,8 @@
For internal use only; no backwards-compatibility guarantees.
"""
+# pytype: skip-file
+
from __future__ import absolute_import
from __future__ import division
@@ -27,6 +29,9 @@
import functools
import time
from builtins import object
+from typing import Any
+from typing import Union
+from typing import overload
import dateutil.parser
import pytz
@@ -36,6 +41,11 @@
from apache_beam.portability import common_urns
+# types compatible with Timestamp.of()
+TimestampTypes = Union[int, float, 'Timestamp']
+# types compatible with Duration.of()
+DurationTypes = Union[int, float, 'Duration']
+
@functools.total_ordering
class Timestamp(object):
@@ -50,6 +60,7 @@
"""
def __init__(self, seconds=0, micros=0):
+ # type: (Union[int, float], Union[int, float]) -> None
if not isinstance(seconds, (int, long, float)):
raise TypeError('Cannot interpret %s %s as seconds.' % (
seconds, type(seconds)))
@@ -60,6 +71,7 @@
@staticmethod
def of(seconds):
+ # type: (TimestampTypes) -> Timestamp
"""Return the Timestamp for the given number of seconds.
If the input is already a Timestamp, the input itself will be returned.
@@ -169,14 +181,17 @@
micros=timestamp_proto.nanos // 1000)
def __float__(self):
+ # type: () -> float
# Note that the returned value may have lost precision.
return self.micros / 1000000
def __int__(self):
+ # type: () -> int
# Note that the returned value may have lost precision.
return self.micros // 1000000
def __eq__(self, other):
+ # type: (Union[int, float, Timestamp, Duration]) -> bool
# Allow comparisons between Duration and Timestamp values.
if not isinstance(other, Duration):
try:
@@ -186,10 +201,12 @@
return self.micros == other.micros
def __ne__(self, other):
+ # type: (Any) -> bool
# TODO(BEAM-5949): Needed for Python 2 compatibility.
return not self == other
def __lt__(self, other):
+ # type: (Union[int, float, Timestamp, Duration]) -> bool
# Allow comparisons between Duration and Timestamp values.
if not isinstance(other, Duration):
other = Timestamp.of(other)
@@ -199,12 +216,24 @@
return hash(self.micros)
def __add__(self, other):
+ # type: (DurationTypes) -> Timestamp
other = Duration.of(other)
return Timestamp(micros=self.micros + other.micros)
def __radd__(self, other):
+ # type: (DurationTypes) -> Timestamp
return self + other
+ @overload
+ def __sub__(self, other):
+ # type: (DurationTypes) -> Timestamp
+ pass
+
+ @overload
+ def __sub__(self, other):
+ # type: (Timestamp) -> Duration
+ pass
+
def __sub__(self, other):
if isinstance(other, Timestamp):
return Duration(micros=self.micros - other.micros)
@@ -212,6 +241,7 @@
return Timestamp(micros=self.micros - other.micros)
def __mod__(self, other):
+ # type: (DurationTypes) -> Duration
other = Duration.of(other)
return Duration(micros=self.micros % other.micros)
@@ -235,10 +265,12 @@
"""
def __init__(self, seconds=0, micros=0):
+ # type: (Union[int, float], Union[int, float]) -> None
self.micros = int(seconds * 1000000) + int(micros)
@staticmethod
def of(seconds):
+ # type: (DurationTypes) -> Duration
"""Return the Duration for the given number of seconds since Unix epoch.
If the input is already a Duration, the input itself will be returned.
@@ -295,20 +327,24 @@
return 'Duration(%s%d)' % (sign, int_part)
def __float__(self):
+ # type: () -> float
# Note that the returned value may have lost precision.
return self.micros / 1000000
def __eq__(self, other):
+ # type: (Union[int, float, Duration, Timestamp]) -> bool
# Allow comparisons between Duration and Timestamp values.
if not isinstance(other, Timestamp):
other = Duration.of(other)
return self.micros == other.micros
def __ne__(self, other):
+ # type: (Any) -> bool
# TODO(BEAM-5949): Needed for Python 2 compatibility.
return not self == other
def __lt__(self, other):
+ # type: (Union[int, float, Duration, Timestamp]) -> bool
# Allow comparisons between Duration and Timestamp values.
if not isinstance(other, Timestamp):
other = Duration.of(other)
@@ -318,9 +354,11 @@
return hash(self.micros)
def __neg__(self):
+ # type: () -> Duration
return Duration(micros=-self.micros)
def __add__(self, other):
+ # type: (DurationTypes) -> Duration
if isinstance(other, Timestamp):
return other + self
other = Duration.of(other)
@@ -330,6 +368,7 @@
return self + other
def __sub__(self, other):
+ # type: (DurationTypes) -> Duration
other = Duration.of(other)
return Duration(micros=self.micros - other.micros)
@@ -337,6 +376,7 @@
return -(self - other)
def __mul__(self, other):
+ # type: (DurationTypes) -> Duration
other = Duration.of(other)
return Duration(micros=self.micros * other.micros // 1000000)
@@ -344,6 +384,7 @@
return self * other
def __mod__(self, other):
+ # type: (DurationTypes) -> Duration
other = Duration.of(other)
return Duration(micros=self.micros % other.micros)
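The TimestampTypes/DurationTypes aliases and the __sub__ overloads above spell out, for type checkers, the arithmetic rules the runtime code already enforces. A quick usage sketch:

from apache_beam.utils.timestamp import Duration, Timestamp

t1 = Timestamp.of(10)    # ints and floats are accepted (TimestampTypes)
t2 = Timestamp.of(4.5)

assert isinstance(t1 - t2, Duration)                 # Timestamp - Timestamp -> Duration
assert isinstance(t1 - Duration.of(2), Timestamp)    # Timestamp - Duration  -> Timestamp
assert isinstance(t1 + 1.5, Timestamp)               # DurationTypes accepted on the right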
diff --git a/sdks/python/apache_beam/utils/timestamp_test.py b/sdks/python/apache_beam/utils/timestamp_test.py
index e258a5b..18e31f7 100644
--- a/sdks/python/apache_beam/utils/timestamp_test.py
+++ b/sdks/python/apache_beam/utils/timestamp_test.py
@@ -17,6 +17,8 @@
"""Unit tests for time utilities."""
+# pytype: skip-file
+
from __future__ import absolute_import
import datetime
@@ -184,6 +186,14 @@
expected_ts_proto = timestamp_pb2.Timestamp(seconds=1234, nanos=56000)
self.assertEqual(actual_ts_proto, expected_ts_proto)
+ def test_equality(self):
+ for min_val in (Timestamp(1), Duration(1), 1, 1.1):
+ for max_val in (Timestamp(123), Duration(123), 123, 123.4):
+ self.assertTrue(min_val < max_val, "%s < %s" % (min_val, max_val))
+ self.assertTrue(min_val <= max_val, "%s <= %s" % (min_val, max_val))
+ self.assertTrue(max_val > min_val, "%s > %s" % (max_val, min_val))
+ self.assertTrue(max_val >= min_val, "%s >= %s" % (max_val, min_val))
+
class DurationTest(unittest.TestCase):
diff --git a/sdks/python/apache_beam/utils/urns.py b/sdks/python/apache_beam/utils/urns.py
index 4e9c357..1bd31e9 100644
--- a/sdks/python/apache_beam/utils/urns.py
+++ b/sdks/python/apache_beam/utils/urns.py
@@ -17,11 +17,23 @@
"""For internal use only; no backwards-compatibility guarantees."""
+# pytype: skip-file
+
from __future__ import absolute_import
import abc
import inspect
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import Optional
+from typing import Tuple
+from typing import Type
+from typing import TypeVar
+from typing import Union
+from typing import overload
from google.protobuf import message
from google.protobuf import wrappers_pb2
@@ -29,13 +41,23 @@
from apache_beam.internal import pickler
from apache_beam.utils import proto_utils
+if TYPE_CHECKING:
+ from apache_beam.portability.api import beam_runner_api_pb2
+ from apache_beam.runners.pipeline_context import PipelineContext
+
+T = TypeVar('T')
+ConstructorFn = Callable[
+ [Union['message.Message', bytes],
+ 'PipelineContext'],
+ Any]
+
class RunnerApiFn(object):
"""Abstract base class that provides urn registration utilities.
A class that inherits from this class will get a registration-based
from_runner_api and to_runner_api method that convert to and from
- beam_runner_api_pb2.SdkFunctionSpec.
+ beam_runner_api_pb2.FunctionSpec.
Additionally, register_pickle_urn can be called from the body of a class
to register serialization via pickling.
@@ -44,10 +66,11 @@
# TODO(BEAM-2685): Issue with dill + local classes + abc metaclass
# __metaclass__ = abc.ABCMeta
- _known_urns = {}
+ _known_urns = {} # type: Dict[str, Tuple[Optional[type], ConstructorFn]]
@abc.abstractmethod
def to_runner_api_parameter(self, unused_context):
+ # type: (PipelineContext) -> Tuple[str, Any]
"""Returns the urn and payload for this Fn.
The returned urn(s) should be registered with `register_urn`.
@@ -55,6 +78,44 @@
pass
@classmethod
+ @overload
+ def register_urn(cls,
+ urn, # type: str
+ parameter_type, # type: Type[T]
+ ):
+ # type: (...) -> Callable[[Callable[[T, PipelineContext], Any]], Callable[[T, PipelineContext], Any]]
+ pass
+
+ @classmethod
+ @overload
+ def register_urn(cls,
+ urn, # type: str
+ parameter_type, # type: None
+ ):
+ # type: (...) -> Callable[[Callable[[bytes, PipelineContext], Any]], Callable[[bytes, PipelineContext], Any]]
+ pass
+
+ @classmethod
+ @overload
+ def register_urn(cls,
+ urn, # type: str
+ parameter_type, # type: Type[T]
+ fn # type: Callable[[T, PipelineContext], Any]
+ ):
+ # type: (...) -> None
+ pass
+
+ @classmethod
+ @overload
+ def register_urn(cls,
+ urn, # type: str
+ parameter_type, # type: None
+ fn # type: Callable[[bytes, PipelineContext], Any]
+ ):
+ # type: (...) -> None
+ pass
+
+ @classmethod
def register_urn(cls, urn, parameter_type, fn=None):
"""Registers a urn with a constructor.
@@ -90,27 +151,26 @@
lambda proto, unused_context: pickler.loads(proto.value))
def to_runner_api(self, context):
- """Returns an SdkFunctionSpec encoding this Fn.
+ # type: (PipelineContext) -> beam_runner_api_pb2.FunctionSpec
+ """Returns an FunctionSpec encoding this Fn.
Prefer overriding self.to_runner_api_parameter.
"""
from apache_beam.portability.api import beam_runner_api_pb2
urn, typed_param = self.to_runner_api_parameter(context)
- return beam_runner_api_pb2.SdkFunctionSpec(
- environment_id=context.default_environment_id(),
- spec=beam_runner_api_pb2.FunctionSpec(
- urn=urn,
- payload=typed_param.SerializeToString()
- if isinstance(typed_param, message.Message)
- else typed_param))
+ return beam_runner_api_pb2.FunctionSpec(
+ urn=urn,
+ payload=typed_param.SerializeToString()
+ if isinstance(typed_param, message.Message) else typed_param)
@classmethod
def from_runner_api(cls, fn_proto, context):
- """Converts from an SdkFunctionSpec to a Fn object.
+ # type: (beam_runner_api_pb2.FunctionSpec, PipelineContext) -> Any
+ """Converts from an FunctionSpec to a Fn object.
Prefer registering a urn with its parameter type and constructor.
"""
- parameter_type, constructor = cls._known_urns[fn_proto.spec.urn]
+ parameter_type, constructor = cls._known_urns[fn_proto.urn]
return constructor(
- proto_utils.parse_Bytes(fn_proto.spec.payload, parameter_type),
+ proto_utils.parse_Bytes(fn_proto.payload, parameter_type),
context)
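The overload stubs above document the two calling conventions of register_urn: passing a constructor registers immediately and returns None, while omitting it returns a decorator. A hedged sketch with a hypothetical Fn and a made-up URN:

from apache_beam.utils import urns
from google.protobuf import wrappers_pb2


class MyFn(urns.RunnerApiFn):
  """Hypothetical Fn used only to illustrate registration."""

  def __init__(self, payload=b''):
    self.payload = payload

  def to_runner_api_parameter(self, unused_context):
    return 'example:myfn:v1', wrappers_pb2.BytesValue(value=self.payload)


# Direct form (third overload): urn, proto parameter type, constructor.
MyFn.register_urn(
    'example:myfn:v1',
    wrappers_pb2.BytesValue,
    lambda proto, unused_context: MyFn(proto.value))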
diff --git a/sdks/python/apache_beam/utils/windowed_value.py b/sdks/python/apache_beam/utils/windowed_value.py
index 95016c5..dc16a58 100644
--- a/sdks/python/apache_beam/utils/windowed_value.py
+++ b/sdks/python/apache_beam/utils/windowed_value.py
@@ -27,13 +27,23 @@
#cython: profile=True
+# pytype: skip-file
+
from __future__ import absolute_import
from builtins import object
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Optional
+from typing import Tuple
from apache_beam.utils.timestamp import MAX_TIMESTAMP
from apache_beam.utils.timestamp import MIN_TIMESTAMP
from apache_beam.utils.timestamp import Timestamp
+from apache_beam.utils.timestamp import TimestampTypes # pylint: disable=unused-import
+
+if TYPE_CHECKING:
+ from apache_beam.transforms.window import BoundedWindow
class PaneInfoTiming(object):
@@ -53,6 +63,15 @@
cls.UNKNOWN: 'UNKNOWN',
}[value]
+ @classmethod
+ def from_string(cls, value):
+ return {
+ 'EARLY': cls.EARLY,
+ 'ON_TIME': cls.ON_TIME,
+ 'LATE': cls.LATE,
+ 'UNKNOWN': cls.UNKNOWN
+ }[value]
+
class PaneInfo(object):
"""Describes the trigger firing information for a given WindowedValue.
@@ -178,12 +197,20 @@
PANE_INFO_UNKNOWN.
"""
- def __init__(self, value, timestamp, windows, pane_info=PANE_INFO_UNKNOWN):
+ def __init__(self,
+ value,
+ timestamp, # type: TimestampTypes
+ windows, # type: Tuple[BoundedWindow, ...]
+ pane_info=PANE_INFO_UNKNOWN
+ ):
+ # type: (...) -> None
# For performance reasons, only timestamp_micros is stored by default
# (as a C int). The Timestamp object is created on demand below.
self.value = value
if isinstance(timestamp, int):
self.timestamp_micros = timestamp * 1000000
+ if TYPE_CHECKING:
+ self.timestamp_object = None # type: Optional[Timestamp]
else:
self.timestamp_object = (timestamp if isinstance(timestamp, Timestamp)
else Timestamp.of(timestamp))
@@ -193,6 +220,7 @@
@property
def timestamp(self):
+ # type: () -> Timestamp
if self.timestamp_object is None:
self.timestamp_object = Timestamp(0, self.timestamp_micros)
return self.timestamp_object
@@ -224,6 +252,7 @@
11 * (hash(self.pane_info) & 0xFFFFFFFFFFFFF))
def with_value(self, new_value):
+ # type: (Any) -> WindowedValue
"""Creates a new WindowedValue with the same timestamps and windows as this.
This is the fastest way to create a new WindowedValue.
@@ -260,15 +289,21 @@
"""
def __init__(self, start, end):
- if start is not None or end is not None:
- self._start_object = Timestamp.of(start)
- self._end_object = Timestamp.of(end)
+ # type: (TimestampTypes, TimestampTypes) -> None
+ if start is not None:
+ self._start_object = Timestamp.of(start) # type: Optional[Timestamp]
try:
self._start_micros = self._start_object.micros
except OverflowError:
self._start_micros = (
MIN_TIMESTAMP.micros if self._start_object.micros < 0
else MAX_TIMESTAMP.micros)
+ else:
+ # Micros must be populated elsewhere.
+ self._start_object = None
+
+ if end is not None:
+ self._end_object = Timestamp.of(end) # type: Optional[Timestamp]
try:
self._end_micros = self._end_object.micros
except OverflowError:
@@ -277,16 +312,18 @@
else MAX_TIMESTAMP.micros)
else:
# Micros must be populated elsewhere.
- self._start_object = self._end_object = None
+ self._end_object = None
@property
def start(self):
+ # type: () -> Timestamp
if self._start_object is None:
self._start_object = Timestamp(0, self._start_micros)
return self._start_object
@property
def end(self):
+ # type: () -> Timestamp
if self._end_object is None:
self._end_object = Timestamp(0, self._end_micros)
return self._end_object
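PaneInfoTiming.from_string above is simply the inverse of the existing to_string mapping; a tiny usage sketch (assuming to_string keeps its current classmethod form):

from apache_beam.utils.windowed_value import PaneInfoTiming

assert PaneInfoTiming.from_string('ON_TIME') == PaneInfoTiming.ON_TIME
assert PaneInfoTiming.to_string(PaneInfoTiming.LATE) == 'LATE'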
diff --git a/sdks/python/apache_beam/utils/windowed_value_test.py b/sdks/python/apache_beam/utils/windowed_value_test.py
index 5549aee..27512d3 100644
--- a/sdks/python/apache_beam/utils/windowed_value_test.py
+++ b/sdks/python/apache_beam/utils/windowed_value_test.py
@@ -17,6 +17,8 @@
"""Unit tests for the windowed_value."""
+# pytype: skip-file
+
from __future__ import absolute_import
import copy
diff --git a/sdks/python/build-requirements.txt b/sdks/python/build-requirements.txt
new file mode 100644
index 0000000..b4fcd87
--- /dev/null
+++ b/sdks/python/build-requirements.txt
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# TODO(BEAM-5414): latest grpcio-tools incompatible with latest protobuf 3.6.1.
+grpcio-tools>=1.3.5,<=1.14.2
+future==0.16.0
diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go
index 71da7b9..71775c4 100644
--- a/sdks/python/container/boot.go
+++ b/sdks/python/container/boot.go
@@ -143,6 +143,10 @@
os.Setenv("LOGGING_API_SERVICE_DESCRIPTOR", proto.MarshalTextString(&pbpipeline.ApiServiceDescriptor{Url: *loggingEndpoint}))
os.Setenv("CONTROL_API_SERVICE_DESCRIPTOR", proto.MarshalTextString(&pbpipeline.ApiServiceDescriptor{Url: *controlEndpoint}))
+ if info.GetStatusEndpoint() != nil {
+ os.Setenv("STATUS_API_SERVICE_DESCRIPTOR", proto.MarshalTextString(info.GetStatusEndpoint()))
+ }
+
args := []string{
"-m",
sdkHarnessEntrypoint,
diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py
index 871f9ec..5105ad5 100644
--- a/sdks/python/gen_protos.py
+++ b/sdks/python/gen_protos.py
@@ -32,9 +32,6 @@
import pkg_resources
-# TODO(BEAM-5414): latest grpcio-tools incompatible with latest protobuf 3.6.1.
-GRPC_TOOLS = 'grpcio-tools>=1.3.5,<=1.14.2'
-
BEAM_PROTO_PATHS = [
os.path.join('..', '..', 'model', 'pipeline', 'src', 'main', 'proto'),
os.path.join('..', '..', 'model', 'job-management', 'src', 'main', 'proto'),
@@ -123,7 +120,6 @@
if p.exitcode:
raise ValueError("Proto generation failed (see log for details).")
else:
-
log.info('Regenerating Python proto definitions (%s).' % regenerate)
builtin_protos = pkg_resources.resource_filename('grpc_tools', '_proto')
args = (
@@ -163,8 +159,8 @@
# directory and add it to the path as needed.
# See https://github.com/pypa/setuptools/issues/377
def _install_grpcio_tools_and_generate_proto_files():
- install_path = os.path.join(
- os.path.dirname(os.path.abspath(__file__)), '.eggs', 'grpcio-wheels')
+ py_sdk_root = os.path.dirname(os.path.abspath(__file__))
+ install_path = os.path.join(py_sdk_root, '.eggs', 'grpcio-wheels')
build_path = install_path + '-build'
if os.path.exists(build_path):
shutil.rmtree(build_path)
@@ -174,7 +170,8 @@
subprocess.check_call(
[sys.executable, '-m', 'pip', 'install',
'--target', install_path, '--build', build_path,
- '--upgrade', GRPC_TOOLS])
+ '--upgrade',
+ '-r', os.path.join(py_sdk_root, 'build-requirements.txt')])
logging.warning(
'Installing grpcio-tools took %0.2f seconds.', time.time() - start)
finally:
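After this change the helper installs everything listed in build-requirements.txt (instead of a hard-coded grpcio-tools pin) into a private --target directory before generating protos. Roughly, the call reduces to the following sketch; the --build flag and exact paths from the original are elided here:

import os
import subprocess
import sys

py_sdk_root = os.path.abspath('sdks/python')  # illustrative path
subprocess.check_call([
    sys.executable, '-m', 'pip', 'install',
    '--target', os.path.join(py_sdk_root, '.eggs', 'grpcio-wheels'),
    '--upgrade',
    '-r', os.path.join(py_sdk_root, 'build-requirements.txt'),
])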
diff --git a/sdks/python/mypy.ini b/sdks/python/mypy.ini
new file mode 100644
index 0000000..1e4748f
--- /dev/null
+++ b/sdks/python/mypy.ini
@@ -0,0 +1,60 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+[mypy]
+python_version = 3.6
+ignore_missing_imports = true
+follow_imports = true
+warn_no_return = true
+no_implicit_optional = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+show_error_codes = true
+files = apache_beam
+color_output = true
+# uncomment this to see how close we are to being complete
+# check_untyped_defs = true
+
+[mypy-apache_beam.coders.proto2_coder_test_messages_pb2]
+ignore_errors = true
+
+[mypy-apache_beam.examples.*]
+ignore_errors = true
+
+[mypy-apache_beam.io.gcp.gcsfilesystem_test]
+# error: Cannot infer type of lambda [misc]
+ignore_errors = true
+
+[mypy-apache_beam.io.gcp.internal.clients.storage.storage_v1_client]
+ignore_errors = true
+
+[mypy-apache_beam.io.gcp.internal.clients.bigquery.bigquery_v2_client]
+ignore_errors = true
+
+[mypy-apache_beam.portability.api.*]
+ignore_errors = true
+
+[mypy-apache_beam.runners.dataflow.internal.clients.dataflow.dataflow_v1b3_client]
+ignore_errors = true
+
+[mypy-apache_beam.typehints.typed_pipeline_test_py3]
+# error: Signature of "process" incompatible with supertype "DoFn" [override]
+ignore_errors = true
+
+[mypy-apache_beam.typehints.typehints_test_py3]
+# error: Signature of "process" incompatible with supertype "DoFn" [override]
+ignore_errors = true
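The config above scopes mypy to the apache_beam package under Python 3.6 semantics and silences modules that are known to be noisy. A small sketch of driving it programmatically through mypy's public API (mypy.api ships with the mypy distribution); running plain mypy from sdks/python is equivalent, since the config file is picked up automatically:

from mypy import api

stdout, stderr, exit_status = api.run(['--config-file', 'mypy.ini'])
print(stdout or stderr)
print('mypy exited with status', exit_status)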
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index dcd798f..8599b7f 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -25,6 +25,7 @@
import sys
import warnings
from distutils import log
+from distutils.errors import DistutilsError
from distutils.version import StrictVersion
# Pylint and isort disagree here.
@@ -32,13 +33,44 @@
import setuptools
from pkg_resources import DistributionNotFound
from pkg_resources import get_distribution
+from pkg_resources import normalize_path
+from pkg_resources import to_filename
+from setuptools import Command
from setuptools.command.build_py import build_py
-# TODO: (BEAM-8411): re-enable lint check.
-from setuptools.command.develop import develop # pylint: disable-all
+from setuptools.command.develop import develop
from setuptools.command.egg_info import egg_info
from setuptools.command.test import test
+class mypy(Command):
+ user_options = []
+
+ def initialize_options(self):
+ """Abstract method that is required to be overwritten"""
+
+ def finalize_options(self):
+ """Abstract method that is required to be overwritten"""
+
+ def get_project_path(self):
+ self.run_command('egg_info')
+
+ # Build extensions in-place
+ self.reinitialize_command('build_ext', inplace=1)
+ self.run_command('build_ext')
+
+ ei_cmd = self.get_finalized_command("egg_info")
+
+ project_path = normalize_path(ei_cmd.egg_base)
+ return os.path.join(project_path, to_filename(ei_cmd.egg_name))
+
+ def run(self):
+ import subprocess
+ args = ['mypy', self.get_project_path()]
+ result = subprocess.call(args)
+ if result != 0:
+ raise DistutilsError("mypy exited with status %d" % result)
+
+
def get_version():
global_names = {}
exec( # pylint: disable=exec-used
@@ -106,7 +138,10 @@
'avro>=1.8.1,<2.0.0; python_version < "3.0"',
'avro-python3>=1.8.1,<2.0.0; python_version >= "3.0"',
'crcmod>=1.7,<2.0',
- # Dill doesn't guarantee compatibility between releases within minor version.
+ # Dill doesn't have forwards-compatibility guarantees within minor versions.
+ # Pickles created with a newer version of dill may not unpickle with an older
+ # version of dill. It is best to use the same version of dill on the client
+ # and the server, so the list of allowed versions is kept very narrow.
# See: https://github.com/uqfoundation/dill/issues/341.
'dill>=0.3.1.1,<0.3.2',
'fastavro>=0.21.4,<0.22',
@@ -129,7 +164,9 @@
'pytz>=2018.3',
# [BEAM-5628] Beam VCF IO is not supported in Python 3.
'pyvcf>=0.6.8,<0.7.0; python_version < "3.0"',
- 'typing>=3.6.0,<3.7.0; python_version < "3.5.0"',
+ # Fixes and additions have been made to the typing module since typing 3.5.
+ 'typing>=3.7.0,<3.8.0; python_version < "3.8.0"',
+ 'typing-extensions>=3.7.0,<3.8.0; python_version < "3.8.0"',
]
# [BEAM-8181] pyarrow cannot be installed on 32-bit Windows platforms.
@@ -139,11 +176,13 @@
]
REQUIRED_TEST_PACKAGES = [
+ 'freezegun>=0.3.12',
'nose>=1.3.7',
'nose_xunitmp>=0.4.1',
'pandas>=0.23.4,<0.25',
- 'parameterized>=0.6.0,<0.7.0',
- 'pyhamcrest>=1.9,<2.0',
+ 'parameterized>=0.6.0,<0.8.0',
+ # pyhamcrest==1.10.0 requires Py3. Beam still supports Py2.
+ 'pyhamcrest>=1.9,<1.10.0',
'pyyaml>=3.12,<6.0.0',
'requests_mock>=1.7,<2.0',
'tenacity>=5.0.2,<6.0',
@@ -173,6 +212,10 @@
'jsons>=1.0.0,<2; python_version >= "3.5.3"',
'timeloop>=1.0.2,<2',
]
+AWS_REQUIREMENTS = [
+ 'boto3 >=1.9'
+]
+
# We must generate protos after setup_requires are installed.
def generate_protos_first(original_cmd):
@@ -233,6 +276,7 @@
'test': REQUIRED_TEST_PACKAGES,
'gcp': GCP_REQUIREMENTS,
'interactive': INTERACTIVE_BEAM,
+ 'aws': AWS_REQUIREMENTS
},
zip_safe=False,
# PyPI package information.
@@ -258,5 +302,6 @@
'develop': generate_protos_first(develop),
'egg_info': generate_protos_first(egg_info),
'test': generate_protos_first(test),
+ 'mypy': generate_protos_first(mypy),
},
)
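The new mypy command above builds the C extensions in place, resolves the project path from egg_info, and then shells out to mypy, raising DistutilsError on a non-zero exit; since it is wrapped with generate_protos_first in cmdclass, protos are regenerated before the check runs. A hedged sketch of invoking it from the sdks/python directory:

import subprocess
import sys

subprocess.check_call([sys.executable, 'setup.py', 'mypy'])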
diff --git a/sdks/python/test-suites/dataflow/py35/build.gradle b/sdks/python/test-suites/dataflow/py35/build.gradle
index 08ba22d..b15d24d 100644
--- a/sdks/python/test-suites/dataflow/py35/build.gradle
+++ b/sdks/python/test-suites/dataflow/py35/build.gradle
@@ -104,3 +104,16 @@
}
}
}
+
+task mongodbioIT {
+ dependsOn 'installGcpTest'
+ dependsOn ':sdks:python:sdist'
+ def opts = findProperty('opts')
+ opts = String.format("%s %s", opts, "--sdk_location=${files(configurations.distTarBall.files).singleFile}")
+ doLast {
+ exec {
+ executable 'sh'
+ args '-c', ". ${envdir}/bin/activate && python -m apache_beam.io.mongodbio_it_test ${opts}"
+ }
+ }
+}
diff --git a/sdks/python/test-suites/direct/py2/build.gradle b/sdks/python/test-suites/direct/py2/build.gradle
index e422b74..436cd7e 100644
--- a/sdks/python/test-suites/direct/py2/build.gradle
+++ b/sdks/python/test-suites/direct/py2/build.gradle
@@ -38,6 +38,8 @@
"apache_beam.io.gcp.pubsub_integration_test:PubSubIntegrationTest",
"apache_beam.io.gcp.big_query_query_to_table_it_test:BigQueryQueryToTableIT",
"apache_beam.io.gcp.bigquery_io_read_it_test",
+ "apache_beam.io.gcp.bigquery_read_it_test",
+ "apache_beam.io.gcp.bigquery_write_it_test",
"apache_beam.io.gcp.datastore.v1new.datastore_write_it_test",
]
def batchTestOpts = basicTestOpts + ["--tests=${tests.join(',')}"]
diff --git a/sdks/python/test-suites/portable/py2/build.gradle b/sdks/python/test-suites/portable/py2/build.gradle
index b55173c..f9fb5f4 100644
--- a/sdks/python/test-suites/portable/py2/build.gradle
+++ b/sdks/python/test-suites/portable/py2/build.gradle
@@ -22,6 +22,7 @@
applyPythonNature()
def pythonRootDir = "${rootDir}/sdks/python"
+def runScriptsDir = "${rootDir}/sdks/python/scripts"
/*************************************************************************************************/
@@ -39,6 +40,7 @@
dependsOn ':runners:flink:1.9:job-server:shadowJar'
dependsOn portableWordCountFlinkRunnerBatch
dependsOn portableWordCountFlinkRunnerStreaming
+ dependsOn 'postCommitPy2IT'
dependsOn ':runners:spark:job-server:shadowJar'
dependsOn portableWordCountSparkRunnerBatch
}
@@ -146,6 +148,28 @@
}
}
+task postCommitPy2IT {
+ dependsOn 'installGcpTest'
+ dependsOn 'setupVirtualenv'
+ dependsOn ':runners:flink:1.9:job-server:shadowJar'
+
+ doLast {
+ def tests = [
+ "apache_beam.io.gcp.bigquery_read_it_test",
+ ]
+ def testOpts = ["--tests=${tests.join(',')}"]
+ def cmdArgs = mapToArgString([
+ "test_opts": testOpts,
+ "suite": "postCommitIT-flink-py2",
+ "pipeline_opts": "--runner=FlinkRunner --project=apache-beam-testing --environment_type=LOOPBACK --temp_location=gs://temp-storage-for-end-to-end-tests/temp-it",
+ ])
+ exec {
+ executable 'sh'
+ args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
+ }
+ }
+}
+
task crossLanguageTests {
dependsOn "crossLanguagePythonJavaFlink"
dependsOn "crossLanguagePortableWordCount"
diff --git a/sdks/python/test-suites/portable/py35/build.gradle b/sdks/python/test-suites/portable/py35/build.gradle
index 88b4e2f..479556e 100644
--- a/sdks/python/test-suites/portable/py35/build.gradle
+++ b/sdks/python/test-suites/portable/py35/build.gradle
@@ -18,6 +18,9 @@
apply plugin: org.apache.beam.gradle.BeamModulePlugin
applyPythonNature()
+
+def runScriptsDir = "${rootDir}/sdks/python/scripts"
+
// Required to setup a Python 3.5 virtualenv.
pythonVersion = '3.5'
apply from: "../common.gradle"
@@ -36,6 +39,29 @@
dependsOn ':runners:flink:1.9:job-server:shadowJar'
dependsOn portableWordCountFlinkRunnerBatch
dependsOn portableWordCountFlinkRunnerStreaming
+ dependsOn 'postCommitPy35IT'
dependsOn ':runners:spark:job-server:shadowJar'
dependsOn portableWordCountSparkRunnerBatch
}
+
+task postCommitPy35IT {
+ dependsOn 'setupVirtualenv'
+ dependsOn 'installGcpTest'
+ dependsOn ':runners:flink:1.9:job-server:shadowJar'
+
+ doLast {
+ def tests = [
+ "apache_beam.io.gcp.bigquery_read_it_test",
+ ]
+ def testOpts = ["--tests=${tests.join(',')}"]
+ def cmdArgs = mapToArgString([
+ "test_opts": testOpts,
+ "suite": "postCommitIT-flink-py35",
+ "pipeline_opts": "--runner=FlinkRunner --project=apache-beam-testing --environment_type=LOOPBACK --temp_location=gs://temp-storage-for-end-to-end-tests/temp-it",
+ ])
+ exec {
+ executable 'sh'
+ args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
+ }
+ }
+}
diff --git a/sdks/python/test-suites/portable/py36/build.gradle b/sdks/python/test-suites/portable/py36/build.gradle
index 496777d..6cefad6 100644
--- a/sdks/python/test-suites/portable/py36/build.gradle
+++ b/sdks/python/test-suites/portable/py36/build.gradle
@@ -18,6 +18,9 @@
apply plugin: org.apache.beam.gradle.BeamModulePlugin
applyPythonNature()
+
+def runScriptsDir = "${rootDir}/sdks/python/scripts"
+
// Required to setup a Python 3.6 virtualenv.
pythonVersion = '3.6'
apply from: "../common.gradle"
@@ -36,6 +39,29 @@
dependsOn ':runners:flink:1.9:job-server:shadowJar'
dependsOn portableWordCountFlinkRunnerBatch
dependsOn portableWordCountFlinkRunnerStreaming
+ dependsOn 'postCommitPy36IT'
dependsOn ':runners:spark:job-server:shadowJar'
dependsOn portableWordCountSparkRunnerBatch
}
+
+task postCommitPy36IT {
+ dependsOn 'setupVirtualenv'
+ dependsOn 'installGcpTest'
+ dependsOn ':runners:flink:1.9:job-server:shadowJar'
+
+ doLast {
+ def tests = [
+ "apache_beam.io.gcp.bigquery_read_it_test",
+ ]
+ def testOpts = ["--tests=${tests.join(',')}"]
+ def cmdArgs = mapToArgString([
+ "test_opts": testOpts,
+ "suite": "postCommitIT-flink-py36",
+ "pipeline_opts": "--runner=FlinkRunner --project=apache-beam-testing --environment_type=LOOPBACK --temp_location=gs://temp-storage-for-end-to-end-tests/temp-it",
+ ])
+ exec {
+ executable 'sh'
+ args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
+ }
+ }
+}
diff --git a/sdks/python/test-suites/portable/py37/build.gradle b/sdks/python/test-suites/portable/py37/build.gradle
index 924de81..c7acb57 100644
--- a/sdks/python/test-suites/portable/py37/build.gradle
+++ b/sdks/python/test-suites/portable/py37/build.gradle
@@ -18,6 +18,9 @@
apply plugin: org.apache.beam.gradle.BeamModulePlugin
applyPythonNature()
+
+def runScriptsDir = "${rootDir}/sdks/python/scripts"
+
// Required to setup a Python 3.7 virtualenv.
pythonVersion = '3.7'
apply from: "../common.gradle"
@@ -36,6 +39,29 @@
dependsOn ':runners:flink:1.9:job-server:shadowJar'
dependsOn portableWordCountFlinkRunnerBatch
dependsOn portableWordCountFlinkRunnerStreaming
+ dependsOn 'postCommitPy37IT'
dependsOn ':runners:spark:job-server:shadowJar'
dependsOn portableWordCountSparkRunnerBatch
}
+
+task postCommitPy37IT {
+ dependsOn 'setupVirtualenv'
+ dependsOn 'installGcpTest'
+ dependsOn ':runners:flink:1.9:job-server:shadowJar'
+
+ doLast {
+ def tests = [
+ "apache_beam.io.gcp.bigquery_read_it_test",
+ ]
+ def testOpts = ["--tests=${tests.join(',')}"]
+ def cmdArgs = mapToArgString([
+ "test_opts": testOpts,
+ "suite": "postCommitIT-flink-py37",
+ "pipeline_opts": "--runner=FlinkRunner --project=apache-beam-testing --environment_type=LOOPBACK --temp_location=gs://temp-storage-for-end-to-end-tests/temp-it",
+ ])
+ exec {
+ executable 'sh'
+ args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
+ }
+ }
+}
diff --git a/sdks/python/test-suites/tox/py2/build.gradle b/sdks/python/test-suites/tox/py2/build.gradle
index 7749609..116b79b 100644
--- a/sdks/python/test-suites/tox/py2/build.gradle
+++ b/sdks/python/test-suites/tox/py2/build.gradle
@@ -35,34 +35,19 @@
toxTask "testPy2Gcp", "py27-gcp-pytest"
test.dependsOn testPy2Gcp
+// TODO(BEAM-3713): Migrate to -pytest env variant.
toxTask "testPython2", "py27"
test.dependsOn testPython2
-toxTask "testPy2Cython", "py27-cython"
+toxTask "testPy2Cython", "py27-cython-pytest"
test.dependsOn testPy2Cython
-// Ensure that testPy2Cython runs exclusively to other tests. This line is not
-// actually required, since gradle doesn't do parallel execution within a
-// project.
+// TODO(BEAM-8954): Remove this once tox uses isolated builds.
testPy2Cython.mustRunAfter testPython2, testPy2Gcp
-// TODO(BEAM-3713): Temporary pytest tasks that should eventually replace
-// nose-based test tasks.
-toxTask "testPy2GcpPytest", "py27-gcp-pytest"
-toxTask "testPython2Pytest", "py27-pytest"
-toxTask "testPy2CythonPytest", "py27-cython-pytest"
-// Ensure that cython tests run exclusively to other tests.
-testPy2CythonPytest.mustRunAfter testPython2Pytest, testPy2GcpPytest
-
toxTask "cover", "cover"
task preCommitPy2() {
dependsOn "testPy2Cython"
dependsOn "testPy2Gcp"
}
-
-task preCommitPy2Pytest {
- dependsOn "testPy2CythonPytest"
- dependsOn "testPy2GcpPytest"
- dependsOn "lint"
-}
diff --git a/sdks/python/test-suites/tox/py35/build.gradle b/sdks/python/test-suites/tox/py35/build.gradle
index 37e13f6..f81cf1d 100644
--- a/sdks/python/test-suites/tox/py35/build.gradle
+++ b/sdks/python/test-suites/tox/py35/build.gradle
@@ -26,35 +26,20 @@
// Required to setup a Python 3 virtualenv.
pythonVersion = '3.5'
+// TODO(BEAM-3713): Migrate to -pytest env variant.
toxTask "testPython35", "py35"
test.dependsOn testPython35
-toxTask "testPy35Gcp", "py35-gcp"
+toxTask "testPy35Gcp", "py35-gcp-pytest"
test.dependsOn testPy35Gcp
-toxTask "testPy35Cython", "py35-cython"
+toxTask "testPy35Cython", "py35-cython-pytest"
test.dependsOn testPy35Cython
-// Ensure that testPy35Cython runs exclusively to other tests. This line is not
-// actually required, since gradle doesn't do parallel execution within a
-// project.
+// TODO(BEAM-8954): Remove this once tox uses isolated builds.
testPy35Cython.mustRunAfter testPython35, testPy35Gcp
-// TODO(BEAM-3713): Temporary pytest tasks that should eventually replace
-// nose-based test tasks.
-toxTask "testPy35GcpPytest", "py35-gcp-pytest"
-toxTask "testPython35Pytest", "py35-pytest"
-toxTask "testPy35CythonPytest", "py35-cython-pytest"
-// Ensure that cython tests run exclusively to other tests.
-testPy35CythonPytest.mustRunAfter testPython35Pytest, testPy35GcpPytest
-
task preCommitPy35() {
dependsOn "testPy35Gcp"
dependsOn "testPy35Cython"
}
-
-task preCommitPy35Pytest {
- dependsOn "testPy35GcpPytest"
- dependsOn "testPy35CythonPytest"
- dependsOn "lint"
-}
diff --git a/sdks/python/test-suites/tox/py36/build.gradle b/sdks/python/test-suites/tox/py36/build.gradle
index 8171366..e43b857 100644
--- a/sdks/python/test-suites/tox/py36/build.gradle
+++ b/sdks/python/test-suites/tox/py36/build.gradle
@@ -26,34 +26,20 @@
// Required to setup a Python 3 virtualenv.
pythonVersion = '3.6'
+// TODO(BEAM-3713): Migrate to -pytest env variant.
toxTask "testPython36", "py36"
test.dependsOn testPython36
-toxTask "testPy36Gcp", "py36-gcp"
+toxTask "testPy36Gcp", "py36-gcp-pytest"
test.dependsOn testPy36Gcp
-toxTask "testPy36Cython", "py36-cython"
+toxTask "testPy36Cython", "py36-cython-pytest"
test.dependsOn testPy36Cython
-// Ensure that testPy36Cython runs exclusively to other tests. This line is not
-// actually required, since gradle doesn't do parallel execution within a
-// project.
+// TODO(BEAM-8954): Remove this once tox uses isolated builds.
testPy36Cython.mustRunAfter testPython36, testPy36Gcp
-// TODO(BEAM-3713): Temporary pytest tasks that should eventually replace
-// nose-based test tasks.
-toxTask "testPy36GcpPytest", "py36-gcp-pytest"
-toxTask "testPython36Pytest", "py36-pytest"
-toxTask "testPy36CythonPytest", "py36-cython-pytest"
-// Ensure that cython tests run exclusively to other tests.
-testPy36CythonPytest.mustRunAfter testPython36Pytest, testPy36GcpPytest
-
task preCommitPy36() {
dependsOn "testPy36Gcp"
dependsOn "testPy36Cython"
}
-
-task preCommitPy36Pytest {
- dependsOn "testPy36GcpPytest"
- dependsOn "testPy36CythonPytest"
-}
diff --git a/sdks/python/test-suites/tox/py37/build.gradle b/sdks/python/test-suites/tox/py37/build.gradle
index c9c99e6..ccc6ab7 100644
--- a/sdks/python/test-suites/tox/py37/build.gradle
+++ b/sdks/python/test-suites/tox/py37/build.gradle
@@ -32,34 +32,23 @@
toxTask "lintPy37", "py37-lint"
lint.dependsOn lintPy37
+toxTask "mypyPy37", "py37-mypy"
+lint.dependsOn mypyPy37
+
+// TODO(BEAM-3713): Migrate to -pytest env variant.
toxTask "testPython37", "py37"
test.dependsOn testPython37
-toxTask "testPy37Gcp", "py37-gcp"
+toxTask "testPy37Gcp", "py37-gcp-pytest"
test.dependsOn testPy37Gcp
-toxTask "testPy37Cython", "py37-cython"
+toxTask "testPy37Cython", "py37-cython-pytest"
test.dependsOn testPy37Cython
-// Ensure that testPy37Cython runs exclusively to other tests. This line is not
-// actually required, since gradle doesn't do parallel execution within a
-// project.
+// TODO(BEAM-8954): Remove this once tox uses isolated builds.
testPy37Cython.mustRunAfter testPython37, testPy37Gcp
-// TODO(BEAM-3713): Temporary pytest tasks that should eventually replace
-// nose-based test tasks.
-toxTask "testPy37GcpPytest", "py37-gcp-pytest"
-toxTask "testPython37Pytest", "py37-pytest"
-toxTask "testPy37CythonPytest", "py37-cython-pytest"
-// Ensure that cython tests run exclusively to other tests.
-testPy37CythonPytest.mustRunAfter testPython37Pytest, testPy37GcpPytest
-
task preCommitPy37() {
dependsOn "testPy37Gcp"
dependsOn "testPy37Cython"
}
-
-task preCommitPy37Pytest {
- dependsOn "testPy37GcpPytest"
- dependsOn "testPy37CythonPytest"
-}
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index d065c8b..c78bddf 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -17,7 +17,7 @@
[tox]
# new environments will be excluded by default unless explicitly added to envlist.
-envlist = py27,py35,py36,py37,py27-{gcp,cython,lint,lint3},py35-{gcp,cython},py36-{gcp,cython},py37-{gcp,cython,lint},docs
+envlist = py27,py35,py36,py37,py27-{gcp,cython,lint,lint3},py35-{gcp,cython},py36-{gcp,cython},py37-{gcp,cython,lint,mypy},docs
toxworkdir = {toxinidir}/target/{env:ENV_NAME:.tox}
[pycodestyle]
@@ -35,7 +35,11 @@
time
deps =
cython: cython==0.28.1
- future==0.16.0
+ -r build-requirements.txt
+setenv =
+ RUN_SKIPPED_PY3_TESTS=0
+ # Use an isolated tmp dir for tests that get slowed down by scanning /tmp.
+ TMPDIR={envtmpdir}
# These 2 magic command overrides are required for Jenkins builds.
# Otherwise we get "OSError: [Errno 2] No such file or directory" errors.
@@ -63,43 +67,31 @@
{toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
[testenv:py35]
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
commands =
python apache_beam/examples/complete/autocomplete_test.py
python setup.py nosetests --ignore-files '.*py3[6-9]\.py$' {posargs}
[testenv:py35-pytest]
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
commands =
python apache_beam/examples/complete/autocomplete_test.py
{toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
[testenv:py36]
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
commands =
python apache_beam/examples/complete/autocomplete_test.py
python setup.py nosetests --ignore-files '.*py3[7-9]\.py$' {posargs}
[testenv:py36-pytest]
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
commands =
python apache_beam/examples/complete/autocomplete_test.py
{toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
[testenv:py37]
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
commands =
python apache_beam/examples/complete/autocomplete_test.py
python setup.py nosetests --ignore-files '.*py3[8-9]\.py$' {posargs}
[testenv:py37-pytest]
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
commands =
python apache_beam/examples/complete/autocomplete_test.py
{toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
@@ -121,6 +113,9 @@
# See https://docs.python.org/2/library/sys.html#sys.platform for platform codes
platform = linux2
commands =
+ # TODO(#10038): Remove this build_ext invocation once local source no longer
+ # shadows the installed apache_beam.
+ python setup.py build_ext --inplace
python apache_beam/examples/complete/autocomplete_test.py
{toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
@@ -130,8 +125,6 @@
# `platform = linux2|darwin|...`
# See https://docs.python.org/2/library/sys.html#sys.platform for platform codes
platform = linux
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
commands =
python apache_beam/examples/complete/autocomplete_test.py
python setup.py nosetests --ignore-files '.*py3[5-9]\.py$' {posargs}
@@ -142,9 +135,10 @@
# `platform = linux2|darwin|...`
# See https://docs.python.org/2/library/sys.html#sys.platform for platform codes
platform = linux
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
commands =
+ # TODO(BEAM-8954): Remove this build_ext invocation once local source no longer
+ # shadows the installed apache_beam.
+ python setup.py build_ext --inplace
python apache_beam/examples/complete/autocomplete_test.py
{toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
@@ -154,8 +148,6 @@
# `platform = linux2|darwin|...`
# See https://docs.python.org/2/library/sys.html#sys.platform for platform codes
platform = linux
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
commands =
python apache_beam/examples/complete/autocomplete_test.py
python setup.py nosetests --ignore-files '.*py3[7-9]\.py$' {posargs}
@@ -166,9 +158,10 @@
# `platform = linux2|darwin|...`
# See https://docs.python.org/2/library/sys.html#sys.platform for platform codes
platform = linux
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
commands =
+ # TODO(BEAM-8954): Remove this build_ext invocation once local source no longer
+ # shadows the installed apache_beam.
+ python setup.py build_ext --inplace
python apache_beam/examples/complete/autocomplete_test.py
{toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
@@ -178,8 +171,6 @@
# `platform = linux2|darwin|...`
# See https://docs.python.org/2/library/sys.html#sys.platform for platform codes
platform = linux
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
commands =
python apache_beam/examples/complete/autocomplete_test.py
python setup.py nosetests --ignore-files '.*py3[8-9]\.py$' {posargs}
@@ -190,9 +181,10 @@
# `platform = linux2|darwin|...`
# See https://docs.python.org/2/library/sys.html#sys.platform for platform codes
platform = linux
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
commands =
+ # TODO(BEAM-8954): Remove this build_ext invocation once local source no longer
+ # shadows the installed apache_beam.
+ python setup.py build_ext --inplace
python apache_beam/examples/complete/autocomplete_test.py
{toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
@@ -221,60 +213,56 @@
pytest -o junit_suite_name={envname}_v1new --junitxml=pytest_{envname}_v1new.xml apache_beam/io/gcp/datastore/v1new
[testenv:py35-gcp]
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
extras = test,gcp
commands =
python setup.py nosetests --ignore-files '.*py3[6-9]\.py$' {posargs}
[testenv:py35-gcp-pytest]
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
extras = test,gcp
commands =
{toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
[testenv:py36-gcp]
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
-extras = test,gcp
+extras = test,gcp,interactive
commands =
python setup.py nosetests --ignore-files '.*py3[7-9]\.py$' {posargs}
[testenv:py36-gcp-pytest]
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
-extras = test,gcp
+extras = test,gcp,interactive
commands =
{toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
[testenv:py37-gcp]
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
-extras = test,gcp
+extras = test,gcp,interactive
commands =
python setup.py nosetests --ignore-files '.*py3[8-9]\.py$' {posargs}
[testenv:py37-gcp-pytest]
-setenv =
- RUN_SKIPPED_PY3_TESTS=0
-extras = test,gcp
+extras = test,gcp,interactive
commands =
{toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
+[testenv:py37-aws]
+extras = test,aws
+commands =
+ python setup.py nosetests --ignore-files '.*py3[8-9]\.py$' {posargs}
+
[testenv:py27-lint]
# Checks for py2 syntax errors
deps =
+ -r build-requirements.txt
flake8==3.5.0
commands =
time {toxinidir}/scripts/run_mini_py2lint.sh
[testenv:py27-lint3]
# Checks for py2/3 compatibility issues
+# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in pylint.
+setenv =
deps =
+ -r build-requirements.txt
pycodestyle==2.3.1
pylint==1.9.3
- future==0.16.0
isort==4.2.15
flake8==3.5.0
commands =
@@ -282,17 +270,31 @@
time {toxinidir}/scripts/run_pylint_2to3.sh
[testenv:py37-lint]
+# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in pylint.
+setenv =
deps =
+ -r build-requirements.txt
astroid<2.4,>=2.3.0
pycodestyle==2.3.1
- pylint==2.4.2
- future==0.16.0
+ pylint==2.4.3
isort==4.2.15
flake8==3.5.0
commands =
pylint --version
time {toxinidir}/scripts/run_pylint.sh
+[testenv:py37-mypy]
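+# A typical local invocation of this environment is `tox -e py37-mypy`.
+# Note that the "-" prefix on the setup.py command below makes tox ignore its
+# exit status, so mypy errors are reported but do not fail the build yet.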
+deps =
+ -r build-requirements.txt
+ mypy==0.730
+# make extras available in case any of these libs are typed
+extras =
+ gcp
+# TODO: enable c test failures
+commands =
+ mypy --version
+ - python setup.py mypy
+
[testenv:docs]
extras = test,gcp,docs,interactive
deps =
diff --git a/settings.gradle b/settings.gradle
index 55784ca..c7ca4db 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -54,6 +54,7 @@
include ":runners:portability:java"
include ":runners:spark"
include ":runners:spark:job-server"
+include ":runners:spark:job-server:container"
include ":runners:samza"
include ":runners:samza:job-server"
include ":sdks:go"
@@ -73,6 +74,7 @@
include ":sdks:java:extensions:sketching"
include ":sdks:java:extensions:sorter"
include ":sdks:java:extensions:sql"
+include ":sdks:java:extensions:sql:perf-tests"
include ":sdks:java:extensions:sql:jdbc"
include ":sdks:java:extensions:sql:shell"
include ":sdks:java:extensions:sql:hcatalog"
@@ -91,6 +93,7 @@
include ":sdks:java:io:elasticsearch-tests:elasticsearch-tests-2"
include ":sdks:java:io:elasticsearch-tests:elasticsearch-tests-5"
include ":sdks:java:io:elasticsearch-tests:elasticsearch-tests-6"
+include ":sdks:java:io:elasticsearch-tests:elasticsearch-tests-7"
include ":sdks:java:io:elasticsearch-tests:elasticsearch-tests-common"
include ":sdks:java:io:file-based-io-tests"
include ':sdks:java:io:bigquery-io-perf-tests'
@@ -145,7 +148,7 @@
include ":sdks:python:test-suites:tox:py35"
include ":sdks:python:test-suites:tox:py36"
include ":sdks:python:test-suites:tox:py37"
-include ":vendor:grpc-1_21_0"
+include ":vendor:grpc-1_26_0"
include ":vendor:bytebuddy-1_9_3"
include ":vendor:calcite-1_20_0"
include ":vendor:guava-26_0-jre"
diff --git a/vendor/README.md b/vendor/README.md
new file mode 100644
index 0000000..c4b38cd
--- /dev/null
+++ b/vendor/README.md
@@ -0,0 +1,38 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Vendored Dependencies Release
+
+Upgrading a vendored dependency is performed in two steps:
+- First, perform a formal release of the vendored dependency.
+  The [release process](http://s.apache.org/beam-release-vendored-artifacts) of the vendored dependency
+  is separate from the release of Apache Beam.
+- Once the vendored dependency release is out, migrate Apache Beam to the newly released
+  vendored dependency.
+
+# How to validate the vendored dependencies
+
+The [linkage tool](https://lists.apache.org/thread.html/eb5d95b9a33d7e32dc9bcd0f7d48ba8711d42bd7ed03b9cf0f1103f1%40%3Cdev.beam.apache.org%3E)
+is useful for validating vendored dependency upgrades. It reports linkage errors across multiple Apache Beam artifact ids.
+
+For example, when we upgrade the version of gRPC to 1.26.0 and the version of the vendored gRPC is 0.1-SNAPSHOT,
+we could run the linkage tool as follows:
+```
+./gradlew -PvendoredDependenciesOnly -Ppublishing -PjavaLinkageArtifactIds=beam-vendor-grpc-1_26_0:0.1-SNAPSHOT :checkJavaLinkage
+```
diff --git a/vendor/grpc-1_21_0/build.gradle b/vendor/grpc-1_26_0/build.gradle
similarity index 72%
rename from vendor/grpc-1_21_0/build.gradle
rename to vendor/grpc-1_26_0/build.gradle
index 16ded33..85f68bb 100644
--- a/vendor/grpc-1_21_0/build.gradle
+++ b/vendor/grpc-1_26_0/build.gradle
@@ -16,21 +16,22 @@
* limitations under the License.
*/
-import org.apache.beam.gradle.GrpcVendoring
+import org.apache.beam.gradle.GrpcVendoring_1_26_0
plugins { id 'org.apache.beam.vendor-java' }
-description = "Apache Beam :: Vendored Dependencies :: gRPC :: 1.21.0"
+description = "Apache Beam :: Vendored Dependencies :: gRPC :: 1.26.0"
group = "org.apache.beam"
version = "0.1"
vendorJava(
- dependencies: GrpcVendoring.dependencies(),
- runtimeDependencies: GrpcVendoring.runtimeDependencies(),
- relocations: GrpcVendoring.relocations(),
- exclusions: GrpcVendoring.exclusions(),
- artifactId: "beam-vendor-grpc-1_21_0",
+ dependencies: GrpcVendoring_1_26_0.dependencies(),
+ runtimeDependencies: GrpcVendoring_1_26_0.runtimeDependencies(),
+ testDependencies: GrpcVendoring_1_26_0.testDependencies(),
+ relocations: GrpcVendoring_1_26_0.relocations(),
+ exclusions: GrpcVendoring_1_26_0.exclusions(),
+ artifactId: "beam-vendor-grpc-1_26_0",
groupId: group,
version: version,
)
diff --git a/website/_config.yml b/website/_config.yml
index 19eb6d1..ff65ef8 100644
--- a/website/_config.yml
+++ b/website/_config.yml
@@ -62,7 +62,7 @@
toc_levels: 2..6
# The most recent release of Beam.
-release_latest: 2.16.0
+release_latest: 2.17.0
# Plugins are configured in the Gemfile.
diff --git a/website/src/.htaccess b/website/src/.htaccess
index 20d4586..3f3eb2b 100644
--- a/website/src/.htaccess
+++ b/website/src/.htaccess
@@ -21,4 +21,4 @@
# The following redirect maintains the previously supported URLs.
RedirectMatch permanent "/documentation/sdks/(javadoc|pydoc)(.*)" "https://beam.apache.org/releases/$1$2"
# Keep this updated to point to the current release.
-RedirectMatch "/releases/([^/]+)/current(.*)" "https://beam.apache.org/releases/$1/2.16.0$2"
+RedirectMatch "/releases/([^/]+)/current(.*)" "https://beam.apache.org/releases/$1/2.17.0$2"
diff --git a/website/src/_data/authors.yml b/website/src/_data/authors.yml
index 6830fe1..a8e874d 100644
--- a/website/src/_data/authors.yml
+++ b/website/src/_data/authors.yml
@@ -81,6 +81,10 @@
name: Mark Liu
email: markliu@apache.org
twitter:
+ardagan:
+ name: Mikhail Gryzykhin
+ email: mikhail@apache.org
+ twitter:
robertwb:
name: Robert Bradshaw
email: robertwb@apache.org
diff --git a/website/src/_includes/flink_java_pipeline_options.html b/website/src/_includes/flink_java_pipeline_options.html
new file mode 100644
index 0000000..4495a33
--- /dev/null
+++ b/website/src/_includes/flink_java_pipeline_options.html
@@ -0,0 +1,145 @@
+<!--
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+<!--
+This is an auto-generated file.
+Use generatePipelineOptionsTableJava and generatePipelineOptionsTablePython respectively
+which should be called before running the tests.
+-->
+<table class="table table-bordered">
+<tr>
+ <td><code>allowNonRestoredState</code></td>
+ <td>Flag indicating whether non restored state is allowed if the savepoint contains state for an operator that is no longer part of the pipeline.</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>autoBalanceWriteFilesShardingEnabled</code></td>
+ <td>Flag indicating whether auto-balance sharding for the WriteFiles transform should be enabled. This might prove useful in streaming use cases where the pipeline needs to write many events into files, typically divided into N shards. The default behavior on Flink is that some workers receive more shards than others, which causes workers to go out of balance in terms of processing backlog and memory usage. Enabling this feature spreads shards evenly among the available workers, improving throughput and memory usage stability.</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>autoWatermarkInterval</code></td>
+ <td>The interval in milliseconds for automatic watermark emission.</td>
+ <td></td>
+</tr>
+<tr>
+ <td><code>checkpointTimeoutMillis</code></td>
+ <td>The maximum time in milliseconds that a checkpoint may take before being discarded.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>checkpointingInterval</code></td>
+ <td>The interval in milliseconds at which to trigger checkpoints of the running pipeline. Default: No checkpointing.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>checkpointingMode</code></td>
+ <td>The checkpointing mode that defines consistency guarantee.</td>
+ <td>Default: <code>EXACTLY_ONCE</code></td>
+</tr>
+<tr>
+ <td><code>disableMetrics</code></td>
+ <td>Disable Beam metrics in Flink Runner</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>executionModeForBatch</code></td>
+ <td>Flink mode for data exchange of batch pipelines. Reference {@link org.apache.flink.api.common.ExecutionMode}. Set this to BATCH_FORCED if pipelines get blocked, see https://issues.apache.org/jira/browse/FLINK-10672</td>
+ <td>Default: <code>PIPELINED</code></td>
+</tr>
+<tr>
+ <td><code>executionRetryDelay</code></td>
+ <td>Sets the delay in milliseconds between executions. A value of {@code -1} indicates that the default value should be used.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>externalizedCheckpointsEnabled</code></td>
+ <td>Enables or disables externalized checkpoints. Works in conjunction with CheckpointingInterval</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>failOnCheckpointingErrors</code></td>
+ <td>Sets the expected behaviour for tasks in case they encounter an error in their checkpointing procedure. If this is set to true, the task will fail on a checkpointing error. If this is set to false, the task will only decline the checkpoint and continue running.</td>
+ <td>Default: <code>true</code></td>
+</tr>
+<tr>
+ <td><code>filesToStage</code></td>
+ <td>Jar-Files to send to all workers and put on the classpath. The default value is all files from the classpath.</td>
+ <td></td>
+</tr>
+<tr>
+ <td><code>flinkMaster</code></td>
+ <td>Address of the Flink Master where the Pipeline should be executed. Can either be of the form "host:port" or one of the special values [local], [collection] or [auto].</td>
+ <td>Default: <code>[auto]</code></td>
+</tr>
+<tr>
+ <td><code>latencyTrackingInterval</code></td>
+ <td>Interval in milliseconds for sending latency tracking marks from the sources to the sinks. Interval value <= 0 disables the feature.</td>
+ <td>Default: <code>0</code></td>
+</tr>
+<tr>
+ <td><code>maxBundleSize</code></td>
+ <td>The maximum number of elements in a bundle.</td>
+ <td>Default: <code>1000</code></td>
+</tr>
+<tr>
+ <td><code>maxBundleTimeMills</code></td>
+ <td>The maximum time to wait before finalising a bundle (in milliseconds).</td>
+ <td>Default: <code>1000</code></td>
+</tr>
+<tr>
+ <td><code>maxParallelism</code></td>
+ <td>The pipeline wide maximum degree of parallelism to be used. The maximum parallelism specifies the upper limit for dynamic scaling and the number of key groups used for partitioned state.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>minPauseBetweenCheckpoints</code></td>
+ <td>The minimal pause in milliseconds before the next checkpoint is triggered.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>numberOfExecutionRetries</code></td>
+ <td>Sets the number of times that failed tasks are re-executed. A value of zero effectively disables fault tolerance. A value of -1 indicates that the system default value (as defined in the configuration) should be used.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>objectReuse</code></td>
+ <td>Sets the behavior of reusing objects.</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>parallelism</code></td>
+ <td>The degree of parallelism to be used when distributing operations onto workers. If the parallelism is not set, the configured Flink default is used, or 1 if none can be found.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>retainExternalizedCheckpointsOnCancellation</code></td>
+ <td>Sets the behavior of externalized checkpoints on cancellation.</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>savepointPath</code></td>
+ <td>Savepoint restore path. If specified, restores the streaming pipeline from the provided path.</td>
+ <td></td>
+</tr>
+<tr>
+ <td><code>shutdownSourcesOnFinalWatermark</code></td>
+ <td>If set, shutdown sources when their watermark reaches +Inf.</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>stateBackendFactory</code></td>
+ <td>Sets the state backend factory to use in streaming mode. Defaults to the flink cluster's state.backend configuration.</td>
+ <td></td>
+</tr>
+</table>
diff --git a/website/src/_includes/flink_python_pipeline_options.html b/website/src/_includes/flink_python_pipeline_options.html
new file mode 100644
index 0000000..b57c433
--- /dev/null
+++ b/website/src/_includes/flink_python_pipeline_options.html
@@ -0,0 +1,145 @@
+<!--
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+<!--
+This is an auto-generated file.
+Use generatePipelineOptionsTableJava and generatePipelineOptionsTablePython respectively
+which should be called before running the tests.
+-->
+<table class="table table-bordered">
+<tr>
+ <td><code>allow_non_restored_state</code></td>
+ <td>Flag indicating whether non restored state is allowed if the savepoint contains state for an operator that is no longer part of the pipeline.</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>auto_balance_write_files_sharding_enabled</code></td>
+ <td>Flag indicating whether auto-balance sharding for the WriteFiles transform should be enabled. This might prove useful in streaming use cases where the pipeline needs to write many events into files, typically divided into N shards. The default behavior on Flink is that some workers receive more shards than others, which causes workers to go out of balance in terms of processing backlog and memory usage. Enabling this feature spreads shards evenly among the available workers, improving throughput and memory usage stability.</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>auto_watermark_interval</code></td>
+ <td>The interval in milliseconds for automatic watermark emission.</td>
+ <td></td>
+</tr>
+<tr>
+ <td><code>checkpoint_timeout_millis</code></td>
+ <td>The maximum time in milliseconds that a checkpoint may take before being discarded.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>checkpointing_interval</code></td>
+ <td>The interval in milliseconds at which to trigger checkpoints of the running pipeline. Default: No checkpointing.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>checkpointing_mode</code></td>
+ <td>The checkpointing mode that defines consistency guarantee.</td>
+ <td>Default: <code>EXACTLY_ONCE</code></td>
+</tr>
+<tr>
+ <td><code>disable_metrics</code></td>
+ <td>Disable Beam metrics in Flink Runner</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>execution_mode_for_batch</code></td>
+ <td>Flink mode for data exchange of batch pipelines. Reference {@link org.apache.flink.api.common.ExecutionMode}. Set this to BATCH_FORCED if pipelines get blocked, see https://issues.apache.org/jira/browse/FLINK-10672</td>
+ <td>Default: <code>PIPELINED</code></td>
+</tr>
+<tr>
+ <td><code>execution_retry_delay</code></td>
+ <td>Sets the delay in milliseconds between executions. A value of {@code -1} indicates that the default value should be used.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>externalized_checkpoints_enabled</code></td>
+ <td>Enables or disables externalized checkpoints. Works in conjunction with CheckpointingInterval</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>fail_on_checkpointing_errors</code></td>
+ <td>Sets the expected behaviour for tasks in case they encounter an error in their checkpointing procedure. If this is set to true, the task will fail on a checkpointing error. If this is set to false, the task will only decline the checkpoint and continue running.</td>
+ <td>Default: <code>true</code></td>
+</tr>
+<tr>
+ <td><code>files_to_stage</code></td>
+ <td>Jar-Files to send to all workers and put on the classpath. The default value is all files from the classpath.</td>
+ <td></td>
+</tr>
+<tr>
+ <td><code>flink_master</code></td>
+ <td>Address of the Flink Master where the Pipeline should be executed. Can either be of the form "host:port" or one of the special values [local], [collection] or [auto].</td>
+ <td>Default: <code>[auto]</code></td>
+</tr>
+<tr>
+ <td><code>latency_tracking_interval</code></td>
+ <td>Interval in milliseconds for sending latency tracking marks from the sources to the sinks. Interval value <= 0 disables the feature.</td>
+ <td>Default: <code>0</code></td>
+</tr>
+<tr>
+ <td><code>max_bundle_size</code></td>
+ <td>The maximum number of elements in a bundle.</td>
+ <td>Default: <code>1000</code></td>
+</tr>
+<tr>
+ <td><code>max_bundle_time_mills</code></td>
+ <td>The maximum time to wait before finalising a bundle (in milliseconds).</td>
+ <td>Default: <code>1000</code></td>
+</tr>
+<tr>
+ <td><code>max_parallelism</code></td>
+ <td>The pipeline wide maximum degree of parallelism to be used. The maximum parallelism specifies the upper limit for dynamic scaling and the number of key groups used for partitioned state.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>min_pause_between_checkpoints</code></td>
+ <td>The minimal pause in milliseconds before the next checkpoint is triggered.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>number_of_execution_retries</code></td>
+ <td>Sets the number of times that failed tasks are re-executed. A value of zero effectively disables fault tolerance. A value of -1 indicates that the system default value (as defined in the configuration) should be used.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>object_reuse</code></td>
+ <td>Sets the behavior of reusing objects.</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>parallelism</code></td>
+ <td>The degree of parallelism to be used when distributing operations onto workers. If the parallelism is not set, the configured Flink default is used, or 1 if none can be found.</td>
+ <td>Default: <code>-1</code></td>
+</tr>
+<tr>
+ <td><code>retain_externalized_checkpoints_on_cancellation</code></td>
+ <td>Sets the behavior of externalized checkpoints on cancellation.</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>savepoint_path</code></td>
+ <td>Savepoint restore path. If specified, restores the streaming pipeline from the provided path.</td>
+ <td></td>
+</tr>
+<tr>
+ <td><code>shutdown_sources_on_final_watermark</code></td>
+ <td>If set, shutdown sources when their watermark reaches +Inf.</td>
+ <td>Default: <code>false</code></td>
+</tr>
+<tr>
+ <td><code>state_backend_factory</code></td>
+ <td>Sets the state backend factory to use in streaming mode. Defaults to the flink cluster's state.backend configuration.</td>
+ <td></td>
+</tr>
+</table>
diff --git a/website/src/_includes/section-menu/documentation.html b/website/src/_includes/section-menu/documentation.html
index ed776ea..f8753b6 100644
--- a/website/src/_includes/section-menu/documentation.html
+++ b/website/src/_includes/section-menu/documentation.html
@@ -260,7 +260,8 @@
<ul class="section-nav-list">
<li><a href="{{ site.baseurl }}/documentation/runtime/model/">Execution model</a></li>
- <li><a href="{{ site.baseurl }}/documentation/runtime/environments/">Runtime environments</a></li>
+ <li><a href="{{ site.baseurl }}/documentation/runtime/environments/">Container environments</a></li>
+ <li><a href="{{ site.baseurl }}/documentation/runtime/sdk-harness-config/">SDK Harness Configuration</a></li>
</ul>
</li>
diff --git a/website/src/_posts/2020-01-06-beam-2.17.0.md b/website/src/_posts/2020-01-06-beam-2.17.0.md
new file mode 100644
index 0000000..c5c4716
--- /dev/null
+++ b/website/src/_posts/2020-01-06-beam-2.17.0.md
@@ -0,0 +1,97 @@
+---
+layout: post
+title: "Apache Beam 2.17.0"
+date: 2020-01-06 00:00:01 -0800
+# Date above corrected but keep the old URL:
+permalink: /blog/2020/01/06/beam-2.17.0.html
+excerpt_separator: <!--more-->
+categories: blog
+authors:
+ - ardagan
+
+---
+<!--
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+We are happy to present the new 2.17.0 release of Beam. This release includes both improvements and new functionality.
+See the [download page]({{ site.baseurl }}/get-started/downloads/#2170-2020-01-06) for this release.<!--more-->
+For more information on changes in 2.17.0, check out the
+[detailed release notes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?version=12345970&projectId=12319527).
+
+## Highlights
+* [BEAM-7962](https://issues.apache.org/jira/browse/BEAM-7962) - Drop support for Flink 1.5 and 1.6
+* [BEAM-7635](https://issues.apache.org/jira/browse/BEAM-7635) - Migrate SnsIO to AWS SDK for Java 2
+* Improved usability for portable Flink Runner
+ * [BEAM-8183](https://issues.apache.org/jira/browse/BEAM-8183) - Optionally bundle multiple pipelines into a single Flink jar.
+ * [BEAM-8372](https://issues.apache.org/jira/browse/BEAM-8372) - Allow submission of Flink UberJar directly to flink cluster.
+ * [BEAM-8471](https://issues.apache.org/jira/browse/BEAM-8471) - Flink native job submission for portable pipelines.
+ * [BEAM-8312](https://issues.apache.org/jira/browse/BEAM-8312) - Flink portable pipeline jars do not need to stage artifacts remotely.
+
+### New Features / Improvements
+* [BEAM-7730](https://issues.apache.org/jira/browse/BEAM-7730) - Add Flink 1.9 build target and Make FlinkRunner compatible with Flink 1.9.
+* [BEAM-7990](https://issues.apache.org/jira/browse/BEAM-7990) - Add ability to read parquet files into PCollection of pyarrow.Table.
+* [BEAM-8355](https://issues.apache.org/jira/browse/BEAM-8355) - Make BooleanCoder a standard coder.
+* [BEAM-8394](https://issues.apache.org/jira/browse/BEAM-8394) - Add withDataSourceConfiguration() method in JdbcIO.ReadRows class.
+* [BEAM-5428](https://issues.apache.org/jira/browse/BEAM-5428) - Implement cross-bundle state caching.
+* [BEAM-5967](https://issues.apache.org/jira/browse/BEAM-5967) - Add handling of DynamicMessage in ProtoCoder.
+* [BEAM-7473](https://issues.apache.org/jira/browse/BEAM-7473) - Update RestrictionTracker within Python to not be required to be thread safe.
+* [BEAM-7920](https://issues.apache.org/jira/browse/BEAM-7920) - Added AvroTableProvider to Beam SQL.
+* [BEAM-8098](https://issues.apache.org/jira/browse/BEAM-8098) - Improve documentation on BigQueryIO.
+* [BEAM-8100](https://issues.apache.org/jira/browse/BEAM-8100) - Add exception handling to Json transforms in Java SDK.
+* [BEAM-8306](https://issues.apache.org/jira/browse/BEAM-8306) - Improve estimation of data byte size reading from source in ElasticsearchIO.
+* [BEAM-8351](https://issues.apache.org/jira/browse/BEAM-8351) - Support passing in arbitrary KV pairs to sdk worker via external environment config.
+* [BEAM-8396](https://issues.apache.org/jira/browse/BEAM-8396) - Default to LOOPBACK mode for local flink (spark, ...) runner.
+* [BEAM-8410](https://issues.apache.org/jira/browse/BEAM-8410) - JdbcIO should support setConnectionInitSqls in its DataSource.
+* [BEAM-8609](https://issues.apache.org/jira/browse/BEAM-8609) - Add HllCount to Java transform catalog.
+* [BEAM-8861](https://issues.apache.org/jira/browse/BEAM-8861) - Disallow self-signed certificates by default in ElasticsearchIO.
+
+### Dependency Changes
+* [BEAM-8285](https://issues.apache.org/jira/browse/BEAM-8285) - Upgrade ZetaSQL to 2019.09.1.
+* [BEAM-8392](https://issues.apache.org/jira/browse/BEAM-8392) - Upgrade pyarrow version bounds: 0.15.1<= to <0.16.0.
+* [BEAM-5895](https://issues.apache.org/jira/browse/BEAM-5895) - Upgrade com.rabbitmq:amqp-client to 5.7.3.
+* [BEAM-6896](https://issues.apache.org/jira/browse/BEAM-6896) - Upgrade PyYAML version bounds: 3.12<= to <6.0.0.
+
+
+### Bugfixes
+* [BEAM-8819](https://issues.apache.org/jira/browse/BEAM-8819) - AvroCoder for SpecificRecords is not serialized correctly since 2.13.0
+* Various bug fixes and performance improvements.
+
+### Known Issues
+
+* [BEAM-8989](https://issues.apache.org/jira/browse/BEAM-8989) Apache Nemo
+ runner broken due to backwards incompatible change since 2.16.0.
+
+## List of Contributors
+
+ According to git shortlog, the following people contributed to the 2.17.0 release. Thank you to all contributors!
+
+Ahmet Altay, Alan Myrvold, Alexey Romanenko, Andre-Philippe Paquet, Andrew
+Pilloud, angulartist, Ankit Jhalaria, Ankur Goenka, Anton Kedin, Aryan Naraghi,
+Aurélien Geron, B M VISHWAS, Bartok Jozsef, Boyuan Zhang, Brian Hulette, Cerny
+Ondrej, Chad Dombrova, Chamikara Jayalath, ChethanU, cmach, Colm O hEigeartaigh,
+Cyrus Maden, Daniel Oliveira, Daniel Robert, Dante, David Cavazos, David
+Moravek, David Yan, Enrico Canzonieri, Etienne Chauchot, gxercavins, Hai Lu,
+Hannah Jiang, Ian Lance Taylor, Ismaël Mejía, Israel Herraiz, James Wen, Jan
+Lukavský, Jean-Baptiste Onofré, Jeff Klukas, jesusrv1103, Jofre, Kai Jiang,
+Kamil Wasilewski, Kasia Kucharczyk, Kenneth Knowles, Kirill Kozlov,
+kirillkozlov, Kohki YAMAGIWA, Kyle Weaver, Leonardo Alves Miguel, lloigor,
+lostluck, Luis Enrique Ortíz Ramirez, Luke Cwik, Mark Liu, Maximilian Michels,
+Michal Walenia, Mikhail Gryzykhin, mrociorg, Nicolas Delsaux, Ning Kang, NING
+KANG, Pablo Estrada, pabloem, Piotr Szczepanik, rahul8383, Rakesh Kumar, Renat
+Nasyrov, Reuven Lax, Robert Bradshaw, Robert Burke, Rui Wang, Ruslan Altynnikov,
+Ryan Skraba, Salman Raza, Saul Chavez, Sebastian Jambor, sunjincheng121, Tatu
+Saloranta, tchiarato, Thomas Weise, Tomo Suzuki, Tudor Marian, tvalentyn, Udi
+Meiri, Valentyn Tymofieiev, Viola Lyu, Vishwas, Yichi Zhang, Yifan Zou, Yueyang
+Qiu, Łukasz Gajowy
+
diff --git a/website/src/contribute/release-guide.md b/website/src/contribute/release-guide.md
index 04039df..40dd143 100644
--- a/website/src/contribute/release-guide.md
+++ b/website/src/contribute/release-guide.md
@@ -529,6 +529,18 @@
Check if there are outstanding cherry-picks into the release branch, [e.g. for `2.14.0`](https://github.com/apache/beam/pulls?utf8=%E2%9C%93&q=is%3Apr+base%3Arelease-2.14.0).
Make sure they have blocker JIRAs attached and are OK to get into the release by checking with community if needed.
+As the Release Manager you are empowered to accept or reject cherry-picks to the release branch. You are encouraged to ask the following questions on each cherry-pick PR, and you may reject the request if they are not answered satisfactorily:
+
+* Is this a regression from a previous release? (If no, fix could go to a newer version.)
+* Is this a new feature or related to a new feature? (If yes, fix could go to a new version.)
+* Would this impact production workloads for users? (E.g. if this is a fix for the direct runner only, it may not need to be a cherry-pick.)
+* What percentage of users would be impacted by this issue if it is not fixed? (E.g. if only a small number of users is predicted to be affected, it may not need to be a cherry-pick.)
+* Would it be possible for the impacted users to skip this version? (If users could skip this version, fix could go to a newer version.)
+
+It is important to accept major/blocking fixes to isolated issues to make a higher quality release. However, beyond that, each cherry-pick increases the time required for the release and adds more last-minute code to the release branch. Neither late releases nor insufficiently tested code provide positive user value.
+
+_Tip_: Another tool in your toolbox is the known issues section of the release blog. Consider documenting minor issues there instead of accepting cherry-picks to the release branch.
+
**********
@@ -855,6 +867,7 @@
* {$KNOWN_ISSUE_1}
* {$KNOWN_ISSUE_2}
+ * See a full list of open [issues that affect](https://issues.apache.org/jira/browse/BEAM-8989?jql=project = BEAM AND affectedVersion = 2.16.0 ORDER BY priority DESC, updated DESC) this version.
## List of Contributors
@@ -1246,9 +1259,7 @@
Make sure the download address for the latest release version is updated, [example PR](https://github.com/apache/beam-site/pull/478).
### Deploy SDK docker images to DockerHub
-TODO(hannahjiang): change link to master branch after #9560 is merged.
-
-* Script: [publish_docker_images.sh](https://github.com/Hannah-Jiang/beam/blob/release_script_for_containers/release/src/main/scripts/publish_docker_images.sh)
+* Script: [publish_docker_images.sh](https://github.com/apache/beam/blob/master/release/src/main/scripts/publish_docker_images.sh)
* Usage
```
./beam/release/src/main/scripts/publish_docker_images.sh
diff --git a/website/src/documentation/pipelines/design-your-pipeline.md b/website/src/documentation/pipelines/design-your-pipeline.md
index c04df7f..5c4c28e 100644
--- a/website/src/documentation/pipelines/design-your-pipeline.md
+++ b/website/src/documentation/pipelines/design-your-pipeline.md
@@ -37,12 +37,11 @@
## A basic pipeline
-The simplest pipelines represent a linear flow of operations, as shown in figure
-1.
+The simplest pipelines represent a linear flow of operations, as shown in figure 1.

+![A linear pipeline.]({{ "/images/design-your-pipeline-linear.svg" | prepend: site.baseurl }})
*Figure 1: A linear pipeline.*
@@ -60,7 +59,7 @@

+![A branching pipeline. Two transforms are applied to a single PCollection of database table rows.]({{ "/images/design-your-pipeline-multiple-pcollections.svg" | prepend: site.baseurl }})
*Figure 2: A branching pipeline. Two transforms are applied to a single
PCollection of database table rows.*
@@ -96,7 +95,7 @@
Figure 3 illustrates the same example described above, but with one transform that produces multiple outputs. Names that start with 'A' are added to the main output `PCollection`, and names that start with 'B' are added to an additional output `PCollection`.

+![A pipeline with a transform that outputs multiple PCollections.]({{ "/images/design-your-pipeline-additional-outputs.svg" | prepend: site.baseurl }})
*Figure 3: A pipeline with a transform that outputs multiple PCollections.*
@@ -172,10 +171,9 @@
merged both contain the same type.

+![A pipeline that merges two collections into one collection with the Flatten transform.]({{ "/images/design-your-pipeline-flatten.svg" | prepend: site.baseurl }})
-*Figure 4: A pipeline that merges two collections into one collection with the Flatten
-transform.*
+*Figure 4: A pipeline that merges two collections into one collection with the Flatten transform.*
The following example code applies `Flatten` to merge two collections.
@@ -194,7 +192,7 @@
Your pipeline can read its input from one or more sources. If your pipeline reads from multiple sources and the data from those sources is related, it can be useful to join the inputs together. In the example illustrated in figure 5 below, the pipeline reads names and addresses from a database table, and names and order numbers from a Kafka topic. The pipeline then uses `CoGroupByKey` to join this information, where the key is the name; the resulting `PCollection` contains all the combinations of names, addresses, and orders.

+![A pipeline that does a relational join of two input collections.]({{ "/images/design-your-pipeline-join.svg" | prepend: site.baseurl }})
*Figure 5: A pipeline that does a relational join of two input collections.*
diff --git a/website/src/documentation/programming-guide.md b/website/src/documentation/programming-guide.md
index 9d644ae..510d7cd 100644
--- a/website/src/documentation/programming-guide.md
+++ b/website/src/documentation/programming-guide.md
@@ -497,9 +497,7 @@
SDKs).
How you apply your pipeline's transforms determines the structure of your
-pipeline. The best way to think of your pipeline is as a directed acyclic graph,
-where the nodes are `PCollection`s and the edges are transforms. For example,
-you can chain transforms to create a sequential pipeline, like this one:
+pipeline. The best way to think of your pipeline is as a directed acyclic graph, where `PTransform` nodes are subroutines that accept `PCollection` nodes as inputs and emit `PCollection` nodes as outputs. For example, you can chain together transforms to create a pipeline that successively modifies input data:
```java
[Final Output PCollection] = [Initial Input PCollection].apply([First Transform])
@@ -512,13 +510,13 @@
| [Third Transform])
```
-The resulting workflow graph of the above pipeline looks like this.
+The graph of this pipeline looks like the following:

+![A linear pipeline with three sequential transforms.]({{ "/images/design-your-pipeline-linear.svg" | prepend: site.baseurl }})
-*Figure: A linear pipeline with three sequential transforms.*
+*Figure 1: A linear pipeline with three sequential transforms.*
However, note that a transform *does not consume or otherwise alter* the input
collection--remember that a `PCollection` is immutable by definition. This means
@@ -536,13 +534,13 @@
[PCollection of 'B' names] = [PCollection of database table rows] | [Transform B]
```
-The resulting workflow graph from the branching pipeline above looks like this.
+The graph of this branching pipeline looks like the following:

+![A branching pipeline. Two transforms are applied to a single PCollection of database table rows.]({{ "/images/design-your-pipeline-multiple-pcollections.svg" | prepend: site.baseurl }})
-*Figure: A branching pipeline. Two transforms are applied to a single
+*Figure 2: A branching pipeline. Two transforms are applied to a single
PCollection of database table rows.*
You can also build your own [composite transforms](#composite-transforms) that
@@ -2315,9 +2313,9 @@
window and key. This can have different effects on your pipeline. Consider the
example pipeline in the figure below:
-
+
-**Figure:** Pipeline applying windowing
+**Figure 3:** Pipeline applying windowing
In the above pipeline, we create an unbounded `PCollection` by reading a set of
key/value pairs using `KafkaIO`, and then apply a windowing function to that
@@ -2345,9 +2343,9 @@
To illustrate how windowing with a bounded `PCollection` can affect how your
pipeline processes data, consider the following pipeline:
-
+
-**Figure:** `GroupByKey` and `ParDo` without windowing, on a bounded collection.
+**Figure 4:** `GroupByKey` and `ParDo` without windowing, on a bounded collection.
In the above pipeline, we create a bounded `PCollection` by reading a set of
key/value pairs using `TextIO`. We then group the collection using `GroupByKey`,
@@ -2360,9 +2358,9 @@
Now, consider the same pipeline, but using a windowing function:
-
+
-**Figure:** `GroupByKey` and `ParDo` with windowing, on a bounded collection.
+**Figure 5:** `GroupByKey` and `ParDo` with windowing, on a bounded collection.
As before, the pipeline creates a bounded `PCollection` of key/value pairs. We
then set a [windowing function](#setting-your-pcollections-windowing-function)
@@ -2406,7 +2404,7 @@

-**Figure:** Fixed time windows, 30s in duration.
+**Figure 6:** Fixed time windows, 30s in duration.
#### 7.2.2. Sliding time windows {#sliding-time-windows}
@@ -2425,7 +2423,7 @@

-**Figure:** Sliding time windows, with 1 minute window duration and 30s window
+**Figure 7:** Sliding time windows, with 1 minute window duration and 30s window
period.
#### 7.2.3. Session windows {#session-windows}
@@ -2440,7 +2438,7 @@

-**Figure:** Session windows, with a minimum gap duration. Note how each data key
+**Figure 8:** Session windows, with a minimum gap duration. Note how each data key
has different windows, according to its data distribution.
#### 7.2.4. The single global window {#single-global-window}
diff --git a/website/src/documentation/runners/flink.md b/website/src/documentation/runners/flink.md
index a7ab744..6e7307f 100644
--- a/website/src/documentation/runners/flink.md
+++ b/website/src/documentation/runners/flink.md
@@ -336,7 +336,7 @@
"--flink_master=localhost:8081",
"--environment_type=LOOPBACK"
])
-with beam.Pipeline(options) as p:
+with beam.Pipeline(options=options) as p:
...
```
@@ -350,7 +350,7 @@
### Streaming Execution
-If your pipeline uses an unbounded data source or sink, the Flink Runner will automatically switch to streaming mode. You can enforce streaming mode by using the `streaming` setting mentioned below.
+If your pipeline uses an unbounded data source or sink, the Flink Runner will automatically switch to streaming mode. You can enforce streaming mode by using the `--streaming` flag.
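+
+For example, a minimal sketch (extending the option list from the Python example above) enables streaming mode explicitly:
+
+```py
+options = PipelineOptions([
+    "--flink_master=localhost:8081",
+    "--environment_type=LOOPBACK",
+    "--streaming",  # enforce streaming mode
+])
+with beam.Pipeline(options=options) as p:
+    ...
+```
+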
Note: The Runner will print a warning message when unbounded sources are used and checkpointing is not enabled.
Many sources like `PubSubIO` rely on their checkpoints to be acknowledged which can only be done when checkpointing is enabled for the `FlinkRunner`. To enable checkpointing, please set <span class="language-java">`checkpointingInterval`</span><span class="language-py">`checkpointing_interval`</span> to the desired checkpointing interval in milliseconds.
@@ -359,284 +359,23 @@
When executing your pipeline with the Flink Runner, you can set these pipeline options.
-See the reference documentation for the<span class="language-java">
+The following list of Flink-specific pipeline options is generated automatically from the
[FlinkPipelineOptions](https://beam.apache.org/releases/javadoc/{{ site.release_latest }}/index.html?org/apache/beam/runners/flink/FlinkPipelineOptions.html)
-</span><span class="language-py">
-[PipelineOptions](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/options/pipeline_options.py)
-</span>interface (and its subinterfaces) for the complete list of pipeline configuration options.
-
+reference class:
<!-- Java Options -->
<div class="language-java">
-<table class="table table-bordered">
-<tr>
- <th>Field</th>
- <th>Description</th>
- <th>Default Value</th>
-</tr>
-<tr>
- <td><code>runner</code></td>
- <td>The pipeline runner to use. This option allows you to determine the pipeline runner at runtime.</td>
- <td>Set to <code>FlinkRunner</code> to run using Flink.</td>
-</tr>
-<tr>
- <td><code>streaming</code></td>
- <td>Whether streaming mode is enabled or disabled; <code>true</code> if enabled. Set to <code>true</code> if running pipelines with unbounded <code>PCollection</code>s.</td>
- <td><code>false</code></td>
-</tr>
-<tr>
- <td><code>flinkMaster</code></td>
- <td>The url of the Flink JobManager on which to execute pipelines. This can either be the address of a cluster JobManager, in the form <code>"host:port"</code> or one of the special Strings <code>"[local]"</code> or <code>"[auto]"</code>. <code>"[local]"</code> will start a local Flink Cluster in the JVM while <code>"[auto]"</code> will let the system decide where to execute the pipeline based on the environment.</td>
- <td><code>[auto]</code></td>
-</tr>
-<tr>
- <td><code>filesToStage</code></td>
- <td>Jar Files to send to all workers and put on the classpath. Here you have to put the fat jar that contains your program along with all dependencies.</td>
- <td>empty</td>
-</tr>
-<tr>
- <td><code>parallelism</code></td>
- <td>The degree of parallelism to be used when distributing operations onto workers.</td>
- <td>For local execution: <code>Number of available CPU cores</code>
- For remote execution: <code>Default parallelism configuerd at remote cluster</code>
- Otherwise: <code>1</code>
- </td>
-</tr>
-<tr>
- <td><code>maxParallelism</code></td>
- <td>The pipeline wide maximum degree of parallelism to be used. The maximum parallelism specifies the upper limit for dynamic scaling and the number of key groups used for partitioned state.</td>
- <td><code>-1L</code>, meaning same as the parallelism</td>
-</tr>
-<tr>
- <td><code>checkpointingInterval</code></td>
- <td>The interval between consecutive checkpoints (i.e. snapshots of the current pipeline state used for fault tolerance).</td>
- <td><code>-1L</code>, i.e. disabled</td>
-</tr>
-<tr>
- <td><code>checkpointMode</code></td>
- <td>The checkpointing mode that defines consistency guarantee.</td>
- <td><code>EXACTLY_ONCE</code></td>
-</tr>
-<tr>
- <td><code>checkpointTimeoutMillis</code></td>
- <td>The maximum time in milliseconds that a checkpoint may take before being discarded</td>
- <td><code>-1</code>, the cluster default</td>
-</tr>
-<tr>
- <td><code>minPauseBetweenCheckpoints</code></td>
- <td>The minimal pause in milliseconds before the next checkpoint is triggered.</td>
- <td><code>-1</code>, the cluster default</td>
-</tr>
-<tr>
- <td><code>failOnCheckpointingErrors</code></td>
- <td>
- Sets the expected behaviour for tasks in case that they encounter an error in their
- checkpointing procedure. If this is set to true, the task will fail on checkpointing error.
- If this is set to false, the task will only decline a the checkpoint and continue running.
- </td>
- <td><code>-1</code>, the cluster default</td>
-</tr>
-<tr>
- <td><code>numberOfExecutionRetries</code></td>
- <td>Sets the number of times that failed tasks are re-executed. A value of <code>0</code> effectively disables fault tolerance. A value of <code>-1</code> indicates that the system default value (as defined in the configuration) should be used.</td>
- <td><code>-1</code></td>
-</tr>
-<tr>
- <td><code>executionRetryDelay</code></td>
- <td>Sets the delay between executions. A value of <code>-1</code> indicates that the default value should be used.</td>
- <td><code>-1</code></td>
-</tr>
-<tr>
- <td><code>objectReuse</code></td>
- <td>Sets the behavior of reusing objects.</td>
- <td><code>false</code>, no Object reuse</td>
-</tr>
-<tr>
- <td><code>stateBackend</code></td>
- <td>Sets the state backend to use in streaming mode. The default is to read this setting from the Flink config.</td>
- <td><code>empty</code>, i.e. read from Flink config</td>
-</tr>
-<tr>
- <td><code>enableMetrics</code></td>
- <td>Enable/disable Beam metrics in Flink Runner</td>
- <td>Default: <code>true</code></td>
-</tr>
-<tr>
- <td><code>externalizedCheckpointsEnabled</code></td>
- <td>Enables or disables externalized checkpoints. Works in conjunction with CheckpointingInterval</td>
- <td>Default: <code>false</code></td>
-</tr>
-<tr>
- <td><code>retainExternalizedCheckpointsOnCancellation</code></td>
- <td>Sets the behavior of externalized checkpoints on cancellation.</td>
- <td>Default: <code>false</code></td>
-</tr>
-<tr>
- <td><code>maxBundleSize</code></td>
- <td>The maximum number of elements in a bundle.</td>
- <td>Default: <code>1000</code></td>
-</tr>
-<tr>
- <td><code>maxBundleTimeMills</code></td>
- <td>The maximum time to wait before finalising a bundle (in milliseconds).</td>
- <td>Default: <code>1000</code></td>
-</tr>
-<tr>
- <td><code>shutdownSourcesOnFinalWatermark</code></td>
- <td>If set, shutdown sources when their watermark reaches +Inf.</td>
- <td>Default: <code>false</code></td>
-</tr>
-<tr>
- <td><code>latencyTrackingInterval</code></td>
- <td>Interval in milliseconds for sending latency tracking marks from the sources to the sinks. Interval value <= 0 disables the feature.</td>
- <td>Default: <code>0</code></td>
-</tr>
-<tr>
- <td><code>autoWatermarkInterval</code></td>
- <td>The interval in milliseconds for automatic watermark emission.</td>
-</tr>
-<tr>
- <td><code>executionModeForBatch</code></td>
- <td>Flink mode for data exchange of batch pipelines. Reference {@link org.apache.flink.api.common.ExecutionMode}. Set this to BATCH_FORCED if pipelines get blocked, see https://issues.apache.org/jira/browse/FLINK-10672</td>
- <td>Default: <code>PIPELINED</code></td>
-</tr>
-<tr>
- <td><code>savepointPath</code></td>
- <td>Savepoint restore path. If specified, restores the streaming pipeline from the provided path.</td>
- <td>Default: None</td>
-</tr>
-<tr>
- <td><code>allowNonRestoredState</code></td>
- <td>Flag indicating whether non restored state is allowed if the savepoint contains state for an operator that is no longer part of the pipeline.</td>
- <td>Default: <code>false</code></td>
-</tr>
-</table>
+{% include flink_java_pipeline_options.html %}
</div>
-
<!-- Python Options -->
<div class="language-py">
-<table class="table table-bordered">
-
-<tr>
- <td><code>files_to_stage</code></td>
- <td>Jar-Files to send to all workers and put on the classpath. The default value is all files from the classpath.</td>
-</tr>
-<tr>
- <td><code>flink_master</code></td>
- <td>Address of the Flink Master where the Pipeline should be executed. Can either be of the form "host:port" or one of the special values [local], [collection] or [auto].</td>
- <td>Default: <code>[auto]</code></td>
-</tr>
-<tr>
- <td><code>parallelism</code></td>
- <td>The degree of parallelism to be used when distributing operations onto workers. If the parallelism is not set, the configured Flink default is used, or 1 if none can be found.</td>
- <td>Default: <code>-1</code></td>
-</tr>
-<tr>
- <td><code>max_parallelism</code></td>
- <td>The pipeline wide maximum degree of parallelism to be used. The maximum parallelism specifies the upper limit for dynamic scaling and the number of key groups used for partitioned state.</td>
- <td>Default: <code>-1</code></td>
-</tr>
-<tr>
- <td><code>checkpointing_interval</code></td>
- <td>The interval in milliseconds at which to trigger checkpoints of the running pipeline. Default: No checkpointing.</td>
- <td>Default: <code>-1</code></td>
-</tr>
-<tr>
- <td><code>checkpointing_mode</code></td>
- <td>The checkpointing mode that defines consistency guarantee.</td>
- <td>Default: <code>EXACTLY_ONCE</code></td>
-</tr>
-<tr>
- <td><code>checkpoint_timeout_millis</code></td>
- <td>The maximum time in milliseconds that a checkpoint may take before being discarded.</td>
- <td>Default: <code>-1</code></td>
-</tr>
-<tr>
- <td><code>min_pause_between_checkpoints</code></td>
- <td>The minimal pause in milliseconds before the next checkpoint is triggered.</td>
- <td>Default: <code>-1</code></td>
-</tr>
-<tr>
- <td><code>fail_on_checkpointing_errors</code></td>
- <td>Sets the expected behaviour for tasks in case that they encounter an error in their checkpointing procedure. If this is set to true, the task will fail on checkpointing error. If this is set to false, the task will only decline a the checkpoint and continue running. </td>
- <td>Default: <code>true</code></td>
-</tr>
-<tr>
- <td><code>number_of_execution_retries</code></td>
- <td>Sets the number of times that failed tasks are re-executed. A value of zero effectively disables fault tolerance. A value of -1 indicates that the system default value (as defined in the configuration) should be used.</td>
- <td>Default: <code>-1</code></td>
-</tr>
-<tr>
- <td><code>execution_retry_delay</code></td>
- <td>Sets the delay in milliseconds between executions. A value of {@code -1} indicates that the default value should be used.</td>
- <td>Default: <code>-1</code></td>
-</tr>
-<tr>
- <td><code>object_reuse</code></td>
- <td>Sets the behavior of reusing objects.</td>
- <td>Default: <code>false</code></td>
-</tr>
-<tr>
- <td><code>state_backend</code></td>
- <td>Sets the state backend to use in streaming mode. Otherwise the default is read from the Flink config.</td>
-</tr>
-<tr>
- <td><code>enable_metrics</code></td>
- <td>Enable/disable Beam metrics in Flink Runner</td>
- <td>Default: <code>true</code></td>
-</tr>
-<tr>
- <td><code>externalized_checkpoints_enabled</code></td>
- <td>Enables or disables externalized checkpoints. Works in conjunction with CheckpointingInterval</td>
- <td>Default: <code>false</code></td>
-</tr>
-<tr>
- <td><code>retain_externalized_checkpoints_on_cancellation</code></td>
- <td>Sets the behavior of externalized checkpoints on cancellation.</td>
- <td>Default: <code>false</code></td>
-</tr>
-<tr>
- <td><code>max_bundle_size</code></td>
- <td>The maximum number of elements in a bundle.</td>
- <td>Default: <code>1000</code></td>
-</tr>
-<tr>
- <td><code>max_bundle_time_mills</code></td>
- <td>The maximum time to wait before finalising a bundle (in milliseconds).</td>
- <td>Default: <code>1000</code></td>
-</tr>
-<tr>
- <td><code>shutdown_sources_on_final_watermark</code></td>
- <td>If set, shutdown sources when their watermark reaches +Inf.</td>
- <td>Default: <code>false</code></td>
-</tr>
-<tr>
- <td><code>latency_tracking_interval</code></td>
- <td>Interval in milliseconds for sending latency tracking marks from the sources to the sinks. Interval value <= 0 disables the feature.</td>
- <td>Default: <code>0</code></td>
-</tr>
-<tr>
- <td><code>auto_watermark_interval</code></td>
- <td>The interval in milliseconds for automatic watermark emission.</td>
-</tr>
-<tr>
- <td><code>execution_mode_for_batch</code></td>
- <td>Flink mode for data exchange of batch pipelines. Reference {@link org.apache.flink.api.common.ExecutionMode}. Set this to BATCH_FORCED if pipelines get blocked, see https://issues.apache.org/jira/browse/FLINK-10672</td>
- <td>Default: <code>PIPELINED</code></td>
-</tr>
-<tr>
- <td><code>savepoint_path</code></td>
- <td>Savepoint restore path. If specified, restores the streaming pipeline from the provided path.</td>
-</tr>
-<tr>
- <td><code>allow_non_restored_state</code></td>
- <td>Flag indicating whether non restored state is allowed if the savepoint contains state for an operator that is no longer part of the pipeline.</td>
- <td>Default: <code>false</code></td>
-</tr>
-
-</table>
+{% include flink_python_pipeline_options.html %}
</div>
+For general Beam pipeline options see the
+[PipelineOptions](https://beam.apache.org/releases/javadoc/{{ site.release_latest }}/index.html?org/apache/beam/sdk/options/PipelineOptions.html)
+reference.
+
## Capability
The [Beam Capability Matrix]({{ site.baseurl
diff --git a/website/src/documentation/runtime/environments.md b/website/src/documentation/runtime/environments.md
index 4b29ac0..3507d7c 100644
--- a/website/src/documentation/runtime/environments.md
+++ b/website/src/documentation/runtime/environments.md
@@ -1,6 +1,6 @@
---
layout: section
-title: "Runtime environments"
+title: "Container environments"
section_menu: section-menu/documentation.html
permalink: /documentation/runtime/environments/
---
@@ -18,7 +18,7 @@
limitations under the License.
-->
-# Runtime environments
+# Container environments
The Beam SDK runtime environment is isolated from other runtime systems because the SDK runtime environment is [containerized](https://s.apache.org/beam-fn-api-container-contract) with [Docker](https://www.docker.com/). This means that any execution engine can run the Beam SDK.
diff --git a/website/src/documentation/runtime/sdk-harness-config.md b/website/src/documentation/runtime/sdk-harness-config.md
new file mode 100644
index 0000000..ebcd444
--- /dev/null
+++ b/website/src/documentation/runtime/sdk-harness-config.md
@@ -0,0 +1,57 @@
+---
+layout: section
+title: "SDK Harness Configuration"
+section_menu: section-menu/documentation.html
+permalink: /documentation/runtime/sdk-harness-config/
+---
+<!--
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+# SDK Harness Configuration
+
+Beam allows configuration of the [SDK harness]({{ site.baseurl }}/roadmap/portability/) to
+accommodate varying cluster setups.
+(The options below are for Python, but much of this information should apply to the Java and Go SDKs
+as well.)
+
+- `environment_type` determines where user code will be executed.
+ `environment_config` configures the environment depending on the value of `environment_type`.
+ - `DOCKER` (default): User code is executed within a container started on each worker node.
+ This requires docker to be installed on worker nodes.
+ - `environment_config`: URL for the Docker container image. Official Docker images
+ are available [here](https://hub.docker.com/u/apachebeam) and are used by default.
+ Alternatively, you can build your own image by following the instructions
+ [here]({{ site.baseurl }}/documentation/runtime/environments/).
+ - `PROCESS`: User code is executed by processes that are automatically started by the runner on
+ each worker node.
+ - `environment_config`: JSON of the form `{"os": "<OS>", "arch": "<ARCHITECTURE>",
+ "command": "<process to execute>", "env":{"<Environment variables 1>": "<ENV_VAL>"} }`. All
+ fields in the JSON are optional except `command`.
+ - For `command`, it is recommended to use the bootloader executable, which can be built from
+ source with `./gradlew :sdks:python:container:build` and copied from
+ `sdks/python/container/build/target/launcher/linux_amd64/boot` to worker machines.
+ Note that the Python bootloader assumes Python and the `apache_beam` module are installed
+ on each worker machine.
+ - `EXTERNAL`: User code will be dispatched to an external service. For example, one can start
+ an external service for Python workers by running
+ `docker run -p=50000:50000 apachebeam/python3.6_sdk --worker_pool`.
+ - `environment_config`: Address for the external service, e.g. `localhost:50000`.
+ - To access a Dockerized worker pool service from a Mac or Windows client, set the
+ `BEAM_WORKER_POOL_IN_DOCKER_VM` environment variable on the client:
+ `export BEAM_WORKER_POOL_IN_DOCKER_VM=1`.
+ - `LOOPBACK`: User code is executed within the same process that submitted the pipeline. This
+ option is useful for local testing. However, it is not suitable for a production environment,
+ as it performs work on the machine the job originated from.
+ - `environment_config` is not used for the `LOOPBACK` environment.
+- `sdk_worker_parallelism` sets the number of SDK workers that will run on each worker node.
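+
+For example, a Python pipeline can pass these options through `PipelineOptions`. The snippet
+below is a minimal sketch: it targets a portable runner with the `LOOPBACK` environment, and the
+job endpoint shown is a placeholder for the address of your own runner's job service.
+
+```python
+import apache_beam as beam
+from apache_beam.options.pipeline_options import PipelineOptions
+
+options = PipelineOptions([
+    "--runner=PortableRunner",
+    "--job_endpoint=localhost:8099",   # placeholder; use your runner's job service address
+    "--environment_type=LOOPBACK",
+])
+
+# Sketch of the PROCESS environment instead: point `command` at the bootloader
+# copied to the worker machines (the path below is illustrative):
+# options = PipelineOptions([
+#     "--runner=PortableRunner",
+#     "--job_endpoint=localhost:8099",
+#     "--environment_type=PROCESS",
+#     '--environment_config={"command": "/path/to/boot"}',
+# ])
+
+with beam.Pipeline(options=options) as pipeline:
+    (pipeline
+     | beam.Create(["a", "b", "c"])
+     | beam.Map(print))
+```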
\ No newline at end of file
diff --git a/website/src/documentation/sdks/python-dependencies.md b/website/src/documentation/sdks/python-dependencies.md
index 70da2bd..a983bd6 100644
--- a/website/src/documentation/sdks/python-dependencies.md
+++ b/website/src/documentation/sdks/python-dependencies.md
@@ -29,6 +29,46 @@
<p>To see the compile and runtime dependencies for your Beam SDK version, expand
the relevant section below.</p>
+<details><summary markdown="span"><b>2.17.0</b></summary>
+
+<p>Beam SDK for Python 2.17.0 has the following compile and runtime dependencies.</p>
+
+<table class="table-bordered table-striped">
+<tr><th>Package</th><th>Version</th></tr>
+ <tr><td>avro-python3</td><td>>=1.8.1,<2.0.0; python_version >= "3.0"</td></tr>
+ <tr><td>avro</td><td>>=1.8.1,<2.0.0; python_version < "3.0"</td></tr>
+ <tr><td>cachetools</td><td>>=3.1.0,<4</td></tr>
+ <tr><td>crcmod</td><td>>=1.7,<2.0</td></tr>
+ <tr><td>dill</td><td>>=0.3.0,<0.3.1</td></tr>
+ <tr><td>fastavro</td><td>>=0.21.4,<0.22</td></tr>
+ <tr><td>funcsigs</td><td>>=1.0.2,<2; python_version < "3.0"</td></tr>
+ <tr><td>future</td><td>>=0.16.0,<1.0.0</td></tr>
+ <tr><td>futures</td><td>>=3.2.0,<4.0.0; python_version < "3.0"</td></tr>
+ <tr><td>google-apitools</td><td>>=0.5.28,<0.5.29</td></tr>
+ <tr><td>google-cloud-bigquery</td><td>>=1.6.0,<1.18.0</td></tr>
+ <tr><td>google-cloud-bigtable</td><td>>=0.31.1,<1.1.0</td></tr>
+ <tr><td>google-cloud-core</td><td>>=0.28.1,<2</td></tr>
+ <tr><td>google-cloud-datastore</td><td>>=1.7.1,<1.8.0</td></tr>
+ <tr><td>google-cloud-pubsub</td><td>>=0.39.0,<1.1.0</td></tr>
+ <tr><td>googledatastore</td><td>>=7.0.1,<7.1; python_version < "3.0"</td></tr>
+ <tr><td>grpcio</td><td>>=1.12.1,<2</td></tr>
+ <tr><td>hdfs</td><td>>=2.1.0,<3.0.0</td></tr>
+ <tr><td>httplib2</td><td>>=0.8,<=0.12.0</td></tr>
+ <tr><td>mock</td><td>>=1.0.1,<3.0.0</td></tr>
+ <tr><td>oauth2client</td><td>>=2.0.1,<4</td></tr>
+ <tr><td>proto-google-cloud-datastore-v1</td><td>>=0.90.0,<=0.90.4; python_version < "3.0"</td></tr>
+ <tr><td>protobuf</td><td>>=3.5.0.post1,<4</td></tr>
+ <tr><td>pyarrow</td><td>>=0.15.1,<0.16.0; python_version >= "3.0" or platform_system != "Windows"</td></tr>
+ <tr><td>pydot</td><td>>=1.2.0,<2</td></tr>
+ <tr><td>pymongo</td><td>>=3.8.0,<4.0.0</td></tr>
+ <tr><td>python-dateutil</td><td>>=2.8.0,<3</td></tr>
+ <tr><td>pytz</td><td>>=2018.3</td></tr>
+ <tr><td>pyvcf</td><td>>=0.6.8,<0.7.0; python_version < "3.0"</td></tr>
+ <tr><td>typing</td><td>>=3.6.0,<3.7.0; python_version < "3.5.0"</td></tr>
+</table>
+
+</details>
+
<details><summary markdown="span"><b>2.16.0</b></summary>
<p>Beam SDK for Python 2.16.0 has the following compile and
@@ -443,4 +483,3 @@
</table>
</details>
-
diff --git a/website/src/get-started/downloads.md b/website/src/get-started/downloads.md
index ab19e5b..fed6d71 100644
--- a/website/src/get-started/downloads.md
+++ b/website/src/get-started/downloads.md
@@ -90,14 +90,21 @@
## Releases
-## 2.16.0 (2019-10-07)
+### 2.17.0 (2020-01-06)
+Official [source code download](http://www.apache.org/dyn/closer.cgi/beam/2.17.0/apache-beam-2.17.0-source-release.zip).
+[SHA-512](https://www.apache.org/dist/beam/2.17.0/apache-beam-2.17.0-source-release.zip.sha512).
+[signature](https://www.apache.org/dist/beam/2.17.0/apache-beam-2.17.0-source-release.zip.asc).
+
+[Release notes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12319527&version=12345970).
+
+### 2.16.0 (2019-10-07)
Official [source code download](http://www.apache.org/dyn/closer.cgi/beam/2.16.0/apache-beam-2.16.0-source-release.zip).
[SHA-512](https://www.apache.org/dist/beam/2.16.0/apache-beam-2.16.0-source-release.zip.sha512).
[signature](https://www.apache.org/dist/beam/2.16.0/apache-beam-2.16.0-source-release.zip.asc).
[Release notes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12319527&version=12345494).
-## 2.15.0 (2019-08-22)
+### 2.15.0 (2019-08-22)
Official [source code download](http://www.apache.org/dyn/closer.cgi/beam/2.15.0/apache-beam-2.15.0-source-release.zip).
[SHA-512](https://www.apache.org/dist/beam/2.15.0/apache-beam-2.15.0-source-release.zip.sha512).
[signature](https://www.apache.org/dist/beam/2.15.0/apache-beam-2.15.0-source-release.zip.asc).
diff --git a/website/src/get-started/wordcount-example.md b/website/src/get-started/wordcount-example.md
index 1019710..0bb304c 100644
--- a/website/src/get-started/wordcount-example.md
+++ b/website/src/get-started/wordcount-example.md
@@ -171,7 +171,7 @@
represent a dataset of virtually any size, including unbounded datasets.
-![The MinimalWordCount pipeline data flow.]({{ "/images/wordcount-pipeline.png" | prepend: site.baseurl }}){: width="800px"}
+![The MinimalWordCount pipeline data flow.]({{ "/images/wordcount-pipeline.svg" | prepend: site.baseurl }}){: width="800px"}
*Figure 1: The MinimalWordCount pipeline data flow.*
diff --git a/website/src/images/design-your-pipeline-additional-outputs.png b/website/src/images/design-your-pipeline-additional-outputs.png
deleted file mode 100644
index a4fae32..0000000
--- a/website/src/images/design-your-pipeline-additional-outputs.png
+++ /dev/null
Binary files differ
diff --git a/website/src/images/design-your-pipeline-additional-outputs.svg b/website/src/images/design-your-pipeline-additional-outputs.svg
new file mode 100644
index 0000000..7ed856c
--- /dev/null
+++ b/website/src/images/design-your-pipeline-additional-outputs.svg
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<svg width="1158px" height="207px" viewBox="0 0 1158 207" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <title>A pipeline with a transform that outputs multiple PCollections.</title>
+ <g id="design-your-pipeline-additional-outputs" transform="translate(0.000000, 1.000000)">
+ <g id="Branch" transform="translate(676.000000, 32.000000)" stroke="#3062A8" stroke-width="2" fill="#FFFFFF">
+ <polyline id="Directed-edge" points="0 72 8.75 72 8.75 0 20 0"></polyline>
+ <polyline id="Directed-edge" transform="translate(10.000000, 108.000000) scale(1, -1) translate(-10.000000, -108.000000) " points="0 144 8.75 144 8.75 72 20 72"></polyline>
+ </g>
+ <g id="PCollection" transform="translate(696.000000, 143.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="‘B’-names" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="23.3242188" y="22">‘B’ </tspan>
+ <tspan x="7.359375" y="41">names</tspan>
+ </text>
+ </g>
+ <g id="PCollection" transform="translate(696.000000, 0.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="‘A’-names" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="23.0859375" y="22">‘A’ </tspan>
+ <tspan x="7.359375" y="41">names</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(482.000000, 40.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="ParDo" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="64" y="70">ParDo</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(353.000000, 102.500000)">
+ <path d="M11,-8.5 L11,11.5" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(11.000000, 1.500000) rotate(-90.000000) translate(-11.000000, -1.500000) "></path>
+ <path id="Line" d="M115,2.5 L83,2.5 L83,0.5 L115,0.5 L115,-5.5 L129,1.5 L115,8.5 L115,2.5 Z" fill="#757575" fill-rule="nonzero"></path>
+ </g>
+ <g id="PCollection" transform="translate(374.000000, 73.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="Table-rows" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="11.2070312" y="28">Table </tspan>
+ <tspan x="13.59375" y="47">rows</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(160.000000, 42.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Read-database-of-nam" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="33" y="42">Read </tspan>
+ <tspan x="33" y="70">database of </tspan>
+ <tspan x="33" y="98">names</tspan>
+ </text>
+ </g>
+ <path id="Directed-edge" d="M146,103.5 L113,103.5 L113,101.5 L146,101.5 L146,95.5 L160,102.5 L146,109.5 L146,103.5 Z" fill="#E0E0E0" fill-rule="nonzero"></path>
+ <g id="Database" transform="translate(0.000000, 51.000000)">
+ <g id="Database-symbol" transform="translate(0.000000, 0.000000)">
+ <ellipse id="Oval" fill="#E0E0E0" cx="56.5" cy="83.2631579" rx="56.5" ry="17.8421053"></ellipse>
+ <rect id="Rectangle" fill="#E0E0E0" x="0" y="17.8421053" width="113" height="65.4210526"></rect>
+ <ellipse id="Oval" fill="#EFEFEF" cx="56.5" cy="17.8421053" rx="56.5" ry="17.8421053"></ellipse>
+ </g>
+ <text id="Database-table" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#414141">
+ <tspan x="17" y="61">Database </tspan>
+ <tspan x="17" y="82">table</tspan>
+ </text>
+ </g>
+ <g id="Legend" transform="translate(0.000000, 183.000000)">
+ <text id="PCollection" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="155" y="17">PCollection</tspan>
+ </text>
+ <circle id="PCollection-symbol" stroke="#757575" fill="#FFFFFF" stroke-width="2" cx="140.5" cy="10.5" r="7.5"></circle>
+ <text id="PTransform" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="17">PTransform</tspan>
+ </text>
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="3" width="15" height="15"></rect>
+ </g>
+ </g>
+</svg>
\ No newline at end of file
diff --git a/website/src/images/design-your-pipeline-flatten.png b/website/src/images/design-your-pipeline-flatten.png
deleted file mode 100644
index d07f7e5..0000000
--- a/website/src/images/design-your-pipeline-flatten.png
+++ /dev/null
Binary files differ
diff --git a/website/src/images/design-your-pipeline-flatten.svg b/website/src/images/design-your-pipeline-flatten.svg
new file mode 100644
index 0000000..c354cd7
--- /dev/null
+++ b/website/src/images/design-your-pipeline-flatten.svg
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<svg width="1158px" height="266px" viewBox="0 0 1158 266" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <title>A pipeline that merges two collections into one collection with the Flatten transform.</title>
+ <g id="design-your-pipeline-flatten">
+ <path d="M1012,121 L1012,141" id="Directed-edge" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(1012.000000, 131.000000) rotate(-90.000000) translate(-1012.000000, -131.000000) "></path>
+ <g id="PCollection" transform="translate(1024.000000, 97.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="34.5" cy="34.5" r="34.5"></circle>
+ <text id="‘A’-+-‘B’-names" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="9.4140625" y="31">‘A’ + ‘B’ </tspan>
+ <tspan x="10.359375" y="50">names</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(808.000000, 70.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Flatten" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="60" y="70">Flatten</tspan>
+ </text>
+ </g>
+ <g id="Merge" transform="translate(678.000000, 60.000000)">
+ <path id="Directed-edge" d="M116,73 L105.125,73 L105.125,147 L84,147 L84,145 L103.125,145 L103.125,71 L116,71 L116,65 L130,72 L116,79 L116,73 Z" fill="#757575" fill-rule="nonzero"></path>
+ <path id="Directed-edge" d="M116,73 L103.125,73 L103.125,2 L84,2 L84,0 L105.125,0 L105.125,71 L116,71 L116,65 L130,72 L116,79 L116,73 Z" fill="#757575" fill-rule="nonzero"></path>
+ <path d="M10,-9 L10,11" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(10.000000, 1.000000) rotate(-90.000000) translate(-10.000000, -1.000000) "></path>
+ <path d="M10,135 L10,155" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(10.000000, 145.000000) rotate(-90.000000) translate(-10.000000, -145.000000) "></path>
+ </g>
+ <g id="PCollection" transform="translate(700.000000, 30.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="‘A’-names" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="23.5859375" y="23">‘A’ </tspan>
+ <tspan x="7.859375" y="42">names</tspan>
+ </text>
+ </g>
+ <g id="PCollection" transform="translate(700.000000, 173.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="‘B’-names" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="23.3242188" y="23">‘B’ </tspan>
+ <tspan x="7.359375" y="42">names</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(483.000000, 0.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="ParDo-(extract-strin" font-family="Roboto-Regular, Roboto" font-size="23" font-weight="normal" fill="#FFFFFF">
+ <tspan x="20" y="40">ParDo (extract </tspan>
+ <tspan x="20" y="67">strings starting </tspan>
+ <tspan x="20" y="94">with ‘A’)</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(483.000000, 142.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="ParDo-(extract-strin" font-family="Roboto-Regular, Roboto" font-size="23" font-weight="normal" fill="#FFFFFF">
+ <tspan x="20" y="43">ParDo (extract </tspan>
+ <tspan x="20" y="70">strings starting </tspan>
+ <tspan x="20" y="97">with ‘B’)</tspan>
+ </text>
+ </g>
+ <g id="Branch" transform="translate(353.000000, 61.000000)">
+ <path id="Directed-edge" d="M115.983293,144.729332 L103.125,144.480863 L103.125,73 L84,73 L84,71 L105.125,71 L105.125,142.519137 L116.021933,142.729706 L116.137854,136.730826 L130,144 L115.867373,150.728213 L115.983293,144.729332 Z" fill="#757575" fill-rule="nonzero"></path>
+ <path id="Directed-edge" d="M116,1 L105.125,1 L105.125,73 L84,73 L84,71 L103.125,71 L103.125,-1 L116,-1 L116,-7 L130,0 L116,7 L116,1 Z" fill="#757575" fill-rule="nonzero"></path>
+ <path d="M11,63 L11,83" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(11.000000, 73.000000) rotate(-90.000000) translate(-11.000000, -73.000000) "></path>
+ </g>
+ <g id="PCollection" transform="translate(374.000000, 103.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="Table-rows" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="11.2070312" y="27">Table </tspan>
+ <tspan x="13.59375" y="46">rows</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(160.000000, 72.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Read-database-of-nam" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="17" y="56">Read database </tspan>
+ <tspan x="17" y="84">of names</tspan>
+ </text>
+ </g>
+ <path id="Directed-edge" d="M146,133.5 L113,133.5 L113,131.5 L146,131.5 L146,125.5 L160,132.5 L146,139.5 L146,133.5 Z" fill="#E0E0E0" fill-rule="nonzero"></path>
+ <g id="Database" transform="translate(0.000000, 82.000000)">
+ <g id="Database-symbol">
+ <ellipse id="Oval" fill="#E0E0E0" cx="56.5" cy="84" rx="56.5" ry="18"></ellipse>
+ <rect id="Rectangle" fill="#E0E0E0" x="0" y="18" width="113" height="66"></rect>
+ <ellipse id="Oval" fill="#EFEFEF" cx="56.5" cy="18" rx="56.5" ry="18"></ellipse>
+ </g>
+ <text id="Database-table" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#414141">
+ <tspan x="22" y="61">Database </tspan>
+ <tspan x="22" y="82">table</tspan>
+ </text>
+ </g>
+ <g id="Legend" transform="translate(0.000000, 214.000000)">
+ <text id="PCollection" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="155" y="17">PCollection</tspan>
+ </text>
+ <circle id="PCollection-symbol" stroke="#757575" fill="#FFFFFF" stroke-width="2" cx="140.5" cy="10.5" r="7.5"></circle>
+ <text id="PTransform" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="17">PTransform</tspan>
+ </text>
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="3" width="15" height="15"></rect>
+ </g>
+ </g>
+</svg>
\ No newline at end of file
diff --git a/website/src/images/design-your-pipeline-join.png b/website/src/images/design-your-pipeline-join.png
deleted file mode 100644
index b7ccb9f..0000000
--- a/website/src/images/design-your-pipeline-join.png
+++ /dev/null
Binary files differ
diff --git a/website/src/images/design-your-pipeline-join.svg b/website/src/images/design-your-pipeline-join.svg
new file mode 100644
index 0000000..41ca394
--- /dev/null
+++ b/website/src/images/design-your-pipeline-join.svg
@@ -0,0 +1,98 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<svg width="1158px" height="333px" viewBox="0 0 1158 333" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <title>A pipeline that does a relational join of two input collections.</title>
+ <g id="design-your-pipeline-join">
+ <g id="PTransform" transform="translate(160.000000, 0.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Read-from-database" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="41" y="55">Read from </tspan>
+ <tspan x="41" y="83">database</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(160.000000, 142.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Read-from-text-file" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="40" y="56">Read from </tspan>
+ <tspan x="40" y="84">text file</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(496.000000, 39.725000)">
+ <polygon id="Aggregation-PTransform-symbol" fill="#3062A8" points="128.813953 0 261 91.7333333 128.813953 184 0 92.2666667"></polygon>
+ <text id="Join" font-family="Roboto-Regular, Roboto" font-size="28" font-weight="normal" fill="#FFFFFF">
+ <tspan x="98.1689453" y="101.275">Join</tspan>
+ </text>
+ </g>
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="815" cy="131" r="37"></circle>
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="413" cy="60" r="36"></circle>
+ <text id="Names-+-addrs." font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="387.570312" y="56">Names</tspan>
+ <tspan x="384.171875" y="75">+ addrs.</tspan>
+ </text>
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="413" cy="205" r="36"></circle>
+ <text id="Names-+-orders" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="387.070312" y="200">Names</tspan>
+ <tspan x="383.132812" y="219">+ orders</tspan>
+ </text>
+ <text id="Names,-orders-+-addr" font-family="Roboto-Regular, Roboto" font-size="15" font-weight="normal" fill="#000000">
+ <tspan x="790.6875" y="120">Names, </tspan>
+ <tspan x="788.468262" y="138">orders + </tspan>
+ <tspan x="795.082031" y="156">addrs.</tspan>
+ </text>
+ <path d="M766,121.5 L766,141.5" id="Directed-edge" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(766.000000, 131.500000) rotate(-90.000000) translate(-766.000000, -131.500000) "></path>
+ <g id="Merge" transform="translate(354.377439, 60.000000)">
+ <path id="Directed-edge" d="M127.622561,73 L116.747561,73 L116.747561,147 L95,147 L95,145 L114.747561,145 L114.747561,71 L127.622561,71 L127.622561,65 L141.622561,72 L127.622561,79 L127.622561,73 Z" fill="#757575" fill-rule="nonzero"></path>
+ <path id="Directed-edge" d="M127.622561,73 L114.747561,73 L114.747561,2 L95.6225614,2 L95.6225614,0 L116.747561,0 L116.747561,71 L127.622561,71 L127.622561,65 L141.622561,72 L127.622561,79 L127.622561,73 Z" fill="#757575" fill-rule="nonzero"></path>
+ <path d="M10.6225614,-9 L10.6225614,11" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(10.622561, 1.000000) rotate(-90.000000) translate(-10.622561, -1.000000) "></path>
+ <path d="M10.6225614,135 L10.6225614,155" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(10.622561, 145.000000) rotate(-90.000000) translate(-10.622561, -145.000000) "></path>
+ </g>
+ <path id="Directed-edge" d="M146,61.5 L113,61.5 L113,59.5 L146,59.5 L146,53.5 L160,60.5 L146,67.5 L146,61.5 Z" fill="#E0E0E0" fill-rule="nonzero"></path>
+ <path id="Directed-edge" d="M146,205.5 L114,205.5 L114,203.5 L146,203.5 L146,197.5 L160,204.5 L146,211.5 L146,205.5 Z" fill="#E0E0E0" fill-rule="nonzero"></path>
+ <g id="Database" transform="translate(0.000000, 9.000000)">
+ <ellipse id="Oval" fill="#E0E0E0" cx="56.5" cy="83.2631579" rx="56.5" ry="17.8421053"></ellipse>
+ <rect id="Rectangle" fill="#E0E0E0" x="0" y="17.8421053" width="113" height="65.4210526"></rect>
+ <ellipse id="Oval" fill="#EFEFEF" cx="56.5" cy="17.8421053" rx="56.5" ry="17.8421053"></ellipse>
+ <text id="Database-table" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#414141">
+ <tspan x="21" y="60">Database </tspan>
+ <tspan x="21" y="81">table</tspan>
+ </text>
+ </g>
+ <g id="Document" transform="translate(1.000000, 161.000000)">
+ <path d="M0,0 L0,81.1880544 C5.73926173,85.0626848 15.1689615,87 28.2890992,87 C56.5,87 57.2032971,71.8576229 84.878475,71.8576229 C90.6940962,71.8576229 100.067938,74.9677667 113,81.1880544 L113,0 L0,0 Z" id="Document-symbol" fill="#E0E0E0"></path>
+ <text id="Text-file" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#414141">
+ <tspan x="25" y="43">Text file</tspan>
+ </text>
+ </g>
+ <g id="Legend" transform="translate(1.000000, 278.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="4" width="15" height="15"></rect>
+ <text id="PTransform" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="17">PTransform</tspan>
+ </text>
+ <text id="Aggregation" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="162" y="17">Aggregating PTransform</tspan>
+ </text>
+ <text id="PCollection" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="52">PCollection</tspan>
+ </text>
+ <circle id="PCollection-symbol" fill="#FFFFFF" stroke="#757575" stroke-width="2" cx="7.5" cy="46.5" r="7.5"></circle>
+ <path d="M143.364341,5 L154,12.4782609 L143.364341,20 L133,12.5217391 L143.364341,5 Z" id="Rectangle" fill="#3062A8"></path>
+ </g>
+ </g>
+</svg>
\ No newline at end of file
diff --git a/website/src/images/design-your-pipeline-linear.png b/website/src/images/design-your-pipeline-linear.png
deleted file mode 100644
index a021fe7..0000000
--- a/website/src/images/design-your-pipeline-linear.png
+++ /dev/null
Binary files differ
diff --git a/website/src/images/design-your-pipeline-linear.svg b/website/src/images/design-your-pipeline-linear.svg
new file mode 100644
index 0000000..19ba99f
--- /dev/null
+++ b/website/src/images/design-your-pipeline-linear.svg
@@ -0,0 +1,98 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<svg width="1158px" height="163px" viewBox="0 0 1158 163" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <title>A simple, linear pipeline.</title>
+ <g id="design-your-pipeline-linear" transform="translate(0.000000, 0.000000)">
+ <g id="Legend" transform="translate(0.000000, 143.000000)">
+ <text id="PCollection" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="155" y="17">PCollection</tspan>
+ </text>
+ <circle id="PCollection-symbol" fill="#FFFFFF" stroke="#757575" stroke-width="2" cx="140.5" cy="11.5" r="7.5"></circle>
+ <text id="PTransform" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="17">PTransform</tspan>
+ </text>
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="3" width="15" height="15"></rect>
+ </g>
+ <path id="Directed-edge" d="M1031,63 L999,63 L999,61 L1031,61 L1031,55 L1045,62 L1031,69 L1031,63 Z" fill="#E0E0E0" fill-rule="nonzero"></path>
+ <g id="Database" transform="translate(0.000000, 11.000000)">
+ <g id="Database-symbol" transform="translate(0.000000, 0.000000)">
+ <ellipse id="Oval" fill="#E0E0E0" cx="56.5" cy="83.2631579" rx="56.5" ry="17.8421053"></ellipse>
+ <rect id="Rectangle" fill="#E0E0E0" x="0" y="17.8421053" width="113" height="65.4210526"></rect>
+ <ellipse id="Oval" fill="#EFEFEF" cx="56.5" cy="17.8421053" rx="56.5" ry="17.8421053"></ellipse>
+ </g>
+ <text id="Database-table" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#414141">
+ <tspan x="18" y="61">Database </tspan>
+ <tspan x="18" y="82">table</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(805.000000, 0.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Write-transform" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="44" y="56">Write </tspan>
+ <tspan x="44" y="84">transform</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(675.500000, 60.500000)">
+ <path id="Line" d="M115.5,2.5 L83.5,2.5 L83.5,0.5 L115.5,0.5 L115.5,-5.5 L129.5,1.5 L115.5,8.5 L115.5,2.5 Z" fill="#757575" fill-rule="nonzero"></path>
+ <path d="M11,-8.5 L11,11.5" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(11.000000, 1.500000) rotate(-90.000000) translate(-11.000000, -1.500000) "></path>
+ </g>
+ <g id="PCollection" transform="translate(696.500000, 31.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="PColl." font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="10.6953125" y="36">PColl.</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(482.000000, 0.000000)">
+ <path d="M0,0 L194,0 L194,124 L0,124 L0,0 Z" id="PTransform-symbol" fill="#3062A8"></path>
+ <text id="Transform" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="41" y="70">Transform</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(353.000000, 60.500000)">
+ <path d="M11,-8.5 L11,11.5" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(11.000000, 1.500000) rotate(-90.000000) translate(-11.000000, -1.500000) "></path>
+ <path id="Line" d="M115,2.5 L83,2.5 L83,0.5 L115,0.5 L115,-5.5 L129,1.5 L115,8.5 L115,2.5 Z" fill="#757575" fill-rule="nonzero"></path>
+ </g>
+ <g id="PCollection" transform="translate(374.000000, 31.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="PColl." font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="10.1953125" y="36">PColl.</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(160.000000, 0.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Read-transform" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="44" y="56">Read </tspan>
+ <tspan x="44" y="84">transform</tspan>
+ </text>
+ </g>
+ <path id="Directed-edge" d="M146,63 L113,63 L113,61 L146,61 L146,55 L160,62 L146,69 L146,63 Z" fill="#E0E0E0" fill-rule="nonzero"></path>
+ <g id="Database" transform="translate(1045.000000, 11.000000)">
+ <g id="Database-symbol" transform="translate(0.000000, 0.000000)">
+ <ellipse id="Oval" fill="#E0E0E0" cx="56.5" cy="83.2631579" rx="56.5" ry="17.8421053"></ellipse>
+ <rect id="Rectangle" fill="#E0E0E0" x="0" y="17.8421053" width="113" height="65.4210526"></rect>
+ <ellipse id="Oval" fill="#EFEFEF" cx="56.5" cy="17.8421053" rx="56.5" ry="17.8421053"></ellipse>
+ </g>
+ <text id="Database-table" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#414141">
+ <tspan x="18" y="61">Database </tspan>
+ <tspan x="18" y="82">table</tspan>
+ </text>
+ </g>
+ </g>
+</svg>
\ No newline at end of file
diff --git a/website/src/images/design-your-pipeline-multiple-pcollections.png b/website/src/images/design-your-pipeline-multiple-pcollections.png
deleted file mode 100644
index 7eb802b..0000000
--- a/website/src/images/design-your-pipeline-multiple-pcollections.png
+++ /dev/null
Binary files differ
diff --git a/website/src/images/design-your-pipeline-multiple-pcollections.svg b/website/src/images/design-your-pipeline-multiple-pcollections.svg
new file mode 100644
index 0000000..15314c5
--- /dev/null
+++ b/website/src/images/design-your-pipeline-multiple-pcollections.svg
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<svg width="1158px" height="266px" viewBox="0 0 1158 266" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <title>A branching pipeline. Two transforms are applied to a single PCollection of database table rows.</title>
+ <g id="design-your-pipeline-multiple-pcollections">
+ <g id="Merge" transform="translate(678.000000, 60.000000)" stroke="#3062A8" stroke-linecap="square" stroke-width="2">
+ <path d="M10,-9 L10,11" id="Line" transform="translate(10.000000, 1.000000) rotate(-90.000000) translate(-10.000000, -1.000000) "></path>
+ <path d="M10,135 L10,155" id="Line" transform="translate(10.000000, 145.000000) rotate(-90.000000) translate(-10.000000, -145.000000) "></path>
+ </g>
+ <g id="PCollection" transform="translate(700.000000, 30.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="‘A’-names" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="23.5859375" y="23">‘A’ </tspan>
+ <tspan x="7.859375" y="42">names</tspan>
+ </text>
+ </g>
+ <g id="PCollection" transform="translate(700.000000, 173.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="‘B’-names" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="23.3242188" y="23">‘B’ </tspan>
+ <tspan x="7.359375" y="42">names</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(483.000000, 0.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="ParDo-(extract-strin" font-family="Roboto-Regular, Roboto" font-size="23" font-weight="normal" fill="#FFFFFF">
+ <tspan x="20" y="40">ParDo (extract </tspan>
+ <tspan x="20" y="67">strings starting </tspan>
+ <tspan x="20" y="94">with ‘A’)</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(483.000000, 142.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="ParDo-(extract-strin" font-family="Roboto-Regular, Roboto" font-size="23" font-weight="normal" fill="#FFFFFF">
+ <tspan x="20" y="43">ParDo (extract </tspan>
+ <tspan x="20" y="70">strings starting </tspan>
+ <tspan x="20" y="97">with ‘B’)</tspan>
+ </text>
+ </g>
+ <g id="Branch" transform="translate(353.000000, 61.000000)">
+ <path id="Directed-edge" d="M115.983293,144.729332 L103.125,144.480863 L103.125,73 L84,73 L84,71 L105.125,71 L105.125,142.519137 L116.021933,142.729706 L116.137854,136.730826 L130,144 L115.867373,150.728213 L115.983293,144.729332 Z" fill="#757575" fill-rule="nonzero"></path>
+ <path id="Directed-edge" d="M116,1 L105.125,1 L105.125,73 L84,73 L84,71 L103.125,71 L103.125,-1 L116,-1 L116,-7 L130,0 L116,7 L116,1 Z" fill="#757575" fill-rule="nonzero"></path>
+ <path d="M11,63 L11,83" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(11.000000, 73.000000) rotate(-90.000000) translate(-11.000000, -73.000000) "></path>
+ </g>
+ <g id="PCollection" transform="translate(374.000000, 103.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="Table-rows" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="11.2070312" y="27">Table </tspan>
+ <tspan x="13.59375" y="46">rows</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(160.000000, 72.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Read-database-of-nam" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="17" y="57">Read database </tspan>
+ <tspan x="17" y="85">of names</tspan>
+ </text>
+ </g>
+ <path id="Directed-edge" d="M146,133.5 L113,133.5 L113,131.5 L146,131.5 L146,125.5 L160,132.5 L146,139.5 L146,133.5 Z" fill="#E0E0E0" fill-rule="nonzero"></path>
+ <g id="Database" transform="translate(0.000000, 82.000000)">
+ <g id="Database-symbol">
+ <ellipse id="Oval" fill="#E0E0E0" cx="56.5" cy="84" rx="56.5" ry="18"></ellipse>
+ <rect id="Rectangle" fill="#E0E0E0" x="0" y="18" width="113" height="66"></rect>
+ <ellipse id="Oval" fill="#EFEFEF" cx="56.5" cy="18" rx="56.5" ry="18"></ellipse>
+ </g>
+ <text id="Database-table" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#414141">
+ <tspan x="22" y="61">Database </tspan>
+ <tspan x="22" y="82">table</tspan>
+ </text>
+ </g>
+ <g id="Legend" transform="translate(0.000000, 214.000000)">
+ <text id="PCollection" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="155" y="17">PCollection</tspan>
+ </text>
+ <circle id="PCollection-symbol" stroke="#757575" fill="#FFFFFF" stroke-width="2" cx="140.5" cy="11.5" r="7.5"></circle>
+ <text id="PTransform" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="17">PTransform</tspan>
+ </text>
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="4" width="15" height="15"></rect>
+ </g>
+ </g>
+</svg>
\ No newline at end of file
diff --git a/website/src/images/unwindowed-pipeline-bounded.png b/website/src/images/unwindowed-pipeline-bounded.png
deleted file mode 100644
index 7725f34..0000000
--- a/website/src/images/unwindowed-pipeline-bounded.png
+++ /dev/null
Binary files differ
diff --git a/website/src/images/unwindowed-pipeline-bounded.svg b/website/src/images/unwindowed-pipeline-bounded.svg
new file mode 100644
index 0000000..ccf7ab1
--- /dev/null
+++ b/website/src/images/unwindowed-pipeline-bounded.svg
@@ -0,0 +1,100 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<svg width="1158px" height="227px" viewBox="0 0 1158 227" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <title>GroupByKey and ParDo without windowing, on a bounded collection.</title>
+ <g id="unwindowed-pipeline-bounded" transform="translate(1.000000, 0.000000)">
+ <g id="Legend" transform="translate(0.000000, 172.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="4" width="15" height="15"></rect>
+ <text id="PTransform" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="17">PTransform</tspan>
+ </text>
+ <text id="Aggregation" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="162" y="17">Aggregating PTransform</tspan>
+ </text>
+ <text id="PCollection" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="52">PCollection</tspan>
+ </text>
+ <circle id="PCollection-symbol" fill="#FFFFFF" stroke="#757575" stroke-width="2" cx="7.5" cy="46.5" r="7.5"></circle>
+ <path d="M143.364341,5 L154,12.4782609 L143.364341,20 L133,12.5217391 L143.364341,5 Z" id="Rectangle" fill="#3062A8"></path>
+ </g>
+ <path d="M1072.5,80 L1072.5,100" id="Directed-edge" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(1072.500000, 90.000000) rotate(-90.000000) translate(-1072.500000, -90.000000) "></path>
+ <g id="PCollection" transform="translate(1084.500000, 55.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="35" cy="35" r="35"></circle>
+ <text id="Rows-after-ParDo" font-family="Roboto-Regular, Roboto" font-size="15" font-weight="normal" fill="#000000">
+ <tspan x="17.1015625" y="21">Rows </tspan>
+ <tspan x="19.8554688" y="39">after </tspan>
+ <tspan x="14.9555664" y="57">ParDo</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(868.000000, 28.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="ParDo" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="64.5" y="70">ParDo</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(741.550000, 90.765000)">
+ <path d="M10.5,-9 L10.5,11" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(10.500000, 1.000000) rotate(-90.000000) translate(-10.500000, -1.000000) "></path>
+ <path id="Line" d="M112.45,2.235 L83.45,2.235 L83.45,0.235 L112.45,0.235 L112.45,-5.765 L126.45,1.235 L112.45,8.235 L112.45,2.235 Z" fill="#757575" fill-rule="nonzero"></path>
+ </g>
+ <g id="PCollection" transform="translate(763.000000, 61.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="30.9530303" cy="31.2207576" r="30.530303"></circle>
+ <text id="Rows-by-key" font-family="Roboto-Regular, Roboto" font-size="15" font-weight="normal" fill="#000000">
+ <tspan x="12.616714" y="27">Rows </tspan>
+ <tspan x="10.0788722" y="45">by key</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(482.000000, 0.000000)">
+ <polygon id="Aggregation-PTransform-symbol" fill="#3062A8" points="128.813953 0 261 91.7333333 128.813953 184 0 92.2666667"></polygon>
+ <text id="GroupByKey" font-family="Roboto-Regular, Roboto" font-size="23" font-weight="normal" fill="#FFFFFF">
+ <tspan x="68.5810547" y="99">GroupByKey</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(353.000000, 90.500000)">
+ <path d="M11,-8.5 L11,11.5" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(11.000000, 1.500000) rotate(-90.000000) translate(-11.000000, -1.500000) "></path>
+ <path id="Path-2" d="M115,2.5 L83,2.5 L83,0.5 L115,0.5 L115,-5.5 L129,1.5 L115,8.5 L115,2.5 Z" fill="#757575" fill-rule="nonzero"></path>
+ </g>
+ <g id="PCollection" transform="translate(374.000000, 61.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="Table-rows" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="11.2070312" y="28">Table </tspan>
+ <tspan x="13.59375" y="47">rows</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(160.000000, 30.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Read-from-Kafka-IO" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="44" y="55">Read from </tspan>
+ <tspan x="44" y="83">Kafka IO</tspan>
+ </text>
+ </g>
+ <path id="Directed-edge" d="M146,92 L113,92 L113,90 L146,90 L146,84 L160,91 L146,98 L146,92 Z" fill="#E0E0E0" fill-rule="nonzero"></path>
+ <g id="Database" transform="translate(0.000000, 40.000000)">
+ <g id="Database-symbol" transform="translate(0.000000, 0.000000)">
+ <ellipse id="Oval" fill="#E0E0E0" cx="56.5" cy="83.2631579" rx="56.5" ry="17.8421053"></ellipse>
+ <rect id="Rectangle" fill="#E0E0E0" x="0" y="17.8421053" width="113" height="65.4210526"></rect>
+ <ellipse id="Oval" fill="#EFEFEF" cx="56.5" cy="17.8421053" rx="56.5" ry="17.8421053"></ellipse>
+ </g>
+ <text id="Database-table" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#414141">
+ <tspan x="18" y="61">Database </tspan>
+ <tspan x="18" y="82">table</tspan>
+ </text>
+ </g>
+ </g>
+</svg>
\ No newline at end of file
diff --git a/website/src/images/windowing-pipeline-bounded.png b/website/src/images/windowing-pipeline-bounded.png
deleted file mode 100644
index 198ed11..0000000
--- a/website/src/images/windowing-pipeline-bounded.png
+++ /dev/null
Binary files differ
diff --git a/website/src/images/windowing-pipeline-bounded.svg b/website/src/images/windowing-pipeline-bounded.svg
new file mode 100644
index 0000000..743fe2e
--- /dev/null
+++ b/website/src/images/windowing-pipeline-bounded.svg
@@ -0,0 +1,120 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<svg width="1175px" height="443px" viewBox="0 0 1175 443" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <title>GroupByKey and ParDo with windowing, on a bounded collection.</title>
+ <g id="windowing-pipeline-bounded" transform="translate(1.000000, 0.000000)">
+ <path d="M204.5,296 L204.5,316" id="Directed-edge" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(204.500000, 306.000000) rotate(-90.000000) translate(-204.500000, -306.000000) "></path>
+ <g id="PCollection" transform="translate(214.000000, 271.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="35" cy="35" r="35"></circle>
+ <text id="Rows-after-ParDo" font-family="Roboto-Regular, Roboto" font-size="15" font-weight="normal" fill="#000000">
+ <tspan x="17.1015625" y="21">Rows </tspan>
+ <tspan x="19.8554688" y="39">after </tspan>
+ <tspan x="14.9555664" y="57">ParDo</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(0.000000, 244.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="ParDo" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="64.5" y="70">ParDo</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(90.550000, 90.765000)">
+ <path id="Line" d="M-0.55,139.235 L-0.55,127.735 L1036.45058,127.735 L1036.5,42.4994202 L1038.5,42.5005798 L1038.44942,129.735 L1.45,129.735 L1.45,139.235 L7.45,139.235 L0.45,153.235 L-6.55,139.235 L-0.55,139.235 Z" fill="#757575" fill-rule="nonzero"></path>
+ <path d="M983.5,-9 L983.5,11" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(983.500000, 1.000000) rotate(-90.000000) translate(-983.500000, -1.000000) "></path>
+ </g>
+ <g id="PCollection" transform="translate(1085.550000, 48.265000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="42.5" cy="42.5" r="42.5"></circle>
+ <text id="Grouped-by-key-and-w" font-family="Roboto-Regular, Roboto" font-size="15" font-weight="normal" fill="#000000">
+ <tspan x="14.0307617" y="30">Grouped </tspan>
+ <tspan x="7.25952148" y="48">by key and </tspan>
+ <tspan x="16.7626953" y="66">window</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(804.000000, 0.000000)">
+ <polygon id="Aggregation-PTransform-symbol" fill="#3062A8" points="128.813953 0 261 91.7333333 128.813953 184 0 92.2666667"></polygon>
+ <text id="GroupByKey-(per-wind" font-family="Roboto-Regular, Roboto" font-size="23" font-weight="normal" fill="#FFFFFF">
+ <tspan x="67.6310547" y="84.265">GroupByKey </tspan>
+ <tspan x="63.374707" y="111.265">(per window)</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(675.500000, 90.500000)">
+ <path d="M11,-8.5 L11,11.5" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(11.000000, 1.500000) rotate(-90.000000) translate(-11.000000, -1.500000) "></path>
+ <path id="Path-2" d="M115,2.5 L83,2.5 L83,0.5 L115,0.5 L115,-5.5 L129,1.5 L115,8.5 L115,2.5 Z" fill="#757575" fill-rule="nonzero"></path>
+ </g>
+ <g id="PCollection" transform="translate(696.500000, 61.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="Table-rows" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="11.7070312" y="28">Table </tspan>
+ <tspan x="14.09375" y="47">rows</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(482.000000, 30.000000)">
+ <path d="M0,0 L194,0 L194,124 L0,124 L0,0 Z" id="PTransform-symbol" fill="#3062A8"></path>
+ <text id="Apply-windows" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="48" y="54">Apply </tspan>
+ <tspan x="48" y="82">windows</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(353.000000, 90.500000)">
+ <path d="M11,-8.5 L11,11.5" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(11.000000, 1.500000) rotate(-90.000000) translate(-11.000000, -1.500000) "></path>
+ <path id="Path-2" d="M115,2.5 L83,2.5 L83,0.5 L115,0.5 L115,-5.5 L129,1.5 L115,8.5 L115,2.5 Z" fill="#757575" fill-rule="nonzero"></path>
+ </g>
+ <g id="PCollection" transform="translate(374.000000, 61.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="Table-rows" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="11.2070312" y="28">Table </tspan>
+ <tspan x="13.59375" y="47">rows</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(160.000000, 30.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Read-from-Kafka-IO" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="44" y="55">Read from </tspan>
+ <tspan x="44" y="83">Kafka IO</tspan>
+ </text>
+ </g>
+ <path id="Directed-edge" d="M146,92 L113,92 L113,90 L146,90 L146,84 L160,91 L146,98 L146,92 Z" fill="#E0E0E0" fill-rule="nonzero"></path>
+ <g id="Database" transform="translate(0.000000, 40.000000)">
+ <g id="Database-symbol" transform="translate(0.000000, 0.000000)">
+ <ellipse id="Oval" fill="#E0E0E0" cx="56.5" cy="83.2631579" rx="56.5" ry="17.8421053"></ellipse>
+ <rect id="Rectangle" fill="#E0E0E0" x="0" y="17.8421053" width="113" height="65.4210526"></rect>
+ <ellipse id="Oval" fill="#EFEFEF" cx="56.5" cy="17.8421053" rx="56.5" ry="17.8421053"></ellipse>
+ </g>
+ <text id="Database-table" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#414141">
+ <tspan x="18" y="61">Database </tspan>
+ <tspan x="18" y="82">table</tspan>
+ </text>
+ </g>
+ <g id="Legend" transform="translate(0.000000, 388.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="4" width="15" height="15"></rect>
+ <text id="PTransform" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="17">PTransform</tspan>
+ </text>
+ <text id="Aggregation" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="162" y="17">Aggregating PTransform</tspan>
+ </text>
+ <text id="PCollection" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="52">PCollection</tspan>
+ </text>
+ <circle id="PCollection-symbol" fill="#FFFFFF" stroke="#757575" stroke-width="2" cx="7.5" cy="46.5" r="7.5"></circle>
+ <path d="M143.364341,5 L154,12.4782609 L143.364341,20 L133,12.5217391 L143.364341,5 Z" id="Rectangle" fill="#3062A8"></path>
+ </g>
+ </g>
+</svg>
\ No newline at end of file
diff --git a/website/src/images/windowing-pipeline-unbounded.png b/website/src/images/windowing-pipeline-unbounded.png
deleted file mode 100644
index b5c5ee0..0000000
--- a/website/src/images/windowing-pipeline-unbounded.png
+++ /dev/null
Binary files differ
diff --git a/website/src/images/windowing-pipeline-unbounded.svg b/website/src/images/windowing-pipeline-unbounded.svg
new file mode 100644
index 0000000..ef8a0dd
--- /dev/null
+++ b/website/src/images/windowing-pipeline-unbounded.svg
@@ -0,0 +1,120 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<svg width="1158px" height="468px" viewBox="0 0 1158 468" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <title>Pipeline applying windowing.</title>
+ <g id="windowing-pipeline-unbounded" transform="translate(1.000000, 0.000000)">
+ <g id="Legend" transform="translate(0.000000, 413.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="4" width="15" height="15"></rect>
+ <text id="PTransform" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="17">PTransform</tspan>
+ </text>
+ <text id="Aggregation" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="162" y="17">Aggregating PTransform</tspan>
+ </text>
+ <text id="PCollection" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="52">PCollection</tspan>
+ </text>
+ <circle id="PCollection-symbol" fill="#FFFFFF" stroke="#757575" stroke-width="2" cx="7.5" cy="46.5" r="7.5"></circle>
+ <path d="M143.364341,5 L154,12.4782609 L143.364341,20 L133,12.5217391 L143.364341,5 Z" id="Rectangle" fill="#3062A8"></path>
+ </g>
+ <path d="M270.05,290.765 L270.05,310.765" id="Directed-edge" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(270.050000, 300.765000) rotate(-90.000000) translate(-270.050000, -300.765000) "></path>
+ <g id="PCollection" transform="translate(281.550000, 257.265000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="42.5" cy="42.5" r="42.5"></circle>
+ <text id="Grouped-by-key-and-w" font-family="Roboto-Regular, Roboto" font-size="15" font-weight="normal" fill="#000000">
+ <tspan x="14.0307617" y="30">Grouped </tspan>
+ <tspan x="7.25952148" y="48">by key and </tspan>
+ <tspan x="16.7626953" y="66">window</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(0.000000, 209.000000)">
+ <polygon id="Aggregation-PTransform-symbol" fill="#3062A8" points="128.813953 0 261 91.7333333 128.813953 184 0 92.2666667"></polygon>
+ <text id="GroupByKey-(per-wind" font-family="Roboto-Regular, Roboto" font-size="23" font-weight="normal" fill="#FFFFFF">
+ <tspan x="67.6310547" y="84.265">GroupByKey </tspan>
+ <tspan x="63.374707" y="111.265">(per window)</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(128.800000, 58.500000)">
+ <path id="Line" d="M-1,136.5 L-1,124.941369 L925.955823,124.941369 L926,39.499483 L928,39.500517 L927.954789,126.941369 L1,126.941369 L1,136.5 L7,136.5 L0,150.5 L-7,136.5 L-1,136.5 Z" fill="#757575" fill-rule="nonzero"></path>
+ <path d="M880.5,-10 L880.5,12.2" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(880.500000, 1.500000) rotate(-90.000000) translate(-880.500000, -1.500000) "></path>
+ </g>
+ <g id="PCollection" transform="translate(1021.000000, 27.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="35" cy="35" r="35"></circle>
+ <text id="Rows-after-ParDo" font-family="Roboto-Regular, Roboto" font-size="15" font-weight="normal" fill="#000000">
+ <tspan x="17.1015625" y="21">Rows </tspan>
+ <tspan x="19.8554688" y="39">after </tspan>
+ <tspan x="14.9555664" y="57">ParDo</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(804.500000, 0.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="ParDo" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="64.5" y="70">ParDo</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(675.500000, 60.500000)">
+ <path d="M11,-8.5 L11,11.5" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(11.000000, 1.500000) rotate(-90.000000) translate(-11.000000, -1.500000) "></path>
+ <path id="Path-2" d="M115,2.5 L83,2.5 L83,0.5 L115,0.5 L115,-5.5 L129,1.5 L115,8.5 L115,2.5 Z" fill="#757575" fill-rule="nonzero"></path>
+ </g>
+ <g id="PCollection" transform="translate(696.500000, 31.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="Table-rows" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="11.7070312" y="28">Table </tspan>
+ <tspan x="14.09375" y="47">rows</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(482.000000, 0.000000)">
+ <path d="M0,0 L194,0 L194,124 L0,124 L0,0 Z" id="PTransform-symbol" fill="#3062A8"></path>
+ <text id="Apply-windows" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="48" y="54">Apply </tspan>
+ <tspan x="48" y="82">windows</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(353.000000, 60.500000)">
+ <path d="M11,-8.5 L11,11.5" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(11.000000, 1.500000) rotate(-90.000000) translate(-11.000000, -1.500000) "></path>
+ <path id="Path-2" d="M115,2.5 L83,2.5 L83,0.5 L115,0.5 L115,-5.5 L129,1.5 L115,8.5 L115,2.5 Z" fill="#757575" fill-rule="nonzero"></path>
+ </g>
+ <g id="PCollection" transform="translate(374.000000, 31.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="Table-rows" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="11.2070312" y="28">Table </tspan>
+ <tspan x="13.59375" y="47">rows</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(160.000000, 0.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Read-from-Kafka-IO" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="44" y="55">Read from </tspan>
+ <tspan x="44" y="83">Kafka IO</tspan>
+ </text>
+ </g>
+ <path id="Directed-edge" d="M146,62 L113,62 L113,60 L146,60 L146,54 L160,61 L146,68 L146,62 Z" fill="#E0E0E0" fill-rule="nonzero"></path>
+ <g id="Database" transform="translate(0.000000, 10.000000)">
+ <g id="Database-symbol" transform="translate(0.000000, 0.000000)">
+ <ellipse id="Oval" fill="#E0E0E0" cx="56.5" cy="83.2631579" rx="56.5" ry="17.8421053"></ellipse>
+ <rect id="Rectangle" fill="#E0E0E0" x="0" y="17.8421053" width="113" height="65.4210526"></rect>
+ <ellipse id="Oval" fill="#EFEFEF" cx="56.5" cy="17.8421053" rx="56.5" ry="17.8421053"></ellipse>
+ </g>
+ <text id="Database-table" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#414141">
+ <tspan x="18" y="61">Database </tspan>
+ <tspan x="18" y="82">table</tspan>
+ </text>
+ </g>
+ </g>
+</svg>
\ No newline at end of file
diff --git a/website/src/images/wordcount-pipeline.png b/website/src/images/wordcount-pipeline.png
deleted file mode 100644
index 3be0b7e..0000000
--- a/website/src/images/wordcount-pipeline.png
+++ /dev/null
Binary files differ
diff --git a/website/src/images/wordcount-pipeline.svg b/website/src/images/wordcount-pipeline.svg
new file mode 100644
index 0000000..1dd47c7
--- /dev/null
+++ b/website/src/images/wordcount-pipeline.svg
@@ -0,0 +1,133 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<svg width="1158px" height="441px" viewBox="0 0 1158 441" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <title>The MinimalWordCount pipeline graph.</title>
+ <g id="wordcount-pipeline" transform="translate(1.000000, 0.000000)">
+ <g id="Legend" transform="translate(0.000000, 387.000000)">
+ <text id="PCollection" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="52">PCollection</tspan>
+ </text>
+ <circle id="PCollection-symbol" fill="#FFFFFF" stroke="#757575" stroke-width="2" cx="7.5" cy="45.5" r="7.5"></circle>
+ <text id="Aggregation" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="162" y="17">Aggregating PTransform</tspan>
+ </text>
+ <path d="M143.364341,4 L154,11.4782609 L143.364341,19 L133,11.5217391 L143.364341,4 Z" id="Aggregation-PTransform-symbol" fill="#3062A8"></path>
+ <text id="PTransform" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#000000">
+ <tspan x="22" y="17">PTransform</tspan>
+ </text>
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="3" width="15" height="15"></rect>
+ </g>
+ <g id="Document" transform="translate(566.000000, 256.000000)">
+ <path d="M0,0 L0,81.1880544 C5.73926173,85.0626848 15.1689615,87 28.2890992,87 C56.5,87 57.2032971,71.8576229 84.878475,71.8576229 C90.6940962,71.8576229 100.067938,74.9677667 113,81.1880544 L113,0 L0,0 Z" id="Document-symbol" fill="#E0E0E0"></path>
+ <text id="Output-text-file" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#414141">
+ <tspan x="26" y="34">Output </tspan>
+ <tspan x="26" y="55">text file</tspan>
+ </text>
+ </g>
+ <path id="Directed-edge" d="M552,303 L520,303 L520,301 L552,301 L552,295 L566,302 L552,309 L552,303 Z" fill="#E0E0E0" fill-rule="nonzero"></path>
+ <g id="PTransform" transform="translate(326.000000, 238.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Write-to-text-file" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="55" y="56">Write to </tspan>
+ <tspan x="55" y="84">text file</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(193.500000, 300.500000)">
+ <path d="M11,-8.5 L11,11.5" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(11.000000, 1.500000) rotate(-90.000000) translate(-11.000000, -1.500000) "></path>
+ <path id="Line" d="M118.5,2.5 L86.5,2.5 L86.5,0.5 L118.5,0.5 L118.5,-5.5 L132.5,1.5 L118.5,8.5 L118.5,2.5 Z" fill="#757575" fill-rule="nonzero"></path>
+ </g>
+ <g id="PCollection" transform="translate(214.500000, 269.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="32.5" cy="32.5" r="32.5"></circle>
+ <text id="Word-counts" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="14.1210938" y="27">Word </tspan>
+ <tspan x="8.6875" y="46">counts</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(0.000000, 238.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Format-word-counts" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="34" y="56">Format </tspan>
+ <tspan x="34" y="84">word counts</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(95.500000, 91.500000)">
+ <path id="Line" d="M-0.5,132.5 L-0.5,120.941369 L1019.45139,120.941369 L1019.50057,34.4994312 L1021.50057,34.5005691 L1021.45025,122.941369 L1.5,122.941369 L1.5,132.5 L7.5,132.5 L0.5,146.5 L-6.5,132.5 L-0.5,132.5 Z" fill="#757575" fill-rule="nonzero"></path>
+ <path d="M978.5,-9 L978.5,11" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(978.500000, 1.000000) rotate(-90.000000) translate(-978.500000, -1.000000) "></path>
+ </g>
+ <g id="PCollection" transform="translate(1082.000000, 59.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="34" cy="34" r="34"></circle>
+ <text id="Word-counts" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="15.6210938" y="28">Word </tspan>
+ <tspan x="10.1875" y="47">counts</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(804.000000, 0.750000)">
+ <polygon id="Aggregation-PTransform-symbol" fill="#3062A8" points="128.813953 0 261 91.7333333 128.813953 184 0 92.2666667"></polygon>
+ <text id="Count-frequency-of-e" font-family="Roboto-Regular, Roboto" font-size="23" font-weight="normal" fill="#FFFFFF">
+ <tspan x="100.014893" y="68.25">Count </tspan>
+ <tspan x="66.3515625" y="95.25">frequency of </tspan>
+ <tspan x="77.3742676" y="122.25">each word</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(675.500000, 91.500000)">
+ <path d="M11,-8.5 L11,11.5" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(11.000000, 1.500000) rotate(-90.000000) translate(-11.000000, -1.500000) "></path>
+ <path id="Line" d="M115,2.5 L83,2.5 L83,0.5 L115,0.5 L115,-5.5 L129,1.5 L115,8.5 L115,2.5 Z" fill="#757575" fill-rule="nonzero"></path>
+ </g>
+ <g id="PCollection" transform="translate(696.500000, 62.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="Words" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="8.49609375" y="37">Words</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(482.000000, 31.000000)">
+ <path d="M0,0 L194,0 L194,124 L0,124 L0,0 Z" id="PTransform-symbol" fill="#3062A8"></path>
+ <text id="ParDo-(tokenize)" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="41" y="54">ParDo </tspan>
+ <tspan x="41" y="82">(tokenize)</tspan>
+ </text>
+ </g>
+ <g id="Directed-edge" transform="translate(353.000000, 91.500000)">
+ <path id="Line" d="M115,2.5 L83,2.5 L83,0.5 L115,0.5 L115,-5.5 L129,1.5 L115,8.5 L115,2.5 Z" fill="#757575" fill-rule="nonzero"></path>
+ <path d="M11,-8.5 L11,11.5" id="Line" stroke="#3062A8" stroke-width="2" stroke-linecap="square" transform="translate(11.000000, 1.500000) rotate(-90.000000) translate(-11.000000, -1.500000) "></path>
+ </g>
+ <g id="PCollection" transform="translate(374.000000, 62.000000)">
+ <circle id="PCollection-symbol" stroke="#757575" stroke-width="2" fill="#FFFFFF" cx="31" cy="31" r="31"></circle>
+ <text id="Text-lines" font-family="Roboto-Regular, Roboto" font-size="16" font-weight="normal" fill="#000000">
+ <tspan x="15.4101562" y="27">Text </tspan>
+ <tspan x="14.3398438" y="46">lines</tspan>
+ </text>
+ </g>
+ <g id="PTransform" transform="translate(160.000000, 31.000000)">
+ <rect id="PTransform-symbol" fill="#3062A8" x="0" y="0" width="194" height="124"></rect>
+ <text id="Read-from-text-file" font-family="Roboto-Regular, Roboto" font-size="24" font-weight="normal" fill="#FFFFFF">
+ <tspan x="41" y="55">Read from </tspan>
+ <tspan x="41" y="83">text file</tspan>
+ </text>
+ </g>
+ <path id="Directed-edge" d="M145,94 L113,94 L113,92 L145,92 L145,86 L159,93 L145,100 L145,94 Z" fill="#E0E0E0" fill-rule="nonzero"></path>
+ <g id="Document" transform="translate(0.000000, 49.000000)">
+ <path d="M0,0 L0,81.1880544 C5.73926173,85.0626848 15.1689615,87 28.2890992,87 C56.5,87 57.2032971,71.8576229 84.878475,71.8576229 C90.6940962,71.8576229 100.067938,74.9677667 113,81.1880544 L113,0 L0,0 Z" id="Document-symbol" fill="#E0E0E0"></path>
+ <text id="Input-text-file" font-family="Roboto-Regular, Roboto" font-size="18" font-weight="normal" fill="#414141">
+ <tspan x="26" y="34">Input </tspan>
+ <tspan x="26" y="55">text file</tspan>
+ </text>
+ </g>
+ </g>
+</svg>
\ No newline at end of file
diff --git a/website/src/roadmap/portability.md b/website/src/roadmap/portability.md
index 4143357..459552c 100644
--- a/website/src/roadmap/portability.md
+++ b/website/src/roadmap/portability.md
@@ -41,7 +41,8 @@
Local Runner (ULR)_, as a practical reference implementation that
complements the direct runners. Finally, it enables cross-language
pipelines (sharing I/O or transformations across SDKs) and
-user-customized execution environments ("custom containers").
+user-customized [execution environments]({{ site.baseurl }}/documentation/runtime/environments/)
+("custom containers").
The portability API consists of a set of smaller contracts that
isolate SDKs and runners for job submission, management and
@@ -167,21 +168,4 @@
## SDK Harness Configuration {#sdk-harness-config}
-The Beam Python SDK allows configuration of the SDK harness to accommodate varying cluster setups.
-
-- `environment_type` determines where user code will be executed.
- - `LOOPBACK`: User code is executed within the same process that submitted the pipeline. This
- option is useful for local testing. However, it is not suitable for a production environment,
- as it requires a connection between the original Python process and the worker nodes, and
- performs work on the machine the job originated from, not the worker nodes.
- - `PROCESS`: User code is executed by processes that are automatically started by the runner on
- each worker node.
- - `DOCKER` (default): User code is executed within a container started on each worker node.
- This requires docker to be installed on worker nodes. For more information, see
- [here]({{ site.baseurl }}/documentation/runtime/environments/).
-- `environment_config` configures the environment depending on the value of `environment_type`.
- - When `environment_type=DOCKER`: URL for the Docker container image.
- - When `environment_type=PROCESS`: JSON of the form `{"os": "<OS>", "arch": "<ARCHITECTURE>",
- "command": "<process to execute>", "env":{"<Environment variables 1>": "<ENV_VAL>"} }`. All
- fields in the JSON are optional except `command`.
-- `sdk_worker_parallelism` sets the number of SDK workers that will run on each worker node.
\ No newline at end of file
+For more information on SDK harness deployment options, see the [SDK harness configuration page]({{ site.baseurl }}/documentation/runtime/sdk-harness-config/).
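+
+As a quick illustration (a minimal sketch, assuming the standard Beam Python pipeline
+options API; the image name and values below are placeholders, not defaults), the harness
+deployment options described on that page can be passed as ordinary pipeline options:
+
+```py
+from apache_beam.options.pipeline_options import PipelineOptions
+
+# Placeholder values: substitute the container image and worker parallelism
+# appropriate for your cluster.
+options = PipelineOptions([
+    '--runner=PortableRunner',
+    '--environment_type=DOCKER',
+    '--environment_config=path/to/sdk_harness_image:tag',
+    '--sdk_worker_parallelism=1',
+])
+```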