Merge pull request #11703 from ibzib/BEAM-9001
[BEAM-9001, BEAM-6327] Ensure that all transforms (except for require…
diff --git a/model/pipeline/src/main/proto/beam_runner_api.proto b/model/pipeline/src/main/proto/beam_runner_api.proto
index 0b8d651..2bef4ee 100644
--- a/model/pipeline/src/main/proto/beam_runner_api.proto
+++ b/model/pipeline/src/main/proto/beam_runner_api.proto
@@ -151,8 +151,9 @@
// details.
FunctionSpec spec = 1;
- // (Optional) if this node is a composite, a list of the ids of
- // transforms that it contains.
+ // (Optional) A list of the ids of transforms that it contains.
+ //
+ // Primitive transforms are not allowed to specify this.
repeated string subtransforms = 2;
// (Required) A map from local names of inputs (unique only with this map, and
@@ -184,9 +185,10 @@
// there is none, it may be omitted.
repeated DisplayData display_data = 6;
- // (Optional) Environment where the current PTransform should be executed in.
- // Runner that executes the pipeline may choose to override this if needed. If
- // not specified, environment will be decided by the runner.
+ // Environment where the current PTransform should be executed in.
+ //
+ // Transforms that are required to be implemented by a runner must omit this.
+ // All other transforms are required to specify this.
string environment_id = 7;
}
@@ -227,12 +229,18 @@
// See https://beam.apache.org/documentation/programming-guide/#groupbykey
// for additional details.
//
+ // Never defines an environment as the runner is required to implement this
+ // transform.
+ //
// Payload: None
GROUP_BY_KEY = 2 [(beam_urn) = "beam:transform:group_by_key:v1"];
// A transform which produces a single empty byte array at the minimum
// timestamp in the GlobalWindow.
//
+ // Never defines an environment as the runner is required to implement this
+ // transform.
+ //
// Payload: None
IMPULSE = 3 [(beam_urn) = "beam:transform:impulse:v1"];
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java
index 1ff1d69..cf3a7e5 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java
@@ -50,8 +50,8 @@
import org.apache.beam.sdk.values.PValue;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Joiner;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSortedSet;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets;
@@ -74,6 +74,10 @@
public static final String MAP_WINDOWS_TRANSFORM_URN = "beam:transform:map_windows:v1";
public static final String MERGE_WINDOWS_TRANSFORM_URN = "beam:transform:merge_windows:v1";
+ // Required runner implemented transforms. These transforms should never specify an environment.
+ public static final ImmutableSet<String> RUNNER_IMPLEMENTED_TRANSFORMS =
+ ImmutableSet.of(GROUP_BY_KEY_TRANSFORM_URN, IMPULSE_TRANSFORM_URN);
+
// DeprecatedPrimitives
/**
* @deprecated SDKs should move away from creating `Read` transforms and migrate to using Impulse
@@ -350,10 +354,15 @@
// A composite transform is permitted to have a null spec. There are also some pseudo-
// primitives not yet supported by the portability framework that have null specs
+ String urn = "";
if (spec != null) {
+ urn = spec.getUrn();
transformBuilder.setSpec(spec);
}
- transformBuilder.setEnvironmentId(components.getOnlyEnvironmentId());
+
+ if (!RUNNER_IMPLEMENTED_TRANSFORMS.contains(urn)) {
+ transformBuilder.setEnvironmentId(components.getOnlyEnvironmentId());
+ }
return transformBuilder.build();
}
}
@@ -367,11 +376,6 @@
private static final Map<Class<? extends PTransform>, TransformPayloadTranslator>
KNOWN_PAYLOAD_TRANSLATORS = loadTransformPayloadTranslators();
- // TODO: BEAM-9001 - set environment ID in all transforms and allow runners to override.
- private static List<String> sdkTransformsWithEnvironment =
- ImmutableList.of(
- PAR_DO_TRANSFORM_URN, COMBINE_PER_KEY_TRANSFORM_URN, ASSIGN_WINDOWS_TRANSFORM_URN);
-
private static Map<Class<? extends PTransform>, TransformPayloadTranslator>
loadTransformPayloadTranslators() {
HashMap<Class<? extends PTransform>, TransformPayloadTranslator> translators =
@@ -423,14 +427,20 @@
if (spec != null) {
transformBuilder.setSpec(spec);
- if (sdkTransformsWithEnvironment.contains(spec.getUrn())) {
- transformBuilder.setEnvironmentId(components.getOnlyEnvironmentId());
- } else if (spec.getUrn().equals(READ_TRANSFORM_URN)
- && (appliedPTransform.getTransform().getClass() == Read.Bounded.class)) {
- // Only assigning environment to Bounded reads. Not assigning an environment to Unbounded
- // reads since they are a Runner translated transform, unless, in the future, we have an
- // adapter available for splittable DoFn.
- transformBuilder.setEnvironmentId(components.getOnlyEnvironmentId());
+ // Required runner implemented transforms should not have an environment id.
+ if (!RUNNER_IMPLEMENTED_TRANSFORMS.contains(spec.getUrn())) {
+ // TODO(BEAM-9309): Remove existing hacks around deprecated READ transform.
+ if (spec.getUrn().equals(READ_TRANSFORM_URN)) {
+ // Only assigning environment to Bounded reads. Not assigning an environment to
+ // Unbounded
+ // reads since they are a Runner translated transform, unless, in the future, we have an
+ // adapter available for splittable DoFn.
+ if (appliedPTransform.getTransform().getClass() == Read.Bounded.class) {
+ transformBuilder.setEnvironmentId(components.getOnlyEnvironmentId());
+ }
+ } else {
+ transformBuilder.setEnvironmentId(components.getOnlyEnvironmentId());
+ }
}
}
return transformBuilder.build();
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/PipelineTrimmer.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/PipelineTrimmer.java
deleted file mode 100644
index 894a5d1..0000000
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/PipelineTrimmer.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.core.construction.graph;
-
-import java.util.Set;
-import org.apache.beam.model.pipeline.v1.RunnerApi;
-import org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-// TODO(BEAM-6327): Remove the need for this.
-
-/** PipelineTrimmer removes subcomponents of native transforms that shouldn't be fused. */
-public class PipelineTrimmer {
- private static final Logger LOG = LoggerFactory.getLogger(PipelineTrimmer.class);
-
- /**
- * Remove subcomponents of native transforms that shouldn't be fused.
- *
- * @param pipeline the pipeline to be trimmed
- * @param knownUrns set of URNs for the runner's native transforms
- * @return the trimmed pipeline
- */
- public static Pipeline trim(Pipeline pipeline, Set<String> knownUrns) {
- return makeKnownUrnsPrimitives(pipeline, knownUrns);
- }
-
- private static RunnerApi.Pipeline makeKnownUrnsPrimitives(
- RunnerApi.Pipeline pipeline, Set<String> knownUrns) {
- RunnerApi.Pipeline.Builder trimmedPipeline = pipeline.toBuilder();
- for (String ptransformId : pipeline.getComponents().getTransformsMap().keySet()) {
- if (knownUrns.contains(
- pipeline.getComponents().getTransformsOrThrow(ptransformId).getSpec().getUrn())) {
- LOG.debug("Removing descendants of known PTransform {}" + ptransformId);
- removeDescendants(trimmedPipeline, ptransformId);
- }
- }
- return trimmedPipeline.build();
- }
-
- private static void removeDescendants(RunnerApi.Pipeline.Builder pipeline, String parentId) {
- RunnerApi.PTransform parentProto =
- pipeline.getComponents().getTransformsOrDefault(parentId, null);
- if (parentProto != null) {
- for (String childId : parentProto.getSubtransformsList()) {
- removeDescendants(pipeline, childId);
- pipeline.getComponentsBuilder().removeTransforms(childId);
- }
- pipeline
- .getComponentsBuilder()
- .putTransforms(parentId, parentProto.toBuilder().clearSubtransforms().build());
- }
- }
-}
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/PipelineValidator.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/PipelineValidator.java
index 34e74a7..a27b220 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/PipelineValidator.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/PipelineValidator.java
@@ -208,6 +208,15 @@
}
String urn = transform.getSpec().getUrn();
+ if (PTransformTranslation.RUNNER_IMPLEMENTED_TRANSFORMS.contains(urn)) {
+ checkArgument(
+ transform.getEnvironmentId().isEmpty(),
+ "Transform %s references environment %s when no environment should be specified since it is a required runner implemented transform %s.",
+ id,
+ transform.getEnvironmentId(),
+ urn);
+ }
+
if (VALIDATORS.containsKey(urn)) {
try {
VALIDATORS.get(urn).validate(id, transform, components, requirements);
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/TrivialNativeTransformExpander.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/TrivialNativeTransformExpander.java
new file mode 100644
index 0000000..8ad95e8
--- /dev/null
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/TrivialNativeTransformExpander.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.core.construction.graph;
+
+import java.util.Set;
+import org.apache.beam.model.pipeline.v1.RunnerApi;
+import org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * TrivialNativeTransformExpander is used to replace transforms with known URNs with their native
+ * equivalent.
+ */
+public class TrivialNativeTransformExpander {
+ private static final Logger LOG = LoggerFactory.getLogger(TrivialNativeTransformExpander.class);
+
+ /**
+ * Replaces transforms with the known URN with a native equivalent stripping the environment and
+ * removing any sub-transforms from the returned pipeline.
+ *
+ * @param pipeline the pipeline to be trimmed
+ * @param knownUrns set of URNs for the runner's native transforms
+ * @return the trimmed pipeline
+ */
+ public static Pipeline forKnownUrns(Pipeline pipeline, Set<String> knownUrns) {
+ return makeKnownUrnsPrimitives(pipeline, knownUrns);
+ }
+
+ private static RunnerApi.Pipeline makeKnownUrnsPrimitives(
+ RunnerApi.Pipeline pipeline, Set<String> knownUrns) {
+ RunnerApi.Pipeline.Builder trimmedPipeline = pipeline.toBuilder();
+ for (String ptransformId : pipeline.getComponents().getTransformsMap().keySet()) {
+ // Skip over previously removed transforms from the original pipeline since we iterate
+ // over all transforms from the original pipeline and not the trimmed down version.
+ RunnerApi.PTransform currentTransform =
+ trimmedPipeline.getComponents().getTransformsOrDefault(ptransformId, null);
+ if (currentTransform != null && knownUrns.contains(currentTransform.getSpec().getUrn())) {
+ LOG.debug(
+ "Removing descendants and environment of known native PTransform {}" + ptransformId);
+ removeDescendants(trimmedPipeline, ptransformId);
+ trimmedPipeline
+ .getComponentsBuilder()
+ .putTransforms(
+ ptransformId,
+ currentTransform.toBuilder().clearSubtransforms().clearEnvironmentId().build());
+ }
+ }
+ return trimmedPipeline.build();
+ }
+
+ private static void removeDescendants(RunnerApi.Pipeline.Builder pipeline, String parentId) {
+ RunnerApi.PTransform parentProto = pipeline.getComponents().getTransformsOrThrow(parentId);
+ for (String childId : parentProto.getSubtransformsList()) {
+ removeDescendants(pipeline, childId);
+ pipeline.getComponentsBuilder().removeTransforms(childId);
+ }
+ }
+}
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/QueryablePipelineTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/QueryablePipelineTest.java
index 3b041b4..ff797ef 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/QueryablePipelineTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/QueryablePipelineTest.java
@@ -50,9 +50,11 @@
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.Flatten;
+import org.apache.beam.sdk.transforms.GroupByKey;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.View;
+import org.apache.beam.sdk.transforms.WithKeys;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PBegin;
@@ -342,7 +344,7 @@
public void getEnvironmentWithEnvironment() {
Pipeline p = Pipeline.create();
PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
- PCollectionList.of(longs).and(longs).and(longs).apply("flatten", Flatten.pCollections());
+ longs.apply(WithKeys.of("a")).apply("groupByKey", GroupByKey.create());
Components components = PipelineTranslation.toProto(p).getComponents();
QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);
@@ -350,7 +352,7 @@
PTransformNode environmentalRead =
PipelineNode.pTransform("BoundedRead", components.getTransformsOrThrow("BoundedRead"));
PTransformNode nonEnvironmentalTransform =
- PipelineNode.pTransform("flatten", components.getTransformsOrThrow("flatten"));
+ PipelineNode.pTransform("groupByKey", components.getTransformsOrThrow("groupByKey"));
assertThat(qp.getEnvironment(environmentalRead).isPresent(), is(true));
assertThat(
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java
index 0729f25..8945d9d 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java
@@ -30,9 +30,9 @@
import org.apache.beam.runners.core.construction.PipelineOptionsTranslation;
import org.apache.beam.runners.core.construction.graph.ExecutableStage;
import org.apache.beam.runners.core.construction.graph.GreedyPipelineFuser;
-import org.apache.beam.runners.core.construction.graph.PipelineTrimmer;
import org.apache.beam.runners.core.construction.graph.ProtoOverrides;
import org.apache.beam.runners.core.construction.graph.SplittableParDoExpander;
+import org.apache.beam.runners.core.construction.graph.TrivialNativeTransformExpander;
import org.apache.beam.runners.core.metrics.MetricsPusher;
import org.apache.beam.runners.fnexecution.jobsubmission.PortablePipelineJarUtils;
import org.apache.beam.runners.fnexecution.jobsubmission.PortablePipelineResult;
@@ -98,7 +98,8 @@
// Don't let the fuser fuse any subcomponents of native transforms.
Pipeline trimmedPipeline =
- PipelineTrimmer.trim(pipelineWithSdfExpanded, translator.knownUrns());
+ TrivialNativeTransformExpander.forKnownUrns(
+ pipelineWithSdfExpanded, translator.knownUrns());
// Fused pipeline proto.
// TODO: Consider supporting partially-fused graphs.
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPortablePipelineTranslator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPortablePipelineTranslator.java
index 5494749..f041d0c 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPortablePipelineTranslator.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPortablePipelineTranslator.java
@@ -228,7 +228,7 @@
@Override
public Set<String> knownUrns() {
- // Do not expose Read as a known URN because PipelineTrimmer otherwise removes
+ // Do not expose Read as a known URN because TrivialNativeTransformExpander otherwise removes
// the subtransforms which are added in case of bounded reads. We only have a
// translator here for unbounded Reads which are native transforms which do not
// have subtransforms. Unbounded Reads are used by cross-language transforms, e.g.
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunner.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunner.java
index e5166a3..9727757 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunner.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunner.java
@@ -30,9 +30,9 @@
import org.apache.beam.runners.core.construction.PipelineOptionsTranslation;
import org.apache.beam.runners.core.construction.graph.ExecutableStage;
import org.apache.beam.runners.core.construction.graph.GreedyPipelineFuser;
-import org.apache.beam.runners.core.construction.graph.PipelineTrimmer;
import org.apache.beam.runners.core.construction.graph.ProtoOverrides;
import org.apache.beam.runners.core.construction.graph.SplittableParDoExpander;
+import org.apache.beam.runners.core.construction.graph.TrivialNativeTransformExpander;
import org.apache.beam.runners.core.metrics.MetricsPusher;
import org.apache.beam.runners.fnexecution.jobsubmission.PortablePipelineJarUtils;
import org.apache.beam.runners.fnexecution.jobsubmission.PortablePipelineResult;
@@ -82,7 +82,8 @@
// Don't let the fuser fuse any subcomponents of native transforms.
Pipeline trimmedPipeline =
- PipelineTrimmer.trim(pipelineWithSdfExpanded, translator.knownUrns());
+ TrivialNativeTransformExpander.forKnownUrns(
+ pipelineWithSdfExpanded, translator.knownUrns());
// Fused pipeline proto.
// TODO: Consider supporting partially-fused graphs.
diff --git a/sdks/go/pkg/beam/core/runtime/graphx/translate.go b/sdks/go/pkg/beam/core/runtime/graphx/translate.go
index 9ff84c2..9441151 100644
--- a/sdks/go/pkg/beam/core/runtime/graphx/translate.go
+++ b/sdks/go/pkg/beam/core/runtime/graphx/translate.go
@@ -177,6 +177,7 @@
transform := &pipepb.PTransform{
UniqueName: s.Scope.Name,
Subtransforms: subtransforms,
+ EnvironmentId: m.addDefaultEnv(),
}
m.updateIfCombineComposite(s, transform)
@@ -208,7 +209,6 @@
AccumulatorCoderId: acID,
}
transform.Spec = &pipepb.FunctionSpec{Urn: URNCombinePerKey, Payload: protox.MustEncode(payload)}
- transform.EnvironmentId = m.addDefaultEnv()
}
func (m *marshaller) addMultiEdge(edge NamedEdge) []string {
@@ -238,10 +238,8 @@
// allPIds tracks additional PTransformIDs generated for the pipeline
var allPIds []string
var spec *pipepb.FunctionSpec
- var transformEnvID = ""
switch edge.Edge.Op {
case graph.Impulse:
- // TODO(herohde) 7/18/2018: Encode data?
spec = &pipepb.FunctionSpec{Urn: URNImpulse}
case graph.ParDo:
@@ -315,7 +313,6 @@
if edge.Edge.DoFn.IsSplittable() {
payload.RestrictionCoderId = m.coders.Add(edge.Edge.RestrictionCoder)
}
- transformEnvID = m.addDefaultEnv()
spec = &pipepb.FunctionSpec{Urn: URNParDo, Payload: protox.MustEncode(payload)}
case graph.Combine:
@@ -325,7 +322,6 @@
Payload: []byte(mustEncodeMultiEdgeBase64(edge.Edge)),
},
}
- transformEnvID = m.addDefaultEnv()
spec = &pipepb.FunctionSpec{Urn: URNParDo, Payload: protox.MustEncode(payload)}
case graph.Flatten:
@@ -347,6 +343,11 @@
panic(fmt.Sprintf("Unexpected opcode: %v", edge.Edge.Op))
}
+ var transformEnvID = ""
+ if !(spec.Urn == URNGBK || spec.Urn == URNImpulse) {
+ transformEnvID = m.addDefaultEnv()
+ }
+
transform := &pipepb.PTransform{
UniqueName: edge.Name,
Spec: spec,
@@ -413,10 +414,11 @@
flattenID := fmt.Sprintf("%v_flatten", id)
flatten := &pipepb.PTransform{
- UniqueName: flattenID,
- Spec: &pipepb.FunctionSpec{Urn: URNFlatten},
- Inputs: inputs,
- Outputs: map[string]string{"i0": out},
+ UniqueName: flattenID,
+ Spec: &pipepb.FunctionSpec{Urn: URNFlatten},
+ Inputs: inputs,
+ Outputs: map[string]string{"i0": out},
+ EnvironmentId: m.addDefaultEnv(),
}
m.transforms[flattenID] = flatten
subtransforms = append(subtransforms, flattenID)
@@ -468,6 +470,7 @@
m.transforms[cogbkID] = &pipepb.PTransform{
UniqueName: edge.Name,
Subtransforms: subtransforms,
+ EnvironmentId: m.addDefaultEnv(),
}
return cogbkID
}
@@ -632,6 +635,7 @@
Spec: &pipepb.FunctionSpec{
Urn: URNReshuffle,
},
+ EnvironmentId: m.addDefaultEnv(),
}
return reshuffleID
}
diff --git a/sdks/python/apache_beam/pipeline.py b/sdks/python/apache_beam/pipeline.py
index bb55486..e6aef62 100644
--- a/sdks/python/apache_beam/pipeline.py
+++ b/sdks/python/apache_beam/pipeline.py
@@ -119,20 +119,16 @@
should be used to designate new names
(e.g. ``input | "label" >> my_tranform``).
"""
-
- # TODO: BEAM-9001 - set environment ID in all transforms and allow runners to
- # override.
@classmethod
- def sdk_transforms_with_environment(cls):
+ def runner_implemented_transforms(cls):
# type: () -> FrozenSet[str]
- from apache_beam.runners.portability.fn_api_runner import translations
- sets = [
- translations.PAR_DO_URNS,
- translations.COMBINE_URNS,
- frozenset([common_urns.primitives.ASSIGN_WINDOWS.urn])
- ]
- result = frozenset() # type: FrozenSet[str]
- return result.union(*sets)
+
+ # This set should only contain transforms which are required to be
+ # implemented by a runner.
+ return frozenset([
+ common_urns.primitives.GROUP_BY_KEY.urn,
+ common_urns.primitives.IMPULSE.urn,
+ ])
def __init__(self, runner=None, options=None, argv=None):
# type: (Optional[Union[str, PipelineRunner]], Optional[PipelineOptions], Optional[List[str]]) -> None
@@ -1117,8 +1113,8 @@
transform_spec = transform_to_runner_api(self.transform, context)
environment_id = self.environment_id
transform_urn = transform_spec.urn if transform_spec else None
- if (not environment_id and transform_urn and
- (transform_urn in Pipeline.sdk_transforms_with_environment())):
+ if (not environment_id and
+ (transform_urn not in Pipeline.runner_implemented_transforms())):
environment_id = context.default_environment_id()
def _maybe_preserve_tag(new_tag, pc, input_tags_to_preserve):