[BEAM-12338] Adds missing BigQuery DisplayData for FnAPI path (#14815)
* Adds missing BigQuery DisplayData for FnAPI path
* Addresses reviewer comments
* Removes unnecessary check
diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py
index d3dae0f..0deed87 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery.py
@@ -2084,5 +2084,5 @@
return (
sources_to_read
- | SDFBoundedSourceReader()
+ | SDFBoundedSourceReader(data_to_display=self.display_data())
| _PassThroughThenCleanup(beam.pvalue.AsIter(cleanup_locations)))
diff --git a/sdks/python/apache_beam/pipeline_test.py b/sdks/python/apache_beam/pipeline_test.py
index 285825c..119be47 100644
--- a/sdks/python/apache_beam/pipeline_test.py
+++ b/sdks/python/apache_beam/pipeline_test.py
@@ -987,6 +987,7 @@
parent_dd = super(MyParentTransform, self).display_data()
parent_dd['p_dd_string'] = DisplayDataItem(
'p_dd_string_value', label='p_dd_string_label')
+ parent_dd['p_dd_string_2'] = DisplayDataItem('p_dd_string_value_2')
parent_dd['p_dd_bool'] = DisplayDataItem(True, label='p_dd_bool_label')
parent_dd['p_dd_int'] = DisplayDataItem(1, label='p_dd_int_label')
return parent_dd
@@ -1000,6 +1001,7 @@
parent_dd = super(MyPTransform, self).display_data()
parent_dd['dd_string'] = DisplayDataItem(
'dd_string_value', label='dd_string_label')
+ parent_dd['dd_string_2'] = DisplayDataItem('dd_string_value_2')
parent_dd['dd_bool'] = DisplayDataItem(False, label='dd_bool_label')
parent_dd['dd_int'] = DisplayDataItem(1.1, label='dd_int_label')
return parent_dd
@@ -1026,6 +1028,11 @@
beam_runner_api_pb2.DisplayData(
urn=common_urns.StandardDisplayData.DisplayData.LABELLED.urn,
payload=beam_runner_api_pb2.LabelledPayload(
+ label='p_dd_string_2',
+ string_value='p_dd_string_value_2').SerializeToString()),
+ beam_runner_api_pb2.DisplayData(
+ urn=common_urns.StandardDisplayData.DisplayData.LABELLED.urn,
+ payload=beam_runner_api_pb2.LabelledPayload(
label='p_dd_bool_label',
bool_value=True).SerializeToString()),
beam_runner_api_pb2.DisplayData(
@@ -1041,6 +1048,11 @@
beam_runner_api_pb2.DisplayData(
urn=common_urns.StandardDisplayData.DisplayData.LABELLED.urn,
payload=beam_runner_api_pb2.LabelledPayload(
+ label='dd_string_2',
+ string_value='dd_string_value_2').SerializeToString()),
+ beam_runner_api_pb2.DisplayData(
+ urn=common_urns.StandardDisplayData.DisplayData.LABELLED.urn,
+ payload=beam_runner_api_pb2.LabelledPayload(
label='dd_bool_label',
bool_value=False).SerializeToString()),
beam_runner_api_pb2.DisplayData(
diff --git a/sdks/python/apache_beam/transforms/display.py b/sdks/python/apache_beam/transforms/display.py
index 2d3c618..449e45a 100644
--- a/sdks/python/apache_beam/transforms/display.py
+++ b/sdks/python/apache_beam/transforms/display.py
@@ -137,9 +137,15 @@
except ValueError:
# Skip if the display data is invalid.
return None
- if 'value' not in display_data_dict or 'label' not in display_data_dict:
- return None
- label = display_data_dict['label']
+
+ # Populate the 'label' attribute of 'LabelledPayload' from the 'label'
+ # property when it is set, falling back to 'key' otherwise. 'label' is
+ # preferred because it is expected to be more human-readable, but some
+ # transforms, sources, etc. do not set a 'label' when configuring DisplayData.
+ label = (
+ display_data_dict['label']
+ if 'label' in display_data_dict else display_data_dict['key'])
+
value = display_data_dict['value']
if isinstance(value, str):
return beam_runner_api_pb2.LabelledPayload(