Merge pull request #15484: [BEAM-12769] Python support for directly using Java transforms via their constructor and builder methods
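
This change lets Python pipelines call existing Java transforms directly, addressed by fully qualified class name, without a hand-written cross-language wrapper per transform. As a quick reference, the sketch below shows the intended usage pattern, assuming the JavaExternalTransform API in apache_beam.transforms.external associated with BEAM-12769; the Java class com.example.MyJavaTransform, its withDelimiter builder method, and the expansion service address are hypothetical placeholders, not part of this diff.

    # Illustrative sketch only; the class name, builder method, and expansion
    # service address are placeholders, and a Java expansion service must be
    # running at the given address for this to execute.
    import apache_beam as beam
    from apache_beam.transforms.external import JavaExternalTransform

    # Address the Java transform by its fully qualified class name.
    MyJavaTransform = JavaExternalTransform(
        'com.example.MyJavaTransform',        # hypothetical Java PTransform
        expansion_service='localhost:8097')   # assumed expansion service

    with beam.Pipeline() as p:
        _ = (
            p
            | beam.Create(['a;b', 'c;d'])
            # Call arguments map to the Java constructor; chained attribute
            # calls map to the Java builder methods.
            | MyJavaTransform('constructorArg').withDelimiter(';'))

The diff in this section covers accompanying test-infra updates: pruning Docker volumes on Jenkins inventory nodes, temporarily disabling the Flink load tests pending BEAM-12898, and migrating the Grafana dashboards from 6.7.3 to 8.1.2.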

diff --git a/.test-infra/jenkins/job_Inventory.groovy b/.test-infra/jenkins/job_Inventory.groovy
index 876c399..3d79377 100644
--- a/.test-infra/jenkins/job_Inventory.groovy
+++ b/.test-infra/jenkins/job_Inventory.groovy
@@ -75,6 +75,7 @@
       shell('echo "Maven home $MAVEN_HOME"')
       shell('env')
       shell('docker system prune --all --filter until=24h --force')
+      shell('docker volume prune --force')
       shell('echo "Current size of /tmp dir is \$(sudo du -sh /tmp)"')
       shell('echo "Deleting files accessed later than \${tmp_unaccessed_for} hours ago"')
       shell('sudo find /tmp -type f -amin +\$((60*\${tmp_unaccessed_for})) -print -delete')
diff --git a/.test-infra/jenkins/job_LoadTests_Combine_Flink_Go.groovy b/.test-infra/jenkins/job_LoadTests_Combine_Flink_Go.groovy
index 0143121..d0d3d4e 100644
--- a/.test-infra/jenkins/job_LoadTests_Combine_Flink_Go.groovy
+++ b/.test-infra/jenkins/job_LoadTests_Combine_Flink_Go.groovy
@@ -132,5 +132,6 @@
     influx_db_name: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
     influx_hostname: InfluxDBCredentialsHelper.InfluxDBHostUrl,
   ]
-  loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'batch')
+  // TODO(BEAM-12898): Re-enable this test once fixed.
+  // loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'batch')
 }
diff --git a/.test-infra/jenkins/job_LoadTests_Combine_Flink_Python.groovy b/.test-infra/jenkins/job_LoadTests_Combine_Flink_Python.groovy
index 5055d1a..5cfe97e 100644
--- a/.test-infra/jenkins/job_LoadTests_Combine_Flink_Python.groovy
+++ b/.test-infra/jenkins/job_LoadTests_Combine_Flink_Python.groovy
@@ -177,5 +177,6 @@
     influx_db_name: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
     influx_hostname: InfluxDBCredentialsHelper.InfluxDBHostUrl,
   ]
-  loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'streaming')
+  // TODO(BEAM-12898): Re-enable this test once fixed.
+  // loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'streaming')
 }
diff --git a/.test-infra/jenkins/job_LoadTests_GBK_Flink_Go.groovy b/.test-infra/jenkins/job_LoadTests_GBK_Flink_Go.groovy
index fee2e7d..f0cd611 100644
--- a/.test-infra/jenkins/job_LoadTests_GBK_Flink_Go.groovy
+++ b/.test-infra/jenkins/job_LoadTests_GBK_Flink_Go.groovy
@@ -225,5 +225,6 @@
     influx_db_name: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
     influx_hostname: InfluxDBCredentialsHelper.InfluxDBHostUrl,
   ]
-  loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'batch')
+  // TODO(BEAM-12898): Re-enable this test once fixed.
+  // loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'batch')
 }
diff --git a/.test-infra/jenkins/job_LoadTests_ParDo_Flink_Go.groovy b/.test-infra/jenkins/job_LoadTests_ParDo_Flink_Go.groovy
index 567e327..0c86342 100644
--- a/.test-infra/jenkins/job_LoadTests_ParDo_Flink_Go.groovy
+++ b/.test-infra/jenkins/job_LoadTests_ParDo_Flink_Go.groovy
@@ -146,5 +146,6 @@
     influx_db_name: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
     influx_hostname: InfluxDBCredentialsHelper.InfluxDBHostUrl,
   ]
-  loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'batch')
+  // TODO(BEAM-12898): Re-enable this test once fixed.
+  // loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'batch')
 }
diff --git a/.test-infra/jenkins/job_LoadTests_ParDo_Flink_Python.groovy b/.test-infra/jenkins/job_LoadTests_ParDo_Flink_Python.groovy
index 9b80c61..7c0e554 100644
--- a/.test-infra/jenkins/job_LoadTests_ParDo_Flink_Python.groovy
+++ b/.test-infra/jenkins/job_LoadTests_ParDo_Flink_Python.groovy
@@ -350,7 +350,8 @@
     influx_db_name: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
     influx_hostname: InfluxDBCredentialsHelper.InfluxDBHostUrl,
   ]
-  loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'batch')
+  // TODO(BEAM-12898): Re-enable this test once fixed.
+  // loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'batch')
 }
 
 CronJobBuilder.cronJob('beam_LoadTests_Python_ParDo_Flink_Streaming', 'H 12 * * *', this) {
@@ -358,5 +359,6 @@
     influx_db_name: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
     influx_hostname: InfluxDBCredentialsHelper.InfluxDBHostUrl,
   ]
-  loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'streaming')
+  // TODO(BEAM-12898): Re-enable this test once fixed.
+  // loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'streaming')
 }
diff --git a/.test-infra/jenkins/job_LoadTests_SideInput_Flink_Go.groovy b/.test-infra/jenkins/job_LoadTests_SideInput_Flink_Go.groovy
index c2db483..3a2d5c2 100644
--- a/.test-infra/jenkins/job_LoadTests_SideInput_Flink_Go.groovy
+++ b/.test-infra/jenkins/job_LoadTests_SideInput_Flink_Go.groovy
@@ -99,5 +99,6 @@
     influx_db_name: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
     influx_hostname: InfluxDBCredentialsHelper.InfluxDBHostUrl,
   ]
-  loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'batch')
+  // TODO(BEAM-12898): Re-enable this test once fixed.
+  // loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'batch')
 }
diff --git a/.test-infra/jenkins/job_LoadTests_coGBK_Flink_Go.groovy b/.test-infra/jenkins/job_LoadTests_coGBK_Flink_Go.groovy
index b15c47f..a5de30d 100644
--- a/.test-infra/jenkins/job_LoadTests_coGBK_Flink_Go.groovy
+++ b/.test-infra/jenkins/job_LoadTests_coGBK_Flink_Go.groovy
@@ -178,5 +178,6 @@
     influx_db_name: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
     influx_hostname: InfluxDBCredentialsHelper.InfluxDBHostUrl,
   ]
-  loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'batch')
+  // TODO(BEAM-12898): Re-enable this test once fixed.
+  // loadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT, 'batch')
 }
diff --git a/.test-infra/metrics/grafana/Dockerfile b/.test-infra/metrics/grafana/Dockerfile
index ff5b66d..5eadd2a 100644
--- a/.test-infra/metrics/grafana/Dockerfile
+++ b/.test-infra/metrics/grafana/Dockerfile
@@ -16,7 +16,9 @@
 # limitations under the License.
 ################################################################################
 
-FROM grafana/grafana:6.7.3
+FROM grafana/grafana:8.1.2
+
+RUN grafana-cli plugins install marcusolsson-json-datasource
 
 COPY ./provisioning /etc/beamgrafana/provisioning
 COPY ./dashboards /etc/beamgrafana/dashboards
diff --git a/.test-infra/metrics/grafana/dashboards/github_actions_post-commit_tests.json b/.test-infra/metrics/grafana/dashboards/github_actions_post-commit_tests.json
index 11ad969..d5be402 100644
--- a/.test-infra/metrics/grafana/dashboards/github_actions_post-commit_tests.json
+++ b/.test-infra/metrics/grafana/dashboards/github_actions_post-commit_tests.json
@@ -280,7 +280,7 @@
       "type": "table"
     },
     {
-      "datasource": null,
+      "datasource": "Java Tests",
       "fieldConfig": {
         "defaults": {
           "color": {
@@ -420,7 +420,7 @@
       "type": "piechart"
     },
     {
-      "datasource": null,
+      "datasource": "Java Tests",
       "fieldConfig": {
         "defaults": {
           "color": {
diff --git a/.test-infra/metrics/grafana/dashboards/post-commit_tests.json b/.test-infra/metrics/grafana/dashboards/post-commit_tests.json
index 0a7b2a4..2914306 100644
--- a/.test-infra/metrics/grafana/dashboards/post-commit_tests.json
+++ b/.test-infra/metrics/grafana/dashboards/post-commit_tests.json
@@ -10,6 +10,12 @@
         "limit": 100,
         "name": "Annotations & Alerts",
         "showIn": 0,
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
         "type": "dashboard"
       }
     ]
@@ -17,11 +23,10 @@
   "editable": true,
   "gnetId": null,
   "graphTooltip": 0,
-  "id": 1,
   "links": [],
   "panels": [
     {
-      "content": "This dashboard tracks Post-commit test reliability over-time.\n\n* [Post-commit test policies](https://beam.apache.org/contribute/postcommits-policies/)\n* [Existing test failure issues](https://issues.apache.org/jira/issues/?jql=project%20%3D%20BEAM%20AND%20status%20in%20(Open%2C%20%22In%20Progress%22%2C%20Reopened)%20AND%20resolution%20%3D%20Unresolved%20AND%20component%20%3D%20test-failures%20ORDER%20BY%20priority%20DESC%2C%20updated%20DESC)\n* [File a new test failure issue](https://s.apache.org/beam-test-failure)",
+      "datasource": null,
       "gridPos": {
         "h": 4,
         "w": 24,
@@ -30,7 +35,11 @@
       },
       "id": 11,
       "links": [],
-      "mode": "markdown",
+      "options": {
+        "content": "This dashboard tracks Post-commit test reliability over-time.\n\n* [Post-commit test policies](https://beam.apache.org/contribute/postcommits-policies/)\n* [Existing test failure issues](https://issues.apache.org/jira/issues/?jql=project%20%3D%20BEAM%20AND%20status%20in%20(Open%2C%20%22In%20Progress%22%2C%20Reopened)%20AND%20resolution%20%3D%20Unresolved%20AND%20component%20%3D%20test-failures%20ORDER%20BY%20priority%20DESC%2C%20updated%20DESC)\n* [File a new test failure issue](https://s.apache.org/beam-test-failure)",
+        "mode": "markdown"
+      },
+      "pluginVersion": "8.1.2",
       "title": "Dashboard guidelines",
       "type": "text"
     },
@@ -68,14 +77,62 @@
         "noDataState": "keep_state",
         "notifications": []
       },
-      "aliasColors": {},
-      "bars": false,
-      "dashLength": 10,
-      "dashes": false,
       "datasource": "BeamPSQL",
-      "decimals": 0,
       "description": "Percent reliability of all post-commit job runs for a given week.\n\nUnreliability of a test suite impact developer productivity by forcing contributors to re-run tests. When tests are consistently unreliable, developers will simply ignore them.\n\nWe aim for >= 70% reliability per test suite.",
-      "fill": 0,
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "% successful runs",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": true,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "line+area"
+            }
+          },
+          "decimals": 1,
+          "mappings": [],
+          "max": 1,
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "transparent",
+                "value": 0.7
+              }
+            ]
+          },
+          "unit": "percentunit"
+        },
+        "overrides": []
+      },
       "gridPos": {
         "h": 7,
         "w": 24,
@@ -83,31 +140,20 @@
         "y": 4
       },
       "id": 6,
-      "legend": {
-        "alignAsTable": true,
-        "avg": false,
-        "current": true,
-        "hideEmpty": false,
-        "hideZero": true,
-        "max": false,
-        "min": false,
-        "rightSide": true,
-        "show": true,
-        "total": false,
-        "values": true
-      },
-      "lines": true,
-      "linewidth": 1,
       "links": [],
-      "nullPointMode": "null",
-      "percentage": false,
-      "pointradius": 2,
-      "points": false,
-      "renderer": "flot",
-      "seriesOverrides": [],
-      "spaceLength": 10,
-      "stack": false,
-      "steppedLine": false,
+      "options": {
+        "legend": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "displayMode": "table",
+          "placement": "right"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "pluginVersion": "8.1.2",
       "targets": [
         {
           "alias": "",
@@ -115,7 +161,7 @@
           "group": [],
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT\n  DATE_TRUNC('week', build_timestamp) as time,\n  avg(\n  case \n    when build_result = 'SUCCESS' then 1\n    else 0\n  end) as value,\n  substring(job_name from 'beam_#\"%#\"' for '#') as job_name\nFROM\n  jenkins_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND (((job_name LIKE 'beam_PostCommit_%')\n  AND NOT (job_name like '%_PR')) OR job_name like '%_Cron')\nGROUP BY\n  time, job_name\norder BY\n  job_name, time\n",
+          "rawSql": "SELECT\n  DATE_TRUNC('week', build_timestamp) as time,\n  avg(\n  case \n    when build_result = 'SUCCESS' then 1\n    else 0\n  end) as value,\n  substring(job_name from 'beam_#\"%#\"' for '#') as job_name\nFROM\n  jenkins_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND (((job_name LIKE 'beam_PostCommit_%')\n  AND NOT (job_name like '%_PR')) OR job_name like '%_Cron')\nGROUP BY\n  time, job_name\norder BY\n  time, job_name\n",
           "refId": "A",
           "select": [
             [
@@ -137,55 +183,10 @@
           ]
         }
       ],
-      "thresholds": [
-        {
-          "colorMode": "critical",
-          "fill": true,
-          "line": true,
-          "op": "lt",
-          "value": 0.7
-        }
-      ],
       "timeFrom": null,
-      "timeRegions": [],
       "timeShift": null,
       "title": "Post-commit reliability per week",
-      "tooltip": {
-        "shared": true,
-        "sort": 1,
-        "value_type": "individual"
-      },
-      "type": "graph",
-      "xaxis": {
-        "buckets": null,
-        "mode": "time",
-        "name": null,
-        "show": true,
-        "values": []
-      },
-      "yaxes": [
-        {
-          "decimals": 1,
-          "format": "percentunit",
-          "label": "% successful runs",
-          "logBase": 1,
-          "max": "1",
-          "min": "0",
-          "show": true
-        },
-        {
-          "format": "short",
-          "label": null,
-          "logBase": 1,
-          "max": null,
-          "min": null,
-          "show": false
-        }
-      ],
-      "yaxis": {
-        "align": false,
-        "alignLevel": null
-      }
+      "type": "timeseries"
     },
     {
       "alert": {
@@ -221,14 +222,62 @@
         "noDataState": "no_data",
         "notifications": []
       },
-      "aliasColors": {},
-      "bars": false,
-      "dashLength": 10,
-      "dashes": false,
       "datasource": "BeamPSQL",
-      "decimals": 0,
       "description": "Percent reliability of all post-commit job runs per-day.\n\nUnreliability of a test suite impact developer productivity by forcing contributors to re-run tests. When tests are consistently unreliable, developers will simply ignore them.\n\nWe aim for >= 70% reliability per test suite.",
-      "fill": 0,
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "% successful runs",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": true,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "line+area"
+            }
+          },
+          "decimals": 1,
+          "mappings": [],
+          "max": 1,
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "transparent",
+                "value": 0.7
+              }
+            ]
+          },
+          "unit": "percentunit"
+        },
+        "overrides": []
+      },
       "gridPos": {
         "h": 12,
         "w": 15,
@@ -236,31 +285,20 @@
         "y": 11
       },
       "id": 9,
-      "legend": {
-        "alignAsTable": true,
-        "avg": false,
-        "current": true,
-        "hideZero": true,
-        "max": false,
-        "min": false,
-        "rightSide": true,
-        "show": false,
-        "sideWidth": null,
-        "total": false,
-        "values": true
-      },
-      "lines": true,
-      "linewidth": 1,
       "links": [],
-      "nullPointMode": "null",
-      "percentage": false,
-      "pointradius": 2,
-      "points": false,
-      "renderer": "flot",
-      "seriesOverrides": [],
-      "spaceLength": 10,
-      "stack": false,
-      "steppedLine": false,
+      "options": {
+        "legend": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "displayMode": "hidden",
+          "placement": "right"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "pluginVersion": "8.1.2",
       "targets": [
         {
           "alias": "",
@@ -268,7 +306,7 @@
           "group": [],
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT\n  DATE_TRUNC('day', build_timestamp) as time,\n  avg(\n  case \n    when build_result = 'SUCCESS' then 1\n    else 0\n  end) as value,\n  substring(job_name from 'beam_PostCommit_#\"%#\"' for '#') as job_name\nFROM\n  jenkins_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND (job_name LIKE 'beam_PostCommit_%')\n  AND NOT (job_name like '%_PR')\nGROUP BY\n  time, job_name\norder BY\n  job_name, time\n",
+          "rawSql": "SELECT\n  DATE_TRUNC('day', build_timestamp) as time,\n  avg(\n  case \n    when build_result = 'SUCCESS' then 1\n    else 0\n  end) as value,\n  substring(job_name from 'beam_PostCommit_#\"%#\"' for '#') as job_name\nFROM\n  jenkins_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND (job_name LIKE 'beam_PostCommit_%')\n  AND NOT (job_name like '%_PR')\nGROUP BY\n  time, job_name\norder BY\n  time, job_name\n",
           "refId": "A",
           "select": [
             [
@@ -290,61 +328,197 @@
           ]
         }
       ],
-      "thresholds": [
-        {
-          "colorMode": "critical",
-          "fill": true,
-          "line": true,
-          "op": "lt",
-          "value": 0.7
-        }
-      ],
       "timeFrom": null,
-      "timeRegions": [],
       "timeShift": null,
       "title": "Post-commit reliability per day",
-      "tooltip": {
-        "shared": true,
-        "sort": 1,
-        "value_type": "individual"
-      },
-      "type": "graph",
-      "xaxis": {
-        "buckets": null,
-        "mode": "time",
-        "name": null,
-        "show": true,
-        "values": []
-      },
-      "yaxes": [
-        {
-          "decimals": 1,
-          "format": "percentunit",
-          "label": "% successful runs",
-          "logBase": 1,
-          "max": "1",
-          "min": "0",
-          "show": true
-        },
-        {
-          "format": "short",
-          "label": null,
-          "logBase": 1,
-          "max": null,
-          "min": null,
-          "show": false
-        }
-      ],
-      "yaxis": {
-        "align": false,
-        "alignLevel": null
-      }
+      "type": "timeseries"
     },
     {
-      "columns": [],
       "datasource": "BeamPSQL",
       "description": "List of jobs which have failed. Click on the job to view it in Jenkins.",
-      "fontSize": "100%",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "custom": {
+            "align": "auto",
+            "displayMode": "auto"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Time"
+            },
+            "properties": [
+              {
+                "id": "displayName",
+                "value": "Time"
+              },
+              {
+                "id": "unit",
+                "value": "time: YYYY-MM-DD HH:mm:ss"
+              },
+              {
+                "id": "custom.align",
+                "value": null
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "build_url"
+            },
+            "properties": [
+              {
+                "id": "displayName",
+                "value": "Build Url"
+              },
+              {
+                "id": "unit",
+                "value": "short"
+              },
+              {
+                "id": "decimals",
+                "value": 2
+              },
+              {
+                "id": "links",
+                "value": [
+                  {
+                    "targetBlank": true,
+                    "title": "Link to Jenkins job.",
+                    "url": "${__cell:raw}"
+                  }
+                ]
+              },
+              {
+                "id": "custom.align",
+                "value": null
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "job_name"
+            },
+            "properties": [
+              {
+                "id": "displayName",
+                "value": "Job Name"
+              },
+              {
+                "id": "unit",
+                "value": "short"
+              },
+              {
+                "id": "decimals",
+                "value": 2
+              },
+              {
+                "id": "links",
+                "value": [
+                  {
+                    "targetBlank": true,
+                    "title": "View Jenkins job: ${__cell_1}_${__cell_2}",
+                    "url": "${__cell_0:raw}"
+                  }
+                ]
+              },
+              {
+                "id": "custom.align",
+                "value": null
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "build_id"
+            },
+            "properties": [
+              {
+                "id": "displayName",
+                "value": "Build ID"
+              },
+              {
+                "id": "unit",
+                "value": "short"
+              },
+              {
+                "id": "links",
+                "value": [
+                  {
+                    "targetBlank": true,
+                    "title": "View Jenkins job: ${__cell_1}_${__cell_2}",
+                    "url": "${__cell_0:raw}"
+                  }
+                ]
+              },
+              {
+                "id": "custom.align",
+                "value": null
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "build_timestamp"
+            },
+            "properties": [
+              {
+                "id": "displayName",
+                "value": "Start Time"
+              },
+              {
+                "id": "unit",
+                "value": "short"
+              },
+              {
+                "id": "decimals",
+                "value": 2
+              },
+              {
+                "id": "unit",
+                "value": "time: MM/DD/YY h:mm:ss a"
+              },
+              {
+                "id": "links",
+                "value": [
+                  {
+                    "targetBlank": true,
+                    "title": "View Jenkins job: ${__cell_1}_${__cell_2}",
+                    "url": "${__cell_0:raw}"
+                  }
+                ]
+              },
+              {
+                "id": "custom.align",
+                "value": null
+              }
+            ]
+          }
+        ]
+      },
       "gridPos": {
         "h": 12,
         "w": 9,
@@ -355,109 +529,15 @@
       "id": 8,
       "links": [
         {
-          "includeVars": false,
           "targetBlank": true,
           "title": "Beam Jenkins",
-          "type": "absolute",
           "url": "https://ci-beam.apache.org/"
         }
       ],
-      "pageSize": null,
-      "scroll": true,
-      "showHeader": true,
-      "sort": {
-        "col": 0,
-        "desc": true
+      "options": {
+        "showHeader": true
       },
-      "styles": [
-        {
-          "alias": "Time",
-          "dateFormat": "YYYY-MM-DD HH:mm:ss",
-          "link": false,
-          "pattern": "Time",
-          "type": "date"
-        },
-        {
-          "alias": "Build Url",
-          "colorMode": null,
-          "colors": [
-            "rgba(245, 54, 54, 0.9)",
-            "rgba(237, 129, 40, 0.89)",
-            "rgba(50, 172, 45, 0.97)"
-          ],
-          "dateFormat": "YYYY-MM-DD HH:mm:ss",
-          "decimals": 2,
-          "link": true,
-          "linkTargetBlank": true,
-          "linkTooltip": "Link to Jenkins job.",
-          "linkUrl": "${__cell:raw}",
-          "mappingType": 1,
-          "pattern": "build_url",
-          "thresholds": [],
-          "type": "hidden",
-          "unit": "short"
-        },
-        {
-          "alias": "Job Name",
-          "colorMode": null,
-          "colors": [
-            "rgba(245, 54, 54, 0.9)",
-            "rgba(237, 129, 40, 0.89)",
-            "rgba(50, 172, 45, 0.97)"
-          ],
-          "dateFormat": "YYYY-MM-DD HH:mm:ss",
-          "decimals": 2,
-          "link": true,
-          "linkTargetBlank": true,
-          "linkTooltip": "View Jenkins job: ${__cell_1}_${__cell_2}",
-          "linkUrl": "${__cell_0:raw}",
-          "mappingType": 1,
-          "pattern": "job_name",
-          "thresholds": [],
-          "type": "string",
-          "unit": "short"
-        },
-        {
-          "alias": "Build ID",
-          "colorMode": null,
-          "colors": [
-            "rgba(245, 54, 54, 0.9)",
-            "rgba(237, 129, 40, 0.89)",
-            "rgba(50, 172, 45, 0.97)"
-          ],
-          "dateFormat": "YYYY-MM-DD HH:mm:ss",
-          "decimals": 0,
-          "link": true,
-          "linkTargetBlank": true,
-          "linkTooltip": "View Jenkins job: ${__cell_1}_${__cell_2}",
-          "linkUrl": "${__cell_0:raw}",
-          "mappingType": 1,
-          "pattern": "build_id",
-          "thresholds": [],
-          "type": "number",
-          "unit": "short"
-        },
-        {
-          "alias": "Start Time",
-          "colorMode": null,
-          "colors": [
-            "rgba(245, 54, 54, 0.9)",
-            "rgba(237, 129, 40, 0.89)",
-            "rgba(50, 172, 45, 0.97)"
-          ],
-          "dateFormat": "MM/DD/YY h:mm:ss a",
-          "decimals": 2,
-          "link": true,
-          "linkTargetBlank": true,
-          "linkTooltip": "View Jenkins job: ${__cell_1}_${__cell_2}",
-          "linkUrl": "${__cell_0:raw}",
-          "mappingType": 1,
-          "pattern": "build_timestamp",
-          "thresholds": [],
-          "type": "date",
-          "unit": "short"
-        }
-      ],
+      "pluginVersion": "8.1.2",
       "targets": [
         {
           "alias": "",
@@ -489,18 +569,71 @@
       ],
       "timeShift": null,
       "title": "Failed builds",
-      "transform": "table",
+      "transformations": [
+        {
+          "id": "merge",
+          "options": {
+            "reducers": []
+          }
+        }
+      ],
       "type": "table"
     },
     {
-      "aliasColors": {},
-      "bars": false,
-      "dashLength": 10,
-      "dashes": false,
       "datasource": "BeamPSQL",
-      "decimals": 1,
       "description": "Execution time for each post-commit job",
-      "fill": 0,
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "Average job duration",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": true,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
       "gridPos": {
         "h": 8,
         "w": 15,
@@ -508,29 +641,20 @@
         "y": 23
       },
       "id": 5,
-      "legend": {
-        "alignAsTable": true,
-        "avg": false,
-        "current": true,
-        "max": false,
-        "min": false,
-        "rightSide": true,
-        "show": false,
-        "total": false,
-        "values": true
-      },
-      "lines": true,
-      "linewidth": 1,
       "links": [],
-      "nullPointMode": "null",
-      "percentage": false,
-      "pointradius": 5,
-      "points": false,
-      "renderer": "flot",
-      "seriesOverrides": [],
-      "spaceLength": 10,
-      "stack": false,
-      "steppedLine": false,
+      "options": {
+        "legend": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "displayMode": "hidden",
+          "placement": "right"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "pluginVersion": "8.1.2",
       "targets": [
         {
           "alias": "",
@@ -538,7 +662,7 @@
           "group": [],
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT\n  build_timestamp as time,\n  build_duration as value,\n  substring(job_name from 'beam_PostCommit_#\"%#\"' for '#') as metric\nFROM\n  jenkins_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND (job_name LIKE 'beam_PostCommit_%')\n  AND NOT (job_name LIKE '%_PR')\nORDER BY\n  job_name, time",
+          "rawSql": "SELECT\n  build_timestamp as time,\n  build_duration as value,\n  substring(job_name from 'beam_PostCommit_#\"%#\"' for '#') as metric\nFROM\n  jenkins_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND (job_name LIKE 'beam_PostCommit_%')\n  AND NOT (job_name LIKE '%_PR')\nORDER BY\n  time, job_name",
           "refId": "A",
           "select": [
             [
@@ -560,57 +684,98 @@
           ]
         }
       ],
-      "thresholds": [],
       "timeFrom": null,
-      "timeRegions": [],
       "timeShift": null,
       "title": "Post-commit job duration",
-      "tooltip": {
-        "shared": true,
-        "sort": 2,
-        "value_type": "individual"
-      },
-      "type": "graph",
-      "xaxis": {
-        "buckets": null,
-        "mode": "time",
-        "name": null,
-        "show": true,
-        "values": []
-      },
-      "yaxes": [
-        {
-          "decimals": null,
-          "format": "ms",
-          "label": "Average job duration",
-          "logBase": 1,
-          "max": null,
-          "min": "0",
-          "show": true
-        },
-        {
-          "format": "short",
-          "label": null,
-          "logBase": 1,
-          "max": null,
-          "min": null,
-          "show": false
-        }
-      ],
-      "yaxis": {
-        "align": false,
-        "alignLevel": null
-      }
+      "type": "timeseries"
     },
     {
-      "aliasColors": {},
-      "bars": true,
-      "dashLength": 10,
-      "dashes": false,
       "datasource": "BeamPSQL",
-      "decimals": 0,
       "description": "Tracks the count of test failure JIRA issues currently open.",
-      "fill": 3,
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "# of JIRA issues",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "bars",
+            "fillOpacity": 100,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": true,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "decimals": 0,
+          "mappings": [],
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "total_open"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "#eab839",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "currently_failing"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "#bf1b00",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          }
+        ]
+      },
       "gridPos": {
         "h": 8,
         "w": 9,
@@ -618,45 +783,24 @@
         "y": 23
       },
       "id": 14,
-      "legend": {
-        "alignAsTable": false,
-        "avg": false,
-        "current": false,
-        "max": false,
-        "min": false,
-        "rightSide": false,
-        "show": true,
-        "total": false,
-        "values": false
-      },
-      "lines": false,
-      "linewidth": 1,
       "links": [
         {
           "targetBlank": true,
           "title": "Jira tickets",
-          "type": "absolute",
           "url": "https://issues.apache.org/jira/issues/?jql=project%20%3D%20BEAM%20AND%20resolution%20%3D%20Unresolved%20AND%20component%20%3D%20test-failures%20ORDER%20BY%20priority%20DESC%2C%20updated%20DESC"
         }
       ],
-      "nullPointMode": "null",
-      "percentage": false,
-      "pointradius": 5,
-      "points": false,
-      "renderer": "flot",
-      "seriesOverrides": [
-        {
-          "alias": "total_open",
-          "color": "#eab839"
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
         },
-        {
-          "alias": "currently_failing",
-          "color": "#bf1b00"
+        "tooltip": {
+          "mode": "single"
         }
-      ],
-      "spaceLength": 10,
-      "stack": false,
-      "steppedLine": false,
+      },
+      "pluginVersion": "8.1.2",
       "targets": [
         {
           "format": "time_series",
@@ -713,51 +857,14 @@
           ]
         }
       ],
-      "thresholds": [],
       "timeFrom": null,
-      "timeRegions": [],
       "timeShift": null,
       "title": "Test Failure JIRA issues",
-      "tooltip": {
-        "shared": true,
-        "sort": 0,
-        "value_type": "individual"
-      },
-      "type": "graph",
-      "xaxis": {
-        "buckets": null,
-        "mode": "time",
-        "name": null,
-        "show": true,
-        "values": []
-      },
-      "yaxes": [
-        {
-          "decimals": 0,
-          "format": "short",
-          "label": "# of JIRA issues",
-          "logBase": 1,
-          "max": null,
-          "min": "0",
-          "show": true
-        },
-        {
-          "format": "short",
-          "label": null,
-          "logBase": 1,
-          "max": null,
-          "min": null,
-          "show": false
-        }
-      ],
-      "yaxis": {
-        "align": false,
-        "alignLevel": null
-      }
+      "type": "timeseries"
     }
   ],
   "refresh": false,
-  "schemaVersion": 18,
+  "schemaVersion": 30,
   "style": "dark",
   "tags": [],
   "templating": {
@@ -793,5 +900,5 @@
   "timezone": "",
   "title": "Post-commit Test Reliability",
   "uid": "D81lW0pmk",
-  "version": 46
+  "version": 2
 }
diff --git a/.test-infra/metrics/grafana/dashboards/pre-commit_tests.json b/.test-infra/metrics/grafana/dashboards/pre-commit_tests.json
index e5ab46e..a518b2a 100644
--- a/.test-infra/metrics/grafana/dashboards/pre-commit_tests.json
+++ b/.test-infra/metrics/grafana/dashboards/pre-commit_tests.json
@@ -8,6 +8,12 @@
         "hide": true,
         "iconColor": "rgba(0, 211, 255, 1)",
         "name": "Annotations & Alerts",
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
         "type": "dashboard"
       }
     ]
@@ -15,7 +21,6 @@
   "editable": true,
   "gnetId": null,
   "graphTooltip": 0,
-  "id": 2,
   "links": [],
   "panels": [
     {
@@ -52,13 +57,60 @@
         "noDataState": "keep_state",
         "notifications": []
       },
-      "aliasColors": {},
-      "bars": false,
-      "dashLength": 10,
-      "dashes": false,
       "datasource": "BeamPSQL",
       "description": "Execution time for each pre-commit job.\n\nLong test suite execution impacts developer productivity by delaying the quality signal of a pull request of current HEAD. If tests are consistently slow, developers won't wait for them to complete.\n\nWe aim for under 2 hour execution per test suite, but ideally under 30 mins.",
-      "fill": 0,
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "Average job duration",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": true,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "line+area"
+            }
+          },
+          "mappings": [],
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "transparent",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 7200000
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
       "gridPos": {
         "h": 8,
         "w": 24,
@@ -66,31 +118,20 @@
         "y": 0
       },
       "id": 4,
-      "legend": {
-        "alignAsTable": true,
-        "avg": false,
-        "current": true,
-        "max": false,
-        "min": false,
-        "rightSide": true,
-        "show": true,
-        "sort": "current",
-        "sortDesc": true,
-        "total": false,
-        "values": true
-      },
-      "lines": true,
-      "linewidth": 1,
       "links": [],
-      "nullPointMode": "null",
-      "percentage": false,
-      "pointradius": 5,
-      "points": false,
-      "renderer": "flot",
-      "seriesOverrides": [],
-      "spaceLength": 10,
-      "stack": false,
-      "steppedLine": false,
+      "options": {
+        "legend": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "displayMode": "table",
+          "placement": "right"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "pluginVersion": "8.1.2",
       "targets": [
         {
           "alias": "",
@@ -98,7 +139,7 @@
           "group": [],
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT\n  build_timestamp as time,\n  build_duration as value,\n  substring(job_name from 'beam_PreCommit_#\"%#\"_(Cron|Commit)' for '#') as metric\nFROM\n  jenkins_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND build_result = 'SUCCESS'\n  AND ((job_name LIKE 'beam_PreCommit_%_Commit')\n       OR (job_name LIKE 'beam_PreCommit_%_Cron'))\nORDER BY\n  metric, time",
+          "rawSql": "SELECT\n  build_timestamp as time,\n  build_duration as value,\n  substring(job_name from 'beam_PreCommit_#\"%#\"_(Cron|Commit)' for '#') as metric\nFROM\n  jenkins_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND build_result = 'SUCCESS'\n  AND ((job_name LIKE 'beam_PreCommit_%_Commit')\n       OR (job_name LIKE 'beam_PreCommit_%_Cron'))\nORDER BY\n  time, metric",
           "refId": "A",
           "select": [
             [
@@ -120,62 +161,64 @@
           ]
         }
       ],
-      "thresholds": [
-        {
-          "colorMode": "critical",
-          "fill": true,
-          "line": true,
-          "op": "gt",
-          "value": 7200000
-        }
-      ],
       "timeFrom": null,
-      "timeRegions": [],
       "timeShift": null,
       "title": "Pre-commit job duration",
-      "tooltip": {
-        "shared": true,
-        "sort": 0,
-        "value_type": "individual"
-      },
-      "type": "graph",
-      "xaxis": {
-        "buckets": null,
-        "mode": "time",
-        "name": null,
-        "show": true,
-        "values": []
-      },
-      "yaxes": [
-        {
-          "format": "ms",
-          "label": "Average job duration",
-          "logBase": 1,
-          "max": null,
-          "min": "0",
-          "show": true
-        },
-        {
-          "format": "short",
-          "label": null,
-          "logBase": 1,
-          "max": null,
-          "min": null,
-          "show": false
-        }
-      ],
-      "yaxis": {
-        "align": false,
-        "alignLevel": null
-      }
+      "type": "timeseries"
     },
     {
-      "aliasColors": {},
-      "bars": false,
-      "dashLength": 10,
-      "dashes": false,
       "datasource": "BeamPSQL",
-      "fill": 1,
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": true,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "dtdurationms"
+        },
+        "overrides": []
+      },
       "gridPos": {
         "h": 8,
         "w": 24,
@@ -183,38 +226,25 @@
         "y": 8
       },
       "id": 6,
-      "legend": {
-        "alignAsTable": true,
-        "avg": false,
-        "current": false,
-        "hideEmpty": true,
-        "hideZero": true,
-        "max": false,
-        "min": false,
-        "rightSide": true,
-        "show": true,
-        "total": false,
-        "values": false
-      },
-      "lines": true,
-      "linewidth": 1,
       "links": [],
-      "nullPointMode": "null",
-      "percentage": false,
-      "pointradius": 5,
-      "points": false,
-      "renderer": "flot",
-      "seriesOverrides": [],
-      "spaceLength": 10,
-      "stack": false,
-      "steppedLine": false,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "table",
+          "placement": "right"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "pluginVersion": "8.1.2",
       "targets": [
         {
           "format": "time_series",
           "group": [],
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT\n  build_timestamp as time,\n  timing_queuingDurationMillis as value,\n  substring(job_name from 'beam_PreCommit_#\"%#\"_(Cron|Commit|Phrase)' for '#') as metric\nFROM\n  jenkins_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND build_result = 'SUCCESS'\n  AND ((job_name LIKE 'beam_PreCommit_%_Commit')\n       OR (job_name LIKE 'beam_PreCommit_%_Cron')\n       OR (job_name LIKE 'beam_PreCommit_%_Phrase'))\nORDER BY\n  metric, time",
+          "rawSql": "SELECT\n  build_timestamp as time,\n  timing_queuingDurationMillis as value,\n  substring(job_name from 'beam_PreCommit_#\"%#\"_(Cron|Commit|Phrase)' for '#') as metric\nFROM\n  jenkins_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND build_result = 'SUCCESS'\n  AND ((job_name LIKE 'beam_PreCommit_%_Commit')\n       OR (job_name LIKE 'beam_PreCommit_%_Cron')\n       OR (job_name LIKE 'beam_PreCommit_%_Phrase'))\nORDER BY\n  time, metric",
           "refId": "A",
           "select": [
             [
@@ -236,54 +266,64 @@
           ]
         }
       ],
-      "thresholds": [],
       "timeFrom": null,
-      "timeRegions": [],
       "timeShift": null,
       "title": "Time in queue",
-      "tooltip": {
-        "shared": true,
-        "sort": 2,
-        "value_type": "individual"
-      },
-      "type": "graph",
-      "xaxis": {
-        "buckets": null,
-        "mode": "time",
-        "name": null,
-        "show": true,
-        "values": []
-      },
-      "yaxes": [
-        {
-          "format": "dtdurationms",
-          "label": null,
-          "logBase": 1,
-          "max": null,
-          "min": null,
-          "show": true
-        },
-        {
-          "format": "short",
-          "label": null,
-          "logBase": 1,
-          "max": null,
-          "min": null,
-          "show": true
-        }
-      ],
-      "yaxis": {
-        "align": false,
-        "alignLevel": null
-      }
+      "type": "timeseries"
     },
     {
-      "aliasColors": {},
-      "bars": false,
-      "dashLength": 10,
-      "dashes": false,
       "datasource": "BeamPSQL",
-      "fill": 0,
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": true,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "dtdurationms"
+        },
+        "overrides": []
+      },
       "gridPos": {
         "h": 8,
         "w": 24,
@@ -291,31 +331,18 @@
         "y": 16
       },
       "id": 8,
-      "legend": {
-        "alignAsTable": true,
-        "avg": false,
-        "current": false,
-        "hideEmpty": true,
-        "hideZero": true,
-        "max": false,
-        "min": false,
-        "rightSide": true,
-        "show": true,
-        "total": false,
-        "values": false
-      },
-      "lines": true,
-      "linewidth": 1,
       "links": [],
-      "nullPointMode": "null",
-      "percentage": false,
-      "pointradius": 2,
-      "points": false,
-      "renderer": "flot",
-      "seriesOverrides": [],
-      "spaceLength": 10,
-      "stack": false,
-      "steppedLine": false,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "table",
+          "placement": "right"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "pluginVersion": "8.1.2",
       "targets": [
         {
           "aggregation": "Last",
@@ -351,49 +378,13 @@
           ]
         }
       ],
-      "thresholds": [],
       "timeFrom": null,
-      "timeRegions": [],
       "timeShift": null,
       "title": "Time in queue: 0.9 percentile on month period",
-      "tooltip": {
-        "shared": true,
-        "sort": 2,
-        "value_type": "individual"
-      },
-      "type": "graph",
-      "xaxis": {
-        "buckets": null,
-        "mode": "time",
-        "name": null,
-        "show": true,
-        "values": []
-      },
-      "yaxes": [
-        {
-          "format": "dtdurationms",
-          "label": null,
-          "logBase": 1,
-          "max": null,
-          "min": null,
-          "show": true
-        },
-        {
-          "format": "short",
-          "label": null,
-          "logBase": 1,
-          "max": null,
-          "min": null,
-          "show": true
-        }
-      ],
-      "yaxis": {
-        "align": false,
-        "alignLevel": null
-      }
+      "type": "timeseries"
     }
   ],
-  "schemaVersion": 18,
+  "schemaVersion": 30,
   "style": "dark",
   "tags": [],
   "templating": {
@@ -431,5 +422,5 @@
   "timezone": "utc",
   "title": "Pre-commit Test Latency",
   "uid": "_TNndF2iz",
-  "version": 18
+  "version": 1
 }
diff --git a/.test-infra/metrics/grafana/dashboards/stability_critical_jobs_status.json b/.test-infra/metrics/grafana/dashboards/stability_critical_jobs_status.json
index 83695dc..f0ffe63 100644
--- a/.test-infra/metrics/grafana/dashboards/stability_critical_jobs_status.json
+++ b/.test-infra/metrics/grafana/dashboards/stability_critical_jobs_status.json
@@ -8,6 +8,12 @@
         "hide": true,
         "iconColor": "rgba(0, 211, 255, 1)",
         "name": "Annotations & Alerts",
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
         "type": "dashboard"
       }
     ]
@@ -15,11 +21,10 @@
   "editable": true,
   "gnetId": null,
   "graphTooltip": 0,
-  "id": 3,
   "links": [],
   "panels": [
     {
-      "content": "The graph shows: average greenness of critical post-commit tests jobs per week. This graph show health of our project.\n\nTable shows list of relevant jobs failures during selected time interval (You can change time interval on top-right corner of the dashboard). Please, triage failed jobs and update or create corresponding jira tickets. You can utilized provided links to help with this.",
+      "datasource": null,
       "gridPos": {
         "h": 3,
         "w": 10,
@@ -28,15 +33,97 @@
       },
       "id": 8,
       "links": [],
-      "mode": "markdown",
-      "options": {},
+      "options": {
+        "content": "The graph shows: average greenness of critical post-commit tests jobs per week. This graph show health of our project.\n\nTable shows list of relevant jobs failures during selected time interval (You can change time interval on top-right corner of the dashboard). Please, triage failed jobs and update or create corresponding jira tickets. You can utilized provided links to help with this.",
+        "mode": "markdown"
+      },
+      "pluginVersion": "8.1.2",
       "title": "Dashboard guidelines",
       "type": "text"
     },
     {
-      "columns": [],
       "datasource": "BeamPSQL",
-      "fontSize": "100%",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "custom": {
+            "align": "auto",
+            "displayMode": "auto"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Time"
+            },
+            "properties": [
+              {
+                "id": "displayName",
+                "value": "Time"
+              },
+              {
+                "id": "unit",
+                "value": "time: YYYY-MM-DD HH:mm:ss"
+              },
+              {
+                "id": "custom.align",
+                "value": null
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "build_url"
+            },
+            "properties": [
+              {
+                "id": "displayName",
+                "value": "Build Url"
+              },
+              {
+                "id": "unit",
+                "value": "short"
+              },
+              {
+                "id": "decimals",
+                "value": 2
+              },
+              {
+                "id": "links",
+                "value": [
+                  {
+                    "targetBlank": true,
+                    "title": "Link to Jenkins job.",
+                    "url": "${__cell:raw}"
+                  }
+                ]
+              },
+              {
+                "id": "custom.align",
+                "value": null
+              }
+            ]
+          }
+        ]
+      },
       "gridPos": {
         "h": 6,
         "w": 14,
@@ -46,43 +133,10 @@
       "hideTimeOverride": false,
       "id": 4,
       "links": [],
-      "options": {},
-      "pageSize": null,
-      "scroll": true,
-      "showHeader": true,
-      "sort": {
-        "col": 0,
-        "desc": true
+      "options": {
+        "showHeader": true
       },
-      "styles": [
-        {
-          "alias": "Time",
-          "dateFormat": "YYYY-MM-DD HH:mm:ss",
-          "link": false,
-          "pattern": "Time",
-          "type": "date"
-        },
-        {
-          "alias": "Build Url",
-          "colorMode": null,
-          "colors": [
-            "rgba(245, 54, 54, 0.9)",
-            "rgba(237, 129, 40, 0.89)",
-            "rgba(50, 172, 45, 0.97)"
-          ],
-          "dateFormat": "YYYY-MM-DD HH:mm:ss",
-          "decimals": 2,
-          "link": true,
-          "linkTargetBlank": true,
-          "linkTooltip": "Link to Jenkins job.",
-          "linkUrl": "${__cell:raw}",
-          "mappingType": 1,
-          "pattern": "build_url",
-          "thresholds": [],
-          "type": "number",
-          "unit": "short"
-        }
-      ],
+      "pluginVersion": "8.1.2",
       "targets": [
         {
           "alias": "",
@@ -114,11 +168,18 @@
       ],
       "timeShift": null,
       "title": "Failed builds",
-      "transform": "table",
+      "transformations": [
+        {
+          "id": "merge",
+          "options": {
+            "reducers": []
+          }
+        }
+      ],
       "type": "table"
     },
     {
-      "content": "[List existing jira tickets](https://issues.apache.org/jira/issues/?jql=project%20%3D%20BEAM%20AND%20status%20in%20(Open%2C%20%22In%20Progress%22%2C%20Reopened)%20AND%20resolution%20%3D%20Unresolved%20AND%20component%20%3D%20test-failures%20ORDER%20BY%20priority%20DESC%2C%20updated%20DESC)\n\n[Create new Jira ticket](https://issues.apache.org/jira/secure/CreateIssueDetails!init.jspa?pid=12319527&issuetype=1&summary=%5BjobName%5D%5BTestName%5D%5BIsFlake%5D%20Failure%20summary&priority=3&components=12334203&description=%3CFailure%20summary%3E%0AFailing%20job%20url:%0AJob%20history%20url:%0ARelevant%20log:)",
+      "datasource": null,
       "gridPos": {
         "h": 3,
         "w": 10,
@@ -127,19 +188,70 @@
       },
       "id": 6,
       "links": [],
-      "mode": "markdown",
-      "options": {},
+      "options": {
+        "content": "[List existing jira tickets](https://issues.apache.org/jira/issues/?jql=project%20%3D%20BEAM%20AND%20status%20in%20(Open%2C%20%22In%20Progress%22%2C%20Reopened)%20AND%20resolution%20%3D%20Unresolved%20AND%20component%20%3D%20test-failures%20ORDER%20BY%20priority%20DESC%2C%20updated%20DESC)\n\n[Create new Jira ticket](https://issues.apache.org/jira/secure/CreateIssueDetails!init.jspa?pid=12319527&issuetype=1&summary=%5BjobName%5D%5BTestName%5D%5BIsFlake%5D%20Failure%20summary&priority=3&components=12334203&description=%3CFailure%20summary%3E%0AFailing%20job%20url:%0AJob%20history%20url:%0ARelevant%20log:)",
+        "mode": "markdown"
+      },
+      "pluginVersion": "8.1.2",
       "title": "Useful links",
       "type": "text"
     },
     {
-      "aliasColors": {},
-      "bars": false,
-      "dashLength": 10,
-      "dashes": false,
       "datasource": "BeamPSQL",
       "description": "Each data point shows aggregation for corresponding week.\nLatest (rightmost) data point aggregates all data available for current week, so it may change based on new data and should not be considered a final value.",
-      "fill": 0,
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": true,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "line"
+            }
+          },
+          "mappings": [],
+          "max": 1,
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "#3f6833",
+                "value": null
+              },
+              {
+                "color": "transparent",
+                "value": 0.7
+              }
+            ]
+          },
+          "unit": "percentunit"
+        },
+        "overrides": []
+      },
       "gridPos": {
         "h": 7,
         "w": 10,
@@ -147,29 +259,18 @@
         "y": 6
       },
       "id": 2,
-      "legend": {
-        "avg": false,
-        "current": false,
-        "max": false,
-        "min": false,
-        "rightSide": true,
-        "show": true,
-        "total": false,
-        "values": false
-      },
-      "lines": true,
-      "linewidth": 1,
       "links": [],
-      "nullPointMode": "null",
-      "options": {},
-      "percentage": false,
-      "pointradius": 2,
-      "points": false,
-      "renderer": "flot",
-      "seriesOverrides": [],
-      "spaceLength": 10,
-      "stack": false,
-      "steppedLine": false,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "right"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "pluginVersion": "8.1.2",
       "targets": [
         {
           "alias": "",
@@ -177,7 +278,7 @@
           "group": [],
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT\n  DATE_TRUNC('week', build_timestamp) as time,\n  avg(\n  case \n    when build_result = 'SUCCESS' then 1\n    else 0\n  end) as value,\n  substring(job_name from 'beam_PostCommit_#\"%#\"' for '#') as job_name\nFROM\n  /*\n    We perform a union here to create a fake \"Python_All\" job_name in\n    order to graph a new line for all the python results combined.\n  */\n  ( SELECT build_timestamp, build_result, job_name\n    FROM jenkins_builds\n  UNION\n    SELECT build_timestamp, build_result, 'beam_PostCommit_Python_All' as job_name\n    FROM jenkins_builds\n    WHERE \n      ((job_name SIMILAR TO 'beam_PostCommit_Python[0-9]+'))\n      AND NOT (job_name like '%_PR')\n  ) AS critical_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND ((job_name = 'beam_PostCommit_Java') \n       OR (job_name = 'beam_PostCommit_Go') \n       OR (job_name SIMILAR TO 'beam_PostCommit_Python[0-9]+')\n       OR (job_name = 'beam_PostCommit_Python_Verify')\n       OR (job_name = 'beam_PostCommit_Python_All')\n       OR (job_name = 'beam_PostCommit_Website_Publish'))\n  AND NOT (job_name like '%_PR')\nGROUP BY\n  time, job_name\norder BY\n  job_name, time",
+          "rawSql": "SELECT\n  DATE_TRUNC('week', build_timestamp) as time,\n  avg(\n  case \n    when build_result = 'SUCCESS' then 1\n    else 0\n  end) as value,\n  substring(job_name from 'beam_PostCommit_#\"%#\"' for '#') as job_name\nFROM\n  /*\n    We perform a union here to create a fake \"Python_All\" job_name in\n    order to graph a new line for all the python results combined.\n  */\n  ( SELECT build_timestamp, build_result, job_name\n    FROM jenkins_builds\n  UNION\n    SELECT build_timestamp, build_result, 'beam_PostCommit_Python_All' as job_name\n    FROM jenkins_builds\n    WHERE \n      ((job_name SIMILAR TO 'beam_PostCommit_Python[0-9]+'))\n      AND NOT (job_name like '%_PR')\n  ) AS critical_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND ((job_name = 'beam_PostCommit_Java') \n       OR (job_name = 'beam_PostCommit_Go') \n       OR (job_name SIMILAR TO 'beam_PostCommit_Python[0-9]+')\n       OR (job_name = 'beam_PostCommit_Python_Verify')\n       OR (job_name = 'beam_PostCommit_Python_All')\n       OR (job_name = 'beam_PostCommit_Website_Publish'))\n  AND NOT (job_name like '%_PR')\nGROUP BY\n  time, job_name\norder BY\n  time, job_name",
           "refId": "A",
           "select": [
             [
@@ -199,66 +300,67 @@
           ]
         }
       ],
-      "thresholds": [
-        {
-          "colorMode": "custom",
-          "fill": false,
-          "line": true,
-          "lineColor": "#3f6833",
-          "op": "lt",
-          "value": 0.7,
-          "yaxis": "left"
-        }
-      ],
       "timeFrom": null,
-      "timeRegions": [],
       "timeShift": null,
       "title": "Greenness per Week (in %)",
-      "tooltip": {
-        "shared": true,
-        "sort": 1,
-        "value_type": "individual"
-      },
-      "type": "graph",
-      "xaxis": {
-        "buckets": null,
-        "mode": "time",
-        "name": null,
-        "show": true,
-        "values": []
-      },
-      "yaxes": [
-        {
-          "decimals": null,
-          "format": "percentunit",
-          "label": "",
-          "logBase": 1,
-          "max": "1",
-          "min": "0",
-          "show": true
-        },
-        {
-          "format": "short",
-          "label": null,
-          "logBase": 1,
-          "max": null,
-          "min": null,
-          "show": false
-        }
-      ],
-      "yaxis": {
-        "align": false,
-        "alignLevel": null
-      }
+      "type": "timeseries"
     },
     {
-      "aliasColors": {},
-      "bars": false,
-      "dashLength": 10,
-      "dashes": false,
       "datasource": "BeamPSQL",
       "description": "Each data point shows aggregation for corresponding month.\nLatest (rightmost) data point aggregates all data available for current month, so it may change based on new data and should not be considered a final value.",
-      "fill": 0,
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": true,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "line"
+            }
+          },
+          "mappings": [],
+          "max": 1,
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "#3f6833",
+                "value": null
+              },
+              {
+                "color": "transparent",
+                "value": 0.7
+              }
+            ]
+          },
+          "unit": "percentunit"
+        },
+        "overrides": []
+      },
       "gridPos": {
         "h": 7,
         "w": 14,
@@ -266,30 +368,18 @@
         "y": 6
       },
       "id": 10,
-      "legend": {
-        "alignAsTable": true,
-        "avg": false,
-        "current": false,
-        "max": false,
-        "min": false,
-        "rightSide": true,
-        "show": true,
-        "total": false,
-        "values": false
-      },
-      "lines": true,
-      "linewidth": 1,
       "links": [],
-      "nullPointMode": "null",
-      "options": {},
-      "percentage": false,
-      "pointradius": 2,
-      "points": false,
-      "renderer": "flot",
-      "seriesOverrides": [],
-      "spaceLength": 10,
-      "stack": false,
-      "steppedLine": false,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "table",
+          "placement": "right"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "pluginVersion": "8.1.2",
       "targets": [
         {
           "alias": "",
@@ -297,7 +387,7 @@
           "group": [],
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT\n  DATE_TRUNC('month', build_timestamp) as time,\n  avg(\n  case \n    when build_result = 'SUCCESS' then 1\n    else 0\n  end) as value,\n  substring(job_name from 'beam_PostCommit_#\"%#\"' for '#') as job_name\nFROM\n  /*\n  We perform a union here to create a fake \"Python_All\" job_name in\n  order to graph a new line for all the python results combined.\n  */\n  ( SELECT build_timestamp, build_result, job_name\n    FROM jenkins_builds\n  UNION\n    SELECT build_timestamp, build_result, 'beam_PostCommit_Python_All' as job_name\n    FROM jenkins_builds\n    WHERE \n      ((job_name SIMILAR TO 'beam_PostCommit_Python[0-9]+'))\n      AND NOT (job_name like '%_PR')\n  ) AS critical_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND ((job_name = 'beam_PostCommit_Java') \n       OR (job_name = 'beam_PostCommit_Go') \n       OR (job_name SIMILAR TO 'beam_PostCommit_Python[0-9]+')\n       OR (job_name = 'beam_PostCommit_Python_Verify')\n       OR (job_name = 'beam_PostCommit_Python_All')\n       OR (job_name = 'beam_PostCommit_Website_Publish'))\n  AND NOT (job_name like '%_PR')\nGROUP BY\n  time, job_name\norder BY\n  job_name, time",
+          "rawSql": "SELECT\n  DATE_TRUNC('month', build_timestamp) as time,\n  avg(\n  case \n    when build_result = 'SUCCESS' then 1\n    else 0\n  end) as value,\n  substring(job_name from 'beam_PostCommit_#\"%#\"' for '#') as job_name\nFROM\n  /*\n  We perform a union here to create a fake \"Python_All\" job_name in\n  order to graph a new line for all the python results combined.\n  */\n  ( SELECT build_timestamp, build_result, job_name\n    FROM jenkins_builds\n  UNION\n    SELECT build_timestamp, build_result, 'beam_PostCommit_Python_All' as job_name\n    FROM jenkins_builds\n    WHERE \n      ((job_name SIMILAR TO 'beam_PostCommit_Python[0-9]+'))\n      AND NOT (job_name like '%_PR')\n  ) AS critical_builds\nWHERE\n  (build_timestamp BETWEEN $__timeFrom() AND $__timeTo())\n  AND ((job_name = 'beam_PostCommit_Java') \n       OR (job_name = 'beam_PostCommit_Go') \n       OR (job_name SIMILAR TO 'beam_PostCommit_Python[0-9]+')\n       OR (job_name = 'beam_PostCommit_Python_Verify')\n       OR (job_name = 'beam_PostCommit_Python_All')\n       OR (job_name = 'beam_PostCommit_Website_Publish'))\n  AND NOT (job_name like '%_PR')\nGROUP BY\n  time, job_name\norder BY\n  time, job_name",
           "refId": "A",
           "select": [
             [
@@ -319,61 +409,14 @@
           ]
         }
       ],
-      "thresholds": [
-        {
-          "colorMode": "custom",
-          "fill": false,
-          "line": true,
-          "lineColor": "#3f6833",
-          "op": "lt",
-          "value": 0.7,
-          "yaxis": "left"
-        }
-      ],
       "timeFrom": null,
-      "timeRegions": [],
       "timeShift": null,
       "title": "Greenness per Month (in %)",
-      "tooltip": {
-        "shared": true,
-        "sort": 1,
-        "value_type": "individual"
-      },
-      "type": "graph",
-      "xaxis": {
-        "buckets": null,
-        "mode": "time",
-        "name": null,
-        "show": true,
-        "values": []
-      },
-      "yaxes": [
-        {
-          "decimals": null,
-          "format": "percentunit",
-          "label": "",
-          "logBase": 1,
-          "max": "1",
-          "min": "0",
-          "show": true
-        },
-        {
-          "format": "short",
-          "label": null,
-          "logBase": 1,
-          "max": null,
-          "min": null,
-          "show": false
-        }
-      ],
-      "yaxis": {
-        "align": false,
-        "alignLevel": null
-      }
+      "type": "timeseries"
     }
   ],
   "refresh": false,
-  "schemaVersion": 18,
+  "schemaVersion": 30,
   "style": "dark",
   "tags": [],
   "templating": {
@@ -411,5 +454,5 @@
   "timezone": "utc",
   "title": "Stability critical jobs status",
   "uid": "McTAiu0ik",
-  "version": 1
-}
\ No newline at end of file
+  "version": 2
+}
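
Both greenness panels above compute the same metric in `rawSql`: the average of 1/0 success flags per job, bucketed by week or month. As a rough, illustrative cross-check of what that aggregation yields, here is the equivalent in pandas; the sample rows and the use of pandas are assumptions for illustration only, the dashboard itself runs only the SQL shown above.

```python
import pandas as pd

# Illustrative rows shaped like the jenkins_builds columns the query reads;
# the data is made up, only the aggregation mirrors the dashboard SQL.
builds = pd.DataFrame({
    'build_timestamp': pd.to_datetime(
        ['2021-09-06', '2021-09-07', '2021-09-13', '2021-09-14']),
    'build_result': ['SUCCESS', 'FAILURE', 'SUCCESS', 'SUCCESS'],
    'job_name': ['beam_PostCommit_Java'] * 4,
})

# Greenness = mean of 1/0 success flags per week and job, like the
# avg(case when build_result = 'SUCCESS' then 1 else 0 end) expression above.
greenness = (
    builds.assign(value=(builds['build_result'] == 'SUCCESS').astype(int))
    .groupby([pd.Grouper(key='build_timestamp', freq='W'), 'job_name'])['value']
    .mean()
)
print(greenness)
```
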
diff --git a/.test-infra/metrics/grafana/provisioning/datasources/beamgithubjavatests-api.yaml b/.test-infra/metrics/grafana/provisioning/datasources/beamgithubjavatests-api.yaml
new file mode 100644
index 0000000..a3a0e9b
--- /dev/null
+++ b/.test-infra/metrics/grafana/provisioning/datasources/beamgithubjavatests-api.yaml
@@ -0,0 +1,34 @@
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+apiVersion: 1
+
+deleteDatasources:
+
+datasources:
+  - name: Java Tests
+    type: marcusolsson-json-datasource
+    access: proxy
+    orgId: 1
+    url: https://api.github.com/repos/apache/beam/actions/workflows/java_tests.yml/runs
+    jsonData:
+      httpHeaderName1: "accept"
+      customQueryParameters: "per_page=100"
+    secureJsonData:
+      httpHeaderValue1: "application/vnd.github.v3+json"
+    editable: false
diff --git a/.test-infra/metrics/grafana/provisioning/datasources/beamgithubpythontests-api.yaml b/.test-infra/metrics/grafana/provisioning/datasources/beamgithubpythontests-api.yaml
new file mode 100644
index 0000000..abcd060
--- /dev/null
+++ b/.test-infra/metrics/grafana/provisioning/datasources/beamgithubpythontests-api.yaml
@@ -0,0 +1,34 @@
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+apiVersion: 1
+
+deleteDatasources:
+
+datasources:
+  - name: Python Tests
+    type: marcusolsson-json-datasource
+    access: proxy
+    orgId: 1
+    url: https://api.github.com/repos/apache/beam/actions/workflows/python_tests.yml/runs
+    jsonData:
+      httpHeaderName1: "accept"
+      customQueryParameters: "per_page=100"
+    secureJsonData:
+      httpHeaderValue1: "application/vnd.github.v3+json"
+    editable: false
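
The two datasources above point the marcusolsson-json-datasource plugin at the GitHub Actions workflow-runs API, with an `accept: application/vnd.github.v3+json` header and `per_page=100`. As a quick way to sanity-check the payload those dashboards will query, a minimal sketch using `requests`; the script is for local inspection only and is not part of the provisioning.

```python
import requests

# Same endpoint, header, and query parameter as the provisioned datasource.
resp = requests.get(
    'https://api.github.com/repos/apache/beam/actions/workflows/python_tests.yml/runs',
    params={'per_page': 100},
    headers={'accept': 'application/vnd.github.v3+json'},
)
resp.raise_for_status()

# The response wraps the runs in a 'workflow_runs' array.
for run in resp.json()['workflow_runs'][:5]:
    print(run['created_at'], run['status'], run['conclusion'])
```
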
diff --git a/CHANGES.md b/CHANGES.md
index 5547c75..2b3c370 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -46,10 +46,15 @@
 
 * X behavior is deprecated and will be removed in X versions ([BEAM-X](https://issues.apache.org/jira/browse/BEAM-X)).
 
-## Known Issues
+## Bugfixes
 
 * Fixed X (Java/Python) ([BEAM-X](https://issues.apache.org/jira/browse/BEAM-X)).
+
+## Known Issues
+
+* ([BEAM-X](https://issues.apache.org/jira/browse/BEAM-X)).
 -->
+
 # [2.34.0] - Unreleased
 * Add an [example](https://github.com/cometta/python-apache-beam-spark) of deploying Python Apache Beam job with Spark Cluster
 ## Highlights
@@ -81,9 +86,15 @@
 
 * X behavior is deprecated and will be removed in X versions ([BEAM-X](https://issues.apache.org/jira/browse/BEAM-X)).
 
-## Known Issues
+## Bugfixes
 
 * Fixed X (Java/Python) ([BEAM-X](https://issues.apache.org/jira/browse/BEAM-X)).
+* Fixed an error when writing multiple DeferredFrames to CSV (Python)
+([BEAM-12701](https://issues.apache.org/jira/browse/BEAM-12701)).
+
+## Known Issues
+
+* ([BEAM-X](https://issues.apache.org/jira/browse/BEAM-X)).
 
 # [2.33.0] - Unreleased
 
@@ -172,7 +183,7 @@
 * Python GBK will stop supporting unbounded PCollections that have global windowing and a default trigger in Beam 2.33. This can be overriden with `--allow_unsafe_triggers`. ([BEAM-9487](https://issues.apache.org/jira/browse/BEAM-9487)).
 * Python GBK will start requiring safe triggers or the `--allow_unsafe_triggers` flag starting with Beam 2.33. ([BEAM-9487](https://issues.apache.org/jira/browse/BEAM-9487)).
 
-## Known Issues
+## Bugfixes
 
 * Fixed race condition in RabbitMqIO causing duplicate acks (Java) ([BEAM-6516](https://issues.apache.org/jira/browse/BEAM-6516)))
 
diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index 67fe8b8..5c00e27 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -510,6 +510,7 @@
         aws_java_sdk2_sdk_core                      : "software.amazon.awssdk:sdk-core:$aws_java_sdk2_version",
         aws_java_sdk2_sns                           : "software.amazon.awssdk:sns:$aws_java_sdk2_version",
         aws_java_sdk2_sqs                           : "software.amazon.awssdk:sqs:$aws_java_sdk2_version",
+        aws_java_sdk2_sts                           : "software.amazon.awssdk:sts:$aws_java_sdk2_version",
         aws_java_sdk2_s3                            : "software.amazon.awssdk:s3:$aws_java_sdk2_version",
         aws_java_sdk2_http_client_spi               : "software.amazon.awssdk:http-client-spi:$aws_java_sdk2_version",
         aws_java_sdk2_regions                       : "software.amazon.awssdk:regions:$aws_java_sdk2_version",
diff --git a/examples/notebooks/tour-of-beam/dataframes.ipynb b/examples/notebooks/tour-of-beam/dataframes.ipynb
new file mode 100644
index 0000000..c19d991
--- /dev/null
+++ b/examples/notebooks/tour-of-beam/dataframes.ipynb
@@ -0,0 +1,747 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 2,
+  "metadata": {
+    "colab": {
+      "name": "Beam DataFrames",
+      "provenance": [],
+      "collapsed_sections": [],
+      "toc_visible": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "source": [
+        "#@title ###### Licensed to the Apache Software Foundation (ASF), Version 2.0 (the \"License\")\n",
+        "\n",
+        "# Licensed to the Apache Software Foundation (ASF) under one\n",
+        "# or more contributor license agreements. See the NOTICE file\n",
+        "# distributed with this work for additional information\n",
+        "# regarding copyright ownership. The ASF licenses this file\n",
+        "# to you under the Apache License, Version 2.0 (the\n",
+        "# \"License\"); you may not use this file except in compliance\n",
+        "# with the License. You may obtain a copy of the License at\n",
+        "#\n",
+        "#   http://www.apache.org/licenses/LICENSE-2.0\n",
+        "#\n",
+        "# Unless required by applicable law or agreed to in writing,\n",
+        "# software distributed under the License is distributed on an\n",
+        "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
+        "# KIND, either express or implied. See the License for the\n",
+        "# specific language governing permissions and limitations\n",
+        "# under the License."
+      ],
+      "outputs": [],
+      "metadata": {
+        "cellView": "form",
+        "id": "rz2qIC9IL2rI"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Beam DataFrames\n",
+        "\n",
+        "<button>\n",
+        "  <a href=\"https://beam.apache.org/documentation/dsls/dataframes/overview/\">\n",
+        "    <img src=\"https://beam.apache.org/images/favicon.ico\" alt=\"Open the docs\" height=\"16\"/>\n",
+        "    Beam DataFrames overview\n",
+        "  </a>\n",
+        "</button>\n",
+        "\n",
+        "Beam DataFrames provide a pandas-like [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html)\n",
+        "API to declare Beam pipelines.\n",
+        "\n",
+        "> ℹ️ To learn more about Beam DataFrames, take a look at the\n",
+        "[Beam DataFrames overview](https://beam.apache.org/documentation/dsls/dataframes/overview) page.\n",
+        "\n",
+        "First, we need to install Apache Beam with the `interactive` extra for the Interactive runner.",
+        "We also need `pandas` for this notebook, but the Interactive runner already depends on it."
+      ],
+      "metadata": {
+        "id": "hDuXLLSZnI1D"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "source": [
+        "%pip install --quiet apache-beam[interactive]"
+      ],
+      "outputs": [],
+      "metadata": {
+        "id": "8QVByaWjkarZ"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Lets create a small data file of\n",
+        "[Comma-Separated Values (CSV)](https://en.wikipedia.org/wiki/Comma-separated_values).\n",
+        "It simply includes the dates of the\n",
+        "[equinoxes](https://en.wikipedia.org/wiki/Equinox) and\n",
+        "[solstices](https://en.wikipedia.org/wiki/Solstice)\n",
+        "of the year 2021."
+      ],
+      "metadata": {
+        "id": "aLqdbX4Mgipq"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "source": [
+        "%%writefile solar_events.csv\n",
+        "timestamp,event\n",
+        "2021-03-20 09:37:00,March Equinox\n",
+        "2021-06-21 03:32:00,June Solstice\n",
+        "2021-09-22 19:21:00,September Equinox\n",
+        "2021-12-21 15:59:00,December Solstice"
+      ],
+      "outputs": [],
+      "metadata": {
+        "id": "hZjwAm7qotrJ"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Interactive Beam\n",
+        "\n",
+        "Pandas has the\n",
+        "[`pandas.read_csv`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html)\n",
+        "function to easily read CSV files into DataFrames.\n",
+        "Beam has the\n",
+        "[`beam.dataframe.io.read_csv`](https://beam.apache.org/releases/pydoc/current/apache_beam.dataframe.io.html#apache_beam.dataframe.io.read_csv)\n",
+        "function that emulates `pandas.read_csv`, but returns a deferred Beam DataFrame.\n",
+        "\n",
+        "If you’re using\n",
+        "[Interactive Beam](https://beam.apache.org/releases/pydoc/current/apache_beam.runners.interactive.interactive_beam.html),\n",
+        "you can use `collect` to bring a Beam DataFrame into local memory as a Pandas DataFrame."
+      ],
+      "metadata": {
+        "id": "Hv_58JulleQ_"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "source": [
+        "import apache_beam as beam\n",
+        "import apache_beam.runners.interactive.interactive_beam as ib\n",
+        "from apache_beam.runners.interactive.interactive_runner import InteractiveRunner\n",
+        "\n",
+        "pipeline = beam.Pipeline(InteractiveRunner())\n",
+        "\n",
+        "# Create a deferred Beam DataFrame with the contents of our csv file.\n",
+        "beam_df = pipeline | 'Read CSV' >> beam.dataframe.io.read_csv('solar_events.csv')\n",
+        "\n",
+        "# We can use `ib.collect` to view the contents of a Beam DataFrame.\n",
+        "ib.collect(beam_df)"
+      ],
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/javascript": "\n        if (typeof window.interactive_beam_jquery == 'undefined') {\n          var jqueryScript = document.createElement('script');\n          jqueryScript.src = 'https://code.jquery.com/jquery-3.4.1.slim.min.js';\n          jqueryScript.type = 'text/javascript';\n          jqueryScript.onload = function() {\n            var datatableScript = document.createElement('script');\n            datatableScript.src = 'https://cdn.datatables.net/1.10.20/js/jquery.dataTables.min.js';\n            datatableScript.type = 'text/javascript';\n            datatableScript.onload = function() {\n              window.interactive_beam_jquery = jQuery.noConflict(true);\n              window.interactive_beam_jquery(document).ready(function($){\n                \n              });\n            }\n            document.head.appendChild(datatableScript);\n          };\n          document.head.appendChild(jqueryScript);\n        } else {\n          window.interactive_beam_jquery(document).ready(function($){\n            \n          });\n        }"
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/html": [
+              "\n",
+              "            <link rel=\"stylesheet\" href=\"https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css\" integrity=\"sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh\" crossorigin=\"anonymous\">\n",
+              "            <div id=\"progress_indicator_1516f4062e4fc6d4e58f33cf44c41c1d\" class=\"spinner-border text-info\" role=\"status\">\n",
+              "            </div>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "WARNING:root:Make sure that locally built Python SDK docker image has Python 3.7 interpreter.\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/javascript": "\n        if (typeof window.interactive_beam_jquery == 'undefined') {\n          var jqueryScript = document.createElement('script');\n          jqueryScript.src = 'https://code.jquery.com/jquery-3.4.1.slim.min.js';\n          jqueryScript.type = 'text/javascript';\n          jqueryScript.onload = function() {\n            var datatableScript = document.createElement('script');\n            datatableScript.src = 'https://cdn.datatables.net/1.10.20/js/jquery.dataTables.min.js';\n            datatableScript.type = 'text/javascript';\n            datatableScript.onload = function() {\n              window.interactive_beam_jquery = jQuery.noConflict(true);\n              window.interactive_beam_jquery(document).ready(function($){\n                \n            $(\"#progress_indicator_1516f4062e4fc6d4e58f33cf44c41c1d\").remove();\n              });\n            }\n            document.head.appendChild(datatableScript);\n          };\n          document.head.appendChild(jqueryScript);\n        } else {\n          window.interactive_beam_jquery(document).ready(function($){\n            \n            $(\"#progress_indicator_1516f4062e4fc6d4e58f33cf44c41c1d\").remove();\n          });\n        }"
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>timestamp</th>\n",
+              "      <th>event</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>solar_events.csv:0</th>\n",
+              "      <td>2021-03-20 09:37:00</td>\n",
+              "      <td>March Equinox</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>solar_events.csv:1</th>\n",
+              "      <td>2021-06-21 03:32:00</td>\n",
+              "      <td>June Solstice</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>solar_events.csv:2</th>\n",
+              "      <td>2021-09-22 19:21:00</td>\n",
+              "      <td>September Equinox</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>solar_events.csv:3</th>\n",
+              "      <td>2021-12-21 15:59:00</td>\n",
+              "      <td>December Solstice</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "                              timestamp              event\n",
+              "solar_events.csv:0  2021-03-20 09:37:00      March Equinox\n",
+              "solar_events.csv:1  2021-06-21 03:32:00      June Solstice\n",
+              "solar_events.csv:2  2021-09-22 19:21:00  September Equinox\n",
+              "solar_events.csv:3  2021-12-21 15:59:00  December Solstice"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 3
+        }
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 242
+        },
+        "id": "sKAMXD5ElhYP",
+        "outputId": "928d9ad7-ae75-42d7-8dc6-8c5afd730b11"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Collecting a Beam DataFrame into a Pandas DataFrame is useful to perform\n",
+        "[operations not supported by Beam DataFrames](https://beam.apache.org/documentation/dsls/dataframes/differences-from-pandas#classes-of-unsupported-operations).\n",
+        "\n",
+        "For example, let's say we want to take only the first two events in chronological order.\n",
+        "Since a deferred Beam DataFrame does not have any ordering guarantees,\n",
+        "first we need to sort the values.\n",
+        "In Pandas, we could first\n",
+        "[`df.sort_values(by='timestamp')`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_values.html) and then\n",
+        "[`df.head(2)`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.head.html) to achieve this.\n",
+        "\n",
+        "However, these are\n",
+        "[order-sensitive operations](https://beam.apache.org/documentation/dsls/dataframes/differences-from-pandas#order-sensitive-operations)\n",
+        "so using them in a Beam DataFrame raises a\n",
+        "[`WontImplementError`](https://beam.apache.org/releases/pydoc/current/apache_beam.dataframe.frame_base.html#apache_beam.dataframe.frame_base.WontImplementError).\n",
+        "We can work around this by using `collect` to convert the Beam DataFrame into a Pandas DataFrame."
+      ],
+      "metadata": {
+        "id": "t3Is6dArtN_Z"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "source": [
+        "import apache_beam.runners.interactive.interactive_beam as ib\n",
+        "\n",
+        "# Collect the Beam DataFrame into a Pandas DataFrame.\n",
+        "df = ib.collect(beam_df)\n",
+        "\n",
+        "# We can now use any Pandas transforms with our data.\n",
+        "df.sort_values(by='timestamp').head(2)"
+      ],
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/html": [
+              "\n",
+              "            <link rel=\"stylesheet\" href=\"https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css\" integrity=\"sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh\" crossorigin=\"anonymous\">\n",
+              "            <div id=\"progress_indicator_4486e01c01f75e7a68a4a5fefa9ecd2c\" class=\"spinner-border text-info\" role=\"status\">\n",
+              "            </div>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/javascript": "\n        if (typeof window.interactive_beam_jquery == 'undefined') {\n          var jqueryScript = document.createElement('script');\n          jqueryScript.src = 'https://code.jquery.com/jquery-3.4.1.slim.min.js';\n          jqueryScript.type = 'text/javascript';\n          jqueryScript.onload = function() {\n            var datatableScript = document.createElement('script');\n            datatableScript.src = 'https://cdn.datatables.net/1.10.20/js/jquery.dataTables.min.js';\n            datatableScript.type = 'text/javascript';\n            datatableScript.onload = function() {\n              window.interactive_beam_jquery = jQuery.noConflict(true);\n              window.interactive_beam_jquery(document).ready(function($){\n                \n            $(\"#progress_indicator_4486e01c01f75e7a68a4a5fefa9ecd2c\").remove();\n              });\n            }\n            document.head.appendChild(datatableScript);\n          };\n          document.head.appendChild(jqueryScript);\n        } else {\n          window.interactive_beam_jquery(document).ready(function($){\n            \n            $(\"#progress_indicator_4486e01c01f75e7a68a4a5fefa9ecd2c\").remove();\n          });\n        }"
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>timestamp</th>\n",
+              "      <th>event</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>solar_events.csv:0</th>\n",
+              "      <td>2021-03-20 09:37:00</td>\n",
+              "      <td>March Equinox</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>solar_events.csv:1</th>\n",
+              "      <td>2021-06-21 03:32:00</td>\n",
+              "      <td>June Solstice</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "                              timestamp          event\n",
+              "solar_events.csv:0  2021-03-20 09:37:00  March Equinox\n",
+              "solar_events.csv:1  2021-06-21 03:32:00  June Solstice"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 4
+        }
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 138
+        },
+        "id": "8haEu6_9iTi7",
+        "outputId": "a1e07bdc-c66d-45e5-efff-90b93219c648"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "> ℹ️ Note that `collect` is _only_ accessible if you’re using\n",
+        "[Interactive Beam](https://beam.apache.org/releases/pydoc/current/apache_beam.runners.interactive.interactive_beam.html)"
+      ],
+      "metadata": {
+        "id": "ZkthQ13pwpm0"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Beam DataFrames to PCollections\n",
+        "\n",
+        "If you have your data as a Beam DataFrame, you can convert it into a regular PCollection with\n",
+        "[`to_pcollection`](https://beam.apache.org/releases/pydoc/current/apache_beam.dataframe.convert.html#apache_beam.dataframe.convert.to_pcollection).\n",
+        "\n",
+        "Converting a Beam DataFrame in this way yields a PCollection with a [schema](https://beam.apache.org/documentation/programming-guide/#what-is-a-schema).\n",
+        "This allows us to easily access each property by attribute, for example `element.event` and `element.timestamp`.\n",
+        "\n",
+        "Sometimes it's more convenient to convert the named tuples to Python dictionaries.\n",
+        "We can do that with the\n",
+        "[`_asdict`](https://docs.python.org/3/library/collections.html#collections.somenamedtuple._asdict)\n",
+        "method."
+      ],
+      "metadata": {
+        "id": "ujRm4K0iP8SX"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "source": [
+        "import apache_beam as beam\n",
+        "from apache_beam.dataframe import convert\n",
+        "\n",
+        "with beam.Pipeline() as pipeline:\n",
+        "  beam_df = pipeline | 'Read CSV' >> beam.dataframe.io.read_csv('solar_events.csv')\n",
+        "\n",
+        "  (\n",
+        "      # Convert the Beam DataFrame to a PCollection.\n",
+        "      convert.to_pcollection(beam_df)\n",
+        "\n",
+        "      # We get named tuples, we can convert them to dictionaries like this.\n",
+        "      | 'To dictionaries' >> beam.Map(lambda x: dict(x._asdict()))\n",
+        "\n",
+        "      # Print the elements in the PCollection.\n",
+        "      | 'Print' >> beam.Map(print)\n",
+        "  )"
+      ],
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "WARNING:root:Make sure that locally built Python SDK docker image has Python 3.7 interpreter.\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "{'timestamp': '2021-03-20 09:37:00', 'event': 'March Equinox'}\n",
+            "{'timestamp': '2021-06-21 03:32:00', 'event': 'June Solstice'}\n",
+            "{'timestamp': '2021-09-22 19:21:00', 'event': 'September Equinox'}\n",
+            "{'timestamp': '2021-12-21 15:59:00', 'event': 'December Solstice'}\n"
+          ]
+        }
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "g22op8rZPvB3",
+        "outputId": "bba88b0b-4d19-4d61-dac7-2c168998a2e4"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Pandas DataFrames to PCollections\n",
+        "\n",
+        "If you have your data as a Pandas DataFrame, you can convert it into a regular PCollection with\n",
+        "[`to_pcollection`](https://beam.apache.org/releases/pydoc/current/apache_beam.dataframe.convert.html#apache_beam.dataframe.convert.to_pcollection).\n",
+        "\n",
+        "Since Pandas DataFrames are not part of any Beam pipeline, we must provide the `pipeline` explicitly."
+      ],
+      "metadata": {
+        "id": "t6xNIO0iPwtn"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 6,
+      "source": [
+        "import pandas as pd\n",
+        "import apache_beam as beam\n",
+        "from apache_beam.dataframe import convert\n",
+        "\n",
+        "with beam.Pipeline() as pipeline:\n",
+        "  df = pd.read_csv('solar_events.csv')\n",
+        "\n",
+        "  (\n",
+        "      # Convert the Pandas DataFrame to a PCollection.\n",
+        "      convert.to_pcollection(df, pipeline=pipeline)\n",
+        "\n",
+        "      # We get named tuples, we can convert them to dictionaries like this.\n",
+        "      | 'To dictionaries' >> beam.Map(lambda x: dict(x._asdict()))\n",
+        "\n",
+        "      # Print the elements in the PCollection.\n",
+        "      | 'Print' >> beam.Map(print)\n",
+        "  )"
+      ],
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "WARNING:root:Make sure that locally built Python SDK docker image has Python 3.7 interpreter.\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "{'timestamp': '2021-03-20 09:37:00', 'event': 'March Equinox'}\n",
+            "{'timestamp': '2021-06-21 03:32:00', 'event': 'June Solstice'}\n",
+            "{'timestamp': '2021-09-22 19:21:00', 'event': 'September Equinox'}\n",
+            "{'timestamp': '2021-12-21 15:59:00', 'event': 'December Solstice'}\n"
+          ]
+        }
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "YWYVFkvFuksz",
+        "outputId": "a3e3e6fa-85ce-4891-95a0-389fba4461a6"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "If you have your data as a PCollection of Pandas DataFrames, you can convert them into a PCollection with\n",
+        "[`FlatMap`](https://beam.apache.org/documentation/transforms/python/elementwise/flatmap).\n",
+        "\n",
+        "> ℹ️ If the number of elements in each DataFrame can be very different (that is, some DataFrames might contain thousands of elements while others contain only a handful of elements), it might be a good idea to\n",
+        "> [`Reshuffle`](https://beam.apache.org/documentation/transforms/python/other/reshuffle).\n",
+        "> This basically rebalances the elements in the PCollection, which helps make sure all the workers have a balanced number of elements."
+      ],
+      "metadata": {
+        "id": "z6Q_tyWszkMC"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 7,
+      "source": [
+        "import pandas as pd\n",
+        "import apache_beam as beam\n",
+        "\n",
+        "with beam.Pipeline() as pipeline:\n",
+        "  (\n",
+        "      pipeline\n",
+        "      | 'Filename' >> beam.Create(['solar_events.csv'])\n",
+        "\n",
+        "      # Each element is a Pandas DataFrame, so we can do any Pandas operation.\n",
+        "      | 'Read CSV' >> beam.Map(pd.read_csv)\n",
+        "\n",
+        "      # We yield each element of all the DataFrames into a PCollection of dictionaries.\n",
+        "      | 'To dictionaries' >> beam.FlatMap(lambda df: df.to_dict('records'))\n",
+        "\n",
+        "      # Reshuffle to make sure parallelization is balanced.\n",
+        "      | 'Reshuffle' >> beam.Reshuffle()\n",
+        "\n",
+        "      # Print the elements in the PCollection.\n",
+        "      | 'Print' >> beam.Map(print)\n",
+        "  )"
+      ],
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "WARNING:root:Make sure that locally built Python SDK docker image has Python 3.7 interpreter.\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "{'timestamp': '2021-03-20 09:37:00', 'event': 'March Equinox'}\n",
+            "{'timestamp': '2021-06-21 03:32:00', 'event': 'June Solstice'}\n",
+            "{'timestamp': '2021-09-22 19:21:00', 'event': 'September Equinox'}\n",
+            "{'timestamp': '2021-12-21 15:59:00', 'event': 'December Solstice'}\n"
+          ]
+        }
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "fVWjO2Zfziqu",
+        "outputId": "c5db7be4-f764-487a-bc3b-bd5cbad4e396"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# PCollections to Beam DataFrames\n",
+        "\n",
+        "If you have your data as a PCollection, you can convert it into a deferred Beam DataFrame with\n",
+        "[`to_dataframe`](https://beam.apache.org/releases/pydoc/current/apache_beam.dataframe.convert.html#apache_beam.dataframe.convert.to_dataframe).\n",
+        "\n",
+        "> ℹ️ To convert a PCollection to a Beam DataFrame, each element _must_ have a\n",
+        "[schema](https://beam.apache.org/documentation/programming-guide/#what-is-a-schema)."
+      ],
+      "metadata": {
+        "id": "_Dm2u71EIRFr"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 8,
+      "source": [
+        "import csv\n",
+        "import apache_beam as beam\n",
+        "from apache_beam.dataframe import convert\n",
+        "\n",
+        "with open('solar_events.csv') as f:\n",
+        "  solar_events = [dict(row) for row in csv.DictReader(f)]\n",
+        "\n",
+        "with beam.Pipeline() as pipeline:\n",
+        "  pcoll = pipeline | 'Create data' >> beam.Create(solar_events)\n",
+        "\n",
+        "  # Convert the PCollection into a Beam DataFrame\n",
+        "  beam_df = convert.to_dataframe(pcoll | 'To Rows' >> beam.Map(\n",
+        "      lambda x: beam.Row(\n",
+        "          timestamp=x['timestamp'],\n",
+        "          event=x['event'],\n",
+        "      )\n",
+        "  ))\n",
+        "\n",
+        "  # Print the elements in the Beam DataFrame.\n",
+        "  (\n",
+        "      convert.to_pcollection(beam_df)\n",
+        "      | 'To dictionaries' >> beam.Map(lambda x: dict(x._asdict()))\n",
+        "      | 'Print' >> beam.Map(print)\n",
+        "  )"
+      ],
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "WARNING:root:Make sure that locally built Python SDK docker image has Python 3.7 interpreter.\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "{'timestamp': '2021-03-20 09:37:00', 'event': 'March Equinox'}\n",
+            "{'timestamp': '2021-06-21 03:32:00', 'event': 'June Solstice'}\n",
+            "{'timestamp': '2021-09-22 19:21:00', 'event': 'September Equinox'}\n",
+            "{'timestamp': '2021-12-21 15:59:00', 'event': 'December Solstice'}\n"
+          ]
+        }
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "N6dVNkkEIWa_",
+        "outputId": "16556170-fbf6-4980-962c-bb466d0b76b2"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# PCollections to Pandas DataFrames\n",
+        "\n",
+        "If you have your data as a PCollection, you can convert it into an in-memory Pandas DataFrame via a\n",
+        "[side input](https://beam.apache.org/documentation/programming-guide#side-inputs).\n",
+        "\n",
+        "> ℹ️ It's recommended to **only** do this if you need to use a Pandas operation that is\n",
+        "> [not supported in Beam DataFrames](https://beam.apache.org/documentation/dsls/dataframes/differences-from-pandas/#classes-of-unsupported-operations).\n",
+        "> Converting a PCollection into a Pandas DataFrame consolidates elements from potentially multiple workers into a single worker, which could create a performance bottleneck.\n"
+        "\n",
+        "> ⚠️ Pandas DataFrames are in-memory data structures, so make sure all the elements in the PCollection fit into memory.\n",
+        "> If they don't fit into memory, consider yielding multiple DataFrame elements via\n",
+        "> [`FlatMap`](https://beam.apache.org/documentation/transforms/python/elementwise/flatmap)."
+      ],
+      "metadata": {
+        "id": "kj08jOZQQa_q"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 9,
+      "source": [
+        "import csv\n",
+        "import pandas as pd\n",
+        "import apache_beam as beam\n",
+        "\n",
+        "with open('solar_events.csv') as f:\n",
+        "  solar_events = [dict(row) for row in csv.DictReader(f)]\n",
+        "\n",
+        "with beam.Pipeline() as pipeline:\n",
+        "  pcoll = pipeline | 'Create data' >> beam.Create(solar_events)\n",
+        "\n",
+        "  (\n",
+        "      pipeline\n",
+        "\n",
+        "      # Create a single element containing the entire PCollection. \n",
+        "      | 'Singleton' >> beam.Create([None])\n",
+        "      | 'As Pandas' >> beam.Map(\n",
+        "          lambda _, dict_iter: pd.DataFrame(dict_iter),\n",
+        "          dict_iter=beam.pvalue.AsIter(pcoll),\n",
+        "      )\n",
+        "\n",
+        "      # Print the Pandas DataFrame.\n",
+        "      | 'Print' >> beam.Map(print)\n",
+        "  )"
+      ],
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "WARNING:root:Make sure that locally built Python SDK docker image has Python 3.7 interpreter.\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "             timestamp              event\n",
+            "0  2021-03-20 09:37:00      March Equinox\n",
+            "1  2021-06-21 03:32:00      June Solstice\n",
+            "2  2021-09-22 19:21:00  September Equinox\n",
+            "3  2021-12-21 15:59:00  December Solstice\n"
+          ]
+        }
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "cHZdiPbOG-sy",
+        "outputId": "11c84948-fccf-41fd-c276-7c5803264ff7"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# What's next?\n",
+        "\n",
+        "* [Beam DataFrames overview](https://beam.apache.org/documentation/dsls/dataframes/overview) -- an overview of the Beam DataFrames API.\n",
+        "* [Differences from pandas](https://beam.apache.org/documentation/dsls/dataframes/differences-from-pandas) -- goes through some of the differences between Beam DataFrames and Pandas DataFrames, as well as some of the workarounds for unsupported operations.\n",
+        "* [10 minutes to Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html) -- a quickstart guide to Pandas DataFrames.\n",
+        "* [Pandas DataFrame API](https://pandas.pydata.org/pandas-docs/stable/reference/frame.html) -- the API reference for Pandas DataFrames"
+      ],
+      "metadata": {
+        "id": "UflW6AJp6-ss"
+      }
+    }
+  ]
+}
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
index 442d913..3ab0b72 100644
--- a/gradle/wrapper/gradle-wrapper.properties
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -1,5 +1,5 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-6.8.3-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-6.9.1-bin.zip
 zipStoreBase=GRADLE_USER_HOME
 zipStorePath=wrapper/dists
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/GcpResourceIdentifiers.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/GcpResourceIdentifiers.java
index 3133cc1..4c388bf 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/GcpResourceIdentifiers.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/GcpResourceIdentifiers.java
@@ -43,6 +43,10 @@
         projectId, instanceId, tableId);
   }
 
+  public static String cloudStorageBucket(String bucketId) {
+    return String.format("//storage.googleapis.com/buckets/%s", bucketId);
+  }
+
   public static String datastoreResource(String projectId, String namespace) {
     return String.format(
         "//bigtable.googleapis.com/projects/%s/namespaces/%s", projectId, namespace);
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MonitoringInfoConstants.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MonitoringInfoConstants.java
index 57ee58a..c792719 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MonitoringInfoConstants.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MonitoringInfoConstants.java
@@ -83,6 +83,8 @@
     public static final String BIGTABLE_PROJECT_ID = "BIGTABLE_PROJECT_ID";
     public static final String INSTANCE_ID = "INSTANCE_ID";
     public static final String TABLE_ID = "TABLE_ID";
+    public static final String GCS_BUCKET = "GCS_BUCKET";
+    public static final String GCS_PROJECT_ID = "GCS_PROJECT_ID";
 
     static {
       // Note: One benefit of defining these strings above, instead of pulling them in from
@@ -116,6 +118,8 @@
           BIGTABLE_PROJECT_ID.equals(extractLabel(MonitoringInfoLabels.BIGTABLE_PROJECT_ID)));
       checkArgument(INSTANCE_ID.equals(extractLabel(MonitoringInfoLabels.INSTANCE_ID)));
       checkArgument(TABLE_ID.equals(extractLabel(MonitoringInfoLabels.TABLE_ID)));
+      checkArgument(GCS_BUCKET.equals(extractLabel(MonitoringInfoLabels.GCS_BUCKET)));
+      checkArgument(GCS_PROJECT_ID.equals(extractLabel(MonitoringInfoLabels.GCS_PROJECT_ID)));
     }
   }
 
diff --git a/runners/google-cloud-dataflow-java/build.gradle b/runners/google-cloud-dataflow-java/build.gradle
index b8ec9bb..be0ade9 100644
--- a/runners/google-cloud-dataflow-java/build.gradle
+++ b/runners/google-cloud-dataflow-java/build.gradle
@@ -45,8 +45,8 @@
   filter org.apache.tools.ant.filters.ReplaceTokens, tokens: [
     'dataflow.legacy_environment_major_version' : '8',
     'dataflow.fnapi_environment_major_version' : '8',
-    'dataflow.legacy_container_version' : 'beam-master-20210525',
-    'dataflow.fnapi_container_version' : 'beam-master-20210524',
+    'dataflow.legacy_container_version' : 'beam-master-20210913',
+    'dataflow.fnapi_container_version' : 'beam-master-20210913',
     'dataflow.container_base_repository' : 'gcr.io/cloud-dataflow/v1beta3',
   ]
 }
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
index f5bde8e..a212fd1 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
@@ -39,9 +39,7 @@
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 import static org.junit.Assume.assumeFalse;
-import static org.mockito.ArgumentMatchers.anyString;
 import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyInt;
 import static org.mockito.Matchers.anyListOf;
 import static org.mockito.Matchers.eq;
 import static org.mockito.Matchers.isA;
@@ -267,7 +265,7 @@
   private static GcsUtil buildMockGcsUtil() throws IOException {
     GcsUtil mockGcsUtil = mock(GcsUtil.class);
 
-    when(mockGcsUtil.create(any(GcsPath.class), anyString()))
+    when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
         .then(
             invocation ->
                 FileChannel.open(
@@ -276,7 +274,7 @@
                     StandardOpenOption.WRITE,
                     StandardOpenOption.DELETE_ON_CLOSE));
 
-    when(mockGcsUtil.create(any(GcsPath.class), anyString(), anyInt()))
+    when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
         .then(
             invocation ->
                 FileChannel.open(
@@ -882,7 +880,7 @@
     options.setGcsUtil(mockGcsUtil);
     options.setGcpCredential(new TestCredential());
 
-    when(mockGcsUtil.create(any(GcsPath.class), anyString(), anyInt()))
+    when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
         .then(
             invocation ->
                 FileChannel.open(
@@ -950,7 +948,7 @@
     options.setGcsUtil(mockGcsUtil);
     options.setGcpCredential(new TestCredential());
 
-    when(mockGcsUtil.create(any(GcsPath.class), anyString(), anyInt()))
+    when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
         .then(
             invocation ->
                 FileChannel.open(
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
index 5c6e813..ca5dd51 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/PackageUtilTest.java
@@ -27,7 +27,6 @@
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
-import static org.mockito.ArgumentMatchers.anyString;
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyListOf;
 import static org.mockito.Mockito.times;
@@ -284,7 +283,8 @@
             ImmutableList.of(
                 StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
 
-    when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
+    when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
+        .thenReturn(pipe.sink());
 
     List<DataflowPackage> targets =
         defaultPackageUtil.stageClasspathElements(
@@ -294,7 +294,7 @@
     DataflowPackage target = Iterables.getOnlyElement(targets);
 
     verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
-    verify(mockGcsUtil).create(any(GcsPath.class), anyString());
+    verify(mockGcsUtil).create(any(GcsPath.class), any(GcsUtil.CreateOptions.class));
     verifyNoMoreInteractions(mockGcsUtil);
 
     assertThat(target.getName(), endsWith(".txt"));
@@ -313,7 +313,7 @@
             ImmutableList.of(
                 StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
 
-    when(mockGcsUtil.create(any(GcsPath.class), anyString()))
+    when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
         .thenAnswer(invocation -> Pipe.open().sink());
 
     List<DataflowPackage> targets =
@@ -342,7 +342,8 @@
         .thenReturn(
             ImmutableList.of(
                 StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
-    when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
+    when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
+        .thenReturn(pipe.sink());
 
     defaultPackageUtil.stageClasspathElements(
         ImmutableList.of(makeStagedFile(tmpDirectory.getAbsolutePath())),
@@ -350,7 +351,7 @@
         createOptions);
 
     verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
-    verify(mockGcsUtil).create(any(GcsPath.class), anyString());
+    verify(mockGcsUtil).create(any(GcsPath.class), any(GcsUtil.CreateOptions.class));
     verifyNoMoreInteractions(mockGcsUtil);
 
     List<String> zipEntryNames = new ArrayList<>();
@@ -375,7 +376,8 @@
         .thenReturn(
             ImmutableList.of(
                 StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
-    when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
+    when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
+        .thenReturn(pipe.sink());
 
     List<DataflowPackage> targets =
         defaultPackageUtil.stageClasspathElements(
@@ -385,7 +387,7 @@
     DataflowPackage target = Iterables.getOnlyElement(targets);
 
     verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
-    verify(mockGcsUtil).create(any(GcsPath.class), anyString());
+    verify(mockGcsUtil).create(any(GcsPath.class), any(GcsUtil.CreateOptions.class));
     verifyNoMoreInteractions(mockGcsUtil);
 
     assertThat(target.getName(), endsWith(".jar"));
@@ -403,7 +405,7 @@
         .thenReturn(
             ImmutableList.of(
                 StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
-    when(mockGcsUtil.create(any(GcsPath.class), anyString()))
+    when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
         .thenThrow(new IOException("Fake Exception: Upload error"));
 
     try (PackageUtil directPackageUtil =
@@ -415,7 +417,7 @@
           createOptions);
     } finally {
       verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
-      verify(mockGcsUtil, times(5)).create(any(GcsPath.class), anyString());
+      verify(mockGcsUtil, times(5)).create(any(GcsPath.class), any(GcsUtil.CreateOptions.class));
       verifyNoMoreInteractions(mockGcsUtil);
     }
   }
@@ -427,7 +429,7 @@
         .thenReturn(
             ImmutableList.of(
                 StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
-    when(mockGcsUtil.create(any(GcsPath.class), anyString()))
+    when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
         .thenThrow(
             new IOException(
                 "Failed to write to GCS path " + STAGING_PATH,
@@ -460,7 +462,7 @@
                       + "login'")));
     } finally {
       verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
-      verify(mockGcsUtil).create(any(GcsPath.class), anyString());
+      verify(mockGcsUtil).create(any(GcsPath.class), any(GcsUtil.CreateOptions.class));
       verifyNoMoreInteractions(mockGcsUtil);
     }
   }
@@ -473,7 +475,7 @@
         .thenReturn(
             ImmutableList.of(
                 StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
-    when(mockGcsUtil.create(any(GcsPath.class), anyString()))
+    when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
         .thenThrow(new IOException("Fake Exception: 410 Gone")) // First attempt fails
         .thenReturn(pipe.sink()); // second attempt succeeds
 
@@ -486,7 +488,7 @@
           createOptions);
     } finally {
       verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
-      verify(mockGcsUtil, times(2)).create(any(GcsPath.class), anyString());
+      verify(mockGcsUtil, times(2)).create(any(GcsPath.class), any(GcsUtil.CreateOptions.class));
       verifyNoMoreInteractions(mockGcsUtil);
     }
   }
@@ -520,7 +522,8 @@
             ImmutableList.of(
                 StorageObjectOrIOException.create(
                     createStorageObject(STAGING_PATH, Long.MAX_VALUE))));
-    when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
+    when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
+        .thenReturn(pipe.sink());
 
     defaultPackageUtil.stageClasspathElements(
         ImmutableList.of(makeStagedFile(tmpDirectory.getAbsolutePath())),
@@ -528,7 +531,7 @@
         createOptions);
 
     verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
-    verify(mockGcsUtil).create(any(GcsPath.class), anyString());
+    verify(mockGcsUtil).create(any(GcsPath.class), any(GcsUtil.CreateOptions.class));
     verifyNoMoreInteractions(mockGcsUtil);
   }
 
@@ -542,7 +545,8 @@
         .thenReturn(
             ImmutableList.of(
                 StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
-    when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink());
+    when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
+        .thenReturn(pipe.sink());
 
     List<DataflowPackage> targets =
         defaultPackageUtil.stageClasspathElements(
@@ -552,7 +556,7 @@
     DataflowPackage target = Iterables.getOnlyElement(targets);
 
     verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
-    verify(mockGcsUtil).create(any(GcsPath.class), anyString());
+    verify(mockGcsUtil).create(any(GcsPath.class), any(GcsUtil.CreateOptions.class));
     verifyNoMoreInteractions(mockGcsUtil);
 
     assertThat(target.getName(), equalTo(overriddenName));
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java
index 12b7df2..17b59ec 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java
@@ -794,7 +794,8 @@
 
     final int numIters = 2000;
     for (int i = 0; i < numIters; ++i) {
-      server.addWorkToOffer(makeInput(i, 0, "key", DEFAULT_SHARDING_KEY));
+      server.addWorkToOffer(
+          makeInput(i, TimeUnit.MILLISECONDS.toMicros(i), "key", DEFAULT_SHARDING_KEY));
     }
 
     Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(numIters);
@@ -829,7 +830,8 @@
 
     final int numIters = 2000;
     for (int i = 0; i < numIters; ++i) {
-      server.addWorkToOffer(makeInput(i, 0, "key", DEFAULT_SHARDING_KEY));
+      server.addWorkToOffer(
+          makeInput(i, TimeUnit.MILLISECONDS.toMicros(i), "key", DEFAULT_SHARDING_KEY));
     }
 
     Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(numIters);
diff --git a/runners/samza/job-server/build.gradle b/runners/samza/job-server/build.gradle
index 266ec0e..4660bfc 100644
--- a/runners/samza/job-server/build.gradle
+++ b/runners/samza/job-server/build.gradle
@@ -69,6 +69,7 @@
         pipelineOpts: pipelineOptions,
         environment: BeamModulePlugin.PortableValidatesRunnerConfiguration.Environment.EMBEDDED,
         testCategories: {
+            includeCategories 'org.apache.beam.sdk.testing.NeedsRunner'
             includeCategories 'org.apache.beam.sdk.testing.ValidatesRunner'
             // TODO: BEAM-12350
             excludeCategories 'org.apache.beam.sdk.testing.UsesAttemptedMetrics'
@@ -112,6 +113,62 @@
             excludeTestsMatching 'org.apache.beam.sdk.testing.TestStreamTest.testFirstElementLate'
             // TODO(BEAM-12036)
             excludeTestsMatching 'org.apache.beam.sdk.testing.TestStreamTest.testLateDataAccumulating'
+            // TODO(BEAM-12886)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.GroupByKeyTest$WindowTests.testWindowFnPostMerging'
+            // TODO(BEAM-12887)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimestampTests.testParDoShiftTimestampInvalid'
+            // TODO(BEAM-12888)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimestampTests.testParDoShiftTimestampInvalidZeroAllowed'
+            // TODO(BEAM-12889)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.DeduplicateTest.testEventTime'
+            // TODO(BEAM-12890)
+            excludeTestsMatching 'org.apache.beam.sdk.io.TFRecordIOTest.testReadInvalidRecord'
+            // TODO(BEAM-12891)
+            excludeTestsMatching 'org.apache.beam.sdk.io.TFRecordIOTest.testReadInvalidDataMask'
+            // TODO(BEAM-12892)
+            excludeTestsMatching 'org.apache.beam.sdk.io.TFRecordIOTest.testReadInvalidLengthMask'
+            // TODO(BEAM-12893)
+            excludeTestsMatching 'org.apache.beam.sdk.io.TextIOReadTest$CompressedReadTest.testCompressedReadWithoutExtension'
+            // TODO(BEAM-12894)
+            excludeTestsMatching 'org.apache.beam.sdk.io.WriteFilesTest.testWithRunnerDeterminedShardingUnbounded'
+            // TODO(BEAM-12895)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$MultipleInputsAndOutputTests.testParDoWritingToUndeclaredTag'
+            // TODO(BEAM-12896)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$MultipleInputsAndOutputTests.testParDoReadingFromUnknownSideInput'
+            // TODO(BEAM-12897)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.ViewTest.testMapSideInputWithNullValuesCatchesDuplicates'
+
+            // TODO(BEAM-12743)
+            excludeTestsMatching 'org.apache.beam.sdk.coders.PCollectionCustomCoderTest.testEncodingNPException'
+            excludeTestsMatching 'org.apache.beam.sdk.coders.PCollectionCustomCoderTest.testEncodingIOException'
+            excludeTestsMatching 'org.apache.beam.sdk.coders.PCollectionCustomCoderTest.testDecodingNPException'
+            excludeTestsMatching 'org.apache.beam.sdk.coders.PCollectionCustomCoderTest.testDecodingIOException'
+            // TODO(BEAM-12744)
+            excludeTestsMatching 'org.apache.beam.sdk.PipelineTest.testEmptyPipeline'
+            // TODO(BEAM-12745)
+            excludeTestsMatching 'org.apache.beam.sdk.io.AvroIOTest*'
+            // TODO(BEAM-12746)
+            excludeTestsMatching 'org.apache.beam.sdk.io.FileIOTest*'
+            // TODO(BEAM-12747)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.WithTimestampsTest.withTimestampsBackwardsInTimeShouldThrow'
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.WithTimestampsTest.withTimestampsWithNullTimestampShouldThrow'
+            // TODO(BEAM-12748)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.ViewTest.testEmptySingletonSideInput'
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.ViewTest.testNonSingletonSideInput'
+            // TODO(BEAM-12749)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.MapElementsTest.testMapSimpleFunction'
+            // TODO(BEAM-12750)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.GroupIntoBatchesTest.testInGlobalWindowBatchSizeByteSizeFn'
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.GroupIntoBatchesTest.testInStreamingMode'
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.GroupIntoBatchesTest.testWithShardedKeyInGlobalWindow'
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.GroupIntoBatchesTest.testWithUnevenBatches'
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.GroupIntoBatchesTest.testInGlobalWindowBatchSizeByteSize'
+            // TODO(BEAM-10025)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimerTests.testOutputTimestampDefaultUnbounded'
+            // TODO(BEAM-11479)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimerTests.testOutputTimestamp'
+            // TODO(BEAM-11479)
+            excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimerTests.testRelativeTimerWithOutputTimestamp'
         }
 )
 
diff --git a/runners/spark/job-server/spark_job_server.gradle b/runners/spark/job-server/spark_job_server.gradle
index c32f7b0..f1bb021 100644
--- a/runners/spark/job-server/spark_job_server.gradle
+++ b/runners/spark/job-server/spark_job_server.gradle
@@ -59,7 +59,8 @@
   validatesPortableRunner project(path: ":runners:core-java", configuration: "testRuntime")
   validatesPortableRunner project(path: ":runners:portability:java", configuration: "testRuntime")
   runtime project(":sdks:java:extensions:google-cloud-platform-core")
-//  TODO: Enable AWS and HDFS file system.
+  runtime project(":sdks:java:io:amazon-web-services2")
+//  TODO: Enable HDFS file system.
 }
 
 // NOTE: runShadow must be used in order to run the job server. The standard run
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/GroupByKeyTranslatorBatch.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/GroupByKeyTranslatorBatch.java
index 4fe26d7..6391ba4 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/GroupByKeyTranslatorBatch.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/GroupByKeyTranslatorBatch.java
@@ -17,27 +17,26 @@
  */
 package org.apache.beam.runners.spark.structuredstreaming.translation.batch;
 
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.beam.runners.core.Concatenate;
+import java.io.Serializable;
+import org.apache.beam.runners.core.InMemoryStateInternals;
+import org.apache.beam.runners.core.StateInternals;
+import org.apache.beam.runners.core.StateInternalsFactory;
+import org.apache.beam.runners.core.SystemReduceFn;
 import org.apache.beam.runners.spark.structuredstreaming.translation.AbstractTranslationContext;
 import org.apache.beam.runners.spark.structuredstreaming.translation.TransformTranslator;
+import org.apache.beam.runners.spark.structuredstreaming.translation.batch.functions.GroupAlsoByWindowViaOutputBufferFn;
 import org.apache.beam.runners.spark.structuredstreaming.translation.helpers.EncoderHelpers;
 import org.apache.beam.runners.spark.structuredstreaming.translation.helpers.KVHelpers;
-import org.apache.beam.sdk.coders.CannotProvideCoderException;
 import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.IterableCoder;
 import org.apache.beam.sdk.coders.KvCoder;
-import org.apache.beam.sdk.transforms.Combine;
 import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.util.WindowedValue;
 import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.WindowingStrategy;
-import org.apache.spark.api.java.function.FlatMapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.KeyValueGroupedDataset;
-import scala.Tuple2;
 
 class GroupByKeyTranslatorBatch<K, V>
     implements TransformTranslator<
@@ -49,62 +48,43 @@
       AbstractTranslationContext context) {
 
     @SuppressWarnings("unchecked")
-    final PCollection<KV<K, V>> input = (PCollection<KV<K, V>>) context.getInput();
-    @SuppressWarnings("unchecked")
-    final PCollection<KV<K, List<V>>> output = (PCollection<KV<K, List<V>>>) context.getOutput();
-    final Combine.CombineFn<V, List<V>, List<V>> combineFn = new Concatenate<>();
+    final PCollection<KV<K, V>> inputPCollection = (PCollection<KV<K, V>>) context.getInput();
+    Dataset<WindowedValue<KV<K, V>>> input = context.getDataset(inputPCollection);
+    WindowingStrategy<?, ?> windowingStrategy = inputPCollection.getWindowingStrategy();
+    KvCoder<K, V> kvCoder = (KvCoder<K, V>) inputPCollection.getCoder();
+    Coder<V> valueCoder = kvCoder.getValueCoder();
 
-    WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
+    // group by key only
+    Coder<K> keyCoder = kvCoder.getKeyCoder();
+    KeyValueGroupedDataset<K, WindowedValue<KV<K, V>>> groupByKeyOnly =
+        input.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder));
 
-    Dataset<WindowedValue<KV<K, V>>> inputDataset = context.getDataset(input);
+    // group also by windows
+    WindowedValue.FullWindowedValueCoder<KV<K, Iterable<V>>> outputCoder =
+        WindowedValue.FullWindowedValueCoder.of(
+            KvCoder.of(keyCoder, IterableCoder.of(valueCoder)),
+            windowingStrategy.getWindowFn().windowCoder());
+    Dataset<WindowedValue<KV<K, Iterable<V>>>> output =
+        groupByKeyOnly.flatMapGroups(
+            new GroupAlsoByWindowViaOutputBufferFn<>(
+                windowingStrategy,
+                new InMemoryStateInternalsFactory<>(),
+                SystemReduceFn.buffering(valueCoder),
+                context.getSerializableOptions()),
+            EncoderHelpers.fromBeamCoder(outputCoder));
 
-    KvCoder<K, V> inputCoder = (KvCoder<K, V>) input.getCoder();
-    Coder<K> keyCoder = inputCoder.getKeyCoder();
-    KvCoder<K, List<V>> outputKVCoder = (KvCoder<K, List<V>>) output.getCoder();
-    Coder<List<V>> outputCoder = outputKVCoder.getValueCoder();
+    context.putDataset(context.getOutput(), output);
+  }
 
-    KeyValueGroupedDataset<K, WindowedValue<KV<K, V>>> groupedDataset =
-      inputDataset.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder));
-
-    Coder<List<V>> accumulatorCoder = null;
-    try {
-      accumulatorCoder =
-        combineFn.getAccumulatorCoder(
-          input.getPipeline().getCoderRegistry(), inputCoder.getValueCoder());
-    } catch (CannotProvideCoderException e) {
-      throw new RuntimeException(e);
+  /**
+   * In-memory state internals factory.
+   *
+   * @param <K> State key type.
+   */
+  static class InMemoryStateInternalsFactory<K> implements StateInternalsFactory<K>, Serializable {
+    @Override
+    public StateInternals stateInternalsForKey(K key) {
+      return InMemoryStateInternals.forKey(key);
     }
-
-    Dataset<Tuple2<K, Iterable<WindowedValue<List<V>>>>> combinedDataset =
-      groupedDataset.agg(
-        new AggregatorCombiner<K, V, List<V>, List<V>, BoundedWindow>(
-          combineFn, windowingStrategy, accumulatorCoder, outputCoder)
-          .toColumn());
-
-    // expand the list into separate elements and put the key back into the elements
-    WindowedValue.WindowedValueCoder<KV<K, List<V>>> wvCoder =
-      WindowedValue.FullWindowedValueCoder.of(
-        outputKVCoder, input.getWindowingStrategy().getWindowFn().windowCoder());
-    Dataset<WindowedValue<KV<K, List<V>>>> outputDataset =
-      combinedDataset.flatMap(
-        (FlatMapFunction<
-          Tuple2<K, Iterable<WindowedValue<List<V>>>>, WindowedValue<KV<K, List<V>>>>)
-          tuple2 -> {
-            K key = tuple2._1();
-            Iterable<WindowedValue<List<V>>> windowedValues = tuple2._2();
-            List<WindowedValue<KV<K, List<V>>>> result = new ArrayList<>();
-            for (WindowedValue<List<V>> windowedValue : windowedValues) {
-              KV<K, List<V>> kv = KV.of(key, windowedValue.getValue());
-              result.add(
-                WindowedValue.of(
-                  kv,
-                  windowedValue.getTimestamp(),
-                  windowedValue.getWindows(),
-                  windowedValue.getPane()));
-            }
-            return result.iterator();
-          },
-        EncoderHelpers.fromBeamCoder(wvCoder));
-    context.putDataset(output, outputDataset);
   }
 }
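For context, the rewritten translator above produces the standard `KV<K, Iterable<V>>` output of Beam's `GroupByKey` by grouping by key and then buffering values per window with `GroupAlsoByWindowViaOutputBufferFn`, instead of the previous `Concatenate`-based combine. A minimal user-facing pipeline that exercises this translation path (element values are illustrative and not part of this PR):

```java
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.GroupByKey;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

public class GroupByKeyPipelineSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create();

    PCollection<KV<String, Integer>> input =
        pipeline.apply(Create.of(KV.of("a", 1), KV.of("a", 2), KV.of("b", 3)));

    // On the Spark structured-streaming runner this step is now translated by grouping by key
    // and buffering values per window, yielding one Iterable per key and window.
    PCollection<KV<String, Iterable<Integer>>> grouped = input.apply(GroupByKey.create());

    pipeline.run().waitUntilFinish();
  }
}
```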
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSink.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSink.java
index f735ec3..6c3b176 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSink.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSink.java
@@ -50,6 +50,8 @@
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.coders.StructuredCoder;
 import org.apache.beam.sdk.coders.VarIntCoder;
+import org.apache.beam.sdk.io.fs.CreateOptions;
+import org.apache.beam.sdk.io.fs.CreateOptions.StandardCreateOptions;
 import org.apache.beam.sdk.io.fs.MatchResult;
 import org.apache.beam.sdk.io.fs.MatchResult.Metadata;
 import org.apache.beam.sdk.io.fs.MoveOptions.StandardMoveOptions;
@@ -948,7 +950,16 @@
           getWriteOperation().getSink().writableByteChannelFactory;
       // The factory may force a MIME type or it may return null, indicating to use the sink's MIME.
       String channelMimeType = firstNonNull(factory.getMimeType(), mimeType);
-      WritableByteChannel tempChannel = FileSystems.create(outputFile, channelMimeType);
+      CreateOptions createOptions =
+          StandardCreateOptions.builder()
+              .setMimeType(channelMimeType)
+              // The file name is based on a UUID, so we expect it to be unique and to not already
+              // exist. A new UUID is generated for each bundle processing, so this also holds
+              // across bundle retries. Filename collisions would result in data loss, as we
+              // would otherwise overwrite already finalized data.
+              .setExpectFileToNotExist(true)
+              .build();
+      WritableByteChannel tempChannel = FileSystems.create(outputFile, createOptions);
       try {
         channel = factory.create(tempChannel);
       } catch (Exception e) {
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/fs/CreateOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/fs/CreateOptions.java
index 331e66f..85d2311 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/fs/CreateOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/fs/CreateOptions.java
@@ -24,9 +24,14 @@
   /** The file-like resource mime type. */
   public abstract String mimeType();
 
+  /** True if the file is expected to not exist. */
+  public abstract Boolean expectFileToNotExist();
+
   /** An abstract builder for {@link CreateOptions}. */
   public abstract static class Builder<BuilderT extends CreateOptions.Builder<BuilderT>> {
     public abstract BuilderT setMimeType(String value);
+
+    public abstract BuilderT setExpectFileToNotExist(Boolean value);
   }
 
   /** A standard configuration options with builder. */
@@ -35,7 +40,8 @@
 
     /** Returns a {@link StandardCreateOptions.Builder}. */
     public static StandardCreateOptions.Builder builder() {
-      return new AutoValue_CreateOptions_StandardCreateOptions.Builder();
+      return new AutoValue_CreateOptions_StandardCreateOptions.Builder()
+          .setExpectFileToNotExist(false);
     }
 
     /** Builder for {@link StandardCreateOptions}. */
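With the new `expectFileToNotExist` option (defaulting to `false` via the builder), a writer such as `FileBasedSink` can tell the file system that the target is expected to be absent, skipping the pre-write existence check. A minimal sketch of using the flag directly through `FileSystems`; the destination directory, file name, and MIME type are placeholders:

```java
import java.io.IOException;
import java.nio.channels.WritableByteChannel;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.io.fs.CreateOptions.StandardCreateOptions;
import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
import org.apache.beam.sdk.io.fs.ResourceId;

public class ExpectFileToNotExistSketch {
  static WritableByteChannel openNewFile(String directory, String filename) throws IOException {
    ResourceId resource =
        FileSystems.matchNewResource(directory, /* isDirectory= */ true)
            .resolve(filename, StandardResolveOptions.RESOLVE_FILE);
    return FileSystems.create(
        resource,
        StandardCreateOptions.builder()
            .setMimeType("text/plain")
            // New flag: skip the existence check; the write may fail if the file already exists.
            .setExpectFileToNotExist(true)
            .build());
  }
}
```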
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/Group.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/Group.java
index 8c11464..0d6ab0c 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/Group.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/Group.java
@@ -38,6 +38,7 @@
 import org.apache.beam.sdk.transforms.ParDo;
 import org.apache.beam.sdk.transforms.Values;
 import org.apache.beam.sdk.transforms.WithKeys;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
 import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.Row;
@@ -365,12 +366,62 @@
   }
 
   /**
+   * a {@link PTransform} that does a combine using an aggregation built up by calls to
+   * aggregateField and aggregateFields. The output of this transform will have a schema that is
+   * determined by the output types of all the composed combiners.
+   *
+   * @param <InputT> the type of the input elements to the transform
+   */
+  public abstract static class AggregateCombiner<InputT>
+      extends PTransform<PCollection<InputT>, PCollection<Row>> {
+
+    /**
+     * Build up an aggregation function over the input elements.
+     *
+     * <p>This method specifies an aggregation over single field of the input. The union of all
+     * calls to aggregateField and aggregateFields will determine the output schema.
+     */
+    public abstract <CombineInputT, AccumT, CombineOutputT>
+        AggregateCombiner<InputT> aggregateField(
+            int inputFieldId,
+            CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
+            Field outputField);
+
+    /**
+     * Build up an aggregation function over the input elements.
+     *
+     * <p>This method specifies an aggregation over single field of the input. The union of all
+     * calls to aggregateField and aggregateFields will determine the output schema.
+     */
+    public abstract <CombineInputT, AccumT, CombineOutputT>
+        AggregateCombiner<InputT> aggregateField(
+            String inputFieldName,
+            CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
+            Field outputField);
+
+    /**
+     * Build up an aggregation function over the input elements by field id.
+     *
+     * <p>This method specifies an aggregation over multiple fields of the input. The union of all
+     * calls to aggregateField and aggregateFields will determine the output schema.
+     *
+     * <p>Field types in the output schema will be inferred from the provided combine function.
+     * Sometimes the field type cannot be inferred due to Java's type erasure. In that case, use the
+     * overload that allows setting the output field type explicitly.
+     */
+    public abstract <CombineInputT, AccumT, CombineOutputT>
+        AggregateCombiner<InputT> aggregateFieldsById(
+            List<Integer> inputFieldIds,
+            CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
+            Field outputField);
+  }
+
+  /**
    * a {@link PTransform} that does a global combine using an aggregation built up by calls to
    * aggregateField and aggregateFields. The output of this transform will have a schema that is
    * determined by the output types of all the composed combiners.
    */
-  public static class CombineFieldsGlobally<InputT>
-      extends PTransform<PCollection<InputT>, PCollection<Row>> {
+  public static class CombineFieldsGlobally<InputT> extends AggregateCombiner<InputT> {
     private final SchemaAggregateFn.Inner schemaAggregateFn;
 
     CombineFieldsGlobally(SchemaAggregateFn.Inner schemaAggregateFn) {
@@ -378,6 +429,15 @@
     }
 
     /**
+     * Returns a transform that does a global combine using an aggregation built up by calls to
+     * aggregateField and aggregateFields. This transform will have an unknown schema that will be
+     * determined by the output types of all the composed combiners.
+     */
+    public static CombineFieldsGlobally create() {
+      return new CombineFieldsGlobally<>(SchemaAggregateFn.create());
+    }
+
+    /**
      * Build up an aggregation function over the input elements.
      *
      * <p>This method specifies an aggregation over single field of the input. The union of all
@@ -431,6 +491,7 @@
      * <p>This method specifies an aggregation over single field of the input. The union of all
      * calls to aggregateField and aggregateFields will determine the output schema.
      */
+    @Override
     public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateField(
         String inputFieldName,
         CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
@@ -450,6 +511,7 @@
               FieldAccessDescriptor.withFieldNames(inputFieldName), true, fn, outputField));
     }
 
+    @Override
     public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateField(
         int inputFieldId, CombineFn<CombineInputT, AccumT, CombineOutputT> fn, Field outputField) {
       return new CombineFieldsGlobally<>(
@@ -526,6 +588,7 @@
           FieldAccessDescriptor.withFieldNames(inputFieldNames), fn, outputField);
     }
 
+    @Override
     public <CombineInputT, AccumT, CombineOutputT>
         CombineFieldsGlobally<InputT> aggregateFieldsById(
             List<Integer> inputFieldIds,
@@ -551,9 +614,13 @@
     @Override
     public PCollection<Row> expand(PCollection<InputT> input) {
       SchemaAggregateFn.Inner fn = schemaAggregateFn.withSchema(input.getSchema());
+      Combine.Globally<Row, Row> combineFn = Combine.globally(fn);
+      if (!(input.getWindowingStrategy().getWindowFn() instanceof GlobalWindows)) {
+        combineFn = combineFn.withoutDefaults();
+      }
       return input
           .apply("toRows", Convert.toRows())
-          .apply("Global Combine", Combine.globally(fn))
+          .apply("Global Combine", combineFn)
           .setRowSchema(fn.getOutputSchema());
     }
   }
@@ -566,8 +633,7 @@
    * specified extracted fields.
    */
   @AutoValue
-  public abstract static class ByFields<InputT>
-      extends PTransform<PCollection<InputT>, PCollection<Row>> {
+  public abstract static class ByFields<InputT> extends AggregateCombiner<InputT> {
     abstract FieldAccessDescriptor getFieldAccessDescriptor();
 
     abstract String getKeyField();
@@ -698,6 +764,7 @@
      * <p>This method specifies an aggregation over single field of the input. The union of all
      * calls to aggregateField and aggregateFields will determine the output schema.
      */
+    @Override
     public <CombineInputT, AccumT, CombineOutputT> CombineFieldsByFields<InputT> aggregateField(
         String inputFieldName,
         CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
@@ -725,6 +792,7 @@
           getValueField());
     }
 
+    @Override
     public <CombineInputT, AccumT, CombineOutputT> CombineFieldsByFields<InputT> aggregateField(
         int inputFieldId, CombineFn<CombineInputT, AccumT, CombineOutputT> fn, Field outputField) {
       return CombineFieldsByFields.of(
@@ -812,6 +880,7 @@
           FieldAccessDescriptor.withFieldNames(inputFieldNames), fn, outputField);
     }
 
+    @Override
     public <CombineInputT, AccumT, CombineOutputT>
         CombineFieldsByFields<InputT> aggregateFieldsById(
             List<Integer> inputFieldIds,
@@ -875,8 +944,7 @@
    * determined by the output types of all the composed combiners.
    */
   @AutoValue
-  public abstract static class CombineFieldsByFields<InputT>
-      extends PTransform<PCollection<InputT>, PCollection<Row>> {
+  public abstract static class CombineFieldsByFields<InputT> extends AggregateCombiner<InputT> {
     abstract ByFields<InputT> getByFields();
 
     abstract SchemaAggregateFn.Inner getSchemaAggregateFn();
@@ -995,6 +1063,7 @@
      * <p>This method specifies an aggregation over single field of the input. The union of all
      * calls to aggregateField and aggregateFields will determine the output schema.
      */
+    @Override
     public <CombineInputT, AccumT, CombineOutputT> CombineFieldsByFields<InputT> aggregateField(
         String inputFieldName,
         CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
@@ -1020,6 +1089,7 @@
           .build();
     }
 
+    @Override
     public <CombineInputT, AccumT, CombineOutputT> CombineFieldsByFields<InputT> aggregateField(
         int inputFieldId, CombineFn<CombineInputT, AccumT, CombineOutputT> fn, Field outputField) {
       return toBuilder()
@@ -1095,6 +1165,7 @@
           FieldAccessDescriptor.withFieldNames(inputFieldNames), fn, outputField);
     }
 
+    @Override
     public <CombineInputT, AccumT, CombineOutputT>
         CombineFieldsByFields<InputT> aggregateFieldsById(
             List<Integer> inputFieldIds,
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ParDoSchemaTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ParDoSchemaTest.java
index 9eee454..d93fe1e 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ParDoSchemaTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ParDoSchemaTest.java
@@ -48,6 +48,7 @@
 import org.apache.beam.sdk.testing.TestPipeline;
 import org.apache.beam.sdk.testing.UsesMapState;
 import org.apache.beam.sdk.testing.UsesSchema;
+import org.apache.beam.sdk.testing.UsesSetState;
 import org.apache.beam.sdk.testing.UsesStatefulParDo;
 import org.apache.beam.sdk.testing.ValidatesRunner;
 import org.apache.beam.sdk.values.KV;
@@ -768,7 +769,7 @@
   }
 
   @Test
-  @Category({NeedsRunner.class, UsesStatefulParDo.class})
+  @Category({NeedsRunner.class, UsesStatefulParDo.class, UsesSetState.class})
   public void testSetStateSchemaInference() throws NoSuchSchemaException {
     final String stateId = "foo";
 
diff --git a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/storage/GcsCreateOptions.java b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/storage/GcsCreateOptions.java
index 1fdb871..ce534d6 100644
--- a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/storage/GcsCreateOptions.java
+++ b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/storage/GcsCreateOptions.java
@@ -37,7 +37,7 @@
 
   /** Returns a {@link GcsCreateOptions.Builder}. */
   public static GcsCreateOptions.Builder builder() {
-    return new AutoValue_GcsCreateOptions.Builder();
+    return new AutoValue_GcsCreateOptions.Builder().setExpectFileToNotExist(false);
   }
 
   /** A builder for {@link GcsCreateOptions}. */
diff --git a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystem.java b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystem.java
index 6c39428..a547bc9 100644
--- a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystem.java
+++ b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystem.java
@@ -133,16 +133,16 @@
   @Override
   protected WritableByteChannel create(GcsResourceId resourceId, CreateOptions createOptions)
       throws IOException {
+    GcsUtil.CreateOptions.Builder builder =
+        GcsUtil.CreateOptions.builder()
+            .setContentType(createOptions.mimeType())
+            .setExpectFileToNotExist(createOptions.expectFileToNotExist());
     if (createOptions instanceof GcsCreateOptions) {
-      return options
-          .getGcsUtil()
-          .create(
-              resourceId.getGcsPath(),
-              createOptions.mimeType(),
+      builder =
+          builder.setUploadBufferSizeBytes(
               ((GcsCreateOptions) createOptions).gcsUploadBufferSizeBytes());
-    } else {
-      return options.getGcsUtil().create(resourceId.getGcsPath(), createOptions.mimeType());
     }
+    return options.getGcsUtil().create(resourceId.getGcsPath(), builder.build());
   }
 
   @Override
diff --git a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java
index 538ddaa..668d94b 100644
--- a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java
+++ b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java
@@ -58,6 +58,7 @@
 import java.nio.file.FileAlreadyExistsException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
@@ -72,6 +73,9 @@
 import java.util.function.Supplier;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import org.apache.beam.runners.core.metrics.GcpResourceIdentifiers;
+import org.apache.beam.runners.core.metrics.MonitoringInfoConstants;
+import org.apache.beam.runners.core.metrics.ServiceCallMetric;
 import org.apache.beam.sdk.extensions.gcp.options.GcsOptions;
 import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath;
 import org.apache.beam.sdk.io.fs.MoveOptions;
@@ -454,42 +458,143 @@
   @VisibleForTesting
   SeekableByteChannel open(GcsPath path, GoogleCloudStorageReadOptions readOptions)
       throws IOException {
-    return googleCloudStorage.open(
-        new StorageResourceId(path.getBucket(), path.getObject()), readOptions);
+    HashMap<String, String> baseLabels = new HashMap<>();
+    baseLabels.put(MonitoringInfoConstants.Labels.PTRANSFORM, "");
+    baseLabels.put(MonitoringInfoConstants.Labels.SERVICE, "Storage");
+    baseLabels.put(MonitoringInfoConstants.Labels.METHOD, "GcsGet");
+    baseLabels.put(
+        MonitoringInfoConstants.Labels.RESOURCE,
+        GcpResourceIdentifiers.cloudStorageBucket(path.getBucket()));
+    baseLabels.put(
+        MonitoringInfoConstants.Labels.GCS_PROJECT_ID, googleCloudStorageOptions.getProjectId());
+    baseLabels.put(MonitoringInfoConstants.Labels.GCS_BUCKET, path.getBucket());
+
+    ServiceCallMetric serviceCallMetric =
+        new ServiceCallMetric(MonitoringInfoConstants.Urns.API_REQUEST_COUNT, baseLabels);
+    try {
+      SeekableByteChannel channel =
+          googleCloudStorage.open(
+              new StorageResourceId(path.getBucket(), path.getObject()), readOptions);
+      serviceCallMetric.call("ok");
+      return channel;
+    } catch (IOException e) {
+      if (e.getCause() instanceof GoogleJsonResponseException) {
+        serviceCallMetric.call(((GoogleJsonResponseException) e.getCause()).getDetails().getCode());
+      }
+      throw e;
+    }
   }
 
-  /**
-   * Creates an object in GCS.
-   *
-   * <p>Returns a WritableByteChannel that can be used to write data to the object.
-   *
-   * @param path the GCS file to write to
-   * @param type the type of object, eg "text/plain".
-   * @return a Callable object that encloses the operation.
-   */
+  /** @deprecated Use {@link #create(GcsPath, CreateOptions)} instead. */
+  @Deprecated
   public WritableByteChannel create(GcsPath path, String type) throws IOException {
-    return create(path, type, uploadBufferSizeBytes);
+    CreateOptions.Builder builder = CreateOptions.builder().setContentType(type);
+    return create(path, builder.build());
   }
 
-  /**
-   * Same as {@link GcsUtil#create(GcsPath, String)} but allows overriding {code
-   * uploadBufferSizeBytes}.
-   */
+  /** @deprecated Use {@link #create(GcsPath, CreateOptions)} instead. */
+  @Deprecated
   public WritableByteChannel create(GcsPath path, String type, Integer uploadBufferSizeBytes)
       throws IOException {
+    CreateOptions.Builder builder =
+        CreateOptions.builder()
+            .setContentType(type)
+            .setUploadBufferSizeBytes(uploadBufferSizeBytes);
+    return create(path, builder.build());
+  }
+
+  @AutoValue
+  public abstract static class CreateOptions {
+    /**
+     * If true, the created file is expected to not exist. The presence check before writing is
+     * skipped, so a write exception may occur if the file does exist.
+     */
+    public abstract boolean getExpectFileToNotExist();
+
+    /**
+     * If non-null, the upload buffer size to be used. If null, the buffer size corresponds to
+     * {@code GcsUtil.getUploadBufferSizeBytes}.
+     */
+    public abstract @Nullable Integer getUploadBufferSizeBytes();
+
+    /** The content type for the created file, eg "text/plain". */
+    public abstract @Nullable String getContentType();
+
+    public static Builder builder() {
+      return new AutoValue_GcsUtil_CreateOptions.Builder().setExpectFileToNotExist(false);
+    }
+
+    @AutoValue.Builder
+    public abstract static class Builder {
+      public abstract Builder setContentType(String value);
+
+      public abstract Builder setUploadBufferSizeBytes(int value);
+
+      public abstract Builder setExpectFileToNotExist(boolean value);
+
+      public abstract CreateOptions build();
+    }
+  }
+
+  /**
+   * Creates an object in GCS and prepares for uploading its contents.
+   *
+   * @param path the GCS file to write to
+   * @param options to be used for creating and configuring file upload
+   * @return a WritableByteChannel that can be used to write data to the object.
+   */
+  public WritableByteChannel create(GcsPath path, CreateOptions options) throws IOException {
     AsyncWriteChannelOptions wcOptions = googleCloudStorageOptions.getWriteChannelOptions();
-    int uploadChunkSize =
-        (uploadBufferSizeBytes == null) ? wcOptions.getUploadChunkSize() : uploadBufferSizeBytes;
-    AsyncWriteChannelOptions newOptions =
-        wcOptions.toBuilder().setUploadChunkSize(uploadChunkSize).build();
+    @Nullable
+    Integer uploadBufferSizeBytes =
+        options.getUploadBufferSizeBytes() != null
+            ? options.getUploadBufferSizeBytes()
+            : getUploadBufferSizeBytes();
+    if (uploadBufferSizeBytes != null) {
+      wcOptions = wcOptions.toBuilder().setUploadChunkSize(uploadBufferSizeBytes).build();
+    }
     GoogleCloudStorageOptions newGoogleCloudStorageOptions =
-        googleCloudStorageOptions.toBuilder().setWriteChannelOptions(newOptions).build();
+        googleCloudStorageOptions.toBuilder().setWriteChannelOptions(wcOptions).build();
     GoogleCloudStorage gcpStorage =
         new GoogleCloudStorageImpl(
             newGoogleCloudStorageOptions, this.storageClient, this.credentials);
-    return gcpStorage.create(
-        new StorageResourceId(path.getBucket(), path.getObject()),
-        CreateObjectOptions.builder().setOverwriteExisting(true).setContentType(type).build());
+    StorageResourceId resourceId =
+        new StorageResourceId(
+            path.getBucket(),
+            path.getObject(),
+            // If we expect the file not to exist, we set a generation id of 0. This avoids a read
+            // to check whether the object already exists and should be overwritten.
+            // See {@link GoogleCloudStorage#create(StorageResourceId, GoogleCloudStorageOptions)}
+            options.getExpectFileToNotExist() ? 0L : StorageResourceId.UNKNOWN_GENERATION_ID);
+    CreateObjectOptions.Builder createBuilder =
+        CreateObjectOptions.builder().setOverwriteExisting(true);
+    if (options.getContentType() != null) {
+      createBuilder = createBuilder.setContentType(options.getContentType());
+    }
+
+    HashMap<String, String> baseLabels = new HashMap<>();
+    baseLabels.put(MonitoringInfoConstants.Labels.PTRANSFORM, "");
+    baseLabels.put(MonitoringInfoConstants.Labels.SERVICE, "Storage");
+    baseLabels.put(MonitoringInfoConstants.Labels.METHOD, "GcsInsert");
+    baseLabels.put(
+        MonitoringInfoConstants.Labels.RESOURCE,
+        GcpResourceIdentifiers.cloudStorageBucket(path.getBucket()));
+    baseLabels.put(
+        MonitoringInfoConstants.Labels.GCS_PROJECT_ID, googleCloudStorageOptions.getProjectId());
+    baseLabels.put(MonitoringInfoConstants.Labels.GCS_BUCKET, path.getBucket());
+
+    ServiceCallMetric serviceCallMetric =
+        new ServiceCallMetric(MonitoringInfoConstants.Urns.API_REQUEST_COUNT, baseLabels);
+    try {
+      WritableByteChannel channel = gcpStorage.create(resourceId, createBuilder.build());
+      serviceCallMetric.call("ok");
+      return channel;
+    } catch (IOException e) {
+      if (e.getCause() instanceof GoogleJsonResponseException) {
+        serviceCallMetric.call(((GoogleJsonResponseException) e.getCause()).getDetails().getCode());
+      }
+      throw e;
+    }
   }
 
   /** Returns whether the GCS bucket exists and is accessible. */
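The two older `create(GcsPath, String[, Integer])` overloads are now thin deprecated wrappers around the new `GcsUtil.CreateOptions` value class. A minimal sketch of the new call path; the bucket and object names are placeholders and credentials are assumed to come from the environment:

```java
import java.io.IOException;
import java.nio.channels.WritableByteChannel;
import org.apache.beam.sdk.extensions.gcp.options.GcsOptions;
import org.apache.beam.sdk.extensions.gcp.util.GcsUtil;
import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class GcsCreateOptionsSketch {
  static void writeObject() throws IOException {
    GcsOptions options = PipelineOptionsFactory.as(GcsOptions.class);
    GcsUtil gcsUtil = options.getGcsUtil();

    GcsPath path = GcsPath.fromComponents("my-bucket", "tmp/output.txt");
    WritableByteChannel channel =
        gcsUtil.create(
            path,
            GcsUtil.CreateOptions.builder()
                .setContentType("text/plain")
                .setUploadBufferSizeBytes(1024 * 1024)
                // Skips the pre-write existence check; the write may fail if the object exists.
                .setExpectFileToNotExist(true)
                .build());
    channel.close();
  }
}
```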
diff --git a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtilTest.java b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtilTest.java
index 0c4df7a..7856ddd 100644
--- a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtilTest.java
+++ b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtilTest.java
@@ -58,7 +58,11 @@
 import com.google.api.services.storage.model.Objects;
 import com.google.api.services.storage.model.RewriteResponse;
 import com.google.api.services.storage.model.StorageObject;
+import com.google.cloud.hadoop.gcsio.CreateObjectOptions;
+import com.google.cloud.hadoop.gcsio.GoogleCloudStorage;
+import com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions;
 import com.google.cloud.hadoop.gcsio.GoogleCloudStorageReadOptions;
+import com.google.cloud.hadoop.gcsio.StorageResourceId;
 import java.io.ByteArrayInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
@@ -71,6 +75,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.concurrent.CountDownLatch;
@@ -78,6 +83,10 @@
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Supplier;
+import org.apache.beam.runners.core.metrics.GcpResourceIdentifiers;
+import org.apache.beam.runners.core.metrics.MetricsContainerImpl;
+import org.apache.beam.runners.core.metrics.MonitoringInfoConstants;
+import org.apache.beam.runners.core.metrics.MonitoringInfoMetricName;
 import org.apache.beam.sdk.extensions.gcp.auth.TestCredential;
 import org.apache.beam.sdk.extensions.gcp.options.GcsOptions;
 import org.apache.beam.sdk.extensions.gcp.util.GcsUtil.BatchInterface;
@@ -85,10 +94,12 @@
 import org.apache.beam.sdk.extensions.gcp.util.GcsUtil.StorageObjectOrIOException;
 import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath;
 import org.apache.beam.sdk.io.fs.MoveOptions.StandardMoveOptions;
+import org.apache.beam.sdk.metrics.MetricsEnvironment;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
 import org.apache.beam.sdk.util.FluentBackoff;
 import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
 import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists;
+import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.ExpectedException;
@@ -101,6 +112,13 @@
 public class GcsUtilTest {
   @Rule public ExpectedException thrown = ExpectedException.none();
 
+  @Before
+  public void setUp() {
+    // Setup the ProcessWideContainer for testing metrics are set.
+    MetricsContainerImpl container = new MetricsContainerImpl(null);
+    MetricsEnvironment.setProcessWideContainer(container);
+  }
+
   private static GcsOptions gcsOptionsWithTestCredential() {
     GcsOptions pipelineOptions = PipelineOptionsFactory.as(GcsOptions.class);
     pipelineOptions.setGcpCredential(new TestCredential());
@@ -785,6 +803,68 @@
     channel.close();
   }
 
+  @Test
+  public void testGCSReadMetricsIsSet() {
+    GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
+    GcsUtil gcsUtil = pipelineOptions.getGcsUtil();
+    gcsUtil.setCloudStorageImpl(
+        GoogleCloudStorageOptions.builder()
+            .setAppName("Beam")
+            .setGrpcEnabled(true)
+            .setProjectId("my_project")
+            .build());
+    GoogleCloudStorageReadOptions readOptions =
+        GoogleCloudStorageReadOptions.builder().setFastFailOnNotFound(true).build();
+    assertThrows(
+        IOException.class,
+        () -> gcsUtil.open(GcsPath.fromComponents("testbucket", "testbucket"), readOptions));
+    verifyMetricWasSet("my_project", "testbucket", "GcsGet", "permission_denied", 1);
+  }
+
+  @Test
+  public void testGCSWriteMetricsIsSet() throws IOException {
+    GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
+    GcsUtil gcsUtil = pipelineOptions.getGcsUtil();
+    GoogleCloudStorage mockStorage = Mockito.mock(GoogleCloudStorage.class);
+    gcsUtil.setCloudStorageImpl(
+        GoogleCloudStorageOptions.builder()
+            .setAppName("Beam")
+            .setGrpcEnabled(true)
+            .setProjectId("my_project")
+            .build());
+    when(mockStorage.create(
+            new StorageResourceId("testbucket", "testobject"),
+            CreateObjectOptions.builder()
+                .setOverwriteExisting(true)
+                .setContentType("type")
+                .build()))
+        .thenThrow(IOException.class);
+    GcsPath gcsPath = GcsPath.fromComponents("testbucket", "testobject");
+    assertThrows(IOException.class, () -> gcsUtil.create(gcsPath, ""));
+    verifyMetricWasSet("my_project", "testbucket", "GcsInsert", "permission_denied", 1);
+  }
+
+  private void verifyMetricWasSet(
+      String projectId, String bucketId, String method, String status, long count) {
+    // Verify that the metric was reported with the expected labels.
+    HashMap<String, String> labels = new HashMap<>();
+    labels.put(MonitoringInfoConstants.Labels.PTRANSFORM, "");
+    labels.put(MonitoringInfoConstants.Labels.SERVICE, "Storage");
+    labels.put(MonitoringInfoConstants.Labels.METHOD, method);
+    labels.put(MonitoringInfoConstants.Labels.GCS_PROJECT_ID, projectId);
+    labels.put(MonitoringInfoConstants.Labels.GCS_BUCKET, bucketId);
+    labels.put(
+        MonitoringInfoConstants.Labels.RESOURCE,
+        GcpResourceIdentifiers.cloudStorageBucket(bucketId));
+    labels.put(MonitoringInfoConstants.Labels.STATUS, status);
+
+    MonitoringInfoMetricName name =
+        MonitoringInfoMetricName.named(MonitoringInfoConstants.Urns.API_REQUEST_COUNT, labels);
+    MetricsContainerImpl container =
+        (MetricsContainerImpl) MetricsEnvironment.getProcessWideContainer();
+    assertEquals(count, (long) container.getCounter(name).getCumulative());
+  }
+
   /** Builds a fake GoogleJsonResponseException for testing API error handling. */
   private static GoogleJsonResponseException googleJsonResponseException(
       final int status, final String reason, final String message) throws IOException {
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamAggregationRel.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamAggregationRel.java
index f3e14da..3173db9 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamAggregationRel.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamAggregationRel.java
@@ -31,6 +31,7 @@
 import org.apache.beam.sdk.schemas.Schema;
 import org.apache.beam.sdk.schemas.Schema.Field;
 import org.apache.beam.sdk.schemas.Schema.FieldType;
+import org.apache.beam.sdk.schemas.transforms.Group;
 import org.apache.beam.sdk.transforms.Combine.CombineFn;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.GroupByKey;
@@ -216,6 +217,8 @@
     private WindowFn<Row, IntervalWindow> windowFn;
     private int windowFieldIndex;
     private List<FieldAggregation> fieldAggregations;
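+    /** Number of GROUP BY fields; zero means the aggregation is applied globally. */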
+    private final int groupSetCount;
+    private boolean ignoreValues;
 
     private Transform(
         WindowFn<Row, IntervalWindow> windowFn,
@@ -227,6 +230,8 @@
       this.windowFieldIndex = windowFieldIndex;
       this.fieldAggregations = fieldAggregations;
       this.outputSchema = outputSchema;
+      this.groupSetCount = groupSet.asList().size();
+      this.ignoreValues = false;
       this.keyFieldsIds =
           groupSet.asList().stream().filter(i -> i != windowFieldIndex).collect(toList());
     }
@@ -243,39 +248,60 @@
       if (windowFn != null) {
         windowedStream = assignTimestampsAndWindow(upstream);
       }
-
       validateWindowIsSupported(windowedStream);
+      // Check whether there are fields to group by.
+      if (groupSetCount > 0) {
+        org.apache.beam.sdk.schemas.transforms.Group.AggregateCombiner<Row> byFields =
+            org.apache.beam.sdk.schemas.transforms.Group.byFieldIds(keyFieldsIds);
+        PTransform<PCollection<Row>, PCollection<Row>> combiner = createCombiner(byFields);
+        boolean verifyRowValues =
+            pinput.getPipeline().getOptions().as(BeamSqlPipelineOptions.class).getVerifyRowValues();
+        return windowedStream
+            .apply(combiner)
+            .apply(
+                "mergeRecord",
+                ParDo.of(
+                    mergeRecord(outputSchema, windowFieldIndex, ignoreValues, verifyRowValues)))
+            .setRowSchema(outputSchema);
+      }
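+      // No GROUP BY fields, so aggregate the whole (windowed) input globally.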
+      org.apache.beam.sdk.schemas.transforms.Group.AggregateCombiner<Row> globally =
+          org.apache.beam.sdk.schemas.transforms.Group.CombineFieldsGlobally.create();
+      PTransform<PCollection<Row>, PCollection<Row>> combiner = createCombiner(globally);
+      return windowedStream.apply(combiner).setRowSchema(outputSchema);
+    }
 
-      org.apache.beam.sdk.schemas.transforms.Group.ByFields<Row> byFields =
-          org.apache.beam.sdk.schemas.transforms.Group.byFieldIds(keyFieldsIds);
-      org.apache.beam.sdk.schemas.transforms.Group.CombineFieldsByFields<Row> combined = null;
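+    /**
+     * Chains the field aggregations onto the given initial combiner, falling back to a constant
+     * combiner when no aggregations are specified (SELECT DISTINCT).
+     */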
+    private PTransform<PCollection<Row>, PCollection<Row>> createCombiner(
+        org.apache.beam.sdk.schemas.transforms.Group.AggregateCombiner<Row> initialCombiner) {
+
+      org.apache.beam.sdk.schemas.transforms.Group.AggregateCombiner combined = null;
       for (FieldAggregation fieldAggregation : fieldAggregations) {
         List<Integer> inputs = fieldAggregation.inputs;
         CombineFn combineFn = fieldAggregation.combineFn;
-        if (inputs.size() > 1 || inputs.isEmpty()) {
-          // In this path we extract a Row (an empty row if inputs.isEmpty).
-          combined =
-              (combined == null)
-                  ? byFields.aggregateFieldsById(inputs, combineFn, fieldAggregation.outputField)
-                  : combined.aggregateFieldsById(inputs, combineFn, fieldAggregation.outputField);
-        } else {
+        if (inputs.size() == 1) {
           // Combining over a single field, so extract just that field.
           combined =
               (combined == null)
-                  ? byFields.aggregateField(inputs.get(0), combineFn, fieldAggregation.outputField)
+                  ? initialCombiner.aggregateField(
+                      inputs.get(0), combineFn, fieldAggregation.outputField)
                   : combined.aggregateField(inputs.get(0), combineFn, fieldAggregation.outputField);
+        } else {
+          // In this path we extract a Row (an empty row if inputs.isEmpty).
+          combined =
+              (combined == null)
+                  ? initialCombiner.aggregateFieldsById(
+                      inputs, combineFn, fieldAggregation.outputField)
+                  : combined.aggregateFieldsById(inputs, combineFn, fieldAggregation.outputField);
         }
       }
 
       PTransform<PCollection<Row>, PCollection<Row>> combiner = combined;
-      boolean ignoreValues = false;
       if (combiner == null) {
         // If no field aggregations were specified, we run a constant combiner that always returns
         // a single empty row for each key. This is used by the SELECT DISTINCT query plan - in this
         // case a group by is generated to determine unique keys, and a constant null combiner is
         // used.
         combiner =
-            byFields.aggregateField(
+            initialCombiner.aggregateField(
                 "*",
                 AggregationCombineFnAdapter.createConstantCombineFn(),
                 Field.of(
@@ -283,15 +309,7 @@
                     FieldType.row(AggregationCombineFnAdapter.EMPTY_SCHEMA).withNullable(true)));
         ignoreValues = true;
       }
-
-      boolean verifyRowValues =
-          pinput.getPipeline().getOptions().as(BeamSqlPipelineOptions.class).getVerifyRowValues();
-      return windowedStream
-          .apply(combiner)
-          .apply(
-              "mergeRecord",
-              ParDo.of(mergeRecord(outputSchema, windowFieldIndex, ignoreValues, verifyRowValues)))
-          .setRowSchema(outputSchema);
+      return combiner;
     }
 
     /** Extract timestamps from the windowFieldIndex, then window into windowFns. */
@@ -349,7 +367,6 @@
           if (!ignoreValues) {
             fieldValues.addAll(kvRow.getRow(1).getValues());
           }
-
           if (windowStartFieldIndex != -1) {
             fieldValues.add(windowStartFieldIndex, ((IntervalWindow) window).start());
           }
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamBuiltinAggregations.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamBuiltinAggregations.java
index 6dd76a3..dcc8ec9 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamBuiltinAggregations.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamBuiltinAggregations.java
@@ -17,6 +17,7 @@
  */
 package org.apache.beam.sdk.extensions.sql.impl.transform;
 
+import java.io.Serializable;
 import java.math.BigDecimal;
 import java.math.MathContext;
 import java.math.RoundingMode;
@@ -24,6 +25,7 @@
 import java.util.function.Function;
 import org.apache.beam.sdk.coders.BigDecimalCoder;
 import org.apache.beam.sdk.coders.BigEndianIntegerCoder;
+import org.apache.beam.sdk.coders.CannotProvideCoderException;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.CoderRegistry;
 import org.apache.beam.sdk.coders.KvCoder;
@@ -56,16 +58,24 @@
       BUILTIN_AGGREGATOR_FACTORIES =
           ImmutableMap.<String, Function<Schema.FieldType, CombineFn<?, ?, ?>>>builder()
               .put("ANY_VALUE", typeName -> Sample.anyValueCombineFn())
-              .put("COUNT", typeName -> Count.combineFn())
-              .put("MAX", BeamBuiltinAggregations::createMax)
-              .put("MIN", BeamBuiltinAggregations::createMin)
-              .put("SUM", BeamBuiltinAggregations::createSum)
-              .put("$SUM0", BeamBuiltinAggregations::createSum)
-              .put("AVG", BeamBuiltinAggregations::createAvg)
-              .put("BIT_OR", BeamBuiltinAggregations::createBitOr)
-              .put("BIT_XOR", BeamBuiltinAggregations::createBitXOr)
+              // Drop null elements for these aggregations.
+              .put("COUNT", typeName -> new DropNullFn(Count.combineFn()))
+              .put("MAX", typeName -> new DropNullFn(BeamBuiltinAggregations.createMax(typeName)))
+              .put("MIN", typeName -> new DropNullFn(BeamBuiltinAggregations.createMin(typeName)))
+              .put("SUM", typeName -> new DropNullFn(BeamBuiltinAggregations.createSum(typeName)))
+              .put(
+                  "$SUM0", typeName -> new DropNullFn(BeamBuiltinAggregations.createSum0(typeName)))
+              .put("AVG", typeName -> new DropNullFn(BeamBuiltinAggregations.createAvg(typeName)))
+              .put(
+                  "BIT_OR",
+                  typeName -> new DropNullFn(BeamBuiltinAggregations.createBitOr(typeName)))
+              .put(
+                  "BIT_XOR",
+                  typeName -> new DropNullFn(BeamBuiltinAggregations.createBitXOr(typeName)))
               // JIRA link:https://issues.apache.org/jira/browse/BEAM-10379
-              .put("BIT_AND", BeamBuiltinAggregations::createBitAnd)
+              .put(
+                  "BIT_AND",
+                  typeName -> new DropNullFn(BeamBuiltinAggregations.createBitAnd(typeName)))
               .put("VAR_POP", t -> VarianceFn.newPopulation(t.getTypeName()))
               .put("VAR_SAMP", t -> VarianceFn.newSample(t.getTypeName()))
               .put("COVAR_POP", t -> CovarianceFn.newPopulation(t.getTypeName()))
@@ -150,7 +160,7 @@
       case BYTE:
         return new ByteSum();
       case INT64:
-        return Sum.ofLongs();
+        return new LongSum();
       case FLOAT:
         return new FloatSum();
       case DOUBLE:
@@ -163,6 +173,32 @@
     }
   }
 
+  /**
+   * {@link CombineFn} for SUM0, which returns 0 instead of null when there is nothing to sum.
+   * Based on {@link Sum} and {@link Combine.BinaryCombineFn}.
+   */
+  static CombineFn createSum0(Schema.FieldType fieldType) {
+    switch (fieldType.getTypeName()) {
+      case INT32:
+        return new IntegerSum0();
+      case INT16:
+        return new ShortSum0();
+      case BYTE:
+        return new ByteSum0();
+      case INT64:
+        return new LongSum0();
+      case FLOAT:
+        return new FloatSum0();
+      case DOUBLE:
+        return new DoubleSum0();
+      case DECIMAL:
+        return new BigDecimalSum0();
+      default:
+        throw new UnsupportedOperationException(
+            String.format("[%s] is not supported in SUM0", fieldType));
+    }
+  }
+
   /** {@link CombineFn} for AVG. */
   static CombineFn createAvg(Schema.FieldType fieldType) {
     switch (fieldType.getTypeName()) {
@@ -224,6 +260,13 @@
     }
   }
 
+  static class IntegerSum extends Combine.BinaryCombineFn<Integer> {
+    @Override
+    public Integer apply(Integer left, Integer right) {
+      return (int) (left + right);
+    }
+  }
+
   static class ShortSum extends Combine.BinaryCombineFn<Short> {
     @Override
     public Short apply(Short left, Short right) {
@@ -245,6 +288,20 @@
     }
   }
 
+  static class DoubleSum extends Combine.BinaryCombineFn<Double> {
+    @Override
+    public Double apply(Double left, Double right) {
+      return (double) left + right;
+    }
+  }
+
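+  /** Sums longs with {@link Math#addExact}, so overflow raises {@link ArithmeticException} instead of wrapping. */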
+  static class LongSum extends Combine.BinaryCombineFn<Long> {
+    @Override
+    public Long apply(Long left, Long right) {
+      return Math.addExact(left, right);
+    }
+  }
+
   static class BigDecimalSum extends Combine.BinaryCombineFn<BigDecimal> {
     @Override
     public BigDecimal apply(BigDecimal left, BigDecimal right) {
@@ -252,6 +309,90 @@
     }
   }
 
+  static class IntegerSum0 extends IntegerSum {
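+    // identity() is returned for an empty aggregation, so SUM0 yields 0 rather than null.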
+    @Override
+    public @Nullable Integer identity() {
+      return 0;
+    }
+  }
+
+  static class ShortSum0 extends ShortSum {
+    @Override
+    public @Nullable Short identity() {
+      return 0;
+    }
+  }
+
+  static class ByteSum0 extends ByteSum {
+    @Override
+    public @Nullable Byte identity() {
+      return 0;
+    }
+  }
+
+  static class FloatSum0 extends FloatSum {
+    @Override
+    public @Nullable Float identity() {
+      return 0F;
+    }
+  }
+
+  static class DoubleSum0 extends DoubleSum {
+    @Override
+    public @Nullable Double identity() {
+      return 0D;
+    }
+  }
+
+  static class LongSum0 extends LongSum {
+    @Override
+    public @Nullable Long identity() {
+      return 0L;
+    }
+  }
+
+  static class BigDecimalSum0 extends BigDecimalSum {
+    @Override
+    public @Nullable BigDecimal identity() {
+      return BigDecimal.ZERO;
+    }
+  }
+
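+  /** Wraps a {@link CombineFn} and drops null inputs before delegating to it. */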
+  private static class DropNullFn<InputT, AccumT, OutputT>
+      extends CombineFn<InputT, AccumT, OutputT> {
+    private final CombineFn<InputT, AccumT, OutputT> combineFn;
+
+    DropNullFn(CombineFn<InputT, AccumT, OutputT> combineFn) {
+      this.combineFn = combineFn;
+    }
+
+    @Override
+    public AccumT createAccumulator() {
+      return combineFn.createAccumulator();
+    }
+
+    @Override
+    public AccumT addInput(AccumT accumulator, InputT input) {
+      return (input == null) ? accumulator : combineFn.addInput(accumulator, input);
+    }
+
+    @Override
+    public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
+      return combineFn.mergeAccumulators(accumulators);
+    }
+
+    @Override
+    public OutputT extractOutput(AccumT accumulator) {
+      return combineFn.extractOutput(accumulator);
+    }
+
+    @Override
+    public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<InputT> inputCoder)
+        throws CannotProvideCoderException {
+      return combineFn.getAccumulatorCoder(registry, inputCoder);
+    }
+  }
+
   /** {@link CombineFn} for <em>AVG</em> on {@link Number} types. */
   abstract static class Avg<T extends Number> extends CombineFn<T, KV<Integer, BigDecimal>, T> {
     @Override
@@ -376,29 +517,45 @@
     }
   }
 
-  static class BitOr<T extends Number> extends CombineFn<T, Long, Long> {
-    @Override
-    public Long createAccumulator() {
-      return 0L;
+  static class BitOr<T extends Number> extends CombineFn<T, BitOr.Accum, Long> {
+    static class Accum implements Serializable {
+      /** True if no inputs have been seen yet. */
+      boolean isEmpty = true;
+      /** The bitwise-or of the inputs seen so far. */
+      long bitOr = 0L;
     }
 
     @Override
-    public Long addInput(Long accum, T input) {
-      return accum | input.longValue();
+    public Accum createAccumulator() {
+      return new Accum();
     }
 
     @Override
-    public Long mergeAccumulators(Iterable<Long> accums) {
-      Long merged = createAccumulator();
-      for (Long accum : accums) {
-        merged = merged | accum;
+    public Accum addInput(Accum accum, T input) {
+      accum.isEmpty = false;
+      accum.bitOr |= input.longValue();
+      return accum;
+    }
+
+    @Override
+    public Accum mergeAccumulators(Iterable<Accum> accums) {
+      Accum merged = createAccumulator();
+      for (Accum accum : accums) {
+        if (accum.isEmpty) {
+          continue;
+        }
+        merged.isEmpty = false;
+        merged.bitOr |= accum.bitOr;
       }
       return merged;
     }
 
     @Override
-    public Long extractOutput(Long accum) {
-      return accum;
+    public Long extractOutput(Accum accum) {
+      if (accum.isEmpty) {
+        return null;
+      }
+      return accum.bitOr;
     }
   }
 
@@ -409,14 +566,9 @@
    * (https://issues.apache.org/jira/browse/BEAM-10379)
    */
   static class BitAnd<T extends Number> extends CombineFn<T, BitAnd.Accum, Long> {
-    static class Accum {
+    static class Accum implements Serializable {
       /** True if no inputs have been seen yet. */
       boolean isEmpty = true;
-      /**
-       * True if any null inputs have been seen. If we see a single null value, the end result is
-       * null, so if isNull is true, isEmpty and bitAnd are ignored.
-       */
-      boolean isNull = false;
       /** The bitwise-and of the inputs seen so far. */
       long bitAnd = -1L;
     }
@@ -428,13 +580,6 @@
 
     @Override
     public Accum addInput(Accum accum, T input) {
-      if (accum.isNull) {
-        return accum;
-      }
-      if (input == null) {
-        accum.isNull = true;
-        return accum;
-      }
       accum.isEmpty = false;
       accum.bitAnd &= input.longValue();
       return accum;
@@ -444,9 +589,6 @@
     public Accum mergeAccumulators(Iterable<Accum> accums) {
       Accum merged = createAccumulator();
       for (Accum accum : accums) {
-        if (accum.isNull) {
-          return accum;
-        }
         if (accum.isEmpty) {
           continue;
         }
@@ -458,41 +600,55 @@
 
     @Override
     public Long extractOutput(Accum accum) {
-      if (accum.isEmpty || accum.isNull) {
+      if (accum.isEmpty) {
         return null;
       }
       return accum.bitAnd;
     }
   }
 
-  public static class BitXOr<T extends Number> extends CombineFn<T, Long, Long> {
+  public static class BitXOr<T extends Number> extends CombineFn<T, BitXOr.Accum, Long> {
 
-    @Override
-    public Long createAccumulator() {
-      return 0L;
+    static class Accum implements Serializable {
+      /** True if no inputs have been seen yet. */
+      boolean isEmpty = true;
+      /** The bitwise-xor of the inputs seen so far. */
+      long bitXOr = 0L;
     }
 
     @Override
-    public Long addInput(Long mutableAccumulator, T input) {
+    public Accum createAccumulator() {
+      return new Accum();
+    }
+
+    @Override
+    public Accum addInput(Accum mutableAccumulator, T input) {
       if (input != null) {
-        return mutableAccumulator ^ input.longValue();
-      } else {
-        return 0L;
+        mutableAccumulator.isEmpty = false;
+        mutableAccumulator.bitXOr ^= input.longValue();
       }
+      return mutableAccumulator;
     }
 
     @Override
-    public Long mergeAccumulators(Iterable<Long> accumulators) {
-      Long merged = createAccumulator();
-      for (Long accum : accumulators) {
-        merged = merged ^ accum;
+    public Accum mergeAccumulators(Iterable<Accum> accumulators) {
+      Accum merged = createAccumulator();
+      for (Accum accum : accumulators) {
+        if (accum.isEmpty) {
+          continue;
+        }
+        merged.isEmpty = false;
+        merged.bitXOr ^= accum.bitXOr;
       }
       return merged;
     }
 
     @Override
-    public Long extractOutput(Long accumulator) {
-      return accumulator;
+    public Long extractOutput(Accum accumulator) {
+      if (accumulator.isEmpty) {
+        return null;
+      }
+      return accumulator.bitXOr;
     }
   }
 }
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/AggregationCombineFnAdapter.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/AggregationCombineFnAdapter.java
index 2178a2d..efef9d5 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/AggregationCombineFnAdapter.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/AggregationCombineFnAdapter.java
@@ -51,10 +51,7 @@
 
     @Override
     public Object addInput(Object accumulator, T input) {
-      T processedInput = getInput(input);
-      return (processedInput == null)
-          ? accumulator
-          : combineFn.addInput(accumulator, getInput(input));
+      return combineFn.addInput(accumulator, getInput(input));
     }
 
     @Override
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/CountIf.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/CountIf.java
index 92b05b2..51977ed 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/CountIf.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/CountIf.java
@@ -17,6 +17,7 @@
  */
 package org.apache.beam.sdk.extensions.sql.impl.transform.agg;
 
+import java.io.Serializable;
 import org.apache.beam.sdk.transforms.Combine;
 
 /**
@@ -32,7 +33,7 @@
 
   public static class CountIfFn extends Combine.CombineFn<Boolean, CountIfFn.Accum, Long> {
 
-    public static class Accum {
+    public static class Accum implements Serializable {
       boolean isExpressionFalse = true;
       long countIfResult = 0L;
     }
@@ -56,6 +57,7 @@
       CountIfFn.Accum merged = createAccumulator();
       for (CountIfFn.Accum accum : accums) {
         if (!accum.isExpressionFalse) {
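+          // Remember that at least one accumulator saw a true expression.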
+          merged.isExpressionFalse = false;
           merged.countIfResult += accum.countIfResult;
         }
       }
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/CovarianceFn.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/CovarianceFn.java
index f8112e2..14ceb3c 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/CovarianceFn.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/CovarianceFn.java
@@ -124,6 +124,11 @@
     BigDecimal adjustedCount =
         this.isSample ? covariance.count().subtract(BigDecimal.ONE) : covariance.count();
 
+    // Avoid ArithmeticException: Division is undefined when adjustedCount is 0
+    if (adjustedCount.equals(BigDecimal.ZERO)) {
+      return BigDecimal.ZERO;
+    }
+
     return covariance.covariance().divide(adjustedCount, MATH_CTX);
   }
 }
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/VarianceFn.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/VarianceFn.java
index ae1d9c8..032c9b0 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/VarianceFn.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/agg/VarianceFn.java
@@ -141,6 +141,11 @@
     BigDecimal adjustedCount =
         this.isSample ? variance.count().subtract(BigDecimal.ONE) : variance.count();
 
+    // Avoid ArithmeticException: Division is undefined when adjustedCount is 0
+    if (adjustedCount.equals(BigDecimal.ZERO)) {
+      return BigDecimal.ZERO;
+    }
+
     return variance.variance().divide(adjustedCount, MATH_CTX);
   }
 }
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/udaf/ArrayAgg.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/udaf/ArrayAgg.java
index 721b4fa..e5ec5a8 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/udaf/ArrayAgg.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/udaf/ArrayAgg.java
@@ -20,26 +20,27 @@
 import java.util.ArrayList;
 import java.util.List;
 import org.apache.beam.sdk.transforms.Combine;
+import org.checkerframework.checker.nullness.qual.Nullable;
 
 public class ArrayAgg {
 
-  public static class ArrayAggArray extends Combine.CombineFn<Object, List<Object>, List<Object>> {
+  public static class ArrayAggArray<T> extends Combine.CombineFn<T, List<T>, @Nullable List<T>> {
     @Override
-    public List<Object> createAccumulator() {
+    public List<T> createAccumulator() {
       return new ArrayList<>();
     }
 
     @Override
-    public List<Object> addInput(List<Object> accum, Object input) {
+    public List<T> addInput(List<T> accum, T input) {
       accum.add(input);
       return accum;
     }
 
     @Override
-    public List<Object> mergeAccumulators(Iterable<List<Object>> accums) {
-      List<Object> merged = new ArrayList<>();
-      for (List<Object> accum : accums) {
-        for (Object o : accum) {
+    public List<T> mergeAccumulators(Iterable<List<T>> accums) {
+      List<T> merged = new ArrayList<>();
+      for (List<T> accum : accums) {
+        for (T o : accum) {
           merged.add(o);
         }
       }
@@ -47,7 +48,10 @@
     }
 
     @Override
-    public List<Object> extractOutput(List<Object> accumulator) {
+    public @Nullable List<T> extractOutput(List<T> accumulator) {
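+      // ARRAY_AGG over zero inputs returns NULL rather than an empty array.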
+      if (accumulator.isEmpty()) {
+        return null;
+      }
       return accumulator;
     }
   }
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlDslAggregationTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlDslAggregationTest.java
index 206dde0..5f2b574 100644
--- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlDslAggregationTest.java
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlDslAggregationTest.java
@@ -157,6 +157,113 @@
     input.apply(SqlTransform.query(sql));
   }
 
+  /** Multiple aggregation functions with unbounded PCollection. */
+  @Test
+  public void testAggregationNonGroupedFunctionsWithUnbounded() throws Exception {
+    runAggregationFunctionsWithoutGroup(unboundedInput1);
+  }
+
+  /** Multiple aggregation functions with bounded PCollection. */
+  @Test
+  public void testAggregationNonGroupedFunctionsWithBounded() throws Exception {
+    runAggregationFunctionsWithoutGroup(boundedInput1);
+  }
+
+  private void runAggregationFunctionsWithoutGroup(PCollection<Row> input) throws Exception {
+    String sql =
+        "select count(*) as getFieldCount, "
+            + "sum(f_long) as sum1, avg(f_long) as avg1, "
+            + "max(f_long) as max1, min(f_long) as min1, "
+            + "sum(f_short) as sum2, avg(f_short) as avg2, "
+            + "max(f_short) as max2, min(f_short) as min2, "
+            + "sum(f_byte) as sum3, avg(f_byte) as avg3, "
+            + "max(f_byte) as max3, min(f_byte) as min3, "
+            + "sum(f_float) as sum4, avg(f_float) as avg4, "
+            + "max(f_float) as max4, min(f_float) as min4, "
+            + "sum(f_double) as sum5, avg(f_double) as avg5, "
+            + "max(f_double) as max5, min(f_double) as min5, "
+            + "max(f_timestamp) as max6, min(f_timestamp) as min6, "
+            + "max(f_string) as max7, min(f_string) as min7, "
+            + "var_pop(f_double) as varpop1, var_samp(f_double) as varsamp1, "
+            + "var_pop(f_int) as varpop2, var_samp(f_int) as varsamp2 "
+            + "FROM TABLE_A";
+
+    PCollection<Row> result =
+        PCollectionTuple.of(new TupleTag<>("TABLE_A"), input)
+            .apply("testAggregationFunctions", SqlTransform.query(sql));
+
+    Schema resultType =
+        Schema.builder()
+            .addInt64Field("size")
+            .addInt64Field("sum1")
+            .addInt64Field("avg1")
+            .addInt64Field("max1")
+            .addInt64Field("min1")
+            .addInt16Field("sum2")
+            .addInt16Field("avg2")
+            .addInt16Field("max2")
+            .addInt16Field("min2")
+            .addByteField("sum3")
+            .addByteField("avg3")
+            .addByteField("max3")
+            .addByteField("min3")
+            .addFloatField("sum4")
+            .addFloatField("avg4")
+            .addFloatField("max4")
+            .addFloatField("min4")
+            .addDoubleField("sum5")
+            .addDoubleField("avg5")
+            .addDoubleField("max5")
+            .addDoubleField("min5")
+            .addDateTimeField("max6")
+            .addDateTimeField("min6")
+            .addStringField("max7")
+            .addStringField("min7")
+            .addDoubleField("varpop1")
+            .addDoubleField("varsamp1")
+            .addInt32Field("varpop2")
+            .addInt32Field("varsamp2")
+            .build();
+
+    Row row =
+        Row.withSchema(resultType)
+            .addValues(
+                4L,
+                10000L,
+                2500L,
+                4000L,
+                1000L,
+                (short) 10,
+                (short) 2,
+                (short) 4,
+                (short) 1,
+                (byte) 10,
+                (byte) 2,
+                (byte) 4,
+                (byte) 1,
+                10.0F,
+                2.5F,
+                4.0F,
+                1.0F,
+                10.0,
+                2.5,
+                4.0,
+                1.0,
+                parseTimestampWithoutTimeZone("2017-01-01 02:04:03"),
+                parseTimestampWithoutTimeZone("2017-01-01 01:01:03"),
+                "第四行",
+                "string_row1",
+                1.25,
+                1.666666667,
+                1,
+                1)
+            .build();
+
+    PAssert.that(result).containsInAnyOrder(row);
+
+    pipeline.run().waitUntilFinish();
+  }
+
   /** GROUP-BY with multiple aggregation functions with bounded PCollection. */
   @Test
   public void testAggregationFunctionsWithBounded() throws Exception {
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SupportedZetaSqlBuiltinFunctions.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SupportedZetaSqlBuiltinFunctions.java
index 6d9d114..65dee35 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SupportedZetaSqlBuiltinFunctions.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SupportedZetaSqlBuiltinFunctions.java
@@ -412,11 +412,9 @@
           FunctionSignatureId.FN_SUM_DOUBLE, // sum
           FunctionSignatureId.FN_SUM_NUMERIC, // sum
           // FunctionSignatureId.FN_SUM_BIGNUMERIC, // sum
-          // JIRA link: https://issues.apache.org/jira/browse/BEAM-10379
-          // FunctionSignatureId.FN_BIT_AND_INT64, // bit_and
+          FunctionSignatureId.FN_BIT_AND_INT64, // bit_and
           FunctionSignatureId.FN_BIT_OR_INT64, // bit_or
-          // TODO(BEAM-10379) Re-enable when nulls are handled properly.
-          // FunctionSignatureId.FN_BIT_XOR_INT64, // bit_xor
+          FunctionSignatureId.FN_BIT_XOR_INT64, // bit_xor
           // FunctionSignatureId.FN_LOGICAL_AND, // logical_and
           // FunctionSignatureId.FN_LOGICAL_OR, // logical_or
           // Approximate aggregate functions.
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/AggregateScanConverter.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/AggregateScanConverter.java
index bd73241..ef00982 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/AggregateScanConverter.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/AggregateScanConverter.java
@@ -41,7 +41,6 @@
 import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.rel.core.AggregateCall;
 import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.rel.logical.LogicalAggregate;
 import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.rel.logical.LogicalProject;
-import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.rel.type.RelDataType;
 import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.rex.RexNode;
 import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.sql.SqlAggFunction;
 import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.sql.type.SqlReturnTypeInference;
@@ -92,12 +91,9 @@
       aggregateCalls = new ArrayList<>();
       // For aggregate calls, their input ref follow after GROUP BY input ref.
       int columnRefoff = groupFieldsListSize;
-      boolean nullable = false;
-      if (input.getProjects().size() > columnRefoff) {
-        nullable = input.getProjects().get(columnRefoff).getType().isNullable();
-      }
       for (ResolvedComputedColumn computedColumn : zetaNode.getAggregateList()) {
-        AggregateCall aggCall = convertAggCall(computedColumn, columnRefoff, nullable);
+        AggregateCall aggCall =
+            convertAggCall(computedColumn, columnRefoff, groupSet.size(), input);
         aggregateCalls.add(aggCall);
         if (!aggCall.getArgList().isEmpty()) {
           // Only increment column reference offset when aggregates use them (BEAM-8042).
@@ -177,7 +173,7 @@
   }
 
   private AggregateCall convertAggCall(
-      ResolvedComputedColumn computedColumn, int columnRefOff, boolean nullable) {
+      ResolvedComputedColumn computedColumn, int columnRefOff, int groupCount, RelNode input) {
     ResolvedAggregateFunctionCall aggregateFunctionCall =
         (ResolvedAggregateFunctionCall) computedColumn.getExpr();
 
@@ -259,12 +255,19 @@
       }
     }
 
-    RelDataType returnType =
-        ZetaSqlCalciteTranslationUtils.toCalciteType(
-            computedColumn.getColumn().getType(), nullable, getCluster().getRexBuilder());
-
     String aggName = getTrait().resolveAlias(computedColumn.getColumn());
     return AggregateCall.create(
-        sqlAggFunction, false, false, false, argList, -1, RelCollations.EMPTY, returnType, aggName);
+        sqlAggFunction,
+        false,
+        false,
+        false,
+        argList,
+        -1,
+        RelCollations.EMPTY,
+        groupCount,
+        input,
+        // When we pass null as the return type, Calcite infers it for us.
+        null,
+        aggName);
   }
 }
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/SqlOperatorMappingTable.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/SqlOperatorMappingTable.java
index c59df96..01b23eb 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/SqlOperatorMappingTable.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/SqlOperatorMappingTable.java
@@ -72,8 +72,7 @@
           .put("sum", SqlStdOperatorTable.SUM)
           .put("any_value", SqlStdOperatorTable.ANY_VALUE)
           .put("count", SqlStdOperatorTable.COUNT)
-          // .put("bit_and", SqlStdOperatorTable.BIT_AND) //JIRA link:
-          // https://issues.apache.org/jira/browse/BEAM-10379
+          .put("bit_and", SqlStdOperatorTable.BIT_AND)
           .put("string_agg", SqlOperators.STRING_AGG_STRING_FN) // NULL values not supported
           .put("array_agg", SqlOperators.ARRAY_AGG_FN)
           .put("bit_or", SqlStdOperatorTable.BIT_OR)
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/SqlOperators.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/SqlOperators.java
index b44f63e..29dfac4 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/SqlOperators.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/SqlOperators.java
@@ -48,6 +48,7 @@
 import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.sql.SqlOperator;
 import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.sql.SqlSyntax;
 import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.sql.type.ArraySqlType;
 import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.sql.type.FamilyOperandTypeChecker;
 import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.sql.type.InferTypes;
 import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.sql.type.OperandTypes;
@@ -89,8 +90,8 @@
   public static final SqlOperator ARRAY_AGG_FN =
       createUdafOperator(
           "array_agg",
-          x -> createTypeFactory().createArrayType(x.getOperandType(0), -1),
-          new UdafImpl<>(new ArrayAgg.ArrayAggArray()));
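+          // The boolean marks the array type itself as nullable, since ARRAY_AGG of no rows is NULL.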
+          x -> new ArraySqlType(x.getOperandType(0), true),
+          new UdafImpl<>(new ArrayAgg.ArrayAggArray<>()));
 
   public static final SqlOperator START_WITHS =
       createUdfOperator(
@@ -161,7 +162,7 @@
   public static final SqlOperator BIT_XOR =
       createUdafOperator(
           "BIT_XOR",
-          x -> createTypeFactory().createSqlType(SqlTypeName.BIGINT),
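+          // BIT_XOR may now return NULL (empty or all-null input), so its return type must be nullable.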
+          x -> NULLABLE_BIGINT,
           new UdafImpl<>(new BeamBuiltinAggregations.BitXOr<Number>()));
 
   public static final SqlOperator COUNTIF =
diff --git a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlDialectSpecTest.java b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlDialectSpecTest.java
index a802b30..35f8e22 100644
--- a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlDialectSpecTest.java
+++ b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlDialectSpecTest.java
@@ -3840,7 +3840,20 @@
   }
 
   @Test
-  @Ignore("NULL values don't work correctly. (https://issues.apache.org/jira/browse/BEAM-10379)")
+  public void testZetaSQLBitOrNull() {
+    String sql =
+        "SELECT bit_or(CAST(x as int64)) FROM "
+            + "(SELECT NULL x UNION ALL SELECT 5 UNION ALL SELECT 6);";
+
+    PCollection<Row> stream = execute(sql);
+
+    final Schema schema = Schema.builder().addInt64Field("field1").build();
+    PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValue(7L).build());
+
+    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+  }
+
+  @Test
   public void testZetaSQLBitAnd() {
     String sql = "SELECT BIT_AND(row_id) FROM table_all_types GROUP BY bool_col";
 
@@ -3851,6 +3864,124 @@
         .containsInAnyOrder(
             Row.withSchema(schema).addValue(1L).build(),
             Row.withSchema(schema).addValue(0L).build());
+    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+  }
+
+  @Test
+  public void testZetaSQLBitAndInt64() {
+    String sql = "SELECT bit_and(CAST(x as int64)) FROM (SELECT 1 x FROM (SELECT 1) WHERE false)";
+
+    PCollection<Row> stream = execute(sql);
+
+    final Schema schema = Schema.builder().addNullableField("field1", FieldType.INT64).build();
+    PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValue((Long) null).build());
+    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+  }
+
+  @Test
+  public void testZetaSQLBitAndNulls() {
+    String sql =
+        "SELECT bit_and(CAST(x as int64)) FROM "
+            + "(SELECT NULL x UNION ALL SELECT 5 UNION ALL SELECT 6)";
+
+    PCollection<Row> stream = execute(sql);
+
+    final Schema schema = Schema.builder().addInt64Field("field1").build();
+    PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValue(4L).build());
+    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+  }
+
+  @Test
+  public void testCountEmpty() {
+    String sql = "SELECT COUNT(x) FROM UNNEST([]) AS x";
+
+    PCollection<Row> stream = execute(sql);
+
+    Schema schema = Schema.builder().addInt64Field("field1").build();
+    PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValue(0L).build());
+
+    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+  }
+
+  @Test
+  public void testBitwiseOrEmpty() {
+    String sql = "SELECT BIT_OR(x) FROM UNNEST([]) AS x";
+
+    PCollection<Row> stream = execute(sql);
+
+    Schema schema = Schema.builder().addNullableField("field1", FieldType.INT64).build();
+    PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValue((Long) null).build());
+
+    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+  }
+
+  @Test
+  public void testArrayAggNulls() {
+    String sql = "SELECT ARRAY_AGG(x) FROM UNNEST([1, NULL, 3]) AS x";
+
+    PCollection<Row> stream = execute(sql);
+
+    Schema schema =
+        Schema.builder()
+            .addField(
+                Field.of(
+                    "field1",
+                    FieldType.array(FieldType.of(Schema.TypeName.INT64).withNullable(true))))
+            .build();
+    PAssert.that(stream)
+        .containsInAnyOrder(Row.withSchema(schema).addArray(1L, (Long) null, 3L).build());
+
+    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+  }
+
+  @Test
+  public void testArrayAggEmpty() {
+    String sql = "SELECT ARRAY_AGG(x) FROM UNNEST([]) AS x";
+
+    PCollection<Row> stream = execute(sql);
+
+    Schema schema = Schema.builder().addNullableField("field1", FieldType.INT64).build();
+    PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValue((Long) null).build());
+
+    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+  }
+
+  @Test
+  public void testInt64SumOverflow() {
+    String sql =
+        "SELECT SUM(col1)\n"
+            + "FROM (SELECT CAST(9223372036854775807 as int64) as col1 UNION ALL\n"
+            + "      SELECT CAST(1 as int64))\n";
+
+    ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+    BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+    BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+    thrown.expect(RuntimeException.class);
+    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+  }
+
+  @Test
+  public void testInt64SumUnderflow() {
+    String sql =
+        "SELECT SUM(col1)\n"
+            + "FROM (SELECT CAST(-9223372036854775808 as int64) as col1 UNION ALL\n"
+            + "      SELECT CAST(-1 as int64))\n";
+
+    ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+    BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+    BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+    thrown.expect(RuntimeException.class);
+    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+  }
+
+  @Test
+  public void testZetaSQLSumNulls() {
+    String sql = "SELECT SUM(x) AS sum FROM UNNEST([null, null, null]) AS x";
+
+    PCollection<Row> stream = execute(sql);
+
+    Schema schema = Schema.builder().addNullableField("field1", FieldType.INT64).build();
+    PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValue((Long) null).build());
 
     pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
   }
@@ -3870,8 +4001,6 @@
   }
 
   @Test
-  @Ignore(
-      "Null values are not handled properly, so BIT_XOR is temporarily removed from SupportedZetaSqlBuiltinFunctions. https://issues.apache.org/jira/browse/BEAM-10379")
   public void testZetaSQLBitXor() {
     String sql = "SELECT BIT_XOR(x) AS bit_xor FROM UNNEST([5678, 1234]) AS x";
     PCollection<Row> stream = execute(sql);
@@ -3883,6 +4012,28 @@
   }
 
   @Test
+  public void testZetaSQLBitXorEmpty() {
+    String sql = "SELECT bit_xor(CAST(x as int64)) FROM (SELECT 1 x FROM (SELECT 1) WHERE false);";
+    PCollection<Row> stream = execute(sql);
+
+    Schema schema = Schema.builder().addNullableField("field1", FieldType.INT64).build();
+    PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValue((Long) null).build());
+
+    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+  }
+
+  @Test
+  public void testZetaSQLBitXorNull() {
+    String sql = "SELECT bit_xor(x) FROM (SELECT CAST(NULL AS int64) x);";
+    PCollection<Row> stream = execute(sql);
+
+    Schema schema = Schema.builder().addNullableField("field1", FieldType.INT64).build();
+    PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValue((Long) null).build());
+
+    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+  }
+
+  @Test
   public void testCountIfZetaSQLDialect() {
     String sql =
         "WITH is_positive AS ( SELECT x > 0 flag FROM UNNEST([5, -2, 3, 6, -10, -7, 4, 0]) AS x) "
diff --git a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlMathFunctionsTest.java b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlMathFunctionsTest.java
index 830bb84..92e509c 100644
--- a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlMathFunctionsTest.java
+++ b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlMathFunctionsTest.java
@@ -26,7 +26,6 @@
 import org.apache.beam.sdk.values.Row;
 import org.joda.time.Duration;
 import org.junit.Before;
-import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.ExpectedException;
@@ -1002,7 +1001,6 @@
   }
 
   @Test
-  @Ignore("[BEAM-10459] Aggregation functions on NUMERIC is not supported yet")
   public void testSumNumeric() {
     String sql = "SELECT SUM(numeric_field) FROM table_with_numeric";
 
@@ -1019,7 +1017,6 @@
   }
 
   @Test
-  @Ignore("[BEAM-10459] Aggregation functions on NUMERIC is not supported yet")
   public void testAvgNumeric() {
     String sql = "SELECT AVG(numeric_field) FROM table_with_numeric";
 
diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsModule.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsModule.java
index 5c2f1f5..69d5d19 100644
--- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsModule.java
+++ b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsModule.java
@@ -18,9 +18,11 @@
 package org.apache.beam.sdk.io.aws.options;
 
 import com.amazonaws.ClientConfiguration;
+import com.amazonaws.auth.AWSCredentials;
 import com.amazonaws.auth.AWSCredentialsProvider;
 import com.amazonaws.auth.AWSStaticCredentialsProvider;
 import com.amazonaws.auth.BasicAWSCredentials;
+import com.amazonaws.auth.BasicSessionCredentials;
 import com.amazonaws.auth.ClasspathPropertiesFileCredentialsProvider;
 import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
 import com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper;
@@ -66,6 +68,7 @@
 
   private static final String AWS_ACCESS_KEY_ID = "awsAccessKeyId";
   private static final String AWS_SECRET_KEY = "awsSecretKey";
+  private static final String SESSION_TOKEN = "sessionToken";
   private static final String CREDENTIALS_FILE_PATH = "credentialsFilePath";
   public static final String CLIENT_EXECUTION_TIMEOUT = "clientExecutionTimeout";
   public static final String CONNECTION_MAX_IDLE_TIME = "connectionMaxIdleTime";
@@ -119,8 +122,17 @@
       }
 
       if (typeName.equals(AWSStaticCredentialsProvider.class.getSimpleName())) {
-        return new AWSStaticCredentialsProvider(
-            new BasicAWSCredentials(asMap.get(AWS_ACCESS_KEY_ID), asMap.get(AWS_SECRET_KEY)));
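+        // A serialized session token indicates temporary session credentials.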
+        boolean isSession = asMap.containsKey(SESSION_TOKEN);
+        if (isSession) {
+          return new AWSStaticCredentialsProvider(
+              new BasicSessionCredentials(
+                  asMap.get(AWS_ACCESS_KEY_ID),
+                  asMap.get(AWS_SECRET_KEY),
+                  asMap.get(SESSION_TOKEN)));
+        } else {
+          return new AWSStaticCredentialsProvider(
+              new BasicAWSCredentials(asMap.get(AWS_ACCESS_KEY_ID), asMap.get(AWS_SECRET_KEY)));
+        }
       } else if (typeName.equals(PropertiesFileCredentialsProvider.class.getSimpleName())) {
         return new PropertiesFileCredentialsProvider(asMap.get(CREDENTIALS_FILE_PATH));
       } else if (typeName.equals(
@@ -179,11 +191,16 @@
       typeSerializer.writeTypePrefixForObject(credentialsProvider, jsonGenerator);
 
       if (credentialsProvider.getClass().equals(AWSStaticCredentialsProvider.class)) {
-        jsonGenerator.writeStringField(
-            AWS_ACCESS_KEY_ID, credentialsProvider.getCredentials().getAWSAccessKeyId());
-        jsonGenerator.writeStringField(
-            AWS_SECRET_KEY, credentialsProvider.getCredentials().getAWSSecretKey());
-
+        AWSCredentials credentials = credentialsProvider.getCredentials();
+        if (credentials.getClass().equals(BasicSessionCredentials.class)) {
+          BasicSessionCredentials sessionCredentials = (BasicSessionCredentials) credentials;
+          jsonGenerator.writeStringField(AWS_ACCESS_KEY_ID, sessionCredentials.getAWSAccessKeyId());
+          jsonGenerator.writeStringField(AWS_SECRET_KEY, sessionCredentials.getAWSSecretKey());
+          jsonGenerator.writeStringField(SESSION_TOKEN, sessionCredentials.getSessionToken());
+        } else {
+          jsonGenerator.writeStringField(AWS_ACCESS_KEY_ID, credentials.getAWSAccessKeyId());
+          jsonGenerator.writeStringField(AWS_SECRET_KEY, credentials.getAWSSecretKey());
+        }
       } else if (credentialsProvider.getClass().equals(PropertiesFileCredentialsProvider.class)) {
         try {
           PropertiesFileCredentialsProvider specificProvider =
diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/options/AwsModuleTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/options/AwsModuleTest.java
index 9651803..0e318c9 100644
--- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/options/AwsModuleTest.java
+++ b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/options/AwsModuleTest.java
@@ -25,6 +25,7 @@
 import com.amazonaws.auth.AWSCredentialsProvider;
 import com.amazonaws.auth.AWSStaticCredentialsProvider;
 import com.amazonaws.auth.BasicAWSCredentials;
+import com.amazonaws.auth.BasicSessionCredentials;
 import com.amazonaws.auth.ClasspathPropertiesFileCredentialsProvider;
 import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
 import com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper;
@@ -78,6 +79,20 @@
     assertEquals(
         credentialsProvider.getCredentials().getAWSSecretKey(),
         deserializedCredentialsProvider.getCredentials().getAWSSecretKey());
+
+    String sessionToken = "session-token";
+    BasicSessionCredentials sessionCredentials =
+        new BasicSessionCredentials(awsKeyId, awsSecretKey, sessionToken);
+    credentialsProvider = new AWSStaticCredentialsProvider(sessionCredentials);
+    serializedCredentialsProvider = objectMapper.writeValueAsString(credentialsProvider);
+    deserializedCredentialsProvider =
+        objectMapper.readValue(serializedCredentialsProvider, AWSCredentialsProvider.class);
+    BasicSessionCredentials deserializedCredentials =
+        (BasicSessionCredentials) deserializedCredentialsProvider.getCredentials();
+    assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass());
+    assertEquals(deserializedCredentials.getAWSAccessKeyId(), awsKeyId);
+    assertEquals(deserializedCredentials.getAWSSecretKey(), awsSecretKey);
+    assertEquals(deserializedCredentials.getSessionToken(), sessionToken);
   }
 
   @Test
diff --git a/sdks/java/io/amazon-web-services2/build.gradle b/sdks/java/io/amazon-web-services2/build.gradle
index 116a12c..937feb0 100644
--- a/sdks/java/io/amazon-web-services2/build.gradle
+++ b/sdks/java/io/amazon-web-services2/build.gradle
@@ -53,6 +53,7 @@
   compile library.java.commons_lang3
   compile library.java.http_core
   compile library.java.commons_codec
+  runtime library.java.aws_java_sdk2_sts
   testCompile project(path: ":sdks:java:core", configuration: "shadowTest")
   testCompile project(path: ":sdks:java:io:common", configuration: "testRuntime")
   testCompile project(path: ":sdks:java:io:kinesis", configuration: "testRuntime")
diff --git a/sdks/java/io/amazon-web-services2/src/main/java/org/apache/beam/sdk/io/aws2/options/AwsModule.java b/sdks/java/io/amazon-web-services2/src/main/java/org/apache/beam/sdk/io/aws2/options/AwsModule.java
index 5051438..7453d76 100644
--- a/sdks/java/io/amazon-web-services2/src/main/java/org/apache/beam/sdk/io/aws2/options/AwsModule.java
+++ b/sdks/java/io/amazon-web-services2/src/main/java/org/apache/beam/sdk/io/aws2/options/AwsModule.java
@@ -41,7 +41,9 @@
 import org.apache.beam.sdk.io.aws2.s3.SSECustomerKey;
 import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet;
 import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentials;
 import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+import software.amazon.awssdk.auth.credentials.AwsSessionCredentials;
 import software.amazon.awssdk.auth.credentials.ContainerCredentialsProvider;
 import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
 import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider;
@@ -64,6 +66,7 @@
 public class AwsModule extends SimpleModule {
   private static final String ACCESS_KEY_ID = "accessKeyId";
   private static final String SECRET_ACCESS_KEY = "secretAccessKey";
+  private static final String SESSION_TOKEN = "sessionToken";
   public static final String CONNECTION_ACQUIRE_TIMEOUT = "connectionAcquisitionTimeout";
   public static final String CONNECTION_MAX_IDLE_TIMEOUT = "connectionMaxIdleTime";
   public static final String CONNECTION_TIMEOUT = "connectionTimeout";
@@ -107,10 +110,18 @@
         throw new IOException(
             String.format("AWS credentials provider type name key '%s' not found", typeNameKey));
       }
-
       if (typeName.equals(StaticCredentialsProvider.class.getSimpleName())) {
-        return StaticCredentialsProvider.create(
-            AwsBasicCredentials.create(asMap.get(ACCESS_KEY_ID), asMap.get(SECRET_ACCESS_KEY)));
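+        // If a session token was serialized, rebuild AwsSessionCredentials rather than basic credentials.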
+        boolean isSession = asMap.containsKey(SESSION_TOKEN);
+        if (isSession) {
+          return StaticCredentialsProvider.create(
+              AwsSessionCredentials.create(
+                  asMap.get(ACCESS_KEY_ID),
+                  asMap.get(SECRET_ACCESS_KEY),
+                  asMap.get(SESSION_TOKEN)));
+        } else {
+          return StaticCredentialsProvider.create(
+              AwsBasicCredentials.create(asMap.get(ACCESS_KEY_ID), asMap.get(SECRET_ACCESS_KEY)));
+        }
       } else if (typeName.equals(DefaultCredentialsProvider.class.getSimpleName())) {
         return DefaultCredentialsProvider.create();
       } else if (typeName.equals(EnvironmentVariableCredentialsProvider.class.getSimpleName())) {
@@ -158,10 +169,16 @@
       // BEAM-11958 Use deprecated Jackson APIs to be compatible with older versions of jackson
       typeSerializer.writeTypePrefixForObject(credentialsProvider, jsonGenerator);
       if (credentialsProvider.getClass().equals(StaticCredentialsProvider.class)) {
-        jsonGenerator.writeStringField(
-            ACCESS_KEY_ID, credentialsProvider.resolveCredentials().accessKeyId());
-        jsonGenerator.writeStringField(
-            SECRET_ACCESS_KEY, credentialsProvider.resolveCredentials().secretAccessKey());
+        AwsCredentials credentials = credentialsProvider.resolveCredentials();
+        if (credentials.getClass().equals(AwsSessionCredentials.class)) {
+          AwsSessionCredentials sessionCredentials = (AwsSessionCredentials) credentials;
+          jsonGenerator.writeStringField(ACCESS_KEY_ID, sessionCredentials.accessKeyId());
+          jsonGenerator.writeStringField(SECRET_ACCESS_KEY, sessionCredentials.secretAccessKey());
+          jsonGenerator.writeStringField(SESSION_TOKEN, sessionCredentials.sessionToken());
+        } else {
+          jsonGenerator.writeStringField(ACCESS_KEY_ID, credentials.accessKeyId());
+          jsonGenerator.writeStringField(SECRET_ACCESS_KEY, credentials.secretAccessKey());
+        }
       } else if (!SINGLETON_CREDENTIAL_PROVIDERS.contains(credentialsProvider.getClass())) {
         throw new IllegalArgumentException(
             "Unsupported AWS credentials provider type " + credentialsProvider.getClass());
diff --git a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/options/AwsModuleTest.java b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/options/AwsModuleTest.java
index 2f0e038..9414e24 100644
--- a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/options/AwsModuleTest.java
+++ b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/options/AwsModuleTest.java
@@ -33,6 +33,7 @@
 import org.junit.runners.JUnit4;
 import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
 import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+import software.amazon.awssdk.auth.credentials.AwsSessionCredentials;
 import software.amazon.awssdk.auth.credentials.ContainerCredentialsProvider;
 import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
 import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider;
@@ -68,6 +69,20 @@
     assertEquals(
         credentialsProvider.resolveCredentials().secretAccessKey(),
         deserializedCredentialsProvider.resolveCredentials().secretAccessKey());
+
+    AwsSessionCredentials sessionCredentials =
+        AwsSessionCredentials.create("key-id", "secret-key", "session-token");
+    credentialsProvider = StaticCredentialsProvider.create(sessionCredentials);
+    serializedCredentialsProvider = objectMapper.writeValueAsString(credentialsProvider);
+    deserializedCredentialsProvider =
+        objectMapper.readValue(serializedCredentialsProvider, AwsCredentialsProvider.class);
+
+    assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass());
+    AwsSessionCredentials deserializedCredentials =
+        (AwsSessionCredentials) deserializedCredentialsProvider.resolveCredentials();
+    assertEquals(sessionCredentials.accessKeyId(), deserializedCredentials.accessKeyId());
+    assertEquals(sessionCredentials.secretAccessKey(), deserializedCredentials.secretAccessKey());
+    assertEquals(sessionCredentials.sessionToken(), deserializedCredentials.sessionToken());
   }
 
   @Test
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/healthcare/HL7v2Message.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/healthcare/HL7v2Message.java
index 1a2a0b0..b9a72f3 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/healthcare/HL7v2Message.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/healthcare/HL7v2Message.java
@@ -80,7 +80,9 @@
     out.setCreateTime(this.getCreateTime());
     out.setData(this.getData());
     out.setSendFacility(this.getSendFacility());
-    out.setSchematizedData(new SchematizedData().setData(this.schematizedData));
+    if (this.schematizedData != null) {
+      out.setSchematizedData(new SchematizedData().setData(this.schematizedData));
+    }
     out.setLabels(this.labels);
     return out;
   }
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/SubscriptionPartitionLoader.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/SubscriptionPartitionLoader.java
index e411d80..866e922 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/SubscriptionPartitionLoader.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/SubscriptionPartitionLoader.java
@@ -92,9 +92,7 @@
                     })
                 .withPollInterval(pollDuration)
                 .withTerminationPerInput(
-                    terminate
-                        ? Watch.Growth.afterTotalOf(pollDuration.multipliedBy(10))
-                        : Watch.Growth.never()));
+                    terminate ? Watch.Growth.afterIterations(10) : Watch.Growth.never()));
     return partitions.apply(
         MapElements.into(TypeDescriptor.of(SubscriptionPartition.class))
             .via(kv -> SubscriptionPartition.of(subscription, kv.getValue())));
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/datastore/RampupThrottlingFnTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/datastore/RampupThrottlingFnTest.java
index 6c57a84..604b5cb 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/datastore/RampupThrottlingFnTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/datastore/RampupThrottlingFnTest.java
@@ -52,11 +52,18 @@
     MockitoAnnotations.openMocks(this);
 
     DateTimeUtils.setCurrentMillisFixed(0);
-    RampupThrottlingFn<Void> rampupThrottlingFn = new RampupThrottlingFn<>(1);
+    RampupThrottlingFn<Void> rampupThrottlingFn =
+        new RampupThrottlingFn<Void>(1) {
+          @Override
+          @Setup
+          public void setup() {
+            super.setup();
+            this.sleeper = mockSleeper;
+          }
+        };
     rampupThrottlingFnTester = DoFnTester.of(rampupThrottlingFn);
     rampupThrottlingFnTester.setCloningBehavior(CloningBehavior.DO_NOT_CLONE);
     rampupThrottlingFnTester.startBundle();
-    rampupThrottlingFn.sleeper = mockSleeper;
     rampupThrottlingFn.throttlingMsecs = mockCounter;
   }
 
diff --git a/sdks/python/apache_beam/dataframe/io.py b/sdks/python/apache_beam/dataframe/io.py
index 5046acc..5ef8e2c 100644
--- a/sdks/python/apache_beam/dataframe/io.py
+++ b/sdks/python/apache_beam/dataframe/io.py
@@ -74,16 +74,19 @@
       splitter=_CsvSplitter(args, kwargs) if splittable else None)
 
 
-def _as_pc(df):
+def _as_pc(df, label=None):
   from apache_beam.dataframe import convert  # avoid circular import
   # TODO(roberwb): Amortize the computation for multiple writes?
-  return convert.to_pcollection(df, yield_elements='pandas')
+  return convert.to_pcollection(df, yield_elements='pandas', label=label)
 
 
 @frame_base.with_docs_from(pd.DataFrame)
-def to_csv(df, path, *args, **kwargs):
-
-  return _as_pc(df) | _WriteToPandas(
+def to_csv(df, path, transform_label=None, *args, **kwargs):
+  label_pc = f"{transform_label} - ToPCollection" if transform_label \
+    else f"ToPCollection(df) - {path}"
+  label_pd = f"{transform_label} - ToPandasDataFrame" if transform_label \
+    else f"WriteToPandas(df) - {path}"
+  return _as_pc(df, label_pc) | label_pd >> _WriteToPandas(
       'to_csv', path, args, kwargs, incremental=True, binary=False)
 
 
diff --git a/sdks/python/apache_beam/dataframe/io_test.py b/sdks/python/apache_beam/dataframe/io_test.py
index 374eb0c..060eebf 100644
--- a/sdks/python/apache_beam/dataframe/io_test.py
+++ b/sdks/python/apache_beam/dataframe/io_test.py
@@ -40,6 +40,10 @@
 from apache_beam.testing.util import assert_that
 
 
+class SimpleRow(typing.NamedTuple):
+  value: int
+
+
 class MyRow(typing.NamedTuple):
   timestamp: int
   value: int
@@ -343,6 +347,31 @@
     # Check that we've read (and removed) every output file
     self.assertEqual(len(glob.glob(f'{output}out.csv*')), 0)
 
+  def test_double_write(self):
+    output = self.temp_dir()
+    with beam.Pipeline() as p:
+      pc1 = p | 'create pc1' >> beam.Create(
+          [SimpleRow(value=i) for i in [1, 2]])
+      pc2 = p | 'create pc2' >> beam.Create(
+          [SimpleRow(value=i) for i in [3, 4]])
+
+      deferred_df1 = convert.to_dataframe(pc1)
+      deferred_df2 = convert.to_dataframe(pc2)
+
+      deferred_df1.to_csv(
+          f'{output}out1.csv',
+          transform_label="Writing to csv PC1",
+          index=False)
+      deferred_df2.to_csv(
+          f'{output}out2.csv',
+          transform_label="Writing to csv PC2",
+          index=False)
+
+    self.assertCountEqual(['value', '1', '2'],
+                          set(self.read_all_lines(output + 'out1.csv*')))
+    self.assertCountEqual(['value', '3', '4'],
+                          set(self.read_all_lines(output + 'out2.csv*')))
+
 
 if __name__ == '__main__':
   unittest.main()
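
The `transform_label` plumbing above exists because Beam requires every transform label to be unique within a pipeline; before this change, two `to_csv` calls on the same pipeline collided on the auto-generated labels (which is what `test_double_write` exercises). A minimal sketch, independent of the DataFrame API, of the error that duplicate labels trigger; the label 'ReadInput' is illustrative:

import apache_beam as beam

with beam.Pipeline() as p:
  _ = p | 'ReadInput' >> beam.Create([1, 2, 3])
  try:
    # Re-using a label that already exists in the pipeline is rejected.
    _ = p | 'ReadInput' >> beam.Create([4, 5, 6])
  except RuntimeError as e:
    print('duplicate label rejected:', e)
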
diff --git a/sdks/python/apache_beam/dataframe/schemas.py b/sdks/python/apache_beam/dataframe/schemas.py
index bd889da..200521b 100644
--- a/sdks/python/apache_beam/dataframe/schemas.py
+++ b/sdks/python/apache_beam/dataframe/schemas.py
@@ -234,7 +234,7 @@
     all_series = self._get_series(df)
     iterators = [
         make_null_checking_generator(series) for series,
-        typehint in zip(all_series, self._namedtuple_ctor._field_types)
+        typehint in zip(all_series, self._namedtuple_ctor.__annotations__)
     ]
 
     # TODO: Avoid materializing the rows. Produce an object that references the
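
The `schemas.py` change above swaps the private `_field_types` attribute for `__annotations__`, since `_field_types` was deprecated and then removed from `typing.NamedTuple` classes in newer Python releases. A small sketch of what the replacement attribute holds; the `Point` class is illustrative:

import typing

class Point(typing.NamedTuple):
  x: int
  y: float

print(Point.__annotations__)        # {'x': <class 'int'>, 'y': <class 'float'>}
print(list(Point.__annotations__))  # ['x', 'y'] -- the field names, in declaration order
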
diff --git a/sdks/python/apache_beam/pipeline.py b/sdks/python/apache_beam/pipeline.py
index 287ec49..c83bc5c 100644
--- a/sdks/python/apache_beam/pipeline.py
+++ b/sdks/python/apache_beam/pipeline.py
@@ -96,6 +96,7 @@
 from apache_beam.utils import subprocess_server
 from apache_beam.utils.annotations import deprecated
 from apache_beam.utils.interactive_utils import alter_label_if_ipython
+from apache_beam.utils.interactive_utils import is_in_ipython
 
 if TYPE_CHECKING:
   from types import TracebackType
@@ -565,7 +566,9 @@
           shutil.rmtree(tmpdir)
       return self.runner.run_pipeline(self, self._options)
     finally:
-      shutil.rmtree(self.local_tempdir, ignore_errors=True)
+      if not is_in_ipython():
+        shutil.rmtree(self.local_tempdir, ignore_errors=True)
+      # else Interactive Beam handles the cleanup.
 
   def __enter__(self):
     # type: () -> Pipeline
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
index bbaf52c..29a6016 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
@@ -508,6 +508,13 @@
       # in the proto representation of the graph.
       pipeline.replace_all(DataflowRunner._NON_PORTABLE_PTRANSFORM_OVERRIDES)
 
+    # Always upload the graph out-of-band when explicitly using runner v2
+    # with use_portable_job_submission to avoid irrelevant graph size limits.
+    if (apiclient._use_unified_worker(debug_options) and
+        debug_options.lookup_experiment('use_portable_job_submission') and
+        not debug_options.lookup_experiment('upload_graph')):
+      debug_options.add_experiment("upload_graph")
+
     # Add setup_options for all the BeamPlugin imports
     setup_options = options.view_as(SetupOptions)
     plugins = BeamPlugin.get_all_plugin_paths()
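
The runner change above only touches experiment flags, all of which are reachable through the public `DebugOptions` API. A minimal sketch, not tied to a real Dataflow job, of how the same check-and-add logic behaves on a set of pipeline options:

from apache_beam.options.pipeline_options import DebugOptions, PipelineOptions

options = PipelineOptions([
    '--experiments=use_runner_v2',
    '--experiments=use_portable_job_submission',
])
debug_options = options.view_as(DebugOptions)

# Mirrors the condition added in dataflow_runner.py (minus the unified-worker check).
if (debug_options.lookup_experiment('use_portable_job_submission') and
    not debug_options.lookup_experiment('upload_graph')):
  debug_options.add_experiment('upload_graph')

print(debug_options.experiments)  # now includes 'upload_graph'
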
diff --git a/sdks/python/apache_beam/runners/direct/executor.py b/sdks/python/apache_beam/runners/direct/executor.py
index bfcb47f..8b47b0b 100644
--- a/sdks/python/apache_beam/runners/direct/executor.py
+++ b/sdks/python/apache_beam/runners/direct/executor.py
@@ -23,7 +23,6 @@
 import itertools
 import logging
 import queue
-import sys
 import threading
 import traceback
 from typing import TYPE_CHECKING
@@ -478,8 +477,7 @@
     update = self.visible_updates.take()
     try:
       if update.exception:
-        t, v, tb = update.exc_info
-        raise t(v).with_traceback(tb)
+        raise update.exception
     finally:
       self.executor_service.shutdown()
       self.executor_service.await_completion()
@@ -576,10 +574,6 @@
       self.committed_bundle = committed_bundle
       self.unprocessed_bundle = unprocessed_bundle
       self.exception = exception
-      self.exc_info = sys.exc_info()
-      if self.exc_info[1] is not exception:
-        # Not the right exception.
-        self.exc_info = (exception, None, None)
 
   class _VisibleExecutorUpdate(object):
     """An update of interest to the user.
@@ -587,10 +581,9 @@
     Used for awaiting the completion to decide whether to return normally or
     raise an exception.
     """
-    def __init__(self, exc_info=(None, None, None)):
-      self.finished = exc_info[0] is not None
-      self.exception = exc_info[1] or exc_info[0]
-      self.exc_info = exc_info
+    def __init__(self, exception=None):
+      self.finished = exception is not None
+      self.exception = exception
 
   class _MonitorTask(_ExecutorService.CallableTask):
     """MonitorTask continuously runs to ensure that pipeline makes progress."""
@@ -618,7 +611,7 @@
                 'A task failed with exception: %s', update.exception)
             self._executor.visible_updates.offer(
                 _ExecutorServiceParallelExecutor._VisibleExecutorUpdate(
-                    update.exc_info))
+                    update.exception))
           update = self._executor.all_updates.poll()
         self._executor.evaluation_context.schedule_pending_unblocked_tasks(
             self._executor.executor_service)
@@ -626,8 +619,7 @@
       except Exception as e:  # pylint: disable=broad-except
         _LOGGER.error('Monitor task died due to exception.\n %s', e)
         self._executor.visible_updates.offer(
-            _ExecutorServiceParallelExecutor._VisibleExecutorUpdate(
-                sys.exc_info()))
+            _ExecutorServiceParallelExecutor._VisibleExecutorUpdate(e))
       finally:
         if not self._should_shutdown():
           self._executor.executor_service.submit(self)
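
The executor now stores the exception object instead of a `sys.exc_info()` tuple. On Python 3 this loses nothing, because a caught exception carries its traceback on `__traceback__`, so re-raising the stored object later still points at the original failure. A small sketch, independent of Beam, of that behavior:

import traceback

stored = None
try:
  1 / 0
except ZeroDivisionError as exc:
  stored = exc  # exc.__traceback__ stays attached to the instance

try:
  raise stored  # what await_completion() effectively does now
except ZeroDivisionError:
  traceback.print_exc()  # the printed stack still includes the original '1 / 0' frame
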
diff --git a/sdks/python/apache_beam/runners/interactive/augmented_pipeline.py b/sdks/python/apache_beam/runners/interactive/augmented_pipeline.py
index 3243163..37f914b 100644
--- a/sdks/python/apache_beam/runners/interactive/augmented_pipeline.py
+++ b/sdks/python/apache_beam/runners/interactive/augmented_pipeline.py
@@ -86,9 +86,7 @@
         if (isinstance(val, beam.pvalue.PCollection) and
             val.pipeline is self._user_pipeline and
             (not self._pcolls or val in self._pcolls)):
-          pcoll_id = self._context.pcollections.get_id(val)
           c[val] = Cacheable(
-              pcoll_id=pcoll_id,
               var=key,
               pcoll=val,
               version=str(id(val)),
diff --git a/sdks/python/apache_beam/runners/interactive/background_caching_job.py b/sdks/python/apache_beam/runners/interactive/background_caching_job.py
index c08c5d6..5219538 100644
--- a/sdks/python/apache_beam/runners/interactive/background_caching_job.py
+++ b/sdks/python/apache_beam/runners/interactive/background_caching_job.py
@@ -44,6 +44,7 @@
 
 import apache_beam as beam
 from apache_beam.runners.interactive import interactive_environment as ie
+from apache_beam.runners.interactive import utils
 from apache_beam.runners.interactive.caching import streaming_cache
 from apache_beam.runners.runner import PipelineState
 
@@ -221,10 +222,9 @@
   Throughout the check, if source-to-cache has changed from the last check, it
   also cleans up the invalidated cache early on.
   """
-  from apache_beam.runners.interactive import pipeline_instrument as instr
   # TODO(BEAM-8335): we temporarily only cache replaceable unbounded sources.
   # Add logic for other cacheable sources here when they are available.
-  has_cache = instr.has_unbounded_sources(user_pipeline)
+  has_cache = utils.has_unbounded_sources(user_pipeline)
   if has_cache:
     if not isinstance(ie.current_env().get_cache_manager(user_pipeline,
                                                          create_if_absent=True),
@@ -331,10 +331,9 @@
 
   A signature is a str representation of urn and payload of a source.
   """
-  from apache_beam.runners.interactive import pipeline_instrument as instr
   # TODO(BEAM-8335): we temporarily only cache replaceable unbounded sources.
   # Add logic for other cacheable sources here when they are available.
-  unbounded_sources_as_applied_transforms = instr.unbounded_sources(
+  unbounded_sources_as_applied_transforms = utils.unbounded_sources(
       user_pipeline)
   unbounded_sources_as_ptransforms = set(
       map(lambda x: x.transform, unbounded_sources_as_applied_transforms))
diff --git a/sdks/python/apache_beam/runners/interactive/caching/cacheable.py b/sdks/python/apache_beam/runners/interactive/caching/cacheable.py
index 96663a7..f69324e 100644
--- a/sdks/python/apache_beam/runners/interactive/caching/cacheable.py
+++ b/sdks/python/apache_beam/runners/interactive/caching/cacheable.py
@@ -26,24 +26,22 @@
 from dataclasses import dataclass
 
 import apache_beam as beam
-from apache_beam.runners.interactive.utils import obfuscate
 
 
 @dataclass
 class Cacheable:
-  pcoll_id: str
   var: str
   version: str
-  pcoll: beam.pvalue.PCollection
   producer_version: str
+  pcoll: beam.pvalue.PCollection
 
   def __hash__(self):
-    return hash((
-        self.pcoll_id,
-        self.var,
-        self.version,
-        self.pcoll,
-        self.producer_version))
+    return hash((self.var, self.version, self.producer_version, self.pcoll))
+
+  @staticmethod
+  def from_pcoll(
+      pcoll_name: str, pcoll: beam.pvalue.PCollection) -> 'Cacheable':
+    return Cacheable(pcoll_name, str(id(pcoll)), str(id(pcoll.producer)), pcoll)
 
   def to_key(self):
     return CacheKey(
@@ -55,22 +53,50 @@
 
 @dataclass
 class CacheKey:
+  """The identifier of a cacheable PCollection in cache.
+
+  It contains 4 stringified components:
+  var: The obfuscated variable name of the PCollection.
+  version: The id() of the PCollection.
+  producer_version: The id() of the producer of the PCollection.
+  pipeline_id: The id() of the pipeline the PCollection belongs to.
+  """
   var: str
   version: str
   producer_version: str
   pipeline_id: str
 
   def __post_init__(self):
+    from apache_beam.runners.interactive.utils import obfuscate
     # Normalize arbitrary variable name to a fixed length hex str.
     self.var = obfuscate(self.var)[:10]
 
+  def __hash__(self):
+    return hash(
+        (self.var, self.version, self.producer_version, self.pipeline_id))
+
   @staticmethod
-  def from_str(r):
+  def from_str(r: str) -> 'CacheKey':
     r_split = r.split('-')
     ck = CacheKey(*r_split)
+    # Avoid double obfuscation.
     ck.var = r_split[0]
     return ck
 
-  def __repr__(self):
+  @staticmethod
+  def from_pcoll(pcoll_name: str, pcoll: beam.pvalue.PCollection) -> 'CacheKey':
+    return CacheKey(
+        pcoll_name,
+        str(id(pcoll)),
+        str(id(pcoll.producer)),
+        str(id(pcoll.pipeline)))
+
+  def to_str(self):
     return '-'.join(
         [self.var, self.version, self.producer_version, self.pipeline_id])
+
+  def __repr__(self):
+    return self.to_str()
+
+  def __str__(self):
+    return self.to_str()
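
With the additions above, a `CacheKey` can be built straight from a user PCollection and round-tripped through its string form. A minimal sketch; the pipeline and the variable name 'squares' are illustrative:

import apache_beam as beam
from apache_beam.runners.interactive.caching.cacheable import CacheKey

p = beam.Pipeline()
squares = p | beam.Create([1, 2, 3]) | beam.Map(lambda x: x * x)

key = CacheKey.from_pcoll('squares', squares)
print(key.to_str())  # '<obfuscated var>-<version>-<producer_version>-<pipeline_id>'
assert CacheKey.from_str(key.to_str()) == key  # from_str avoids double obfuscation
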
diff --git a/sdks/python/apache_beam/runners/interactive/caching/read_cache.py b/sdks/python/apache_beam/runners/interactive/caching/read_cache.py
index b23681d..cf0859d 100644
--- a/sdks/python/apache_beam/runners/interactive/caching/read_cache.py
+++ b/sdks/python/apache_beam/runners/interactive/caching/read_cache.py
@@ -27,6 +27,7 @@
 from apache_beam.portability.api import beam_runner_api_pb2
 from apache_beam.runners.interactive import cache_manager as cache
 from apache_beam.runners.interactive.caching.cacheable import Cacheable
+from apache_beam.runners.interactive.caching.reify import unreify_from_cache
 from apache_beam.runners.pipeline_context import PipelineContext
 from apache_beam.transforms.ptransform import PTransform
 
@@ -45,7 +46,6 @@
     self._cache_manager = cache_manager
     self._cacheable = cacheable
     self._key = repr(cacheable.to_key())
-    self._label = '{}{}'.format('_cache_', self._key)
 
   def read_cache(self) -> Tuple[str, str]:
     """Reads cache of the cacheable PCollection and wires the cache into the
@@ -119,28 +119,22 @@
 
   def _build_runner_api_template(
       self) -> Tuple[beam_runner_api_pb2.Pipeline, beam.pvalue.PCollection]:
-    transform = _ReadCacheTransform(self._cache_manager, self._key, self._label)
+    transform = _ReadCacheTransform(self._cache_manager, self._key)
     tmp_pipeline = beam.Pipeline()
     tmp_pipeline.component_id_map = self._context.component_id_map
-    read_output = tmp_pipeline | 'source' + self._label >> transform
+    read_output = tmp_pipeline | 'source_cache_' >> transform
     return tmp_pipeline.to_runner_api(), read_output
 
 
 class _ReadCacheTransform(PTransform):
   """A composite transform encapsulates reading cache of PCollections.
   """
-  def __init__(self, cache_manager: cache.CacheManager, key: str, label: str):
+  def __init__(self, cache_manager: cache.CacheManager, key: str):
     self._cache_manager = cache_manager
     self._key = key
-    self._label = label
 
   def expand(self, pcoll: beam.pvalue.PCollection) -> beam.pvalue.PCollection:
-    class Unreify(beam.DoFn):
-      def process(self, e):
-        yield e.windowed_value
-
-    return (
-        pcoll.pipeline
-        |
-        'read' + self._label >> cache.ReadCache(self._cache_manager, self._key)
-        | 'unreify' + self._label >> beam.ParDo(Unreify()))
+    return unreify_from_cache(
+        pipeline=pcoll.pipeline,
+        cache_key=self._key,
+        cache_manager=self._cache_manager)
diff --git a/sdks/python/apache_beam/runners/interactive/caching/read_cache_test.py b/sdks/python/apache_beam/runners/interactive/caching/read_cache_test.py
index aa2ed20..d32c265 100644
--- a/sdks/python/apache_beam/runners/interactive/caching/read_cache_test.py
+++ b/sdks/python/apache_beam/runners/interactive/caching/read_cache_test.py
@@ -69,9 +69,8 @@
     actual_pipeline = pipeline_proto
 
     # Read cache directly on the pipeline instance.
-    label = '{}{}'.format('_cache_', key)
-    transform = read_cache._ReadCacheTransform(aug_p._cache_manager, key, label)
-    p | 'source' + label >> transform
+    transform = read_cache._ReadCacheTransform(aug_p._cache_manager, key)
+    p | 'source_cache_' + key >> transform
     expected_pipeline = p.to_runner_api()
 
     # This roughly checks the equivalence between two protos, not detailed
diff --git a/sdks/python/apache_beam/runners/interactive/caching/reify.py b/sdks/python/apache_beam/runners/interactive/caching/reify.py
new file mode 100644
index 0000000..ce82785
--- /dev/null
+++ b/sdks/python/apache_beam/runners/interactive/caching/reify.py
@@ -0,0 +1,117 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Module for transforms that reifies and unreifies PCollection values with
+window info.
+
+For internal use only; no backwards-compatibility guarantees.
+"""
+
+# pytype: skip-file
+
+from typing import Optional
+
+import apache_beam as beam
+from apache_beam.runners.interactive import cache_manager as cache
+from apache_beam.testing import test_stream
+from apache_beam.transforms.window import WindowedValue
+
+READ_CACHE = 'ReadCache_'
+WRITE_CACHE = 'WriteCache_'
+
+
+class Reify(beam.DoFn):
+  """Reifies elements with window info into windowed values.
+
+  Internally used to capture window info with each element into cache for
+  replayability.
+  """
+  def process(
+      self,
+      e,
+      w=beam.DoFn.WindowParam,
+      p=beam.DoFn.PaneInfoParam,
+      t=beam.DoFn.TimestampParam):
+    yield test_stream.WindowedValueHolder(WindowedValue(e, t, [w], p))
+
+
+class Unreify(beam.DoFn):
+  """Unreifies elements from windowed values.
+
+  Cached values are elements with window info. This unpacks the elements.
+  """
+  def process(self, e):
+    # Row coder was used when encoding windowed values.
+    if isinstance(e, beam.Row) and hasattr(e, 'windowed_value'):
+      yield e.windowed_value
+
+
+def reify_to_cache(
+    pcoll: beam.pvalue.PCollection,
+    cache_key: str,
+    cache_manager: cache.CacheManager,
+    reify_label: Optional[str] = None,
+    write_cache_label: Optional[str] = None,
+    is_capture: bool = False) -> beam.pvalue.PValue:
+  """Reifies elements into windowed values and write to cache.
+
+  Args:
+    pcoll: The PCollection to be cached.
+    cache_key: The key of the cache.
+    cache_manager: The cache manager to manage the cache.
+    reify_label: (optional) A transform label for the Reify transform.
+    write_cache_label: (optional) A transform label for the cache-writing
+      transform.
+    is_capture: Whether the cache is capturing a record of recordable sources.
+  """
+  if not reify_label:
+    reify_label = '{}{}{}'.format('ReifyBefore_', WRITE_CACHE, cache_key)
+  if not write_cache_label:
+    write_cache_label = '{}{}'.format(WRITE_CACHE, cache_key)
+  return (
+      pcoll | reify_label >> beam.ParDo(Reify())
+      | write_cache_label >> cache.WriteCache(
+          cache_manager, cache_key, is_capture=is_capture))
+
+
+def unreify_from_cache(
+    pipeline: beam.Pipeline,
+    cache_key: str,
+    cache_manager: cache.CacheManager,
+    element_type: Optional[type] = None,
+    source_label: Optional[str] = None,
+    unreify_label: Optional[str] = None) -> beam.pvalue.PCollection:
+  """Reads from cache and unreifies elements from windowed values.
+
+  pipeline: The pipeline that's reading from the cache.
+  cache_key: The key of the cache.
+  cache_manager: The cache manager to manage the cache.
+  element_type: (optional) The element type of the PCollection's elements.
+  source_label: (optional) A transform label for the cache-reading transform.
+  unreify_label: (optional) A transform label for the Unreify transform.
+  """
+  if not source_label:
+    source_label = '{}{}'.format(READ_CACHE, cache_key)
+  if not unreify_label:
+    unreify_label = '{}{}{}'.format('UnreifyAfter_', READ_CACHE, cache_key)
+  read_cache = pipeline | source_label >> cache.ReadCache(
+      cache_manager, cache_key)
+  if element_type:
+    # If the PCollection is schema-aware, explicitly set the output types.
+    return read_cache | unreify_label >> beam.ParDo(
+        Unreify()).with_output_types(element_type)
+  return read_cache | unreify_label >> beam.ParDo(Unreify())
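
The new module centralizes the `Reify`/`Unreify` DoFns that were previously duplicated in `read_cache.py`, `write_cache.py`, and `pipeline_instrument.py`. A minimal sketch of the round trip at the element level, leaving the actual cache wiring (handled by `reify_to_cache`/`unreify_from_cache`) out for brevity:

import apache_beam as beam
from apache_beam.runners.interactive.caching.reify import Reify, Unreify

with beam.Pipeline() as p:
  _ = (
      p
      | beam.Create(['a', 'b'])
      | 'Reify' >> beam.ParDo(Reify())      # wraps each element in a WindowedValueHolder
      | 'Unreify' >> beam.ParDo(Unreify())  # yields the WindowedValue back, window intact
      | beam.Map(print))
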
diff --git a/sdks/python/apache_beam/runners/interactive/caching/write_cache.py b/sdks/python/apache_beam/runners/interactive/caching/write_cache.py
index 94effdf..d398e70 100644
--- a/sdks/python/apache_beam/runners/interactive/caching/write_cache.py
+++ b/sdks/python/apache_beam/runners/interactive/caching/write_cache.py
@@ -27,10 +27,9 @@
 from apache_beam.portability.api import beam_runner_api_pb2
 from apache_beam.runners.interactive import cache_manager as cache
 from apache_beam.runners.interactive.caching.cacheable import Cacheable
+from apache_beam.runners.interactive.caching.reify import reify_to_cache
 from apache_beam.runners.pipeline_context import PipelineContext
-from apache_beam.testing import test_stream
 from apache_beam.transforms.ptransform import PTransform
-from apache_beam.transforms.window import WindowedValue
 
 
 class WriteCache:
@@ -46,8 +45,7 @@
     self._context = context
     self._cache_manager = cache_manager
     self._cacheable = cacheable
-    self._key = repr(cacheable.to_key())
-    self._label = '{}{}'.format('_cache_', self._key)
+    self._key = cacheable.to_key().to_str()
 
   def write_cache(self) -> None:
     """Writes cache for the cacheable PCollection that is being computed.
@@ -129,35 +127,21 @@
   def _build_runner_api_template(
       self) -> Tuple[beam_runner_api_pb2.Pipeline, '_PCollectionPlaceHolder']:
     pph = _PCollectionPlaceHolder(self._cacheable.pcoll, self._context)
-    transform = _WriteCacheTransform(
-        self._cache_manager, self._key, self._label)
-    _ = pph.placeholder_pcoll | 'sink' + self._label >> transform
+    transform = _WriteCacheTransform(self._cache_manager, self._key)
+    _ = pph.placeholder_pcoll | 'sink_cache_' + self._key >> transform
     return pph.placeholder_pcoll.pipeline.to_runner_api(), pph
 
 
 class _WriteCacheTransform(PTransform):
   """A composite transform encapsulates writing cache for PCollections.
   """
-  def __init__(self, cache_manager: cache.CacheManager, key: str, label: str):
+  def __init__(self, cache_manager: cache.CacheManager, key: str):
     self._cache_manager = cache_manager
     self._key = key
-    self._label = label
 
-  def expand(self, pcoll: beam.pvalue.PCollection) -> beam.pvalue.PCollection:
-    class Reify(beam.DoFn):
-      def process(
-          self,
-          e,
-          w=beam.DoFn.WindowParam,
-          p=beam.DoFn.PaneInfoParam,
-          t=beam.DoFn.TimestampParam):
-        yield test_stream.WindowedValueHolder(WindowedValue(e, t, [w], p))
-
-    return (
-        pcoll
-        | 'reify' + self._label >> beam.ParDo(Reify())
-        | 'write' + self._label >> cache.WriteCache(
-            self._cache_manager, self._key, is_capture=False))
+  def expand(self, pcoll: beam.pvalue.PCollection) -> beam.pvalue.PValue:
+    return reify_to_cache(
+        pcoll=pcoll, cache_key=self._key, cache_manager=self._cache_manager)
 
 
 class _PCollectionPlaceHolder:
diff --git a/sdks/python/apache_beam/runners/interactive/caching/write_cache_test.py b/sdks/python/apache_beam/runners/interactive/caching/write_cache_test.py
index af8dc7b..588efdc 100644
--- a/sdks/python/apache_beam/runners/interactive/caching/write_cache_test.py
+++ b/sdks/python/apache_beam/runners/interactive/caching/write_cache_test.py
@@ -57,10 +57,8 @@
     actual_pipeline = pipeline_proto
 
     # Write cache directly on the pipeline instance.
-    label = '{}{}'.format('_cache_', key)
-    transform = write_cache._WriteCacheTransform(
-        aug_p._cache_manager, key, label)
-    _ = pcoll | 'sink' + label >> transform
+    transform = write_cache._WriteCacheTransform(aug_p._cache_manager, key)
+    _ = pcoll | 'sink_cache_' + key >> transform
     expected_pipeline = p.to_runner_api()
 
     assert_pipeline_proto_equal(self, expected_pipeline, actual_pipeline)
diff --git a/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization.py b/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization.py
index 89800d8..9e071fe 100644
--- a/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization.py
+++ b/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization.py
@@ -30,6 +30,7 @@
 
 from dateutil import tz
 
+import apache_beam as beam
 from apache_beam.runners.interactive import interactive_environment as ie
 from apache_beam.runners.interactive.utils import elements_to_df
 from apache_beam.transforms.window import GlobalWindow
@@ -236,6 +237,22 @@
   return None
 
 
+def visualize_computed_pcoll(
+    pcoll_name: str, pcoll: beam.pvalue.PCollection) -> None:
+  """A simple visualize alternative.
+
+  When the pcoll_name and pcoll pair identifies a watched and computed
+  PCollection in the current interactive environment without ambiguity, an
+  ElementStream can be built directly from cache.
+  """
+  pipeline = ie.current_env().user_pipeline(pcoll.pipeline)
+  rm = ie.current_env().get_recording_manager(pipeline, create_if_absent=True)
+  stream = rm.read(
+      pcoll_name, pcoll, max_n=float('inf'), max_duration_secs=float('inf'))
+  if stream:
+    visualize(stream, element_type=pcoll.element_type)
+
+
 class PCollectionVisualization(object):
   """A visualization of a PCollection.
 
diff --git a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py
index d084569..a94113d 100644
--- a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py
+++ b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py
@@ -182,8 +182,8 @@
       for pcoll_id in transform.outputs.values():
         pcoll_node = None
         if self._pipeline_instrument:
-          pcoll_node = self._pipeline_instrument.cacheable_var_by_pcoll_id(
-              pcoll_id)
+          cacheable = self._pipeline_instrument.cacheables.get(pcoll_id)
+          pcoll_node = cacheable.var if cacheable else None
         # If no PipelineInstrument is available or the PCollection is not
         # watched.
         if not pcoll_node:
diff --git a/sdks/python/apache_beam/runners/interactive/interactive_environment.py b/sdks/python/apache_beam/runners/interactive/interactive_environment.py
index 102f10a..e22dee7 100644
--- a/sdks/python/apache_beam/runners/interactive/interactive_environment.py
+++ b/sdks/python/apache_beam/runners/interactive/interactive_environment.py
@@ -201,6 +201,10 @@
     # A singleton inspector instance to message information of current
     # environment to other applications.
     self._inspector = InteractiveEnvironmentInspector()
+    # A similar singleton inspector except it includes synthetic variables
+    # generated by Interactive Beam.
+    self._inspector_with_synthetic = InteractiveEnvironmentInspector(
+        ignore_synthetic=False)
 
   @property
   def options(self):
@@ -235,9 +239,16 @@
   @property
   def inspector(self):
     """Gets the singleton InteractiveEnvironmentInspector to retrieve
-    information consumable by other applications."""
+    information consumable by other applications such as a notebook
+    extension."""
     return self._inspector
 
+  @property
+  def inspector_with_synthetic(self):
+    """Gets the singleton InteractiveEnvironmentInspector with additional
+    synthetic variables generated by Interactive Beam. Internally used."""
+    return self._inspector_with_synthetic
+
   def cleanup(self, pipeline=None):
     """Cleans up cached states for the given pipeline. Noop if the given
     pipeline is absent from the environment. Cleans up for all pipelines
@@ -560,6 +571,8 @@
     """Evicts the user pipeline and its derived pipelines."""
     if user_pipeline:
       self._tracked_user_pipelines.evict(user_pipeline)
+    else:
+      self._tracked_user_pipelines.clear()
 
   def pipeline_id_to_pipeline(self, pid):
     """Converts a pipeline id to a user pipeline.
diff --git a/sdks/python/apache_beam/runners/interactive/interactive_runner.py b/sdks/python/apache_beam/runners/interactive/interactive_runner.py
index 68a66e0..4778737 100644
--- a/sdks/python/apache_beam/runners/interactive/interactive_runner.py
+++ b/sdks/python/apache_beam/runners/interactive/interactive_runner.py
@@ -34,6 +34,7 @@
 from apache_beam.runners.interactive.display import pipeline_graph
 from apache_beam.runners.interactive.options import capture_control
 from apache_beam.runners.interactive.utils import to_element_list
+from apache_beam.runners.interactive.utils import watch_sources
 from apache_beam.testing.test_stream_service import TestStreamServiceController
 
 # size of PCollection samples cached.
@@ -129,7 +130,7 @@
       ie.current_env().evict_computed_pcollections()
 
     # Make sure that sources without a user reference are still cached.
-    inst.watch_sources(pipeline)
+    watch_sources(pipeline)
 
     user_pipeline = ie.current_env().user_pipeline(pipeline)
     pipeline_instrument = inst.build_pipeline_instrument(pipeline, options)
diff --git a/sdks/python/apache_beam/runners/interactive/messaging/interactive_environment_inspector.py b/sdks/python/apache_beam/runners/interactive/messaging/interactive_environment_inspector.py
index ed3dc51..db53e87 100644
--- a/sdks/python/apache_beam/runners/interactive/messaging/interactive_environment_inspector.py
+++ b/sdks/python/apache_beam/runners/interactive/messaging/interactive_environment_inspector.py
@@ -36,16 +36,17 @@
   list_inspectables first then communicates back to the kernel and get_val for
   usage on the kernel side.
   """
-  def __init__(self):
+  def __init__(self, ignore_synthetic=True):
     self._inspectables = {}
     self._anonymous = {}
     self._inspectable_pipelines = set()
+    self._ignore_synthetic = ignore_synthetic
 
   @property
   def inspectables(self):
     """Lists pipelines and pcollections assigned to variables as inspectables.
     """
-    self._inspectables = inspect()
+    self._inspectables = inspect(self._ignore_synthetic)
     return self._inspectables
 
   @property
@@ -136,7 +137,7 @@
     return {}
 
 
-def inspect():
+def inspect(ignore_synthetic=True):
   """Inspects current interactive environment to track metadata and values of
   pipelines and pcollections.
 
@@ -148,7 +149,7 @@
   for watching in ie.current_env().watching():
     for name, value in watching:
       # Ignore synthetic vars created by Interactive Beam itself.
-      if name.startswith('synthetic_var_'):
+      if ignore_synthetic and name.startswith('synthetic_var_'):
         continue
       metadata = meta(name, value)
       identifier = obfuscate(metadata)
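
The `ignore_synthetic` flag controls whether the `synthetic_var_*` entries that Interactive Beam creates for unbounded sources show up in the inspection results. A minimal sketch; the names and values are illustrative and a fresh interactive environment is assumed:

import apache_beam as beam
from apache_beam.runners.interactive import interactive_environment as ie
from apache_beam.runners.interactive.messaging import interactive_environment_inspector as inspector

p = beam.Pipeline()
letters = p | beam.Create(['a', 'b'])
ie.current_env().watch({'letters': letters, 'synthetic_var_demo': letters})

default_view = inspector.inspect()                     # skips synthetic_var_* entries
full_view = inspector.inspect(ignore_synthetic=False)  # keeps them for internal consumers
print(len(full_view) > len(default_view))              # True -- only full_view has the synthetic entry
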
diff --git a/sdks/python/apache_beam/runners/interactive/pipeline_fragment.py b/sdks/python/apache_beam/runners/interactive/pipeline_fragment.py
index 6c0a922..7564a76 100644
--- a/sdks/python/apache_beam/runners/interactive/pipeline_fragment.py
+++ b/sdks/python/apache_beam/runners/interactive/pipeline_fragment.py
@@ -65,7 +65,7 @@
     self._runner_pipeline = self._build_runner_pipeline()
     _, self._context = self._runner_pipeline.to_runner_api(return_context=True)
     from apache_beam.runners.interactive import pipeline_instrument as instr
-    self._runner_pcoll_to_id = instr.pcolls_to_pcoll_id(
+    self._runner_pcoll_to_id = instr.pcoll_to_pcoll_id(
         self._runner_pipeline, self._context)
     # Correlate components in the runner pipeline to components in the user
     # pipeline. The target pcolls are the pcolls given and defined in the user
diff --git a/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py b/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py
index 448fca7..065d555 100644
--- a/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py
+++ b/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py
@@ -23,20 +23,24 @@
 """
 # pytype: skip-file
 
+import logging
+from typing import Dict
+
 import apache_beam as beam
 from apache_beam.pipeline import PipelineVisitor
 from apache_beam.portability.api import beam_runner_api_pb2
-from apache_beam.runners.interactive import cache_manager as cache
 from apache_beam.runners.interactive import interactive_environment as ie
 from apache_beam.runners.interactive import pipeline_fragment as pf
 from apache_beam.runners.interactive import background_caching_job
+from apache_beam.runners.interactive import utils
 from apache_beam.runners.interactive.caching.cacheable import Cacheable
 from apache_beam.runners.interactive.caching.cacheable import CacheKey
+from apache_beam.runners.interactive.caching.reify import WRITE_CACHE
+from apache_beam.runners.interactive.caching.reify import reify_to_cache
+from apache_beam.runners.interactive.caching.reify import unreify_from_cache
 from apache_beam.testing import test_stream
-from apache_beam.transforms.window import WindowedValue
 
-READ_CACHE = "_ReadCache_"
-WRITE_CACHE = "_WriteCache_"
+_LOGGER = logging.getLogger(__name__)
 
 
 class PipelineInstrument(object):
@@ -74,20 +78,13 @@
      context) = self._pipeline.to_runner_api(return_context=True)
 
     # All compute-once-against-original-pipeline fields.
-    self._unbounded_sources = unbounded_sources(
+    self._unbounded_sources = utils.unbounded_sources(
         self._background_caching_pipeline)
-    # TODO(BEAM-7760): once cache scope changed, this is not needed to manage
-    # relationships across pipelines, runners, and jobs.
-    self._pcolls_to_pcoll_id = pcolls_to_pcoll_id(self._pipeline, context)
+    self._pcoll_to_pcoll_id = pcoll_to_pcoll_id(self._pipeline, context)
 
-    # A mapping from PCollection id to python id() value in user defined
-    # pipeline instance.
-    (
-        self._pcoll_version_map,
-        self._cacheables,
-        # A dict from pcoll_id to variable name of the referenced PCollection.
-        # (Dict[str, str])
-        self._cacheable_var_by_pcoll_id) = cacheables(self.pcolls_to_pcoll_id)
+    # A Dict[str, Cacheable] from a PCollection id to a Cacheable that belongs
+    # to the analyzed pipeline.
+    self._cacheables = self.find_cacheables()
 
     # A dict from cache key to PCollection that is read from cache.
     # If exists, caller should reuse the PCollection read. If not, caller
@@ -280,7 +277,7 @@
         return_context=True)
 
     # Get all the sources we want to cache.
-    sources = unbounded_sources(self._background_caching_pipeline)
+    sources = utils.unbounded_sources(self._background_caching_pipeline)
 
     # Get all the root transforms. The caching transforms will be subtransforms
     # of one of these roots.
@@ -337,27 +334,20 @@
     return pipeline_to_execute
 
   @property
-  def has_unbounded_sources(self):
-    """Returns whether the pipeline has any recordable sources.
-    """
-    return len(self._unbounded_sources) > 0
+  def cacheables(self) -> Dict[str, Cacheable]:
+    """Returns the Cacheables by PCollection ids.
 
-  @property
-  def cacheables(self):
-    """Finds cacheable PCollections from the pipeline.
-
-    The function only treats the result as cacheables since there is no
-    guarantee whether PCollections that need to be cached have been cached or
-    not. A PCollection needs to be cached when it's bound to a user defined
-    variable in the source code. Otherwise, the PCollection is not reusable
-    nor introspectable which nullifies the need of cache.
+    If you're already working with user defined pipelines and PCollections,
+    do not build a PipelineInstrument just to get the cacheables. Instead,
+    use apache_beam.runners.interactive.utils.cacheables.
     """
     return self._cacheables
 
   @property
-  def pcolls_to_pcoll_id(self):
-    """Returns a dict mapping str(PCollection)s to IDs."""
-    return self._pcolls_to_pcoll_id
+  def has_unbounded_sources(self):
+    """Returns whether the pipeline has any recordable sources.
+    """
+    return len(self._unbounded_sources) > 0
 
   @property
   def original_pipeline_proto(self):
@@ -384,6 +374,27 @@
     pipeline to instances in the user pipeline."""
     return self._runner_pcoll_to_user_pcoll
 
+  def find_cacheables(self) -> Dict[str, Cacheable]:
+    """Finds PCollections that need to be cached for analyzed pipeline.
+
+    There might be multiple pipelines defined and watched, this will only find
+    cacheables belong to the analyzed pipeline.
+    """
+    result = {}
+    cacheables = utils.cacheables()
+    for _, cacheable in cacheables.items():
+      if cacheable.pcoll.pipeline is not self._user_pipeline:
+        # Ignore all cacheables from other pipelines.
+        continue
+      pcoll_id = self.pcoll_id(cacheable.pcoll)
+      if not pcoll_id:
+        _LOGGER.debug(
+            'Unable to retrieve PCollection id for %s. Ignored.',
+            cacheable.pcoll)
+        continue
+      result[pcoll_id] = cacheable
+    return result
+
   def instrument(self):
     """Instruments original pipeline with cache.
 
@@ -418,13 +429,13 @@
 
     v = InstrumentVisitor(self)
     self._pipeline.visit(v)
-
     # Every output PCollection that is never used as an input PCollection is
     # considered as a side effect of the pipeline run and should be included.
     self._extended_targets.update(all_outputs.difference(all_inputs))
-    # Add the unbounded source pcollections to the cacheable inputs. This allows
+    # Add the unbounded source PCollections to the cacheable inputs. This allows
     # for the caching of unbounded sources without a variable reference.
     cacheable_inputs.update(unbounded_source_pcolls)
+
     # Create ReadCache transforms.
     for cacheable_input in cacheable_inputs:
       self._read_cache(
@@ -435,7 +446,7 @@
     self._replace_with_cached_inputs(self._pipeline)
 
     # Write cache for all cacheables.
-    for _, cacheable in self.cacheables.items():
+    for _, cacheable in self._cacheables.items():
       self._write_cache(
           self._pipeline, cacheable.pcoll, ignore_unbounded_reads=True)
 
@@ -499,13 +510,13 @@
           self._process(out_pcoll)
 
       def _process(self, pcoll):
-        pcoll_id = self._pin.pcolls_to_pcoll_id.get(str(pcoll), '')
-        if pcoll_id in self._pin._pcoll_version_map:
-          cacheable_key = self._pin._cacheable_key(pcoll)
-          user_pcoll = self._pin.cacheables[cacheable_key].pcoll
-          if (cacheable_key in self._pin.cacheables and user_pcoll != pcoll):
+        pcoll_id = self._pin._pcoll_to_pcoll_id.get(str(pcoll), '')
+        if pcoll_id in self._pin._cacheables:
+          pcoll_id = self._pin.pcoll_id(pcoll)
+          user_pcoll = self._pin._cacheables[pcoll_id].pcoll
+          if (pcoll_id in self._pin._cacheables and user_pcoll != pcoll):
             self._pin._runner_pcoll_to_user_pcoll[pcoll] = user_pcoll
-            self._pin.cacheables[cacheable_key].pcoll = pcoll
+            self._pin._cacheables[pcoll_id].pcoll = pcoll
 
     v = PreprocessVisitor(self)
     self._pipeline.visit(v)
@@ -552,29 +563,17 @@
     key = self.cache_key(pcoll)
     # Only need to write when the cache with expected key doesn't exist.
     if not self._cache_manager.exists('full', key):
-      label = '{}{}'.format(WRITE_CACHE, key)
-
       self.cached_pcolls.add(self.runner_pcoll_to_user_pcoll.get(pcoll, pcoll))
-
       # Read the windowing information and cache it along with the element. This
       # caches the arguments to a WindowedValue object because Python has logic
       # that detects if a DoFn returns a WindowedValue. When it detects one, it
       # puts the element into the correct window then emits the value to
       # downstream transforms.
-      class Reify(beam.DoFn):
-        def process(
-            self,
-            e,
-            w=beam.DoFn.WindowParam,
-            p=beam.DoFn.PaneInfoParam,
-            t=beam.DoFn.TimestampParam):
-          yield test_stream.WindowedValueHolder(WindowedValue(e, t, [w], p))
-
-      extended_target = (
-          pcoll
-          | label + 'reify' >> beam.ParDo(Reify())
-          | label >> cache.WriteCache(
-              self._cache_manager, key, is_capture=is_capture))
+      extended_target = reify_to_cache(
+          pcoll=pcoll,
+          cache_key=key,
+          cache_manager=self._cache_manager,
+          is_capture=is_capture)
       if output_as_extended_target:
         self._extended_targets.add(extended_target)
 
@@ -606,15 +605,8 @@
 
         # To put the cached value into the correct window, simply return a
         # WindowedValue constructed from the element.
-        class Unreify(beam.DoFn):
-          def process(self, e):
-            yield e.windowed_value
-
-        pcoll_from_cache = (
-            pipeline
-            | '{}{}'.format(READ_CACHE, key) >> cache.ReadCache(
-                self._cache_manager, key)
-            | '{}{}unreify'.format(READ_CACHE, key) >> beam.ParDo(Unreify()))
+        pcoll_from_cache = unreify_from_cache(
+            pipeline=pipeline, cache_key=key, cache_manager=self._cache_manager)
         self._cached_pcoll_read[key] = pcoll_from_cache
     # else: NOOP when cache doesn't exist, just compute the original graph.
 
@@ -704,7 +696,7 @@
   def _cacheable_inputs(self, transform):
     inputs = set()
     for in_pcoll in transform.inputs:
-      if self._cacheable_key(in_pcoll) in self.cacheables:
+      if self.pcoll_id(in_pcoll) in self._cacheables:
         inputs.add(in_pcoll)
     return inputs
 
@@ -717,50 +709,35 @@
       outputs.add(out_pcoll)
     return inputs, outputs
 
-  def _cacheable_key(self, pcoll):
-    """Gets the key a cacheable PCollection is tracked within the instrument."""
-    return cacheable_key(
-        pcoll, self.pcolls_to_pcoll_id, self._pcoll_version_map)
+  def pcoll_id(self, pcoll):
+    """Gets the PCollection id of the given pcoll.
+
+    Returns '' if not found.
+    """
+    return self._pcoll_to_pcoll_id.get(str(pcoll), '')
 
   def cache_key(self, pcoll):
     """Gets the identifier of a cacheable PCollection in cache.
 
     If the pcoll is not a cacheable, return ''.
+    This is only needed in the pipeline instrument when the origin of a given pcoll
+    is unknown (whether it's from the user pipeline or a runner pipeline). If
+    a pcoll is from the user pipeline, always use CacheKey.from_pcoll to build
+    the key.
     The key is what the pcoll would use as identifier if it's materialized in
     cache. It doesn't mean that there would definitely be such cache already.
     Also, the pcoll can come from the original user defined pipeline object or
     an equivalent pcoll from a transformed copy of the original pipeline.
-
-    'pcoll_id' of cacheable is not stable for cache_key, thus not included in
-    cache key. A combination of 'var', 'version' and 'producer_version' is
-    sufficient to identify a cached PCollection.
     """
-    cacheable = self.cacheables.get(self._cacheable_key(pcoll), None)
+    cacheable = self._cacheables.get(self.pcoll_id(pcoll), None)
     if cacheable:
       if cacheable.pcoll in self.runner_pcoll_to_user_pcoll:
         user_pcoll = self.runner_pcoll_to_user_pcoll[cacheable.pcoll]
       else:
         user_pcoll = cacheable.pcoll
-
-      return repr(
-          CacheKey(
-              cacheable.var,
-              cacheable.version,
-              cacheable.producer_version,
-              str(id(user_pcoll.pipeline))))
+      return CacheKey.from_pcoll(cacheable.var, user_pcoll).to_str()
     return ''
 
-  def cacheable_var_by_pcoll_id(self, pcoll_id):
-    """Retrieves the variable name of a PCollection.
-
-    In source code, PCollection variables are defined in the user pipeline. When
-    it's converted to the runner api representation, each PCollection referenced
-    in the user pipeline is assigned a unique-within-pipeline pcoll_id. Given
-    such pcoll_id, retrieves the str variable name defined in user pipeline for
-    that referenced PCollection. If the PCollection is not watched, return None.
-    """
-    return self._cacheable_var_by_pcoll_id.get(pcoll_id, None)
-
 
 def build_pipeline_instrument(pipeline, options=None):
   """Creates PipelineInstrument for a pipeline and its options with cache.
@@ -782,83 +759,7 @@
   return pi
 
 
-def cacheables(pcolls_to_pcoll_id):
-  """Finds PCollections that need to be cached for analyzed PCollections.
-
-  The function only treats the result as cacheables since there is no guarantee
-  whether PCollections that need to be cached have been cached or not. A
-  PCollection needs to be cached when it's bound to a user defined variable in
-  the source code. Otherwise, the PCollection is not reusable nor introspectable
-  which nullifies the need of cache. There might be multiple pipelines defined
-  and watched, this will only return for PCollections with pcolls_to_pcoll_id
-  analyzed. The check is not strict because pcoll_id is not unique across
-  multiple pipelines. Additional check needs to be done during instrument.
-  """
-  pcoll_version_map = {}
-  cacheables = {}
-  cacheable_var_by_pcoll_id = {}
-  for watching in ie.current_env().watching():
-    for key, val in watching:
-      if isinstance(val, beam.pvalue.PCollection):
-        pcoll_id = pcolls_to_pcoll_id.get(str(val), None)
-        # It's highly possible that PCollection str is not unique across
-        # multiple pipelines, further check during instrument is needed.
-        if not pcoll_id:
-          continue
-
-        cacheable = Cacheable(
-            pcoll_id=pcoll_id,
-            var=key,
-            version=str(id(val)),
-            pcoll=val,
-            producer_version=str(id(val.producer)))
-        pcoll_version_map[cacheable.pcoll_id] = cacheable.version
-        cacheables[cacheable_key(val, pcolls_to_pcoll_id)] = cacheable
-        cacheable_var_by_pcoll_id[cacheable.pcoll_id] = key
-
-  return pcoll_version_map, cacheables, cacheable_var_by_pcoll_id
-
-
-def cacheable_key(pcoll, pcolls_to_pcoll_id, pcoll_version_map=None):
-  pcoll_version = str(id(pcoll))
-  pcoll_id = pcolls_to_pcoll_id.get(str(pcoll), '')
-  if pcoll_version_map:
-    original_pipeline_pcoll_version = pcoll_version_map.get(pcoll_id, None)
-    if original_pipeline_pcoll_version:
-      pcoll_version = original_pipeline_pcoll_version
-  return '_'.join((pcoll_version, pcoll_id))
-
-
-def has_unbounded_sources(pipeline):
-  """Checks if a given pipeline has recordable sources."""
-  return len(unbounded_sources(pipeline)) > 0
-
-
-def unbounded_sources(pipeline):
-  """Returns a pipeline's recordable sources."""
-  class CheckUnboundednessVisitor(PipelineVisitor):
-    """Visitor checks if there are any unbounded read sources in the Pipeline.
-
-    Visitor visits all nodes and checks if it is an instance of recordable
-    sources.
-    """
-    def __init__(self):
-      self.unbounded_sources = []
-
-    def enter_composite_transform(self, transform_node):
-      self.visit_transform(transform_node)
-
-    def visit_transform(self, transform_node):
-      if isinstance(transform_node.transform,
-                    tuple(ie.current_env().options.recordable_sources)):
-        self.unbounded_sources.append(transform_node)
-
-  v = CheckUnboundednessVisitor()
-  pipeline.visit(v)
-  return v.unbounded_sources
-
-
-def pcolls_to_pcoll_id(pipeline, original_context):
+def pcoll_to_pcoll_id(pipeline, original_context):
   """Returns a dict mapping PCollections string to PCollection IDs.
 
   Using a PipelineVisitor to iterate over every node in the pipeline,
@@ -878,42 +779,16 @@
     results in validation errors.
     """
     def __init__(self):
-      self.pcolls_to_pcoll_id = {}
+      self.pcoll_to_pcoll_id = {}
 
     def enter_composite_transform(self, transform_node):
       self.visit_transform(transform_node)
 
     def visit_transform(self, transform_node):
       for pcoll in transform_node.outputs.values():
-        self.pcolls_to_pcoll_id[str(pcoll)] = (
+        self.pcoll_to_pcoll_id[str(pcoll)] = (
             original_context.pcollections.get_id(pcoll))
 
   v = PCollVisitor()
   pipeline.visit(v)
-  return v.pcolls_to_pcoll_id
-
-
-def watch_sources(pipeline):
-  """Watches the unbounded sources in the pipeline.
-
-  Sources can output to a PCollection without a user variable reference. In
-  this case the source is not cached. We still want to cache the data so we
-  synthetically create a variable to the intermediate PCollection.
-  """
-
-  retrieved_user_pipeline = ie.current_env().user_pipeline(pipeline)
-
-  class CacheableUnboundedPCollectionVisitor(PipelineVisitor):
-    def __init__(self):
-      self.unbounded_pcolls = set()
-
-    def enter_composite_transform(self, transform_node):
-      self.visit_transform(transform_node)
-
-    def visit_transform(self, transform_node):
-      if isinstance(transform_node.transform,
-                    tuple(ie.current_env().options.recordable_sources)):
-        for pcoll in transform_node.outputs.values():
-          ie.current_env().watch({'synthetic_var_' + str(id(pcoll)): pcoll})
-
-  retrieved_user_pipeline.visit(CacheableUnboundedPCollectionVisitor())
+  return v.pcoll_to_pcoll_id
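
For readers of this rename, a minimal usage sketch of the pcoll_to_pcoll_id helper, mirroring the updated test further down; the pipeline and transform names are illustrative only.

import apache_beam as beam
from apache_beam.runners.interactive import interactive_runner
from apache_beam.runners.interactive import pipeline_instrument as instr

p = beam.Pipeline(interactive_runner.InteractiveRunner())
init_pcoll = p | 'Init Create' >> beam.Impulse()

# Convert to the runner API form to obtain the pipeline context, then map the
# string form of each PCollection to its proto id.
_, ctx = p.to_runner_api(return_context=True)
name_map = instr.pcoll_to_pcoll_id(p, ctx)
# e.g. {str(init_pcoll): 'ref_PCollection_PCollection_1'} per the test below.
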
diff --git a/sdks/python/apache_beam/runners/interactive/pipeline_instrument_test.py b/sdks/python/apache_beam/runners/interactive/pipeline_instrument_test.py
index a3f91c0..bba315d 100644
--- a/sdks/python/apache_beam/runners/interactive/pipeline_instrument_test.py
+++ b/sdks/python/apache_beam/runners/interactive/pipeline_instrument_test.py
@@ -18,7 +18,6 @@
 """Tests for apache_beam.runners.interactive.pipeline_instrument."""
 # pytype: skip-file
 
-import tempfile
 import unittest
 
 import apache_beam as beam
@@ -29,6 +28,9 @@
 from apache_beam.runners.interactive import interactive_environment as ie
 from apache_beam.runners.interactive import pipeline_instrument as instr
 from apache_beam.runners.interactive import interactive_runner
+from apache_beam.runners.interactive import utils
+from apache_beam.runners.interactive.caching.cacheable import Cacheable
+from apache_beam.runners.interactive.caching.cacheable import CacheKey
 from apache_beam.runners.interactive.caching.streaming_cache import StreamingCache
 from apache_beam.runners.interactive.testing.pipeline_assertion import assert_pipeline_equal
 from apache_beam.runners.interactive.testing.pipeline_assertion import assert_pipeline_proto_contain_top_level_transform
@@ -44,38 +46,32 @@
     ie.new_env()
 
   def cache_key_of(self, name, pcoll):
-    return repr(
-        instr.CacheKey(
-            name,
-            str(id(pcoll)),
-            str(id(pcoll.producer)),
-            str(id(pcoll.pipeline))))
+    return CacheKey.from_pcoll(name, pcoll).to_str()
 
-  def test_pcolls_to_pcoll_id(self):
+  def test_pcoll_to_pcoll_id(self):
     p = beam.Pipeline(interactive_runner.InteractiveRunner())
     ie.current_env().set_cache_manager(InMemoryCache(), p)
     # pylint: disable=range-builtin-not-iterating
     init_pcoll = p | 'Init Create' >> beam.Impulse()
     _, ctx = p.to_runner_api(return_context=True)
     self.assertEqual(
-        instr.pcolls_to_pcoll_id(p, ctx),
+        instr.pcoll_to_pcoll_id(p, ctx),
         {str(init_pcoll): 'ref_PCollection_PCollection_1'})
 
-  def test_cacheable_key_without_version_map(self):
-    p = beam.Pipeline(interactive_runner.InteractiveRunner())
-    ie.current_env().set_cache_manager(InMemoryCache(), p)
-    # pylint: disable=range-builtin-not-iterating
-    init_pcoll = p | 'Init Create' >> beam.Create(range(10))
-    _, ctx = p.to_runner_api(return_context=True)
+  def test_pcoll_id_with_user_pipeline(self):
+    p_id_user = beam.Pipeline(interactive_runner.InteractiveRunner())
+    ie.current_env().set_cache_manager(InMemoryCache(), p_id_user)
+    init_pcoll = p_id_user | 'Init Create' >> beam.Create([1, 2, 3])
+    instrumentation = instr.build_pipeline_instrument(p_id_user)
     self.assertEqual(
-        instr.cacheable_key(init_pcoll, instr.pcolls_to_pcoll_id(p, ctx)),
-        str(id(init_pcoll)) + '_ref_PCollection_PCollection_8')
+        instrumentation.pcoll_id(init_pcoll), 'ref_PCollection_PCollection_8')
 
-  def test_cacheable_key_with_version_map(self):
-    p = beam.Pipeline(interactive_runner.InteractiveRunner())
-    ie.current_env().set_cache_manager(InMemoryCache(), p)
-    # pylint: disable=range-builtin-not-iterating
-    init_pcoll = p | 'Init Create' >> beam.Create(range(10))
+  def test_pcoll_id_with_runner_pipeline(self):
+    p_id_runner = beam.Pipeline(interactive_runner.InteractiveRunner())
+    ie.current_env().set_cache_manager(InMemoryCache(), p_id_runner)
+    # pylint: disable=possibly-unused-variable
+    init_pcoll = p_id_runner | 'Init Create' >> beam.Create([1, 2, 3])
+    ib.watch(locals())
 
     # It's normal that when executing, the pipeline object is a different
     # but equivalent instance from what user has built. The pipeline instrument
@@ -84,20 +80,16 @@
     # version map can be used to figure out what the PCollection instances are
     # in the original instance and if the evaluation has changed since last
     # execution.
-    p2 = beam.Pipeline(interactive_runner.InteractiveRunner())
-    ie.current_env().set_cache_manager(InMemoryCache(), p2)
+    p2_id_runner = beam.Pipeline(interactive_runner.InteractiveRunner())
     # pylint: disable=range-builtin-not-iterating
-    init_pcoll_2 = p2 | 'Init Create' >> beam.Create(range(10))
-    _, ctx = p2.to_runner_api(return_context=True)
+    init_pcoll_2 = p2_id_runner | 'Init Create' >> beam.Create(range(10))
+    ie.current_env().add_derived_pipeline(p_id_runner, p2_id_runner)
 
-    # The cacheable_key should use id(init_pcoll) as prefix even when
+    instrumentation = instr.build_pipeline_instrument(p2_id_runner)
+    # The pcoll_id should resolve to the original PCollection's id even when
     # init_pcoll_2 is supplied as long as the version map is given.
     self.assertEqual(
-        instr.cacheable_key(
-            init_pcoll_2,
-            instr.pcolls_to_pcoll_id(p2, ctx),
-            {'ref_PCollection_PCollection_8': str(id(init_pcoll))}),
-        str(id(init_pcoll)) + '_ref_PCollection_PCollection_8')
+        instrumentation.pcoll_id(init_pcoll_2), 'ref_PCollection_PCollection_8')
 
   def test_cache_key(self):
     p = beam.Pipeline(interactive_runner.InteractiveRunner())
@@ -120,63 +112,36 @@
         pipeline_instrument.cache_key(cubes), self.cache_key_of('cubes', cubes))
 
   def test_cacheables(self):
-    p = beam.Pipeline(interactive_runner.InteractiveRunner())
-    ie.current_env().set_cache_manager(InMemoryCache(), p)
+    p_cacheables = beam.Pipeline(interactive_runner.InteractiveRunner())
+    ie.current_env().set_cache_manager(InMemoryCache(), p_cacheables)
     # pylint: disable=range-builtin-not-iterating
-    init_pcoll = p | 'Init Create' >> beam.Create(range(10))
+    init_pcoll = p_cacheables | 'Init Create' >> beam.Create(range(10))
     squares = init_pcoll | 'Square' >> beam.Map(lambda x: x * x)
     cubes = init_pcoll | 'Cube' >> beam.Map(lambda x: x**3)
     ib.watch(locals())
 
-    pipeline_instrument = instr.build_pipeline_instrument(p)
-
-    # TODO(BEAM-7760): The PipelineInstrument cacheables maintains a global list
-    # of cacheable PCollections across all pipelines. Here we take the subset of
-    # cacheables that only pertain to this test's pipeline.
-    cacheables = {
-        k: c
-        for k,
-        c in pipeline_instrument.cacheables.items() if c.pcoll.pipeline is p
-    }
+    pipeline_instrument = instr.build_pipeline_instrument(p_cacheables)
 
     self.assertEqual(
-        cacheables,
+        pipeline_instrument._cacheables,
         {
-            pipeline_instrument._cacheable_key(init_pcoll): instr.Cacheable(
+            pipeline_instrument.pcoll_id(init_pcoll): Cacheable(
                 var='init_pcoll',
                 version=str(id(init_pcoll)),
-                pcoll_id='ref_PCollection_PCollection_8',
                 producer_version=str(id(init_pcoll.producer)),
                 pcoll=init_pcoll),
-            pipeline_instrument._cacheable_key(squares): instr.Cacheable(
+            pipeline_instrument.pcoll_id(squares): Cacheable(
                 var='squares',
                 version=str(id(squares)),
-                pcoll_id='ref_PCollection_PCollection_9',
                 producer_version=str(id(squares.producer)),
                 pcoll=squares),
-            pipeline_instrument._cacheable_key(cubes): instr.Cacheable(
+            pipeline_instrument.pcoll_id(cubes): Cacheable(
                 var='cubes',
                 version=str(id(cubes)),
-                pcoll_id='ref_PCollection_PCollection_10',
                 producer_version=str(id(cubes.producer)),
                 pcoll=cubes)
         })
 
-  def test_has_unbounded_source(self):
-    p = beam.Pipeline(interactive_runner.InteractiveRunner())
-    ie.current_env().set_cache_manager(InMemoryCache(), p)
-    _ = p | 'ReadUnboundedSource' >> beam.io.ReadFromPubSub(
-        subscription='projects/fake-project/subscriptions/fake_sub')
-    self.assertTrue(instr.has_unbounded_sources(p))
-
-  def test_not_has_unbounded_source(self):
-    p = beam.Pipeline(interactive_runner.InteractiveRunner())
-    ie.current_env().set_cache_manager(InMemoryCache(), p)
-    with tempfile.NamedTemporaryFile(delete=False) as f:
-      f.write(b'test')
-    _ = p | 'ReadBoundedSource' >> beam.io.ReadFromText(f.name)
-    self.assertFalse(instr.has_unbounded_sources(p))
-
   def test_background_caching_pipeline_proto(self):
     p = beam.Pipeline(interactive_runner.InteractiveRunner())
     ie.current_env().set_cache_manager(StreamingCache(cache_dir=None), p)
@@ -215,12 +180,11 @@
         | 'b' >> cache.WriteCache(ie.current_env().get_cache_manager(p), ''))
 
     expected_pipeline = p.to_runner_api(return_context=False)
-
     assert_pipeline_proto_equal(self, expected_pipeline, actual_pipeline)
 
   def _example_pipeline(self, watch=True, bounded=True):
-    p = beam.Pipeline(interactive_runner.InteractiveRunner())
-    ie.current_env().set_cache_manager(InMemoryCache(), p)
+    p_example = beam.Pipeline(interactive_runner.InteractiveRunner())
+    ie.current_env().set_cache_manager(InMemoryCache(), p_example)
     # pylint: disable=range-builtin-not-iterating
     if bounded:
       source = beam.Create(range(10))
@@ -228,11 +192,11 @@
       source = beam.io.ReadFromPubSub(
           subscription='projects/fake-project/subscriptions/fake_sub')
 
-    init_pcoll = p | 'Init Source' >> source
+    init_pcoll = p_example | 'Init Source' >> source
     second_pcoll = init_pcoll | 'Second' >> beam.Map(lambda x: x * x)
     if watch:
       ib.watch(locals())
-    return (p, init_pcoll, second_pcoll)
+    return (p_example, init_pcoll, second_pcoll)
 
   def _mock_write_cache(self, pipeline, values, cache_key):
     """Cache the PCollection where cache.WriteCache would write to."""
@@ -248,7 +212,8 @@
     # Original instance defined by user code has all variables handlers.
     p_origin, init_pcoll, second_pcoll = self._example_pipeline()
     # Copied instance when execution has no user defined variables.
-    p_copy, _, _ = self._example_pipeline(False)
+    p_copy, _, _ = self._example_pipeline(watch=False)
+    ie.current_env().add_derived_pipeline(p_origin, p_copy)
     # Instrument the copied pipeline.
     pipeline_instrument = instr.build_pipeline_instrument(p_copy)
     # Manually instrument original pipeline with expected pipeline transforms.
@@ -337,7 +302,8 @@
 
     # Mock as if cacheable PCollections are cached.
     ib.watch(locals())
-
+    # This should be a no-op.
+    utils.watch_sources(p_original)
     for name, pcoll in locals().items():
       if not isinstance(pcoll, beam.pvalue.PCollection):
         continue
@@ -395,21 +361,22 @@
     from apache_beam.options.pipeline_options import StandardOptions
     options = StandardOptions(streaming=True)
     streaming_cache_manager = StreamingCache(cache_dir=None)
-    p_original = beam.Pipeline(interactive_runner.InteractiveRunner(), options)
-    ie.current_env().set_cache_manager(streaming_cache_manager, p_original)
+    p_original_cache_source = beam.Pipeline(
+        interactive_runner.InteractiveRunner(), options)
+    ie.current_env().set_cache_manager(
+        streaming_cache_manager, p_original_cache_source)
 
     # pylint: disable=possibly-unused-variable
     source_1 = (
-        p_original
+        p_original_cache_source
         | 'source1' >> beam.io.ReadFromPubSub(
             subscription='projects/fake-project/subscriptions/fake_sub')
         | beam.Map(lambda e: e))
 
     # Watch but do not cache the PCollections.
     ib.watch(locals())
-
     # Make sure that sources without a user reference are still cached.
-    instr.watch_sources(p_original)
+    utils.watch_sources(p_original_cache_source)
 
     intermediate_source_pcoll = None
     for watching in ie.current_env().watching():
@@ -421,14 +388,17 @@
 
     # Instrument the original pipeline to create the pipeline the user will see.
     p_copy = beam.Pipeline.from_runner_api(
-        p_original.to_runner_api(),
+        p_original_cache_source.to_runner_api(),
         runner=interactive_runner.InteractiveRunner(),
         options=options)
+    ie.current_env().add_derived_pipeline(p_original_cache_source, p_copy)
     instrumenter = instr.build_pipeline_instrument(p_copy)
     actual_pipeline = beam.Pipeline.from_runner_api(
         proto=instrumenter.instrumented_pipeline_proto(),
         runner=interactive_runner.InteractiveRunner(),
         options=options)
+    ie.current_env().add_derived_pipeline(
+        p_original_cache_source, actual_pipeline)
 
     # Now, build the expected pipeline which replaces the unbounded source with
     # a TestStream.
@@ -496,7 +466,8 @@
 
     # Watch but do not cache the PCollections.
     ib.watch(locals())
-
+    # This should be a no-op.
+    utils.watch_sources(p_original)
     self._mock_write_cache(
         p_original, [], self.cache_key_of('source_2', source_2))
     ie.current_env().mark_pcollection_computed([source_2])
@@ -563,36 +534,39 @@
     # Create the pipeline that will be instrumented.
     from apache_beam.options.pipeline_options import StandardOptions
     options = StandardOptions(streaming=True)
-    p_original = beam.Pipeline(interactive_runner.InteractiveRunner(), options)
+    p_original_direct_source = beam.Pipeline(
+        interactive_runner.InteractiveRunner(), options)
     ie.current_env().set_cache_manager(
-        StreamingCache(cache_dir=None), p_original)
-    source_1 = p_original | 'source1' >> beam.io.ReadFromPubSub(
+        StreamingCache(cache_dir=None), p_original_direct_source)
+    source_1 = p_original_direct_source | 'source1' >> beam.io.ReadFromPubSub(
         subscription='projects/fake-project/subscriptions/fake_sub')
     # pylint: disable=possibly-unused-variable
-
+    p_expected = beam.Pipeline()
+    # pylint: disable=unused-variable
+    test_stream = (
+        p_expected
+        | TestStream(output_tags=[self.cache_key_of('source_1', source_1)]))
     # Watch but do not cache the PCollections.
     ib.watch(locals())
-
+    # This should be a no-op.
+    utils.watch_sources(p_original_direct_source)
     # Instrument the original pipeline to create the pipeline the user will see.
     p_copy = beam.Pipeline.from_runner_api(
-        p_original.to_runner_api(),
+        p_original_direct_source.to_runner_api(),
         runner=interactive_runner.InteractiveRunner(),
         options=options)
+    ie.current_env().add_derived_pipeline(p_original_direct_source, p_copy)
     instrumenter = instr.build_pipeline_instrument(p_copy)
     actual_pipeline = beam.Pipeline.from_runner_api(
         proto=instrumenter.instrumented_pipeline_proto(),
         runner=interactive_runner.InteractiveRunner(),
         options=options)
+    ie.current_env().add_derived_pipeline(
+        p_original_direct_source, actual_pipeline)
 
     # Now, build the expected pipeline which replaces the unbounded source with
     # a TestStream.
     source_1_cache_key = self.cache_key_of('source_1', source_1)
-    p_expected = beam.Pipeline()
-
-    # pylint: disable=unused-variable
-    test_stream = (
-        p_expected
-        | TestStream(output_tags=[self.cache_key_of('source_1', source_1)]))
 
     # Test that the TestStream is outputting to the correct PCollection.
     class TestStreamVisitor(PipelineVisitor):
@@ -625,22 +599,25 @@
     # Create the pipeline that will be instrumented.
     from apache_beam.options.pipeline_options import StandardOptions
     options = StandardOptions(streaming=True)
-    p_original = beam.Pipeline(interactive_runner.InteractiveRunner(), options)
+    p_original_read_cache = beam.Pipeline(
+        interactive_runner.InteractiveRunner(), options)
     ie.current_env().set_cache_manager(
-        StreamingCache(cache_dir=None), p_original)
-    source_1 = p_original | 'source1' >> beam.io.ReadFromPubSub(
+        StreamingCache(cache_dir=None), p_original_read_cache)
+    source_1 = p_original_read_cache | 'source1' >> beam.io.ReadFromPubSub(
         subscription='projects/fake-project/subscriptions/fake_sub')
     # pylint: disable=possibly-unused-variable
     pcoll_1 = source_1 | 'square1' >> beam.Map(lambda x: x * x)
 
     # Watch but do not cache the PCollections.
     ib.watch(locals())
-
+    # This should be a no-op.
+    utils.watch_sources(p_original_read_cache)
     # Instrument the original pipeline to create the pipeline the user will see.
     p_copy = beam.Pipeline.from_runner_api(
-        p_original.to_runner_api(),
+        p_original_read_cache.to_runner_api(),
         runner=interactive_runner.InteractiveRunner(),
         options=options)
+    ie.current_env().add_derived_pipeline(p_original_read_cache, p_copy)
     instrumenter = instr.build_pipeline_instrument(p_copy)
     actual_pipeline = beam.Pipeline.from_runner_api(
         proto=instrumenter.instrumented_pipeline_proto(),
@@ -705,7 +682,8 @@
 
     # Mock as if cacheable PCollections are cached.
     ib.watch(locals())
-
+    # This should be a no-op.
+    utils.watch_sources(p_original)
     for name, pcoll in locals().items():
       if not isinstance(pcoll, beam.pvalue.PCollection):
         continue
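
A compact sketch of the derived-pipeline flow that replaces the old version map, as exercised by test_pcoll_id_with_runner_pipeline above; the variable names here are illustrative.

import apache_beam as beam
from apache_beam.runners.interactive import interactive_beam as ib
from apache_beam.runners.interactive import interactive_environment as ie
from apache_beam.runners.interactive import interactive_runner
from apache_beam.runners.interactive import pipeline_instrument as instr

# The pipeline the user builds and watches.
user_p = beam.Pipeline(interactive_runner.InteractiveRunner())
init_pcoll = user_p | 'Init Create' >> beam.Create([1, 2, 3])
ib.watch(locals())

# An equivalent pipeline constructed at execution time. Registering it as
# derived lets the instrument resolve its PCollections back to the user
# pipeline instead of relying on an explicit version map.
runner_p = beam.Pipeline(interactive_runner.InteractiveRunner())
init_pcoll_2 = runner_p | 'Init Create' >> beam.Create([1, 2, 3])
ie.current_env().add_derived_pipeline(user_p, runner_p)

instrumentation = instr.build_pipeline_instrument(runner_p)
# instrumentation.pcoll_id(init_pcoll_2) now yields the same
# 'ref_PCollection_PCollection_...' id as the watched user PCollection.
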
diff --git a/sdks/python/apache_beam/runners/interactive/recording_manager.py b/sdks/python/apache_beam/runners/interactive/recording_manager.py
index c51a648..690e133 100644
--- a/sdks/python/apache_beam/runners/interactive/recording_manager.py
+++ b/sdks/python/apache_beam/runners/interactive/recording_manager.py
@@ -29,8 +29,8 @@
 from apache_beam.runners.interactive import interactive_environment as ie
 from apache_beam.runners.interactive import interactive_runner as ir
 from apache_beam.runners.interactive import pipeline_fragment as pf
-from apache_beam.runners.interactive import pipeline_instrument as pi
 from apache_beam.runners.interactive import utils
+from apache_beam.runners.interactive.caching.cacheable import CacheKey
 from apache_beam.runners.runner import PipelineState
 
 _LOGGER = logging.getLogger(__name__)
@@ -48,7 +48,7 @@
       ):
     self._pcoll = pcoll
     self._cache_key = cache_key
-    self._pipeline = pcoll.pipeline
+    self._pipeline = ie.current_env().user_pipeline(pcoll.pipeline)
     self._var = var
     self._n = max_n
     self._duration_secs = max_duration_secs
@@ -157,24 +157,22 @@
       user_pipeline,  # type: beam.Pipeline
       pcolls,  # type: List[beam.pvalue.PCollection]
       result,  # type: beam.runner.PipelineResult
-      pipeline_instrument,  # type: beam.runners.interactive.PipelineInstrument
       max_n,  # type: int
       max_duration_secs,  # type: float
       ):
-
     self._user_pipeline = user_pipeline
     self._result = result
     self._result_lock = threading.Lock()
     self._pcolls = pcolls
-
-    pcoll_var = lambda pcoll: pipeline_instrument.cacheable_var_by_pcoll_id(
-        pipeline_instrument.pcolls_to_pcoll_id.get(str(pcoll), None))
+    pcoll_var = lambda pcoll: {v: k
+                               for k, v in utils.pcoll_by_name().items()}.get(
+                                   pcoll, None)
 
     self._streams = {
         pcoll: ElementStream(
             pcoll,
             pcoll_var(pcoll),
-            pipeline_instrument.cache_key(pcoll),
+            CacheKey.from_pcoll(pcoll_var(pcoll), pcoll).to_str(),
             max_n,
             max_duration_secs)
         for pcoll in pcolls
@@ -316,8 +314,8 @@
         ie.current_env().watch(
             {'anonymous_pcollection_{}'.format(id(pcoll)): pcoll})
 
-  def _clear(self, pipeline_instrument):
-    # type: (List[beam.pvalue.PCollection]) -> None
+  def _clear(self):
+    # type: () -> None
 
     """Clears the recording of all non-source PCollections."""
 
@@ -327,7 +325,7 @@
     # BackgroundCachingJob.
     computed = ie.current_env().computed_pcollections
     cacheables = [
-        c for c in pipeline_instrument.cacheables.values()
+        c for c in utils.cacheables().values()
         if c.pcoll.pipeline is self.user_pipeline and c.pcoll not in computed
     ]
     all_cached = set(str(c.to_key()) for c in cacheables)
@@ -397,7 +395,7 @@
 
     # Make sure that sources without a user reference are still cached.
     ie.current_env().add_user_pipeline(self.user_pipeline)
-    pi.watch_sources(self.user_pipeline)
+    utils.watch_sources(self.user_pipeline)
 
     # Attempt to run background caching job to record any sources.
     if ie.current_env().is_in_ipython:
@@ -437,7 +435,6 @@
     # watch it. No validation is needed here because the watch logic can handle
     # arbitrary variables.
     self._watch(pcolls)
-    pipeline_instrument = pi.PipelineInstrument(self.user_pipeline)
     self.record_pipeline()
 
     # Get the subset of computed PCollections. These do not to be recomputed.
@@ -450,7 +447,7 @@
     if uncomputed_pcolls:
       # Clear the cache of the given uncomputed PCollections because they are
       # incomplete.
-      self._clear(pipeline_instrument)
+      self._clear()
 
       warnings.filterwarnings(
           'ignore',
@@ -464,12 +461,29 @@
       result = None
 
     recording = Recording(
-        self.user_pipeline,
-        pcolls,
-        result,
-        pipeline_instrument,
-        max_n,
-        max_duration_secs)
+        self.user_pipeline, pcolls, result, max_n, max_duration_secs)
     self._recordings.add(recording)
 
     return recording
+
+  def read(self, pcoll_name, pcoll, max_n, max_duration_secs):
+    # type: (str, beam.pvalue.PValue, int, float) -> Union[None, ElementStream]
+
+    """Reads an ElementStream of a computed PCollection.
+
+    Returns None if an error occurs. The caller is responsible for validating
+    that the given pcoll_name and pcoll identify a watched and computed
+    PCollection without ambiguity in the notebook.
+    """
+
+    try:
+      cache_key = CacheKey.from_pcoll(pcoll_name, pcoll).to_str()
+      return ElementStream(
+          pcoll, pcoll_name, cache_key, max_n, max_duration_secs)
+    except (KeyboardInterrupt, SystemExit):
+      raise
+    except Exception as e:
+      # The caller should handle all validations. To avoid redundant
+      # validation here, simply log the error if the caller fails to do so.
+      _LOGGER.error(str(e))
+      return None
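
A short sketch of the new RecordingManager.read() entry point; it assumes a watched pipeline with a PCollection named 'squares' and is illustrative rather than a complete notebook session.

import apache_beam as beam
from apache_beam.runners.interactive import interactive_beam as ib
from apache_beam.runners.interactive.interactive_runner import InteractiveRunner
from apache_beam.runners.interactive.recording_manager import RecordingManager

p = beam.Pipeline(InteractiveRunner())
squares = p | beam.Create([1, 2, 3]) | beam.Map(lambda x: x * x)
ib.watch(locals())

rm = RecordingManager(p)
# read() builds a CacheKey from the (name, pcoll) pair and returns an
# ElementStream, or None when the pair cannot be resolved; per the docstring,
# validating the inputs is the caller's responsibility.
stream = rm.read('squares', squares, max_n=10, max_duration_secs=60.0)
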
diff --git a/sdks/python/apache_beam/runners/interactive/recording_manager_test.py b/sdks/python/apache_beam/runners/interactive/recording_manager_test.py
index ca44ca3..ec7c78e 100644
--- a/sdks/python/apache_beam/runners/interactive/recording_manager_test.py
+++ b/sdks/python/apache_beam/runners/interactive/recording_manager_test.py
@@ -26,7 +26,7 @@
 from apache_beam.runners.interactive import background_caching_job as bcj
 from apache_beam.runners.interactive import interactive_beam as ib
 from apache_beam.runners.interactive import interactive_environment as ie
-from apache_beam.runners.interactive import pipeline_instrument as pi
+from apache_beam.runners.interactive.caching.cacheable import CacheKey
 from apache_beam.runners.interactive.interactive_runner import InteractiveRunner
 from apache_beam.runners.interactive.options.capture_limiters import Limiter
 from apache_beam.runners.interactive.recording_manager import ElementStream
@@ -66,12 +66,12 @@
     self.cache = InMemoryCache()
     self.p = beam.Pipeline()
     self.pcoll = self.p | beam.Create([])
-    self.cache_key = str(pi.CacheKey('pcoll', '', '', ''))
+    self.cache_key = str(CacheKey('pcoll', '', '', ''))
 
     # Create a MockPipelineResult to control the state of a fake run of the
     # pipeline.
     self.mock_result = MockPipelineResult()
-    ie.current_env().track_user_pipelines()
+    ie.current_env().add_user_pipeline(self.p)
     ie.current_env().set_pipeline_result(self.p, self.mock_result)
     ie.current_env().set_cache_manager(self.cache, self.p)
 
@@ -207,11 +207,7 @@
 
     # Create a recording.
     recording = Recording(
-        p, [elems],
-        mock_result,
-        pi.PipelineInstrument(p),
-        max_n=10,
-        max_duration_secs=60)
+        p, [elems], mock_result, max_n=10, max_duration_secs=60)
 
     # The background caching job and the recording isn't done yet so there may
     # be more elements to be recorded.
@@ -235,11 +231,7 @@
     bcj_mock_result.set_state(PipelineState.DONE)
     ie.current_env().set_background_caching_job(p, background_caching_job)
     recording = Recording(
-        p, [elems],
-        mock_result,
-        pi.PipelineInstrument(p),
-        max_n=10,
-        max_duration_secs=60)
+        p, [elems], mock_result, max_n=10, max_duration_secs=60)
     recording.wait_until_finish()
 
     # There are no more elements and the recording finished, meaning that the
@@ -267,11 +259,7 @@
 
     # Create a recording with an arbitrary start time.
     recording = Recording(
-        p, [numbers, letters],
-        mock_result,
-        pi.PipelineInstrument(p),
-        max_n=10,
-        max_duration_secs=60)
+        p, [numbers, letters], mock_result, max_n=10, max_duration_secs=60)
 
     # Get the cache key of the stream and write something to cache. This is
     # so that a pipeline doesn't have to run in the test.
@@ -422,9 +410,6 @@
     # was run.
     rm = RecordingManager(p)
 
-    # Get the cache, key, and coder to read the PCollection from the cache.
-    pipeline_instrument = pi.PipelineInstrument(p)
-
     # Set up a mock for the Cache's clear function which will be used to clear
     # uncomputed PCollections.
     rm._clear_pcolls = MagicMock()
@@ -434,9 +419,26 @@
     # Assert that the cache cleared the PCollection.
     rm._clear_pcolls.assert_any_call(
         unittest.mock.ANY,
-        set(pipeline_instrument.cache_key(pc) for pc in (elems, squares)))
+        # elems is an unbounded source populated by the background job, so it
+        # is not cleared.
+        {CacheKey.from_pcoll('squares', squares).to_str()})
 
   def test_clear(self):
+    p1 = beam.Pipeline(InteractiveRunner())
+    elems_1 = p1 | 'elems 1' >> beam.Create([0, 1, 2])
+
+    ib.watch(locals())
+    ie.current_env().track_user_pipelines()
+
+    recording_manager = RecordingManager(p1)
+    recording = recording_manager.record([elems_1], max_n=3, max_duration=500)
+    recording.wait_until_finish()
+    record_describe = recording_manager.describe()
+    self.assertGreater(record_describe['size'], 0)
+    recording_manager.clear()
+    self.assertEqual(recording_manager.describe()['size'], 0)
+
+  def test_clear_specific_pipeline(self):
     """Tests that clear can empty the cache for a specific pipeline."""
 
     # Create two pipelines so we can check that clearing the cache won't clear
@@ -461,16 +463,18 @@
     rm_2 = RecordingManager(p2)
     recording = rm_2.record([elems_2], max_n=3, max_duration=500)
     recording.wait_until_finish()
-
     # Assert that clearing only one recording clears that recording.
-    self.assertGreater(rm_1.describe()['size'], 0)
-    self.assertGreater(rm_2.describe()['size'], 0)
-    rm_1.clear()
-    self.assertEqual(rm_1.describe()['size'], 0)
-    self.assertGreater(rm_2.describe()['size'], 0)
+    if (rm_1.describe()['state'] == PipelineState.STOPPED and
+        rm_2.describe()['state'] == PipelineState.STOPPED):
 
-    rm_2.clear()
-    self.assertEqual(rm_2.describe()['size'], 0)
+      self.assertGreater(rm_1.describe()['size'], 0)
+      self.assertGreater(rm_2.describe()['size'], 0)
+      rm_1.clear()
+      self.assertEqual(rm_1.describe()['size'], 0)
+      self.assertGreater(rm_2.describe()['size'], 0)
+
+      rm_2.clear()
+      self.assertEqual(rm_2.describe()['size'], 0)
 
   def test_record_pipeline(self):
     # Add the TestStream so that it can be cached.
diff --git a/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py b/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py
index 1dc42e0..6c0f8d3 100644
--- a/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py
+++ b/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py
@@ -30,18 +30,20 @@
 
 import apache_beam as beam
 from apache_beam.pvalue import PValue
-from apache_beam.runners.interactive import cache_manager as cache
 from apache_beam.runners.interactive import interactive_beam as ib
 from apache_beam.runners.interactive import interactive_environment as ie
-from apache_beam.runners.interactive import pipeline_instrument as inst
-from apache_beam.runners.interactive.cache_manager import FileBasedCacheManager
-from apache_beam.runners.interactive.caching.streaming_cache import StreamingCache
+from apache_beam.runners.interactive.background_caching_job import has_source_to_cache
+from apache_beam.runners.interactive.caching.cacheable import CacheKey
+from apache_beam.runners.interactive.caching.reify import reify_to_cache
+from apache_beam.runners.interactive.caching.reify import unreify_from_cache
+from apache_beam.runners.interactive.display.pcoll_visualization import visualize_computed_pcoll
 from apache_beam.runners.interactive.sql.utils import find_pcolls
 from apache_beam.runners.interactive.sql.utils import is_namedtuple
-from apache_beam.runners.interactive.sql.utils import pcolls_by_name
+from apache_beam.runners.interactive.sql.utils import pformat_namedtuple
 from apache_beam.runners.interactive.sql.utils import register_coder_for_schema
 from apache_beam.runners.interactive.sql.utils import replace_single_pcoll_token
 from apache_beam.runners.interactive.utils import obfuscate
+from apache_beam.runners.interactive.utils import pcoll_by_name
 from apache_beam.runners.interactive.utils import progress_indicated
 from apache_beam.testing import test_stream
 from apache_beam.testing.test_stream_service import TestStreamServiceController
@@ -66,6 +68,19 @@
     depending on the SQL statement executed.
 """
 
+_NOT_SUPPORTED_MSG = """The query was valid and successfully applied.
+    But beam_sql failed to execute the query: %s
+
+    Runner used by beam_sql was %s.
+    Some Beam features might have not been supported by the Python SDK and runner combination.
+    Please check the runner output for more details about the failed items.
+
+    In the meantime, you may check:
+    https://beam.apache.org/documentation/runners/capability-matrix/
+    to choose a runner other than the InteractiveRunner and explicitly apply SqlTransform
+    to build Beam pipelines in a non-interactive manner.
+"""
+
 
 def on_error(error_msg, *args):
   """Logs the error and the usage example."""
@@ -98,7 +113,7 @@
     if not cell or cell.isspace():
       on_error('Please supply the sql to be executed.')
       return
-    found = find_pcolls(cell, pcolls_by_name())
+    found = find_pcolls(cell, pcoll_by_name())
     for _, pcoll in found.items():
       if not is_namedtuple(pcoll.element_type):
         on_error(
@@ -110,15 +125,15 @@
         return
       register_coder_for_schema(pcoll.element_type)
 
-    # TODO(BEAM-10708): implicitly execute the pipeline and write output into
-    # cache.
-    return apply_sql(cell, line, found)
+    output_name, output = apply_sql(cell, line, found)
+    cache_output(output_name, output)
+    return output
 
 
 @progress_indicated
 def apply_sql(
     query: str, output_name: Optional[str],
-    found: Dict[str, beam.PCollection]) -> PValue:
+    found: Dict[str, beam.PCollection]) -> Tuple[str, PValue]:
   """Applies a SqlTransform with the given sql and queried PCollections.
 
   Args:
@@ -127,7 +142,9 @@
     found: The PCollections with variable names found to be used in the query.
 
   Returns:
-    A PValue, mostly a PCollection, depending on the query.
+    A Tuple[str, PValue]. The first value is the output variable name in the
+    __main__ module (auto-generated if not provided). The second value is a
+    PValue, most likely a PCollection, depending on the query.
   """
   output_name = _generate_output_name(output_name, query, found)
   query, sql_source = _build_query_components(query, found)
@@ -138,53 +155,20 @@
     setattr(importlib.import_module('__main__'), output_name, output)
     ib.watch({output_name: output})
     _LOGGER.info(
-        "The output PCollection variable is %s: %s", output_name, output)
-    return output
+        "The output PCollection variable is %s with element_type %s",
+        output_name,
+        pformat_namedtuple(output.element_type))
+    return output_name, output
   except (KeyboardInterrupt, SystemExit):
     raise
   except Exception as e:
     on_error('Error when applying the Beam SQL: %s', e)
 
 
-def pcoll_from_file_cache(
-    query_pipeline: beam.Pipeline,
-    pcoll: beam.PCollection,
-    cache_manager: FileBasedCacheManager,
-    key: str) -> beam.PCollection:
-  """Reads PCollection cache from files.
-
-  Args:
-    query_pipeline: The beam.Pipeline object built by the magic to execute the
-        SQL query.
-    pcoll: The PCollection to read cache for.
-    cache_manager: The file based cache manager that holds the PCollection
-        cache.
-    key: The key of the PCollection cache.
-
-  Returns:
-    A PCollection read from the cache.
-  """
-  schema = pcoll.element_type
-
-  class Unreify(beam.DoFn):
-    def process(self, e):
-      if isinstance(e, beam.Row) and hasattr(e, 'windowed_value'):
-        yield e.windowed_value
-
-  return (
-      query_pipeline
-      |
-      '{}{}'.format('QuerySource', key) >> cache.ReadCache(cache_manager, key)
-      | '{}{}'.format('Unreify', key) >> beam.ParDo(
-          Unreify()).with_output_types(schema))
-
-
 def pcolls_from_streaming_cache(
     user_pipeline: beam.Pipeline,
     query_pipeline: beam.Pipeline,
-    name_to_pcoll: Dict[str, beam.PCollection],
-    instrumentation: inst.PipelineInstrument,
-    cache_manager: StreamingCache) -> Dict[str, beam.PCollection]:
+    name_to_pcoll: Dict[str, beam.PCollection]) -> Dict[str, beam.PCollection]:
   """Reads PCollection cache through the TestStream.
 
   Args:
@@ -193,9 +177,6 @@
     query_pipeline: The beam.Pipeline object built by the magic to execute the
         SQL query.
     name_to_pcoll: PCollections with variable names used in the SQL query.
-    instrumentation: A pipeline_instrument.PipelineInstrument that helps
-        calculate the cache key of a given PCollection.
-    cache_manager: The streaming cache manager that holds the PCollection cache.
 
   Returns:
     A Dict[str, beam.PCollection], where each PCollection is tagged with
@@ -208,6 +189,8 @@
     _LOGGER.error(str(e))
     return True
 
+  cache_manager = ie.current_env().get_cache_manager(
+      user_pipeline, create_if_absent=True)
   test_stream_service = ie.current_env().get_test_stream_service_controller(
       user_pipeline)
   if not test_stream_service:
@@ -219,7 +202,7 @@
 
   tag_to_name = {}
   for name, pcoll in name_to_pcoll.items():
-    key = instrumentation.cache_key(pcoll)
+    key = CacheKey.from_pcoll(name, pcoll).to_str()
     tag_to_name[key] = name
   output_pcolls = query_pipeline | test_stream.TestStream(
       output_tags=set(tag_to_name.keys()),
@@ -267,27 +250,54 @@
   """
   if found:
     user_pipeline = next(iter(found.values())).pipeline
-    cache_manager = ie.current_env().get_cache_manager(user_pipeline)
-    instrumentation = inst.build_pipeline_instrument(user_pipeline)
     sql_pipeline = beam.Pipeline(options=user_pipeline._options)
     ie.current_env().add_derived_pipeline(user_pipeline, sql_pipeline)
     sql_source = {}
-    if instrumentation.has_unbounded_sources:
+    if has_source_to_cache(user_pipeline):
       sql_source = pcolls_from_streaming_cache(
-          user_pipeline, sql_pipeline, found, instrumentation, cache_manager)
+          user_pipeline, sql_pipeline, found)
     else:
+      cache_manager = ie.current_env().get_cache_manager(
+          user_pipeline, create_if_absent=True)
       for pcoll_name, pcoll in found.items():
-        cache_key = instrumentation.cache_key(pcoll)
-        sql_source[pcoll_name] = pcoll_from_file_cache(
-            sql_pipeline, pcoll, cache_manager, cache_key)
+        cache_key = CacheKey.from_pcoll(pcoll_name, pcoll).to_str()
+        sql_source[pcoll_name] = unreify_from_cache(
+            pipeline=sql_pipeline,
+            cache_key=cache_key,
+            cache_manager=cache_manager,
+            element_type=pcoll.element_type)
     if len(sql_source) == 1:
       query = replace_single_pcoll_token(query, next(iter(sql_source.keys())))
       sql_source = next(iter(sql_source.values()))
   else:
     sql_source = beam.Pipeline()
+    ie.current_env().add_user_pipeline(sql_source)
   return query, sql_source
 
 
+@progress_indicated
+def cache_output(output_name: str, output: PValue) -> None:
+  user_pipeline = ie.current_env().user_pipeline(output.pipeline)
+  if user_pipeline:
+    cache_manager = ie.current_env().get_cache_manager(
+        user_pipeline, create_if_absent=True)
+  else:
+    _LOGGER.warning(
+        'Something is wrong with %s. Cannot introspect its data.', output)
+    return
+  key = CacheKey.from_pcoll(output_name, output).to_str()
+  _ = reify_to_cache(pcoll=output, cache_key=key, cache_manager=cache_manager)
+  try:
+    output.pipeline.run().wait_until_finish()
+  except (KeyboardInterrupt, SystemExit):
+    raise
+  except Exception as e:
+    _LOGGER.warning(_NOT_SUPPORTED_MSG, e, output.pipeline.runner)
+    return
+  ie.current_env().mark_pcollection_computed([output])
+  visualize_computed_pcoll(output_name, output)
+
+
 def load_ipython_extension(ipython):
   """Marks this module as an IPython extension.
 
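
A minimal sketch of the cache round trip the magic now performs with the reify helpers imported above; the keyword arguments mirror the calls in this hunk, while the standalone flow and variable names are illustrative.

import apache_beam as beam
from apache_beam.runners.interactive import interactive_environment as ie
from apache_beam.runners.interactive.caching.cacheable import CacheKey
from apache_beam.runners.interactive.caching.reify import reify_to_cache
from apache_beam.runners.interactive.caching.reify import unreify_from_cache

user_pipeline = beam.Pipeline()
pcoll = user_pipeline | beam.Create([1, 2, 3])
cache_manager = ie.current_env().get_cache_manager(
    user_pipeline, create_if_absent=True)
key = CacheKey.from_pcoll('pcoll', pcoll).to_str()

# Write the PCollection to cache, run the pipeline, then read the data back
# into a separate pipeline.
_ = reify_to_cache(pcoll=pcoll, cache_key=key, cache_manager=cache_manager)
user_pipeline.run().wait_until_finish()

read_pipeline = beam.Pipeline()
cached = unreify_from_cache(
    pipeline=read_pipeline,
    cache_key=key,
    cache_manager=cache_manager,
    element_type=pcoll.element_type)
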
diff --git a/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics_test.py b/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics_test.py
index d35bd46..7c4de77 100644
--- a/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics_test.py
+++ b/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics_test.py
@@ -27,10 +27,13 @@
 import apache_beam as beam
 from apache_beam.runners.interactive import interactive_beam as ib
 from apache_beam.runners.interactive import interactive_environment as ie
+from apache_beam.runners.interactive.cache_manager import FileBasedCacheManager
+from apache_beam.runners.interactive.caching.cacheable import CacheKey
 
 try:
   from apache_beam.runners.interactive.sql.beam_sql_magics import _build_query_components
   from apache_beam.runners.interactive.sql.beam_sql_magics import _generate_output_name
+  from apache_beam.runners.interactive.sql.beam_sql_magics import cache_output
 except (ImportError, NameError):
   pass  # The test is to be skipped because [interactive] dep not installed.
 
@@ -67,11 +70,11 @@
     found = {'target': target}
 
     with patch('apache_beam.runners.interactive.sql.beam_sql_magics.'
-               'pcoll_from_file_cache',
-               lambda a,
-               b,
-               c,
-               d: target):
+               'unreify_from_cache',
+               lambda pipeline,
+               cache_key,
+               cache_manager,
+               element_type: target):
       processed_query, sql_source = _build_query_components(query, found)
 
       self.assertEqual(processed_query, 'SELECT * FROM PCOLLECTION where a=1')
@@ -86,11 +89,11 @@
     found = {'pcoll_1': pcoll_1, 'pcoll_2': pcoll_2}
 
     with patch('apache_beam.runners.interactive.sql.beam_sql_magics.'
-               'pcoll_from_file_cache',
-               lambda a,
-               b,
-               c,
-               d: pcoll_1):
+               'unreify_from_cache',
+               lambda pipeline,
+               cache_key,
+               cache_manager,
+               element_type: pcoll_1):
       processed_query, sql_source = _build_query_components(query, found)
 
       self.assertEqual(processed_query, query)
@@ -110,12 +113,26 @@
                'pcolls_from_streaming_cache',
                lambda a,
                b,
-               c,
-               d,
-               e: found):
+               c: found):
       _, sql_source = _build_query_components(query, found)
       self.assertIs(sql_source, pcoll)
 
+  def test_cache_output(self):
+    p_cache_output = beam.Pipeline()
+    pcoll_co = p_cache_output | 'Create Source' >> beam.Create([1, 2, 3])
+    cache_manager = FileBasedCacheManager()
+    ie.current_env().set_cache_manager(cache_manager, p_cache_output)
+    ib.watch(locals())
+    with patch('apache_beam.runners.interactive.display.pcoll_visualization.'
+               'visualize_computed_pcoll',
+               lambda a,
+               b: None):
+      cache_output('pcoll_co', pcoll_co)
+      self.assertIn(pcoll_co, ie.current_env().computed_pcollections)
+      self.assertTrue(
+          cache_manager.exists(
+              'full', CacheKey.from_pcoll('pcoll_co', pcoll_co).to_str()))
+
 
 if __name__ == '__main__':
   unittest.main()
diff --git a/sdks/python/apache_beam/runners/interactive/sql/utils.py b/sdks/python/apache_beam/runners/interactive/sql/utils.py
index 355b6e6..1840e60 100644
--- a/sdks/python/apache_beam/runners/interactive/sql/utils.py
+++ b/sdks/python/apache_beam/runners/interactive/sql/utils.py
@@ -28,7 +28,6 @@
 
 import apache_beam as beam
 from apache_beam.runners.interactive import interactive_beam as ib
-from apache_beam.runners.interactive import interactive_environment as ie
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -37,7 +36,7 @@
   """Determines if a class is built from typing.NamedTuple."""
   return (
       isinstance(cls, type) and issubclass(cls, tuple) and
-      hasattr(cls, '_fields') and hasattr(cls, '_field_types'))
+      hasattr(cls, '_fields') and hasattr(cls, '__annotations__'))
 
 
 def register_coder_for_schema(schema: NamedTuple) -> None:
@@ -59,17 +58,6 @@
     beam.coders.registry.register_coder(schema, beam.coders.RowCoder)
 
 
-def pcolls_by_name() -> Dict[str, beam.PCollection]:
-  """Finds all PCollections by their variable names defined in the notebook."""
-  inspectables = ie.current_env().inspector.inspectables
-  pcolls = {}
-  for _, inspectable in inspectables.items():
-    metadata = inspectable['metadata']
-    if metadata['type'] == 'pcollection':
-      pcolls[metadata['name']] = inspectable['value']
-  return pcolls
-
-
 def find_pcolls(
     sql: str, pcolls: Dict[str,
                            beam.PCollection]) -> Dict[str, beam.PCollection]:
@@ -100,7 +88,6 @@
             name,
             sql)
         raise
-    _LOGGER.info('Done collecting data.')
   return found
 
 
@@ -123,3 +110,12 @@
     if token_location < len(words) and words[token_location] == pcoll_name:
       words[token_location] = 'PCOLLECTION'
   return ' '.join(words)
+
+
+def pformat_namedtuple(schema: NamedTuple) -> str:
+  return '{}({})'.format(
+      schema.__name__,
+      ', '.join([
+          '{}: {}'.format(k, v.__name__) for k,
+          v in schema.__annotations__.items()
+      ]))
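
A tiny sketch of the new pformat_namedtuple helper alongside the relaxed is_namedtuple check, matching the test added below.

from typing import NamedTuple

from apache_beam.runners.interactive.sql.utils import is_namedtuple
from apache_beam.runners.interactive.sql.utils import pformat_namedtuple


class ANamedTuple(NamedTuple):
  a: int
  b: str


assert is_namedtuple(ANamedTuple)  # _fields and __annotations__ are present.
print(pformat_namedtuple(ANamedTuple))  # 'ANamedTuple(a: int, b: str)'
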
diff --git a/sdks/python/apache_beam/runners/interactive/sql/utils_test.py b/sdks/python/apache_beam/runners/interactive/sql/utils_test.py
index ed52cad..01a54c3 100644
--- a/sdks/python/apache_beam/runners/interactive/sql/utils_test.py
+++ b/sdks/python/apache_beam/runners/interactive/sql/utils_test.py
@@ -24,10 +24,9 @@
 from unittest.mock import patch
 
 import apache_beam as beam
-from apache_beam.runners.interactive import interactive_beam as ib
 from apache_beam.runners.interactive.sql.utils import find_pcolls
 from apache_beam.runners.interactive.sql.utils import is_namedtuple
-from apache_beam.runners.interactive.sql.utils import pcolls_by_name
+from apache_beam.runners.interactive.sql.utils import pformat_namedtuple
 from apache_beam.runners.interactive.sql.utils import register_coder_for_schema
 from apache_beam.runners.interactive.sql.utils import replace_single_pcoll_token
 
@@ -58,14 +57,6 @@
     self.assertIsInstance(
         beam.coders.registry.get_coder(ANamedTuple), beam.coders.RowCoder)
 
-  def test_pcolls_by_name(self):
-    p = beam.Pipeline()
-    pcoll = p | beam.Create([1])
-    ib.watch({'p': p, 'pcoll': pcoll})
-
-    name_to_pcoll = pcolls_by_name()
-    self.assertIn('pcoll', name_to_pcoll)
-
   def test_find_pcolls(self):
     with patch('apache_beam.runners.interactive.interactive_beam.collect',
                lambda _: None):
@@ -85,6 +76,10 @@
     self.assertEqual(
         replaced_sql, 'SELECT * FROM PCOLLECTION WHERE a=1 AND b=2')
 
+  def test_pformat_namedtuple(self):
+    self.assertEqual(
+        'ANamedTuple(a: int, b: str)', pformat_namedtuple(ANamedTuple))
+
 
 if __name__ == '__main__':
   unittest.main()
diff --git a/sdks/python/apache_beam/runners/interactive/testing/integration/goldens/Darwin/29c9237ddf4f3d5988a503069b4d3c47.png b/sdks/python/apache_beam/runners/interactive/testing/integration/goldens/Darwin/29c9237ddf4f3d5988a503069b4d3c47.png
index 8463b3f..b6af3bd 100644
--- a/sdks/python/apache_beam/runners/interactive/testing/integration/goldens/Darwin/29c9237ddf4f3d5988a503069b4d3c47.png
+++ b/sdks/python/apache_beam/runners/interactive/testing/integration/goldens/Darwin/29c9237ddf4f3d5988a503069b4d3c47.png
Binary files differ
diff --git a/sdks/python/apache_beam/runners/interactive/testing/integration/goldens/Darwin/7a35f487b2a5f3a9b9852a8659eeb4bd.png b/sdks/python/apache_beam/runners/interactive/testing/integration/goldens/Darwin/7a35f487b2a5f3a9b9852a8659eeb4bd.png
index 2179619..cbb27ed 100644
--- a/sdks/python/apache_beam/runners/interactive/testing/integration/goldens/Darwin/7a35f487b2a5f3a9b9852a8659eeb4bd.png
+++ b/sdks/python/apache_beam/runners/interactive/testing/integration/goldens/Darwin/7a35f487b2a5f3a9b9852a8659eeb4bd.png
Binary files differ
diff --git a/sdks/python/apache_beam/runners/interactive/user_pipeline_tracker.py b/sdks/python/apache_beam/runners/interactive/user_pipeline_tracker.py
index 432e3d7..53ee54a 100644
--- a/sdks/python/apache_beam/runners/interactive/user_pipeline_tracker.py
+++ b/sdks/python/apache_beam/runners/interactive/user_pipeline_tracker.py
@@ -24,6 +24,7 @@
 that derived pipelines can link back to the parent user pipeline.
 """
 
+import shutil
 from typing import Iterator
 from typing import Optional
 
@@ -66,6 +67,10 @@
 
   def clear(self) -> None:
     """Clears the tracker of all user and derived pipelines."""
+    # Remove the local_tempdir of every tracked pipeline.
+    for p in self._pid_to_pipelines.values():
+      shutil.rmtree(p.local_tempdir, ignore_errors=True)
+
     self._user_pipelines.clear()
     self._derived_pipelines.clear()
     self._pid_to_pipelines.clear()
diff --git a/sdks/python/apache_beam/runners/interactive/utils.py b/sdks/python/apache_beam/runners/interactive/utils.py
index cb0b7db..2c75cc9 100644
--- a/sdks/python/apache_beam/runners/interactive/utils.py
+++ b/sdks/python/apache_beam/runners/interactive/utils.py
@@ -22,12 +22,16 @@
 import hashlib
 import json
 import logging
+from typing import Dict
 
 import pandas as pd
 
+import apache_beam as beam
 from apache_beam.dataframe.convert import to_pcollection
 from apache_beam.dataframe.frame_base import DeferredBase
 from apache_beam.portability.api.beam_runner_api_pb2 import TestStreamPayload
+from apache_beam.runners.interactive.caching.cacheable import Cacheable
+from apache_beam.runners.interactive.caching.cacheable import CacheKey
 from apache_beam.runners.interactive.caching.expression_cache import ExpressionCache
 from apache_beam.testing.test_stream import WindowedValueHolder
 from apache_beam.typehints.schemas import named_fields_from_element_type
@@ -294,3 +298,96 @@
 
   proxy = df._expr.proxy()
   return to_pcollection(df, yield_elements='pandas', label=str(df._expr)), proxy
+
+
+def pcoll_by_name() -> Dict[str, beam.PCollection]:
+  """Finds all PCollections by their variable names defined in the notebook."""
+  from apache_beam.runners.interactive import interactive_environment as ie
+
+  inspectables = ie.current_env().inspector_with_synthetic.inspectables
+  pcolls = {}
+  for _, inspectable in inspectables.items():
+    metadata = inspectable['metadata']
+    if metadata['type'] == 'pcollection':
+      pcolls[metadata['name']] = inspectable['value']
+  return pcolls
+
+
+def cacheables() -> Dict[CacheKey, Cacheable]:
+  """Finds all Cacheables with their CacheKeys."""
+  from apache_beam.runners.interactive import interactive_environment as ie
+
+  inspectables = ie.current_env().inspector_with_synthetic.inspectables
+  cacheables = {}
+  for _, inspectable in inspectables.items():
+    metadata = inspectable['metadata']
+    if metadata['type'] == 'pcollection':
+      cacheable = Cacheable.from_pcoll(metadata['name'], inspectable['value'])
+      cacheables[cacheable.to_key()] = cacheable
+  return cacheables
+
+
+def watch_sources(pipeline):
+  """Watches the unbounded sources in the pipeline.
+
+  Sources can output to a PCollection without a user variable referencing it.
+  In this case the source is not cached. We still want to cache the data, so
+  we synthetically create a variable for the intermediate PCollection.
+  """
+  from apache_beam.pipeline import PipelineVisitor
+  from apache_beam.runners.interactive import interactive_environment as ie
+
+  retrieved_user_pipeline = ie.current_env().user_pipeline(pipeline)
+  pcoll_to_name = {v: k for k, v in pcoll_by_name().items()}
+
+  class CacheableUnboundedPCollectionVisitor(PipelineVisitor):
+    def __init__(self):
+      self.unbounded_pcolls = set()
+
+    def enter_composite_transform(self, transform_node):
+      self.visit_transform(transform_node)
+
+    def visit_transform(self, transform_node):
+      if isinstance(transform_node.transform,
+                    tuple(ie.current_env().options.recordable_sources)):
+        for pcoll in transform_node.outputs.values():
+          # Only generate a synthetic var when the PCollection is not already
+          # watched. For example, the user could have assigned the unbounded
+          # source output to a variable; watching it again under a different
+          # variable name would create ambiguity.
+          if pcoll not in pcoll_to_name:
+            ie.current_env().watch({'synthetic_var_' + str(id(pcoll)): pcoll})
+
+  retrieved_user_pipeline.visit(CacheableUnboundedPCollectionVisitor())
+
+
+def has_unbounded_sources(pipeline):
+  """Checks if a given pipeline has recordable sources."""
+  return len(unbounded_sources(pipeline)) > 0
+
+
+def unbounded_sources(pipeline):
+  """Returns a pipeline's recordable sources."""
+  from apache_beam.pipeline import PipelineVisitor
+  from apache_beam.runners.interactive import interactive_environment as ie
+
+  class CheckUnboundednessVisitor(PipelineVisitor):
+    """Visitor checks if there are any unbounded read sources in the Pipeline.
+
+    Visitor visits all nodes and checks if it is an instance of recordable
+    sources.
+    """
+    def __init__(self):
+      self.unbounded_sources = []
+
+    def enter_composite_transform(self, transform_node):
+      self.visit_transform(transform_node)
+
+    def visit_transform(self, transform_node):
+      if isinstance(transform_node.transform,
+                    tuple(ie.current_env().options.recordable_sources)):
+        self.unbounded_sources.append(transform_node)
+
+  v = CheckUnboundednessVisitor()
+  pipeline.visit(v)
+  return v.unbounded_sources
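
A quick sketch of the relocated interactive utils, matching the GeneralUtilTest cases added below; the pipeline is illustrative.

import apache_beam as beam
from apache_beam.runners.interactive import interactive_beam as ib
from apache_beam.runners.interactive import utils

p = beam.Pipeline()
pcoll = p | beam.Create([1])
ib.watch({'p': p, 'pcoll': pcoll})

# Variable-name lookups and Cacheable metadata now live in utils.
name_to_pcoll = utils.pcoll_by_name()  # includes 'pcoll'
key_to_cacheable = utils.cacheables()  # keyed by CacheKey

# Recordable-source checks also moved here from pipeline_instrument.
print(utils.has_unbounded_sources(p))  # False: beam.Create is bounded.
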
diff --git a/sdks/python/apache_beam/runners/interactive/utils_test.py b/sdks/python/apache_beam/runners/interactive/utils_test.py
index ecbba30..5929c8e 100644
--- a/sdks/python/apache_beam/runners/interactive/utils_test.py
+++ b/sdks/python/apache_beam/runners/interactive/utils_test.py
@@ -17,6 +17,7 @@
 
 import json
 import logging
+import tempfile
 import unittest
 from typing import NamedTuple
 from unittest.mock import PropertyMock
@@ -30,9 +31,12 @@
 from apache_beam import coders
 from apache_beam.dataframe.convert import to_dataframe
 from apache_beam.portability.api.beam_runner_api_pb2 import TestStreamPayload
+from apache_beam.runners.interactive import interactive_beam as ib
 from apache_beam.runners.interactive import interactive_environment as ie
 from apache_beam.runners.interactive import utils
+from apache_beam.runners.interactive.caching.cacheable import Cacheable
 from apache_beam.runners.interactive.testing.mock_ipython import mock_get_ipython
+from apache_beam.runners.interactive.testing.test_cache_manager import InMemoryCache
 from apache_beam.testing.test_stream import WindowedValueHolder
 from apache_beam.utils.timestamp import Timestamp
 from apache_beam.utils.windowed_value import WindowedValue
@@ -272,5 +276,39 @@
     self.assertEqual(json.loads(dummy()), MessagingUtilTest.SAMPLE_DATA)
 
 
+class GeneralUtilTest(unittest.TestCase):
+  def test_pcoll_by_name(self):
+    p = beam.Pipeline()
+    pcoll = p | beam.Create([1])
+    ib.watch({'p': p, 'pcoll': pcoll})
+
+    name_to_pcoll = utils.pcoll_by_name()
+    self.assertIn('pcoll', name_to_pcoll)
+
+  def test_cacheables(self):
+    p2 = beam.Pipeline()
+    pcoll2 = p2 | beam.Create([2])
+    ib.watch({'p2': p2, 'pcoll2': pcoll2})
+
+    cacheables = utils.cacheables()
+    cacheable_key = Cacheable.from_pcoll('pcoll2', pcoll2).to_key()
+    self.assertIn(cacheable_key, cacheables)
+
+  def test_has_unbounded_source(self):
+    p = beam.Pipeline()
+    ie.current_env().set_cache_manager(InMemoryCache(), p)
+    _ = p | 'ReadUnboundedSource' >> beam.io.ReadFromPubSub(
+        subscription='projects/fake-project/subscriptions/fake_sub')
+    self.assertTrue(utils.has_unbounded_sources(p))
+
+  def test_not_has_unbounded_source(self):
+    p = beam.Pipeline()
+    ie.current_env().set_cache_manager(InMemoryCache(), p)
+    with tempfile.NamedTemporaryFile(delete=False) as f:
+      f.write(b'test')
+    _ = p | 'ReadBoundedSource' >> beam.io.ReadFromText(f.name)
+    self.assertFalse(utils.has_unbounded_sources(p))
+
+
 if __name__ == '__main__':
   unittest.main()
diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py
index 4053b9c..c002963 100644
--- a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py
+++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py
@@ -100,6 +100,7 @@
   def create_pipeline(self, is_drain=False):
     return beam.Pipeline(runner=fn_api_runner.FnApiRunner(is_drain=is_drain))
 
+  @retry(stop=stop_after_attempt(3))
   def test_assert_that(self):
     # TODO: figure out a way for fn_api_runner to parse and raise the
     # underlying exception.
@@ -107,10 +108,12 @@
       with self.create_pipeline() as p:
         assert_that(p | beam.Create(['a', 'b']), equal_to(['a']))
 
+  @retry(stop=stop_after_attempt(3))
   def test_create(self):
     with self.create_pipeline() as p:
       assert_that(p | beam.Create(['a', 'b']), equal_to(['a', 'b']))
 
+  @retry(stop=stop_after_attempt(3))
   def test_pardo(self):
     with self.create_pipeline() as p:
       res = (
@@ -120,6 +123,7 @@
           | beam.Map(lambda e: e + 'x'))
       assert_that(res, equal_to(['aax', 'bcbcx']))
 
+  @retry(stop=stop_after_attempt(3))
   def test_pardo_side_outputs(self):
     def tee(elem, *tags):
       for tag in tags:
@@ -134,6 +138,7 @@
       assert_that(xy.x, equal_to(['x', 'xy']), label='x')
       assert_that(xy.y, equal_to(['y', 'xy']), label='y')
 
+  @retry(stop=stop_after_attempt(3))
   def test_pardo_side_and_main_outputs(self):
     def even_odd(elem):
       yield elem
@@ -153,6 +158,7 @@
       assert_that(unnamed.even, equal_to([2]), label='unnamed.even')
       assert_that(unnamed.odd, equal_to([1, 3]), label='unnamed.odd')
 
+  @retry(stop=stop_after_attempt(3))
   def test_pardo_side_inputs(self):
     def cross_product(elem, sides):
       for side in sides:
@@ -166,6 +172,7 @@
           equal_to([('a', 'x'), ('b', 'x'), ('c', 'x'), ('a', 'y'), ('b', 'y'),
                     ('c', 'y')]))
 
+  @retry(stop=stop_after_attempt(3))
   def test_pardo_windowed_side_inputs(self):
     with self.create_pipeline() as p:
       # Now with some windowing.
@@ -194,6 +201,7 @@
           ]),
           label='windowed')
 
+  @retry(stop=stop_after_attempt(3))
   def test_flattened_side_input(self, with_transcoding=True):
     with self.create_pipeline() as p:
       main = p | 'main' >> beam.Create([None])
@@ -216,6 +224,7 @@
                   equal_to([('a', 1), ('b', 2)] + third_element),
                   label='CheckFlattenOfSideInput')
 
+  @retry(stop=stop_after_attempt(3))
   def test_gbk_side_input(self):
     with self.create_pipeline() as p:
       main = p | 'main' >> beam.Create([None])
@@ -226,6 +235,7 @@
               'a': [1]
           })]))
 
+  @retry(stop=stop_after_attempt(3))
   def test_multimap_side_input(self):
     with self.create_pipeline() as p:
       main = p | 'main' >> beam.Create(['a', 'b'])
@@ -235,6 +245,7 @@
               lambda k, d: (k, sorted(d[k])), beam.pvalue.AsMultiMap(side)),
           equal_to([('a', [1, 3]), ('b', [2])]))
 
+  @retry(stop=stop_after_attempt(3))
   def test_multimap_multiside_input(self):
     # A test where two transforms in the same stage consume the same PCollection
     # twice as side input.
@@ -256,6 +267,7 @@
               beam.pvalue.AsList(side)),
           equal_to([('a', [1, 3], [1, 2, 3]), ('b', [2], [1, 2, 3])]))
 
+  @retry(stop=stop_after_attempt(3))
   def test_multimap_side_input_type_coercion(self):
     with self.create_pipeline() as p:
       main = p | 'main' >> beam.Create(['a', 'b'])
@@ -270,6 +282,7 @@
               lambda k, d: (k, sorted(d[k])), beam.pvalue.AsMultiMap(side)),
           equal_to([('a', [1, 3]), ('b', [2])]))
 
+  @retry(stop=stop_after_attempt(3))
   def test_pardo_unfusable_side_inputs(self):
     def cross_product(elem, sides):
       for side in sides:
@@ -291,6 +304,7 @@
           pcoll | beam.FlatMap(cross_product, beam.pvalue.AsList(derived)),
           equal_to([('a', 'a'), ('a', 'b'), ('b', 'a'), ('b', 'b')]))
 
+  @retry(stop=stop_after_attempt(3))
   def test_pardo_state_only(self):
     index_state_spec = userstate.CombiningValueStateSpec('index', sum)
     value_and_index_state_spec = userstate.ReadModifyWriteStateSpec(
@@ -318,6 +332,7 @@
           p | beam.Create(inputs) | beam.ParDo(AddIndex()), equal_to(expected))
 
   @unittest.skip('TestStream not yet supported')
+  @retry(stop=stop_after_attempt(3))
   def test_teststream_pardo_timers(self):
     timer_spec = userstate.TimerSpec('timer', userstate.TimeDomain.WATERMARK)
 
@@ -347,6 +362,7 @@
       #expected = [('fired', ts) for ts in (20, 200)]
       #assert_that(actual, equal_to(expected))
 
+  @retry(stop=stop_after_attempt(3))
   def test_pardo_timers(self):
     timer_spec = userstate.TimerSpec('timer', userstate.TimeDomain.WATERMARK)
     state_spec = userstate.CombiningValueStateSpec('num_called', sum)
@@ -378,6 +394,7 @@
       expected = [('fired', ts) for ts in (20, 200, 40, 400)]
       assert_that(actual, equal_to(expected))
 
+  @retry(stop=stop_after_attempt(3))
   def test_pardo_timers_clear(self):
     timer_spec = userstate.TimerSpec('timer', userstate.TimeDomain.WATERMARK)
     clear_timer_spec = userstate.TimerSpec(
@@ -413,12 +430,15 @@
       expected = [('fired', ts) for ts in (20, 200)]
       assert_that(actual, equal_to(expected))
 
+  @retry(stop=stop_after_attempt(3))
   def test_pardo_state_timers(self):
     self._run_pardo_state_timers(windowed=False)
 
+  @retry(stop=stop_after_attempt(3))
   def test_pardo_state_timers_non_standard_coder(self):
     self._run_pardo_state_timers(windowed=False, key_type=Any)
 
+  @retry(stop=stop_after_attempt(3))
   def test_windowed_pardo_state_timers(self):
     self._run_pardo_state_timers(windowed=True)
 
@@ -487,6 +507,7 @@
 
       assert_that(actual, is_buffered_correctly)
 
+  @retry(stop=stop_after_attempt(3))
   def test_pardo_dynamic_timer(self):
     class DynamicTimerDoFn(beam.DoFn):
       dynamic_timer_spec = userstate.TimerSpec(
@@ -511,6 +532,7 @@
           | beam.ParDo(DynamicTimerDoFn()))
       assert_that(actual, equal_to([('key1', 10), ('key2', 20), ('key3', 30)]))
 
+  @retry(stop=stop_after_attempt(3))
   def test_sdf(self):
     class ExpandingStringsDoFn(beam.DoFn):
       def process(
@@ -529,6 +551,7 @@
       actual = (p | beam.Create(data) | beam.ParDo(ExpandingStringsDoFn()))
       assert_that(actual, equal_to(list(''.join(data))))
 
+  @retry(stop=stop_after_attempt(3))
   def test_sdf_with_dofn_as_restriction_provider(self):
     class ExpandingStringsDoFn(beam.DoFn, ExpandStringsProvider):
       def process(
@@ -544,6 +567,7 @@
       actual = (p | beam.Create(data) | beam.ParDo(ExpandingStringsDoFn()))
       assert_that(actual, equal_to(list(''.join(data))))
 
+  @retry(stop=stop_after_attempt(3))
   def test_sdf_with_check_done_failed(self):
     class ExpandingStringsDoFn(beam.DoFn):
       def process(
@@ -563,6 +587,7 @@
         data = ['abc', 'defghijklmno', 'pqrstuv', 'wxyz']
         _ = (p | beam.Create(data) | beam.ParDo(ExpandingStringsDoFn()))
 
+  @retry(stop=stop_after_attempt(3))
   def test_sdf_with_watermark_tracking(self):
     class ExpandingStringsDoFn(beam.DoFn):
       def process(
@@ -589,6 +614,7 @@
       actual = (p | beam.Create(data) | beam.ParDo(ExpandingStringsDoFn()))
       assert_that(actual, equal_to(list(''.join(data))))
 
+  @retry(stop=stop_after_attempt(3))
   def test_sdf_with_dofn_as_watermark_estimator(self):
     class ExpandingStringsDoFn(beam.DoFn, beam.WatermarkEstimatorProvider):
       def initial_estimator_state(self, element, restriction):
@@ -652,12 +678,15 @@
       self.assertEqual(1, len(counters))
       self.assertEqual(counters[0].committed, len(''.join(data)))
 
+  @retry(stop=stop_after_attempt(3))
   def test_sdf_with_sdf_initiated_checkpointing(self):
     self.run_sdf_initiated_checkpointing(is_drain=False)
 
+  @retry(stop=stop_after_attempt(3))
   def test_draining_sdf_with_sdf_initiated_checkpointing(self):
     self.run_sdf_initiated_checkpointing(is_drain=True)
 
+  @retry(stop=stop_after_attempt(3))
   def test_sdf_default_truncate_when_bounded(self):
     class SimleSDF(beam.DoFn):
       def process(
@@ -675,6 +704,7 @@
       actual = (p | beam.Create([10]) | beam.ParDo(SimleSDF()))
       assert_that(actual, equal_to(range(10)))
 
+  @retry(stop=stop_after_attempt(3))
   def test_sdf_default_truncate_when_unbounded(self):
     class SimleSDF(beam.DoFn):
       def process(
@@ -692,6 +722,7 @@
       actual = (p | beam.Create([10]) | beam.ParDo(SimleSDF()))
       assert_that(actual, equal_to([]))
 
+  @retry(stop=stop_after_attempt(3))
   def test_sdf_with_truncate(self):
     class SimleSDF(beam.DoFn):
       def process(
@@ -709,6 +740,7 @@
       actual = (p | beam.Create([10]) | beam.ParDo(SimleSDF()))
       assert_that(actual, equal_to(range(5)))
 
+  @retry(stop=stop_after_attempt(3))
   def test_group_by_key(self):
     with self.create_pipeline() as p:
       res = (
@@ -719,11 +751,13 @@
       assert_that(res, equal_to([('a', [1, 2]), ('b', [3])]))
 
   # Runners may special case the Reshuffle transform urn.
+  @retry(stop=stop_after_attempt(3))
   def test_reshuffle(self):
     with self.create_pipeline() as p:
       assert_that(
           p | beam.Create([1, 2, 3]) | beam.Reshuffle(), equal_to([1, 2, 3]))
 
+  @retry(stop=stop_after_attempt(3))
   def test_flatten(self, with_transcoding=True):
     with self.create_pipeline() as p:
       if with_transcoding:
@@ -737,11 +771,13 @@
           p | 'd' >> beam.Create(additional)) | beam.Flatten()
       assert_that(res, equal_to(['a', 'b', 'c'] + additional))
 
+  @retry(stop=stop_after_attempt(3))
   def test_flatten_same_pcollections(self, with_transcoding=True):
     with self.create_pipeline() as p:
       pc = p | beam.Create(['a', 'b'])
       assert_that((pc, pc, pc) | beam.Flatten(), equal_to(['a', 'b'] * 3))
 
+  @retry(stop=stop_after_attempt(3))
   def test_combine_per_key(self):
     with self.create_pipeline() as p:
       res = (
@@ -750,6 +786,7 @@
           | beam.CombinePerKey(beam.combiners.MeanCombineFn()))
       assert_that(res, equal_to([('a', 1.5), ('b', 3.0)]))
 
+  @retry(stop=stop_after_attempt(3))
   def test_read(self):
     # Can't use NamedTemporaryFile as a context
     # due to https://bugs.python.org/issue14243
@@ -763,6 +800,7 @@
     finally:
       os.unlink(temp_file.name)
 
+  @retry(stop=stop_after_attempt(3))
   def test_windowing(self):
     with self.create_pipeline() as p:
       res = (
@@ -774,6 +812,7 @@
           | beam.Map(lambda k_vs1: (k_vs1[0], sorted(k_vs1[1]))))
       assert_that(res, equal_to([('k', [1, 2]), ('k', [100, 101, 102])]))
 
+  @retry(stop=stop_after_attempt(3))
   def test_custom_merging_window(self):
     with self.create_pipeline() as p:
       res = (
@@ -790,6 +829,7 @@
     self.assertEqual(GenericMergingWindowFn._HANDLES, {})
 
   @unittest.skip('BEAM-9119: test is flaky')
+  @retry(stop=stop_after_attempt(3))
   def test_large_elements(self):
     with self.create_pipeline() as p:
       big = (
@@ -812,6 +852,7 @@
       gbk_res = (big | beam.GroupByKey() | beam.Map(lambda x: x[0]))
       assert_that(gbk_res, equal_to(['a', 'b']), label='gbk')
 
+  @retry(stop=stop_after_attempt(3))
   def test_error_message_includes_stage(self):
     with self.assertRaises(BaseException) as e_cm:
       with self.create_pipeline() as p:
@@ -831,6 +872,7 @@
     self.assertIn('StageC', message)
     self.assertNotIn('StageB', message)
 
+  @retry(stop=stop_after_attempt(3))
   def test_error_traceback_includes_user_code(self):
     def first(x):
       return second(x)
@@ -853,6 +895,7 @@
     self.assertIn('second', message)
     self.assertIn('third', message)
 
+  @retry(stop=stop_after_attempt(3))
   def test_no_subtransform_composite(self):
     class First(beam.PTransform):
       def expand(self, pcolls):
@@ -863,6 +906,7 @@
       pcoll_b = p | 'b' >> beam.Create(['b'])
       assert_that((pcoll_a, pcoll_b) | First(), equal_to(['a']))
 
+  @retry(stop=stop_after_attempt(3))
   def test_metrics(self, check_gauge=True):
     p = self.create_pipeline()
 
@@ -895,6 +939,7 @@
                                   .with_name('gauge'))['gauges']
       self.assertEqual(gaug.committed.value, 3)
 
+  @retry(stop=stop_after_attempt(3))
   def test_callbacks_with_exception(self):
     elements_list = ['1', '2']
 
@@ -914,6 +959,7 @@
           | beam.ParDo(FinalizebleDoFnWithException()))
       assert_that(res, equal_to(['1', '2']))
 
+  @retry(stop=stop_after_attempt(3))
   def test_register_finalizations(self):
     event_recorder = EventRecorder(tempfile.gettempdir())
 
@@ -951,6 +997,7 @@
 
     event_recorder.cleanup()
 
+  @retry(stop=stop_after_attempt(3))
   def test_sdf_synthetic_source(self):
     common_attrs = {
         'key_size': 1,
@@ -981,6 +1028,7 @@
           | beam.combiners.Count.Globally())
       assert_that(res, equal_to([total_num_records]))
 
+  @retry(stop=stop_after_attempt(3))
   def test_create_value_provider_pipeline_option(self):
     # Verify that the runner can execute a pipeline when there are value
     # provider pipeline options
@@ -996,6 +1044,7 @@
     with self.create_pipeline() as p:
       assert_that(p | beam.Create(['a', 'b']), equal_to(['a', 'b']))
 
+  @retry(stop=stop_after_attempt(3))
   def _test_pack_combiners(self, assert_using_counter_names):
     counter = beam.metrics.Metrics.counter('ns', 'num_values')
 
@@ -1042,6 +1091,7 @@
         self.assertTrue(
             any([re.match(packed_step_name_regex, s) for s in step_names]))
 
+  @retry(stop=stop_after_attempt(3))
   def test_pack_combiners(self):
     self._test_pack_combiners(assert_using_counter_names=True)
 
@@ -1126,6 +1176,7 @@
   def create_pipeline(self):
     return beam.Pipeline(runner=fn_api_runner.FnApiRunner())
 
+  @retry(stop=stop_after_attempt(3))
   def test_element_count_metrics(self):
     class GenerateTwoOutputs(beam.DoFn):
       def process(self, element):
@@ -1310,6 +1361,7 @@
       print(res._monitoring_infos_by_stage)
       raise
 
+  @retry(stop=stop_after_attempt(3))
   def test_non_user_metrics(self):
     p = self.create_pipeline()
 
diff --git a/sdks/python/apache_beam/runners/worker/data_plane.py b/sdks/python/apache_beam/runners/worker/data_plane.py
index e89a669..d395cee 100644
--- a/sdks/python/apache_beam/runners/worker/data_plane.py
+++ b/sdks/python/apache_beam/runners/worker/data_plane.py
@@ -24,10 +24,8 @@
 import collections
 import logging
 import queue
-import sys
 import threading
 import time
-from types import TracebackType
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Callable
@@ -41,7 +39,6 @@
 from typing import Optional
 from typing import Set
 from typing import Tuple
-from typing import Type
 from typing import Union
 
 import grpc
@@ -60,9 +57,6 @@
 else:
   OutputStream = type(coder_impl.create_OutputStream())
 
-ExcInfo = Tuple[Type[BaseException], BaseException, TracebackType]
-OptExcInfo = Union[ExcInfo, Tuple[None, None, None]]
-
 # This module is experimental. No backwards-compatibility guarantees.
 
 _LOGGER = logging.getLogger(__name__)
@@ -429,7 +423,7 @@
     self._receive_lock = threading.Lock()
     self._reads_finished = threading.Event()
     self._closed = False
-    self._exc_info = (None, None, None)  # type: OptExcInfo
+    self._exception = None  # type: Optional[Exception]
 
   def close(self):
     # type: () -> None
@@ -497,9 +491,8 @@
             raise RuntimeError('Channel closed prematurely.')
           if abort_callback():
             return
-          t, v, tb = self._exc_info
-          if t:
-            raise t(v).with_traceback(tb)
+          if self._exception:
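+            # Re-raise the failure recorded by the background reader thread.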
+            raise self._exception from None
         else:
           if isinstance(element, beam_fn_api_pb2.Elements.Timers):
             if element.is_last:
@@ -644,10 +637,10 @@
           _put_queue(timer.instruction_id, timer)
         for data in elements.data:
           _put_queue(data.instruction_id, data)
-    except:  # pylint: disable=bare-except
+    except Exception as e:
       if not self._closed:
         _LOGGER.exception('Failed to read inputs in the data plane.')
-        self._exc_info = sys.exc_info()
+        self._exception = e
         raise
     finally:
       self._closed = True
diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py
index 4ad0727..6fdca4d 100644
--- a/sdks/python/apache_beam/runners/worker/sdk_worker.py
+++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py
@@ -31,7 +31,6 @@
 import time
 import traceback
 from concurrent import futures
-from types import TracebackType
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Callable
@@ -45,7 +44,6 @@
 from typing import MutableMapping
 from typing import Optional
 from typing import Tuple
-from typing import Type
 from typing import TypeVar
 from typing import Union
 
@@ -73,9 +71,6 @@
   from apache_beam.portability.api import endpoints_pb2
   from apache_beam.utils.profiler import Profile
 
-ExcInfo = Tuple[Type[BaseException], BaseException, TracebackType]
-OptExcInfo = Union[ExcInfo, Tuple[None, None, None]]
-
 T = TypeVar('T')
 _KT = TypeVar('_KT')
 _VT = TypeVar('_VT')
@@ -1002,7 +997,7 @@
     )  # type: queue.Queue[Union[beam_fn_api_pb2.StateRequest, Sentinel]]
     self._responses_by_id = {}  # type: Dict[str, _Future]
     self._last_id = 0
-    self._exc_info = None  # type: Optional[OptExcInfo]
+    self._exception = None  # type: Optional[Exception]
     self._context = threading.local()
     self.start()
 
@@ -1041,8 +1036,8 @@
           future.set(response)
           if self._done:
             break
-      except:  # pylint: disable=bare-except
-        self._exc_info = sys.exc_info()
+      except Exception as e:
+        self._exception = e
         raise
 
     reader = threading.Thread(target=pull_responses, name='read_state')
@@ -1099,10 +1094,8 @@
     # type: (beam_fn_api_pb2.StateRequest) -> beam_fn_api_pb2.StateResponse
     req_future = self._request(request)
     while not req_future.wait(timeout=1):
-      if self._exc_info:
-        t, v, tb = self._exc_info
-        if t and v and tb:
-          raise t(v).with_traceback(tb)
+      if self._exception:
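+        # Propagate the failure recorded by the pull_responses reader thread.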
+        raise self._exception
       elif self._done:
         raise RuntimeError()
     response = req_future.get()
diff --git a/sdks/python/apache_beam/testing/util.py b/sdks/python/apache_beam/testing/util.py
index 5951581..c42f90e 100644
--- a/sdks/python/apache_beam/testing/util.py
+++ b/sdks/python/apache_beam/testing/util.py
@@ -23,6 +23,7 @@
 import glob
 import io
 import tempfile
+from typing import Iterable
 
 from apache_beam import pvalue
 from apache_beam.transforms import window
@@ -287,19 +288,19 @@
       if use_global_window:
         pcoll = pcoll | WindowInto(window.GlobalWindows())
 
-      keyed_actual = pcoll | "ToVoidKey" >> Map(lambda v: (None, v))
+      keyed_actual = pcoll | 'ToVoidKey' >> Map(lambda v: (None, v))
       keyed_actual.is_bounded = True
 
       # This is a CoGroupByKey so that the matcher always runs, even if the
       # PCollection is empty.
       plain_actual = ((keyed_singleton, keyed_actual)
-                      | "Group" >> CoGroupByKey()
-                      | "Unkey" >> Map(lambda k_values: k_values[1][1]))
+                      | 'Group' >> CoGroupByKey()
+                      | 'Unkey' >> Map(lambda k_values: k_values[1][1]))
 
       if not use_global_window:
-        plain_actual = plain_actual | "AddWindow" >> ParDo(AddWindow())
+        plain_actual = plain_actual | 'AddWindow' >> ParDo(AddWindow())
 
-      plain_actual = plain_actual | "Match" >> Map(matcher)
+      plain_actual = plain_actual | 'Match' >> Map(matcher)
 
     def default_label(self):
       return label
@@ -329,3 +330,20 @@
         out_file.write(in_file.read())
     concatenated_file_name = out_file.name
   return io.open(concatenated_file_name, mode, encoding=encoding)
+
+
+def _sort_lists(result):
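+  """Recursively sorts lists and other non-string iterables in a result."""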
+  if isinstance(result, list):
+    return sorted(result)
+  elif isinstance(result, tuple):
+    return tuple(_sort_lists(e) for e in result)
+  elif isinstance(result, dict):
+    return {k: _sort_lists(v) for k, v in result.items()}
+  elif isinstance(result, Iterable) and not isinstance(result, str):
+    return sorted(result)
+  else:
+    return result
+
+
+# A utility transform that recursively sorts lists for easier testing.
+SortLists = Map(_sort_lists)
diff --git a/sdks/python/apache_beam/transforms/ptransform_test.py b/sdks/python/apache_beam/transforms/ptransform_test.py
index 9ce33f5..ccf0a55 100644
--- a/sdks/python/apache_beam/transforms/ptransform_test.py
+++ b/sdks/python/apache_beam/transforms/ptransform_test.py
@@ -27,7 +27,6 @@
 import typing
 import unittest
 from functools import reduce
-from typing import Iterable
 from typing import Optional
 from unittest.mock import patch
 
@@ -46,6 +45,7 @@
 from apache_beam.portability import common_urns
 from apache_beam.testing.test_pipeline import TestPipeline
 from apache_beam.testing.test_stream import TestStream
+from apache_beam.testing.util import SortLists
 from apache_beam.testing.util import assert_that
 from apache_beam.testing.util import equal_to
 from apache_beam.testing.util import is_empty
@@ -470,7 +470,7 @@
     with TestPipeline() as pipeline:
       pcoll = pipeline | 'start' >> beam.Create([(1, 1), (2, 1), (3, 1), (1, 2),
                                                  (2, 2), (1, 3)])
-      result = pcoll | 'Group' >> beam.GroupByKey() | _SortLists
+      result = pcoll | 'Group' >> beam.GroupByKey() | SortLists
       assert_that(result, equal_to([(1, [1, 2, 3]), (2, [1, 2]), (3, [1])]))
 
   def test_group_by_key_unbounded_global_default_trigger(self):
@@ -675,7 +675,7 @@
       created = pipeline | 'A' >> beam.Create(contents)
       partitioned = created | 'B' >> beam.Partition(lambda x, n: len(x) % n, 3)
       flattened = partitioned | 'C' >> beam.Flatten()
-      grouped = flattened | 'D' >> beam.GroupByKey() | _SortLists
+      grouped = flattened | 'D' >> beam.GroupByKey() | SortLists
       assert_that(grouped, equal_to([('aa', [1, 2]), ('bb', [2])]))
 
   @pytest.mark.it_validatesrunner
@@ -755,75 +755,6 @@
       assert_that(even_length, equal_to(['AA', 'CC']), label='assert:even')
       assert_that(odd_length, equal_to(['BBB']), label='assert:odd')
 
-  def test_co_group_by_key_on_list(self):
-    with TestPipeline() as pipeline:
-      pcoll_1 = pipeline | 'Start 1' >> beam.Create([('a', 1), ('a', 2),
-                                                     ('b', 3), ('c', 4)])
-      pcoll_2 = pipeline | 'Start 2' >> beam.Create([('a', 5), ('a', 6),
-                                                     ('c', 7), ('c', 8)])
-      result = (pcoll_1, pcoll_2) | beam.CoGroupByKey() | _SortLists
-      assert_that(
-          result,
-          equal_to([('a', ([1, 2], [5, 6])), ('b', ([3], [])),
-                    ('c', ([4], [7, 8]))]))
-
-  def test_co_group_by_key_on_iterable(self):
-    with TestPipeline() as pipeline:
-      pcoll_1 = pipeline | 'Start 1' >> beam.Create([('a', 1), ('a', 2),
-                                                     ('b', 3), ('c', 4)])
-      pcoll_2 = pipeline | 'Start 2' >> beam.Create([('a', 5), ('a', 6),
-                                                     ('c', 7), ('c', 8)])
-      result = iter([pcoll_1, pcoll_2]) | beam.CoGroupByKey()
-      result |= _SortLists
-      assert_that(
-          result,
-          equal_to([('a', ([1, 2], [5, 6])), ('b', ([3], [])),
-                    ('c', ([4], [7, 8]))]))
-
-  def test_co_group_by_key_on_dict(self):
-    with TestPipeline() as pipeline:
-      pcoll_1 = pipeline | 'Start 1' >> beam.Create([('a', 1), ('a', 2),
-                                                     ('b', 3), ('c', 4)])
-      pcoll_2 = pipeline | 'Start 2' >> beam.Create([('a', 5), ('a', 6),
-                                                     ('c', 7), ('c', 8)])
-      result = {'X': pcoll_1, 'Y': pcoll_2} | beam.CoGroupByKey()
-      result |= _SortLists
-      assert_that(
-          result,
-          equal_to([('a', {
-              'X': [1, 2], 'Y': [5, 6]
-          }), ('b', {
-              'X': [3], 'Y': []
-          }), ('c', {
-              'X': [4], 'Y': [7, 8]
-          })]))
-
-  def test_co_group_by_key_on_dict_with_tuple_keys(self):
-    with TestPipeline() as pipeline:
-      key = ('a', ('b', 'c'))
-      pcoll_1 = pipeline | 'Start 1' >> beam.Create([(key, 1)])
-      pcoll_2 = pipeline | 'Start 2' >> beam.Create([(key, 2)])
-      result = {'X': pcoll_1, 'Y': pcoll_2} | beam.CoGroupByKey()
-      result |= _SortLists
-      assert_that(result, equal_to([(key, {'X': [1], 'Y': [2]})]))
-
-  def test_co_group_by_key_on_empty(self):
-    with TestPipeline() as pipeline:
-      assert_that(
-          tuple() | 'EmptyTuple' >> beam.CoGroupByKey(pipeline=pipeline),
-          equal_to([]),
-          label='AssertEmptyTuple')
-      assert_that([] | 'EmptyList' >> beam.CoGroupByKey(pipeline=pipeline),
-                  equal_to([]),
-                  label='AssertEmptyList')
-      assert_that(
-          iter([]) | 'EmptyIterable' >> beam.CoGroupByKey(pipeline=pipeline),
-          equal_to([]),
-          label='AssertEmptyIterable')
-      assert_that({} | 'EmptyDict' >> beam.CoGroupByKey(pipeline=pipeline),
-                  equal_to([]),
-                  label='AssertEmptyDict')
-
   def test_group_by_key_input_must_be_kv_pairs(self):
     with self.assertRaises(typehints.TypeCheckError) as e:
       with TestPipeline() as pipeline:
@@ -886,7 +817,7 @@
     self.assertCountEqual([1, 2, 100, 3], ([1, 2, 3], [100]) | beam.Flatten())
     join_input = ([('k', 'a')], [('k', 'b'), ('k', 'c')])
     self.assertCountEqual([('k', (['a'], ['b', 'c']))],
-                          join_input | beam.CoGroupByKey() | _SortLists)
+                          join_input | beam.CoGroupByKey() | SortLists)
 
   def test_multi_input_ptransform(self):
     class DisjointUnion(PTransform):
@@ -1678,7 +1609,7 @@
                               ]).with_output_types(str)
         | 'GenKeys' >> beam.Map(group_with_upper_ord)
         | 'O' >> beam.GroupByKey()
-        | _SortLists)
+        | SortLists)
 
     assert_that(
         result,
@@ -1731,7 +1662,7 @@
         | 'Nums' >> beam.Create(range(5)).with_output_types(int)
         | 'IsEven' >> beam.Map(is_even_as_key)
         | 'Parity' >> beam.GroupByKey()
-        | _SortLists)
+        | SortLists)
 
     assert_that(result, equal_to([(False, [1, 3]), (True, [0, 2, 4])]))
     self.p.run()
@@ -2626,22 +2557,6 @@
       _ = (p | beam.Create([1, 2]) | MyTransform('test').with_output_types(int))
 
 
-def _sort_lists(result):
-  if isinstance(result, list):
-    return sorted(result)
-  elif isinstance(result, tuple):
-    return tuple(_sort_lists(e) for e in result)
-  elif isinstance(result, dict):
-    return {k: _sort_lists(v) for k, v in result.items()}
-  elif isinstance(result, Iterable) and not isinstance(result, str):
-    return sorted(result)
-  else:
-    return result
-
-
-_SortLists = beam.Map(_sort_lists)
-
-
 class PickledObject(object):
   def __init__(self, value):
     self.value = value
diff --git a/sdks/python/apache_beam/transforms/util_test.py b/sdks/python/apache_beam/transforms/util_test.py
index 94cc8f3..6726df4 100644
--- a/sdks/python/apache_beam/transforms/util_test.py
+++ b/sdks/python/apache_beam/transforms/util_test.py
@@ -43,6 +43,7 @@
 from apache_beam.runners import pipeline_context
 from apache_beam.testing.test_pipeline import TestPipeline
 from apache_beam.testing.test_stream import TestStream
+from apache_beam.testing.util import SortLists
 from apache_beam.testing.util import TestWindowedValue
 from apache_beam.testing.util import assert_that
 from apache_beam.testing.util import contains_in_any_order
@@ -72,6 +73,106 @@
     'ignore', category=FutureWarning, module='apache_beam.transform.util_test')
 
 
+class CoGroupByKeyTest(unittest.TestCase):
+  def test_co_group_by_key_on_tuple(self):
+    with TestPipeline() as pipeline:
+      pcoll_1 = pipeline | 'Start 1' >> beam.Create([('a', 1), ('a', 2),
+                                                     ('b', 3), ('c', 4)])
+      pcoll_2 = pipeline | 'Start 2' >> beam.Create([('a', 5), ('a', 6),
+                                                     ('c', 7), ('c', 8)])
+      result = (pcoll_1, pcoll_2) | beam.CoGroupByKey() | SortLists
+      assert_that(
+          result,
+          equal_to([('a', ([1, 2], [5, 6])), ('b', ([3], [])),
+                    ('c', ([4], [7, 8]))]))
+
+  def test_co_group_by_key_on_iterable(self):
+    with TestPipeline() as pipeline:
+      pcoll_1 = pipeline | 'Start 1' >> beam.Create([('a', 1), ('a', 2),
+                                                     ('b', 3), ('c', 4)])
+      pcoll_2 = pipeline | 'Start 2' >> beam.Create([('a', 5), ('a', 6),
+                                                     ('c', 7), ('c', 8)])
+      result = iter([pcoll_1, pcoll_2]) | beam.CoGroupByKey() | SortLists
+      assert_that(
+          result,
+          equal_to([('a', ([1, 2], [5, 6])), ('b', ([3], [])),
+                    ('c', ([4], [7, 8]))]))
+
+  def test_co_group_by_key_on_list(self):
+    with TestPipeline() as pipeline:
+      pcoll_1 = pipeline | 'Start 1' >> beam.Create([('a', 1), ('a', 2),
+                                                     ('b', 3), ('c', 4)])
+      pcoll_2 = pipeline | 'Start 2' >> beam.Create([('a', 5), ('a', 6),
+                                                     ('c', 7), ('c', 8)])
+      result = [pcoll_1, pcoll_2] | beam.CoGroupByKey() | SortLists
+      assert_that(
+          result,
+          equal_to([('a', ([1, 2], [5, 6])), ('b', ([3], [])),
+                    ('c', ([4], [7, 8]))]))
+
+  def test_co_group_by_key_on_dict(self):
+    with TestPipeline() as pipeline:
+      pcoll_1 = pipeline | 'Start 1' >> beam.Create([('a', 1), ('a', 2),
+                                                     ('b', 3), ('c', 4)])
+      pcoll_2 = pipeline | 'Start 2' >> beam.Create([('a', 5), ('a', 6),
+                                                     ('c', 7), ('c', 8)])
+      result = {'X': pcoll_1, 'Y': pcoll_2} | beam.CoGroupByKey() | SortLists
+      assert_that(
+          result,
+          equal_to([('a', {
+              'X': [1, 2], 'Y': [5, 6]
+          }), ('b', {
+              'X': [3], 'Y': []
+          }), ('c', {
+              'X': [4], 'Y': [7, 8]
+          })]))
+
+  def test_co_group_by_key_on_dict_with_tuple_keys(self):
+    with TestPipeline() as pipeline:
+      key = ('a', ('b', 'c'))
+      pcoll_1 = pipeline | 'Start 1' >> beam.Create([(key, 1)])
+      pcoll_2 = pipeline | 'Start 2' >> beam.Create([(key, 2)])
+      result = {'X': pcoll_1, 'Y': pcoll_2} | beam.CoGroupByKey() | SortLists
+      assert_that(result, equal_to([(key, {'X': [1], 'Y': [2]})]))
+
+  def test_co_group_by_key_on_empty(self):
+    with TestPipeline() as pipeline:
+      assert_that(
+          tuple() | 'EmptyTuple' >> beam.CoGroupByKey(pipeline=pipeline),
+          equal_to([]),
+          label='AssertEmptyTuple')
+      assert_that([] | 'EmptyList' >> beam.CoGroupByKey(pipeline=pipeline),
+                  equal_to([]),
+                  label='AssertEmptyList')
+      assert_that(
+          iter([]) | 'EmptyIterable' >> beam.CoGroupByKey(pipeline=pipeline),
+          equal_to([]),
+          label='AssertEmptyIterable')
+      assert_that({} | 'EmptyDict' >> beam.CoGroupByKey(pipeline=pipeline),
+                  equal_to([]),
+                  label='AssertEmptyDict')
+
+  def test_co_group_by_key_on_one(self):
+    with TestPipeline() as pipeline:
+      pcoll = pipeline | beam.Create([('a', 1), ('b', 2)])
+      expected = [('a', ([1], )), ('b', ([2], ))]
+      assert_that((pcoll, ) | 'OneTuple' >> beam.CoGroupByKey(),
+                  equal_to(expected),
+                  label='AssertOneTuple')
+      assert_that([pcoll] | 'OneList' >> beam.CoGroupByKey(),
+                  equal_to(expected),
+                  label='AssertOneList')
+      assert_that(
+          iter([pcoll]) | 'OneIterable' >> beam.CoGroupByKey(),
+          equal_to(expected),
+          label='AssertOneIterable')
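+      # A dict input groups values under the dict's key, so unwrap the 'tag'
+      # entry to compare against the tuple-shaped expected results.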
+      assert_that({'tag': pcoll}
+                  | 'OneDict' >> beam.CoGroupByKey()
+                  | beam.MapTuple(lambda k, v: (k, (v['tag'], ))),
+                  equal_to(expected),
+                  label='AssertOneDict')
+
+
 class FakeClock(object):
   def __init__(self, now=time.time()):
     self._now = now
diff --git a/sdks/python/apache_beam/typehints/native_type_compatibility.py b/sdks/python/apache_beam/typehints/native_type_compatibility.py
index 3d69cd6..5d5ae96 100644
--- a/sdks/python/apache_beam/typehints/native_type_compatibility.py
+++ b/sdks/python/apache_beam/typehints/native_type_compatibility.py
@@ -107,7 +107,7 @@
 def match_is_named_tuple(user_type):
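+  # NamedTuple's _field_types attribute was deprecated in Python 3.8 and
+  # removed in 3.9; the per-field types are available via __annotations__.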
   return (
       _safe_issubclass(user_type, typing.Tuple) and
-      hasattr(user_type, '_field_types'))
+      hasattr(user_type, '__annotations__'))
 
 
 def _match_is_optional(user_type):
diff --git a/sdks/python/apache_beam/typehints/schemas.py b/sdks/python/apache_beam/typehints/schemas.py
index 3ef035e..d62ba54 100644
--- a/sdks/python/apache_beam/typehints/schemas.py
+++ b/sdks/python/apache_beam/typehints/schemas.py
@@ -157,7 +157,7 @@
     if schema is None:
       fields = [
           schema_pb2.Field(
-              name=name, type=typing_to_runner_api(type_._field_types[name]))
+              name=name, type=typing_to_runner_api(type_.__annotations__[name]))
           for name in type_._fields
       ]
       type_id = str(uuid4())
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index a71e1da..514f4e7 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -219,7 +219,7 @@
     # headless chrome based integration tests
     'selenium>=3.141.0,<4',
     'needle>=0.5.0,<1',
-    'chromedriver-binary>=91,<92',
+    'chromedriver-binary>=93,<94',
     # use a fixed major version of PIL for different python versions
     'pillow>=7.1.1,<8',
 ]
diff --git a/website/www/site/content/en/blog/beam-2.32.0.md b/website/www/site/content/en/blog/beam-2.32.0.md
index d0d3b12..39c6f89 100644
--- a/website/www/site/content/en/blog/beam-2.32.0.md
+++ b/website/www/site/content/en/blog/beam-2.32.0.md
@@ -88,8 +88,10 @@
 
 ## Deprecations
 
+* In Beam 2.33, Python GBK will stop supporting unbounded PCollections that have global windowing and a default trigger. This can be overridden with `--allow_unsafe_triggers`. ([BEAM-9487](https://issues.apache.org/jira/browse/BEAM-9487)).
+* Beginning with Beam 2.33, Python GBK will require safe triggers or the `--allow_unsafe_triggers` flag. ([BEAM-9487](https://issues.apache.org/jira/browse/BEAM-9487)).
 
-## Known Issues
+## Bugfixes
 
 * Fixed race condition in RabbitMqIO causing duplicate acks (Java) ([BEAM-6516](https://issues.apache.org/jira/browse/BEAM-6516)))
 
diff --git a/website/www/site/content/en/documentation/dsls/dataframes/differences-from-pandas.md b/website/www/site/content/en/documentation/dsls/dataframes/differences-from-pandas.md
index da0054e..edcdddc 100644
--- a/website/www/site/content/en/documentation/dsls/dataframes/differences-from-pandas.md
+++ b/website/www/site/content/en/documentation/dsls/dataframes/differences-from-pandas.md
@@ -85,8 +85,8 @@
 
 ## Using Interactive Beam to access the full pandas API
 
-Interactive Beam is a module designed for use in interactive notebooks. The module, which by convention is imported as `ib`, provides an `ib.collect` function that brings a `PCollection` or deferred DataFrrame into local memory as a pandas DataFrame. After using `ib.collect` to materialize a deferred DataFrame you will be able to perform any operation in the pandas API, not just those that are supported in Beam.
+Interactive Beam is a module designed for use in interactive notebooks. The module, which by convention is imported as `ib`, provides an `ib.collect` function that brings a `PCollection` or deferred DataFrame into local memory as a pandas DataFrame. After using `ib.collect` to materialize a deferred DataFrame, you can perform any operation in the pandas API, not just those supported in Beam.
 
-<!-- TODO: Add code sample: https://issues.apache.org/jira/browse/BEAM-12535 -->
+{{< button-colab url="https://colab.research.google.com/github/apache/beam/blob/master/examples/notebooks/tour-of-beam/dataframes.ipynb" >}}
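+
+As a minimal sketch of this workflow (assuming a notebook environment with the `InteractiveRunner`; the element values below are illustrative):
+
+{{< highlight py >}}
+import apache_beam as beam
+import apache_beam.runners.interactive.interactive_beam as ib
+from apache_beam.dataframe.convert import to_dataframe
+from apache_beam.runners.interactive.interactive_runner import InteractiveRunner
+
+p = beam.Pipeline(InteractiveRunner())
+pcoll = p | beam.Create([beam.Row(word='a', count=1), beam.Row(word='b', count=2)])
+
+# Convert the schema-aware PCollection to a deferred Beam DataFrame.
+df = to_dataframe(pcoll)
+
+# ib.collect materializes the deferred DataFrame as a local pandas DataFrame,
+# so the full pandas API is available from this point on.
+local_df = ib.collect(df)
+print(local_df['count'].sum())
+{{< /highlight >}}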
 
 To get started with Beam in a notebook, see [Try Apache Beam](https://beam.apache.org/get-started/try-apache-beam/).
diff --git a/website/www/site/content/en/documentation/dsls/dataframes/overview.md b/website/www/site/content/en/documentation/dsls/dataframes/overview.md
index e08c61b..8d2c0a8 100644
--- a/website/www/site/content/en/documentation/dsls/dataframes/overview.md
+++ b/website/www/site/content/en/documentation/dsls/dataframes/overview.md
@@ -18,6 +18,8 @@
 
 # Beam DataFrames overview
 
+{{< button-colab url="https://colab.research.google.com/github/apache/beam/blob/master/examples/notebooks/tour-of-beam/dataframes.ipynb" >}}
+
 The Apache Beam Python SDK provides a DataFrame API for working with pandas-like [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) objects. The feature lets you convert a PCollection to a DataFrame and then interact with the DataFrame using the standard methods available on the pandas DataFrame API. The DataFrame API is built on top of the pandas implementation, and pandas DataFrame methods are invoked on subsets of the datasets in parallel. The big difference between Beam DataFrames and pandas DataFrames is that operations are deferred by the Beam API, to support the Beam parallel processing model. (To learn more about differences between the DataFrame implementations, see [Differences from pandas](/documentation/dsls/dataframes/differences-from-pandas/).)
 
 You can think of Beam DataFrames as a domain-specific language (DSL) for Beam pipelines. Similar to [Beam SQL](https://beam.apache.org/documentation/dsls/sql/overview/), DataFrames is a DSL built into the Beam Python SDK. Using this DSL, you can create pipelines without referencing standard Beam constructs like [ParDo](https://beam.apache.org/documentation/transforms/python/elementwise/pardo/) or [CombinePerKey](https://beam.apache.org/documentation/transforms/python/aggregation/combineperkey/).
@@ -115,3 +117,5 @@
 
 [pydoc_dataframe_transform]: https://beam.apache.org/releases/pydoc/current/apache_beam.dataframe.transforms.html#apache_beam.dataframe.transforms.DataframeTransform
 [pydoc_sql_transform]: https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.sql.html#apache_beam.transforms.sql.SqlTransform
+
+{{< button-colab url="https://colab.research.google.com/github/apache/beam/blob/master/examples/notebooks/tour-of-beam/dataframes.ipynb" >}}
diff --git a/website/www/site/content/en/documentation/glossary.md b/website/www/site/content/en/documentation/glossary.md
index 4bc7395..acca8a3 100644
--- a/website/www/site/content/en/documentation/glossary.md
+++ b/website/www/site/content/en/documentation/glossary.md
@@ -308,6 +308,14 @@
 * [Overview](/documentation/programming-guide/#overview)
 * [Transforms](/documentation/programming-guide/#transforms)
 
+## Resource hints
+
+A Beam feature that lets you provide information to a runner about the compute resource requirements of your pipeline. You can use resource hints to define requirements for specific transforms or for an entire pipeline. For example, you could use a resource hint to specify the minimum amount of memory to allocate to workers. The runner is responsible for interpreting resource hints, and runners can ignore unsupported hints.
+
+To learn more, see:
+
+* [Resource hints](/documentation/runtime/resource-hints)
+
 ## Runner
 
 A runner runs a pipeline on a specific platform. Most runners are translators or adapters to massively parallel big data processing systems. Other runners exist for local testing and debugging. Among the supported runners are Google Cloud Dataflow, Apache Spark, Apache Samza, Apache Flink, the Interactive Runner, and the Direct Runner.
diff --git a/website/www/site/content/en/documentation/io/built-in/hcatalog.md b/website/www/site/content/en/documentation/io/built-in/hcatalog.md
index 9cf5d9a..22d4ffa 100644
--- a/website/www/site/content/en/documentation/io/built-in/hcatalog.md
+++ b/website/www/site/content/en/documentation/io/built-in/hcatalog.md
@@ -47,6 +47,7 @@
 The destination table should exist beforehand as the transform will not create a new table if missing.
 
 For example:
+
 {{< highlight java >}}
 Map<String, String> configProperties = new HashMap<String, String>();
 configProperties.put("hive.metastore.uris","thrift://metastore-host:port");
@@ -60,7 +61,6 @@
     .withPartition(partitionValues) //optional, may be specified if the table is partitioned
     .withBatchSize(1024L)) //optional, assumes a default batch size of 1024 if none specified
 {{< /highlight >}}
-
 {{< highlight py >}}
   # The Beam SDK for Python does not support HCatalogIO.
 {{< /highlight >}}
diff --git a/website/www/site/content/en/documentation/runtime/resource-hints.md b/website/www/site/content/en/documentation/runtime/resource-hints.md
new file mode 100644
index 0000000..38357de
--- /dev/null
+++ b/website/www/site/content/en/documentation/runtime/resource-hints.md
@@ -0,0 +1,81 @@
+---
+title: "Resource hints"
+---
+<!--
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+# Resource hints
+
+Resource hints let pipeline authors provide information to a runner about compute resource requirements. You can use resource hints to define requirements for specific transforms or for an entire pipeline. The runner is responsible for interpreting resource hints, and runners can ignore unsupported hints.
+
+Resource hints can be nested. For example, resource hints can be specified on subtransforms of a composite transform, and that composite transform can also have resource hints applied. By default, the innermost hint takes precedence. However, hints can define custom reconciliation behavior. For instance, `min_ram` takes the maximum value for all `min_ram` values set on a given step in the pipeline.
+
+{{< language-switcher java py >}}
+
+## Available hints
+
+Currently, Beam supports the following resource hints:
+
+* `min_ram="numberXB"`: The minimum amount of RAM to allocate to workers. Beam can parse various byte units, including MB, GB, MiB, and GiB (for example, `min_ram="4GB"`). This hint is intended to provide an advisory minimum memory requirement for processing a transform.
+* `accelerator="hint"`: This hint is intended to describe a hardware accelerator to use for processing a transform. For example, the following is valid accelerator syntax for the Dataflow runner: `accelerator="type:<type>;count:<n>;<options>"`
+
+The interpretation and actuation of resource hints can vary between runners. For an example implementation, see the [Dataflow resource hints](https://cloud.google.com/dataflow/docs/guides/right-fitting#available_resource_hints).
+
+## Specifying resource hints for a pipeline
+
+To specify resource hints for an entire pipeline, you can use pipeline options. The following command shows the basic syntax.
+
+{{< highlight java >}}
+mvn compile exec:java -Dexec.mainClass=com.example.MyPipeline \
+    -Dexec.args="... \
+                 --resourceHints=min_ram=<N>GB \
+                 --resourceHints=accelerator='hint'" \
+    -Pdirect-runner
+{{< /highlight >}}
+{{< highlight py >}}
+python my_pipeline.py \
+    ... \
+    --resource_hints min_ram=<N>GB \
+    --resource_hints accelerator="hint"
+{{< /highlight >}}
+
+## Specifying resource hints for a transform
+
+{{< paragraph class="language-java" >}}
+You can set resource hints programmatically on pipeline transforms using [setResourceHints](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/transforms/PTransform.html#setResourceHints-org.apache.beam.sdk.transforms.resourcehints.ResourceHints-).
+{{< /paragraph >}}
+
+{{< paragraph class="language-py" >}}
+You can set resource hints programmatically on pipeline transforms using [PTransform.with_resource_hints](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.with_resource_hints) (also see [ResourceHint](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/transforms/resources.py#L51)).
+{{< /paragraph >}}
+
+{{< highlight java >}}
+pcoll.apply(MyCompositeTransform.of(...)
+    .setResourceHints(
+        ResourceHints.create()
+            .withMinRam("15GB")
+            .withAccelerator("type:nvidia-tesla-k80;count:1;install-nvidia-driver")))
+
+pcoll.apply(ParDo.of(new BigMemFn())
+    .setResourceHints(
+        ResourceHints.create().withMinRam("30GB")))
+{{< /highlight >}}
+{{< highlight py >}}
+pcoll | MyPTransform().with_resource_hints(
+    min_ram="4GB",
+    accelerator="type:nvidia-tesla-k80;count:1;install-nvidia-driver")
+
+pcoll | beam.ParDo(BigMemFn()).with_resource_hints(
+    min_ram="30GB")
+{{< /highlight >}}
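+
+Hints on nested transforms reconcile as described above: the innermost hint takes precedence unless the hint defines custom reconciliation, as `min_ram` does by taking the maximum. The following sketch illustrates this (the `OuterTransform` composite is hypothetical):
+
+{{< highlight py >}}
+class OuterTransform(beam.PTransform):
+  def expand(self, pcoll):
+    # The inner step asks for at least 30GB.
+    return pcoll | beam.ParDo(BigMemFn()).with_resource_hints(min_ram="30GB")
+
+# The composite asks for 4GB. Because min_ram reconciles by taking the
+# maximum, the inner ParDo still gets a 30GB minimum.
+pcoll | OuterTransform().with_resource_hints(min_ram="4GB")
+{{< /highlight >}}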
diff --git a/website/www/site/content/en/get-started/quickstart-java.md b/website/www/site/content/en/get-started/quickstart-java.md
index 44b9bda..caf8c45 100644
--- a/website/www/site/content/en/get-started/quickstart-java.md
+++ b/website/www/site/content/en/get-started/quickstart-java.md
@@ -106,16 +106,14 @@
 
 ## Optional: Convert from Maven to Gradle Project
 
-Ensure you are in the same directory as the `pom.xml` file generated from the previous step. Automatically convert your project from Maven to Gradle by running:
+The steps below explain how to convert the build for the Direct Runner from Maven to Gradle. Converting the builds for the other runners is a more involved process and is out of scope for this guide. For additional guidance, see [Migrating Builds From Apache Maven](https://docs.gradle.org/current/userguide/migrating_from_maven.html).
+
+1. Ensure you are in the same directory as the `pom.xml` file generated from the previous step. Automatically convert your project from Maven to Gradle by running:
 {{< highlight >}}
 $ gradle init
 {{< /highlight >}}
-
 You'll be asked if you want to generate a Gradle build. Enter **yes**. You'll also be prompted to choose a DSL (Groovy or Kotlin). This tutorial uses Groovy, so select that if you don't have a preference.
-
-After you have converted the project to Gradle:
-
-1. In the generated `build.gradle` file, in the `repositories` block, replace `mavenLocal()` with `mavenCentral()`:
+1. After you've converted the project to Gradle, open the generated `build.gradle` file and, in the `repositories` block, replace `mavenLocal()` with `mavenCentral()`:
 {{< highlight >}}
 repositories {
     mavenCentral()
diff --git a/website/www/site/content/en/get-started/tour-of-beam.md b/website/www/site/content/en/get-started/tour-of-beam.md
index 9e9589b..b2f1484 100644
--- a/website/www/site/content/en/get-started/tour-of-beam.md
+++ b/website/www/site/content/en/get-started/tour-of-beam.md
@@ -49,6 +49,15 @@
 
 {{< button-colab url="https://colab.research.google.com/github/apache/beam/blob/master/examples/notebooks/tour-of-beam/windowing.ipynb" >}}
 
+### DataFrames
+
+Beam DataFrames provide a pandas-like [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html)
+API for building Beam pipelines.
+To learn more about Beam DataFrames, see the
+[Beam DataFrames overview](https://beam.apache.org/documentation/dsls/dataframes/overview) page.
+
+{{< button-colab url="https://colab.research.google.com/github/apache/beam/blob/master/examples/notebooks/tour-of-beam/dataframes.ipynb" >}}
+
 ## Transforms
 
 Check the [Python transform catalog](/documentation/transforms/python/overview/)
diff --git a/website/www/site/layouts/partials/section-menu/en/documentation.html b/website/www/site/layouts/partials/section-menu/en/documentation.html
index 4491ae1..6d1e664 100644
--- a/website/www/site/layouts/partials/section-menu/en/documentation.html
+++ b/website/www/site/layouts/partials/section-menu/en/documentation.html
@@ -213,6 +213,7 @@
 
   <ul class="section-nav-list">
     <li><a href="/documentation/runtime/environments/">Container environments</a></li>
+    <li><a href="/documentation/runtime/resource-hints/">Resource hints</a></li>
     <li><a href="/documentation/runtime/sdk-harness-config/">SDK Harness Configuration</a></li>
   </ul>
 </li>