Merge pull request #12423: [BEAM-10135][BEAM-10136] Refactor jdbc external transform registrar
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 2a2b46a..3db6459 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -30,3 +30,10 @@
Portable | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Portable_Python_Cron/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Portable_Python_Cron/lastCompletedBuild/) | --- | ---
See [.test-infra/jenkins/README](https://github.com/apache/beam/blob/master/.test-infra/jenkins/README.md) for trigger phrase, status and link of all Jenkins jobs.
+
+
+GitHub Actions Tests Status (on master branch)
+------------------------------------------------------------------------------------------------
+![Build python source distribution and wheels](https://github.com/apache/beam/workflows/Build%20python%20source%20distribution%20and%20wheels/badge.svg)
+
+See [CI.md](https://github.com/apache/beam/blob/master/CI.md) for more information about GitHub Actions CI.
diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index 6737a19..cd16b1c 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -16,7 +16,7 @@
# under the License.
#
-name: Build python wheels
+name: Build python source distribution and wheels
on:
schedule:
@@ -38,6 +38,7 @@
build_source:
runs-on: ubuntu-latest
+ name: Build python source distribution
steps:
- name: Checkout code
uses: actions/checkout@v2
@@ -90,7 +91,7 @@
run: gsutil rm -r ${{ env.GCP_PATH }} || true
upload_source_to_gcs:
- name: Upload source to GCS bucket
+ name: Upload python source distribution to GCS bucket
needs: prepare_gcs
runs-on: ubuntu-latest
if: github.repository_owner == 'apache'
@@ -109,7 +110,7 @@
run: gsutil cp -r -a public-read source/* ${{ env.GCP_PATH }}
build_wheels:
- name: Build wheels on ${{ matrix.os_python.os }}
+ name: Build python wheels on ${{ matrix.os_python.os }}
needs: build_source
runs-on: ${{ matrix.os_python.os }}
strategy:
@@ -120,7 +121,7 @@
{"os": "windows-latest", "python": "cp35-* cp36-* cp37-* cp38-*"},
]
steps:
- - name: Download source from artifacts
+ - name: Download python source distribution from artifacts
uses: actions/download-artifact@v2
with:
name: source
@@ -155,7 +156,7 @@
path: apache-beam-source/wheelhouse/
upload_wheels_to_gcs:
- name: Upload wheels to GCS bucket
+ name: Upload python wheels to GCS bucket
needs: build_wheels
runs-on: ubuntu-latest
if: github.repository_owner == 'apache' && github.event_name != 'pull_request'
diff --git a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy b/.test-infra/jenkins/Committers.groovy
similarity index 64%
copy from .test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
copy to .test-infra/jenkins/Committers.groovy
index 2aa89c2..18f1a19 100644
--- a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
+++ b/.test-infra/jenkins/Committers.groovy
@@ -16,21 +16,14 @@
* limitations under the License.
*/
-import PrecommitJobBuilder
-PrecommitJobBuilder builder = new PrecommitJobBuilder(
- scope: this,
- nameBase: 'JavaBeamZetaSQL',
- gradleTask: ':javaPreCommitBeamZetaSQL',
- gradleSwitches: [
- '-PdisableSpotlessCheck=true'
- ], // spotless checked in separate pre-commit
- triggerPathPatterns: [
- '^sdks/java/extensions/sql/.*$',
- ]
- )
-builder.build {
- publishers {
- archiveJunit('**/build/test-results/**/*.xml')
- }
+/**
+ * This is a placeholder for a file that is automatically generated as the first step of the Seed
+ * job. The generated file contains the list of committers' GitHub usernames. It is used to populate
+ * the list of users allowed to trigger, from GitHub pull requests, the jobs that non-committers are
+ * not allowed to trigger.
+ */
+
+class Committers {
+ final static List GITHUB_USERNAMES = []
}
diff --git a/.test-infra/jenkins/CommonJobProperties.groovy b/.test-infra/jenkins/CommonJobProperties.groovy
index 41f30bf..248f812 100644
--- a/.test-infra/jenkins/CommonJobProperties.groovy
+++ b/.test-infra/jenkins/CommonJobProperties.groovy
@@ -20,6 +20,9 @@
// common properties that are shared among all Jenkins projects.
// Code in this directory should conform to the Groovy style guide.
// http://groovy-lang.org/style-guide.html
+
+import Committers as committers
+
class CommonJobProperties {
static String checkoutDir = 'src'
@@ -114,9 +117,10 @@
githubPullRequest {
admins(['asfbot'])
useGitHubHooks()
- orgWhitelist(['apache'])
- allowMembersOfWhitelistedOrgsAsAdmin()
permitAll(prPermitAll)
+ if (!prPermitAll) {
+ userWhitelist(committers.GITHUB_USERNAMES)
+ }
// prTriggerPhrase is the argument which gets set when we want to allow
// post-commit builds to run against pending pull requests. This block
// overrides the default trigger phrase with the new one. Setting this
diff --git a/.test-infra/jenkins/committers_list_generator/cert.pem b/.test-infra/jenkins/committers_list_generator/cert.pem
new file mode 100644
index 0000000..534d567
--- /dev/null
+++ b/.test-infra/jenkins/committers_list_generator/cert.pem
@@ -0,0 +1,35 @@
+-----BEGIN CERTIFICATE-----
+MIIGKTCCBBGgAwIBAgIUOu+0mevC4a1HWchpoKwZQajuDhcwDQYJKoZIhvcNAQEL
+BQAwgaMxCzAJBgNVBAYTAlVTMQswCQYDVQQIDAJNQTEVMBMGA1UEBwwMRm9ycmVz
+dCBIaWxsMSIwIAYDVQQKDBlBcGFjaCBTb2Z0d2FyZSBGb3VuZGF0aW9uMRcwFQYD
+VQQLDA5JbmZyYXN0cnVjdHVyZTETMBEGA1UEAwwKYXBhY2hlLm9yZzEeMBwGCSqG
+SIb3DQEJARYPcm9vdEBhcGFjaGUub3JnMB4XDTIwMDExMDE2MzAyNVoXDTMwMDEw
+NzE2MzAyNVowgaMxCzAJBgNVBAYTAlVTMQswCQYDVQQIDAJNQTEVMBMGA1UEBwwM
+Rm9ycmVzdCBIaWxsMSIwIAYDVQQKDBlBcGFjaCBTb2Z0d2FyZSBGb3VuZGF0aW9u
+MRcwFQYDVQQLDA5JbmZyYXN0cnVjdHVyZTETMBEGA1UEAwwKYXBhY2hlLm9yZzEe
+MBwGCSqGSIb3DQEJARYPcm9vdEBhcGFjaGUub3JnMIICIjANBgkqhkiG9w0BAQEF
+AAOCAg8AMIICCgKCAgEAvqU6WpRFJS5CHl97/jfx0oaCsi7U5AKiuIeTOFN27t3f
+GBAL71BrrZXtvtwcRKXgADlm0xLqhmWh2ICk2mvYLINEFl+BUrIu6oyDykWy3s7m
+4u6joTm5yXrQeItuezrXCvLqjR36eMwZ1CikRHpMudtNPF9LPicJNc+jq8nJWdmd
+68wwDDC9sHosQh3tz7gsGUFnHXDU2FotTA/tEWjdMrICONKCAtJn3J/wf0NdUEnl
+iYI8s7fOnZRrQmj4FXdThjRAHDZiWQqF3jqZiDaUEK61RjMLJkeELAj/Mt1mkI8w
+GtA38TKYcVy87wNRhNzltFY2MuOoyqT5iX0m8YRpmpZb+n+TWGQzgLZEkFVLBSZ6
+Nd1ty0bsofMmSZUqcSoTJEawCd1CG8iJx9e+V7Xy82LwgRC7ntwN5VstBoGNkA80
+u/78T+/KNpsREsv2holcCHlY3DW2lpHQf4F8OgwCvtL1adWil3uFDytTYTRyB1LL
+PDIfagI2wn/9uh1u66VKD/mrQdvJbX5p6q0BGtdQluIO6JG8rlxmlN19e4++kTWJ
+XEy0Nwk2sUtdvhq87vuZUW7RA5krdenkKsfnZQhRtsVZHAlf8PDbEiUU38yAcwJ/
+MIbqR24cvpOxDp8vHeCcSd2gI19AHvlhLMn9W3J2Z+kFiyZd2KUINHn9geijnycC
+AwEAAaNTMFEwHQYDVR0OBBYEFMV0+BUCWcWW61yek8COss/Enk/LMB8GA1UdIwQY
+MBaAFMV0+BUCWcWW61yek8COss/Enk/LMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZI
+hvcNAQELBQADggIBAByIqrQwZrPY8DQcki/WUxmCAErWyCRgnKqRp8TgA2PtskWL
+ILjFFCnmD7OXEWFadX69ionBYP8u1ypT90PiqmTAC43FtvjKL3x2ppP/ncAiQ4vy
+gPvWUIRgshJM+tAtg/gxcdqG3T6UZX8uBndp+jTO/eggFw65GRMbumxx82cE3oG6
+IRCH4FK1KPaByXZePPkGGZ1vXJhKDS+1TD3FfeuZ8vE64Fw7SdNCZtN7RBY0/fN/
+rnPRiBuWE5BIKIFU0rw4uosS8f9hdRDdfvOmXIQNVlOvQK0ls/MRm/yY9v272ZSY
+iqIuEzFCIYIP/GoJPcevuigwbyy/LVV2ztX9saqVRPRv7wlnHfT2mkCo1HmuGRgV
+vnKOBDiI5dVFlIlsbvdbpocjuDNl7Ldl0sZeeEgbN4pLhvKkH2WU/D1bbVJ/ZNmU
+WLNaeF9HStgxmFAPW06u0IKDOQG0JpGJjSaBJ2eAQhbsMf7iAt6LHsdvdpcFa8pe
+xDRm7CmznYRe2U3y7exD9n4+KvZ9Vhs9jo8sfGSwNOcFGbhZIvldhGPVI7bJfnDv
+kLH0scjdMV/QuW52lmFCfe5NKN2fzmNMt3J6eEmDyZIy+fSg82XtfDzGbS5bcJLw
+dhO6jfOm+Vwy5XohWVhSfAEvToge6O0bKnLzDRu4M5Lcdov9pOvEOTmkXZom
+-----END CERTIFICATE-----
diff --git a/.test-infra/jenkins/committers_list_generator/main.py b/.test-infra/jenkins/committers_list_generator/main.py
new file mode 100644
index 0000000..3f8c645
--- /dev/null
+++ b/.test-infra/jenkins/committers_list_generator/main.py
@@ -0,0 +1,146 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import ldap
+import logging
+import jinja2
+import os
+import argparse
+import sys
+
+
+class CommittersGeneratorException(Exception):
+ pass
+
+
+class _ApacheLDAPException(CommittersGeneratorException):
+ pass
+
+
+_FILENAME = "Committers.groovy"
+_PEOPLE_DN = "ou=people,dc=apache,dc=org"
+_BEAM_DN = "cn=beam,ou=project,ou=groups,dc=apache,dc=org"
+_GITHUB_USERNAME_ATTR = "githubUsername"
+_DEFAULT_LDAP_URIS = "ldaps://ldap-us-ro.apache.org:636 ldaps://ldap-eu-ro.apache.org:636"
+_DEFAULT_CERT_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "cert.pem")
+
+
+def generate_groovy(output_dir, ldap_uris, cert_path):
+ logging.info(f"Generating {_FILENAME}")
+ env = jinja2.Environment(
+ loader=jinja2.FileSystemLoader(
+ os.path.join(os.path.dirname(os.path.realpath(__file__)), "templates")
+ ),
+ )
+ template = env.get_template(f"{_FILENAME}.template")
+ with open(os.path.join(output_dir, _FILENAME), "w") as file:
+ file.write(
+ template.render(
+ github_usernames=get_committers_github_usernames(
+ ldap_uris=ldap_uris,
+ cert_path=cert_path,
+ ),
+ )
+ )
+ logging.info(f"{_FILENAME} saved into {output_dir}")
+
+
+def get_committers_github_usernames(ldap_uris, cert_path):
+ connection = None
+ try:
+ ldap.set_option(ldap.OPT_X_TLS_CACERTFILE, cert_path)
+ ldap.set_option(ldap.OPT_X_TLS, ldap.OPT_X_TLS_DEMAND)
+ ldap.set_option(ldap.OPT_X_TLS_DEMAND, True)
+ ldap.set_option(ldap.OPT_REFERRALS, 0)
+ connection = ldap.initialize(ldap_uris)
+
+ people = connection.search_s(
+ _PEOPLE_DN,
+ ldap.SCOPE_ONELEVEL,
+ attrlist=[_GITHUB_USERNAME_ATTR],
+ )
+
+ if not people:
+ raise _ApacheLDAPException(f"LDAP server returned no people: {repr(people)}")
+
+ github_usernames = {
+ person_dn: data.get(_GITHUB_USERNAME_ATTR, [])
+ for person_dn, data in people
+ }
+
+ committers = connection.search_s(
+ _BEAM_DN,
+ ldap.SCOPE_BASE,
+ attrlist=["member"],
+ )
+
+ if not committers or "member" not in committers[0][1]:
+ raise _ApacheLDAPException(f"LDAP server returned no committers: {repr(committers)}")
+
+ committers_github_usernames = [
+ github_username.decode()
+ for committer_dn in committers[0][1]["member"]
+ for github_username in github_usernames[committer_dn.decode()]
+ ]
+
+ logging.info(f"{len(committers_github_usernames)} committers' GitHub usernames fetched correctly")
+
+ return committers_github_usernames
+
+ except (ldap.LDAPError, _ApacheLDAPException) as e:
+ raise CommittersGeneratorException("Could not fetch LDAP data") from e
+ finally:
+ if connection is not None:
+ connection.unbind_s()
+
+
+def _parse_args():
+ parser = argparse.ArgumentParser(
+ description="Generates groovy file containing beam committers' usernames."
+ )
+
+ parser.add_argument(
+ "-o", "--output-dir",
+ help="Path to the directory where the output groovy file will be saved",
+ metavar="DIR",
+ default=os.getcwd(),
+ )
+
+ parser.add_argument(
+ "-c", "--cert-path",
+ help="Path to the file containing SSL certificate of the LDAP server",
+ metavar="FILE",
+ default=_DEFAULT_CERT_PATH,
+ )
+
+ parser.add_argument(
+ "-u", "--ldap_uris",
+ help="Whitespace separated list of LDAP servers URIs",
+ default=_DEFAULT_LDAP_URIS,
+ )
+
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ try:
+ logging.getLogger().setLevel(logging.INFO)
+ args = _parse_args()
+ generate_groovy(args.output_dir, args.ldap_uris, args.cert_path)
+ except CommittersGeneratorException as e:
+ logging.exception("Couldn't generate the list of committers")
+ sys.exit(1)
diff --git a/.test-infra/jenkins/committers_list_generator/requirements.txt b/.test-infra/jenkins/committers_list_generator/requirements.txt
new file mode 100644
index 0000000..0a32695
--- /dev/null
+++ b/.test-infra/jenkins/committers_list_generator/requirements.txt
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+python-ldap
+jinja2
\ No newline at end of file
diff --git a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy b/.test-infra/jenkins/committers_list_generator/templates/Committers.groovy.template
similarity index 62%
copy from .test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
copy to .test-infra/jenkins/committers_list_generator/templates/Committers.groovy.template
index 2aa89c2..543a2db 100644
--- a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
+++ b/.test-infra/jenkins/committers_list_generator/templates/Committers.groovy.template
@@ -16,21 +16,16 @@
* limitations under the License.
*/
-import PrecommitJobBuilder
+/**
+ * This file is automatically generated. It contains the list of committers' GitHub usernames and is
+ * used to populate the list of users allowed to trigger, from GitHub pull requests, the jobs that
+ * non-committers are not allowed to trigger.
+ */
-PrecommitJobBuilder builder = new PrecommitJobBuilder(
- scope: this,
- nameBase: 'JavaBeamZetaSQL',
- gradleTask: ':javaPreCommitBeamZetaSQL',
- gradleSwitches: [
- '-PdisableSpotlessCheck=true'
- ], // spotless checked in separate pre-commit
- triggerPathPatterns: [
- '^sdks/java/extensions/sql/.*$',
+class Committers {
+ final static List GITHUB_USERNAMES = [
+ {%- for username in github_usernames|sort %}
+ '{{ username|lower }}'{%- if not loop.last -%},{%- endif -%}
+ {% endfor %}
]
- )
-builder.build {
- publishers {
- archiveJunit('**/build/test-results/**/*.xml')
- }
}
diff --git a/.test-infra/jenkins/job_00_seed.groovy b/.test-infra/jenkins/job_00_seed.groovy
index 09a23f7..e01bc34 100644
--- a/.test-infra/jenkins/job_00_seed.groovy
+++ b/.test-infra/jenkins/job_00_seed.groovy
@@ -17,6 +17,9 @@
*/
// Defines the seed job, which creates or updates all other Jenkins projects.
+
+import Committers as committers
+
job('beam_SeedJob') {
description('Automatically configures all Apache Beam Jenkins projects based' +
' on Jenkins DSL groovy files checked into the code repository.')
@@ -76,8 +79,7 @@
githubPullRequest {
admins(['asfbot'])
useGitHubHooks()
- orgWhitelist(['apache'])
- allowMembersOfWhitelistedOrgsAsAdmin()
+ userWhitelist(committers.GITHUB_USERNAMES)
// Also run when manually kicked on a pull request
triggerPhrase('Run Seed Job')
@@ -102,6 +104,17 @@
}
steps {
+ shell {
+ command("""
+ ( cd .test-infra/jenkins/committers_list_generator &&
+ python3.8 -m venv ve3 && source ve3/bin/activate &&
+ pip install -r requirements.txt &&
+ python main.py -o .. &&
+ deactivate ) ||
+ { echo "ERROR: Failed to fetch committers"; exit 3; }
+ """)
+ unstableReturn(3)
+ }
dsl {
// A list or a glob of other groovy files to process.
external('.test-infra/jenkins/job_*.groovy')
diff --git a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL_Java11.groovy b/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL_Java11.groovy
deleted file mode 100644
index 7efe77d..0000000
--- a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL_Java11.groovy
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import PrecommitJobBuilder
-import CommonJobProperties as properties
-
-PrecommitJobBuilder builder = new PrecommitJobBuilder(
- scope: this,
- nameBase: 'JavaBeamZetaSQLJava11',
- gradleTask: ':javaPreCommitBeamZetaSQL',
- gradleSwitches: [
- '-PdisableSpotlessCheck=true',
- '-PcompileAndRunTestsWithJava11',
- '-PskipCheckerFramework',
- // Gradle itself is running under JDK8 so plugin configures wrong for JDK11
- "-Pjava11Home=${CommonJobProperties.JAVA_11_HOME}"
- ], // spotless checked in separate pre-commit
- triggerPathPatterns: [
- '^sdks/java/extensions/sql/.*$',
- ]
- )
-builder.build {
- publishers {
- archiveJunit('**/build/test-results/**/*.xml')
- }
-}
diff --git a/.test-infra/jenkins/job_seed_standalone.groovy b/.test-infra/jenkins/job_seed_standalone.groovy
index 7f5ebdd..33ac831 100644
--- a/.test-infra/jenkins/job_seed_standalone.groovy
+++ b/.test-infra/jenkins/job_seed_standalone.groovy
@@ -17,6 +17,9 @@
*/
// Defines the seed job, which creates or updates all other Jenkins projects.
+
+import Committers as committers
+
job('beam_SeedJob_Standalone') {
description('Automatically configures all Apache Beam Jenkins projects based' +
' on Jenkins DSL groovy files checked into the code repository.')
@@ -76,8 +79,7 @@
githubPullRequest {
admins(['asfbot'])
useGitHubHooks()
- orgWhitelist(['apache'])
- allowMembersOfWhitelistedOrgsAsAdmin()
+ userWhitelist(committers.GITHUB_USERNAMES)
// Also run when manually kicked on a pull request
triggerPhrase('Run Standalone Seed Job')
@@ -102,6 +104,17 @@
}
steps {
+ shell {
+ command("""
+ ( cd .test-infra/jenkins/committers_list_generator &&
+ python3.8 -m venv ve3 && source ve3/bin/activate &&
+ pip install -r requirements.txt &&
+ python main.py -o .. &&
+ deactivate ) ||
+ { echo "ERROR: Failed to fetch committers"; exit 3; }
+ """)
+ unstableReturn(3)
+ }
dsl {
// A list or a glob of other groovy files to process.
external('.test-infra/jenkins/job_*.groovy')
diff --git a/.test-infra/metrics/grafana/dashboards/perftests_metrics/ParDo_Load_Tests.json b/.test-infra/metrics/grafana/dashboards/perftests_metrics/ParDo_Load_Tests.json
index 47dc505..f39b684 100644
--- a/.test-infra/metrics/grafana/dashboards/perftests_metrics/ParDo_Load_Tests.json
+++ b/.test-infra/metrics/grafana/dashboards/perftests_metrics/ParDo_Load_Tests.json
@@ -16,7 +16,7 @@
"editable": true,
"gnetId": null,
"graphTooltip": 0,
- "iteration": 1588593729528,
+ "iteration": 1596019686624,
"links": [],
"panels": [
{
@@ -78,7 +78,7 @@
],
"orderByTime": "ASC",
"policy": "default",
- "query": "SELECT mean(\"value\") FROM \"${sdk}_${processingType}_pardo_1\" WHERE \"metric\" =~ /runtime/ AND $timeFilter GROUP BY time($__interval), \"metric\"",
+ "query": "SELECT mean(\"value\") FROM \"${sdk}_${processingType}_pardo_5\" WHERE \"metric\" =~ /runtime/ AND $timeFilter GROUP BY time($__interval), \"metric\"",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
@@ -103,7 +103,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "$sdk | ParDo | 2GB, 100 byte records, 10 iterations",
+ "title": "$sdk | ParDo | 2GB, 100 byte records, 5 iterations, 10 counter operations, 3 counters, parallelism 5",
"tooltip": {
"shared": true,
"sort": 0,
@@ -158,7 +158,7 @@
"y": 0
},
"hiddenSeries": false,
- "id": 3,
+ "id": 4,
"interval": "1d",
"legend": {
"avg": false,
@@ -195,15 +195,14 @@
},
{
"params": [
- "metric"
+ "null"
],
- "type": "field"
+ "type": "fill"
}
],
- "measurement": "python_batch_cogbk_2",
"orderByTime": "ASC",
"policy": "default",
- "query": "SELECT mean(\"value\") FROM \"${sdk}_${processingType}_pardo_2\" WHERE \"metric\" =~ /runtime/ AND $timeFilter GROUP BY time($__interval), \"metric\"",
+ "query": "SELECT mean(\"value\") FROM \"${sdk}_${processingType}_pardo_5\" WHERE \"metric\" =~ /runtime/ AND $timeFilter GROUP BY time($__interval), \"metric\"",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
@@ -228,7 +227,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "$sdk | ParDo | 2GB, 100 byte records, 200 iterations",
+ "title": "$sdk | ParDo | 2GB, 100 byte records, 5 iterations, 10 counter operations, 3 counters, parallelism 5",
"tooltip": {
"shared": true,
"sort": 0,
@@ -283,7 +282,7 @@
"y": 8
},
"hiddenSeries": false,
- "id": 4,
+ "id": 5,
"interval": "1d",
"legend": {
"avg": false,
@@ -310,7 +309,7 @@
"steppedLine": false,
"targets": [
{
- "alias": "$tag_metric",
+ "alias": "",
"groupBy": [
{
"params": [
@@ -320,15 +319,14 @@
},
{
"params": [
- "metric"
+ "null"
],
- "type": "field"
+ "type": "fill"
}
],
- "measurement": "python_batch_cogbk_3",
"orderByTime": "ASC",
"policy": "default",
- "query": "SELECT mean(\"value\") FROM \"${sdk}_${processingType}_pardo_3\" WHERE \"metric\" =~ /runtime/ AND $timeFilter GROUP BY time($__interval), \"metric\"",
+ "query": "SELECT min, max, sum / count\nFROM\n(\n SELECT max(value)/1000 as min FROM \"${sdk}_${processingType}_pardo_5\" WHERE \"metric\" =~ /min_latency/ AND $timeFilter GROUP BY time($__interval), \"metric\"\n),\n(\n SELECT max(value)/1000 as max FROM \"${sdk}_${processingType}_pardo_5\" WHERE \"metric\" =~ /max_latency/ AND $timeFilter GROUP BY time($__interval), \"metric\"\n),\n(\n SELECT max(value)/1000 as sum FROM \"${sdk}_${processingType}_pardo_5\" WHERE \"metric\" =~ /sum_latency/ AND $timeFilter GROUP BY time($__interval), \"metric\"\n),\n(\n SELECT max(value)/1000 as count FROM \"${sdk}_${processingType}_pardo_5\" WHERE \"metric\" =~ /count_latency/ AND $timeFilter GROUP BY time($__interval), \"metric\"\n)\n\n",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
@@ -353,7 +351,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "$sdk | ParDo | 2GB, 100 byte records, 10 counter increments",
+ "title": "Latency $sdk | ParDo | 2GB, 100 byte records, 5 iterations, 10 counter operations, 3 counters, parallelism 5 (latency)",
"tooltip": {
"shared": true,
"sort": 0,
@@ -408,7 +406,7 @@
"y": 8
},
"hiddenSeries": false,
- "id": 5,
+ "id": 3,
"interval": "1d",
"legend": {
"avg": false,
@@ -435,12 +433,24 @@
"steppedLine": false,
"targets": [
{
- "alias": "$tag_metric",
- "groupBy": [],
- "measurement": "python_batch_cogbk_4",
+ "alias": "",
+ "groupBy": [
+ {
+ "params": [
+ "$__interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "null"
+ ],
+ "type": "fill"
+ }
+ ],
"orderByTime": "ASC",
"policy": "default",
- "query": "SELECT mean(\"value\") FROM \"${sdk}_${processingType}_pardo_4\" WHERE \"metric\" =~ /runtime/ AND $timeFilter GROUP BY time($__interval), \"metric\"",
+ "query": "SELECT min, max, sum / count\nFROM\n(\n SELECT max(value)/1000 as min FROM \"${sdk}_${processingType}_pardo_6\" WHERE \"metric\" =~ /loadgenerator\\/impulse.*_min/ AND $timeFilter GROUP BY time($__interval), \"metric\"\n),\n(\n SELECT max(value)/1000 as max FROM \"${sdk}_${processingType}_pardo_6\" WHERE \"metric\" =~ /loadgenerator\\/impulse.*_max/ AND $timeFilter GROUP BY time($__interval), \"metric\"\n),\n(\n SELECT max(value)/1000 as sum FROM \"${sdk}_${processingType}_pardo_6\" WHERE \"metric\" =~ /loadgenerator\\/impulse.*_sum/ AND $timeFilter GROUP BY time($__interval), \"metric\"\n),\n(\n SELECT max(value)/1000 as count FROM \"${sdk}_${processingType}_pardo_6\" WHERE \"metric\" =~ /loadgenerator\\/impulse.*_count/ AND $timeFilter GROUP BY time($__interval), \"metric\"\n)\n\n",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
@@ -451,6 +461,10 @@
"value"
],
"type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
}
]
],
@@ -461,7 +475,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "$sdk | ParDo | 2GB, 100 byte records, 100 counter increments",
+ "title": "Checkpoint duration $sdk | ParDo | 2GB, 100 byte records, 5 iterations, 10 counter operations, 3 counters, parallelism 5",
"tooltip": {
"shared": true,
"sort": 0,
@@ -505,15 +519,18 @@
"refresh": false,
"schemaVersion": 22,
"style": "dark",
- "tags": ["performance tests"],
+ "tags": [
+ "performance tests"
+ ],
"templating": {
"list": [
{
"allValue": null,
"current": {
"selected": false,
- "text": "batch",
- "value": "batch"
+ "tags": [],
+ "text": "streaming",
+ "value": "streaming"
},
"hide": 0,
"includeAll": false,
@@ -522,12 +539,14 @@
"name": "processingType",
"options": [
{
- "selected": true,
+ "$$hashKey": "object:283",
+ "selected": false,
"text": "batch",
"value": "batch"
},
{
- "selected": false,
+ "$$hashKey": "object:284",
+ "selected": true,
"text": "streaming",
"value": "streaming"
}
@@ -540,8 +559,9 @@
"allValue": null,
"current": {
"selected": false,
- "text": "java",
- "value": "java"
+ "tags": [],
+ "text": "python",
+ "value": "python"
},
"hide": 0,
"includeAll": false,
@@ -550,16 +570,19 @@
"name": "sdk",
"options": [
{
- "selected": true,
+ "$$hashKey": "object:272",
+ "selected": false,
"text": "java",
"value": "java"
},
{
- "selected": false,
+ "$$hashKey": "object:273",
+ "selected": true,
"text": "python",
"value": "python"
},
{
+ "$$hashKey": "object:274",
"selected": false,
"text": "go",
"value": "go"
@@ -596,5 +619,5 @@
"variables": {
"list": []
},
- "version": 1
-}
+ "version": 2
+}
\ No newline at end of file
diff --git a/CHANGES.md b/CHANGES.md
index c2665e7..66dc68c 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -58,14 +58,19 @@
* New overloads for BigtableIO.Read.withKeyRange() and BigtableIO.Read.withRowFilter()
methods that take ValueProvider as a parameter (Java) ([BEAM-10283](https://issues.apache.org/jira/browse/BEAM-10283)).
+* The WriteToBigQuery transform (Python) in Dataflow Batch no longer relies on BigQuerySink by default. It relies on
+ a new, fully-featured transform based on file loads into BigQuery. To revert the behavior to the old implementation,
+ you may use `--experiments=use_legacy_bq_sink`.
* Add cross-language support to Java's JdbcIO, now available in the Python module `apache_beam.io.external.jdbc` ([BEAM-10135](https://issues.apache.org/jira/browse/BEAM-10135), [BEAM-10136](https://issues.apache.org/jira/browse/BEAM-10136)).
* Add support of AWS SDK v2 for KinesisIO.Read (Java) ([BEAM-9702](https://issues.apache.org/jira/browse/BEAM-9702)).
* Support for X source added (Java/Python) ([BEAM-X](https://issues.apache.org/jira/browse/BEAM-X)).
+* Add streaming support to SnowflakeIO in Java SDK ([BEAM-9896](https://issues.apache.org/jira/browse/BEAM-9896)).
## New Features / Improvements
* Shared library for simplifying management of large shared objects added to Python SDK. Example use case is sharing a large TF model object across threads ([BEAM-10417](https://issues.apache.org/jira/browse/BEAM-10417)).
* X feature added (Java/Python) ([BEAM-X](https://issues.apache.org/jira/browse/BEAM-X)).
+* FnApiDoFnRunner no longer creates a new OnTimerContext for each element/timer it processes ([BEAM-9839](https://issues.apache.org/jira/browse/BEAM-9839)).
## Breaking Changes
@@ -96,6 +101,7 @@
reading data by exporting to JSON files. This has small differences in behavior for Time and Date-related fields. See
Pydoc for more information.
* Add dispositions for SnowflakeIO.write ([BEAM-10343](https://issues.apache.org/jira/browse/BEAM-10343))
+* Add cross-language support to SnowflakeIO.Read ([BEAM-9897](https://issues.apache.org/jira/browse/BEAM-9897)).
## New Features / Improvements
diff --git a/CI.md b/CI.md
new file mode 100644
index 0000000..74d7912
--- /dev/null
+++ b/CI.md
@@ -0,0 +1,104 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Apache Beam
+
+## CI Environment
+
+Continuous Integration is an important component of making Apache Beam robust and stable.
+
+Our main execution environment for CI is Jenkins, which is available at
+[https://ci-beam.apache.org/](https://ci-beam.apache.org/). See
+[.test-infra/jenkins/README](.test-infra/jenkins/README.md)
+for the trigger phrase, status and link of all Jenkins jobs. See the Apache Beam Developer Guide for
+[Jenkins Tips](https://cwiki.apache.org/confluence/display/BEAM/Jenkins+Tips).
+
+An additional execution environment for CI is [GitHub Actions](https://github.com/features/actions). GitHub Actions
+(GA) is very well integrated with GitHub code and workflows and evolved fast in 2019/2020 to become
+a fully-fledged CI environment, easy to use and develop for, so we decided to use it for building python source
+distribution and wheels.
+
+## GitHub Actions
+
+### GitHub actions run types
+
+The following GA CI job runs are currently executed for Apache Beam, and each of the runs has a different
+purpose and context.
+
+#### Pull request run
+
+These runs are the result of PRs opened from forks made by contributors. Most builds for Apache Beam fall
+into this category. They are executed in the context of the "Fork", not the main
+Beam Code Repository, which means that they have only "read" permission to all the GitHub resources
+(container registry, code repository). This is necessary as the code in those PRs (including the CI job
+definition) might be modified by people who are not committers for the Apache Beam Code Repository.
+
+The main purpose of those jobs is to check if the PR builds cleanly, if the tests run properly and if
+the PR is ready for review and merge.
+
+#### Direct Push/Merge Run
+
+These runs are the result of direct pushes done by the committers or of the merge of a Pull Request
+by the committers. These runs execute in the context of the Apache Beam Code Repository and also have
+write permission for GitHub resources (container registry, code repository).
+The main purpose of the run is to check if the code after merge still holds all the assertions - like
+whether it still builds and all tests are green.
+
+This is needed because some of the conflicting changes from multiple PRs might cause build and test failures
+after merge even if they do not fail in isolation.
+
+#### Scheduled runs
+
+These runs are the result of a (nightly) triggered job - only for the `master` branch. The
+main purpose of the job is to check that external dependency changes have had no impact on the Apache
+Beam code (for example, released transitive dependencies that fail the build). Another reason for the nightly
+build is that the build tags the most recent master with `nightly-master`.
+
+All runs consist of the same jobs, but the jobs behave slightly differently or are skipped in different
+run categories. Here is a summary of the run categories with regard to the jobs they are running.
+These jobs often have a matrix run strategy which runs several different variations of the jobs
+(with different platform type / Python version to run, for example).
+
+| Job | Description | Pull Request Run | Direct Push/Merge Run | Scheduled Run | Requires GCP Credentials |
+|-------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------|-----------------------|---------------|--------------------------|
+| Build python source distribution | Builds python source distribution and uploads it to artifacts. Artifacts from release branch are used in release process ([`build_release_candidate.sh`](release/src/main/scripts/build_release_candidate.sh)) | Yes | Yes | Yes | - |
+| Prepare GCS | Clears target path on GCS if already exists. | - | Yes | Yes | Yes |
+| Upload python source distribution to GCS bucket | Uploads python source distribution to GCS bucket for path unique for specific workflow run. | - | Yes | Yes | Yes |
+| Build python wheels on linux/macos/windows | Builds python wheels on linux/macos/windows platform with usage of `cibuildwheel` and uploads it to artifacts. Artifacts from release branch are used in release process ( [ `build_release_candidate.sh` ](release/src/main/scripts/build_release_candidate.sh) ) | Yes | Yes | Yes | - |
+| Upload python wheels to GCS bucket | Uploads python wheels to GCS bucket for path unique for specific workflow run. Additionally uploads workflow run data. | - | Yes | Yes | Yes |
+| List files on Google Cloud Storage Bucket | Lists files on GCS for verification purpose. | - | Yes | Yes | Yes |
+| Tag repo nightly | Tag repo with `nightly-master` tag if build python source distribution and python wheels finished successfully. | - | - | Yes | - |
+
+### Google Cloud Platform Credentials
+
+Some of the jobs require variables stored as [GitHub Secrets](https://docs.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets)
+to perform operations on Google Cloud Platform. Currently these jobs are limited to the Apache repository only.
+These variables are:
+ * `GCP_SA_EMAIL` - Service account email address. This is usually of the format `<name>@<project-id>.iam.gserviceaccount.com`.
+ * `GCP_SA_KEY` - Service account key. This key should be created and encoded as a Base64 string (eg. `cat my-key.json | base64` on macOS).
+
+The Service Account shall have the following permissions:
+ * Storage Object Admin (roles/storage.objectAdmin)
+
+### GitHub Action Tips
+
+* If you introduce changes to the workflow, it is possible that your changes will not be present in the check run triggered in the Pull Request.
+In this case please attach a link to the modified workflow run executed on your fork.
+* Possible timeouts with macOS runner - existing issue: [(X) This check failed - sometimes happens on macOS runner #841](https://github.com/actions/virtual-environments/issues/841)
+* [GitHub Actions Documentation](https://docs.github.com/en/actions)
diff --git a/README.md b/README.md
index 6d957e5..1432712 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@
[![Coverage Status](https://coveralls.io/repos/github/apache/beam/badge.svg?branch=master)](https://coveralls.io/github/apache/beam?branch=master)
[![Compat Check PyPI](https://python-compatibility-tools.appspot.com/one_badge_image?package=apache-beam%5Bgcp%5D)](https://python-compatibility-tools.appspot.com/one_badge_target?package=apache-beam%5Bgcp%5D)
[![Compat Check at master](https://python-compatibility-tools.appspot.com/one_badge_image?package=git%2Bgit%3A//github.com/apache/beam.git%23subdirectory%3Dsdks/python)](https://python-compatibility-tools.appspot.com/one_badge_target?package=git%2Bgit%3A//github.com/apache/beam.git%23subdirectory%3Dsdks/python)
+![Build python source distribution and wheels](https://github.com/apache/beam/workflows/Build%20python%20source%20distribution%20and%20wheels/badge.svg)
### Post-commit tests status (on master branch)
diff --git a/build.gradle b/build.gradle
index 93ca237..7f93fe2 100644
--- a/build.gradle
+++ b/build.gradle
@@ -247,7 +247,6 @@
}
task python2PostCommit() {
- dependsOn ":sdks:python:test-suites:portable:py2:crossLanguagePythonJavaKafkaIOFlink"
dependsOn ":sdks:python:test-suites:portable:py2:crossLanguageTests"
dependsOn ":sdks:python:test-suites:dataflow:py2:postCommitIT"
dependsOn ":sdks:python:test-suites:direct:py2:directRunnerIT"
@@ -275,7 +274,6 @@
}
task python38PostCommit() {
- dependsOn ":sdks:python:test-suites:portable:py38:crossLanguagePythonJavaKafkaIOFlink"
dependsOn ":sdks:python:test-suites:dataflow:py38:postCommitIT"
dependsOn ":sdks:python:test-suites:direct:py38:postCommitIT"
dependsOn ":sdks:python:test-suites:direct:py38:hdfsIntegrationTest"
diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle
index b9c501a..59c6cbd 100644
--- a/buildSrc/build.gradle
+++ b/buildSrc/build.gradle
@@ -38,7 +38,7 @@
compile gradleApi()
compile localGroovy()
compile 'com.github.jengelman.gradle.plugins:shadow:4.0.3'
- compile 'gradle.plugin.com.github.spotbugs:spotbugs-gradle-plugin:2.0.0' // Enable spotbugs
+ compile 'com.github.spotbugs:spotbugs-gradle-plugin:3.0.0' // Enable spotbugs
runtime "net.ltgt.gradle:gradle-apt-plugin:0.20" // Enable a Java annotation processor
runtime "com.google.protobuf:protobuf-gradle-plugin:0.8.5" // Enable proto code generation
diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index 18efda5..82541ef 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -391,7 +391,7 @@
def gax_version = "1.54.0"
def generated_grpc_ga_version = "1.85.1"
def google_auth_version = "0.19.0"
- def google_clients_version = "1.30.9"
+ def google_clients_version = "1.30.10"
def google_cloud_bigdataoss_version = "2.1.3"
def google_cloud_core_version = "1.92.2"
def google_cloud_pubsublite_version = "0.1.6"
@@ -415,6 +415,7 @@
def protobuf_version = "3.11.1"
def quickcheck_version = "0.8"
def spark_version = "2.4.6"
+ def spotbugs_version = "4.0.6"
// A map of maps containing common libraries used per language. To use:
// dependencies {
@@ -468,13 +469,13 @@
google_api_client_jackson2 : "com.google.api-client:google-api-client-jackson2:$google_clients_version",
google_api_client_java6 : "com.google.api-client:google-api-client-java6:$google_clients_version",
google_api_common : "com.google.api:api-common:1.8.1",
- google_api_services_bigquery : "com.google.apis:google-api-services-bigquery:v2-rev20191211-$google_clients_version",
- google_api_services_clouddebugger : "com.google.apis:google-api-services-clouddebugger:v2-rev20200313-$google_clients_version",
- google_api_services_cloudresourcemanager : "com.google.apis:google-api-services-cloudresourcemanager:v1-rev20200311-$google_clients_version",
- google_api_services_dataflow : "com.google.apis:google-api-services-dataflow:v1b3-rev20200305-$google_clients_version",
- google_api_services_healthcare : "com.google.apis:google-api-services-healthcare:v1beta1-rev20200525-$google_clients_version",
- google_api_services_pubsub : "com.google.apis:google-api-services-pubsub:v1-rev20200312-$google_clients_version",
- google_api_services_storage : "com.google.apis:google-api-services-storage:v1-rev20200226-$google_clients_version",
+ google_api_services_bigquery : "com.google.apis:google-api-services-bigquery:v2-rev20200719-$google_clients_version",
+ google_api_services_clouddebugger : "com.google.apis:google-api-services-clouddebugger:v2-rev20200501-$google_clients_version",
+ google_api_services_cloudresourcemanager : "com.google.apis:google-api-services-cloudresourcemanager:v1-rev20200720-$google_clients_version",
+ google_api_services_dataflow : "com.google.apis:google-api-services-dataflow:v1b3-rev20200713-$google_clients_version",
+ google_api_services_healthcare : "com.google.apis:google-api-services-healthcare:v1beta1-rev20200713-$google_clients_version",
+ google_api_services_pubsub : "com.google.apis:google-api-services-pubsub:v1-rev20200713-$google_clients_version",
+ google_api_services_storage : "com.google.apis:google-api-services-storage:v1-rev20200611-$google_clients_version",
google_auth_library_credentials : "com.google.auth:google-auth-library-credentials:$google_auth_version",
google_auth_library_oauth2_http : "com.google.auth:google-auth-library-oauth2-http:$google_auth_version",
google_cloud_bigquery : "com.google.cloud:google-cloud-bigquery:1.108.0",
@@ -811,7 +812,7 @@
// sdks/java/core's FieldValueTypeInformation needs javax.annotations.Nullable at runtime.
// Therefore, the java core module declares jsr305 dependency (BSD license) as "compile".
// https://github.com/findbugsproject/findbugs/blob/master/findbugs/licenses/LICENSE-jsr305.txt
- "com.github.spotbugs:spotbugs-annotations:3.1.12",
+ "com.github.spotbugs:spotbugs-annotations:$spotbugs_version",
"net.jcip:jcip-annotations:1.0",
// This explicitly adds javax.annotation.Generated (SOURCE retention)
// as a compile time dependency since Java 9+ no longer includes common
@@ -904,7 +905,7 @@
if (configuration.enableSpotbugs) {
project.apply plugin: 'com.github.spotbugs'
project.dependencies {
- spotbugs "com.github.spotbugs:spotbugs:3.1.12"
+ spotbugs "com.github.spotbugs:spotbugs:$spotbugs_version"
spotbugs "com.google.auto.value:auto-value:1.7"
compileOnlyAnnotationDeps.each { dep -> spotbugs dep }
}
diff --git a/examples/java/build.gradle b/examples/java/build.gradle
index 0c91573..eaad35f 100644
--- a/examples/java/build.gradle
+++ b/examples/java/build.gradle
@@ -52,6 +52,7 @@
compile project(path: ":sdks:java:core", configuration: "shadow")
compile project(":sdks:java:extensions:google-cloud-platform-core")
compile project(":sdks:java:io:google-cloud-platform")
+ compile project(":sdks:java:extensions:ml")
compile library.java.avro
compile library.java.bigdataoss_util
compile library.java.google_api_client
@@ -60,6 +61,7 @@
compile library.java.google_auth_library_credentials
compile library.java.google_auth_library_oauth2_http
compile library.java.google_cloud_datastore_v1_proto_client
+ compile library.java.google_code_gson
compile library.java.google_http_client
compile library.java.joda_time
compile library.java.proto_google_cloud_datastore_v1
@@ -67,6 +69,7 @@
compile library.java.slf4j_jdk14
runtime project(path: ":runners:direct-java", configuration: "shadow")
testCompile project(":sdks:java:io:google-cloud-platform")
+ testCompile project(":sdks:java:extensions:ml")
testCompile library.java.hamcrest_core
testCompile library.java.hamcrest_library
testCompile library.java.junit
diff --git a/examples/java/src/main/java/org/apache/beam/examples/snippets/Snippets.java b/examples/java/src/main/java/org/apache/beam/examples/snippets/Snippets.java
index 3393e6d..cb55475 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/snippets/Snippets.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/snippets/Snippets.java
@@ -22,6 +22,14 @@
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.api.services.bigquery.model.TimePartitioning;
+import com.google.cloud.language.v1.AnnotateTextRequest;
+import com.google.cloud.language.v1.AnnotateTextResponse;
+import com.google.cloud.language.v1.Document;
+import com.google.cloud.language.v1.Entity;
+import com.google.cloud.language.v1.Sentence;
+import com.google.cloud.language.v1.Token;
+import com.google.gson.Gson;
+import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
@@ -30,12 +38,14 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.stream.Collectors;
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.AvroCoder;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.DefaultCoder;
import org.apache.beam.sdk.coders.DoubleCoder;
+import org.apache.beam.sdk.extensions.ml.AnnotateText;
import org.apache.beam.sdk.io.Compression;
import org.apache.beam.sdk.io.FileIO;
import org.apache.beam.sdk.io.GenerateSequence;
@@ -63,6 +73,7 @@
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.PeriodicImpulse;
+import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.Sum;
import org.apache.beam.sdk.transforms.View;
import org.apache.beam.sdk.transforms.Watch;
@@ -984,4 +995,167 @@
return result;
}
}
+
+ /** Natural Language API snippets; the [START]/[END] comments presumably mark doc excerpts. */
+ public static class NaturalLanguageIntegration {
+   /** Per sentence, maps a head token's text to the texts of the tokens that depend on it. */
+   private static final SerializableFunction<AnnotateTextResponse, List<Map<String, List<String>>>>
+       // [START NlpAnalyzeDependencyTree]
+       analyzeDependencyTree =
+           (SerializableFunction<AnnotateTextResponse, List<Map<String, List<String>>>>)
+               response -> {
+                 List<Map<String, List<String>>> adjacencyLists = new ArrayList<>();
+                 int index = 0;
+                 for (Sentence s : response.getSentencesList()) {
+                   Map<String, List<String>> adjacencyMap = new HashMap<>();
+                   int sentenceBegin = s.getText().getBeginOffset();
+                   int sentenceEnd = sentenceBegin + s.getText().getContent().length() - 1;
+                   while (index < response.getTokensCount()
+                       && response.getTokens(index).getText().getBeginOffset() <= sentenceEnd) {
+                     Token token = response.getTokens(index);
+                     int headTokenIndex = token.getDependencyEdge().getHeadTokenIndex();
+                     String headTokenContent =
+                         response.getTokens(headTokenIndex).getText().getContent();
+                     adjacencyMap
+                         .computeIfAbsent(headTokenContent, head -> new ArrayList<>())
+                         .add(token.getText().getContent());
+                     index++;
+                   }
+                   adjacencyLists.add(adjacencyMap);
+                 }
+                 return adjacencyLists;
+               };
+   // [END NlpAnalyzeDependencyTree]
+
+   /** Extracts the document-level and per-sentence sentiment magnitudes from a response. */
+   private static final SerializableFunction<? super AnnotateTextResponse, TextSentiments>
+       // [START NlpExtractSentiments]
+       extractSentiments =
+           (SerializableFunction<AnnotateTextResponse, TextSentiments>)
+               annotateTextResponse -> {
+                 TextSentiments sentiments = new TextSentiments();
+                 sentiments.setDocumentSentiment(
+                     annotateTextResponse.getDocumentSentiment().getMagnitude());
+                 Map<String, Float> sentenceSentimentsMap =
+                     annotateTextResponse.getSentencesList().stream()
+                         .collect(
+                             Collectors.toMap(
+                                 (Sentence s) -> s.getText().getContent(),
+                                 (Sentence s) -> s.getSentiment().getMagnitude(),
+                                 (first, repeated) -> first)); // repeated text: keep first, don't throw
+                 sentiments.setSentenceSentiments(sentenceSentimentsMap);
+                 return sentiments;
+               };
+   // [END NlpExtractSentiments]
+
+   /** Maps every entity name to its entity type name; a repeated name keeps its first type. */
+   private static final SerializableFunction<? super AnnotateTextResponse, Map<String, String>>
+       // [START NlpExtractEntities]
+       extractEntities =
+           (SerializableFunction<AnnotateTextResponse, Map<String, String>>)
+               annotateTextResponse ->
+                   annotateTextResponse.getEntitiesList().stream()
+                       .collect(
+                           Collectors.toMap(
+                               Entity::getName, (Entity e) -> e.getType().toString(), (first, repeated) -> first));
+   // [END NlpExtractEntities]
+
+   /** Renders the map as a JSON array of one-entry objects; Gson escapes keys and values. */
+   private static final SerializableFunction<? super Map<String, String>, String>
+       mapEntitiesToJson =
+           (SerializableFunction<Map<String, String>, String>)
+               item -> {
+                 Gson gson = new Gson();
+                 return item.entrySet().stream()
+                     .map(e -> "{" + gson.toJson(e.getKey()) + ": " + gson.toJson(e.getValue()) + "}")
+                     .collect(Collectors.joining(",", "[", "]"));
+               };
+
+   /** Serializes the per-sentence adjacency maps to a JSON string. */
+   private static final SerializableFunction<List<Map<String, List<String>>>, String>
+       mapDependencyTreesToJson =
+           (SerializableFunction<List<Map<String, List<String>>>, String>)
+               tree -> new Gson().toJson(tree);
+
+   /** Annotates a sample sentence and writes sentiments, entities and dependency trees as JSON. */
+   public static void main(Pipeline p) {
+     // [START NlpAnalyzeText]
+     AnnotateTextRequest.Features features =
+         AnnotateTextRequest.Features.newBuilder()
+             .setExtractEntities(true)
+             .setExtractDocumentSentiment(true)
+             .setExtractEntitySentiment(true)
+             .setExtractSyntax(true)
+             .build();
+     AnnotateText annotateText = AnnotateText.newBuilder().setFeatures(features).build();
+
+     PCollection<AnnotateTextResponse> responses =
+         p.apply(
+                 Create.of(
+                     "My experience so far has been fantastic, I'd really recommend this product."))
+             .apply(
+                 MapElements.into(TypeDescriptor.of(Document.class))
+                     .via(
+                         (SerializableFunction<String, Document>)
+                             input ->
+                                 Document.newBuilder()
+                                     .setContent(input)
+                                     .setType(Document.Type.PLAIN_TEXT)
+                                     .build()))
+             .apply(annotateText);
+
+     responses
+         .apply(MapElements.into(TypeDescriptor.of(TextSentiments.class)).via(extractSentiments))
+         .apply(
+             MapElements.into(TypeDescriptors.strings())
+                 .via((SerializableFunction<TextSentiments, String>) TextSentiments::toJson))
+         .apply(TextIO.write().to("sentiments.txt"));
+
+     responses
+         .apply(
+             MapElements.into(
+                     TypeDescriptors.maps(TypeDescriptors.strings(), TypeDescriptors.strings()))
+                 .via(extractEntities))
+         .apply(MapElements.into(TypeDescriptors.strings()).via(mapEntitiesToJson))
+         .apply(TextIO.write().to("entities.txt"));
+
+     responses
+         .apply(
+             MapElements.into(
+                     TypeDescriptors.lists(
+                         TypeDescriptors.maps(
+                             TypeDescriptors.strings(),
+                             TypeDescriptors.lists(TypeDescriptors.strings()))))
+                 .via(analyzeDependencyTree))
+         .apply(MapElements.into(TypeDescriptors.strings()).via(mapDependencyTreesToJson))
+         .apply(TextIO.write().to("adjacency_list.txt"));
+     // [END NlpAnalyzeText]
+   }
+
+   /** Serializable holder for the sentiment values produced by {@code extractSentiments}. */
+   private static class TextSentiments implements Serializable {
+     private Float documentSentiment;
+     private Map<String, Float> sentenceSentiments;
+
+     public void setSentenceSentiments(Map<String, Float> sentenceSentiments) {
+       this.sentenceSentiments = sentenceSentiments;
+     }
+
+     public Float getDocumentSentiment() {
+       return documentSentiment;
+     }
+
+     public void setDocumentSentiment(Float documentSentiment) {
+       this.documentSentiment = documentSentiment;
+     }
+
+     public Map<String, Float> getSentenceSentiments() {
+       return sentenceSentiments;
+     }
+
+     public String toJson() {
+       return new Gson().toJson(this);
+     }
+   }
+ }
}
diff --git a/learning/katas/go/core_transforms/composite/composite/cmd/main.go b/learning/katas/go/core_transforms/composite/composite/cmd/main.go
new file mode 100644
index 0000000..c566ddf
--- /dev/null
+++ b/learning/katas/go/core_transforms/composite/composite/cmd/main.go
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+ "beam.apache.org/learning/katas/core_transforms/composite/composite/pkg/common"
+ "beam.apache.org/learning/katas/core_transforms/composite/composite/pkg/task"
+ "context"
+ "github.com/apache/beam/sdks/go/pkg/beam"
+ "github.com/apache/beam/sdks/go/pkg/beam/log"
+ "github.com/apache/beam/sdks/go/pkg/beam/x/beamx"
+ "github.com/apache/beam/sdks/go/pkg/beam/x/debug"
+)
+
+// main builds the composite kata pipeline: it creates the sample lines, applies
+// the task transform, prints the resulting PCollection and runs the pipeline.
+func main() {
+	ctx := context.Background()
+
+	p, s := beam.NewPipelineWithRoot()
+
+	lines := common.CreateLines(s)
+	counts := task.ApplyTransform(s, lines)
+
+	debug.Print(s, counts)
+
+	// Log with the same background context the pipeline was run with.
+	if runErr := beamx.Run(ctx, p); runErr != nil {
+		log.Exitf(ctx, "Failed to execute job: %v", runErr)
+	}
+}
diff --git a/learning/katas/go/core_transforms/composite/composite/pkg/common/common.go b/learning/katas/go/core_transforms/composite/composite/pkg/common/common.go
new file mode 100644
index 0000000..cc942e3
--- /dev/null
+++ b/learning/katas/go/core_transforms/composite/composite/pkg/common/common.go
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package common
+
+import "github.com/apache/beam/sdks/go/pkg/beam"
+
+func CreateLines(s beam.Scope) beam.PCollection {
+ return beam.Create(s,
+ "Apache Beam is an open source unified programming model",
+ "to define and execute data processing pipelines")
+}
diff --git a/learning/katas/go/core_transforms/composite/composite/pkg/task/task.go b/learning/katas/go/core_transforms/composite/composite/pkg/task/task.go
new file mode 100644
index 0000000..5dd426d
--- /dev/null
+++ b/learning/katas/go/core_transforms/composite/composite/pkg/task/task.go
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package task
+
+import (
+ "github.com/apache/beam/sdks/go/pkg/beam"
+ "github.com/apache/beam/sdks/go/pkg/beam/transforms/stats"
+)
+
+func ApplyTransform(s beam.Scope, input beam.PCollection) beam.PCollection {
+ s = s.Scope("CountCharacters")
+ characters := extractNonSpaceCharacters(s, input)
+ return stats.Count(s, characters)
+}
+
+func extractNonSpaceCharacters(s beam.Scope, input beam.PCollection) beam.PCollection {
+ return beam.ParDo(s, func(line string, emit func(string)){
+ for _, k := range line {
+ char := string(k)
+ if char != " " {
+ emit(char)
+ }
+ }
+ }, input)
+}
+
diff --git a/learning/katas/go/core_transforms/composite/composite/task-info.yaml b/learning/katas/go/core_transforms/composite/composite/task-info.yaml
new file mode 100644
index 0000000..4dfaee1
--- /dev/null
+++ b/learning/katas/go/core_transforms/composite/composite/task-info.yaml
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+type: edu
+files:
+- name: test/task_test.go
+ visible: false
+- name: cmd/main.go
+ visible: true
+- name: pkg/task/task.go
+ visible: true
+ placeholders:
+ - offset: 1048
+ length: 35
+ placeholder_text: TODO()
+ - offset: 1092
+ length: 26
+ placeholder_text: TODO()
+ - offset: 1218
+ length: 154
+ placeholder_text: TODO()
+ - offset: 1006
+ length: 26
+ placeholder_text: TODO()
+- name: pkg/common/common.go
+ visible: true
diff --git a/learning/katas/go/core_transforms/composite/composite/task-remote-info.yaml b/learning/katas/go/core_transforms/composite/composite/task-remote-info.yaml
new file mode 100644
index 0000000..a50d39c
--- /dev/null
+++ b/learning/katas/go/core_transforms/composite/composite/task-remote-info.yaml
@@ -0,0 +1,2 @@
+id: 1453131
+update_date: Wed, 29 Jul 2020 20:41:36 UTC
diff --git a/learning/katas/go/core_transforms/composite/composite/task.md b/learning/katas/go/core_transforms/composite/composite/task.md
new file mode 100644
index 0000000..56538c4
--- /dev/null
+++ b/learning/katas/go/core_transforms/composite/composite/task.md
@@ -0,0 +1,54 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+# Composite Transform
+
+Transforms can have a nested structure, where a complex transform performs multiple simpler
+transforms (such as more than one ParDo, Combine, GroupByKey, or even other composite transforms).
+These transforms are called composite transforms. Nesting multiple transforms inside a single
+composite transform can make your code more modular and easier to understand. Additionally,
+scopes may be augmented with custom naming for monitoring purposes.
+
+**Kata:** This kata has two tasks. The first is to implement a composite transform that extracts
+characters from a list of strings, excludes any spaces and returns a PCollection of type
+`KV<string, int>` associating each character with its count in the sample input.
+The second is to create a sub-scope in the composite transform with the name "CountCharacters".
+
+<div class="hint">
+ Use <a href="https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam#ParDo">
+ beam.ParDo</a>
+ with a <a href="https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam#hdr-DoFns">
+ DoFn</a> in your composite transform to extract non space characters from the input.
+</div>
+
+<div class="hint">
+ You can use <a href="https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam/transforms/stats/#Count">
+ stats.Count</a>
+ to count the number of appearances of each character in the PCollection&lt;string&gt; input.
+</div>
+
+<div class="hint">
+ Use the method <a href="https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam#Scope.Scope">
+ Scope</a> to create a sub-scope in the composite transform.
+</div>
+
+<div class="hint">
+ Refer to the Beam Programming Guide
+ <a href="https://beam.apache.org/documentation/programming-guide/#composite-transforms">
+ "Composite transforms"</a> section for more information.
+</div>
diff --git a/learning/katas/go/core_transforms/composite/composite/test/task_test.go b/learning/katas/go/core_transforms/composite/composite/test/task_test.go
new file mode 100644
index 0000000..97deea8
--- /dev/null
+++ b/learning/katas/go/core_transforms/composite/composite/test/task_test.go
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package test
+
+import (
+ "beam.apache.org/learning/katas/core_transforms/composite/composite/pkg/common"
+ "beam.apache.org/learning/katas/core_transforms/composite/composite/pkg/task"
+ "github.com/apache/beam/sdks/go/pkg/beam"
+ "github.com/apache/beam/sdks/go/pkg/beam/testing/ptest"
+ "testing"
+)
+
+const (
+ wantAugmentedScopeLabel = "CountCharacters"
+)
+
+type testCase struct {
+ input beam.PCollection
+ want map[string]int
+}
+
+// TestApplyTransform checks for a wantAugmentedScopeLabel scope and each character's count.
+func TestApplyTransform(t *testing.T) {
+	p, s := beam.NewPipelineWithRoot()
+	tests := []testCase{
+		{
+			input: common.CreateLines(s),
+			want: map[string]int{
+				"a": 7,
+				"c": 4,
+				"d": 5,
+				"e": 14,
+				"f": 2,
+				"g": 3,
+				"h": 1,
+				"i": 8,
+				"l": 2,
+				"m": 4,
+				"n": 8,
+				"o": 6,
+				"p": 6,
+				"r": 4,
+				"s": 5,
+				"t": 3,
+				"u": 3,
+				"x": 1,
+				"A": 1,
+				"B": 1,
+			},
+		},
+	}
+	for _, tt := range tests {
+		gotKV := task.ApplyTransform(s, tt.input)
+		if !hasExpectedAugmentedScope(p) {
+			t.Errorf("no augmented scope with label %s", wantAugmentedScopeLabel)
+		}
+		// Name the check step while building the graph; emit only gives ParDo its one output.
+		beam.ParDo(s.Scope("TestApplyTransform"), func(gotCharacter string, got int, emit func(bool)) {
+			want := tt.want[gotCharacter]
+			if got != want {
+				t.Errorf("%s = %v, want %v", gotCharacter, got, want)
+			}
+		}, gotKV)
+
+		if err := ptest.Run(p); err != nil {
+			t.Error(err)
+		}
+	}
+}
+
+// hasExpectedAugmentedScope reports whether any edge returned by p.Build()
+// belongs to a scope whose label equals wantAugmentedScopeLabel.
+func hasExpectedAugmentedScope(p *beam.Pipeline) bool {
+	edges, _, _ := p.Build()
+	for _, edge := range edges {
+		if edge.Scope().Label == wantAugmentedScopeLabel {
+			return true
+		}
+	}
+	return false
+}
+
diff --git a/learning/katas/go/core_transforms/composite/lesson-info.yaml b/learning/katas/go/core_transforms/composite/lesson-info.yaml
new file mode 100644
index 0000000..6ddfd44
--- /dev/null
+++ b/learning/katas/go/core_transforms/composite/lesson-info.yaml
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+content:
+- composite
diff --git a/learning/katas/go/core_transforms/composite/lesson-remote-info.yaml b/learning/katas/go/core_transforms/composite/lesson-remote-info.yaml
new file mode 100644
index 0000000..494e9eb
--- /dev/null
+++ b/learning/katas/go/core_transforms/composite/lesson-remote-info.yaml
@@ -0,0 +1,3 @@
+id: 385015
+update_date: Wed, 29 Jul 2020 20:41:31 UTC
+unit: 374534
diff --git a/learning/katas/go/core_transforms/section-info.yaml b/learning/katas/go/core_transforms/section-info.yaml
index b58d082..bcdab5d 100644
--- a/learning/katas/go/core_transforms/section-info.yaml
+++ b/learning/katas/go/core_transforms/section-info.yaml
@@ -18,12 +18,14 @@
#
content:
- - map
- - groupbykey
- - cogroupbykey
- - combine
- - flatten
- - partition
- - side_input
- - additional_outputs
- - branching
\ No newline at end of file
+- map
+- groupbykey
+- cogroupbykey
+- combine
+- flatten
+- partition
+- side_input
+- additional_outputs
+- branching
+- composite
+- windowing
diff --git a/learning/katas/go/core_transforms/windowing/lesson-info.yaml b/learning/katas/go/core_transforms/windowing/lesson-info.yaml
new file mode 100644
index 0000000..2315b4c
--- /dev/null
+++ b/learning/katas/go/core_transforms/windowing/lesson-info.yaml
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+content:
+- windowing
diff --git a/learning/katas/go/core_transforms/windowing/lesson-remote-info.yaml b/learning/katas/go/core_transforms/windowing/lesson-remote-info.yaml
new file mode 100644
index 0000000..7a20a3a
--- /dev/null
+++ b/learning/katas/go/core_transforms/windowing/lesson-remote-info.yaml
@@ -0,0 +1,3 @@
+id: 387853
+update_date: Thu, 06 Aug 2020 17:53:20 UTC
+unit: 377026
diff --git a/learning/katas/go/core_transforms/windowing/windowing/cmd/main.go b/learning/katas/go/core_transforms/windowing/windowing/cmd/main.go
new file mode 100644
index 0000000..e1cfcf0
--- /dev/null
+++ b/learning/katas/go/core_transforms/windowing/windowing/cmd/main.go
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+ "beam.apache.org/learning/katas/core_transforms/windowing/windowing/pkg/common"
+ "beam.apache.org/learning/katas/core_transforms/windowing/windowing/pkg/task"
+ "context"
+ "github.com/apache/beam/sdks/go/pkg/beam"
+ "github.com/apache/beam/sdks/go/pkg/beam/log"
+ "github.com/apache/beam/sdks/go/pkg/beam/x/beamx"
+ "github.com/apache/beam/sdks/go/pkg/beam/x/debug"
+)
+// main builds the windowing kata pipeline, prints each resulting Commit as a string, and runs the pipeline.
+func main() {
+ ctx := context.Background()
+
+ p, s := beam.NewPipelineWithRoot()
+ // The input PCollection comes from the kata's common package.
+ input := common.CreateLines(s)
+ // Apply the transform implemented in pkg/task.
+ result := task.ApplyTransform(s, input)
+ // Convert each Commit to its String() form so debug.Print receives strings.
+ output := beam.ParDo(s, func(commit task.Commit) string {
+ return commit.String()
+ }, result)
+
+ debug.Print(s, output)
+
+ err := beamx.Run(ctx, p)
+ // Report the failure through log.Exitf if the run returned an error.
+ if err != nil {
+ log.Exitf(context.Background(), "Failed to execute job: %v", err)
+ }
+}
diff --git a/learning/katas/go/core_transforms/windowing/windowing/pkg/common/input.go b/learning/katas/go/core_transforms/windowing/windowing/pkg/common/input.go
new file mode 100644
index 0000000..3353e2b
--- /dev/null
+++ b/learning/katas/go/core_transforms/windowing/windowing/pkg/common/input.go
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package common
+
+import (
+ "github.com/apache/beam/sdks/go/pkg/beam"
+ "github.com/apache/beam/sdks/go/pkg/beam/core/graph/mtime"
+ "time"
+)
+// lines maps a commit timestamp (constructed in UTC) to that commit's one-line message.
+var (
+ lines = map[time.Time]string{
+ time.Date(2020, 7, 31, 15, 52, 5, 0, time.UTC): "3c6c45924a Remove trailing whitespace from README",
+ time.Date(2020, 7, 31, 15, 59, 40, 0, time.UTC): "a52be99b62 Merge pull request #12443 from KevinGG/whitespace",
+ time.Date(2020, 7, 31, 16, 7, 36, 0, time.UTC): "7c1772d13f Merge pull request #12439 from ibzib/beam-9199-1",
+ time.Date(2020, 7, 31, 16, 35, 41, 0, time.UTC): "d971ba13b8 Widen ranges for GCP libraries (#12198)",
+ time.Date(2020, 8, 1, 0, 7, 25, 0, time.UTC): "875620111b Enable all Jenkins jobs triggering for committers (#12407)",
+ }
+)
+// CreateLines returns the PCollection produced by applying timestampFn to beam.Impulse(s).
+func CreateLines(s beam.Scope) beam.PCollection {
+ return beam.ParDo(s, timestampFn, beam.Impulse(s))
+}
+// timestampFn ignores its input and emits every entry of lines, using the map key as the element's event time.
+func timestampFn(_ []byte, emit func(beam.EventTime, string)) {
+ for timestamp, line := range lines { // Go map iteration order is unspecified, so the emit order varies between runs.
+ emit(mtime.FromTime(timestamp), line)
+ }
+}
diff --git a/learning/katas/go/core_transforms/windowing/windowing/pkg/task/task.go b/learning/katas/go/core_transforms/windowing/windowing/pkg/task/task.go
new file mode 100644
index 0000000..1838578
--- /dev/null
+++ b/learning/katas/go/core_transforms/windowing/windowing/pkg/task/task.go
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package task
+
+import (
+ "fmt"
+ "github.com/apache/beam/sdks/go/pkg/beam"
+ "github.com/apache/beam/sdks/go/pkg/beam/core/graph/window"
+ "time"
+)
+
+func ApplyTransform(s beam.Scope, input beam.PCollection) beam.PCollection {
+ windowed := beam.WindowInto(s, window.NewFixedWindows(time.Hour), input)
+ return beam.ParDo(s, timestampFn, windowed)
+}
+
+func timestampFn(iw beam.Window, et beam.EventTime, line string) Commit {
+ return Commit{
+ MaxTimestampWindow: toTime(iw.MaxTimestamp()),
+ EventTimestamp: toTime(et),
+ Line: line,
+ }
+}
+// toTime converts et to a time.Time via time.Unix, passing et's milliseconds scaled to nanoseconds.
+func toTime(et beam.EventTime) time.Time {
+ return time.Unix(0, et.Milliseconds() * int64(time.Millisecond))
+}
+// Commit pairs a commit line with its event timestamp and the max timestamp of the window it was observed in.
+type Commit struct {
+ MaxTimestampWindow time.Time // filled from the window's MaxTimestamp() by this package's timestampFn
+ EventTimestamp time.Time // filled from the element's beam.EventTime
+ Line string // the commit line itself
+}
+// String renders the commit as one sentence; both times are formatted with the time.Kitchen layout.
+func (c Commit) String() string {
+ return fmt.Sprintf("Window ending at: %v contains timestamp: %v for commit: \"%s\"",
+ c.MaxTimestampWindow.Format(time.Kitchen),
+ c.EventTimestamp.Format(time.Kitchen),
+ c.Line)
+}
+
diff --git a/learning/katas/go/core_transforms/windowing/windowing/task-info.yaml b/learning/katas/go/core_transforms/windowing/windowing/task-info.yaml
new file mode 100644
index 0000000..b0e38f7
--- /dev/null
+++ b/learning/katas/go/core_transforms/windowing/windowing/task-info.yaml
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+type: edu
+files:
+- name: test/task_test.go
+ visible: false
+- name: cmd/main.go
+ visible: true
+- name: pkg/common/input.go
+ visible: true
+- name: pkg/task/task.go
+ visible: true
+ placeholders:
+ - offset: 1030
+ length: 60
+ placeholder_text: TODO()
+ - offset: 1099
+ length: 36
+ placeholder_text: TODO()
+ - offset: 1156
+ length: 46
+ placeholder_text: TODO()
+ - offset: 1221
+ length: 121
+ placeholder_text: TODO()
diff --git a/learning/katas/go/core_transforms/windowing/windowing/task-remote-info.yaml b/learning/katas/go/core_transforms/windowing/windowing/task-remote-info.yaml
new file mode 100644
index 0000000..235e663
--- /dev/null
+++ b/learning/katas/go/core_transforms/windowing/windowing/task-remote-info.yaml
@@ -0,0 +1,2 @@
+id: 1464828
+update_date: Thu, 06 Aug 2020 17:53:26 UTC
diff --git a/learning/katas/go/core_transforms/windowing/windowing/task.md b/learning/katas/go/core_transforms/windowing/windowing/task.md
new file mode 100644
index 0000000..22444a6
--- /dev/null
+++ b/learning/katas/go/core_transforms/windowing/windowing/task.md
@@ -0,0 +1,77 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+# Windowing
+
+This lesson introduces the concept of windowed PCollection elements. A window is a view of a data set bounded by a
+fixed beginning and a fixed end. In the Beam model, windowing subdivides a PCollection according to the
+timestamps of its individual elements. An element can be a part of one or more windows.
+
+A DoFn can request timestamp and windowing information about the element it is processing. All the previous lessons
+had this information available as well. This lesson makes use of these parameters. The simple dataset
+has five git commit messages and their timestamps from the
+[Apache Beam public repository](https://github.com/apache/beam). Timestamps have been applied to this PCollection
+input according to the date and time of these messages.
+
+**Kata:** This lesson challenges you to apply an hourly fixed window to a PCollection. You are then to
+apply a ParDo to that hourly fixed-windowed PCollection to produce a PCollection of Commit structs. The
+Commit struct is provided for you. You are encouraged to run the pipeline in cmd/main.go of this task
+to visualize the windowing and timestamps.
+
+<div class="hint">
+ Use <a href="https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam#ParDo">
+ beam.ParDo</a>
+ with a <a href="https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam#hdr-DoFns">
+ DoFn</a> to accomplish this lesson.
+</div>
+
+<div class="hint">
+ Use <a href="https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam#WindowInto">
+ beam.WindowInto</a>
+ with <a href="https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam/core/graph/window#NewFixedWindows">
+ window.NewFixedWindows(time.Hour)</a>
+ on your PCollection input to apply an hourly windowing strategy to each element.
+</div>
+
+<div class="hint">
+ To access <a href="https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam#Window">
+ beam.Window</a>
+ and <a href="https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam#EventTime">
+ beam.EventTime</a> in your DoFn, add them as parameters in the order shown below.
+
+```
+func doFn(iw beam.Window, et beam.EventTime, element X) Y {
+ // do something with iw, et and element to return Y
+}
+```
+</div>
+
+<div class="hint">
+ The Commit struct provided for you has a MaxTimestampWindow property that can be set from
+ <a href="https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam#Window">
+ beam.Window</a>'s MaxTimestamp().
+</div>
+
+<div class="hint">
+ Refer to the Beam Programming Guide for additional information about
+ <a href="https://beam.apache.org/documentation/programming-guide/#other-dofn-parameters">
+ additional DoFn parameters</a> and
+ <a href="https://beam.apache.org/documentation/programming-guide/#windowing">
+ windowing</a>.
+</div>
+
diff --git a/learning/katas/go/core_transforms/windowing/windowing/test/task_test.go b/learning/katas/go/core_transforms/windowing/windowing/test/task_test.go
new file mode 100644
index 0000000..8d4a2e2
--- /dev/null
+++ b/learning/katas/go/core_transforms/windowing/windowing/test/task_test.go
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package test
+
+import (
+ "beam.apache.org/learning/katas/core_transforms/windowing/windowing/pkg/common"
+ "beam.apache.org/learning/katas/core_transforms/windowing/windowing/pkg/task"
+ "github.com/apache/beam/sdks/go/pkg/beam"
+ "github.com/apache/beam/sdks/go/pkg/beam/testing/ptest"
+ "github.com/google/go-cmp/cmp"
+ "testing"
+ "time"
+)
+// TestApplyTransform wires task.ApplyTransform onto common.CreateLines and fails if ptest.Run returns an error.
+func TestApplyTransform(t *testing.T) {
+ p, s := beam.NewPipelineWithRoot()
+ tests := []struct {
+ input beam.PCollection
+ want []interface{}
+ }{
+ {
+ input: common.CreateLines(s),
+ want: []interface{}{
+ task.Commit{
+ MaxTimestampWindow: time.Unix(1596211199, 0),
+ EventTimestamp: time.Unix(1596210725, 0),
+ Line: "3c6c45924a Remove trailing whitespace from README",
+ },
+ task.Commit{
+ MaxTimestampWindow: time.Unix(1596211199, 0),
+ EventTimestamp: time.Unix(1596211180, 0),
+ Line: "a52be99b62 Merge pull request #12443 from KevinGG/whitespace",
+ },
+ task.Commit{
+ MaxTimestampWindow: time.Unix(1596214799, 0),
+ EventTimestamp: time.Unix(1596211656, 0),
+ Line: "7c1772d13f Merge pull request #12439 from ibzib/beam-9199-1",
+ },
+ task.Commit{
+ MaxTimestampWindow: time.Unix(1596214799, 0),
+ EventTimestamp: time.Unix(1596213341, 0),
+ Line: "d971ba13b8 Widen ranges for GCP libraries (#12198)",
+ },
+ task.Commit{
+ MaxTimestampWindow: time.Unix(1596243599, 0),
+ EventTimestamp: time.Unix(1596240445, 0),
+ Line: "875620111b Enable all Jenkins jobs triggering for committers (#12407)",
+ },
+ },
+ },
+ }
+ for _, tt := range tests {
+ got := task.ApplyTransform(s, tt.input)
+ cmp.Equal(got, tt.want) // NOTE(review): result is discarded and got is a beam.PCollection, not the element values, so tt.want is never actually asserted — TODO confirm intent.
+ if err := ptest.Run(p); err != nil {
+ t.Error(err)
+ }
+ }
+}
diff --git a/learning/katas/go/course-remote-info.yaml b/learning/katas/go/course-remote-info.yaml
index 90e7821..e944389 100644
--- a/learning/katas/go/course-remote-info.yaml
+++ b/learning/katas/go/course-remote-info.yaml
@@ -1,2 +1,2 @@
id: 70387
-update_date: Mon, 27 Jul 2020 20:44:48 UTC
+update_date: Wed, 29 Jul 2020 20:42:36 UTC
diff --git a/learning/katas/python/Common Transforms/Aggregation/Count/task-info.yaml b/learning/katas/python/Common Transforms/Aggregation/Count/task-info.yaml
index 8259cde..0681008 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Count/task-info.yaml
+++ b/learning/katas/python/Common Transforms/Aggregation/Count/task-info.yaml
@@ -22,7 +22,7 @@
- name: task.py
visible: true
placeholders:
- - offset: 934
+ - offset: 945
length: 31
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Common Transforms/Aggregation/Count/task-remote-info.yaml b/learning/katas/python/Common Transforms/Aggregation/Count/task-remote-info.yaml
index 410c083..7c826b4 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Count/task-remote-info.yaml
+++ b/learning/katas/python/Common Transforms/Aggregation/Count/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755597
-update_date: Tue, 19 May 2020 03:05:33 UTC
+update_date: Sat, 01 Aug 2020 09:42:11 UTC
diff --git a/learning/katas/python/Common Transforms/Aggregation/Count/task.py b/learning/katas/python/Common Transforms/Aggregation/Count/task.py
index 1c34f05..79c46b0 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Count/task.py
+++ b/learning/katas/python/Common Transforms/Aggregation/Count/task.py
@@ -18,10 +18,9 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create(range(1, 11))
- | beam.combiners.Count.Globally()
- | LogElements())
+ (p | beam.Create(range(1, 11))
+ | beam.combiners.Count.Globally()
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Common Transforms/Aggregation/Largest/task-info.yaml b/learning/katas/python/Common Transforms/Aggregation/Largest/task-info.yaml
index cdc5440..9b00391 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Largest/task-info.yaml
+++ b/learning/katas/python/Common Transforms/Aggregation/Largest/task-info.yaml
@@ -22,7 +22,7 @@
- name: task.py
visible: true
placeholders:
- - offset: 934
+ - offset: 945
length: 29
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Common Transforms/Aggregation/Largest/task-remote-info.yaml b/learning/katas/python/Common Transforms/Aggregation/Largest/task-remote-info.yaml
index b5dd948..372d706 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Largest/task-remote-info.yaml
+++ b/learning/katas/python/Common Transforms/Aggregation/Largest/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755601
-update_date: Tue, 19 May 2020 03:05:45 UTC
+update_date: Sat, 01 Aug 2020 09:42:23 UTC
diff --git a/learning/katas/python/Common Transforms/Aggregation/Largest/task.py b/learning/katas/python/Common Transforms/Aggregation/Largest/task.py
index 32627c8..e584b4d 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Largest/task.py
+++ b/learning/katas/python/Common Transforms/Aggregation/Largest/task.py
@@ -18,10 +18,9 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create(range(1, 11))
- | beam.combiners.Top.Largest(1)
- | LogElements())
+ (p | beam.Create(range(1, 11))
+ | beam.combiners.Top.Largest(1)
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Common Transforms/Aggregation/Mean/task-info.yaml b/learning/katas/python/Common Transforms/Aggregation/Mean/task-info.yaml
index 15c8e41..22c5db3 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Mean/task-info.yaml
+++ b/learning/katas/python/Common Transforms/Aggregation/Mean/task-info.yaml
@@ -22,7 +22,7 @@
- name: task.py
visible: true
placeholders:
- - offset: 934
+ - offset: 945
length: 30
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Common Transforms/Aggregation/Mean/task-remote-info.yaml b/learning/katas/python/Common Transforms/Aggregation/Mean/task-remote-info.yaml
index c89e88a..3f6a9da 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Mean/task-remote-info.yaml
+++ b/learning/katas/python/Common Transforms/Aggregation/Mean/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755599
-update_date: Tue, 19 May 2020 03:05:39 UTC
+update_date: Sat, 01 Aug 2020 09:42:17 UTC
diff --git a/learning/katas/python/Common Transforms/Aggregation/Mean/task.py b/learning/katas/python/Common Transforms/Aggregation/Mean/task.py
index 79f6f53..dc26cd3 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Mean/task.py
+++ b/learning/katas/python/Common Transforms/Aggregation/Mean/task.py
@@ -18,10 +18,9 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create(range(1, 11))
- | beam.combiners.Mean.Globally()
- | LogElements())
+ (p | beam.Create(range(1, 11))
+ | beam.combiners.Mean.Globally()
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Common Transforms/Aggregation/Smallest/task-info.yaml b/learning/katas/python/Common Transforms/Aggregation/Smallest/task-info.yaml
index 15c8e41..22c5db3 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Smallest/task-info.yaml
+++ b/learning/katas/python/Common Transforms/Aggregation/Smallest/task-info.yaml
@@ -22,7 +22,7 @@
- name: task.py
visible: true
placeholders:
- - offset: 934
+ - offset: 945
length: 30
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Common Transforms/Aggregation/Smallest/task-remote-info.yaml b/learning/katas/python/Common Transforms/Aggregation/Smallest/task-remote-info.yaml
index 68f18b3..6d7ffb5 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Smallest/task-remote-info.yaml
+++ b/learning/katas/python/Common Transforms/Aggregation/Smallest/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755600
-update_date: Tue, 19 May 2020 03:05:42 UTC
+update_date: Sat, 01 Aug 2020 09:42:20 UTC
diff --git a/learning/katas/python/Common Transforms/Aggregation/Smallest/task.py b/learning/katas/python/Common Transforms/Aggregation/Smallest/task.py
index 871ebaf..190af52 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Smallest/task.py
+++ b/learning/katas/python/Common Transforms/Aggregation/Smallest/task.py
@@ -18,10 +18,9 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create(range(1, 11))
- | beam.combiners.Top.Smallest(1)
- | LogElements())
+ (p | beam.Create(range(1, 11))
+ | beam.combiners.Top.Smallest(1)
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Common Transforms/Aggregation/Sum/task-info.yaml b/learning/katas/python/Common Transforms/Aggregation/Sum/task-info.yaml
index c9adc6d..31213b8 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Sum/task-info.yaml
+++ b/learning/katas/python/Common Transforms/Aggregation/Sum/task-info.yaml
@@ -22,7 +22,7 @@
- name: task.py
visible: true
placeholders:
- - offset: 934
+ - offset: 945
length: 25
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Common Transforms/Aggregation/Sum/task-remote-info.yaml b/learning/katas/python/Common Transforms/Aggregation/Sum/task-remote-info.yaml
index 4a01df2..8a69eae 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Sum/task-remote-info.yaml
+++ b/learning/katas/python/Common Transforms/Aggregation/Sum/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755598
-update_date: Tue, 19 May 2020 03:05:36 UTC
+update_date: Sat, 01 Aug 2020 09:42:14 UTC
diff --git a/learning/katas/python/Common Transforms/Aggregation/Sum/task.py b/learning/katas/python/Common Transforms/Aggregation/Sum/task.py
index 9509993..94e56d1 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Sum/task.py
+++ b/learning/katas/python/Common Transforms/Aggregation/Sum/task.py
@@ -18,10 +18,9 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create(range(1, 11))
- | beam.CombineGlobally(sum)
- | LogElements())
+ (p | beam.Create(range(1, 11))
+ | beam.CombineGlobally(sum)
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Common Transforms/Filter/Filter/task-info.yaml b/learning/katas/python/Common Transforms/Filter/Filter/task-info.yaml
index 1c1c20d..78b7c1d 100644
--- a/learning/katas/python/Common Transforms/Filter/Filter/task-info.yaml
+++ b/learning/katas/python/Common Transforms/Filter/Filter/task-info.yaml
@@ -22,7 +22,7 @@
- name: task.py
visible: true
placeholders:
- - offset: 934
+ - offset: 945
length: 37
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Common Transforms/Filter/Filter/task-remote-info.yaml b/learning/katas/python/Common Transforms/Filter/Filter/task-remote-info.yaml
index b128f6e..f0db907 100644
--- a/learning/katas/python/Common Transforms/Filter/Filter/task-remote-info.yaml
+++ b/learning/katas/python/Common Transforms/Filter/Filter/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755596
-update_date: Tue, 19 May 2020 03:05:30 UTC
+update_date: Sat, 01 Aug 2020 09:42:09 UTC
diff --git a/learning/katas/python/Common Transforms/Filter/Filter/task.py b/learning/katas/python/Common Transforms/Filter/Filter/task.py
index 57a5486..42f525c 100644
--- a/learning/katas/python/Common Transforms/Filter/Filter/task.py
+++ b/learning/katas/python/Common Transforms/Filter/Filter/task.py
@@ -18,10 +18,9 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create(range(1, 11))
- | beam.Filter(lambda num: num % 2 == 0)
- | LogElements())
+ (p | beam.Create(range(1, 11))
+ | beam.Filter(lambda num: num % 2 == 0)
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Common Transforms/Filter/ParDo/task-info.yaml b/learning/katas/python/Common Transforms/Filter/ParDo/task-info.yaml
index 5d0d5bb..aff611a 100644
--- a/learning/katas/python/Common Transforms/Filter/ParDo/task-info.yaml
+++ b/learning/katas/python/Common Transforms/Filter/ParDo/task-info.yaml
@@ -22,7 +22,7 @@
- name: task.py
visible: true
placeholders:
- - offset: 942
+ - offset: 921
length: 82
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Common Transforms/Filter/ParDo/task-remote-info.yaml b/learning/katas/python/Common Transforms/Filter/ParDo/task-remote-info.yaml
index 227501c..283880c 100644
--- a/learning/katas/python/Common Transforms/Filter/ParDo/task-remote-info.yaml
+++ b/learning/katas/python/Common Transforms/Filter/ParDo/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755595
-update_date: Tue, 19 May 2020 03:05:27 UTC
+update_date: Sat, 01 Aug 2020 09:42:06 UTC
diff --git a/learning/katas/python/Common Transforms/Filter/ParDo/task.py b/learning/katas/python/Common Transforms/Filter/ParDo/task.py
index 58fb267..6382a72 100644
--- a/learning/katas/python/Common Transforms/Filter/ParDo/task.py
+++ b/learning/katas/python/Common Transforms/Filter/ParDo/task.py
@@ -18,8 +18,6 @@
from log_elements import LogElements
-p = beam.Pipeline()
-
class FilterOutEvenNumber(beam.DoFn):
@@ -28,8 +26,8 @@
yield element
-(p | beam.Create(range(1, 11))
- | beam.ParDo(FilterOutEvenNumber())
- | LogElements())
+with beam.Pipeline() as p:
+ (p | beam.Create(range(1, 11))
+ | beam.ParDo(FilterOutEvenNumber())
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Common Transforms/WithKeys/WithKeys/task-info.yaml b/learning/katas/python/Common Transforms/WithKeys/WithKeys/task-info.yaml
index acc94b6..7708d24 100644
--- a/learning/katas/python/Common Transforms/WithKeys/WithKeys/task-info.yaml
+++ b/learning/katas/python/Common Transforms/WithKeys/WithKeys/task-info.yaml
@@ -22,7 +22,7 @@
- name: task.py
visible: true
placeholders:
- - offset: 966
+ - offset: 977
length: 37
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Common Transforms/WithKeys/WithKeys/task-remote-info.yaml b/learning/katas/python/Common Transforms/WithKeys/WithKeys/task-remote-info.yaml
index 686b9b7..18a370d 100644
--- a/learning/katas/python/Common Transforms/WithKeys/WithKeys/task-remote-info.yaml
+++ b/learning/katas/python/Common Transforms/WithKeys/WithKeys/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1124221
-update_date: Tue, 19 May 2020 03:05:49 UTC
+update_date: Sat, 01 Aug 2020 09:42:26 UTC
diff --git a/learning/katas/python/Common Transforms/WithKeys/WithKeys/task.py b/learning/katas/python/Common Transforms/WithKeys/WithKeys/task.py
index e4429db..5360e20 100644
--- a/learning/katas/python/Common Transforms/WithKeys/WithKeys/task.py
+++ b/learning/katas/python/Common Transforms/WithKeys/WithKeys/task.py
@@ -18,10 +18,9 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create(['apple', 'banana', 'cherry', 'durian', 'guava', 'melon'])
- | beam.WithKeys(lambda word: word[0:1])
- | LogElements())
+ (p | beam.Create(['apple', 'banana', 'cherry', 'durian', 'guava', 'melon'])
+ | beam.WithKeys(lambda word: word[0:1])
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Core Transforms/Branching/Branching/task-info.yaml b/learning/katas/python/Core Transforms/Branching/Branching/task-info.yaml
index aa799df..13d0ab0 100644
--- a/learning/katas/python/Core Transforms/Branching/Branching/task-info.yaml
+++ b/learning/katas/python/Core Transforms/Branching/Branching/task-info.yaml
@@ -22,10 +22,10 @@
- name: task.py
visible: true
placeholders:
- - offset: 945
+ - offset: 956
length: 39
placeholder_text: TODO()
- - offset: 1002
+ - offset: 1015
length: 40
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Core Transforms/Branching/Branching/task-remote-info.yaml b/learning/katas/python/Core Transforms/Branching/Branching/task-remote-info.yaml
index bc28ecc..34690e7 100644
--- a/learning/katas/python/Core Transforms/Branching/Branching/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Branching/Branching/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755592
-update_date: Tue, 19 May 2020 03:05:20 UTC
+update_date: Sat, 01 Aug 2020 09:42:00 UTC
diff --git a/learning/katas/python/Core Transforms/Branching/Branching/task.py b/learning/katas/python/Core Transforms/Branching/Branching/task.py
index e29b67c..dfc7874 100644
--- a/learning/katas/python/Core Transforms/Branching/Branching/task.py
+++ b/learning/katas/python/Core Transforms/Branching/Branching/task.py
@@ -18,14 +18,13 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-numbers = p | beam.Create([1, 2, 3, 4, 5])
+ numbers = p | beam.Create([1, 2, 3, 4, 5])
-mult5_results = numbers | beam.Map(lambda num: num * 5)
-mult10_results = numbers | beam.Map(lambda num: num * 10)
+ mult5_results = numbers | beam.Map(lambda num: num * 5)
+ mult10_results = numbers | beam.Map(lambda num: num * 10)
-mult5_results | 'Log multiply 5' >> LogElements(prefix='Multiplied by 5: ')
-mult10_results | 'Log multiply 10' >> LogElements(prefix='Multiplied by 10: ')
+ mult5_results | 'Log multiply 5' >> LogElements(prefix='Multiplied by 5: ')
+ mult10_results | 'Log multiply 10' >> LogElements(prefix='Multiplied by 10: ')
-p.run()
diff --git a/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task-remote-info.yaml b/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task-remote-info.yaml
index 43038c4..08be3d3 100644
--- a/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755583
-update_date: Tue, 19 May 2020 03:04:56 UTC
+update_date: Sat, 01 Aug 2020 09:41:35 UTC
diff --git a/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task.py b/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task.py
index f76002b..cb94e62 100644
--- a/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task.py
+++ b/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task.py
@@ -46,12 +46,11 @@
| beam.Map(cogbk_result_to_wordsalphabet))
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-fruits = p | 'Fruits' >> beam.Create(['apple', 'banana', 'cherry'])
-countries = p | 'Countries' >> beam.Create(['australia', 'brazil', 'canada'])
+ fruits = p | 'Fruits' >> beam.Create(['apple', 'banana', 'cherry'])
+ countries = p | 'Countries' >> beam.Create(['australia', 'brazil', 'canada'])
-(apply_transforms(fruits, countries)
- | LogElements())
+ (apply_transforms(fruits, countries)
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Core Transforms/Combine/Combine PerKey/task-info.yaml b/learning/katas/python/Core Transforms/Combine/Combine PerKey/task-info.yaml
index fcdb9c50..5025294 100644
--- a/learning/katas/python/Core Transforms/Combine/Combine PerKey/task-info.yaml
+++ b/learning/katas/python/Core Transforms/Combine/Combine PerKey/task-info.yaml
@@ -22,7 +22,7 @@
- name: task.py
visible: true
placeholders:
- - offset: 1088
+ - offset: 1101
length: 23
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Core Transforms/Combine/Combine PerKey/task-remote-info.yaml b/learning/katas/python/Core Transforms/Combine/Combine PerKey/task-remote-info.yaml
index c7f007a..070eaad 100644
--- a/learning/katas/python/Core Transforms/Combine/Combine PerKey/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Combine/Combine PerKey/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755587
-update_date: Tue, 19 May 2020 03:05:05 UTC
+update_date: Sat, 01 Aug 2020 09:41:46 UTC
diff --git a/learning/katas/python/Core Transforms/Combine/Combine PerKey/task.py b/learning/katas/python/Core Transforms/Combine/Combine PerKey/task.py
index c9167f7..4bbae2e 100644
--- a/learning/katas/python/Core Transforms/Combine/Combine PerKey/task.py
+++ b/learning/katas/python/Core Transforms/Combine/Combine PerKey/task.py
@@ -22,11 +22,10 @@
PLAYER_2 = 'Player 2'
PLAYER_3 = 'Player 3'
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create([(PLAYER_1, 15), (PLAYER_2, 10), (PLAYER_1, 100),
- (PLAYER_3, 25), (PLAYER_2, 75)])
- | beam.CombinePerKey(sum)
- | LogElements())
+ (p | beam.Create([(PLAYER_1, 15), (PLAYER_2, 10), (PLAYER_1, 100),
+ (PLAYER_3, 25), (PLAYER_2, 75)])
+ | beam.CombinePerKey(sum)
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Core Transforms/Combine/CombineFn/task-info.yaml b/learning/katas/python/Core Transforms/Combine/CombineFn/task-info.yaml
index 1be0f5b..75c8d17 100644
--- a/learning/katas/python/Core Transforms/Combine/CombineFn/task-info.yaml
+++ b/learning/katas/python/Core Transforms/Combine/CombineFn/task-info.yaml
@@ -25,7 +25,7 @@
- offset: 916
length: 436
placeholder_text: TODO()
- - offset: 1420
+ - offset: 1431
length: 33
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Core Transforms/Combine/CombineFn/task-remote-info.yaml b/learning/katas/python/Core Transforms/Combine/CombineFn/task-remote-info.yaml
index 8330f05..1e1a578 100644
--- a/learning/katas/python/Core Transforms/Combine/CombineFn/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Combine/CombineFn/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755585
-update_date: Tue, 19 May 2020 03:06:40 UTC
+update_date: Sat, 01 Aug 2020 09:41:42 UTC
diff --git a/learning/katas/python/Core Transforms/Combine/CombineFn/task.py b/learning/katas/python/Core Transforms/Combine/CombineFn/task.py
index ce7b4db..cd7208f 100644
--- a/learning/katas/python/Core Transforms/Combine/CombineFn/task.py
+++ b/learning/katas/python/Core Transforms/Combine/CombineFn/task.py
@@ -37,10 +37,9 @@
return sum / count if count else float('NaN')
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create([10, 20, 50, 70, 90])
- | beam.CombineGlobally(AverageFn())
- | LogElements())
+ (p | beam.Create([10, 20, 50, 70, 90])
+ | beam.CombineGlobally(AverageFn())
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Core Transforms/Combine/Simple Function/task-info.yaml b/learning/katas/python/Core Transforms/Combine/Simple Function/task-info.yaml
index 5fbd37f..a3f9c3f 100644
--- a/learning/katas/python/Core Transforms/Combine/Simple Function/task-info.yaml
+++ b/learning/katas/python/Core Transforms/Combine/Simple Function/task-info.yaml
@@ -25,7 +25,7 @@
- offset: 900
length: 73
placeholder_text: TODO()
- - offset: 1036
+ - offset: 1047
length: 25
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Core Transforms/Combine/Simple Function/task-remote-info.yaml b/learning/katas/python/Core Transforms/Combine/Simple Function/task-remote-info.yaml
index d61da29..21fefa6 100644
--- a/learning/katas/python/Core Transforms/Combine/Simple Function/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Combine/Simple Function/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755584
-update_date: Tue, 19 May 2020 03:05:00 UTC
+update_date: Sat, 01 Aug 2020 09:41:38 UTC
diff --git a/learning/katas/python/Core Transforms/Combine/Simple Function/task.py b/learning/katas/python/Core Transforms/Combine/Simple Function/task.py
index a3c29cf..c1f5fa7 100644
--- a/learning/katas/python/Core Transforms/Combine/Simple Function/task.py
+++ b/learning/katas/python/Core Transforms/Combine/Simple Function/task.py
@@ -28,10 +28,9 @@
return total
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create([1, 2, 3, 4, 5])
- | beam.CombineGlobally(sum)
- | LogElements())
+ (p | beam.Create([1, 2, 3, 4, 5])
+ | beam.CombineGlobally(sum)
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task-info.yaml b/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task-info.yaml
index 727e22d..aae12cd 100644
--- a/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task-info.yaml
+++ b/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task-info.yaml
@@ -25,7 +25,7 @@
- offset: 920
length: 184
placeholder_text: TODO()
- - offset: 1179
+ - offset: 1190
length: 27
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task-remote-info.yaml b/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task-remote-info.yaml
index c0a5566..ac9db3b 100644
--- a/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755593
-update_date: Tue, 19 May 2020 03:05:23 UTC
+update_date: Sat, 01 Aug 2020 09:45:35 UTC
diff --git a/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task.py b/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task.py
index 46396b9..d703ecc 100644
--- a/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task.py
+++ b/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task.py
@@ -28,10 +28,9 @@
)
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create(['1,2,3,4,5', '6,7,8,9,10'])
- | ExtractAndMultiplyNumbers()
- | LogElements())
+ (p | beam.Create(['1,2,3,4,5', '6,7,8,9,10'])
+ | ExtractAndMultiplyNumbers()
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Core Transforms/Flatten/Flatten/task-info.yaml b/learning/katas/python/Core Transforms/Flatten/Flatten/task-info.yaml
index 4cb2da7..c9a5071 100644
--- a/learning/katas/python/Core Transforms/Flatten/Flatten/task-info.yaml
+++ b/learning/katas/python/Core Transforms/Flatten/Flatten/task-info.yaml
@@ -22,7 +22,7 @@
- name: task.py
visible: true
placeholders:
- - offset: 1140
+ - offset: 1159
length: 14
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Core Transforms/Flatten/Flatten/task-remote-info.yaml b/learning/katas/python/Core Transforms/Flatten/Flatten/task-remote-info.yaml
index a64890a..390ba35 100644
--- a/learning/katas/python/Core Transforms/Flatten/Flatten/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Flatten/Flatten/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755588
-update_date: Tue, 19 May 2020 03:05:08 UTC
+update_date: Sat, 01 Aug 2020 09:41:49 UTC
diff --git a/learning/katas/python/Core Transforms/Flatten/Flatten/task.py b/learning/katas/python/Core Transforms/Flatten/Flatten/task.py
index c4c3b6e..282eabf 100644
--- a/learning/katas/python/Core Transforms/Flatten/Flatten/task.py
+++ b/learning/katas/python/Core Transforms/Flatten/Flatten/task.py
@@ -18,16 +18,15 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-wordsStartingWithA = \
- p | 'Words starting with A' >> beam.Create(['apple', 'ant', 'arrow'])
+ wordsStartingWithA = \
+ p | 'Words starting with A' >> beam.Create(['apple', 'ant', 'arrow'])
-wordsStartingWithB = \
- p | 'Words starting with B' >> beam.Create(['ball', 'book', 'bow'])
+ wordsStartingWithB = \
+ p | 'Words starting with B' >> beam.Create(['ball', 'book', 'bow'])
-((wordsStartingWithA, wordsStartingWithB)
- | beam.Flatten()
- | LogElements())
+ ((wordsStartingWithA, wordsStartingWithB)
+ | beam.Flatten()
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task-info.yaml b/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task-info.yaml
index 4151745..98eb868 100644
--- a/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task-info.yaml
+++ b/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task-info.yaml
@@ -22,8 +22,8 @@
- name: task.py
visible: true
placeholders:
- - offset: 970
- length: 63
+ - offset: 981
+ length: 65
placeholder_text: '| TODO()'
- name: tests.py
visible: false
diff --git a/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task-remote-info.yaml b/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task-remote-info.yaml
index 6a232871..fea74cb 100644
--- a/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755582
-update_date: Tue, 19 May 2020 03:04:53 UTC
+update_date: Sat, 01 Aug 2020 09:41:31 UTC
diff --git a/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task.py b/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task.py
index f65ffa1..67e0ed6 100644
--- a/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task.py
+++ b/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task.py
@@ -18,11 +18,10 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create(['apple', 'ball', 'car', 'bear', 'cheetah', 'ant'])
- | beam.Map(lambda word: (word[0], word))
- | beam.GroupByKey()
- | LogElements())
+ (p | beam.Create(['apple', 'ball', 'car', 'bear', 'cheetah', 'ant'])
+ | beam.Map(lambda word: (word[0], word))
+ | beam.GroupByKey()
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Core Transforms/Map/FlatMap/task-info.yaml b/learning/katas/python/Core Transforms/Map/FlatMap/task-info.yaml
index 60eb861..1e50818 100644
--- a/learning/katas/python/Core Transforms/Map/FlatMap/task-info.yaml
+++ b/learning/katas/python/Core Transforms/Map/FlatMap/task-info.yaml
@@ -22,7 +22,7 @@
- name: task.py
visible: true
placeholders:
- - offset: 968
+ - offset: 979
length: 47
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Core Transforms/Map/FlatMap/task-remote-info.yaml b/learning/katas/python/Core Transforms/Map/FlatMap/task-remote-info.yaml
index f98961e..3335fbe 100644
--- a/learning/katas/python/Core Transforms/Map/FlatMap/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Map/FlatMap/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755580
-update_date: Tue, 19 May 2020 03:04:50 UTC
+update_date: Sat, 01 Aug 2020 09:41:28 UTC
diff --git a/learning/katas/python/Core Transforms/Map/FlatMap/task.py b/learning/katas/python/Core Transforms/Map/FlatMap/task.py
index ba4c0d8..a345369 100644
--- a/learning/katas/python/Core Transforms/Map/FlatMap/task.py
+++ b/learning/katas/python/Core Transforms/Map/FlatMap/task.py
@@ -18,10 +18,9 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create(['Apache Beam', 'Unified Batch and Streaming'])
- | beam.FlatMap(lambda sentence: sentence.split())
- | LogElements())
+ (p | beam.Create(['Apache Beam', 'Unified Batch and Streaming'])
+ | beam.FlatMap(lambda sentence: sentence.split())
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Core Transforms/Map/Map/task-info.yaml b/learning/katas/python/Core Transforms/Map/Map/task-info.yaml
index 271d8cb..d1c5cf1 100644
--- a/learning/katas/python/Core Transforms/Map/Map/task-info.yaml
+++ b/learning/katas/python/Core Transforms/Map/Map/task-info.yaml
@@ -22,7 +22,7 @@
- name: task.py
visible: true
placeholders:
- - offset: 942
+ - offset: 953
length: 29
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Core Transforms/Map/Map/task-remote-info.yaml b/learning/katas/python/Core Transforms/Map/Map/task-remote-info.yaml
index 66446ef..e529042 100644
--- a/learning/katas/python/Core Transforms/Map/Map/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Map/Map/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755579
-update_date: Tue, 19 May 2020 03:04:48 UTC
+update_date: Sat, 01 Aug 2020 09:41:24 UTC
diff --git a/learning/katas/python/Core Transforms/Map/Map/task.py b/learning/katas/python/Core Transforms/Map/Map/task.py
index fac6a77..386bb8d 100644
--- a/learning/katas/python/Core Transforms/Map/Map/task.py
+++ b/learning/katas/python/Core Transforms/Map/Map/task.py
@@ -18,10 +18,9 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create([10, 20, 30, 40, 50])
- | beam.Map(lambda num: num * 5)
- | LogElements())
+ (p | beam.Create([10, 20, 30, 40, 50])
+ | beam.Map(lambda num: num * 5)
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Core Transforms/Map/ParDo OneToMany/task-info.yaml b/learning/katas/python/Core Transforms/Map/ParDo OneToMany/task-info.yaml
index 9ebdc5e..c52cd63 100644
--- a/learning/katas/python/Core Transforms/Map/ParDo OneToMany/task-info.yaml
+++ b/learning/katas/python/Core Transforms/Map/ParDo OneToMany/task-info.yaml
@@ -25,7 +25,7 @@
- offset: 920
length: 58
placeholder_text: TODO()
- - offset: 1057
+ - offset: 1068
length: 32
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Core Transforms/Map/ParDo OneToMany/task-remote-info.yaml b/learning/katas/python/Core Transforms/Map/ParDo OneToMany/task-remote-info.yaml
index e0c2b03..eab3ba4 100644
--- a/learning/katas/python/Core Transforms/Map/ParDo OneToMany/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Map/ParDo OneToMany/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755578
-update_date: Tue, 19 May 2020 03:04:45 UTC
+update_date: Sat, 01 Aug 2020 09:41:21 UTC
diff --git a/learning/katas/python/Core Transforms/Map/ParDo OneToMany/task.py b/learning/katas/python/Core Transforms/Map/ParDo OneToMany/task.py
index e93edc11..b9f0a3d 100644
--- a/learning/katas/python/Core Transforms/Map/ParDo OneToMany/task.py
+++ b/learning/katas/python/Core Transforms/Map/ParDo OneToMany/task.py
@@ -25,10 +25,9 @@
return element.split()
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create(['Hello Beam', 'It is awesome'])
- | beam.ParDo(BreakIntoWordsDoFn())
- | LogElements())
+ (p | beam.Create(['Hello Beam', 'It is awesome'])
+ | beam.ParDo(BreakIntoWordsDoFn())
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Core Transforms/Map/ParDo/task-info.yaml b/learning/katas/python/Core Transforms/Map/ParDo/task-info.yaml
index 1d1767f..a2c9191 100644
--- a/learning/katas/python/Core Transforms/Map/ParDo/task-info.yaml
+++ b/learning/katas/python/Core Transforms/Map/ParDo/task-info.yaml
@@ -25,7 +25,7 @@
- offset: 919
length: 54
placeholder_text: TODO()
- - offset: 1036
+ - offset: 1047
length: 31
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Core Transforms/Map/ParDo/task-remote-info.yaml b/learning/katas/python/Core Transforms/Map/ParDo/task-remote-info.yaml
index 97b55d7..3b83a93 100644
--- a/learning/katas/python/Core Transforms/Map/ParDo/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Map/ParDo/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755577
-update_date: Tue, 19 May 2020 03:04:42 UTC
+update_date: Sat, 01 Aug 2020 09:41:17 UTC
diff --git a/learning/katas/python/Core Transforms/Map/ParDo/task.py b/learning/katas/python/Core Transforms/Map/ParDo/task.py
index e0b6cff..cfcfe9d 100644
--- a/learning/katas/python/Core Transforms/Map/ParDo/task.py
+++ b/learning/katas/python/Core Transforms/Map/ParDo/task.py
@@ -25,10 +25,9 @@
yield element * 10
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create([1, 2, 3, 4, 5])
- | beam.ParDo(MultiplyByTenDoFn())
- | LogElements())
+ (p | beam.Create([1, 2, 3, 4, 5])
+ | beam.ParDo(MultiplyByTenDoFn())
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Core Transforms/Partition/Partition/task-info.yaml b/learning/katas/python/Core Transforms/Partition/Partition/task-info.yaml
index fb4e439..c63ee3c 100644
--- a/learning/katas/python/Core Transforms/Partition/Partition/task-info.yaml
+++ b/learning/katas/python/Core Transforms/Partition/Partition/task-info.yaml
@@ -25,7 +25,7 @@
- offset: 924
length: 60
placeholder_text: TODO()
- - offset: 1087
+ - offset: 1100
length: 31
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Core Transforms/Partition/Partition/task-remote-info.yaml b/learning/katas/python/Core Transforms/Partition/Partition/task-remote-info.yaml
index 3a551d6..15bbc96 100644
--- a/learning/katas/python/Core Transforms/Partition/Partition/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Partition/Partition/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755589
-update_date: Tue, 19 May 2020 03:05:12 UTC
+update_date: Sat, 01 Aug 2020 09:41:52 UTC
diff --git a/learning/katas/python/Core Transforms/Partition/Partition/task.py b/learning/katas/python/Core Transforms/Partition/Partition/task.py
index d4ab573..7c9bbe8 100644
--- a/learning/katas/python/Core Transforms/Partition/Partition/task.py
+++ b/learning/katas/python/Core Transforms/Partition/Partition/task.py
@@ -26,13 +26,12 @@
return 1
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-results = \
- (p | beam.Create([1, 2, 3, 4, 5, 100, 110, 150, 250])
- | beam.Partition(partition_fn, 2))
+ results = \
+ (p | beam.Create([1, 2, 3, 4, 5, 100, 110, 150, 250])
+ | beam.Partition(partition_fn, 2))
-results[0] | 'Log numbers > 100' >> LogElements(prefix='Number > 100: ')
-results[1] | 'Log numbers <= 100' >> LogElements(prefix='Number <= 100: ')
+ results[0] | 'Log numbers > 100' >> LogElements(prefix='Number > 100: ')
+ results[1] | 'Log numbers <= 100' >> LogElements(prefix='Number <= 100: ')
-p.run()
diff --git a/learning/katas/python/Core Transforms/Side Input/Side Input/task-info.yaml b/learning/katas/python/Core Transforms/Side Input/Side Input/task-info.yaml
index 4ab34f3..be86fee 100644
--- a/learning/katas/python/Core Transforms/Side Input/Side Input/task-info.yaml
+++ b/learning/katas/python/Core Transforms/Side Input/Side Input/task-info.yaml
@@ -25,7 +25,7 @@
- offset: 1534
length: 153
placeholder_text: TODO()
- - offset: 2096
+ - offset: 2135
length: 52
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Core Transforms/Side Input/Side Input/task-remote-info.yaml b/learning/katas/python/Core Transforms/Side Input/Side Input/task-remote-info.yaml
index 4957ece..61d0bf1 100644
--- a/learning/katas/python/Core Transforms/Side Input/Side Input/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Side Input/Side Input/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755590
-update_date: Tue, 19 May 2020 03:05:14 UTC
+update_date: Sat, 01 Aug 2020 09:44:19 UTC
diff --git a/learning/katas/python/Core Transforms/Side Input/Side Input/task.py b/learning/katas/python/Core Transforms/Side Input/Side Input/task.py
index ec6d39e..06f13fc 100644
--- a/learning/katas/python/Core Transforms/Side Input/Side Input/task.py
+++ b/learning/katas/python/Core Transforms/Side Input/Side Input/task.py
@@ -44,26 +44,25 @@
cities_to_countries[element.city])
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-cities_to_countries = {
- 'Beijing': 'China',
- 'London': 'United Kingdom',
- 'San Francisco': 'United States',
- 'Singapore': 'Singapore',
- 'Sydney': 'Australia'
-}
+ cities_to_countries = {
+ 'Beijing': 'China',
+ 'London': 'United Kingdom',
+ 'San Francisco': 'United States',
+ 'Singapore': 'Singapore',
+ 'Sydney': 'Australia'
+ }
-persons = [
- Person('Henry', 'Singapore'),
- Person('Jane', 'San Francisco'),
- Person('Lee', 'Beijing'),
- Person('John', 'Sydney'),
- Person('Alfred', 'London')
-]
+ persons = [
+ Person('Henry', 'Singapore'),
+ Person('Jane', 'San Francisco'),
+ Person('Lee', 'Beijing'),
+ Person('John', 'Sydney'),
+ Person('Alfred', 'London')
+ ]
-(p | beam.Create(persons)
- | beam.ParDo(EnrichCountryDoFn(), cities_to_countries)
- | LogElements())
+ (p | beam.Create(persons)
+ | beam.ParDo(EnrichCountryDoFn(), cities_to_countries)
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Core Transforms/Side Output/Side Output/task-info.yaml b/learning/katas/python/Core Transforms/Side Output/Side Output/task-info.yaml
index 5f65c7f..025f105 100644
--- a/learning/katas/python/Core Transforms/Side Output/Side Output/task-info.yaml
+++ b/learning/katas/python/Core Transforms/Side Output/Side Output/task-info.yaml
@@ -25,8 +25,8 @@
- offset: 1011
length: 160
placeholder_text: TODO()
- - offset: 1264
- length: 98
+ - offset: 1277
+ length: 100
placeholder_text: TODO()
- name: tests.py
visible: false
diff --git a/learning/katas/python/Core Transforms/Side Output/Side Output/task-remote-info.yaml b/learning/katas/python/Core Transforms/Side Output/Side Output/task-remote-info.yaml
index 158110e..0bd57f0 100644
--- a/learning/katas/python/Core Transforms/Side Output/Side Output/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Side Output/Side Output/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755591
-update_date: Tue, 19 May 2020 03:05:17 UTC
+update_date: Sat, 01 Aug 2020 09:41:58 UTC
diff --git a/learning/katas/python/Core Transforms/Side Output/Side Output/task.py b/learning/katas/python/Core Transforms/Side Output/Side Output/task.py
index f587e1c..e321c7a 100644
--- a/learning/katas/python/Core Transforms/Side Output/Side Output/task.py
+++ b/learning/katas/python/Core Transforms/Side Output/Side Output/task.py
@@ -32,14 +32,13 @@
yield pvalue.TaggedOutput(num_above_100_tag, element)
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-results = \
- (p | beam.Create([10, 50, 120, 20, 200, 0])
- | beam.ParDo(ProcessNumbersDoFn())
- .with_outputs(num_above_100_tag, main=num_below_100_tag))
+ results = \
+ (p | beam.Create([10, 50, 120, 20, 200, 0])
+ | beam.ParDo(ProcessNumbersDoFn())
+ .with_outputs(num_above_100_tag, main=num_below_100_tag))
-results[num_below_100_tag] | 'Log numbers <= 100' >> LogElements(prefix='Number <= 100: ')
-results[num_above_100_tag] | 'Log numbers > 100' >> LogElements(prefix='Number > 100: ')
+ results[num_below_100_tag] | 'Log numbers <= 100' >> LogElements(prefix='Number <= 100: ')
+ results[num_above_100_tag] | 'Log numbers > 100' >> LogElements(prefix='Number > 100: ')
-p.run()
diff --git a/learning/katas/python/Examples/Word Count/Word Count/task-info.yaml b/learning/katas/python/Examples/Word Count/Word Count/task-info.yaml
index 435527d..6370180 100644
--- a/learning/katas/python/Examples/Word Count/Word Count/task-info.yaml
+++ b/learning/katas/python/Examples/Word Count/Word Count/task-info.yaml
@@ -22,8 +22,8 @@
- name: task.py
visible: true
placeholders:
- - offset: 1021
- length: 136
+ - offset: 1032
+ length: 140
placeholder_text: TODO()
- name: tests.py
visible: false
diff --git a/learning/katas/python/Examples/Word Count/Word Count/task-remote-info.yaml b/learning/katas/python/Examples/Word Count/Word Count/task-remote-info.yaml
index eec4604..80e087a 100644
--- a/learning/katas/python/Examples/Word Count/Word Count/task-remote-info.yaml
+++ b/learning/katas/python/Examples/Word Count/Word Count/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755604
-update_date: Tue, 19 May 2020 03:06:04 UTC
+update_date: Sat, 01 Aug 2020 09:42:41 UTC
diff --git a/learning/katas/python/Examples/Word Count/Word Count/task.py b/learning/katas/python/Examples/Word Count/Word Count/task.py
index 10b7cf8..9139c76 100644
--- a/learning/katas/python/Examples/Word Count/Word Count/task.py
+++ b/learning/katas/python/Examples/Word Count/Word Count/task.py
@@ -23,14 +23,13 @@
"banana orange banana papaya"
]
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create(lines)
+ (p | beam.Create(lines)
- | beam.FlatMap(lambda sentence: sentence.split())
- | beam.combiners.Count.PerElement()
- | beam.MapTuple(lambda k, v: k + ":" + str(v))
+ | beam.FlatMap(lambda sentence: sentence.split())
+ | beam.combiners.Count.PerElement()
+ | beam.MapTuple(lambda k, v: k + ":" + str(v))
- | LogElements())
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/IO/Built-in IOs/Built-in IOs/task-remote-info.yaml b/learning/katas/python/IO/Built-in IOs/Built-in IOs/task-remote-info.yaml
index a5130d4..9c9a163 100644
--- a/learning/katas/python/IO/Built-in IOs/Built-in IOs/task-remote-info.yaml
+++ b/learning/katas/python/IO/Built-in IOs/Built-in IOs/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076138
-update_date: Tue, 19 May 2020 03:05:56 UTC
+update_date: Sat, 01 Aug 2020 09:42:32 UTC
diff --git a/learning/katas/python/IO/Built-in IOs/Built-in IOs/task.py b/learning/katas/python/IO/Built-in IOs/Built-in IOs/task.py
index 6894717..b95ceb2 100644
--- a/learning/katas/python/IO/Built-in IOs/Built-in IOs/task.py
+++ b/learning/katas/python/IO/Built-in IOs/Built-in IOs/task.py
@@ -16,7 +16,6 @@
import apache_beam as beam
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-p.run()
diff --git a/learning/katas/python/IO/TextIO/ReadFromText/task-info.yaml b/learning/katas/python/IO/TextIO/ReadFromText/task-info.yaml
index d42a178..6a322af 100644
--- a/learning/katas/python/IO/TextIO/ReadFromText/task-info.yaml
+++ b/learning/katas/python/IO/TextIO/ReadFromText/task-info.yaml
@@ -22,10 +22,10 @@
- name: task.py
visible: true
placeholders:
- - offset: 919
+ - offset: 930
length: 31
placeholder_text: TODO()
- - offset: 956
+ - offset: 969
length: 41
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/IO/TextIO/ReadFromText/task-remote-info.yaml b/learning/katas/python/IO/TextIO/ReadFromText/task-remote-info.yaml
index 9afea63..a047a2d 100644
--- a/learning/katas/python/IO/TextIO/ReadFromText/task-remote-info.yaml
+++ b/learning/katas/python/IO/TextIO/ReadFromText/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755602
-update_date: Tue, 19 May 2020 03:05:52 UTC
+update_date: Sat, 01 Aug 2020 09:42:29 UTC
diff --git a/learning/katas/python/IO/TextIO/ReadFromText/task.py b/learning/katas/python/IO/TextIO/ReadFromText/task.py
index 96dfe6f..ab04e1d 100644
--- a/learning/katas/python/IO/TextIO/ReadFromText/task.py
+++ b/learning/katas/python/IO/TextIO/ReadFromText/task.py
@@ -18,12 +18,11 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-file_path = 'countries.txt'
+ file_path = 'countries.txt'
-(p | beam.io.ReadFromText(file_path)
- | beam.Map(lambda country: country.upper())
- | LogElements())
+ (p | beam.io.ReadFromText(file_path)
+ | beam.Map(lambda country: country.upper())
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Introduction/Hello Beam/Hello Beam/task-info.yaml b/learning/katas/python/Introduction/Hello Beam/Hello Beam/task-info.yaml
index 747b4e1..8f9d26b 100644
--- a/learning/katas/python/Introduction/Hello Beam/Hello Beam/task-info.yaml
+++ b/learning/katas/python/Introduction/Hello Beam/Hello Beam/task-info.yaml
@@ -22,7 +22,7 @@
- name: task.py
visible: true
placeholders:
- - offset: 903
+ - offset: 912
length: 27
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Introduction/Hello Beam/Hello Beam/task-remote-info.yaml b/learning/katas/python/Introduction/Hello Beam/Hello Beam/task-remote-info.yaml
index d4953da..239fbb8 100644
--- a/learning/katas/python/Introduction/Hello Beam/Hello Beam/task-remote-info.yaml
+++ b/learning/katas/python/Introduction/Hello Beam/Hello Beam/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755575
-update_date: Tue, 19 May 2020 03:04:39 UTC
+update_date: Sat, 01 Aug 2020 09:41:14 UTC
diff --git a/learning/katas/python/Introduction/Hello Beam/Hello Beam/task.py b/learning/katas/python/Introduction/Hello Beam/Hello Beam/task.py
index 0a2f3f7..85ffe1d 100644
--- a/learning/katas/python/Introduction/Hello Beam/Hello Beam/task.py
+++ b/learning/katas/python/Introduction/Hello Beam/Hello Beam/task.py
@@ -18,9 +18,8 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create(['Hello Beam'])
- | LogElements())
+ (p | beam.Create(['Hello Beam'])
+ | LogElements())
-p.run()
diff --git a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-info.yaml b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-info.yaml
index bbdc8d0..5c97141 100644
--- a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-info.yaml
+++ b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-info.yaml
@@ -25,7 +25,7 @@
- offset: 1231
length: 155
placeholder_text: TODO()
- - offset: 1917
+ - offset: 1940
length: 30
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-remote-info.yaml b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-remote-info.yaml
index 3eafb58..5e6340b 100644
--- a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-remote-info.yaml
+++ b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1124219
-update_date: Tue, 19 May 2020 03:06:43 UTC
+update_date: Sat, 01 Aug 2020 09:45:39 UTC
diff --git a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task.py b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task.py
index aba4f6e..fd4cdb9 100644
--- a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task.py
+++ b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task.py
@@ -40,16 +40,15 @@
yield window.TimestampedValue(element, unix_timestamp)
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create([
- Event('1', 'book-order', datetime.datetime(2020, 3, 4, 0, 0, 0, 0, tzinfo=pytz.UTC)),
- Event('2', 'pencil-order', datetime.datetime(2020, 3, 5, 0, 0, 0, 0, tzinfo=pytz.UTC)),
- Event('3', 'paper-order', datetime.datetime(2020, 3, 6, 0, 0, 0, 0, tzinfo=pytz.UTC)),
- Event('4', 'pencil-order', datetime.datetime(2020, 3, 7, 0, 0, 0, 0, tzinfo=pytz.UTC)),
- Event('5', 'book-order', datetime.datetime(2020, 3, 8, 0, 0, 0, 0, tzinfo=pytz.UTC)),
- ])
- | beam.ParDo(AddTimestampDoFn())
- | LogElements(with_timestamp=True))
+ (p | beam.Create([
+ Event('1', 'book-order', datetime.datetime(2020, 3, 4, 0, 0, 0, 0, tzinfo=pytz.UTC)),
+ Event('2', 'pencil-order', datetime.datetime(2020, 3, 5, 0, 0, 0, 0, tzinfo=pytz.UTC)),
+ Event('3', 'paper-order', datetime.datetime(2020, 3, 6, 0, 0, 0, 0, tzinfo=pytz.UTC)),
+ Event('4', 'pencil-order', datetime.datetime(2020, 3, 7, 0, 0, 0, 0, tzinfo=pytz.UTC)),
+ Event('5', 'book-order', datetime.datetime(2020, 3, 8, 0, 0, 0, 0, tzinfo=pytz.UTC)),
+ ])
+ | beam.ParDo(AddTimestampDoFn())
+ | LogElements(with_timestamp=True))
-p.run()
diff --git a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-info.yaml b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-info.yaml
index 5c91b23..a1462bb 100644
--- a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-info.yaml
+++ b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-info.yaml
@@ -22,8 +22,8 @@
- name: task.py
visible: true
placeholders:
- - offset: 2067
- length: 85
+ - offset: 2100
+ length: 87
placeholder_text: TODO()
- name: tests.py
visible: false
diff --git a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-remote-info.yaml b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-remote-info.yaml
index 7062326..8441b4d 100644
--- a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-remote-info.yaml
+++ b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1124220
-update_date: Tue, 19 May 2020 03:06:01 UTC
+update_date: Sat, 01 Aug 2020 09:42:37 UTC
diff --git a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.py b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.py
index 0444bec..94eec80 100644
--- a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.py
+++ b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.py
@@ -23,22 +23,21 @@
from log_elements import LogElements
-p = beam.Pipeline()
+with beam.Pipeline() as p:
-(p | beam.Create([
- window.TimestampedValue("event", datetime(2020, 3, 1, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
- window.TimestampedValue("event", datetime(2020, 3, 1, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
- window.TimestampedValue("event", datetime(2020, 3, 1, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
- window.TimestampedValue("event", datetime(2020, 3, 1, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
- window.TimestampedValue("event", datetime(2020, 3, 5, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
- window.TimestampedValue("event", datetime(2020, 3, 5, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
- window.TimestampedValue("event", datetime(2020, 3, 8, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
- window.TimestampedValue("event", datetime(2020, 3, 8, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
- window.TimestampedValue("event", datetime(2020, 3, 8, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
- window.TimestampedValue("event", datetime(2020, 3, 10, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
- ])
- | beam.WindowInto(window.FixedWindows(24*60*60))
- | beam.combiners.Count.PerElement()
- | LogElements(with_window=True))
+ (p | beam.Create([
+ window.TimestampedValue("event", datetime(2020, 3, 1, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 1, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 1, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 1, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 5, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 5, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 8, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 8, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 8, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 10, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ ])
+ | beam.WindowInto(window.FixedWindows(24*60*60))
+ | beam.combiners.Count.PerElement()
+ | LogElements(with_window=True))
-p.run()
diff --git a/learning/katas/python/course-remote-info.yaml b/learning/katas/python/course-remote-info.yaml
index aa03268..6c6a7de 100644
--- a/learning/katas/python/course-remote-info.yaml
+++ b/learning/katas/python/course-remote-info.yaml
@@ -1,2 +1,2 @@
id: 54532
-update_date: Tue, 19 May 2020 03:04:36 UTC
+update_date: Wed, 01 Jul 2020 22:47:08 UTC
diff --git a/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml b/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml
index 80e4eb8..08b823d 100644
--- a/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml
+++ b/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml
@@ -347,7 +347,6 @@
urn: "beam:coder:row:v1"
# str: string, i32: int32, f64: float64, arr: array[string]
payload: "\n\t\n\x03str\x1a\x02\x10\x07\n\t\n\x03i32\x1a\x02\x10\x03\n\t\n\x03f64\x1a\x02\x10\x06\n\r\n\x03arr\x1a\x06\x1a\x04\n\x02\x10\x07\x12$4e5e554c-d4c1-4a5d-b5e1-f3293a6b9f05"
-nested: false
examples:
"\u0004\u0000\u0003foo\u00a9\u0046\u003f\u00b9\u0099\u0099\u0099\u0099\u0099\u009a\0\0\0\u0003\u0003foo\u0003bar\u0003baz": {str: "foo", i32: 9001, f64: "0.1", arr: ["foo", "bar", "baz"]}
@@ -357,7 +356,6 @@
urn: "beam:coder:row:v1"
# str: nullable string, i32: nullable int32, f64: nullable float64
payload: "\n\x0b\n\x03str\x1a\x04\x08\x01\x10\x07\n\x0b\n\x03i32\x1a\x04\x08\x01\x10\x03\n\x0b\n\x03f64\x1a\x04\x08\x01\x10\x06\x12$b20c6545-57af-4bc8-b2a9-51ace21c7393"
-nested: false
examples:
"\u0003\u0001\u0007": {str: null, i32: null, f64: null}
"\u0003\u0001\u0004\u0003foo\u00a9\u0046": {str: "foo", i32: 9001, f64: null}
@@ -380,7 +378,33 @@
urn: "beam:coder:row:v1"
# f_bool: boolean, f_bytes: nullable bytes
payload: "\n\x0c\n\x06f_bool\x1a\x02\x10\x08\n\x0f\n\x07f_bytes\x1a\x04\x08\x01\x10\t\x12$eea1b747-7571-43d3-aafa-9255afdceafb"
-nested: false
examples:
"\x02\x01\x02\x01": {f_bool: True, f_bytes: null}
"\x02\x00\x00\x04ab\x00c": {f_bool: False, f_bytes: "ab\0c"}
+
+---
+
+# Binary data generated with the python SDK:
+#
+# import typing
+# import apache_beam as beam
+# class Test(typing.NamedTuple):
+# f_map: typing.Mapping[str,int]
+# schema = beam.typehints.schemas.named_tuple_to_schema(Test)
+# coder = beam.coders.row_coder.RowCoder(schema)
+# print("payload = %s" % schema.SerializeToString())
+# examples = (Test(f_map={}),
+# Test(f_map={"foo": 9001, "bar": 9223372036854775807}),
+# Test(f_map={"everything": None, "is": None, "null!": None, "¯\_(ツ)_/¯": None}))
+# for example in examples:
+# print("example = %s" % coder.encode(example))
+coder:
+ urn: "beam:coder:row:v1"
+ # f_map: map<str, nullable int64>
+ payload: "\n\x15\n\x05f_map\x1a\x0c*\n\n\x02\x10\x07\x12\x04\x08\x01\x10\x04\x12$d8c8f969-14e6-457f-a8b5-62a1aec7f1cd"
+ # map ordering is non-deterministic
+ non_deterministic: True
+examples:
+ "\x01\x00\x00\x00\x00\x00": {f_map: {}}
+ "\x01\x00\x00\x00\x00\x02\x03foo\x01\xa9F\x03bar\x01\xff\xff\xff\xff\xff\xff\xff\xff\x7f": {f_map: {"foo": 9001, "bar": 9223372036854775807}}
+ "\x01\x00\x00\x00\x00\x04\neverything\x00\x02is\x00\x05null!\x00\r\xc2\xaf\\_(\xe3\x83\x84)_/\xc2\xaf\x00": {f_map: {"everything":null, "is": null, "null!": null, "¯\\_(ツ)_/¯": null}}
diff --git a/model/pipeline/src/main/proto/beam_runner_api.proto b/model/pipeline/src/main/proto/beam_runner_api.proto
index 3623790..8b1ce6b 100644
--- a/model/pipeline/src/main/proto/beam_runner_api.proto
+++ b/model/pipeline/src/main/proto/beam_runner_api.proto
@@ -855,10 +855,21 @@
// BOOLEAN: beam:coder:bool:v1
// BYTES: beam:coder:bytes:v1
// ArrayType: beam:coder:iterable:v1 (always has a known length)
- // MapType: not yet a standard coder (BEAM-7996)
+ // MapType: not a standard coder, specification defined below.
// RowType: beam:coder:row:v1
// LogicalType: Uses the coder for its representation.
//
+ // The MapType is encoded by:
+ // - An INT32 representing the size of the map (N)
+ // - Followed by N interleaved keys and values, encoded with their
+ // corresponding coder.
+ //
+ // Nullable types in container types (ArrayType, MapType) are encoded by:
+ // - A one byte null indicator, 0x00 for null values, or 0x01 for present
+ // values.
+ // - For present values the null indicator is followed by the value
+ // encoded with its corresponding coder.
+ //
// The payload for RowCoder is an instance of Schema.
// Components: None
// Experimental.
diff --git a/model/pipeline/src/main/proto/schema.proto b/model/pipeline/src/main/proto/schema.proto
index dcf75ca..bffa5f1 100644
--- a/model/pipeline/src/main/proto/schema.proto
+++ b/model/pipeline/src/main/proto/schema.proto
@@ -19,6 +19,9 @@
// ** Experimental **
// Protocol Buffers describing Beam Schemas, a portable representation for
// complex types.
+//
+// The primary application of Schema is as the payload for the standard coder
+// "beam:coder:row:v1", defined in beam_runner_api.proto
syntax = "proto3";
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/SplittableParDoExpander.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/SplittableParDoExpander.java
index 181ddab..4e8088f 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/SplittableParDoExpander.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/SplittableParDoExpander.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.runners.core.construction.graph;
+import com.google.auto.value.AutoValue;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
@@ -66,7 +67,7 @@
* information is available to the runner if it chooses to inspect it.
*/
public static TransformReplacement createSizedReplacement() {
- return SizedReplacement.INSTANCE;
+ return SizedReplacement.builder().setDrain(false).build();
}
/**
@@ -91,13 +92,25 @@
* .
*/
public static TransformReplacement createTruncateReplacement() {
- return TruncateReplacement.INSTANCE;
+ return SizedReplacement.builder().setDrain(true).build();
}
/** See {@link #createSizedReplacement()} for details. */
- private static class SizedReplacement implements TransformReplacement {
+ @AutoValue
+ abstract static class SizedReplacement implements TransformReplacement {
- private static final SizedReplacement INSTANCE = new SizedReplacement();
+ static Builder builder() {
+ return new AutoValue_SplittableParDoExpander_SizedReplacement.Builder();
+ }
+
+ abstract boolean isDrain();
+
+ @AutoValue.Builder
+ abstract static class Builder {
+ abstract Builder setDrain(boolean isDrain);
+
+ abstract SizedReplacement build();
+ }
@Override
public MessageWithComponents getReplacement(
@@ -209,14 +222,71 @@
splitAndSize.setEnvironmentId(splittableParDo.getEnvironmentId());
rval.getComponentsBuilder().putTransforms(splitAndSizeId, splitAndSize.build());
}
+ PTransform.Builder newCompositeRoot =
+ splittableParDo
+ .toBuilder()
+ // Clear the original splittable ParDo spec and add all the new transforms as
+ // children.
+ .clearSpec()
+ .addAllSubtransforms(Arrays.asList(pairWithRestrictionId, splitAndSizeId));
String processSizedElementsAndRestrictionsId =
generateUniqueId(
transformId + "/ProcessSizedElementsAndRestrictions",
existingComponents::containsTransforms);
+ String processSizedElementsInputPCollectionId = splitAndSizeOutId;
+ if (isDrain()) {
+ String truncateAndSizeCoderId =
+ generateUniqueId(
+ mainInputPCollection.getCoderId() + "/TruncateAndSize",
+ existingComponents::containsCoders);
+ rval.getComponentsBuilder()
+ .putCoders(
+ truncateAndSizeCoderId,
+ ModelCoders.kvCoder(
+ splitAndSizeOutCoderId, getOrAddDoubleCoder(existingComponents, rval)));
+ String truncateAndSizeOutId =
+ generateUniqueId(
+ mainInputPCollectionId + "/TruncateAndSize",
+ existingComponents::containsPcollections);
+
+ rval.getComponentsBuilder()
+ .putPcollections(
+ truncateAndSizeOutId,
+ PCollection.newBuilder()
+ .setCoderId(truncateAndSizeCoderId)
+ .setIsBounded(mainInputPCollection.getIsBounded())
+ .setWindowingStrategyId(mainInputPCollection.getWindowingStrategyId())
+ .setUniqueName(
+ generateUniquePCollectonName(
+ mainInputPCollection.getUniqueName() + "/TruncateAndSize",
+ existingComponents))
+ .build());
+ String truncateAndSizeId =
+ generateUniqueId(
+ transformId + "/TruncateAndSize", existingComponents::containsTransforms);
+ {
+ PTransform.Builder truncateAndSize = PTransform.newBuilder();
+ truncateAndSize.putInputs(mainInputName, splitAndSizeOutId);
+ truncateAndSize.putAllInputs(sideInputs);
+ truncateAndSize.putOutputs("out", truncateAndSizeOutId);
+ truncateAndSize.setUniqueName(
+ generateUniquePCollectonName(
+ splittableParDo.getUniqueName() + "/TruncateAndSize", existingComponents));
+ truncateAndSize.setSpec(
+ FunctionSpec.newBuilder()
+ .setUrn(PTransformTranslation.SPLITTABLE_TRUNCATE_SIZED_RESTRICTION_URN)
+ .setPayload(splittableParDo.getSpec().getPayload()));
+ truncateAndSize.setEnvironmentId(splittableParDo.getEnvironmentId());
+ rval.getComponentsBuilder().putTransforms(truncateAndSizeId, truncateAndSize.build());
+ }
+ newCompositeRoot.addSubtransforms(truncateAndSizeId);
+ processSizedElementsInputPCollectionId = truncateAndSizeOutId;
+ }
{
PTransform.Builder processSizedElementsAndRestrictions = PTransform.newBuilder();
- processSizedElementsAndRestrictions.putInputs(mainInputName, splitAndSizeOutId);
+ processSizedElementsAndRestrictions.putInputs(
+ mainInputName, processSizedElementsInputPCollectionId);
processSizedElementsAndRestrictions.putAllInputs(sideInputs);
processSizedElementsAndRestrictions.putAllOutputs(splittableParDo.getOutputsMap());
processSizedElementsAndRestrictions.setUniqueName(
@@ -234,20 +304,8 @@
processSizedElementsAndRestrictionsId,
processSizedElementsAndRestrictions.build());
}
-
- PTransform.Builder newCompositeRoot =
- splittableParDo
- .toBuilder()
- // Clear the original splittable ParDo spec and add all the new transforms as
- // children.
- .clearSpec()
- .addAllSubtransforms(
- Arrays.asList(
- pairWithRestrictionId,
- splitAndSizeId,
- processSizedElementsAndRestrictionsId));
+ newCompositeRoot.addSubtransforms(processSizedElementsAndRestrictionsId);
rval.setPtransform(newCompositeRoot);
-
return rval.build();
} catch (IOException e) {
throw new RuntimeException("Unable to perform expansion for transform " + transformId, e);
@@ -298,211 +356,4 @@
}
return prefix + i;
}
-
- /** See {@link #createTruncateReplacement} for details. */
- private static class TruncateReplacement implements TransformReplacement {
- private static final TruncateReplacement INSTANCE = new TruncateReplacement();
-
- @Override
- public MessageWithComponents getReplacement(
- String transformId, ComponentsOrBuilder existingComponents) {
- try {
- MessageWithComponents.Builder rval = MessageWithComponents.newBuilder();
-
- PTransform splittableParDo = existingComponents.getTransformsOrThrow(transformId);
- ParDoPayload payload = ParDoPayload.parseFrom(splittableParDo.getSpec().getPayload());
- // Only perform the expansion if this is a splittable DoFn.
- if (payload.getRestrictionCoderId() == null || payload.getRestrictionCoderId().isEmpty()) {
- return null;
- }
-
- String mainInputName = ParDoTranslation.getMainInputName(splittableParDo);
- String mainInputPCollectionId = splittableParDo.getInputsOrThrow(mainInputName);
- PCollection mainInputPCollection =
- existingComponents.getPcollectionsOrThrow(mainInputPCollectionId);
- Map<String, String> sideInputs =
- Maps.filterKeys(
- splittableParDo.getInputsMap(), input -> payload.containsSideInputs(input));
-
- String pairWithRestrictionOutCoderId =
- generateUniqueId(
- mainInputPCollection.getCoderId() + "/PairWithRestriction",
- existingComponents::containsCoders);
- rval.getComponentsBuilder()
- .putCoders(
- pairWithRestrictionOutCoderId,
- ModelCoders.kvCoder(
- mainInputPCollection.getCoderId(), payload.getRestrictionCoderId()));
-
- String pairWithRestrictionOutId =
- generateUniqueId(
- mainInputPCollectionId + "/PairWithRestriction",
- existingComponents::containsPcollections);
- rval.getComponentsBuilder()
- .putPcollections(
- pairWithRestrictionOutId,
- PCollection.newBuilder()
- .setCoderId(pairWithRestrictionOutCoderId)
- .setIsBounded(mainInputPCollection.getIsBounded())
- .setWindowingStrategyId(mainInputPCollection.getWindowingStrategyId())
- .setUniqueName(
- generateUniquePCollectonName(
- mainInputPCollection.getUniqueName() + "/PairWithRestriction",
- existingComponents))
- .build());
-
- String splitAndSizeOutCoderId =
- generateUniqueId(
- mainInputPCollection.getCoderId() + "/SplitAndSize",
- existingComponents::containsCoders);
- rval.getComponentsBuilder()
- .putCoders(
- splitAndSizeOutCoderId,
- ModelCoders.kvCoder(
- pairWithRestrictionOutCoderId, getOrAddDoubleCoder(existingComponents, rval)));
-
- String splitAndSizeOutId =
- generateUniqueId(
- mainInputPCollectionId + "/SplitAndSize", existingComponents::containsPcollections);
- rval.getComponentsBuilder()
- .putPcollections(
- splitAndSizeOutId,
- PCollection.newBuilder()
- .setCoderId(splitAndSizeOutCoderId)
- .setIsBounded(mainInputPCollection.getIsBounded())
- .setWindowingStrategyId(mainInputPCollection.getWindowingStrategyId())
- .setUniqueName(
- generateUniquePCollectonName(
- mainInputPCollection.getUniqueName() + "/SplitAndSize",
- existingComponents))
- .build());
-
- String truncateAndSizeCoderId =
- generateUniqueId(
- mainInputPCollection.getCoderId() + "/TruncateAndSize",
- existingComponents::containsCoders);
- rval.getComponentsBuilder()
- .putCoders(
- truncateAndSizeCoderId,
- ModelCoders.kvCoder(
- splitAndSizeOutCoderId, getOrAddDoubleCoder(existingComponents, rval)));
- String truncateAndSizeOutId =
- generateUniqueId(
- mainInputPCollectionId + "/TruncateAndSize",
- existingComponents::containsPcollections);
-
- rval.getComponentsBuilder()
- .putPcollections(
- truncateAndSizeOutId,
- PCollection.newBuilder()
- .setCoderId(truncateAndSizeCoderId)
- .setIsBounded(mainInputPCollection.getIsBounded())
- .setWindowingStrategyId(mainInputPCollection.getWindowingStrategyId())
- .setUniqueName(
- generateUniquePCollectonName(
- mainInputPCollection.getUniqueName() + "/TruncateAndSize",
- existingComponents))
- .build());
-
- String pairWithRestrictionId =
- generateUniqueId(
- transformId + "/PairWithRestriction", existingComponents::containsTransforms);
- {
- PTransform.Builder pairWithRestriction = PTransform.newBuilder();
- pairWithRestriction.putAllInputs(splittableParDo.getInputsMap());
- pairWithRestriction.putOutputs("out", pairWithRestrictionOutId);
- pairWithRestriction.setUniqueName(
- generateUniquePCollectonName(
- splittableParDo.getUniqueName() + "/PairWithRestriction", existingComponents));
- pairWithRestriction.setSpec(
- FunctionSpec.newBuilder()
- .setUrn(PTransformTranslation.SPLITTABLE_PAIR_WITH_RESTRICTION_URN)
- .setPayload(splittableParDo.getSpec().getPayload()));
- pairWithRestriction.setEnvironmentId(splittableParDo.getEnvironmentId());
- rval.getComponentsBuilder()
- .putTransforms(pairWithRestrictionId, pairWithRestriction.build());
- }
-
- String splitAndSizeId =
- generateUniqueId(transformId + "/SplitAndSize", existingComponents::containsTransforms);
- {
- PTransform.Builder splitAndSize = PTransform.newBuilder();
- splitAndSize.putInputs(mainInputName, pairWithRestrictionOutId);
- splitAndSize.putAllInputs(sideInputs);
- splitAndSize.putOutputs("out", splitAndSizeOutId);
- splitAndSize.setUniqueName(
- generateUniquePCollectonName(
- splittableParDo.getUniqueName() + "/SplitAndSize", existingComponents));
- splitAndSize.setSpec(
- FunctionSpec.newBuilder()
- .setUrn(PTransformTranslation.SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN)
- .setPayload(splittableParDo.getSpec().getPayload()));
- splitAndSize.setEnvironmentId(splittableParDo.getEnvironmentId());
- rval.getComponentsBuilder().putTransforms(splitAndSizeId, splitAndSize.build());
- }
-
- String truncateAndSizeId =
- generateUniqueId(
- transformId + "/TruncateAndSize", existingComponents::containsTransforms);
- {
- PTransform.Builder truncateAndSize = PTransform.newBuilder();
- truncateAndSize.putInputs(mainInputName, splitAndSizeOutId);
- truncateAndSize.putAllInputs(sideInputs);
- truncateAndSize.putOutputs("out", truncateAndSizeOutId);
- truncateAndSize.setUniqueName(
- generateUniquePCollectonName(
- splittableParDo.getUniqueName() + "/TruncateAndSize", existingComponents));
- truncateAndSize.setSpec(
- FunctionSpec.newBuilder()
- .setUrn(PTransformTranslation.SPLITTABLE_TRUNCATE_SIZED_RESTRICTION_URN)
- .setPayload(splittableParDo.getSpec().getPayload()));
- truncateAndSize.setEnvironmentId(splittableParDo.getEnvironmentId());
- rval.getComponentsBuilder().putTransforms(truncateAndSizeId, truncateAndSize.build());
- }
-
- String processSizedElementsAndRestrictionsId =
- generateUniqueId(
- transformId + "/ProcessSizedElementsAndRestrictions",
- existingComponents::containsTransforms);
- {
- PTransform.Builder processSizedElementsAndRestrictions = PTransform.newBuilder();
- processSizedElementsAndRestrictions.putInputs(mainInputName, truncateAndSizeOutId);
- processSizedElementsAndRestrictions.putAllInputs(sideInputs);
- processSizedElementsAndRestrictions.putAllOutputs(splittableParDo.getOutputsMap());
- processSizedElementsAndRestrictions.setUniqueName(
- generateUniquePCollectonName(
- splittableParDo.getUniqueName() + "/ProcessSizedElementsAndRestrictions",
- existingComponents));
- processSizedElementsAndRestrictions.setSpec(
- FunctionSpec.newBuilder()
- .setUrn(
- PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN)
- .setPayload(splittableParDo.getSpec().getPayload()));
- processSizedElementsAndRestrictions.setEnvironmentId(splittableParDo.getEnvironmentId());
- rval.getComponentsBuilder()
- .putTransforms(
- processSizedElementsAndRestrictionsId,
- processSizedElementsAndRestrictions.build());
- }
-
- PTransform.Builder newCompositeRoot =
- splittableParDo
- .toBuilder()
- // Clear the original splittable ParDo spec and add all the new transforms as
- // children.
- .clearSpec()
- .addAllSubtransforms(
- Arrays.asList(
- pairWithRestrictionId,
- splitAndSizeId,
- truncateAndSizeId,
- processSizedElementsAndRestrictionsId));
- rval.setPtransform(newCompositeRoot);
-
- return rval.build();
- } catch (IOException e) {
- throw new RuntimeException("Unable to perform expansion for transform " + transformId, e);
- }
- }
- }
}
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CommonCoderTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CommonCoderTest.java
index 10221b8..2c4090e 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CommonCoderTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CommonCoderTest.java
@@ -21,7 +21,6 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.MoreObjects.firstNonNull;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList.toImmutableList;
-import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap.toImmutableMap;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasItem;
import static org.hamcrest.Matchers.instanceOf;
@@ -43,6 +42,7 @@
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -340,6 +340,10 @@
}
private static Object parseField(Object value, Schema.FieldType fieldType) {
+ if (value == null) {
+ return null;
+ }
+
switch (fieldType.getTypeName()) {
case BYTE:
return ((Number) value).byteValue();
@@ -366,14 +370,18 @@
.map((element) -> parseField(element, fieldType.getCollectionElementType()))
.collect(toImmutableList());
case MAP:
- Map<Object, Object> kvMap = (Map<Object, Object>) value;
- return kvMap.entrySet().stream()
- .collect(
- toImmutableMap(
- (pair) -> parseField(pair.getKey(), fieldType.getMapKeyType()),
- (pair) -> parseField(pair.getValue(), fieldType.getMapValueType())));
+ Map<Object, Object> kvMap = new HashMap<>();
+ ((Map<Object, Object>) value)
+ .entrySet().stream()
+ .forEach(
+ (entry) ->
+ kvMap.put(
+ parseField(entry.getKey(), fieldType.getMapKeyType()),
+ parseField(entry.getValue(), fieldType.getMapValueType())));
+ return kvMap;
case ROW:
- Map<String, Object> rowMap = (Map<String, Object>) value;
+ // Clone map so we don't mutate the underlying value
+ Map<String, Object> rowMap = new HashMap<>((Map<String, Object>) value);
Schema schema = fieldType.getRowSchema();
Row.Builder row = Row.withSchema(schema);
for (Schema.Field field : schema.getFields()) {
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
index 988921e..f7c74c0 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
@@ -337,26 +337,30 @@
*/
@Override
public State waitUntilFinish(Duration duration) {
- State startState = this.state;
- if (!startState.isTerminal()) {
- try {
- state = executor.waitUntilFinish(duration);
- } catch (UserCodeException uce) {
- // Emulates the behavior of Pipeline#run(), where a stack trace caused by a
- // UserCodeException is truncated and replaced with the stack starting at the call to
- // waitToFinish
- throw new Pipeline.PipelineExecutionException(uce.getCause());
- } catch (Exception e) {
- if (e instanceof InterruptedException) {
- Thread.currentThread().interrupt();
- }
- if (e instanceof RuntimeException) {
- throw (RuntimeException) e;
- }
- throw new RuntimeException(e);
- }
+ if (this.state.isTerminal()) {
+ return this.state;
}
- return this.state;
+ final State endState;
+ try {
+ endState = executor.waitUntilFinish(duration);
+ } catch (UserCodeException uce) {
+ // Emulates the behavior of Pipeline#run(), where a stack trace caused by a
+ // UserCodeException is truncated and replaced with the stack starting at the call to
+ // waitUntilFinish
+ throw new Pipeline.PipelineExecutionException(uce.getCause());
+ } catch (Exception e) {
+ if (e instanceof InterruptedException) {
+ Thread.currentThread().interrupt();
+ }
+ if (e instanceof RuntimeException) {
+ throw (RuntimeException) e;
+ }
+ throw new RuntimeException(e);
+ }
+ if (endState != null) {
+ this.state = endState;
+ }
+ return endState;
}
}
diff --git a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DirectRunnerTest.java b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DirectRunnerTest.java
index 8054a07..fbcf0c0 100644
--- a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DirectRunnerTest.java
+++ b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DirectRunnerTest.java
@@ -328,8 +328,9 @@
// The pipeline should never complete;
assertThat(result.getState(), is(State.RUNNING));
// Must time out, otherwise this test will never complete
- result.waitUntilFinish(Duration.millis(1L));
- assertEquals(null, result.getState());
+ assertEquals(null, result.waitUntilFinish(Duration.millis(1L)));
+ // Ensure multiple calls complete
+ assertEquals(null, result.waitUntilFinish(Duration.millis(1L)));
}
private static final AtomicLong TEARDOWN_CALL = new AtomicLong(-1);
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
index dadb49d..46ee15d 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
@@ -128,13 +128,10 @@
* href="https://cloud.google.com/compute/docs/regions-zones/regions-zones">region</a> for
* creating Dataflow jobs.
*/
- @Hidden
- @Experimental
@Description(
"The Google Compute Engine region for creating Dataflow jobs. See "
+ "https://cloud.google.com/compute/docs/regions-zones/regions-zones for a list of valid "
- + "options. Currently defaults to us-central1, but future releases of Beam will "
- + "require the user to set the region explicitly.")
+ + "options.")
@Default.InstanceFactory(DefaultGcpRegionFactory.class)
String getRegion();
diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedTimerData.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedTimerData.java
index a7e364d..46dc95a 100644
--- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedTimerData.java
+++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedTimerData.java
@@ -99,6 +99,18 @@
}
@Override
+ public String toString() {
+ return "KeyedTimerData{"
+ + "key="
+ + key
+ + ", keyBytes="
+ + Arrays.toString(keyBytes)
+ + ", timerData="
+ + timerData
+ + '}';
+ }
+
+ @Override
public boolean equals(@Nullable Object o) {
if (this == o) {
return true;
diff --git a/sdks/go/pkg/beam/core/graph/coder/map.go b/sdks/go/pkg/beam/core/graph/coder/map.go
new file mode 100644
index 0000000..4e5dc2c
--- /dev/null
+++ b/sdks/go/pkg/beam/core/graph/coder/map.go
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package coder
+
+import (
+ "io"
+ "reflect"
+)
+
+// TODO(lostluck): 2020.08.04 export these for use for others?
+
+// mapDecoder produces a decoder for the beam schema map encoding.
+func mapDecoder(rt reflect.Type, decodeToKey, decodeToElem func(reflect.Value, io.Reader) error) func(reflect.Value, io.Reader) error {
+ return func(ret reflect.Value, r io.Reader) error {
+ // (1) Read count prefixed encoded data
+ size, err := DecodeInt32(r)
+ if err != nil {
+ return err
+ }
+ n := int(size)
+ ret.Set(reflect.MakeMapWithSize(rt, n))
+ for i := 0; i < n; i++ {
+ rvk := reflect.New(rt.Key()).Elem()
+ if err := decodeToKey(rvk, r); err != nil {
+ return err
+ }
+ rvv := reflect.New(rt.Elem()).Elem()
+ if err := decodeToElem(rvv, r); err != nil {
+ return err
+ }
+ ret.SetMapIndex(rvk, rvv)
+ }
+ return nil
+ }
+}
+
+// containerNilDecoder handles when a value is nillable for map or iterable components.
+// Nillable types have an extra byte prefixing them indicating nil status.
+func containerNilDecoder(decodeToElem func(reflect.Value, io.Reader) error) func(reflect.Value, io.Reader) error {
+ return func(ret reflect.Value, r io.Reader) error {
+ hasValue, err := DecodeBool(r)
+ if err != nil {
+ return err
+ }
+ if !hasValue {
+ return nil
+ }
+ rv := reflect.New(ret.Type().Elem())
+ if err := decodeToElem(rv.Elem(), r); err != nil {
+ return err
+ }
+ ret.Set(rv)
+ return nil
+ }
+}
+
+// mapEncoder reflectively encodes a map type using the beam map encoding.
+func mapEncoder(rt reflect.Type, encodeKey, encodeValue func(reflect.Value, io.Writer) error) func(reflect.Value, io.Writer) error {
+ return func(rv reflect.Value, w io.Writer) error {
+ size := rv.Len()
+ if err := EncodeInt32((int32)(size), w); err != nil {
+ return err
+ }
+ iter := rv.MapRange()
+ for iter.Next() {
+ if err := encodeKey(iter.Key(), w); err != nil {
+ return err
+ }
+ if err := encodeValue(iter.Value(), w); err != nil {
+ return err
+ }
+ }
+ return nil
+ }
+}
+
+// containerNilEncoder handles when a value is nillable for map or iterable components.
+// Nillable types have an extra byte prefixing them indicating nil status.
+func containerNilEncoder(encodeElem func(reflect.Value, io.Writer) error) func(reflect.Value, io.Writer) error {
+ return func(rv reflect.Value, w io.Writer) error {
+ if rv.IsNil() {
+ return EncodeBool(false, w)
+ }
+ if err := EncodeBool(true, w); err != nil {
+ return err
+ }
+ return encodeElem(rv.Elem(), w)
+ }
+}
diff --git a/sdks/go/pkg/beam/core/graph/coder/map_test.go b/sdks/go/pkg/beam/core/graph/coder/map_test.go
new file mode 100644
index 0000000..0b825c2
--- /dev/null
+++ b/sdks/go/pkg/beam/core/graph/coder/map_test.go
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package coder
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "reflect"
+ "testing"
+
+ "github.com/apache/beam/sdks/go/pkg/beam/core/util/reflectx"
+ "github.com/google/go-cmp/cmp"
+)
+
+// TestEncodeDecodeMap checks mapEncoder and mapDecoder against fixed byte encodings.
+func TestEncodeDecodeMap(t *testing.T) {
+	byteEnc := containerEncoderForType(reflectx.Uint8)
+	byteDec := containerDecoderForType(reflectx.Uint8)
+	bytePtrEnc := containerEncoderForType(reflect.PtrTo(reflectx.Uint8))
+	bytePtrDec := containerDecoderForType(reflect.PtrTo(reflectx.Uint8))
+
+	ptrByte := byte(42)
+
+	tests := []struct {
+		v          interface{}
+		encK, encV func(reflect.Value, io.Writer) error
+		decK, decV func(reflect.Value, io.Reader) error
+		encoded    []byte
+		decodeOnly bool // Skip the encode subtest for this case.
+	}{
+		{
+			v:       map[byte]byte{10: 42},
+			encK:    byteEnc,
+			encV:    byteEnc,
+			decK:    byteDec,
+			decV:    byteDec,
+			encoded: []byte{0, 0, 0, 1, 10, 42},
+		}, {
+			v:       map[byte]*byte{10: &ptrByte},
+			encK:    byteEnc,
+			encV:    bytePtrEnc,
+			decK:    byteDec,
+			decV:    bytePtrDec,
+			encoded: []byte{0, 0, 0, 1, 10, 1, 42},
+		}, {
+			v:          map[byte]*byte{10: &ptrByte, 23: nil, 53: nil},
+			encK:       byteEnc,
+			encV:       bytePtrEnc,
+			decK:       byteDec,
+			decV:       bytePtrDec,
+			encoded:    []byte{0, 0, 0, 3, 10, 1, 42, 23, 0, 53, 0},
+			decodeOnly: true, // Go map iteration order is unspecified, so a multi-entry encoding is not stable.
+		},
+	}
+	for i, test := range tests {
+		test := test
+		if !test.decodeOnly {
+			t.Run(fmt.Sprintf("encode %d: %T", i, test.v), func(t *testing.T) {
+				var buf bytes.Buffer
+				err := mapEncoder(reflect.TypeOf(test.v), test.encK, test.encV)(reflect.ValueOf(test.v), &buf)
+				if err != nil {
+					t.Fatalf("mapEncoder(%v) = %v", test.v, err)
+				}
+				if d := cmp.Diff(test.encoded, buf.Bytes()); d != "" {
+					t.Errorf("mapEncoder(%v) = %v, want %v diff(-want,+got):\n %v", test.v, buf.Bytes(), test.encoded, d)
+				}
+			})
+		}
+		t.Run(fmt.Sprintf("decode %d: %T", i, test.v), func(t *testing.T) {
+			buf := bytes.NewBuffer(test.encoded)
+			rt := reflect.TypeOf(test.v)
+			dec := mapDecoder(rt, test.decK, test.decV)
+			rv := reflect.New(rt).Elem()
+			err := dec(rv, buf)
+			if err != nil {
+				t.Fatalf("mapDecoder(%v) = %v", test.encoded, err)
+			}
+			got := rv.Interface()
+			if d := cmp.Diff(test.v, got); d != "" {
+				t.Errorf("mapDecoder(%v) = %v, want %v diff(-want,+got):\n %v", test.encoded, got, test.v, d)
+			}
+		})
+	}
+}
diff --git a/sdks/go/pkg/beam/core/graph/coder/row.go b/sdks/go/pkg/beam/core/graph/coder/row.go
index aac34ac..00b4c26 100644
--- a/sdks/go/pkg/beam/core/graph/coder/row.go
+++ b/sdks/go/pkg/beam/core/graph/coder/row.go
@@ -152,21 +152,26 @@
return nil
}
}
- decf := decoderForSingleTypeReflect(t.Elem())
- sdec := iterableDecoderForSlice(t, decf)
- return func(rv reflect.Value, r io.Reader) error {
- return sdec(rv, r)
- }
+ decf := containerDecoderForType(t.Elem())
+ return iterableDecoderForSlice(t, decf)
case reflect.Array:
- decf := decoderForSingleTypeReflect(t.Elem())
- sdec := iterableDecoderForArray(t, decf)
- return func(rv reflect.Value, r io.Reader) error {
- return sdec(rv, r)
- }
+ decf := containerDecoderForType(t.Elem())
+ return iterableDecoderForArray(t, decf)
+ case reflect.Map:
+ decK := containerDecoderForType(t.Key())
+ decV := containerDecoderForType(t.Elem())
+ return mapDecoder(t, decK, decV)
}
panic(fmt.Sprintf("unimplemented type to decode: %v", t))
}
+func containerDecoderForType(t reflect.Type) func(reflect.Value, io.Reader) error {
+	if t.Kind() != reflect.Ptr { // Non-pointer components are decoded directly.
+		return decoderForSingleTypeReflect(t)
+	}
+	return containerNilDecoder(decoderForSingleTypeReflect(t.Elem())) // Pointer components go through the nil-aware wrapper.
+}
+
type typeDecoderReflect struct {
typ reflect.Type
fields []func(reflect.Value, io.Reader) error
@@ -270,15 +275,26 @@
return EncodeBytes(rv.Bytes(), w)
}
}
- encf := encoderForSingleTypeReflect(t.Elem())
+ encf := containerEncoderForType(t.Elem())
return iterableEncoder(t, encf)
case reflect.Array:
- encf := encoderForSingleTypeReflect(t.Elem())
+ encf := containerEncoderForType(t.Elem())
return iterableEncoder(t, encf)
+ case reflect.Map:
+ encK := containerEncoderForType(t.Key())
+ encV := containerEncoderForType(t.Elem())
+ return mapEncoder(t, encK, encV)
}
panic(fmt.Sprintf("unimplemented type to encode: %v", t))
}
+func containerEncoderForType(t reflect.Type) func(reflect.Value, io.Writer) error {
+	if t.Kind() != reflect.Ptr { // Non-pointer components are encoded directly.
+		return encoderForSingleTypeReflect(t)
+	}
+	return containerNilEncoder(encoderForSingleTypeReflect(t.Elem())) // Pointer components go through the nil-aware wrapper.
+}
+
type typeEncoderReflect struct {
fields []func(reflect.Value, io.Writer) error
}
diff --git a/sdks/go/pkg/beam/core/graph/coder/row_test.go b/sdks/go/pkg/beam/core/graph/coder/row_test.go
index f1089b8..38b7c5d 100644
--- a/sdks/go/pkg/beam/core/graph/coder/row_test.go
+++ b/sdks/go/pkg/beam/core/graph/coder/row_test.go
@@ -78,16 +78,18 @@
V12 [0]int
V13 [2]int
V14 []int
- // V15 map[string]int // not yet a standard coder (BEAM-7996)
+ V15 map[string]int
V16 float32
V17 float64
V18 []byte
+ V19 [2]*int
+ V20 map[*string]*int
}{},
}, {
want: struct {
V00 bool
- V01 byte
- V02 uint8
+ V01 byte // unsupported by spec (same as uint8)
+ V02 uint8 // unsupported by spec
V03 int16
// V04 uint16 // unsupported by spec
V05 int32
@@ -100,10 +102,13 @@
V12 [0]int
V13 [2]int
V14 []int
- // V15 map[string]int // not yet a standard coder (BEAM-7996) (encoding unspecified)
+ V15 map[string]int
V16 float32
V17 float64
V18 []byte
+ V19 [2]*int
+ V20 map[string]*int
+ V21 []*int
}{
V00: true,
V01: 1,
@@ -117,9 +122,16 @@
V12: [0]int{},
V13: [2]int{72, 908},
V14: []int{12, 9326, 641346, 6},
+ V15: map[string]int{"pants": 42},
V16: 3.14169,
V17: 2.6e100,
V18: []byte{21, 17, 65, 255, 0, 16},
+ V19: [2]*int{nil, &num},
+ V20: map[string]*int{
+ "notnil": &num,
+ "nil": nil,
+ },
+ V21: []*int{nil, &num, nil},
},
// TODO add custom types such as protocol buffers.
},
diff --git a/sdks/go/pkg/beam/core/runtime/exec/datasource.go b/sdks/go/pkg/beam/core/runtime/exec/datasource.go
index 054e34f..ad7b601 100644
--- a/sdks/go/pkg/beam/core/runtime/exec/datasource.go
+++ b/sdks/go/pkg/beam/core/runtime/exec/datasource.go
@@ -16,6 +16,7 @@
package exec
import (
+ "bytes"
"context"
"fmt"
"io"
@@ -25,7 +26,6 @@
"time"
"github.com/apache/beam/sdks/go/pkg/beam/core/graph/coder"
- "github.com/apache/beam/sdks/go/pkg/beam/core/sdf"
"github.com/apache/beam/sdks/go/pkg/beam/core/util/ioutilx"
"github.com/apache/beam/sdks/go/pkg/beam/internal/errors"
"github.com/apache/beam/sdks/go/pkg/beam/log"
@@ -47,20 +47,24 @@
splitIdx int64
start time.Time
- // rt is non-nil if this DataSource feeds directly to a splittable unit,
- // and receives the current restriction tracker being processed.
- rt chan sdf.RTracker
+ // su is non-nil if this DataSource feeds directly to a splittable unit,
+ // and receives that splittable unit when it is available for splitting.
+ // While the splittable unit is received, it is blocked from processing
+ // new elements, so it must be sent back through the channel once the
+ // DataSource is finished using it.
+ su chan SplittableUnit
mu sync.Mutex
}
-// Initializes the rt channel from the following unit when applicable.
+// InitSplittable initializes the SplittableUnit channel from the output unit,
+// if it provides one.
func (n *DataSource) InitSplittable() {
if n.Out == nil {
return
}
if u, ok := n.Out.(*ProcessSizedElementsAndRestrictions); ok == true {
- n.rt = u.Rt
+ n.su = u.SU
}
}
@@ -284,16 +288,21 @@
// Split takes a sorted set of potential split indices and a fraction of the
// remainder to split at, selects and actuates a split on an appropriate split
-// index, and returns the selected split index if successful or an error when
-// unsuccessful.
+// index, and returns the selected split index in a SplitResult if successful or
+// an error when unsuccessful.
+//
+// If the following transform is splittable, and the split indices and fraction
+// allow for splitting on the currently processing element, then a sub-element
+// split is performed, and the appropriate information is returned in the
+// SplitResult.
//
// The bufSize param specifies the estimated number of elements that will be
// sent to this DataSource, and is used to be able to perform accurate splits
// even if the DataSource has not yet received all its elements. A bufSize of
// 0 or less indicates that its unknown, and so uses the current known size.
-func (n *DataSource) Split(splits []int64, frac float64, bufSize int64) (int64, error) {
+func (n *DataSource) Split(splits []int64, frac float64, bufSize int64) (SplitResult, error) {
if n == nil {
- return 0, fmt.Errorf("failed to split at requested splits: {%v}, DataSource not initialized", splits)
+ return SplitResult{}, fmt.Errorf("failed to split at requested splits: {%v}, DataSource not initialized", splits)
}
if frac > 1.0 {
frac = 1.0
@@ -302,31 +311,83 @@
}
n.mu.Lock()
+ defer n.mu.Unlock()
+
var currProg float64 // Current element progress.
- if n.index < 0 { // Progress is at the end of the non-existant -1st element.
+ var su SplittableUnit
+ if n.index < 0 { // Progress is at the end of the non-existant -1st element.
currProg = 1.0
- } else if n.rt == nil { // If this isn't sub-element splittable, estimate some progress.
+ } else if n.su == nil { // If this isn't sub-element splittable, estimate some progress.
currProg = 0.5
} else { // If this is sub-element splittable, get progress of the current element.
- rt := <-n.rt
- d, r := rt.GetProgress()
- currProg = d / (d + r)
- n.rt <- rt
+
+ select {
+ case su = <-n.su:
+ // If an element is processing, we'll get a splittable unit.
+ if su == nil {
+ return SplitResult{}, fmt.Errorf("failed to split: splittable unit was nil")
+ }
+ defer func() {
+ n.su <- su
+ }()
+ currProg = su.GetProgress()
+ case <-time.After(500 * time.Millisecond):
+ // Otherwise, the current element hasn't started processing yet
+ // or has already finished. By adding a short timeout, we avoid
+ // the first possibility, and can assume progress is at max.
+ currProg = 1.0
+ }
}
// Size to split within is the minimum of bufSize or splitIdx so we avoid
// including elements we already know won't be processed.
if bufSize <= 0 || n.splitIdx < bufSize {
bufSize = n.splitIdx
}
- s, _, err := splitHelper(n.index, bufSize, currProg, splits, frac, false)
+ s, f, err := splitHelper(n.index, bufSize, currProg, splits, frac, su != nil)
if err != nil {
- n.mu.Unlock()
- return 0, err
+ return SplitResult{}, err
}
- n.splitIdx = s
- fs := n.splitIdx
- n.mu.Unlock()
- return fs, nil
+
+ // No fraction returned, perform channel split.
+ if f < 0 {
+ n.splitIdx = s
+ return SplitResult{PI: s - 1, RI: s}, nil
+ }
+ // Otherwise, perform a sub-element split.
+ fr := f / (1.0 - currProg)
+ p, r, err := su.Split(fr)
+ if err != nil {
+ return SplitResult{}, err
+ }
+
+ if p == nil || r == nil { // Unsuccessful split.
+ // Fallback to channel split, so split at next elm, not current.
+ n.splitIdx = s + 1
+ return SplitResult{PI: s, RI: s + 1}, nil
+ }
+
+ // TODO(BEAM-10579) Eventually encode elements with the splittable
+ // unit's input coder instead of the DataSource's coder.
+ wc := MakeWindowEncoder(n.Coder.Window)
+ ec := MakeElementEncoder(coder.SkipW(n.Coder))
+ pEnc, err := encodeElm(p, wc, ec)
+ if err != nil {
+ return SplitResult{}, err
+ }
+ rEnc, err := encodeElm(r, wc, ec)
+ if err != nil {
+ return SplitResult{}, err
+ }
+ n.splitIdx = s + 1 // In a sub-element split, s is currIdx.
+ res := SplitResult{
+ PI: s - 1,
+ RI: s + 1,
+ PS: pEnc,
+ RS: rEnc,
+ TId: su.GetTransformId(),
+ InId: su.GetInputId(),
+ }
+ return res, nil
}
// splitHelper is a helper function that finds a split point in a range.
@@ -348,7 +409,8 @@
// Returns the element index to split at (first element of residual), and the
// fraction within that element to split, iff the split point is the current
// element, the splittable param is set to true, and both the element being
-// split and the following element are valid split points.
+// split and the following element are valid split points. If there is no
+// fraction, returns -1.
func splitHelper(
currIdx, endIdx int64,
currProg float64,
@@ -377,7 +439,7 @@
if splitIdx < safeStart {
splitIdx = safeStart
}
- return splitIdx, 0.0, nil
+ return splitIdx, -1.0, nil
}
// Cases where we have to find a valid split point.
@@ -419,10 +481,21 @@
}
}
if bestS != -1 {
- return bestS, 0.0, nil
+ return bestS, -1.0, nil
}
- return -1, 0.0, fmt.Errorf("failed to split DataSource (at index: %v) at requested splits: {%v}", currIdx, splits)
+ return -1, -1.0, fmt.Errorf("failed to split DataSource (at index: %v) at requested splits: {%v}", currIdx, splits)
+}
+
+func encodeElm(elm *FullValue, wc WindowEncoder, ec ElementEncoder) ([]byte, error) {
+ var b bytes.Buffer // Accumulates the windowed-value header followed by the encoded element.
+ if err := EncodeWindowedValueHeader(wc, elm.Windows, elm.Timestamp, &b); err != nil { // Header (windows, timestamp) is written first.
+ return nil, err
+ }
+ if err := ec.Encode(elm, &b); err != nil { // Then the element itself, via the supplied element encoder.
+ return nil, err
+ }
+ return b.Bytes(), nil
 }
type concatReStream struct {
diff --git a/sdks/go/pkg/beam/core/runtime/exec/datasource_test.go b/sdks/go/pkg/beam/core/runtime/exec/datasource_test.go
index ff6126e..4089a1c 100644
--- a/sdks/go/pkg/beam/core/runtime/exec/datasource_test.go
+++ b/sdks/go/pkg/beam/core/runtime/exec/datasource_test.go
@@ -25,6 +25,7 @@
"github.com/apache/beam/sdks/go/pkg/beam/core/graph/coder"
"github.com/apache/beam/sdks/go/pkg/beam/core/graph/mtime"
"github.com/apache/beam/sdks/go/pkg/beam/core/graph/window"
+ "github.com/apache/beam/sdks/go/pkg/beam/internal/errors"
)
func TestDataSource_PerElement(t *testing.T) {
@@ -278,11 +279,17 @@
runOnRoots(ctx, t, p, "StartBundle", func(root Root, ctx context.Context) error { return root.StartBundle(ctx, "1", dc) })
// SDK never splits on 0, so check that every test.
- if splitIdx, err := p.Split(SplitPoints{Splits: []int64{0, test.splitIdx}}); err != nil {
+ splitRes, err := p.Split(SplitPoints{Splits: []int64{0, test.splitIdx}})
+ if err != nil {
t.Fatalf("error in Split: %v", err)
- } else if got, want := splitIdx, test.splitIdx; got != want {
+ }
+ if got, want := splitRes.RI, test.splitIdx; got != want {
t.Fatalf("error in Split: got splitIdx = %v, want %v ", got, want)
}
+ if got, want := splitRes.PI, test.splitIdx-1; got != want {
+ t.Fatalf("error in Split: got primary index = %v, want %v ", got, want)
+ }
+
runOnRoots(ctx, t, p, "Process", Root.Process)
runOnRoots(ctx, t, p, "FinishBundle", Root.FinishBundle)
@@ -342,10 +349,15 @@
<-blockedCh
// Validate that we do not split on the element we're blocking on index.
// The first valid split is at test.splitIdx.
- if splitIdx, err := source.Split([]int64{0, 1, 2, 3, 4, 5}, -1, 0); err != nil {
+ if splitRes, err := source.Split([]int64{0, 1, 2, 3, 4, 5}, -1, 0); err != nil {
t.Errorf("error in Split: %v", err)
- } else if got, want := splitIdx, test.splitIdx; got != want {
- t.Errorf("error in Split: got splitIdx = %v, want %v ", got, want)
+ } else {
+ if got, want := splitRes.RI, test.splitIdx; got != want {
+ t.Errorf("error in Split: got splitIdx = %v, want %v ", got, want)
+ }
+ if got, want := splitRes.PI, test.splitIdx-1; got != want {
+ t.Errorf("error in Split: got primary index = %v, want %v ", got, want)
+ }
}
// Validate that our progress is where we expect it to be. (test.splitIdx - 1)
if got, want := source.Progress().Count, test.splitIdx-1; got != want {
@@ -412,17 +424,120 @@
// SDK never splits on 0, so check that every test.
sp := SplitPoints{Splits: test.splitPts, Frac: test.frac, BufSize: test.bufSize}
- if splitIdx, err := p.Split(sp); err != nil {
+ splitRes, err := p.Split(sp)
+ if err != nil {
t.Fatalf("error in Split: %v", err)
- } else if got, want := splitIdx, test.splitIdx; got != want {
+ }
+ if got, want := splitRes.RI, test.splitIdx; got != want {
t.Fatalf("error in Split: got splitIdx = %v, want %v ", got, want)
}
+ if got, want := splitRes.PI, test.splitIdx-1; got != want {
+ t.Fatalf("error in Split: got primary index = %v, want %v ", got, want)
+ }
runOnRoots(ctx, t, p, "Process", Root.Process)
runOnRoots(ctx, t, p, "FinishBundle", Root.FinishBundle)
validateSource(t, out, source, makeValues(test.expected...))
})
+ // Test splitting on sub-elements works when available.
+ t.Run("subElement", func(t *testing.T) {
+ // Each test will process up to an element, then split at different
+ // fractions and check that a sub-element split either was, or was not
+ // performed.
+ const blockOn int64 = 3 // Should leave 2 elements unprocessed, including blocked element.
+ numElms := int64(len(elements))
+ tests := []struct {
+ fraction float64
+ splitIdx int64
+ isSubElm bool
+ }{
+ {fraction: 0.0, splitIdx: blockOn + 1, isSubElm: true},
+ {fraction: 0.01, splitIdx: blockOn + 1, isSubElm: true},
+ {fraction: 0.49, splitIdx: blockOn + 1, isSubElm: true}, // Should be just within current element.
+ {fraction: 0.51, splitIdx: blockOn + 1, isSubElm: false}, // Should be just past current element.
+ {fraction: 0.99, splitIdx: numElms, isSubElm: false},
+ }
+ for _, test := range tests {
+ test := test
+ name := fmt.Sprintf("withFraction_%v", test.fraction)
+ t.Run(name, func(t *testing.T) {
+ source, out, pr := initSourceTest(name)
+ unblockCh, blockedCh := make(chan struct{}), make(chan struct{}, 1)
+ // Block on the element one less than the desired split,
+ // so the desired split is the first valid split.
+ blocker := &BlockingNode{
+ UID: 3,
+ Block: func(elm *FullValue) bool {
+ if source.index == blockOn {
+ // Signal to call Split
+ blockedCh <- struct{}{}
+ return true
+ }
+ return false
+ },
+ Unblock: unblockCh,
+ Out: out,
+ }
+ source.Out = blocker
+
+ splittableCh := make(chan SplittableUnit, 1)
+ source.su = splittableCh
+ splittableCh <- &TestSplittableUnit{elm: elements[blockOn]}
+
+ go func() {
+ // Wait to call Split until the DoFn is blocked at the desired element.
+ <-blockedCh
+ // Validate that we either do or do not perform a sub-element split with the
+ // given fraction.
+ if splitRes, err := source.Split([]int64{0, 1, 2, 3, 4, 5}, test.fraction, int64(len(elements))); err != nil {
+ t.Errorf("error in Split: %v", err)
+ } else {
+ // For sub-element splits, check sub-element split only results.
+ isSubElm := splitRes.RS != nil && splitRes.PS != nil
+ if isSubElm != test.isSubElm {
+ t.Errorf("error in Split: got sub-element split = %t, want %t", isSubElm, test.isSubElm)
+ }
+ if isSubElm {
+ if got, want := splitRes.TId, testTransformId; got != want {
+ t.Errorf("error in Split: got incorrect Transform Id = %v, want %v", got, want)
+ }
+ if got, want := splitRes.InId, testInputId; got != want {
+ t.Errorf("error in Split: got incorrect Input Id = %v, want %v", got, want)
+ }
+ }
+
+ // Check that split indices are correct, for both sub-element and channel splits.
+ var wantPI, wantRI = test.splitIdx - 1, test.splitIdx
+ if isSubElm {
+ // In sub-element splits, primary index is expected to be one element
+ // before the current (split) element.
+ wantPI--
+ }
+ if splitRes.PI != wantPI || splitRes.RI != wantRI {
+ t.Errorf("error in Split: got split indices of (primary, residual) = (%d, %d), want (%d, %d)",
+ splitRes.PI, splitRes.RI, wantPI, wantRI)
+ }
+ }
+ // Validate that our progress is where we expect it to be. (blockOn)
+ if got, want := source.Progress().Count, blockOn; got != want {
+ t.Errorf("error in Progress: got finished processing Count = %v, want %v ", got, want)
+ }
+ unblockCh <- struct{}{}
+ }()
+
+ constructAndExecutePlanWithContext(t, []Unit{out, blocker, source}, DataContext{
+ Data: &TestDataManager{R: pr},
+ })
+
+ validateSource(t, out, source, makeValues(elements[:test.splitIdx]...))
+ if got, want := source.Progress().Count, test.splitIdx; got != want {
+ t.Fatalf("progress didn't match split: got %v, want %v", got, want)
+ }
+ })
+ }
+ })
+
// Test expects splitting errors, but for processing to be successful.
t.Run("errors", func(t *testing.T) {
source, out, pr := initSourceTest("noSplitsUntilStarted")
@@ -471,6 +586,39 @@
})
}
+const testTransformId = "transform_id"
+const testInputId = "input_id"
+
+// TestSplittableUnit is a stub SplittableUnit used by the DataSource tests.
+// It reports fixed progress and fixed IDs so that split results are predictable.
+type TestSplittableUnit struct {
+	elm interface{} // Element handed back, unmodified, by Split.
+}
+
+// Split rejects fractions outside [0, 1]; any other fraction yields a successful
+// split whose primary and residual both wrap the original element.
+func (u *TestSplittableUnit) Split(f float64) (*FullValue, *FullValue, error) {
+	if f < 0.0 || f > 1.0 {
+		return nil, nil, errors.Errorf("Error")
+	}
+	return &FullValue{Elm: u.elm}, &FullValue{Elm: u.elm}, nil
+}
+
+// GetProgress reports zero progress unconditionally, so tests stay consistent.
+func (u *TestSplittableUnit) GetProgress() float64 {
+	return 0.0
+}
+
+// GetTransformId returns the fixed testTransformId, which tests can check for.
+func (u *TestSplittableUnit) GetTransformId() string {
+	return testTransformId
+}
+
+// GetInputId returns the fixed testInputId, which tests can check for.
+func (u *TestSplittableUnit) GetInputId() string {
+	return testInputId
+}
+
func floatEquals(a, b, epsilon float64) bool {
return math.Abs(a-b) < epsilon
}
@@ -501,7 +649,7 @@
for _, test := range tests {
test := test
t.Run(fmt.Sprintf("(%v of [%v, %v])", test.frac, test.curr, test.size), func(t *testing.T) {
- wantFrac := 0.0
+ wantFrac := -1.0
got, gotFrac, err := splitHelper(test.curr, test.size, 0.0, nil, test.frac, false)
if err != nil {
t.Fatalf("error in splitHelper: %v", err)
@@ -533,7 +681,7 @@
for _, test := range tests {
test := test
t.Run(fmt.Sprintf("(%v of [%v, %v])", test.frac, float64(test.curr)+test.currProg, test.size), func(t *testing.T) {
- wantFrac := 0.0
+ wantFrac := -1.0
got, gotFrac, err := splitHelper(test.curr, test.size, test.currProg, nil, test.frac, false)
if err != nil {
t.Fatalf("error in splitHelper: %v", err)
@@ -572,7 +720,7 @@
for _, test := range tests {
test := test
t.Run(fmt.Sprintf("(%v of [%v, %v], splits = %v)", test.frac, test.curr, test.size, test.splits), func(t *testing.T) {
- wantFrac := 0.0
+ wantFrac := -1.0
got, gotFrac, err := splitHelper(test.curr, test.size, 0.0, test.splits, test.frac, false)
if test.err {
if err == nil {
@@ -602,10 +750,10 @@
wantFrac float64
}{
// Split between future elements at element boundaries.
- {curr: 0, currProg: 0, size: 4, frac: 0.51, want: 2},
- {curr: 0, currProg: 0, size: 4, frac: 0.49, want: 2},
- {curr: 0, currProg: 0, size: 4, frac: 0.26, want: 1},
- {curr: 0, currProg: 0, size: 4, frac: 0.25, want: 1},
+ {curr: 0, currProg: 0, size: 4, frac: 0.51, want: 2, wantFrac: -1.0},
+ {curr: 0, currProg: 0, size: 4, frac: 0.49, want: 2, wantFrac: -1.0},
+ {curr: 0, currProg: 0, size: 4, frac: 0.26, want: 1, wantFrac: -1.0},
+ {curr: 0, currProg: 0, size: 4, frac: 0.25, want: 1, wantFrac: -1.0},
// If the split falls inside the first, splittable element, split there.
{curr: 0, currProg: 0, size: 4, frac: 0.20, want: 0, wantFrac: 0.8},
@@ -613,11 +761,11 @@
{curr: 0, currProg: 0, size: 4, frac: 0.125, want: 0, wantFrac: 0.5},
// Here we are far enough into the first element that splitting at 0.2 of the
// remainder falls outside the first element.
- {curr: 0, currProg: 0.5, size: 4, frac: 0.2, want: 1},
+ {curr: 0, currProg: 0.5, size: 4, frac: 0.2, want: 1, wantFrac: -1.0},
// Verify the above logic when we are partially through the stream.
- {curr: 2, currProg: 0, size: 4, frac: 0.6, want: 3},
- {curr: 2, currProg: 0.9, size: 4, frac: 0.6, want: 4},
+ {curr: 2, currProg: 0, size: 4, frac: 0.6, want: 3, wantFrac: -1.0},
+ {curr: 2, currProg: 0.9, size: 4, frac: 0.6, want: 4, wantFrac: -1.0},
{curr: 2, currProg: 0.5, size: 4, frac: 0.2, want: 2, wantFrac: 0.8},
}
for _, test := range tests {
@@ -649,12 +797,12 @@
// This is where we would like to split, when all split points are available.
{curr: 2, currProg: 0, size: 5, frac: 0.2, splits: []int64{1, 2, 3, 4, 5}, want: 2, wantFrac: 0.6},
// We can't split element at index 2, because 3 is not a split point.
- {curr: 2, currProg: 0, size: 5, frac: 0.2, splits: []int64{1, 2, 4, 5}, want: 4},
+ {curr: 2, currProg: 0, size: 5, frac: 0.2, splits: []int64{1, 2, 4, 5}, want: 4, wantFrac: -1.0},
// We can't even split element at index 4 as above, because 4 is also not a
// split point.
- {curr: 2, currProg: 0, size: 5, frac: 0.2, splits: []int64{1, 2, 5}, want: 5},
+ {curr: 2, currProg: 0, size: 5, frac: 0.2, splits: []int64{1, 2, 5}, want: 5, wantFrac: -1.0},
// We can't split element at index 2, because 2 is not a split point.
- {curr: 2, currProg: 0, size: 5, frac: 0.2, splits: []int64{1, 3, 4, 5}, want: 3},
+ {curr: 2, currProg: 0, size: 5, frac: 0.2, splits: []int64{1, 3, 4, 5}, want: 3, wantFrac: -1.0},
}
for _, test := range tests {
test := test
diff --git a/sdks/go/pkg/beam/core/runtime/exec/dynsplit_test.go b/sdks/go/pkg/beam/core/runtime/exec/dynsplit_test.go
new file mode 100644
index 0000000..3b17a0a
--- /dev/null
+++ b/sdks/go/pkg/beam/core/runtime/exec/dynsplit_test.go
@@ -0,0 +1,456 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package exec
+
+import (
+ "bytes"
+ "context"
+ "io"
+ "reflect"
+ "sync"
+ "testing"
+
+ "github.com/apache/beam/sdks/go/pkg/beam/core/graph"
+ "github.com/apache/beam/sdks/go/pkg/beam/core/graph/coder"
+ "github.com/apache/beam/sdks/go/pkg/beam/core/graph/mtime"
+ "github.com/apache/beam/sdks/go/pkg/beam/core/graph/window"
+ "github.com/apache/beam/sdks/go/pkg/beam/core/typex"
+ "github.com/apache/beam/sdks/go/pkg/beam/core/util/reflectx"
+ "github.com/apache/beam/sdks/go/pkg/beam/internal/errors"
+ "github.com/apache/beam/sdks/go/pkg/beam/io/rtrackers/offsetrange"
+ "github.com/google/go-cmp/cmp"
+ "github.com/google/go-cmp/cmp/cmpopts"
+)
+
+// TestDynamicSplit tests that a dynamic split of an in-progress SDF succeeds
+// with valid input. It coordinates the two threads (processing and splitting)
+// to test what happens if operations happen in various orders. The test then
+// validates that the output of the SDF is correct according to the split.
+func TestDynamicSplit(t *testing.T) {
+	tests := []struct {
+		name string
+		// driver is a function determining how the processing and splitting
+		// threads are created and coordinated.
+		driver func(*Plan, DataContext, *splitTestSdf) (error, splitResult)
+	}{
+		{
+			// Complete a split before beginning processing.
+			name:   "Simple",
+			driver: nonBlockingDriver,
+		},
+		{
+			// Try claiming while blocked on a split.
+			name:   "BlockOnSplit",
+			driver: splitBlockingDriver,
+		},
+		{
+			// Try splitting while blocked on a claim.
+			name:   "BlockOnClaim",
+			driver: claimBlockingDriver,
+		},
+	}
+	for _, test := range tests {
+		test := test
+		t.Run(test.name, func(t *testing.T) {
+			// Create pipeline.
+			sdf := newSplitTestSdf()
+			dfn, err := graph.NewDoFn(sdf, graph.NumMainInputs(graph.MainSingle))
+			if err != nil {
+				t.Fatalf("invalid function: %v", err)
+			}
+			cdr := createSplitTestInCoder()
+			plan, out := createSdfPlan(t, t.Name(), dfn, cdr)
+
+			// Create thread to send element to pipeline.
+			pr, pw := io.Pipe()
+			elm := createElm()
+			go writeElm(elm, cdr, pw)
+			dc := DataContext{Data: &TestDataManager{R: pr}}
+
+			// Call driver to coordinate processing & splitting threads.
+			procRes, splitRes := test.driver(plan, dc, sdf)
+
+			// Validate we get a valid split result, aside from split elements.
+			if splitRes.err != nil {
+				t.Fatalf("Plan.Split failed: %v", splitRes.err)
+			}
+			wantSplit := SplitResult{
+				PI:   -1,
+				RI:   1,
+				PS:   nil,
+				RS:   nil,
+				TId:  testTransformId,
+				InId: indexToInputId(0),
+			}
+			if diff := cmp.Diff(splitRes.split, wantSplit, cmpopts.IgnoreFields(SplitResult{}, "PS", "RS")); diff != "" {
+				t.Errorf("Incorrect split result (ignoring split elements): %v", diff)
+			}
+
+			// Validate split elements are encoded correctly by decoding them
+			// with the input coder to the path. The primary is dereferenced below,
+			// so failing to decode it is fatal. TODO(BEAM-10579) Switch to using
+			// the splittable unit's input coder once that is implemented.
+			p, err := decodeDynSplitElm(splitRes.split.PS, cdr)
+			if err != nil {
+				t.Fatalf("Failed decoding primary element split: %v", err)
+			}
+			_, err = decodeDynSplitElm(splitRes.split.RS, cdr)
+			if err != nil {
+				t.Errorf("Failed decoding residual element split: %v", err)
+			}
+
+			// Validate SDF output. Make sure each restriction matches the split result.
+			if err := procRes; err != nil {
+				t.Fatal(err)
+			}
+			pRest := p.Elm.(*FullValue).Elm2.(offsetrange.Restriction)
+			if got, want := len(out.Elements), int(pRest.End-pRest.Start); got != want {
+				t.Errorf("Unexpected number of elements: got: %v, want: %v", got, want)
+			}
+			for i, fv := range out.Elements {
+				rest := fv.Elm.(offsetrange.Restriction)
+				if got, want := rest, pRest; !cmp.Equal(got, want) {
+					t.Errorf("Output element %v had incorrect restriction: got: %v, want: %v", i, got, want)
+				}
+			}
+		})
+	}
+}
+
+// nonBlockingDriver performs a split to completion before letting processing proceed, so no
+// thread is forced to wait on a mutex. It returns the processing error and the split result.
+func nonBlockingDriver(plan *Plan, dc DataContext, sdf *splitTestSdf) (procRes error, splitRes splitResult) {
+ // Begin processing pipeline.
+ procResCh := make(chan error)
+ go processPlan(plan, dc, procResCh)
+ rt := <-sdf.rt // Tracker is created first, retrieve that.
+
+ // Complete a split before unblocking processing.
+ splitResCh := make(chan splitResult)
+ go splitPlan(plan, splitResCh)
+ <-rt.split // NOTE(review): presumably signals the split has reached the tracker; the tracker type is outside this view.
+ <-rt.blockSplit // NOTE(review): presumably releases the blocked split; confirm against the test tracker.
+ splitRes = <-splitResCh
+
+ // Unblock and finish processing.
+ <-sdf.proc
+ <-rt.claim
+ <-rt.blockClaim
+ <-rt.endClaim
+ procRes = <-procResCh
+
+ return procRes, splitRes
+}
+
+// splitBlockingDriver blocks on a split request so that the SDF attempts to claim while
+// the split is occurring. It returns the processing error and the split result.
+func splitBlockingDriver(plan *Plan, dc DataContext, sdf *splitTestSdf) (procRes error, splitRes splitResult) {
+ // Begin processing pipeline.
+ procResCh := make(chan error)
+ go processPlan(plan, dc, procResCh)
+ rt := <-sdf.rt // Tracker is created first, retrieve that.
+
+ // Start a split, but block on it so it holds the mutex.
+ splitResCh := make(chan splitResult)
+ go splitPlan(plan, splitResCh)
+ <-rt.split // The split is left pending: rt.blockSplit is not received until after the claim starts.
+
+ // Start processing and start a claim, that'll be waiting for the mutex.
+ <-sdf.proc
+ <-rt.claim
+
+ // Unblock and finish splitting and free the mutex.
+ <-rt.blockSplit
+ splitRes = <-splitResCh
+
+ // Unblock and finish claiming and processing.
+ <-rt.blockClaim
+ <-rt.endClaim
+ procRes = <-procResCh
+
+ return procRes, splitRes
+}
+
+// claimBlockingDriver blocks on a claim request so that the SDF attempts to split while
+// the claim is occurring. It returns the processing error and the split result.
+func claimBlockingDriver(plan *Plan, dc DataContext, sdf *splitTestSdf) (procRes error, splitRes splitResult) {
+ // Begin processing pipeline.
+ procResCh := make(chan error)
+ go processPlan(plan, dc, procResCh)
+ rt := <-sdf.rt // Tracker is created first, retrieve that.
+
+ // Start a claim, but block on it so it holds the mutex.
+ <-sdf.proc
+ <-rt.claim // The claim is left pending: rt.blockClaim is not received until after the split starts.
+
+ // Start a split that'll be waiting for the mutex.
+ splitResCh := make(chan splitResult)
+ go splitPlan(plan, splitResCh)
+ <-rt.split
+
+ // Unblock the claim, freeing the mutex (but not finishing processing yet).
+ <-rt.blockClaim
+
+ // Finish splitting, allowing processing to finish.
+ <-rt.blockSplit
+ splitRes = <-splitResCh
+ <-rt.endClaim // Delay the claim end so we don't process too much before splitting.
+ procRes = <-procResCh
+
+ return procRes, splitRes
+}
+
+// createElm builds the single input element of the test pipeline: an
+// (element, restriction) pair nested inside an outer value that also carries
+// the size. The element is 20, the restriction covers [0, 20) and the size is
+// 20.
+func createElm() *FullValue {
+	elmAndRest := &FullValue{
+		Elm:  20,
+		Elm2: offsetrange.Restriction{Start: 0, End: 20},
+	}
+	return &FullValue{Elm: elmAndRest, Elm2: float64(20)}
+}
+
+// createSplitTestInCoder returns the coder for inputs to the test pipeline
+// (specifically, for the DataSource transform at its head). The coder is a
+// globally-windowed KV<KV<int, restriction>, double>, matching the element
+// produced by createElm.
+func createSplitTestInCoder() *coder.Coder {
+	restT := reflect.TypeOf((*offsetrange.Restriction)(nil)).Elem()
+	custom := coder.LookupCustomCoder(restT)
+
+	// Inner pair: the element and its restriction.
+	elmAndRest := coder.NewKV([]*coder.Coder{
+		intCoder(reflectx.Int),
+		{Kind: coder.Custom, T: typex.New(restT), Custom: custom},
+	})
+	// Outer pair: the inner pair plus its size.
+	sized := coder.NewKV([]*coder.Coder{elmAndRest, coder.NewDouble()})
+	return coder.NewW(sized, coder.NewGlobalWindow())
+}
+
+// createSdfPlan assembles the test pipeline
+// DataSource -> SDF.ProcessSizedElementsAndRestrictions -> CaptureNode
+// into a Plan named name+"_plan". It returns the plan together with the
+// CaptureNode at its tail, and fails the test if the plan cannot be built.
+func createSdfPlan(t *testing.T, name string, fn *graph.DoFn, cdr *coder.Coder) (*Plan, *CaptureNode) {
+	capture := &CaptureNode{UID: 0}
+	pardo := &ParDo{UID: 1, Fn: fn, Out: []Node{capture}}
+	sdfUnit := &ProcessSizedElementsAndRestrictions{PDo: pardo, TfId: testTransformId}
+	source := &DataSource{
+		UID:   2,
+		SID:   StreamID{PtransformID: "DataSource"},
+		Name:  "name",
+		Coder: cdr,
+		Out:   sdfUnit,
+	}
+
+	plan, err := NewPlan(name+"_plan", []Unit{source, sdfUnit, capture})
+	if err != nil {
+		t.Fatalf("NewPlan failed: %v", err)
+	}
+	return plan, capture
+}
+
+// writeElm is meant to be the goroutine for feeding an element to the
+// DataSource of the test pipeline.
+//
+// It encodes a windowed-value header (single global window, zero timestamp)
+// followed by elm into pw using cdr, then closes pw. Any failure panics with
+// the underlying error wrapped in a message naming the step that failed, so
+// the cause shows up in the test output.
+func writeElm(elm *FullValue, cdr *coder.Coder, pw *io.PipeWriter) {
+	wc := MakeWindowEncoder(cdr.Window)
+	ec := MakeElementEncoder(coder.SkipW(cdr))
+	if err := EncodeWindowedValueHeader(wc, window.SingleGlobalWindow, mtime.ZeroTimestamp, pw); err != nil {
+		panic(errors.Wrap(err, "writeElm: encoding windowed value header failed"))
+	}
+	if err := ec.Encode(elm, pw); err != nil {
+		panic(errors.Wrap(err, "writeElm: encoding element failed"))
+	}
+	if err := pw.Close(); err != nil {
+		panic(errors.Wrap(err, "writeElm: closing pipe writer failed"))
+	}
+}
+
+// decodeDynSplitElm decodes elm, the encoded bytes of a windowed element,
+// using cdr. It reads the windowed-value header first and the element after
+// it, and returns the element with its Windows and Timestamp fields filled in
+// from the header. The first decoding error is returned unchanged.
+func decodeDynSplitElm(elm []byte, cdr *coder.Coder) (*FullValue, error) {
+	winDec := MakeWindowDecoder(cdr.Window)
+	elmDec := MakeElementDecoder(coder.SkipW(cdr))
+	buf := bytes.NewBuffer(elm)
+
+	windows, ts, err := DecodeWindowedValueHeader(winDec, buf)
+	if err != nil {
+		return nil, err
+	}
+
+	fv, err := elmDec.Decode(buf)
+	if err != nil {
+		return nil, err
+	}
+	fv.Windows, fv.Timestamp = windows, ts
+	return fv, nil
+}
+
+// processPlan is meant to be the goroutine representing the thread processing
+// the SDF.
+//
+// It executes the plan and then brings it down, and sends exactly one value
+// on result: the wrapped error of the first step that failed, or nil if both
+// succeeded. Sending only once matters because the drivers receive from the
+// channel a single time; a second send on the unbuffered channel would block
+// this goroutine forever.
+func processPlan(plan *Plan, dc DataContext, result chan error) {
+	if err := plan.Execute(context.Background(), plan.ID()+"_execute", dc); err != nil {
+		result <- errors.Wrap(err, "Plan.Execute failed")
+		return
+	}
+	if err := plan.Down(context.Background()); err != nil {
+		result <- errors.Wrap(err, "Plan.Down failed")
+		return
+	}
+	result <- nil
+}
+
+// splitResult bundles the two return values of Plan.Split so that splitPlan
+// can send them through a single channel.
+type splitResult struct {
+ split SplitResult
+ err error
+}
+
+// splitPlan is meant to be the goroutine representing the thread handling a
+// split request for the SDF. It requests a split at fraction 0.5 with a
+// buffer size of 1 and sends the outcome, error included, on result.
+func splitPlan(plan *Plan, result chan splitResult) {
+	var res splitResult
+	res.split, res.err = plan.Split(SplitPoints{Frac: 0.5, BufSize: 1})
+	result <- res
+}
+
+// splitTestRTracker adds signals needed to coordinate splitting and claiming
+// over multiple threads for this test. Semantically, this tracker is an
+// offset range tracker representing a range of integers to output.
+//
+// Every method takes mu before delegating to the wrapped offsetrange.Tracker.
+// The signal channels are sent on by the tracker's methods and received by
+// the test drivers.
+type splitTestRTracker struct {
+ mu sync.Mutex // Lock on accessing underlying tracker.
+ rt *offsetrange.Tracker
+
+ // Send signals when starting a claim, blocking a claim, and ending a claim.
+ // In TryClaim, claim is sent before taking mu, blockClaim while holding mu,
+ // and endClaim after releasing mu.
+ claim chan struct{}
+ blockClaim chan struct{}
+ endClaim chan struct{}
+ blockInd int64 // Only send signals when claiming a specific position.
+
+ // Send signals when starting a split, and blocking a split. Important note:
+ // the spot to use these in this test is dependent on the first operation
+ // taking place on a split, which may not necessarily be TrySplit.
+ // Currently split is sent by GetProgress before taking mu, and blockSplit
+ // is sent by TrySplit while holding mu.
+ split chan struct{}
+ blockSplit chan struct{}
+}
+
+// newSplitTestRTracker wraps a fresh offsetrange.Tracker for rest. Signals are
+// only sent when claiming rest.Start. The "start" channels (claim and split)
+// have a buffer of one; the remaining channels are unbuffered.
+func newSplitTestRTracker(rest offsetrange.Restriction) *splitTestRTracker {
+	t := &splitTestRTracker{
+		rt:       offsetrange.NewTracker(rest),
+		blockInd: rest.Start,
+	}
+
+	// Buffered start signals.
+	t.claim = make(chan struct{}, 1)
+	t.split = make(chan struct{}, 1)
+
+	// Unbuffered blocking signals.
+	t.blockClaim = make(chan struct{})
+	t.endClaim = make(chan struct{})
+	t.blockSplit = make(chan struct{})
+	return t
+}
+
+// TryClaim delegates to the wrapped tracker's TryClaim while holding mu.
+//
+// When pos equals blockInd it additionally sends three signals: claim before
+// taking the mutex, blockClaim while holding it, and endClaim after releasing
+// it. blockClaim and endClaim are unbuffered, so each send waits until the
+// test driver receives it. pos must be an int64; any other type panics in the
+// type assertion.
+func (rt *splitTestRTracker) TryClaim(pos interface{}) bool {
+ i := pos.(int64)
+ if i == rt.blockInd {
+ rt.claim <- struct{}{}
+ }
+
+ rt.mu.Lock()
+ if i == rt.blockInd {
+ // Hold the mutex until the driver lets the claim go ahead.
+ rt.blockClaim <- struct{}{}
+ }
+ result := rt.rt.TryClaim(pos)
+ rt.mu.Unlock()
+
+ if i == rt.blockInd {
+ rt.endClaim <- struct{}{}
+ }
+ return result
+}
+
+// GetError returns the wrapped tracker's error, read under the mutex.
+func (t *splitTestRTracker) GetError() error {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	return t.rt.GetError()
+}
+
+// TrySplit delegates to the wrapped tracker's TrySplit while holding mu.
+// Before delegating it sends on the unbuffered blockSplit channel, so it
+// keeps holding the mutex until the test driver receives that signal.
+func (rt *splitTestRTracker) TrySplit(fraction float64) (interface{}, interface{}, error) {
+ rt.mu.Lock()
+ defer rt.mu.Unlock()
+ rt.blockSplit <- struct{}{}
+ return rt.rt.TrySplit(fraction)
+}
+
+// GetProgress sends the split-start signal and then returns the wrapped
+// tracker's progress, read under the mutex. The signal is sent before the
+// mutex is taken.
+func (rt *splitTestRTracker) GetProgress() (float64, float64) {
+ // Note: Currently, GetProgress is called first in a split and blocks if
+ // TryClaim is being called.
+ rt.split <- struct{}{}
+
+ rt.mu.Lock()
+ defer rt.mu.Unlock()
+ return rt.rt.GetProgress()
+}
+
+// IsDone reports the wrapped tracker's IsDone result, read under the mutex.
+func (t *splitTestRTracker) IsDone() bool {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	return t.rt.IsDone()
+}
+
+// GetRestriction returns the wrapped tracker's current restriction, read
+// under the mutex.
+func (t *splitTestRTracker) GetRestriction() interface{} {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	return t.rt.GetRestriction()
+}
+
+// splitTestSdf has signals needed to control processing behavior over multiple
+// threads. The actual behavior is to accept an integer N as the element and
+// output each element in the range of [0, N).
+type splitTestSdf struct {
+ // proc is sent on by ProcessElement before it starts claiming, so that
+ // processing waits until the test driver receives from it.
+ proc chan struct{}
+ rt chan *splitTestRTracker // Used to provide created trackers to the test code.
+}
+
+// newSplitTestSdf returns a splitTestSdf whose signal channels are both
+// unbuffered.
+func newSplitTestSdf() *splitTestSdf {
+	fn := new(splitTestSdf)
+	fn.proc = make(chan struct{})
+	fn.rt = make(chan *splitTestRTracker)
+	return fn
+}
+
+// ProcessElement signals on proc that processing has started, then claims
+// consecutive positions beginning at the restriction's start. For every
+// position it manages to claim it emits the tracker's current restriction
+// together with that position, and it stops at the first failed claim.
+func (fn *splitTestSdf) ProcessElement(rt *splitTestRTracker, _ int, emit func(offsetrange.Restriction, int)) {
+	pos := rt.GetRestriction().(offsetrange.Restriction).Start
+	fn.proc <- struct{}{}
+
+	for ; rt.TryClaim(pos); pos++ {
+		emit(rt.GetRestriction().(offsetrange.Restriction), int(pos))
+	}
+}
+
+// CreateInitialRestriction returns the restriction [0, i) for element i.
+func (fn *splitTestSdf) CreateInitialRestriction(i int) offsetrange.Restriction {
+	var rest offsetrange.Restriction
+	rest.Start = 0
+	rest.End = int64(i)
+	return rest
+}
+
+// SplitRestriction performs no initial splitting: it returns a one-element
+// slice holding rest unchanged.
+func (fn *splitTestSdf) SplitRestriction(_ int, rest offsetrange.Restriction) []offsetrange.Restriction {
+	unsplit := make([]offsetrange.Restriction, 0, 1)
+	return append(unsplit, rest)
+}
+
+// RestrictionSize reports the size of rest as computed by the restriction
+// itself; the element is ignored.
+func (fn *splitTestSdf) RestrictionSize(_ int, rest offsetrange.Restriction) float64 {
+	size := rest.Size()
+	return size
+}
+
+// CreateTracker builds a splitTestRTracker for rest and hands it to the test
+// code through the rt channel before returning it. The channel is unbuffered,
+// so this call waits until the test receives the tracker.
+func (fn *splitTestSdf) CreateTracker(rest offsetrange.Restriction) *splitTestRTracker {
+	tracker := newSplitTestRTracker(rest)
+	fn.rt <- tracker
+	return tracker
+}
diff --git a/sdks/go/pkg/beam/core/runtime/exec/plan.go b/sdks/go/pkg/beam/core/runtime/exec/plan.go
index 53891d6..23ce1c7 100644
--- a/sdks/go/pkg/beam/core/runtime/exec/plan.go
+++ b/sdks/go/pkg/beam/core/runtime/exec/plan.go
@@ -207,13 +207,28 @@
BufSize int64
}
+// SplitResult contains the result of performing a split on a Plan. It is the
+// value returned by Plan.Split.
+type SplitResult struct {
+ // Indices are always included, for both channel and sub-element splits.
+ PI int64 // Primary index, last element of the primary.
+ RI int64 // Residual index, first element of the residual.
+
+ // Extra information included for sub-element splits. If PS and RS are
+ // present then a sub-element split occurred.
+ PS []byte // Primary split. If an element is split, this is the encoded primary.
+ RS []byte // Residual split. If an element is split, this is the encoded residual.
+ TId string // Transform ID of the transform receiving the split elements.
+ InId string // Input ID of the input the split elements are received from.
+}
+
// Split takes a set of potential split indexes, and if successful returns
-// the split index of the first element of the residual, on which processing
-// will be halted.
+// the split result.
// Returns an error when unable to split.
-func (p *Plan) Split(s SplitPoints) (int64, error) {
+func (p *Plan) Split(s SplitPoints) (SplitResult, error) {
+ // TODO: When bundles with multiple sources are supported, perform splits
+ // on all sources.
if p.source != nil {
return p.source.Split(s.Splits, s.Frac, s.BufSize)
}
- return 0, fmt.Errorf("failed to split at requested splits: {%v}, Source not initialized", s)
+ return SplitResult{}, fmt.Errorf("failed to split at requested splits: {%v}, Source not initialized", s)
}
diff --git a/sdks/go/pkg/beam/core/runtime/exec/sdf.go b/sdks/go/pkg/beam/core/runtime/exec/sdf.go
index c24e34a..29b40c8 100644
--- a/sdks/go/pkg/beam/core/runtime/exec/sdf.go
+++ b/sdks/go/pkg/beam/core/runtime/exec/sdf.go
@@ -43,7 +43,7 @@
}
// Up performs one-time setup for this executor.
-func (n *PairWithRestriction) Up(ctx context.Context) error {
+func (n *PairWithRestriction) Up(_ context.Context) error {
fn := (*graph.SplittableDoFn)(n.Fn).CreateInitialRestrictionFn()
var err error
if n.inv, err = newCreateInitialRestrictionInvoker(fn); err != nil {
@@ -88,7 +88,7 @@
}
// Down currently does nothing.
-func (n *PairWithRestriction) Down(ctx context.Context) error {
+func (n *PairWithRestriction) Down(_ context.Context) error {
return nil
}
@@ -117,7 +117,7 @@
}
// Up performs one-time setup for this executor.
-func (n *SplitAndSizeRestrictions) Up(ctx context.Context) error {
+func (n *SplitAndSizeRestrictions) Up(_ context.Context) error {
fn := (*graph.SplittableDoFn)(n.Fn).SplitRestrictionFn()
var err error
if n.splitInv, err = newSplitRestrictionInvoker(fn); err != nil {
@@ -200,7 +200,7 @@
}
// Down currently does nothing.
-func (n *SplitAndSizeRestrictions) Down(ctx context.Context) error {
+func (n *SplitAndSizeRestrictions) Down(_ context.Context) error {
return nil
}
@@ -215,14 +215,27 @@
// changes to support the SDF's method signatures and the expected structure
// of the FullValue being received.
type ProcessSizedElementsAndRestrictions struct {
- PDo *ParDo
+ PDo *ParDo
+ TfId string // Transform ID. Needed for splitting.
+ ctInv *ctInvoker
+ sizeInv *rsInvoker
- inv *ctInvoker
+ // SU is a buffered channel for indicating when this unit is splittable.
+ // When this unit is processing an element, it sends a SplittableUnit
+ // interface through the channel. That interface can be received on other
+ // threads and used to perform splitting or other related operation.
+ //
+ // This channel should be received on in a non-blocking manner, to avoid
+ // hanging if no element is processing.
+ //
+ // Receiving the SplittableUnit prevents the current element from finishing
+ // processing, so the element does not unexpectedly change during a split.
+ // Therefore, receivers of the SplittableUnit must send it back through the
+ // channel once finished with it, or it will block indefinitely.
+ SU chan SplittableUnit
- // Rt allows this unit to send out restriction trackers being processed.
- // Receivers of the tracker do not own it, and must send it back through the
- // same channel once finished with it.
- Rt chan sdf.RTracker
+ elm *FullValue // Currently processing element.
+ rt sdf.RTracker // Currently processing element's restriction tracker.
}
// ID calls the ParDo's ID method.
@@ -234,10 +247,14 @@
func (n *ProcessSizedElementsAndRestrictions) Up(ctx context.Context) error {
fn := (*graph.SplittableDoFn)(n.PDo.Fn).CreateTrackerFn()
var err error
- if n.inv, err = newCreateTrackerInvoker(fn); err != nil {
+ if n.ctInv, err = newCreateTrackerInvoker(fn); err != nil {
return errors.WithContextf(err, "%v", n)
}
- n.Rt = make(chan sdf.RTracker, 1)
+ fn = (*graph.SplittableDoFn)(n.PDo.Fn).RestrictionSizeFn()
+ if n.sizeInv, err = newRestrictionSizeInvoker(fn); err != nil {
+ return errors.WithContextf(err, "%v", n)
+ }
+ n.SU = make(chan SplittableUnit, 1)
return n.PDo.Up(ctx)
}
@@ -268,15 +285,22 @@
// and processes each element using the underlying ParDo and adding the
// restriction tracker to the normal invocation. Sizing information is present
// but currently ignored. Output is forwarded to the underlying ParDo's outputs.
-func (n *ProcessSizedElementsAndRestrictions) ProcessElement(ctx context.Context, elm *FullValue, values ...ReStream) error {
+func (n *ProcessSizedElementsAndRestrictions) ProcessElement(_ context.Context, elm *FullValue, values ...ReStream) error {
if n.PDo.status != Active {
err := errors.Errorf("invalid status %v, want Active", n.PDo.status)
return errors.WithContextf(err, "%v", n)
}
rest := elm.Elm.(*FullValue).Elm2
- rt := n.inv.Invoke(rest)
- n.Rt <- rt
+ rt := n.ctInv.Invoke(rest)
+
+ n.rt = rt
+ n.elm = elm
+ n.SU <- n
+ defer func() {
+ <-n.SU
+ }()
+
mainIn := &MainInput{
Values: values,
RTracker: rt,
@@ -303,20 +327,18 @@
}
}
- err := n.PDo.processMainInput(mainIn)
- <-n.Rt
- return err
+ return n.PDo.processMainInput(mainIn)
}
// FinishBundle resets the invokers and then calls the ParDo's FinishBundle method.
func (n *ProcessSizedElementsAndRestrictions) FinishBundle(ctx context.Context) error {
- n.inv.Reset()
+ n.ctInv.Reset()
+ n.sizeInv.Reset()
return n.PDo.FinishBundle(ctx)
}
-// Down closes open channels and calls the ParDo's Down method.
+// Down calls the ParDo's Down method.
func (n *ProcessSizedElementsAndRestrictions) Down(ctx context.Context) error {
- close(n.Rt)
return n.PDo.Down(ctx)
}
@@ -325,6 +347,104 @@
return fmt.Sprintf("SDF.ProcessSizedElementsAndRestrictions[%v] UID:%v Out:%v", path.Base(n.PDo.Fn.Name()), n.PDo.ID(), IDs(n.PDo.Out...))
}
+// SplittableUnit is an interface that defines sub-element splitting operations
+// for a unit, and provides access to them on other threads.
+//
+// ProcessSizedElementsAndRestrictions implements this interface and publishes
+// itself through its SU channel while it is processing an element.
+type SplittableUnit interface {
+ // Split performs a split on a fraction of a currently processing element
+ // and returns the primary and residual elements resulting from it, or an
+ // error if the split failed.
+ //
+ // The ProcessSizedElementsAndRestrictions implementation returns a nil
+ // primary and residual with a nil error when the element cannot be split
+ // (the tracker is done, or the split produced no residual), so callers
+ // should check the results for nil as well as the error.
+ Split(fraction float64) (primary, residual *FullValue, err error)
+
+ // GetProgress returns the fraction of progress the current element has
+ // made in processing. (ex. 0.0 means no progress, and 1.0 means fully
+ // processed.)
+ GetProgress() float64
+
+ // GetTransformId returns the transform ID of the splittable unit.
+ GetTransformId() string
+
+ // GetInputId returns the local input ID of the input that the element being
+ // split was received from.
+ GetInputId() string
+}
+
+// Split splits the element currently being processed by asking its
+// restriction tracker to split at fraction f. On success it returns a primary
+// and a residual element shaped like this unit's input: the original element
+// paired with the new restriction, plus a freshly computed size, carrying the
+// timestamp and windows of the element being processed.
+//
+// It returns nil, nil, nil when the tracker is already done or the split
+// yields no residual. A missing tracker, a tracker error, or a failed
+// TrySplit is reported as an error with added context.
+func (n *ProcessSizedElementsAndRestrictions) Split(f float64) (*FullValue, *FullValue, error) {
+	const splitCtx = "Attempting split in ProcessSizedElementsAndRestrictions"
+
+	// Check that the restriction tracker is in a state where it can be split.
+	if n.rt == nil {
+		return nil, nil, errors.WithContext(errors.New("Restriction tracker missing."), splitCtx)
+	}
+	if err := n.rt.GetError(); err != nil {
+		return nil, nil, errors.WithContext(err, splitCtx)
+	}
+	if n.rt.IsDone() { // Not an error, but not splittable.
+		return nil, nil, nil
+	}
+
+	primRest, resRest, err := n.rt.TrySplit(f)
+	if err != nil {
+		return nil, nil, errors.WithContext(err, splitCtx)
+	}
+	if resRest == nil { // The split failed/returned an empty residual.
+		return nil, nil, nil
+	}
+
+	// The size invoker takes a FullValue; wrap the element if it is not
+	// already one.
+	elm := n.elm.Elm.(*FullValue).Elm
+	sizeArg, isFullValue := elm.(*FullValue)
+	if !isFullValue {
+		sizeArg = &FullValue{Elm: elm}
+	}
+	pSize := n.sizeInv.Invoke(sizeArg, primRest)
+	rSize := n.sizeInv.Invoke(sizeArg, resRest)
+
+	// sized rebuilds the ((element, restriction), size) input structure.
+	sized := func(rest interface{}, size float64) *FullValue {
+		return &FullValue{
+			Elm: &FullValue{
+				Elm:  elm,
+				Elm2: rest,
+			},
+			Elm2:      size,
+			Timestamp: n.elm.Timestamp,
+			Windows:   n.elm.Windows,
+		}
+	}
+	return sized(primRest, pSize), sized(resRest, rSize), nil
+}
+
+// GetProgress returns the current restriction tracker's progress as a fraction.
+//
+// The fraction is the first value returned by the tracker's GetProgress
+// divided by the sum of both values (presumably work done and work remaining;
+// confirm against the sdf.RTracker contract).
+//
+// NOTE(review): unlike Split, this does not check n.rt for nil, and the
+// result is NaN when both values are zero.
+func (n *ProcessSizedElementsAndRestrictions) GetProgress() float64 {
+ d, r := n.rt.GetProgress()
+ return d / (d + r)
+}
+
+// GetTransformId returns this transform's transform ID, as stored in the TfId
+// field.
+func (n *ProcessSizedElementsAndRestrictions) GetTransformId() string {
+ return n.TfId
+}
+
+// GetInputId returns the main input ID, since main input elements are being
+// split. The ID is the local input ID for index 0, built by indexToInputId.
+func (n *ProcessSizedElementsAndRestrictions) GetInputId() string {
+ return indexToInputId(0)
+}
+
// SdfFallback is an executor used when an SDF isn't expanded into steps by the
// runner, indicating that the runner doesn't support splitting. It executes all
// the SDF steps together in one unit.
@@ -370,7 +490,7 @@
// restrictions, and then creating restriction trackers and processing each
// restriction with the underlying ParDo. This executor skips the sizing step
// because sizing information is unnecessary for unexpanded SDFs.
-func (n *SdfFallback) ProcessElement(ctx context.Context, elm *FullValue, values ...ReStream) error {
+func (n *SdfFallback) ProcessElement(_ context.Context, elm *FullValue, values ...ReStream) error {
if n.PDo.status != Active {
err := errors.Errorf("invalid status %v, want Active", n.PDo.status)
return errors.WithContextf(err, "%v", n)
diff --git a/sdks/go/pkg/beam/core/runtime/exec/sdf_invokers_test.go b/sdks/go/pkg/beam/core/runtime/exec/sdf_invokers_test.go
index 5e3abd6..7dbe3e8 100644
--- a/sdks/go/pkg/beam/core/runtime/exec/sdf_invokers_test.go
+++ b/sdks/go/pkg/beam/core/runtime/exec/sdf_invokers_test.go
@@ -16,9 +16,10 @@
package exec
import (
+ "testing"
+
"github.com/apache/beam/sdks/go/pkg/beam/core/graph"
"github.com/google/go-cmp/cmp"
- "testing"
)
// TestInvokes runs tests on each SDF method invoker, using the SDFs defined
diff --git a/sdks/go/pkg/beam/core/runtime/exec/sdf_test.go b/sdks/go/pkg/beam/core/runtime/exec/sdf_test.go
index e746f85..fb85620 100644
--- a/sdks/go/pkg/beam/core/runtime/exec/sdf_test.go
+++ b/sdks/go/pkg/beam/core/runtime/exec/sdf_test.go
@@ -16,11 +16,13 @@
package exec
import (
+ "context"
+ "testing"
+
"github.com/apache/beam/sdks/go/pkg/beam/core/graph"
"github.com/apache/beam/sdks/go/pkg/beam/core/graph/window"
"github.com/apache/beam/sdks/go/pkg/beam/core/typex"
"github.com/google/go-cmp/cmp"
- "testing"
)
// testTimestamp is a constant used to check that timestamps are retained.
@@ -386,3 +388,145 @@
}
})
}
+
+// TestAsSplittableUnit tests ProcessSizedElementsAndRestrictions' implementation
+// of the SplittableUnit interface.
+//
+// Each case sets the node's current element and restriction tracker directly
+// (instead of running ProcessElement), calls Up to create the invokers, and
+// then compares the primary and residual returned by Split with the expected
+// values.
+func TestAsSplittableUnit(t *testing.T) {
+ dfn, err := graph.NewDoFn(&VetSdf{}, graph.NumMainInputs(graph.MainSingle))
+ if err != nil {
+ t.Fatalf("invalid function: %v", err)
+ }
+ kvdfn, err := graph.NewDoFn(&VetKvSdf{}, graph.NumMainInputs(graph.MainKv))
+ if err != nil {
+ t.Fatalf("invalid function: %v", err)
+ }
+
+ // Test that Split returns properly structured results and calls Split on
+ // the restriction tracker.
+ t.Run("Split", func(t *testing.T) {
+ tests := []struct {
+ name string
+ fn *graph.DoFn
+ in FullValue
+ wantPrimary FullValue
+ wantResidual FullValue
+ }{
+ {
+ // Single-value element: ((elm, restriction), size).
+ name: "SingleElem",
+ fn: dfn,
+ in: FullValue{
+ Elm: &FullValue{
+ Elm: 1,
+ Elm2: &VetRestriction{ID: "Sdf"},
+ },
+ Elm2: 1.0,
+ Timestamp: testTimestamp,
+ Windows: testWindows,
+ },
+ wantPrimary: FullValue{
+ Elm: &FullValue{
+ Elm: 1,
+ Elm2: &VetRestriction{ID: "Sdf.1", RestSize: true, Val: 1},
+ },
+ Elm2: 1.0,
+ Timestamp: testTimestamp,
+ Windows: testWindows,
+ },
+ wantResidual: FullValue{
+ Elm: &FullValue{
+ Elm: 1,
+ Elm2: &VetRestriction{ID: "Sdf.2", RestSize: true, Val: 1},
+ },
+ Elm2: 1.0,
+ Timestamp: testTimestamp,
+ Windows: testWindows,
+ },
+ },
+ {
+ // KV element: (((key, value), restriction), size).
+ name: "KvElem",
+ fn: kvdfn,
+ in: FullValue{
+ Elm: &FullValue{
+ Elm: &FullValue{
+ Elm: 1,
+ Elm2: 2,
+ },
+ Elm2: &VetRestriction{ID: "KvSdf"},
+ },
+ Elm2: 3.0,
+ Timestamp: testTimestamp,
+ Windows: testWindows,
+ },
+ wantPrimary: FullValue{
+ Elm: &FullValue{
+ Elm: &FullValue{
+ Elm: 1,
+ Elm2: 2,
+ },
+ Elm2: &VetRestriction{ID: "KvSdf.1", RestSize: true, Key: 1, Val: 2},
+ },
+ Elm2: 3.0,
+ Timestamp: testTimestamp,
+ Windows: testWindows,
+ },
+ wantResidual: FullValue{
+ Elm: &FullValue{
+ Elm: &FullValue{
+ Elm: 1,
+ Elm2: 2,
+ },
+ Elm2: &VetRestriction{ID: "KvSdf.2", RestSize: true, Key: 1, Val: 2},
+ },
+ Elm2: 3.0,
+ Timestamp: testTimestamp,
+ Windows: testWindows,
+ },
+ },
+ }
+ for _, test := range tests {
+ test := test
+ t.Run(test.name, func(t *testing.T) {
+ // Setup, create transforms, inputs, and desired outputs.
+ n := &ParDo{UID: 1, Fn: test.fn, Out: []Node{}}
+ node := &ProcessSizedElementsAndRestrictions{PDo: n}
+ // Install the tracker and current element by hand; the tracker
+ // shares the input's restriction.
+ node.rt = &SplittableUnitRTracker{
+ VetRTracker: VetRTracker{Rest: test.in.Elm.(*FullValue).Elm2.(*VetRestriction)},
+ }
+ node.elm = &test.in
+
+ // Call from SplittableUnit and check results.
+ su := SplittableUnit(node)
+ frac := 0.5
+ if err := node.Up(context.Background()); err != nil {
+ t.Fatalf("ProcessSizedElementsAndRestrictions.Up() failed: %v", err)
+ }
+ gotPrimary, gotResidual, err := su.Split(frac)
+ if err != nil {
+ t.Fatalf("SplittableUnit.Split(%v) failed: %v", frac, err)
+ }
+ if diff := cmp.Diff(gotPrimary, &test.wantPrimary); diff != "" {
+ t.Errorf("SplittableUnit.Split(%v) has incorrect primary: %v", frac, diff)
+ }
+ if diff := cmp.Diff(gotResidual, &test.wantResidual); diff != "" {
+ t.Errorf("SplittableUnit.Split(%v) has incorrect residual: %v", frac, diff)
+ }
+ })
+ }
+ })
+}
+
+// SplittableUnitRTracker is a VetRTracker with some added behavior needed for
+// TestAsSplittableUnit. It embeds VetRTracker and defines its own IsDone and
+// TrySplit methods.
+type SplittableUnitRTracker struct {
+ VetRTracker
+}
+
+// IsDone always reports false, so ProcessSizedElementsAndRestrictions.Split
+// never takes its "tracker is done" early return in the test.
+func (rt *SplittableUnitRTracker) IsDone() bool { return false }
+
+// TrySplit ignores the requested fraction and returns two copies of the
+// tracked restriction as primary and residual, with ".1" and ".2" appended to
+// their IDs respectively. It never fails.
+func (rt *SplittableUnitRTracker) TrySplit(_ float64) (interface{}, interface{}, error) {
+	primary, residual := rt.Rest.copy(), rt.Rest.copy()
+	primary.ID += ".1"
+	residual.ID += ".2"
+	return &primary, &residual, nil
+}
diff --git a/sdks/go/pkg/beam/core/runtime/exec/translate.go b/sdks/go/pkg/beam/core/runtime/exec/translate.go
index caa5a22..fbbdab3 100644
--- a/sdks/go/pkg/beam/core/runtime/exec/translate.go
+++ b/sdks/go/pkg/beam/core/runtime/exec/translate.go
@@ -418,7 +418,7 @@
}
u = n
if urn == urnProcessSizedElementsAndRestrictions {
- u = &ProcessSizedElementsAndRestrictions{PDo: n}
+ u = &ProcessSizedElementsAndRestrictions{PDo: n, TfId: id.to}
} else if dofn.IsSplittable() {
u = &SdfFallback{PDo: n}
}
@@ -580,7 +580,7 @@
var unordered []string
for key := range m {
- if i, err := strconv.Atoi(strings.TrimPrefix(key, "i")); strings.HasPrefix(key, "i") && err == nil {
+ if i, err := inputIdToIndex(key); err == nil {
if i < len(m) {
ordered[i] = key
continue
@@ -605,6 +605,24 @@
return ret
}
+// inputIdToIndex converts a local input ID for a transform into an index. Use
+// this to avoid relying on format details for input IDs.
+//
+// Currently, expects IDs in the format "iN" where N is the non-negative
+// index. If the ID is in an invalid form, returns an error. Negative values
+// such as "i-1" are rejected because the result is used to index slices.
+func inputIdToIndex(id string) (int, error) {
+	if !strings.HasPrefix(id, "i") {
+		return 0, errors.New("invalid input ID format")
+	}
+	idx, err := strconv.Atoi(strings.TrimPrefix(id, "i"))
+	if err != nil {
+		return 0, err
+	}
+	if idx < 0 {
+		return 0, errors.New("invalid input ID format")
+	}
+	return idx, nil
+}
+
+// indexToInputId converts an index into a local input ID for a transform. Use
+// this to avoid relying on format details for input IDs.
+//
+// The ID is the letter "i" followed by the decimal index, which is the format
+// inputIdToIndex parses.
+func indexToInputId(i int) string {
+ return "i" + strconv.Itoa(i)
+}
+
func unmarshalPort(data []byte) (Port, string, error) {
var port fnpb.RemoteGrpcPort
if err := proto.Unmarshal(data, &port); err != nil {
diff --git a/sdks/go/pkg/beam/core/runtime/graphx/schema/schema.go b/sdks/go/pkg/beam/core/runtime/graphx/schema/schema.go
new file mode 100644
index 0000000..ac724b3
--- /dev/null
+++ b/sdks/go/pkg/beam/core/runtime/graphx/schema/schema.go
@@ -0,0 +1,269 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package schema contains utility functions for relating Go types and Beam Schemas.
+//
+// Not all Go types can be converted to schemas. This is because Go is more
+// expressive than Beam schemas. Just as not all Go types can be serialized,
+// not all Beam Schemas will have a conversion to Go types until the correct
+// mechanism exists in the SDK to handle them.
+//
+// While efforts will be made to have conversions be reversible, this will not
+// be possible in all instances. E.g., Go arrays as fields will be converted to
+// Beam Arrays, but a Beam Array type will map by default to a Go slice.
+package schema
+
+import (
+ "fmt"
+ "reflect"
+ "strings"
+
+ "github.com/apache/beam/sdks/go/pkg/beam/core/util/reflectx"
+ "github.com/apache/beam/sdks/go/pkg/beam/internal/errors"
+ pipepb "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
+)
+
+// FromType returns the Beam Schema for the given Go type.
+//
+// A struct and a pointer to that struct produce the same top-level schema.
+// Any other type is rejected with an error that names the type as passed in.
+func FromType(ot reflect.Type) (*pipepb.Schema, error) {
+	st := ot
+	if ot.Kind() == reflect.Ptr {
+		st = ot.Elem()
+	}
+	if st.Kind() == reflect.Struct {
+		return structToSchema(st)
+	}
+	return nil, errors.Errorf("cannot convert %v to schema. FromType only converts structs to schemas", ot)
+}
+
+// structToSchema converts every field of the struct type t, in declaration
+// order, into a schema field. It stops at the first field that cannot be
+// converted and returns an error naming that field.
+func structToSchema(t reflect.Type) (*pipepb.Schema, error) {
+	n := t.NumField()
+	fields := make([]*pipepb.Field, n)
+	for i := range fields {
+		sf := t.Field(i)
+		f, err := structFieldToField(sf)
+		if err != nil {
+			return nil, errors.Wrapf(err, "cannot convert field %v to schema", sf.Name)
+		}
+		fields[i] = f
+	}
+	return &pipepb.Schema{Fields: fields}, nil
+}
+
+// structFieldToField converts a single struct field into a schema field.
+//
+// The schema field is named after the Go field unless the field carries a
+// `beam` struct tag with a non-empty name part, in which case that name is
+// used. A tag holding only options (for example `beam:",opt"`) keeps the Go
+// field name rather than producing a schema field with an empty name.
+// Returns an error if the field's type cannot be converted.
+func structFieldToField(sf reflect.StructField) (*pipepb.Field, error) {
+	name := sf.Name
+	if tag := sf.Tag.Get("beam"); tag != "" {
+		// Options are parsed but not used yet.
+		if tagName, _ := parseTag(tag); tagName != "" {
+			name = tagName
+		}
+	}
+	ftype, err := reflectTypeToFieldType(sf.Type)
+	if err != nil {
+		return nil, err
+	}
+	return &pipepb.Field{
+		Name: name,
+		Type: ftype,
+	}, nil
+}
+
+// reflectTypeToFieldType converts a Go type into a schema field type.
+//
+// One level of pointer is stripped and recorded as Nullable. Maps, structs,
+// slices and arrays are converted recursively; []byte becomes the BYTES
+// atomic type; every other kind must appear in reflectTypeToAtomicTypeMap.
+// Interfaces, channels, unsafe pointers and complex numbers are rejected
+// explicitly. Error messages refer to the type as originally passed in.
+func reflectTypeToFieldType(ot reflect.Type) (*pipepb.FieldType, error) {
+	t := ot
+	nullable := t.Kind() == reflect.Ptr
+	if nullable {
+		t = t.Elem()
+	}
+	ft := &pipepb.FieldType{Nullable: nullable}
+
+	switch kind := t.Kind(); kind {
+	case reflect.Interface, reflect.Chan, reflect.UnsafePointer, reflect.Complex128, reflect.Complex64:
+		return nil, errors.Errorf("unable to convert unsupported type %v to schema", ot)
+
+	case reflect.Map:
+		keyType, err := reflectTypeToFieldType(t.Key())
+		if err != nil {
+			return nil, errors.Wrapf(err, "unable to convert key of %v to schema field", ot)
+		}
+		valueType, err := reflectTypeToFieldType(t.Elem())
+		if err != nil {
+			return nil, errors.Wrapf(err, "unable to convert value of %v to schema field", ot)
+		}
+		ft.TypeInfo = &pipepb.FieldType_MapType{
+			MapType: &pipepb.MapType{
+				KeyType:   keyType,
+				ValueType: valueType,
+			},
+		}
+
+	case reflect.Struct:
+		rowSchema, err := structToSchema(t)
+		if err != nil {
+			return nil, errors.Wrapf(err, "unable to convert %v to schema field", ot)
+		}
+		ft.TypeInfo = &pipepb.FieldType_RowType{
+			RowType: &pipepb.RowType{
+				Schema: rowSchema,
+			},
+		}
+
+	case reflect.Slice, reflect.Array:
+		if t == reflectx.ByteSlice {
+			// Special handling for []byte.
+			ft.TypeInfo = &pipepb.FieldType_AtomicType{
+				AtomicType: pipepb.AtomicType_BYTES,
+			}
+		} else {
+			elemType, err := reflectTypeToFieldType(t.Elem())
+			if err != nil {
+				return nil, errors.Wrapf(err, "unable to convert element type of %v to schema field", ot)
+			}
+			ft.TypeInfo = &pipepb.FieldType_ArrayType{
+				ArrayType: &pipepb.ArrayType{
+					ElementType: elemType,
+				},
+			}
+		}
+
+	default: // must be an atomic type
+		atomic, known := reflectTypeToAtomicTypeMap[kind]
+		if !known {
+			return nil, errors.Errorf("unable to map %v to pipepb.AtomicType", t)
+		}
+		ft.TypeInfo = &pipepb.FieldType_AtomicType{
+			AtomicType: atomic,
+		}
+	}
+	return ft, nil
+}
+
+// reflectTypeToAtomicTypeMap maps Go kinds to the schema atomic type that
+// reflectTypeToFieldType uses for them. Kinds with no entry here (for
+// example Int8, and the unsigned kinds other than Uint8) make that function
+// return an error. Both Int and Int64 map to INT64.
+var reflectTypeToAtomicTypeMap = map[reflect.Kind]pipepb.AtomicType{
+ reflect.Uint8: pipepb.AtomicType_BYTE,
+ reflect.Int16: pipepb.AtomicType_INT16,
+ reflect.Int32: pipepb.AtomicType_INT32,
+ reflect.Int64: pipepb.AtomicType_INT64,
+ reflect.Int: pipepb.AtomicType_INT64,
+ reflect.Float32: pipepb.AtomicType_FLOAT,
+ reflect.Float64: pipepb.AtomicType_DOUBLE,
+ reflect.String: pipepb.AtomicType_STRING,
+ reflect.Bool: pipepb.AtomicType_BOOLEAN,
+}
+
+// ToType returns the Go type corresponding to the passed in Schema.
+// The returned type is always of Struct kind, with one field per schema
+// field in schema order. Returns an error naming the offending schema field
+// if any field cannot be converted.
+func ToType(s *pipepb.Schema) (reflect.Type, error) {
+	schemaFields := s.GetFields()
+	structFields := make([]reflect.StructField, len(schemaFields))
+	for i, sf := range schemaFields {
+		rf, err := fieldToStructField(sf)
+		if err != nil {
+			return nil, errors.Wrapf(err, "cannot convert schema field %v to field", sf.GetName())
+		}
+		structFields[i] = rf
+	}
+	return reflect.StructOf(structFields), nil
+}
+
+// fieldToStructField converts a schema field into a Go struct field.
+//
+// The Go field name is the schema field name with its first byte
+// upper-cased, and the original name is kept in a `beam` struct tag. Returns
+// an error if the schema field has an empty name, since no Go field name can
+// be derived from it, or if its type cannot be converted.
+func fieldToStructField(sf *pipepb.Field) (reflect.StructField, error) {
+	name := sf.GetName()
+	if name == "" {
+		// Slicing an empty name below would panic.
+		return reflect.StructField{}, errors.New("schema field has an empty name")
+	}
+	rt, err := fieldTypeToReflectType(sf.GetType())
+	if err != nil {
+		return reflect.StructField{}, err
+	}
+	return reflect.StructField{
+		Name: strings.ToUpper(name[:1]) + name[1:], // Go field name must be capitalized for export and encoding.
+		Type: rt,
+		Tag:  reflect.StructTag(fmt.Sprintf("beam:\"%s\"", name)),
+	}, nil
+}
+
+// atomicTypeToReflectType maps schema atomic types to the Go type that
+// fieldTypeToReflectType produces for them. Atomic types with no entry here
+// make that function return an error. INT64 converts to reflectx.Int64 even
+// though reflectTypeToAtomicTypeMap sends both Int and Int64 to INT64.
+var atomicTypeToReflectType = map[pipepb.AtomicType]reflect.Type{
+ pipepb.AtomicType_BYTE: reflectx.Uint8,
+ pipepb.AtomicType_INT16: reflectx.Int16,
+ pipepb.AtomicType_INT32: reflectx.Int32,
+ pipepb.AtomicType_INT64: reflectx.Int64,
+ pipepb.AtomicType_FLOAT: reflectx.Float32,
+ pipepb.AtomicType_DOUBLE: reflectx.Float64,
+ pipepb.AtomicType_STRING: reflectx.String,
+ pipepb.AtomicType_BOOLEAN: reflectx.Bool,
+ pipepb.AtomicType_BYTES: reflectx.ByteSlice,
+}
+
+// fieldTypeToReflectType converts a schema field type into a Go type.
+//
+// Atomic types are looked up in atomicTypeToReflectType, arrays become
+// slices, maps become Go maps and rows become structs via ToType. A nullable
+// field type is returned as a pointer to the converted type. Any other kind
+// of field type is reported as an error.
+func fieldTypeToReflectType(sft *pipepb.FieldType) (reflect.Type, error) {
+	var base reflect.Type
+	switch sft.GetTypeInfo().(type) {
+	case *pipepb.FieldType_AtomicType:
+		atomic, known := atomicTypeToReflectType[sft.GetAtomicType()]
+		if !known {
+			return nil, errors.Errorf("unknown atomic type: %v", sft.GetAtomicType())
+		}
+		base = atomic
+
+	case *pipepb.FieldType_ArrayType:
+		elemType, err := fieldTypeToReflectType(sft.GetArrayType().GetElementType())
+		if err != nil {
+			return nil, errors.Wrap(err, "unable to convert array element type")
+		}
+		base = reflect.SliceOf(elemType)
+
+	case *pipepb.FieldType_MapType:
+		keyType, err := fieldTypeToReflectType(sft.GetMapType().GetKeyType())
+		if err != nil {
+			return nil, errors.Wrap(err, "unable to convert map key type")
+		}
+		valueType, err := fieldTypeToReflectType(sft.GetMapType().GetValueType())
+		if err != nil {
+			return nil, errors.Wrap(err, "unable to convert map value type")
+		}
+		base = reflect.MapOf(keyType, valueType) // Panics for invalid map keys (slices/iterables)
+
+	case *pipepb.FieldType_RowType:
+		rowType, err := ToType(sft.GetRowType().GetSchema())
+		if err != nil {
+			return nil, errors.Wrapf(err, "unable to convert row type: %v", sft.GetRowType().GetSchema().GetId())
+		}
+		base = rowType
+
+	// case *pipepb.FieldType_IterableType:
+	// TODO(BEAM-9615): handle IterableTypes.
+
+	// case *pipepb.FieldType_LogicalType:
+	// TODO(BEAM-9615): handle LogicalTypes types.
+
+	// Logical Types are for things that have more specialized user representation already, or
+	// things like Time or protocol buffers.
+	// They would be encoded with the schema encoding.
+
+	default:
+		return nil, errors.Errorf("unknown fieldtype: %T", sft.GetTypeInfo())
+	}
+
+	if !sft.GetNullable() {
+		return base, nil
+	}
+	return reflect.PtrTo(base), nil
+}
+
+// parseTag splits a struct field's beam tag at its first comma. Everything
+// before the comma is the name and everything after it is the
+// comma-separated option list. A tag with no comma is all name and has empty
+// options.
+func parseTag(tag string) (string, options) {
+	parts := strings.SplitN(tag, ",", 2)
+	if len(parts) == 2 {
+		return parts[0], options(parts[1])
+	}
+	return tag, options("")
+}
+
+// options holds the comma-separated options found after the name in a beam
+// struct tag.
+type options string
+
+// TODO(BEAM-9615): implement looking up specific options from the tags.
diff --git a/sdks/go/pkg/beam/core/runtime/graphx/schema/schema_test.go b/sdks/go/pkg/beam/core/runtime/graphx/schema/schema_test.go
new file mode 100644
index 0000000..f6b5ff7
--- /dev/null
+++ b/sdks/go/pkg/beam/core/runtime/graphx/schema/schema_test.go
@@ -0,0 +1,182 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+ "fmt"
+ "reflect"
+ "testing"
+
+ pipepb "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
+ "github.com/google/go-cmp/cmp"
+)
+
+// TestSchemaConversion checks both conversion directions for each table entry:
+// ToType(st) must produce a type whose zero value matches the zero value of rt,
+// and FromType(rt) must produce a schema that matches st.
+func TestSchemaConversion(t *testing.T) {
+ tests := []struct {
+ st *pipepb.Schema
+ rt reflect.Type
+ }{
+ {
+ // A single non-nullable INT32 field.
+ st: &pipepb.Schema{
+ Fields: []*pipepb.Field{
+ &pipepb.Field{
+ Name: "firstField",
+ Type: &pipepb.FieldType{
+ TypeInfo: &pipepb.FieldType_AtomicType{
+ AtomicType: pipepb.AtomicType_INT32,
+ },
+ },
+ },
+ },
+ },
+ rt: reflect.TypeOf(struct {
+ FirstField int32 `beam:"firstField"`
+ }{}),
+ }, {
+ // A STRING field plus a nullable INT32 field, which pairs with a pointer type.
+ st: &pipepb.Schema{
+ Fields: []*pipepb.Field{
+ &pipepb.Field{
+ Name: "stringField",
+ Type: &pipepb.FieldType{
+ TypeInfo: &pipepb.FieldType_AtomicType{
+ AtomicType: pipepb.AtomicType_STRING,
+ },
+ },
+ },
+ &pipepb.Field{
+ Name: "intPtrField",
+ Type: &pipepb.FieldType{
+ Nullable: true,
+ TypeInfo: &pipepb.FieldType_AtomicType{
+ AtomicType: pipepb.AtomicType_INT32,
+ },
+ },
+ },
+ },
+ },
+ rt: reflect.TypeOf(struct {
+ StringField string `beam:"stringField"`
+ IntPtrField *int32 `beam:"intPtrField"`
+ }{}),
+ }, {
+ // A map field with BOOLEAN keys and FLOAT values.
+ st: &pipepb.Schema{
+ Fields: []*pipepb.Field{
+ &pipepb.Field{
+ Name: "cypher",
+ Type: &pipepb.FieldType{
+ TypeInfo: &pipepb.FieldType_MapType{
+ MapType: &pipepb.MapType{
+ KeyType: &pipepb.FieldType{
+ TypeInfo: &pipepb.FieldType_AtomicType{
+ AtomicType: pipepb.AtomicType_BOOLEAN,
+ },
+ },
+ ValueType: &pipepb.FieldType{
+ TypeInfo: &pipepb.FieldType_AtomicType{
+ AtomicType: pipepb.AtomicType_FLOAT,
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ rt: reflect.TypeOf(struct {
+ Cypher map[bool]float32 `beam:"cypher"`
+ }{}),
+ }, {
+ // A row field whose nested schema pairs with a nested struct.
+ st: &pipepb.Schema{
+ Fields: []*pipepb.Field{
+ &pipepb.Field{
+ Name: "wrapper",
+ Type: &pipepb.FieldType{
+ TypeInfo: &pipepb.FieldType_RowType{
+ RowType: &pipepb.RowType{
+ Schema: &pipepb.Schema{
+ Fields: []*pipepb.Field{{
+ Name: "threshold",
+ Type: &pipepb.FieldType{
+ TypeInfo: &pipepb.FieldType_AtomicType{
+ AtomicType: pipepb.AtomicType_DOUBLE,
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ rt: reflect.TypeOf(struct {
+ Wrapper struct {
+ Threshold float64 `beam:"threshold"`
+ } `beam:"wrapper"`
+ }{}),
+ }, {
+ // An array field whose elements are BYTES, i.e. a slice of byte slices.
+ st: &pipepb.Schema{
+ Fields: []*pipepb.Field{
+ &pipepb.Field{
+ Name: "payloads",
+ Type: &pipepb.FieldType{
+ TypeInfo: &pipepb.FieldType_ArrayType{
+ ArrayType: &pipepb.ArrayType{
+ ElementType: &pipepb.FieldType{
+ TypeInfo: &pipepb.FieldType_AtomicType{
+ AtomicType: pipepb.AtomicType_BYTES,
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ rt: reflect.TypeOf(struct {
+ Payloads [][]byte `beam:"payloads"`
+ }{}),
+ },
+ }
+
+ for _, test := range tests {
+ // Copy the loop variable so the subtest closure sees this iteration's case.
+ test := test
+ // Subtests are named after the printed form of the expected Go type.
+ t.Run(fmt.Sprintf("%v", test.rt), func(t *testing.T) {
+ {
+ // Schema -> Go type.
+ got, err := ToType(test.st)
+ if err != nil {
+ t.Fatalf("error ToType(%v) = %v", test.st, err)
+ }
+
+ // The types are compared through zero values instantiated from each of them.
+ if d := cmp.Diff(reflect.New(test.rt).Elem().Interface(), reflect.New(got).Elem().Interface()); d != "" {
+ t.Errorf("diff (-want, +got): %v", d)
+ }
+ }
+ {
+ // Go type -> schema. Not reached if the ToType step above called Fatalf.
+ got, err := FromType(test.rt)
+ if err != nil {
+ t.Fatalf("error FromType(%v) = %v", test.rt, err)
+ }
+
+ if d := cmp.Diff(test.st, got); d != "" {
+ t.Errorf("diff (-want, +got): %v", d)
+ }
+
+ }
+ })
+ }
+}
diff --git a/sdks/go/pkg/beam/core/runtime/graphx/translate.go b/sdks/go/pkg/beam/core/runtime/graphx/translate.go
index b079cb4..09c6994 100644
--- a/sdks/go/pkg/beam/core/runtime/graphx/translate.go
+++ b/sdks/go/pkg/beam/core/runtime/graphx/translate.go
@@ -76,47 +76,40 @@
return append(capabilities, knownStandardCoders()...)
}
+// CreateEnvironment produces the appropriate payload for the type of environment.
func CreateEnvironment(ctx context.Context, urn string, extractEnvironmentConfig func(context.Context) string) *pipepb.Environment {
+ var serializedPayload []byte
switch urn {
case "beam:env:process:v1":
// TODO Support process based SDK Harness.
panic(fmt.Sprintf("Unsupported environment %v", urn))
+ case "beam:env:external:v1":
+ config := extractEnvironmentConfig(ctx)
+ payload := &pipepb.ExternalPayload{Endpoint: &pipepb.ApiServiceDescriptor{Url: config}}
+ serializedPayload = protox.MustEncode(payload)
case "beam:env:docker:v1":
fallthrough
default:
config := extractEnvironmentConfig(ctx)
payload := &pipepb.DockerPayload{ContainerImage: config}
- serializedPayload, err := proto.Marshal(payload)
- if err != nil {
- panic(fmt.Sprintf(
- "Failed to serialize Environment payload %v for config %v: %v", payload, config, err))
- }
-
- return &pipepb.Environment{
- Urn: urn,
- Payload: serializedPayload,
- Capabilities: goCapabilities(),
- Dependencies: []*pipepb.ArtifactInformation{
- &pipepb.ArtifactInformation{
- TypeUrn: URNArtifactGoWorker,
- RoleUrn: URNArtifactStagingTo,
- RolePayload: MustMarshal(&pipepb.ArtifactStagingToRolePayload{
- StagedName: "worker",
- }),
- },
+ serializedPayload = protox.MustEncode(payload)
+ }
+ return &pipepb.Environment{
+ Urn: urn,
+ Payload: serializedPayload,
+ Capabilities: goCapabilities(),
+ Dependencies: []*pipepb.ArtifactInformation{
+ &pipepb.ArtifactInformation{
+ TypeUrn: URNArtifactGoWorker,
+ RoleUrn: URNArtifactStagingTo,
+ RolePayload: protox.MustEncode(&pipepb.ArtifactStagingToRolePayload{
+ StagedName: "worker",
+ }),
},
- }
+ },
}
}
-func MustMarshal(msg proto.Message) []byte {
- res, err := proto.Marshal(msg)
- if err != nil {
- panic(err)
- }
- return res
-}
-
// TODO(herohde) 11/6/2017: move some of the configuration into the graph during construction.
// Options for marshalling a graph into a model pipeline.
diff --git a/sdks/go/pkg/beam/core/runtime/harness/harness.go b/sdks/go/pkg/beam/core/runtime/harness/harness.go
index 0b7b8b7..3aa8d43 100644
--- a/sdks/go/pkg/beam/core/runtime/harness/harness.go
+++ b/sdks/go/pkg/beam/core/runtime/harness/harness.go
@@ -21,6 +21,7 @@
"fmt"
"io"
"sync"
+ "sync/atomic"
"time"
"github.com/apache/beam/sdks/go/pkg/beam/core/runtime/exec"
@@ -87,6 +88,7 @@
log.Errorf(ctx, "control.Send: Failed to respond: %v", err)
}
}
+ log.Debugf(ctx, "control response channel closed")
}()
ctrl := &control{
@@ -103,9 +105,12 @@
// is responsible for managing the network data. All it does is pull data from
// the stream, and hand off the message to a goroutine to actually be handled,
// so as to avoid blocking the underlying network channel.
+ var shutdown int32
for {
req, err := stub.Recv()
if err != nil {
+ // An error means we can't send or receive anymore. Shut down.
+ atomic.AddInt32(&shutdown, 1)
close(respc)
wg.Wait()
@@ -128,7 +133,7 @@
hooks.RunResponseHooks(ctx, req, resp)
recordInstructionResponse(resp)
- if resp != nil {
+ if resp != nil && atomic.LoadInt32(&shutdown) == 0 {
respc <- resp
}
}
@@ -310,7 +315,7 @@
if ds == nil {
return fail(ctx, instID, "failed to split: desired splits for root of %v was empty.", ref)
}
- split, err := plan.Split(exec.SplitPoints{
+ sr, err := plan.Split(exec.SplitPoints{
Splits: ds.GetAllowedSplitPoints(),
Frac: ds.GetFractionOfRemainder(),
BufSize: ds.GetEstimatedInputElements(),
@@ -324,12 +329,22 @@
InstructionId: string(instID),
Response: &fnpb.InstructionResponse_ProcessBundleSplit{
ProcessBundleSplit: &fnpb.ProcessBundleSplitResponse{
- ChannelSplits: []*fnpb.ProcessBundleSplitResponse_ChannelSplit{
- &fnpb.ProcessBundleSplitResponse_ChannelSplit{
- LastPrimaryElement: split - 1,
- FirstResidualElement: split,
+ PrimaryRoots: []*fnpb.BundleApplication{{
+ TransformId: sr.TId,
+ InputId: sr.InId,
+ Element: sr.PS,
+ }},
+ ResidualRoots: []*fnpb.DelayedBundleApplication{{
+ Application: &fnpb.BundleApplication{
+ TransformId: sr.TId,
+ InputId: sr.InId,
+ Element: sr.RS,
},
- },
+ }},
+ ChannelSplits: []*fnpb.ProcessBundleSplitResponse_ChannelSplit{{
+ LastPrimaryElement: sr.PI,
+ FirstResidualElement: sr.RI,
+ }},
},
},
}
diff --git a/sdks/go/pkg/beam/core/runtime/harness/logging.go b/sdks/go/pkg/beam/core/runtime/harness/logging.go
index d6cd31c..291e524 100644
--- a/sdks/go/pkg/beam/core/runtime/harness/logging.go
+++ b/sdks/go/pkg/beam/core/runtime/harness/logging.go
@@ -143,7 +143,7 @@
recordLogEntries(list)
if err := client.Send(list); err != nil {
- fmt.Fprintf(os.Stderr, "Failed to send message: %v\n%v", err, msg)
+ fmt.Fprintf(os.Stderr, "Failed to send message: %v\n%v\n", err, msg)
return err
}
diff --git a/sdks/go/pkg/beam/core/sdf/sdf.go b/sdks/go/pkg/beam/core/sdf/sdf.go
index 3d663d5..1de3f5c 100644
--- a/sdks/go/pkg/beam/core/sdf/sdf.go
+++ b/sdks/go/pkg/beam/core/sdf/sdf.go
@@ -64,8 +64,9 @@
//
// This method modifies the underlying restriction in the RTracker to reflect the primary. It
// then returns a copy of the newly modified restriction as a primary, and returns a new
- // restriction for the residual. If the split would produce an empty residual (i.e. the only
- // split point is the end of the restriction), then the returned residual is nil.
+ // restriction for the residual. If the split would produce an empty residual (either because
+ // the only split point is the end of the restriction, or the split failed for some recoverable
+ // reason), then this function returns nil as the residual.
//
// If an error is returned, some catastrophic failure occurred and the entire bundle will fail.
TrySplit(fraction float64) (primary, residual interface{}, err error)
@@ -77,8 +78,8 @@
// IsDone returns a boolean indicating whether all blocks inside the restriction have been
// claimed. This method is called by the SDK Harness to validate that a splittable DoFn has
- // correctly processed all work in a restriction before finishing. If this method returns false
- // then GetError is expected to return a non-nil error.
+ // correctly processed all work in a restriction before finishing. If this method still returns
+ // false after processing, then GetError is expected to return a non-nil error.
IsDone() bool
// GetRestriction returns the restriction this tracker is tracking, or nil if the restriction
diff --git a/sdks/go/pkg/beam/io/rtrackers/offsetrange/offsetrange.go b/sdks/go/pkg/beam/io/rtrackers/offsetrange/offsetrange.go
index f01ba50..e140a7e 100644
--- a/sdks/go/pkg/beam/io/rtrackers/offsetrange/offsetrange.go
+++ b/sdks/go/pkg/beam/io/rtrackers/offsetrange/offsetrange.go
@@ -20,16 +20,39 @@
package offsetrange
import (
+ "bytes"
+ "encoding/binary"
"errors"
"math"
"reflect"
- "github.com/apache/beam/sdks/go/pkg/beam"
+ "github.com/apache/beam/sdks/go/pkg/beam/core/graph/coder"
+ "github.com/apache/beam/sdks/go/pkg/beam/core/runtime"
)
func init() {
- beam.RegisterType(reflect.TypeOf((*Tracker)(nil)))
- beam.RegisterType(reflect.TypeOf((*Restriction)(nil)))
+ runtime.RegisterType(reflect.TypeOf((*Tracker)(nil)))
+ runtime.RegisterType(reflect.TypeOf((*Restriction)(nil)).Elem())
+ runtime.RegisterFunction(restEnc)
+ runtime.RegisterFunction(restDec)
+ coder.RegisterCoder(reflect.TypeOf((*Restriction)(nil)).Elem(), restEnc, restDec)
+}
+
+// restEnc serializes a Restriction with encoding/binary in big-endian byte
+// order. It is registered in init as the encoding half of the Restriction
+// coder.
+func restEnc(in Restriction) ([]byte, error) {
+	var out bytes.Buffer
+	err := binary.Write(&out, binary.BigEndian, in)
+	if err != nil {
+		return nil, err
+	}
+	return out.Bytes(), nil
+}
+
+// restDec is the inverse of restEnc: it reads a big-endian binary encoded
+// Restriction out of in. If decoding fails, the error is returned along with
+// whatever state the Restriction value was left in.
+func restDec(in []byte) (Restriction, error) {
+	var rest Restriction
+	src := bytes.NewBuffer(in)
+	err := binary.Read(src, binary.BigEndian, &rest)
+	return rest, err
}
// Restriction is an offset range restriction, which represents a range of
@@ -44,10 +67,10 @@
//
// Num should be greater than 0. Otherwise there is no way to split the
// restriction and this function will return the original restriction.
-func (r *Restriction) EvenSplits(num int64) (splits []Restriction) {
+func (r Restriction) EvenSplits(num int64) (splits []Restriction) {
if num <= 1 {
// Don't split, just return original restriction.
- return append(splits, *r)
+ return append(splits, r)
}
offset := r.Start
@@ -67,7 +90,7 @@
}
// Size returns the restriction's size as the difference between Start and End.
-func (r *Restriction) Size() float64 {
+func (r Restriction) Size() float64 {
return float64(r.End - r.Start)
}
diff --git a/sdks/go/pkg/beam/options/jobopts/options.go b/sdks/go/pkg/beam/options/jobopts/options.go
index 3b2dd38..3b6968f 100644
--- a/sdks/go/pkg/beam/options/jobopts/options.go
+++ b/sdks/go/pkg/beam/options/jobopts/options.go
@@ -40,7 +40,7 @@
// EnvironmentType is the environment type to run the user code.
EnvironmentType = flag.String("environment_type", "DOCKER",
- "Environment Type. Possible options are DOCKER and PROCESS.")
+ "Environment Type. Possible options are DOCKER, and LOOPBACK.")
// EnvironmentConfig is the environment configuration for running the user code.
EnvironmentConfig = flag.String("environment_config",
@@ -99,6 +99,8 @@
switch env := strings.ToLower(*EnvironmentType); env {
case "process":
return "beam:env:process:v1"
+ case "loopback", "external":
+ return "beam:env:external:v1"
case "docker":
return "beam:env:docker:v1"
default:
@@ -107,6 +109,11 @@
}
}
+// IsLoopback reports whether the EnvironmentType flag, compared
+// case-insensitively, is "loopback". The "external" spelling selects the same
+// environment URN in GetEnvironmentUrn, but is not considered loopback here.
+func IsLoopback() bool {
+	envType := strings.ToLower(*EnvironmentType)
+	return envType == "loopback"
+}
+
// GetEnvironmentConfig returns the specified configuration for specified SDK Harness,
// if not present, the default development container for the current user.
// Convenience function.
diff --git a/sdks/go/pkg/beam/runners/universal/extworker/extworker.go b/sdks/go/pkg/beam/runners/universal/extworker/extworker.go
new file mode 100644
index 0000000..70d637a
--- /dev/null
+++ b/sdks/go/pkg/beam/runners/universal/extworker/extworker.go
@@ -0,0 +1,120 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package extworker provides an external worker service and related utilities.
+package extworker
+
+import (
+ "context"
+ "fmt"
+ "net"
+ "sync"
+
+ "github.com/apache/beam/sdks/go/pkg/beam/core/runtime/harness"
+ "github.com/apache/beam/sdks/go/pkg/beam/log"
+ fnpb "github.com/apache/beam/sdks/go/pkg/beam/model/fnexecution_v1"
+ "github.com/apache/beam/sdks/go/pkg/beam/util/grpcx"
+ "google.golang.org/grpc"
+)
+
+// StartLoopback listens on localhost at the given port, registers a Loopback
+// ExternalWorkerService on a fresh gRPC server, and begins serving in a
+// background goroutine. The returned Loopback owns a cancellable context
+// derived from ctx.
+func StartLoopback(ctx context.Context, port int) (*Loopback, error) {
+	addr := fmt.Sprintf("localhost:%d", port)
+	lis, err := net.Listen("tcp", addr)
+	if err != nil {
+		return nil, err
+	}
+	log.Infof(ctx, "starting Loopback server at %v", lis.Addr())
+
+	srv := grpc.NewServer()
+	root, cancel := context.WithCancel(ctx)
+	s := &Loopback{
+		lis:        lis,
+		root:       root,
+		rootCancel: cancel,
+		workers:    map[string]context.CancelFunc{},
+		grpcServer: srv,
+	}
+	fnpb.RegisterBeamFnExternalWorkerPoolServer(srv, s)
+	go srv.Serve(lis)
+	return s, nil
+}
+
+// Loopback implements fnpb.BeamFnExternalWorkerPoolServer, starting each
+// requested worker as a harness goroutine inside the current process.
+type Loopback struct {
+ // lis is the listener the gRPC server serves on; its address backs EnvironmentConfig.
+ lis net.Listener
+ // root is the context every worker context is derived from.
+ root context.Context
+ // rootCancel cancels root; it is called by Stop.
+ rootCancel context.CancelFunc
+
+ // mu is held while workers is read or modified.
+ mu sync.Mutex
+ // workers maps a worker ID to the function that cancels that worker's context.
+ workers map[string]context.CancelFunc
+
+ // grpcServer is the server this service is registered on.
+ grpcServer *grpc.Server
+}
+
+// StartWorker launches a new in-process worker harness, implementing
+// BeamFnExternalWorkerPoolServer.StartWorker. Request problems (duplicate
+// worker ID, missing or secured endpoints) are reported through the
+// response's Error field; the returned Go error is always nil.
+func (s *Loopback) StartWorker(ctx context.Context, req *fnpb.StartWorkerRequest) (*fnpb.StartWorkerResponse, error) {
+	id := req.GetWorkerId()
+	log.Infof(ctx, "starting worker %v", id)
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	reject := func(msg string) (*fnpb.StartWorkerResponse, error) {
+		return &fnpb.StartWorkerResponse{Error: msg}, nil
+	}
+	if _, exists := s.workers[id]; exists {
+		return reject(fmt.Sprintf("worker with ID %q already exists", id))
+	}
+	logging, control := req.GetLoggingEndpoint(), req.GetControlEndpoint()
+	switch {
+	case logging == nil:
+		return reject(fmt.Sprintf("Missing logging endpoint for worker %v", id))
+	case control == nil:
+		return reject(fmt.Sprintf("Missing control endpoint for worker %v", id))
+	case logging.Authentication != nil || control.Authentication != nil:
+		return reject("[BEAM-10610] Secure endpoints not supported.")
+	}
+
+	// The worker context hangs off the service's root context rather than the
+	// RPC context, and its cancel function is kept so StopWorker can use it.
+	workerCtx, cancel := context.WithCancel(grpcx.WriteWorkerID(s.root, id))
+	s.workers[id] = cancel
+
+	go harness.Main(workerCtx, logging.GetUrl(), control.GetUrl())
+	return &fnpb.StartWorkerResponse{}, nil
+}
+
+// StopWorker cancels the context of a running worker harness and forgets the
+// worker, implementing BeamFnExternalWorkerPoolServer.StopWorker. An unknown
+// worker ID is reported through the response's Error field.
+func (s *Loopback) StopWorker(ctx context.Context, req *fnpb.StopWorkerRequest) (*fnpb.StopWorkerResponse, error) {
+	id := req.GetWorkerId()
+	log.Infof(ctx, "stopping worker %v", id)
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	cancel, running := s.workers[id]
+	if !running {
+		return &fnpb.StopWorkerResponse{
+			Error: fmt.Sprintf("no worker with id %q running", id),
+		}, nil
+	}
+	cancel()
+	delete(s.workers, id)
+	return &fnpb.StopWorkerResponse{}, nil
+}
+
+// Stop terminates the service and stops all workers.
+//
+// Every worker context is derived from the root context, so cancelling the
+// root stops all workers at once. Stop always returns nil.
+func (s *Loopback) Stop(ctx context.Context) error {
+	s.mu.Lock()
+	log.Infof(ctx, "stopping Loopback, and %d workers", len(s.workers))
+	s.workers = map[string]context.CancelFunc{}
+	s.lis.Close()
+	s.rootCancel()
+	// Release the lock before GracefulStop: it blocks until all pending RPCs
+	// finish, and an in-flight StartWorker or StopWorker RPC waiting on s.mu
+	// could never finish if the lock were still held here.
+	s.mu.Unlock()
+
+	s.grpcServer.GracefulStop()
+	return nil
+}
+
+// EnvironmentConfig returns the "localhost:<port>" address this service
+// instance is listening on, using the port actually bound by the listener.
+func (s *Loopback) EnvironmentConfig(context.Context) string {
+	tcpAddr := s.lis.Addr().(*net.TCPAddr)
+	return fmt.Sprintf("localhost:%d", tcpAddr.Port)
+}
diff --git a/sdks/go/pkg/beam/runners/universal/extworker/extworker_test.go b/sdks/go/pkg/beam/runners/universal/extworker/extworker_test.go
new file mode 100644
index 0000000..3c80065
--- /dev/null
+++ b/sdks/go/pkg/beam/runners/universal/extworker/extworker_test.go
@@ -0,0 +1,141 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package extworker
+
+import (
+ "context"
+ "testing"
+
+ fnpb "github.com/apache/beam/sdks/go/pkg/beam/model/fnexecution_v1"
+ pipepb "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
+)
+
+// TestLoopback starts a Loopback service on a kernel-selected port and drives
+// StartWorker and StopWorker through valid and invalid requests, checking
+// that request problems are reported in the response's Error field rather
+// than as Go errors.
+func TestLoopback(t *testing.T) {
+	endpoint := &pipepb.ApiServiceDescriptor{
+		Url: "localhost:0",
+	}
+	secureEndpoint := &pipepb.ApiServiceDescriptor{
+		Url: "localhost:0",
+		Authentication: &pipepb.AuthenticationSpec{
+			Urn: "beam:authentication:oauth2_client_credentials_grant:v1",
+		},
+	}
+
+	ctx := context.Background()
+	server, err := StartLoopback(ctx, 0)
+	if err != nil {
+		t.Fatalf("Unable to start server: %v", err)
+	}
+
+	startTests := []struct {
+		req         *fnpb.StartWorkerRequest
+		errExpected bool
+	}{
+		{
+			req: &fnpb.StartWorkerRequest{
+				WorkerId:        "Worker1",
+				ControlEndpoint: endpoint,
+				LoggingEndpoint: endpoint,
+			},
+		}, {
+			req: &fnpb.StartWorkerRequest{
+				WorkerId:        "Worker2",
+				ControlEndpoint: endpoint,
+				LoggingEndpoint: endpoint,
+			},
+		}, {
+			req: &fnpb.StartWorkerRequest{
+				WorkerId:        "Worker1",
+				ControlEndpoint: endpoint,
+				LoggingEndpoint: endpoint,
+			},
+			errExpected: true, // Repeated start
+		}, {
+			req: &fnpb.StartWorkerRequest{
+				WorkerId:        "missingControl",
+				LoggingEndpoint: endpoint,
+			},
+			errExpected: true,
+		}, {
+			req: &fnpb.StartWorkerRequest{
+				WorkerId:        "missingLogging",
+				ControlEndpoint: endpoint,
+			},
+			errExpected: true,
+		}, {
+			req: &fnpb.StartWorkerRequest{
+				WorkerId:        "secureLogging",
+				LoggingEndpoint: secureEndpoint,
+				ControlEndpoint: endpoint,
+			},
+			errExpected: true,
+		}, {
+			req: &fnpb.StartWorkerRequest{
+				WorkerId:        "secureControl",
+				LoggingEndpoint: endpoint,
+				ControlEndpoint: secureEndpoint,
+			},
+			errExpected: true,
+		},
+	}
+	for _, test := range startTests {
+		resp, err := server.StartWorker(ctx, test.req)
+		if test.errExpected {
+			if err != nil || resp.Error == "" {
+				t.Errorf("Expected error starting %v: err: %v, resp: %v", test.req.GetWorkerId(), err, resp)
+			}
+		} else {
+			if err != nil || resp.Error != "" {
+				t.Errorf("Unexpected error starting %v: err: %v, resp: %v", test.req.GetWorkerId(), err, resp)
+			}
+		}
+	}
+	stopTests := []struct {
+		req         *fnpb.StopWorkerRequest
+		errExpected bool
+	}{
+		{
+			req: &fnpb.StopWorkerRequest{
+				WorkerId: "Worker1",
+			},
+		}, {
+			req: &fnpb.StopWorkerRequest{
+				WorkerId: "Worker1",
+			},
+			errExpected: true, // Repeated stop
+		}, {
+			req: &fnpb.StopWorkerRequest{
+				WorkerId: "NonExistent",
+			},
+			errExpected: true,
+		},
+	}
+	for _, test := range stopTests {
+		resp, err := server.StopWorker(ctx, test.req)
+		// These failures concern StopWorker, so the messages say "stopping".
+		if test.errExpected {
+			if err != nil || resp.Error == "" {
+				t.Errorf("Expected error stopping %v: err: %v, resp: %v", test.req.GetWorkerId(), err, resp)
+			}
+		} else {
+			if err != nil || resp.Error != "" {
+				t.Errorf("Unexpected error stopping %v: err: %v, resp: %v", test.req.GetWorkerId(), err, resp)
+			}
+		}
+	}
+	if err := server.Stop(ctx); err != nil {
+		t.Fatalf("error stopping server: err: %v", err)
+	}
+}
diff --git a/sdks/go/pkg/beam/runners/universal/universal.go b/sdks/go/pkg/beam/runners/universal/universal.go
index 1f80866..96b7753 100644
--- a/sdks/go/pkg/beam/runners/universal/universal.go
+++ b/sdks/go/pkg/beam/runners/universal/universal.go
@@ -23,11 +23,13 @@
"github.com/apache/beam/sdks/go/pkg/beam"
"github.com/apache/beam/sdks/go/pkg/beam/core/runtime/graphx"
+
// Importing to get the side effect of the remote execution hook. See init().
_ "github.com/apache/beam/sdks/go/pkg/beam/core/runtime/harness/init"
"github.com/apache/beam/sdks/go/pkg/beam/internal/errors"
"github.com/apache/beam/sdks/go/pkg/beam/log"
"github.com/apache/beam/sdks/go/pkg/beam/options/jobopts"
+ "github.com/apache/beam/sdks/go/pkg/beam/runners/universal/extworker"
"github.com/apache/beam/sdks/go/pkg/beam/runners/universal/runnerlib"
"github.com/apache/beam/sdks/go/pkg/beam/runners/vet"
"github.com/golang/protobuf/proto"
@@ -61,8 +63,21 @@
if err != nil {
return err
}
+ envUrn := jobopts.GetEnvironmentUrn(ctx)
+ getEnvCfg := jobopts.GetEnvironmentConfig
+
+ if jobopts.IsLoopback() {
+ // TODO(BEAM-10610): Allow user configuration of this port, rather than kernel selected.
+ srv, err := extworker.StartLoopback(ctx, 0)
+ if err != nil {
+ return err
+ }
+ defer srv.Stop(ctx)
+ getEnvCfg = srv.EnvironmentConfig
+ }
+
pipeline, err := graphx.Marshal(edges, &graphx.Options{Environment: graphx.CreateEnvironment(
- ctx, jobopts.GetEnvironmentUrn(ctx), jobopts.GetEnvironmentConfig)})
+ ctx, envUrn, getEnvCfg)})
if err != nil {
return errors.WithContextf(err, "generating model pipeline")
}
diff --git a/sdks/go/pkg/beam/transforms/stats/count.go b/sdks/go/pkg/beam/transforms/stats/count.go
index cbaa995..d7fbab8 100644
--- a/sdks/go/pkg/beam/transforms/stats/count.go
+++ b/sdks/go/pkg/beam/transforms/stats/count.go
@@ -45,7 +45,8 @@
col = beam.DropKey(s, col)
}
pre := beam.ParDo(s, countFn, col)
- return Sum(s, pre)
+ zero := beam.Create(s, 0)
+ return Sum(s, beam.Flatten(s, pre, zero))
}
func countFn(_ beam.T) int {
diff --git a/sdks/go/pkg/beam/transforms/stats/count_test.go b/sdks/go/pkg/beam/transforms/stats/count_test.go
index 4fd20a9..8665593 100644
--- a/sdks/go/pkg/beam/transforms/stats/count_test.go
+++ b/sdks/go/pkg/beam/transforms/stats/count_test.go
@@ -79,6 +79,13 @@
count int
}{
{
+ name: "empty",
+ in: func(s beam.Scope) beam.PCollection {
+ return beam.CreateList(s, []int{})
+ },
+ count: 0,
+ },
+ {
name: "single",
in: func(s beam.Scope) beam.PCollection {
return beam.Create(s, 1)
diff --git a/sdks/go/pkg/beam/util/grpcx/dial.go b/sdks/go/pkg/beam/util/grpcx/dial.go
index 8651c96..c549665 100644
--- a/sdks/go/pkg/beam/util/grpcx/dial.go
+++ b/sdks/go/pkg/beam/util/grpcx/dial.go
@@ -17,6 +17,7 @@
import (
"context"
+ "math"
"time"
"github.com/apache/beam/sdks/go/pkg/beam/internal/errors"
@@ -33,7 +34,7 @@
defer cancel()
cc, err := grpc.DialContext(ctx, endpoint, grpc.WithInsecure(), grpc.WithBlock(),
- grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(50<<20)))
+ grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)))
if err != nil {
return nil, errors.Wrapf(err, "failed to dial server at %v", endpoint)
}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/expansion/ExternalTransformRegistrar.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/expansion/ExternalTransformRegistrar.java
index bbdd3a5..aa5288c 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/expansion/ExternalTransformRegistrar.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/expansion/ExternalTransformRegistrar.java
@@ -17,10 +17,14 @@
*/
package org.apache.beam.sdk.expansion;
+import java.lang.reflect.Constructor;
import java.util.Map;
+import java.util.Map.Entry;
import org.apache.beam.sdk.annotations.Experimental;
import org.apache.beam.sdk.annotations.Experimental.Kind;
import org.apache.beam.sdk.transforms.ExternalTransformBuilder;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
/**
* A registrar which contains a mapping from URNs to available {@link ExternalTransformBuilder}s.
@@ -29,6 +33,42 @@
@Experimental(Kind.PORTABILITY)
public interface ExternalTransformRegistrar {
- /** A mapping from URN to an {@link ExternalTransformBuilder} class. */
- Map<String, Class<? extends ExternalTransformBuilder<?, ?, ?>>> knownBuilders();
+  /**
+   * Maps each URN to the {@link ExternalTransformBuilder} class that handles it. The default
+   * implementation returns an empty map.
+   *
+   * @deprecated Prefer implementing 'knownBuilderInstances'. This method will be removed in a
+   *     future version of Beam.
+   */
+  @Deprecated
+  default Map<String, Class<? extends ExternalTransformBuilder<?, ?, ?>>> knownBuilders() {
+    return ImmutableMap.of();
+  }
+
+  /**
+   * A mapping from URN to an {@link ExternalTransformBuilder} instance.
+   *
+   * <p>The default implementation instantiates every class returned by {@link #knownBuilders()}
+   * through its no-argument constructor, making that constructor accessible first.
+   *
+   * @throws IllegalStateException if a registered class is not an {@link
+   *     ExternalTransformBuilder}
+   * @throws RuntimeException if a builder cannot be instantiated; the underlying failure is
+   *     attached as the cause
+   */
+  default Map<String, ExternalTransformBuilder<?, ?, ?>> knownBuilderInstances() {
+    ImmutableMap.Builder<String, ExternalTransformBuilder<?, ?, ?>> builder =
+        ImmutableMap.builder();
+    Map<String, Class<? extends ExternalTransformBuilder<?, ?, ?>>> knownBuilders = knownBuilders();
+    for (Entry<String, Class<? extends ExternalTransformBuilder<?, ?, ?>>> knownBuilder :
+        knownBuilders.entrySet()) {
+      Preconditions.checkState(
+          ExternalTransformBuilder.class.isAssignableFrom(knownBuilder.getValue()),
+          "Provided identifier %s is not an ExternalTransformBuilder.",
+          knownBuilder.getValue().getName());
+      try {
+        Constructor<? extends ExternalTransformBuilder<?, ?, ?>> constructor =
+            knownBuilder.getValue().getDeclaredConstructor();
+
+        // Builders may be declared with a non-public constructor.
+        constructor.setAccessible(true);
+        builder.put(knownBuilder.getKey(), constructor.newInstance());
+
+      } catch (RuntimeException e) {
+        throw e;
+      } catch (Exception e) {
+        // Keep the original failure as the cause so the reason is not lost.
+        throw new RuntimeException(
+            "Unable to instantiate ExternalTransformBuilder from constructor.", e);
+      }
+    }
+    return builder.build();
+  }
}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/Schema.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/Schema.java
index 0995522..226924f 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/Schema.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/Schema.java
@@ -1273,7 +1273,7 @@
public int indexOf(String fieldName) {
Integer index = fieldIndices.get(fieldName);
Preconditions.checkArgument(
- index != null, String.format("Cannot find field %s in schema %s", fieldName, this));
+ index != null, "Cannot find field %s in schema %s", fieldName, this);
return index;
}
@@ -1285,7 +1285,7 @@
/** Return the name of field by index. */
public String nameOf(int fieldIndex) {
String name = fieldIndices.inverse().get(fieldIndex);
- Preconditions.checkArgument(name != null, String.format("Cannot find field %d", fieldIndex));
+ Preconditions.checkArgument(name != null, "Cannot find field %s", fieldIndex);
return name;
}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/DateTime.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/DateTime.java
new file mode 100644
index 0000000..148c726
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/DateTime.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.schemas.logicaltypes;
+
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.values.Row;
+
+/**
+ * Logical type for a date-time that has no time-zone, so by itself it cannot identify an instant
+ * on the time-line; an offset or time-zone would be needed for that.
+ *
+ * <p>The input type is {@link LocalDateTime}. The base type is a {@link Row} made of a Date field
+ * and a Time field, both INT64. The Date field uses the base representation of {@link Date}: a
+ * count of days in which day 0 is 1970-01-01 (ISO). The Time field uses the base representation
+ * of {@link Time}: a count of nanoseconds (here, the nano-of-day of the input).
+ */
+public class DateTime implements Schema.LogicalType<LocalDateTime, Row> {
+  public static final String DATE_FIELD_NAME = "Date";
+  public static final String TIME_FIELD_NAME = "Time";
+  public static final Schema DATETIME_SCHEMA =
+      Schema.builder().addInt64Field(DATE_FIELD_NAME).addInt64Field(TIME_FIELD_NAME).build();
+
+  @Override
+  public String getIdentifier() {
+    return "beam:logical_type:datetime:v1";
+  }
+
+  // Unused by this logical type; STRING is returned only as a placeholder.
+  @Override
+  public Schema.FieldType getArgumentType() {
+    return Schema.FieldType.STRING;
+  }
+
+  // Unused by this logical type; the empty string is returned only as a placeholder.
+  @Override
+  public String getArgument() {
+    return "";
+  }
+
+  @Override
+  public Schema.FieldType getBaseType() {
+    return Schema.FieldType.row(DATETIME_SCHEMA);
+  }
+
+  // Null-safe: a null input is passed through as a null Row.
+  @Override
+  public Row toBaseType(LocalDateTime input) {
+    if (input == null) {
+      return null;
+    }
+    long epochDay = input.toLocalDate().toEpochDay();
+    long nanoOfDay = input.toLocalTime().toNanoOfDay();
+    return Row.withSchema(DATETIME_SCHEMA).addValues(epochDay, nanoOfDay).build();
+  }
+
+  @Override
+  public LocalDateTime toInputType(Row base) {
+    if (base == null) {
+      return null;
+    }
+    LocalDate date = LocalDate.ofEpochDay(base.getInt64(DATE_FIELD_NAME));
+    LocalTime time = LocalTime.ofNanoOfDay(base.getInt64(TIME_FIELD_NAME));
+    return LocalDateTime.of(date, time);
+  }
+}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/SqlTypes.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/SqlTypes.java
index d22b77a..ef6a68a 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/SqlTypes.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/SqlTypes.java
@@ -18,8 +18,10 @@
package org.apache.beam.sdk.schemas.logicaltypes;
import java.time.LocalDate;
+import java.time.LocalDateTime;
import java.time.LocalTime;
import org.apache.beam.sdk.schemas.Schema.LogicalType;
+import org.apache.beam.sdk.values.Row;
/** Beam {@link org.apache.beam.sdk.schemas.Schema.LogicalType}s corresponding to SQL data types. */
public class SqlTypes {
@@ -31,4 +33,7 @@
/** Beam LogicalType corresponding to ZetaSQL/CalciteSQL TIME type. */
public static final LogicalType<LocalTime, Long> TIME = new Time();
+
+ /** Beam LogicalType corresponding to ZetaSQL DATETIME type. */
+ public static final LogicalType<LocalDateTime, Row> DATETIME = new DateTime();
}
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTest.java
index 17518a6..05ed20d 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTest.java
@@ -351,4 +351,24 @@
Schema schema4 = Schema.builder().addInt32Field("foo").build();
assertFalse(schema1.typesEqual(schema4)); // schema1 and schema4 differ by types
}
+
+ @Test
+ public void testIllegalIndexOf() {
+ Schema schema = Schema.builder().addStringField("foo").build();
+ // "bar" is not a field of this schema, so indexOf must fail and name both the field and schema.
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("Cannot find field bar in schema " + schema);
+
+ schema.indexOf("bar");
+ }
+
+ @Test
+ public void testIllegalNameOf() {
+ Schema schema = Schema.builder().addStringField("foo").build();
+ // Only one field is defined, so index 1 has no name and nameOf must fail mentioning the index.
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("Cannot find field 1");
+
+ schema.nameOf(1);
+ }
}
diff --git a/sdks/java/expansion-service/src/main/java/org/apache/beam/sdk/expansion/service/ExpansionService.java b/sdks/java/expansion-service/src/main/java/org/apache/beam/sdk/expansion/service/ExpansionService.java
index c20e8d2..09a42e3 100644
--- a/sdks/java/expansion-service/src/main/java/org/apache/beam/sdk/expansion/service/ExpansionService.java
+++ b/sdks/java/expansion-service/src/main/java/org/apache/beam/sdk/expansion/service/ExpansionService.java
@@ -22,7 +22,6 @@
import com.google.auto.service.AutoService;
import java.io.IOException;
-import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.util.ArrayDeque;
import java.util.Collections;
@@ -107,17 +106,21 @@
ImmutableMap.builder();
for (ExternalTransformRegistrar registrar :
ServiceLoader.load(ExternalTransformRegistrar.class)) {
- for (Map.Entry<String, Class<? extends ExternalTransformBuilder<?, ?, ?>>> entry :
- registrar.knownBuilders().entrySet()) {
+ for (Map.Entry<String, ExternalTransformBuilder<?, ?, ?>> entry :
+ registrar.knownBuilderInstances().entrySet()) {
String urn = entry.getKey();
- Class<? extends ExternalTransformBuilder<?, ?, ?>> builderClass = entry.getValue();
+ ExternalTransformBuilder builderInstance = entry.getValue();
builder.put(
urn,
spec -> {
try {
ExternalTransforms.ExternalConfigurationPayload payload =
ExternalTransforms.ExternalConfigurationPayload.parseFrom(spec.getPayload());
- return translate(payload, builderClass);
+ return builderInstance.buildExternal(
+ payloadToConfig(
+ payload,
+ (Class<? extends ExternalTransformBuilder<?, ?, ?>>)
+ builderInstance.getClass()));
} catch (Exception e) {
throw new RuntimeException(
String.format("Failed to build transform %s from spec %s", urn, spec), e);
@@ -125,21 +128,17 @@
});
}
}
+
return builder.build();
}
- private static PTransform<?, ?> translate(
+ Object payloadToConfig(
ExternalTransforms.ExternalConfigurationPayload payload,
Class<? extends ExternalTransformBuilder<?, ?, ?>> builderClass)
throws Exception {
- Preconditions.checkState(
- ExternalTransformBuilder.class.isAssignableFrom(builderClass),
- "Provided identifier %s is not an ExternalTransformBuilder.",
- builderClass.getName());
-
Object configObject = initConfiguration(builderClass);
populateConfiguration(configObject, payload);
- return buildTransform(builderClass, configObject);
+ return configObject;
}
private static Object initConfiguration(
@@ -239,28 +238,6 @@
return coderBuilder.build();
}
-
- private static PTransform<?, ?> buildTransform(
- Class<? extends ExternalTransformBuilder<?, ?, ?>> builderClass, Object configObject)
- throws Exception {
- Constructor<? extends ExternalTransformBuilder<?, ?, ?>> constructor =
- builderClass.getDeclaredConstructor();
- constructor.setAccessible(true);
- ExternalTransformBuilder<?, ?, ?> externalTransformBuilder = constructor.newInstance();
- Method buildMethod = builderClass.getMethod("buildExternal", configObject.getClass());
- buildMethod.setAccessible(true);
-
- PTransform<?, ?> transform =
- (PTransform<?, ?>)
- checkArgumentNotNull(
- buildMethod.invoke(externalTransformBuilder, configObject),
- "Invoking %s.%s(%s) returned null, violating its type.",
- builderClass.getCanonicalName(),
- "buildExternal",
- configObject);
-
- return transform;
- }
}
/**
diff --git a/sdks/java/extensions/sql/datacatalog/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/datacatalog/SchemaUtils.java b/sdks/java/extensions/sql/datacatalog/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/datacatalog/SchemaUtils.java
index 4604a00..42efbbd 100644
--- a/sdks/java/extensions/sql/datacatalog/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/datacatalog/SchemaUtils.java
+++ b/sdks/java/extensions/sql/datacatalog/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/datacatalog/SchemaUtils.java
@@ -39,7 +39,7 @@
.put("BOOL", FieldType.BOOLEAN)
.put("BYTES", FieldType.BYTES)
.put("DATE", FieldType.logicalType(SqlTypes.DATE))
- .put("DATETIME", FieldType.DATETIME)
+ .put("DATETIME", FieldType.logicalType(SqlTypes.DATETIME))
.put("DOUBLE", FieldType.DOUBLE)
.put("FLOAT", FieldType.DOUBLE)
.put("FLOAT64", FieldType.DOUBLE)
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/ScalarFunctionImpl.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/ScalarFunctionImpl.java
index 3ef4d9f..b4a9d7e 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/ScalarFunctionImpl.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/ScalarFunctionImpl.java
@@ -124,7 +124,7 @@
@Override
public RelDataType getReturnType(RelDataTypeFactory typeFactory) {
- return CalciteUtils.sqlTypeWithAutoCast(typeFactory, method.getReturnType());
+ return CalciteUtils.sqlTypeWithAutoCast(typeFactory, method.getGenericReturnType());
}
@Override
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPCall.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPCall.java
new file mode 100644
index 0000000..fb1d6da
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPCall.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.cep;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexCall;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexLiteral;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexPatternFieldRef;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlOperator;
+
+/**
+ * A node of a CEP expression tree: one {@link CEPOperator} applied to an ordered list of operand
+ * nodes. It plays a role similar to that of {@code RexCall} in Calcite.
+ */
+public class CEPCall extends CEPOperation {
+
+  private final CEPOperator operator;
+  private final List<CEPOperation> operands;
+
+  private CEPCall(CEPOperator operator, List<CEPOperation> operands) {
+    this.operator = operator;
+    this.operands = operands;
+  }
+
+  public CEPOperator getOperator() {
+    return operator;
+  }
+
+  public List<CEPOperation> getOperands() {
+    return operands;
+  }
+
+  /**
+   * Builds a {@code CEPCall} from a Calcite call, converting nested calls recursively. Operands are
+   * dispatched on their exact runtime class; one that is not a {@code RexCall}, {@code RexLiteral}
+   * or {@code RexPatternFieldRef} raises {@link UnsupportedOperationException}.
+   */
+  public static CEPCall of(RexCall operation) {
+    CEPOperator convertedOperator = CEPOperator.of(operation.getOperator());
+    List<CEPOperation> convertedOperands = new ArrayList<>();
+
+    for (RexNode operand : operation.getOperands()) {
+      Class<?> nodeClass = operand.getClass();
+      if (nodeClass == RexCall.class) {
+        convertedOperands.add(CEPCall.of((RexCall) operand));
+      } else if (nodeClass == RexLiteral.class) {
+        convertedOperands.add(CEPLiteral.of((RexLiteral) operand));
+      } else if (nodeClass == RexPatternFieldRef.class) {
+        convertedOperands.add(CEPFieldRef.of((RexPatternFieldRef) operand));
+      } else {
+        throw new UnsupportedOperationException("RexNode not supported: " + nodeClass.getName());
+      }
+    }
+
+    return new CEPCall(convertedOperator, convertedOperands);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder renderedOperands = new StringBuilder();
+    for (int idx = 0; idx < operands.size(); idx++) {
+      renderedOperands.append(idx == 0 ? "" : ", ").append(operands.get(idx).toString());
+    }
+    return operator.toString() + "(" + renderedOperands + ")";
+  }
+}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPFieldRef.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPFieldRef.java
new file mode 100644
index 0000000..68aaf8d
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPFieldRef.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.cep;
+
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexPatternFieldRef;
+
+/**
+ * A node that refers to a field of a {@code Row} through an {@code alpha} label and a field index;
+ * the CEP counterpart of Calcite's {@code RexPatternFieldRef}. Printed as {@code alpha.$index}.
+ */
+public class CEPFieldRef extends CEPOperation {
+  private final String alpha; // value of RexPatternFieldRef.getAlpha() when built through of(...)
+  private final int fieldIndex; // value of RexPatternFieldRef.getIndex() when built through of(...)
+
+  CEPFieldRef(String alpha, int fieldIndex) {
+    this.alpha = alpha;
+    this.fieldIndex = fieldIndex;
+  }
+
+  public static CEPFieldRef of(RexPatternFieldRef rexFieldRef) {
+    return new CEPFieldRef(rexFieldRef.getAlpha(), rexFieldRef.getIndex());
+  }
+
+  public String getAlpha() {
+    return alpha;
+  }
+
+  public int getIndex() {
+    return fieldIndex;
+  }
+
+  @Override
+  public String toString() {
+    // Concatenate instead of formatting with "%d": Formatter localizes digits per default locale.
+    return alpha + ".$" + fieldIndex;
+  }
+}
diff --git a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPKind.java
similarity index 63%
copy from .test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
copy to sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPKind.java
index 2aa89c2..896f3f7 100644
--- a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPKind.java
@@ -15,22 +15,25 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package org.apache.beam.sdk.extensions.sql.impl.cep;
-import PrecommitJobBuilder
+import java.io.Serializable;
-PrecommitJobBuilder builder = new PrecommitJobBuilder(
- scope: this,
- nameBase: 'JavaBeamZetaSQL',
- gradleTask: ':javaPreCommitBeamZetaSQL',
- gradleSwitches: [
- '-PdisableSpotlessCheck=true'
- ], // spotless checked in separate pre-commit
- triggerPathPatterns: [
- '^sdks/java/extensions/sql/.*$',
- ]
- )
-builder.build {
- publishers {
- archiveJunit('**/build/test-results/**/*.xml')
- }
+/**
+ * CEP counterpart of Calcite's {@code SqlKind}, covering only the operations enumerated below.
+ */
+public enum CEPKind implements Serializable {
+ COUNT,
+ AVG,
+ SUM,
+ FIRST,
+ LAST,
+ PREV,
+ NEXT,
+ EQUALS,
+ GREATER_THAN,
+ GREATER_THAN_OR_EQUAL,
+ LESS_THAN,
+ LESS_THAN_OR_EQUAL,
+ NONE // what CEPOperator.of falls back to when a SqlKind has no entry in its lookup table
}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPLiteral.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPLiteral.java
new file mode 100644
index 0000000..b243293
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPLiteral.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.cep;
+
+import java.math.BigDecimal;
+import org.apache.beam.sdk.extensions.sql.impl.SqlConversionException;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexLiteral;
+import org.joda.time.ReadableDateTime;
+
+/**
+ * A literal node of a CEP expression; the CEP counterpart of Calcite's {@code RexLiteral}.
+ */
+public class CEPLiteral extends CEPOperation {
+
+ private final Schema.TypeName typeName;
+
+ private CEPLiteral(Schema.TypeName typeName) {
+ this.typeName = typeName;
+ }
+ // NOTE(review): confirm getTypeName() reports the SQL type here and not just the value family.
+ // TODO: deal with other types (byte, short...)
+ public static CEPLiteral of(RexLiteral lit) {
+ switch (lit.getTypeName()) {
+ case INTEGER:
+ return of(lit.getValueAs(Integer.class));
+ case BIGINT:
+ return of(lit.getValueAs(Long.class));
+ case DECIMAL:
+ return of(lit.getValueAs(BigDecimal.class));
+ case FLOAT:
+ return of(lit.getValueAs(Float.class));
+ case DOUBLE:
+ return of(lit.getValueAs(Double.class));
+ case BOOLEAN:
+ return of(lit.getValueAs(Boolean.class));
+ case DATE: // NOTE(review): verify getValueAs can produce a ReadableDateTime for DATE
+ return of(lit.getValueAs(ReadableDateTime.class));
+ case CHAR:
+ case VARCHAR:
+ return of(lit.getValueAs(String.class));
+ default:
+ throw new SqlConversionException("SQL type not supported: " + lit.getTypeName().toString());
+ }
+ }
+ // Typed factories: each returns an anonymous subclass overriding only the getter of its type.
+ public static CEPLiteral of(Byte myByte) {
+ return new CEPLiteral(Schema.TypeName.BYTE) {
+ @Override
+ public Byte getByte() {
+ return myByte;
+ }
+ };
+ }
+
+ public static CEPLiteral of(Short myShort) {
+ return new CEPLiteral(Schema.TypeName.INT16) {
+ @Override
+ public Short getInt16() {
+ return myShort;
+ }
+ };
+ }
+
+ public static CEPLiteral of(Integer myInt) {
+ return new CEPLiteral(Schema.TypeName.INT32) {
+ @Override
+ public Integer getInt32() {
+ return myInt;
+ }
+ };
+ }
+
+ public static CEPLiteral of(Long myLong) {
+ return new CEPLiteral(Schema.TypeName.INT64) {
+ @Override
+ public Long getInt64() {
+ return myLong;
+ }
+ };
+ }
+
+ public static CEPLiteral of(BigDecimal myDecimal) {
+ return new CEPLiteral(Schema.TypeName.DECIMAL) {
+ @Override
+ public BigDecimal getDecimal() {
+ return myDecimal;
+ }
+ };
+ }
+
+ public static CEPLiteral of(Float myFloat) {
+ return new CEPLiteral(Schema.TypeName.FLOAT) {
+ @Override
+ public Float getFloat() {
+ return myFloat;
+ }
+ };
+ }
+
+ public static CEPLiteral of(Double myDouble) {
+ return new CEPLiteral(Schema.TypeName.DOUBLE) {
+ @Override
+ public Double getDouble() {
+ return myDouble;
+ }
+ };
+ }
+
+ public static CEPLiteral of(ReadableDateTime myDateTime) {
+ return new CEPLiteral(Schema.TypeName.DATETIME) {
+ @Override
+ public ReadableDateTime getDateTime() {
+ return myDateTime;
+ }
+ };
+ }
+
+ public static CEPLiteral of(Boolean myBoolean) {
+ return new CEPLiteral(Schema.TypeName.BOOLEAN) {
+ @Override
+ public Boolean getBoolean() {
+ return myBoolean;
+ }
+ };
+ }
+
+ public static CEPLiteral of(String myString) {
+ return new CEPLiteral(Schema.TypeName.STRING) {
+ @Override
+ public String getString() {
+ return myString;
+ }
+ };
+ }
+ // Default getters: each throws SqlConversionException unless a factory-built subclass overrides it.
+ public Byte getByte() {
+ throw new SqlConversionException("the class must be subclassed properly to get the value");
+ }
+
+ public Short getInt16() {
+ throw new SqlConversionException("the class must be subclassed properly to get the value");
+ }
+
+ public Integer getInt32() {
+ throw new SqlConversionException("the class must be subclassed properly to get the value");
+ }
+
+ public Long getInt64() {
+ throw new SqlConversionException("the class must be subclassed properly to get the value");
+ }
+
+ public BigDecimal getDecimal() {
+ throw new SqlConversionException("the class must be subclassed properly to get the value");
+ }
+
+ public Float getFloat() {
+ throw new SqlConversionException("the class must be subclassed properly to get the value");
+ }
+
+ public Double getDouble() {
+ throw new SqlConversionException("the class must be subclassed properly to get the value");
+ }
+
+ public ReadableDateTime getDateTime() {
+ throw new SqlConversionException("the class must be subclassed properly to get the value");
+ }
+
+ public Boolean getBoolean() {
+ throw new SqlConversionException("the class must be subclassed properly to get the value");
+ }
+
+ public String getString() {
+ throw new SqlConversionException("the class must be subclassed properly to get the value");
+ }
+ // Returns the Beam schema type name passed in by the factory that created this literal.
+ public Schema.TypeName getTypeName() {
+ return typeName;
+ }
+}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPMeasure.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPMeasure.java
new file mode 100644
index 0000000..884e887
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPMeasure.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.cep;
+
+import java.io.Serializable;
+import org.apache.beam.sdk.schemas.Schema;
+
+/**
+ * A measure of the {@code MEASURES} clause: an output column name, the operation that defines it,
+ * and the field reference and field type that {@code CEPUtil} derives from that operation.
+ */
+public class CEPMeasure implements Serializable {
+ // outTableName is what getName() returns, i.e. the output column name despite the "Table" in it.
+ private final String outTableName;
+ private final CEPOperation opr;
+ private final CEPFieldRef fieldRef;
+ private final Schema.FieldType fieldType;
+ // fieldRef and fieldType are computed once, at construction time, through CEPUtil.
+ public CEPMeasure(Schema streamSchema, String outTableName, CEPOperation opr) {
+ this.outTableName = outTableName;
+ this.opr = opr;
+ this.fieldRef = CEPUtil.getFieldRef(opr);
+ this.fieldType = CEPUtil.getFieldType(streamSchema, opr);
+ }
+
+ // Returns the name of the output column.
+ public String getName() {
+ return outTableName;
+ }
+
+ public CEPOperation getOperation() {
+ return opr;
+ }
+
+ public CEPFieldRef getField() {
+ return fieldRef;
+ }
+
+ public Schema.FieldType getType() {
+ return fieldType;
+ }
+}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPOperation.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPOperation.java
new file mode 100644
index 0000000..50f735f
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPOperation.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.cep;
+
+import java.io.Serializable;
+import org.apache.beam.sdk.extensions.sql.impl.SqlConversionException;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexCall;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexLiteral;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexPatternFieldRef;
+
+/**
+ * Base class of the nodes that make up an expression evaluated for the {@code DEFINE} clause of
+ * {@code MATCH_RECOGNIZE}. Its subclasses are {@code CEPCall}, {@code CEPFieldRef} and {@code
+ * CEPLiteral}.
+ */
+public abstract class CEPOperation implements Serializable {
+
+  public static CEPOperation of(RexNode operation) {
+    // Dispatch on the exact runtime class; any other node type is rejected below.
+    Class<?> nodeClass = operation.getClass();
+    if (nodeClass == RexCall.class) {
+      return CEPCall.of((RexCall) operation);
+    }
+    if (nodeClass == RexLiteral.class) {
+      return CEPLiteral.of((RexLiteral) operation);
+    }
+    if (nodeClass == RexPatternFieldRef.class) {
+      return CEPFieldRef.of((RexPatternFieldRef) operation);
+    }
+    throw new SqlConversionException("RexNode not supported: " + nodeClass.getName());
+  }
+}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPOperator.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPOperator.java
new file mode 100644
index 0000000..a2382a3
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPOperator.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.cep;
+
+import java.io.Serializable;
+import java.util.Map;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlKind;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlOperator;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
+
+/**
+ * Wraps the {@link CEPKind} of an operator (i.e. function) that appears in the {@code DEFINE}
+ * clause of {@code MATCH_RECOGNIZE}.
+ */
+public class CEPOperator implements Serializable {
+  private static final Map<SqlKind, CEPKind> KIND_BY_SQL_KIND =
+      ImmutableMap.<SqlKind, CEPKind>builder()
+          .put(SqlKind.SUM, CEPKind.SUM)
+          .put(SqlKind.COUNT, CEPKind.COUNT)
+          .put(SqlKind.AVG, CEPKind.AVG)
+          .put(SqlKind.FIRST, CEPKind.FIRST)
+          .put(SqlKind.LAST, CEPKind.LAST)
+          .put(SqlKind.PREV, CEPKind.PREV)
+          .put(SqlKind.NEXT, CEPKind.NEXT)
+          .put(SqlKind.EQUALS, CEPKind.EQUALS)
+          .put(SqlKind.GREATER_THAN, CEPKind.GREATER_THAN)
+          .put(SqlKind.GREATER_THAN_OR_EQUAL, CEPKind.GREATER_THAN_OR_EQUAL)
+          .put(SqlKind.LESS_THAN, CEPKind.LESS_THAN)
+          .put(SqlKind.LESS_THAN_OR_EQUAL, CEPKind.LESS_THAN_OR_EQUAL)
+          .build();
+  private final CEPKind cepKind;
+
+  private CEPOperator(CEPKind cepKind) {
+    this.cepKind = cepKind;
+  }
+
+  public CEPKind getCepKind() {
+    return cepKind;
+  }
+
+  public static CEPOperator of(SqlOperator op) {
+    CEPKind mapped = KIND_BY_SQL_KIND.get(op.getKind());
+    return new CEPOperator(mapped == null ? CEPKind.NONE : mapped);
+  }
+
+  @Override
+  public String toString() {
+    return cepKind.name();
+  }
+}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPPattern.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPPattern.java
new file mode 100644
index 0000000..1e60aa7
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPPattern.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.cep;
+
+import java.io.Serializable;
+import java.math.BigDecimal;
+import java.util.List;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexCall;
+
+/**
+ * Core pattern class that stores the definition of a single pattern variable: its name, its
+ * quantifier and the condition a row has to satisfy to match the variable.
+ */
+public class CEPPattern implements Serializable {
+
+  private final Schema mySchema;
+  private final String patternVar;
+  private final PatternCondition patternCondition;
+  private final Quantifier quant;
+
+  /**
+   * Compiles {@code patternDef} into the row predicate of this pattern.
+   *
+   * @param patternDef definition of the variable; {@code null} means every row matches
+   * @throws UnsupportedOperationException if the definition is not a supported comparison
+   */
+  private CEPPattern(
+      Schema mySchema, String patternVar, @Nullable RexCall patternDef, Quantifier quant) {
+    this.mySchema = mySchema;
+    this.patternVar = patternVar;
+    this.quant = quant;
+
+    // PatternCondition's constructor calls toString(), so patternVar and quant are set first.
+    if (patternDef == null) {
+      this.patternCondition =
+          new PatternCondition(this) {
+            @Override
+            public boolean eval(Row eleRow) {
+              return true;
+            }
+          };
+      return;
+    }
+
+    CEPCall cepCall = CEPCall.of(patternDef);
+    final CEPKind comparison = cepCall.getOperator().getCepKind();
+    switch (comparison) {
+      case EQUALS:
+      case GREATER_THAN:
+      case GREATER_THAN_OR_EQUAL:
+      case LESS_THAN:
+      case LESS_THAN_OR_EQUAL:
+        break;
+      default:
+        throw new UnsupportedOperationException("Comparison operator not recognized.");
+    }
+    // Reject unsupported operand shapes up front rather than failing in the casts below.
+    List<CEPOperation> cepOperands = cepCall.getOperands();
+    if (cepOperands.size() < 2
+        || !(cepOperands.get(0) instanceof CEPCall)
+        || !(cepOperands.get(1) instanceof CEPLiteral)) {
+      throw new UnsupportedOperationException(
+          "Only comparisons of a function call with a literal are supported.");
+    }
+    final CEPCall cepOpr0 = (CEPCall) cepOperands.get(0);
+    final CEPLiteral cepOpr1 = (CEPLiteral) cepOperands.get(1);
+
+    this.patternCondition =
+        new PatternCondition(this) {
+          @Override
+          public boolean eval(Row eleRow) {
+            int cmp = evalOperation(cepOpr0, cepOpr1, eleRow);
+            switch (comparison) {
+              case EQUALS:
+                return cmp == 0;
+              case GREATER_THAN:
+                return cmp > 0;
+              case GREATER_THAN_OR_EQUAL:
+                return cmp >= 0;
+              case LESS_THAN:
+                return cmp < 0;
+              default: // LESS_THAN_OR_EQUAL: the only kind left after the check above
+                return cmp <= 0;
+            }
+          }
+        };
+  }
+
+  /**
+   * Returns {@code compareTo} of the row's value for the field referenced by {@code operation}
+   * against {@code lit}. Only {@code LAST(field, 0)} is supported; anything else throws
+   * {@code UnsupportedOperationException}.
+   */
+  private int evalOperation(CEPCall operation, CEPLiteral lit, Row rowEle) {
+    CEPOperator call = operation.getOperator();
+    List<CEPOperation> operands = operation.getOperands();
+    if (call.getCepKind() == CEPKind.LAST) { // support only simple match for now: LAST(*.$, 0)
+      CEPOperation opr0 = operands.get(0);
+      CEPLiteral opr1 = (CEPLiteral) operands.get(1);
+      // compareTo rather than equals: BigDecimal.equals is scale-sensitive (0.0 vs 0).
+      if (opr0.getClass() == CEPFieldRef.class
+          && opr1.getDecimal().compareTo(BigDecimal.ZERO) == 0) {
+        int fIndex = ((CEPFieldRef) opr0).getIndex();
+        Schema.Field fd = mySchema.getField(fIndex);
+        Schema.FieldType dtype = fd.getType();
+        switch (dtype.getTypeName()) {
+          case BYTE:
+            return rowEle.getByte(fIndex).compareTo(lit.getByte());
+          case INT16:
+            return rowEle.getInt16(fIndex).compareTo(lit.getInt16());
+          case INT32:
+            return rowEle.getInt32(fIndex).compareTo(lit.getInt32());
+          case INT64:
+            return rowEle.getInt64(fIndex).compareTo(lit.getInt64());
+          case DECIMAL:
+            return rowEle.getDecimal(fIndex).compareTo(lit.getDecimal());
+          case FLOAT:
+            return rowEle.getFloat(fIndex).compareTo(lit.getFloat());
+          case DOUBLE:
+            return rowEle.getDouble(fIndex).compareTo(lit.getDouble());
+          case STRING:
+            return rowEle.getString(fIndex).compareTo(lit.getString());
+          case DATETIME:
+            return rowEle.getDateTime(fIndex).compareTo(lit.getDateTime());
+          case BOOLEAN:
+            return rowEle.getBoolean(fIndex).compareTo(lit.getBoolean());
+          default:
+            throw new UnsupportedOperationException(
+                "Specified column not comparable: " + fd.getName());
+        }
+      }
+    }
+    throw new UnsupportedOperationException(
+        "backward functions (PREV, NEXT) not supported for now");
+  }
+
+  public boolean evalRow(Row rowEle) {
+    return patternCondition.eval(rowEle);
+  }
+
+  @Override
+  public String toString() {
+    return patternVar + quant.toString();
+  }
+
+  public String getPatternVar() {
+    return patternVar;
+  }
+
+  public static CEPPattern of(
+      Schema theSchema, String patternVar, @Nullable RexCall patternDef, Quantifier quant) {
+    return new CEPPattern(theSchema, patternVar, patternDef, quant);
+  }
+}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPUtil.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPUtil.java
new file mode 100644
index 0000000..d501abd
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/CEPUtil.java
@@ -0,0 +1,249 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.cep;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.RelCollation;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.RelFieldCollation;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexCall;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexInputRef;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexLiteral;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlKind;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlOperator;
+
+/**
+ * Utility methods for transforming Calcite's constructs into Beam's own constructs (for
+ * serialization purposes).
+ */
+public class CEPUtil {
+
+ /**
+  * Maps the bounds of a pattern quantifier to a {@code Quantifier}.
+  *
+  * <p>A bound of {@code -1} stands for "not given": {@code end == -1} is an open upper bound and
+  * {@code start == -1} an omitted lower bound. The forms {@code *}, {@code +} and {@code ?} map to
+  * the shared {@code Quantifier} constants; every other form is rendered with braces.
+  *
+  * <p>The brace forms are {@code { n }}, {@code { n , }}, {@code { , m }} and {@code { n , m }}.
+  *
+  * @param start minimum number of repetitions, or {@code -1}
+  * @param end maximum number of repetitions, or {@code -1}
+  * @param isReluctant whether the quantifier is reluctant (rendered with a trailing {@code ?})
+  * @return the quantifier describing the given bounds
+  */
+ private static Quantifier getQuantifier(int start, int end, boolean isReluctant) {
+   // Reluctant quantifiers are the greedy form followed by "?".
+   String reluctantMark = isReluctant ? "?" : "";
+
+   // NOTE(review): the blanks inside the braces are kept from the existing representation;
+   // confirm that consumers of the string accept them.
+   if (start == end) {
+     // exactly n repetitions: {n}
+     return new Quantifier("{ " + start + " }" + reluctantMark);
+   }
+
+   // open upper bound: *, + or {n,}
+   if (end == -1) {
+     if (start == 0) {
+       return isReluctant ? Quantifier.ASTERISK_RELUCTANT : Quantifier.ASTERISK;
+     }
+     if (start == 1) {
+       return isReluctant ? Quantifier.PLUS_RELUCTANT : Quantifier.PLUS;
+     }
+
+     // at least n repetitions; the comma distinguishes it from exactly n
+     return new Quantifier("{ " + start + " , }" + reluctantMark);
+   }
+
+   // bounded above: ?, {,m} or {n,m}
+   if (start == 0 && end == 1) {
+     return isReluctant ? Quantifier.QMARK_RELUCTANT : Quantifier.QMARK;
+   }
+   if (start == -1) {
+     // at most m repetitions
+     return new Quantifier("{ , " + end + " }" + reluctantMark);
+   }
+
+   // between n and m repetitions: both bounds have to be rendered
+   return new Quantifier("{ " + start + " , " + end + " }" + reluctantMark);
+ }
+
+ /** Flattens a pattern expression into the {@code CEPPattern}s of its pattern variables. */
+ public static ArrayList<CEPPattern> getCEPPatternFromPattern(
+     Schema upStreamSchema, RexNode call, Map<String, RexNode> patternDefs) {
+   ArrayList<CEPPattern> result = new ArrayList<>();
+   // a bare literal is a pattern variable without a quantifier
+   if (call.getClass() == RexLiteral.class) {
+     String varName = ((RexLiteral) call).getValueAs(String.class);
+     RexCall definition = (RexCall) patternDefs.get(varName);
+     result.add(CEPPattern.of(upStreamSchema, varName, definition, Quantifier.NONE));
+     return result;
+   }
+   RexCall patternCall = (RexCall) call;
+   SqlOperator patternOp = patternCall.getOperator();
+   List<RexNode> children = patternCall.getOperands();
+   if (patternOp.getKind() != SqlKind.PATTERN_QUANTIFIER) {
+     // no quantifier on this node: collect the patterns of every operand
+     for (RexNode child : children) {
+       result.addAll(getCEPPatternFromPattern(upStreamSchema, child, patternDefs));
+     }
+     return result;
+   }
+   // quantified variable: the operands are read as (name, start, end, isReluctant)
+   String varName = ((RexLiteral) children.get(0)).getValueAs(String.class);
+   int start = ((RexLiteral) children.get(1)).getValueAs(Integer.class);
+   int end = ((RexLiteral) children.get(2)).getValueAs(Integer.class);
+   boolean isReluctant = ((RexLiteral) children.get(3)).getValueAs(Boolean.class);
+   Quantifier quantifier = getQuantifier(start, end, isReluctant);
+   RexCall definition = (RexCall) patternDefs.get(varName);
+   result.add(CEPPattern.of(upStreamSchema, varName, definition, quantifier));
+   return result;
+ }
+
+ /** Recursively renders a pattern expression as a regular expression over variable names. */
+ public static String getRegexFromPattern(RexNode call) {
+   // a bare literal is the name of a pattern variable
+   if (call.getClass() == RexLiteral.class) {
+     return ((RexLiteral) call).getValueAs(String.class);
+   }
+   RexCall patternCall = (RexCall) call;
+   SqlOperator patternOp = patternCall.getOperator();
+   List<RexNode> children = patternCall.getOperands();
+   if (patternOp.getKind() != SqlKind.PATTERN_QUANTIFIER) {
+     // only the first two operands are rendered, one directly after the other
+     return getRegexFromPattern(children.get(0)) + getRegexFromPattern(children.get(1));
+   }
+   // quantified variable: the operands are read as (name, start, end, isReluctant)
+   String varName = ((RexLiteral) children.get(0)).getValueAs(String.class);
+   int start = ((RexLiteral) children.get(1)).getValueAs(Integer.class);
+   int end = ((RexLiteral) children.get(2)).getValueAs(Integer.class);
+   boolean isReluctant = ((RexLiteral) children.get(3)).getValueAs(Boolean.class);
+   return varName + getQuantifier(start, end, isReluctant).toString();
+ }
+
+ /** Converts each field collation of a Calcite {@code RelCollation} into an {@code OrderKey}. */
+ public static ArrayList<OrderKey> makeOrderKeysFromCollation(RelCollation orderKeys) {
+   ArrayList<OrderKey> converted = new ArrayList<>();
+   // the result keeps the order of the collation's fields
+   for (RelFieldCollation fieldCollation : orderKeys.getFieldCollations()) {
+     OrderKey key = OrderKey.of(fieldCollation);
+     converted.add(key);
+   }
+
+   return converted;
+ }
+
+ /** Wraps the name and index of every partition key into a {@code CEPFieldRef}. */
+ public static List<CEPFieldRef> getCEPFieldRefFromParKeys(List<RexNode> parKeys) {
+   List<CEPFieldRef> refs = new ArrayList<>();
+   for (RexNode keyNode : parKeys) {
+     RexInputRef inputRef = (RexInputRef) keyNode;
+     refs.add(new CEPFieldRef(inputRef.getName(), inputRef.getIndex()));
+   }
+   return refs;
+ }
+
+ /**
+  * Finds the first {@code CEPFieldRef} in an operation tree, searching operands depth-first.
+  *
+  * @return the field reference, or {@code null} if the operation contains none
+  */
+ public static CEPFieldRef getFieldRef(CEPOperation opr) {
+   if (opr.getClass() == CEPFieldRef.class) {
+     return (CEPFieldRef) opr;
+   }
+   if (opr.getClass() != CEPCall.class) {
+     return null;
+   }
+   for (CEPOperation operand : ((CEPCall) opr).getOperands()) {
+     CEPFieldRef found = getFieldRef(operand);
+     if (found != null) {
+       return found;
+     }
+   }
+   return null;
+ }
+
+ /**
+  * Determines the type of an operation in the {@code MEASURES} clause: a field reference has its
+  * field's type, {@code SUM}/{@code COUNT} are {@code INT32}, {@code AVG} is {@code DOUBLE}, and
+  * any other call has the type of the first field it references.
+  */
+ public static Schema.FieldType getFieldType(Schema streamSchema, CEPOperation measureOperation) {
+   if (measureOperation.getClass() == CEPFieldRef.class) {
+     int fieldIndex = ((CEPFieldRef) measureOperation).getIndex();
+     return streamSchema.getField(fieldIndex).getType();
+   }
+   if (measureOperation.getClass() != CEPCall.class) {
+     throw new UnsupportedOperationException("the function in Measures is not recognized.");
+   }
+   CEPCall call = (CEPCall) measureOperation;
+   CEPKind oprKind = call.getOperator().getCepKind();
+   if (oprKind == CEPKind.SUM || oprKind == CEPKind.COUNT) {
+     return Schema.FieldType.INT32;
+   }
+   if (oprKind == CEPKind.AVG) {
+     return Schema.FieldType.DOUBLE;
+   }
+   CEPFieldRef firstRef = getFieldRef(call);
+   if (firstRef == null) {
+     throw new UnsupportedOperationException("the function in Measures is not recognized.");
+   }
+   return streamSchema.getField(firstRef.getIndex()).getType();
+ }
+
+ /**
+  * Decides the output schema: the upstream schema unchanged if {@code allRows}, otherwise the
+  * partition key fields followed by one field per measure.
+  *
+  * @throws UnsupportedOperationException if there is no measure and {@code allRows} is false
+  */
+ public static Schema decideSchema(
+     List<CEPMeasure> measures,
+     boolean allRows,
+     List<CEPFieldRef> parKeys,
+     Schema upstreamSchema) {
+   // ONE ROW PER MATCH needs at least one measure
+   if (measures.isEmpty() && !allRows) {
+     throw new UnsupportedOperationException(
+         "The Measures clause cannot be empty for ONE ROW PER MATCH");
+   }
+
+   // TODO: implement ALL ROWS PER MATCH
+   // for now, return all rows as they were (return the origin schema)
+   if (allRows) {
+     return upstreamSchema;
+   }
+
+   Schema.Builder outputSchema = new Schema.Builder();
+   // partition keys come first, with the fields they have upstream
+   for (CEPFieldRef parKey : parKeys) {
+     outputSchema.addField(upstreamSchema.getField(parKey.getIndex()));
+   }
+   // then one field per measure
+   for (CEPMeasure measure : measures) {
+     outputSchema.addField(Schema.Field.of(measure.getName(), measure.getType()));
+   }
+   // TODO: add any columns left for ALL ROWS PER MATCH
+   return outputSchema.build();
+ }
+}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/OrderKey.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/OrderKey.java
new file mode 100644
index 0000000..85825e6
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/OrderKey.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.cep;
+
+import java.io.Serializable;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.RelFieldCollation;
+
+/**
+ * The {@code OrderKey} class stores the information to sort a column: the index of the field, the
+ * sort direction and the placement of nulls.
+ *
+ * <h3>Constraints</h3>
+ *
+ * <ul>
+ *   <li>Strict orders are not supported for now.
+ * </ul>
+ */
+public class OrderKey implements Serializable {
+
+  // index of the sort field
+  private final int fIndex;
+  // true for ascending, false for descending
+  private final boolean dir;
+  // whether nulls come first
+  private final boolean nullFirst;
+
+  private OrderKey(int fIndex, boolean dir, boolean nullFirst) {
+    this.fIndex = fIndex;
+    this.dir = dir;
+    this.nullFirst = nullFirst;
+  }
+
+  /**
+   * Creates an {@code OrderKey} from a Calcite field collation.
+   *
+   * <p>Only whether the direction is descending is recorded, and nulls are placed first only for
+   * {@code NullDirection.FIRST}.
+   */
+  public static OrderKey of(RelFieldCollation orderKey) {
+    boolean ascending = !orderKey.direction.isDescending();
+    boolean nullsFirst = orderKey.nullDirection == RelFieldCollation.NullDirection.FIRST;
+    return new OrderKey(orderKey.getFieldIndex(), ascending, nullsFirst);
+  }
+
+  /** Returns the index of the field to sort by. */
+  public int getIndex() {
+    return fIndex;
+  }
+
+  /** Returns {@code true} for an ascending order and {@code false} for a descending one. */
+  public boolean getDir() {
+    return dir;
+  }
+
+  /** Returns whether nulls are sorted first. */
+  public boolean getNullFirst() {
+    return nullFirst;
+  }
+}
diff --git a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/PatternCondition.java
similarity index 62%
copy from .test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
copy to sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/PatternCondition.java
index 2aa89c2..75f073f 100644
--- a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/PatternCondition.java
@@ -15,22 +15,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package org.apache.beam.sdk.extensions.sql.impl.cep;
-import PrecommitJobBuilder
+import java.io.Serializable;
+import org.apache.beam.sdk.values.Row;
-PrecommitJobBuilder builder = new PrecommitJobBuilder(
- scope: this,
- nameBase: 'JavaBeamZetaSQL',
- gradleTask: ':javaPreCommitBeamZetaSQL',
- gradleSwitches: [
- '-PdisableSpotlessCheck=true'
- ], // spotless checked in separate pre-commit
- triggerPathPatterns: [
- '^sdks/java/extensions/sql/.*$',
- ]
- )
-builder.build {
- publishers {
- archiveJunit('**/build/test-results/**/*.xml')
- }
+/**
+ * {@code PatternCondition} is the serializable predicate that decides whether a row is a match of
+ * a single pattern; concrete conditions implement {@link #eval(Row)}.
+ */
+public abstract class PatternCondition implements Serializable {
+
+ private String patternVar; // result of the owning pattern's toString()
+
+ PatternCondition(CEPPattern pattern) {
+ this.patternVar = pattern.toString();
+ };
+
+ public abstract boolean eval(Row eleRow);
 }
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/Quantifier.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/Quantifier.java
new file mode 100644
index 0000000..12e8a1f
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/Quantifier.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.cep;
+
+import java.io.Serializable;
+
+/**
+ * The {@code Quantifier} class stores the textual form of the quantifier attached to a pattern
+ * variable; {@code toString()} returns that text unchanged.
+ */
+public class Quantifier implements Serializable {
+
+ public static final Quantifier NONE = new Quantifier(""); // no quantifier: empty text
+ public static final Quantifier PLUS = new Quantifier("+");
+ public static final Quantifier QMARK = new Quantifier("?");
+ public static final Quantifier ASTERISK = new Quantifier("*");
+ public static final Quantifier PLUS_RELUCTANT = new Quantifier("+?");
+ public static final Quantifier ASTERISK_RELUCTANT = new Quantifier("*?");
+ public static final Quantifier QMARK_RELUCTANT = new Quantifier("??");
+
+ private final String repr; // textual representation, returned by toString()
+
+ Quantifier(String repr) {
+ this.repr = repr;
+ }
+
+ @Override
+ public String toString() {
+ return repr;
+ }
+}
diff --git a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/package-info.java
similarity index 63%
copy from .test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
copy to sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/package-info.java
index 2aa89c2..ddd1057 100644
--- a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/cep/package-info.java
@@ -16,21 +16,11 @@
* limitations under the License.
*/
-import PrecommitJobBuilder
-
-PrecommitJobBuilder builder = new PrecommitJobBuilder(
- scope: this,
- nameBase: 'JavaBeamZetaSQL',
- gradleTask: ':javaPreCommitBeamZetaSQL',
- gradleSwitches: [
- '-PdisableSpotlessCheck=true'
- ], // spotless checked in separate pre-commit
- triggerPathPatterns: [
- '^sdks/java/extensions/sql/.*$',
- ]
- )
-builder.build {
- publishers {
- archiveJunit('**/build/test-results/**/*.xml')
- }
-}
+/**
+ * Utilities for Complex Event Processing (CEP).
+ *
+ * <p>For serialization, Beam needs serializable classes to replace Calcite's classes. This package
+ * includes some "cloned" classes to replace Calcite {@code RexNode}, {@code RelCollation}, {@code
+ * SqlOperator} etc.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.cep;
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/planner/BeamRelDataTypeSystem.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/planner/BeamRelDataTypeSystem.java
index 1d234b0..4452422 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/planner/BeamRelDataTypeSystem.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/planner/BeamRelDataTypeSystem.java
@@ -48,6 +48,8 @@
switch (typeName) {
case TIME:
return 6; // support microsecond time precision
+ case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
+ return 6; // support microsecond datetime precision
default:
return super.getMaxPrecision(typeName);
}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/planner/BeamRuleSets.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/planner/BeamRuleSets.java
index 2ecc26b..d9bc236 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/planner/BeamRuleSets.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/planner/BeamRuleSets.java
@@ -31,6 +31,7 @@
import org.apache.beam.sdk.extensions.sql.impl.rule.BeamIntersectRule;
import org.apache.beam.sdk.extensions.sql.impl.rule.BeamJoinAssociateRule;
import org.apache.beam.sdk.extensions.sql.impl.rule.BeamJoinPushThroughJoinRule;
+import org.apache.beam.sdk.extensions.sql.impl.rule.BeamMatchRule;
import org.apache.beam.sdk.extensions.sql.impl.rule.BeamMinusRule;
import org.apache.beam.sdk.extensions.sql.impl.rule.BeamSideInputJoinRule;
import org.apache.beam.sdk.extensions.sql.impl.rule.BeamSideInputLookupJoinRule;
@@ -164,7 +165,8 @@
BeamUnnestRule.INSTANCE,
BeamSideInputJoinRule.INSTANCE,
BeamCoGBKJoinRule.INSTANCE,
- BeamSideInputLookupJoinRule.INSTANCE);
+ BeamSideInputLookupJoinRule.INSTANCE,
+ BeamMatchRule.INSTANCE);
private static final List<RelOptRule> BEAM_TO_ENUMERABLE =
ImmutableList.of(BeamEnumerableConverterRule.INSTANCE);
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java
index b6d606b..d9ad401 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java
@@ -27,6 +27,7 @@
import java.lang.reflect.Type;
import java.math.BigDecimal;
import java.time.LocalDate;
+import java.time.LocalDateTime;
import java.time.LocalTime;
import java.util.AbstractList;
import java.util.AbstractMap;
@@ -35,13 +36,14 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TimeZone;
import org.apache.beam.sdk.extensions.sql.impl.BeamSqlPipelineOptions;
import org.apache.beam.sdk.extensions.sql.impl.planner.BeamJavaTypeFactory;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.CharType;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.TimeWithLocalTzType;
-import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.TimestampWithLocalTzType;
import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.logicaltypes.DateTime;
import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
@@ -92,6 +94,7 @@
public class BeamCalcRel extends AbstractBeamCalcRel {
private static final long NANOS_PER_MILLISECOND = 1000000L;
+ private static final long MILLIS_PER_DAY = 86400000L;
private static final ParameterExpression outputSchemaParam =
Expressions.parameter(Schema.class, "outputSchema");
@@ -344,6 +347,18 @@
valueDateTime = Expressions.unbox(valueDateTime);
}
valueDateTime = Expressions.call(LocalDate.class, "ofEpochDay", valueDateTime);
+ } else if (CalciteUtils.TIMESTAMP_WITH_LOCAL_TZ.typesEqual(toType)
+ || CalciteUtils.NULLABLE_TIMESTAMP_WITH_LOCAL_TZ.typesEqual(toType)) {
+ // Convert TimeStamp_With_Local_TimeZone to LocalDateTime
+ Expression dateValue =
+ Expressions.divide(valueDateTime, Expressions.constant(MILLIS_PER_DAY));
+ Expression date = Expressions.call(LocalDate.class, "ofEpochDay", dateValue);
+ Expression timeValue =
+ Expressions.multiply(
+ Expressions.modulo(valueDateTime, Expressions.constant(MILLIS_PER_DAY)),
+ Expressions.constant(NANOS_PER_MILLISECOND));
+ Expression time = Expressions.call(LocalTime.class, "ofNanoOfDay", timeValue);
+ valueDateTime = Expressions.call(LocalDateTime.class, "of", date, time);
} else {
throw new UnsupportedOperationException("Unknown DateTime type " + toType);
}
@@ -385,7 +400,7 @@
.put(SqlTypes.DATE.getIdentifier(), Long.class)
.put(SqlTypes.TIME.getIdentifier(), Long.class)
.put(TimeWithLocalTzType.IDENTIFIER, ReadableInstant.class)
- .put(TimestampWithLocalTzType.IDENTIFIER, ReadableInstant.class)
+ .put(SqlTypes.DATETIME.getIdentifier(), Row.class)
.put(CharType.IDENTIFIER, String.class)
.build();
@@ -442,6 +457,16 @@
value, Expressions.divide(value, Expressions.constant(NANOS_PER_MILLISECOND)));
} else if (SqlTypes.DATE.getIdentifier().equals(logicalId)) {
return value;
+ } else if (SqlTypes.DATETIME.getIdentifier().equals(logicalId)) {
+ Expression dateValue =
+ Expressions.call(value, "getInt64", Expressions.constant(DateTime.DATE_FIELD_NAME));
+ Expression timeValue =
+ Expressions.call(value, "getInt64", Expressions.constant(DateTime.TIME_FIELD_NAME));
+ Expression returnValue =
+ Expressions.add(
+ Expressions.multiply(dateValue, Expressions.constant(MILLIS_PER_DAY)),
+ Expressions.divide(timeValue, Expressions.constant(NANOS_PER_MILLISECOND)));
+ return nullOr(value, returnValue);
} else if (!CharType.IDENTIFIER.equals(logicalId)) {
throw new UnsupportedOperationException(
"Unknown LogicalType " + type.getLogicalType().getIdentifier());
@@ -563,6 +588,8 @@
|| name.equals(DataContext.Variable.CURRENT_TIMESTAMP.camelName)
|| name.equals(DataContext.Variable.LOCAL_TIMESTAMP.camelName)) {
return System.currentTimeMillis();
+ } else if (name.equals(Variable.TIME_ZONE.camelName)) {
+ return TimeZone.getDefault();
}
return null;
}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamMatchRel.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamMatchRel.java
new file mode 100644
index 0000000..84d6dbb
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamMatchRel.java
@@ -0,0 +1,529 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.rel;
+
+import static org.apache.beam.sdk.extensions.sql.impl.cep.CEPUtil.makeOrderKeysFromCollation;
+import static org.apache.beam.vendor.calcite.v1_20_0.com.google.common.base.Preconditions.checkArgument;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.RowCoder;
+import org.apache.beam.sdk.extensions.sql.impl.cep.CEPCall;
+import org.apache.beam.sdk.extensions.sql.impl.cep.CEPFieldRef;
+import org.apache.beam.sdk.extensions.sql.impl.cep.CEPKind;
+import org.apache.beam.sdk.extensions.sql.impl.cep.CEPLiteral;
+import org.apache.beam.sdk.extensions.sql.impl.cep.CEPMeasure;
+import org.apache.beam.sdk.extensions.sql.impl.cep.CEPOperation;
+import org.apache.beam.sdk.extensions.sql.impl.cep.CEPPattern;
+import org.apache.beam.sdk.extensions.sql.impl.cep.CEPUtil;
+import org.apache.beam.sdk.extensions.sql.impl.cep.OrderKey;
+import org.apache.beam.sdk.extensions.sql.impl.planner.BeamCostModel;
+import org.apache.beam.sdk.extensions.sql.impl.planner.NodeStats;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.GroupByKey;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionList;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptCluster;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptPlanner;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelTraitSet;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.RelCollation;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.RelNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.core.Match;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.type.RelDataType;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexCall;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexInputRef;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rex.RexNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlKind;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@code BeamRelNode} to replace a {@code Match} node.
+ *
+ * <p>The {@code BeamMatchRel} is the Beam implementation of {@code MATCH_RECOGNIZE} in SQL.
+ *
+ * <p>For now, the underlying implementation is based on java.util.regex.
+ */
+public class BeamMatchRel extends Match implements BeamRelNode {
+
+ public static final Logger LOG = LoggerFactory.getLogger(BeamMatchRel.class);
+
+ /**
+ * Creates a {@code BeamMatchRel}.
+ *
+ * <p>Every argument is forwarded unchanged, and in the same order, to the {@code Match}
+ * superclass constructor; this constructor performs no other work.
+ */
+ public BeamMatchRel(
+ RelOptCluster cluster,
+ RelTraitSet traitSet,
+ RelNode input,
+ RelDataType rowType,
+ RexNode pattern,
+ boolean strictStart,
+ boolean strictEnd,
+ Map<String, RexNode> patternDefinitions,
+ Map<String, RexNode> measures,
+ RexNode after,
+ Map<String, ? extends SortedSet<String>> subsets,
+ boolean allRows,
+ List<RexNode> partitionKeys,
+ RelCollation orderKeys,
+ RexNode interval) {
+
+ super(
+ cluster,
+ traitSet,
+ input,
+ rowType,
+ pattern,
+ strictStart,
+ strictEnd,
+ patternDefinitions,
+ measures,
+ after,
+ subsets,
+ allRows,
+ partitionKeys,
+ orderKeys,
+ interval);
+ }
+
+  /** Reports a constant "tiny" cost for this node; no real cost estimation is performed yet. */
+  @Override
+  public BeamCostModel beamComputeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
+    final BeamCostModel constantCost = BeamCostModel.FACTORY.makeTinyCost();
+    return constantCost;
+  }
+
+  /**
+   * Produces a rough estimate for this node: the input's row count, rate and window are passed to
+   * {@code NodeStats.create} and the result is scaled with {@code multiply(0.5)}.
+   */
+  @Override
+  public NodeStats estimateNodeStats(RelMetadataQuery mq) {
+    // Placeholder heuristic; to be examined further.
+    final NodeStats upstreamStats = BeamSqlRelUtils.getNodeStats(input, mq);
+    final NodeStats unscaled =
+        NodeStats.create(
+            upstreamStats.getRowCount(), upstreamStats.getRate(), upstreamStats.getWindow());
+    return unscaled.multiply(0.5);
+  }
+
+  /**
+   * Builds the transform implementing this node from its partition keys, order keys, measures,
+   * all-rows flag, pattern and pattern definitions.
+   */
+  @Override
+  public PTransform<PCollectionList<Row>, PCollection<Row>> buildPTransform() {
+    final MatchTransform matchTransform =
+        new MatchTransform(
+            partitionKeys, orderKeys, measures, allRows, pattern, patternDefinitions);
+    return matchTransform;
+  }
+
+ private static class MatchTransform extends PTransform<PCollectionList<Row>, PCollection<Row>> {
+
+ private final List<RexNode> parKeys;
+ private final RelCollation orderKeys;
+ private final Map<String, RexNode> measures;
+ private final boolean allRows;
+ private final RexNode pattern;
+ private final Map<String, RexNode> patternDefs;
+
+ /**
+ * Stores the pieces of the MATCH_RECOGNIZE clause that {@code expand} needs.
+ *
+ * @param parKeys partition key expressions; {@code expand} casts each one to {@code RexInputRef}
+ * @param orderKeys collation used to sort the rows inside each partition
+ * @param measures measure expressions keyed by output column name
+ * @param allRows passed through to the matcher to choose between emitting every matched row and
+ *     one row per match
+ * @param pattern the pattern expression
+ * @param patternDefs definitions of the pattern variables, keyed by variable name
+ */
+ public MatchTransform(
+ List<RexNode> parKeys,
+ RelCollation orderKeys,
+ Map<String, RexNode> measures,
+ boolean allRows,
+ RexNode pattern,
+ Map<String, RexNode> patternDefs) {
+ this.parKeys = parKeys;
+ this.orderKeys = orderKeys;
+ this.measures = measures;
+ this.allRows = allRows;
+ this.pattern = pattern;
+ this.patternDefs = patternDefs;
+ }
+
+    /**
+     * Expands the match into four steps: key every row by its partition columns, group by that
+     * key, sort each group by the order keys, and run the pattern matcher over each sorted group.
+     *
+     * @param pinput must contain exactly one input collection
+     * @return the matcher output, with a {@code RowCoder} for the schema chosen by {@code
+     *     CEPUtil.decideSchema}
+     */
+    @Override
+    public PCollection<Row> expand(PCollectionList<Row> pinput) {
+      checkArgument(
+          pinput.size() == 1,
+          "Wrong number of inputs for %s: %s",
+          BeamMatchRel.class.getSimpleName(),
+          pinput);
+      final PCollection<Row> input = pinput.get(0);
+      final Schema inputSchema = input.getSchema();
+
+      // The key schema consists of the upstream fields referenced by the partition keys.
+      final Schema.Builder keySchemaBuilder = new Schema.Builder();
+      for (RexNode keyNode : parKeys) {
+        final int fieldIndex = ((RexInputRef) keyNode).getIndex();
+        keySchemaBuilder.addField(inputSchema.getField(fieldIndex));
+      }
+      final Schema keySchema = keySchemaBuilder.build();
+
+      // partition according to the partition keys
+      final PCollection<KV<Row, Row>> keyed = input.apply(ParDo.of(new MapKeys(keySchema)));
+
+      // group by keys
+      final PCollection<KV<Row, Iterable<Row>>> grouped =
+          keyed
+              .setCoder(KvCoder.of(RowCoder.of(keySchema), RowCoder.of(inputSchema)))
+              .apply(GroupByKey.create());
+
+      // sort within each keyed partition
+      // This will rely on an assumption that Fusion will fuse
+      // operators here so the sorted result will be preserved
+      // for the next match transform.
+      // In most of the runners (if not all) this should be true.
+      final ArrayList<OrderKey> sortKeys = makeOrderKeysFromCollation(orderKeys);
+      final PCollection<KV<Row, Iterable<Row>>> sorted =
+          grouped.apply(ParDo.of(new SortPerKey(sortKeys)));
+
+      // translate the pattern and the measures into their CEP representation
+      final ArrayList<CEPPattern> cepPattern =
+          CEPUtil.getCEPPatternFromPattern(inputSchema, pattern, patternDefs);
+      final String regexPattern = CEPUtil.getRegexFromPattern(pattern);
+      final List<CEPMeasure> cepMeasures = new ArrayList<>();
+      for (Map.Entry<String, RexNode> measure : measures.entrySet()) {
+        RexNode measureNode = measure.getValue();
+        // TODO: support FINAL clause, for now, get rid of the FINAL operation
+        if (measureNode.getClass() == RexCall.class
+            && ((RexCall) measureNode).getOperator().getKind() == SqlKind.FINAL) {
+          measureNode = ((RexCall) measureNode).getOperands().get(0);
+        }
+        cepMeasures.add(
+            new CEPMeasure(inputSchema, measure.getKey(), CEPOperation.of(measureNode)));
+      }
+
+      // apply the pattern match in each partition
+      final List<CEPFieldRef> cepParKeys = CEPUtil.getCEPFieldRefFromParKeys(parKeys);
+      final Schema outSchema =
+          CEPUtil.decideSchema(cepMeasures, allRows, cepParKeys, inputSchema);
+      final MatchPattern matchFn =
+          new MatchPattern(
+              inputSchema, cepParKeys, cepPattern, regexPattern, cepMeasures, allRows, outSchema);
+
+      // apply the ParDo for the measures clause
+      // for now, output all rows of each pattern matched (for testing purpose)
+      // for now, support FINAL only
+      // TODO: add ONE ROW PER MATCH and MEASURES implementation.
+      // TODO: handle the no aggregate in pattern with potentially multiple matches
+      // TODO: add support for FINAL/RUNNING
+      return sorted.apply(ParDo.of(matchFn)).setCoder(RowCoder.of(outSchema));
+    }
+
+    /**
+     * Runs the regex-based pattern match over one sorted partition and emits the result rows.
+     *
+     * <p>Each input row is encoded as the pattern variable of the last {@code CEPPattern} whose
+     * {@code evalRow} accepts it, or as a single space when none does. The concatenated encoding
+     * is searched with the compiled regex and the match offsets are used directly as row indices.
+     * NOTE(review): that indexing presumably relies on every pattern variable being exactly one
+     * character long; confirm against {@code CEPPattern#getPatternVar}.
+     *
+     * <p>When {@code allRows} is set, every row of each match is emitted unchanged. Otherwise one
+     * row per match is built from the partition key columns followed by the measure columns.
+     */
+    private static class MatchPattern extends DoFn<KV<Row, Iterable<Row>>, Row> {
+
+      public static final Logger LOG = LoggerFactory.getLogger(MatchPattern.class);
+
+      private final Schema upstreamSchema;
+      private final Schema outSchema;
+      private final List<CEPFieldRef> parKeys;
+      private final ArrayList<CEPPattern> pattern;
+      // Compiled once here rather than once per element. java.util.regex.Pattern is Serializable,
+      // so it is carried along when this DoFn is serialized.
+      private final Pattern compiledRegex;
+      private final List<CEPMeasure> measures;
+      private final boolean allRows;
+
+      /**
+       * @param upstreamSchema schema of the input rows, used to name the partition key columns
+       * @param parKeys partition key columns copied into every one-row-per-match output
+       * @param pattern pattern variables, in the order they are tested against rows
+       * @param regexPattern regular expression searched in the per-row encoding
+       * @param measures measures evaluated for every one-row-per-match output
+       * @param allRows {@code true} to emit all matched rows instead of one row per match
+       * @param outSchema schema of the one-row-per-match output
+       * @throws java.util.regex.PatternSyntaxException if {@code regexPattern} is not a valid regex
+       */
+      MatchPattern(
+          Schema upstreamSchema,
+          List<CEPFieldRef> parKeys,
+          ArrayList<CEPPattern> pattern,
+          String regexPattern,
+          List<CEPMeasure> measures,
+          boolean allRows,
+          Schema outSchema) {
+        this.upstreamSchema = upstreamSchema;
+        this.parKeys = parKeys;
+        this.pattern = pattern;
+        this.compiledRegex = Pattern.compile(regexPattern);
+        this.measures = measures;
+        this.allRows = allRows;
+        this.outSchema = outSchema;
+      }
+
+      @ProcessElement
+      public void processElement(@Element KV<Row, Iterable<Row>> keyRows, OutputReceiver<Row> out) {
+        ArrayList<Row> rows = new ArrayList<>();
+        StringBuilder patternStringBuilder = new StringBuilder();
+        for (Row row : keyRows.getValue()) {
+          rows.add(row);
+          // a row with no matched pattern is marked by a space
+          String patternOfRow = " ";
+          // No early exit on purpose: when several patterns accept the row, the last one wins.
+          for (CEPPattern tryPattern : pattern) {
+            if (tryPattern.evalRow(row)) {
+              patternOfRow = tryPattern.getPatternVar();
+            }
+          }
+          patternStringBuilder.append(patternOfRow);
+        }
+
+        Matcher m = compiledRegex.matcher(patternStringBuilder.toString());
+        while (m.find()) {
+          // output each matched sequence as specified by the Measure clause
+          // TODO: for now (regex implementation), assume deterministic pattern match
+          // (i.e. each row match to exactly one pattern or none)
+          List<Row> matchedRows = rows.subList(m.start(), m.end());
+          if (allRows) {
+            for (Row row : matchedRows) {
+              out.output(row);
+            }
+          } else { // one row per match
+            out.output(toMeasureRow(matchedRows));
+          }
+        }
+      }
+
+      /**
+       * Splits the rows of one match into consecutive runs, one run per pattern variable, in the
+       * order the variables appear in {@code pattern}. This part should be replaced by an NFA.
+       *
+       * <p>Variables that are never reached because the rows ran out get no entry in the result.
+       */
+      private Map<String, List<Row>> mapRowsToPatternVars(List<Row> matchedRows) {
+        ImmutableMap.Builder<String, List<Row>> mappedRows =
+            ImmutableMap.<String, List<Row>>builder();
+        int rowIndex = 0;
+        for (CEPPattern patternToTest : pattern) {
+          if (rowIndex >= matchedRows.size()) {
+            break;
+          }
+          ArrayList<Row> rowsOfAPattern = new ArrayList<>();
+          // Bounds are checked first so that no row is evaluated again once the end is reached.
+          while (rowIndex < matchedRows.size()
+              && patternToTest.evalRow(matchedRows.get(rowIndex))) {
+            rowsOfAPattern.add(matchedRows.get(rowIndex));
+            ++rowIndex;
+          }
+          mappedRows.put(patternToTest.getPatternVar(), rowsOfAPattern);
+        }
+        return mappedRows.build();
+      }
+
+      /** Builds the single output row of a match: partition key columns, then measure columns. */
+      private Row toMeasureRow(List<Row> matchedRows) {
+        Map<String, List<Row>> patternMappedRows = mapRowsToPatternVars(matchedRows);
+
+        // output corresponding columns according to the measures schema
+        Row.Builder rowBuilder = Row.withSchema(outSchema);
+        Row.FieldValueBuilder fieldBuilder = null;
+
+        // add partition key columns, taken from the first row of the match
+        if (!matchedRows.isEmpty()) {
+          Row firstRow = matchedRows.get(0);
+          for (CEPFieldRef parKey : parKeys) {
+            int colIndex = parKey.getIndex();
+            fieldBuilder =
+                addField(
+                    rowBuilder,
+                    fieldBuilder,
+                    upstreamSchema.getField(colIndex).getName(),
+                    firstRow.getValue(colIndex));
+          }
+        }
+
+        // add measure columns
+        for (CEPMeasure measure : measures) {
+          String outName = measure.getName();
+          String patternVar = measure.getField().getAlpha();
+          // NOTE(review): this is null when the variable has no entry in the map (see
+          // mapRowsToPatternVars), in which case the lookups below throw; confirm whether such a
+          // measure should instead yield a null column.
+          List<Row> patternRows = patternMappedRows.get(patternVar);
+
+          // implement CEPOperation as functions
+          CEPOperation opr = measure.getOperation();
+          if (opr.getClass() == CEPCall.class) {
+            CEPCall call = (CEPCall) opr;
+            CEPKind funcName = call.getOperator().getCepKind();
+            switch (funcName) {
+              case FIRST:
+                CEPFieldRef colFirstField = (CEPFieldRef) call.getOperands().get(0);
+                CEPLiteral colFirstIndex = (CEPLiteral) call.getOperands().get(1);
+                // NOTE(review): FIRST reads its offset with getInt32() while LAST uses
+                // getDecimal(); confirm which accessor matches how the literal is produced.
+                Row rowFirstToProc = patternRows.get(colFirstIndex.getInt32());
+                fieldBuilder =
+                    addField(
+                        rowBuilder,
+                        fieldBuilder,
+                        outName,
+                        rowFirstToProc.getValue(colFirstField.getIndex()));
+                break;
+              case LAST:
+                CEPFieldRef colLastField = (CEPFieldRef) call.getOperands().get(0);
+                CEPLiteral colLastIndex = (CEPLiteral) call.getOperands().get(1);
+                Row rowLastToProc =
+                    patternRows.get(
+                        patternRows.size() - 1 - colLastIndex.getDecimal().intValue());
+                fieldBuilder =
+                    addField(
+                        rowBuilder,
+                        fieldBuilder,
+                        outName,
+                        rowLastToProc.getValue(colLastField.getIndex()));
+                break;
+              default:
+                throw new UnsupportedOperationException(
+                    "The measure function is not recognized: " + funcName.name());
+            }
+          } else if (opr.getClass() == CEPFieldRef.class) {
+            // a bare column reference reads the first row matched by the variable
+            Row rowToProc = patternRows.get(0);
+            CEPFieldRef fieldRef = (CEPFieldRef) opr;
+            fieldBuilder =
+                addField(
+                    rowBuilder, fieldBuilder, outName, rowToProc.getValue(fieldRef.getIndex()));
+          } else {
+            throw new UnsupportedOperationException(
+                "CEP operation is not recognized: " + opr.getClass().getName());
+          }
+        }
+
+        return fieldBuilder == null ? rowBuilder.build() : fieldBuilder.build();
+      }
+
+      /**
+       * Sets one named field. The first field has to go through the {@code Row.Builder}; every
+       * later one is chained on the {@code FieldValueBuilder} returned by the previous call.
+       */
+      private static Row.FieldValueBuilder addField(
+          Row.Builder rowBuilder, Row.FieldValueBuilder fieldBuilder, String name, Object value) {
+        if (fieldBuilder == null) {
+          return rowBuilder.withFieldValue(name, value);
+        }
+        return fieldBuilder.withFieldValue(name, value);
+      }
+    }
+ }
+
+    /**
+     * Sorts the rows of every key with a {@code BeamSortRel.BeamSqlRowComparator} built from the
+     * order keys, and re-emits the key together with the sorted list.
+     */
+    private static class SortPerKey extends DoFn<KV<Row, Iterable<Row>>, KV<Row, Iterable<Row>>> {
+
+      private final ArrayList<OrderKey> orderKeys;
+
+      public SortPerKey(ArrayList<OrderKey> orderKeys) {
+        this.orderKeys = orderKeys;
+      }
+
+      @ProcessElement
+      public void processElement(
+          @Element KV<Row, Iterable<Row>> keyRows, OutputReceiver<KV<Row, Iterable<Row>>> out) {
+        // Comparator inputs, filled while walking the order keys from last to first.
+        ArrayList<Integer> fieldIndices = new ArrayList<>();
+        ArrayList<Boolean> directions = new ArrayList<>();
+        ArrayList<Boolean> nullsFirst = new ArrayList<>();
+        for (int remaining = orderKeys.size(); remaining > 0; --remaining) {
+          OrderKey key = orderKeys.get(remaining - 1);
+          fieldIndices.add(key.getIndex());
+          directions.add(key.getDir());
+          nullsFirst.add(key.getNullFirst());
+        }
+
+        // Copy the grouped values into a list so they can be sorted in place.
+        ArrayList<Row> sortedRows = new ArrayList<>();
+        keyRows.getValue().forEach(sortedRows::add);
+        sortedRows.sort(new BeamSortRel.BeamSqlRowComparator(fieldIndices, directions, nullsFirst));
+
+        out.output(KV.of(keyRows.getKey(), sortedRows));
+      }
+    }
+ }
+
+  /**
+   * Pairs every input row with a key row holding the values of its partition key fields.
+   *
+   * <p>The key is built by looking up, by name, each field of {@code partitionKeySchema} in the
+   * input row. When no partition is specified the schema has no fields, so every row gets the same
+   * empty key.
+   */
+  private static class MapKeys extends DoFn<Row, KV<Row, Row>> {
+
+    private final Schema partitionKeySchema;
+
+    public MapKeys(Schema partitionKeySchema) {
+      this.partitionKeySchema = partitionKeySchema;
+    }
+
+    @ProcessElement
+    public void processElement(@Element Row eleRow, OutputReceiver<KV<Row, Row>> out) {
+      Row.Builder keyBuilder = Row.withSchema(partitionKeySchema);
+      for (Schema.Field keyField : partitionKeySchema.getFields()) {
+        keyBuilder.addValue(eleRow.getValue(keyField.getName()));
+      }
+      // Emit a fully typed KV<Row, Row> instead of going through a raw KV local.
+      out.output(KV.of(keyBuilder.build(), eleRow));
+    }
+  }
+
+ /**
+ * Returns a new {@code BeamMatchRel} built from the supplied arguments.
+ *
+ * <p>The cluster and trait set are taken from this node via {@code getCluster()} and {@code
+ * getTraitSet()}; every other constructor argument is the corresponding parameter, unchanged.
+ */
+ @Override
+ public Match copy(
+ RelNode input,
+ RelDataType rowType,
+ RexNode pattern,
+ boolean strictStart,
+ boolean strictEnd,
+ Map<String, RexNode> patternDefinitions,
+ Map<String, RexNode> measures,
+ RexNode after,
+ Map<String, ? extends SortedSet<String>> subsets,
+ boolean allRows,
+ List<RexNode> partitionKeys,
+ RelCollation orderKeys,
+ RexNode interval) {
+
+ return new BeamMatchRel(
+ getCluster(),
+ getTraitSet(),
+ input,
+ rowType,
+ pattern,
+ strictStart,
+ strictEnd,
+ patternDefinitions,
+ measures,
+ after,
+ subsets,
+ allRows,
+ partitionKeys,
+ orderKeys,
+ interval);
+ }
+}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rule/BeamMatchRule.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rule/BeamMatchRule.java
new file mode 100644
index 0000000..6441c79
--- /dev/null
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rule/BeamMatchRule.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.rule;
+
+import org.apache.beam.sdk.extensions.sql.impl.rel.BeamLogicalConvention;
+import org.apache.beam.sdk.extensions.sql.impl.rel.BeamMatchRel;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.Convention;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptCluster;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelTraitSet;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.RelNode;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.convert.ConverterRule;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.core.Match;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.rel.logical.LogicalMatch;
+
+/** {@code ConverterRule} to replace {@code Match} with {@code BeamMatchRel}. */
+public class BeamMatchRule extends ConverterRule {
+  /** The only instance of this rule; the constructor is private. */
+  public static final BeamMatchRule INSTANCE = new BeamMatchRule();
+
+  private BeamMatchRule() {
+    super(LogicalMatch.class, Convention.NONE, BeamLogicalConvention.INSTANCE, "BeamMatchRule");
+  }
+
+  /**
+   * Rebuilds the given match node as a {@code BeamMatchRel}: the node's traits and its input's
+   * traits are switched to {@code BeamLogicalConvention.INSTANCE}, and every other property is
+   * copied from the original node.
+   */
+  @Override
+  public RelNode convert(RelNode rel) {
+    final Match logicalMatch = (Match) rel;
+    final RelNode logicalInput = logicalMatch.getInput();
+
+    // Computed in the same order in which the original constructor arguments were evaluated.
+    final RelOptCluster cluster = logicalMatch.getCluster();
+    final RelTraitSet beamTraits =
+        logicalMatch.getTraitSet().replace(BeamLogicalConvention.INSTANCE);
+    final RelNode beamInput =
+        convert(logicalInput, logicalInput.getTraitSet().replace(BeamLogicalConvention.INSTANCE));
+
+    return new BeamMatchRel(
+        cluster,
+        beamTraits,
+        beamInput,
+        logicalMatch.getRowType(),
+        logicalMatch.getPattern(),
+        logicalMatch.isStrictStart(),
+        logicalMatch.isStrictEnd(),
+        logicalMatch.getPatternDefinitions(),
+        logicalMatch.getMeasures(),
+        logicalMatch.getAfter(),
+        logicalMatch.getSubsets(),
+        logicalMatch.isAllRows(),
+        logicalMatch.getPartitionKeys(),
+        logicalMatch.getOrderKeys(),
+        logicalMatch.getInterval());
+  }
+}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/schema/BeamTableUtils.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/schema/BeamTableUtils.java
index c0ab5eb..c2a03df 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/schema/BeamTableUtils.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/schema/BeamTableUtils.java
@@ -151,10 +151,16 @@
case INT32:
return Integer.valueOf(raw);
case INT64:
+ if (raw.equals("")) {
+ return null;
+ }
return Long.valueOf(raw);
case FLOAT:
return Float.valueOf(raw);
case DOUBLE:
+ if (raw.equals("")) {
+ return null;
+ }
return Double.valueOf(raw);
default:
throw new UnsupportedOperationException(
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/utils/CalciteUtils.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/utils/CalciteUtils.java
index acb4ee1..6ded492 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/utils/CalciteUtils.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/utils/CalciteUtils.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.sdk.extensions.sql.impl.utils;
+import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import java.util.Date;
import java.util.Map;
@@ -53,15 +54,6 @@
}
}
- /** A LogicalType corresponding to TIMESTAMP_WITH_LOCAL_TIME_ZONE. */
- public static class TimestampWithLocalTzType extends PassThroughLogicalType<Instant> {
- public static final String IDENTIFIER = "SqlTimestampWithLocalTzType";
-
- public TimestampWithLocalTzType() {
- super(IDENTIFIER, FieldType.STRING, "", FieldType.DATETIME);
- }
- }
-
/** A LogicalType corresponding to CHAR. */
public static class CharType extends PassThroughLogicalType<String> {
public static final String IDENTIFIER = "SqlCharType";
@@ -82,7 +74,7 @@
return logicalId.equals(SqlTypes.DATE.getIdentifier())
|| logicalId.equals(SqlTypes.TIME.getIdentifier())
|| logicalId.equals(TimeWithLocalTzType.IDENTIFIER)
- || logicalId.equals(TimestampWithLocalTzType.IDENTIFIER);
+ || logicalId.equals(SqlTypes.DATETIME.getIdentifier());
}
return false;
}
@@ -121,8 +113,9 @@
FieldType.logicalType(new TimeWithLocalTzType());
public static final FieldType TIMESTAMP = FieldType.DATETIME;
public static final FieldType NULLABLE_TIMESTAMP = FieldType.DATETIME.withNullable(true);
- public static final FieldType TIMESTAMP_WITH_LOCAL_TZ =
- FieldType.logicalType(new TimestampWithLocalTzType());
+ public static final FieldType TIMESTAMP_WITH_LOCAL_TZ = FieldType.logicalType(SqlTypes.DATETIME);
+ public static final FieldType NULLABLE_TIMESTAMP_WITH_LOCAL_TZ =
+ FieldType.logicalType(SqlTypes.DATETIME).withNullable(true);
private static final BiMap<FieldType, SqlTypeName> BEAM_TO_CALCITE_TYPE_MAPPING =
ImmutableBiMap.<FieldType, SqlTypeName>builder()
@@ -283,18 +276,26 @@
/**
* SQL-Java type mapping, with specified Beam rules: <br>
- * 1. redirect {@link AbstractInstant} to {@link Date} so Calcite can recognize it.
+ * 1. redirect {@link AbstractInstant} to {@link Date} so Calcite can recognize it. <br>
+ * 2. For a list, the component type is needed to create a Sql array type.
*
- * @param rawType
- * @return
+ * @param type
+ * @return Calcite RelDataType
*/
- public static RelDataType sqlTypeWithAutoCast(RelDataTypeFactory typeFactory, Type rawType) {
+ public static RelDataType sqlTypeWithAutoCast(RelDataTypeFactory typeFactory, Type type) {
// For Joda time types, return SQL type for java.util.Date.
- if (rawType instanceof Class && AbstractInstant.class.isAssignableFrom((Class<?>) rawType)) {
+ if (type instanceof Class && AbstractInstant.class.isAssignableFrom((Class<?>) type)) {
return typeFactory.createJavaType(Date.class);
- } else if (rawType instanceof Class && ByteString.class.isAssignableFrom((Class<?>) rawType)) {
+ } else if (type instanceof Class && ByteString.class.isAssignableFrom((Class<?>) type)) {
return typeFactory.createJavaType(byte[].class);
+ } else if (type instanceof ParameterizedType
+ && java.util.List.class.isAssignableFrom(
+ (Class<?>) ((ParameterizedType) type).getRawType())) {
+ ParameterizedType parameterizedType = (ParameterizedType) type;
+ Class<?> genericType = (Class<?>) parameterizedType.getActualTypeArguments()[0];
+ RelDataType collectionElementType = typeFactory.createJavaType(genericType);
+ return typeFactory.createArrayType(collectionElementType, UNLIMITED_ARRAY_SIZE);
}
- return typeFactory.createJavaType((Class) rawType);
+ return typeFactory.createJavaType((Class) type);
}
}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BeamSqlUnparseContext.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BeamSqlUnparseContext.java
index 7aa5032..f107dc3 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BeamSqlUnparseContext.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BeamSqlUnparseContext.java
@@ -38,6 +38,7 @@
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.type.SqlTypeName;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.util.BitString;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.util.TimestampString;
import org.checkerframework.checker.nullness.qual.Nullable;
public class BeamSqlUnparseContext extends SqlImplementor.SimpleContext {
@@ -69,8 +70,12 @@
public SqlNode toSql(RexProgram program, RexNode rex) {
if (rex.getKind().equals(SqlKind.LITERAL)) {
final RexLiteral literal = (RexLiteral) rex;
- SqlTypeFamily family = literal.getTypeName().getFamily();
- if (SqlTypeFamily.BINARY.equals(family)) {
+ SqlTypeName name = literal.getTypeName();
+ SqlTypeFamily family = name.getFamily();
+ if (SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE.equals(name)) {
+ TimestampString timestampString = literal.getValueAs(TimestampString.class);
+ return new SqlDateTimeLiteral(timestampString, POS);
+ } else if (SqlTypeFamily.BINARY.equals(family)) {
ByteString byteString = literal.getValueAs(ByteString.class);
BitString bitString = BitString.createFromHexString(byteString.toString(16));
return new SqlByteStringLiteral(bitString, POS);
@@ -92,6 +97,21 @@
return super.toSql(program, rex);
}
+  /**
+   * A literal carrying a {@code TimestampString} that is unparsed as {@code DATETIME '<value>'}.
+   * It is registered with the superclass under {@code SqlTypeName.TIMESTAMP}.
+   */
+  private static class SqlDateTimeLiteral extends SqlLiteral {
+
+    private final TimestampString timestampString;
+
+    SqlDateTimeLiteral(TimestampString timestampString, SqlParserPos pos) {
+      super(timestampString, SqlTypeName.TIMESTAMP, pos);
+      this.timestampString = timestampString;
+    }
+
+    @Override
+    public void unparse(SqlWriter writer, int leftPrec, int rightPrec) {
+      // Precedence arguments are not used: the literal is always written the same way.
+      final StringBuilder text = new StringBuilder("DATETIME '");
+      text.append(timestampString.toString()).append("'");
+      writer.literal(text.toString());
+    }
+  }
+
private static class SqlByteStringLiteral extends SqlLiteral {
SqlByteStringLiteral(BitString bytes, SqlParserPos pos) {
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamComplexTypeTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamComplexTypeTest.java
index 1b1641f..0519798 100644
--- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamComplexTypeTest.java
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamComplexTypeTest.java
@@ -18,6 +18,7 @@
package org.apache.beam.sdk.extensions.sql;
import java.time.LocalDate;
+import java.time.LocalDateTime;
import java.time.LocalTime;
import java.util.Arrays;
import java.util.HashMap;
@@ -368,22 +369,135 @@
}
@Test
- public void testNullDatetimeFields() {
+ public void testDatetimeFields() {
Instant current = new Instant(1561671380000L); // Long value corresponds to 27/06/2019
Schema dateTimeFieldSchema =
Schema.builder()
.addField("dateTimeField", FieldType.DATETIME)
.addNullableField("nullableDateTimeField", FieldType.DATETIME)
- .addField("timeTypeField", FieldType.logicalType(SqlTypes.TIME))
- .addNullableField("nullableTimeTypeField", FieldType.logicalType(SqlTypes.TIME))
+ .build();
+
+ Row dateTimeRow = Row.withSchema(dateTimeFieldSchema).addValues(current, null).build();
+
+ PCollection<Row> outputRow =
+ pipeline
+ .apply(Create.of(dateTimeRow))
+ .setRowSchema(dateTimeFieldSchema)
+ .apply(
+ SqlTransform.query(
+ "select EXTRACT(YEAR from dateTimeField) as yyyy, "
+ + " EXTRACT(YEAR from nullableDateTimeField) as year_with_null, "
+ + " EXTRACT(MONTH from dateTimeField) as mm, "
+ + " EXTRACT(MONTH from nullableDateTimeField) as month_with_null "
+ + " from PCOLLECTION"));
+
+ Schema outputRowSchema =
+ Schema.builder()
+ .addField("yyyy", FieldType.INT64)
+ .addNullableField("year_with_null", FieldType.INT64)
+ .addField("mm", FieldType.INT64)
+ .addNullableField("month_with_null", FieldType.INT64)
+ .build();
+
+ PAssert.that(outputRow)
+ .containsInAnyOrder(
+ Row.withSchema(outputRowSchema).addValues(2019L, null, 06L, null).build());
+
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
+ }
+
+ @Test
+ public void testSqlLogicalTypeDateFields() {
+ Schema dateTimeFieldSchema =
+ Schema.builder()
.addField("dateTypeField", FieldType.logicalType(SqlTypes.DATE))
.addNullableField("nullableDateTypeField", FieldType.logicalType(SqlTypes.DATE))
.build();
+ Row dateRow =
+ Row.withSchema(dateTimeFieldSchema).addValues(LocalDate.of(2019, 6, 27), null).build();
+
+ PCollection<Row> outputRow =
+ pipeline
+ .apply(Create.of(dateRow))
+ .setRowSchema(dateTimeFieldSchema)
+ .apply(
+ SqlTransform.query(
+ "select EXTRACT(DAY from dateTypeField) as dd, "
+ + " EXTRACT(DAY from nullableDateTypeField) as day_with_null, "
+ + " dateTypeField + interval '1' day as date_with_day_added, "
+ + " nullableDateTypeField + interval '1' day as day_added_with_null "
+ + " from PCOLLECTION"));
+
+ Schema outputRowSchema =
+ Schema.builder()
+ .addField("dd", FieldType.INT64)
+ .addNullableField("day_with_null", FieldType.INT64)
+ .addField("date_with_day_added", FieldType.logicalType(SqlTypes.DATE))
+ .addNullableField("day_added_with_null", FieldType.logicalType(SqlTypes.DATE))
+ .build();
+
+ PAssert.that(outputRow)
+ .containsInAnyOrder(
+ Row.withSchema(outputRowSchema)
+ .addValues(27L, null, LocalDate.of(2019, 6, 28), null)
+ .build());
+
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
+ }
+
+ @Test
+ public void testSqlLogicalTypeTimeFields() {
+ Schema dateTimeFieldSchema =
+ Schema.builder()
+ .addField("timeTypeField", FieldType.logicalType(SqlTypes.TIME))
+ .addNullableField("nullableTimeTypeField", FieldType.logicalType(SqlTypes.TIME))
+ .build();
+
+ Row timeRow =
+ Row.withSchema(dateTimeFieldSchema).addValues(LocalTime.of(1, 0, 0), null).build();
+
+ PCollection<Row> outputRow =
+ pipeline
+ .apply(Create.of(timeRow))
+ .setRowSchema(dateTimeFieldSchema)
+ .apply(
+ SqlTransform.query(
+ "select timeTypeField + interval '1' hour as time_with_hour_added, "
+ + " nullableTimeTypeField + interval '1' hour as hour_added_with_null, "
+ + " timeTypeField - INTERVAL '60' SECOND as time_with_seconds_added, "
+ + " nullableTimeTypeField - INTERVAL '60' SECOND as seconds_added_with_null "
+ + " from PCOLLECTION"));
+
+ Schema outputRowSchema =
+ Schema.builder()
+ .addField("time_with_hour_added", FieldType.logicalType(SqlTypes.TIME))
+ .addNullableField("hour_added_with_null", FieldType.logicalType(SqlTypes.TIME))
+ .addField("time_with_seconds_added", FieldType.logicalType(SqlTypes.TIME))
+ .addNullableField("seconds_added_with_null", FieldType.logicalType(SqlTypes.TIME))
+ .build();
+
+ PAssert.that(outputRow)
+ .containsInAnyOrder(
+ Row.withSchema(outputRowSchema)
+ .addValues(LocalTime.of(2, 0, 0), null, LocalTime.of(0, 59, 0), null)
+ .build());
+
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
+ }
+
+ @Test
+ public void testSqlLogicalTypeDatetimeFields() {
+ Schema dateTimeFieldSchema =
+ Schema.builder()
+ .addField("dateTimeField", FieldType.logicalType(SqlTypes.DATETIME))
+ .addNullableField("nullableDateTimeField", FieldType.logicalType(SqlTypes.DATETIME))
+ .build();
+
Row dateTimeRow =
Row.withSchema(dateTimeFieldSchema)
- .addValues(current, null, LocalTime.of(1, 0, 0), null, LocalDate.of(2019, 6, 27), null)
+ .addValues(LocalDateTime.of(2008, 12, 25, 15, 30, 0), null)
.build();
PCollection<Row> outputRow =
@@ -396,14 +510,14 @@
+ " EXTRACT(YEAR from nullableDateTimeField) as year_with_null, "
+ " EXTRACT(MONTH from dateTimeField) as mm, "
+ " EXTRACT(MONTH from nullableDateTimeField) as month_with_null, "
- + " timeTypeField + interval '1' hour as time_with_hour_added, "
- + " nullableTimeTypeField + interval '1' hour as hour_added_with_null, "
- + " timeTypeField - INTERVAL '60' SECOND as time_with_seconds_added, "
- + " nullableTimeTypeField - INTERVAL '60' SECOND as seconds_added_with_null, "
- + " EXTRACT(DAY from dateTypeField) as dd, "
- + " EXTRACT(DAY from nullableDateTypeField) as day_with_null, "
- + " dateTypeField + interval '1' day as date_with_day_added, "
- + " nullableDateTypeField + interval '1' day as day_added_with_null "
+ + " dateTimeField + interval '1' hour as time_with_hour_added, "
+ + " nullableDateTimeField + interval '1' hour as hour_added_with_null, "
+ + " dateTimeField - INTERVAL '60' SECOND as time_with_seconds_added, "
+ + " nullableDateTimeField - INTERVAL '60' SECOND as seconds_added_with_null, "
+ + " EXTRACT(DAY from dateTimeField) as dd, "
+ + " EXTRACT(DAY from nullableDateTimeField) as day_with_null, "
+ + " dateTimeField + interval '1' day as date_with_day_added, "
+ + " nullableDateTimeField + interval '1' day as day_added_with_null "
+ " from PCOLLECTION"));
Schema outputRowSchema =
@@ -412,31 +526,31 @@
.addNullableField("year_with_null", FieldType.INT64)
.addField("mm", FieldType.INT64)
.addNullableField("month_with_null", FieldType.INT64)
- .addField("time_with_hour_added", FieldType.logicalType(SqlTypes.TIME))
- .addNullableField("hour_added_with_null", FieldType.logicalType(SqlTypes.TIME))
- .addField("time_with_seconds_added", FieldType.logicalType(SqlTypes.TIME))
- .addNullableField("seconds_added_with_null", FieldType.logicalType(SqlTypes.TIME))
+ .addField("time_with_hour_added", FieldType.logicalType(SqlTypes.DATETIME))
+ .addNullableField("hour_added_with_null", FieldType.logicalType(SqlTypes.DATETIME))
+ .addField("time_with_seconds_added", FieldType.logicalType(SqlTypes.DATETIME))
+ .addNullableField("seconds_added_with_null", FieldType.logicalType(SqlTypes.DATETIME))
.addField("dd", FieldType.INT64)
.addNullableField("day_with_null", FieldType.INT64)
- .addField("date_with_day_added", FieldType.logicalType(SqlTypes.DATE))
- .addNullableField("day_added_with_null", FieldType.logicalType(SqlTypes.DATE))
+ .addField("date_with_day_added", FieldType.logicalType(SqlTypes.DATETIME))
+ .addNullableField("day_added_with_null", FieldType.logicalType(SqlTypes.DATETIME))
.build();
PAssert.that(outputRow)
.containsInAnyOrder(
Row.withSchema(outputRowSchema)
.addValues(
- 2019L,
+ 2008L,
null,
- 06L,
+ 12L,
null,
- LocalTime.of(2, 0, 0),
+ LocalDateTime.of(2008, 12, 25, 16, 30, 0),
null,
- LocalTime.of(0, 59, 0),
+ LocalDateTime.of(2008, 12, 25, 15, 29, 0),
null,
- 27L,
+ 25L,
null,
- LocalDate.of(2019, 6, 28),
+ LocalDateTime.of(2008, 12, 26, 15, 30, 0),
null)
.build());
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlDslUdfUdafTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlDslUdfUdafTest.java
index 75e8a08..c2afc5d 100644
--- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlDslUdfUdafTest.java
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlDslUdfUdafTest.java
@@ -23,6 +23,7 @@
import com.google.auto.service.AutoService;
import java.sql.Timestamp;
+import java.util.Arrays;
import java.util.Map;
import java.util.stream.IntStream;
import org.apache.beam.sdk.extensions.sql.impl.BeamCalciteTable;
@@ -30,6 +31,7 @@
import org.apache.beam.sdk.extensions.sql.meta.provider.UdfUdafProvider;
import org.apache.beam.sdk.extensions.sql.meta.provider.test.TestBoundedTable;
import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.Schema.FieldType;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.transforms.Combine.CombineFn;
import org.apache.beam.sdk.transforms.SerializableFunction;
@@ -101,6 +103,29 @@
pipeline.run().waitUntilFinish();
}
+ @Test
+ public void testListUdf() throws Exception { // covers UDFs that return or accept SQL arrays
+ Schema resultType1 = Schema.builder().addArrayField("array_field", FieldType.INT64).build();
+ Row row1 = Row.withSchema(resultType1).addValue(Arrays.asList(1L)).build();
+ String sql1 = "SELECT test_array(1)"; // case 1: TestReturnTypeList should yield the array [1]
+ PCollection<Row> result1 =
+ boundedInput1.apply(
+ "testArrayUdf",
+ SqlTransform.query(sql1).registerUdf("test_array", TestReturnTypeList.class));
+ PAssert.that(result1).containsInAnyOrder(row1);
+ // Case 2: array_length (TestListLength) receives ARRAY[1, 2, 3] and should return 3.
+ Schema resultType2 = Schema.builder().addInt32Field("int_field").build();
+ Row row2 = Row.withSchema(resultType2).addValue(3).build();
+ String sql2 = "select array_length(ARRAY[1, 2, 3])";
+ PCollection<Row> result2 =
+ boundedInput1.apply(
+ "testArrayUdf2",
+ SqlTransform.query(sql2).registerUdf("array_length", TestListLength.class));
+ PAssert.that(result2).containsInAnyOrder(row2);
+ // A single run evaluates both assertions registered above.
+ pipeline.run().waitUntilFinish();
+ }
+
/** Test that an indirect subclass of a {@link CombineFn} works as a UDAF. BEAM-3777 */
@Test
public void testUdafMultiLevelDescendent() {
@@ -347,6 +372,20 @@
}
}
+ /** A UDF that wraps its {@code Long} argument in a one-element list, to test array return types. */
+ public static final class TestReturnTypeList implements BeamSqlUdf {
+ public static java.util.List<Long> eval(Long i) {
+ return Arrays.asList(i);
+ }
+ }
+
+ /** A UDF that returns the size of its {@code List<Long>} argument, to test array argument types. */
+ public static final class TestListLength implements BeamSqlUdf {
+ public static Integer eval(java.util.List<Long> i) {
+ return i.size();
+ }
+ }
+
/**
* UDF to test support for {@link
* org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.schema.TableMacro}.
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamMatchRelTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamMatchRelTest.java
new file mode 100644
index 0000000..097ca19
--- /dev/null
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamMatchRelTest.java
@@ -0,0 +1,264 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.extensions.sql.impl.rel;
+
+import static org.apache.beam.sdk.extensions.sql.impl.rel.BaseRelTest.compilePipeline;
+import static org.apache.beam.sdk.extensions.sql.impl.rel.BaseRelTest.registerTable;
+
+import org.apache.beam.sdk.extensions.sql.TestUtils;
+import org.apache.beam.sdk.extensions.sql.meta.provider.test.TestBoundedTable;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.testing.PAssert;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.Row;
+import org.junit.Ignore;
+import org.junit.Rule;
+import org.junit.Test;
+
+/** Test for {@code BeamMatchRel}. */
+public class BeamMatchRelTest {
+
+ @Rule public final TestPipeline pipeline = TestPipeline.create();
+
+ @Test
+ public void matchLogicalPlanTest() { // basic MATCH_RECOGNIZE with a fixed three-step pattern
+ Schema schemaType =
+ Schema.builder()
+ .addInt32Field("id")
+ .addStringField("name")
+ .addInt32Field("proctime")
+ .build();
+ // Three rows in partition id = 1, named a, b, c with proctime 1, 2, 3.
+ registerTable(
+ "TestTable", TestBoundedTable.of(schemaType).addRows(1, "a", 1, 1, "b", 2, 1, "c", 3));
+ // PATTERN (A B C): a row named 'a', then 'b', then 'c', per id and ordered by proctime.
+ String sql =
+ "SELECT * "
+ + "FROM TestTable "
+ + "MATCH_RECOGNIZE ("
+ + "PARTITION BY id "
+ + "ORDER BY proctime "
+ + "ALL ROWS PER MATCH "
+ + "PATTERN (A B C) "
+ + "DEFINE "
+ + "A AS name = 'a', "
+ + "B AS name = 'b', "
+ + "C AS name = 'c' "
+ + ") AS T";
+ // compilePipeline is the static helper imported from BaseRelTest.
+ PCollection<Row> result = compilePipeline(sql, pipeline);
+ // With ALL ROWS PER MATCH, all three input rows are expected back unchanged.
+ PAssert.that(result)
+ .containsInAnyOrder(
+ TestUtils.RowsBuilder.of(
+ Schema.FieldType.INT32, "id",
+ Schema.FieldType.STRING, "name",
+ Schema.FieldType.INT32, "proctime")
+ .addRows(1, "a", 1, 1, "b", 2, 1, "c", 3)
+ .getRows());
+
+ pipeline.run().waitUntilFinish();
+ }
+
+ @Test
+ public void matchQuantifierTest() { // MATCH_RECOGNIZE with a quantified pattern variable (A+)
+ Schema schemaType =
+ Schema.builder()
+ .addInt32Field("id")
+ .addStringField("name")
+ .addInt32Field("proctime")
+ .build();
+ // Partition id = 1 holds two 'a' rows (proctime 1, 2) followed by 'b' (3) and 'c' (4).
+ registerTable(
+ "TestTable",
+ TestBoundedTable.of(schemaType).addRows(1, "a", 1, 1, "a", 2, 1, "b", 3, 1, "c", 4));
+ // PATTERN (A+ B C): one or more 'a' rows, then a 'b' row, then a 'c' row.
+ String sql =
+ "SELECT * "
+ + "FROM TestTable "
+ + "MATCH_RECOGNIZE ("
+ + "PARTITION BY id "
+ + "ORDER BY proctime "
+ + "ALL ROWS PER MATCH "
+ + "PATTERN (A+ B C) "
+ + "DEFINE "
+ + "A AS name = 'a', "
+ + "B AS name = 'b', "
+ + "C AS name = 'c' "
+ + ") AS T";
+
+ PCollection<Row> result = compilePipeline(sql, pipeline);
+ // All four input rows are expected in the output.
+ PAssert.that(result)
+ .containsInAnyOrder(
+ TestUtils.RowsBuilder.of(
+ Schema.FieldType.INT32, "id",
+ Schema.FieldType.STRING, "name",
+ Schema.FieldType.INT32, "proctime")
+ .addRows(1, "a", 1, 1, "a", 2, 1, "b", 3, 1, "c", 4)
+ .getRows());
+
+ pipeline.run().waitUntilFinish();
+ }
+
+ @Test
+ public void matchMeasuresTest() { // MATCH_RECOGNIZE with a MEASURES clause over several ids
+ Schema schemaType =
+ Schema.builder()
+ .addInt32Field("id")
+ .addStringField("name")
+ .addInt32Field("proctime")
+ .build();
+ // Eleven rows given as flattened (id, name, proctime) triples, covering ids 1, 2 and 5.
+ registerTable(
+ "TestTable",
+ TestBoundedTable.of(schemaType)
+ .addRows(
+ 1, "a", 1, 1, "a", 2, 1, "b", 3, 1, "c", 4, 1, "b", 8, 1, "a", 7, 1, "c", 9, 2, "a",
+ 6, 2, "b", 10, 2, "c", 11, 5, "a", 0));
+ // MEASURES exposes the proctime of the last A row, the B row and the C row as output columns.
+ String sql =
+ "SELECT * "
+ + "FROM TestTable "
+ + "MATCH_RECOGNIZE ("
+ + "PARTITION BY id "
+ + "ORDER BY proctime "
+ + "MEASURES "
+ + "LAST (A.proctime) AS atime, "
+ + "B.proctime AS btime, "
+ + "C.proctime AS ctime "
+ + "PATTERN (A+ B C) "
+ + "DEFINE "
+ + "A AS name = 'a', "
+ + "B AS name = 'b', "
+ + "C AS name = 'c' "
+ + ") AS T";
+
+ PCollection<Row> result = compilePipeline(sql, pipeline);
+ // Expected (id, atime, btime, ctime) rows: two for id = 1, one for id = 2, none for id = 5.
+ PAssert.that(result)
+ .containsInAnyOrder(
+ TestUtils.RowsBuilder.of(
+ Schema.FieldType.INT32, "id",
+ Schema.FieldType.INT32, "T.atime",
+ Schema.FieldType.INT32, "T.btime",
+ Schema.FieldType.INT32, "T.ctime")
+ .addRows(1, 2, 3, 4, 1, 7, 8, 9, 2, 6, 10, 11)
+ .getRows());
+
+ pipeline.run().waitUntilFinish();
+ }
+
+ @Ignore("NFA has not been implemented for now.")
+ @Test
+ public void matchNFATest() { // V-shaped price pattern; disabled until NFA matching exists
+ Schema schemaType =
+ Schema.builder()
+ .addStringField("Symbol")
+ .addDateTimeField("TradeDay")
+ .addInt32Field("Price")
+ .build();
+
+ registerTable(
+ "Ticker",
+ TestBoundedTable.of(schemaType)
+ .addRows(
+ "a",
+ "2020-07-01",
+ 32, // 1st A
+ "a",
+ "2020-06-01",
+ 34,
+ "a",
+ "2020-07-02",
+ 31, // B
+ "a",
+ "2020-08-30",
+ 30, // B
+ "a",
+ "2020-08-31",
+ 35, // C
+ "a",
+ "2020-10-01",
+ 28,
+ "a",
+ "2020-10-15",
+ 30, // 2nd A
+ "a",
+ "2020-11-01",
+ 22, // B
+ "a",
+ "2020-11-08",
+ 29, // C
+ "a",
+ "2020-12-10",
+ 30, // C
+ "b",
+ "2020-12-01",
+ 22,
+ "c",
+ "2020-05-16",
+ 27, // A
+ "c",
+ "2020-09-14",
+ 26, // B
+ "c",
+ "2020-10-13",
+ 30)); // C
+ // Rows are flattened (Symbol, TradeDay, Price) triples; the trailing comments mark intended roles.
+ // match `V` shapes in prices
+ String sql =
+ "SELECT M.Symbol,"
+ + " M.Matchno,"
+ + " M.Startp,"
+ + " M.Bottomp,"
+ + " M.Endp,"
+ + " M.Avgp "
+ + "FROM Ticker "
+ + "MATCH_RECOGNIZE ("
+ + "PARTITION BY Symbol "
+ + "ORDER BY Tradeday "
+ + "MEASURES "
+ + "MATCH_NUMBER() AS Matchno, "
+ + "A.price AS Startp, "
+ + "LAST (B.Price) AS Bottomp, "
+ + "LAST (C.Price) AS ENDp, "
+ + "AVG (U.Price) AS Avgp "
+ + "AFTER MATCH SKIP PAST LAST ROW "
+ + "PATTERN (A B+ C+) "
+ + "SUBSET U = (A, B, C) "
+ + "DEFINE "
+ + "B AS B.Price < PREV (B.Price), "
+ + "C AS C.Price > PREV (C.Price) "
+ + ") AS M";
+ // The alias after MATCH_RECOGNIZE must match the "M." qualifier used in the select list.
+ PCollection<Row> result = compilePipeline(sql, pipeline);
+ // TODO: expectation below still uses (id, name, proctime); replace with the six selected columns.
+ PAssert.that(result)
+ .containsInAnyOrder(
+ TestUtils.RowsBuilder.of(
+ Schema.FieldType.INT32, "id",
+ Schema.FieldType.STRING, "name",
+ Schema.FieldType.INT32, "proctime")
+ .addRows(1, "a", 1, 1, "b", 2, 1, "c", 3)
+ .getRows());
+
+ pipeline.run().waitUntilFinish();
+ }
+}
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/schema/BeamSqlRowCoderTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/schema/BeamSqlRowCoderTest.java
index f0854bc..d4819fc 100644
--- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/schema/BeamSqlRowCoderTest.java
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/schema/BeamSqlRowCoderTest.java
@@ -19,6 +19,7 @@
import java.math.BigDecimal;
import java.time.LocalDate;
+import java.time.LocalDateTime;
import java.time.LocalTime;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils;
@@ -51,6 +52,7 @@
.add("col_string_varchar", SqlTypeName.VARCHAR)
.add("col_time", SqlTypeName.TIME)
.add("col_date", SqlTypeName.DATE)
+ .add("col_timestamp_with_local_time_zone", SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE)
.add("col_timestamp", SqlTypeName.TIMESTAMP)
.add("col_boolean", SqlTypeName.BOOLEAN)
.build();
@@ -70,6 +72,7 @@
"hello",
LocalTime.now(),
LocalDate.now(),
+ LocalDateTime.now(),
DateTime.now().toInstant(),
true)
.build();
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/DateTimeUtils.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/DateTimeUtils.java
index cc25380..5186099 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/DateTimeUtils.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/DateTimeUtils.java
@@ -20,11 +20,13 @@
import com.google.zetasql.CivilTimeEncoder;
import com.google.zetasql.Value;
import io.grpc.Status;
+import java.time.LocalDateTime;
import java.time.LocalTime;
import java.util.List;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.avatica.util.TimeUnit;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.util.DateString;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.util.TimeString;
+import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.util.TimestampString;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Splitter;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists;
@@ -161,10 +163,17 @@
}
}
- public static TimeString convertTimeValueToTimeString(Value value) {
- LocalTime localTime = CivilTimeEncoder.decodePacked64TimeNanosAsJavaTime(value.getTimeValue());
- return new TimeString(localTime.getHour(), localTime.getMinute(), localTime.getSecond())
- .withNanos(localTime.getNano());
+ public static TimestampString convertDateTimeValueToTimeStampString(Value value) { // DATETIME
+ LocalDateTime dateTime =
+ CivilTimeEncoder.decodePacked96DatetimeNanosAsJavaTime(value.getDatetimeValue());
+ return new TimestampString(
+ dateTime.getYear(),
+ dateTime.getMonthValue(),
+ dateTime.getDayOfMonth(),
+ dateTime.getHour(),
+ dateTime.getMinute(),
+ dateTime.getSecond())
+ .withNanos(dateTime.getNano()); // constructor stops at seconds; nanos are attached here
+ }
// dates are represented as an int32 value, indicating the offset
@@ -174,6 +183,12 @@
return DateString.fromDaysSinceEpoch(value.getDateValue());
}
+ public static TimeString convertTimeValueToTimeString(Value value) { // ZetaSQL TIME value
+ LocalTime localTime = CivilTimeEncoder.decodePacked64TimeNanosAsJavaTime(value.getTimeValue());
+ return new TimeString(localTime.getHour(), localTime.getMinute(), localTime.getSecond())
+ .withNanos(localTime.getNano()); // constructor stops at seconds; nanos are attached here
+ }
+
public static Value parseDateToValue(String dateString) {
DateTime dateTime = parseDate(dateString);
return Value.createDateValue((int) (dateTime.getMillis() / MILLIS_PER_DAY));
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SupportedZetaSqlBuiltinFunctions.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SupportedZetaSqlBuiltinFunctions.java
index 6ccfb26..eb8d51f 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SupportedZetaSqlBuiltinFunctions.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SupportedZetaSqlBuiltinFunctions.java
@@ -197,23 +197,23 @@
// Time functions
FunctionSignatureId.FN_CURRENT_DATE, // current_date
- // FunctionSignatureId.FN_CURRENT_DATETIME, // current_datetime
+ FunctionSignatureId.FN_CURRENT_DATETIME, // current_datetime
FunctionSignatureId.FN_CURRENT_TIME, // current_time
FunctionSignatureId.FN_CURRENT_TIMESTAMP, // current_timestamp
FunctionSignatureId.FN_DATE_ADD_DATE, // date_add
- // FunctionSignatureId.FN_DATETIME_ADD, // datetime_add
+ FunctionSignatureId.FN_DATETIME_ADD, // datetime_add
FunctionSignatureId.FN_TIME_ADD, // time_add
FunctionSignatureId.FN_TIMESTAMP_ADD, // timestamp_add
FunctionSignatureId.FN_DATE_DIFF_DATE, // date_diff
- // FunctionSignatureId.FN_DATETIME_DIFF, // datetime_diff
+ FunctionSignatureId.FN_DATETIME_DIFF, // datetime_diff
FunctionSignatureId.FN_TIME_DIFF, // time_diff
FunctionSignatureId.FN_TIMESTAMP_DIFF, // timestamp_diff
FunctionSignatureId.FN_DATE_SUB_DATE, // date_sub
- // FunctionSignatureId.FN_DATETIME_SUB, // datetime_sub
+ FunctionSignatureId.FN_DATETIME_SUB, // datetime_sub
FunctionSignatureId.FN_TIME_SUB, // time_sub
FunctionSignatureId.FN_TIMESTAMP_SUB, // timestamp_sub
FunctionSignatureId.FN_DATE_TRUNC_DATE, // date_trunc
- // FunctionSignatureId.FN_DATETIME_TRUNC, // datetime_trunc
+ FunctionSignatureId.FN_DATETIME_TRUNC, // datetime_trunc
FunctionSignatureId.FN_TIME_TRUNC, // time_trunc
FunctionSignatureId.FN_TIMESTAMP_TRUNC, // timestamp_trunc
FunctionSignatureId.FN_DATE_FROM_UNIX_DATE, // date_from_unix_date
@@ -234,19 +234,18 @@
FunctionSignatureId.FN_UNIX_MILLIS_FROM_TIMESTAMP,
// FunctionSignatureId.FN_UNIX_MICROS_FROM_TIMESTAMP,
FunctionSignatureId.FN_DATE_FROM_TIMESTAMP, // date
- // FunctionSignatureId.FN_DATE_FROM_DATETIME, // date
+ FunctionSignatureId.FN_DATE_FROM_DATETIME, // date
FunctionSignatureId.FN_DATE_FROM_YEAR_MONTH_DAY, // date
FunctionSignatureId.FN_TIMESTAMP_FROM_STRING, // timestamp
FunctionSignatureId.FN_TIMESTAMP_FROM_DATE, // timestamp
- // FunctionSignatureId.FN_TIMESTAMP_FROM_DATETIME, // timestamp
+ FunctionSignatureId.FN_TIMESTAMP_FROM_DATETIME, // timestamp
FunctionSignatureId.FN_TIME_FROM_HOUR_MINUTE_SECOND, // time
FunctionSignatureId.FN_TIME_FROM_TIMESTAMP, // time
- // FunctionSignatureId.FN_TIME_FROM_DATETIME, // time
- // FunctionSignatureId.FN_DATETIME_FROM_DATE_AND_TIME, // datetime
- // FunctionSignatureId.FN_DATETIME_FROM_YEAR_MONTH_DAY_HOUR_MINUTE_SECOND, // datetime
- // FunctionSignatureId.FN_DATETIME_FROM_TIMESTAMP, // datetime
- // FunctionSignatureId.FN_DATETIME_FROM_DATE, // datetime
-
+ FunctionSignatureId.FN_TIME_FROM_DATETIME, // time
+ FunctionSignatureId.FN_DATETIME_FROM_DATE_AND_TIME, // datetime
+ FunctionSignatureId.FN_DATETIME_FROM_YEAR_MONTH_DAY_HOUR_MINUTE_SECOND, // datetime
+ FunctionSignatureId.FN_DATETIME_FROM_TIMESTAMP, // datetime
+ FunctionSignatureId.FN_DATETIME_FROM_DATE, // datetime
FunctionSignatureId.FN_STRING_FROM_TIMESTAMP, // string
// Signatures for extracting date parts, taking a date/timestamp
@@ -258,23 +257,24 @@
// Signatures specific to extracting the DATE date part from a DATETIME or a
// TIMESTAMP.
- // FunctionSignatureId.FN_EXTRACT_DATE_FROM_DATETIME, // $extract_date
+ FunctionSignatureId.FN_EXTRACT_DATE_FROM_DATETIME, // $extract_date
FunctionSignatureId.FN_EXTRACT_DATE_FROM_TIMESTAMP, // $extract_date
// Signatures specific to extracting the TIME date part from a DATETIME or a
// TIMESTAMP.
- // FunctionSignatureId.FN_EXTRACT_TIME_FROM_DATETIME, // $extract_time
+ FunctionSignatureId.FN_EXTRACT_TIME_FROM_DATETIME, // $extract_time
FunctionSignatureId.FN_EXTRACT_TIME_FROM_TIMESTAMP, // $extract_time
// Signature specific to extracting the DATETIME date part from a TIMESTAMP.
- // FunctionSignatureId.FN_EXTRACT_DATETIME_FROM_TIMESTAMP, // $extract_datetime
+ FunctionSignatureId.FN_EXTRACT_DATETIME_FROM_TIMESTAMP, // $extract_datetime
+ // Signatures for formatting and parsing
FunctionSignatureId.FN_FORMAT_DATE, // format_date
- // FunctionSignatureId.FN_FORMAT_DATETIME, // format_datetime
+ FunctionSignatureId.FN_FORMAT_DATETIME, // format_datetime
FunctionSignatureId.FN_FORMAT_TIME, // format_time
FunctionSignatureId.FN_FORMAT_TIMESTAMP, // format_timestamp
FunctionSignatureId.FN_PARSE_DATE, // parse_date
- // FunctionSignatureId.FN_PARSE_DATETIME, // parse_datetime
+ FunctionSignatureId.FN_PARSE_DATETIME, // parse_datetime
FunctionSignatureId.FN_PARSE_TIME, // parse_time
FunctionSignatureId.FN_PARSE_TIMESTAMP, // parse_timestamp
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlBeamTranslationUtils.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlBeamTranslationUtils.java
index 073aa41..dbab34a 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlBeamTranslationUtils.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlBeamTranslationUtils.java
@@ -28,6 +28,7 @@
import com.google.zetasql.ZetaSQLType.TypeKind;
import java.math.BigDecimal;
import java.time.LocalDate;
+import java.time.LocalDateTime;
import java.time.LocalTime;
import java.util.ArrayList;
import java.util.List;
@@ -36,6 +37,7 @@
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.Field;
import org.apache.beam.sdk.schemas.Schema.FieldType;
+import org.apache.beam.sdk.schemas.logicaltypes.DateTime;
import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.math.LongMath;
@@ -45,7 +47,6 @@
* Utility methods for ZetaSQL <=> Beam translation.
*
* <p>Unsupported ZetaSQL types: INT32, UINT32, UINT64, FLOAT, ENUM, PROTO, GEOGRAPHY
- * TODO[BEAM-10238]: support ZetaSQL types: TIME, DATETIME, NUMERIC
*/
@Internal
public final class ZetaSqlBeamTranslationUtils {
@@ -106,6 +107,9 @@
} else if (SqlTypes.TIME.getIdentifier().equals(identifier)) {
// Time type
return TypeFactory.createSimpleType(TypeKind.TYPE_TIME);
+ } else if (SqlTypes.DATETIME.getIdentifier().equals(identifier)) {
+ // DateTime type
+ return TypeFactory.createSimpleType(TypeKind.TYPE_DATETIME);
} else {
throw new UnsupportedOperationException("Unknown Beam logical type: " + identifier);
}
@@ -184,6 +188,20 @@
} else { // input type
return Value.createTimeValue(CivilTimeEncoder.encodePacked64TimeNanos((LocalTime) object));
}
+ } else if (SqlTypes.DATETIME.getIdentifier().equals(identifier)) {
+ // DateTime value
+ LocalDateTime datetime;
+ if (object instanceof Row) { // base type
+ datetime =
+ LocalDateTime.of(
+ LocalDate.ofEpochDay(((Row) object).getInt64(DateTime.DATE_FIELD_NAME)),
+ LocalTime.ofNanoOfDay(((Row) object).getInt64(DateTime.TIME_FIELD_NAME)));
+ } else { // input type
+ datetime = (LocalDateTime) object;
+ }
+ // TODO[BEAM-10611]: Create ZetaSQL Value.createDatetimeValue(LocalDateTime) function
+ return Value.createDatetimeValue(
+ CivilTimeEncoder.encodePacked64DatetimeSeconds(datetime), datetime.getNano());
} else {
throw new UnsupportedOperationException("Unknown Beam logical type: " + identifier);
}
@@ -208,6 +226,8 @@
return FieldType.logicalType(SqlTypes.DATE).withNullable(true);
case TYPE_TIME:
return FieldType.logicalType(SqlTypes.TIME).withNullable(true);
+ case TYPE_DATETIME:
+ return FieldType.logicalType(SqlTypes.DATETIME).withNullable(true);
case TYPE_TIMESTAMP:
return FieldType.DATETIME.withNullable(true);
case TYPE_ARRAY:
@@ -314,6 +334,9 @@
} else if (SqlTypes.TIME.getIdentifier().equals(identifier)) {
// Time value
return CivilTimeEncoder.decodePacked64TimeNanosAsJavaTime(value.getTimeValue());
+ } else if (SqlTypes.DATETIME.getIdentifier().equals(identifier)) {
+ // DateTime value
+ return CivilTimeEncoder.decodePacked96DatetimeNanosAsJavaTime(value.getDatetimeValue());
} else {
throw new UnsupportedOperationException("Unknown Beam logical type: " + identifier);
}
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlCalciteTranslationUtils.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlCalciteTranslationUtils.java
index d8394ab..81bc142 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlCalciteTranslationUtils.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlCalciteTranslationUtils.java
@@ -20,6 +20,7 @@
import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_BOOL;
import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_BYTES;
import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_DATE;
+import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_DATETIME;
import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_DOUBLE;
import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_INT64;
import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_NUMERIC;
@@ -46,7 +47,6 @@
* Utility methods for ZetaSQL <=> Calcite translation.
*
* <p>Unsupported ZetaSQL types: INT32, UINT32, UINT64, FLOAT, ENUM, PROTO, GEOGRAPHY
- * TODO[BEAM-10238]: support ZetaSQL types: TIME, DATETIME, NUMERIC
*/
@Internal
public final class ZetaSqlCalciteTranslationUtils {
@@ -72,6 +72,8 @@
return TypeFactory.createSimpleType(TYPE_DATE);
case TIME:
return TypeFactory.createSimpleType(TYPE_TIME);
+ case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
+ return TypeFactory.createSimpleType(TYPE_DATETIME);
case TIMESTAMP:
return TypeFactory.createSimpleType(TYPE_TIMESTAMP);
case ARRAY:
@@ -107,6 +109,8 @@
return SqlTypeName.DATE;
case TYPE_TIME:
return SqlTypeName.TIME;
+ case TYPE_DATETIME:
+ return SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE;
case TYPE_TIMESTAMP:
// TODO: handle timestamp with time zone.
return SqlTypeName.TIMESTAMP;
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/ExpressionConverter.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/ExpressionConverter.java
index 19f18cd..fd5651f 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/ExpressionConverter.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/ExpressionConverter.java
@@ -24,6 +24,7 @@
import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_INT64;
import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_STRING;
import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_TIMESTAMP;
+import static org.apache.beam.sdk.extensions.sql.zetasql.DateTimeUtils.convertDateTimeValueToTimeStampString;
import static org.apache.beam.sdk.extensions.sql.zetasql.DateTimeUtils.convertDateValueToDateString;
import static org.apache.beam.sdk.extensions.sql.zetasql.DateTimeUtils.convertTimeValueToTimeString;
import static org.apache.beam.sdk.extensions.sql.zetasql.DateTimeUtils.safeMicrosToMillis;
@@ -545,7 +546,7 @@
case TYPE_TIMESTAMP:
case TYPE_DATE:
case TYPE_TIME:
- // case TYPE_DATETIME:
+ case TYPE_DATETIME:
case TYPE_BYTES:
case TYPE_ARRAY:
case TYPE_STRUCT:
@@ -709,7 +710,7 @@
case TYPE_TIMESTAMP:
case TYPE_DATE:
case TYPE_TIME:
- // case TYPE_DATETIME:
+ case TYPE_DATETIME:
case TYPE_BYTES:
ret = convertSimpleValueToRexNode(type.getKind(), value);
break;
@@ -792,9 +793,7 @@
rexBuilder()
.makeCall(
SqlOperators.createZetaSqlFunction(wrapperFun, returnType.getSqlTypeName()),
- ImmutableList.of(
- rexBuilder()
- .makeApproxLiteral(new BigDecimal(Math.random()), returnType)));
+ rexBuilder().makeApproxLiteral(new BigDecimal(Math.random()), returnType));
;
} else {
ret =
@@ -823,12 +822,11 @@
SqlOperators.createZetaSqlFunction(
BeamBigQuerySqlDialect.NUMERIC_LITERAL_FUNCTION,
ZetaSqlCalciteTranslationUtils.toCalciteTypeName(kind)),
- ImmutableList.of(
- rexBuilder()
- .makeExactLiteral(
- value.getNumericValue(),
- ZetaSqlCalciteTranslationUtils.toSimpleRelDataType(
- kind, rexBuilder()))));
+ rexBuilder()
+ .makeExactLiteral(
+ value.getNumericValue(),
+ ZetaSqlCalciteTranslationUtils.toSimpleRelDataType(
+ kind, rexBuilder())));
break;
case TYPE_TIMESTAMP:
ret =
@@ -850,6 +848,15 @@
// TODO: Doing micro to mills truncation, need to throw exception.
ret = rexBuilder().makeLiteral(convertTimeValueToTimeString(value), timeType, false);
break;
+ case TYPE_DATETIME:
+ ret =
+ rexBuilder()
+ .makeTimestampWithLocalTimeZoneLiteral(
+ convertDateTimeValueToTimeStampString(value),
+ typeFactory()
+ .getTypeSystem()
+ .getMaxPrecision(SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE));
+ break;
case TYPE_BYTES:
ret = rexBuilder().makeBinaryLiteral(new ByteString(value.getBytesValue().toByteArray()));
break;
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/TableScanConverter.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/TableScanConverter.java
index 25f40ed..9137b94 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/TableScanConverter.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/translation/TableScanConverter.java
@@ -17,17 +17,13 @@
*/
package org.apache.beam.sdk.extensions.sql.zetasql.translation;
-import static com.google.zetasql.ZetaSQLType.TypeKind.TYPE_DATETIME;
import static org.apache.beam.vendor.calcite.v1_20_0.com.google.common.base.Preconditions.checkNotNull;
-import com.google.zetasql.ZetaSQLType.TypeKind;
-import com.google.zetasql.resolvedast.ResolvedColumn;
import com.google.zetasql.resolvedast.ResolvedNodes.ResolvedTableScan;
import java.util.List;
import java.util.Properties;
import org.apache.beam.sdk.extensions.sql.zetasql.TableResolution;
import org.apache.beam.vendor.calcite.v1_20_0.com.google.common.collect.ImmutableList;
-import org.apache.beam.vendor.calcite.v1_20_0.com.google.common.collect.ImmutableSet;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.config.CalciteConnectionConfigImpl;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.jdbc.CalciteSchema;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.plan.RelOptCluster;
@@ -44,16 +40,12 @@
/** Converts table scan. */
class TableScanConverter extends RelConverter<ResolvedTableScan> {
- private static final ImmutableSet<TypeKind> UNSUPPORTED_DATA_TYPES =
- ImmutableSet.of(TYPE_DATETIME);
-
TableScanConverter(ConversionContext context) {
super(context);
}
@Override
public RelNode convert(ResolvedTableScan zetaNode, List<RelNode> inputs) {
- checkTableScanSchema(zetaNode.getColumnList());
List<String> tablePath = getTablePath(zetaNode.getTable());
@@ -115,15 +107,4 @@
}
};
}
-
- private void checkTableScanSchema(List<ResolvedColumn> columnList) {
- if (columnList != null) {
- for (ResolvedColumn resolvedColumn : columnList) {
- if (UNSUPPORTED_DATA_TYPES.contains(resolvedColumn.getType().getKind())) {
- throw new UnsupportedOperationException(
- "Does not support " + UNSUPPORTED_DATA_TYPES + " types in source tables");
- }
- }
- }
- }
}
diff --git a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/TestInput.java b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/TestInput.java
index a761d9f..2edb4d0 100644
--- a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/TestInput.java
+++ b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/TestInput.java
@@ -21,6 +21,7 @@
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
+import java.time.LocalDateTime;
import java.time.LocalTime;
import java.util.Arrays;
import org.apache.beam.sdk.extensions.sql.meta.provider.test.TestBoundedTable;
@@ -225,47 +226,59 @@
public static final TestBoundedTable TABLE_EMPTY =
TestBoundedTable.of(Schema.builder().addInt64Field("ColId").addStringField("Value").build());
- private static final Schema TABLE_WTH_MAP_SCHEMA =
+ private static final Schema TABLE_WITH_MAP_SCHEMA =
Schema.builder()
.addMapField("map_field", FieldType.STRING, FieldType.STRING)
.addRowField("row_field", structSchema)
.build();
public static final TestBoundedTable TABLE_WITH_MAP =
- TestBoundedTable.of(TABLE_WTH_MAP_SCHEMA)
+ TestBoundedTable.of(TABLE_WITH_MAP_SCHEMA)
.addRows(
ImmutableMap.of("MAP_KEY_1", "MAP_VALUE_1"),
Row.withSchema(structSchema).addValues(1L, "data1").build());
- private static final Schema TABLE_WTH_DATE_SCHEMA =
+ private static final Schema TABLE_WITH_DATE_SCHEMA =
Schema.builder()
.addLogicalTypeField("date_field", SqlTypes.DATE)
.addStringField("str_field")
.build();
public static final TestBoundedTable TABLE_WITH_DATE =
- TestBoundedTable.of(TABLE_WTH_DATE_SCHEMA)
+ TestBoundedTable.of(TABLE_WITH_DATE_SCHEMA)
.addRows(LocalDate.of(2008, 12, 25), "s")
.addRows(LocalDate.of(2020, 4, 7), "s");
- private static final Schema TABLE_WTH_TIME_SCHEMA =
+ private static final Schema TABLE_WITH_TIME_SCHEMA =
Schema.builder()
.addLogicalTypeField("time_field", SqlTypes.TIME)
.addStringField("str_field")
.build();
public static final TestBoundedTable TABLE_WITH_TIME =
- TestBoundedTable.of(TABLE_WTH_TIME_SCHEMA)
+ TestBoundedTable.of(TABLE_WITH_TIME_SCHEMA)
.addRows(LocalTime.of(15, 30, 0), "s")
.addRows(LocalTime.of(23, 35, 59), "s");
- private static final Schema TABLE_WTH_NUMERIC_SCHEMA =
+ private static final Schema TABLE_WITH_NUMERIC_SCHEMA =
Schema.builder().addDecimalField("numeric_field").addStringField("str_field").build();
+
public static final TestBoundedTable TABLE_WITH_NUMERIC =
- TestBoundedTable.of(TABLE_WTH_NUMERIC_SCHEMA)
+ TestBoundedTable.of(TABLE_WITH_NUMERIC_SCHEMA)
.addRows(ZetaSqlTypesUtils.bigDecimalAsNumeric("123.4567"), "str1")
.addRows(ZetaSqlTypesUtils.bigDecimalAsNumeric("765.4321"), "str2")
.addRows(ZetaSqlTypesUtils.bigDecimalAsNumeric("-555.5555"), "str3");
+ // Schema of the DATETIME test table: one SqlTypes.DATETIME logical-type column and one string
+ // column.
+ private static final Schema TABLE_WITH_DATETIME_SCHEMA =
+ Schema.builder()
+ .addLogicalTypeField("datetime_field", SqlTypes.DATETIME)
+ .addStringField("str_field")
+ .build();
+
+ // Two-row fixture with microsecond-precision datetimes; both rows share the same str_field
+ // value ("s").
+ public static final TestBoundedTable TABLE_WITH_DATETIME =
+ TestBoundedTable.of(TABLE_WITH_DATETIME_SCHEMA)
+ .addRows(LocalDateTime.of(2008, 12, 25, 15, 30, 0).withNano(123456000), "s")
+ .addRows(LocalDateTime.of(2012, 10, 6, 11, 45, 0).withNano(987654000), "s");
+
private static byte[] stringToBytes(String s) {
return s.getBytes(StandardCharsets.UTF_8);
}
diff --git a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlBeamTranslationUtilsTest.java b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlBeamTranslationUtilsTest.java
index 0510590..7b450fb 100644
--- a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlBeamTranslationUtilsTest.java
+++ b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlBeamTranslationUtilsTest.java
@@ -28,6 +28,7 @@
import com.google.zetasql.Value;
import com.google.zetasql.ZetaSQLType.TypeKind;
import java.time.LocalDate;
+import java.time.LocalDateTime;
import java.time.LocalTime;
import java.util.Arrays;
import org.apache.beam.sdk.schemas.Schema;
@@ -54,12 +55,12 @@
.addField("f_string", FieldType.STRING)
.addField("f_bytes", FieldType.BYTES)
.addLogicalTypeField("f_date", SqlTypes.DATE)
- // .addLogicalTypeField("f_datetime", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_datetime", SqlTypes.DATETIME)
.addLogicalTypeField("f_time", SqlTypes.TIME)
.addField("f_timestamp", FieldType.DATETIME)
.addArrayField("f_array", FieldType.DOUBLE)
.addRowField("f_struct", TEST_INNER_SCHEMA)
- // .addLogicalTypeField("f_numeric", SqlTypes.NUMERIC)
+ .addField("f_numeric", FieldType.DECIMAL)
.addNullableField("f_null", FieldType.INT64)
.build();
@@ -83,10 +84,12 @@
new StructField("f_string", TypeFactory.createSimpleType(TypeKind.TYPE_STRING)),
new StructField("f_bytes", TypeFactory.createSimpleType(TypeKind.TYPE_BYTES)),
new StructField("f_date", TypeFactory.createSimpleType(TypeKind.TYPE_DATE)),
+ new StructField("f_datetime", TypeFactory.createSimpleType(TypeKind.TYPE_DATETIME)),
new StructField("f_time", TypeFactory.createSimpleType(TypeKind.TYPE_TIME)),
new StructField("f_timestamp", TypeFactory.createSimpleType(TypeKind.TYPE_TIMESTAMP)),
new StructField("f_array", TEST_INNER_ARRAY_TYPE),
new StructField("f_struct", TEST_INNER_STRUCT_TYPE),
+ new StructField("f_numeric", TypeFactory.createSimpleType(TypeKind.TYPE_NUMERIC)),
new StructField("f_null", TypeFactory.createSimpleType(TypeKind.TYPE_INT64))));
private static final Row TEST_ROW =
@@ -97,10 +100,12 @@
.addValue("Hello")
.addValue(new byte[] {0x11, 0x22})
.addValue(LocalDate.of(2020, 6, 4))
+ .addValue(LocalDateTime.of(2008, 12, 25, 15, 30, 0))
.addValue(LocalTime.of(15, 30, 45))
.addValue(Instant.ofEpochMilli(12345678L))
.addArray(3.0, 6.5)
.addValue(Row.withSchema(TEST_INNER_SCHEMA).addValues(0L, "world").build())
+ .addValue(ZetaSqlTypesUtils.bigDecimalAsNumeric("12346"))
.addValue(null)
.build();
@@ -114,6 +119,10 @@
Value.createStringValue("Hello"),
Value.createBytesValue(ByteString.copyFrom(new byte[] {0x11, 0x22})),
Value.createDateValue((int) LocalDate.of(2020, 6, 4).toEpochDay()),
+ Value.createDatetimeValue(
+ CivilTimeEncoder.encodePacked64DatetimeSeconds(
+ LocalDateTime.of(2008, 12, 25, 15, 30, 0)),
+ LocalDateTime.of(2008, 12, 25, 15, 30, 0).getNano()),
Value.createTimeValue(
CivilTimeEncoder.encodePacked64TimeNanos(LocalTime.of(15, 30, 45))),
Value.createTimestampValueFromUnixMicros(12345678000L),
@@ -123,6 +132,7 @@
Value.createStructValue(
TEST_INNER_STRUCT_TYPE,
Arrays.asList(Value.createInt64Value(0L), Value.createStringValue("world"))),
+ Value.createNumericValue(ZetaSqlTypesUtils.bigDecimalAsNumeric("12346")),
Value.createNullValue(TypeFactory.createSimpleType(TypeKind.TYPE_INT64))));
@Test
diff --git a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlDialectSpecTest.java b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlDialectSpecTest.java
index e9c51c7..d148012 100644
--- a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlDialectSpecTest.java
+++ b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlDialectSpecTest.java
@@ -2942,7 +2942,7 @@
}
@Test
- @Ignore("BEAM-9515")
+ @Ignore("[BEAM-9515] ArrayScanToUncollectConverter Unnest does not support sub-queries")
public void testUNNESTExpression() {
String sql = "SELECT * FROM UNNEST(ARRAY(SELECT Value FROM KeyValue));";
ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
diff --git a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlTestBase.java b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlTestBase.java
index 6d9ba67..483a9c1 100644
--- a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlTestBase.java
+++ b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlTestBase.java
@@ -61,6 +61,7 @@
testBoundedTableMap.put("table_with_date", TestInput.TABLE_WITH_DATE);
testBoundedTableMap.put("table_with_time", TestInput.TABLE_WITH_TIME);
testBoundedTableMap.put("table_with_numeric", TestInput.TABLE_WITH_NUMERIC);
+ testBoundedTableMap.put("table_with_datetime", TestInput.TABLE_WITH_DATETIME);
testBoundedTableMap.put(
"table_with_struct_ts_string", TestInput.TABLE_WITH_STRUCT_TIMESTAMP_STRING);
testBoundedTableMap.put("streaming_sql_test_table_a", TestInput.STREAMING_SQL_TABLE_A);
diff --git a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlTimeFunctionsTest.java b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlTimeFunctionsTest.java
index 6789d63..109ca1e 100644
--- a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlTimeFunctionsTest.java
+++ b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlTimeFunctionsTest.java
@@ -23,9 +23,11 @@
import static org.apache.beam.sdk.extensions.sql.zetasql.DateTimeUtils.parseTimestampWithTimeZone;
import static org.apache.beam.sdk.extensions.sql.zetasql.DateTimeUtils.parseTimestampWithUTCTimeZone;
+import com.google.zetasql.CivilTimeEncoder;
import com.google.zetasql.Value;
import com.google.zetasql.ZetaSQLType.TypeKind;
import java.time.LocalDate;
+import java.time.LocalDateTime;
import java.time.LocalTime;
import org.apache.beam.sdk.extensions.sql.impl.BeamSqlPipelineOptions;
import org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode;
@@ -40,7 +42,6 @@
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
import org.joda.time.Duration;
import org.junit.Before;
-import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
@@ -148,7 +149,7 @@
+ " EXTRACT(ISOYEAR FROM date) AS isoyear,\n"
+ " EXTRACT(YEAR FROM date) AS year,\n"
+ " EXTRACT(ISOWEEK FROM date) AS isoweek,\n"
- // TODO[BEAM-9178]: Add tests for DATE_TRUNC and EXTRACT with "week with weekday" date
+ // TODO[BEAM-10606]: Add tests for DATE_TRUNC and EXTRACT with "week with weekday" date
// parts once they are supported
// + " EXTRACT(WEEK FROM date) AS week,\n"
+ " EXTRACT(MONTH FROM date) AS month,\n"
@@ -219,6 +220,22 @@
}
+ /** Checks that DATE(datetime) keeps only the calendar date of a DATETIME literal. */
@Test
+ public void testDateFromDateTime() {
+ String sql = "SELECT DATE(DATETIME '2008-12-25 15:30:00.123456')";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ // The time-of-day part of the literal is dropped; only 2008-12-25 is expected.
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(Schema.builder().addLogicalTypeField("f_date", SqlTypes.DATE).build())
+ .addValues(LocalDate.of(2008, 12, 25))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ @Test
public void testDateAdd() {
String sql =
"SELECT "
@@ -580,6 +597,22 @@
}
+ /** Checks that TIME(datetime) keeps only the time of day, including fractional seconds. */
@Test
+ public void testTimeFromDateTime() {
+ String sql = "SELECT TIME(DATETIME '2008-12-25 15:30:00.123456')";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ // .123456 seconds in the literal corresponds to 123456000 nanoseconds in LocalTime.
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(Schema.builder().addLogicalTypeField("f_time", SqlTypes.TIME).build())
+ .addValues(LocalTime.of(15, 30, 0, 123456000))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ @Test
public void testTimeAdd() {
String sql =
"SELECT "
@@ -753,13 +786,420 @@
/////////////////////////////////////////////////////////////////////////////
+ /**
+ * Checks that a DATETIME literal with microsecond precision is returned as a
+ * SqlTypes.DATETIME value holding the same date, time and fractional seconds.
+ */
@Test
- @Ignore("Does not support Datetime literal.")
- public void testDatetimeLiteral() {
- String sql = "SELECT DATETIME '2018-01-01 05:30:00.334'";
+ public void testDateTimeLiteral() {
+ String sql = "SELECT DATETIME '2008-12-25 15:30:00.123456'";
+
ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
- thrown.expect(RuntimeException.class);
- thrown.expectMessage("Unsupported ResolvedLiteral type: DATETIME");
- zetaSQLQueryPlanner.convertToBeamRel(sql);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(
+ Schema.builder().addLogicalTypeField("f_datetime", SqlTypes.DATETIME).build())
+ .addValues(LocalDateTime.of(2008, 12, 25, 15, 30, 0).withNano(123456000))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /**
+ * Reads the DATETIME column of table_with_datetime and checks its FORMAT_DATETIME rendering
+ * for both rows of the table.
+ */
+ @Test
+ public void testDateTimeColumn() {
+ String sql = "SELECT FORMAT_DATETIME('%D %T %E6S', datetime_field) FROM table_with_datetime";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ // As the expected strings show, the format yields "MM/DD/YY HH:MM:SS" followed by the seconds
+ // with six fractional digits.
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(Schema.builder().addStringField("f_datetime_str").build())
+ .addValues("12/25/08 15:30:00 00.123456")
+ .build(),
+ Row.withSchema(Schema.builder().addStringField("f_datetime_str").build())
+ .addValues("10/06/12 11:45:00 00.987654")
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /**
+ * Groups table_with_datetime by its DATETIME column and expects one output row per distinct
+ * datetime, each with a count of 1.
+ */
+ @Test
+ public void testGroupByDateTime() {
+ String sql = "SELECT datetime_field, COUNT(*) FROM table_with_datetime GROUP BY datetime_field";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ // Output schema: the grouping key followed by the INT64 count.
+ final Schema schema =
+ Schema.builder()
+ .addLogicalTypeField("datetime_field", SqlTypes.DATETIME)
+ .addInt64Field("count")
+ .build();
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(schema)
+ .addValues(LocalDateTime.of(2008, 12, 25, 15, 30, 0).withNano(123456000), 1L)
+ .build(),
+ Row.withSchema(schema)
+ .addValues(LocalDateTime.of(2012, 10, 6, 11, 45, 0).withNano(987654000), 1L)
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /**
+ * Checks MAX over a DATETIME column of table_with_datetime, grouped by str_field; a single
+ * output row holding the later datetime (2012-10-06) is expected.
+ */
+ @Test
+ public void testAggregateOnDateTime() {
+ String sql = "SELECT MAX(datetime_field) FROM table_with_datetime GROUP BY str_field";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(
+ Schema.builder()
+ .addLogicalTypeField("datetime_field", SqlTypes.DATETIME)
+ .build())
+ .addValues(LocalDateTime.of(2012, 10, 6, 11, 45, 0).withNano(987654000))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ // TODO[BEAM-9166]: Add a test for CURRENT_DATETIME function ("SELECT CURRENT_DATETIME()")
+
+ /**
+ * Checks EXTRACT on DATETIME literals for the date parts YEAR, QUARTER, MONTH, DAY, DAYOFWEEK
+ * and DAYOFYEAR, the time parts HOUR through MICROSECOND, and the DATE and TIME projections.
+ * The WEEK part is commented out in the query, the schema and the expected values alike.
+ */
+ @Test
+ public void testExtractFromDateTime() {
+ String sql =
+ "SELECT "
+ + "EXTRACT(YEAR FROM DATETIME '2008-12-25 15:30:00') as year, "
+ + "EXTRACT(QUARTER FROM DATETIME '2008-12-25 15:30:00') as quarter, "
+ + "EXTRACT(MONTH FROM DATETIME '2008-12-25 15:30:00') as month, "
+ // TODO[BEAM-10606]: Add tests for DATETIME_TRUNC and EXTRACT with "week with weekday"
+ // date parts once they are supported
+ // + "EXTRACT(WEEK FROM DATETIME '2008-12-25 15:30:00') as week, "
+ + "EXTRACT(DAY FROM DATETIME '2008-12-25 15:30:00') as day, "
+ + "EXTRACT(DAYOFWEEK FROM DATETIME '2008-12-25 15:30:00') as dayofweek, "
+ + "EXTRACT(DAYOFYEAR FROM DATETIME '2008-12-25 15:30:00') as dayofyear, "
+ + "EXTRACT(HOUR FROM DATETIME '2008-12-25 15:30:00.123456') as hour, "
+ + "EXTRACT(MINUTE FROM DATETIME '2008-12-25 15:30:00.123456') as minute, "
+ + "EXTRACT(SECOND FROM DATETIME '2008-12-25 15:30:00.123456') as second, "
+ + "EXTRACT(MILLISECOND FROM DATETIME '2008-12-25 15:30:00.123456') as millisecond, "
+ + "EXTRACT(MICROSECOND FROM DATETIME '2008-12-25 15:30:00.123456') as microsecond, "
+ + "EXTRACT(DATE FROM DATETIME '2008-12-25 15:30:00.123456') as date, "
+ + "EXTRACT(TIME FROM DATETIME '2008-12-25 15:30:00.123456') as time ";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ // Field order must mirror the SELECT list above.
+ final Schema schema =
+ Schema.builder()
+ .addInt64Field("year")
+ .addInt64Field("quarter")
+ .addInt64Field("month")
+ // .addInt64Field("week")
+ .addInt64Field("day")
+ .addInt64Field("dayofweek")
+ .addInt64Field("dayofyear")
+ .addInt64Field("hour")
+ .addInt64Field("minute")
+ .addInt64Field("second")
+ .addInt64Field("millisecond")
+ .addInt64Field("microsecond")
+ .addLogicalTypeField("date", SqlTypes.DATE)
+ .addLogicalTypeField("time", SqlTypes.TIME)
+ .build();
+ // 2008-12-25 is day 360 of a leap year; the expected DAYOFWEEK value for it is 5.
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(schema)
+ .addValues(
+ 2008L,
+ 4L,
+ 12L,
+ // 52L,
+ 25L,
+ 5L,
+ 360L,
+ 15L,
+ 30L,
+ 0L,
+ 123L,
+ 123456L,
+ LocalDate.of(2008, 12, 25),
+ LocalTime.of(15, 30, 0, 123456000))
+ .build());
+
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /** Checks that DATETIME(date, time) combines a DATE and a TIME literal into one DATETIME. */
+ @Test
+ public void testDateTimeFromDateAndTime() {
+ String sql = "SELECT DATETIME(DATE '2008-12-25', TIME '15:30:00.123456')";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(
+ Schema.builder().addLogicalTypeField("f_datetime", SqlTypes.DATETIME).build())
+ .addValues(LocalDateTime.of(2008, 12, 25, 15, 30, 0).withNano(123456000))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /** Checks that DATETIME(date) yields midnight (00:00:00) of the given date. */
+ @Test
+ public void testDateTimeFromDate() {
+ String sql = "SELECT DATETIME(DATE '2008-12-25')";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(
+ Schema.builder().addLogicalTypeField("f_datetime", SqlTypes.DATETIME).build())
+ .addValues(LocalDateTime.of(2008, 12, 25, 0, 0, 0))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /** Checks the six-argument DATETIME(year, month, day, hour, minute, second) constructor. */
+ @Test
+ public void testDateTimeFromYearMonthDayHourMinuteSecond() {
+ String sql = "SELECT DATETIME(2008, 12, 25, 15, 30, 0)";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(
+ Schema.builder().addLogicalTypeField("f_datetime", SqlTypes.DATETIME).build())
+ .addValues(LocalDateTime.of(2008, 12, 25, 15, 30, 0))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /**
+ * Checks DATETIME(timestamp, time_zone): a TIMESTAMP given with a +08 offset is rendered as
+ * the wall-clock datetime in the 'America/Los_Angeles' zone.
+ */
+ @Test
+ public void testDateTimeFromTimestamp() {
+ String sql = "SELECT DATETIME(TIMESTAMP '2008-12-25 15:30:00+08', 'America/Los_Angeles')";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ // The expected wall clock is 16 hours behind the +08 literal, hence the previous calendar day.
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(
+ Schema.builder().addLogicalTypeField("f_datetime", SqlTypes.DATETIME).build())
+ .addValues(LocalDateTime.of(2008, 12, 24, 23, 30, 0))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /**
+ * Checks DATETIME_ADD with an interval of 10 for each of the nine units MICROSECOND,
+ * MILLISECOND, SECOND, MINUTE, HOUR, DAY, MONTH, QUARTER and YEAR, all applied to
+ * 2008-12-25 15:30:00.
+ */
+ @Test
+ public void testDateTimeAdd() {
+ String sql =
+ "SELECT "
+ + "DATETIME_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 10 MICROSECOND), "
+ + "DATETIME_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 10 MILLISECOND), "
+ + "DATETIME_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 10 SECOND), "
+ + "DATETIME_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 10 MINUTE), "
+ + "DATETIME_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 10 HOUR), "
+ + "DATETIME_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 10 DAY), "
+ + "DATETIME_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 10 MONTH), "
+ + "DATETIME_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 10 QUARTER), "
+ + "DATETIME_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 10 YEAR) ";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ // Expected values are listed in the same order as the SELECT expressions. 10 microseconds is
+ // 10000 ns, 10 milliseconds is 10000000 ns, and 10 quarters is 30 months.
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(
+ Schema.builder()
+ .addLogicalTypeField("f_time1", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time2", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time3", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time4", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time5", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time6", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time7", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time8", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time9", SqlTypes.DATETIME)
+ .build())
+ .addValues(
+ LocalDateTime.of(2008, 12, 25, 15, 30, 0).withNano(10000),
+ LocalDateTime.of(2008, 12, 25, 15, 30, 0).withNano(10000000),
+ LocalDateTime.of(2008, 12, 25, 15, 30, 10),
+ LocalDateTime.of(2008, 12, 25, 15, 40, 0),
+ LocalDateTime.of(2008, 12, 26, 1, 30, 0),
+ LocalDateTime.of(2009, 1, 4, 15, 30, 0),
+ LocalDateTime.of(2009, 10, 25, 15, 30, 0),
+ LocalDateTime.of(2011, 6, 25, 15, 30, 0),
+ LocalDateTime.of(2018, 12, 25, 15, 30, 0))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /**
+ * Checks DATETIME_ADD when both the datetime (@p0) and the interval amount (@p1) are supplied
+ * as named query parameters: adding 3 hours to 2008-12-25 15:30:00.123456 must give 18:30 on
+ * the same day with the fractional seconds preserved.
+ */
+ @Test
+ public void testDateTimeAddWithParameter() {
+ String sql = "SELECT DATETIME_ADD(@p0, INTERVAL @p1 HOUR)";
+
+ // Seconds are written as 0 rather than 00: a leading zero makes a Java integer literal octal.
+ LocalDateTime datetime = LocalDateTime.of(2008, 12, 25, 15, 30, 0).withNano(123456000);
+ // The DATETIME parameter is built from the packed seconds encoding plus the nanosecond part.
+ ImmutableMap<String, Value> params =
+ ImmutableMap.of(
+ "p0",
+ Value.createDatetimeValue(
+ CivilTimeEncoder.encodePacked64DatetimeSeconds(datetime), datetime.getNano()),
+ "p1", Value.createInt64Value(3L));
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql, params);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(
+ Schema.builder().addLogicalTypeField("f_datetime", SqlTypes.DATETIME).build())
+ .addValues(LocalDateTime.of(2008, 12, 25, 18, 30, 0).withNano(123456000))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /**
+ * Checks DATETIME_SUB with an interval of 10 for each of the nine units MICROSECOND,
+ * MILLISECOND, SECOND, MINUTE, HOUR, DAY, MONTH, QUARTER and YEAR, all applied to
+ * 2008-12-25 15:30:00.
+ */
+ @Test
+ public void testDateTimeSub() {
+ String sql =
+ "SELECT "
+ + "DATETIME_SUB(DATETIME '2008-12-25 15:30:00', INTERVAL 10 MICROSECOND), "
+ + "DATETIME_SUB(DATETIME '2008-12-25 15:30:00', INTERVAL 10 MILLISECOND), "
+ + "DATETIME_SUB(DATETIME '2008-12-25 15:30:00', INTERVAL 10 SECOND), "
+ + "DATETIME_SUB(DATETIME '2008-12-25 15:30:00', INTERVAL 10 MINUTE), "
+ + "DATETIME_SUB(DATETIME '2008-12-25 15:30:00', INTERVAL 10 HOUR), "
+ + "DATETIME_SUB(DATETIME '2008-12-25 15:30:00', INTERVAL 10 DAY), "
+ + "DATETIME_SUB(DATETIME '2008-12-25 15:30:00', INTERVAL 10 MONTH), "
+ + "DATETIME_SUB(DATETIME '2008-12-25 15:30:00', INTERVAL 10 QUARTER), "
+ + "DATETIME_SUB(DATETIME '2008-12-25 15:30:00', INTERVAL 10 YEAR) ";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ // Expected values are listed in the same order as the SELECT expressions. The sub-second
+ // cases borrow from the seconds field: 15:29:59 plus 999990000 ns or 990000000 ns.
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(
+ Schema.builder()
+ .addLogicalTypeField("f_time1", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time2", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time3", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time4", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time5", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time6", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time7", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time8", SqlTypes.DATETIME)
+ .addLogicalTypeField("f_time9", SqlTypes.DATETIME)
+ .build())
+ .addValues(
+ LocalDateTime.of(2008, 12, 25, 15, 29, 59).withNano(999990000),
+ LocalDateTime.of(2008, 12, 25, 15, 29, 59).withNano(990000000),
+ LocalDateTime.of(2008, 12, 25, 15, 29, 50),
+ LocalDateTime.of(2008, 12, 25, 15, 20, 0),
+ LocalDateTime.of(2008, 12, 25, 5, 30, 0),
+ LocalDateTime.of(2008, 12, 15, 15, 30, 0),
+ LocalDateTime.of(2008, 2, 25, 15, 30, 0),
+ LocalDateTime.of(2006, 6, 25, 15, 30, 0),
+ LocalDateTime.of(1998, 12, 25, 15, 30, 0))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /** Checks DATETIME_DIFF in DAY units when the first argument is the later datetime. */
+ @Test
+ public void testDateTimeDiff() {
+ String sql =
+ "SELECT DATETIME_DIFF(DATETIME '2008-12-25 15:30:00', DATETIME '2008-10-25 15:30:00', DAY)";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ // Oct 25 -> Dec 25: 6 days left in October + 30 in November + 25 in December = 61.
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(Schema.builder().addInt64Field("f_datetime_diff").build())
+ .addValues(61L)
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /**
+ * Checks that DATETIME_DIFF is negative when the first argument is the earlier datetime
+ * (same two datetimes as testDateTimeDiff, with the arguments swapped).
+ */
+ @Test
+ public void testDateTimeDiffNegativeResult() {
+ String sql =
+ "SELECT DATETIME_DIFF(DATETIME '2008-10-25 15:30:00', DATETIME '2008-12-25 15:30:00', DAY)";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(Schema.builder().addInt64Field("f_datetime_diff").build())
+ .addValues(-61L)
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /** Checks that DATETIME_TRUNC to HOUR zeroes the minutes of 15:30:00, giving 15:00:00. */
+ @Test
+ public void testDateTimeTrunc() {
+ String sql = "SELECT DATETIME_TRUNC(DATETIME '2008-12-25 15:30:00', HOUR)";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(
+ Schema.builder()
+ .addLogicalTypeField("f_datetime_trunc", SqlTypes.DATETIME)
+ .build())
+ .addValues(LocalDateTime.of(2008, 12, 25, 15, 0, 0))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /** Checks FORMAT_DATETIME with the '%D %T %E6S' pattern on a DATETIME literal. */
+ @Test
+ public void testFormatDateTime() {
+ String sql = "SELECT FORMAT_DATETIME('%D %T %E6S', DATETIME '2008-12-25 15:30:00.123456')";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ // The seconds appear twice in the output: once inside %T and once, with six fractional
+ // digits, from %E6S.
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(Schema.builder().addStringField("f_datetime_str").build())
+ .addValues("12/25/08 15:30:00 00.123456")
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /**
+ * Checks that PARSE_DATETIME with the '%Y-%m-%d %H:%M:%E6S' pattern turns a string into the
+ * matching DATETIME value, keeping the microsecond fraction.
+ */
+ @Test
+ public void testParseDateTime() {
+ String sql = "SELECT PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S', '2008-12-25 15:30:00.123456')";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(
+ Schema.builder().addLogicalTypeField("f_datetime", SqlTypes.DATETIME).build())
+ .addValues(LocalDateTime.of(2008, 12, 25, 15, 30, 0).withNano(123456000))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
}
/////////////////////////////////////////////////////////////////////////////
@@ -846,7 +1286,7 @@
+ " EXTRACT(ISOYEAR FROM timestamp) AS isoyear,\n"
+ " EXTRACT(YEAR FROM timestamp) AS year,\n"
+ " EXTRACT(ISOWEEK FROM timestamp) AS isoweek,\n"
- // TODO[BEAM-9178]: Add tests for TIMESTAMP_TRUNC and EXTRACT with "week with weekday"
+ // TODO[BEAM-10606]: Add tests for TIMESTAMP_TRUNC and EXTRACT with "week with weekday"
// date parts once they are supported
// + " EXTRACT(WEEK FROM timestamp) AS week,\n"
+ " EXTRACT(MONTH FROM timestamp) AS month,\n"
@@ -926,6 +1366,23 @@
}
+ /**
+ * Checks EXTRACT(DATETIME FROM timestamp) on a TIMESTAMP literal written without a zone; the
+ * expected DATETIME has the same wall-clock fields as the literal.
+ */
@Test
+ public void testExtractDateTimeFromTimestamp() {
+ String sql = "SELECT EXTRACT(DATETIME FROM TIMESTAMP '2017-05-26 12:34:56')";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ // NOTE(review): presumably both the literal and the extraction use the planner's default
+ // time zone, which is why the fields round-trip unchanged -- confirm.
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(
+ Schema.builder().addLogicalTypeField("datetime", SqlTypes.DATETIME).build())
+ .addValues(LocalDateTime.of(2017, 5, 26, 12, 34, 56))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ @Test
public void testExtractFromTimestampAtTimeZone() {
String sql =
"WITH Timestamps AS (\n"
@@ -1028,6 +1485,45 @@
}
+ /**
+ * Checks TIMESTAMP(datetime) when no default time zone is set explicitly; the expected
+ * instant is the same wall-clock time at offset +00.
+ */
@Test
+ public void testTimestampFromDateTime() {
+ String sql = "SELECT TIMESTAMP(DATETIME '2008-12-25 15:30:00')";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(Schema.builder().addDateTimeField("f_timestamp").build())
+ .addValues(parseTimestampWithTimeZone("2008-12-25 15:30:00+00"))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ /**
+ * Checks TIMESTAMP(datetime) after the default time zone has been set to "Asia/Shanghai" on
+ * both the planner and the pipeline options; the expected instant is the same wall-clock
+ * time at offset +08.
+ */
+ @Test
+ // test default timezone works properly in query execution stage
+ public void testTimestampFromDateTimeWithDefaultTimezoneSet() {
+ String sql = "SELECT TIMESTAMP(DATETIME '2008-12-25 15:30:00')";
+
+ ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
+ // The zone is configured in two places: on the planner and on the pipeline options.
+ zetaSQLQueryPlanner.setDefaultTimezone("Asia/Shanghai");
+ pipeline
+ .getOptions()
+ .as(BeamSqlPipelineOptions.class)
+ .setZetaSqlDefaultTimezone("Asia/Shanghai");
+
+ BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
+ PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
+
+ PAssert.that(stream)
+ .containsInAnyOrder(
+ Row.withSchema(Schema.builder().addDateTimeField("f_timestamp").build())
+ .addValues(parseTimestampWithTimeZone("2008-12-25 15:30:00+08"))
+ .build());
+ pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
+ }
+
+ @Test
public void testTimestampAdd() {
String sql =
"SELECT "
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
index 702bcf4..20e915c 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
@@ -230,7 +230,7 @@
private final DoFnInvoker<InputT, OutputT> doFnInvoker;
private final StartBundleArgumentProvider startBundleArgumentProvider;
private final ProcessBundleContextBase processContext;
- private OnTimerContext onTimerContext;
+ private final OnTimerContext<?> onTimerContext;
private final FinishBundleArgumentProvider finishBundleArgumentProvider;
/**
@@ -426,6 +426,7 @@
tagToSideInputSpecMap = tagToSideInputSpecMapBuilder.build();
this.splitListener = splitListener;
this.bundleFinalizer = bundleFinalizer;
+ this.onTimerContext = new OnTimerContext();
try {
this.mainInputId = ParDoTranslation.getMainInputName(pTransform);
@@ -1242,7 +1243,6 @@
String timerIdOrTimerFamilyId, TimeDomain timeDomain, Timer<K> timer) {
currentTimer = timer;
currentTimeDomain = timeDomain;
- onTimerContext = new OnTimerContext<>(timer.getUserKey());
// The timerIdOrTimerFamilyId contains either a timerId from timer declaration or timerFamilyId
// from timer family declaration.
String timerId =
@@ -2014,11 +2014,6 @@
/** Provides arguments for a {@link DoFnInvoker} for {@link DoFn.OnTimer @OnTimer}. */
private class OnTimerContext<K> extends BaseArgumentProvider<InputT, OutputT> {
- private final K key;
-
- public OnTimerContext(K key) {
- this.key = key;
- }
private class Context extends DoFn<InputT, OutputT>.OnTimerContext {
private Context() {
@@ -2119,7 +2114,7 @@
@Override
public K key() {
- return key;
+ return (K) currentTimer.getUserKey();
}
@Override
diff --git a/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java b/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java
index d634389..1e74107 100644
--- a/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java
+++ b/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java
@@ -227,7 +227,7 @@
CASSANDRA_KEYSPACE,
CASSANDRA_TABLE));
}
- flushMemTables();
+ flushMemTablesAndRefreshSizeEstimates();
}
/**
@@ -241,7 +241,7 @@
* /src/java/org/apache/cassandra/tools/nodetool/Flush.java
*/
@SuppressWarnings("unused")
- private static void flushMemTables() throws Exception {
+ private static void flushMemTablesAndRefreshSizeEstimates() throws Exception {
JMXServiceURL url =
new JMXServiceURL(
String.format(
@@ -253,6 +253,7 @@
StorageServiceMBean mBeanProxy =
JMX.newMBeanProxy(mBeanServerConnection, objectName, StorageServiceMBean.class);
mBeanProxy.forceKeyspaceFlush(CASSANDRA_KEYSPACE, CASSANDRA_TABLE);
+ mBeanProxy.refreshSizeEstimates();
jmxConnector.close();
Thread.sleep(FLUSH_TIMEOUT);
}
@@ -378,7 +379,8 @@
.withPort(cassandraPort)
.withKeyspace(CASSANDRA_KEYSPACE)
.withEntity(ScientistWrite.class));
- // table to write to is specified in the entity in @Table annotation (in that case scientist)
+ // table to write to is specified in the entity in @Table annotation (in that case
+ // scientist_write)
pipeline.run();
List<Row> results = getRows(CASSANDRA_TABLE_WRITE);
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
index 70b9b49..96773df 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
@@ -39,6 +39,7 @@
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.SchemaUpdateOption;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.JobType;
import org.apache.beam.sdk.io.gcp.bigquery.WriteBundlesToFiles.Result;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.ValueProvider;
@@ -263,7 +264,8 @@
private WriteResult expandTriggered(PCollection<KV<DestinationT, ElementT>> input) {
checkArgument(numFileShards > 0);
Pipeline p = input.getPipeline();
- final PCollectionView<String> loadJobIdPrefixView = createLoadJobIdPrefixView(p);
+ final PCollectionView<String> loadJobIdPrefixView = createJobIdPrefixView(p, JobType.LOAD);
+ final PCollectionView<String> copyJobIdPrefixView = createJobIdPrefixView(p, JobType.COPY);
final PCollectionView<String> tempFilePrefixView =
createTempFilePrefixView(p, loadJobIdPrefixView);
// The user-supplied triggeringDuration is often chosen to control how many BigQuery load
@@ -342,12 +344,12 @@
ParDo.of(
new WriteRename(
bigQueryServices,
- loadJobIdPrefixView,
+ copyJobIdPrefixView,
writeDisposition,
createDisposition,
maxRetryJobs,
kmsKey))
- .withSideInputs(loadJobIdPrefixView));
+ .withSideInputs(copyJobIdPrefixView));
writeSinglePartition(partitions.get(singlePartitionTag), loadJobIdPrefixView);
return writeResult(p);
}
@@ -355,7 +357,7 @@
// Expand the pipeline when the user has not requested periodically-triggered file writes.
public WriteResult expandUntriggered(PCollection<KV<DestinationT, ElementT>> input) {
Pipeline p = input.getPipeline();
- final PCollectionView<String> loadJobIdPrefixView = createLoadJobIdPrefixView(p);
+ final PCollectionView<String> loadJobIdPrefixView = createJobIdPrefixView(p, JobType.LOAD);
final PCollectionView<String> tempFilePrefixView =
createTempFilePrefixView(p, loadJobIdPrefixView);
PCollection<KV<DestinationT, ElementT>> inputInGlobalWindow =
@@ -416,24 +418,24 @@
}
// Generate the base job id string.
- private PCollectionView<String> createLoadJobIdPrefixView(Pipeline p) {
+ private PCollectionView<String> createJobIdPrefixView(Pipeline p, final JobType type) {
// Create a singleton job ID token at execution time. This will be used as the base for all
// load jobs issued from this instance of the transform.
- return p.apply("JobIdCreationRoot", Create.of((Void) null))
+ return p.apply("JobIdCreationRoot_" + type.toString(), Create.of((Void) null))
.apply(
- "CreateJobId",
+ "CreateJobId_" + type.toString(),
ParDo.of(
new DoFn<Void, String>() {
@ProcessElement
public void process(ProcessContext c) {
c.output(
- String.format(
- "beam_load_%s_%s",
- c.getPipelineOptions().getJobName().replaceAll("-", ""),
- BigQueryHelpers.randomUUIDString()));
+ BigQueryResourceNaming.createJobIdPrefix(
+ c.getPipelineOptions().getJobName(),
+ BigQueryHelpers.randomUUIDString(),
+ type));
}
}))
- .apply(View.asSingleton());
+ .apply("JobIdSideInput_" + type.toString(), View.asSingleton());
}
// Generate the temporary-file prefix.
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpers.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpers.java
index fa4b754..6685267 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpers.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpers.java
@@ -36,7 +36,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
-import java.util.Optional;
import java.util.UUID;
import java.util.regex.Matcher;
import org.apache.beam.sdk.extensions.gcp.util.BackOffAdapter;
@@ -49,7 +48,6 @@
import org.apache.beam.sdk.util.FluentBackoff;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists;
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.hash.Hashing;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.joda.time.Duration;
import org.slf4j.Logger;
@@ -593,22 +591,6 @@
}
}
- // Create a unique job id for a table load.
- static String createJobId(
- String prefix, TableDestination tableDestination, int partition, long index) {
- // Job ID must be different for each partition of each table.
- String destinationHash =
- Hashing.murmur3_128().hashUnencodedChars(tableDestination.toString()).toString();
- String jobId = String.format("%s_%s", prefix, destinationHash);
- if (partition >= 0) {
- jobId += String.format("_%05d", partition);
- }
- if (index >= 0) {
- jobId += String.format("_%05d", index);
- }
- return jobId;
- }
-
@VisibleForTesting
static class JsonSchemaToTableSchema implements SerializableFunction<String, TableSchema> {
@Override
@@ -683,24 +665,6 @@
}
}
- static String createJobIdToken(String jobName, String stepUuid) {
- return String.format("beam_job_%s_%s", stepUuid, jobName.replaceAll("-", ""));
- }
-
- static String getExtractJobId(String jobIdToken) {
- return String.format("%s-extract", jobIdToken);
- }
-
- static TableReference createTempTableReference(
- String projectId, String jobUuid, Optional<String> tempDatasetIdOpt) {
- String tempDatasetId = tempDatasetIdOpt.orElse("temp_dataset_" + jobUuid);
- String queryTempTableId = "temp_table_" + jobUuid;
- return new TableReference()
- .setProjectId(projectId)
- .setDatasetId(tempDatasetId)
- .setTableId(queryTempTableId);
- }
-
static String resolveTempLocation(
String tempLocationDir, String bigQueryOperationName, String stepUuid) {
return FileSystems.matchNewResource(tempLocationDir, true)
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index c3a6e17..325f1ff 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -17,10 +17,8 @@
*/
package org.apache.beam.sdk.io.gcp.bigquery;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createJobIdToken;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.getExtractJobId;
import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.resolveTempLocation;
+import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState;
@@ -77,6 +75,7 @@
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.TableSpecToTableRef;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.TimePartitioningToJson;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.JobType;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.StorageClient;
@@ -457,6 +456,22 @@
* </a> for security and permission related information specific to BigQuery.
*/
public class BigQueryIO {
+
+ /**
+ * Template for BigQuery jobs created by BigQueryIO. This template is: {@code
+ * "beam_bq_job_{TYPE}_{JOB_ID}_{STEP}_{RANDOM}"}, where:
+ *
+ * <ul>
+ * <li>{@code TYPE} represents the BigQuery job type (e.g. extract / copy / load / query)
+ * <li>{@code JOB_ID} is the Beam job name.
+ * <li>{@code STEP} is a UUID representing the Dataflow step that created the BQ job.
+ * <li>{@code RANDOM} is a random string.
+ * </ul>
+ *
+ * <p><b>NOTE:</b> This job name template does not have backwards compatibility guarantees.
+ */
+ public static final String BIGQUERY_JOB_TEMPLATE = "beam_bq_job_{TYPE}_{JOB_ID}_{STEP}_{RANDOM}";
+
private static final Logger LOG = LoggerFactory.getLogger(BigQueryIO.class);
/** Singleton instance of the JSON factory used to read and write JSON formatted rows. */
@@ -1153,7 +1168,9 @@
JobReference jobRef =
new JobReference()
.setProjectId(executingProject)
- .setJobId(getExtractJobId(createJobIdToken(bqOptions.getJobName(), jobUuid)));
+ .setJobId(
+ BigQueryResourceNaming.createJobIdPrefix(
+ bqOptions.getJobName(), jobUuid, JobType.EXPORT));
Job extractJob = getBigQueryServices().getJobService(bqOptions).getJob(jobRef);
@@ -1362,7 +1379,8 @@
TableReference tempTable =
createTempTableReference(
options.getProject(),
- createJobIdToken(options.getJobName(), jobUuid),
+ BigQueryResourceNaming.createJobIdPrefix(
+ options.getJobName(), jobUuid, JobType.QUERY),
queryTempDataset);
DatasetService datasetService = getBigQueryServices().getDatasetService(options);
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQueryHelper.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQueryHelper.java
index 61cbfed..367617f 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQueryHelper.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQueryHelper.java
@@ -17,8 +17,7 @@
*/
package org.apache.beam.sdk.io.gcp.bigquery;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createJobIdToken;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference;
+import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
import com.google.api.services.bigquery.model.EncryptionConfiguration;
@@ -35,6 +34,7 @@
import java.util.concurrent.atomic.AtomicReference;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.Status;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.QueryPriority;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.JobType;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService;
import org.checkerframework.checker.nullness.qual.Nullable;
@@ -112,10 +112,11 @@
// Step 2: Create a temporary dataset in the query location only if the user has not specified a
// temp dataset.
- String jobIdToken = createJobIdToken(options.getJobName(), stepUuid);
+ String jobIdToken =
+ BigQueryResourceNaming.createJobIdPrefix(options.getJobName(), stepUuid, JobType.QUERY);
Optional<String> queryTempDatasetOpt = Optional.ofNullable(queryTempDatasetId);
TableReference queryResultTable =
createTempTableReference(options.getProject(), jobIdToken, queryTempDatasetOpt);
boolean beamToCreateTempDataset = !queryTempDatasetOpt.isPresent();
// Create dataset only if it has not been set by the user
@@ -142,7 +143,9 @@
// be retried after the temporary dataset and table have been deleted by a previous attempt --
// in that case, we want to regenerate the temporary dataset and table, and we'll need a fresh
// query ID to do that.
- String queryJobId = jobIdToken + "-query-" + BigQueryHelpers.randomUUIDString();
+ String queryJobId =
+ BigQueryResourceNaming.createJobIdPrefix(
+ options.getJobName(), stepUuid, JobType.QUERY, BigQueryHelpers.randomUUIDString());
LOG.info(
"Exporting query results into temporary table {} using job {}",
queryResultTable,
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQuerySourceDef.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQuerySourceDef.java
index f1f2ff4..441902f 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQuerySourceDef.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQuerySourceDef.java
@@ -17,8 +17,7 @@
*/
package org.apache.beam.sdk.io.gcp.bigquery;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createJobIdToken;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference;
+import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
import com.google.api.services.bigquery.model.JobStatistics;
@@ -31,6 +30,7 @@
import org.apache.beam.sdk.annotations.Experimental;
import org.apache.beam.sdk.annotations.Experimental.Kind;
import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.JobType;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.transforms.SerializableFunction;
@@ -129,7 +129,8 @@
TableReference tableToRemove =
createTempTableReference(
bqOptions.getProject(),
- createJobIdToken(bqOptions.getJobName(), stepUuid),
+ BigQueryResourceNaming.createJobIdPrefix(
+ bqOptions.getJobName(), stepUuid, JobType.QUERY),
queryTempDatasetOpt);
BigQueryServices.DatasetService tableService = bqServices.getDatasetService(bqOptions);
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryResourceNaming.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryResourceNaming.java
new file mode 100644
index 0000000..aa4ad89
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryResourceNaming.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.BIGQUERY_JOB_TEMPLATE;
+
+import com.google.api.services.bigquery.model.TableReference;
+import java.util.Optional;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.hash.Hashing;
+
+/**
+ * This class contains utilities to standardize how resources are named by BigQueryIO.
+ *
+ * <p>Resources can be any type of BigQuery job started by BigQueryIO and temporary BigQuery
+ * resources created by BigQueryIO.
+ *
+ * <p>Some examples are BigQuery jobs (export, query, load and copy jobs), temporary datasets and
+ * temporary tables.
+ *
+ * <p>BigQuery resource naming follows this convention: {@code
+ * "beam_bq_job_{TYPE}_{JOB_ID}_{STEP}_{RANDOM}"}.
+ *
+ * <p>This class has no backwards compatibility guarantees. It is considered internal.
+ */
+class BigQueryResourceNaming {
+
+ /**
+ * Generate a BigQuery job ID based on a prefix from {@link
+ * BigQueryResourceNaming#createJobIdPrefix}, with destination information added to it.
+ *
+ * @param prefix A prefix generated in {@link BigQueryResourceNaming#createJobIdPrefix}.
+ * @param tableDestination A descriptor of the destination table.
+ * @param partition A partition number in the destination table; omitted from the ID if negative.
+ * @param index An index appended after the partition; omitted from the ID if negative.
+ * @return The prefix followed by a hash of the destination and the zero-padded numbers.
+ */
+ static String createJobIdWithDestination(
+ String prefix, TableDestination tableDestination, int partition, long index) {
+ // Job ID must be different for each partition of each table.
+ String destinationHash =
+ Hashing.murmur3_128().hashUnencodedChars(tableDestination.toString()).toString();
+ String jobId = String.format("%s_%s", prefix, destinationHash);
+ if (partition >= 0) {
+ jobId += String.format("_%05d", partition);
+ }
+ if (index >= 0) {
+ jobId += String.format("_%05d", index);
+ }
+ return jobId;
+ }
+
+ public enum JobType {
+ LOAD,
+ COPY,
+ EXPORT,
+ QUERY,
+ }
+
+ /**
+ * Generate a name to be used for BigQuery jobs. The name can be used as-is, or as a prefix for BQ
+ * job names that have destinations appended to them.
+ *
+ * @param jobName The name of the Apache Beam job; dashes are removed before it is used.
+ * @param stepUuid A uuid representing the step from which the job is launched.
+ * @param type The job type.
+ * @param random A random string to use when naming jobs. If {@code null}, the random suffix and
+ * its leading underscore are left out of the name.
+ * @return The job name built from {@link BigQueryIO#BIGQUERY_JOB_TEMPLATE}.
+ */
+ static String createJobIdPrefix(String jobName, String stepUuid, JobType type, String random) {
+ jobName = jobName.replaceAll("-", "");
+ // String#replace substitutes literally. replaceFirst would treat '$' and '\' in the job name,
+ // step or random string as regex replacement syntax and could throw or corrupt the name.
+ String result =
+ BIGQUERY_JOB_TEMPLATE
+ .replace("{TYPE}", type.toString())
+ .replace("{JOB_ID}", jobName)
+ .replace("{STEP}", stepUuid);
+ if (random != null) {
+ return result.replace("{RANDOM}", random);
+ }
+ return result.replace("_{RANDOM}", "");
+ }
+
+ static String createJobIdPrefix(String jobName, String stepUuid, JobType type) {
+ return createJobIdPrefix(jobName, stepUuid, type, null);
+ }
+
+ static TableReference createTempTableReference(
+ String projectId, String jobUuid, Optional<String> tempDatasetIdOpt) {
+ String tempDatasetId = tempDatasetIdOpt.orElse("temp_dataset_" + jobUuid);
+ String queryTempTableId = "temp_table_" + jobUuid;
+ return new TableReference()
+ .setProjectId(projectId)
+ .setDatasetId(tempDatasetId)
+ .setTableId(queryTempTableId);
+ }
+}
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
index 56ab4e6..dfa2b37 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
@@ -19,6 +19,7 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
+import com.google.api.client.googleapis.json.GoogleJsonError;
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
import com.google.api.client.googleapis.services.AbstractGoogleClientRequest;
import com.google.api.client.http.HttpRequestInitializer;
@@ -91,6 +92,7 @@
import org.apache.beam.sdk.values.ValueInSingleWindow;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.joda.time.Duration;
import org.slf4j.Logger;
@@ -119,6 +121,9 @@
private static final FluentBackoff DEFAULT_BACKOFF_FACTORY =
FluentBackoff.DEFAULT.withMaxRetries(MAX_RPC_RETRIES).withInitialBackoff(INITIAL_RPC_BACKOFF);
+ // The error code for quota exceeded error (https://cloud.google.com/bigquery/docs/error-messages)
+ private static final String QUOTA_EXCEEDED = "quotaExceeded";
+
@Override
public JobService getJobService(BigQueryOptions options) {
return new JobServiceImpl(options);
@@ -373,10 +378,17 @@
Exception lastException;
do {
try {
- return client.jobs().get(jobRef.getProjectId(), jobId).execute();
+ return client
+ .jobs()
+ .get(jobRef.getProjectId(), jobId)
+ .setLocation(jobRef.getLocation())
+ .execute();
} catch (GoogleJsonResponseException e) {
if (errorExtractor.itemNotFound(e)) {
- LOG.info("No BigQuery job with job id {} found.", jobId);
+ LOG.info(
+ "No BigQuery job with job id {} found in location {}.",
+ jobId,
+ jobRef.getLocation());
return null;
}
LOG.info(
@@ -806,6 +818,19 @@
try {
return insert.execute().getInsertErrors();
} catch (IOException e) {
+ GoogleJsonError.ErrorInfo errorInfo = getErrorInfo(e);
+ if (errorInfo == null) {
+ throw e;
+ }
+ // Only rate-limit and quota errors are retried; every other failure is rethrown as-is.
+ // TODO(BEAM-10584): Check for QUOTA_EXCEEDED error will be replaced by
+ // ApiErrorExtractor.INSTANCE.quotaExceeded(e) after the next release of
+ // GoogleCloudDataproc/hadoop-connectors
+ // Constant-first equals: the reason of an error entry may be absent (null).
+ if (!ApiErrorExtractor.INSTANCE.rateLimited(e)
+ && !QUOTA_EXCEEDED.equals(errorInfo.getReason())) {
+ throw e;
+ }
LOG.info(
String.format(
"BigQuery insertAll error, retrying: %s",
@@ -915,6 +940,15 @@
ignoreInsertIds);
}
+ protected GoogleJsonError.ErrorInfo getErrorInfo(IOException e) {
+ if (!(e instanceof GoogleJsonResponseException)) {
+ return null;
+ }
+ GoogleJsonError details = ((GoogleJsonResponseException) e).getDetails(); // may be null
+ boolean hasErrors = details != null && details.getErrors() != null;
+ return hasErrors ? Iterables.getFirst(details.getErrors(), null) : null;
+ }
+
@Override
public Table patchTableDescription(
TableReference tableReference, @Nullable String tableDescription)
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java
index 798ced8..3034410 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java
@@ -18,8 +18,6 @@
package org.apache.beam.sdk.io.gcp.bigquery;
import static org.apache.beam.sdk.io.FileSystems.match;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createJobIdToken;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.getExtractJobId;
import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.resolveTempLocation;
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
@@ -39,6 +37,7 @@
import org.apache.beam.sdk.io.fs.MatchResult;
import org.apache.beam.sdk.io.fs.ResourceId;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.Status;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.JobType;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.SerializableFunction;
@@ -120,7 +119,8 @@
TableSchema schema = table.getSchema();
JobService jobService = bqServices.getJobService(bqOptions);
- String extractJobId = getExtractJobId(createJobIdToken(options.getJobName(), stepUuid));
+ String extractJobId =
+ BigQueryResourceNaming.createJobIdPrefix(options.getJobName(), stepUuid, JobType.EXPORT);
final String extractDestinationDir =
resolveTempLocation(bqOptions.getTempLocation(), "BigQueryExtractTemp", stepUuid);
String bqLocation =
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
index 8aeb982..9b80f5f 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
@@ -154,7 +154,7 @@
// Make sure each destination table gets a unique job id.
String jobIdPrefix =
- BigQueryHelpers.createJobId(
+ BigQueryResourceNaming.createJobIdWithDestination(
c.sideInput(jobIdToken), finalTableDestination, -1, c.pane().getIndex());
BigQueryHelpers.PendingJob retryJob =
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
index b214838..45f5ea4 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
@@ -192,7 +192,7 @@
Integer partition = c.element().getKey().getShardNumber();
List<String> partitionFiles = Lists.newArrayList(c.element().getValue());
String jobIdPrefix =
- BigQueryHelpers.createJobId(
+ BigQueryResourceNaming.createJobIdWithDestination(
c.sideInput(loadJobIdPrefixView), tableDestination, partition, c.pane().getIndex());
if (tempTable) {
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClient.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClient.java
index 9521443..3f449d2 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClient.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClient.java
@@ -129,7 +129,7 @@
new PubsubMessage().encodeData(outgoingMessage.message().getData().toByteArray());
pubsubMessage.setAttributes(getMessageAttributes(outgoingMessage));
if (!outgoingMessage.message().getOrderingKey().isEmpty()) {
- pubsubMessage.put("orderingKey", outgoingMessage.message().getOrderingKey());
+ pubsubMessage.setOrderingKey(outgoingMessage.message().getOrderingKey());
}
pubsubMessages.add(pubsubMessage);
}
@@ -156,6 +156,7 @@
}
@Override
+ @SuppressWarnings("ProtoFieldNullComparison")
public List<IncomingMessage> pull(
long requestTimeMsSinceEpoch,
SubscriptionPath subscription,
@@ -207,8 +208,12 @@
com.google.pubsub.v1.PubsubMessage.newBuilder();
protoMessage.setData(ByteString.copyFrom(elementBytes));
protoMessage.putAllAttributes(attributes);
- protoMessage.setOrderingKey(
- (String) pubsubMessage.getUnknownKeys().getOrDefault("orderingKey", ""));
+ // PubsubMessage uses `null` to represent no ordering key where we want a default of "".
+ if (pubsubMessage.getOrderingKey() != null) {
+ protoMessage.setOrderingKey(pubsubMessage.getOrderingKey());
+ } else {
+ protoMessage.setOrderingKey("");
+ }
incomingMessages.add(
IncomingMessage.of(
protoMessage.build(),
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpersTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpersTest.java
index 440b074..c7679ea 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpersTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpersTest.java
@@ -212,7 +212,7 @@
String projectId = "this-is-my-project";
String jobUuid = "this-is-my-job";
TableReference noDataset =
- BigQueryHelpers.createTempTableReference(projectId, jobUuid, Optional.empty());
+ BigQueryResourceNaming.createTempTableReference(projectId, jobUuid, Optional.empty());
assertEquals(noDataset.getProjectId(), projectId);
assertEquals(noDataset.getDatasetId(), "temp_dataset_" + jobUuid);
@@ -220,7 +220,7 @@
Optional<String> dataset = Optional.ofNullable("my-tmp-dataset");
TableReference tempTableReference =
- BigQueryHelpers.createTempTableReference(projectId, jobUuid, dataset);
+ BigQueryResourceNaming.createTempTableReference(projectId, jobUuid, dataset);
assertEquals(tempTableReference.getProjectId(), noDataset.getProjectId());
assertEquals(tempTableReference.getDatasetId(), dataset.get());
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOReadTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOReadTest.java
index 6970ef8..3a6ce15 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOReadTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOReadTest.java
@@ -17,8 +17,7 @@
*/
package org.apache.beam.sdk.io.gcp.bigquery;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createJobIdToken;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference;
+import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference;
import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.hasItem;
@@ -49,6 +48,7 @@
import org.apache.beam.sdk.extensions.protobuf.ProtoCoder;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.QueryPriority;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.JobType;
import org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices;
import org.apache.beam.sdk.io.gcp.testing.FakeDatasetService;
import org.apache.beam.sdk.io.gcp.testing.FakeJobService;
@@ -757,7 +757,7 @@
TableReference tempTableReference =
createTempTableReference(
bqOptions.getProject(),
- createJobIdToken(options.getJobName(), stepUuid),
+ BigQueryResourceNaming.createJobIdPrefix(options.getJobName(), stepUuid, JobType.QUERY),
Optional.empty());
fakeJobService.expectDryRunQuery(
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageQueryTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageQueryTest.java
index 5c9bea5..e39ee99 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageQueryTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageQueryTest.java
@@ -17,8 +17,7 @@
*/
package org.apache.beam.sdk.io.gcp.bigquery;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createJobIdToken;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference;
+import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference;
import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.hasItem;
@@ -71,6 +70,7 @@
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.QueryPriority;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.JobType;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.StorageClient;
import org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices;
import org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices.FakeBigQueryServerStream;
@@ -384,7 +384,7 @@
TableReference tempTableReference =
createTempTableReference(
options.getProject(),
- createJobIdToken(options.getJobName(), stepUuid),
+ BigQueryResourceNaming.createJobIdPrefix(options.getJobName(), stepUuid, JobType.QUERY),
Optional.empty());
CreateReadSessionRequest expectedRequest =
@@ -475,7 +475,7 @@
TableReference tempTableReference =
createTempTableReference(
options.getProject(),
- createJobIdToken(options.getJobName(), stepUuid),
+ BigQueryResourceNaming.createJobIdPrefix(options.getJobName(), stepUuid, JobType.QUERY),
Optional.empty());
CreateReadSessionRequest expectedRequest =
@@ -623,7 +623,7 @@
TableReference tempTableReference =
createTempTableReference(
options.getProject(),
- createJobIdToken(options.getJobName(), stepUuid),
+ BigQueryResourceNaming.createJobIdPrefix(options.getJobName(), stepUuid, JobType.QUERY),
Optional.empty());
CreateReadSessionRequest expectedRequest =
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java
index 37999aa..4489139 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java
@@ -1631,7 +1631,8 @@
String tableName = String.format("project-id:dataset-id.table%05d", i);
TableDestination tableDestination = new TableDestination(tableName, tableName);
for (int j = 0; j < numPartitions; ++j) {
- String tempTableId = BigQueryHelpers.createJobId(jobIdToken, tableDestination, j, 0);
+ String tempTableId =
+ BigQueryResourceNaming.createJobIdWithDestination(jobIdToken, tableDestination, j, 0);
List<String> filesPerPartition = Lists.newArrayList();
for (int k = 0; k < numFilesPerPartition; ++k) {
String filename =
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryResourceNamingTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryResourceNamingTest.java
new file mode 100644
index 0000000..5a73e54
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryResourceNamingTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.matchesPattern;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.JobType;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class BigQueryResourceNamingTest {
+ /** Expected job id shape: prefix, upper-case job type, two lower-case parts, optional suffix. */
+ public static final String BQ_JOB_PATTERN_REGEXP =
+ "beam_bq_job_[A-Z]+_[a-z0-9-]+_[a-z0-9-]+(_[A-Za-z0-9-]+)?";
+
+ @Test
+ public void testJobTypesInNames() {
+ assertEquals(
+ "beam_bq_job_EXPORT_beamappjobtest_abcd",
+ BigQueryResourceNaming.createJobIdPrefix("beamapp-job-test", "abcd", JobType.EXPORT));
+
+ assertEquals(
+ "beam_bq_job_LOAD_beamappjobtest_abcd",
+ BigQueryResourceNaming.createJobIdPrefix("beamapp-job-test", "abcd", JobType.LOAD));
+
+ assertEquals(
+ "beam_bq_job_QUERY_beamappjobtest_abcd",
+ BigQueryResourceNaming.createJobIdPrefix("beamapp-job-test", "abcd", JobType.QUERY));
+
+ assertEquals(
+ "beam_bq_job_COPY_beamappjobtest_abcd",
+ BigQueryResourceNaming.createJobIdPrefix("beamapp-job-test", "abcd", JobType.COPY));
+ }
+
+ @Test
+ public void testJobRandomInNames() {
+ assertEquals(
+ "beam_bq_job_EXPORT_beamappjobtest_abcd_RANDOME",
+ BigQueryResourceNaming.createJobIdPrefix(
+ "beamapp-job-test", "abcd", JobType.EXPORT, "RANDOME"));
+ }
+
+ @Test
+ public void testMatchesBigQueryJobTemplate() {
+ assertThat(
+ BigQueryResourceNaming.createJobIdPrefix(
+ "beamapp-job-test", "abcd", JobType.EXPORT, "RANDOME"),
+ matchesPattern(BQ_JOB_PATTERN_REGEXP));
+
+ assertThat(
+ BigQueryResourceNaming.createJobIdPrefix("beamapp-job-test", "abcd", JobType.COPY),
+ matchesPattern(BQ_JOB_PATTERN_REGEXP));
+ }
+}
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
index 413acba..15bfc0a 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImplTest.java
@@ -27,6 +27,7 @@
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@@ -35,6 +36,7 @@
import com.google.api.client.googleapis.json.GoogleJsonError.ErrorInfo;
import com.google.api.client.googleapis.json.GoogleJsonErrorContainer;
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+import com.google.api.client.http.HttpResponseException;
import com.google.api.client.http.LowLevelHttpResponse;
import com.google.api.client.json.GenericJson;
import com.google.api.client.json.Json;
@@ -110,7 +112,6 @@
return response;
}
};
-
// A mock transport that lets us mock the API responses.
MockHttpTransport transport =
new MockHttpTransport.Builder().setLowLevelHttpRequest(request).build();
@@ -715,11 +716,11 @@
}
/**
- * Tests that {@link DatasetServiceImpl#insertAll} retries other non-rate-limited,
+ * Tests that {@link DatasetServiceImpl#insertAll} will not retry other non-rate-limited,
* non-quota-exceeded attempts.
*/
@Test
- public void testInsertOtherRetry() throws Throwable {
+ public void testFailInsertOtherRetry() throws Exception {
TableReference ref =
new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
List<ValueInSingleWindow<TableRow>> rows = new ArrayList<>();
@@ -733,26 +734,29 @@
when(response.getContent())
.thenReturn(toStream(errorWithReasonAndStatus("actually forbidden", 403)))
.thenReturn(toStream(new TableDataInsertAllResponse()));
-
DatasetServiceImpl dataService =
new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
- dataService.insertAll(
- ref,
- rows,
- null,
- BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
- TEST_BACKOFF,
- new MockSleeper(),
- InsertRetryPolicy.alwaysRetry(),
- null,
- null,
- false,
- false,
- false);
- verify(response, times(2)).getStatusCode();
- verify(response, times(2)).getContent();
- verify(response, times(2)).getContentType();
- expectedLogs.verifyInfo("BigQuery insertAll error, retrying:");
+ thrown.expect(RuntimeException.class);
+ thrown.expectMessage("actually forbidden");
+ try {
+ dataService.insertAll(
+ ref,
+ rows,
+ null,
+ BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
+ TEST_BACKOFF,
+ new MockSleeper(),
+ InsertRetryPolicy.alwaysRetry(),
+ null,
+ null,
+ false,
+ false,
+ false);
+ } finally {
+ verify(response, times(1)).getStatusCode();
+ verify(response, times(1)).getContent();
+ verify(response, times(1)).getContentType();
+ }
}
/**
@@ -919,6 +923,23 @@
}
@Test
+ public void testGetErrorInfo() throws IOException {
+ DatasetServiceImpl dataService =
+ new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
+ ErrorInfo info = new ErrorInfo();
+ List<ErrorInfo> infoList = new ArrayList<>();
+ infoList.add(info);
+ info.setReason("QuotaExceeded");
+ GoogleJsonError error = new GoogleJsonError();
+ error.setErrors(infoList);
+ HttpResponseException.Builder builder = mock(HttpResponseException.Builder.class);
+ IOException validException = new GoogleJsonResponseException(builder, error);
+ IOException invalidException = new IOException();
+ assertEquals(info.getReason(), dataService.getErrorInfo(validException).getReason());
+ assertNull(dataService.getErrorInfo(invalidException));
+ }
+
+ @Test
public void testCreateTableSucceeds() throws IOException {
TableReference ref =
new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClientTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClientTest.java
index aad9729..22c1cb1 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClientTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClientTest.java
@@ -101,7 +101,7 @@
.setAttributes(
ImmutableMap.of(
TIMESTAMP_ATTRIBUTE, String.valueOf(MESSAGE_TIME), ID_ATTRIBUTE, RECORD_ID))
- .set("orderingKey", ORDERING_KEY);
+ .setOrderingKey(ORDERING_KEY);
ReceivedMessage expectedReceivedMessage =
new ReceivedMessage().setMessage(expectedPubsubMessage).setAckId(ACK_ID);
PullResponse expectedResponse =
diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
index 13aabc8..08847f6 100644
--- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
+++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
@@ -452,9 +452,15 @@
// Set required defaults
setTopicPartitions(Collections.emptyList());
setConsumerFactoryFn(Read.KAFKA_CONSUMER_FACTORY_FN);
- setMaxNumRecords(Long.MAX_VALUE);
+ if (config.maxReadTime != null) {
+ setMaxReadTime(Duration.standardSeconds(config.maxReadTime));
+ }
+ setMaxNumRecords(config.maxNumRecords == null ? Long.MAX_VALUE : config.maxNumRecords);
setCommitOffsetsInFinalizeEnabled(false);
setTimestampPolicyFactory(TimestampPolicyFactory.withProcessingTime());
+ if (config.startReadTime != null) {
+ setStartReadTime(Instant.ofEpochMilli(config.startReadTime));
+ }
// We do not include Metadata until we can encode KafkaRecords cross-language
return build().withoutMetadata();
}
@@ -507,6 +513,9 @@
private Iterable<String> topics;
private String keyDeserializer;
private String valueDeserializer;
+ private Long startReadTime; // epoch milliseconds; converted with Instant.ofEpochMilli when non-null
+ private Long maxNumRecords; // Long.MAX_VALUE is used instead when this is null
+ private Long maxReadTime; // seconds; converted with Duration.standardSeconds when non-null
public void setConsumerConfig(Iterable<KV<String, String>> consumerConfig) {
this.consumerConfig = consumerConfig;
@@ -523,6 +532,18 @@
public void setValueDeserializer(String valueDeserializer) {
this.valueDeserializer = valueDeserializer;
}
+
+ public void setStartReadTime(Long startReadTime) { // epoch milliseconds, or null if unset
+ this.startReadTime = startReadTime;
+ }
+
+ public void setMaxNumRecords(Long maxNumRecords) { // null falls back to Long.MAX_VALUE
+ this.maxNumRecords = maxNumRecords;
+ }
+
+ public void setMaxReadTime(Long maxReadTime) { // seconds, or null if unset
+ this.maxReadTime = maxReadTime;
+ }
}
}
diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOExternalTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOExternalTest.java
index d157c16..3e44c17 100644
--- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOExternalTest.java
+++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOExternalTest.java
@@ -34,6 +34,7 @@
import org.apache.beam.sdk.coders.IterableCoder;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.coders.VarLongCoder;
import org.apache.beam.sdk.expansion.service.ExpansionService;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.Impulse;
@@ -67,6 +68,7 @@
.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, keyDeserializer)
.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, valueDeserializer)
.build();
+ Long startReadTime = 100L;
ExternalTransforms.ExternalConfigurationPayload payload =
ExternalTransforms.ExternalConfigurationPayload.newBuilder()
@@ -98,6 +100,12 @@
.addCoderUrn("beam:coder:string_utf8:v1")
.setPayload(ByteString.copyFrom(encodeString(valueDeserializer)))
.build())
+ .putConfiguration(
+ "start_read_time",
+ ExternalTransforms.ConfigValue.newBuilder()
+ .addCoderUrn("beam:coder:varint:v1")
+ .setPayload(ByteString.copyFrom(encodeLong(startReadTime)))
+ .build())
.build();
RunnerApi.Components defaultInstance = RunnerApi.Components.getDefaultInstance();
@@ -280,6 +288,12 @@
return baos.toByteArray();
}
+ private static byte[] encodeLong(Long value) throws IOException {
+ ByteArrayOutputStream encoded = new ByteArrayOutputStream();
+ VarLongCoder.of().encode(value, encoded);
+ return encoded.toByteArray();
+ }
+
private static class TestStreamObserver<T> implements StreamObserver<T> {
private T result;
diff --git a/sdks/java/io/snowflake/build.gradle b/sdks/java/io/snowflake/build.gradle
index 32ad7af..ea8204d 100644
--- a/sdks/java/io/snowflake/build.gradle
+++ b/sdks/java/io/snowflake/build.gradle
@@ -22,6 +22,7 @@
automaticModuleName: 'org.apache.beam.sdk.io.snowflake')
provideIntegrationTestingDependencies()
enableJavaPerformanceTesting()
+
description = "Apache Beam :: SDKs :: Java :: IO :: Snowflake"
ext.summary = "IO to read and write on Snowflake."
dependencies {
@@ -31,6 +32,7 @@
compile library.java.slf4j_api
compile group: 'net.snowflake', name: 'snowflake-jdbc', version: '3.12.7'
compile group: 'com.opencsv', name: 'opencsv', version: '5.0'
+ compile 'net.snowflake:snowflake-ingest-sdk:0.9.9'
testCompile project(path: ":sdks:java:core", configuration: "shadowTest")
testCompile project(path: ":sdks:java:io:common", configuration: "testRuntime")
testCompile project(path: ":sdks:java:testing:test-utils", configuration: "testRuntime")
diff --git a/sdks/java/io/snowflake/expansion-service/build.gradle b/sdks/java/io/snowflake/expansion-service/build.gradle
new file mode 100644
index 0000000..8a6ea6c
--- /dev/null
+++ b/sdks/java/io/snowflake/expansion-service/build.gradle
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+apply plugin: 'org.apache.beam.module'
+apply plugin: 'application'
+mainClassName = "org.apache.beam.sdk.expansion.service.ExpansionService"
+
+applyJavaNature(enableChecker:false,
+ automaticModuleName: 'org.apache.beam.sdk.io.expansion.service',
+ exportJavadoc: false,
+ validateShadowJar: false,
+ shadowClosure: {},
+)
+
+description = "Apache Beam :: SDKs :: Java :: IO :: Snowflake :: Expansion Service"
+ext.summary = "Expansion service serving Snowflake IO"
+
+dependencies {
+ compile project(":sdks:java:expansion-service")
+ compile project(":sdks:java:io:snowflake")
+ runtime library.java.slf4j_jdk14
+}
+
diff --git a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/SnowflakeIO.java b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/SnowflakeIO.java
index 845f137..a2c0a7e 100644
--- a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/SnowflakeIO.java
+++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/SnowflakeIO.java
@@ -37,6 +37,8 @@
import java.util.stream.Collectors;
import javax.sql.DataSource;
import net.snowflake.client.jdbc.SnowflakeBasicDataSource;
+import net.snowflake.ingest.SimpleIngestManager;
+import net.snowflake.ingest.connection.HistoryResponse;
import org.apache.beam.sdk.annotations.Experimental;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.ListCoder;
@@ -51,13 +53,18 @@
import org.apache.beam.sdk.io.snowflake.credentials.KeyPairSnowflakeCredentials;
import org.apache.beam.sdk.io.snowflake.credentials.OAuthTokenSnowflakeCredentials;
import org.apache.beam.sdk.io.snowflake.credentials.SnowflakeCredentials;
+import org.apache.beam.sdk.io.snowflake.credentials.SnowflakeCredentialsFactory;
import org.apache.beam.sdk.io.snowflake.credentials.UsernamePasswordSnowflakeCredentials;
import org.apache.beam.sdk.io.snowflake.data.SnowflakeTableSchema;
import org.apache.beam.sdk.io.snowflake.enums.CreateDisposition;
+import org.apache.beam.sdk.io.snowflake.enums.StreamingLogLevel;
import org.apache.beam.sdk.io.snowflake.enums.WriteDisposition;
+import org.apache.beam.sdk.io.snowflake.services.SnowflakeBatchServiceConfig;
+import org.apache.beam.sdk.io.snowflake.services.SnowflakeBatchServiceImpl;
import org.apache.beam.sdk.io.snowflake.services.SnowflakeService;
-import org.apache.beam.sdk.io.snowflake.services.SnowflakeServiceConfig;
-import org.apache.beam.sdk.io.snowflake.services.SnowflakeServiceImpl;
+import org.apache.beam.sdk.io.snowflake.services.SnowflakeStreamingServiceConfig;
+import org.apache.beam.sdk.io.snowflake.services.SnowflakeStreamingServiceImpl;
+import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.transforms.Combine;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
@@ -73,12 +80,20 @@
import org.apache.beam.sdk.transforms.Wait;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.transforms.display.HasDisplayData;
+import org.apache.beam.sdk.transforms.windowing.AfterFirst;
+import org.apache.beam.sdk.transforms.windowing.AfterPane;
+import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
+import org.apache.beam.sdk.transforms.windowing.Repeatedly;
+import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PBegin;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.PDone;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Joiner;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Splitter;
import org.checkerframework.checker.nullness.qual.Nullable;
+import org.joda.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -92,9 +107,9 @@
*
* <p>To configure SnowflakeIO to read/write from your Snowflake instance, you have to provide a
* {@link DataSourceConfiguration} using {@link
- * DataSourceConfiguration#create(SnowflakeCredentials)}, where {@link SnowflakeCredentials might be
- * created using {@link org.apache.beam.sdk.io.snowflake.credentials.SnowflakeCredentialsFactory}}.
- * Additionally one of {@link DataSourceConfiguration#withServerName(String)} or {@link
+ * DataSourceConfiguration#create(SnowflakeCredentials)}, where {@link SnowflakeCredentials} might
+ * be created using {@link SnowflakeCredentialsFactory}. Additionally one of {@link
+ * DataSourceConfiguration#withServerName(String)} or {@link
* DataSourceConfiguration#withUrl(String)} must be used to tell SnowflakeIO which instance to use.
* <br>
* There are also other options available to configure connection to Snowflake:
@@ -147,8 +162,8 @@
*
* <h3>Writing to Snowflake</h3>
*
- * <p>SnowflakeIO.Write supports writing records into a database. It writes a {@link PCollection<T>}
- * to the database by converting each T into a {@link Object[]} via a user-provided {@link
+ * <p>SnowflakeIO.Write supports writing records into a database. It writes a {@link PCollection} to
+ * the database by converting each T into a {@code Object[]} via a user-provided {@link
* UserDataMapper}.
*
* <p>For example
@@ -171,7 +186,13 @@
private static final Logger LOG = LoggerFactory.getLogger(SnowflakeIO.class);
private static final String CSV_QUOTE_CHAR = "'";
- private static final String WRITE_TMP_PATH = "data";
+ // NOTE(review): DEFAULT_SLEEP_STREAMING_LOGS below is 5000 s, far above the 2 min max; confirm.
+ static final int DEFAULT_FLUSH_ROW_LIMIT = 10000;
+ static final int DEFAULT_STREAMING_SHARDS_NUMBER = 1;
+ static final int DEFAULT_BATCH_SHARDS_NUMBER = 0;
+ static final Duration DEFAULT_FLUSH_TIME_LIMIT = Duration.millis(30000); // 30 seconds
+ static final Duration DEFAULT_STREAMING_LOGS_MAX_SLEEP = Duration.standardMinutes(2);
+ static final Duration DEFAULT_SLEEP_STREAMING_LOGS = Duration.standardSeconds(5000);
/**
* Read data from Snowflake.
@@ -182,6 +203,7 @@
public static <T> Read<T> read(SnowflakeService snowflakeService) {
return new AutoValue_SnowflakeIO_Read.Builder<T>()
.setSnowflakeService(snowflakeService)
+ .setQuotationMark(CSV_QUOTE_CHAR)
.build();
}
@@ -191,7 +213,7 @@
* @param <T> Type of the data to be read.
*/
public static <T> Read<T> read() {
- return read(new SnowflakeServiceImpl());
+ return read(new SnowflakeBatchServiceImpl());
}
/**
@@ -226,6 +248,10 @@
.setFileNameTemplate("output")
.setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
.setWriteDisposition(WriteDisposition.APPEND)
+ .setFlushTimeLimit(DEFAULT_FLUSH_TIME_LIMIT)
+ .setShardsNumber(DEFAULT_BATCH_SHARDS_NUMBER)
+ .setFlushRowLimit(DEFAULT_FLUSH_ROW_LIMIT)
+ .setQuotationMark(CSV_QUOTE_CHAR)
.build();
}
@@ -249,6 +275,9 @@
abstract @Nullable SnowflakeService getSnowflakeService();
+ @Nullable
+ abstract String getQuotationMark();
+
abstract Builder<T> toBuilder();
@AutoValue.Builder
@@ -270,13 +299,15 @@
abstract Builder<T> setSnowflakeService(SnowflakeService snowflakeService);
+ abstract Builder<T> setQuotationMark(String quotationMark);
+
abstract Read<T> build();
}
/**
* Setting information about Snowflake server.
*
- * @param config - An instance of {@link DataSourceConfiguration}.
+ * @param config An instance of {@link DataSourceConfiguration}.
*/
public Read<T> withDataSourceConfiguration(final DataSourceConfiguration config) {
return withDataSourceProviderFn(new DataSourceProviderFromDataSourceConfiguration(config));
@@ -295,7 +326,7 @@
/**
* A query to be executed in Snowflake.
*
- * @param query - String with query.
+ * @param query String with query.
*/
public Read<T> fromQuery(String query) {
return toBuilder().setQuery(query).build();
@@ -304,7 +335,7 @@
/**
* A table name to be read in Snowflake.
*
- * @param table - String with the name of the table.
+ * @param table String with the name of the table.
*/
public Read<T> fromTable(String table) {
return toBuilder().setTable(table).build();
@@ -313,9 +344,12 @@
/**
* Name of the cloud bucket (GCS by now) to use as tmp location of CSVs during COPY statement.
*
- * @param stagingBucketName - String with the name of the bucket.
+ * @param stagingBucketName String with the name of the bucket.
*/
public Read<T> withStagingBucketName(String stagingBucketName) {
+ checkArgument(
+ stagingBucketName.endsWith("/"),
+ "stagingBucketName must be a cloud storage path ending with /");
return toBuilder().setStagingBucketName(stagingBucketName).build();
}
@@ -324,7 +358,7 @@
* https://docs.snowflake.com/en/sql-reference/sql/create-storage-integration.html for
* reference.
*
- * @param integrationName - String with the name of the Storage Integration.
+ * @param integrationName String with the name of the Storage Integration.
*/
public Read<T> withStorageIntegrationName(String integrationName) {
return toBuilder().setStorageIntegrationName(integrationName).build();
@@ -333,7 +367,7 @@
/**
* User-defined function mapping CSV lines into user data.
*
- * @param csvMapper - an instance of {@link CsvMapper}.
+ * @param csvMapper an instance of {@link CsvMapper}.
*/
public Read<T> withCsvMapper(CsvMapper<T> csvMapper) {
return toBuilder().setCsvMapper(csvMapper).build();
@@ -342,21 +376,29 @@
/**
* A Coder to be used by the output PCollection generated by the source.
*
- * @param coder - an instance of {@link Coder}.
+ * @param coder an instance of {@link Coder}.
*/
public Read<T> withCoder(Coder<T> coder) {
return toBuilder().setCoder(coder).build();
}
+ /**
+ * Sets the quotation mark passed to the staging copy and used as the CSV parser's quote char.
+ *
+ * @param quotationMark single quote {@code '} (the default), double quote {@code "} or nothing.
+ * NOTE(review): parsing calls {@code charAt(0)} on it; confirm an empty value is safe.
+ * @return a {@link Read} built from this one with the quotation mark replaced.
+ */
+ public Read<T> withQuotationMark(String quotationMark) {
+ return toBuilder().setQuotationMark(quotationMark).build();
+ }
+
@Override
public PCollection<T> expand(PBegin input) {
checkArguments();
- String tmpDirName = makeTmpDirName();
- String stagingBucketDir = String.format("%s/%s/", getStagingBucketName(), tmpDirName);
-
PCollection<Void> emptyCollection = input.apply(Create.of((Void) null));
-
+ String tmpDirName = makeTmpDirName();
PCollection<T> output =
emptyCollection
.apply(
@@ -366,20 +408,22 @@
getQuery(),
getTable(),
getStorageIntegrationName(),
- stagingBucketDir,
- getSnowflakeService())))
+ getStagingBucketName(),
+ tmpDirName,
+ getSnowflakeService(),
+ getQuotationMark())))
.apply(Reshuffle.viaRandomKey())
.apply(FileIO.matchAll())
.apply(FileIO.readMatches())
.apply(readFiles())
- .apply(ParDo.of(new MapCsvToStringArrayFn()))
+ .apply(ParDo.of(new MapCsvToStringArrayFn(getQuotationMark())))
.apply(ParDo.of(new MapStringArrayToUserDataFn<>(getCsvMapper())));
output.setCoder(getCoder());
emptyCollection
.apply(Wait.on(output))
- .apply(ParDo.of(new CleanTmpFilesFromGcsFn(stagingBucketDir)));
+ .apply(ParDo.of(new CleanTmpFilesFromGcsFn(getStagingBucketName(), tmpDirName)));
return output;
}
@@ -387,8 +431,9 @@
// Either table or query is required. If query is present, it's being used, table is used
// otherwise
- checkArgument(getStorageIntegrationName() != null, "withStorageIntegrationName is required");
- checkArgument(getStagingBucketName() != null, "withStagingBucketName is required");
+ checkArgument(
+ getStorageIntegrationName() != null, "withStorageIntegrationName() is required");
+ checkArgument(getStagingBucketName() != null, "withStagingBucketName() is required");
checkArgument(
getQuery() != null || getTable() != null, "fromTable() or fromQuery() is required");
@@ -414,9 +459,13 @@
private final SerializableFunction<Void, DataSource> dataSourceProviderFn;
private final String query;
private final String table;
+ private final String database;
+ private final String schema;
+ private final String tmpDirName;
private final String storageIntegrationName;
private final String stagingBucketDir;
private final SnowflakeService snowflakeService;
+ private final String quotationMark;
private CopyIntoStageFn(
SerializableFunction<Void, DataSource> dataSourceProviderFn,
@@ -424,23 +473,44 @@
String table,
String storageIntegrationName,
String stagingBucketDir,
- SnowflakeService snowflakeService) {
+ String tmpDirName,
+ SnowflakeService snowflakeService,
+ String quotationMark) {
this.dataSourceProviderFn = dataSourceProviderFn;
this.query = query;
this.table = table;
this.storageIntegrationName = storageIntegrationName;
- this.stagingBucketDir =
- String.format(
- "%s/run_%s/", stagingBucketDir, UUID.randomUUID().toString().subSequence(0, 8));
this.snowflakeService = snowflakeService;
+ this.quotationMark = quotationMark;
+ this.stagingBucketDir = stagingBucketDir;
+ this.tmpDirName = tmpDirName;
+ DataSourceProviderFromDataSourceConfiguration
+ dataSourceProviderFromDataSourceConfiguration =
+ (DataSourceProviderFromDataSourceConfiguration) this.dataSourceProviderFn;
+ DataSourceConfiguration config = dataSourceProviderFromDataSourceConfiguration.getConfig();
+
+ this.database = config.getDatabase();
+ this.schema = config.getSchema();
}
@ProcessElement
public void processElement(ProcessContext context) throws Exception {
- SnowflakeServiceConfig config =
- new SnowflakeServiceConfig(
- dataSourceProviderFn, table, query, storageIntegrationName, stagingBucketDir);
+ String stagingBucketRunDir =
+ String.format(
+ "%s/%s/run_%s/",
+ stagingBucketDir, tmpDirName, UUID.randomUUID().toString().subSequence(0, 8));
+
+ SnowflakeBatchServiceConfig config =
+ new SnowflakeBatchServiceConfig(
+ dataSourceProviderFn,
+ database,
+ schema,
+ table,
+ query,
+ storageIntegrationName,
+ stagingBucketRunDir,
+ quotationMark);
String output = snowflakeService.read(config);
@@ -448,11 +518,21 @@
}
}
+ /**
+ * Parses each incoming CSV line into a {@code String[]} of its fields, using the configured
+ * quote character.
+ */
public static class MapCsvToStringArrayFn extends DoFn<String, String[]> {
+ private String quoteChar;
+
+ public MapCsvToStringArrayFn(String quoteChar) {
+ this.quoteChar = quoteChar;
+ }
+
@ProcessElement
public void processElement(ProcessContext c) throws IOException {
String csvLine = c.element();
- CSVParser parser = new CSVParserBuilder().withQuoteChar(CSV_QUOTE_CHAR.charAt(0)).build();
+ CSVParser parser = new CSVParserBuilder().withQuoteChar(quoteChar.charAt(0)).build();
String[] parts = parser.parseLine(csvLine);
c.output(parts);
}
@@ -471,16 +551,25 @@
}
}
+ /** Removes temporary staged files after reading. */
public static class CleanTmpFilesFromGcsFn extends DoFn<Object, Object> {
private final String stagingBucketDir;
+ private final String tmpDirName;
- public CleanTmpFilesFromGcsFn(String stagingBucketDir) {
+ /**
+ * Creates an object that will remove temporary files from the stage.
+ *
+ * @param stagingBucketDir bucket and directory where temporary files are saved
+ * @param tmpDirName temporary directory created on bucket where files were saved
+ */
+ public CleanTmpFilesFromGcsFn(String stagingBucketDir, String tmpDirName) {
this.stagingBucketDir = stagingBucketDir;
+ this.tmpDirName = tmpDirName;
}
@ProcessElement
public void processElement(ProcessContext c) throws IOException {
- String combinedPath = stagingBucketDir + "/**";
+ String combinedPath = String.format("%s/%s/**", stagingBucketDir, tmpDirName);
List<ResourceId> paths =
FileSystems.match(combinedPath).metadata().stream()
.map(metadata -> metadata.resourceId())
@@ -521,10 +610,18 @@
abstract @Nullable String getStagingBucketName();
- abstract @Nullable String getQuery();
+ abstract @Nullable ValueProvider<String> getSnowPipe();
+
+ abstract @Nullable Integer getFlushRowLimit();
+
+ abstract @Nullable Integer getShardsNumber();
+
+ abstract @Nullable Duration getFlushTimeLimit();
abstract @Nullable String getFileNameTemplate();
+ abstract @Nullable String getQuery();
+
abstract @Nullable WriteDisposition getWriteDisposition();
abstract @Nullable CreateDisposition getCreateDisposition();
@@ -535,6 +632,12 @@
abstract @Nullable SnowflakeService getSnowflakeService();
+ @Nullable
+ abstract String getQuotationMark();
+
+ @Nullable
+ abstract StreamingLogLevel getDebugMode();
+
abstract Builder<T> toBuilder();
@AutoValue.Builder
@@ -550,6 +653,14 @@
abstract Builder<T> setQuery(String query);
+ abstract Builder<T> setSnowPipe(ValueProvider<String> snowPipe);
+
+ abstract Builder<T> setFlushRowLimit(Integer rowsCount);
+
+ abstract Builder<T> setShardsNumber(Integer shardsNumber);
+
+ abstract Builder<T> setFlushTimeLimit(Duration triggeringFrequency);
+
abstract Builder<T> setFileNameTemplate(String fileNameTemplate);
abstract Builder<T> setUserDataMapper(UserDataMapper userDataMapper);
@@ -562,13 +673,17 @@
abstract Builder<T> setSnowflakeService(SnowflakeService snowflakeService);
+ abstract Builder<T> setQuotationMark(String quotationMark);
+
+ abstract Builder<T> setDebugMode(StreamingLogLevel debugLevel);
+
abstract Write<T> build();
}
/**
* Setting information about Snowflake server.
*
- * @param config - An instance of {@link DataSourceConfiguration}.
+ * @param config An instance of {@link DataSourceConfiguration}.
*/
public Write<T> withDataSourceConfiguration(final DataSourceConfiguration config) {
return withDataSourceProviderFn(new DataSourceProviderFromDataSourceConfiguration(config));
@@ -587,7 +702,7 @@
/**
* A table name to be written in Snowflake.
*
- * @param table - String with the name of the table.
+ * @param table String with the name of the table.
*/
public Write<T> to(String table) {
return toBuilder().setTable(table).build();
@@ -596,9 +711,12 @@
/**
* Name of the cloud bucket (GCS by now) to use as tmp location of CSVs during COPY statement.
*
- * @param stagingBucketName - String with the name of the bucket.
+ * @param stagingBucketName String with the name of the bucket.
*/
public Write<T> withStagingBucketName(String stagingBucketName) {
+ checkArgument(
+ stagingBucketName.endsWith("/"),
+ "stagingBucketName must be a cloud storage path ending with /");
return toBuilder().setStagingBucketName(stagingBucketName).build();
}
@@ -607,7 +725,7 @@
* https://docs.snowflake.com/en/sql-reference/sql/create-storage-integration.html for
* reference.
*
- * @param integrationName - String with the name of the Storage Integration.
+ * @param integrationName String with the name of the Storage Integration.
*/
public Write<T> withStorageIntegrationName(String integrationName) {
return toBuilder().setStorageIntegrationName(integrationName).build();
@@ -616,7 +734,7 @@
/**
* A query to be executed in Snowflake.
*
- * @param query - String with query.
+ * @param query String with query.
*/
public Write<T> withQueryTransformation(String query) {
return toBuilder().setQuery(query).build();
@@ -625,7 +743,7 @@
/**
* A template name for files saved to GCP.
*
- * @param fileNameTemplate - String with template name for files.
+ * @param fileNameTemplate String with template name for files.
*/
public Write<T> withFileNameTemplate(String fileNameTemplate) {
return toBuilder().setFileNameTemplate(fileNameTemplate).build();
@@ -634,16 +752,96 @@
/**
* User-defined function mapping user data into CSV lines.
*
- * @param userDataMapper - an instance of {@link UserDataMapper}.
+ * @param userDataMapper an instance of {@link UserDataMapper}.
*/
public Write<T> withUserDataMapper(UserDataMapper userDataMapper) {
return toBuilder().setUserDataMapper(userDataMapper).build();
}
/**
+ * Sets how often staged files are created, and therefore how often they are ingested by
+ * Snowflake, during streaming.
+ *
+ * @param triggeringFrequency triggering frequency as a {@link Duration}.
+ * @return a {@link Write} with the flush time limit set.
+ */
+ public Write<T> withFlushTimeLimit(Duration triggeringFrequency) {
+ return toBuilder().setFlushTimeLimit(triggeringFrequency).build();
+ }
+
+ /**
+ * Sets name of <a
+ * href="https://docs.snowflake.com/en/user-guide/data-load-snowpipe-intro.html">SnowPipe</a>
+ * which can be created in Snowflake dashboard or cli:
+ *
+ * <pre>{@code
+ * CREATE PIPE snowPipeName AS COPY INTO your_table from @yourstage;
+ * }</pre>
+ *
+ * <p>The stage in <a
+ * href="https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html">COPY</a>
+ * statement should be pointing to the cloud <a
+ * href="https://docs.snowflake.com/en/sql-reference/sql/create-storage-integration.html">integration</a>
+ * with the valid bucket url, ex. for GCS:
+ *
+ * <pre>{@code
+ * CREATE STAGE yourstage
+ * URL = 'gcs://yourbucket/path/'
+ * STORAGE_INTEGRATION = your_integration;
+ * }</pre>
+ *
+ * <pre>{@code
+ * CREATE STORAGE INTEGRATION your_integration
+ * TYPE = EXTERNAL_STAGE
+ * STORAGE_PROVIDER = GCS
+ * ENABLED = TRUE
+ * STORAGE_ALLOWED_LOCATIONS = ('gcs://yourbucket/path/')
+ * }</pre>
+ *
+ * @param snowPipe name of created SnowPipe in Snowflake dashboard.
+ * @return a {@link Write} with the SnowPipe name set.
+ */
+ public Write<T> withSnowPipe(String snowPipe) {
+ return toBuilder().setSnowPipe(ValueProvider.StaticValueProvider.of(snowPipe)).build();
+ }
+
+ /**
+ * Same as {@code withSnowPipe(String)}, but with a {@link ValueProvider}.
+ *
+ * @param snowPipe name of created SnowPipe in Snowflake dashboard.
+ * @return a {@link Write} with the SnowPipe name set.
+ */
+ public Write<T> withSnowPipe(ValueProvider<String> snowPipe) {
+ return toBuilder().setSnowPipe(snowPipe).build();
+ }
+
+ /**
+ * Number of shards that are created per window.
+ *
+ * @param shardsNumber defined number of shards or 1 by default.
+ * @return a {@link Write} with the number of shards set.
+ */
+ public Write<T> withShardsNumber(Integer shardsNumber) {
+ return toBuilder().setShardsNumber(shardsNumber).build();
+ }
+
+ /**
+ * Sets the number of rows after which a staged file is written and then loaded to Snowflake.
+ * If fewer rows than the limit arrive, the rows collected so far are loaded after the time
+ * specified by setting {@code withFlushTimeLimit(Duration
+ * triggeringFrequency)}.
+ *
+ * @param rowsCount Number of rows that will be in one file staged for loading. Default: 10000.
+ * @return a {@link Write} with the flush row limit set.
+ */
+ public Write<T> withFlushRowLimit(Integer rowsCount) {
+ return toBuilder().setFlushRowLimit(rowsCount).build();
+ }
+
+ /**
* A disposition to be used during writing to table phase.
*
- * @param writeDisposition - an instance of {@link WriteDisposition}.
+ * @param writeDisposition an instance of {@link WriteDisposition}.
*/
public Write<T> withWriteDisposition(WriteDisposition writeDisposition) {
return toBuilder().setWriteDisposition(writeDisposition).build();
@@ -668,28 +866,56 @@
}
/**
- * A snowflake service which is supposed to be used. Note: Currently we have {@link
- * SnowflakeServiceImpl} with corresponding {@link FakeSnowflakeServiceImpl} used for testing.
+ * A snowflake service {@link SnowflakeService} implementation which is supposed to be used.
*
- * @param snowflakeService - an instance of {@link SnowflakeService}.
+ * @param snowflakeService an instance of {@link SnowflakeService}.
*/
public Write<T> withSnowflakeService(SnowflakeService snowflakeService) {
return toBuilder().setSnowflakeService(snowflakeService).build();
}
+ /**
+ * Sets Snowflake-specific quotations around strings.
+ *
+ * @param quotationMark with possible single quote {@code '}, double quote {@code "} or nothing.
+ * Default value is single quotation {@code '}.
+ * @return a {@link Write} with the quotation mark set.
+ */
+ public Write<T> withQuotationMark(String quotationMark) {
+ return toBuilder().setQuotationMark(quotationMark).build();
+ }
+
+ /**
+ * Enables logging of info (or only errors) about loaded files while streaming. It is not set by
+ * default because it may influence performance. For details: <a
+ * href="https://docs.snowflake.com/en/user-guide/data-load-snowpipe-rest-apis.html#endpoint-insertreport">insert
+ * report REST API.</a>
+ *
+ * @param debugLevel error or info debug level from enum {@link StreamingLogLevel}
+ * @return a {@link Write} with the debug mode set.
+ */
+ public Write<T> withDebugMode(StreamingLogLevel debugLevel) {
+ return toBuilder().setDebugMode(debugLevel).build();
+ }
+
@Override
public PDone expand(PCollection<T> input) {
- checkArguments();
+ checkArguments(input);
- String stagingBucketDir = String.format("%s/%s/", getStagingBucketName(), WRITE_TMP_PATH);
+ PCollection out;
- PCollection<String> out = write(input, stagingBucketDir);
+ if (getSnowPipe() != null) {
+ out = writeStream(input, getStagingBucketName());
+ } else {
+ out = writeBatch(input, getStagingBucketName());
+ }
+
out.setCoder(StringUtf8Coder.of());
return PDone.in(out.getPipeline());
}
- private void checkArguments() {
+ private void checkArguments(PCollection<T> input) {
checkArgument(getStagingBucketName() != null, "withStagingBucketName is required");
checkArgument(getUserDataMapper() != null, "withUserDataMapper() is required");
@@ -698,14 +924,65 @@
(getDataSourceProviderFn() != null),
"withDataSourceConfiguration() or withDataSourceProviderFn() is required");
- checkArgument(getTable() != null, "to() is required");
+ if (input.isBounded() == PCollection.IsBounded.UNBOUNDED) {
+ checkArgument(
+ getSnowPipe() != null,
+ "in streaming (unbounded) write it is required to specify SnowPipe name via withSnowPipe() method.");
+ } else {
+ checkArgument(
+ getTable() != null,
+ "in batch writing it is required to specify destination table name via to() method.");
+ }
}
- private PCollection<String> write(PCollection<T> input, String stagingBucketDir) {
+ private PCollection<T> writeStream(PCollection<T> input, String stagingBucketDir) {
SnowflakeService snowflakeService =
- getSnowflakeService() != null ? getSnowflakeService() : new SnowflakeServiceImpl();
+ getSnowflakeService() != null
+ ? getSnowflakeService()
+ : new SnowflakeStreamingServiceImpl();
- PCollection<String> files = writeFiles(input, stagingBucketDir);
+ /* Ensure that files will be created after specific record count or duration specified */
+ PCollection<T> inputInGlobalWindow =
+ input.apply(
+ "Rewindow Into Global",
+ Window.<T>into(new GlobalWindows())
+ .triggering(
+ Repeatedly.forever(
+ AfterFirst.of(
+ AfterProcessingTime.pastFirstElementInPane()
+ .plusDelayOf(getFlushTimeLimit()),
+ AfterPane.elementCountAtLeast(getFlushRowLimit()))))
+ .discardingFiredPanes());
+
+ int shards = (getShardsNumber() > 0) ? getShardsNumber() : DEFAULT_STREAMING_SHARDS_NUMBER;
+ PCollection files = writeFiles(inputInGlobalWindow, stagingBucketDir, shards);
+
+ /* Ensuring that files will be ingested after flush time */
+ files =
+ (PCollection)
+ files.apply(
+ "Apply User Trigger",
+ Window.<T>into(new GlobalWindows())
+ .triggering(
+ Repeatedly.forever(
+ AfterProcessingTime.pastFirstElementInPane()
+ .plusDelayOf(getFlushTimeLimit())))
+ .discardingFiredPanes());
+ files =
+ (PCollection)
+ files.apply(
+ "Create list of files for loading via SnowPipe",
+ Combine.globally(new Concatenate()).withoutDefaults());
+
+ return (PCollection)
+ files.apply("Stream files to table", streamToTable(snowflakeService, stagingBucketDir));
+ }
+
+ private PCollection writeBatch(PCollection input, String stagingBucketDir) {
+ SnowflakeService snowflakeService =
+ getSnowflakeService() != null ? getSnowflakeService() : new SnowflakeBatchServiceImpl();
+
+ PCollection<String> files = writeBatchFiles(input, stagingBucketDir);
// Combining PCollection of files as a side input into one list of files
ListCoder<String> coder = ListCoder.of(StringUtf8Coder.of());
@@ -721,7 +998,12 @@
files.apply("Copy files to table", copyToTable(snowflakeService, stagingBucketDir));
}
- private PCollection<String> writeFiles(PCollection<T> input, String stagingBucketDir) {
+ private PCollection writeBatchFiles(PCollection<T> input, String outputDirectory) {
+ return writeFiles(input, outputDirectory, DEFAULT_BATCH_SHARDS_NUMBER);
+ }
+
+ private PCollection<String> writeFiles(
+ PCollection<T> input, String stagingBucketDir, int numShards) {
PCollection<String> mappedUserData =
input
@@ -733,7 +1015,9 @@
return getUserDataMapper().mapRow(element);
}
}))
- .apply("Map Objects array to CSV lines", ParDo.of(new MapObjectsArrayToCsvFn()))
+ .apply(
+ "Map Objects array to CSV lines",
+ ParDo.of(new MapObjectsArrayToCsvFn(getQuotationMark())))
.setCoder(StringUtf8Coder.of());
WriteFilesResult filesResult =
@@ -742,8 +1026,9 @@
FileIO.<String>write()
.via(TextIO.sink())
.to(stagingBucketDir)
- .withPrefix(getFileNameTemplate())
+ .withPrefix(UUID.randomUUID().toString().subSequence(0, 8).toString())
.withSuffix(".csv")
+ .withNumShards(numShards)
.withCompression(Compression.GZIP));
return (PCollection)
@@ -764,10 +1049,25 @@
getCreateDisposition(),
getWriteDisposition(),
getTableSchema(),
+ snowflakeService,
+ getQuotationMark()));
+ }
+
+ protected PTransform streamToTable(SnowflakeService snowflakeService, String stagingBucketDir) {
+ return ParDo.of(
+ new StreamToTableFn(
+ getDataSourceProviderFn(),
+ getSnowPipe(),
+ stagingBucketDir,
+ getDebugMode(),
snowflakeService));
}
}
+ /**
+ * Combines {@code String} paths of files staged for write into a single {@code List<String>}
+ * of those paths.
+ */
public static class Concatenate extends Combine.CombineFn<String, List<String>, List<String>> {
@Override
public List<String> createAccumulator() {
@@ -801,6 +1101,11 @@
* <p>Adds Snowflake-specific quotations around strings.
*/
private static class MapObjectsArrayToCsvFn extends DoFn<Object[], String> {
+ private String quotationMark;
+
+ public MapObjectsArrayToCsvFn(String quotationMark) {
+ this.quotationMark = quotationMark;
+ }
@ProcessElement
public void processElement(ProcessContext context) {
@@ -820,7 +1125,7 @@
}
private String quoteField(String field) {
- return quoteField(field, CSV_QUOTE_CHAR);
+ return quoteField(field, this.quotationMark);
}
private String quoteField(String field, String quotation) {
@@ -830,11 +1135,14 @@
private static class CopyToTableFn<ParameterT, OutputT> extends DoFn<ParameterT, OutputT> {
private final SerializableFunction<Void, DataSource> dataSourceProviderFn;
+ private final String database;
+ private final String schema;
private final String table;
private final String query;
private final SnowflakeTableSchema tableSchema;
private final String stagingBucketDir;
private final String storageIntegrationName;
+ private final String quotationMark;
private final WriteDisposition writeDisposition;
private final CreateDisposition createDisposition;
private final SnowflakeService snowflakeService;
@@ -848,35 +1156,156 @@
CreateDisposition createDisposition,
WriteDisposition writeDisposition,
SnowflakeTableSchema tableSchema,
- SnowflakeService snowflakeService) {
+ SnowflakeService snowflakeService,
+ String quotationMark) {
this.dataSourceProviderFn = dataSourceProviderFn;
- this.table = table;
this.query = query;
+ this.table = table;
this.stagingBucketDir = stagingBucketDir;
this.storageIntegrationName = storageIntegrationName;
this.writeDisposition = writeDisposition;
this.createDisposition = createDisposition;
this.tableSchema = tableSchema;
this.snowflakeService = snowflakeService;
+ this.quotationMark = quotationMark;
+
+ DataSourceProviderFromDataSourceConfiguration dataSourceProviderFromDataSourceConfiguration =
+ (DataSourceProviderFromDataSourceConfiguration) this.dataSourceProviderFn;
+ DataSourceConfiguration config = dataSourceProviderFromDataSourceConfiguration.getConfig();
+
+ this.database = config.getDatabase();
+ this.schema = config.getSchema();
}
@ProcessElement
public void processElement(ProcessContext context) throws Exception {
- SnowflakeServiceConfig config =
- new SnowflakeServiceConfig(
+ SnowflakeBatchServiceConfig config =
+ new SnowflakeBatchServiceConfig(
dataSourceProviderFn,
(List<String>) context.element(),
+ database,
+ schema,
table,
query,
tableSchema,
createDisposition,
writeDisposition,
storageIntegrationName,
- stagingBucketDir);
+ stagingBucketDir,
+ quotationMark);
snowflakeService.write(config);
}
}
+ /**
+ * Custom DoFn that streams staged files to a Snowflake table through SnowPipe.
+ *
+ * <p>Every incoming element is treated as a list of staged file names and is passed to the
+ * {@link SnowflakeService} wrapped in a {@link SnowflakeStreamingServiceConfig}. When a debug
+ * mode is set, the file names are additionally tracked and looked up in the SnowPipe ingest
+ * history at the end of each bundle.
+ */
+ private static class StreamToTableFn<ParameterT, OutputT> extends DoFn<ParameterT, OutputT> {
+ private final SerializableFunction<Void, DataSource> dataSourceProviderFn;
+ private final String stagingBucketDir;
+ private final ValueProvider<String> snowPipe;
+ private final StreamingLogLevel debugMode;
+ private final SnowflakeService snowflakeService;
+ private transient SimpleIngestManager ingestManager;
+
+ private transient DataSource dataSource;
+ // Names of files handed to SnowPipe that were not yet seen in the ingest history; only
+ // populated when debugMode is set.
+ ArrayList<String> trackedFilesNames;
+
+ StreamToTableFn(
+ SerializableFunction<Void, DataSource> dataSourceProviderFn,
+ ValueProvider<String> snowPipe,
+ String stagingBucketDir,
+ StreamingLogLevel debugMode,
+ SnowflakeService snowflakeService) {
+ this.dataSourceProviderFn = dataSourceProviderFn;
+ this.stagingBucketDir = stagingBucketDir;
+ this.snowPipe = snowPipe;
+ this.debugMode = debugMode;
+ this.snowflakeService = snowflakeService;
+ trackedFilesNames = new ArrayList<>();
+ }
+
+ /**
+ * Creates the data source and the {@link SimpleIngestManager} used to talk to SnowPipe.
+ *
+ * <p>Fails the argument check when the data source provider was not created from a {@link
+ * DataSourceConfiguration}, or when that configuration has no private key or no server name.
+ */
+ @Setup
+ public void setup() throws Exception {
+ dataSource = dataSourceProviderFn.apply(null);
+
+ // Connection details for SnowPipe are read from the DataSourceConfiguration, so a custom
+ // provider function cannot be used here; report that clearly instead of a ClassCastException.
+ checkArgument(
+ dataSourceProviderFn instanceof DataSourceProviderFromDataSourceConfiguration,
+ "withDataSourceConfiguration() is required for streaming write via SnowPipe");
+ DataSourceConfiguration config =
+ ((DataSourceProviderFromDataSourceConfiguration) dataSourceProviderFn).getConfig();
+
+ checkArgument(config.getPrivateKey() != null, "KeyPair is required for authentication");
+
+ String hostName = config.getServerName();
+ // Only one of withUrl()/withServerName() is required, so the server name can be missing.
+ checkArgument(
+ hostName != null, "withServerName() is required for streaming write via SnowPipe");
+ // The account name is taken from the first dot-separated segment of the host name.
+ List<String> path = Splitter.on('.').splitToList(hostName);
+ String account = path.get(0);
+ String username = config.getUsername();
+ PrivateKey privateKey = config.getPrivateKey();
+ String schema = config.getSchema();
+ String database = config.getDatabase();
+ String snowPipeName = String.format("%s.%s.%s", database, schema, snowPipe.get());
+
+ this.ingestManager =
+ new SimpleIngestManager(
+ account, username, snowPipeName, privateKey, "https", hostName, 443);
+ }
+
+ /** Passes the incoming list of staged files to the Snowflake service for ingestion. */
+ @ProcessElement
+ public void processElement(ProcessContext context) throws Exception {
+ List<String> filesList = (List<String>) context.element();
+
+ // Files are only tracked when their ingest status is going to be logged in finishBundle().
+ if (debugMode != null) {
+ trackedFilesNames.addAll(filesList);
+ }
+ SnowflakeStreamingServiceConfig config =
+ new SnowflakeStreamingServiceConfig(filesList, this.stagingBucketDir, this.ingestManager);
+ snowflakeService.write(config);
+ }
+
+ /**
+ * When a debug mode is set, polls the SnowPipe ingest history and logs the status of the
+ * tracked files. Files still untracked in the history after the polling loop are logged as
+ * not found.
+ */
+ @FinishBundle
+ public void finishBundle() throws Exception {
+ if (debugMode != null) {
+ String beginMark = null;
+ Duration currentSleep = Duration.ZERO;
+
+ // Poll until every tracked file was reported or the maximum total sleep time is reached.
+ while (currentSleep.isShorterThan(DEFAULT_STREAMING_LOGS_MAX_SLEEP)
+ && !trackedFilesNames.isEmpty()) {
+ Thread.sleep(DEFAULT_SLEEP_STREAMING_LOGS.getMillis());
+ currentSleep = currentSleep.plus(DEFAULT_SLEEP_STREAMING_LOGS);
+ HistoryResponse response = ingestManager.getHistory(null, null, beginMark);
+
+ // Continue the next request from where this response ended.
+ if (response != null && response.getNextBeginMark() != null) {
+ beginMark = response.getNextBeginMark();
+ }
+ if (response != null && response.files != null) {
+ response.files.forEach(
+ entry -> {
+ if (entry.getPath() != null && entry.isComplete()) {
+ String responseFileName =
+ String.format("'%s%s'", entry.getStageLocation(), entry.getPath())
+ .toLowerCase()
+ .replace("gcs://", "gs://");
+ // remove() reports whether the file was tracked, so no separate contains() scan.
+ if (trackedFilesNames.remove(responseFileName)) {
+ if (entry.getErrorsSeen() > 0) {
+ LOG.error(String.format("Snowflake SnowPipe ERROR: %s", entry.toString()));
+ } else if (entry.getErrorsSeen() == 0
+ && debugMode.equals(StreamingLogLevel.INFO)) {
+ LOG.info(String.format("Snowflake SnowPipe INFO: %s", entry.toString()));
+ }
+ }
+ }
+ });
+ }
+ }
+ trackedFilesNames.forEach(
+ file -> LOG.info(String.format("File %s was not found in ingest history", file)));
+ }
+ }
+ }
+
+ private static String getValueOrNull(ValueProvider<String> valueProvider) {
+ return valueProvider != null ? valueProvider.get() : null;
+ }
+
/**
* A POJO describing a {@link DataSource}, providing all properties allowing to create a {@link
* DataSource}.
@@ -954,7 +1383,7 @@
/**
* Creates {@link DataSourceConfiguration} from existing instance of {@link DataSource}.
*
- * @param dataSource - an instance of {@link DataSource}.
+ * @param dataSource an instance of {@link DataSource}.
*/
public static DataSourceConfiguration create(DataSource dataSource) {
checkArgument(dataSource instanceof Serializable, "dataSource must be Serializable");
@@ -967,7 +1396,7 @@
/**
* Creates {@link DataSourceConfiguration} from instance of {@link SnowflakeCredentials}.
*
- * @param credentials - an instance of {@link SnowflakeCredentials}.
+ * @param credentials an instance of {@link SnowflakeCredentials}.
*/
public static DataSourceConfiguration create(SnowflakeCredentials credentials) {
if (credentials instanceof UsernamePasswordSnowflakeCredentials) {
@@ -998,7 +1427,7 @@
*
* <p>Either withUrl or withServerName is required.
*
- * @param url - String with URL of the Snowflake server.
+ * @param url String with URL of the Snowflake server.
*/
public DataSourceConfiguration withUrl(String url) {
checkArgument(
@@ -1013,7 +1442,7 @@
/**
* Sets database to use.
*
- * @param database - String with database name.
+ * @param database String with database name.
*/
public DataSourceConfiguration withDatabase(String database) {
return builder().setDatabase(database).build();
@@ -1022,7 +1451,7 @@
/**
* Sets Snowflake Warehouse to use.
*
- * @param warehouse - String with warehouse name.
+ * @param warehouse String with warehouse name.
*/
public DataSourceConfiguration withWarehouse(String warehouse) {
return builder().setWarehouse(warehouse).build();
@@ -1031,7 +1460,7 @@
/**
* Sets schema to use when connecting to Snowflake.
*
- * @param schema - String with schema name.
+ * @param schema String with schema name.
*/
public DataSourceConfiguration withSchema(String schema) {
return builder().setSchema(schema).build();
@@ -1043,7 +1472,7 @@
*
* <p>Either withServerName or withUrl is required.
*
- * @param serverName - String with server name.
+ * @param serverName String with server name.
*/
public DataSourceConfiguration withServerName(String serverName) {
checkArgument(
@@ -1055,7 +1484,7 @@
/**
* Sets port number to use to connect to Snowflake.
*
- * @param portNumber - Integer with port number.
+ * @param portNumber Integer with port number.
*/
public DataSourceConfiguration withPortNumber(Integer portNumber) {
return builder().setPortNumber(portNumber).build();
@@ -1064,16 +1493,16 @@
/**
* Sets user's role to be used when running queries on Snowflake.
*
- * @param role - String with role name.
+ * @param role String with role name.
*/
public DataSourceConfiguration withRole(String role) {
return builder().setRole(role).build();
}
/**
- * Sets loginTimeout that will be used in {@link SnowflakeBasicDataSource:setLoginTimeout}.
+ * Sets loginTimeout that will be used in {@link SnowflakeBasicDataSource#setLoginTimeout}.
*
- * @param loginTimeout - Integer with timeout value.
+ * @param loginTimeout Integer with timeout value.
*/
public DataSourceConfiguration withLoginTimeout(Integer loginTimeout) {
return builder().setLoginTimeout(loginTimeout).build();
@@ -1156,6 +1585,7 @@
}
}
+ /** Wraps {@link DataSourceConfiguration} to provide DataSource. */
public static class DataSourceProviderFromDataSourceConfiguration
implements SerializableFunction<Void, DataSource>, HasDisplayData {
private static final ConcurrentHashMap<DataSourceConfiguration, DataSource> instances =
@@ -1188,5 +1618,9 @@
public void populateDisplayData(DisplayData.Builder builder) {
config.populateDisplayData(builder);
}
+
+ public DataSourceConfiguration getConfig() {
+ return this.config;
+ }
}
}
diff --git a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/SnowflakePipelineOptions.java b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/SnowflakePipelineOptions.java
index bf91e0c..201f71c 100644
--- a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/SnowflakePipelineOptions.java
+++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/SnowflakePipelineOptions.java
@@ -20,9 +20,11 @@
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.StreamingOptions;
import org.apache.beam.sdk.options.Validation;
+import org.apache.beam.sdk.options.ValueProvider;
-public interface SnowflakePipelineOptions extends PipelineOptions {
+public interface SnowflakePipelineOptions extends PipelineOptions, StreamingOptions {
String BASIC_CONNECTION_INFO_VALIDATION_GROUP = "BASIC_CONNECTION_INFO_GROUP";
String AUTH_VALIDATION_GROUP = "AUTH_VALIDATION_GROUP";
@@ -120,4 +122,9 @@
String getStorageIntegrationName();
void setStorageIntegrationName(String storageIntegrationName);
+
+ @Description("SnowPipe name. Optional.")
+ ValueProvider<String> getSnowPipe();
+
+ void setSnowPipe(ValueProvider<String> snowPipe);
}
diff --git a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/credentials/SnowflakeCredentialsFactory.java b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/credentials/SnowflakeCredentialsFactory.java
index 3876c2f..2b45dc1 100644
--- a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/credentials/SnowflakeCredentialsFactory.java
+++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/credentials/SnowflakeCredentialsFactory.java
@@ -18,38 +18,52 @@
package org.apache.beam.sdk.io.snowflake.credentials;
import org.apache.beam.sdk.io.snowflake.SnowflakePipelineOptions;
+import org.apache.beam.sdk.io.snowflake.crosslanguage.SnowflakeReadRegistrar;
/**
* Factory class for creating implementations of {@link SnowflakeCredentials} from {@link
* SnowflakePipelineOptions}.
*/
public class SnowflakeCredentialsFactory {
- public static SnowflakeCredentials of(SnowflakePipelineOptions options) {
- if (oauthOptionsAvailable(options)) {
- return new OAuthTokenSnowflakeCredentials(options.getOauthToken());
- } else if (usernamePasswordOptionsAvailable(options)) {
- return new UsernamePasswordSnowflakeCredentials(options.getUsername(), options.getPassword());
- } else if (keyPairOptionsAvailable(options)) {
+ public static SnowflakeCredentials of(SnowflakePipelineOptions o) {
+ if (oauthOptionsAvailable(o.getOauthToken())) {
+ return new OAuthTokenSnowflakeCredentials(o.getOauthToken());
+ } else if (usernamePasswordOptionsAvailable(o.getUsername(), o.getPassword())) {
+ return new UsernamePasswordSnowflakeCredentials(o.getUsername(), o.getPassword());
+ } else if (keyPairOptionsAvailable(
+ o.getUsername(), o.getPrivateKeyPath(), o.getPrivateKeyPassphrase())) {
return new KeyPairSnowflakeCredentials(
- options.getUsername(), options.getPrivateKeyPath(), options.getPrivateKeyPassphrase());
+ o.getUsername(), o.getPrivateKeyPath(), o.getPrivateKeyPassphrase());
}
throw new RuntimeException("Can't get credentials from Options");
}
- private static boolean oauthOptionsAvailable(SnowflakePipelineOptions options) {
- return options.getOauthToken() != null && !options.getOauthToken().isEmpty();
+ public static SnowflakeCredentials of(SnowflakeReadRegistrar.ReadConfiguration c) {
+ if (oauthOptionsAvailable(c.getOAuthToken())) {
+ return new OAuthTokenSnowflakeCredentials(c.getOAuthToken());
+ } else if (usernamePasswordOptionsAvailable(c.getUsername(), c.getPassword())) {
+ return new UsernamePasswordSnowflakeCredentials(c.getUsername(), c.getPassword());
+ } else if (keyPairOptionsAvailable(
+ c.getUsername(), c.getPrivateKeyPath(), c.getPrivateKeyPassphrase())) {
+ return new KeyPairSnowflakeCredentials(
+ c.getUsername(), c.getPrivateKeyPath(), c.getPrivateKeyPassphrase());
+ }
+ throw new RuntimeException("Can't get credentials from Options");
}
- private static boolean usernamePasswordOptionsAvailable(SnowflakePipelineOptions options) {
- return options.getUsername() != null
- && !options.getUsername().isEmpty()
- && !options.getPassword().isEmpty();
+ private static boolean oauthOptionsAvailable(String token) {
+ return token != null && !token.isEmpty();
}
- private static boolean keyPairOptionsAvailable(SnowflakePipelineOptions options) {
- return options.getUsername() != null
- && !options.getUsername().isEmpty()
- && !options.getPrivateKeyPath().isEmpty()
- && !options.getPrivateKeyPassphrase().isEmpty();
+ /**
+ * Returns true only when both username and password are non-null and non-empty. A null
+ * password yields false instead of a NullPointerException, so the caller can fall through to
+ * the key pair check.
+ */
+ private static boolean usernamePasswordOptionsAvailable(String username, String password) {
+ return username != null && !username.isEmpty() && password != null && !password.isEmpty();
+ }
+
+ /**
+ * Returns true only when username, private key path and private key passphrase are all
+ * non-null and non-empty. Null values yield false instead of a NullPointerException.
+ */
+ private static boolean keyPairOptionsAvailable(
+ String username, String privateKeyPath, String privateKeyPassphrase) {
+ return username != null
+ && !username.isEmpty()
+ && privateKeyPath != null
+ && !privateKeyPath.isEmpty()
+ && privateKeyPassphrase != null
+ && !privateKeyPassphrase.isEmpty();
+ }
}
diff --git a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/crosslanguage/Configuration.java b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/crosslanguage/Configuration.java
new file mode 100644
index 0000000..38162ae
--- /dev/null
+++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/crosslanguage/Configuration.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.snowflake.crosslanguage;
+
+/**
+ * Parameters abstract class to expose the transforms to an external SDK.
+ *
+ * <p>Holds twelve {@code String} properties, each with a plain getter and setter. No field has an
+ * initializer, so a property whose setter was never called reads back as {@code null}.
+ */
+public abstract class Configuration {
+ // Server to connect to; ReadBuilder passes it to withServerName.
+ private String serverName;
+ // Credential values; SnowflakeCredentialsFactory#of(ReadConfiguration) reads these five.
+ private String username;
+ private String password;
+ private String privateKeyPath;
+ private String privateKeyPassphrase;
+ private String oAuthToken;
+ // ReadBuilder passes these to withDatabase / withSchema.
+ private String database;
+ private String schema;
+ // Read source; ReadBuilder passes these to fromTable / fromQuery.
+ private String table;
+ private String query;
+ // ReadBuilder passes these to withStagingBucketName / withStorageIntegrationName.
+ private String stagingBucketName;
+ private String storageIntegrationName;
+
+ public String getServerName() {
+ return serverName;
+ }
+
+ public void setServerName(String serverName) {
+ this.serverName = serverName;
+ }
+
+ public String getUsername() {
+ return username;
+ }
+
+ public void setUsername(String username) {
+ this.username = username;
+ }
+
+ public String getPassword() {
+ return password;
+ }
+
+ public void setPassword(String password) {
+ this.password = password;
+ }
+
+ public String getPrivateKeyPath() {
+ return privateKeyPath;
+ }
+
+ public void setPrivateKeyPath(String privateKeyPath) {
+ this.privateKeyPath = privateKeyPath;
+ }
+
+ public String getPrivateKeyPassphrase() {
+ return privateKeyPassphrase;
+ }
+
+ public void setPrivateKeyPassphrase(String privateKeyPassphrase) {
+ this.privateKeyPassphrase = privateKeyPassphrase;
+ }
+
+ public String getOAuthToken() {
+ return oAuthToken;
+ }
+
+ public void setOAuthToken(String oAuthToken) {
+ this.oAuthToken = oAuthToken;
+ }
+
+ public String getDatabase() {
+ return database;
+ }
+
+ public void setDatabase(String database) {
+ this.database = database;
+ }
+
+ public String getSchema() {
+ return schema;
+ }
+
+ public void setSchema(String schema) {
+ this.schema = schema;
+ }
+
+ public String getTable() {
+ return table;
+ }
+
+ public void setTable(String table) {
+ this.table = table;
+ }
+
+ public String getQuery() {
+ return query;
+ }
+
+ public void setQuery(String query) {
+ this.query = query;
+ }
+
+ public String getStagingBucketName() {
+ return stagingBucketName;
+ }
+
+ public void setStagingBucketName(String stagingBucketName) {
+ this.stagingBucketName = stagingBucketName;
+ }
+
+ public String getStorageIntegrationName() {
+ return storageIntegrationName;
+ }
+
+ public void setStorageIntegrationName(String storageIntegrationName) {
+ this.storageIntegrationName = storageIntegrationName;
+ }
+}
diff --git a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/crosslanguage/SnowflakeReadRegistrar.java b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/crosslanguage/SnowflakeReadRegistrar.java
new file mode 100644
index 0000000..1e7be0f
--- /dev/null
+++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/crosslanguage/SnowflakeReadRegistrar.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.snowflake.crosslanguage;
+
+import com.google.auto.service.AutoService;
+import java.io.Serializable;
+import java.nio.charset.Charset;
+import java.util.Map;
+import javax.sql.DataSource;
+import org.apache.beam.sdk.annotations.Experimental;
+import org.apache.beam.sdk.coders.ByteArrayCoder;
+import org.apache.beam.sdk.expansion.ExternalTransformRegistrar;
+import org.apache.beam.sdk.io.snowflake.SnowflakeIO;
+import org.apache.beam.sdk.io.snowflake.credentials.SnowflakeCredentials;
+import org.apache.beam.sdk.io.snowflake.credentials.SnowflakeCredentialsFactory;
+import org.apache.beam.sdk.transforms.ExternalTransformBuilder;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.values.PBegin;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
+
+/** Exposes {@link SnowflakeIO.Read} as an external transform for cross-language usage. */
+@Experimental
+@AutoService(ExternalTransformRegistrar.class)
+public final class SnowflakeReadRegistrar implements ExternalTransformRegistrar {
+
+ /** URN under which {@link ReadBuilder} is registered. */
+ public static final String URN = "beam:external:java:snowflake:read:v1";
+
+ /** Returns a single-entry map from {@link #URN} to {@link ReadBuilder}. */
+ @Override
+ public Map<String, Class<? extends ExternalTransformBuilder<?, ?, ?>>> knownBuilders() {
+ return ImmutableMap.of(URN, ReadBuilder.class);
+ }
+
+ /** Parameters class to expose the transform to an external SDK. */
+ public static class ReadConfiguration extends Configuration {}
+
+ /** Builds a {@code byte[]}-producing {@link SnowflakeIO.Read} from a {@link ReadConfiguration}. */
+ public static class ReadBuilder
+ implements ExternalTransformBuilder<ReadConfiguration, PBegin, PCollection<byte[]>> {
+ public ReadBuilder() {}
+
+ /**
+ * Creates the read transform described by {@code c}.
+ *
+ * <p>Credentials come from {@link SnowflakeCredentialsFactory#of}; server name, database and
+ * schema go into the data source configuration. Both {@code fromTable} and {@code fromQuery}
+ * are always called with the configured values, either of which is {@code null} when unset.
+ *
+ * @param c configuration populated by the external SDK
+ * @return a read transform emitting each row as the bytes of a comma-joined line
+ */
+ @Override
+ public PTransform<PBegin, PCollection<byte[]>> buildExternal(ReadConfiguration c) {
+ SnowflakeCredentials credentials = SnowflakeCredentialsFactory.of(c);
+
+ SerializableFunction<Void, DataSource> dataSourceSerializableFunction =
+ SnowflakeIO.DataSourceProviderFromDataSourceConfiguration.of(
+ SnowflakeIO.DataSourceConfiguration.create(credentials)
+ .withServerName(c.getServerName())
+ .withDatabase(c.getDatabase())
+ .withSchema(c.getSchema()));
+
+ return SnowflakeIO.<byte[]>read()
+ .withStorageIntegrationName(c.getStorageIntegrationName())
+ .withStagingBucketName(c.getStagingBucketName())
+ .withDataSourceProviderFn(dataSourceSerializableFunction)
+ .withCsvMapper(CsvMapper.getCsvMapper())
+ .withCoder(ByteArrayCoder.of())
+ .fromTable(c.getTable())
+ .fromQuery(c.getQuery());
+ }
+ }
+
+ /** Holder for the row mapper used by {@link ReadBuilder}. */
+ private static class CsvMapper implements Serializable {
+
+ // Fixed charset: the bytes are consumed by another SDK, so they must not depend on the
+ // default charset of the JVM that happens to run the expansion.
+ private static final Charset ROW_CHARSET = Charset.forName("UTF-8");
+
+ /**
+ * Returns a mapper that joins the parts of a row with commas and encodes the resulting line as
+ * UTF-8 bytes. Parts are joined verbatim; no quoting or escaping is applied.
+ */
+ public static SnowflakeIO.CsvMapper<byte[]> getCsvMapper() {
+ return parts -> String.join(",", parts).getBytes(ROW_CHARSET);
+ }
+ }
+}
diff --git a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/crosslanguage/package-info.java
similarity index 63%
copy from .test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
copy to sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/crosslanguage/package-info.java
index 2aa89c2..7e24ee9 100644
--- a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
+++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/crosslanguage/package-info.java
@@ -16,21 +16,12 @@
* limitations under the License.
*/
-import PrecommitJobBuilder
+/** Cross-language support for SnowflakeIO. */
+@Experimental(Kind.PORTABILITY)
+@DefaultAnnotation(NonNull.class)
+package org.apache.beam.sdk.io.snowflake.crosslanguage;
-PrecommitJobBuilder builder = new PrecommitJobBuilder(
- scope: this,
- nameBase: 'JavaBeamZetaSQL',
- gradleTask: ':javaPreCommitBeamZetaSQL',
- gradleSwitches: [
- '-PdisableSpotlessCheck=true'
- ], // spotless checked in separate pre-commit
- triggerPathPatterns: [
- '^sdks/java/extensions/sql/.*$',
- ]
- )
-builder.build {
- publishers {
- archiveJunit('**/build/test-results/**/*.xml')
- }
-}
+import edu.umd.cs.findbugs.annotations.DefaultAnnotation;
+import edu.umd.cs.findbugs.annotations.NonNull;
+import org.apache.beam.sdk.annotations.Experimental;
+import org.apache.beam.sdk.annotations.Experimental.Kind;
diff --git a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/enums/StreamingLogLevel.java
similarity index 63%
copy from .test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
copy to sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/enums/StreamingLogLevel.java
index 2aa89c2..f547b07 100644
--- a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
+++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/enums/StreamingLogLevel.java
@@ -15,22 +15,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package org.apache.beam.sdk.io.snowflake.enums;
-import PrecommitJobBuilder
-
-PrecommitJobBuilder builder = new PrecommitJobBuilder(
- scope: this,
- nameBase: 'JavaBeamZetaSQL',
- gradleTask: ':javaPreCommitBeamZetaSQL',
- gradleSwitches: [
- '-PdisableSpotlessCheck=true'
- ], // spotless checked in separate pre-commit
- triggerPathPatterns: [
- '^sdks/java/extensions/sql/.*$',
- ]
- )
-builder.build {
- publishers {
- archiveJunit('**/build/test-results/**/*.xml')
- }
+/**
+ * Log level choices: {@code INFO} and {@code ERROR}.
+ *
+ * <p>NOTE(review): presumably selects how much is logged for streaming writes; the code that
+ * consumes this enum is not visible here, so confirm against its callers.
+ */
+public enum StreamingLogLevel {
+ INFO,
+ ERROR
}
diff --git a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/ServiceConfig.java b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/ServiceConfig.java
index 09e1368..1826ce9 100644
--- a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/ServiceConfig.java
+++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/ServiceConfig.java
@@ -17,4 +17,8 @@
*/
package org.apache.beam.sdk.io.snowflake.services;
+/**
+ * Configuration abstract class for {@link SnowflakeService} that gives parameters for write and
+ * read (batch and streaming).
+ */
public abstract class ServiceConfig {}
diff --git a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeServiceConfig.java b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeBatchServiceConfig.java
similarity index 60%
rename from sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeServiceConfig.java
rename to sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeBatchServiceConfig.java
index fc68a00..726e9d7 100644
--- a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeServiceConfig.java
+++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeBatchServiceConfig.java
@@ -24,81 +24,120 @@
import org.apache.beam.sdk.io.snowflake.enums.WriteDisposition;
import org.apache.beam.sdk.transforms.SerializableFunction;
-public class SnowflakeServiceConfig extends ServiceConfig {
- private SerializableFunction<Void, DataSource> dataSourceProviderFn;
+/** Class for preparing configuration for batch write and read. */
+public class SnowflakeBatchServiceConfig extends ServiceConfig {
+ private final SerializableFunction<Void, DataSource> dataSourceProviderFn;
- private String table;
- private String query;
- private String storageIntegrationName;
+ private final String database;
+ private final String schema;
+ private final String table;
+ private final String query;
+ private final String storageIntegrationName;
private List<String> filesList;
-
private WriteDisposition writeDisposition;
private CreateDisposition createDisposition;
private SnowflakeTableSchema tableSchema;
- private String stagingBucketDir;
+ private final String stagingBucketDir;
+ private final String quotationMark;
- public SnowflakeServiceConfig(
+ /** Creating a batch configuration for reading. */
+ public SnowflakeBatchServiceConfig(
SerializableFunction<Void, DataSource> dataSourceProviderFn,
+ String database,
+ String schema,
String table,
String query,
- String storageIntegration,
- String stagingBucketDir) {
+ String storageIntegrationName,
+ String stagingBucketDir,
+ String quotationMark) {
this.dataSourceProviderFn = dataSourceProviderFn;
+ this.database = database;
+ this.schema = schema;
this.table = table;
this.query = query;
- this.storageIntegrationName = storageIntegration;
+ this.storageIntegrationName = storageIntegrationName;
this.stagingBucketDir = stagingBucketDir;
+ this.quotationMark = quotationMark;
}
- public SnowflakeServiceConfig(
+ /** Creating a batch configuration for writing. */
+ public SnowflakeBatchServiceConfig(
SerializableFunction<Void, DataSource> dataSourceProviderFn,
List<String> filesList,
+ String database,
+ String schema,
String table,
String query,
SnowflakeTableSchema tableSchema,
CreateDisposition createDisposition,
WriteDisposition writeDisposition,
String storageIntegrationName,
- String stagingBucketDir) {
+ String stagingBucketDir,
+ String quotationMark) {
this.dataSourceProviderFn = dataSourceProviderFn;
this.filesList = filesList;
+ this.database = database;
+ this.schema = schema;
this.table = table;
this.query = query;
- this.tableSchema = tableSchema;
this.writeDisposition = writeDisposition;
this.createDisposition = createDisposition;
+ this.tableSchema = tableSchema;
this.storageIntegrationName = storageIntegrationName;
this.stagingBucketDir = stagingBucketDir;
+ this.quotationMark = quotationMark;
}
+ /** Getting a DataSource provider function for connection credentials. */
public SerializableFunction<Void, DataSource> getDataSourceProviderFn() {
return dataSourceProviderFn;
}
+ /** Getting a table as a source of reading or destination to write. */
public String getTable() {
return table;
}
+ /** Getting a query which can be source for reading. */
public String getQuery() {
return query;
}
+ /** Getting Snowflake integration which is used in COPY statement. */
public String getStorageIntegrationName() {
return storageIntegrationName;
}
+ /** Getting directory where files are staged. */
public String getStagingBucketDir() {
return stagingBucketDir;
}
+ /** Getting list of names of staged files. */
public List<String> getFilesList() {
return filesList;
}
+ /** Getting the disposition for writing data to the table, see: {@link WriteDisposition}. */
public WriteDisposition getWriteDisposition() {
return writeDisposition;
}
+ /** Getting a character that will surround {@code String} in staged CSV files. */
+ public String getQuotationMark() {
+ return quotationMark;
+ }
+
+ /** Getting a Snowflake database. */
+ public String getDatabase() {
+ return database;
+ }
+
+ /** Getting a schema of a Snowflake table. */
+ public String getSchema() {
+ return schema;
+ }
+
public CreateDisposition getCreateDisposition() {
return createDisposition;
}
diff --git a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeServiceImpl.java b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeBatchServiceImpl.java
similarity index 79%
rename from sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeServiceImpl.java
rename to sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeBatchServiceImpl.java
index 36e9f3b..2b73bd5 100644
--- a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeServiceImpl.java
+++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeBatchServiceImpl.java
@@ -19,6 +19,8 @@
import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
+import java.math.BigInteger;
+import java.nio.charset.Charset;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
@@ -35,23 +37,36 @@
import org.slf4j.LoggerFactory;
/** Implemenation of {@link SnowflakeService} used in production. */
-public class SnowflakeServiceImpl implements SnowflakeService<SnowflakeServiceConfig> {
- private static final Logger LOG = LoggerFactory.getLogger(SnowflakeServiceImpl.class);
+public class SnowflakeBatchServiceImpl implements SnowflakeService<SnowflakeBatchServiceConfig> {
+ private static final Logger LOG = LoggerFactory.getLogger(SnowflakeBatchServiceImpl.class);
private static final String SNOWFLAKE_GCS_PREFIX = "gcs://";
private static final String GCS_PREFIX = "gs://";
+ /** Writing data to Snowflake in batch mode. */
@Override
- public void write(SnowflakeServiceConfig config) throws Exception {
+ public void write(SnowflakeBatchServiceConfig config) throws Exception {
copyToTable(config);
}
+ /** Reading data from Snowflake tables in batch processing. */
@Override
- public String read(SnowflakeServiceConfig config) throws Exception {
+ public String read(SnowflakeBatchServiceConfig config) throws Exception {
return copyIntoStage(config);
}
- public String copyIntoStage(SnowflakeServiceConfig config) throws SQLException {
+ /**
+ * Copies data from specified table to stage (bucket and directory). Uses Snowflake's <a
+ * href="https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html">COPY method</a>.
+ * All the details needed for COPY are inside passed configuration.
+ *
+ * @param config object with configuration to perform COPY query.
+ * @return destination where files were copied into
+ * @throws SQLException in case COPY query failed.
+ */
+ private String copyIntoStage(SnowflakeBatchServiceConfig config) throws SQLException {
SerializableFunction<Void, DataSource> dataSourceProviderFn = config.getDataSourceProviderFn();
+ String database = config.getDatabase();
+ String schema = config.getSchema();
String table = config.getTable();
String query = config.getQuery();
String storageIntegrationName = config.getStorageIntegrationName();
@@ -62,7 +77,7 @@
// Query must be surrounded with brackets
source = String.format("(%s)", query);
} else {
- source = table;
+ source = getTablePath(database, schema, table);
}
String copyQuery =
@@ -71,17 +86,31 @@
getProperBucketDir(stagingBucketDir),
source,
storageIntegrationName,
- CSV_QUOTE_CHAR_FOR_COPY);
+ getASCIICharRepresentation(config.getQuotationMark()));
runStatement(copyQuery, getConnection(dataSourceProviderFn), null);
return stagingBucketDir.concat("*");
}
- public void copyToTable(SnowflakeServiceConfig config) throws SQLException {
+ /**
+ * Renders {@code input} as a hexadecimal literal: {@code 0x} followed by the hex digits of its
+ * bytes, e.g. {@code '} becomes {@code 0x27}.
+ *
+ * <p>The bytes are taken in UTF-8 (identical to ASCII for ASCII input) rather than in the JVM
+ * default charset, so the literal placed in the COPY statement is the same on every platform.
+ *
+ * @param input text to encode, typically the single quotation-mark character
+ * @return the hex literal for {@code input}
+ */
+ private String getASCIICharRepresentation(String input) {
+ return String.format("0x%x", new BigInteger(1, input.getBytes(Charset.forName("UTF-8"))));
+ }
+
+ /**
+ * Copies staged data from bucket directory to table. Uses Snowflake's <a
+ * href="https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html">COPY method</a>.
+ * All the details needed for COPY are inside passed configuration.
+ *
+ * @param config object with configuration to perform COPY query.
+ * @throws SQLException in case COPY query failed.
+ */
+ private void copyToTable(SnowflakeBatchServiceConfig config) throws SQLException {
SerializableFunction<Void, DataSource> dataSourceProviderFn = config.getDataSourceProviderFn();
List<String> filesList = config.getFilesList();
+ String database = config.getDatabase();
+ String schema = config.getSchema();
String table = config.getTable();
String query = config.getQuery();
SnowflakeTableSchema tableSchema = config.getTableSchema();
@@ -110,28 +139,29 @@
query =
String.format(
"COPY INTO %s FROM %s FILES=(%s) FILE_FORMAT=(TYPE=CSV FIELD_OPTIONALLY_ENCLOSED_BY='%s' COMPRESSION=GZIP) STORAGE_INTEGRATION=%s;",
- table,
+ getTablePath(database, schema, table),
getProperBucketDir(source),
files,
- CSV_QUOTE_CHAR_FOR_COPY,
+ getASCIICharRepresentation(config.getQuotationMark()),
storageIntegrationName);
} else {
query =
String.format(
"COPY INTO %s FROM %s FILES=(%s) FILE_FORMAT=(TYPE=CSV FIELD_OPTIONALLY_ENCLOSED_BY='%s' COMPRESSION=GZIP);",
- table, source, files, CSV_QUOTE_CHAR_FOR_COPY);
+ table, source, files, getASCIICharRepresentation(config.getQuotationMark()));
}
runStatement(query, dataSource.getConnection(), null);
}
- private void truncateTable(DataSource dataSource, String table) throws SQLException {
- String query = String.format("TRUNCATE %s;", table);
+ private void truncateTable(DataSource dataSource, String tablePath) throws SQLException {
+ String query = String.format("TRUNCATE %s;", tablePath);
runConnectionWithStatement(dataSource, query, null);
}
- private static void checkIfTableIsEmpty(DataSource dataSource, String table) throws SQLException {
- String selectQuery = String.format("SELECT count(*) FROM %s LIMIT 1;", table);
+ private static void checkIfTableIsEmpty(DataSource dataSource, String tablePath)
+ throws SQLException {
+ String selectQuery = String.format("SELECT count(*) FROM %s LIMIT 1;", tablePath);
runConnectionWithStatement(
dataSource,
selectQuery,
@@ -275,4 +305,8 @@
}
return bucketDir;
}
+
+ /** Joins the three names with dots into {@code database.schema.table}. */
+ private String getTablePath(String database, String schema, String table) {
+ return String.join(".", database, schema, table);
+ }
}
diff --git a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeService.java b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeService.java
index 16cd3c6..192856e 100644
--- a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeService.java
+++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeService.java
@@ -21,8 +21,6 @@
/** Interface which defines common methods for interacting with Snowflake. */
public interface SnowflakeService<T extends ServiceConfig> extends Serializable {
- String CSV_QUOTE_CHAR_FOR_COPY = "''";
-
String read(T config) throws Exception;
void write(T config) throws Exception;
diff --git a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeStreamingServiceConfig.java b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeStreamingServiceConfig.java
new file mode 100644
index 0000000..7039c89
--- /dev/null
+++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeStreamingServiceConfig.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.snowflake.services;
+
+import java.util.List;
+import net.snowflake.ingest.SimpleIngestManager;
+
+/** Class for preparing configuration for streaming write. */
+public class SnowflakeStreamingServiceConfig extends ServiceConfig {
+ private final SimpleIngestManager ingestManager;
+ private final List<String> filesList;
+ private final String stagingBucketDir;
+
+ /**
+ * Constructor to create configuration for streaming write.
+ *
+ * @param filesList list of strings of staged files' names.
+ * @param stagingBucketDir name of a bucket and directory inside it where files are staged and
+ * await loading to Snowflake.
+ * @param ingestManager instance of {@link SimpleIngestManager}.
+ */
+ public SnowflakeStreamingServiceConfig(
+ List<String> filesList, String stagingBucketDir, SimpleIngestManager ingestManager) {
+ this.filesList = filesList;
+ this.stagingBucketDir = stagingBucketDir;
+ this.ingestManager = ingestManager;
+ }
+
+ /**
+ * Getter for ingest manager which serves API to load data in streaming mode and retrieve a report
+ * about loaded data.
+ *
+ * @return instance of {@link SimpleIngestManager}.
+ */
+ public SimpleIngestManager getIngestManager() {
+ return ingestManager;
+ }
+
+ /**
+ * Getter for a list of staged files which will be loaded to Snowflake.
+ *
+ * @return list of strings of staged files' names.
+ */
+ public List<String> getFilesList() {
+ return filesList;
+ }
+
+ /**
+ * Getter for a bucket name with directory where files were staged and are waiting to be loaded.
+ *
+ * @return name of a bucket and directory inside in form {@code gs://mybucket/dir/}
+ */
+ public String getStagingBucketDir() {
+ return stagingBucketDir;
+ }
+}
diff --git a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeStreamingServiceImpl.java b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeStreamingServiceImpl.java
new file mode 100644
index 0000000..8d555ed
--- /dev/null
+++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/services/SnowflakeStreamingServiceImpl.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.snowflake.services;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import net.snowflake.ingest.SimpleIngestManager;
+import net.snowflake.ingest.connection.IngestResponseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Implementation of {@link SnowflakeService} used in production. */
+public class SnowflakeStreamingServiceImpl
+ implements SnowflakeService<SnowflakeStreamingServiceConfig> {
+ private static final Logger LOG = LoggerFactory.getLogger(SnowflakeStreamingServiceImpl.class);
+ // Reassigned from the config on every ingest call; transient, so it is never serialized.
+ private transient SimpleIngestManager ingestManager;
+
+ /** Writing data to Snowflake in streaming mode. */
+ @Override
+ public void write(SnowflakeStreamingServiceConfig config) throws Exception {
+ ingest(config);
+ }
+
+ /** Reading data from Snowflake in streaming mode is not supported. */
+ @Override
+ public String read(SnowflakeStreamingServiceConfig config) throws Exception {
+ throw new UnsupportedOperationException("Not supported by SnowflakeIO.");
+ }
+
+ /**
+ * SnowPipe is processing files from stage in streaming mode.
+ *
+ * <p>Each file name has the staging bucket directory and any single quotes removed before it is
+ * handed to the ingest manager. Nothing is sent when no file names remain.
+ *
+ * @param config configuration object containing parameters for writing files to Snowflake
+ * @throws IngestResponseException REST API response error
+ * @throws IOException Snowflake problem while streaming
+ * @throws URISyntaxException creating request error
+ */
+ private void ingest(SnowflakeStreamingServiceConfig config)
+ throws IngestResponseException, IOException, URISyntaxException {
+ List<String> filesList = config.getFilesList();
+ String stagingPrefix = String.valueOf(config.getStagingBucketDir());
+ ingestManager = config.getIngestManager();
+
+ // String#replace matches its argument literally. replaceAll would compile the bucket path as
+ // a regular expression, so characters such as '.', '+' or '(' in it would match the wrong
+ // text or fail to compile.
+ Set<String> files =
+ filesList.stream()
+ .map(e -> e.replace(stagingPrefix, ""))
+ .map(e -> e.replace("'", ""))
+ .collect(Collectors.toSet());
+
+ if (!files.isEmpty()) {
+ this.ingestManager.ingestFiles(SimpleIngestManager.wrapFilepaths(files), null);
+ }
+ }
+}
diff --git a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/FakeSnowflakeServiceImpl.java b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/FakeSnowflakeBatchServiceImpl.java
similarity index 89%
rename from sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/FakeSnowflakeServiceImpl.java
rename to sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/FakeSnowflakeBatchServiceImpl.java
index 7ff097d..90ee4b9 100644
--- a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/FakeSnowflakeServiceImpl.java
+++ b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/FakeSnowflakeBatchServiceImpl.java
@@ -28,23 +28,24 @@
import org.apache.beam.sdk.io.snowflake.data.SnowflakeTableSchema;
import org.apache.beam.sdk.io.snowflake.enums.CreateDisposition;
import org.apache.beam.sdk.io.snowflake.enums.WriteDisposition;
+import org.apache.beam.sdk.io.snowflake.services.SnowflakeBatchServiceConfig;
import org.apache.beam.sdk.io.snowflake.services.SnowflakeService;
-import org.apache.beam.sdk.io.snowflake.services.SnowflakeServiceConfig;
/** Fake implementation of {@link SnowflakeService} used in tests. */
-public class FakeSnowflakeServiceImpl implements SnowflakeService<SnowflakeServiceConfig> {
+public class FakeSnowflakeBatchServiceImpl
+ implements SnowflakeService<SnowflakeBatchServiceConfig> {
@Override
- public void write(SnowflakeServiceConfig config) throws Exception {
+ public void write(SnowflakeBatchServiceConfig config) throws Exception {
copyToTable(config);
}
@Override
- public String read(SnowflakeServiceConfig config) throws Exception {
+ public String read(SnowflakeBatchServiceConfig config) throws Exception {
return copyIntoStage(config);
}
- public String copyIntoStage(SnowflakeServiceConfig config) throws SQLException {
+ public String copyIntoStage(SnowflakeBatchServiceConfig config) throws SQLException {
String table = config.getTable();
String query = config.getQuery();
@@ -60,7 +61,7 @@
return String.format("./%s/*", stagingBucketDir);
}
- public void copyToTable(SnowflakeServiceConfig config) throws SQLException {
+ public void copyToTable(SnowflakeBatchServiceConfig config) throws SQLException {
List<String> filesList = config.getFilesList();
String table = config.getTable();
SnowflakeTableSchema tableSchema = config.getTableSchema();
diff --git a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/FakeSnowflakeIngestManager.java
similarity index 64%
rename from .test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
rename to sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/FakeSnowflakeIngestManager.java
index 2aa89c2..e144ae7 100644
--- a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL.groovy
+++ b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/FakeSnowflakeIngestManager.java
@@ -15,22 +15,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package org.apache.beam.sdk.io.snowflake.test;
-import PrecommitJobBuilder
+import java.util.List;
-PrecommitJobBuilder builder = new PrecommitJobBuilder(
- scope: this,
- nameBase: 'JavaBeamZetaSQL',
- gradleTask: ':javaPreCommitBeamZetaSQL',
- gradleSwitches: [
- '-PdisableSpotlessCheck=true'
- ], // spotless checked in separate pre-commit
- triggerPathPatterns: [
- '^sdks/java/extensions/sql/.*$',
- ]
- )
-builder.build {
- publishers {
- archiveJunit('**/build/test-results/**/*.xml')
+/**
+ * Fake ingest manager for tests: rows handed to it are forwarded to {@link FakeSnowflakeDatabase}
+ * under a fixed table name.
+ */
+public class FakeSnowflakeIngestManager {
+ // Only for testing purposes: every ingest call targets this hard-coded table.
+ private String table = "TEST_TABLE";
+
+ public FakeSnowflakeIngestManager() {}
+
+ /**
+ * Passes {@code rows} to {@code FakeSnowflakeDatabase.createTableWithElements} for the fixed table.
+ *
+ * @param rows the elements to store (despite the method name, the parameter holds rows)
+ */
+ public void ingestFiles(List<String> rows) {
+ FakeSnowflakeDatabase.createTableWithElements(this.table, rows);
+ }
+
+ /** Returns the fixed table name used by {@link #ingestFiles}. */
+ public String getTable() {
+ return this.table;
}
}
diff --git a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/FakeSnowflakeStreamingServiceImpl.java b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/FakeSnowflakeStreamingServiceImpl.java
new file mode 100644
index 0000000..362eb5e
--- /dev/null
+++ b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/FakeSnowflakeStreamingServiceImpl.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.snowflake.test;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.zip.GZIPInputStream;
+import org.apache.beam.sdk.io.snowflake.services.SnowflakeService;
+import org.apache.beam.sdk.io.snowflake.services.SnowflakeStreamingServiceConfig;
+
+/**
+ * Fake implementation of {@link SnowflakeService} used in tests.
+ *
+ * <p>A write reads the gzip files listed in the config and hands their lines to a {@link
+ * FakeSnowflakeIngestManager}; reads are not supported.
+ */
+public class FakeSnowflakeStreamingServiceImpl
+    implements SnowflakeService<SnowflakeStreamingServiceConfig> {
+  // Replaced with a fresh manager on every write() call; null until the first write().
+  private FakeSnowflakeIngestManager snowflakeIngestManager;
+
+  /** Creates a new ingest manager and ingests the files listed in {@code config}. */
+  @Override
+  public void write(SnowflakeStreamingServiceConfig config) throws Exception {
+    snowflakeIngestManager = new FakeSnowflakeIngestManager();
+    ingest(config);
+  }
+
+  /**
+   * Not supported by this fake.
+   *
+   * @throws UnsupportedOperationException on every call
+   */
+  @Override
+  public String read(SnowflakeStreamingServiceConfig config) throws Exception {
+    throw new UnsupportedOperationException("Streaming read is not supported in SnowflakeIO.");
+  }
+
+  /**
+   * Reads every file returned by {@code config.getFilesList()} and passes all of their lines, in
+   * file order, to the ingest manager.
+   *
+   * <p>Single quotes are stripped from each file name before the file is opened. The ingest
+   * manager is only assigned in {@link #write}, so calling this method before any write fails
+   * with a {@link NullPointerException}.
+   */
+  public void ingest(SnowflakeStreamingServiceConfig config) {
+    List<String> rows = new ArrayList<>();
+    List<String> filesList = config.getFilesList();
+    for (String file : filesList) {
+      rows.addAll(readGZIPFile(file.replace("'", "")));
+    }
+
+    snowflakeIngestManager.ingestFiles(rows);
+  }
+
+  /**
+   * Returns all lines of the gzip-compressed text file at {@code file}, decoded with the platform
+   * default charset.
+   *
+   * @throws RuntimeException wrapping any {@link IOException} raised while opening or reading
+   */
+  private List<String> readGZIPFile(String file) {
+    List<String> lines = new ArrayList<>();
+    // Each stream is its own resource so that all of them are closed even when a later
+    // constructor (e.g. GZIPInputStream on a non-gzip file) or a read fails.
+    try (FileInputStream in = new FileInputStream(file);
+        GZIPInputStream gzip = new GZIPInputStream(in);
+        BufferedReader br =
+            new BufferedReader(new InputStreamReader(gzip, Charset.defaultCharset()))) {
+      String line;
+      while ((line = br.readLine()) != null) {
+        lines.add(line);
+      }
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to read file", e);
+    }
+
+    return lines;
+  }
+}
diff --git a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/TestUtils.java b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/TestUtils.java
index 05c80c5..41eac70 100644
--- a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/TestUtils.java
+++ b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/TestUtils.java
@@ -138,6 +138,10 @@
return (SnowflakeIO.UserDataMapper<String[]>) recordLine -> recordLine;
}
+  /** Returns a mapper that wraps each input line in a single-element {@code String[]}. */
+  public static SnowflakeIO.UserDataMapper<String> getStringCsvMapper() {
+    SnowflakeIO.UserDataMapper<String> singleColumnMapper = line -> new String[] {line};
+    return singleColumnMapper;
+  }
+
public static class ParseToKv extends DoFn<Long, KV<String, Long>> {
@ProcessElement
public void processElement(ProcessContext c) {
diff --git a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/read/SnowflakeIOReadTest.java b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/read/SnowflakeIOReadTest.java
index 6016a66..b844760 100644
--- a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/read/SnowflakeIOReadTest.java
+++ b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/read/SnowflakeIOReadTest.java
@@ -28,13 +28,15 @@
import org.apache.beam.sdk.io.snowflake.SnowflakeIO;
import org.apache.beam.sdk.io.snowflake.services.SnowflakeService;
import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeBasicDataSource;
+import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeBatchServiceImpl;
import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeDatabase;
-import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeServiceImpl;
+import org.apache.beam.sdk.io.snowflake.test.TestUtils;
import org.apache.beam.sdk.io.snowflake.test.unit.TestPipelineOptions;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
+import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
@@ -46,6 +48,7 @@
public class SnowflakeIOReadTest implements Serializable {
public static final String FAKE_TABLE = "FAKE_TABLE";
public static final String FAKE_QUERY = "SELECT * FROM FAKE_TABLE";
+ public static final String BUCKET_NAME = "BUCKET/";
private static final TestPipelineOptions options =
TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);;
@@ -71,19 +74,24 @@
options.setServerName("NULL.snowflakecomputing.com");
options.setStorageIntegrationName("STORAGE_INTEGRATION");
- options.setStagingBucketName("BUCKET");
+ options.setStagingBucketName(BUCKET_NAME);
dataSourceConfiguration =
SnowflakeIO.DataSourceConfiguration.create(new FakeSnowflakeBasicDataSource())
.withServerName(options.getServerName());
- snowflakeService = new FakeSnowflakeServiceImpl();
+ snowflakeService = new FakeSnowflakeBatchServiceImpl();
+ }
+
+ // Class-level cleanup: passes BUCKET_NAME (the staging bucket name set on the options in this
+ // class) to TestUtils.removeTempDir.
+ @AfterClass
+ public static void tearDown() {
+ TestUtils.removeTempDir(BUCKET_NAME);
}
@Test
public void testConfigIsMissingStagingBucketName() {
thrown.expect(IllegalArgumentException.class);
- thrown.expectMessage("withStagingBucketName is required");
+ thrown.expectMessage("withStagingBucketName() is required");
pipeline.apply(
SnowflakeIO.<GenericRecord>read(snowflakeService)
@@ -99,7 +107,7 @@
@Test
public void testConfigIsMissingStorageIntegration() {
thrown.expect(IllegalArgumentException.class);
- thrown.expectMessage("withStorageIntegrationName is required");
+ thrown.expectMessage("withStorageIntegrationName() is required");
pipeline.apply(
SnowflakeIO.<GenericRecord>read(snowflakeService)
diff --git a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/CreateDispositionTest.java b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/CreateDispositionTest.java
index 4b0f728..2dcd88b 100644
--- a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/CreateDispositionTest.java
+++ b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/CreateDispositionTest.java
@@ -32,8 +32,8 @@
import org.apache.beam.sdk.io.snowflake.enums.CreateDisposition;
import org.apache.beam.sdk.io.snowflake.services.SnowflakeService;
import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeBasicDataSource;
+import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeBatchServiceImpl;
import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeDatabase;
-import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeServiceImpl;
import org.apache.beam.sdk.io.snowflake.test.TestUtils;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.testing.TestPipeline;
@@ -73,7 +73,7 @@
stagingBucketName = options.getStagingBucketName();
storageIntegrationName = options.getStorageIntegrationName();
- snowflakeService = new FakeSnowflakeServiceImpl();
+ snowflakeService = new FakeSnowflakeBatchServiceImpl();
testData = LongStream.range(0, 100).boxed().collect(Collectors.toList());
dc =
diff --git a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/QueryDispositionLocationTest.java b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/QueryDispositionLocationTest.java
index 2825c25..5c06dfe 100644
--- a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/QueryDispositionLocationTest.java
+++ b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/QueryDispositionLocationTest.java
@@ -29,8 +29,8 @@
import org.apache.beam.sdk.io.snowflake.enums.WriteDisposition;
import org.apache.beam.sdk.io.snowflake.services.SnowflakeService;
import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeBasicDataSource;
+import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeBatchServiceImpl;
import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeDatabase;
-import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeServiceImpl;
import org.apache.beam.sdk.io.snowflake.test.TestUtils;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.testing.TestPipeline;
@@ -47,7 +47,7 @@
@RunWith(JUnit4.class)
public class QueryDispositionLocationTest {
private static final String FAKE_TABLE = "FAKE_TABLE";
- private static final String BUCKET_NAME = "BUCKET";
+ private static final String BUCKET_NAME = "BUCKET/";
@Rule public final transient TestPipeline pipeline = TestPipeline.create();
@Rule public ExpectedException exceptionRule = ExpectedException.none();
@@ -63,7 +63,7 @@
PipelineOptionsFactory.register(SnowflakePipelineOptions.class);
options = TestPipeline.testingPipelineOptions().as(SnowflakePipelineOptions.class);
- snowflakeService = new FakeSnowflakeServiceImpl();
+ snowflakeService = new FakeSnowflakeBatchServiceImpl();
testData = LongStream.range(0, 100).boxed().collect(Collectors.toList());
}
diff --git a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/SchemaDispositionTest.java b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/SchemaDispositionTest.java
index ac0af04..fe8e98e 100644
--- a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/SchemaDispositionTest.java
+++ b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/SchemaDispositionTest.java
@@ -38,8 +38,8 @@
import org.apache.beam.sdk.io.snowflake.enums.CreateDisposition;
import org.apache.beam.sdk.io.snowflake.services.SnowflakeService;
import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeBasicDataSource;
+import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeBatchServiceImpl;
import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeDatabase;
-import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeServiceImpl;
import org.apache.beam.sdk.io.snowflake.test.TestUtils;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.testing.TestPipeline;
@@ -78,7 +78,7 @@
stagingBucketName = options.getStagingBucketName();
storageIntegrationName = options.getStorageIntegrationName();
- snowflakeService = new FakeSnowflakeServiceImpl();
+ snowflakeService = new FakeSnowflakeBatchServiceImpl();
dc =
SnowflakeIO.DataSourceConfiguration.create(new FakeSnowflakeBasicDataSource())
diff --git a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/SnowflakeIOWriteTest.java b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/SnowflakeIOWriteTest.java
index e73760b..95798c5 100644
--- a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/SnowflakeIOWriteTest.java
+++ b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/SnowflakeIOWriteTest.java
@@ -20,6 +20,7 @@
import static org.junit.Assert.assertTrue;
import java.sql.SQLException;
+import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.LongStream;
@@ -28,8 +29,8 @@
import org.apache.beam.sdk.io.snowflake.SnowflakePipelineOptions;
import org.apache.beam.sdk.io.snowflake.services.SnowflakeService;
import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeBasicDataSource;
+import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeBatchServiceImpl;
import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeDatabase;
-import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeServiceImpl;
import org.apache.beam.sdk.io.snowflake.test.TestUtils;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.testing.TestPipeline;
@@ -48,7 +49,7 @@
@RunWith(JUnit4.class)
public class SnowflakeIOWriteTest {
private static final String FAKE_TABLE = "FAKE_TABLE";
- private static final String BUCKET_NAME = "BUCKET";
+ private static final String BUCKET_NAME = "BUCKET/";
@Rule public final transient TestPipeline pipeline = TestPipeline.create();
@@ -59,11 +60,21 @@
private static SnowflakeService snowflakeService;
private static List<Long> testData;
+ private static List<String> testDataInStrings;
@BeforeClass
public static void setupAll() {
- snowflakeService = new FakeSnowflakeServiceImpl();
+ snowflakeService = new FakeSnowflakeBatchServiceImpl();
testData = LongStream.range(0, 100).boxed().collect(Collectors.toList());
+
+ testDataInStrings = new ArrayList<>();
+ testDataInStrings.add("First row");
+ testDataInStrings.add("Second row with 'single' quotation");
+ testDataInStrings.add("Second row with single one ' quotation");
+ testDataInStrings.add("Second row with single twice '' quotation");
+ testDataInStrings.add("Third row with \"double\" quotation");
+ testDataInStrings.add("Third row with double one \" quotation");
+ testDataInStrings.add("Third row with double twice \"\" quotation");
}
@Before
@@ -147,7 +158,7 @@
List<String> actualData = FakeSnowflakeDatabase.getElements(FAKE_TABLE);
List<String> testDataInStrings =
- testData.stream().map(e -> e.toString()).collect(Collectors.toList());
+ testData.stream().map(Object::toString).collect(Collectors.toList());
assertTrue(TestUtils.areListsEqual(testDataInStrings, actualData));
}
@@ -174,4 +185,55 @@
assertTrue(TestUtils.areListsEqual(testData, actualData));
}
+
+  /**
+   * Writes the string fixture with a double-quote quotation mark and expects every stored row to
+   * have its single quotes doubled and to be wrapped in double quotes.
+   */
+  @Test
+  public void writeToExternalWithDoubleQuotation() throws SnowflakeSQLException {
+    pipeline
+        .apply(Create.of(testDataInStrings))
+        .apply(
+            "Write SnowflakeIO",
+            SnowflakeIO.<String>write()
+                .withDataSourceConfiguration(dc)
+                .withUserDataMapper(TestUtils.getStringCsvMapper())
+                .to(FAKE_TABLE)
+                .withStagingBucketName(options.getStagingBucketName())
+                .withStorageIntegrationName(options.getStorageIntegrationName())
+                .withSnowflakeService(snowflakeService)
+                .withQuotationMark("\""));
+
+    pipeline.run(options).waitUntilFinish();
+
+    List<String> storedRows = FakeSnowflakeDatabase.getElements(FAKE_TABLE);
+
+    // Expected form of each row: single quotes doubled, then the value wrapped in double quotes.
+    List<String> expectedRows = new ArrayList<>();
+    for (String row : testDataInStrings) {
+      String withDoubledSingleQuotes = row.replace("'", "''");
+      expectedRows.add(String.format("\"%s\"", withDoubledSingleQuotes));
+    }
+
+    assertTrue(TestUtils.areListsEqual(expectedRows, storedRows));
+  }
+
+  /**
+   * Writes the string fixture with an empty quotation mark and expects every stored row to have
+   * its single quotes doubled and no surrounding quotes.
+   */
+  @Test
+  public void writeToExternalWithBlankQuotation() throws SnowflakeSQLException {
+    pipeline
+        .apply(Create.of(testDataInStrings))
+        .apply(
+            "Write SnowflakeIO",
+            SnowflakeIO.<String>write()
+                .withDataSourceConfiguration(dc)
+                .withUserDataMapper(TestUtils.getStringCsvMapper())
+                .to(FAKE_TABLE)
+                .withStagingBucketName(options.getStagingBucketName())
+                .withStorageIntegrationName(options.getStorageIntegrationName())
+                .withSnowflakeService(snowflakeService)
+                .withQuotationMark(""));
+
+    pipeline.run(options).waitUntilFinish();
+
+    List<String> storedRows = FakeSnowflakeDatabase.getElements(FAKE_TABLE);
+
+    // Expected form of each row: single quotes doubled, nothing wrapped around the value.
+    List<String> expectedRows = new ArrayList<>();
+    for (String row : testDataInStrings) {
+      expectedRows.add(row.replace("'", "''"));
+    }
+
+    assertTrue(TestUtils.areListsEqual(expectedRows, storedRows));
+  }
}
diff --git a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/StreamingWriteTest.java b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/StreamingWriteTest.java
new file mode 100644
index 0000000..3f8ffe5
--- /dev/null
+++ b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/StreamingWriteTest.java
@@ -0,0 +1,321 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.snowflake.test.unit.write;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+
+import java.io.IOException;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.LongStream;
+import net.snowflake.client.jdbc.SnowflakeSQLException;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.io.snowflake.SnowflakeIO;
+import org.apache.beam.sdk.io.snowflake.SnowflakePipelineOptions;
+import org.apache.beam.sdk.io.snowflake.credentials.SnowflakeCredentialsFactory;
+import org.apache.beam.sdk.io.snowflake.services.SnowflakeService;
+import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeBasicDataSource;
+import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeDatabase;
+import org.apache.beam.sdk.io.snowflake.test.FakeSnowflakeStreamingServiceImpl;
+import org.apache.beam.sdk.io.snowflake.test.TestUtils;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.testing.TestStream;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.windowing.FixedWindows;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.values.TimestampedValue;
+import org.hamcrest.MatcherAssert;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.junit.After;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@RunWith(JUnit4.class)
+public class StreamingWriteTest {
+ private static final Logger LOG = LoggerFactory.getLogger(StreamingWriteTest.class);
+ private static final String FAKE_TABLE = "TEST_TABLE";
+ private static final String STAGING_BUCKET_NAME = "BUCKET/";
+ private static final String STORAGE_INTEGRATION_NAME = "STORAGE_INTEGRATION";
+ private static final String SNOW_PIPE = "Snowpipe";
+ private static final Instant START_TIME = new Instant(0);
+
+ @Rule public final transient TestPipeline pipeline = TestPipeline.create();
+
+ @Rule public ExpectedException exceptionRule = ExpectedException.none();
+ private static SnowflakeIO.DataSourceConfiguration dataSourceConfiguration;
+ private static SnowflakeService snowflakeService;
+ private static SnowflakePipelineOptions options;
+ private static List<Long> testData;
+
+ private static final List<String> SENTENCES =
+ Arrays.asList(
+ "Snowflake window 1 1",
+ "Snowflake window 1 2",
+ "Snowflake window 1 3",
+ "Snowflake window 1 4",
+ "Snowflake window 2 1",
+ "Snowflake window 2 2");
+
+ private static final List<String> FIRST_WIN_WORDS = SENTENCES.subList(0, 4);
+ private static final List<String> SECOND_WIN_WORDS = SENTENCES.subList(4, 6);
+ private static final Duration WINDOW_DURATION = Duration.standardMinutes(1);
+
+ /**
+ * One-time fixture for the class: creates the fake streaming service, the shared pipeline
+ * options, the numeric sample data, the fake table and a default data source configuration.
+ *
+ * <p>All of these are static fields, so changes made to them by one test remain visible to the
+ * tests that run afterwards.
+ */
+ @BeforeClass
+ public static void setup() {
+ snowflakeService = new FakeSnowflakeStreamingServiceImpl();
+
+ // The options interface is registered with the factory before the options are viewed as it.
+ PipelineOptionsFactory.register(SnowflakePipelineOptions.class);
+ options = TestPipeline.testingPipelineOptions().as(SnowflakePipelineOptions.class);
+ options.setUsername("username");
+
+ options.setServerName("NULL.snowflakecomputing.com");
+
+ // The longs 0..99, used by the tests that expect the write to be rejected.
+ testData = LongStream.range(0, 100).boxed().collect(Collectors.toList());
+
+ FakeSnowflakeDatabase.createTable(FAKE_TABLE);
+ // NOTE(review): every test in this class reassigns dataSourceConfiguration before using it,
+ // so this default only matters for tests added later.
+ dataSourceConfiguration =
+ SnowflakeIO.DataSourceConfiguration.create(new FakeSnowflakeBasicDataSource())
+ .withServerName(options.getServerName())
+ .withoutValidation()
+ .withSchema("PUBLIC")
+ .withDatabase("DATABASE")
+ .withWarehouse("WAREHOUSE");
+ }
+
+ // Per-test cleanup: passes the staging bucket directory name to TestUtils.removeTempDir
+ // (presumably deleting the files staged by the test; verify against TestUtils).
+ @After
+ public void tearDown() {
+ TestUtils.removeTempDir(STAGING_BUCKET_NAME);
+ }
+
+ /**
+ * Builds a Snowpipe write whose credentials come from options carrying an OAuth token and
+ * expects it to fail with a message containing "KeyPair is required for authentication".
+ */
+ @Test
+ public void streamWriteWithOAuthFails() {
+ // NOTE(review): options is a static field shared by all tests and the token is never
+ // cleared, so it stays set for tests that run after this one; confirm that is harmless.
+ options.setOauthToken("token");
+ dataSourceConfiguration =
+ SnowflakeIO.DataSourceConfiguration.create(SnowflakeCredentialsFactory.of(options))
+ .withoutValidation()
+ .withServerName(options.getServerName())
+ .withSchema("PUBLIC")
+ .withDatabase("DATABASE")
+ .withWarehouse("WAREHOUSE");
+
+ // The expectation is registered before the transform is applied and the pipeline is run,
+ // so a failure at either point satisfies it. Only the message is checked, not the type.
+ exceptionRule.expectMessage("KeyPair is required for authentication");
+
+ pipeline
+ .apply(Create.of(testData))
+ .apply(
+ SnowflakeIO.<Long>write()
+ .withDataSourceConfiguration(dataSourceConfiguration)
+ .to(FAKE_TABLE)
+ .withStagingBucketName(STAGING_BUCKET_NAME)
+ .withStorageIntegrationName(STORAGE_INTEGRATION_NAME)
+ .withSnowPipe(SNOW_PIPE)
+ .withUserDataMapper(TestUtils.getLongCsvMapper())
+ .withSnowflakeService(snowflakeService));
+
+ pipeline.run(options);
+ }
+
+ /**
+ * Builds a Snowpipe write whose credentials come from options carrying a username and password
+ * and expects it to fail with a message containing "KeyPair is required for authentication".
+ */
+ @Test
+ public void streamWriteWithUserPasswordFails() {
+ // NOTE(review): options is a static field shared by all tests and the password is never
+ // cleared, so it stays set for tests that run after this one; confirm that is harmless.
+ options.setPassword("password");
+ dataSourceConfiguration =
+ SnowflakeIO.DataSourceConfiguration.create(SnowflakeCredentialsFactory.of(options))
+ .withoutValidation()
+ .withServerName(options.getServerName())
+ .withSchema("PUBLIC")
+ .withDatabase("DATABASE")
+ .withWarehouse("WAREHOUSE");
+
+ // The expectation is registered before the transform is applied and the pipeline is run,
+ // so a failure at either point satisfies it. Only the message is checked, not the type.
+ exceptionRule.expectMessage("KeyPair is required for authentication");
+
+ pipeline
+ .apply(Create.of(testData))
+ .apply(
+ SnowflakeIO.<Long>write()
+ .withDataSourceConfiguration(dataSourceConfiguration)
+ .to(FAKE_TABLE)
+ .withStagingBucketName(STAGING_BUCKET_NAME)
+ .withStorageIntegrationName(STORAGE_INTEGRATION_NAME)
+ .withSnowPipe(SNOW_PIPE)
+ .withUserDataMapper(TestUtils.getLongCsvMapper())
+ .withSnowflakeService(snowflakeService));
+
+ pipeline.run(options);
+ }
+
+  /**
+   * Streams the six sample sentences through one-minute fixed windows into a Snowpipe write that
+   * uses private-key credentials and a flush row limit of 4, then checks the staged files and the
+   * rows stored in the fake database.
+   *
+   * <p>No quotation mark is configured on the write; {@code quotationMark} is only used to strip
+   * single quotes from the results before comparing.
+   */
+  @Test
+  public void streamWriteWithKey() throws SnowflakeSQLException {
+    String quotationMark = "'";
+    options.setPrivateKeyPath(TestUtils.getPrivateKeyPath(getClass()));
+    options.setPrivateKeyPassphrase(TestUtils.getPrivateKeyPassphrase());
+
+    TestStream<String> stringsStream =
+        TestStream.create(StringUtf8Coder.of())
+            .advanceWatermarkTo(START_TIME)
+            .addElements(event(FIRST_WIN_WORDS.get(0), 2L))
+            .advanceWatermarkTo(START_TIME.plus(Duration.standardSeconds(27L)))
+            .addElements(
+                event(FIRST_WIN_WORDS.get(1), 25L),
+                event(FIRST_WIN_WORDS.get(2), 18L),
+                event(FIRST_WIN_WORDS.get(3), 26L))
+            .advanceWatermarkTo(START_TIME.plus(Duration.standardSeconds(65L)))
+            // These elements are added after the watermark has passed the end of the first
+            // one-minute window.
+            // NOTE(review): the last assertion below expects all SENTENCES, these two included,
+            // so they are in fact written; confirm which behavior is intended.
+            .addElements(event(SECOND_WIN_WORDS.get(0), 67L), event(SECOND_WIN_WORDS.get(1), 68L))
+            .advanceWatermarkToInfinity();
+
+    dataSourceConfiguration =
+        SnowflakeIO.DataSourceConfiguration.create(SnowflakeCredentialsFactory.of(options))
+            .withServerName(options.getServerName())
+            .withoutValidation()
+            .withSchema("PUBLIC")
+            .withDatabase("DATABASE")
+            .withWarehouse("WAREHOUSE");
+
+    pipeline
+        .apply(stringsStream)
+        .apply(Window.into(FixedWindows.of(WINDOW_DURATION)))
+        .apply(
+            SnowflakeIO.<String>write()
+                .withDataSourceConfiguration(dataSourceConfiguration)
+                .withStagingBucketName(STAGING_BUCKET_NAME)
+                .withStorageIntegrationName(STORAGE_INTEGRATION_NAME)
+                .withSnowPipe(SNOW_PIPE)
+                .withFlushRowLimit(4)
+                .withFlushTimeLimit(WINDOW_DURATION)
+                .withUserDataMapper(TestUtils.getStringCsvMapper())
+                .withSnowflakeService(snowflakeService));
+
+    pipeline.run(options).waitUntilFinish();
+
+    // The table name is passed as-is; it is a plain constant, not a format string.
+    List<String> actualDataFirstWin =
+        parseResults(FakeSnowflakeDatabase.getElements(FAKE_TABLE), quotationMark);
+
+    Map<String, List<String>> mapOfResults = getMapOfFilesAndResults();
+
+    // Staged files are keyed by the second "-"-separated part of their file name.
+    String firstFileKey = "0";
+    List<String> filesResult = parseResults(mapOfResults.get(firstFileKey), quotationMark);
+
+    int amountOfCreatedFiles = 2;
+    MatcherAssert.assertThat(mapOfResults.size(), equalTo(amountOfCreatedFiles));
+    MatcherAssert.assertThat(filesResult, equalTo(FIRST_WIN_WORDS));
+    MatcherAssert.assertThat(actualDataFirstWin, equalTo(SENTENCES));
+  }
+
+  /**
+   * Same scenario as the key-pair streaming write, but with a double-quote quotation mark
+   * configured on the write: streams the six sample sentences through one-minute fixed windows
+   * with a flush row limit of 4, then checks the staged files and the rows stored in the fake
+   * database after stripping the double quotes.
+   */
+  @Test
+  public void streamWriteWithDoubleQuotation() throws SnowflakeSQLException {
+    String quotationMark = "\"";
+    options.setPrivateKeyPath(TestUtils.getPrivateKeyPath(getClass()));
+    options.setPrivateKeyPassphrase(TestUtils.getPrivateKeyPassphrase());
+
+    TestStream<String> stringsStream =
+        TestStream.create(StringUtf8Coder.of())
+            .advanceWatermarkTo(START_TIME)
+            .addElements(event(FIRST_WIN_WORDS.get(0), 2L))
+            .advanceWatermarkTo(START_TIME.plus(Duration.standardSeconds(27L)))
+            .addElements(
+                event(FIRST_WIN_WORDS.get(1), 25L),
+                event(FIRST_WIN_WORDS.get(2), 18L),
+                event(FIRST_WIN_WORDS.get(3), 26L))
+            .advanceWatermarkTo(START_TIME.plus(Duration.standardSeconds(65L)))
+            // These elements are added after the watermark has passed the end of the first
+            // one-minute window.
+            // NOTE(review): the last assertion below expects all SENTENCES, these two included,
+            // so they are in fact written; confirm which behavior is intended.
+            .addElements(event(SECOND_WIN_WORDS.get(0), 67L), event(SECOND_WIN_WORDS.get(1), 68L))
+            .advanceWatermarkToInfinity();
+
+    dataSourceConfiguration =
+        SnowflakeIO.DataSourceConfiguration.create(SnowflakeCredentialsFactory.of(options))
+            .withServerName(options.getServerName())
+            .withoutValidation()
+            .withSchema("PUBLIC")
+            .withDatabase("DATABASE")
+            .withWarehouse("WAREHOUSE");
+
+    pipeline
+        .apply(stringsStream)
+        .apply(Window.into(FixedWindows.of(WINDOW_DURATION)))
+        .apply(
+            SnowflakeIO.<String>write()
+                .withDataSourceConfiguration(dataSourceConfiguration)
+                .withStagingBucketName(STAGING_BUCKET_NAME)
+                .withStorageIntegrationName(STORAGE_INTEGRATION_NAME)
+                .withSnowPipe(SNOW_PIPE)
+                .withFlushRowLimit(4)
+                .withQuotationMark(quotationMark)
+                .withFlushTimeLimit(WINDOW_DURATION)
+                .withUserDataMapper(TestUtils.getStringCsvMapper())
+                .withSnowflakeService(snowflakeService));
+
+    pipeline.run(options).waitUntilFinish();
+
+    // The table name is passed as-is; it is a plain constant, not a format string.
+    List<String> actualDataFirstWin =
+        parseResults(FakeSnowflakeDatabase.getElements(FAKE_TABLE), quotationMark);
+
+    Map<String, List<String>> mapOfResults = getMapOfFilesAndResults();
+
+    // Staged files are keyed by the second "-"-separated part of their file name.
+    String firstFileKey = "0";
+    List<String> filesResult = parseResults(mapOfResults.get(firstFileKey), quotationMark);
+
+    int amountOfCreatedFiles = 2;
+    MatcherAssert.assertThat(mapOfResults.size(), equalTo(amountOfCreatedFiles));
+    MatcherAssert.assertThat(filesResult, equalTo(FIRST_WIN_WORDS));
+    MatcherAssert.assertThat(actualDataFirstWin, equalTo(SENTENCES));
+  }
+
+  /**
+   * Returns a copy of {@code resultsList} with every occurrence of {@code quotationMark} removed
+   * from each element.
+   *
+   * @param resultsList the strings to clean
+   * @param quotationMark the literal text to strip (not a regular expression)
+   */
+  private List<String> parseResults(List<String> resultsList, String quotationMark) {
+    return resultsList.stream()
+        // replace() matches the mark literally; replaceAll() would compile it as a regex and
+        // misbehave for marks such as "." or "|".
+        .map(s -> s.replace(quotationMark, ""))
+        .collect(Collectors.toList());
+  }
+
+  /** Copies the result of {@code getFiles} for the staging bucket directory into a new map. */
+  private Map<String, List<String>> getMapOfFilesAndResults() {
+    Path stagingDir = Paths.get(STAGING_BUCKET_NAME);
+    Map<String, List<String>> linesByFileKey = getFiles(stagingDir);
+    return new HashMap<>(linesByFileKey);
+  }
+
+  /**
+   * Reads every {@code *.gz} file directly inside {@code file} and maps the second
+   * {@code "-"}-separated part of its file name to the lines returned by
+   * {@code TestUtils.readGZIPFile}.
+   *
+   * @throws RuntimeException wrapping any {@link IOException} raised while listing the directory
+   */
+  private Map<String, List<String>> getFiles(Path file) {
+    Map<String, List<String>> linesByKey = new HashMap<>();
+    try (DirectoryStream<Path> gzFiles = Files.newDirectoryStream(file, "*.gz")) {
+      for (Path gzFile : gzFiles) {
+        String[] nameParts = gzFile.getFileName().toString().split("-", -1);
+        linesByKey.put(nameParts[1], TestUtils.readGZIPFile(gzFile.toString()));
+      }
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to retrieve files", e);
+    }
+    return linesByKey;
+  }
+
+  /**
+   * Wraps {@code word} in a {@link TimestampedValue} whose timestamp is {@code START_TIME} plus
+   * a Joda-Time {@link Duration} built from {@code timestamp}.
+   *
+   * <p>NOTE(review): Joda-Time reads a bare numeric duration as milliseconds, while the
+   * watermarks in the tests advance in seconds; confirm the intended unit.
+   */
+  private TimestampedValue<String> event(String word, Long timestamp) {
+    Duration offsetFromStart = new Duration(timestamp);
+    Instant eventTime = START_TIME.plus(offsetFromStart);
+    return TimestampedValue.of(word, eventTime);
+  }
+}
diff --git a/sdks/java/testing/tpcds/build.gradle b/sdks/java/testing/tpcds/build.gradle
new file mode 100644
index 0000000..fa249bc93
--- /dev/null
+++ b/sdks/java/testing/tpcds/build.gradle
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+plugins {
+ id 'java'
+}
+
+description = "Apache Beam :: SDKs :: Java :: TPC-DS Benchmark"
+
+version '2.24.0-SNAPSHOT'
+
+sourceCompatibility = 1.8
+
+repositories {
+ mavenCentral()
+}
+
+dependencies {
+ compile 'com.googlecode.json-simple:json-simple:1.1.1'
+ compile project(path: ":sdks:java:core", configuration: "shadow")
+ compile project(path: ":runners:google-cloud-dataflow-java")
+ compile project(":sdks:java:io:google-cloud-platform")
+ compile project(":sdks:java:extensions:sql")
+ compile group: 'com.google.auto.service', name: 'auto-service', version: '1.0-rc1'
+ testCompile group: 'junit', name: 'junit', version: '4.12'
+}
+
+// When running via Gradle, this property can be used to pass commandline arguments
+// to the tpcds run, e.g. -Ptpcds.args="--dataSize=1G --queries=3,26,55"
+def tpcdsArgsProperty = "tpcds.args"
+
+// Launches the BeamTpcds main class on the runtime classpath of the main source set.
+task run(type: JavaExec) {
+ main = "org.apache.beam.sdk.tpcds.BeamTpcds"
+ classpath = sourceSets.main.runtimeClasspath
+ // Falls back to an empty string when the property is not set.
+ def tpcdsArgsStr = project.findProperty(tpcdsArgsProperty) ?: ""
+ // Groovy's no-argument split() tokenizes on whitespace, so a single argument cannot
+ // itself contain spaces.
+ args tpcdsArgsStr.split()
+}
diff --git a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/BeamTpcds.java b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/BeamTpcds.java
new file mode 100644
index 0000000..0e6e988
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/BeamTpcds.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.tpcds;
+
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TypeDescriptors;
+import org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore;
+import org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv;
+import org.apache.beam.sdk.extensions.sql.impl.rel.BeamSqlRelUtils;
+import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
+import java.util.List;
+import java.util.concurrent.CompletionService;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+
+/**
+ * Runs the selected TPC-DS queries with Beam SQL, one pipeline per query. To execute this main()
+ * method, run the following example command from the command line.
+ *
+ * ./gradlew :sdks:java:testing:tpcds:run -Ptpcds.args="--dataSize=1G \
+ *         --queries=3,26,55 \
+ *         --tpcParallel=2 \
+ *         --project=apache-beam-testing \
+ *         --stagingLocation=gs://beamsql_tpcds_1/staging \
+ *         --tempLocation=gs://beamsql_tpcds_2/temp \
+ *         --runner=DataflowRunner \
+ *         --region=us-west1 \
+ *         --maxNumWorkers=10"
+ */
+public class BeamTpcds {
+  private static final String dataDirectory = "gs://beamsql_tpcds_1/data";
+  private static final String resultDirectory = "gs://beamsql_tpcds_1/tpcds_results";
+
+  /** Returns the DDL template for tableName; its two %s slots take the column list and the data location. */
+  private static String buildTableCreateStatement(String tableName) {
+    return "CREATE EXTERNAL TABLE " + tableName + " (%s) TYPE text LOCATION '%s' TBLPROPERTIES '{\"format\":\"csv\", \"csvformat\": \"InformixUnload\"}'";
+  }
+
+  /** Returns the location of the .dat file that holds the data of tableName for the given data size. */
+  private static String buildDataLocation(String dataSize, String tableName) {
+    return dataDirectory + "/" + dataSize + "/" + tableName + ".dat";
+  }
+
+  /** Register all tables into env, set their schemas, and set the locations where their corresponding data are stored. */
+  private static void registerAllTables(BeamSqlEnv env, String dataSize) throws Exception {
+    List<String> tableNames = TableSchemaJSONLoader.getAllTableNames();
+    for (String tableName : tableNames) {
+      String tableSchema = TableSchemaJSONLoader.parseTableSchema(tableName);
+      String dataLocation = buildDataLocation(dataSize, tableName);
+      env.executeDdl(String.format(buildTableCreateStatement(tableName), tableSchema, dataLocation));
+    }
+  }
+
+  /**
+   * Builds one pipeline per requested query, runs them on a pool of tpcParallel threads and reports every failed run.
+   *
+   * @param args command-line arguments, parsed into {@link TpcdsOptions}
+   * @throws Exception if the options are invalid, a table or query cannot be loaded, or the wait is interrupted
+   */
+  public static void main(String[] args) throws Exception {
+    InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
+    inMemoryMetaStore.registerProvider(new TextTableProvider());
+
+    TpcdsOptions tpcdsOptions = PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
+    String dataSize = TpcdsParametersReader.getAndCheckDataSize(tpcdsOptions);
+    String[] queryNameArr = TpcdsParametersReader.getAndCheckQueryNameArray(tpcdsOptions);
+    int nThreads = TpcdsParametersReader.getAndCheckTpcParallel(tpcdsOptions);
+
+    BeamSqlEnv env = BeamSqlEnv.builder(inMemoryMetaStore).setPipelineOptions(tpcdsOptions).build();
+    registerAllTables(env, dataSize);
+
+    // Using ExecutorService and CompletionService to fulfill multi-threading functionality.
+    ExecutorService executor = Executors.newFixedThreadPool(nThreads);
+    CompletionService<PipelineResult> completion = new ExecutorCompletionService<>(executor);
+    try {
+      for (String queryName : queryNameArr) {
+        // Each query gets its own copy of the options, parsed again from the command line, so it can carry its own job name.
+        DataflowPipelineOptions dataflowPipelineOptionsCopy =
+            PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class).as(DataflowPipelineOptions.class);
+        // Set a unique job name using the time stamp so that multiple different pipelines can run together.
+        dataflowPipelineOptionsCopy.setJobName(queryName + "result" + System.currentTimeMillis());
+        Pipeline pipeline = Pipeline.create(dataflowPipelineOptionsCopy);
+        PCollection<Row> rows = BeamSqlRelUtils.toPCollection(pipeline, env.parseQuery(QueryReader.readQuery(queryName)));
+
+        // Convert each Row to its string form and write the result as a single .txt shard under the result directory.
+        rows.apply(MapElements.into(TypeDescriptors.strings()).via((Row row) -> row.toString()))
+            .apply(TextIO.write().to(resultDirectory + "/" + dataSize + "/" + pipeline.getOptions().getJobName()).withSuffix(".txt").withNumShards(1));
+
+        completion.submit(new TpcdsRun(pipeline));
+      }
+    } finally {
+      // Always stop accepting work; otherwise idle pool threads keep the JVM alive when a step above throws.
+      executor.shutdown();
+    }
+
+    // Drain the completion service so that a failed pipeline is reported instead of being silently dropped.
+    for (int i = 0; i < queryNameArr.length; i++) {
+      try {
+        completion.take().get();
+      } catch (java.util.concurrent.ExecutionException e) {
+        System.err.println("A TPC-DS pipeline failed: " + e.getCause());
+      }
+    }
+  }
+}
diff --git a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/QueryReader.java b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/QueryReader.java
new file mode 100644
index 0000000..1666c78
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/QueryReader.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.tpcds;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.Objects;
+
+/**
+ * The QueryReader reads a query file (the file's extension is '.sql' and its content doesn't end with a ';') and returns the query as a string.
+ */
+public class QueryReader {
+  /**
+   * Reads a query file (.sql) and returns the query as a string. Lines are joined with the platform line
+   * separator and no separator follows the last line; an empty file yields an empty string.
+   *
+   * @param queryFileName The name of the query file without its extension (such as "query1", "query5") which is stored in the resources/queries directory
+   * @return The query string stored in this file.
+   * @throws NullPointerException if no such query file exists on the classpath
+   * @throws Exception if the file cannot be located on disk or read
+   */
+  public static String readQuery(String queryFileName) throws Exception {
+    // Resolve the resource through its URI so that percent-encoded characters in the path (e.g. spaces) are decoded.
+    File queryFile = new File(Objects.requireNonNull(QueryReader.class.getClassLoader().getResource("queries/" + queryFileName + ".sql")).toURI());
+    String ls = System.getProperty("line.separator");
+    StringBuilder stringBuilder = new StringBuilder();
+
+    // try-with-resources closes the reader even when reading fails.
+    try (BufferedReader reader = new BufferedReader(new FileReader(queryFile))) {
+      String line = reader.readLine();
+      while (line != null) {
+        stringBuilder.append(line);
+        line = reader.readLine();
+        // Only separate lines from each other, so that there is never a trailing separator to strip.
+        if (line != null) {
+          stringBuilder.append(ls);
+        }
+      }
+    }
+
+    return stringBuilder.toString();
+  }
+}
diff --git a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TableSchemaJSONLoader.java b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TableSchemaJSONLoader.java
new file mode 100644
index 0000000..420386c
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TableSchemaJSONLoader.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.tpcds;
+
+import org.apache.beam.repackaged.core.org.apache.commons.compress.utils.FileNameUtils;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+
+import java.io.File;
+import java.io.FileReader;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.ArrayList;
+
+
+/**
+ * TableSchemaJSONLoader can get all tables' names from the resources/schemas directory and parse a table's schema into a string.
+ */
+public class TableSchemaJSONLoader {
+  /**
+   * Read a table schema json file from the resources/schemas directory, parse the file into a string which can be utilized by BeamSqlEnv.executeDdl method.
+   *
+   * <p>Each element of the file's "schema" array contributes one "name type" column definition. The
+   * "name" and "type" values are looked up by key, so the result does not depend on the order in which
+   * the entries of a record happen to be iterated.
+   *
+   * @param tableName The name of the json file to be read (for example: item, store_sales).
+   * @return A string that matches the format in BeamSqlEnv.executeDdl method, such as "d_date_sk bigint,d_date_id varchar"
+   * @throws Exception if the schema file is missing, cannot be read or cannot be parsed
+   */
+  public static String parseTableSchema(String tableName) throws Exception {
+    // Resolve the resource through its URI so that percent-encoded characters in the path (e.g. spaces) are decoded.
+    File tableFile = new File(Objects.requireNonNull(TableSchemaJSONLoader.class.getClassLoader().getResource("schemas/" + tableName + ".json")).toURI());
+
+    JSONObject jsonObject;
+    // try-with-resources closes the reader even when parsing fails.
+    try (FileReader fileReader = new FileReader(tableFile)) {
+      jsonObject = (JSONObject) new JSONParser().parse(fileReader);
+    }
+    JSONArray jsonArray = (JSONArray) jsonObject.get("schema");
+
+    // Build one "name type" definition per element of jsonArray, separated by commas.
+    StringBuilder schemaStringBuilder = new StringBuilder();
+    Iterator<?> jsonArrIterator = jsonArray.iterator();
+    while (jsonArrIterator.hasNext()) {
+      Map<?, ?> column = (Map<?, ?>) jsonArrIterator.next();
+      if (schemaStringBuilder.length() > 0) {
+        schemaStringBuilder.append(',');
+      }
+      schemaStringBuilder.append(column.get("name")).append(' ').append(toSqlType((String) column.get("type")));
+    }
+
+    return schemaStringBuilder.toString();
+  }
+
+  /** Maps a type name from the schema file to the SQL type used in the table's DDL, ignoring letter case. */
+  private static String toSqlType(String typeName) {
+    if ("identifier".equalsIgnoreCase(typeName) || "integer".equalsIgnoreCase(typeName)) {
+      // Use long type to represent int, prevent overflow
+      return "bigint";
+    } else if (typeName.toLowerCase(java.util.Locale.ROOT).contains("decimal")) {
+      // Currently Beam SQL doesn't handle "decimal" type properly, use "double" to replace it for now.
+      return "double";
+    } else {
+      // Currently Beam SQL doesn't handle "date" type properly, use "varchar" to replace it for now.
+      return "varchar";
+    }
+  }
+
+  /**
+   * Get all tables' names. Tables are stored in the resources/schemas directory in the form of json files, such as "item.json", "store_sales.json", they'll be converted to "item", "store_sales".
+   *
+   * @return The list of names of all tables; empty if the schemas directory cannot be listed.
+   */
+  public static List<String> getAllTableNames() {
+    // NOTE(review): getPath() leaves percent-encoded characters (e.g. %20) undecoded, so a classpath
+    // location containing such characters cannot be listed and yields an empty list here.
+    String tableDirPath = Objects.requireNonNull(TableSchemaJSONLoader.class.getClassLoader().getResource("schemas")).getPath();
+    File[] tableDirListing = new File(tableDirPath).listFiles();
+
+    List<String> tableNames = new ArrayList<>();
+    if (tableDirListing != null) {
+      for (File file : tableDirListing) {
+        // Only .json files describe tables; skip anything else found in the directory.
+        if (file.isFile() && file.getName().endsWith(".json")) {
+          // Remove the .json extension in file name
+          tableNames.add(FileNameUtils.getBaseName(file.getName()));
+        }
+      }
+    }
+
+    return tableNames;
+  }
+}
diff --git a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsOptions.java b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsOptions.java
new file mode 100644
index 0000000..1c567dd
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsOptions.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.tpcds;
+
+import org.apache.beam.sdk.options.Default;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptions;
+
+/** Options used to configure a TPC-DS test: the data size, the queries to run and how many of them run in parallel. */
+public interface TpcdsOptions extends PipelineOptions {
+ @Description("The size of TPC-DS data to run query on, user input should contain the unit, such as '1G', '10G'")
+ String getDataSize();
+
+ void setDataSize(String dataSize);
+
+ // The return type is String because the value is read from the command line (user input will be like "1,2,55", which represents TPC-DS query1, query2 and query55).
+ @Description("The queries numbers, read user input as string, numbers separated by commas")
+ String getQueries();
+
+ void setQueries(String queries);
+
+ @Description("The number of queries to run in parallel")
+ @Default.Integer(1) // a single query at a time unless the option is given
+ Integer getTpcParallel();
+
+ void setTpcParallel(Integer parallelism);
+}
diff --git a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsOptionsRegistrar.java b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsOptionsRegistrar.java
new file mode 100644
index 0000000..d1ddc9d
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsOptionsRegistrar.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.tpcds;
+
+import com.google.auto.service.AutoService;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsRegistrar;
+import org.apache.beam.vendor.calcite.v1_20_0.com.google.common.collect.ImmutableList;
+
+/** Registers {@link TpcdsOptions} as pipeline options; the registrar itself is declared through {@link AutoService}. */
+@AutoService(PipelineOptionsRegistrar.class)
+public class TpcdsOptionsRegistrar implements PipelineOptionsRegistrar {
+  /** Returns a one-element list holding the {@link TpcdsOptions} interface. */
+  @Override
+  public Iterable<Class<? extends PipelineOptions>> getPipelineOptions() {
+    return ImmutableList.<Class<? extends PipelineOptions>>of(TpcdsOptions.class);
+  }
+}
diff --git a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java
new file mode 100644
index 0000000..7f0e147
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.tpcds;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * Get and check the TpcdsOptions' parameters, throw exceptions when user input is invalid.
+ */
+public class TpcdsParametersReader {
+
+  /** The data sizes that have been supported. */
+  private static final Set<String> supportedDataSizes = Stream.of("1G", "10G").collect(Collectors.toCollection(HashSet::new));
+
+  /**
+   * Get and check dataSize entered by user. This dataSize has to have been supported.
+   *
+   * @param tpcdsOptions TpcdsOptions object constructed from user input
+   * @return The dataSize user entered, if it is contained in supportedDataSizes set.
+   * @throws Exception if the dataSize is missing or is not contained in supportedDataSizes set.
+   */
+  public static String getAndCheckDataSize(TpcdsOptions tpcdsOptions) throws Exception {
+    String dataSize = tpcdsOptions.getDataSize();
+    if (!supportedDataSizes.contains(dataSize)) {
+      throw new Exception("The data size you entered has not been supported.");
+    }
+
+    return dataSize;
+  }
+
+  /**
+   * Get and check queries entered by user. This has to be a string of numbers separated by commas or "all" which means run all 99 queries.
+   * All query numbers have to be between 1 and 99.
+   *
+   * @param tpcdsOptions TpcdsOptions object constructed from user input
+   * @return An array of query names, for example "1,2,7" will be output as "query1,query2,query7"
+   * @throws Exception if no queries were entered, or an entry is not an integer between 1 and 99.
+   */
+  public static String[] getAndCheckQueryNameArray(TpcdsOptions tpcdsOptions) throws Exception {
+    String queryNums = tpcdsOptions.getQueries() == null ? "" : tpcdsOptions.getQueries().trim();
+    if (queryNums.isEmpty()) {
+      throw new Exception("No queries were entered, please provide integers between 1 and 99 or \"all\".");
+    }
+
+    if (queryNums.equalsIgnoreCase("all")) {
+      // All 99 TPC-DS queries need to be executed.
+      String[] allQueryNames = new String[99];
+      for (int i = 0; i < 99; i++) {
+        allQueryNames[i] = "query" + (i + 1);
+      }
+      return allQueryNames;
+    }
+
+    // Split user input queryNums by spaces and commas, get an array of all query numbers.
+    String[] queryNumArr = queryNums.split("[\\s,]+");
+    String[] queryNameArr = new String[queryNumArr.length];
+    for (int i = 0; i < queryNumArr.length; i++) {
+      int queryNum;
+      try {
+        queryNum = Integer.parseInt(queryNumArr[i]);
+      } catch (NumberFormatException e) {
+        // Fail instead of only printing a message; otherwise an invalid query name would be returned to the caller.
+        throw new Exception("The queries you entered should be integers, please provide integers between 1 and 99.", e);
+      }
+      if (queryNum < 1 || queryNum > 99) {
+        throw new Exception("The queries you entered contains invalid query number, please provide integers between 1 and 99.");
+      }
+      // Build the name from the parsed number so that inputs such as "07" still map to "query7".
+      queryNameArr[i] = "query" + queryNum;
+    }
+
+    return queryNameArr;
+  }
+
+  /**
+   * Get and check TpcParallel entered by user. This has to be an integer between 1 and 99.
+   *
+   * @param tpcdsOptions TpcdsOptions object constructed from user input.
+   * @return The TpcParallel user entered.
+   * @throws Exception if TpcParallel is not between 1 and 99.
+   */
+  public static int getAndCheckTpcParallel(TpcdsOptions tpcdsOptions) throws Exception {
+    int nThreads = tpcdsOptions.getTpcParallel();
+    if (nThreads < 1 || nThreads > 99) {
+      throw new Exception("The TpcParallel you entered is invalid, please provide an integer between 1 and 99.");
+    }
+
+    return nThreads;
+  }
+}
diff --git a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsRun.java b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsRun.java
new file mode 100644
index 0000000..936c24f
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsRun.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.tpcds;
+
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.PipelineResult;
+import java.util.concurrent.Callable;
+
+/** A {@link Callable} that runs one pipeline and waits until it finishes, to fulfill multi-threaded execution. */
+public class TpcdsRun implements Callable<PipelineResult> {
+  private final Pipeline pipeline;
+
+  /** Creates a task that runs the given pipeline when {@link #call()} is invoked. */
+  public TpcdsRun(Pipeline pipeline) {
+    this.pipeline = pipeline;
+  }
+
+  /** Runs the pipeline, waits until it finishes and returns its result. */
+  @Override
+  public PipelineResult call() {
+    final PipelineResult result = this.pipeline.run();
+    result.waitUntilFinish();
+    return result;
+  }
+}
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query1.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query1.sql
new file mode 100644
index 0000000..3cdf4ca
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query1.sql
@@ -0,0 +1,38 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+with customer_total_return as -- query1: total SR_FEE per customer and store, for returns whose date is in year 2000
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id -- ids of customers whose total is above 1.2 times the average total of the same store
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'TN' -- restricts the result to stores whose s_state is TN
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query10.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query10.sql
new file mode 100644
index 0000000..d12ef0d
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query10.sql
@@ -0,0 +1,72 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select -- query10: customer counts per demographic profile (each count(*) is the size of the same group)
+ cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ count(*) cnt1,
+ cd_purchase_estimate,
+ count(*) cnt2,
+ cd_credit_rating,
+ count(*) cnt3,
+ cd_dep_count,
+ count(*) cnt4,
+ cd_dep_employed_count,
+ count(*) cnt5,
+ cd_dep_college_count,
+ count(*) cnt6
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select * -- the customer has a store sale in 2002 with d_moy between 4 and 7
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 and 4+3) and
+ (exists (select * -- and a web sale (as bill customer) in the same period
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 ANd 4+3) or
+ exists (select * -- or a catalog sale (as ship customer) in the same period
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 and 4+3))
+ group by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+ order by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query11.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query11.sql
new file mode 100644
index 0000000..3955094
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query11.sql
@@ -0,0 +1,94 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+with year_total as ( -- query11: per customer and year, sum of (list price - discount), separately for store and web sales
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag customer_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total
+ ,'s' sale_type -- marks rows computed from store_sales
+ from customer
+ ,store_sales
+ ,date_dim
+ where c_customer_sk = ss_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+ union all
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag customer_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total
+ ,'w' sale_type -- marks rows computed from web_sales
+ from customer
+ ,web_sales
+ ,date_dim
+ where c_customer_sk = ws_bill_customer_sk
+ and ws_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+ )
+ select -- customers whose web total grew faster from 2001 to 2002 than their store total
+ t_s_secyear.customer_id
+ ,t_s_secyear.customer_first_name
+ ,t_s_secyear.customer_last_name
+ ,t_s_secyear.customer_email_address
+ from year_total t_s_firstyear
+ ,year_total t_s_secyear
+ ,year_total t_w_firstyear
+ ,year_total t_w_secyear
+ where t_s_secyear.customer_id = t_s_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_w_secyear.customer_id
+ and t_s_firstyear.customer_id = t_w_firstyear.customer_id
+ and t_s_firstyear.sale_type = 's'
+ and t_w_firstyear.sale_type = 'w'
+ and t_s_secyear.sale_type = 's'
+ and t_w_secyear.sale_type = 'w'
+ and t_s_firstyear.dyear = 2001
+ and t_s_secyear.dyear = 2001+1
+ and t_w_firstyear.dyear = 2001
+ and t_w_secyear.dyear = 2001+1
+ and t_s_firstyear.year_total > 0
+ and t_w_firstyear.year_total > 0
+ and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end
+ > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end
+ order by t_s_secyear.customer_id
+ ,t_s_secyear.customer_first_name
+ ,t_s_secyear.customer_last_name
+ ,t_s_secyear.customer_email_address
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query12.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query12.sql
new file mode 100644
index 0000000..c015bff
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query12.sql
@@ -0,0 +1,47 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 12: web revenue per item and each item's percentage share of its class's revenue, for three item categories in a date window.
+select i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ ,sum(ws_ext_sales_price) as itemrevenue
+ ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over -- inner sum: revenue of this group; outer sum: window total of the group revenues
+ (partition by i_class) as revenueratio
+from
+ web_sales
+ ,item
+ ,date_dim
+where
+ ws_item_sk = i_item_sk
+ and i_category in ('Jewelry', 'Sports', 'Books')
+ and ws_sold_date_sk = d_date_sk
+ and d_date between cast('2001-01-12' as date) -- window start (inclusive)
+ and (cast('2001-01-12' as date) + interval '30' day) -- window end (inclusive); standard interval literal replaces the vendor-specific "+ 30 days"
+group by
+ i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+order by
+ i_category
+ ,i_class
+ ,i_item_id
+ ,i_item_desc
+ ,revenueratio
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query13.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query13.sql
new file mode 100644
index 0000000..47fa265
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query13.sql
@@ -0,0 +1,64 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 13: average quantity, extended sales price and extended wholesale cost (plus total wholesale cost) of 2001 store sales matching one demographic profile and one address/profit profile.
+select avg(ss_quantity)
+ ,avg(ss_ext_sales_price)
+ ,avg(ss_ext_wholesale_cost)
+ ,sum(ss_ext_wholesale_cost)
+ from store_sales
+ ,store
+ ,customer_demographics
+ ,household_demographics
+ ,customer_address
+ ,date_dim
+ where s_store_sk = ss_store_sk
+ and ss_sold_date_sk = d_date_sk and d_year = 2001
+ and((ss_hdemo_sk=hd_demo_sk -- first OR group: three demographic profiles; the join predicates are repeated inside every branch
+ and cd_demo_sk = ss_cdemo_sk
+ and cd_marital_status = 'D'
+ and cd_education_status = '2 yr Degree'
+ and ss_sales_price between 100.00 and 150.00
+ and hd_dep_count = 3
+ )or
+ (ss_hdemo_sk=hd_demo_sk
+ and cd_demo_sk = ss_cdemo_sk
+ and cd_marital_status = 'S'
+ and cd_education_status = 'Secondary'
+ and ss_sales_price between 50.00 and 100.00
+ and hd_dep_count = 1
+ ) or
+ (ss_hdemo_sk=hd_demo_sk
+ and cd_demo_sk = ss_cdemo_sk
+ and cd_marital_status = 'W'
+ and cd_education_status = 'Advanced Degree'
+ and ss_sales_price between 150.00 and 200.00
+ and hd_dep_count = 1
+ ))
+ and((ss_addr_sk = ca_address_sk -- second OR group: three state lists, each paired with its own net-profit range
+ and ca_country = 'United States'
+ and ca_state in ('CO', 'IL', 'MN')
+ and ss_net_profit between 100 and 200
+ ) or
+ (ss_addr_sk = ca_address_sk
+ and ca_country = 'United States'
+ and ca_state in ('OH', 'MT', 'NM')
+ and ss_net_profit between 150 and 300
+ ) or
+ (ss_addr_sk = ca_address_sk
+ and ca_country = 'United States'
+ and ca_state in ('TX', 'MO', 'MI')
+ and ss_net_profit between 50 and 250
+ ))
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query14.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query14.sql
new file mode 100644
index 0000000..8d9de3c
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query14.sql
@@ -0,0 +1,223 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 14, statement 1: November 2000 sales per channel/brand/class/category for items sold in all three channels, keeping groups above the overall average sale.
+with cross_items as -- items whose (brand, class, category) triple occurs in store, catalog and web sales of 1998 through 2000
+ (select i_item_sk ss_item_sk
+ from item,
+ (select iss.i_brand_id brand_id
+ ,iss.i_class_id class_id
+ ,iss.i_category_id category_id
+ from store_sales
+ ,item iss
+ ,date_dim d1
+ where ss_item_sk = iss.i_item_sk
+ and ss_sold_date_sk = d1.d_date_sk
+ and d1.d_year between 1998 AND 1998 + 2
+ intersect
+ select ics.i_brand_id
+ ,ics.i_class_id
+ ,ics.i_category_id
+ from catalog_sales
+ ,item ics
+ ,date_dim d2
+ where cs_item_sk = ics.i_item_sk
+ and cs_sold_date_sk = d2.d_date_sk
+ and d2.d_year between 1998 AND 1998 + 2
+ intersect
+ select iws.i_brand_id
+ ,iws.i_class_id
+ ,iws.i_category_id
+ from web_sales
+ ,item iws
+ ,date_dim d3
+ where ws_item_sk = iws.i_item_sk
+ and ws_sold_date_sk = d3.d_date_sk
+ and d3.d_year between 1998 AND 1998 + 2) x -- alias added: matches the same CTE in statement 2 and is required by engines that reject unnamed derived tables
+ where i_brand_id = brand_id
+ and i_class_id = class_id
+ and i_category_id = category_id
+),
+ avg_sales as -- single value: average of quantity*list_price over all three channels for 1998 through 2000
+ (select avg(quantity*list_price) average_sales
+ from (select ss_quantity quantity
+ ,ss_list_price list_price
+ from store_sales
+ ,date_dim
+ where ss_sold_date_sk = d_date_sk
+ and d_year between 1998 and 1998 + 2
+ union all
+ select cs_quantity quantity
+ ,cs_list_price list_price
+ from catalog_sales
+ ,date_dim
+ where cs_sold_date_sk = d_date_sk
+ and d_year between 1998 and 1998 + 2
+ union all
+ select ws_quantity quantity
+ ,ws_list_price list_price
+ from web_sales
+ ,date_dim
+ where ws_sold_date_sk = d_date_sk
+ and d_year between 1998 and 1998 + 2) x)
+ select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales)
+ from(
+ select 'store' channel, i_brand_id,i_class_id -- store branch; the catalog and web branches below have the same shape
+ ,i_category_id,sum(ss_quantity*ss_list_price) sales
+ , count(*) number_sales
+ from store_sales
+ ,item
+ ,date_dim
+ where ss_item_sk in (select ss_item_sk from cross_items)
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1998+2
+ and d_moy = 11
+ group by i_brand_id,i_class_id,i_category_id
+ having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)
+ union all
+ select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales
+ from catalog_sales
+ ,item
+ ,date_dim
+ where cs_item_sk in (select ss_item_sk from cross_items)
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1998+2
+ and d_moy = 11
+ group by i_brand_id,i_class_id,i_category_id
+ having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales)
+ union all
+ select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales
+ from web_sales
+ ,item
+ ,date_dim
+ where ws_item_sk in (select ss_item_sk from cross_items)
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1998+2
+ and d_moy = 11
+ group by i_brand_id,i_class_id,i_category_id
+ having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales)
+ ) y
+ group by rollup (channel, i_brand_id,i_class_id,i_category_id) -- rollup adds subtotal rows per channel, brand, class and a grand total
+ order by channel,i_brand_id,i_class_id,i_category_id
+ limit 100;
+with cross_items as -- TPC-DS query 14, statement 2: store sales per brand/class/category for one December week, compared with the same week one year earlier
+ (select i_item_sk ss_item_sk
+ from item,
+ (select iss.i_brand_id brand_id
+ ,iss.i_class_id class_id
+ ,iss.i_category_id category_id
+ from store_sales
+ ,item iss
+ ,date_dim d1
+ where ss_item_sk = iss.i_item_sk
+ and ss_sold_date_sk = d1.d_date_sk
+ and d1.d_year between 1998 AND 1998 + 2
+ intersect
+ select ics.i_brand_id
+ ,ics.i_class_id
+ ,ics.i_category_id
+ from catalog_sales
+ ,item ics
+ ,date_dim d2
+ where cs_item_sk = ics.i_item_sk
+ and cs_sold_date_sk = d2.d_date_sk
+ and d2.d_year between 1998 AND 1998 + 2
+ intersect
+ select iws.i_brand_id
+ ,iws.i_class_id
+ ,iws.i_category_id
+ from web_sales
+ ,item iws
+ ,date_dim d3
+ where ws_item_sk = iws.i_item_sk
+ and ws_sold_date_sk = d3.d_date_sk
+ and d3.d_year between 1998 AND 1998 + 2) x
+ where i_brand_id = brand_id
+ and i_class_id = class_id
+ and i_category_id = category_id
+),
+ avg_sales as -- single value: average of quantity*list_price over all three channels for 1998 through 2000
+(select avg(quantity*list_price) average_sales
+ from (select ss_quantity quantity
+ ,ss_list_price list_price
+ from store_sales
+ ,date_dim
+ where ss_sold_date_sk = d_date_sk
+ and d_year between 1998 and 1998 + 2
+ union all
+ select cs_quantity quantity
+ ,cs_list_price list_price
+ from catalog_sales
+ ,date_dim
+ where cs_sold_date_sk = d_date_sk
+ and d_year between 1998 and 1998 + 2
+ union all
+ select ws_quantity quantity
+ ,ws_list_price list_price
+ from web_sales
+ ,date_dim
+ where ws_sold_date_sk = d_date_sk
+ and d_year between 1998 and 1998 + 2) x)
+ select this_year.channel ty_channel
+ ,this_year.i_brand_id ty_brand
+ ,this_year.i_class_id ty_class
+ ,this_year.i_category_id ty_category
+ ,this_year.sales ty_sales
+ ,this_year.number_sales ty_number_sales
+ ,last_year.channel ly_channel
+ ,last_year.i_brand_id ly_brand
+ ,last_year.i_class_id ly_class
+ ,last_year.i_category_id ly_category
+ ,last_year.sales ly_sales
+ ,last_year.number_sales ly_number_sales
+ from
+ (select 'store' channel, i_brand_id,i_class_id,i_category_id
+ ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales
+ from store_sales
+ ,item
+ ,date_dim
+ where ss_item_sk in (select ss_item_sk from cross_items)
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_week_seq = (select d_week_seq -- week sequence number of 16 December 1999
+ from date_dim
+ where d_year = 1998 + 1
+ and d_moy = 12
+ and d_dom = 16)
+ group by i_brand_id,i_class_id,i_category_id
+ having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year,
+ (select 'store' channel, i_brand_id,i_class_id
+ ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales
+ from store_sales
+ ,item
+ ,date_dim
+ where ss_item_sk in (select ss_item_sk from cross_items)
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_week_seq = (select d_week_seq -- week sequence number of 16 December 1998
+ from date_dim
+ where d_year = 1998
+ and d_moy = 12
+ and d_dom = 16)
+ group by i_brand_id,i_class_id,i_category_id
+ having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year
+ where this_year.i_brand_id= last_year.i_brand_id -- pair the two weeks on identical brand, class and category
+ and this_year.i_class_id = last_year.i_class_id
+ and this_year.i_category_id = last_year.i_category_id
+ order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query15.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query15.sql
new file mode 100644
index 0000000..1ae0c37
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query15.sql
@@ -0,0 +1,33 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 15: total catalog sales price per customer zip code for the second quarter of 2000, restricted by zip prefix, state or sale price.
+select ca_zip
+ ,sum(cs_sales_price)
+ from catalog_sales
+ ,customer
+ ,customer_address
+ ,date_dim
+ where cs_bill_customer_sk = c_customer_sk
+ and c_current_addr_sk = ca_address_sk
+ and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', -- a sale qualifies if ANY of the three OR-ed conditions holds
+ '85392', '85460', '80348', '81792')
+ or ca_state in ('CA','WA','GA')
+ or cs_sales_price > 500)
+ and cs_sold_date_sk = d_date_sk
+ and d_qoy = 2 and d_year = 2000
+ group by ca_zip
+ order by ca_zip
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query16.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query16.sql
new file mode 100644
index 0000000..54b7164
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query16.sql
@@ -0,0 +1,44 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 16: distinct order count, total shipping cost and total net profit of catalog orders shipped to IL in a date window that used several warehouses and were never returned.
+select
+ count(distinct cs_order_number) as "order count"
+ ,sum(cs_ext_ship_cost) as "total shipping cost"
+ ,sum(cs_net_profit) as "total net profit"
+from
+ catalog_sales cs1
+ ,date_dim
+ ,customer_address
+ ,call_center
+where
+ d_date between cast('1999-02-01' as date) and -- typed, zero-padded lower bound (was the bare string '1999-2-01')
+ (cast('1999-02-01' as date) + interval '60' day) -- standard interval literal replaces the vendor-specific "+ 60 days"
+and cs1.cs_ship_date_sk = d_date_sk
+and cs1.cs_ship_addr_sk = ca_address_sk
+and ca_state = 'IL'
+and cs1.cs_call_center_sk = cc_call_center_sk
+and cc_county in ('Williamson County','Williamson County','Williamson County','Williamson County', -- the same county is listed five times; the repeats do not change the result
+ 'Williamson County'
+)
+and exists (select * -- the order has another row shipped from a different warehouse
+ from catalog_sales cs2
+ where cs1.cs_order_number = cs2.cs_order_number
+ and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk)
+and not exists(select * -- the order has no row in catalog_returns
+ from catalog_returns cr1
+ where cs1.cs_order_number = cr1.cr_order_number)
+order by count(distinct cs_order_number)
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query17.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query17.sql
new file mode 100644
index 0000000..19ae6b5
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query17.sql
@@ -0,0 +1,58 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 17: count, average, sample standard deviation and their ratio for quantities of items sold in store, returned, and bought again through the catalog, per item and store state.
+select i_item_id
+ ,i_item_desc
+ ,s_state
+ ,count(ss_quantity) as store_sales_quantitycount
+ ,avg(ss_quantity) as store_sales_quantityave
+ ,stddev_samp(ss_quantity) as store_sales_quantitystdev
+ ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov -- ratio of standard deviation to mean
+ ,count(sr_return_quantity) as store_returns_quantitycount
+ ,avg(sr_return_quantity) as store_returns_quantityave
+ ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev
+ ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov
+ ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave
+ ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev
+ ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov
+ from store_sales
+ ,store_returns
+ ,catalog_sales
+ ,date_dim d1
+ ,date_dim d2
+ ,date_dim d3
+ ,store
+ ,item
+ where d1.d_quarter_name = '1998Q1' -- d1: date of the store sale
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk = ss_item_sk
+ and s_store_sk = ss_store_sk
+ and ss_customer_sk = sr_customer_sk -- a return matches its sale on customer, item and ticket number
+ and ss_item_sk = sr_item_sk
+ and ss_ticket_number = sr_ticket_number
+ and sr_returned_date_sk = d2.d_date_sk -- d2: date of the return
+ and d2.d_quarter_name in ('1998Q1','1998Q2','1998Q3')
+ and sr_customer_sk = cs_bill_customer_sk -- the catalog purchase matches the return on customer and item
+ and sr_item_sk = cs_item_sk
+ and cs_sold_date_sk = d3.d_date_sk -- d3: date of the catalog sale
+ and d3.d_quarter_name in ('1998Q1','1998Q2','1998Q3')
+ group by i_item_id
+ ,i_item_desc
+ ,s_state
+ order by i_item_id
+ ,i_item_desc
+ ,s_state
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query18.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query18.sql
new file mode 100644
index 0000000..0f03060
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query18.sql
@@ -0,0 +1,47 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 18: averages of catalog-sale measures and customer attributes for 2001, rolled up by item, country, state and county.
+select i_item_id,
+ ca_country,
+ ca_state,
+ ca_county,
+ avg( cast(cs_quantity as decimal(12,2))) agg1, -- every measure is cast to decimal(12,2) before averaging
+ avg( cast(cs_list_price as decimal(12,2))) agg2,
+ avg( cast(cs_coupon_amt as decimal(12,2))) agg3,
+ avg( cast(cs_sales_price as decimal(12,2))) agg4,
+ avg( cast(cs_net_profit as decimal(12,2))) agg5,
+ avg( cast(c_birth_year as decimal(12,2))) agg6,
+ avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7
+ from catalog_sales, customer_demographics cd1, -- cd1: demographics recorded on the sale (cs_bill_cdemo_sk)
+ customer_demographics cd2, customer, customer_address, date_dim, item
+ where cs_sold_date_sk = d_date_sk and
+ cs_item_sk = i_item_sk and
+ cs_bill_cdemo_sk = cd1.cd_demo_sk and
+ cs_bill_customer_sk = c_customer_sk and
+ cd1.cd_gender = 'M' and
+ cd1.cd_education_status = 'College' and
+ c_current_cdemo_sk = cd2.cd_demo_sk and -- cd2 is only joined; none of its columns is selected or filtered
+ c_current_addr_sk = ca_address_sk and
+ c_birth_month in (9,5,12,4,1,10) and
+ d_year = 2001 and
+ ca_state in ('ND','WI','AL'
+ ,'NC','OK','MS','TN')
+ group by rollup (i_item_id, ca_country, ca_state, ca_county)
+ order by ca_country,
+ ca_state,
+ ca_county,
+ i_item_id
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query19.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query19.sql
new file mode 100644
index 0000000..7a85a10
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query19.sql
@@ -0,0 +1,38 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 19: November 1999 store revenue per brand and manufacturer for items of manager 7, counting only sales where customer and store zip prefixes differ.
+select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact,
+ sum(ss_ext_sales_price) ext_price
+ from date_dim, store_sales, item,customer,customer_address,store
+ where d_date_sk = ss_sold_date_sk
+ and ss_item_sk = i_item_sk
+ and i_manager_id=7
+ and d_moy=11
+ and d_year=1999
+ and ss_customer_sk = c_customer_sk
+ and c_current_addr_sk = ca_address_sk
+ and substr(ca_zip,1,5) <> substr(s_zip,1,5) -- compare only the first five characters of both zip codes
+ and ss_store_sk = s_store_sk
+ group by i_brand
+ ,i_brand_id
+ ,i_manufact_id
+ ,i_manufact
+ order by ext_price desc
+ ,i_brand
+ ,i_brand_id
+ ,i_manufact_id
+ ,i_manufact
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query2.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query2.sql
new file mode 100644
index 0000000..9fddb0d
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query2.sql
@@ -0,0 +1,73 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 2: per-weekday ratio of combined web and catalog sales between each 2001 week and the week 53 week-sequence numbers later in 2002.
+with wscs as -- web and catalog sales stacked into one (sold_date_sk, sales_price) set
+ (select sold_date_sk
+ ,sales_price
+ from (select ws_sold_date_sk sold_date_sk
+ ,ws_ext_sales_price sales_price
+ from web_sales
+ union all
+ select cs_sold_date_sk sold_date_sk
+ ,cs_ext_sales_price sales_price
+ from catalog_sales)),
+ wswscs as -- one row per week sequence number with a separate sales total for each day of the week
+ (select d_week_seq,
+ sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales,
+ sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales,
+ sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales,
+ sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales,
+ sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales,
+ sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales,
+ sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales
+ from wscs
+ ,date_dim
+ where d_date_sk = sold_date_sk
+ group by d_week_seq)
+ select d_week_seq1
+ ,round(sun_sales1/sun_sales2,2)
+ ,round(mon_sales1/mon_sales2,2)
+ ,round(tue_sales1/tue_sales2,2)
+ ,round(wed_sales1/wed_sales2,2)
+ ,round(thu_sales1/thu_sales2,2)
+ ,round(fri_sales1/fri_sales2,2)
+ ,round(sat_sales1/sat_sales2,2)
+ from
+ (select wswscs.d_week_seq d_week_seq1 -- y: weekly totals for weeks that have a date_dim row in 2001
+ ,sun_sales sun_sales1
+ ,mon_sales mon_sales1
+ ,tue_sales tue_sales1
+ ,wed_sales wed_sales1
+ ,thu_sales thu_sales1
+ ,fri_sales fri_sales1
+ ,sat_sales sat_sales1
+ from wswscs,date_dim
+ where date_dim.d_week_seq = wswscs.d_week_seq and
+ d_year = 2001) y,
+ (select wswscs.d_week_seq d_week_seq2 -- z: the same totals for weeks that have a date_dim row in 2002
+ ,sun_sales sun_sales2
+ ,mon_sales mon_sales2
+ ,tue_sales tue_sales2
+ ,wed_sales wed_sales2
+ ,thu_sales thu_sales2
+ ,fri_sales fri_sales2
+ ,sat_sales sat_sales2
+ from wswscs
+ ,date_dim
+ where date_dim.d_week_seq = wswscs.d_week_seq and
+ d_year = 2001+1) z
+ where d_week_seq1=d_week_seq2-53 -- pair each 2001 week with the week whose sequence number is 53 higher
+ order by d_week_seq1
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query20.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query20.sql
new file mode 100644
index 0000000..95e960b
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query20.sql
@@ -0,0 +1,43 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 20: catalog revenue per item and each item's percentage share of its class's revenue, for three item categories in a date window.
+select i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ ,sum(cs_ext_sales_price) as itemrevenue
+ ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over -- inner sum: revenue of this group; outer sum: window total of the group revenues
+ (partition by i_class) as revenueratio
+ from catalog_sales
+ ,item
+ ,date_dim
+ where cs_item_sk = i_item_sk
+ and i_category in ('Jewelry', 'Sports', 'Books')
+ and cs_sold_date_sk = d_date_sk
+ and d_date between cast('2001-01-12' as date) -- window start (inclusive)
+ and (cast('2001-01-12' as date) + interval '30' day) -- window end (inclusive); standard interval literal replaces the vendor-specific "+ 30 days"
+ group by i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ order by i_category
+ ,i_class
+ ,i_item_id
+ ,i_item_desc
+ ,revenueratio
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query21.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query21.sql
new file mode 100644
index 0000000..3ba811b
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query21.sql
@@ -0,0 +1,43 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 21: per warehouse and item, inventory on hand in the 30 days before versus the 30 days from 1998-04-08, keeping pairs whose after/before ratio lies between 2/3 and 3/2.
+select *
+ from(select w_warehouse_name
+ ,i_item_id
+ ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date))
+ then inv_quantity_on_hand
+ else 0 end) as inv_before
+ ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date))
+ then inv_quantity_on_hand
+ else 0 end) as inv_after
+ from inventory
+ ,warehouse
+ ,item
+ ,date_dim
+ where i_current_price between 0.99 and 1.49
+ and i_item_sk = inv_item_sk
+ and inv_warehouse_sk = w_warehouse_sk
+ and inv_date_sk = d_date_sk
+ and d_date between (cast ('1998-04-08' as date) - interval '30' day) -- standard interval literal replaces the vendor-specific "- 30 days"
+ and (cast ('1998-04-08' as date) + interval '30' day) -- standard interval literal replaces the vendor-specific "+ 30 days"
+ group by w_warehouse_name, i_item_id) x
+ where (case when inv_before > 0 -- guard against division by zero; a null ratio fails the between test
+ then inv_after / inv_before -- NOTE(review): if both sums are integer-typed, some engines truncate this division; confirm against the schema
+ else null
+ end) between 2.0/3.0 and 3.0/2.0
+ order by w_warehouse_name
+ ,i_item_id
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query22.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query22.sql
new file mode 100644
index 0000000..e983b7b
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query22.sql
@@ -0,0 +1,33 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 22: average inventory quantity on hand over twelve consecutive month sequence numbers, rolled up by product name, brand, class and category.
+select i_product_name
+ ,i_brand
+ ,i_class
+ ,i_category
+ ,avg(inv_quantity_on_hand) qoh
+ from inventory
+ ,date_dim
+ ,item
+ where inv_date_sk=d_date_sk
+ and inv_item_sk=i_item_sk
+ and d_month_seq between 1212 and 1212 + 11 -- inclusive range of 12 month sequence values
+ group by rollup(i_product_name
+ ,i_brand
+ ,i_class
+ ,i_category)
+order by qoh, i_product_name, i_brand, i_class, i_category
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query23.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query23.sql
new file mode 100644
index 0000000..0ee1dab
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query23.sql
@@ -0,0 +1,120 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 23, statement 1: total January 1999 catalog and web sales of frequently store-sold items bought by the best store customers.
+with frequent_ss_items as -- (description prefix, item, sold date) groups with more than 4 store-sale rows in 1999 through 2002
+ (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
+ from store_sales
+ ,date_dim
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_item_sk = i_item_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by substr(i_item_desc,1,30),i_item_sk,d_date
+ having count(*) >4),
+ max_store_sales as -- single value: the largest per-customer store spend (quantity*sales price) in 1999 through 2002
+ (select max(csales) tpcds_cmax
+ from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales
+ from store_sales
+ ,customer
+ ,date_dim
+ where ss_customer_sk = c_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by c_customer_sk)),
+ best_ss_customer as -- customers whose store spend (no date filter here) exceeds 95% of that maximum
+ (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
+ from store_sales
+ ,customer
+ where ss_customer_sk = c_customer_sk
+ group by c_customer_sk
+ having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select
+ *
+from
+ max_store_sales))
+ select sum(sales)
+ from (select cs_quantity*cs_list_price sales -- catalog branch; the web branch below has the same shape
+ from catalog_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and cs_sold_date_sk = d_date_sk
+ and cs_item_sk in (select item_sk from frequent_ss_items)
+ and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)
+ union all
+ select ws_quantity*ws_list_price sales
+ from web_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and ws_sold_date_sk = d_date_sk
+ and ws_item_sk in (select item_sk from frequent_ss_items)
+ and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))
+ limit 100;
+with frequent_ss_items as -- TPC-DS query 23, statement 2: same three CTEs as statement 1, but sales are reported per customer name instead of as one total
+ (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
+ from store_sales
+ ,date_dim
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_item_sk = i_item_sk
+ and d_year in (1999,1999 + 1,1999 + 2,1999 + 3)
+ group by substr(i_item_desc,1,30),i_item_sk,d_date
+ having count(*) >4),
+ max_store_sales as -- single value: the largest per-customer store spend (quantity*sales price) in 1999 through 2002
+ (select max(csales) tpcds_cmax
+ from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales
+ from store_sales
+ ,customer
+ ,date_dim
+ where ss_customer_sk = c_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by c_customer_sk)),
+ best_ss_customer as -- customers whose store spend (no date filter here) exceeds 95% of that maximum
+ (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
+ from store_sales
+ ,customer
+ where ss_customer_sk = c_customer_sk
+ group by c_customer_sk
+ having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select
+ *
+ from max_store_sales))
+ select c_last_name,c_first_name,sales
+ from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales -- catalog totals per customer name for January 1999
+ from catalog_sales
+ ,customer
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and cs_sold_date_sk = d_date_sk
+ and cs_item_sk in (select item_sk from frequent_ss_items)
+ and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)
+ and cs_bill_customer_sk = c_customer_sk
+ group by c_last_name,c_first_name
+ union all
+ select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales -- web totals per customer name; kept as separate rows by union all
+ from web_sales
+ ,customer
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and ws_sold_date_sk = d_date_sk
+ and ws_item_sk in (select item_sk from frequent_ss_items)
+ and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)
+ and ws_bill_customer_sk = c_customer_sk
+ group by c_last_name,c_first_name)
+ order by c_last_name,c_first_name,sales
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query24.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query24.sql
new file mode 100644
index 0000000..3f45c4f
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query24.sql
@@ -0,0 +1,119 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 24 (statement 1 of 2): per customer and store, summed sales price (netpaid) of returned store sales of orchid items, kept only above 5 percent of the average netpaid.
+with ssales as
+(select c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,ca_state
+ ,s_state
+ ,i_color
+ ,i_current_price
+ ,i_manager_id
+ ,i_units
+ ,i_size
+ ,sum(ss_sales_price) netpaid
+from store_sales
+ ,store_returns
+ ,store
+ ,item
+ ,customer
+ ,customer_address
+where ss_ticket_number = sr_ticket_number -- inner join to store_returns: only sale lines with a matching return survive
+ and ss_item_sk = sr_item_sk
+ and ss_customer_sk = c_customer_sk
+ and ss_item_sk = i_item_sk
+ and ss_store_sk = s_store_sk
+ and c_current_addr_sk = ca_address_sk
+ and c_birth_country <> upper(ca_country) -- birth country differs from the country of the current address
+ and s_zip = ca_zip
+and s_market_id=7
+group by c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,ca_state
+ ,s_state
+ ,i_color
+ ,i_current_price
+ ,i_manager_id
+ ,i_units
+ ,i_size)
+select c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,sum(netpaid) paid
+from ssales
+where i_color = 'orchid'
+group by c_last_name
+ ,c_first_name
+ ,s_store_name
+having sum(netpaid) > (select 0.05*avg(netpaid) -- threshold is computed over every ssales row, not only the orchid ones
+ from ssales)
+order by c_last_name
+ ,c_first_name
+ ,s_store_name
+;
+with ssales as -- TPC-DS query 24 (statement 2 of 2): same ssales aggregation as the first statement in this file, outer filter is the chiffon color
+(select c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,ca_state
+ ,s_state
+ ,i_color
+ ,i_current_price
+ ,i_manager_id
+ ,i_units
+ ,i_size
+ ,sum(ss_sales_price) netpaid
+from store_sales
+ ,store_returns
+ ,store
+ ,item
+ ,customer
+ ,customer_address
+where ss_ticket_number = sr_ticket_number -- inner join to store_returns: only sale lines with a matching return survive
+ and ss_item_sk = sr_item_sk
+ and ss_customer_sk = c_customer_sk
+ and ss_item_sk = i_item_sk
+ and ss_store_sk = s_store_sk
+ and c_current_addr_sk = ca_address_sk
+ and c_birth_country <> upper(ca_country) -- birth country differs from the country of the current address
+ and s_zip = ca_zip
+ and s_market_id = 7
+group by c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,ca_state
+ ,s_state
+ ,i_color
+ ,i_current_price
+ ,i_manager_id
+ ,i_units
+ ,i_size)
+select c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,sum(netpaid) paid
+from ssales
+where i_color = 'chiffon'
+group by c_last_name
+ ,c_first_name
+ ,s_store_name
+having sum(netpaid) > (select 0.05*avg(netpaid) -- threshold is computed over every ssales row, not only the chiffon ones
+ from ssales)
+order by c_last_name
+ ,c_first_name
+ ,s_store_name
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query25.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query25.sql
new file mode 100644
index 0000000..be825fd
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query25.sql
@@ -0,0 +1,61 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 25: per item and store, store profit, return loss and catalog profit for items sold in April 2000, returned in months 4-10 of 2000 and bought via catalog by the same customer in months 4-10 of 2000.
+select
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ ,sum(ss_net_profit) as store_sales_profit
+ ,sum(sr_net_loss) as store_returns_loss
+ ,sum(cs_net_profit) as catalog_sales_profit
+ from
+ store_sales
+ ,store_returns
+ ,catalog_sales
+ ,date_dim d1 -- d1: store sale date
+ ,date_dim d2 -- d2: store return date
+ ,date_dim d3 -- d3: catalog sale date
+ ,store
+ ,item
+ where
+ d1.d_moy = 4
+ and d1.d_year = 2000
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk = ss_item_sk
+ and s_store_sk = ss_store_sk
+ and ss_customer_sk = sr_customer_sk -- the return must match the sale on customer, item and ticket
+ and ss_item_sk = sr_item_sk
+ and ss_ticket_number = sr_ticket_number
+ and sr_returned_date_sk = d2.d_date_sk
+ and d2.d_moy between 4 and 10
+ and d2.d_year = 2000
+ and sr_customer_sk = cs_bill_customer_sk -- the catalog sale must match the return on customer and item
+ and sr_item_sk = cs_item_sk
+ and cs_sold_date_sk = d3.d_date_sk
+ and d3.d_moy between 4 and 10
+ and d3.d_year = 2000
+ group by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ order by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query26.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query26.sql
new file mode 100644
index 0000000..772d545
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query26.sql
@@ -0,0 +1,34 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 26: per item, average quantity, list price, coupon amount and sales price of 1998 catalog sales for one demographic group and promotions with the email or event channel flag set to N.
+select i_item_id,
+ avg(cs_quantity) agg1,
+ avg(cs_list_price) agg2,
+ avg(cs_coupon_amt) agg3,
+ avg(cs_sales_price) agg4
+ from catalog_sales, customer_demographics, date_dim, item, promotion
+ where cs_sold_date_sk = d_date_sk and
+ cs_item_sk = i_item_sk and
+ cs_bill_cdemo_sk = cd_demo_sk and
+ cs_promo_sk = p_promo_sk and
+ cd_gender = 'F' and
+ cd_marital_status = 'W' and
+ cd_education_status = 'Primary' and
+ (p_channel_email = 'N' or p_channel_event = 'N') and
+ d_year = 1998
+ group by i_item_id
+ order by i_item_id
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query27.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query27.sql
new file mode 100644
index 0000000..37cf1f5
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query27.sql
@@ -0,0 +1,36 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 27: average quantity, list price, coupon amount and sales price of 1998 store sales for one demographic group, rolled up by item and store state.
+select i_item_id,
+ s_state, grouping(s_state) g_state, -- g_state flags the rollup rows where s_state is aggregated away
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'F' and
+ cd_marital_status = 'W' and
+ cd_education_status = 'Primary' and
+ d_year = 1998 and
+ s_state in ('TN','TN', 'TN', 'TN', 'TN', 'TN') -- the list repeats one value, so this is equivalent to s_state = 'TN'
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query28.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query28.sql
new file mode 100644
index 0000000..afe2cf8
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query28.sql
@@ -0,0 +1,66 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 28: six independent aggregates over store_sales (average, count and distinct count of ss_list_price), one per ss_quantity bucket, cross-joined into a single row.
+select *
+from (select avg(ss_list_price) B1_LP
+ ,count(ss_list_price) B1_CNT
+ ,count(distinct ss_list_price) B1_CNTD
+ from store_sales
+ where ss_quantity between 0 and 5
+ and (ss_list_price between 11 and 11+10
+ or ss_coupon_amt between 460 and 460+1000
+ or ss_wholesale_cost between 14 and 14+20)) B1,
+ (select avg(ss_list_price) B2_LP
+ ,count(ss_list_price) B2_CNT
+ ,count(distinct ss_list_price) B2_CNTD
+ from store_sales
+ where ss_quantity between 6 and 10
+ and (ss_list_price between 91 and 91+10
+ or ss_coupon_amt between 1430 and 1430+1000
+ or ss_wholesale_cost between 32 and 32+20)) B2,
+ (select avg(ss_list_price) B3_LP
+ ,count(ss_list_price) B3_CNT
+ ,count(distinct ss_list_price) B3_CNTD
+ from store_sales
+ where ss_quantity between 11 and 15
+ and (ss_list_price between 66 and 66+10
+ or ss_coupon_amt between 920 and 920+1000
+ or ss_wholesale_cost between 4 and 4+20)) B3,
+ (select avg(ss_list_price) B4_LP
+ ,count(ss_list_price) B4_CNT
+ ,count(distinct ss_list_price) B4_CNTD
+ from store_sales
+ where ss_quantity between 16 and 20
+ and (ss_list_price between 142 and 142+10
+ or ss_coupon_amt between 3054 and 3054+1000
+ or ss_wholesale_cost between 80 and 80+20)) B4,
+ (select avg(ss_list_price) B5_LP
+ ,count(ss_list_price) B5_CNT
+ ,count(distinct ss_list_price) B5_CNTD
+ from store_sales
+ where ss_quantity between 21 and 25
+ and (ss_list_price between 135 and 135+10
+ or ss_coupon_amt between 14180 and 14180+1000
+ or ss_wholesale_cost between 38 and 38+20)) B5,
+ (select avg(ss_list_price) B6_LP
+ ,count(ss_list_price) B6_CNT
+ ,count(distinct ss_list_price) B6_CNTD
+ from store_sales
+ where ss_quantity between 26 and 30
+ and (ss_list_price between 28 and 28+10
+ or ss_coupon_amt between 2513 and 2513+1000
+ or ss_wholesale_cost between 42 and 42+20)) B6
+limit 100 -- each bucket subquery has no group by and yields one row, so the cross join is a single row
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query29.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query29.sql
new file mode 100644
index 0000000..5db4817
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query29.sql
@@ -0,0 +1,60 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 29: per item and store, quantities sold in stores in April 1999, returned in months 4-7 of 1999 and bought via catalog by the same customer in 1999-2001.
+select
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ ,sum(ss_quantity) as store_sales_quantity
+ ,sum(sr_return_quantity) as store_returns_quantity
+ ,sum(cs_quantity) as catalog_sales_quantity
+ from
+ store_sales
+ ,store_returns
+ ,catalog_sales
+ ,date_dim d1 -- d1: store sale date
+ ,date_dim d2 -- d2: store return date
+ ,date_dim d3 -- d3: catalog sale date
+ ,store
+ ,item
+ where
+ d1.d_moy = 4
+ and d1.d_year = 1999
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk = ss_item_sk
+ and s_store_sk = ss_store_sk
+ and ss_customer_sk = sr_customer_sk -- the return must match the sale on customer, item and ticket
+ and ss_item_sk = sr_item_sk
+ and ss_ticket_number = sr_ticket_number
+ and sr_returned_date_sk = d2.d_date_sk
+ and d2.d_moy between 4 and 4 + 3
+ and d2.d_year = 1999
+ and sr_customer_sk = cs_bill_customer_sk -- the catalog sale must match the return on customer and item
+ and sr_item_sk = cs_item_sk
+ and cs_sold_date_sk = d3.d_date_sk
+ and d3.d_year in (1999,1999+1,1999+2)
+ group by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ order by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query3.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query3.sql
new file mode 100644
index 0000000..fa9025e
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query3.sql
@@ -0,0 +1,34 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 3: summed extended store sales price per year and brand for items of manufacturer 436 sold in month 12.
+select dt.d_year
+ ,item.i_brand_id brand_id
+ ,item.i_brand brand
+ ,sum(ss_ext_sales_price) sum_agg
+ from date_dim dt
+ ,store_sales
+ ,item
+ where dt.d_date_sk = store_sales.ss_sold_date_sk
+ and store_sales.ss_item_sk = item.i_item_sk
+ and item.i_manufact_id = 436
+ and dt.d_moy=12
+ group by dt.d_year
+ ,item.i_brand
+ ,item.i_brand_id
+ order by dt.d_year
+ ,sum_agg desc -- within a year, largest summed sales first
+ ,brand_id
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query30.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query30.sql
new file mode 100644
index 0000000..fabdf70
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query30.sql
@@ -0,0 +1,44 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 30: customers currently living in IL whose 2002 web return total for a state exceeds 1.2 times the average return total for that state.
+with customer_total_return as
+ (select wr_returning_customer_sk as ctr_customer_sk
+ ,ca_state as ctr_state, -- state of the returning address, not of the customer current address
+ sum(wr_return_amt) as ctr_total_return
+ from web_returns
+ ,date_dim
+ ,customer_address
+ where wr_returned_date_sk = d_date_sk
+ and d_year =2002
+ and wr_returning_addr_sk = ca_address_sk
+ group by wr_returning_customer_sk
+ ,ca_state)
+ select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+ ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+ ,c_last_review_date_sk,ctr_total_return
+ from customer_total_return ctr1
+ ,customer_address
+ ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 -- correlated on state: compares against the average of the same ctr_state
+ from customer_total_return ctr2
+ where ctr1.ctr_state = ctr2.ctr_state)
+ and ca_address_sk = c_current_addr_sk
+ and ca_state = 'IL'
+ and ctr1.ctr_customer_sk = c_customer_sk
+ order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+ ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+ ,c_last_review_date_sk,ctr_total_return
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query31.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query31.sql
new file mode 100644
index 0000000..4217c55
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query31.sql
@@ -0,0 +1,65 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 31: counties where the web sales growth ratio beats the store sales growth ratio for both Q1 to Q2 and Q2 to Q3 of 2000.
+with ss as
+ (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales
+ from store_sales,date_dim,customer_address
+ where ss_sold_date_sk = d_date_sk
+ and ss_addr_sk=ca_address_sk
+ group by ca_county,d_qoy, d_year),
+ ws as
+ (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales
+ from web_sales,date_dim,customer_address
+ where ws_sold_date_sk = d_date_sk
+ and ws_bill_addr_sk=ca_address_sk
+ group by ca_county,d_qoy, d_year)
+ select
+ ss1.ca_county
+ ,ss1.d_year
+ ,ws2.web_sales/ws1.web_sales web_q1_q2_increase
+ ,ss2.store_sales/ss1.store_sales store_q1_q2_increase
+ ,ws3.web_sales/ws2.web_sales web_q2_q3_increase
+ ,ss3.store_sales/ss2.store_sales store_q2_q3_increase
+ from
+ ss ss1 -- ss1, ss2, ss3: store sales for quarters 1, 2 and 3
+ ,ss ss2
+ ,ss ss3
+ ,ws ws1 -- ws1, ws2, ws3: web sales for quarters 1, 2 and 3
+ ,ws ws2
+ ,ws ws3
+ where
+ ss1.d_qoy = 1
+ and ss1.d_year = 2000
+ and ss1.ca_county = ss2.ca_county
+ and ss2.d_qoy = 2
+ and ss2.d_year = 2000
+ and ss2.ca_county = ss3.ca_county
+ and ss3.d_qoy = 3
+ and ss3.d_year = 2000
+ and ss1.ca_county = ws1.ca_county
+ and ws1.d_qoy = 1
+ and ws1.d_year = 2000
+ and ws1.ca_county = ws2.ca_county
+ and ws2.d_qoy = 2
+ and ws2.d_year = 2000
+ and ws1.ca_county = ws3.ca_county
+ and ws3.d_qoy = 3
+ and ws3.d_year =2000
+ and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end -- a non-positive denominator yields null, so the comparison is not true and the row is dropped
+ > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end
+ and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end
+ > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end
+ order by ss1.d_year
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query32.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query32.sql
new file mode 100644
index 0000000..70eb508
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query32.sql
@@ -0,0 +1,41 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 32: total catalog discount for manufacturer 269 items in a 90 day window, counting only sales whose discount exceeds 1.3 times the average discount of the same item in that window.
+select sum(cs_ext_discount_amt) as "excess discount amount"
+from
+ catalog_sales
+ ,item
+ ,date_dim
+where
+i_manufact_id = 269
+and i_item_sk = cs_item_sk
+and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + interval '90' day) -- standard interval literal replaces the dialect-specific date + 90 days form
+and d_date_sk = cs_sold_date_sk
+and cs_ext_discount_amt
+ > (
+ select
+ 1.3 * avg(cs_ext_discount_amt)
+ from
+ catalog_sales
+ ,date_dim
+ where
+ cs_item_sk = i_item_sk -- correlated on the outer item: average is per item
+ and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + interval '90' day) -- same window as the outer query
+ and d_date_sk = cs_sold_date_sk
+ )
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query33.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query33.sql
new file mode 100644
index 0000000..bb845af
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query33.sql
@@ -0,0 +1,88 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 33: per manufacturer, extended sales price summed over store, catalog and web channels in March 1999 at GMT offset -5, for manufacturers that have an item in the Books category.
+with ss as (
+ select
+ i_manufact_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -5
+ group by i_manufact_id),
+ cs as (
+ select
+ i_manufact_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -5
+ group by i_manufact_id),
+ ws as (
+ select
+ i_manufact_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -5
+ group by i_manufact_id)
+ select i_manufact_id ,sum(total_sales) total_sales
+ from (select * from ss -- the three channel CTEs share the same two-column shape, so they can be stacked
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_manufact_id
+ order by total_sales
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query34.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query34.sql
new file mode 100644
index 0000000..b2c5283
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query34.sql
@@ -0,0 +1,44 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 34: customers with store tickets of 15 to 20 line items bought early or late in the month during 1998-2000 in Williamson County stores, for selected household demographics.
+select c_last_name
+ ,c_first_name
+ ,c_salutation
+ ,c_preferred_cust_flag
+ ,ss_ticket_number
+ ,cnt from
+ (select ss_ticket_number
+ ,ss_customer_sk
+ ,count(*) cnt -- number of store_sales rows per ticket and customer
+ from store_sales,date_dim,store,household_demographics
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28)
+ and (household_demographics.hd_buy_potential = '>10000' or
+ household_demographics.hd_buy_potential = 'Unknown')
+ and household_demographics.hd_vehicle_count > 0
+ and (case when household_demographics.hd_vehicle_count > 0 -- guard against division by zero in the ratio below
+ then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count
+ else null
+ end) > 1.2
+ and date_dim.d_year in (1998,1998+1,1998+2)
+ and store.s_county in ('Williamson County','Williamson County','Williamson County','Williamson County', -- the list repeats one value, so a single county is selected
+ 'Williamson County','Williamson County','Williamson County','Williamson County')
+ group by ss_ticket_number,ss_customer_sk) dn,customer
+ where ss_customer_sk = c_customer_sk
+ and cnt between 15 and 20
+ order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query35.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query35.sql
new file mode 100644
index 0000000..86ffd3b
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query35.sql
@@ -0,0 +1,71 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 35: demographic statistics by state for customers with a store purchase and also a web or catalog purchase in quarters 1-3 of 1999.
+select
+ ca_state,
+ cd_gender,
+ cd_marital_status,
+ cd_dep_count,
+ count(*) cnt1,
+ avg(cd_dep_count),
+ max(cd_dep_count),
+ sum(cd_dep_count),
+ cd_dep_employed_count,
+ count(*) cnt2,
+ avg(cd_dep_employed_count),
+ max(cd_dep_employed_count),
+ sum(cd_dep_employed_count),
+ cd_dep_college_count,
+ count(*) cnt3,
+ avg(cd_dep_college_count),
+ max(cd_dep_college_count),
+ sum(cd_dep_college_count)
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select * -- customer has at least one store sale in the period
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_qoy < 4) and
+ (exists (select * -- and at least one web sale or catalog sale in the same period
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_qoy < 4) or
+ exists (select *
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_qoy < 4))
+ group by ca_state,
+ cd_gender,
+ cd_marital_status,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+ order by ca_state,
+ cd_gender,
+ cd_marital_status,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query36.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query36.sql
new file mode 100644
index 0000000..2436ef3
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query36.sql
@@ -0,0 +1,43 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 36: gross margin (net profit over extended sales price) of 2000 store sales in TN stores, rolled up by item category and class and ranked within each rollup parent.
+select
+ sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin
+ ,i_category
+ ,i_class
+ ,grouping(i_category)+grouping(i_class) as lochierarchy -- 0 for category and class rows, higher for rollup subtotal rows
+ ,rank() over (
+ partition by grouping(i_category)+grouping(i_class),
+ case when grouping(i_class) = 0 then i_category end
+ order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent
+ from
+ store_sales
+ ,date_dim d1
+ ,item
+ ,store
+ where
+ d1.d_year = 2000
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk = ss_item_sk
+ and s_store_sk = ss_store_sk
+ and s_state in ('TN','TN','TN','TN', -- the list repeats one value, so this is equivalent to s_state = 'TN'
+ 'TN','TN','TN','TN')
+ group by rollup(i_category,i_class)
+ order by
+ lochierarchy desc
+ ,case when lochierarchy = 0 then i_category end
+ ,rank_within_parent
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query37.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query37.sql
new file mode 100644
index 0000000..24237b7
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query37.sql
@@ -0,0 +1,30 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 37: items priced 22 to 52 from four manufacturers that appear in catalog_sales and have an inventory quantity of 100 to 500 on a date within 60 days from 2001-06-02.
+select i_item_id
+ ,i_item_desc
+ ,i_current_price
+ from item, inventory, date_dim, catalog_sales
+ where i_current_price between 22 and 22 + 30
+ and inv_item_sk = i_item_sk
+ and d_date_sk=inv_date_sk
+ and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + interval '60' day) -- standard interval literal replaces the dialect-specific date + 60 days form
+ and i_manufact_id in (678,964,918,849)
+ and inv_quantity_on_hand between 100 and 500
+ and cs_item_sk = i_item_sk -- join to catalog_sales only restricts to items that were sold, the group by removes the duplicates
+ group by i_item_id,i_item_desc,i_current_price
+ order by i_item_id
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query38.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query38.sql
new file mode 100644
index 0000000..3e781ad
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query38.sql
@@ -0,0 +1,36 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+-- TPC-DS query 38: counts the distinct (last name, first name, date) triples that
+-- occur in store, catalog and web sales alike (INTERSECT of the three channels),
+-- restricted to month sequences 1212 through 1212 + 11.
+SELECT count(*) FROM (
+  SELECT DISTINCT c_last_name, c_first_name, d_date
+  FROM store_sales, date_dim, customer
+  WHERE d_month_seq BETWEEN 1212 AND 1212 + 11
+    AND store_sales.ss_sold_date_sk = date_dim.d_date_sk AND store_sales.ss_customer_sk = customer.c_customer_sk
+  INTERSECT
+  SELECT DISTINCT c_last_name, c_first_name, d_date
+  FROM catalog_sales, date_dim, customer
+  WHERE d_month_seq BETWEEN 1212 AND 1212 + 11
+    AND catalog_sales.cs_sold_date_sk = date_dim.d_date_sk AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
+  INTERSECT
+  SELECT DISTINCT c_last_name, c_first_name, d_date
+  FROM web_sales, date_dim, customer
+  WHERE d_month_seq BETWEEN 1212 AND 1212 + 11
+    AND web_sales.ws_sold_date_sk = date_dim.d_date_sk AND web_sales.ws_bill_customer_sk = customer.c_customer_sk
+) hot_cust
+LIMIT 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query39.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query39.sql
new file mode 100644
index 0000000..aaed22a
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query39.sql
@@ -0,0 +1,66 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 39: item/warehouse pairs whose 1998 inventory coefficient of variation (stdev/mean) exceeds 1 in both month 4 and month 5. NOTE(review): the file holds two statements separated by ';' - confirm the query runner accepts that.
+with inv as
+(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+ ,stdev,mean, case mean when 0 then null else stdev/mean end cov
+ from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+ ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean
+ from inventory
+ ,item
+ ,warehouse
+ ,date_dim
+ where inv_item_sk = i_item_sk
+ and inv_warehouse_sk = w_warehouse_sk
+ and inv_date_sk = d_date_sk
+ and d_year =1998
+ group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo
+ where case mean when 0 then 0 else stdev/mean end > 1) -- a zero mean counts as ratio 0, so such groups are dropped
+select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov
+ ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov
+from inv inv1,inv inv2
+where inv1.i_item_sk = inv2.i_item_sk
+ and inv1.w_warehouse_sk = inv2.w_warehouse_sk
+ and inv1.d_moy=4
+ and inv2.d_moy=4+1
+order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov
+ ,inv2.d_moy,inv2.mean, inv2.cov
+; -- second statement: same query, additionally requiring inv1.cov > 1.5
+with inv as
+(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+ ,stdev,mean, case mean when 0 then null else stdev/mean end cov
+ from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+ ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean
+ from inventory
+ ,item
+ ,warehouse
+ ,date_dim
+ where inv_item_sk = i_item_sk
+ and inv_warehouse_sk = w_warehouse_sk
+ and inv_date_sk = d_date_sk
+ and d_year =1998
+ group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo
+ where case mean when 0 then 0 else stdev/mean end > 1)
+select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov
+ ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov
+from inv inv1,inv inv2
+where inv1.i_item_sk = inv2.i_item_sk
+ and inv1.w_warehouse_sk = inv2.w_warehouse_sk
+ and inv1.d_moy=4
+ and inv2.d_moy=4+1
+ and inv1.cov > 1.5
+order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov
+ ,inv2.d_moy,inv2.mean, inv2.cov
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query4.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query4.sql
new file mode 100644
index 0000000..364c1a5
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query4.sql
@@ -0,0 +1,129 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 4: customers whose catalog year_total ratio (2002 over 2001) exceeds both their store ratio and their web ratio; year_total is computed per customer, year and sales channel.
+with year_total as (
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag customer_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total
+ ,'s' sale_type -- rows built from store_sales
+ from customer
+ ,store_sales
+ ,date_dim
+ where c_customer_sk = ss_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+ union all
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag customer_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total
+ ,'c' sale_type -- rows built from catalog_sales
+ from customer
+ ,catalog_sales
+ ,date_dim
+ where c_customer_sk = cs_bill_customer_sk
+ and cs_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+union all
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,c_preferred_cust_flag customer_preferred_cust_flag
+ ,c_birth_country customer_birth_country
+ ,c_login customer_login
+ ,c_email_address customer_email_address
+ ,d_year dyear
+ ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total
+ ,'w' sale_type -- rows built from web_sales
+ from customer
+ ,web_sales
+ ,date_dim
+ where c_customer_sk = ws_bill_customer_sk
+ and ws_sold_date_sk = d_date_sk
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,c_preferred_cust_flag
+ ,c_birth_country
+ ,c_login
+ ,c_email_address
+ ,d_year
+ )
+ select
+ t_s_secyear.customer_id
+ ,t_s_secyear.customer_first_name
+ ,t_s_secyear.customer_last_name
+ ,t_s_secyear.customer_email_address
+ from year_total t_s_firstyear
+ ,year_total t_s_secyear
+ ,year_total t_c_firstyear
+ ,year_total t_c_secyear
+ ,year_total t_w_firstyear
+ ,year_total t_w_secyear
+ where t_s_secyear.customer_id = t_s_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_c_secyear.customer_id
+ and t_s_firstyear.customer_id = t_c_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_w_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_w_secyear.customer_id
+ and t_s_firstyear.sale_type = 's'
+ and t_c_firstyear.sale_type = 'c'
+ and t_w_firstyear.sale_type = 'w'
+ and t_s_secyear.sale_type = 's'
+ and t_c_secyear.sale_type = 'c'
+ and t_w_secyear.sale_type = 'w'
+ and t_s_firstyear.dyear = 2001
+ and t_s_secyear.dyear = 2001+1
+ and t_c_firstyear.dyear = 2001
+ and t_c_secyear.dyear = 2001+1
+ and t_w_firstyear.dyear = 2001
+ and t_w_secyear.dyear = 2001+1
+ and t_s_firstyear.year_total > 0 -- first-year totals must be positive; they are the divisors below
+ and t_c_firstyear.year_total > 0
+ and t_w_firstyear.year_total > 0
+ and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end
+ > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end
+ and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end
+ > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end
+ order by t_s_secyear.customer_id
+ ,t_s_secyear.customer_first_name
+ ,t_s_secyear.customer_last_name
+ ,t_s_secyear.customer_email_address
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query40.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query40.sql
new file mode 100644
index 0000000..41a8cba
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query40.sql
@@ -0,0 +1,41 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+-- TPC-DS query 40: per warehouse state and item, catalog sales net of refunded cash,
+-- split into the 30 days before 1998-04-08 and the 30 days from that date onward.
+select w_state
+ ,i_item_id
+ ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date))
+ then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before
+ ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date))
+ then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after
+ -- outer join keeps sales that have no return row; coalesce() then counts the refund as 0
+ from catalog_sales left outer join catalog_returns on
+ (cs_order_number = cr_order_number
+ and cs_item_sk = cr_item_sk)
+ ,warehouse
+ ,item
+ ,date_dim
+ where i_current_price between 0.99 and 1.49
+ and i_item_sk = cs_item_sk
+ and cs_warehouse_sk = w_warehouse_sk
+ and cs_sold_date_sk = d_date_sk
+ -- ANSI interval literals instead of the vendor-specific "30 days" labeled durations
+ and d_date between (cast ('1998-04-08' as date) - interval '30' day)
+ and (cast ('1998-04-08' as date) + interval '30' day)
+ group by w_state,i_item_id
+ order by w_state,i_item_id
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query41.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query41.sql
new file mode 100644
index 0000000..e42bef9
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query41.sql
@@ -0,0 +1,65 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 41: distinct product names of items with manufacturer id 742 to 742+40 whose manufacturer (i_manufact) has at least one item matching any of the listed category/color/unit/size combinations.
+select distinct(i_product_name)
+ from item i1
+ where i_manufact_id between 742 and 742+40
+ and (select count(*) as item_cnt -- correlated on i1.i_manufact; compared with 0 at the end
+ from item
+ where (i_manufact = i1.i_manufact and
+ ((i_category = 'Women' and
+ (i_color = 'orchid' or i_color = 'papaya') and
+ (i_units = 'Pound' or i_units = 'Lb') and
+ (i_size = 'petite' or i_size = 'medium')
+ ) or
+ (i_category = 'Women' and
+ (i_color = 'burlywood' or i_color = 'navy') and
+ (i_units = 'Bundle' or i_units = 'Each') and
+ (i_size = 'N/A' or i_size = 'extra large')
+ ) or
+ (i_category = 'Men' and
+ (i_color = 'bisque' or i_color = 'azure') and
+ (i_units = 'N/A' or i_units = 'Tsp') and
+ (i_size = 'small' or i_size = 'large')
+ ) or
+ (i_category = 'Men' and
+ (i_color = 'chocolate' or i_color = 'cornflower') and
+ (i_units = 'Bunch' or i_units = 'Gross') and
+ (i_size = 'petite' or i_size = 'medium')
+ ))) or
+ (i_manufact = i1.i_manufact and
+ ((i_category = 'Women' and
+ (i_color = 'salmon' or i_color = 'midnight') and
+ (i_units = 'Oz' or i_units = 'Box') and
+ (i_size = 'petite' or i_size = 'medium')
+ ) or
+ (i_category = 'Women' and
+ (i_color = 'snow' or i_color = 'steel') and
+ (i_units = 'Carton' or i_units = 'Tbl') and
+ (i_size = 'N/A' or i_size = 'extra large')
+ ) or
+ (i_category = 'Men' and
+ (i_color = 'purple' or i_color = 'gainsboro') and
+ (i_units = 'Dram' or i_units = 'Unknown') and
+ (i_size = 'small' or i_size = 'large')
+ ) or
+ (i_category = 'Men' and
+ (i_color = 'metallic' or i_color = 'forest') and
+ (i_units = 'Gram' or i_units = 'Ounce') and
+ (i_size = 'petite' or i_size = 'medium')
+ )))) > 0
+ order by i_product_name
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query42.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query42.sql
new file mode 100644
index 0000000..a7a8bc8
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query42.sql
@@ -0,0 +1,35 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+-- TPC-DS query 42: store sales revenue per item category for month 12 of 1998,
+-- limited to items with manager id 1; the highest revenue comes first.
+SELECT dt.d_year,
+       item.i_category_id,
+       item.i_category,
+       sum(ss_ext_sales_price)
+FROM date_dim dt, store_sales, item
+WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
+  AND store_sales.ss_item_sk = item.i_item_sk
+  AND item.i_manager_id = 1
+  AND dt.d_moy = 12
+  AND dt.d_year = 1998
+GROUP BY dt.d_year,
+         item.i_category_id, item.i_category
+ORDER BY sum(ss_ext_sales_price) DESC,
+         dt.d_year,
+         item.i_category_id,
+         item.i_category
+LIMIT 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query43.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query43.sql
new file mode 100644
index 0000000..db4db2b
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query43.sql
@@ -0,0 +1,32 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+-- TPC-DS query 43: 1998 store sales per store, broken out into one column per
+-- day of the week, for stores whose s_gmt_offset is -5.
+SELECT s_store_name, s_store_id,
+       sum(CASE WHEN d_day_name = 'Sunday' THEN ss_sales_price ELSE NULL END) AS sun_sales,
+       sum(CASE WHEN d_day_name = 'Monday' THEN ss_sales_price ELSE NULL END) AS mon_sales,
+       sum(CASE WHEN d_day_name = 'Tuesday' THEN ss_sales_price ELSE NULL END) AS tue_sales,
+       sum(CASE WHEN d_day_name = 'Wednesday' THEN ss_sales_price ELSE NULL END) AS wed_sales,
+       sum(CASE WHEN d_day_name = 'Thursday' THEN ss_sales_price ELSE NULL END) AS thu_sales,
+       sum(CASE WHEN d_day_name = 'Friday' THEN ss_sales_price ELSE NULL END) AS fri_sales,
+       sum(CASE WHEN d_day_name = 'Saturday' THEN ss_sales_price ELSE NULL END) AS sat_sales
+FROM date_dim, store_sales, store
+WHERE d_date_sk = ss_sold_date_sk AND s_store_sk = ss_store_sk
+  AND s_gmt_offset = -5 AND d_year = 1998
+GROUP BY s_store_name, s_store_id
+ORDER BY s_store_name, s_store_id, sun_sales, mon_sales, tue_sales, wed_sales, thu_sales, fri_sales, sat_sales
+LIMIT 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query44.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query44.sql
new file mode 100644
index 0000000..897ac5f
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query44.sql
@@ -0,0 +1,48 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 44: for store 2, pairs the items ranked below 11 by ascending average net profit with the items at the same rank by descending average net profit. The alias "asceding" [sic] is spelled that way consistently throughout.
+select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing
+from(select *
+ from (select item_sk,rank() over (order by rank_col asc) rnk
+ from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col
+ from store_sales ss1
+ where ss_store_sk = 2
+ group by ss_item_sk
+ having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col -- threshold: 90% of the store-2 average over rows with no ss_hdemo_sk
+ from store_sales
+ where ss_store_sk = 2
+ and ss_hdemo_sk is null
+ group by ss_store_sk))V1)V11
+ where rnk < 11) asceding,
+ (select *
+ from (select item_sk,rank() over (order by rank_col desc) rnk
+ from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col
+ from store_sales ss1
+ where ss_store_sk = 2
+ group by ss_item_sk
+ having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col
+ from store_sales
+ where ss_store_sk = 2
+ and ss_hdemo_sk is null
+ group by ss_store_sk))V2)V21
+ where rnk < 11) descending,
+item i1,
+item i2
+where asceding.rnk = descending.rnk
+ and i1.i_item_sk=asceding.item_sk
+ and i2.i_item_sk=descending.item_sk
+order by asceding.rnk
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query45.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query45.sql
new file mode 100644
index 0000000..765456a
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query45.sql
@@ -0,0 +1,33 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+-- TPC-DS query 45: web sales totals by customer zip and county for quarter 2 of 2000,
+-- keeping a sale when the zip prefix is listed or the item id belongs to one of ten item keys.
+SELECT ca_zip, ca_county, sum(ws_sales_price)
+FROM web_sales, customer, customer_address, date_dim, item
+WHERE ws_bill_customer_sk = c_customer_sk
+  AND c_current_addr_sk = ca_address_sk
+  AND ws_item_sk = i_item_sk
+  AND ws_sold_date_sk = d_date_sk
+  AND d_qoy = 2
+  AND d_year = 2000
+  AND (substr(ca_zip,1,5) IN ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792')
+       OR i_item_id IN (SELECT i_item_id
+                        FROM item
+                        WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)))
+GROUP BY ca_zip, ca_county
+ORDER BY ca_zip, ca_county
+LIMIT 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query46.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query46.sql
new file mode 100644
index 0000000..f58de5e
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query46.sql
@@ -0,0 +1,48 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 46: per ticket, coupon amount and net profit of store purchases made in a city different from the customer's current address city (d_dow 6 or 0, years 1999-2001, stores in Midway/Fairview).
+select c_last_name
+ ,c_first_name
+ ,ca_city
+ ,bought_city
+ ,ss_ticket_number
+ ,amt,profit
+ from
+ (select ss_ticket_number
+ ,ss_customer_sk
+ ,ca_city bought_city -- city of the address recorded on the sale (ss_addr_sk)
+ ,sum(ss_coupon_amt) amt
+ ,sum(ss_net_profit) profit
+ from store_sales,date_dim,store,household_demographics,customer_address
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and store_sales.ss_addr_sk = customer_address.ca_address_sk
+ and (household_demographics.hd_dep_count = 5 or
+ household_demographics.hd_vehicle_count= 3)
+ and date_dim.d_dow in (6,0)
+ and date_dim.d_year in (1999,1999+1,1999+2)
+ and store.s_city in ('Midway','Fairview','Fairview','Midway','Fairview') -- repeated values are redundant in an IN list
+ group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr
+ where ss_customer_sk = c_customer_sk
+ and customer.c_current_addr_sk = current_addr.ca_address_sk
+ and current_addr.ca_city <> bought_city
+ order by c_last_name
+ ,c_first_name
+ ,ca_city
+ ,bought_city
+ ,ss_ticket_number
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query47.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query47.sql
new file mode 100644
index 0000000..9d2e4ca
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query47.sql
@@ -0,0 +1,64 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 47: year-2000 monthly store sales per category/brand/store/company that deviate by more than 10% from that year's monthly average, shown with the previous and next month's sales.
+with v1 as(
+ select i_category, i_brand,
+ s_store_name, s_company_name,
+ d_year, d_moy,
+ sum(ss_sales_price) sum_sales,
+ avg(sum(ss_sales_price)) over
+ (partition by i_category, i_brand,
+ s_store_name, s_company_name, d_year)
+ avg_monthly_sales,
+ rank() over
+ (partition by i_category, i_brand,
+ s_store_name, s_company_name
+ order by d_year, d_moy) rn
+ from item, store_sales, date_dim, store
+ where ss_item_sk = i_item_sk and
+ ss_sold_date_sk = d_date_sk and
+ ss_store_sk = s_store_sk and
+ (
+ d_year = 2000 or
+ ( d_year = 2000-1 and d_moy =12) or
+ ( d_year = 2000+1 and d_moy =1)
+ )
+ group by i_category, i_brand,
+ s_store_name, s_company_name,
+ d_year, d_moy),
+ v2 as(
+ select v1.i_category, v1.i_brand
+ ,v1.d_year, v1.d_moy
+ ,v1.avg_monthly_sales
+ ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+ v1.i_category = v1_lead.i_category and
+ v1.i_brand = v1_lag.i_brand and
+ v1.i_brand = v1_lead.i_brand and
+ v1.s_store_name = v1_lag.s_store_name and
+ v1.s_store_name = v1_lead.s_store_name and
+ v1.s_company_name = v1_lag.s_company_name and
+ v1.s_company_name = v1_lead.s_company_name and
+ v1.rn = v1_lag.rn + 1 and -- v1_lag is the row ranked immediately before v1
+ v1.rn = v1_lead.rn - 1) -- v1_lead is the row ranked immediately after v1
+ select *
+ from v2
+ where d_year = 2000 and
+ avg_monthly_sales > 0 and
+ case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+ order by sum_sales - avg_monthly_sales, nsum
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query48.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query48.sql
new file mode 100644
index 0000000..a924396
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query48.sql
@@ -0,0 +1,79 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 48: total store sales quantity in 1998 for sales matching one of three marital-status/education/price bands and one of three US state-group/net-profit bands.
+select sum (ss_quantity)
+ from store_sales, store, customer_demographics, customer_address, date_dim
+ where s_store_sk = ss_store_sk
+ and ss_sold_date_sk = d_date_sk and d_year = 1998
+ and
+ (
+ (
+ cd_demo_sk = ss_cdemo_sk -- the customer_demographics join condition is repeated inside each OR branch
+ and
+ cd_marital_status = 'M'
+ and
+ cd_education_status = '4 yr Degree'
+ and
+ ss_sales_price between 100.00 and 150.00
+ )
+ or
+ (
+ cd_demo_sk = ss_cdemo_sk
+ and
+ cd_marital_status = 'D'
+ and
+ cd_education_status = 'Primary'
+ and
+ ss_sales_price between 50.00 and 100.00
+ )
+ or
+ (
+ cd_demo_sk = ss_cdemo_sk
+ and
+ cd_marital_status = 'U'
+ and
+ cd_education_status = 'Advanced Degree'
+ and
+ ss_sales_price between 150.00 and 200.00
+ )
+ )
+ and
+ (
+ (
+ ss_addr_sk = ca_address_sk -- likewise, the customer_address join condition is repeated per branch
+ and
+ ca_country = 'United States'
+ and
+ ca_state in ('KY', 'GA', 'NM')
+ and ss_net_profit between 0 and 2000
+ )
+ or
+ (ss_addr_sk = ca_address_sk
+ and
+ ca_country = 'United States'
+ and
+ ca_state in ('MT', 'OR', 'IN')
+ and ss_net_profit between 150 and 3000
+ )
+ or
+ (ss_addr_sk = ca_address_sk
+ and
+ ca_country = 'United States'
+ and
+ ca_state in ('WI', 'MO', 'WV')
+ and ss_net_profit between 50 and 25000
+ )
+ )
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query49.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query49.sql
new file mode 100644
index 0000000..2e29e15
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query49.sql
@@ -0,0 +1,142 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 49: for month 12 of 2000, items per channel (web, catalog, store) whose return-quantity ratio or return-amount ratio has rank <= 10; ranks are assigned in ascending ratio order.
+select channel, item, return_ratio, return_rank, currency_rank from
+ (select
+ 'web' as channel
+ ,web.item
+ ,web.return_ratio
+ ,web.return_rank
+ ,web.currency_rank
+ from (
+ select
+ item
+ ,return_ratio
+ ,currency_ratio
+ ,rank() over (order by return_ratio) as return_rank
+ ,rank() over (order by currency_ratio) as currency_rank
+ from
+ ( select ws.ws_item_sk as item
+ ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/
+ cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio
+ ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/
+ cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio
+ from
+ web_sales ws left outer join web_returns wr
+ on (ws.ws_order_number = wr.wr_order_number and
+ ws.ws_item_sk = wr.wr_item_sk)
+ ,date_dim
+ where
+ wr.wr_return_amt > 10000 -- filter on the outer-joined table: sales rows without a matching return do not pass
+ and ws.ws_net_profit > 1
+ and ws.ws_net_paid > 0
+ and ws.ws_quantity > 0
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 12
+ group by ws.ws_item_sk
+ ) in_web
+ ) web
+ where
+ (
+ web.return_rank <= 10
+ or
+ web.currency_rank <= 10
+ )
+ union
+ select
+ 'catalog' as channel
+ ,catalog.item
+ ,catalog.return_ratio
+ ,catalog.return_rank
+ ,catalog.currency_rank
+ from (
+ select
+ item
+ ,return_ratio
+ ,currency_ratio
+ ,rank() over (order by return_ratio) as return_rank
+ ,rank() over (order by currency_ratio) as currency_rank
+ from
+ ( select
+ cs.cs_item_sk as item
+ ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/
+ cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio
+ ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/
+ cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio
+ from
+ catalog_sales cs left outer join catalog_returns cr
+ on (cs.cs_order_number = cr.cr_order_number and
+ cs.cs_item_sk = cr.cr_item_sk)
+ ,date_dim
+ where
+ cr.cr_return_amount > 10000
+ and cs.cs_net_profit > 1
+ and cs.cs_net_paid > 0
+ and cs.cs_quantity > 0
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 12
+ group by cs.cs_item_sk
+ ) in_cat
+ ) catalog
+ where
+ (
+ catalog.return_rank <= 10
+ or
+ catalog.currency_rank <=10
+ )
+ union
+ select
+ 'store' as channel
+ ,store.item
+ ,store.return_ratio
+ ,store.return_rank
+ ,store.currency_rank
+ from (
+ select
+ item
+ ,return_ratio
+ ,currency_ratio
+ ,rank() over (order by return_ratio) as return_rank
+ ,rank() over (order by currency_ratio) as currency_rank
+ from
+ ( select sts.ss_item_sk as item
+ ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio
+ ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio
+ from
+ store_sales sts left outer join store_returns sr
+ on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk)
+ ,date_dim
+ where
+ sr.sr_return_amt > 10000
+ and sts.ss_net_profit > 1
+ and sts.ss_net_paid > 0
+ and sts.ss_quantity > 0
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 12
+ group by sts.ss_item_sk
+ ) in_store
+ ) store
+ where (
+ store.return_rank <= 10
+ or
+ store.currency_rank <= 10
+ )
+ )
+ order by 1,4,5,2 -- positional: channel, return_rank, currency_rank, item
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query5.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query5.sql
new file mode 100644
index 0000000..da2e30a
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query5.sql
@@ -0,0 +1,141 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 5: sales, returns and profit per channel and id (store, catalog page, web site), with rollup subtotals.
+with ssr as -- store channel, aggregated per s_store_id
+ (select s_store_id,
+ sum(sales_price) as sales,
+ sum(profit) as profit,
+ sum(return_amt) as returns,
+ sum(net_loss) as profit_loss
+ from
+ ( select ss_store_sk as store_sk, -- sales rows; return columns are zero-filled
+ ss_sold_date_sk as date_sk,
+ ss_ext_sales_price as sales_price,
+ ss_net_profit as profit,
+ cast(0 as decimal(7,2)) as return_amt,
+ cast(0 as decimal(7,2)) as net_loss
+ from store_sales
+ union all
+ select sr_store_sk as store_sk, -- return rows; sales columns are zero-filled
+ sr_returned_date_sk as date_sk,
+ cast(0 as decimal(7,2)) as sales_price,
+ cast(0 as decimal(7,2)) as profit,
+ sr_return_amt as return_amt,
+ sr_net_loss as net_loss
+ from store_returns
+ ) salesreturns,
+ date_dim,
+ store
+ where date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + interval '14' day) -- ANSI interval literal; the "+ 14 days" labeled-duration form is not portable
+ and store_sk = s_store_sk
+ group by s_store_id)
+ ,
+ csr as -- catalog channel, aggregated per cp_catalog_page_id
+ (select cp_catalog_page_id,
+ sum(sales_price) as sales,
+ sum(profit) as profit,
+ sum(return_amt) as returns,
+ sum(net_loss) as profit_loss
+ from
+ ( select cs_catalog_page_sk as page_sk,
+ cs_sold_date_sk as date_sk,
+ cs_ext_sales_price as sales_price,
+ cs_net_profit as profit,
+ cast(0 as decimal(7,2)) as return_amt,
+ cast(0 as decimal(7,2)) as net_loss
+ from catalog_sales
+ union all
+ select cr_catalog_page_sk as page_sk,
+ cr_returned_date_sk as date_sk,
+ cast(0 as decimal(7,2)) as sales_price,
+ cast(0 as decimal(7,2)) as profit,
+ cr_return_amount as return_amt,
+ cr_net_loss as net_loss
+ from catalog_returns
+ ) salesreturns,
+ date_dim,
+ catalog_page
+ where date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + interval '14' day) -- same window as the store channel
+ and page_sk = cp_catalog_page_sk
+ group by cp_catalog_page_id)
+ ,
+ wsr as -- web channel, aggregated per web_site_id
+ (select web_site_id,
+ sum(sales_price) as sales,
+ sum(profit) as profit,
+ sum(return_amt) as returns,
+ sum(net_loss) as profit_loss
+ from
+ ( select ws_web_site_sk as wsr_web_site_sk,
+ ws_sold_date_sk as date_sk,
+ ws_ext_sales_price as sales_price,
+ ws_net_profit as profit,
+ cast(0 as decimal(7,2)) as return_amt,
+ cast(0 as decimal(7,2)) as net_loss
+ from web_sales
+ union all
+ select ws_web_site_sk as wsr_web_site_sk, -- site of a return is taken from the matching web_sales row
+ wr_returned_date_sk as date_sk,
+ cast(0 as decimal(7,2)) as sales_price,
+ cast(0 as decimal(7,2)) as profit,
+ wr_return_amt as return_amt,
+ wr_net_loss as net_loss
+ from web_returns left outer join web_sales on
+ ( wr_item_sk = ws_item_sk
+ and wr_order_number = ws_order_number)
+ ) salesreturns,
+ date_dim,
+ web_site
+ where date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + interval '14' day) -- same window as the store channel
+ and wsr_web_site_sk = web_site_sk
+ group by web_site_id)
+ select channel
+ , id
+ , sum(sales) as sales
+ , sum(returns) as returns
+ , sum(profit) as profit
+ from
+ (select 'store channel' as channel
+ , 'store' || s_store_id as id
+ , sales
+ , returns
+ , (profit - profit_loss) as profit -- summed profit minus summed net_loss of returns
+ from ssr
+ union all
+ select 'catalog channel' as channel
+ , 'catalog_page' || cp_catalog_page_id as id
+ , sales
+ , returns
+ , (profit - profit_loss) as profit
+ from csr
+ union all
+ select 'web channel' as channel
+ , 'web_site' || web_site_id as id
+ , sales
+ , returns
+ , (profit - profit_loss) as profit
+ from wsr
+ ) x
+ group by rollup (channel, id) -- yields per-(channel, id) rows, per-channel subtotals and a grand total
+ order by channel
+ ,id
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query50.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query50.sql
new file mode 100644
index 0000000..fc37add
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query50.sql
@@ -0,0 +1,72 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 50: per store, counts store returns dated in month 9 of 2000, bucketed by sr_returned_date_sk - ss_sold_date_sk.
+select
+ s_store_name
+ ,s_company_id
+ ,s_street_number
+ ,s_street_name
+ ,s_street_type
+ ,s_suite_number
+ ,s_city
+ ,s_county
+ ,s_state
+ ,s_zip
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days" -- buckets use the difference of date surrogate keys; presumably these are consecutive day numbers - verify
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and
+ (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days"
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and
+ (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days"
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and
+ (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days"
+ ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as ">120 days" -- NOTE(review): aliases are ANSI double-quoted identifiers; confirm the target engine's quoting rules
+from
+ store_sales
+ ,store_returns
+ ,store
+ ,date_dim d1 -- sale date (no filter applied)
+ ,date_dim d2 -- return date (restricted to year 2000, month 9 below)
+where
+ d2.d_year = 2000
+and d2.d_moy = 9
+and ss_ticket_number = sr_ticket_number -- a return is matched to its sale by ticket, item and customer
+and ss_item_sk = sr_item_sk
+and ss_sold_date_sk = d1.d_date_sk
+and sr_returned_date_sk = d2.d_date_sk
+and ss_customer_sk = sr_customer_sk
+and ss_store_sk = s_store_sk
+group by
+ s_store_name
+ ,s_company_id
+ ,s_street_number
+ ,s_street_name
+ ,s_street_type
+ ,s_suite_number
+ ,s_city
+ ,s_county
+ ,s_state
+ ,s_zip
+order by s_store_name
+ ,s_company_id
+ ,s_street_number
+ ,s_street_name
+ ,s_street_type
+ ,s_suite_number
+ ,s_city
+ ,s_county
+ ,s_state
+ ,s_zip
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query51.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query51.sql
new file mode 100644
index 0000000..49b6d1b
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query51.sql
@@ -0,0 +1,58 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 51: item/date rows where the running max of cumulative web sales exceeds that of cumulative store sales (d_month_seq 1212 to 1212+11).
+WITH web_v1 as ( -- running total of daily web sales per item
+select
+ ws_item_sk item_sk, d_date,
+ sum(sum(ws_sales_price))
+ over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales
+from web_sales
+ ,date_dim
+where ws_sold_date_sk=d_date_sk
+ and d_month_seq between 1212 and 1212+11
+ and ws_item_sk is not NULL
+group by ws_item_sk, d_date),
+store_v1 as ( -- same running total, computed over store sales
+select
+ ss_item_sk item_sk, d_date,
+ sum(sum(ss_sales_price))
+ over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales
+from store_sales
+ ,date_dim
+where ss_sold_date_sk=d_date_sk
+ and d_month_seq between 1212 and 1212+11
+ and ss_item_sk is not NULL
+group by ss_item_sk, d_date)
+ select *
+from (select item_sk
+ ,d_date
+ ,web_sales
+ ,store_sales
+ ,max(web_sales) -- running maximum per item in date order
+ over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative
+ ,max(store_sales)
+ over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative
+ from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk -- take the key from whichever side of the outer join is present
+ ,case when web.d_date is not null then web.d_date else store.d_date end d_date
+ ,web.cume_sales web_sales
+ ,store.cume_sales store_sales
+ from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk -- keeps item/date pairs present in only one channel
+ and web.d_date = store.d_date)
+ )x )y
+where web_cumulative > store_cumulative
+order by item_sk
+ ,d_date
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query52.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query52.sql
new file mode 100644
index 0000000..5422e43
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query52.sql
@@ -0,0 +1,35 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 52: store revenue (sum of ss_ext_sales_price) per brand for items with i_manager_id = 1 sold in month 12 of 1998.
+select dt.d_year
+ ,item.i_brand_id brand_id
+ ,item.i_brand brand
+ ,sum(ss_ext_sales_price) ext_price
+ from date_dim dt
+ ,store_sales
+ ,item
+ where dt.d_date_sk = store_sales.ss_sold_date_sk
+ and store_sales.ss_item_sk = item.i_item_sk
+ and item.i_manager_id = 1
+ and dt.d_moy=12
+ and dt.d_year=1998
+ group by dt.d_year
+ ,item.i_brand
+ ,item.i_brand_id
+ order by dt.d_year
+ ,ext_price desc -- highest-revenue brands first
+ ,brand_id
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query53.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query53.sql
new file mode 100644
index 0000000..5807a7b
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query53.sql
@@ -0,0 +1,41 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 53: per-manufacturer quarterly store sales that deviate from that manufacturer's average by more than 0.1 (relative).
+select * from
+(select i_manufact_id,
+sum(ss_sales_price) sum_sales,
+avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales -- average of the per-d_qoy sums for each manufacturer
+from item, store_sales, date_dim, store
+where ss_item_sk = i_item_sk and
+ss_sold_date_sk = d_date_sk and
+ss_store_sk = s_store_sk and
+d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and
+((i_category in ('Books','Children','Electronics') and -- first of two alternative category/class/brand sets
+i_class in ('personal','portable','reference','self-help') and
+i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7',
+ 'exportiunivamalg #9','scholaramalgamalg #9'))
+or(i_category in ('Women','Music','Men') and -- second alternative set
+i_class in ('accessories','classical','fragrances','pants') and
+i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1',
+ 'importoamalg #1')))
+group by i_manufact_id, d_qoy ) tmp1
+where case when avg_quarterly_sales > 0 -- avoids dividing by a non-positive average; such rows yield null and are filtered out
+ then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales
+ else null end > 0.1
+order by avg_quarterly_sales,
+ sum_sales,
+ i_manufact_id
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query54.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query54.sql
new file mode 100644
index 0000000..93c5af7
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query54.sql
@@ -0,0 +1,69 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 54: customer counts per store-revenue segment (width 50) for buyers of Jewelry/consignment items via catalog or web in month 3 of 1999.
+with my_customers as ( -- distinct customers with a qualifying catalog or web purchase
+ select distinct c_customer_sk
+ , c_current_addr_sk
+ from
+ ( select cs_sold_date_sk sold_date_sk,
+ cs_bill_customer_sk customer_sk,
+ cs_item_sk item_sk
+ from catalog_sales
+ union all
+ select ws_sold_date_sk sold_date_sk,
+ ws_bill_customer_sk customer_sk,
+ ws_item_sk item_sk
+ from web_sales
+ ) cs_or_ws_sales,
+ item,
+ date_dim,
+ customer
+ where sold_date_sk = d_date_sk
+ and item_sk = i_item_sk
+ and i_category = 'Jewelry'
+ and i_class = 'consignment'
+ and c_customer_sk = cs_or_ws_sales.customer_sk
+ and d_moy = 3
+ and d_year = 1999
+ )
+ , my_revenue as ( -- store_sales revenue per such customer
+ select c_customer_sk,
+ sum(ss_ext_sales_price) as revenue
+ from my_customers,
+ store_sales,
+ customer_address,
+ store,
+ date_dim
+ where c_current_addr_sk = ca_address_sk
+ and ca_county = s_county -- store rows are tied to the customer's address by county and state
+ and ca_state = s_state
+ and ss_sold_date_sk = d_date_sk
+ and c_customer_sk = ss_customer_sk
+ and d_month_seq between (select distinct d_month_seq+1 -- window: d_month_seq + 1 through + 3 relative to month 3 of 1999
+ from date_dim where d_year = 1999 and d_moy = 3)
+ and (select distinct d_month_seq+3
+ from date_dim where d_year = 1999 and d_moy = 3)
+ group by c_customer_sk
+ )
+ , segments as
+ (select cast((revenue/50) as int) as segment -- revenue divided by 50, cast to int
+ from my_revenue
+ )
+ select segment, count(*) as num_customers, segment*50 as segment_base
+ from segments
+ group by segment
+ order by segment, num_customers
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query55.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query55.sql
new file mode 100644
index 0000000..5ae4d34
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query55.sql
@@ -0,0 +1,27 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 55: store revenue (sum of ss_ext_sales_price) per brand for items with i_manager_id = 36 sold in month 12 of 2001.
+select i_brand_id brand_id, i_brand brand,
+ sum(ss_ext_sales_price) ext_price
+ from date_dim, store_sales, item
+ where d_date_sk = ss_sold_date_sk
+ and ss_item_sk = i_item_sk
+ and i_manager_id=36
+ and d_moy=12
+ and d_year=2001
+ group by i_brand, i_brand_id
+ order by ext_price desc, i_brand_id -- highest-revenue brands first
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query56.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query56.sql
new file mode 100644
index 0000000..0e76d59
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query56.sql
@@ -0,0 +1,82 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 56: total sales per i_item_id across store, catalog and web for orchid/chiffon/lace items, month 1 of 2000, ca_gmt_offset = -8.
+with ss as ( -- store sales, address taken from ss_addr_sk
+ select i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ cs as ( -- catalog sales, address taken from cs_bill_addr_sk
+ select i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ ws as ( -- web sales, address taken from ws_bill_addr_sk
+ select i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id)
+ select i_item_id ,sum(total_sales) total_sales
+ from (select * from ss -- per-channel totals are stacked, then summed per item
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by total_sales,
+ i_item_id
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query57.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query57.sql
new file mode 100644
index 0000000..eb3b1fc
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query57.sql
@@ -0,0 +1,61 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 57: months of 2000 whose catalog sales per category/brand/call center deviate by more than 0.1 (relative) from the yearly average, with adjacent months' sales.
+with v1 as( -- monthly sums, plus the per-year average and a month rank per category/brand/call center
+ select i_category, i_brand,
+ cc_name,
+ d_year, d_moy,
+ sum(cs_sales_price) sum_sales,
+ avg(sum(cs_sales_price)) over
+ (partition by i_category, i_brand,
+ cc_name, d_year)
+ avg_monthly_sales,
+ rank() over
+ (partition by i_category, i_brand,
+ cc_name
+ order by d_year, d_moy) rn
+ from item, catalog_sales, date_dim, call_center
+ where cs_item_sk = i_item_sk and
+ cs_sold_date_sk = d_date_sk and
+ cc_call_center_sk= cs_call_center_sk and
+ ( -- year 2000 plus the adjacent month on either side
+ d_year = 2000 or
+ ( d_year = 2000-1 and d_moy =12) or
+ ( d_year = 2000+1 and d_moy =1)
+ )
+ group by i_category, i_brand,
+ cc_name , d_year, d_moy),
+ v2 as( -- self-joins v1 on rn to attach the previous (psum) and next (nsum) row's sales
+ select v1.cc_name
+ ,v1.d_year, v1.d_moy
+ ,v1.avg_monthly_sales
+ ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+ v1.i_category = v1_lead.i_category and
+ v1.i_brand = v1_lag.i_brand and
+ v1.i_brand = v1_lead.i_brand and
+ v1. cc_name = v1_lag. cc_name and
+ v1. cc_name = v1_lead. cc_name and
+ v1.rn = v1_lag.rn + 1 and
+ v1.rn = v1_lead.rn - 1)
+ select *
+ from v2
+ where d_year = 2000 and
+ avg_monthly_sales > 0 and
+ case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 -- case guards the division against a non-positive average
+ order by sum_sales - avg_monthly_sales, nsum
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query58.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query58.sql
new file mode 100644
index 0000000..42366e6
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query58.sql
@@ -0,0 +1,78 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 58: items whose store, catalog and web revenue in the week (d_week_seq) of 1998-02-19 each lie within 0.9x to 1.1x of the others.
+with ss_items as -- store revenue per item_id for that week
+ (select i_item_id item_id
+ ,sum(ss_ext_sales_price) ss_item_rev
+ from store_sales
+ ,item
+ ,date_dim
+ where ss_item_sk = i_item_sk
+ and d_date in (select d_date -- all dates sharing the d_week_seq of 1998-02-19
+ from date_dim
+ where d_week_seq = (select d_week_seq
+ from date_dim
+ where d_date = '1998-02-19'))
+ and ss_sold_date_sk = d_date_sk
+ group by i_item_id),
+ cs_items as -- catalog revenue per item_id for that week
+ (select i_item_id item_id
+ ,sum(cs_ext_sales_price) cs_item_rev
+ from catalog_sales
+ ,item
+ ,date_dim
+ where cs_item_sk = i_item_sk
+ and d_date in (select d_date
+ from date_dim
+ where d_week_seq = (select d_week_seq
+ from date_dim
+ where d_date = '1998-02-19'))
+ and cs_sold_date_sk = d_date_sk
+ group by i_item_id),
+ ws_items as -- web revenue per item_id for that week
+ (select i_item_id item_id
+ ,sum(ws_ext_sales_price) ws_item_rev
+ from web_sales
+ ,item
+ ,date_dim
+ where ws_item_sk = i_item_sk
+ and d_date in (select d_date
+ from date_dim
+ where d_week_seq =(select d_week_seq
+ from date_dim
+ where d_date = '1998-02-19'))
+ and ws_sold_date_sk = d_date_sk
+ group by i_item_id)
+ select ss_items.item_id
+ ,ss_item_rev
+ ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev -- channel revenue relative to the three-channel average, times 100
+ ,cs_item_rev
+ ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev
+ ,ws_item_rev
+ ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev
+ ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average
+ from ss_items,cs_items,ws_items
+ where ss_items.item_id=cs_items.item_id
+ and ss_items.item_id=ws_items.item_id
+ and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev -- pairwise closeness checks in both directions
+ and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
+ and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
+ and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
+ and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
+ and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev
+ order by item_id
+ ,ss_item_rev
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query59.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query59.sql
new file mode 100644
index 0000000..462ef96
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query59.sql
@@ -0,0 +1,57 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 59: per store, ratio of each weekday's sales in a week (d_month_seq 1185 to 1185+11) to the week whose d_week_seq is 52 higher.
+with wss as -- store sales per d_week_seq and store, split into one column per day name
+ (select d_week_seq,
+ ss_store_sk,
+ sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales,
+ sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales,
+ sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales,
+ sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales,
+ sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales,
+ sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales,
+ sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales
+ from store_sales,date_dim
+ where d_date_sk = ss_sold_date_sk
+ group by d_week_seq,ss_store_sk
+ )
+ select s_store_name1,s_store_id1,d_week_seq1
+ ,sun_sales1/sun_sales2,mon_sales1/mon_sales2
+ ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2
+ ,fri_sales1/fri_sales2,sat_sales1/sat_sales2
+ from
+ (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 -- weeks in the first 12-month range (suffix 1)
+ ,s_store_id s_store_id1,sun_sales sun_sales1
+ ,mon_sales mon_sales1,tue_sales tue_sales1
+ ,wed_sales wed_sales1,thu_sales thu_sales1
+ ,fri_sales fri_sales1,sat_sales sat_sales1
+ from wss,store,date_dim d
+ where d.d_week_seq = wss.d_week_seq and
+ ss_store_sk = s_store_sk and
+ d_month_seq between 1185 and 1185 + 11) y,
+ (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 -- weeks in the following 12-month range (suffix 2)
+ ,s_store_id s_store_id2,sun_sales sun_sales2
+ ,mon_sales mon_sales2,tue_sales tue_sales2
+ ,wed_sales wed_sales2,thu_sales thu_sales2
+ ,fri_sales fri_sales2,sat_sales sat_sales2
+ from wss,store,date_dim d
+ where d.d_week_seq = wss.d_week_seq and
+ ss_store_sk = s_store_sk and
+ d_month_seq between 1185+ 12 and 1185 + 23) x
+ where s_store_id1=s_store_id2
+ and d_week_seq1=d_week_seq2-52 -- pairs each week with the one whose d_week_seq is 52 higher
+ order by s_store_name1,s_store_id1,d_week_seq1
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query6.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query6.sql
new file mode 100644
index 0000000..389c61a
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query6.sql
@@ -0,0 +1,39 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 6: states (customer's current address) with at least 10 store sales rows in month 2 of 2000 for items priced above 1.2x their category average.
+select a.ca_state state, count(*) cnt
+ from customer_address a
+ ,customer c
+ ,store_sales s
+ ,date_dim d
+ ,item i
+ where a.ca_address_sk = c.c_current_addr_sk
+ and c.c_customer_sk = s.ss_customer_sk
+ and s.ss_sold_date_sk = d.d_date_sk
+ and s.ss_item_sk = i.i_item_sk
+ and d.d_month_seq = -- the d_month_seq value of year 2000, month 2
+ (select distinct (d_month_seq)
+ from date_dim
+ where d_year = 2000
+ and d_moy = 2 )
+ and i.i_current_price > 1.2 * -- correlated subquery: average price within the item's own category
+ (select avg(j.i_current_price)
+ from item j
+ where j.i_category = i.i_category)
+ group by a.ca_state
+ having count(*) >= 10
+ order by cnt, a.ca_state
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query60.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query60.sql
new file mode 100644
index 0000000..9f84287
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query60.sql
@@ -0,0 +1,91 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 60: total sales per i_item_id across store, catalog and web for 'Children' category items, month 9 of 1999, ca_gmt_offset = -6.
+with ss as ( -- store sales, address taken from ss_addr_sk
+ select
+ i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ cs as ( -- catalog sales, address taken from cs_bill_addr_sk
+ select
+ i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ ws as ( -- web sales, address taken from ws_bill_addr_sk
+ select
+ i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id)
+ select
+ i_item_id
+,sum(total_sales) total_sales
+ from (select * from ss -- per-channel totals are stacked, then summed per item
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by i_item_id
+ ,total_sales
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query61.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query61.sql
new file mode 100644
index 0000000..4e02adb
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query61.sql
@@ -0,0 +1,57 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 -- third column is promotions as a percentage of total
+from
+ (select sum(ss_ext_sales_price) promotions -- numerator: sales that join to a promotion with a matching channel flag
+ from store_sales
+ ,store
+ ,promotion
+ ,date_dim
+ ,customer
+ ,customer_address
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_store_sk = s_store_sk
+ and ss_promo_sk = p_promo_sk
+ and ss_customer_sk= c_customer_sk
+ and ca_address_sk = c_current_addr_sk
+ and ss_item_sk = i_item_sk
+ and ca_gmt_offset = -7
+ and i_category = 'Books'
+ and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') -- any one of the three channel flags is enough
+ and s_gmt_offset = -7
+ and d_year = 1999
+ and d_moy = 11) promotional_sales,
+ (select sum(ss_ext_sales_price) total -- denominator: same filters without the promotion join and channel condition
+ from store_sales
+ ,store
+ ,date_dim
+ ,customer
+ ,customer_address
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_store_sk = s_store_sk
+ and ss_customer_sk= c_customer_sk
+ and ca_address_sk = c_current_addr_sk
+ and ss_item_sk = i_item_sk
+ and ca_gmt_offset = -7
+ and i_category = 'Books'
+ and s_gmt_offset = -7
+ and d_year = 1999
+ and d_moy = 11) all_sales -- each derived table is an ungrouped aggregate (one row), so the comma join yields one row
+order by promotions, total
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query62.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query62.sql
new file mode 100644
index 0000000..f3f383e
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query62.sql
@@ -0,0 +1,48 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select -- counts web_sales rows per warehouse name prefix, ship mode type and web site, bucketed by ship delay
+ substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,web_name
+ ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days" -- delay is a difference of date surrogate keys, presumably a day count (assumes consecutive keys, confirm)
+ ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and
+ (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days"
+ ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 60) and
+ (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days"
+ ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and
+ (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days"
+ ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as ">120 days" -- the five buckets are mutually exclusive and cover every non-null difference
+from
+ web_sales
+ ,warehouse
+ ,ship_mode
+ ,web_site
+ ,date_dim
+where
+ d_month_seq between 1212 and 1212 + 11 -- twelve consecutive d_month_seq values
+and ws_ship_date_sk = d_date_sk -- date_dim is joined on the ship date, not the sold date
+and ws_warehouse_sk = w_warehouse_sk
+and ws_ship_mode_sk = sm_ship_mode_sk
+and ws_web_site_sk = web_site_sk
+group by
+ substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,web_name
+order by substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,web_name
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query63.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query63.sql
new file mode 100644
index 0000000..6e86c17c
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query63.sql
@@ -0,0 +1,42 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select * -- returns manager and d_moy groups whose sales deviate from that manager average by more than 10 percent
+from (select i_manager_id
+ ,sum(ss_sales_price) sum_sales
+ ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales -- window average of the per-group sums across all groups of one manager
+ from item
+ ,store_sales
+ ,date_dim
+ ,store
+ where ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and ss_store_sk = s_store_sk
+ and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11)
+ and (( i_category in ('Books','Children','Electronics') -- first of two alternative category, class and brand combinations
+ and i_class in ('personal','portable','reference','self-help')
+ and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7',
+ 'exportiunivamalg #9','scholaramalgamalg #9'))
+ or( i_category in ('Women','Music','Men') -- second alternative combination
+ and i_class in ('accessories','classical','fragrances','pants')
+ and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1',
+ 'importoamalg #1')))
+group by i_manager_id, d_moy) tmp1
+where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 -- the case guards the division, a non-positive average gives null and the row is dropped
+order by i_manager_id
+ ,avg_monthly_sales
+ ,sum_sales
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query64.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query64.sql
new file mode 100644
index 0000000..c181e38
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query64.sql
@@ -0,0 +1,134 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+with cs_ui as -- cs_ui: catalog items whose summed list price exceeds twice their summed refunds
+ (select cs_item_sk
+ ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund
+ from catalog_sales
+ ,catalog_returns
+ where cs_item_sk = cr_item_sk
+ and cs_order_number = cr_order_number
+ group by cs_item_sk
+ having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)),
+cross_sales as -- cross_sales: returned store sales of cs_ui items, aggregated per product, store, both addresses and three years
+ (select i_product_name product_name
+ ,i_item_sk item_sk
+ ,s_store_name store_name
+ ,s_zip store_zip
+ ,ad1.ca_street_number b_street_number
+ ,ad1.ca_street_name b_street_name
+ ,ad1.ca_city b_city
+ ,ad1.ca_zip b_zip
+ ,ad2.ca_street_number c_street_number
+ ,ad2.ca_street_name c_street_name
+ ,ad2.ca_city c_city
+ ,ad2.ca_zip c_zip
+ ,d1.d_year as syear
+ ,d2.d_year as fsyear
+ ,d3.d_year s2year
+ ,count(*) cnt
+ ,sum(ss_wholesale_cost) s1
+ ,sum(ss_list_price) s2
+ ,sum(ss_coupon_amt) s3
+ FROM store_sales
+ ,store_returns
+ ,cs_ui
+ ,date_dim d1
+ ,date_dim d2
+ ,date_dim d3
+ ,store
+ ,customer
+ ,customer_demographics cd1
+ ,customer_demographics cd2
+ ,promotion
+ ,household_demographics hd1
+ ,household_demographics hd2
+ ,customer_address ad1
+ ,customer_address ad2
+ ,income_band ib1
+ ,income_band ib2
+ ,item
+ WHERE ss_store_sk = s_store_sk AND
+ ss_sold_date_sk = d1.d_date_sk AND
+ ss_customer_sk = c_customer_sk AND
+ ss_cdemo_sk= cd1.cd_demo_sk AND
+ ss_hdemo_sk = hd1.hd_demo_sk AND
+ ss_addr_sk = ad1.ca_address_sk and
+ ss_item_sk = i_item_sk and
+ ss_item_sk = sr_item_sk and
+ ss_ticket_number = sr_ticket_number and
+ ss_item_sk = cs_ui.cs_item_sk and
+ c_current_cdemo_sk = cd2.cd_demo_sk AND
+ c_current_hdemo_sk = hd2.hd_demo_sk AND
+ c_current_addr_sk = ad2.ca_address_sk and
+ c_first_sales_date_sk = d2.d_date_sk and
+ c_first_shipto_date_sk = d3.d_date_sk and
+ ss_promo_sk = p_promo_sk and
+ hd1.hd_income_band_sk = ib1.ib_income_band_sk and
+ hd2.hd_income_band_sk = ib2.ib_income_band_sk and
+ cd1.cd_marital_status <> cd2.cd_marital_status and -- sale-time and current demographics must differ in marital status
+ i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and
+ i_current_price between 35 and 35 + 10 and
+ i_current_price between 35 + 1 and 35 + 15 -- together with the previous range this leaves prices from 36 to 45
+group by i_product_name
+ ,i_item_sk
+ ,s_store_name
+ ,s_zip
+ ,ad1.ca_street_number
+ ,ad1.ca_street_name
+ ,ad1.ca_city
+ ,ad1.ca_zip
+ ,ad2.ca_street_number
+ ,ad2.ca_street_name
+ ,ad2.ca_city
+ ,ad2.ca_zip
+ ,d1.d_year
+ ,d2.d_year
+ ,d3.d_year
+)
+select cs1.product_name -- self-join of cross_sales pairing syear 2000 rows (cs1) with syear 2001 rows (cs2)
+ ,cs1.store_name
+ ,cs1.store_zip
+ ,cs1.b_street_number
+ ,cs1.b_street_name
+ ,cs1.b_city
+ ,cs1.b_zip
+ ,cs1.c_street_number
+ ,cs1.c_street_name
+ ,cs1.c_city
+ ,cs1.c_zip
+ ,cs1.syear
+ ,cs1.cnt
+ ,cs1.s1 as s11
+ ,cs1.s2 as s21
+ ,cs1.s3 as s31
+ ,cs2.s1 as s12
+ ,cs2.s2 as s22
+ ,cs2.s3 as s32
+ ,cs2.syear
+ ,cs2.cnt
+from cross_sales cs1,cross_sales cs2
+where cs1.item_sk=cs2.item_sk and
+ cs1.syear = 2000 and
+ cs2.syear = 2000 + 1 and
+ cs2.cnt <= cs1.cnt and -- keep pairs where the later year count does not exceed the earlier one
+ cs1.store_name = cs2.store_name and
+ cs1.store_zip = cs2.store_zip
+order by cs1.product_name
+ ,cs1.store_name
+ ,cs2.cnt
+ ,cs1.s1
+ ,cs2.s1
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query65.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query65.sql
new file mode 100644
index 0000000..a106624
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query65.sql
@@ -0,0 +1,42 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select -- lists store and item pairs whose revenue is at most 10 percent of that store average item revenue
+ s_store_name,
+ i_item_desc,
+ sc.revenue,
+ i_current_price,
+ i_wholesale_cost,
+ i_brand
+ from store, item,
+ (select ss_store_sk, avg(revenue) as ave -- sb: average of the per-item revenues within each store
+ from
+ (select ss_store_sk, ss_item_sk,
+ sum(ss_sales_price) as revenue
+ from store_sales, date_dim
+ where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11
+ group by ss_store_sk, ss_item_sk) sa
+ group by ss_store_sk) sb,
+ (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue -- sc: revenue per store and item, same definition as the inner sa
+ from store_sales, date_dim
+ where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11
+ group by ss_store_sk, ss_item_sk) sc
+ where sb.ss_store_sk = sc.ss_store_sk and
+ sc.revenue <= 0.1 * sb.ave and
+ s_store_sk = sc.ss_store_sk and
+ i_item_sk = sc.ss_item_sk
+ order by s_store_name, i_item_desc
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query66.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query66.sql
new file mode 100644
index 0000000..f99b53b
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query66.sql
@@ -0,0 +1,233 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select -- re-aggregates the unioned web and catalog branches per warehouse, carrier label and year
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,ship_carriers
+ ,year
+ ,sum(jan_sales) as jan_sales
+ ,sum(feb_sales) as feb_sales
+ ,sum(mar_sales) as mar_sales
+ ,sum(apr_sales) as apr_sales
+ ,sum(may_sales) as may_sales
+ ,sum(jun_sales) as jun_sales
+ ,sum(jul_sales) as jul_sales
+ ,sum(aug_sales) as aug_sales
+ ,sum(sep_sales) as sep_sales
+ ,sum(oct_sales) as oct_sales
+ ,sum(nov_sales) as nov_sales
+ ,sum(dec_sales) as dec_sales
+ ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot -- the divisor is a grouping column, so it is constant within each output group
+ ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot
+ ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot
+ ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot
+ ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot
+ ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot
+ ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot
+ ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot
+ ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot
+ ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot
+ ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot
+ ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot
+ ,sum(jan_net) as jan_net
+ ,sum(feb_net) as feb_net
+ ,sum(mar_net) as mar_net
+ ,sum(apr_net) as apr_net
+ ,sum(may_net) as may_net
+ ,sum(jun_net) as jun_net
+ ,sum(jul_net) as jul_net
+ ,sum(aug_net) as aug_net
+ ,sum(sep_net) as sep_net
+ ,sum(oct_net) as oct_net
+ ,sum(nov_net) as nov_net
+ ,sum(dec_net) as dec_net
+ from (
+ select -- first branch: web_sales
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers -- constant label built from the same two names used in the sm_carrier filter
+ ,d_year as year
+ ,sum(case when d_moy = 1 -- one conditional sum per d_moy value from 1 to 12, rows of other months contribute 0
+ then ws_sales_price* ws_quantity else 0 end) as jan_sales
+ ,sum(case when d_moy = 2
+ then ws_sales_price* ws_quantity else 0 end) as feb_sales
+ ,sum(case when d_moy = 3
+ then ws_sales_price* ws_quantity else 0 end) as mar_sales
+ ,sum(case when d_moy = 4
+ then ws_sales_price* ws_quantity else 0 end) as apr_sales
+ ,sum(case when d_moy = 5
+ then ws_sales_price* ws_quantity else 0 end) as may_sales
+ ,sum(case when d_moy = 6
+ then ws_sales_price* ws_quantity else 0 end) as jun_sales
+ ,sum(case when d_moy = 7
+ then ws_sales_price* ws_quantity else 0 end) as jul_sales
+ ,sum(case when d_moy = 8
+ then ws_sales_price* ws_quantity else 0 end) as aug_sales
+ ,sum(case when d_moy = 9
+ then ws_sales_price* ws_quantity else 0 end) as sep_sales
+ ,sum(case when d_moy = 10
+ then ws_sales_price* ws_quantity else 0 end) as oct_sales
+ ,sum(case when d_moy = 11
+ then ws_sales_price* ws_quantity else 0 end) as nov_sales
+ ,sum(case when d_moy = 12
+ then ws_sales_price* ws_quantity else 0 end) as dec_sales
+ ,sum(case when d_moy = 1
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net
+ ,sum(case when d_moy = 2
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net
+ ,sum(case when d_moy = 3
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net
+ ,sum(case when d_moy = 4
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as apr_net
+ ,sum(case when d_moy = 5
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net
+ ,sum(case when d_moy = 6
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net
+ ,sum(case when d_moy = 7
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net
+ ,sum(case when d_moy = 8
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net
+ ,sum(case when d_moy = 9
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net
+ ,sum(case when d_moy = 10
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net
+ ,sum(case when d_moy = 11
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as nov_net
+ ,sum(case when d_moy = 12
+ then ws_net_paid_inc_tax * ws_quantity else 0 end) as dec_net
+ from
+ web_sales
+ ,warehouse
+ ,date_dim
+ ,time_dim
+ ,ship_mode
+ where
+ ws_warehouse_sk = w_warehouse_sk
+ and ws_sold_date_sk = d_date_sk
+ and ws_sold_time_sk = t_time_sk
+ and ws_ship_mode_sk = sm_ship_mode_sk
+ and d_year = 2002
+ and t_time between 49530 and 49530+28800 -- t_time window of width 28800, presumably seconds (8 hours), confirm against time_dim
+ and sm_carrier in ('DIAMOND','AIRBORNE')
+ group by
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,d_year
+ union all -- both branches expose the same column list so the outer query can sum them
+ select -- second branch: catalog_sales, same shape as the web branch
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers
+ ,d_year as year
+ ,sum(case when d_moy = 1 -- this branch uses cs_ext_sales_price where the web branch uses ws_sales_price
+ then cs_ext_sales_price* cs_quantity else 0 end) as jan_sales
+ ,sum(case when d_moy = 2
+ then cs_ext_sales_price* cs_quantity else 0 end) as feb_sales
+ ,sum(case when d_moy = 3
+ then cs_ext_sales_price* cs_quantity else 0 end) as mar_sales
+ ,sum(case when d_moy = 4
+ then cs_ext_sales_price* cs_quantity else 0 end) as apr_sales
+ ,sum(case when d_moy = 5
+ then cs_ext_sales_price* cs_quantity else 0 end) as may_sales
+ ,sum(case when d_moy = 6
+ then cs_ext_sales_price* cs_quantity else 0 end) as jun_sales
+ ,sum(case when d_moy = 7
+ then cs_ext_sales_price* cs_quantity else 0 end) as jul_sales
+ ,sum(case when d_moy = 8
+ then cs_ext_sales_price* cs_quantity else 0 end) as aug_sales
+ ,sum(case when d_moy = 9
+ then cs_ext_sales_price* cs_quantity else 0 end) as sep_sales
+ ,sum(case when d_moy = 10
+ then cs_ext_sales_price* cs_quantity else 0 end) as oct_sales
+ ,sum(case when d_moy = 11
+ then cs_ext_sales_price* cs_quantity else 0 end) as nov_sales
+ ,sum(case when d_moy = 12
+ then cs_ext_sales_price* cs_quantity else 0 end) as dec_sales
+ ,sum(case when d_moy = 1 -- this branch uses cs_net_paid_inc_ship_tax where the web branch uses ws_net_paid_inc_tax
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jan_net
+ ,sum(case when d_moy = 2
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as feb_net
+ ,sum(case when d_moy = 3
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as mar_net
+ ,sum(case when d_moy = 4
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as apr_net
+ ,sum(case when d_moy = 5
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as may_net
+ ,sum(case when d_moy = 6
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jun_net
+ ,sum(case when d_moy = 7
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jul_net
+ ,sum(case when d_moy = 8
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as aug_net
+ ,sum(case when d_moy = 9
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as sep_net
+ ,sum(case when d_moy = 10
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as oct_net
+ ,sum(case when d_moy = 11
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as nov_net
+ ,sum(case when d_moy = 12
+ then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as dec_net
+ from
+ catalog_sales
+ ,warehouse
+ ,date_dim
+ ,time_dim
+ ,ship_mode
+ where
+ cs_warehouse_sk = w_warehouse_sk
+ and cs_sold_date_sk = d_date_sk
+ and cs_sold_time_sk = t_time_sk
+ and cs_ship_mode_sk = sm_ship_mode_sk
+ and d_year = 2002
+ and t_time between 49530 AND 49530+28800
+ and sm_carrier in ('DIAMOND','AIRBORNE')
+ group by
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,d_year
+ ) x
+ group by
+ w_warehouse_name
+ ,w_warehouse_sq_ft
+ ,w_city
+ ,w_county
+ ,w_state
+ ,w_country
+ ,ship_carriers
+ ,year
+ order by w_warehouse_name
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query67.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query67.sql
new file mode 100644
index 0000000..1d90fa2
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query67.sql
@@ -0,0 +1,57 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select * -- returns rollup sales rows ranked within each i_category, keeping ranks up to 100
+from (select i_category
+ ,i_class
+ ,i_brand
+ ,i_product_name
+ ,d_year
+ ,d_qoy
+ ,d_moy
+ ,s_store_id
+ ,sumsales
+ ,rank() over (partition by i_category order by sumsales desc) rk -- rank 1 is the largest sumsales within the i_category partition
+ from (select i_category
+ ,i_class
+ ,i_brand
+ ,i_product_name
+ ,d_year
+ ,d_qoy
+ ,d_moy
+ ,s_store_id
+ ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales -- coalesce makes a null price or quantity count as 0
+ from store_sales
+ ,date_dim
+ ,store
+ ,item
+ where ss_sold_date_sk=d_date_sk
+ and ss_item_sk=i_item_sk
+ and ss_store_sk = s_store_sk
+ and d_month_seq between 1212 and 1212+11
+ group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 -- rollup adds subtotal rows for every prefix of this column list
+where rk <= 100
+order by i_category
+ ,i_class
+ ,i_brand
+ ,i_product_name
+ ,d_year
+ ,d_qoy
+ ,d_moy
+ ,s_store_id
+ ,sumsales
+ ,rk
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query68.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query68.sql
new file mode 100644
index 0000000..8ba3933
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query68.sql
@@ -0,0 +1,55 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select c_last_name -- lists tickets bought at an address in a different city than the customer current address
+ ,c_first_name
+ ,ca_city
+ ,bought_city
+ ,ss_ticket_number
+ ,extended_price
+ ,extended_tax
+ ,list_price
+ from (select ss_ticket_number -- dn: per-ticket sums of price, list price and tax with the purchase city
+ ,ss_customer_sk
+ ,ca_city bought_city
+ ,sum(ss_ext_sales_price) extended_price
+ ,sum(ss_ext_list_price) list_price
+ ,sum(ss_ext_tax) extended_tax
+ from store_sales
+ ,date_dim
+ ,store
+ ,household_demographics
+ ,customer_address
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and store_sales.ss_addr_sk = customer_address.ca_address_sk
+ and date_dim.d_dom between 1 and 2
+ and (household_demographics.hd_dep_count = 5 or
+ household_demographics.hd_vehicle_count= 3)
+ and date_dim.d_year in (1999,1999+1,1999+2)
+ and store.s_city in ('Midway','Fairview')
+ group by ss_ticket_number
+ ,ss_customer_sk
+ ,ss_addr_sk,ca_city) dn
+ ,customer
+ ,customer_address current_addr -- second use of customer_address, joined below on c_current_addr_sk
+ where ss_customer_sk = c_customer_sk
+ and customer.c_current_addr_sk = current_addr.ca_address_sk
+ and current_addr.ca_city <> bought_city -- keep only rows where the two cities differ
+ order by c_last_name
+ ,ss_ticket_number
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query69.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query69.sql
new file mode 100644
index 0000000..103a056
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query69.sql
@@ -0,0 +1,60 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select -- counts customers per demographic combination who have store sales but no web or catalog sales in the window
+ cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ count(*) cnt1, -- cnt1, cnt2 and cnt3 are all count(*) over the same group, so they are equal
+ cd_purchase_estimate,
+ count(*) cnt2,
+ cd_credit_rating,
+ count(*) cnt3
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ ca_state in ('CO','IL','MN') and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select * -- customer must have a store_sales row with d_year 1999 and d_moy 1 to 3
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ (not exists (select * -- and no web_sales row as bill customer in the same window
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ not exists (select * -- and no catalog_sales row as ship customer in the same window
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2))
+ group by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ order by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query7.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query7.sql
new file mode 100644
index 0000000..d190e7d
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query7.sql
@@ -0,0 +1,34 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select i_item_id, -- per-item averages of quantity, list price, coupon amount and sales price over the filtered store sales
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, item, promotion
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ ss_promo_sk = p_promo_sk and
+ cd_gender = 'F' and
+ cd_marital_status = 'W' and
+ cd_education_status = 'Primary' and
+ (p_channel_email = 'N' or p_channel_event = 'N') and -- promotion qualifies when at least one of the two channel flags is N
+ d_year = 1998
+ group by i_item_id
+ order by i_item_id
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query70.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query70.sql
new file mode 100644
index 0000000..7ea3448
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query70.sql
@@ -0,0 +1,51 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select -- net profit by state and county with rollup subtotals, ranked within each hierarchy level
+ sum(ss_net_profit) as total_sum
+ ,s_state
+ ,s_county
+ ,grouping(s_state)+grouping(s_county) as lochierarchy -- 0 for state and county rows, 1 for state subtotals, 2 for the grand total
+ ,rank() over (
+ partition by grouping(s_state)+grouping(s_county),
+ case when grouping(s_county) = 0 then s_state end -- county-level rows are ranked within their own state
+ order by sum(ss_net_profit) desc) as rank_within_parent
+ from
+ store_sales
+ ,date_dim d1
+ ,store
+ where
+ d1.d_month_seq between 1212 and 1212+11
+ and d1.d_date_sk = ss_sold_date_sk
+ and s_store_sk = ss_store_sk
+ and s_state in
+ ( select s_state
+ from (select s_state as s_state,
+ rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking -- NOTE(review): grouped and partitioned by s_state, so each partition has one row and ranking is always 1
+ from store_sales, store, date_dim
+ where d_month_seq between 1212 and 1212+11
+ and d_date_sk = ss_sold_date_sk
+ and s_store_sk = ss_store_sk
+ group by s_state
+ ) tmp1
+ where ranking <= 5
+ )
+ group by rollup(s_state,s_county)
+ order by
+ lochierarchy desc
+ ,case when lochierarchy = 0 then s_state end
+ ,rank_within_parent
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query71.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query71.sql
new file mode 100644
index 0000000..09eb27a
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query71.sql
@@ -0,0 +1,52 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select i_brand_id brand_id, i_brand brand,t_hour,t_minute, -- Extended sales price per brand and hour/minute across web, catalog and store sales
+ sum(ext_price) ext_price
+ from item, (select ws_ext_sales_price as ext_price, -- branch 1 of 3: web sales, projected onto a common column set
+ ws_sold_date_sk as sold_date_sk,
+ ws_item_sk as sold_item_sk,
+ ws_sold_time_sk as time_sk
+ from web_sales,date_dim
+ where d_date_sk = ws_sold_date_sk
+ and d_moy=12 -- all three branches use the same d_moy / d_year filter
+ and d_year=2000
+ union all
+ select cs_ext_sales_price as ext_price, -- branch 2 of 3: catalog sales
+ cs_sold_date_sk as sold_date_sk,
+ cs_item_sk as sold_item_sk,
+ cs_sold_time_sk as time_sk
+ from catalog_sales,date_dim
+ where d_date_sk = cs_sold_date_sk
+ and d_moy=12
+ and d_year=2000
+ union all
+ select ss_ext_sales_price as ext_price, -- branch 3 of 3: store sales
+ ss_sold_date_sk as sold_date_sk,
+ ss_item_sk as sold_item_sk,
+ ss_sold_time_sk as time_sk
+ from store_sales,date_dim
+ where d_date_sk = ss_sold_date_sk
+ and d_moy=12
+ and d_year=2000
+ ) tmp,time_dim
+ where
+ sold_item_sk = i_item_sk
+ and i_manager_id=1 -- only items with manager id 1
+ and time_sk = t_time_sk
+ and (t_meal_time = 'breakfast' or t_meal_time = 'dinner')
+ group by i_brand, i_brand_id,t_hour,t_minute
+ order by ext_price desc, i_brand_id -- no LIMIT clause: every group is returned
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query72.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query72.sql
new file mode 100644
index 0000000..a1173dc
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query72.sql
@@ -0,0 +1,42 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select i_item_desc -- Counts catalog sale lines whose ordered quantity exceeds inventory on hand, split by promotion presence
+ ,w_warehouse_name
+ ,d1.d_week_seq
+ ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo -- rows with no matching promotion (left outer join miss)
+ ,sum(case when p_promo_sk is not null then 1 else 0 end) promo -- rows with a matching promotion
+ ,count(*) total_cnt
+from catalog_sales
+join inventory on (cs_item_sk = inv_item_sk)
+join warehouse on (w_warehouse_sk=inv_warehouse_sk)
+join item on (i_item_sk = cs_item_sk)
+join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk)
+join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk)
+join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) -- d1: sale date
+join date_dim d2 on (inv_date_sk = d2.d_date_sk) -- d2: inventory date
+join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) -- d3: ship date
+left outer join promotion on (cs_promo_sk=p_promo_sk)
+left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) -- no catalog_returns column is selected or filtered on; the outer join can only add rows when an order line has several returns
+where d1.d_week_seq = d2.d_week_seq -- inventory row and sale fall in the same week sequence
+ and inv_quantity_on_hand < cs_quantity
+ and d3.d_date > d1.d_date + 5 -- NOTE(review): date + integer arithmetic; presumably means 5 days, and support is dialect-dependent - confirm for the target engine
+ and hd_buy_potential = '1001-5000'
+ and d1.d_year = 2001
+ and cd_marital_status = 'M'
+group by i_item_desc,w_warehouse_name,d1.d_week_seq
+order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq -- d_week_seq here is the unqualified output column from d1
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query73.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query73.sql
new file mode 100644
index 0000000..dba1bfa
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query73.sql
@@ -0,0 +1,41 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select c_last_name -- Customers and tickets whose store-sales line count per ticket is between 1 and 5, under the filters below
+ ,c_first_name
+ ,c_salutation
+ ,c_preferred_cust_flag
+ ,ss_ticket_number
+ ,cnt from
+ (select ss_ticket_number -- dj: one row per (ticket, customer) with its number of sale lines
+ ,ss_customer_sk
+ ,count(*) cnt
+ from store_sales,date_dim,store,household_demographics
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and date_dim.d_dom between 1 and 2
+ and (household_demographics.hd_buy_potential = '>10000' or
+ household_demographics.hd_buy_potential = 'Unknown')
+ and household_demographics.hd_vehicle_count > 0
+ and case when household_demographics.hd_vehicle_count > 0 then -- CASE guards the division against a zero vehicle count
+ household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1
+ and date_dim.d_year in (1998,1998+1,1998+2) -- three consecutive years starting at 1998
+ and store.s_county in ('Williamson County','Williamson County','Williamson County','Williamson County') -- the same literal appears four times, so this is equivalent to a single equality test
+ group by ss_ticket_number,ss_customer_sk) dj,customer
+ where ss_customer_sk = c_customer_sk
+ and cnt between 1 and 5
+ order by cnt desc, c_last_name asc -- no LIMIT clause: every matching row is returned
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query74.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query74.sql
new file mode 100644
index 0000000..384d1e7
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query74.sql
@@ -0,0 +1,74 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+with year_total as ( -- one row per customer, year and channel: max net paid, tagged 's' (store) or 'w' (web)
+ select c_customer_id customer_id
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,d_year as year
+ ,max(ss_net_paid) year_total -- this column has the same name as the CTE itself
+ ,'s' sale_type
+ from customer
+ ,store_sales
+ ,date_dim
+ where c_customer_sk = ss_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year in (2001,2001+1)
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,d_year
+ union all
+ select c_customer_id customer_id -- web branch: same shape as the store branch, built from web_sales
+ ,c_first_name customer_first_name
+ ,c_last_name customer_last_name
+ ,d_year as year
+ ,max(ws_net_paid) year_total
+ ,'w' sale_type
+ from customer
+ ,web_sales
+ ,date_dim
+ where c_customer_sk = ws_bill_customer_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year in (2001,2001+1)
+ group by c_customer_id
+ ,c_first_name
+ ,c_last_name
+ ,d_year
+ )
+ select -- customers whose web second-year/first-year ratio exceeds their store ratio
+ t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name
+ from year_total t_s_firstyear -- four self-joins of the CTE: store/web x first/second year
+ ,year_total t_s_secyear
+ ,year_total t_w_firstyear
+ ,year_total t_w_secyear
+ where t_s_secyear.customer_id = t_s_firstyear.customer_id
+ and t_s_firstyear.customer_id = t_w_secyear.customer_id
+ and t_s_firstyear.customer_id = t_w_firstyear.customer_id
+ and t_s_firstyear.sale_type = 's'
+ and t_w_firstyear.sale_type = 'w'
+ and t_s_secyear.sale_type = 's'
+ and t_w_secyear.sale_type = 'w'
+ and t_s_firstyear.year = 2001
+ and t_s_secyear.year = 2001+1
+ and t_w_firstyear.year = 2001
+ and t_w_secyear.year = 2001+1
+ and t_s_firstyear.year_total > 0 -- first-year totals must be positive; they are the divisors below
+ and t_w_firstyear.year_total > 0
+ and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end
+ > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end
+ order by 2,1,3 -- positional: customer_first_name, customer_id, customer_last_name
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query75.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query75.sql
new file mode 100644
index 0000000..6d9c689
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query75.sql
@@ -0,0 +1,83 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+WITH all_sales AS ( -- per year/brand/class/category/manufacturer: quantity and amount net of returns, over all three channels
+ SELECT d_year
+ ,i_brand_id
+ ,i_class_id
+ ,i_category_id
+ ,i_manufact_id
+ ,SUM(sales_cnt) AS sales_cnt
+ ,SUM(sales_amt) AS sales_amt
+ FROM (SELECT d_year -- catalog channel detail rows
+ ,i_brand_id
+ ,i_class_id
+ ,i_category_id
+ ,i_manufact_id
+ ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt -- COALESCE treats a sale with no return row as zero returned
+ ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt
+ FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk
+ JOIN date_dim ON d_date_sk=cs_sold_date_sk
+ LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number
+ AND cs_item_sk=cr_item_sk)
+ WHERE i_category='Sports'
+ UNION -- UNION, not UNION ALL: identical detail rows are de-duplicated before the SUMs
+ SELECT d_year -- store channel detail rows
+ ,i_brand_id
+ ,i_class_id
+ ,i_category_id
+ ,i_manufact_id
+ ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt
+ ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt
+ FROM store_sales JOIN item ON i_item_sk=ss_item_sk
+ JOIN date_dim ON d_date_sk=ss_sold_date_sk
+ LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number
+ AND ss_item_sk=sr_item_sk)
+ WHERE i_category='Sports'
+ UNION
+ SELECT d_year -- web channel detail rows
+ ,i_brand_id
+ ,i_class_id
+ ,i_category_id
+ ,i_manufact_id
+ ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt
+ ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt
+ FROM web_sales JOIN item ON i_item_sk=ws_item_sk
+ JOIN date_dim ON d_date_sk=ws_sold_date_sk
+ LEFT JOIN web_returns ON (ws_order_number=wr_order_number
+ AND ws_item_sk=wr_item_sk)
+ WHERE i_category='Sports') sales_detail
+ GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id)
+ SELECT prev_yr.d_year AS prev_year -- pairs each 2002 row with the 2001 row for the same brand/class/category/manufacturer
+ ,curr_yr.d_year AS year
+ ,curr_yr.i_brand_id
+ ,curr_yr.i_class_id
+ ,curr_yr.i_category_id
+ ,curr_yr.i_manufact_id
+ ,prev_yr.sales_cnt AS prev_yr_cnt
+ ,curr_yr.sales_cnt AS curr_yr_cnt
+ ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff
+ ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff
+ FROM all_sales curr_yr, all_sales prev_yr
+ WHERE curr_yr.i_brand_id=prev_yr.i_brand_id
+ AND curr_yr.i_class_id=prev_yr.i_class_id
+ AND curr_yr.i_category_id=prev_yr.i_category_id
+ AND curr_yr.i_manufact_id=prev_yr.i_manufact_id
+ AND curr_yr.d_year=2002
+ AND prev_yr.d_year=2002-1
+ AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 -- keep pairs whose count ratio is below 0.9; the casts force decimal division
+ ORDER BY sales_cnt_diff,sales_amt_diff
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query76.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query76.sql
new file mode 100644
index 0000000..8a87b00
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query76.sql
@@ -0,0 +1,37 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( -- Counts and sums sales rows that have NULL in one chosen key column per channel
+ SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price -- col_name records which column was tested for NULL
+ FROM store_sales, item, date_dim
+ WHERE ss_addr_sk IS NULL
+ AND ss_sold_date_sk=d_date_sk
+ AND ss_item_sk=i_item_sk
+ UNION ALL
+ SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price -- web branch
+ FROM web_sales, item, date_dim
+ WHERE ws_web_page_sk IS NULL
+ AND ws_sold_date_sk=d_date_sk
+ AND ws_item_sk=i_item_sk
+ UNION ALL
+ SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price -- catalog branch
+ FROM catalog_sales, item, date_dim
+ WHERE cs_warehouse_sk IS NULL
+ AND cs_sold_date_sk=d_date_sk
+ AND cs_item_sk=i_item_sk) foo
+GROUP BY channel, col_name, d_year, d_qoy, i_category
+ORDER BY channel, col_name, d_year, d_qoy, i_category
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query77.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query77.sql
new file mode 100644
index 0000000..7257175
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query77.sql
@@ -0,0 +1,121 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+with ss as -- ss: store sales and profit per store inside the date window
+ (select s_store_sk,
+ sum(ss_ext_sales_price) as sales,
+ sum(ss_net_profit) as profit
+ from store_sales,
+ date_dim,
+ store
+ where ss_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days) -- NOTE(review): "+ 30 days" is a labeled-duration form that not every SQL dialect parses - confirm for the target engine; the same window is used by all six CTEs
+ and ss_store_sk = s_store_sk
+ group by s_store_sk)
+ ,
+ sr as -- sr: store return amount and net loss per store
+ (select s_store_sk,
+ sum(sr_return_amt) as returns,
+ sum(sr_net_loss) as profit_loss
+ from store_returns,
+ date_dim,
+ store
+ where sr_returned_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and sr_store_sk = s_store_sk
+ group by s_store_sk),
+ cs as -- cs: catalog sales and profit per call center
+ (select cs_call_center_sk,
+ sum(cs_ext_sales_price) as sales,
+ sum(cs_net_profit) as profit
+ from catalog_sales,
+ date_dim
+ where cs_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ group by cs_call_center_sk
+ ),
+ cr as -- cr: catalog return amount and net loss per call center
+ (select cr_call_center_sk,
+ sum(cr_return_amount) as returns,
+ sum(cr_net_loss) as profit_loss
+ from catalog_returns,
+ date_dim
+ where cr_returned_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ group by cr_call_center_sk
+ ),
+ ws as -- ws: web sales and profit per web page
+ ( select wp_web_page_sk,
+ sum(ws_ext_sales_price) as sales,
+ sum(ws_net_profit) as profit
+ from web_sales,
+ date_dim,
+ web_page
+ where ws_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and ws_web_page_sk = wp_web_page_sk
+ group by wp_web_page_sk),
+ wr as -- wr: web return amount and net loss per web page
+ (select wp_web_page_sk,
+ sum(wr_return_amt) as returns,
+ sum(wr_net_loss) as profit_loss
+ from web_returns,
+ date_dim,
+ web_page
+ where wr_returned_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and wr_web_page_sk = wp_web_page_sk
+ group by wp_web_page_sk)
+ select channel -- final report: sales, returns and profit per channel and id, plus rollup subtotals
+ , id
+ , sum(sales) as sales
+ , sum(returns) as returns
+ , sum(profit) as profit
+ from
+ (select 'store channel' as channel
+ , ss.s_store_sk as id
+ , sales
+ , coalesce(returns, 0) as returns -- stores with no return rows count as zero returns
+ , (profit - coalesce(profit_loss,0)) as profit
+ from ss left join sr
+ on ss.s_store_sk = sr.s_store_sk
+ union all
+ select 'catalog channel' as channel
+ , cs_call_center_sk as id
+ , sales
+ , returns
+ , (profit - profit_loss) as profit
+ from cs
+ , cr -- NOTE(review): cs and cr are combined with no join predicate (cross join), unlike the store and web branches; presumably inherited from the benchmark template - confirm
+ union all
+ select 'web channel' as channel
+ , ws.wp_web_page_sk as id
+ , sales
+ , coalesce(returns, 0) returns
+ , (profit - coalesce(profit_loss,0)) as profit
+ from ws left join wr
+ on ws.wp_web_page_sk = wr.wp_web_page_sk
+ ) x
+ group by rollup (channel, id) -- yields per-(channel, id) rows, per-channel subtotals and a grand total
+ order by channel
+ ,id
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query78.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query78.sql
new file mode 100644
index 0000000..e7bec2e
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query78.sql
@@ -0,0 +1,71 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+with ws as -- ws: web sales with no matching return, summed per year, item and bill customer
+ (select d_year AS ws_sold_year, ws_item_sk,
+ ws_bill_customer_sk ws_customer_sk,
+ sum(ws_quantity) ws_qty,
+ sum(ws_wholesale_cost) ws_wc,
+ sum(ws_sales_price) ws_sp
+ from web_sales
+ left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk
+ join date_dim on ws_sold_date_sk = d_date_sk
+ where wr_order_number is null -- left join + IS NULL acts as an anti-join: keeps only sales without a return row
+ group by d_year, ws_item_sk, ws_bill_customer_sk
+ ),
+cs as -- cs: same shape as ws, built from catalog_sales / catalog_returns
+ (select d_year AS cs_sold_year, cs_item_sk,
+ cs_bill_customer_sk cs_customer_sk,
+ sum(cs_quantity) cs_qty,
+ sum(cs_wholesale_cost) cs_wc,
+ sum(cs_sales_price) cs_sp
+ from catalog_sales
+ left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk
+ join date_dim on cs_sold_date_sk = d_date_sk
+ where cr_order_number is null
+ group by d_year, cs_item_sk, cs_bill_customer_sk
+ ),
+ss as -- ss: same shape, built from store_sales / store_returns, matched on ticket number
+ (select d_year AS ss_sold_year, ss_item_sk,
+ ss_customer_sk,
+ sum(ss_quantity) ss_qty,
+ sum(ss_wholesale_cost) ss_wc,
+ sum(ss_sales_price) ss_sp
+ from store_sales
+ left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk
+ join date_dim on ss_sold_date_sk = d_date_sk
+ where sr_ticket_number is null
+ group by d_year, ss_item_sk, ss_customer_sk
+ )
+ select -- compares store totals with combined web + catalog totals for the same year, item and customer
+ss_sold_year, ss_item_sk, ss_customer_sk,
+round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, -- store quantity divided by other-channel quantity, rounded to 2 places
+ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price,
+coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty,
+coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost,
+coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price
+from ss
+left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk)
+left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk)
+where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2000 -- needs a positive web or catalog quantity; year fixed to 2000
+order by
+ ss_sold_year, ss_item_sk, ss_customer_sk,
+ ss_qty desc, ss_wc desc, ss_sp desc,
+ other_chan_qty,
+ other_chan_wholesale_cost,
+ other_chan_sales_price,
+ ratio
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query79.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query79.sql
new file mode 100644
index 0000000..0728226
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query79.sql
@@ -0,0 +1,36 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select -- Coupon amount and net profit per store-sales ticket, joined to the purchasing customer
+ c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit
+ from
+ (select ss_ticket_number -- ms: totals per ticket, customer, address and store city
+ ,ss_customer_sk
+ ,store.s_city
+ ,sum(ss_coupon_amt) amt
+ ,sum(ss_net_profit) profit
+ from store_sales,date_dim,store,household_demographics
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0)
+ and date_dim.d_dow = 1
+ and date_dim.d_year in (1998,1998+1,1998+2) -- three consecutive years starting at 1998
+ and store.s_number_employees between 200 and 295
+ group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer -- ss_addr_sk is grouped on but not selected, so one ticket can yield several rows
+ where ss_customer_sk = c_customer_sk
+ order by c_last_name,c_first_name,substr(s_city,1,30), profit
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query8.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query8.sql
new file mode 100644
index 0000000..250c118
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query8.sql
@@ -0,0 +1,121 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select s_store_name -- Net profit per store name for stores whose zip prefix matches a qualifying customer zip
+ ,sum(ss_net_profit)
+ from store_sales
+ ,date_dim
+ ,store,
+ (select ca_zip -- V1: zips present in BOTH the literal list and the preferred-customer set (INTERSECT)
+ from (
+ SELECT substr(ca_zip,1,5) ca_zip -- first INTERSECT operand: 5-character zips of customer addresses found in the literal list
+ FROM customer_address
+ WHERE substr(ca_zip,1,5) IN (
+ '89436','30868','65085','22977','83927','77557',
+ '58429','40697','80614','10502','32779',
+ '91137','61265','98294','17921','18427',
+ '21203','59362','87291','84093','21505',
+ '17184','10866','67898','25797','28055',
+ '18377','80332','74535','21757','29742',
+ '90885','29898','17819','40811','25990',
+ '47513','89531','91068','10391','18846',
+ '99223','82637','41368','83658','86199',
+ '81625','26696','89338','88425','32200',
+ '81427','19053','77471','36610','99823',
+ '43276','41249','48584','83550','82276',
+ '18842','78890','14090','38123','40936',
+ '34425','19850','43286','80072','79188',
+ '54191','11395','50497','84861','90733',
+ '21068','57666','37119','25004','57835',
+ '70067','62878','95806','19303','18840',
+ '19124','29785','16737','16022','49613',
+ '89977','68310','60069','98360','48649',
+ '39050','41793','25002','27413','39736',
+ '47208','16515','94808','57648','15009',
+ '80015','42961','63982','21744','71853',
+ '81087','67468','34175','64008','20261',
+ '11201','51799','48043','45645','61163',
+ '48375','36447','57042','21218','41100',
+ '89951','22745','35851','83326','61125',
+ '78298','80752','49858','52940','96976',
+ '63792','11376','53582','18717','90226',
+ '50530','94203','99447','27670','96577',
+ '57856','56372','16165','23427','54561',
+ '28806','44439','22926','30123','61451',
+ '92397','56979','92309','70873','13355',
+ '21801','46346','37562','56458','28286',
+ '47306','99555','69399','26234','47546',
+ '49661','88601','35943','39936','25632',
+ '24611','44166','56648','30379','59785',
+ '11110','14329','93815','52226','71381',
+ '13842','25612','63294','14664','21077',
+ '82626','18799','60915','81020','56447',
+ '76619','11433','13414','42548','92713',
+ '70467','30884','47484','16072','38936',
+ '13036','88376','45539','35901','19506',
+ '65690','73957','71850','49231','14276',
+ '20005','18384','76615','11635','38177',
+ '55607','41369','95447','58581','58149',
+ '91946','33790','76232','75692','95464',
+ '22246','51061','56692','53121','77209',
+ '15482','10688','14868','45907','73520',
+ '72666','25734','17959','24677','66446',
+ '94627','53535','15560','41967','69297',
+ '11929','59403','33283','52232','57350',
+ '43933','40921','36635','10827','71286',
+ '19736','80619','25251','95042','15526',
+ '36496','55854','49124','81980','35375',
+ '49157','63512','28944','14946','36503',
+ '54010','18767','23969','43905','66979',
+ '33113','21286','58471','59080','13395',
+ '79144','70373','67031','38360','26705',
+ '50906','52406','26066','73146','15884',
+ '31897','30045','61068','45550','92454',
+ '13376','14354','19770','22928','97790',
+ '50723','46081','30202','14410','20223',
+ '88500','67298','13261','14172','81410',
+ '93578','83583','46047','94167','82564',
+ '21156','15799','86709','37931','74703',
+ '83103','23054','70470','72008','49247',
+ '91911','69998','20961','70070','63197',
+ '54853','88191','91830','49521','19454',
+ '81450','89091','62378','25683','61869',
+ '51744','36580','85778','36871','48121',
+ '28810','83712','45486','67393','26935',
+ '42393','20132','55349','86057','21309',
+ '80218','10094','11357','48819','39734',
+ '40758','30432','21204','29467','30214',
+ '61024','55307','74621','11622','68908',
+ '33032','52868','99194','99900','84936',
+ '69036','99149','45013','32895','59004',
+ '32322','14933','32936','33562','72550',
+ '27385','58049','58200','16808','21360',
+ '32961','18586','79307','15492')
+ intersect
+ select ca_zip -- second INTERSECT operand: zips with more than 10 preferred customers
+ from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt
+ FROM customer_address, customer
+ WHERE ca_address_sk = c_current_addr_sk and
+ c_preferred_cust_flag='Y'
+ group by ca_zip -- NOTE(review): ca_zip is both a base column and the alias of substr(ca_zip,1,5); which one GROUP BY binds to is dialect-dependent - confirm for the target engine
+ having count(*) > 10)A1)A2) V1
+ where ss_store_sk = s_store_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_qoy = 1 and d_year = 2002
+ and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) -- match on the first two zip characters only, so one store can match several V1 rows
+ group by s_store_name
+ order by s_store_name
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query80.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query80.sql
new file mode 100644
index 0000000..9c6e177
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query80.sql
@@ -0,0 +1,109 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+with ssr as -- ssr: store sales, returns and profit per store id inside the date window
+ (select s_store_id as store_id,
+ sum(ss_ext_sales_price) as sales,
+ sum(coalesce(sr_return_amt, 0)) as returns, -- sales with no return row contribute 0
+ sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit
+ from store_sales left outer join store_returns on
+ (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number),
+ date_dim,
+ store,
+ item,
+ promotion
+ where ss_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days) -- NOTE(review): "+ 30 days" is a labeled-duration form that not every SQL dialect parses - confirm for the target engine; all three CTEs use this window
+ and ss_store_sk = s_store_sk
+ and ss_item_sk = i_item_sk
+ and i_current_price > 50
+ and ss_promo_sk = p_promo_sk
+ and p_channel_tv = 'N'
+ group by s_store_id)
+ ,
+ csr as -- csr: same measures per catalog page id
+ (select cp_catalog_page_id as catalog_page_id,
+ sum(cs_ext_sales_price) as sales,
+ sum(coalesce(cr_return_amount, 0)) as returns,
+ sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit
+ from catalog_sales left outer join catalog_returns on
+ (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number),
+ date_dim,
+ catalog_page,
+ item,
+ promotion
+ where cs_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and cs_catalog_page_sk = cp_catalog_page_sk
+ and cs_item_sk = i_item_sk
+ and i_current_price > 50
+ and cs_promo_sk = p_promo_sk
+ and p_channel_tv = 'N'
+group by cp_catalog_page_id)
+ ,
+ wsr as -- wsr: same measures per web site id
+ (select web_site_id,
+ sum(ws_ext_sales_price) as sales,
+ sum(coalesce(wr_return_amt, 0)) as returns,
+ sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit
+ from web_sales left outer join web_returns on
+ (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number),
+ date_dim,
+ web_site,
+ item,
+ promotion
+ where ws_sold_date_sk = d_date_sk
+ and d_date between cast('1998-08-04' as date)
+ and (cast('1998-08-04' as date) + 30 days)
+ and ws_web_site_sk = web_site_sk
+ and ws_item_sk = i_item_sk
+ and i_current_price > 50
+ and ws_promo_sk = p_promo_sk
+ and p_channel_tv = 'N'
+group by web_site_id)
+ select channel -- final report: sales, returns and profit per channel and id, plus rollup subtotals
+ , id
+ , sum(sales) as sales
+ , sum(returns) as returns
+ , sum(profit) as profit
+ from
+ (select 'store channel' as channel
+ , 'store' || store_id as id -- ids are prefixed with their source so they stay distinct across channels
+ , sales
+ , returns
+ , profit
+ from ssr
+ union all
+ select 'catalog channel' as channel
+ , 'catalog_page' || catalog_page_id as id
+ , sales
+ , returns
+ , profit
+ from csr
+ union all
+ select 'web channel' as channel
+ , 'web_site' || web_site_id as id
+ , sales
+ , returns
+ , profit
+ from wsr
+ ) x
+ group by rollup (channel, id) -- yields per-(channel, id) rows, per-channel subtotals and a grand total
+ order by channel
+ ,id
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query81.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query81.sql
new file mode 100644
index 0000000..71c49dc
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query81.sql
@@ -0,0 +1,44 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+with customer_total_return as -- CTE: sum of cr_return_amt_inc_tax per returning customer and return-address state, for returns dated in 1998
+ (select cr_returning_customer_sk as ctr_customer_sk
+ ,ca_state as ctr_state,
+ sum(cr_return_amt_inc_tax) as ctr_total_return
+ from catalog_returns
+ ,date_dim
+ ,customer_address
+ where cr_returned_date_sk = d_date_sk
+ and d_year =1998
+ and cr_returning_addr_sk = ca_address_sk -- ctr_state comes from the address recorded on the return
+ group by cr_returning_customer_sk
+ ,ca_state )
+ select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name -- main query: customer and current-address details plus the return total
+ ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset
+ ,ca_location_type,ctr_total_return
+ from customer_total_return ctr1
+ ,customer_address
+ ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 -- keep totals above 1.2x the average total of the same ctr_state (correlated subquery)
+ from customer_total_return ctr2
+ where ctr1.ctr_state = ctr2.ctr_state)
+ and ca_address_sk = c_current_addr_sk
+ and ca_state = 'IL' -- filters the customer's current address (joined via c_current_addr_sk), not ctr_state
+ and ctr1.ctr_customer_sk = c_customer_sk
+ order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name
+ ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset
+ ,ca_location_type,ctr_total_return
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query82.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query82.sql
new file mode 100644
index 0000000..f08cc17
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query82.sql
@@ -0,0 +1,30 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select i_item_id -- Query 82: items in a price band from four manufacturers, with inventory in a quantity range during a date window, that also have store sales
+ ,i_item_desc
+ ,i_current_price
+ from item, inventory, date_dim, store_sales
+ where i_current_price between 30 and 30+30 -- price band [30, 60]
+ and inv_item_sk = i_item_sk
+ and d_date_sk=inv_date_sk -- date_dim is joined to the inventory snapshot date
+ and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + 60 days)
+ and i_manufact_id in (437,129,727,663)
+ and inv_quantity_on_hand between 100 and 500
+ and ss_item_sk = i_item_sk -- store_sales contributes no output column; the join only requires matching sales rows
+ group by i_item_id,i_item_desc,i_current_price -- no aggregates: the grouping collapses duplicate rows produced by the joins
+ order by i_item_id
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query83.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query83.sql
new file mode 100644
index 0000000..bc22723
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query83.sql
@@ -0,0 +1,80 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+with sr_items as -- CTE: store return quantity per item id, for return dates in the selected weeks
+ (select i_item_id item_id,
+ sum(sr_return_quantity) sr_item_qty
+ from store_returns,
+ item,
+ date_dim
+ where sr_item_sk = i_item_sk
+ and d_date in -- every date whose d_week_seq equals the week of one of the three listed dates
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and sr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ cr_items as -- CTE: same computation over catalog_returns
+ (select i_item_id item_id,
+ sum(cr_return_quantity) cr_item_qty
+ from catalog_returns,
+ item,
+ date_dim
+ where cr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and cr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ wr_items as -- CTE: same computation over web_returns
+ (select i_item_id item_id,
+ sum(wr_return_quantity) wr_item_qty
+ from web_returns,
+ item,
+ date_dim
+ where wr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and wr_returned_date_sk = d_date_sk
+ group by i_item_id)
+ select sr_items.item_id -- main query: per item id, each channel's quantity, its *_dev figure and the three-channel average
+ ,sr_item_qty
+ ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev -- as written this is ((qty / total) / 3.0) * 100 under left-to-right evaluation of / and *
+ ,cr_item_qty
+ ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev
+ ,wr_item_qty
+ ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev
+ ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average -- mean quantity across the three channels
+ from sr_items
+ ,cr_items
+ ,wr_items
+ where sr_items.item_id=cr_items.item_id -- inner joins: only item ids present in all three CTEs are returned
+ and sr_items.item_id=wr_items.item_id
+ order by sr_items.item_id
+ ,sr_item_qty
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query84.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query84.sql
new file mode 100644
index 0000000..2a519de
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query84.sql
@@ -0,0 +1,34 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select c_customer_id as customer_id -- Query 84: ids and "last, first" names of customers in one city and income range, joined to store_returns through demographics
+ , coalesce(c_last_name,'') || ', ' || coalesce(c_first_name,'') as customername -- null name parts become empty strings before concatenation
+ from customer
+ ,customer_address
+ ,customer_demographics
+ ,household_demographics
+ ,income_band
+ ,store_returns
+ where ca_city = 'Hopewell'
+ and c_current_addr_sk = ca_address_sk
+ and ib_lower_bound >= 32287 -- income band must lie within [32287, 82287]
+ and ib_upper_bound <= 32287 + 50000
+ and ib_income_band_sk = hd_income_band_sk
+ and cd_demo_sk = c_current_cdemo_sk
+ and hd_demo_sk = c_current_hdemo_sk
+ and sr_cdemo_sk = cd_demo_sk -- one output row per matching store_returns row; no store_returns column is selected
+ order by c_customer_id
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query85.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query85.sql
new file mode 100644
index 0000000..dea9927
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query85.sql
@@ -0,0 +1,97 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select substr(r_reason_desc,1,20) -- Query 85: per return reason, averages of quantity, refunded cash and fee for returned 1998 web sales under demographic and address filters
+ ,avg(ws_quantity)
+ ,avg(wr_refunded_cash)
+ ,avg(wr_fee)
+ from web_sales, web_returns, web_page, customer_demographics cd1,
+ customer_demographics cd2, customer_address, date_dim, reason
+ where ws_web_page_sk = wp_web_page_sk
+ and ws_item_sk = wr_item_sk -- a sale is matched to its return by item and order number
+ and ws_order_number = wr_order_number
+ and ws_sold_date_sk = d_date_sk and d_year = 1998
+ and cd1.cd_demo_sk = wr_refunded_cdemo_sk -- cd1: demographics on the refunded side of the return
+ and cd2.cd_demo_sk = wr_returning_cdemo_sk -- cd2: demographics on the returning side of the return
+ and ca_address_sk = wr_refunded_addr_sk
+ and r_reason_sk = wr_reason_sk
+ and -- first filter group: one of three (marital status, education, sales price range) combinations; cd1 and cd2 must agree on both attributes
+ (
+ (
+ cd1.cd_marital_status = 'M'
+ and
+ cd1.cd_marital_status = cd2.cd_marital_status
+ and
+ cd1.cd_education_status = '4 yr Degree'
+ and
+ cd1.cd_education_status = cd2.cd_education_status
+ and
+ ws_sales_price between 100.00 and 150.00
+ )
+ or
+ (
+ cd1.cd_marital_status = 'D'
+ and
+ cd1.cd_marital_status = cd2.cd_marital_status
+ and
+ cd1.cd_education_status = 'Primary'
+ and
+ cd1.cd_education_status = cd2.cd_education_status
+ and
+ ws_sales_price between 50.00 and 100.00
+ )
+ or
+ (
+ cd1.cd_marital_status = 'U'
+ and
+ cd1.cd_marital_status = cd2.cd_marital_status
+ and
+ cd1.cd_education_status = 'Advanced Degree'
+ and
+ cd1.cd_education_status = cd2.cd_education_status
+ and
+ ws_sales_price between 150.00 and 200.00
+ )
+ )
+ and -- second filter group: one of three (state list, net profit range) combinations, each requiring ca_country = 'United States'
+ (
+ (
+ ca_country = 'United States'
+ and
+ ca_state in ('KY', 'GA', 'NM')
+ and ws_net_profit between 100 and 200
+ )
+ or
+ (
+ ca_country = 'United States'
+ and
+ ca_state in ('MT', 'OR', 'IN')
+ and ws_net_profit between 150 and 300
+ )
+ or
+ (
+ ca_country = 'United States'
+ and
+ ca_state in ('WI', 'MO', 'WV')
+ and ws_net_profit between 50 and 250
+ )
+ )
+group by r_reason_desc -- groups on the full description although only its first 20 characters are selected
+order by substr(r_reason_desc,1,20)
+ ,avg(ws_quantity)
+ ,avg(wr_refunded_cash)
+ ,avg(wr_fee)
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query86.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query86.sql
new file mode 100644
index 0000000..353732b
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query86.sql
@@ -0,0 +1,39 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select -- Query 86: web net-paid totals rolled up by item category and class, ranked inside each rollup level
+ sum(ws_net_paid) as total_sum
+ ,i_category
+ ,i_class
+ ,grouping(i_category)+grouping(i_class) as lochierarchy -- count of rolled-up columns: 0 = (category, class) row, 1 = category subtotal, 2 = grand total
+ ,rank() over (
+ partition by grouping(i_category)+grouping(i_class), -- rank separately per rollup level ...
+ case when grouping(i_class) = 0 then i_category end -- ... and, for detail rows only, per category
+ order by sum(ws_net_paid) desc) as rank_within_parent
+ from
+ web_sales
+ ,date_dim d1
+ ,item
+ where
+ d1.d_month_seq between 1212 and 1212+11 -- twelve consecutive d_month_seq values (1212..1223)
+ and d1.d_date_sk = ws_sold_date_sk
+ and i_item_sk = ws_item_sk
+ group by rollup(i_category,i_class)
+ order by
+ lochierarchy desc,
+ case when lochierarchy = 0 then i_category end, -- category is used as a sort key only for detail rows
+ rank_within_parent
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query87.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query87.sql
new file mode 100644
index 0000000..12b0384
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query87.sql
@@ -0,0 +1,35 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select count(*) -- Query 87: number of distinct (last name, first name, date) store-sales combinations left after removing those seen in catalog or web sales
+from ((select distinct c_last_name, c_first_name, d_date -- store-sales combinations in the month range
+ from store_sales, date_dim, customer
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212+11)
+ except -- remove combinations that also occur in catalog sales (matched on the billing customer)
+ (select distinct c_last_name, c_first_name, d_date
+ from catalog_sales, date_dim, customer
+ where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
+ and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212+11)
+ except -- then remove combinations that also occur in web sales (matched on the billing customer)
+ (select distinct c_last_name, c_first_name, d_date
+ from web_sales, date_dim, customer
+ where web_sales.ws_sold_date_sk = date_dim.d_date_sk
+ and web_sales.ws_bill_customer_sk = customer.c_customer_sk
+ and d_month_seq between 1212 and 1212+11)
+) cool_cust
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query88.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query88.sql
new file mode 100644
index 0000000..8ca2616
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query88.sql
@@ -0,0 +1,106 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select * -- Query 88: a single row of eight store_sales counts, one per half-hour slot from 08:30 to 12:30, for store name 'ese' and three household profiles
+from -- each derived table below is an ungrouped count(*), so each yields one row and the comma join yields one combined row
+ (select count(*) h8_30_to_9 -- slot: t_hour = 8, t_minute >= 30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 8
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or -- (dep_count, max vehicle_count) pairs: (3,5), (0,2), (1,3); repeated in every slot
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s1,
+ (select count(*) h9_to_9_30 -- slot: t_hour = 9, t_minute < 30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 9
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s2,
+ (select count(*) h9_30_to_10 -- slot: t_hour = 9, t_minute >= 30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 9
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s3,
+ (select count(*) h10_to_10_30 -- slot: t_hour = 10, t_minute < 30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 10
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s4,
+ (select count(*) h10_30_to_11 -- slot: t_hour = 10, t_minute >= 30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 10
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s5,
+ (select count(*) h11_to_11_30 -- slot: t_hour = 11, t_minute < 30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 11
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s6,
+ (select count(*) h11_30_to_12 -- slot: t_hour = 11, t_minute >= 30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 11
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s7,
+ (select count(*) h12_to_12_30 -- slot: t_hour = 12, t_minute < 30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 12
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+ and store.s_store_name = 'ese') s8
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query89.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query89.sql
new file mode 100644
index 0000000..217a22a
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query89.sql
@@ -0,0 +1,41 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select * -- Query 89: monthly store sales sums for selected category/class pairs in 2000 that deviate by more than 10% from their window average
+from(
+select i_category, i_class, i_brand,
+ s_store_name, s_company_name,
+ d_moy,
+ sum(ss_sales_price) sum_sales,
+ avg(sum(ss_sales_price)) over -- window average computed over the grouped sums
+ (partition by i_category, i_brand, s_store_name, s_company_name) -- partition omits i_class and d_moy, so the average spans classes and months
+ avg_monthly_sales
+from item, store_sales, date_dim, store
+where ss_item_sk = i_item_sk and
+ ss_sold_date_sk = d_date_sk and
+ ss_store_sk = s_store_sk and
+ d_year in (2000) and
+ ((i_category in ('Home','Books','Electronics') and
+ i_class in ('wallpaper','parenting','musical')
+ )
+ or (i_category in ('Shoes','Jewelry','Men') and
+ i_class in ('womens','birdal','pants') -- NOTE(review): 'birdal' kept verbatim; presumably it matches the spelling in the generated data - confirm before changing
+ ))
+group by i_category, i_class, i_brand,
+ s_store_name, s_company_name, d_moy) tmp1
+where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 -- relative deviation above 0.1; a zero average yields null, which drops the row
+order by sum_sales - avg_monthly_sales, s_store_name
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query9.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query9.sql
new file mode 100644
index 0000000..ab72251
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query9.sql
@@ -0,0 +1,63 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select case when (select count(*) -- Query 9: five quantity buckets; each returns avg(ss_ext_discount_amt) when the bucket's row count exceeds a threshold, otherwise avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 1 and 20) > 25437
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 1 and 20)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 1 and 20) end bucket1 , -- bucket1: quantity 1-20, threshold 25437
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 21 and 40) > 22746
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 21 and 40)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 21 and 40) end bucket2, -- bucket2: quantity 21-40, threshold 22746
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 41 and 60) > 9387
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 41 and 60)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 41 and 60) end bucket3, -- bucket3: quantity 41-60, threshold 9387
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 61 and 80) > 10098
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 61 and 80)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 61 and 80) end bucket4, -- bucket4: quantity 61-80, threshold 10098
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 81 and 100) > 18213
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 81 and 100)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 81 and 100) end bucket5 -- bucket5: quantity 81-100, threshold 18213
+from reason -- the scalar subqueries do not reference reason; it only determines how many output rows are produced
+where r_reason_sk = 1 -- presumably selects a single row so the buckets are returned once - confirm r_reason_sk is unique
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query90.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query90.sql
new file mode 100644
index 0000000..2dfa02a
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query90.sql
@@ -0,0 +1,35 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio -- Query 90: ratio of web sales counted for t_hour 6-7 to those for t_hour 14-15; NOTE(review): no guard against pmc = 0
+ from ( select count(*) amc -- morning count
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 6 and 6+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) at, -- NOTE(review): alias "at" may be a reserved word in some SQL dialects - confirm against the target parser
+ ( select count(*) pmc -- afternoon count; same filters apart from the hour range
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 14 and 14+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) pt
+ order by am_pm_ratio
+ limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query91.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query91.sql
new file mode 100644
index 0000000..49d5fda
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query91.sql
@@ -0,0 +1,44 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select -- Query 91: summed catalog-return net loss per call center for returns dated d_year 1999, d_moy 11, restricted by customer demographics and address offset
+ cc_call_center_id Call_Center,
+ cc_name Call_Center_Name,
+ cc_manager Manager,
+ sum(cr_net_loss) Returns_Loss
+from
+ call_center,
+ catalog_returns,
+ date_dim,
+ customer,
+ customer_address,
+ customer_demographics,
+ household_demographics
+where
+ cr_call_center_sk = cc_call_center_sk
+and cr_returned_date_sk = d_date_sk
+and cr_returning_customer_sk= c_customer_sk
+and cd_demo_sk = c_current_cdemo_sk -- demographics, household and address all come from the customer's current_* keys
+and hd_demo_sk = c_current_hdemo_sk
+and ca_address_sk = c_current_addr_sk
+and d_year = 1999
+and d_moy = 11
+and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown')
+ or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree'))
+and hd_buy_potential like '0-500%' -- prefix match on the buy-potential label
+and ca_gmt_offset = -7
+group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status -- also grouped by marital and education status, so one call center can produce several rows
+order by sum(cr_net_loss) desc -- this query has no limit clause
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query92.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query92.sql
new file mode 100644
index 0000000..a7ce3a3
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query92.sql
@@ -0,0 +1,43 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select -- Query 92: total web discount for one manufacturer's items where a sale's discount exceeds 1.3x that item's average discount in the same 90-day window
+ sum(ws_ext_discount_amt) as "Excess Discount Amount"
+from
+ web_sales
+ ,item
+ ,date_dim
+where
+i_manufact_id = 269
+and i_item_sk = ws_item_sk
+and d_date between cast('1998-03-18' as date) and -- both bounds are typed as dates, so the comparison does not rely on implicit string-to-date conversion
+ (cast('1998-03-18' as date) + 90 days)
+and d_date_sk = ws_sold_date_sk
+and ws_ext_discount_amt
+ > ( -- correlated on i_item_sk: threshold is 1.3x the item's average discount over the same window
+ SELECT
+ 1.3 * avg(ws_ext_discount_amt)
+ FROM
+ web_sales
+ ,date_dim
+ WHERE
+ ws_item_sk = i_item_sk
+ and d_date between cast('1998-03-18' as date) and
+ (cast('1998-03-18' as date) + 90 days)
+ and d_date_sk = ws_sold_date_sk
+ )
+order by sum(ws_ext_discount_amt)
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query93.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query93.sql
new file mode 100644
index 0000000..20aa0a8
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query93.sql
@@ -0,0 +1,31 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+select ss_customer_sk -- Query 93: per customer, summed sales value net of returned quantity, for returns with one specific reason description
+ ,sum(act_sales) sumsales
+ from (select ss_item_sk
+ ,ss_ticket_number
+ ,ss_customer_sk
+ ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price -- value of the quantity that was kept
+ else (ss_quantity*ss_sales_price) end act_sales -- full value when no return quantity is recorded
+ from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk
+ and sr_ticket_number = ss_ticket_number)
+ ,reason
+ where sr_reason_sk = r_reason_sk -- a null sr_reason_sk never satisfies this, so sales without a matching return are filtered out despite the outer join
+ and r_reason_desc = 'Did not like the warranty') t
+ group by ss_customer_sk
+ order by sumsales, ss_customer_sk
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query94.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query94.sql
new file mode 100644
index 0000000..dab63be
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query94.sql
@@ -0,0 +1,42 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 94: distinct order count, total shipping cost and total net profit for web sales shipped to ca_state 'TX' with ship date in the 60 days from 1999-5-01, sold via web sites with web_company_name 'pri', where the order also has a row from a different warehouse and has no row in web_returns.
+select
+ count(distinct ws_order_number) as "order count"
+ ,sum(ws_ext_ship_cost) as "total shipping cost"
+ ,sum(ws_net_profit) as "total net profit"
+from
+ web_sales ws1
+ ,date_dim
+ ,customer_address
+ ,web_site
+where
+ d_date between '1999-5-01' and
+ (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and exists (select *
+ from web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+ and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+and not exists(select *
+ from web_returns wr1
+ where ws1.ws_order_number = wr1.wr_order_number)
+order by count(distinct ws_order_number)
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query95.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query95.sql
new file mode 100644
index 0000000..b082826
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query95.sql
@@ -0,0 +1,45 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 95: same measures and filters as the TX / 'pri' / 60-days-from-1999-5-01 web shipping report, restricted to orders present in ws_wh (orders with rows from two different warehouses) that also appear in web_returns.
+with ws_wh as
+(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2
+ from web_sales ws1,web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+ and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+ select
+ count(distinct ws_order_number) as "order count"
+ ,sum(ws_ext_ship_cost) as "total shipping cost"
+ ,sum(ws_net_profit) as "total net profit"
+from
+ web_sales ws1
+ ,date_dim
+ ,customer_address
+ ,web_site
+where
+ d_date between '1999-5-01' and
+ (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and ws1.ws_order_number in (select ws_order_number
+ from ws_wh)
+and ws1.ws_order_number in (select wr_order_number
+ from web_returns,ws_wh
+ where wr_order_number = ws_wh.ws_order_number)
+order by count(distinct ws_order_number)
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query96.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query96.sql
new file mode 100644
index 0000000..97cf08b
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query96.sql
@@ -0,0 +1,29 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 96: counts store_sales rows sold at t_hour = 8 with t_minute >= 30, by households with hd_dep_count = 5, at stores whose s_store_name is 'ese'.
+select count(*)
+from store_sales
+ ,household_demographics
+ ,time_dim, store
+where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 8
+ and time_dim.t_minute >= 30
+ and household_demographics.hd_dep_count = 5
+ and store.s_store_name = 'ese'
+order by count(*)
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query97.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query97.sql
new file mode 100644
index 0000000..c2d51a7
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query97.sql
@@ -0,0 +1,38 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 97: over distinct (customer, item) pairs sold with d_month_seq in 1212..1223, full-outer-joins store pairs (ssci) to catalog pairs (csci) and counts pairs found only in store, only in catalog, and in both.
+with ssci as (
+select ss_customer_sk customer_sk
+ ,ss_item_sk item_sk
+from store_sales,date_dim
+where ss_sold_date_sk = d_date_sk
+ and d_month_seq between 1212 and 1212 + 11
+group by ss_customer_sk
+ ,ss_item_sk),
+csci as(
+ select cs_bill_customer_sk customer_sk
+ ,cs_item_sk item_sk
+from catalog_sales,date_dim
+where cs_sold_date_sk = d_date_sk
+ and d_month_seq between 1212 and 1212 + 11
+group by cs_bill_customer_sk
+ ,cs_item_sk)
+ select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only
+ ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only
+ ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog
+from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk
+ and ssci.item_sk = csci.item_sk)
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query98.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query98.sql
new file mode 100644
index 0000000..29d5757
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query98.sql
@@ -0,0 +1,46 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 98: store sales revenue per item in categories 'Jewelry', 'Sports', 'Books' with d_date between 2001-01-12 and 30 days later, plus revenueratio = the item's revenue as a percentage of the total revenue of its i_class; no row limit.
+select i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ ,sum(ss_ext_sales_price) as itemrevenue
+ ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over
+ (partition by i_class) as revenueratio
+from
+ store_sales
+ ,item
+ ,date_dim
+where
+ ss_item_sk = i_item_sk
+ and i_category in ('Jewelry', 'Sports', 'Books')
+ and ss_sold_date_sk = d_date_sk
+ and d_date between cast('2001-01-12' as date)
+ and (cast('2001-01-12' as date) + 30 days)
+group by
+ i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+order by
+ i_category
+ ,i_class
+ ,i_item_id
+ ,i_item_desc
+ ,revenueratio
diff --git a/sdks/java/testing/tpcds/src/main/resources/queries/query99.sql b/sdks/java/testing/tpcds/src/main/resources/queries/query99.sql
new file mode 100644
index 0000000..de8e8ca
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/queries/query99.sql
@@ -0,0 +1,48 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- TPC-DS query 99: for catalog_sales rows whose ship date has d_month_seq in 1212..1223, groups by warehouse name (first 20 chars), sm_type and cc_name and counts rows per bucket of (cs_ship_date_sk - cs_sold_date_sk): <=30, 31-60, 61-90, 91-120, >120.
+select
+ substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days"
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days"
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days"
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days"
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as ">120 days"
+from
+ catalog_sales
+ ,warehouse
+ ,ship_mode
+ ,call_center
+ ,date_dim
+where
+ d_month_seq between 1212 and 1212 + 11
+and cs_ship_date_sk = d_date_sk
+and cs_warehouse_sk = w_warehouse_sk
+and cs_ship_mode_sk = sm_ship_mode_sk
+and cs_call_center_sk = cc_call_center_sk
+group by
+ substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+order by substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+limit 100
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/call_center.json b/sdks/java/testing/tpcds/src/main/resources/schemas/call_center.json
new file mode 100644
index 0000000..ec95095
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/call_center.json
@@ -0,0 +1,33 @@
+{"schema": [
+ {"name":"cc_call_center_sk","type":"identifier"},
+ {"name":"cc_call_center_id","type":"char(16)"},
+ {"name":"cc_rec_start_date","type":"date"},
+ {"name":"cc_rec_end_date","type":"date"},
+ {"name":"cc_closed_date_sk","type":"integer"},
+ {"name":"cc_open_date_sk","type":"integer"},
+ {"name":"cc_name","type":"varchar(50)"},
+ {"name":"cc_class","type":"varchar(50)"},
+ {"name":"cc_employees","type":"integer"},
+ {"name":"cc_sq_ft","type":"integer"},
+ {"name":"cc_hours","type":"char(20)"},
+ {"name":"cc_manager","type":"varchar(40)"},
+ {"name":"cc_mkt_id","type":"integer"},
+ {"name":"cc_mkt_class","type":"char(50)"},
+ {"name":"cc_mkt_desc","type":"varchar(100)"},
+ {"name":"cc_market_manager","type":"varchar(40)"},
+ {"name":"cc_division","type":"integer"},
+ {"name":"cc_division_name","type":"varchar(50)"},
+ {"name":"cc_company","type":"integer"},
+ {"name":"cc_company_name","type":"char(50)"},
+ {"name":"cc_street_number","type":"char(10)"},
+ {"name":"cc_street_name","type":"varchar(60)"},
+ {"name":"cc_street_type","type":"char(15)"},
+ {"name":"cc_suite_number","type":"char(10)"},
+ {"name":"cc_city","type":"varchar(60)"},
+ {"name":"cc_county","type":"varchar(30)"},
+ {"name":"cc_state","type":"char(2)"},
+ {"name":"cc_zip","type":"char(10)"},
+ {"name":"cc_country","type":"varchar(20)"},
+ {"name":"cc_gmt_offset","type":"decimal(5,2)"},
+ {"name":"cc_tax_percentage","type":"decimal(5,2)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/catalog_page.json b/sdks/java/testing/tpcds/src/main/resources/schemas/catalog_page.json
new file mode 100644
index 0000000..15a549466
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/catalog_page.json
@@ -0,0 +1,11 @@
+{"schema": [
+ {"name":"cp_catalog_page_sk","type":"identifier"},
+ {"name":"cp_catalog_page_id","type":"char(16)"},
+ {"name":"cp_start_date_sk","type":"integer"},
+ {"name":"cp_end_date_sk","type":"integer"},
+ {"name":"cp_department","type":"varchar(50)"},
+ {"name":"cp_catalog_number","type":"integer"},
+ {"name":"cp_catalog_page_number","type":"integer"},
+ {"name":"cp_description","type":"varchar(100)"},
+ {"name":"cp_type","type":"varchar(100)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/catalog_returns.json b/sdks/java/testing/tpcds/src/main/resources/schemas/catalog_returns.json
new file mode 100644
index 0000000..d967a9e
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/catalog_returns.json
@@ -0,0 +1,29 @@
+{"schema": [
+ {"name":"cr_returned_date_sk","type":"identifier"},
+ {"name":"cr_returned_time_sk","type":"identifier"},
+ {"name":"cr_item_sk","type":"identifier"},
+ {"name":"cr_refunded_customer_sk","type":"identifier"},
+ {"name":"cr_refunded_cdemo_sk","type":"identifier"},
+ {"name":"cr_refunded_hdemo_sk","type":"identifier"},
+ {"name":"cr_refunded_addr_sk","type":"identifier"},
+ {"name":"cr_returning_customer_sk","type":"identifier"},
+ {"name":"cr_returning_cdemo_sk","type":"identifier"},
+ {"name":"cr_returning_hdemo_sk","type":"identifier"},
+ {"name":"cr_returning_addr_sk","type":"identifier"},
+ {"name":"cr_call_center_sk","type":"identifier"},
+ {"name":"cr_catalog_page_sk","type":"identifier"},
+ {"name":"cr_ship_mode_sk","type":"identifier"},
+ {"name":"cr_warehouse_sk","type":"identifier"},
+ {"name":"cr_reason_sk","type":"identifier"},
+ {"name":"cr_order_number","type":"identifier"},
+ {"name":"cr_return_quantity","type":"integer"},
+ {"name":"cr_return_amount","type":"decimal(7,2)"},
+ {"name":"cr_return_tax","type":"decimal(7,2)"},
+ {"name":"cr_return_amt_inc_tax","type":"decimal(7,2)"},
+ {"name":"cr_fee","type":"decimal(7,2)"},
+ {"name":"cr_return_ship_cost","type":"decimal(7,2)"},
+ {"name":"cr_refunded_cash","type":"decimal(7,2)"},
+ {"name":"cr_reversed_charge","type":"decimal(7,2)"},
+ {"name":"cr_store_credit","type":"decimal(7,2)"},
+ {"name":"cr_net_loss","type":"decimal(7,2)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/catalog_sales.json b/sdks/java/testing/tpcds/src/main/resources/schemas/catalog_sales.json
new file mode 100644
index 0000000..50fae92
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/catalog_sales.json
@@ -0,0 +1,36 @@
+{"schema": [
+ {"name":"cs_sold_date_sk","type":"identifier"},
+ {"name":"cs_sold_time_sk","type":"identifier"},
+ {"name":"cs_ship_date_sk","type":"identifier"},
+ {"name":"cs_bill_customer_sk","type":"identifier"},
+ {"name":"cs_bill_cdemo_sk","type":"identifier"},
+ {"name":"cs_bill_hdemo_sk","type":"identifier"},
+ {"name":"cs_bill_addr_sk","type":"identifier"},
+ {"name":"cs_ship_customer_sk","type":"identifier"},
+ {"name":"cs_ship_cdemo_sk","type":"identifier"},
+ {"name":"cs_ship_hdemo_sk","type":"identifier"},
+ {"name":"cs_ship_addr_sk","type":"identifier"},
+ {"name":"cs_call_center_sk","type":"identifier"},
+ {"name":"cs_catalog_page_sk","type":"identifier"},
+ {"name":"cs_ship_mode_sk","type":"identifier"},
+ {"name":"cs_warehouse_sk","type":"identifier"},
+ {"name":"cs_item_sk","type":"identifier"},
+ {"name":"cs_promo_sk","type":"identifier"},
+ {"name":"cs_order_number","type":"identifier"},
+ {"name":"cs_quantity","type":"integer"},
+ {"name":"cs_wholesale_cost","type":"decimal(7,2)"},
+ {"name":"cs_list_price","type":"decimal(7,2)"},
+ {"name":"cs_sales_price","type":"decimal(7,2)"},
+ {"name":"cs_ext_discount_amt","type":"decimal(7,2)"},
+ {"name":"cs_ext_sales_price","type":"decimal(7,2)"},
+ {"name":"cs_ext_wholesale_cost","type":"decimal(7,2)"},
+ {"name":"cs_ext_list_price","type":"decimal(7,2)"},
+ {"name":"cs_ext_tax","type":"decimal(7,2)"},
+ {"name":"cs_coupon_amt","type":"decimal(7,2)"},
+ {"name":"cs_ext_ship_cost","type":"decimal(7,2)"},
+ {"name":"cs_net_paid","type":"decimal(7,2)"},
+ {"name":"cs_net_paid_inc_tax","type":"decimal(7,2)"},
+ {"name":"cs_net_paid_inc_ship","type":"decimal(7,2)"},
+ {"name":"cs_net_paid_inc_ship_tax","type":"decimal(7,2)"},
+ {"name":"cs_net_profit","type":"decimal(7,2)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/customer.json b/sdks/java/testing/tpcds/src/main/resources/schemas/customer.json
new file mode 100644
index 0000000..fa1fcfb
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/customer.json
@@ -0,0 +1,20 @@
+{"schema": [
+ {"name":"c_customer_sk","type":"identifier"},
+ {"name":"c_customer_id","type":"char(16)"},
+ {"name":"c_current_cdemo_sk","type":"identifier"},
+ {"name":"c_current_hdemo_sk","type":"identifier"},
+ {"name":"c_current_addr_sk","type":"identifier"},
+ {"name":"c_first_shipto_date_sk","type":"identifier"},
+ {"name":"c_first_sales_date_sk","type":"identifier"},
+ {"name":"c_salutation","type":"char(10)"},
+ {"name":"c_first_name","type":"char(20)"},
+ {"name":"c_last_name","type":"char(30)"},
+ {"name":"c_preferred_cust_flag","type":"char(1)"},
+ {"name":"c_birth_day","type":"integer"},
+ {"name":"c_birth_month","type":"integer"},
+ {"name":"c_birth_year","type":"integer"},
+ {"name":"c_birth_country","type":"varchar(20)"},
+ {"name":"c_login","type":"char(13)"},
+ {"name":"c_email_address","type":"char(50)"},
+ {"name":"c_last_review_date_sk","type":"identifier"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/customer_address.json b/sdks/java/testing/tpcds/src/main/resources/schemas/customer_address.json
new file mode 100644
index 0000000..a37bdbe
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/customer_address.json
@@ -0,0 +1,15 @@
+{"schema": [
+ {"name":"ca_address_sk","type":"identifier"},
+ {"name":"ca_address_id","type":"char(16)"},
+ {"name":"ca_street_number","type":"char(10)"},
+ {"name":"ca_street_name","type":"varchar(60)"},
+ {"name":"ca_street_type","type":"char(15)"},
+ {"name":"ca_suite_number","type":"char(10)"},
+ {"name":"ca_city","type":"varchar(60)"},
+ {"name":"ca_county","type":"varchar(30)"},
+ {"name":"ca_state","type":"char(2)"},
+ {"name":"ca_zip","type":"char(10)"},
+ {"name":"ca_country","type":"varchar(20)"},
+ {"name":"ca_gmt_offset","type":"decimal(5,2)"},
+ {"name":"ca_location_type","type":"char(20)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/customer_demographics.json b/sdks/java/testing/tpcds/src/main/resources/schemas/customer_demographics.json
new file mode 100644
index 0000000..2b2211b
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/customer_demographics.json
@@ -0,0 +1,11 @@
+{"schema": [
+ {"name":"cd_demo_sk","type":"identifier"},
+ {"name":"cd_gender","type":"char(1)"},
+ {"name":"cd_marital_status","type":"char(1)"},
+ {"name":"cd_education_status","type":"char(20)"},
+ {"name":"cd_purchase_estimate","type":"integer"},
+ {"name":"cd_credit_rating","type":"char(10)"},
+ {"name":"cd_dep_count","type":"integer"},
+ {"name":"cd_dep_employed_count","type":"integer"},
+ {"name":"cd_dep_college_count","type":"integer"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/date_dim.json b/sdks/java/testing/tpcds/src/main/resources/schemas/date_dim.json
new file mode 100644
index 0000000..287738f
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/date_dim.json
@@ -0,0 +1,30 @@
+{"schema": [
+ {"name":"d_date_sk","type":"identifier"},
+ {"name":"d_date_id","type":"char(16)"},
+ {"name":"d_date","type":"date"},
+ {"name":"d_month_seq","type":"integer"},
+ {"name":"d_week_seq","type":"integer"},
+ {"name":"d_quarter_seq","type":"integer"},
+ {"name":"d_year","type":"integer"},
+ {"name":"d_dow","type":"integer"},
+ {"name":"d_moy","type":"integer"},
+ {"name":"d_dom","type":"integer"},
+ {"name":"d_qoy","type":"integer"},
+ {"name":"d_fy_year","type":"integer"},
+ {"name":"d_fy_quarter_seq","type":"integer"},
+ {"name":"d_fy_week_seq","type":"integer"},
+ {"name":"d_day_name","type":"char(9)"},
+ {"name":"d_quarter_name","type":"char(6)"},
+ {"name":"d_holiday","type":"char(1)"},
+ {"name":"d_weekend","type":"char(1)"},
+ {"name":"d_following_holiday","type":"char(1)"},
+ {"name":"d_first_dom","type":"integer"},
+ {"name":"d_last_dom","type":"integer"},
+ {"name":"d_same_day_ly","type":"integer"},
+ {"name":"d_same_day_lq","type":"integer"},
+ {"name":"d_current_day","type":"char(1)"},
+ {"name":"d_current_week","type":"char(1)"},
+ {"name":"d_current_month","type":"char(1)"},
+ {"name":"d_current_quarter","type":"char(1)"},
+ {"name":"d_current_year","type":"char(1)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/household_demographics.json b/sdks/java/testing/tpcds/src/main/resources/schemas/household_demographics.json
new file mode 100644
index 0000000..a261ae9
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/household_demographics.json
@@ -0,0 +1,7 @@
+{"schema": [
+ {"name":"hd_demo_sk","type":"identifier"},
+ {"name":"hd_income_band_sk","type":"identifier"},
+ {"name":"hd_buy_potential","type":"char(15)"},
+ {"name":"hd_dep_count","type":"integer"},
+ {"name":"hd_vehicle_count","type":"integer"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/income_band.json b/sdks/java/testing/tpcds/src/main/resources/schemas/income_band.json
new file mode 100644
index 0000000..3066b27
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/income_band.json
@@ -0,0 +1,5 @@
+{"schema": [
+ {"name":"ib_income_band_sk","type":"identifier"},
+ {"name":"ib_lower_bound","type":"integer"},
+ {"name":"ib_upper_bound","type":"integer"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/inventory.json b/sdks/java/testing/tpcds/src/main/resources/schemas/inventory.json
new file mode 100644
index 0000000..ee786bc
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/inventory.json
@@ -0,0 +1,7 @@
+{"schema": [
+ {"name":"inv_date_sk","type":"identifier"},
+ {"name":"inv_item_sk","type":"identifier"},
+ {"name":"inv_warehouse_sk","type":"identifier"},
+ {"name":"inv_quantity_on_hand","type":"integer"}
+]}
+
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/item.json b/sdks/java/testing/tpcds/src/main/resources/schemas/item.json
new file mode 100644
index 0000000..593aff1
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/item.json
@@ -0,0 +1,24 @@
+{"schema": [
+ {"name":"i_item_sk","type":"identifier"},
+ {"name":"i_item_id","type":"char(16)"},
+ {"name":"i_rec_start_date","type":"date"},
+ {"name":"i_rec_end_date","type":"date"},
+ {"name":"i_item_desc","type":"varchar(200)"},
+ {"name":"i_current_price","type":"decimal(7,2)"},
+ {"name":"i_wholesale_cost","type":"decimal(7,2)"},
+ {"name":"i_brand_id","type":"integer"},
+ {"name":"i_brand","type":"char(50)"},
+ {"name":"i_class_id","type":"integer"},
+ {"name":"i_class","type":"char(50)"},
+ {"name":"i_category_id","type":"integer"},
+ {"name":"i_category","type":"char(50)"},
+ {"name":"i_manufact_id","type":"integer"},
+ {"name":"i_manufact","type":"char(50)"},
+ {"name":"i_size","type":"char(20)"},
+ {"name":"i_formulation","type":"char(20)"},
+ {"name":"i_color","type":"char(20)"},
+ {"name":"i_units","type":"char(10)"},
+ {"name":"i_container","type":"char(10)"},
+ {"name":"i_manager_id","type":"integer"},
+ {"name":"i_product_name","type":"char(50)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/promotion.json b/sdks/java/testing/tpcds/src/main/resources/schemas/promotion.json
new file mode 100644
index 0000000..28d57d4
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/promotion.json
@@ -0,0 +1,21 @@
+{"schema": [
+ {"name":"p_promo_sk","type":"identifier"},
+ {"name":"p_promo_id","type":"char(16)"},
+ {"name":"p_start_date_sk","type":"identifier"},
+ {"name":"p_end_date_sk","type":"identifier"},
+ {"name":"p_item_sk","type":"identifier"},
+ {"name":"p_cost","type":"decimal(15,2)"},
+ {"name":"p_response_target","type":"integer"},
+ {"name":"p_promo_name","type":"char(50)"},
+ {"name":"p_channel_dmail","type":"char(1)"},
+ {"name":"p_channel_email","type":"char(1)"},
+ {"name":"p_channel_catalog","type":"char(1)"},
+ {"name":"p_channel_tv","type":"char(1)"},
+ {"name":"p_channel_radio","type":"char(1)"},
+ {"name":"p_channel_press","type":"char(1)"},
+ {"name":"p_channel_event","type":"char(1)"},
+ {"name":"p_channel_demo","type":"char(1)"},
+ {"name":"p_channel_details","type":"varchar(100)"},
+ {"name":"p_purpose","type":"char(15)"},
+ {"name":"p_discount_active","type":"char(1)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/reason.json b/sdks/java/testing/tpcds/src/main/resources/schemas/reason.json
new file mode 100644
index 0000000..64b9723
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/reason.json
@@ -0,0 +1,5 @@
+{"schema": [
+ {"name":"r_reason_sk","type":"identifier"},
+ {"name":"r_reason_id","type":"char(16)"},
+ {"name":"r_reason_desc","type":"char(100)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/ship_mode.json b/sdks/java/testing/tpcds/src/main/resources/schemas/ship_mode.json
new file mode 100644
index 0000000..09c4873
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/ship_mode.json
@@ -0,0 +1,8 @@
+{"schema": [
+ {"name":"sm_ship_mode_sk","type":"identifier"},
+ {"name":"sm_ship_mode_id","type":"char(16)"},
+ {"name":"sm_type","type":"char(30)"},
+ {"name":"sm_code","type":"char(10)"},
+ {"name":"sm_carrier","type":"char(20)"},
+ {"name":"sm_contract","type":"char(20)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/store.json b/sdks/java/testing/tpcds/src/main/resources/schemas/store.json
new file mode 100644
index 0000000..3df8465
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/store.json
@@ -0,0 +1,31 @@
+{"schema": [
+ {"name":"s_store_sk","type":"identifier"},
+ {"name":"s_store_id","type":"char(16)"},
+ {"name":"s_rec_start_date","type":"date"},
+ {"name":"s_rec_end_date","type":"date"},
+ {"name":"s_closed_date_sk","type":"identifier"},
+ {"name":"s_store_name","type":"varchar(50)"},
+ {"name":"s_number_employees","type":"integer"},
+ {"name":"s_floor_space","type":"integer"},
+ {"name":"s_hours","type":"char(20)"},
+ {"name":"s_manager","type":"varchar(40)"},
+ {"name":"s_market_id","type":"integer"},
+ {"name":"s_geography_class","type":"varchar(100)"},
+ {"name":"s_market_desc","type":"varchar(100)"},
+ {"name":"s_market_manager","type":"varchar(40)"},
+ {"name":"s_division_id","type":"integer"},
+ {"name":"s_division_name","type":"varchar(50)"},
+ {"name":"s_company_id","type":"integer"},
+ {"name":"s_company_name","type":"varchar(50)"},
+ {"name":"s_street_number","type":"varchar(10)"},
+ {"name":"s_street_name","type":"varchar(60)"},
+ {"name":"s_street_type","type":"char(15)"},
+ {"name":"s_suite_number","type":"char(10)"},
+ {"name":"s_city","type":"varchar(60)"},
+ {"name":"s_county","type":"varchar(30)"},
+ {"name":"s_state","type":"char(2)"},
+ {"name":"s_zip","type":"char(10)"},
+ {"name":"s_country","type":"varchar(20)"},
+ {"name":"s_gmt_offset","type":"decimal(5,2)"},
+ {"name":"s_tax_percentage","type":"decimal(5,2)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/store_returns.json b/sdks/java/testing/tpcds/src/main/resources/schemas/store_returns.json
new file mode 100644
index 0000000..99b0db1
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/store_returns.json
@@ -0,0 +1,22 @@
+{"schema": [
+ {"name":"sr_returned_date_sk","type":"identifier"},
+ {"name":"sr_return_time_sk","type":"identifier"},
+ {"name":"sr_item_sk","type":"identifier"},
+ {"name":"sr_customer_sk","type":"identifier"},
+ {"name":"sr_cdemo_sk","type":"identifier"},
+ {"name":"sr_hdemo_sk","type":"identifier"},
+ {"name":"sr_addr_sk","type":"identifier"},
+ {"name":"sr_store_sk","type":"identifier"},
+ {"name":"sr_reason_sk","type":"identifier"},
+ {"name":"sr_ticket_number","type":"identifier"},
+ {"name":"sr_return_quantity","type":"integer"},
+ {"name":"sr_return_amt","type":"decimal(7,2)"},
+ {"name":"sr_return_tax","type":"decimal(7,2)"},
+ {"name":"sr_return_amt_inc_tax","type":"decimal(7,2)"},
+ {"name":"sr_fee","type":"decimal(7,2)"},
+ {"name":"sr_return_ship_cost","type":"decimal(7,2)"},
+ {"name":"sr_refunded_cash","type":"decimal(7,2)"},
+ {"name":"sr_reversed_charge","type":"decimal(7,2)"},
+ {"name":"sr_store_credit","type":"decimal(7,2)"},
+ {"name":"sr_net_loss","type":"decimal(7,2)"}
+]}
\ No newline at end of file
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/store_sales.json b/sdks/java/testing/tpcds/src/main/resources/schemas/store_sales.json
new file mode 100644
index 0000000..3b133d8
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/store_sales.json
@@ -0,0 +1,25 @@
+{"schema": [
+ {"name":"ss_sold_date_sk","type":"identifier"},
+ {"name":"ss_sold_time_sk","type":"identifier"},
+ {"name":"ss_item_sk","type":"identifier"},
+ {"name":"ss_customer_sk","type":"identifier"},
+ {"name":"ss_cdemo_sk","type":"identifier"},
+ {"name":"ss_hdemo_sk","type":"identifier"},
+ {"name":"ss_addr_sk","type":"identifier"},
+ {"name":"ss_store_sk","type":"identifier"},
+ {"name":"ss_promo_sk","type":"identifier"},
+ {"name":"ss_ticket_number","type":"identifier"},
+ {"name":"ss_quantity","type":"integer"},
+ {"name":"ss_wholesale_cost","type":"decimal(7,2)"},
+ {"name":"ss_list_price","type":"decimal(7,2)"},
+ {"name":"ss_sales_price","type":"decimal(7,2)"},
+ {"name":"ss_ext_discount_amt","type":"decimal(7,2)"},
+ {"name":"ss_ext_sales_price","type":"decimal(7,2)"},
+ {"name":"ss_ext_wholesale_cost","type":"decimal(7,2)"},
+ {"name":"ss_ext_list_price","type":"decimal(7,2)"},
+ {"name":"ss_ext_tax","type":"decimal(7,2)"},
+ {"name":"ss_coupon_amt","type":"decimal(7,2)"},
+ {"name":"ss_net_paid","type":"decimal(7,2)"},
+ {"name":"ss_net_paid_inc_tax","type":"decimal(7,2)"},
+ {"name":"ss_net_profit","type":"decimal(7,2)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/time_dim.json b/sdks/java/testing/tpcds/src/main/resources/schemas/time_dim.json
new file mode 100644
index 0000000..e1d51f6
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/time_dim.json
@@ -0,0 +1,12 @@
+{"schema": [
+ {"name":"t_time_sk","type":"identifier"},
+ {"name":"t_time_id","type":"char(16)"},
+ {"name":"t_time","type":"integer"},
+ {"name":"t_hour","type":"integer"},
+ {"name":"t_minute","type":"integer"},
+ {"name":"t_second","type":"integer"},
+ {"name":"t_am_pm","type":"char(2)"},
+ {"name":"t_shift","type":"char(20)"},
+ {"name":"t_sub_shift","type":"char(20)"},
+ {"name":"t_meal_time","type":"char(20)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/warehouse.json b/sdks/java/testing/tpcds/src/main/resources/schemas/warehouse.json
new file mode 100644
index 0000000..e3126de
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/warehouse.json
@@ -0,0 +1,16 @@
+{"schema": [
+ {"name":"w_warehouse_sk","type":"identifier"},
+ {"name":"w_warehouse_id","type":"char(16)"},
+ {"name":"w_warehouse_name","type":"varchar(20)"},
+ {"name":"w_warehouse_sq_ft","type":"integer"},
+ {"name":"w_street_number","type":"char(10)"},
+ {"name":"w_street_name","type":"varchar(60)"},
+ {"name":"w_street_type","type":"char(15)"},
+ {"name":"w_suite_number","type":"char(10)"},
+ {"name":"w_city","type":"varchar(60)"},
+ {"name":"w_county","type":"varchar(30)"},
+ {"name":"w_state","type":"char(2)"},
+ {"name":"w_zip","type":"char(10)"},
+ {"name":"w_country","type":"varchar(20)"},
+ {"name":"w_gmt_offset","type":"decimal(5,2)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/web_page.json b/sdks/java/testing/tpcds/src/main/resources/schemas/web_page.json
new file mode 100644
index 0000000..4dc3436
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/web_page.json
@@ -0,0 +1,16 @@
+{"schema": [
+ {"name":"wp_web_page_sk","type":"identifier"},
+ {"name":"wp_web_page_id","type":"char(16)"},
+ {"name":"wp_rec_start_date","type":"date"},
+ {"name":"wp_rec_end_date","type":"date"},
+ {"name":"wp_creation_date_sk","type":"identifier"},
+ {"name":"wp_access_date_sk","type":"identifier"},
+ {"name":"wp_autogen_flag","type":"char(1)"},
+ {"name":"wp_customer_sk","type":"identifier"},
+ {"name":"wp_url","type":"varchar(100)"},
+ {"name":"wp_type","type":"char(50)"},
+ {"name":"wp_char_count","type":"integer"},
+ {"name":"wp_link_count","type":"integer"},
+ {"name":"wp_image_count","type":"integer"},
+ {"name":"wp_max_ad_count","type":"integer"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/web_returns.json b/sdks/java/testing/tpcds/src/main/resources/schemas/web_returns.json
new file mode 100644
index 0000000..101ef1c
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/web_returns.json
@@ -0,0 +1,26 @@
+{"schema": [
+ {"name":"wr_returned_date_sk","type":"identifier"},
+ {"name":"wr_returned_time_sk","type":"identifier"},
+ {"name":"wr_item_sk","type":"identifier"},
+ {"name":"wr_refunded_customer_sk","type":"identifier"},
+ {"name":"wr_refunded_cdemo_sk","type":"identifier"},
+ {"name":"wr_refunded_hdemo_sk","type":"identifier"},
+ {"name":"wr_refunded_addr_sk","type":"identifier"},
+ {"name":"wr_returning_customer_sk","type":"identifier"},
+ {"name":"wr_returning_cdemo_sk","type":"identifier"},
+ {"name":"wr_returning_hdemo_sk","type":"identifier"},
+ {"name":"wr_returning_addr_sk","type":"identifier"},
+ {"name":"wr_web_page_sk","type":"identifier"},
+ {"name":"wr_reason_sk","type":"identifier"},
+ {"name":"wr_order_number","type":"identifier"},
+ {"name":"wr_return_quantity","type":"integer"},
+ {"name":"wr_return_amt","type":"decimal(7,2)"},
+ {"name":"wr_return_tax","type":"decimal(7,2)"},
+ {"name":"wr_return_amt_inc_tax","type":"decimal(7,2)"},
+ {"name":"wr_fee","type":"decimal(7,2)"},
+ {"name":"wr_return_ship_cost","type":"decimal(7,2)"},
+ {"name":"wr_refunded_cash","type":"decimal(7,2)"},
+ {"name":"wr_reversed_charge","type":"decimal(7,2)"},
+ {"name":"wr_account_credit","type":"decimal(7,2)"},
+ {"name":"wr_net_loss","type":"decimal(7,2)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/web_sales.json b/sdks/java/testing/tpcds/src/main/resources/schemas/web_sales.json
new file mode 100644
index 0000000..2cbcdaa
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/web_sales.json
@@ -0,0 +1,36 @@
+{"schema": [
+ {"name":"ws_sold_date_sk","type":"identifier"},
+ {"name":"ws_sold_time_sk","type":"identifier"},
+ {"name":"ws_ship_date_sk","type":"identifier"},
+ {"name":"ws_item_sk","type":"identifier"},
+ {"name":"ws_bill_customer_sk","type":"identifier"},
+ {"name":"ws_bill_cdemo_sk","type":"identifier"},
+ {"name":"ws_bill_hdemo_sk","type":"identifier"},
+ {"name":"ws_bill_addr_sk","type":"identifier"},
+ {"name":"ws_ship_customer_sk","type":"identifier"},
+ {"name":"ws_ship_cdemo_sk","type":"identifier"},
+ {"name":"ws_ship_hdemo_sk","type":"identifier"},
+ {"name":"ws_ship_addr_sk","type":"identifier"},
+ {"name":"ws_web_page_sk","type":"identifier"},
+ {"name":"ws_web_site_sk","type":"identifier"},
+ {"name":"ws_ship_mode_sk","type":"identifier"},
+ {"name":"ws_warehouse_sk","type":"identifier"},
+ {"name":"ws_promo_sk","type":"identifier"},
+ {"name":"ws_order_number","type":"identifier"},
+ {"name":"ws_quantity","type":"integer"},
+ {"name":"ws_wholesale_cost","type":"decimal(7,2)"},
+ {"name":"ws_list_price","type":"decimal(7,2)"},
+ {"name":"ws_sales_price","type":"decimal(7,2)"},
+ {"name":"ws_ext_discount_amt","type":"decimal(7,2)"},
+ {"name":"ws_ext_sales_price","type":"decimal(7,2)"},
+ {"name":"ws_ext_wholesale_cost","type":"decimal(7,2)"},
+ {"name":"ws_ext_list_price","type":"decimal(7,2)"},
+ {"name":"ws_ext_tax","type":"decimal(7,2)"},
+ {"name":"ws_coupon_amt","type":"decimal(7,2)"},
+ {"name":"ws_ext_ship_cost","type":"decimal(7,2)"},
+ {"name":"ws_net_paid","type":"decimal(7,2)"},
+ {"name":"ws_net_paid_inc_tax","type":"decimal(7,2)"},
+ {"name":"ws_net_paid_inc_ship","type":"decimal(7,2)"},
+ {"name":"ws_net_paid_inc_ship_tax","type":"decimal(7,2)"},
+ {"name":"ws_net_profit","type":"decimal(7,2)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas/web_site.json b/sdks/java/testing/tpcds/src/main/resources/schemas/web_site.json
new file mode 100644
index 0000000..7cecde0
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/main/resources/schemas/web_site.json
@@ -0,0 +1,28 @@
+{"schema": [
+ {"name":"web_site_sk","type":"identifier"},
+ {"name":"web_site_id","type":"char(16)"},
+ {"name":"web_rec_start_date","type":"date"},
+ {"name":"web_rec_end_date","type":"date"},
+ {"name":"web_name","type":"varchar(50)"},
+ {"name":"web_open_date_sk","type":"identifier"},
+ {"name":"web_close_date_sk","type":"identifier"},
+ {"name":"web_class","type":"varchar(50)"},
+ {"name":"web_manager","type":"varchar(40)"},
+ {"name":"web_mkt_id","type":"integer"},
+ {"name":"web_mkt_class","type":"varchar(50)"},
+ {"name":"web_mkt_desc","type":"varchar(100)"},
+ {"name":"web_market_manager","type":"varchar(40)"},
+ {"name":"web_company_id","type":"integer"},
+ {"name":"web_company_name","type":"char(50)"},
+ {"name":"web_street_number","type":"char(10)"},
+ {"name":"web_street_name","type":"varchar(60)"},
+ {"name":"web_street_type","type":"char(15)"},
+ {"name":"web_suite_number","type":"char(10)"},
+ {"name":"web_city","type":"varchar(60)"},
+ {"name":"web_county","type":"varchar(30)"},
+ {"name":"web_state","type":"char(2)"},
+ {"name":"web_zip","type":"char(10)"},
+ {"name":"web_country","type":"varchar(20)"},
+ {"name":"web_gmt_offset","type":"decimal(5,2)"},
+ {"name":"web_tax_percentage","type":"decimal(5,2)"}
+]}
diff --git a/sdks/java/testing/tpcds/src/test/java/org/apache/beam/sdk/tpcds/QueryReaderTest.java b/sdks/java/testing/tpcds/src/test/java/org/apache/beam/sdk/tpcds/QueryReaderTest.java
new file mode 100644
index 0000000..5696410
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/test/java/org/apache/beam/sdk/tpcds/QueryReaderTest.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.tpcds;
+
+import static org.junit.Assert.assertEquals;
+import org.junit.Test;
+
+/** Tests that {@link QueryReader#readQuery} returns the expected text for bundled queries. */
+public class QueryReaderTest {
+  /**
+   * License banner, written as SQL line comments, that is prepended to every expected query body
+   * before it is compared with the text returned by the reader.
+   */
+  private final String headers = "-- Licensed to the Apache Software Foundation (ASF) under one\n" +
+      "-- or more contributor license agreements. See the NOTICE file\n" +
+      "-- distributed with this work for additional information\n" +
+      "-- regarding copyright ownership. The ASF licenses this file\n" +
+      "-- to you under the Apache License, Version 2.0 (the\n" +
+      "-- \"License\"); you may not use this file except in compliance\n" +
+      "-- with the License. You may obtain a copy of the License at\n" +
+      "--\n" +
+      "-- http://www.apache.org/licenses/LICENSE-2.0\n" +
+      "--\n" +
+      "-- Unless required by applicable law or agreed to in writing, software\n" +
+      "-- distributed under the License is distributed on an \"AS IS\" BASIS,\n" +
+      "-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" +
+      "-- See the License for the specific language governing permissions and\n" +
+      "-- limitations under the License.\n";
+
+  /** Removes every run of whitespace so that comparisons ignore formatting differences. */
+  private static String stripWhitespace(String text) {
+    return text.replaceAll("\\s+", "");
+  }
+
+  /**
+   * Reads the query called {@code queryName} and asserts that it equals the license banner
+   * followed by {@code expectedBody}, ignoring all whitespace on both sides.
+   */
+  private void assertQueryMatches(String queryName, String expectedBody) throws Exception {
+    String actual = QueryReader.readQuery(queryName);
+    assertEquals(stripWhitespace(headers + expectedBody), stripWhitespace(actual));
+  }
+
+  @Test
+  public void testQuery3String() throws Exception {
+    String expected = "select dt.d_year \n" +
+        " ,item.i_brand_id brand_id \n" +
+        " ,item.i_brand brand\n" +
+        " ,sum(ss_ext_sales_price) sum_agg\n" +
+        " from date_dim dt \n" +
+        " ,store_sales\n" +
+        " ,item\n" +
+        " where dt.d_date_sk = store_sales.ss_sold_date_sk\n" +
+        " and store_sales.ss_item_sk = item.i_item_sk\n" +
+        " and item.i_manufact_id = 436\n" +
+        " and dt.d_moy=12\n" +
+        " group by dt.d_year\n" +
+        " ,item.i_brand\n" +
+        " ,item.i_brand_id\n" +
+        " order by dt.d_year\n" +
+        " ,sum_agg desc\n" +
+        " ,brand_id\n" +
+        " limit 100";
+    assertQueryMatches("query3", expected);
+  }
+
+  @Test
+  public void testQuery4String() throws Exception {
+    String expected = "with year_total as (\n" +
+        " select c_customer_id customer_id\n" +
+        " ,c_first_name customer_first_name\n" +
+        " ,c_last_name customer_last_name\n" +
+        " ,c_preferred_cust_flag customer_preferred_cust_flag\n" +
+        " ,c_birth_country customer_birth_country\n" +
+        " ,c_login customer_login\n" +
+        " ,c_email_address customer_email_address\n" +
+        " ,d_year dyear\n" +
+        " ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total\n" +
+        " ,'s' sale_type\n" +
+        " from customer\n" +
+        " ,store_sales\n" +
+        " ,date_dim\n" +
+        " where c_customer_sk = ss_customer_sk\n" +
+        " and ss_sold_date_sk = d_date_sk\n" +
+        " group by c_customer_id\n" +
+        " ,c_first_name\n" +
+        " ,c_last_name\n" +
+        " ,c_preferred_cust_flag\n" +
+        " ,c_birth_country\n" +
+        " ,c_login\n" +
+        " ,c_email_address\n" +
+        " ,d_year\n" +
+        " union all\n" +
+        " select c_customer_id customer_id\n" +
+        " ,c_first_name customer_first_name\n" +
+        " ,c_last_name customer_last_name\n" +
+        " ,c_preferred_cust_flag customer_preferred_cust_flag\n" +
+        " ,c_birth_country customer_birth_country\n" +
+        " ,c_login customer_login\n" +
+        " ,c_email_address customer_email_address\n" +
+        " ,d_year dyear\n" +
+        " ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total\n" +
+        " ,'c' sale_type\n" +
+        " from customer\n" +
+        " ,catalog_sales\n" +
+        " ,date_dim\n" +
+        " where c_customer_sk = cs_bill_customer_sk\n" +
+        " and cs_sold_date_sk = d_date_sk\n" +
+        " group by c_customer_id\n" +
+        " ,c_first_name\n" +
+        " ,c_last_name\n" +
+        " ,c_preferred_cust_flag\n" +
+        " ,c_birth_country\n" +
+        " ,c_login\n" +
+        " ,c_email_address\n" +
+        " ,d_year\n" +
+        "union all\n" +
+        " select c_customer_id customer_id\n" +
+        " ,c_first_name customer_first_name\n" +
+        " ,c_last_name customer_last_name\n" +
+        " ,c_preferred_cust_flag customer_preferred_cust_flag\n" +
+        " ,c_birth_country customer_birth_country\n" +
+        " ,c_login customer_login\n" +
+        " ,c_email_address customer_email_address\n" +
+        " ,d_year dyear\n" +
+        " ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total\n" +
+        " ,'w' sale_type\n" +
+        " from customer\n" +
+        " ,web_sales\n" +
+        " ,date_dim\n" +
+        " where c_customer_sk = ws_bill_customer_sk\n" +
+        " and ws_sold_date_sk = d_date_sk\n" +
+        " group by c_customer_id\n" +
+        " ,c_first_name\n" +
+        " ,c_last_name\n" +
+        " ,c_preferred_cust_flag\n" +
+        " ,c_birth_country\n" +
+        " ,c_login\n" +
+        " ,c_email_address\n" +
+        " ,d_year\n" +
+        " )\n" +
+        " select \n" +
+        " t_s_secyear.customer_id\n" +
+        " ,t_s_secyear.customer_first_name\n" +
+        " ,t_s_secyear.customer_last_name\n" +
+        " ,t_s_secyear.customer_email_address\n" +
+        " from year_total t_s_firstyear\n" +
+        " ,year_total t_s_secyear\n" +
+        " ,year_total t_c_firstyear\n" +
+        " ,year_total t_c_secyear\n" +
+        " ,year_total t_w_firstyear\n" +
+        " ,year_total t_w_secyear\n" +
+        " where t_s_secyear.customer_id = t_s_firstyear.customer_id\n" +
+        " and t_s_firstyear.customer_id = t_c_secyear.customer_id\n" +
+        " and t_s_firstyear.customer_id = t_c_firstyear.customer_id\n" +
+        " and t_s_firstyear.customer_id = t_w_firstyear.customer_id\n" +
+        " and t_s_firstyear.customer_id = t_w_secyear.customer_id\n" +
+        " and t_s_firstyear.sale_type = 's'\n" +
+        " and t_c_firstyear.sale_type = 'c'\n" +
+        " and t_w_firstyear.sale_type = 'w'\n" +
+        " and t_s_secyear.sale_type = 's'\n" +
+        " and t_c_secyear.sale_type = 'c'\n" +
+        " and t_w_secyear.sale_type = 'w'\n" +
+        " and t_s_firstyear.dyear = 2001\n" +
+        " and t_s_secyear.dyear = 2001+1\n" +
+        " and t_c_firstyear.dyear = 2001\n" +
+        " and t_c_secyear.dyear = 2001+1\n" +
+        " and t_w_firstyear.dyear = 2001\n" +
+        " and t_w_secyear.dyear = 2001+1\n" +
+        " and t_s_firstyear.year_total > 0\n" +
+        " and t_c_firstyear.year_total > 0\n" +
+        " and t_w_firstyear.year_total > 0\n" +
+        " and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end\n" +
+        " > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end\n" +
+        " and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end\n" +
+        " > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end\n" +
+        " order by t_s_secyear.customer_id\n" +
+        " ,t_s_secyear.customer_first_name\n" +
+        " ,t_s_secyear.customer_last_name\n" +
+        " ,t_s_secyear.customer_email_address\n" +
+        "limit 100";
+    assertQueryMatches("query4", expected);
+  }
+
+  @Test
+  public void testQuery55String() throws Exception {
+    String expected = "select i_brand_id brand_id, i_brand brand,\n" +
+        " \tsum(ss_ext_sales_price) ext_price\n" +
+        " from date_dim, store_sales, item\n" +
+        " where d_date_sk = ss_sold_date_sk\n" +
+        " \tand ss_item_sk = i_item_sk\n" +
+        " \tand i_manager_id=36\n" +
+        " \tand d_moy=12\n" +
+        " \tand d_year=2001\n" +
+        " group by i_brand, i_brand_id\n" +
+        " order by ext_price desc, i_brand_id\n" +
+        "limit 100";
+    assertQueryMatches("query55", expected);
+  }
+}
diff --git a/sdks/java/testing/tpcds/src/test/java/org/apache/beam/sdk/tpcds/TableSchemaJSONLoaderTest.java b/sdks/java/testing/tpcds/src/test/java/org/apache/beam/sdk/tpcds/TableSchemaJSONLoaderTest.java
new file mode 100644
index 0000000..7748bee
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/test/java/org/apache/beam/sdk/tpcds/TableSchemaJSONLoaderTest.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.tpcds;
+
+import static org.junit.Assert.assertEquals;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+
+public class TableSchemaJSONLoaderTest {
+ @Test
+ public void testStoreReturnsTable() throws Exception {
+ String storeReturnsSchemaString = TableSchemaJSONLoader.parseTableSchema("store_returns");
+ String expected = "sr_returned_date_sk bigint,"
+ + "sr_return_time_sk bigint,"
+ + "sr_item_sk bigint,"
+ + "sr_customer_sk bigint,"
+ + "sr_cdemo_sk bigint,"
+ + "sr_hdemo_sk bigint,"
+ + "sr_addr_sk bigint,"
+ + "sr_store_sk bigint,"
+ + "sr_reason_sk bigint,"
+ + "sr_ticket_number bigint,"
+ + "sr_return_quantity bigint,"
+ + "sr_return_amt double,"
+ + "sr_return_tax double,"
+ + "sr_return_amt_inc_tax double,"
+ + "sr_fee double,"
+ + "sr_return_ship_cost double,"
+ + "sr_refunded_cash double,"
+ + "sr_reversed_charge double,"
+ + "sr_store_credit double,"
+ + "sr_net_loss double";
+ assertEquals(expected, storeReturnsSchemaString);
+ }
+
+ @Test
+ public void testItemTable() throws Exception {
+ String itemSchemaString = TableSchemaJSONLoader.parseTableSchema("item");
+ String expected = "i_item_sk bigint,"
+ + "i_item_id varchar,"
+ + "i_rec_start_date varchar,"
+ + "i_rec_end_date varchar,"
+ + "i_item_desc varchar,"
+ + "i_current_price double,"
+ + "i_wholesale_cost double,"
+ + "i_brand_id bigint,"
+ + "i_brand varchar,"
+ + "i_class_id bigint,"
+ + "i_class varchar,"
+ + "i_category_id bigint,"
+ + "i_category varchar,"
+ + "i_manufact_id bigint,"
+ + "i_manufact varchar,"
+ + "i_size varchar,"
+ + "i_formulation varchar,"
+ + "i_color varchar,"
+ + "i_units varchar,"
+ + "i_container varchar,"
+ + "i_manager_id bigint,"
+ + "i_product_name varchar";
+ assertEquals(expected, itemSchemaString);
+ }
+
+ @Test
+ public void testDateDimTable() throws Exception {
+ String dateDimSchemaString = TableSchemaJSONLoader.parseTableSchema("date_dim");
+ String expected = "d_date_sk bigint,"
+ + "d_date_id varchar,"
+ + "d_date varchar,"
+ + "d_month_seq bigint,"
+ + "d_week_seq bigint,"
+ + "d_quarter_seq bigint,"
+ + "d_year bigint,"
+ + "d_dow bigint,"
+ + "d_moy bigint,"
+ + "d_dom bigint,"
+ + "d_qoy bigint,"
+ + "d_fy_year bigint,"
+ + "d_fy_quarter_seq bigint,"
+ + "d_fy_week_seq bigint,"
+ + "d_day_name varchar,"
+ + "d_quarter_name varchar,"
+ + "d_holiday varchar,"
+ + "d_weekend varchar,"
+ + "d_following_holiday varchar,"
+ + "d_first_dom bigint,"
+ + "d_last_dom bigint,"
+ + "d_same_day_ly bigint,"
+ + "d_same_day_lq bigint,"
+ + "d_current_day varchar,"
+ + "d_current_week varchar,"
+ + "d_current_month varchar,"
+ + "d_current_quarter varchar,"
+ + "d_current_year varchar";
+ assertEquals(expected, dateDimSchemaString);
+ }
+
+ @Test
+ public void testWarehouseTable() throws Exception {
+ String warehouseSchemaString = TableSchemaJSONLoader.parseTableSchema("warehouse");
+ String expected = "w_warehouse_sk bigint,"
+ + "w_warehouse_id varchar,"
+ + "w_warehouse_name varchar,"
+ + "w_warehouse_sq_ft bigint,"
+ + "w_street_number varchar,"
+ + "w_street_name varchar,"
+ + "w_street_type varchar,"
+ + "w_suite_number varchar,"
+ + "w_city varchar,"
+ + "w_county varchar,"
+ + "w_state varchar,"
+ + "w_zip varchar,"
+ + "w_country varchar,"
+ + "w_gmt_offset double";
+ assertEquals(expected, warehouseSchemaString);
+ }
+
+ @Test
+ public void testGetAllTableNames() {
+ List<String> tableNames = TableSchemaJSONLoader.getAllTableNames();
+ Collections.sort(tableNames);
+ List<String> expectedTableNames = Arrays.asList("call_center", "catalog_page", "catalog_returns", "catalog_sales", "customer", "customer_address", "customer_demographics",
+ "date_dim", "household_demographics", "income_band", "inventory", "item", "promotion", "reason", "ship_mode", "store", "store_returns", "store_sales", "time_dim",
+ "warehouse", "web_page", "web_returns", "web_sales", "web_site");
+
+ assertEquals(expectedTableNames.size(), tableNames.size());
+
+ for (int i = 0; i < tableNames.size(); i++) {
+ assertEquals(expectedTableNames.get(i), tableNames.get(i));
+ }
+ }
+}
diff --git a/sdks/java/testing/tpcds/src/test/java/org/apache/beam/sdk/tpcds/TpcdsParametersReaderTest.java b/sdks/java/testing/tpcds/src/test/java/org/apache/beam/sdk/tpcds/TpcdsParametersReaderTest.java
new file mode 100644
index 0000000..3f8c951
--- /dev/null
+++ b/sdks/java/testing/tpcds/src/test/java/org/apache/beam/sdk/tpcds/TpcdsParametersReaderTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.tpcds;
+
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/** Tests for the option-checking helpers of {@link TpcdsParametersReader}. */
+public class TpcdsParametersReaderTest {
+  // Options set up with data size "1G", queries "1,2,3" and parallelism 2.
+  private TpcdsOptions tpcdsOptions;
+  // Options set up with data size "5G", queries "0,100" and parallelism 0; the *Exception tests
+  // expect each of these values to be rejected.
+  private TpcdsOptions tpcdsOptionsError;
+
+  @Before
+  public void initializeTpcdsOptions() {
+    tpcdsOptions = PipelineOptionsFactory.as(TpcdsOptions.class);
+    tpcdsOptionsError = PipelineOptionsFactory.as(TpcdsOptions.class);
+
+    tpcdsOptions.setDataSize("1G");
+    tpcdsOptions.setQueries("1,2,3");
+    tpcdsOptions.setTpcParallel(2);
+
+    tpcdsOptionsError.setDataSize("5G");
+    tpcdsOptionsError.setQueries("0,100");
+    tpcdsOptionsError.setTpcParallel(0);
+  }
+
+  @Test
+  public void testGetAndCheckDataSize() throws Exception {
+    String dataSize = TpcdsParametersReader.getAndCheckDataSize(tpcdsOptions);
+    String expected = "1G";
+    assertEquals(expected, dataSize);
+  }
+
+  @Test(expected = Exception.class)
+  public void testGetAndCheckDataSizeException() throws Exception {
+    TpcdsParametersReader.getAndCheckDataSize(tpcdsOptionsError);
+  }
+
+  /** The special value "all" is expected to expand to query1 through query99. */
+  @Test
+  public void testGetAndCheckAllQueries() throws Exception {
+    TpcdsOptions tpcdsOptionsAll = PipelineOptionsFactory.as(TpcdsOptions.class);
+    tpcdsOptionsAll.setQueries("all");
+    String[] queryNameArray = TpcdsParametersReader.getAndCheckQueryNameArray(tpcdsOptionsAll);
+    String[] expected = new String[99];
+    for (int i = 0; i < 99; i++) {
+      expected[i] = "query" + (i + 1);
+    }
+    Assert.assertArrayEquals(expected, queryNameArray);
+  }
+
+  /** An explicit comma-separated list is expected to map to the matching query names. */
+  @Test
+  public void testGetAndCheckQueries() throws Exception {
+    String[] queryNameArray = TpcdsParametersReader.getAndCheckQueryNameArray(tpcdsOptions);
+    String[] expected = {"query1", "query2", "query3"};
+    Assert.assertArrayEquals(expected, queryNameArray);
+  }
+
+  @Test(expected = Exception.class)
+  public void testGetAndCheckQueriesException() throws Exception {
+    TpcdsParametersReader.getAndCheckQueryNameArray(tpcdsOptionsError);
+  }
+
+  @Test
+  public void testGetAndCheckTpcParallel() throws Exception {
+    int nThreads = TpcdsParametersReader.getAndCheckTpcParallel(tpcdsOptions);
+    int expected = 2;
+    assertEquals(expected, nThreads);
+  }
+
+  @Test(expected = Exception.class)
+  public void testGetAndCheckTpcParallelException() throws Exception {
+    TpcdsParametersReader.getAndCheckTpcParallel(tpcdsOptionsError);
+  }
+}
diff --git a/sdks/python/apache_beam/coders/coder_impl.py b/sdks/python/apache_beam/coders/coder_impl.py
index 8fdc4a4..54c63e7 100644
--- a/sdks/python/apache_beam/coders/coder_impl.py
+++ b/sdks/python/apache_beam/coders/coder_impl.py
@@ -530,6 +530,92 @@
return 1
+class MapCoderImpl(StreamCoderImpl):
+ """For internal use only; no backwards-compatibility guarantees.
+
+ Note this implementation always uses nested context when encoding keys
+ and values. This differs from Java's MapCoder, which uses
+ nested=False if possible for the last value encoded.
+
+ This difference is acceptable because MapCoder is not standard. It is only
+ used in a standard context by RowCoder which always uses nested context for
+ attribute values.
+
+ A coder for typing.Mapping objects."""
+ def __init__(
+ self,
+ key_coder, # type: CoderImpl
+ value_coder # type: CoderImpl
+ ):
+ self._key_coder = key_coder
+ self._value_coder = value_coder
+
+ def encode_to_stream(self, dict_value, out, nested):
+ out.write_bigendian_int32(len(dict_value))
+ for key, value in dict_value.items():
+ # Note this implementation always uses nested context when encoding keys
+ # and values which differs from Java. See note in docstring.
+ self._key_coder.encode_to_stream(key, out, True)
+ self._value_coder.encode_to_stream(value, out, True)
+
+ def decode_from_stream(self, in_stream, nested):
+ size = in_stream.read_bigendian_int32()
+ result = {}
+ for _ in range(size):
+ # Note this implementation always uses nested context when encoding keys
+ # and values which differs from Java. See note in docstring.
+ key = self._key_coder.decode_from_stream(in_stream, True)
+ value = self._value_coder.decode_from_stream(in_stream, True)
+ result[key] = value
+
+ return result
+
+ def estimate_size(self, unused_value, nested=False):
+ estimate = 4 # 4 bytes for int32 size prefix
+ for key, value in unused_value.items():
+ estimate += self._key_coder.estimate_size(key, True)
+ estimate += self._value_coder.estimate_size(value, True)
+ return estimate
+
+
+class NullableCoderImpl(StreamCoderImpl):
+ """For internal use only; no backwards-compatibility guarantees.
+
+ A coder for typing.Optional objects."""
+
+ ENCODE_NULL = 0
+ ENCODE_PRESENT = 1
+
+ def __init__(
+ self,
+ value_coder # type: CoderImpl
+ ):
+ self._value_coder = value_coder
+
+ def encode_to_stream(self, value, out, nested):
+ if value is None:
+ out.write_byte(self.ENCODE_NULL)
+ else:
+ out.write_byte(self.ENCODE_PRESENT)
+ self._value_coder.encode_to_stream(value, out, nested)
+
+ def decode_from_stream(self, in_stream, nested):
+ null_indicator = in_stream.read_byte()
+ if null_indicator == self.ENCODE_NULL:
+ return None
+ elif null_indicator == self.ENCODE_PRESENT:
+ return self._value_coder.decode_from_stream(in_stream, nested)
+ else:
+ raise ValueError(
+ "Encountered unexpected value for null indicator: '%s'" %
+ null_indicator)
+
+ def estimate_size(self, unused_value, nested=False):
+ return 1 + (
+ self._value_coder.estimate_size(unused_value)
+ if unused_value is not None else 0)
+
+
class FloatCoderImpl(StreamCoderImpl):
"""For internal use only; no backwards-compatibility guarantees."""
def encode_to_stream(self, value, out, nested):
diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py
index 399a46d..b6aca0a 100644
--- a/sdks/python/apache_beam/coders/coders.py
+++ b/sdks/python/apache_beam/coders/coders.py
@@ -83,6 +83,8 @@
'FastPrimitivesCoder',
'FloatCoder',
'IterableCoder',
+ 'MapCoder',
+ 'NullableCoder',
'PickleCoder',
'ProtoCoder',
'SingletonCoder',
@@ -520,6 +522,57 @@
Coder.register_structured_urn(common_urns.coders.BOOL.urn, BooleanCoder)
+class MapCoder(FastCoder):
+ def __init__(self, key_coder, value_coder):
+ # type: (Coder, Coder) -> None
+ self._key_coder = key_coder
+ self._value_coder = value_coder
+
+ def _create_impl(self):
+ return coder_impl.MapCoderImpl(
+ self._key_coder.get_impl(), self._value_coder.get_impl())
+
+ def to_type_hint(self):
+ return typehints.Dict[self._key_coder.to_type_hint(),
+ self._value_coder.to_type_hint()]
+
+ def is_deterministic(self):
+ # () -> bool
+ # Map ordering is non-deterministic
+ return False
+
+ def __eq__(self, other):
+ return (
+ type(self) == type(other) and self._key_coder == other._key_coder and
+ self._value_coder == other._value_coder)
+
+ def __hash__(self):
+ return hash(type(self)) + hash(self._key_coder) + hash(self._value_coder)
+
+
+class NullableCoder(FastCoder):
+ def __init__(self, value_coder):
+ # type: (Coder) -> None
+ self._value_coder = value_coder
+
+ def _create_impl(self):
+ return coder_impl.NullableCoderImpl(self._value_coder.get_impl())
+
+ def to_type_hint(self):
+ return typehints.Optional[self._value_coder.to_type_hint()]
+
+ def is_deterministic(self):
+ # () -> bool
+ return self._value_coder.is_deterministic()
+
+ def __eq__(self, other):
+ return (
+ type(self) == type(other) and self._value_coder == other._value_coder)
+
+ def __hash__(self):
+ return hash(type(self)) + hash(self._value_coder)
+
+
class VarIntCoder(FastCoder):
"""Variable-length integer coder."""
def _create_impl(self):
diff --git a/sdks/python/apache_beam/coders/coders_test_common.py b/sdks/python/apache_beam/coders/coders_test_common.py
index f1a771a..e1ce23b 100644
--- a/sdks/python/apache_beam/coders/coders_test_common.py
+++ b/sdks/python/apache_beam/coders/coders_test_common.py
@@ -82,8 +82,8 @@
coders.ToBytesCoder
])
cls.seen_nested -= set([coders.ProtoCoder, CustomCoder])
- assert not standard - cls.seen
- assert not cls.seen_nested - standard
+ assert not standard - cls.seen, str(standard - cls.seen)
+ assert not cls.seen_nested - standard, str(cls.seen_nested - standard)
@classmethod
def _observe(cls, coder):
@@ -560,6 +560,16 @@
context=context,
test_size_estimation=False)
+ def test_nullable_coder(self):
+ self.check_coder(coders.NullableCoder(coders.VarIntCoder()), None, 2 * 64)
+
+ def test_map_coder(self):
+ self.check_coder(
+ coders.MapCoder(coders.VarIntCoder(), coders.StrUtf8Coder()), {
+ 1: "one", 300: "three hundred"
+ }, {}, {i: str(i)
+ for i in range(5000)})
+
if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
diff --git a/sdks/python/apache_beam/coders/row_coder.py b/sdks/python/apache_beam/coders/row_coder.py
index 3ad880f..02c7f06 100644
--- a/sdks/python/apache_beam/coders/row_coder.py
+++ b/sdks/python/apache_beam/coders/row_coder.py
@@ -30,6 +30,8 @@
from apache_beam.coders.coders import FastCoder
from apache_beam.coders.coders import FloatCoder
from apache_beam.coders.coders import IterableCoder
+from apache_beam.coders.coders import MapCoder
+from apache_beam.coders.coders import NullableCoder
from apache_beam.coders.coders import StrUtf8Coder
from apache_beam.coders.coders import TupleCoder
from apache_beam.coders.coders import VarIntCoder
@@ -58,8 +60,9 @@
to encode/decode.
"""
self.schema = schema
+ # Use non-null coders because null values are represented separately
self.components = [
- RowCoder.coder_from_type(field.type) for field in self.schema.fields
+ _nonnull_coder_from_type(field.type) for field in self.schema.fields
]
def _create_impl(self):
@@ -102,32 +105,6 @@
# type: (bytes) -> RowCoder
return RowCoder(proto_utils.parse_Bytes(payload, schema_pb2.Schema))
- @staticmethod
- def coder_from_type(field_type):
- type_info = field_type.WhichOneof("type_info")
- if type_info == "atomic_type":
- if field_type.atomic_type in (schema_pb2.INT32, schema_pb2.INT64):
- return VarIntCoder()
- elif field_type.atomic_type == schema_pb2.DOUBLE:
- return FloatCoder()
- elif field_type.atomic_type == schema_pb2.STRING:
- return StrUtf8Coder()
- elif field_type.atomic_type == schema_pb2.BOOLEAN:
- return BooleanCoder()
- elif field_type.atomic_type == schema_pb2.BYTES:
- return BytesCoder()
- elif type_info == "array_type":
- return IterableCoder(
- RowCoder.coder_from_type(field_type.array_type.element_type))
- elif type_info == "row_type":
- return RowCoder(field_type.row_type.schema)
-
- # The Java SDK supports several more types, but the coders are not yet
- # standard, and are not implemented in Python.
- raise ValueError(
- "Encountered a type that is not currently supported by RowCoder: %s" %
- field_type)
-
def __reduce__(self):
# when pickling, use bytes representation of the schema. schema_pb2.Schema
# objects cannot be pickled.
@@ -137,6 +114,43 @@
typecoders.registry.register_coder(row_type.RowTypeConstraint, RowCoder)
+def _coder_from_type(field_type):
+ coder = _nonnull_coder_from_type(field_type)
+ if field_type.nullable:
+ return NullableCoder(coder)
+ else:
+ return coder
+
+
+def _nonnull_coder_from_type(field_type):
+ type_info = field_type.WhichOneof("type_info")
+ if type_info == "atomic_type":
+ if field_type.atomic_type in (schema_pb2.INT32, schema_pb2.INT64):
+ return VarIntCoder()
+ elif field_type.atomic_type == schema_pb2.DOUBLE:
+ return FloatCoder()
+ elif field_type.atomic_type == schema_pb2.STRING:
+ return StrUtf8Coder()
+ elif field_type.atomic_type == schema_pb2.BOOLEAN:
+ return BooleanCoder()
+ elif field_type.atomic_type == schema_pb2.BYTES:
+ return BytesCoder()
+ elif type_info == "array_type":
+ return IterableCoder(_coder_from_type(field_type.array_type.element_type))
+ elif type_info == "map_type":
+ return MapCoder(
+ _coder_from_type(field_type.map_type.key_type),
+ _coder_from_type(field_type.map_type.value_type))
+ elif type_info == "row_type":
+ return RowCoder(field_type.row_type.schema)
+
+ # The Java SDK supports several more types, but the coders are not yet
+ # standard, and are not implemented in Python.
+ raise ValueError(
+ "Encountered a type that is not currently supported by RowCoder: %s" %
+ field_type)
+
+
class RowCoderImpl(StreamCoderImpl):
"""For internal use only; no backwards-compatibility guarantees."""
SIZE_CODER = VarIntCoder().get_impl()
diff --git a/sdks/python/apache_beam/coders/row_coder_test.py b/sdks/python/apache_beam/coders/row_coder_test.py
index 65b1024..4277e58 100644
--- a/sdks/python/apache_beam/coders/row_coder_test.py
+++ b/sdks/python/apache_beam/coders/row_coder_test.py
@@ -45,14 +45,15 @@
("aliases", typing.List[unicode]),
("knows_javascript", bool),
# TODO(BEAM-7372): Use bytes instead of ByteString
- ("payload", typing.Optional[typing.ByteString])
+ ("payload", typing.Optional[typing.ByteString]),
+ ("custom_metadata", typing.Mapping[unicode, int])
])
coders_registry.register_coder(Person, RowCoder)
class RowCoderTest(unittest.TestCase):
- JON_SNOW = Person("Jon Snow", 23, None, ["crow", "wildling"], False, None)
+ JON_SNOW = Person("Jon Snow", 23, None, ["crow", "wildling"], False, None, {})
PEOPLE = [
JON_SNOW,
Person(
@@ -60,8 +61,9 @@
25,
"Westeros", ["Mother of Dragons"],
False,
- None),
- Person("Michael Bluth", 30, None, [], True, b"I've made a huge mistake")
+ None, {"dragons": 3}),
+ Person(
+ "Michael Bluth", 30, None, [], True, b"I've made a huge mistake", {})
]
def test_create_row_coder_from_named_tuple(self):
@@ -102,6 +104,15 @@
name="payload",
type=schema_pb2.FieldType(
atomic_type=schema_pb2.BYTES, nullable=True)),
+ schema_pb2.Field(
+ name="custom_metadata",
+ type=schema_pb2.FieldType(
+ map_type=schema_pb2.MapType(
+ key_type=schema_pb2.FieldType(
+ atomic_type=schema_pb2.STRING),
+ value_type=schema_pb2.FieldType(
+ atomic_type=schema_pb2.INT64),
+ ))),
])
coder = RowCoder(schema)
diff --git a/sdks/python/apache_beam/coders/standard_coders_test.py b/sdks/python/apache_beam/coders/standard_coders_test.py
index 4a13895..df88978 100644
--- a/sdks/python/apache_beam/coders/standard_coders_test.py
+++ b/sdks/python/apache_beam/coders/standard_coders_test.py
@@ -29,6 +29,7 @@
import sys
import unittest
from builtins import map
+from copy import deepcopy
from typing import Dict
from typing import Tuple
@@ -78,6 +79,13 @@
def value_parser_from_schema(schema):
def attribute_parser_from_type(type_):
+ parser = nonnull_attribute_parser_from_type(type_)
+ if type_.nullable:
+ return lambda x: None if x is None else parser(x)
+ else:
+ return parser
+
+ def nonnull_attribute_parser_from_type(type_):
# TODO: This should be exhaustive
type_info = type_.WhichOneof("type_info")
if type_info == "atomic_type":
@@ -89,8 +97,8 @@
element_parser = attribute_parser_from_type(type_.array_type.element_type)
return lambda x: list(map(element_parser, x))
elif type_info == "map_type":
- key_parser = attribute_parser_from_type(type_.array_type.key_type)
- value_parser = attribute_parser_from_type(type_.array_type.value_type)
+ key_parser = attribute_parser_from_type(type_.map_type.key_type)
+ value_parser = attribute_parser_from_type(type_.map_type.value_type)
return lambda x: dict(
(key_parser(k), value_parser(v)) for k, v in x.items())
@@ -101,6 +109,7 @@
def value_parser(x):
result = []
+ x = deepcopy(x)
for name, parser in parsers:
value = x.pop(name)
result.append(None if value is None else parser(value))
diff --git a/sdks/python/apache_beam/dataframe/frame_base.py b/sdks/python/apache_beam/dataframe/frame_base.py
index 7780cd3..0b1c296 100644
--- a/sdks/python/apache_beam/dataframe/frame_base.py
+++ b/sdks/python/apache_beam/dataframe/frame_base.py
@@ -16,7 +16,9 @@
from __future__ import absolute_import
+import functools
import inspect
+import sys
from typing import Any
from typing import Callable
from typing import Dict
@@ -29,6 +31,18 @@
from apache_beam.dataframe import expressions
from apache_beam.dataframe import partitionings
+# pylint: disable=deprecated-method
+if sys.version_info < (3, ):
+ _getargspec = inspect.getargspec
+
+ def _unwrap(func):
+ while hasattr(func, '__wrapped__'):
+ func = func.__wrapped__
+ return func
+else:
+ _getargspec = inspect.getfullargspec
+ _unwrap = inspect.unwrap
+
class DeferredBase(object):
@@ -146,8 +160,7 @@
value = kwargs[key]
else:
try:
- # pylint: disable=deprecated-method
- ix = inspect.getargspec(func).args.index(key)
+ ix = _getargspec(func).args.index(key)
except ValueError:
# TODO: fix for delegation?
continue
@@ -226,6 +239,68 @@
return wrapper
+def maybe_inplace(func):
+ @functools.wraps(func)
+ def wrapper(self, inplace=False, **kwargs):
+ result = func(self, **kwargs)
+ if inplace:
+ self._expr = result._expr
+ else:
+ return result
+
+ return wrapper
+
+
+def args_to_kwargs(base_type):
+ def wrap(func):
+ arg_names = _getargspec(_unwrap(getattr(base_type, func.__name__))).args
+
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ for name, value in zip(arg_names, args):
+ if name in kwargs:
+ raise TypeError(
+ "%s() got multiple values for argument '%s'" %
+ (func.__name__, name))
+ kwargs[name] = value
+ return func(**kwargs)
+
+ return wrapper
+
+ return wrap
+
+
+def populate_defaults(base_type):
+ def wrap(func):
+ base_argspec = _getargspec(_unwrap(getattr(base_type, func.__name__)))
+ if not base_argspec.defaults:
+ return func
+
+ arg_to_default = dict(
+ zip(
+ base_argspec.args[-len(base_argspec.defaults):],
+ base_argspec.defaults))
+
+ unwrapped_func = _unwrap(func)
+ # args that do not have defaults in func, but do have defaults in base
+ func_argspec = _getargspec(unwrapped_func)
+ num_non_defaults = len(func_argspec.args) - len(func_argspec.defaults or ())
+ defaults_to_populate = set(
+ func_argspec.args[:num_non_defaults]).intersection(
+ arg_to_default.keys())
+
+ @functools.wraps(func)
+ def wrapper(**kwargs):
+ for name in defaults_to_populate:
+ if name not in kwargs:
+ kwargs[name] = arg_to_default[name]
+ return func(**kwargs)
+
+ return wrapper
+
+ return wrap
+
+
class WontImplementError(NotImplementedError):
"""An subclass of NotImplementedError to raise indicating that implementing
the given method is infeasible.
diff --git a/sdks/python/apache_beam/dataframe/frame_base_test.py b/sdks/python/apache_beam/dataframe/frame_base_test.py
index 392272c..b527da0 100644
--- a/sdks/python/apache_beam/dataframe/frame_base_test.py
+++ b/sdks/python/apache_beam/dataframe/frame_base_test.py
@@ -41,6 +41,59 @@
self.assertTrue(sub(x, b)._expr.evaluate_at(session).equals(a - b))
self.assertTrue(sub(a, y)._expr.evaluate_at(session).equals(a - b))
+ def test_maybe_inplace(self):
+ @frame_base.maybe_inplace
+ def add_one(frame):
+ return frame + 1
+
+ frames.DeferredSeries.add_one = add_one
+ original_expr = expressions.PlaceholderExpression(pd.Series([1, 2, 3]))
+ x = frames.DeferredSeries(original_expr)
+ x.add_one()
+ self.assertIs(x._expr, original_expr)
+ x.add_one(inplace=False)
+ self.assertIs(x._expr, original_expr)
+ x.add_one(inplace=True)
+ self.assertIsNot(x._expr, original_expr)
+
+ def test_args_to_kwargs(self):
+ class Base(object):
+ def func(self, a=1, b=2, c=3):
+ pass
+
+ class Proxy(object):
+ @frame_base.args_to_kwargs(Base)
+ def func(self, **kwargs):
+ return kwargs
+
+ proxy = Proxy()
+ # pylint: disable=too-many-function-args
+ self.assertEqual(proxy.func(), {})
+ self.assertEqual(proxy.func(100), {'a': 100})
+ self.assertEqual(proxy.func(2, 4, 6), {'a': 2, 'b': 4, 'c': 6})
+ self.assertEqual(proxy.func(2, c=6), {'a': 2, 'c': 6})
+ self.assertEqual(proxy.func(c=6, a=2), {'a': 2, 'c': 6})
+
+ def test_args_to_kwargs_populates_defaults(self):
+ class Base(object):
+ def func(self, a=1, b=2, c=3):
+ pass
+
+ class Proxy(object):
+ @frame_base.args_to_kwargs(Base)
+ @frame_base.populate_defaults(Base)
+ def func(self, a, c=1000, **kwargs):
+ return dict(kwargs, a=a, c=c)
+
+ proxy = Proxy()
+ # pylint: disable=too-many-function-args
+ self.assertEqual(proxy.func(), {'a': 1, 'c': 1000})
+ self.assertEqual(proxy.func(100), {'a': 100, 'c': 1000})
+ self.assertEqual(proxy.func(2, 4, 6), {'a': 2, 'b': 4, 'c': 6})
+ self.assertEqual(proxy.func(2, c=6), {'a': 2, 'c': 6})
+ self.assertEqual(proxy.func(c=6, a=2), {'a': 2, 'c': 6})
+ self.assertEqual(proxy.func(c=6), {'a': 1, 'c': 6})
+
if __name__ == '__main__':
unittest.main()
diff --git a/sdks/python/apache_beam/dataframe/frames.py b/sdks/python/apache_beam/dataframe/frames.py
index 89e9154..9e2e97a 100644
--- a/sdks/python/apache_beam/dataframe/frames.py
+++ b/sdks/python/apache_beam/dataframe/frames.py
@@ -54,29 +54,20 @@
'order-sensitive')
diff = frame_base.wont_implement_method('order-sensitive')
- def replace(
- self,
- to_replace=None,
- value=None,
- inplace=False,
- limit=None,
- *args,
- **kwargs):
+ @frame_base.args_to_kwargs(pd.Series)
+ @frame_base.populate_defaults(pd.Series)
+ @frame_base.maybe_inplace
+ def replace(self, limit, **kwargs):
if limit is None:
requires_partition_by = partitionings.Nothing()
else:
requires_partition_by = partitionings.Singleton()
- result = frame_base.DeferredFrame.wrap(
+ return frame_base.DeferredFrame.wrap(
expressions.ComputedExpression(
'replace',
- lambda df: df.replace(
- to_replace, value, False, limit, *args, **kwargs), [self._expr],
+ lambda df: df.replace(limit=limit, **kwargs), [self._expr],
preserves_partition_by=partitionings.Singleton(),
requires_partition_by=requires_partition_by))
- if inplace:
- self._expr = result._expr
- else:
- return result
def unstack(self, *args, **kwargs):
raise frame_base.WontImplementError('non-deferred column values')
@@ -159,14 +150,15 @@
def loc(self):
return _DeferredLoc(self)
- def aggregate(self, *args, **kwargs):
- if 'axis' in kwargs and kwargs['axis'] is None:
- return self.agg(*args, **dict(kwargs, axis=1)).agg(
- *args, **dict(kwargs, axis=0))
+ @frame_base.args_to_kwargs(pd.DataFrame)
+ @frame_base.populate_defaults(pd.DataFrame)
+ def aggregate(self, axis, **kwargs):
+ if axis is None:
+ return self.agg(axis=1, **kwargs).agg(axis=0, **kwargs)
return frame_base.DeferredFrame.wrap(
expressions.ComputedExpression(
'aggregate',
- lambda df: df.agg(*args, **kwargs),
+ lambda df: df.agg(axis=axis, **kwargs),
[self._expr],
# TODO(robertwb): Sub-aggregate when possible.
requires_partition_by=partitionings.Singleton()))
@@ -188,32 +180,22 @@
min = frame_base._associative_agg_method('min')
mode = frame_base._agg_method('mode')
- def dropna(
- self,
- axis=0,
- how='any',
- thresh=None,
- subset=None,
- inplace=False,
- *args,
- **kwargs):
+ @frame_base.args_to_kwargs(pd.DataFrame)
+ @frame_base.populate_defaults(pd.DataFrame)
+ @frame_base.maybe_inplace
+ def dropna(self, axis, **kwargs):
# TODO(robertwb): This is a common pattern. Generalize?
if axis == 1 or axis == 'columns':
requires_partition_by = partitionings.Singleton()
else:
requires_partition_by = partitionings.Nothing()
- result = frame_base.DeferredFrame.wrap(
+ return frame_base.DeferredFrame.wrap(
expressions.ComputedExpression(
'dropna',
- lambda df: df.dropna(
- axis, how, thresh, subset, False, *args, **kwargs),
+ lambda df: df.dropna(axis=axis, **kwargs),
[self._expr],
preserves_partition_by=partitionings.Singleton(),
requires_partition_by=requires_partition_by))
- if inplace:
- self._expr = result._expr
- else:
- return result
items = itertuples = iterrows = iteritems = frame_base.wont_implement_method(
'non-lazy')
@@ -223,13 +205,15 @@
prod = product = frame_base._associative_agg_method('prod')
- def quantile(self, q=0.5, axis=0, *args, **kwargs):
- if axis != 0:
+ @frame_base.args_to_kwargs(pd.DataFrame)
+ @frame_base.populate_defaults(pd.DataFrame)
+ def quantile(self, axis, **kwargs):
+ if axis == 1 or axis == 'columns':
raise frame_base.WontImplementError('non-deferred column values')
return frame_base.DeferredFrame.wrap(
expressions.ComputedExpression(
'quantile',
- lambda df: df.quantile(q, axis, *args, **kwargs),
+ lambda df: df.quantile(axis=axis, **kwargs),
[self._expr],
#TODO(robertwb): Approximate quantiles?
requires_partition_by=partitionings.Singleton(),
@@ -237,28 +221,26 @@
query = frame_base._elementwise_method('query')
- def replace(self, to_replace=None,
- value=None,
- inplace=False,
- limit=None, *args, **kwargs):
+ @frame_base.args_to_kwargs(pd.DataFrame)
+ @frame_base.populate_defaults(pd.DataFrame)
+ @frame_base.maybe_inplace
+ def replace(self, limit, **kwargs):
if limit is None:
requires_partition_by = partitionings.Nothing()
else:
requires_partition_by = partitionings.Singleton()
- result = frame_base.DeferredFrame.wrap(
+ return frame_base.DeferredFrame.wrap(
expressions.ComputedExpression(
'replace',
- lambda df: df.replace(
- to_replace, value, False, limit, *args, **kwargs),
+ lambda df: df.replace(limit=limit, **kwargs),
[self._expr],
preserves_partition_by=partitionings.Singleton(),
requires_partition_by=requires_partition_by))
- if inplace:
- self._expr = result._expr
- else:
- return result
- def reset_index(self, level=None, drop=False, inplace=False, *args, **kwargs):
+ @frame_base.args_to_kwargs(pd.DataFrame)
+ @frame_base.populate_defaults(pd.DataFrame)
+ @frame_base.maybe_inplace
+ def reset_index(self, level, **kwargs):
if level is not None and not isinstance(level, (tuple, list)):
level = [level]
if level is None or len(level) == len(self._expr.proxy().index.levels):
@@ -266,22 +248,20 @@
requires_partition_by = partitionings.Singleton()
else:
requires_partition_by = partitionings.Nothing()
- result = frame_base.DeferredFrame.wrap(
+ return frame_base.DeferredFrame.wrap(
expressions.ComputedExpression(
'reset_index',
- lambda df: df.reset_index(level, drop, False, *args, **kwargs),
+ lambda df: df.reset_index(level=level, **kwargs),
[self._expr],
preserves_partition_by=partitionings.Singleton(),
requires_partition_by=requires_partition_by))
- if inplace:
- self._expr = result._expr
- else:
- return result
round = frame_base._elementwise_method('round')
select_dtypes = frame_base._elementwise_method('select_dtypes')
- def shift(self, periods=1, freq=None, axis=0, *args, **kwargs):
+ @frame_base.args_to_kwargs(pd.DataFrame)
+ @frame_base.populate_defaults(pd.DataFrame)
+ def shift(self, axis, **kwargs):
if axis == 1 or axis == 'columns':
requires_partition_by = partitionings.Nothing()
else:
@@ -289,7 +269,7 @@
return frame_base.DeferredFrame.wrap(
expressions.ComputedExpression(
'shift',
- lambda df: df.shift(periods, freq, axis, *args, **kwargs),
+ lambda df: df.shift(axis=axis, **kwargs),
[self._expr],
preserves_partition_by=partitionings.Singleton(),
requires_partition_by=requires_partition_by))
@@ -298,24 +278,21 @@
def shape(self):
raise frame_base.WontImplementError('scalar value')
- def sort_values(
- self, by, axis=0, ascending=True, inplace=False, *args, **kwargs):
+ @frame_base.args_to_kwargs(pd.DataFrame)
+ @frame_base.populate_defaults(pd.DataFrame)
+ @frame_base.maybe_inplace
+ def sort_values(self, axis, **kwargs):
if axis == 1 or axis == 'columns':
requires_partition_by = partitionings.Nothing()
else:
requires_partition_by = partitionings.Singleton()
- result = frame_base.DeferredFrame.wrap(
+ return frame_base.DeferredFrame.wrap(
expressions.ComputedExpression(
'sort_values',
- lambda df: df.sort_values(
- by, axis, ascending, False, *args, **kwargs),
+ lambda df: df.sort_values(axis=axis, **kwargs),
[self._expr],
preserves_partition_by=partitionings.Singleton(),
requires_partition_by=requires_partition_by))
- if inplace:
- self._expr = result._expr
- else:
- return result
stack = frame_base._elementwise_method('stack')
diff --git a/sdks/python/apache_beam/examples/snippets/snippets_test_py3.py b/sdks/python/apache_beam/examples/snippets/snippets_test_py3.py
index 0f0b668..5eb1d4b 100644
--- a/sdks/python/apache_beam/examples/snippets/snippets_test_py3.py
+++ b/sdks/python/apache_beam/examples/snippets/snippets_test_py3.py
@@ -96,6 +96,18 @@
ids = numbers | 'to_id' >> beam.Map(my_fn)
# [END type_hints_map_annotations]
+ # Example using an annotated PTransform.
+ with self.assertRaises(typehints.TypeCheckError):
+ # [START type_hints_ptransforms]
+ from apache_beam.pvalue import PCollection
+
+ class IntToStr(beam.PTransform):
+ def expand(self, pcoll: PCollection[int]) -> PCollection[str]:
+ return pcoll | beam.Map(lambda elem: str(elem))
+
+ ids = numbers | 'convert to str' >> IntToStr()
+ # [END type_hints_ptransforms]
+
if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
diff --git a/sdks/python/apache_beam/examples/sql_taxi.py b/sdks/python/apache_beam/examples/sql_taxi.py
new file mode 100644
index 0000000..607dea1
--- /dev/null
+++ b/sdks/python/apache_beam/examples/sql_taxi.py
@@ -0,0 +1,101 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""An example that processes streaming NYC Taxi data with SqlTransform.
+
+This example reads from the PubSub NYC Taxi stream described in
+https://github.com/googlecodelabs/cloud-dataflow-nyc-taxi-tycoon, aggregates
+the data in 15s windows using SqlTransform, and writes the output to
+a user-defined PubSub topic.
+
+Java 8 must be available to run this pipeline, and the
+--experiments=use_runner_v2 flag must be passed when running on Dataflow.
+Docker must also be available to run this pipeline locally.
+"""
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+import json
+import logging
+
+import apache_beam as beam
+from apache_beam.options.pipeline_options import PipelineOptions
+from apache_beam.transforms.sql import SqlTransform
+
+
+def run(output_topic, pipeline_args):
+ pipeline_options = PipelineOptions(
+ pipeline_args, save_main_session=True, streaming=True)
+
+ with beam.Pipeline(options=pipeline_options) as pipeline:
+ _ = (
+ pipeline
+ | beam.io.ReadFromPubSub(
+ topic='projects/pubsub-public-data/topics/taxirides-realtime',
+ timestamp_attribute="ts").with_output_types(bytes)
+ | "Parse JSON payload" >> beam.Map(json.loads)
+ # Use beam.Row to create a schema-aware PCollection
+ | "Create beam Row" >> beam.Map(
+ lambda x: beam.Row(
+ ride_status=str(x['ride_status']),
+ passenger_count=int(x['passenger_count'])))
+ # SqlTransform will compute results within an existing window
+ | "15s fixed windows" >> beam.WindowInto(beam.window.FixedWindows(15))
+ # Aggregate drop offs and pick ups that occur within each 15s window
+ | SqlTransform(
+ """
+ SELECT
+ ride_status,
+ COUNT(*) AS num_rides,
+ SUM(passenger_count) AS total_passengers
+ FROM PCOLLECTION
+ WHERE NOT ride_status = 'enroute'
+ GROUP BY ride_status""")
+ # SqlTransform yields python objects with attributes corresponding to
+ # the outputs of the query.
+ # Collect those attributes, as well as window information, into a dict
+ | "Assemble Dictionary" >> beam.Map(
+ lambda row,
+ window=beam.DoFn.WindowParam: {
+ "ride_status": row.ride_status,
+ "num_rides": row.num_rides,
+ "total_passengers": row.total_passengers,
+ "window_start": window.start.to_rfc3339(),
+ "window_end": window.end.to_rfc3339()
+ })
+ | "Convert to JSON" >> beam.Map(json.dumps)
+ | beam.io.WriteStringsToPubSub(topic=output_topic))
+
+
+if __name__ == '__main__':
+ logging.getLogger().setLevel(logging.INFO)
+ import argparse
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--output_topic',
+ dest='output_topic',
+ required=True,
+ help=(
+ 'Cloud PubSub topic to write to (e.g. '
+ 'projects/my-project/topics/my-topic), must be created prior to '
+ 'running the pipeline.'))
+ known_args, pipeline_args = parser.parse_known_args()
+
+ run(known_args.output_topic, pipeline_args)
diff --git a/sdks/python/apache_beam/examples/wordcount.py b/sdks/python/apache_beam/examples/wordcount.py
index 6732568..aa07802 100644
--- a/sdks/python/apache_beam/examples/wordcount.py
+++ b/sdks/python/apache_beam/examples/wordcount.py
@@ -30,24 +30,12 @@
import apache_beam as beam
from apache_beam.io import ReadFromText
from apache_beam.io import WriteToText
-from apache_beam.metrics import Metrics
-from apache_beam.metrics.metric import MetricsFilter
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
class WordExtractingDoFn(beam.DoFn):
"""Parse each line of input text into words."""
- def __init__(self):
- # TODO(BEAM-6158): Revert the workaround once we can pickle super() on py3.
- # super(WordExtractingDoFn, self).__init__()
- beam.DoFn.__init__(self)
- self.words_counter = Metrics.counter(self.__class__, 'words')
- self.word_lengths_counter = Metrics.counter(self.__class__, 'word_lengths')
- self.word_lengths_dist = Metrics.distribution(
- self.__class__, 'word_len_dist')
- self.empty_line_counter = Metrics.counter(self.__class__, 'empty_lines')
-
def process(self, element):
"""Returns an iterator over the words of this element.
@@ -59,15 +47,7 @@
Returns:
The processed element.
"""
- text_line = element.strip()
- if not text_line:
- self.empty_line_counter.inc(1)
- words = re.findall(r'[\w\']+', text_line, re.UNICODE)
- for w in words:
- self.words_counter.inc()
- self.word_lengths_counter.inc(len(w))
- self.word_lengths_dist.update(len(w))
- return words
+ return re.findall(r'[\w\']+', element, re.UNICODE)
def run(argv=None, save_main_session=True):
@@ -89,52 +69,29 @@
# workflow rely on global context (e.g., a module imported at module level).
pipeline_options = PipelineOptions(pipeline_args)
pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
- p = beam.Pipeline(options=pipeline_options)
- # Read the text file[pattern] into a PCollection.
- lines = p | 'read' >> ReadFromText(known_args.input)
+ # The pipeline will be run on exiting the with block.
+ with beam.Pipeline(options=pipeline_options) as p:
- # Count the occurrences of each word.
- def count_ones(word_ones):
- (word, ones) = word_ones
- return (word, sum(ones))
+ # Read the text file[pattern] into a PCollection.
+ lines = p | 'Read' >> ReadFromText(known_args.input)
- counts = (
- lines
- | 'split' >>
- (beam.ParDo(WordExtractingDoFn()).with_output_types(unicode))
- | 'pair_with_one' >> beam.Map(lambda x: (x, 1))
- | 'group' >> beam.GroupByKey()
- | 'count' >> beam.Map(count_ones))
+ counts = (
+ lines
+ | 'Split' >>
+ (beam.ParDo(WordExtractingDoFn()).with_output_types(unicode))
+ | 'PairWIthOne' >> beam.Map(lambda x: (x, 1))
+ | 'GroupAndSum' >> beam.CombinePerKey(sum))
- # Format the counts into a PCollection of strings.
- def format_result(word_count):
- (word, count) = word_count
- return '%s: %d' % (word, count)
+ # Format the counts into a PCollection of strings.
+ def format_result(word, count):
+ return '%s: %d' % (word, count)
- output = counts | 'format' >> beam.Map(format_result)
+ output = counts | 'Format' >> beam.MapTuple(format_result)
- # Write the output using a "Write" transform that has side effects.
- # pylint: disable=expression-not-assigned
- output | 'write' >> WriteToText(known_args.output)
-
- result = p.run()
- result.wait_until_finish()
-
- # Do not query metrics when creating a template which doesn't run
- if (not hasattr(result, 'has_job') # direct runner
- or result.has_job): # not just a template creation
- empty_lines_filter = MetricsFilter().with_name('empty_lines')
- query_result = result.metrics().query(empty_lines_filter)
- if query_result['counters']:
- empty_lines_counter = query_result['counters'][0]
- logging.info('number of empty lines: %d', empty_lines_counter.result)
-
- word_lengths_filter = MetricsFilter().with_name('word_len_dist')
- query_result = result.metrics().query(word_lengths_filter)
- if query_result['distributions']:
- word_lengths_dist = query_result['distributions'][0]
- logging.info('average word length: %d', word_lengths_dist.result.mean)
+ # Write the output using a "Write" transform that has side effects.
+ # pylint: disable=expression-not-assigned
+ output | 'Write' >> WriteToText(known_args.output)
if __name__ == '__main__':
diff --git a/sdks/python/apache_beam/examples/wordcount_with_metrics.py b/sdks/python/apache_beam/examples/wordcount_with_metrics.py
new file mode 100644
index 0000000..6732568
--- /dev/null
+++ b/sdks/python/apache_beam/examples/wordcount_with_metrics.py
@@ -0,0 +1,142 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""A word-counting workflow."""
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+import argparse
+import logging
+import re
+
+from past.builtins import unicode
+
+import apache_beam as beam
+from apache_beam.io import ReadFromText
+from apache_beam.io import WriteToText
+from apache_beam.metrics import Metrics
+from apache_beam.metrics.metric import MetricsFilter
+from apache_beam.options.pipeline_options import PipelineOptions
+from apache_beam.options.pipeline_options import SetupOptions
+
+
+class WordExtractingDoFn(beam.DoFn):
+ """Parse each line of input text into words."""
+ def __init__(self):
+ # TODO(BEAM-6158): Revert the workaround once we can pickle super() on py3.
+ # super(WordExtractingDoFn, self).__init__()
+ beam.DoFn.__init__(self)
+ self.words_counter = Metrics.counter(self.__class__, 'words')
+ self.word_lengths_counter = Metrics.counter(self.__class__, 'word_lengths')
+ self.word_lengths_dist = Metrics.distribution(
+ self.__class__, 'word_len_dist')
+ self.empty_line_counter = Metrics.counter(self.__class__, 'empty_lines')
+
+ def process(self, element):
+ """Returns an iterator over the words of this element.
+
+ The element is a line of text. If the line is blank, note that, too.
+
+ Args:
+ element: the element being processed
+
+ Returns:
+ The processed element.
+ """
+ text_line = element.strip()
+ if not text_line:
+ self.empty_line_counter.inc(1)
+ words = re.findall(r'[\w\']+', text_line, re.UNICODE)
+ for w in words:
+ self.words_counter.inc()
+ self.word_lengths_counter.inc(len(w))
+ self.word_lengths_dist.update(len(w))
+ return words
+
+
+def run(argv=None, save_main_session=True):
+ """Main entry point; defines and runs the wordcount pipeline."""
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--input',
+ dest='input',
+ default='gs://dataflow-samples/shakespeare/kinglear.txt',
+ help='Input file to process.')
+ parser.add_argument(
+ '--output',
+ dest='output',
+ required=True,
+ help='Output file to write results to.')
+ known_args, pipeline_args = parser.parse_known_args(argv)
+
+ # We use the save_main_session option because one or more DoFn's in this
+ # workflow rely on global context (e.g., a module imported at module level).
+ pipeline_options = PipelineOptions(pipeline_args)
+ pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
+ p = beam.Pipeline(options=pipeline_options)
+
+ # Read the text file[pattern] into a PCollection.
+ lines = p | 'read' >> ReadFromText(known_args.input)
+
+ # Count the occurrences of each word.
+ def count_ones(word_ones):
+ (word, ones) = word_ones
+ return (word, sum(ones))
+
+ counts = (
+ lines
+ | 'split' >>
+ (beam.ParDo(WordExtractingDoFn()).with_output_types(unicode))
+ | 'pair_with_one' >> beam.Map(lambda x: (x, 1))
+ | 'group' >> beam.GroupByKey()
+ | 'count' >> beam.Map(count_ones))
+
+ # Format the counts into a PCollection of strings.
+ def format_result(word_count):
+ (word, count) = word_count
+ return '%s: %d' % (word, count)
+
+ output = counts | 'format' >> beam.Map(format_result)
+
+ # Write the output using a "Write" transform that has side effects.
+ # pylint: disable=expression-not-assigned
+ output | 'write' >> WriteToText(known_args.output)
+
+ result = p.run()
+ result.wait_until_finish()
+
+ # Do not query metrics when creating a template which doesn't run
+ if (not hasattr(result, 'has_job') # direct runner
+ or result.has_job): # not just a template creation
+ empty_lines_filter = MetricsFilter().with_name('empty_lines')
+ query_result = result.metrics().query(empty_lines_filter)
+ if query_result['counters']:
+ empty_lines_counter = query_result['counters'][0]
+ logging.info('number of empty lines: %d', empty_lines_counter.result)
+
+ word_lengths_filter = MetricsFilter().with_name('word_len_dist')
+ query_result = result.metrics().query(word_lengths_filter)
+ if query_result['distributions']:
+ word_lengths_dist = query_result['distributions'][0]
+ logging.info('average word length: %d', word_lengths_dist.result.mean)
+
+
+if __name__ == '__main__':
+ logging.getLogger().setLevel(logging.INFO)
+ run()
diff --git a/sdks/python/apache_beam/internal/pickler.py b/sdks/python/apache_beam/internal/pickler.py
index 9b10955..c4bfb44 100644
--- a/sdks/python/apache_beam/internal/pickler.py
+++ b/sdks/python/apache_beam/internal/pickler.py
@@ -190,16 +190,15 @@
if obj_id not in known_module_dicts:
# Trigger loading of lazily loaded modules (such as pytest vendored
# modules).
- # This first pass over sys.modules needs to iterate on a copy of
- # sys.modules since lazy loading modifies the dictionary, hence the use
- # of list().
+ # This pass over sys.modules needs to iterate on a copy of sys.modules
+ # since lazy loading modifies the dictionary, hence the use of list().
for m in list(sys.modules.values()):
try:
_ = m.__dict__
except AttributeError:
pass
- for m in sys.modules.values():
+ for m in list(sys.modules.values()):
try:
if (m and m.__name__ != '__main__' and
isinstance(m, dill.dill.ModuleType)):
diff --git a/sdks/python/apache_beam/io/external/snowflake.py b/sdks/python/apache_beam/io/external/snowflake.py
new file mode 100644
index 0000000..e7ffa6a
--- /dev/null
+++ b/sdks/python/apache_beam/io/external/snowflake.py
@@ -0,0 +1,204 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Snowflake transforms tested against Flink portable runner.
+
+ **Setup**
+
+ Transforms provided in this module are cross-language transforms
+ implemented in the Beam Java SDK. During the pipeline construction, Python SDK
+ will connect to a Java expansion service to expand these transforms.
+ To facilitate this, a small amount of setup is needed before using these
+ transforms in a Beam Python pipeline.
+
+ There are several ways to set up cross-language Snowflake transforms.
+
+ * Option 1: use the default expansion service
+ * Option 2: specify a custom expansion service
+
+ See below for details regarding each of these options.
+
+ *Option 1: Use the default expansion service*
+
+ This is the recommended and easiest setup option for using Python Snowflake
+ transforms. This option requires the following pre-requisites
+ before running the Beam pipeline.
+
+ * Install Java runtime in the computer from where the pipeline is constructed
+ and make sure that 'java' command is available.
+
+ In this option, Python SDK will either download (for released Beam version) or
+ build (when running from a Beam Git clone) an expansion service jar and use
+ that to expand transforms. Currently Snowflake transforms use the
+ 'beam-sdks-java-io-expansion-service' jar for this purpose.
+
+ *Option 2: specify a custom expansion service*
+
+ In this option, you start up your own expansion service and provide that as
+ a parameter when using the transforms provided in this module.
+
+ This option requires the following pre-requisites before running the Beam
+ pipeline.
+
+ * Start up your own expansion service.
+ * Update your pipeline to provide the expansion service address when
+ initiating Snowflake transforms provided in this module.
+
+ Flink Users can use the built-in Expansion Service of the Flink Runner's
+ Job Server. If you start Flink's Job Server, the expansion service will be
+ started on port 8097. For a different address, please set the
+ expansion_service parameter.
+
+ **More information**
+
+ For more information regarding cross-language transforms see:
+ - https://beam.apache.org/roadmap/portability/
+
+ For more information specific to Flink runner see:
+ - https://beam.apache.org/documentation/runners/flink/
+"""
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+import typing
+
+from past.builtins import unicode
+
+import apache_beam as beam
+from apache_beam.transforms.external import BeamJarExpansionService
+from apache_beam.transforms.external import ExternalTransform
+from apache_beam.transforms.external import NamedTupleBasedPayloadBuilder
+
+ReadFromSnowflakeSchema = typing.NamedTuple(
+ 'ReadFromSnowflakeSchema',
+ [
+ ('server_name', unicode),
+ ('schema', unicode),
+ ('database', unicode),
+ ('staging_bucket_name', unicode),
+ ('storage_integration_name', unicode),
+ ('username', typing.Optional[unicode]),
+ ('password', typing.Optional[unicode]),
+ ('private_key_path', typing.Optional[unicode]),
+ ('private_key_passphrase', typing.Optional[unicode]),
+ ('o_auth_token', typing.Optional[unicode]),
+ ('table', typing.Optional[unicode]),
+ ('query', typing.Optional[unicode]),
+ ])
+
+
+def default_io_expansion_service():
+ return BeamJarExpansionService(
+ 'sdks:java:io:snowflake:expansion-service:shadowJar')
+
+
+class ReadFromSnowflake(beam.PTransform):
+ """
+ An external PTransform which reads from Snowflake.
+ """
+
+ URN = 'beam:external:java:snowflake:read:v1'
+
+ def __init__(
+ self,
+ server_name,
+ schema,
+ database,
+ staging_bucket_name,
+ storage_integration_name,
+ csv_mapper,
+ username=None,
+ password=None,
+ private_key_path=None,
+ private_key_passphrase=None,
+ o_auth_token=None,
+ table=None,
+ query=None,
+ expansion_service=None):
+ """
+ Initializes a read operation from Snowflake.
+
+ Required parameters:
+
+ :param server_name: full Snowflake server name with the following format
+ account.region.gcp.snowflakecomputing.com.
+ :param schema: name of the Snowflake schema in the database to use.
+ :param database: name of the Snowflake database to use.
+ :param staging_bucket_name: name of the Google Cloud Storage bucket.
+ Bucket will be used as a temporary location for storing CSV files.
+ Those temporary directories will be named
+ 'sf_copy_csv_DATE_TIME_RANDOMSUFFIX'
+ and they will be removed automatically once Read operation finishes.
+ :param storage_integration_name: is the name of storage integration
+ object created according to Snowflake documentation.
+ :param csv_mapper: specifies a function which must translate
+ an array of strings to a user-defined object.
+ SnowflakeIO uses a COPY INTO <location> statement to move data from
+ a Snowflake table to Google Cloud Storage as CSV files. These files
+ are then downloaded via FileIO and processed line by line.
+ Each line is split into an array of Strings using OpenCSV.
+ The csv_mapper function job is to give the user the possibility to
+ convert the array of Strings to a user-defined type,
+ ie. GenericRecord for Avro or Parquet files, or custom objects.
+ Example:
+ def csv_mapper(strings_array):
+ return User(strings_array[0], int(strings_array[1]))
+ :param table: specifies a Snowflake table name.
+ :param query: specifies a Snowflake custom SQL query.
+ :param expansion_service: specifies URL of expansion service.
+
+ Authentication parameters:
+
+ :param username: specifies username for
+ username/password authentication method.
+ :param password: specifies password for
+ username/password authentication method.
+ :param private_key_path: specifies a private key file for
+ key pair authentication method.
+ :param private_key_passphrase: specifies password for
+ key pair authentication method.
+ :param o_auth_token: specifies access token for
+ OAuth authentication method.
+ """
+ self.params = ReadFromSnowflakeSchema(
+ server_name=server_name,
+ schema=schema,
+ database=database,
+ staging_bucket_name=staging_bucket_name,
+ storage_integration_name=storage_integration_name,
+ username=username,
+ password=password,
+ private_key_path=private_key_path,
+ private_key_passphrase=private_key_passphrase,
+ o_auth_token=o_auth_token,
+ table=table,
+ query=query)
+ self.csv_mapper = csv_mapper
+ self.expansion_service = expansion_service or default_io_expansion_service()
+
+ def expand(self, pbegin):
+ return (
+ pbegin
+ | ExternalTransform(
+ self.URN,
+ NamedTupleBasedPayloadBuilder(self.params),
+ self.expansion_service,
+ )
+ | 'CSV to array mapper' >> beam.Map(lambda csv: csv.split(b','))
+ | 'CSV mapper' >> beam.Map(self.csv_mapper))
diff --git a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py
index f8c9985..73bd7da 100644
--- a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py
+++ b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py
@@ -78,8 +78,7 @@
'Do not run this test on precommit suites.')
class CrossLanguageJdbcIOTest(unittest.TestCase):
def setUp(self):
- self.postgres = PostgresContainer('postgres:latest')
- self.postgres.start()
+ self.start_postgres_container(retries=3)
self.engine = sqlalchemy.create_engine(self.postgres.get_connection_url())
self.username = 'test'
self.password = 'test'
@@ -91,7 +90,12 @@
self.host, self.port, self.database_name)
def tearDown(self):
- self.postgres.stop()
+ # Sometimes stopping the container raises ReadTimeout. We can ignore it
+ # here to avoid the test failure.
+ try:
+ self.postgres.stop()
+ except: # pylint: disable=bare-except
+ logging.error('Could not stop the postgreSQL container.')
def test_xlang_jdbc_write(self):
table_name = 'jdbc_external_test_write'
@@ -150,6 +154,19 @@
assert_that(
result, equal_to([JdbcReadTestRow(i) for i in range(ROW_COUNT)]))
+ # Creating a container with testcontainers sometimes raises ReadTimeout
+ # error. In java there are 2 retries set by default.
+ def start_postgres_container(self, retries):
+ for i in range(retries):
+ try:
+ self.postgres = PostgresContainer('postgres:12.3')
+ self.postgres.start()
+ break
+ except Exception as e: # pylint: disable=bare-except
+ if i == retries - 1:
+ logging.error('Unable to initialize postgreSQL container.')
+ raise e
+
if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
diff --git a/sdks/python/apache_beam/io/external/xlang_kafkaio_it_test.py b/sdks/python/apache_beam/io/external/xlang_kafkaio_it_test.py
index 0dad234..cec1d9b 100644
--- a/sdks/python/apache_beam/io/external/xlang_kafkaio_it_test.py
+++ b/sdks/python/apache_beam/io/external/xlang_kafkaio_it_test.py
@@ -28,12 +28,17 @@
import time
import typing
import unittest
+import uuid
import apache_beam as beam
from apache_beam.io.kafka import ReadFromKafka
from apache_beam.io.kafka import WriteToKafka
from apache_beam.metrics import Metrics
from apache_beam.testing.test_pipeline import TestPipeline
+from apache_beam.testing.util import assert_that
+from apache_beam.testing.util import equal_to
+
+NUM_RECORDS = 1000
class CrossLanguageKafkaIO(object):
@@ -47,7 +52,7 @@
_ = (
pipeline
| 'Impulse' >> beam.Impulse()
- | 'Generate' >> beam.FlatMap(lambda x: range(1000)) # pylint: disable=range-builtin-not-iterating
+ | 'Generate' >> beam.FlatMap(lambda x: range(NUM_RECORDS)) # pylint: disable=range-builtin-not-iterating
| 'Reshuffle' >> beam.Reshuffle()
| 'MakeKV' >> beam.Map(lambda x:
(b'', str(x).encode())).with_output_types(
@@ -57,8 +62,8 @@
topic=self.topic,
expansion_service=self.expansion_service))
- def build_read_pipeline(self, pipeline):
- _ = (
+ def build_read_pipeline(self, pipeline, max_num_records=None):
+ kafka_records = (
pipeline
| 'ReadFromKafka' >> ReadFromKafka(
consumer_config={
@@ -66,7 +71,14 @@
'auto.offset.reset': 'earliest'
},
topics=[self.topic],
- expansion_service=self.expansion_service)
+ max_num_records=max_num_records,
+ expansion_service=self.expansion_service))
+
+ if max_num_records:
+ return kafka_records
+
+ return (
+ kafka_records
| 'Windowing' >> beam.WindowInto(
beam.window.FixedWindows(300),
trigger=beam.transforms.trigger.AfterProcessingTime(60),
@@ -86,6 +98,30 @@
os.environ.get('LOCAL_KAFKA_JAR'),
"LOCAL_KAFKA_JAR environment var is not provided.")
class CrossLanguageKafkaIOTest(unittest.TestCase):
+ def test_kafkaio(self):
+ kafka_topic = 'xlang_kafkaio_test_{}'.format(uuid.uuid4())
+ local_kafka_jar = os.environ.get('LOCAL_KAFKA_JAR')
+ with self.local_kafka_service(local_kafka_jar) as kafka_port:
+ bootstrap_servers = '{}:{}'.format(
+ self.get_platform_localhost(), kafka_port)
+ pipeline_creator = CrossLanguageKafkaIO(bootstrap_servers, kafka_topic)
+
+ self.run_kafka_write(pipeline_creator)
+ self.run_kafka_read(pipeline_creator)
+
+ def run_kafka_write(self, pipeline_creator):
+ with TestPipeline() as pipeline:
+ pipeline.not_use_test_runner_api = True
+ pipeline_creator.build_write_pipeline(pipeline)
+
+ def run_kafka_read(self, pipeline_creator):
+ with TestPipeline() as pipeline:
+ pipeline.not_use_test_runner_api = True
+ result = pipeline_creator.build_read_pipeline(pipeline, NUM_RECORDS)
+ assert_that(
+ result,
+ equal_to([(b'', str(i).encode()) for i in range(NUM_RECORDS)]))
+
def get_platform_localhost(self):
if sys.platform == 'darwin':
return 'host.docker.internal'
@@ -119,18 +155,6 @@
if kafka_server:
kafka_server.kill()
- def test_kafkaio_write(self):
- local_kafka_jar = os.environ.get('LOCAL_KAFKA_JAR')
- with self.local_kafka_service(local_kafka_jar) as kafka_port:
- p = TestPipeline()
- p.not_use_test_runner_api = True
- xlang_kafkaio = CrossLanguageKafkaIO(
- '%s:%s' % (self.get_platform_localhost(), kafka_port),
- 'xlang_kafkaio_test')
- xlang_kafkaio.build_write_pipeline(p)
- job = p.run()
- job.wait_until_finish()
-
if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
diff --git a/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py b/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
index 1f39928..c2dc3cd 100644
--- a/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
+++ b/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
@@ -174,7 +174,6 @@
'use_standard_sql': False,
'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION_MS,
'on_success_matcher': all_of(*pipeline_verifiers),
- 'experiments': 'use_beam_bq_sink',
}
options = self.test_pipeline.get_full_options_as_args(**extra_opts)
big_query_query_to_table_pipeline.run_bq_pipeline(options)
@@ -198,7 +197,6 @@
'use_standard_sql': True,
'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION_MS,
'on_success_matcher': all_of(*pipeline_verifiers),
- 'experiments': 'use_beam_bq_sink',
}
options = self.test_pipeline.get_full_options_as_args(**extra_opts)
big_query_query_to_table_pipeline.run_bq_pipeline(options)
@@ -227,6 +225,7 @@
'on_success_matcher': all_of(*pipeline_verifiers),
'kms_key': kms_key,
'native': True,
+ 'experiments': 'use_legacy_bq_sink',
}
options = self.test_pipeline.get_full_options_as_args(**extra_opts)
big_query_query_to_table_pipeline.run_bq_pipeline(options)
@@ -281,7 +280,6 @@
'use_standard_sql': False,
'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION_MS,
'on_success_matcher': all_of(*pipeline_verifiers),
- 'experiments': 'use_beam_bq_sink',
}
options = self.test_pipeline.get_full_options_as_args(**extra_opts)
big_query_query_to_table_pipeline.run_bq_pipeline(options)
@@ -305,7 +303,8 @@
'use_standard_sql': False,
'native': True,
'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION_MS,
- 'on_success_matcher': all_of(*pipeline_verifiers)
+ 'on_success_matcher': all_of(*pipeline_verifiers),
+ 'experiments': 'use_legacy_bq_sink',
}
options = self.test_pipeline.get_full_options_as_args(**extra_opts)
big_query_query_to_table_pipeline.run_bq_pipeline(options)
diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py
index bbe7b0b..d011394 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery.py
@@ -290,6 +290,20 @@
]
_LOGGER = logging.getLogger(__name__)
+"""
+Template for BigQuery jobs created by BigQueryIO. This template is:
+`"beam_bq_job_{job_type}_{job_id}_{step_id}_{random}"`, where:
+
+- `job_type` represents the BigQuery job type (e.g. extract / copy / load /
+ query).
+- `job_id` is the Beam job name.
+- `step_id` is a UUID representing the Dataflow step that created the
+ BQ job.
+- `random` is a random string.
+
+NOTE: This job name template does not have backwards compatibility guarantees.
+"""
+BQ_JOB_NAME_TEMPLATE = "beam_bq_job_{job_type}_{job_id}_{step_id}{random}"
@deprecated(since='2.11.0', current="bigquery_tools.parse_table_reference")
@@ -1474,9 +1488,7 @@
def _compute_method(self, experiments, is_streaming_pipeline):
# If the new BQ sink is not activated for experiment flags, then we use
# streaming inserts by default (it gets overridden in dataflow_runner.py).
- if 'use_beam_bq_sink' not in experiments:
- return self.Method.STREAMING_INSERTS
- elif self.method == self.Method.DEFAULT and is_streaming_pipeline:
+ if self.method == self.Method.DEFAULT and is_streaming_pipeline:
return self.Method.STREAMING_INSERTS
elif self.method == self.Method.DEFAULT and not is_streaming_pipeline:
return self.Method.FILE_LOADS
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py
index 7543dca..773f19d 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py
@@ -30,7 +30,6 @@
from __future__ import absolute_import
-import datetime
import hashlib
import logging
import random
@@ -69,10 +68,9 @@
_FILE_TRIGGERING_RECORD_COUNT = 500000
-def _generate_load_job_name():
- datetime_component = datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")
- # TODO(pabloem): include job id / pipeline component?
- return 'beam_load_%s_%s' % (datetime_component, random.randint(0, 100))
+def _generate_job_name(job_name, job_type, step_name):
+ return bigquery_tools.generate_bq_job_name(
+ job_name, step_name, job_type, random.randint(0, 1000))
def file_prefix_generator(
@@ -358,15 +356,10 @@
copy_from_reference.projectId = vp.RuntimeValueProvider.get_value(
'project', str, '')
- copy_job_name = '%s_copy_%s_to_%s' % (
+ copy_job_name = '%s_%s' % (
job_name_prefix,
_bq_uuid(
'%s:%s.%s' % (
- copy_from_reference.projectId,
- copy_from_reference.datasetId,
- copy_from_reference.tableId)),
- _bq_uuid(
- '%s:%s.%s' % (
copy_to_reference.projectId,
copy_to_reference.datasetId,
copy_to_reference.tableId)))
@@ -788,6 +781,7 @@
partitions_using_temp_tables,
partitions_direct_to_destination,
load_job_name_pcv,
+ copy_job_name_pcv,
singleton_pc):
"""Load data to BigQuery
@@ -833,7 +827,7 @@
create_disposition=self.create_disposition,
write_disposition=self.write_disposition,
test_client=self.test_client),
- load_job_name_pcv))
+ copy_job_name_pcv))
finished_copy_jobs_pc = (
singleton_pc
@@ -884,13 +878,27 @@
p = pcoll.pipeline
temp_location = p.options.view_as(GoogleCloudOptions).temp_location
+ job_name = (
+ p.options.view_as(GoogleCloudOptions).job_name or 'AUTOMATIC_JOB_NAME')
empty_pc = p | "ImpulseEmptyPC" >> beam.Create([])
singleton_pc = p | "ImpulseSingleElementPC" >> beam.Create([None])
load_job_name_pcv = pvalue.AsSingleton(
singleton_pc
- | beam.Map(lambda _: _generate_load_job_name()))
+ | "LoadJobNamePrefix" >> beam.Map(
+ lambda _: _generate_job_name(
+ job_name,
+ bigquery_tools.BigQueryJobTypes.LOAD,
+ 'LOAD_NAME_STEP')))
+
+ copy_job_name_pcv = pvalue.AsSingleton(
+ singleton_pc
+ | "CopyJobNamePrefix" >> beam.Map(
+ lambda _: _generate_job_name(
+ job_name,
+ bigquery_tools.BigQueryJobTypes.COPY,
+ 'COPY_NAME_STEP')))
file_prefix_pcv = pvalue.AsSingleton(
singleton_pc
@@ -934,14 +942,17 @@
multiple_partitions_per_destination_pc,
single_partition_per_destination_pc)
| "FlattenPartitions" >> beam.Flatten())
- destination_load_job_ids_pc, destination_copy_job_ids_pc = self.\
- _load_data(all_partitions, empty_pc, load_job_name_pcv,
- singleton_pc)
+ destination_load_job_ids_pc, destination_copy_job_ids_pc = (
+ self._load_data(all_partitions,
+ empty_pc,
+ load_job_name_pcv,
+ copy_job_name_pcv,
+ singleton_pc))
else:
- destination_load_job_ids_pc, destination_copy_job_ids_pc = self.\
- _load_data(multiple_partitions_per_destination_pc,
- single_partition_per_destination_pc,
- load_job_name_pcv, singleton_pc)
+ destination_load_job_ids_pc, destination_copy_job_ids_pc = (
+ self._load_data(multiple_partitions_per_destination_pc,
+ single_partition_per_destination_pc,
+ load_job_name_pcv, copy_job_name_pcv, singleton_pc))
return {
self.DESTINATION_JOBID_PAIRS: destination_load_job_ids_pc,
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py b/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py
index f9e0212..fca7d9c 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py
@@ -673,8 +673,7 @@
]
args = self.test_pipeline.get_full_options_as_args(
- on_success_matcher=all_of(*pipeline_verifiers),
- experiments='use_beam_bq_sink')
+ on_success_matcher=all_of(*pipeline_verifiers))
with beam.Pipeline(argv=args) as p:
input = p | beam.Create(_ELEMENTS, reshuffle=False)
@@ -733,9 +732,7 @@
data=[(i, ) for i in range(100)])
args = self.test_pipeline.get_full_options_as_args(
- on_success_matcher=all_of(state_matcher, bq_matcher),
- experiments='use_beam_bq_sink',
- streaming=True)
+ on_success_matcher=all_of(state_matcher, bq_matcher), streaming=True)
with beam.Pipeline(argv=args) as p:
stream_source = (
TestStream().advance_watermark_to(0).advance_processing_time(
@@ -790,8 +787,7 @@
data=[])
]
- args = self.test_pipeline.get_full_options_as_args(
- experiments='use_beam_bq_sink')
+ args = self.test_pipeline.get_full_options_as_args()
with self.assertRaises(Exception):
# The pipeline below fails because neither a schema nor SCHEMA_AUTODETECT
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_test.py b/sdks/python/apache_beam/io/gcp/bigquery_test.py
index 5c05978..b341dc6 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_test.py
@@ -563,8 +563,7 @@
self.assertEqual(expected_dict_schema, dict_schema)
def test_schema_autodetect_not_allowed_with_avro_file_loads(self):
- with TestPipeline(
- additional_pipeline_args=["--experiments=use_beam_bq_sink"]) as p:
+ with TestPipeline() as p:
pc = p | beam.Impulse()
with self.assertRaisesRegex(ValueError, '^A schema must be provided'):
@@ -593,8 +592,7 @@
"""
FULL_OUTPUT_TABLE = 'test_project:output_table'
- p = TestPipeline(
- additional_pipeline_args=["--experiments=use_beam_bq_sink"])
+ p = TestPipeline()
# Used for testing side input parameters.
table_record_pcv = beam.pvalue.AsDict(
@@ -886,8 +884,7 @@
]
args = self.test_pipeline.get_full_options_as_args(
- on_success_matcher=hc.all_of(*pipeline_verifiers),
- experiments='use_beam_bq_sink')
+ on_success_matcher=hc.all_of(*pipeline_verifiers))
with beam.Pipeline(argv=args) as p:
input = p | beam.Create([row for row in _ELEMENTS if 'language' in row])
@@ -967,8 +964,7 @@
]
args = self.test_pipeline.get_full_options_as_args(
- on_success_matcher=hc.all_of(*pipeline_verifiers),
- experiments='use_beam_bq_sink')
+ on_success_matcher=hc.all_of(*pipeline_verifiers))
with beam.Pipeline(argv=args) as p:
if streaming:
@@ -1083,7 +1079,6 @@
args = self.test_pipeline.get_full_options_as_args(
on_success_matcher=hc.all_of(*matchers),
wait_until_finish_duration=self.WAIT_UNTIL_FINISH_DURATION,
- experiments='use_beam_bq_sink',
streaming=True)
def add_schema_info(element):
@@ -1182,7 +1177,6 @@
args = self.test_pipeline.get_full_options_as_args(
on_success_matcher=hc.all_of(*pipeline_verifiers),
- experiments='use_beam_bq_sink',
)
with beam.Pipeline(argv=args) as p:
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
index 504f530..1230d1d 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
@@ -1481,3 +1481,21 @@
dict_table_schema = get_dict_table_schema(schema)
return bigquery_avro_tools.get_record_schema_from_dict_table_schema(
"root", dict_table_schema)
+
+
+class BigQueryJobTypes:
+ EXPORT = 'EXPORT'
+ COPY = 'COPY'
+ LOAD = 'LOAD'
+ QUERY = 'QUERY'
+
+
+def generate_bq_job_name(job_name, step_id, job_type, random=None):
+ from apache_beam.io.gcp.bigquery import BQ_JOB_NAME_TEMPLATE
+ random = ("_%s" % random) if random else ""
+ return str.format(
+ BQ_JOB_NAME_TEMPLATE,
+ job_type=job_type,
+ job_id=job_name.replace("-", ""),
+ step_id=step_id,
+ random=random)
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
index 9c874fa..90716d0 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
@@ -42,8 +42,10 @@
from apache_beam.io.gcp.bigquery_test import HttpError
from apache_beam.io.gcp.bigquery_tools import JSON_COMPLIANCE_ERROR
from apache_beam.io.gcp.bigquery_tools import AvroRowWriter
+from apache_beam.io.gcp.bigquery_tools import BigQueryJobTypes
from apache_beam.io.gcp.bigquery_tools import JsonRowWriter
from apache_beam.io.gcp.bigquery_tools import RowAsDictJsonCoder
+from apache_beam.io.gcp.bigquery_tools import generate_bq_job_name
from apache_beam.io.gcp.bigquery_tools import parse_table_schema_from_json
from apache_beam.io.gcp.internal.clients import bigquery
from apache_beam.options.pipeline_options import PipelineOptions
@@ -812,6 +814,43 @@
self.assertEqual(records[0]['stamp'], stamp)
+class TestBQJobNames(unittest.TestCase):
+ def test_simple_names(self):
+ self.assertEqual(
+ "beam_bq_job_EXPORT_beamappjobtest_abcd",
+ generate_bq_job_name(
+ "beamapp-job-test", "abcd", BigQueryJobTypes.EXPORT))
+
+ self.assertEqual(
+ "beam_bq_job_LOAD_beamappjobtest_abcd",
+ generate_bq_job_name("beamapp-job-test", "abcd", BigQueryJobTypes.LOAD))
+
+ self.assertEqual(
+ "beam_bq_job_QUERY_beamappjobtest_abcd",
+ generate_bq_job_name(
+ "beamapp-job-test", "abcd", BigQueryJobTypes.QUERY))
+
+ self.assertEqual(
+ "beam_bq_job_COPY_beamappjobtest_abcd",
+ generate_bq_job_name("beamapp-job-test", "abcd", BigQueryJobTypes.COPY))
+
+ def test_random_in_name(self):
+ self.assertEqual(
+ "beam_bq_job_COPY_beamappjobtest_abcd_randome",
+ generate_bq_job_name(
+ "beamapp-job-test", "abcd", BigQueryJobTypes.COPY, "randome"))
+
+ def test_matches_template(self):
+ base_pattern = "beam_bq_job_[A-Z]+_[a-z0-9-]+_[a-z0-9-]+(_[a-z0-9-]+)?"
+ job_name = generate_bq_job_name(
+ "beamapp-job-test", "abcd", BigQueryJobTypes.COPY, "randome")
+ self.assertRegex(job_name, base_pattern)
+
+ job_name = generate_bq_job_name(
+ "beamapp-job-test", "abcd", BigQueryJobTypes.COPY)
+ self.assertRegex(job_name, base_pattern)
+
+
if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
unittest.main()
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py
index 9ea75ea..a5c1ce7 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py
@@ -197,8 +197,7 @@
]
args = self.test_pipeline.get_full_options_as_args(
- on_success_matcher=hc.all_of(*pipeline_verifiers),
- experiments='use_beam_bq_sink')
+ on_success_matcher=hc.all_of(*pipeline_verifiers))
with beam.Pipeline(argv=args) as p:
# pylint: disable=expression-not-assigned
diff --git a/sdks/python/apache_beam/io/gcp/dicomclient.py b/sdks/python/apache_beam/io/gcp/dicomclient.py
new file mode 100644
index 0000000..e38a310
--- /dev/null
+++ b/sdks/python/apache_beam/io/gcp/dicomclient.py
@@ -0,0 +1,128 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+from google.auth import default
+from google.auth.transport import requests
+
+
+class DicomApiHttpClient:
+ """DICOM API client that talks to the API via HTTP requests."""
+ healthcare_base_url = "https://healthcare.googleapis.com/v1"
+ session = None
+
+ def get_session(self, credential):
+ if self.session:
+ return self.session
+
+ # if the credential is not provided, use the default credential.
+ if not credential:
+ credential, _ = default()
+ new_seesion = requests.AuthorizedSession(credential)
+ self.session = new_seesion
+ return new_seesion
+
+ def qido_search(
+ self,
+ project_id,
+ region,
+ dataset_id,
+ dicom_store_id,
+ search_type,
+ params=None,
+ credential=None):
+ """function for searching a DICOM store"""
+
+ # sending request to the REST healthcare api.
+ api_endpoint = "{}/projects/{}/locations/{}".format(
+ self.healthcare_base_url, project_id, region)
+
+ # base of dicomweb path.
+ dicomweb_path = "{}/datasets/{}/dicomStores/{}/dicomWeb/{}".format(
+ api_endpoint, dataset_id, dicom_store_id, search_type)
+
+ # Make an authenticated API request
+ session = self.get_session(credential)
+ headers = {"Content-Type": "application/dicom+json; charset=utf-8"}
+ page_size = 500
+
+ if params and 'limit' in params:
+ page_size = params['limit']
+ elif params:
+ params['limit'] = page_size
+ else:
+ params = {'limit': page_size}
+
+ offset = 0
+ output = []
+ # iterate to get all the results
+ while True:
+ params['offset'] = offset
+ response = session.get(dicomweb_path, headers=headers, params=params)
+ response.raise_for_status()
+ status = response.status_code
+ if status != 200:
+ if offset == 0:
+ return [], status
+ params['offset'] = offset - 1
+ params['limit'] = 1
+ response = session.get(dicomweb_path, headers=headers, params=params)
+ response.raise_for_status()
+ check_status = response.status_code
+ if check_status == 200:
+ # if the number of results equals to page size
+ return output, check_status
+ else:
+ # something wrong with the request or server
+ return [], status
+ results = response.json()
+ output += results
+ if len(results) < page_size:
+ # got all the results, return
+ break
+ offset += len(results)
+
+ return output, status
+
+ def dicomweb_store_instance(
+ self,
+ project_id,
+ region,
+ dataset_id,
+ dicom_store_id,
+ dcm_file,
+ credential=None):
+ """function for storing an instance."""
+
+ api_endpoint = "{}/projects/{}/locations/{}".format(
+ self.healthcare_base_url, project_id, region)
+
+ dicomweb_path = "{}/datasets/{}/dicomStores/{}/dicomWeb/studies".format(
+ api_endpoint, dataset_id, dicom_store_id)
+
+ # Make an authenticated API request
+ session = self.get_session(credential)
+ content_type = "application/dicom"
+ headers = {"Content-Type": content_type}
+
+ response = session.post(dicomweb_path, data=dcm_file, headers=headers)
+ response.raise_for_status()
+
+ return None, response.status_code
diff --git a/sdks/python/apache_beam/io/gcp/dicomio.py b/sdks/python/apache_beam/io/gcp/dicomio.py
new file mode 100644
index 0000000..e33d99d
--- /dev/null
+++ b/sdks/python/apache_beam/io/gcp/dicomio.py
@@ -0,0 +1,579 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""DICOM IO connector
+This module implements several tools to facilitate the interaction between
+a Google Cloud Healthcare DICOM store and a Beam pipeline.
+
+For more details on DICOM store and API:
+https://cloud.google.com/healthcare/docs/how-tos/dicom
+
+The DICOM IO connector can be used to search metadata or write DICOM files
+to DICOM store.
+
+When used together with Google Pubsub message connector, the
+`FormatToQido` PTransform implemented in this module can be used
+to convert Pubsub messages to search requests.
+
+Since Traceability is crucial for healthcare
+API users, every input or error message will be recorded in the output of
+the DICOM IO connector. As a result, every PTransform in this module will
+return a PCollection of dict that encodes results and detailed error messages.
+
+Search instance's metadata (QIDO request)
+===================================================
+DicomSearch() wraps the QIDO request client and supports 3 levels of search.
+Users should specify the level by setting the 'search_type' entry in the input
+dict. They can also refine the search by adding tags to filter the results using
+the 'params' entry. Here is a sample usage:
+
+ with Pipeline() as p:
+ input_dict = p | beam.Create(
+ [{'project_id': 'abc123', 'search_type': 'instances',...},
+ {'project_id': 'dicom_go', 'search_type': 'series',...}])
+
+ results = input_dict | io.gcp.DicomSearch()
+ results | 'print successful search' >> beam.Map(
+ lambda x: print(x['result'] if x['success'] else None))
+
+ results | 'print failed search' >> beam.Map(
+ lambda x: print(x['status'] if not x['success'] else None))
+
+In the example above, successful qido search results and error messages for
+failed requests are printed. When used in real life, user can choose to filter
+those data and output them to wherever they want.
+
+Convert DICOM Pubsub message to Qido search request
+===================================================
+Healthcare API users might read messages from Pubsub to monitor the store
+operations (e.g. new file) in a DICOM storage. Each Pubsub message encodes
+the DICOM web store path as well as the instance ids. If users are interested
+in getting a new instance's metadata, they can use the `FormatToQido` transform
+to convert the message into a Qido search dict and then use the `DicomSearch`
+transform. Here is a sample usage:
+
+ pipeline_options = PipelineOptions()
+ pipeline_options.view_as(StandardOptions).streaming = True
+ p = beam.Pipeline(options=pipeline_options)
+ pubsub = p | beam.io.ReadStringsFromPubSub(subscription='a_dicom_store')
+ results = pubsub | FormatToQido()
+ success = results | 'filter message' >> beam.Filter(lambda x: x['success'])
+ qido_dict = success | 'get qido request' >> beam.Map(lambda x: x['result'])
+ metadata = qido_dict | DicomSearch()
+
+In the example above, the pipeline is listening to a pubsub topic and waiting
+for messages from DICOM API. When a new DICOM file comes into the storage, the
+pipeline will receive a pubsub message, convert it to a Qido request dict and
+feed it to DicomSearch() PTransform. As a result, users can get the metadata for
+every new DICOM file. Note that not every pubsub message received is from DICOM
+API, so we need to filter the results first.
+
+Store a DICOM file in a DICOM storage
+===================================================
+UploadToDicomStore() wraps the store request API and users can use it to send a
+DICOM file to a DICOM store. It supports two types of input: 1. file data in
+byte[]; 2. fileio object. Users should set the 'input_type' when initializing
+this PTransform. Here are the examples:
+
+ with Pipeline() as p:
+ input_dict = {'project_id': 'abc123', 'region': 'us-central1',...}
+ path = "gs://bucketname/something/a.dcm"
+ match = p | fileio.MatchFiles(path)
+ fileio_obj = match | fileio.ReadMatches()
+ results = fileio_obj | UploadToDicomStore(input_dict, 'fileio')
+
+ with Pipeline() as p:
+ input_dict = {'project_id': 'abc123', 'region': 'us-central1',...}
+ f = open("abc.dcm", "rb")
+ dcm_file = f.read()
+ byte_file = p | 'create byte file' >> beam.Create([dcm_file])
+ results = byte_file | UploadToDicomStore(input_dict, 'bytes')
+
+The first example uses a PCollection of fileio objects as input.
+UploadToDicomStore will read DICOM files from the objects and send them
+to a DICOM storage.
+The second example uses a PCollection of byte[] as input. UploadToDicomStore
+will directly send those DICOM files to a DICOM storage.
+Users can also get the operation results in the output PCollection if they want
+to handle the failed store requests.
+"""
+
+# pytype: skip-file
+from __future__ import absolute_import
+
+from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import as_completed
+
+import apache_beam as beam
+from apache_beam.io.gcp.dicomclient import DicomApiHttpClient
+from apache_beam.transforms import PTransform
+
+
+class DicomSearch(PTransform):
+ """A PTransform used for retrieving DICOM instance metadata from Google
+ Cloud DICOM store. It takes a PCollection of dicts as input and returns
+ a PCollection of dicts as results:
+ INPUT:
+ The input dict represents DICOM web path parameters, which has the following
+ string keys and values:
+ {
+ 'project_id': str,
+ 'region': str,
+ 'dataset_id': str,
+ 'dicom_store_id': str,
+ 'search_type': str,
+ 'params': dict(str,str) (Optional),
+ }
+
+ Key-value pairs:
+ project_id: Id of the project in which the DICOM store is
+ located. (Required)
+ region: Region where the DICOM store resides. (Required)
+ dataset_id: Id of the dataset the DICOM store belongs to. (Required)
+ dicom_store_id: Id of the dicom store. (Required)
+ search_type: Which type of search it is, could only be one of the three
+ values: 'instances', 'series', or 'studies'. (Required)
+ params: A dict of str:str pairs used to refine QIDO search. (Optional)
+ Supported tags in three categories:
+ 1.Studies:
+ * StudyInstanceUID,
+ * PatientName,
+ * PatientID,
+ * AccessionNumber,
+ * ReferringPhysicianName,
+ * StudyDate,
+ 2.Series: all study level search terms and
+ * SeriesInstanceUID,
+ * Modality,
+ 3.Instances: all study/series level search terms and
+ * SOPInstanceUID,
+
+ e.g. {"StudyInstanceUID":"1","SeriesInstanceUID":"2"}
+
+ OUTPUT:
+ The output dict wraps results as well as error messages:
+ {
+ 'result': a list of dicts in JSON style (empty for a rejected input).
+ 'success': boolean value telling whether the operation is successful.
+ 'input': the input dict this output was produced for.
+ 'status': status code returned by the client, or a validation error
+ message when the input dict is rejected.
+ }
+
+ """
+ def __init__(
+ self, buffer_size=8, max_workers=5, client=None, credential=None):
+ """Initializes DicomSearch.
+ Args:
+ buffer_size: # type: Int. Size of the request buffer.
+ max_workers: # type: Int. Maximum number of threads a worker can
+ create. If it is set to one, all the requests will be processed
+ sequentially in a worker.
+ client: # type: object. If it is specified, all the Api calls will be
+ made by this client instead of the default one (DicomApiHttpClient).
+ credential: # type: Google credential object, if it is specified, the
+ Http client will use it to create sessions instead of the default.
+ """
+ self.buffer_size = buffer_size
+ self.max_workers = max_workers
+ # Fall back to a freshly constructed DicomApiHttpClient when no client
+ # (or a falsy one) is supplied.
+ self.client = client or DicomApiHttpClient()
+ self.credential = credential
+
+ def expand(self, pcoll):
+ # All the work is delegated to a single ParDo over _QidoReadFn.
+ return pcoll | beam.ParDo(
+ _QidoReadFn(
+ self.buffer_size, self.max_workers, self.client, self.credential))
+
+
+class _QidoReadFn(beam.DoFn):
+  """A DoFn that executes QIDO search requests in buffered, threaded batches.
+
+  Valid elements are buffered together with their window and timestamp. Once
+  `buffer_size` elements are buffered, or the bundle finishes, the buffered
+  requests run on a thread pool and are emitted as WindowedValues. Elements
+  that fail validation are emitted immediately as failure dicts.
+  """
+  def __init__(self, buffer_size, max_workers, client, credential=None):
+    self.buffer_size = buffer_size
+    self.max_workers = max_workers
+    self.client = client
+    self.credential = credential
+
+  def start_bundle(self):
+    # Every bundle starts with an empty request buffer.
+    self.buffer = []
+
+  def finish_bundle(self):
+    # Emit whatever is still buffered when the bundle ends.
+    for item in self._flush():
+      yield item
+
+  def validate_element(self, element):
+    """Validates an input dict.
+
+    Returns:
+      (True, None) when the element is usable, otherwise
+      (False, error_message).
+    """
+    # Check if all required keys present.
+    required_keys = [
+        'project_id', 'region', 'dataset_id', 'dicom_store_id', 'search_type'
+    ]
+
+    for key in required_keys:
+      if key not in element:
+        error_message = 'Must have %s in the dict.' % (key)
+        return False, error_message
+
+    # Check if return type is correct.
+    if element['search_type'] in ['instances', "studies", "series"]:
+      return True, None
+    else:
+      error_message = (
+          'Search type can only be "studies", '
+          '"instances" or "series"')
+      return False, error_message
+
+  def process(
+      self,
+      element,
+      window=beam.DoFn.WindowParam,
+      timestamp=beam.DoFn.TimestampParam):
+    # Check if the element is valid
+    valid, error_message = self.validate_element(element)
+
+    if valid:
+      # Keep window and timestamp so results flushed later (possibly from
+      # finish_bundle) can be emitted as WindowedValues.
+      self.buffer.append((element, window, timestamp))
+      if len(self.buffer) >= self.buffer_size:
+        for item in self._flush():
+          yield item
+    else:
+      # Return this when the input dict does not meet the requirements
+      out = {}
+      out['result'] = []
+      out['status'] = error_message
+      out['input'] = element
+      out['success'] = False
+      yield out
+
+  def make_request(self, element):
+    """Sends one QIDO search request and wraps the answer in an output dict."""
+    project_id = element['project_id']
+    region = element['region']
+    dataset_id = element['dataset_id']
+    dicom_store_id = element['dicom_store_id']
+    search_type = element['search_type']
+    params = element['params'] if 'params' in element else None
+
+    # Call qido search http client
+    result, status_code = self.client.qido_search(
+        project_id, region, dataset_id, dicom_store_id,
+        search_type, params, self.credential
+    )
+
+    out = {}
+    out['result'] = result
+    out['status'] = status_code
+    out['input'] = element
+    out['success'] = (status_code == 200)
+    return out
+
+  def process_buffer_element(self, buffer_element):
+    # Thread job runner - each thread makes a Qido search request.
+    # buffer_element is the (element, window, timestamp) tuple stored by
+    # process().
+    value = self.make_request(buffer_element[0])
+    windows = [buffer_element[1]]
+    timestamp = buffer_element[2]
+    return beam.utils.windowed_value.WindowedValue(
+        value=value, timestamp=timestamp, windows=windows)
+
+  def _flush(self):
+    """Runs all buffered requests in parallel and yields their results.
+
+    The executor is used as a context manager so its worker threads are shut
+    down after every flush instead of accumulating, and no pool is created
+    when there is nothing to send.
+    """
+    # Take ownership of the pending elements first so the buffer is empty
+    # again no matter how the requests turn out.
+    pending, self.buffer = self.buffer, []
+    if not pending:
+      return
+    with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+      futures = [
+          executor.submit(self.process_buffer_element, ele) for ele in pending
+      ]
+      for f in as_completed(futures):
+        yield f.result()
+
+
+class FormatToQido(PTransform):
+ """A PTransform for converting pubsub messages into search input dicts.
+ Takes a PCollection of strings as input and returns a PCollection of dicts
+ as results. Note that some pubsub messages may not be from DICOM API, which
+ will be recorded as failed conversions.
+ INPUT:
+ The input are normally strings from Pubsub topic:
+ "projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID/
+ dicomStores/DICOM_STORE_ID/dicomWeb/studies/STUDY_UID/
+ series/SERIES_UID/instances/INSTANCE_UID"
+
+ OUTPUT:
+ The output dict encodes results as well as error messages:
+ {
+ 'result': a dict representing instance level qido search request
+ (an empty dict when the conversion fails).
+ 'success': boolean value telling whether the conversion is successful.
+ 'input': input pubsub message string.
+ }
+
+ """
+ def __init__(self, credential=None):
+ """Initializes FormatToQido.
+ Args:
+ credential: # type: Google credential object, if it is specified, the
+ Http client will use it instead of the default one.
+ """
+ # NOTE(review): the credential is stored here but expand() does not pass it
+ # to _ConvertStringToQido, so it currently has no effect on the conversion.
+ self.credential = credential
+
+ def expand(self, pcoll):
+ # The conversion is a pure string-to-dict mapping done by one ParDo.
+ return pcoll | beam.ParDo(_ConvertStringToQido())
+
+
+class _ConvertStringToQido(beam.DoFn):
+ """A DoFn for converting pubsub string to qido search parameters.
+
+ The message is split on '/' and must have exactly 15 entries laid out as
+ projects/P/locations/L/datasets/D/dicomStores/S/dicomWeb/
+ studies/ST/series/SE/instances/I. Anything else produces a failure dict.
+ """
+ def process(self, element):
+ # Some constants for DICOM pubsub message
+ # Number of '/'-separated entries in a well-formed message.
+ NUM_PUBSUB_STR_ENTRIES = 15
+ # The first five keywords sit at even positions (0, 2, 4, 6, 8); the
+ # remaining three (studies, series, instances) sit at odd positions
+ # (9, 11, 13) because 'dicomWeb' is not followed by a value.
+ NUM_DICOM_WEBPATH_PARAMETERS = 5
+ NUM_TOTAL_PARAMETERS = 8
+ # Positions of the values that follow each keyword.
+ INDEX_PROJECT_ID = 1
+ INDEX_REGION = 3
+ INDEX_DATASET_ID = 5
+ INDEX_DICOMSTORE_ID = 7
+ INDEX_STUDY_ID = 10
+ INDEX_SERIE_ID = 12
+ INDEX_INSTANCE_ID = 14
+
+ entries = element.split('/')
+
+ # Output dict with error message, used when
+ # receiving invalid pubsub string.
+ error_dict = {}
+ error_dict['result'] = {}
+ error_dict['input'] = element
+ error_dict['success'] = False
+
+ if len(entries) != NUM_PUBSUB_STR_ENTRIES:
+ return [error_dict]
+
+ required_keys = [
+ 'projects',
+ 'locations',
+ 'datasets',
+ 'dicomStores',
+ 'dicomWeb',
+ 'studies',
+ 'series',
+ 'instances'
+ ]
+
+ # Check if the required keys are present and
+ # the positions of those keys are correct
+ for i in range(NUM_DICOM_WEBPATH_PARAMETERS):
+ if required_keys[i] != entries[i * 2]:
+ return [error_dict]
+ for i in range(NUM_DICOM_WEBPATH_PARAMETERS, NUM_TOTAL_PARAMETERS):
+ if required_keys[i] != entries[i * 2 - 1]:
+ return [error_dict]
+
+ # Compose dicom webpath parameters for qido search
+ qido_dict = {}
+ qido_dict['project_id'] = entries[INDEX_PROJECT_ID]
+ qido_dict['region'] = entries[INDEX_REGION]
+ qido_dict['dataset_id'] = entries[INDEX_DATASET_ID]
+ qido_dict['dicom_store_id'] = entries[INDEX_DICOMSTORE_ID]
+ # The converted request is always an instance level search.
+ qido_dict['search_type'] = 'instances'
+
+ # Compose instance level params for qido search
+ params = {}
+ params['StudyInstanceUID'] = entries[INDEX_STUDY_ID]
+ params['SeriesInstanceUID'] = entries[INDEX_SERIE_ID]
+ params['SOPInstanceUID'] = entries[INDEX_INSTANCE_ID]
+ qido_dict['params'] = params
+
+ out = {}
+ out['result'] = qido_dict
+ out['input'] = element
+ out['success'] = True
+
+ return [out]
+
+
+class UploadToDicomStore(PTransform):
+ """A PTransform for storing instances to a DICOM store.
+ Takes a PCollection of DICOM files as input and returns a PCollection of
+ dicts as results. The inputs are either DICOM files in bytes or fileio
+ objects the files can be read from, as selected by 'input_type'.
+ INPUT:
+ This PTransform supports two types of input:
+ 1. Byte[]: representing dicom file.
+ 2. Fileio object: stream file object.
+
+ OUTPUT:
+ The output dict encodes status as well as error messages:
+ {
+ 'success': boolean value telling whether the store is successful.
+ 'input': undeliverable data. Exactly the same as the input,
+ only set if the operation failed.
+ 'status': status code returned by the client, or the exception raised
+ while reading the input file.
+ }
+
+ """
+ def __init__(
+ self,
+ destination_dict,
+ input_type,
+ buffer_size=8,
+ max_workers=5,
+ client=None,
+ credential=None):
+ """Initializes UploadToDicomStore.
+ Args:
+ destination_dict: # type: python dict, encodes DICOM endpoint information:
+ {
+ 'project_id': str,
+ 'region': str,
+ 'dataset_id': str,
+ 'dicom_store_id': str,
+ }
+
+ Key-value pairs:
+ * project_id: Id of the project in which the DICOM store is
+ located. (Required)
+ * region: Region where the DICOM store resides. (Required)
+ * dataset_id: Id of the dataset the DICOM store belongs to. (Required)
+ * dicom_store_id: Id of the dicom store. (Required)
+
+ input_type: # type: string, could only be 'bytes' or 'fileio'
+ buffer_size: # type: Int. Size of the request buffer.
+ max_workers: # type: Int. Maximum number of threads a worker can
+ create. If it is set to one, all the requests will be processed
+ sequentially in a worker.
+ client: # type: object. If it is specified, all the Api calls will be
+ made by this client instead of the default one (DicomApiHttpClient).
+ credential: # type: Google credential object, if it is specified, the
+ Http client will use it instead of the default one.
+
+ Raises:
+ ValueError: if input_type is neither 'bytes' nor 'fileio'.
+ """
+ # The keys of destination_dict are not checked here; _StoreInstance
+ # validates them when it is constructed in expand().
+ self.destination_dict = destination_dict
+ # input_type pre-check
+ if input_type not in ['bytes', 'fileio']:
+ raise ValueError("input_type could only be 'bytes' or 'fileio'")
+ self.input_type = input_type
+ self.buffer_size = buffer_size
+ self.max_workers = max_workers
+ # May stay None; _StoreInstance then falls back to DicomApiHttpClient.
+ self.client = client
+ self.credential = credential
+
+ def expand(self, pcoll):
+ # All the work is delegated to a single ParDo over _StoreInstance.
+ return pcoll | beam.ParDo(
+ _StoreInstance(
+ self.destination_dict,
+ self.input_type,
+ self.buffer_size,
+ self.max_workers,
+ self.client,
+ self.credential))
+
+
+class _StoreInstance(beam.DoFn):
+  """A DoFn that reads DICOM files and pushes them to a DICOM store.
+
+  Elements are buffered together with their window and timestamp. Once
+  `buffer_size` elements are buffered, or the bundle finishes, the buffered
+  files are stored on a thread pool and one result dict per element is
+  emitted as a WindowedValue.
+  """
+  def __init__(
+      self,
+      destination_dict,
+      input_type,
+      buffer_size,
+      max_workers,
+      client,
+      credential=None):
+    # pre-check destination dict
+    required_keys = ['project_id', 'region', 'dataset_id', 'dicom_store_id']
+    for key in required_keys:
+      if key not in destination_dict:
+        raise ValueError('Must have %s in the dict.' % (key))
+    self.destination_dict = destination_dict
+    self.input_type = input_type
+    self.buffer_size = buffer_size
+    self.max_workers = max_workers
+    self.client = client
+    self.credential = credential
+
+  def start_bundle(self):
+    # Every bundle starts with an empty upload buffer.
+    self.buffer = []
+
+  def finish_bundle(self):
+    # Emit whatever is still buffered when the bundle ends.
+    for item in self._flush():
+      yield item
+
+  def process(
+      self,
+      element,
+      window=beam.DoFn.WindowParam,
+      timestamp=beam.DoFn.TimestampParam):
+    # Keep window and timestamp so results flushed later (possibly from
+    # finish_bundle) can be emitted as WindowedValues.
+    self.buffer.append((element, window, timestamp))
+    if len(self.buffer) >= self.buffer_size:
+      for item in self._flush():
+        yield item
+
+  def make_request(self, dicom_file):
+    """Sends one file to the DICOM store and returns a status dict."""
+    project_id = self.destination_dict['project_id']
+    region = self.destination_dict['region']
+    dataset_id = self.destination_dict['dataset_id']
+    dicom_store_id = self.destination_dict['dicom_store_id']
+
+    # Feed the dicom file into the store client; fall back to the default
+    # http client when none was supplied.
+    client = self.client or DicomApiHttpClient()
+    _, status_code = client.dicomweb_store_instance(
+        project_id, region, dataset_id, dicom_store_id, dicom_file,
+        self.credential
+    )
+
+    out = {}
+    out['status'] = status_code
+    out['success'] = (status_code == 200)
+    return out
+
+  def read_dicom_file(self, buffer_element):
+    """Obtains the file content for one input element.
+
+    Returns:
+      (True, data) on success. If reading fails, (False, error_dict) where
+      error_dict records the raised exception under 'status'.
+    """
+    try:
+      if self.input_type == 'fileio':
+        f = buffer_element.open()
+        # Close the handle even when read() raises, so a failed read does
+        # not leak it.
+        try:
+          data = f.read()
+        finally:
+          f.close()
+        return True, data
+      else:
+        return True, buffer_element
+    except Exception as error_message:
+      error_out = {}
+      error_out['status'] = error_message
+      error_out['success'] = False
+      return False, error_out
+
+  def process_buffer_element(self, buffer_element):
+    # Thread job runner - each thread stores a DICOM file.
+    # buffer_element is the (element, window, timestamp) tuple stored by
+    # process().
+    success, read_result = self.read_dicom_file(buffer_element[0])
+    windows = [buffer_element[1]]
+    timestamp = buffer_element[2]
+    value = None
+    if success:
+      value = self.make_request(read_result)
+    else:
+      value = read_result
+    # save the undeliverable data
+    if not value['success']:
+      value['input'] = buffer_element[0]
+    return beam.utils.windowed_value.WindowedValue(
+        value=value, timestamp=timestamp, windows=windows)
+
+  def _flush(self):
+    """Stores all buffered files in parallel and yields their results.
+
+    The executor is used as a context manager so its worker threads are shut
+    down after every flush instead of accumulating, and no pool is created
+    when there is nothing to send.
+    """
+    # Take ownership of the pending elements first so the buffer is empty
+    # again no matter how the requests turn out.
+    pending, self.buffer = self.buffer, []
+    if not pending:
+      return
+    with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+      futures = [
+          executor.submit(self.process_buffer_element, ele) for ele in pending
+      ]
+      for f in as_completed(futures):
+        yield f.result()
diff --git a/sdks/python/apache_beam/io/gcp/dicomio_test.py b/sdks/python/apache_beam/io/gcp/dicomio_test.py
new file mode 100644
index 0000000..2594e45
--- /dev/null
+++ b/sdks/python/apache_beam/io/gcp/dicomio_test.py
@@ -0,0 +1,468 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Unit tests for Dicom IO connectors."""
+
+# pytype: skip-file
+
+from __future__ import absolute_import
+
+import json
+import os
+import unittest
+
+# patches unittest.TestCase to be python3 compatible
+import future.tests.base # pylint: disable=unused-import
+from mock import patch
+
+import apache_beam as beam
+from apache_beam.io import fileio
+from apache_beam.io.filebasedsink_test import _TestCaseWithTempDirCleanUp
+from apache_beam.io.filesystems import FileSystems
+from apache_beam.testing.test_pipeline import TestPipeline
+from apache_beam.testing.util import assert_that
+from apache_beam.testing.util import equal_to
+
+# Protect against environments where gcp library is not available.
+# pylint: disable=wrong-import-order, wrong-import-position
+try:
+ from apache_beam.io.gcp.dicomio import DicomSearch
+ from apache_beam.io.gcp.dicomio import FormatToQido
+ from apache_beam.io.gcp.dicomio import UploadToDicomStore
+except ImportError:
+ DicomSearch = None # type: ignore
+# pylint: enable=wrong-import-order, wrong-import-position
+
+
+class FakeHttpClient():
+ # A fake http client that talks directly to an in-memory dicom store.
+ # It offers the two methods the transforms under test call:
+ # qido_search and dicomweb_store_instance.
+ def __init__(self):
+ # set 6 fake dicom instances
+ dicom_metadata = []
+ dicom_metadata.append({
+ 'PatientName': 'Albert', 'Age': 21, 'TestResult': 'Positive'
+ })
+ dicom_metadata.append({
+ 'PatientName': 'Albert', 'Age': 21, 'TestResult': 'Negative'
+ })
+ dicom_metadata.append({
+ 'PatientName': 'Brian', 'Age': 20, 'TestResult': 'Positive'
+ })
+ dicom_metadata.append({
+ 'PatientName': 'Colin', 'Age': 25, 'TestResult': 'Negative'
+ })
+ dicom_metadata.append({
+ 'PatientName': 'Daniel', 'Age': 22, 'TestResult': 'Negative'
+ })
+ dicom_metadata.append({
+ 'PatientName': 'Eric', 'Age': 50, 'TestResult': 'Negative'
+ })
+ self.dicom_metadata = dicom_metadata
+ # ids for this dicom store
+ self.project_id = 'test_project'
+ self.region = 'test_region'
+ self.dataset_id = 'test_dataset_id'
+ self.dicom_store_id = 'test_dicom_store_id'
+
+ def qido_search(
+ self,
+ project_id,
+ region,
+ dataset_id,
+ dicom_store_id,
+ search_type,
+ params=None,
+ credential=None):
+ # qido search function for this fake client
+ # Any id that does not match this store yields an empty result with
+ # status 204; search_type and credential are ignored.
+ if project_id != self.project_id or region != self.region or \
+ dataset_id != self.dataset_id or dicom_store_id != self.dicom_store_id:
+ return [], 204
+ # only supports all instance search in this client
+ if not params:
+ return self.dicom_metadata, 200
+ # only supports patient name filter in this client
+ patient_name = params['PatientName']
+ out = []
+ for meta in self.dicom_metadata:
+ if meta['PatientName'] == patient_name:
+ out.append(meta)
+ return out, 200
+
+ def dicomweb_store_instance(
+ self,
+ project_id,
+ region,
+ dataset_id,
+ dicom_store_id,
+ dcm_file,
+ credential=None):
+ # Store function for this fake client: the "file" is UTF-8 encoded JSON
+ # that is decoded and appended to the in-memory metadata list.
+ if project_id != self.project_id or region != self.region or \
+ dataset_id != self.dataset_id or dicom_store_id != self.dicom_store_id:
+ return [], 204
+ # convert the bytes file back to dict
+ string_array = dcm_file.decode('utf-8')
+ metadata_dict = json.loads(string_array)
+ self.dicom_metadata.append(metadata_dict)
+ return None, 200
+
+
+@unittest.skipIf(DicomSearch is None, 'GCP dependencies are not installed')
+class TestFormatToQido(unittest.TestCase):
+ # Tests for FormatToQido: one well-formed pubsub path and one string that
+ # is not a DICOM pubsub message.
+ valid_pubsub_string = (
+ "projects/PROJECT_ID/locations/LOCATION/datasets"
+ "/DATASET_ID/dicomStores/DICOM_STORE_ID/dicomWeb/"
+ "studies/STUDY_UID/series/SERIES_UID/instances/INSTANCE_UID")
+ # Expected conversion of valid_pubsub_string: an instance level search
+ # request carrying the three UIDs as params.
+ expected_valid_pubsub_dict = {
+ 'result': {
+ 'project_id': 'PROJECT_ID',
+ 'region': 'LOCATION',
+ 'dataset_id': 'DATASET_ID',
+ 'dicom_store_id': 'DICOM_STORE_ID',
+ 'search_type': 'instances',
+ 'params': {
+ 'StudyInstanceUID': 'STUDY_UID',
+ 'SeriesInstanceUID': 'SERIES_UID',
+ 'SOPInstanceUID': 'INSTANCE_UID'
+ }
+ },
+ 'input': valid_pubsub_string,
+ 'success': True
+ }
+ invalid_pubsub_string = "this is not a valid pubsub message"
+ # A failed conversion echoes the input and carries an empty result.
+ expected_invalid_pubsub_dict = {
+ 'result': {},
+ 'input': 'this is not a valid pubsub message',
+ 'success': False
+ }
+
+ def test_normal_convert(self):
+ # A well-formed message is converted into a qido search dict.
+ with TestPipeline() as p:
+ convert_result = (
+ p
+ | beam.Create([self.valid_pubsub_string])
+ | FormatToQido())
+ assert_that(convert_result, equal_to([self.expected_valid_pubsub_dict]))
+
+ def test_failed_convert(self):
+ # An unrelated string is reported as a failed conversion.
+ with TestPipeline() as p:
+ convert_result = (
+ p
+ | beam.Create([self.invalid_pubsub_string])
+ | FormatToQido())
+ assert_that(convert_result, equal_to([self.expected_invalid_pubsub_dict]))
+
+
+@unittest.skipIf(DicomSearch is None, 'GCP dependencies are not installed')
+class TestDicomSearch(unittest.TestCase):
+ # Tests for DicomSearch. Every test patches DicomApiHttpClient inside the
+ # dicomio module so that the default client built by DicomSearch() is a
+ # FakeHttpClient backed by in-memory data.
+ @patch("apache_beam.io.gcp.dicomio.DicomApiHttpClient")
+ def test_successful_search(self, FakeClient):
+ # A search without params against the matching store returns all
+ # metadata held by the fake client.
+ input_dict = {}
+ input_dict['project_id'] = "test_project"
+ input_dict['region'] = "test_region"
+ input_dict['dataset_id'] = "test_dataset_id"
+ input_dict['dicom_store_id'] = "test_dicom_store_id"
+ input_dict['search_type'] = "instances"
+
+ fc = FakeHttpClient()
+ FakeClient.return_value = fc
+
+ expected_dict = {}
+ expected_dict['result'] = fc.dicom_metadata
+ expected_dict['status'] = 200
+ expected_dict['input'] = input_dict
+ expected_dict['success'] = True
+
+ with TestPipeline() as p:
+ results = (p | beam.Create([input_dict]) | DicomSearch())
+ assert_that(results, equal_to([expected_dict]))
+
+ @patch("apache_beam.io.gcp.dicomio.DicomApiHttpClient")
+ def test_Qido_search_small_buffer_flush(self, FakeClient):
+ # With buffer_size=1 every element triggers a flush; all five inputs
+ # must still produce a result.
+ input_dict = {}
+ input_dict['project_id'] = "test_project"
+ input_dict['region'] = "test_region"
+ input_dict['dataset_id'] = "test_dataset_id"
+ input_dict['dicom_store_id'] = "test_dicom_store_id"
+ input_dict['search_type'] = "instances"
+
+ fc = FakeHttpClient()
+ FakeClient.return_value = fc
+
+ expected_dict = {}
+ expected_dict['result'] = fc.dicom_metadata
+ expected_dict['status'] = 200
+ expected_dict['input'] = input_dict
+ expected_dict['success'] = True
+
+ with TestPipeline() as p:
+ results = (p | beam.Create([input_dict] * 5) | DicomSearch(buffer_size=1))
+ assert_that(results, equal_to([expected_dict] * 5))
+
+ @patch("apache_beam.io.gcp.dicomio.DicomApiHttpClient")
+ def test_param_dict_passing(self, FakeClient):
+ # The 'params' entry must reach the client: filtering on PatientName
+ # returns only the matching record.
+ # NOTE(review): input_dict is initialised twice below; the duplicate
+ # assignment is harmless.
+ input_dict = {}
+ input_dict = {}
+ input_dict['project_id'] = "test_project"
+ input_dict['region'] = "test_region"
+ input_dict['dataset_id'] = "test_dataset_id"
+ input_dict['dicom_store_id'] = "test_dicom_store_id"
+ input_dict['search_type'] = "instances"
+ input_dict['params'] = {'PatientName': 'Brian'}
+
+ expected_dict = {}
+ expected_dict['result'] = [{
+ 'PatientName': 'Brian', 'Age': 20, 'TestResult': 'Positive'
+ }]
+ expected_dict['status'] = 200
+ expected_dict['input'] = input_dict
+ expected_dict['success'] = True
+
+ fc = FakeHttpClient()
+ FakeClient.return_value = fc
+ with TestPipeline() as p:
+ results = (p | beam.Create([input_dict]) | DicomSearch())
+ assert_that(results, equal_to([expected_dict]))
+
+ @patch("apache_beam.io.gcp.dicomio.DicomApiHttpClient")
+ def test_wrong_input_type(self, FakeClient):
+ # An unsupported search_type is rejected by validation and reported with
+ # the validation message as status.
+ input_dict = {}
+ input_dict['project_id'] = "test_project"
+ input_dict['region'] = "test_region"
+ input_dict['dataset_id'] = "test_dataset_id"
+ input_dict['dicom_store_id'] = "test_dicom_store_id"
+ input_dict['search_type'] = "not exist type"
+
+ expected_invalid_dict = {}
+ expected_invalid_dict['result'] = []
+ expected_invalid_dict[
+ 'status'] = 'Search type can only be "studies", "instances" or "series"'
+ expected_invalid_dict['input'] = input_dict
+ expected_invalid_dict['success'] = False
+
+ fc = FakeHttpClient()
+ FakeClient.return_value = fc
+ with TestPipeline() as p:
+ results = (p | beam.Create([input_dict]) | DicomSearch())
+ assert_that(results, equal_to([expected_invalid_dict]))
+
+ @patch("apache_beam.io.gcp.dicomio.DicomApiHttpClient")
+ def test_missing_parameters(self, FakeClient):
+ # Missing required keys are reported one at a time; dataset_id is the
+ # first missing key in the validation order.
+ input_dict = {}
+ input_dict['project_id'] = "test_project"
+ input_dict['region'] = "test_region"
+
+ expected_invalid_dict = {}
+ expected_invalid_dict['result'] = []
+ expected_invalid_dict['status'] = 'Must have dataset_id in the dict.'
+ expected_invalid_dict['input'] = input_dict
+ expected_invalid_dict['success'] = False
+
+ fc = FakeHttpClient()
+ FakeClient.return_value = fc
+ with TestPipeline() as p:
+ results = (p | beam.Create([input_dict]) | DicomSearch())
+ assert_that(results, equal_to([expected_invalid_dict]))
+
+ @patch("apache_beam.io.gcp.dicomio.DicomApiHttpClient")
+ def test_client_search_notfound(self, FakeClient):
+ # A non-200 status from the client is passed through and marks the
+ # output as unsuccessful.
+ input_dict = {}
+ # search instances in a store that does not exist
+ input_dict['project_id'] = "wrong_project"
+ input_dict['region'] = "wrong_region"
+ input_dict['dataset_id'] = "wrong_dataset_id"
+ input_dict['dicom_store_id'] = "wrong_dicom_store_id"
+ input_dict['search_type'] = "instances"
+
+ expected_invalid_dict = {}
+ expected_invalid_dict['result'] = []
+ expected_invalid_dict['status'] = 204
+ expected_invalid_dict['input'] = input_dict
+ expected_invalid_dict['success'] = False
+
+ fc = FakeHttpClient()
+ FakeClient.return_value = fc
+ with TestPipeline() as p:
+ results = (p | beam.Create([input_dict]) | DicomSearch())
+ assert_that(results, equal_to([expected_invalid_dict]))
+
+
+@unittest.skipIf(DicomSearch is None, 'GCP dependencies are not installed')
+class TestDicomStoreInstance(_TestCaseWithTempDirCleanUp):
+ @patch("apache_beam.io.gcp.dicomio.DicomApiHttpClient")
+ def test_store_byte_file(self, FakeClient):
+ input_dict = {}
+ input_dict['project_id'] = "test_project"
+ input_dict['region'] = "test_region"
+ input_dict['dataset_id'] = "test_dataset_id"
+ input_dict['dicom_store_id'] = "test_dicom_store_id"
+
+ fc = FakeHttpClient()
+ FakeClient.return_value = fc
+
+ dict_input = {'PatientName': 'George', 'Age': 23, 'TestResult': 'Negative'}
+ str_input = json.dumps(dict_input)
+ bytes_input = bytes(str_input.encode("utf-8"))
+ with TestPipeline() as p:
+ results = (
+ p
+ | beam.Create([bytes_input])
+ | UploadToDicomStore(input_dict, 'bytes')
+ | beam.Map(lambda x: x['success']))
+ assert_that(results, equal_to([True]))
+ self.assertTrue(dict_input in fc.dicom_metadata)
+
+ @patch("apache_beam.io.gcp.dicomio.DicomApiHttpClient")
+ def test_store_byte_file_small_buffer_flush(self, FakeClient):
+ input_dict = {}
+ input_dict['project_id'] = "test_project"
+ input_dict['region'] = "test_region"
+ input_dict['dataset_id'] = "test_dataset_id"
+ input_dict['dicom_store_id'] = "test_dicom_store_id"
+
+ fc = FakeHttpClient()
+ FakeClient.return_value = fc
+
+ dict_input_1 = {
+ 'PatientName': 'George', 'Age': 23, 'TestResult': 'Negative'
+ }
+ str_input_1 = json.dumps(dict_input_1)
+ bytes_input_1 = bytes(str_input_1.encode("utf-8"))
+ dict_input_2 = {'PatientName': 'Peter', 'Age': 54, 'TestResult': 'Positive'}
+ str_input_2 = json.dumps(dict_input_2)
+ bytes_input_2 = bytes(str_input_2.encode("utf-8"))
+ dict_input_3 = {'PatientName': 'Zen', 'Age': 27, 'TestResult': 'Negative'}
+ str_input_3 = json.dumps(dict_input_3)
+ bytes_input_3 = bytes(str_input_3.encode("utf-8"))
+ with TestPipeline() as p:
+ results = (
+ p
+ | beam.Create([bytes_input_1, bytes_input_2, bytes_input_3])
+ | UploadToDicomStore(input_dict, 'bytes', buffer_size=1)
+ | beam.Map(lambda x: x['success']))
+ assert_that(results, equal_to([True] * 3))
+ self.assertTrue(dict_input_1 in fc.dicom_metadata)
+ self.assertTrue(dict_input_2 in fc.dicom_metadata)
+ self.assertTrue(dict_input_3 in fc.dicom_metadata)
+
+ @patch("apache_beam.io.gcp.dicomio.DicomApiHttpClient")
+ def test_store_fileio_file(self, FakeClient):
+ input_dict = {}
+ input_dict['project_id'] = "test_project"
+ input_dict['region'] = "test_region"
+ input_dict['dataset_id'] = "test_dataset_id"
+ input_dict['dicom_store_id'] = "test_dicom_store_id"
+
+ fc = FakeHttpClient()
+ FakeClient.return_value = fc
+
+ dict_input = {'PatientName': 'George', 'Age': 23, 'TestResult': 'Negative'}
+ str_input = json.dumps(dict_input)
+ temp_dir = '%s%s' % (self._new_tempdir(), os.sep)
+ self._create_temp_file(dir=temp_dir, content=str_input)
+
+ with TestPipeline() as p:
+ results = (
+ p
+ | beam.Create([FileSystems.join(temp_dir, '*')])
+ | fileio.MatchAll()
+ | fileio.ReadMatches()
+ | UploadToDicomStore(input_dict, 'fileio')
+ | beam.Map(lambda x: x['success']))
+ assert_that(results, equal_to([True]))
+ self.assertTrue(dict_input in fc.dicom_metadata)
+
+ @patch("apache_beam.io.gcp.dicomio.DicomApiHttpClient")
+ def test_store_fileio_file_small_buffer_flush(self, FakeClient):
+ input_dict = {}
+ input_dict['project_id'] = "test_project"
+ input_dict['region'] = "test_region"
+ input_dict['dataset_id'] = "test_dataset_id"
+ input_dict['dicom_store_id'] = "test_dicom_store_id"
+
+ fc = FakeHttpClient()
+ FakeClient.return_value = fc
+
+ temp_dir = '%s%s' % (self._new_tempdir(), os.sep)
+ dict_input_1 = {
+ 'PatientName': 'George', 'Age': 23, 'TestResult': 'Negative'
+ }
+ str_input_1 = json.dumps(dict_input_1)
+ self._create_temp_file(dir=temp_dir, content=str_input_1)
+ dict_input_2 = {'PatientName': 'Peter', 'Age': 54, 'TestResult': 'Positive'}
+ str_input_2 = json.dumps(dict_input_2)
+ self._create_temp_file(dir=temp_dir, content=str_input_2)
+ dict_input_3 = {'PatientName': 'Zen', 'Age': 27, 'TestResult': 'Negative'}
+ str_input_3 = json.dumps(dict_input_3)
+ self._create_temp_file(dir=temp_dir, content=str_input_3)
+
+ with TestPipeline() as p:
+ results = (
+ p
+ | beam.Create([FileSystems.join(temp_dir, '*')])
+ | fileio.MatchAll()
+ | fileio.ReadMatches()
+ | UploadToDicomStore(input_dict, 'fileio', buffer_size=1)
+ | beam.Map(lambda x: x['success']))
+ assert_that(results, equal_to([True] * 3))
+ self.assertTrue(dict_input_1 in fc.dicom_metadata)
+ self.assertTrue(dict_input_2 in fc.dicom_metadata)
+ self.assertTrue(dict_input_3 in fc.dicom_metadata)
+
+ @patch("apache_beam.io.gcp.dicomio.DicomApiHttpClient")
+ def test_destination_notfound(self, FakeClient):
+ input_dict = {}
+ # search instances in a not exist store
+ input_dict['project_id'] = "wrong_project"
+ input_dict['region'] = "wrong_region"
+ input_dict['dataset_id'] = "wrong_dataset_id"
+ input_dict['dicom_store_id'] = "wrong_dicom_store_id"
+
+ expected_invalid_dict = {}
+ expected_invalid_dict['status'] = 204
+ expected_invalid_dict['input'] = ''
+ expected_invalid_dict['success'] = False
+
+ fc = FakeHttpClient()
+ FakeClient.return_value = fc
+ with TestPipeline() as p:
+ results = (
+ p | beam.Create(['']) | UploadToDicomStore(input_dict, 'bytes'))
+ assert_that(results, equal_to([expected_invalid_dict]))
+
+ @patch("apache_beam.io.gcp.dicomio.DicomApiHttpClient")
+ def test_missing_parameters(self, FakeClient):
+ input_dict = {}
+ input_dict['project_id'] = "test_project"
+ input_dict['region'] = "test_region"
+
+ expected_invalid_dict = {}
+ expected_invalid_dict['result'] = []
+ expected_invalid_dict['status'] = 'Must have dataset_id in the dict.'
+ expected_invalid_dict['input'] = input_dict
+ expected_invalid_dict['success'] = False
+
+ fc = FakeHttpClient()
+ FakeClient.return_value = fc
+ with self.assertRaisesRegex(ValueError,
+ "Must have dataset_id in the dict."):
+ p = TestPipeline()
+ _ = (p | beam.Create(['']) | UploadToDicomStore(input_dict, 'bytes'))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/sdks/python/apache_beam/io/kafka.py b/sdks/python/apache_beam/io/kafka.py
index 4336bec..dc75b73 100644
--- a/sdks/python/apache_beam/io/kafka.py
+++ b/sdks/python/apache_beam/io/kafka.py
@@ -97,6 +97,9 @@
('topics', typing.List[unicode]),
('key_deserializer', unicode),
('value_deserializer', unicode),
+ ('start_read_time', typing.Optional[int]),
+ ('max_num_records', typing.Optional[int]),
+ ('max_read_time', typing.Optional[int]),
])
@@ -125,24 +128,30 @@
topics,
key_deserializer=byte_array_deserializer,
value_deserializer=byte_array_deserializer,
- expansion_service=None):
+ start_read_time=None,
+ max_num_records=None,
+ max_read_time=None,
+ expansion_service=None,
+ ):
"""
Initializes a read operation from Kafka.
:param consumer_config: A dictionary containing the consumer configuration.
:param topics: A list of topic strings.
:param key_deserializer: A fully-qualified Java class name of a Kafka
- Deserializer for the topic's key, e.g.
- 'org.apache.kafka.common.
- serialization.LongDeserializer'.
- Default: 'org.apache.kafka.common.
- serialization.ByteArrayDeserializer'.
+ Deserializer for the topic's key, e.g.
+ 'org.apache.kafka.common.serialization.LongDeserializer'.
+ Default: 'org.apache.kafka.common.serialization.ByteArrayDeserializer'.
:param value_deserializer: A fully-qualified Java class name of a Kafka
- Deserializer for the topic's value, e.g.
- 'org.apache.kafka.common.
- serialization.LongDeserializer'.
- Default: 'org.apache.kafka.common.
- serialization.ByteArrayDeserializer'.
+ Deserializer for the topic's value, e.g.
+ 'org.apache.kafka.common.serialization.LongDeserializer'.
+ Default: 'org.apache.kafka.common.serialization.ByteArrayDeserializer'.
+ :param start_read_time: Timestamp, in milliseconds since the epoch, used to
+ set the start offset.
+ :param max_num_records: Maximum amount of records to be read. Mainly used
+ for tests and demo applications.
+ :param max_read_time: Maximum amount of time in seconds the transform
+ executes. Mainly used for tests and demo applications.
:param expansion_service: The address (host:port) of the ExpansionService.
"""
super(ReadFromKafka, self).__init__(
@@ -153,6 +162,9 @@
topics=topics,
key_deserializer=key_deserializer,
value_deserializer=value_deserializer,
+ max_num_records=max_num_records,
+ max_read_time=max_read_time,
+ start_read_time=start_read_time,
)),
expansion_service or default_io_expansion_service())
@@ -195,17 +207,13 @@
:param producer_config: A dictionary containing the producer configuration.
:param topic: A Kafka topic name.
:param key_deserializer: A fully-qualified Java class name of a Kafka
- Serializer for the topic's key, e.g.
- 'org.apache.kafka.common.
- serialization.LongSerializer'.
- Default: 'org.apache.kafka.common.
- serialization.ByteArraySerializer'.
+ Serializer for the topic's key, e.g.
+ 'org.apache.kafka.common.serialization.LongSerializer'.
+ Default: 'org.apache.kafka.common.serialization.ByteArraySerializer'.
:param value_deserializer: A fully-qualified Java class name of a Kafka
- Serializer for the topic's value, e.g.
- 'org.apache.kafka.common.
- serialization.LongSerializer'.
- Default: 'org.apache.kafka.common.
- serialization.ByteArraySerializer'.
+ Serializer for the topic's value, e.g.
+ 'org.apache.kafka.common.serialization.LongSerializer'.
+ Default: 'org.apache.kafka.common.serialization.ByteArraySerializer'.
:param expansion_service: The address (host:port) of the ExpansionService.
"""
super(WriteToKafka, self).__init__(
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
index 8f2a9cf..dca3a39 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
@@ -592,15 +592,13 @@
return result
def _maybe_add_unified_worker_missing_options(self, options):
- # set default beam_fn_api and use_beam_bq_sink experiment if use unified
+ # set default beam_fn_api experiment if use unified
# worker experiment flag exists, no-op otherwise.
debug_options = options.view_as(DebugOptions)
from apache_beam.runners.dataflow.internal import apiclient
if apiclient._use_unified_worker(options):
if not debug_options.lookup_experiment('beam_fn_api'):
debug_options.add_experiment('beam_fn_api')
- if not debug_options.lookup_experiment('use_beam_bq_sink'):
- debug_options.add_experiment('use_beam_bq_sink')
def _get_typehint_based_encoding(self, typehint, window_coder):
"""Returns an encoding based on a typehint object."""
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py
index 1e4f6f9..0315af3 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py
@@ -698,6 +698,7 @@
def test_write_bigquery_translation(self):
runner = DataflowRunner()
+ self.default_properties.append('--experiments=use_legacy_bq_sink')
with beam.Pipeline(runner=runner,
options=PipelineOptions(self.default_properties)) as p:
# pylint: disable=expression-not-assigned
@@ -749,12 +750,13 @@
"""Tests that WriteToBigQuery cannot have any consumers if replaced."""
runner = DataflowRunner()
- with self.assertRaises(ValueError):
+ self.default_properties.append('--experiments=use_legacy_bq_sink')
+ with self.assertRaises(Exception):
with beam.Pipeline(runner=runner,
options=PipelineOptions(self.default_properties)) as p:
# pylint: disable=expression-not-assigned
out = p | beam.Create([1]) | beam.io.WriteToBigQuery('some.table')
- out['FailedRows'] | 'MyTransform' >> beam.Map(lambda _: _)
+ out['destination_file_pairs'] | 'MyTransform' >> beam.Map(lambda _: _)
class CustomMergingWindowFn(window.WindowFn):
diff --git a/sdks/python/apache_beam/runners/dataflow/ptransform_overrides.py b/sdks/python/apache_beam/runners/dataflow/ptransform_overrides.py
index 75b1db0..5743f52 100644
--- a/sdks/python/apache_beam/runners/dataflow/ptransform_overrides.py
+++ b/sdks/python/apache_beam/runners/dataflow/ptransform_overrides.py
@@ -236,7 +236,10 @@
self.visit_transform(transform_node)
def visit_transform(self, transform_node):
- if [o for o in self.outputs if o in transform_node.inputs]:
+ # Internal consumers of the outputs we're overriding are expected.
+ # We only error out on non-internal consumers.
+ if ('BigQueryBatchFileLoads' not in transform_node.full_label and
+ [o for o in self.outputs if o in transform_node.inputs]):
raise ValueError(
'WriteToBigQuery was being replaced with the native '
'BigQuerySink, but the transform "{}" has an input which will be '
@@ -250,16 +253,13 @@
# Imported here to avoid circular dependencies.
# pylint: disable=wrong-import-order, wrong-import-position
from apache_beam import io
- from apache_beam.runners.dataflow.internal import apiclient
-
transform = applied_ptransform.transform
if (not isinstance(transform, io.WriteToBigQuery) or
getattr(transform, 'override', False)):
return False
- use_fnapi = apiclient._use_fnapi(self.options)
experiments = self.options.view_as(DebugOptions).experiments or []
- if (use_fnapi or 'use_beam_bq_sink' in experiments):
+ if 'use_legacy_bq_sink' not in experiments:
return False
if transform.schema == io.gcp.bigquery.SCHEMA_AUTODETECT:
diff --git a/sdks/python/apache_beam/runners/direct/evaluation_context.py b/sdks/python/apache_beam/runners/direct/evaluation_context.py
index 100b0e5..48a99bd 100644
--- a/sdks/python/apache_beam/runners/direct/evaluation_context.py
+++ b/sdks/python/apache_beam/runners/direct/evaluation_context.py
@@ -275,6 +275,7 @@
self._metrics = DirectMetrics()
self._lock = threading.Lock()
+ self.shutdown_requested = False
def _initialize_keyed_states(self, root_transforms, value_to_consumers):
"""Initialize user state dicts.
@@ -453,6 +454,9 @@
return self._side_inputs_container.get_value_or_block_until_ready(
side_input, task, block_until)
+ def shutdown(self):
+ self.shutdown_requested = True
+
class DirectUnmergedState(InMemoryUnmergedState):
"""UnmergedState implementation for the DirectRunner."""
diff --git a/sdks/python/apache_beam/runners/direct/executor.py b/sdks/python/apache_beam/runners/direct/executor.py
index b23f385..1c5dff5 100644
--- a/sdks/python/apache_beam/runners/direct/executor.py
+++ b/sdks/python/apache_beam/runners/direct/executor.py
@@ -492,6 +492,8 @@
def request_shutdown(self):
self.executor_service.shutdown()
+ self.executor_service.await_completion()
+ self.evaluation_context.shutdown()
def schedule_consumers(self, committed_bundle):
# type: (_Bundle) -> None
diff --git a/sdks/python/apache_beam/runners/direct/test_stream_impl.py b/sdks/python/apache_beam/runners/direct/test_stream_impl.py
index a32a114..2897f30 100644
--- a/sdks/python/apache_beam/runners/direct/test_stream_impl.py
+++ b/sdks/python/apache_beam/runners/direct/test_stream_impl.py
@@ -283,7 +283,7 @@
channel.put(_EndOfStream())
@staticmethod
- def events_from_rpc(endpoint, output_tags, coder):
+ def events_from_rpc(endpoint, output_tags, coder, evaluation_context):
"""Yields the events received from the given endpoint.
This method starts a new thread that reads from the TestStreamService and
@@ -296,13 +296,16 @@
"""
# Shared variable with the producer queue. This shuts down the producer if
# the consumer exits early.
- is_alive = True
+ shutdown_requested = False
+
+ def is_alive():
+ return not (shutdown_requested or evaluation_context.shutdown_requested)
# The shared queue that allows the producer and consumer to communicate.
channel = Queue() # type: Queue[Union[test_stream.Event, _EndOfStream]]
event_stream = Thread(
target=_TestStream._stream_events_from_rpc,
- args=(endpoint, output_tags, coder, channel, lambda: is_alive))
+ args=(endpoint, output_tags, coder, channel, is_alive))
event_stream.setDaemon(True)
event_stream.start()
@@ -322,7 +325,7 @@
_LOGGER.warning(
'TestStream timed out waiting for new events from service.'
' Stopping pipeline.')
- is_alive = False
+ shutdown_requested = True
raise e
@staticmethod
diff --git a/sdks/python/apache_beam/runners/direct/transform_evaluator.py b/sdks/python/apache_beam/runners/direct/transform_evaluator.py
index 89106ad..e7b969e 100644
--- a/sdks/python/apache_beam/runners/direct/transform_evaluator.py
+++ b/sdks/python/apache_beam/runners/direct/transform_evaluator.py
@@ -221,7 +221,10 @@
# TestStreamService.
if test_stream.endpoint:
_TestStreamEvaluator.event_stream = _TestStream.events_from_rpc(
- test_stream.endpoint, test_stream.output_tags, test_stream.coder)
+ test_stream.endpoint,
+ test_stream.output_tags,
+ test_stream.coder,
+ self._evaluation_context)
else:
_TestStreamEvaluator.event_stream = (
_TestStream.events_from_script(test_stream._events))
diff --git a/sdks/python/apache_beam/runners/interactive/background_caching_job.py b/sdks/python/apache_beam/runners/interactive/background_caching_job.py
index 117cd59..1b05285 100644
--- a/sdks/python/apache_beam/runners/interactive/background_caching_job.py
+++ b/sdks/python/apache_beam/runners/interactive/background_caching_job.py
@@ -291,7 +291,7 @@
'data to start at the same time, all captured data has been '
'cleared and a new segment of data will be recorded.')
- ie.current_env().cleanup()
+ ie.current_env().cleanup(user_pipeline)
ie.current_env().set_cached_source_signature(
user_pipeline, current_signature)
return is_changed
diff --git a/sdks/python/apache_beam/runners/interactive/background_caching_job_test.py b/sdks/python/apache_beam/runners/interactive/background_caching_job_test.py
index 45c65dd..803f6ce 100644
--- a/sdks/python/apache_beam/runners/interactive/background_caching_job_test.py
+++ b/sdks/python/apache_beam/runners/interactive/background_caching_job_test.py
@@ -91,8 +91,6 @@
sys.version_info < (3, 6), 'The tests require at least Python 3.6 to work.')
class BackgroundCachingJobTest(unittest.TestCase):
def tearDown(self):
- for _, job in ie.current_env()._background_caching_jobs.items():
- job.cancel()
ie.new_env()
# TODO(BEAM-8335): remove the patches when there are appropriate test sources
@@ -302,9 +300,11 @@
def test_determine_a_test_stream_service_running(self):
pipeline = _build_an_empty_stream_pipeline()
test_stream_service = TestStreamServiceController(reader=None)
+ test_stream_service.start()
ie.current_env().set_test_stream_service_controller(
pipeline, test_stream_service)
self.assertTrue(bcj.is_a_test_stream_service_running(pipeline))
+ # the test_stream_service will be cleaned up on teardown.
def test_stop_a_running_test_stream_service(self):
pipeline = _build_an_empty_stream_pipeline()
diff --git a/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization.py b/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization.py
index d11cb24..ce7ec0e 100644
--- a/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization.py
+++ b/sdks/python/apache_beam/runners/interactive/display/pcoll_visualization.py
@@ -47,7 +47,6 @@
from IPython.core.display import Javascript # pylint: disable=import-error
from IPython.core.display import display # pylint: disable=import-error
from IPython.core.display import display_javascript # pylint: disable=import-error
- from IPython.core.display import update_display # pylint: disable=import-error
from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator # pylint: disable=import-error
from timeloop import Timeloop # pylint: disable=import-error
@@ -134,6 +133,10 @@
<script>
{script_in_jquery_with_datatable}
</script>"""
+_NO_DATA_TEMPLATE = _CSS + """
+ <div id="no_data_{id}">No data to display.</div>"""
+_NO_DATA_REMOVAL_SCRIPT = """
+ $("#no_data_{id}").remove();"""
def visualize(
@@ -382,16 +385,23 @@
if update and not update._is_datatable_empty:
display_javascript(Javascript(script_in_jquery_with_datatable))
else:
- html = _DATAFRAME_PAGINATION_TEMPLATE.format(
- table_id=table_id,
- script_in_jquery_with_datatable=script_in_jquery_with_datatable)
+ if data.empty:
+ html = _NO_DATA_TEMPLATE.format(id=table_id)
+ else:
+ html = _DATAFRAME_PAGINATION_TEMPLATE.format(
+ table_id=table_id,
+ script_in_jquery_with_datatable=script_in_jquery_with_datatable)
if update:
if not data.empty:
- # Re-initialize a datatable to replace the existing empty datatable.
- update_display(HTML(html), display_id=update._df_display_id)
+ # Initialize a datatable to replace the existing no data div.
+ display(
+ Javascript(
+ ie._JQUERY_WITH_DATATABLE_TEMPLATE.format(
+ customized_script=_NO_DATA_REMOVAL_SCRIPT.format(
+ id=table_id))))
+ display(HTML(html), display_id=update._df_display_id)
update._is_datatable_empty = False
else:
- # Initialize a datatable for the first time rendering.
display(HTML(html), display_id=self._df_display_id)
if not data.empty:
self._is_datatable_empty = False
diff --git a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/README.md b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/README.md
index 07cd934..f87765a 100644
--- a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/README.md
+++ b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/README.md
@@ -66,6 +66,32 @@
Now every change will be built locally and bundled into JupyterLab. Be sure to refresh your browser page after saving file changes to reload the extension (note: you'll need to wait for webpack to finish, which can take 10s+ at times).
+### Test
+
+To run all tests, run the following from the `apache-beam-jupyterlab-sidepanel` directory:
+
+```bash
+# Make sure all dependencies are installed.
+jlpm
+
+# Run all tests.
+jlpm jest
+```
+
+This project uses `ts-jest` to run the `*.test.ts` and `*.test.tsx` files under the `src/__tests__` directory.
+
+To run a single test, find its name in the source code. A test looks like:
+
+```javascript
+it('does ABC', () => {...})
+```
+
+Then run:
+
+```bash
+jlpm jest -t 'does ABC'
+```
+
### Format and lint
The project uses prettier for formatting and eslint for lint.
diff --git a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/jest.config.js b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/jest.config.js
new file mode 100644
index 0000000..c916ee9
--- /dev/null
+++ b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/jest.config.js
@@ -0,0 +1,32 @@
+// Licensed under the Apache License, Version 2.0 (the 'License'); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+/**
+ * Configures jest test framework.
+ */
+
+module.exports = {
+ "roots": [
+ "<rootDir>/src"
+ ],
+ "testMatch": [
+ "<rootDir>/src/__tests__/**/*.test.(ts|tsx)"
+ ],
+ // Transform files ending in .ts or .tsx with ts-jest.
+ "transform": {
+ "^.+\\.(ts|tsx)$": "ts-jest"
+ },
+ // Map imports of .css and .less files to identity-obj-proxy.
+ "moduleNameMapper": {
+ "\\.(css|less)$": "identity-obj-proxy"
+ }
+}
diff --git a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/package.json b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/package.json
index f82474c..6b568a4 100644
--- a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/package.json
+++ b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/package.json
@@ -36,13 +36,17 @@
"@jupyterlab/application": "^2.0.0"
},
"devDependencies": {
+ "@types/jest": "^26.0.7",
"@typescript-eslint/eslint-plugin": "^2.25.0",
"@typescript-eslint/parser": "^2.25.0",
"eslint": "^6.8.0",
"eslint-config-prettier": "^6.10.1",
"eslint-plugin-prettier": "^3.1.2",
- "prettier": "1.16.4",
+ "identity-obj-proxy": "^3.0.0",
+ "jest": "^26.1.0",
+ "prettier": "^1.19.0",
"rimraf": "^2.6.1",
+ "ts-jest": "^26.1.3",
"typescript": "~3.7.0"
},
"sideEffects": [
@@ -50,5 +54,6 @@
],
"jupyterlab": {
"extension": true
- }
+ },
+ "test": "jest"
}
diff --git a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/__tests__/kernel/KernelModel.test.ts b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/__tests__/kernel/KernelModel.test.ts
new file mode 100644
index 0000000..9293193
--- /dev/null
+++ b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/__tests__/kernel/KernelModel.test.ts
@@ -0,0 +1,124 @@
+// Licensed under the Apache License, Version 2.0 (the 'License'); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+/**
+ * Tests for KernelModel module.
+ *
+ * Fields that are not camelCase are nbformat fields used in notebooks. The
+ * camelcase lint rule is disabled for them.
+ */
+
+import { KernelModel } from '../../kernel/KernelModel';
+
+// Stand-in for a session context: only session.kernel.requestExecute exists.
+const fakeSessionContext = {
+  session: {
+    kernel: {
+      // Returns a fresh stub future per call; KernelModel overwrites its
+      // onIOPub with its own handler when the future is assigned.
+      requestExecute: (): object => ({
+        onIOPub: (): void => {
+          // do nothing
+        }
+      })
+    }
+  }
+};
+
+it('creates new future with IOPub callbacks when executing new code in kernel', () => {
+  const model = new KernelModel(fakeSessionContext as any);
+  model.execute('new code');
+  // Executing code must leave a future with an IOPub handler attached.
+  const { future } = model;
+  expect(future).not.toBe(null);
+  expect(future.onIOPub).not.toBe(null);
+});
+
+it('handles execute result from IOPub channel', () => {
+  const model = new KernelModel(fakeSessionContext as any);
+  model.execute('any code');
+  // JSON text wrapped in single quotes, the form executeResult unwraps.
+  const plainText =
+    '\'{"pipelineId": {"metadata": {"name": "pipeline", "inMemoryId": 1, "type": "pipeline"}, "pcolls": {"pcollId": {"name": "pcoll", "inMemoryId": 2, "type": "pcollection"}}}}\'';
+  const message = {
+    header: {
+      // eslint-disable-next-line @typescript-eslint/camelcase
+      msg_type: 'execute_result'
+    },
+    content: {
+      data: {
+        'text/plain': plainText
+      },
+      channel: 'iopub'
+    }
+  };
+  model.future.onIOPub(message as any);
+  const expected = {
+    pipelineId: {
+      metadata: { name: 'pipeline', inMemoryId: 1, type: 'pipeline' },
+      pcolls: {
+        pcollId: { name: 'pcoll', inMemoryId: 2, type: 'pcollection' }
+      }
+    }
+  };
+  expect(model.executeResult).toEqual(expected);
+});
+
+it('handles display data from IOPub channel', () => {
+  const model = new KernelModel(fakeSessionContext as any);
+  model.execute('any code');
+  // Content of the display_data message fed to the model.
+  const content = {
+    // eslint-disable-next-line @typescript-eslint/camelcase
+    output_type: 'display_data',
+    data: {
+      'text/html': '<div></div>',
+      'application/javascript': 'console.log(1);'
+    },
+    metadata: {
+      some: 'data'
+    }
+  };
+  const message = {
+    header: {
+      // eslint-disable-next-line @typescript-eslint/camelcase
+      msg_type: 'display_data'
+    },
+    content
+  };
+
+  model.future.onIOPub(message as any);
+  expect(model.displayData).toEqual([content]);
+});
+
+it('handles display update from IOPub channel', () => {
+  const model = new KernelModel(fakeSessionContext as any);
+  model.execute('any code');
+  // Content of the update_display_data message fed to the model.
+  const content = {
+    // eslint-disable-next-line @typescript-eslint/camelcase
+    output_type: 'update_display_data',
+    data: {
+      'text/html': '<div id="abc"></div>',
+      'application/javascript': 'console.log(2)'
+    },
+    metadata: {
+      some: 'data'
+    }
+  };
+  const message = {
+    header: {
+      // eslint-disable-next-line @typescript-eslint/camelcase
+      msg_type: 'update_display_data'
+    },
+    content
+  };
+  model.future.onIOPub(message as any);
+  expect(model.displayUpdate).toEqual([content]);
+});
diff --git a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/kernel/KernelCode.ts b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/kernel/KernelCode.ts
new file mode 100644
index 0000000..0fb75ae
--- /dev/null
+++ b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/kernel/KernelCode.ts
@@ -0,0 +1,23 @@
+// Licensed under the Apache License, Version 2.0 (the 'License'); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+/**
+ * KernelCode namespace groups constant snippets of Python code meant to be
+ * executed in an IPython kernel that has the Apache Beam Python SDK installed.
+ */
+export namespace KernelCode {
+  const INTERACTIVE_PACKAGE = 'apache_beam.runners.interactive';
+
+  // Two import statements, each terminated by a newline.
+  export const COMMON_KERNEL_IMPORTS: string = [
+    `from ${INTERACTIVE_PACKAGE} import interactive_beam as ib`,
+    `from ${INTERACTIVE_PACKAGE} import interactive_environment as ie`,
+    ''
+  ].join('\n');
+}
diff --git a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/kernel/KernelModel.ts b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/kernel/KernelModel.ts
new file mode 100644
index 0000000..39d244c
--- /dev/null
+++ b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/kernel/KernelModel.ts
@@ -0,0 +1,161 @@
+// Licensed under the Apache License, Version 2.0 (the 'License'); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+/**
+ * The module holds the model that handles messaging between the frontend and
+ * the connected kernel.
+ */
+
+import { ISessionContext } from '@jupyterlab/apputils';
+
+import {
+ IDisplayData,
+ IDisplayUpdate,
+ IExecuteResult
+} from '@jupyterlab/nbformat';
+
+import { Kernel, KernelMessage } from '@jupyterlab/services';
+
+import { ISignal, Signal } from '@lumino/signaling';
+
+import { KernelCode } from '../kernel/KernelCode';
+
+/**
+ * Executes code in the kernel of a session context and records the
+ * execute_result, display_data and update_display_data messages that arrive
+ * on the IOPub channel of the resulting future.
+ */
+export class KernelModel {
+  /**
+   * @param sessionContext the session context whose kernel executes the code.
+   * @param enableConsoleLog when true, every IOPub message is logged to the
+   *   console.
+   */
+  constructor(sessionContext: ISessionContext, enableConsoleLog = false) {
+    this._sessionContext = sessionContext;
+    this._enableConsoleLog = enableConsoleLog;
+  }
+
+  /** The future of the latest execute request, or null if there is none. */
+  get future(): Kernel.IFuture<
+    KernelMessage.IExecuteRequestMsg,
+    KernelMessage.IExecuteReplyMsg
+  > | null {
+    return this._future;
+  }
+
+  /**
+   * Replaces the tracked future. The previous future, if any, is disposed and
+   * the new one (when not null) gets this model's IOPub handler.
+   */
+  set future(
+    value: Kernel.IFuture<
+      KernelMessage.IExecuteRequestMsg,
+      KernelMessage.IExecuteReplyMsg
+    > | null
+  ) {
+    if (this._future === value) {
+      return;
+    }
+
+    if (this._future) {
+      this._future.dispose();
+    }
+
+    this._future = value;
+
+    if (!value) {
+      return;
+    }
+
+    value.onIOPub = this._onIOPub.bind(this);
+  }
+
+  /**
+   * The latest execute result parsed as JSON. Returns an empty object when
+   * there is no result, no text/plain data, or the data fails to parse.
+   */
+  get executeResult(): object {
+    if (this._executeResult) {
+      const dataInPlainText = this._executeResult.data['text/plain'] as string;
+      if (dataInPlainText) {
+        try {
+          // The slice drops the first and last characters (the single quotes
+          // surrounding the text). The replace turns each escaped quote (\')
+          // back into a plain single quote.
+          const dataInJsonString = dataInPlainText
+            .slice(1, -1)
+            .replace(/\\'/g, "'");
+          return JSON.parse(dataInJsonString);
+        } catch (e) {
+          console.error(e);
+          return {};
+        }
+      }
+    }
+    return {};
+  }
+
+  /** display_data contents received since the last call to execute. */
+  get displayData(): Array<IDisplayData> {
+    return this._displayData;
+  }
+
+  /** update_display_data contents received since the last call to execute. */
+  get displayUpdate(): Array<IDisplayUpdate> {
+    return this._displayUpdate;
+  }
+
+  /** Signal emitted each time one of the recorded outputs changes. */
+  get stateChanged(): ISignal<KernelModel, void> {
+    return this._stateChanged;
+  }
+
+  /**
+   * Executes code, prefixed with the common kernel imports, in the kernel.
+   *
+   * Outputs recorded from a previous execution are cleared first. Nothing is
+   * sent when the session context has no kernel.
+   *
+   * @param code the Python code to execute.
+   * @param expectReply when false, the request is sent as silent.
+   */
+  execute(code: string, expectReply = true): void {
+    // Assigning null goes through the setter, which disposes the previous
+    // future exactly once so that no more of its IOPub messages are handled.
+    this.future = null;
+    // Clear the outputs from previous kernel executions.
+    this._executeResult = null;
+    this._displayData.length = 0;
+    this._displayUpdate.length = 0;
+    const kernel = this._sessionContext?.session?.kernel;
+    if (!kernel) {
+      return;
+    }
+    this.future = kernel.requestExecute({
+      code: KernelCode.COMMON_KERNEL_IMPORTS + code,
+      silent: !expectReply,
+      store_history: false // eslint-disable-line @typescript-eslint/camelcase
+    });
+  }
+
+  /**
+   * Records the content of the IOPub message types this model tracks and
+   * emits stateChanged for each; other message types are ignored.
+   */
+  private _onIOPub(msg: KernelMessage.IIOPubMessage): void {
+    if (this._enableConsoleLog) {
+      console.log(msg);
+    }
+    const msgType = msg.header.msg_type;
+    switch (msgType) {
+      case 'execute_result': {
+        this._executeResult = msg.content as IExecuteResult;
+        this._stateChanged.emit();
+        break;
+      }
+      case 'display_data': {
+        this._displayData.push(msg.content as IDisplayData);
+        this._stateChanged.emit();
+        break;
+      }
+      case 'update_display_data': {
+        this._displayUpdate.push(msg.content as IDisplayUpdate);
+        this._stateChanged.emit();
+        break;
+      }
+      default: {
+        break;
+      }
+    }
+  }
+
+  private _future: Kernel.IFuture<
+    KernelMessage.IExecuteRequestMsg,
+    KernelMessage.IExecuteReplyMsg
+  > | null = null;
+  private _displayData: Array<IDisplayData> = new Array<IDisplayData>();
+  private _displayUpdate: Array<IDisplayUpdate> = new Array<IDisplayUpdate>();
+  private _executeResult: IExecuteResult | null = null;
+  private _sessionContext: ISessionContext;
+  private _stateChanged = new Signal<KernelModel, void>(this);
+  private _enableConsoleLog = false;
+}
diff --git a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/tsconfig.json b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/tsconfig.json
index 81139f5..680ce33 100644
--- a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/tsconfig.json
+++ b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/tsconfig.json
@@ -18,7 +18,13 @@
"strict": true,
"strictNullChecks": false,
"target": "es2017",
- "types": []
+ "types": [
+ "jest"
+ ]
},
- "include": ["src/*"]
+ "include": [
+ "src/*",
+ "src/kernel/*",
+ "src/__tests__/**/*"
+ ]
}
diff --git a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/yarn.lock b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/yarn.lock
index 78442f2..8d2f32b 100644
--- a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/yarn.lock
+++ b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/yarn.lock
@@ -2,18 +2,138 @@
# yarn lockfile v1
-"@babel/code-frame@^7.0.0":
+"@babel/code-frame@^7.0.0", "@babel/code-frame@^7.10.4":
version "7.10.4"
resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.10.4.tgz#168da1a36e90da68ae8d49c0f1b48c7c6249213a"
integrity sha512-vG6SvB6oYEhvgisZNFRmRCUkLz11c7rp+tbNTynGqc6mS1d5ATd/sGyV6W0KZZnXRKMTzZDRgQT3Ou9jhpAfUg==
dependencies:
"@babel/highlight" "^7.10.4"
+"@babel/core@^7.1.0", "@babel/core@^7.7.5":
+ version "7.10.5"
+ resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.10.5.tgz#1f15e2cca8ad9a1d78a38ddba612f5e7cdbbd330"
+ integrity sha512-O34LQooYVDXPl7QWCdW9p4NR+QlzOr7xShPPJz8GsuCU3/8ua/wqTr7gmnxXv+WBESiGU/G5s16i6tUvHkNb+w==
+ dependencies:
+ "@babel/code-frame" "^7.10.4"
+ "@babel/generator" "^7.10.5"
+ "@babel/helper-module-transforms" "^7.10.5"
+ "@babel/helpers" "^7.10.4"
+ "@babel/parser" "^7.10.5"
+ "@babel/template" "^7.10.4"
+ "@babel/traverse" "^7.10.5"
+ "@babel/types" "^7.10.5"
+ convert-source-map "^1.7.0"
+ debug "^4.1.0"
+ gensync "^1.0.0-beta.1"
+ json5 "^2.1.2"
+ lodash "^4.17.19"
+ resolve "^1.3.2"
+ semver "^5.4.1"
+ source-map "^0.5.0"
+
+"@babel/generator@^7.10.5":
+ version "7.10.5"
+ resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.10.5.tgz#1b903554bc8c583ee8d25f1e8969732e6b829a69"
+ integrity sha512-3vXxr3FEW7E7lJZiWQ3bM4+v/Vyr9C+hpolQ8BGFr9Y8Ri2tFLWTixmwKBafDujO1WVah4fhZBeU1bieKdghig==
+ dependencies:
+ "@babel/types" "^7.10.5"
+ jsesc "^2.5.1"
+ source-map "^0.5.0"
+
+"@babel/helper-function-name@^7.10.4":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/helper-function-name/-/helper-function-name-7.10.4.tgz#d2d3b20c59ad8c47112fa7d2a94bc09d5ef82f1a"
+ integrity sha512-YdaSyz1n8gY44EmN7x44zBn9zQ1Ry2Y+3GTA+3vH6Mizke1Vw0aWDM66FOYEPw8//qKkmqOckrGgTYa+6sceqQ==
+ dependencies:
+ "@babel/helper-get-function-arity" "^7.10.4"
+ "@babel/template" "^7.10.4"
+ "@babel/types" "^7.10.4"
+
+"@babel/helper-get-function-arity@^7.10.4":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/helper-get-function-arity/-/helper-get-function-arity-7.10.4.tgz#98c1cbea0e2332f33f9a4661b8ce1505b2c19ba2"
+ integrity sha512-EkN3YDB+SRDgiIUnNgcmiD361ti+AVbL3f3Henf6dqqUyr5dMsorno0lJWJuLhDhkI5sYEpgj6y9kB8AOU1I2A==
+ dependencies:
+ "@babel/types" "^7.10.4"
+
+"@babel/helper-member-expression-to-functions@^7.10.4":
+ version "7.10.5"
+ resolved "https://registry.yarnpkg.com/@babel/helper-member-expression-to-functions/-/helper-member-expression-to-functions-7.10.5.tgz#172f56e7a63e78112f3a04055f24365af702e7ee"
+ integrity sha512-HiqJpYD5+WopCXIAbQDG0zye5XYVvcO9w/DHp5GsaGkRUaamLj2bEtu6i8rnGGprAhHM3qidCMgp71HF4endhA==
+ dependencies:
+ "@babel/types" "^7.10.5"
+
+"@babel/helper-module-imports@^7.10.4":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/helper-module-imports/-/helper-module-imports-7.10.4.tgz#4c5c54be04bd31670a7382797d75b9fa2e5b5620"
+ integrity sha512-nEQJHqYavI217oD9+s5MUBzk6x1IlvoS9WTPfgG43CbMEeStE0v+r+TucWdx8KFGowPGvyOkDT9+7DHedIDnVw==
+ dependencies:
+ "@babel/types" "^7.10.4"
+
+"@babel/helper-module-transforms@^7.10.5":
+ version "7.10.5"
+ resolved "https://registry.yarnpkg.com/@babel/helper-module-transforms/-/helper-module-transforms-7.10.5.tgz#120c271c0b3353673fcdfd8c053db3c544a260d6"
+ integrity sha512-4P+CWMJ6/j1W915ITJaUkadLObmCRRSC234uctJfn/vHrsLNxsR8dwlcXv9ZhJWzl77awf+mWXSZEKt5t0OnlA==
+ dependencies:
+ "@babel/helper-module-imports" "^7.10.4"
+ "@babel/helper-replace-supers" "^7.10.4"
+ "@babel/helper-simple-access" "^7.10.4"
+ "@babel/helper-split-export-declaration" "^7.10.4"
+ "@babel/template" "^7.10.4"
+ "@babel/types" "^7.10.5"
+ lodash "^4.17.19"
+
+"@babel/helper-optimise-call-expression@^7.10.4":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/helper-optimise-call-expression/-/helper-optimise-call-expression-7.10.4.tgz#50dc96413d594f995a77905905b05893cd779673"
+ integrity sha512-n3UGKY4VXwXThEiKrgRAoVPBMqeoPgHVqiHZOanAJCG9nQUL2pLRQirUzl0ioKclHGpGqRgIOkgcIJaIWLpygg==
+ dependencies:
+ "@babel/types" "^7.10.4"
+
+"@babel/helper-plugin-utils@^7.0.0", "@babel/helper-plugin-utils@^7.10.4", "@babel/helper-plugin-utils@^7.8.0":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/helper-plugin-utils/-/helper-plugin-utils-7.10.4.tgz#2f75a831269d4f677de49986dff59927533cf375"
+ integrity sha512-O4KCvQA6lLiMU9l2eawBPMf1xPP8xPfB3iEQw150hOVTqj/rfXz0ThTb4HEzqQfs2Bmo5Ay8BzxfzVtBrr9dVg==
+
+"@babel/helper-replace-supers@^7.10.4":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/helper-replace-supers/-/helper-replace-supers-7.10.4.tgz#d585cd9388ea06e6031e4cd44b6713cbead9e6cf"
+ integrity sha512-sPxZfFXocEymYTdVK1UNmFPBN+Hv5mJkLPsYWwGBxZAxaWfFu+xqp7b6qWD0yjNuNL2VKc6L5M18tOXUP7NU0A==
+ dependencies:
+ "@babel/helper-member-expression-to-functions" "^7.10.4"
+ "@babel/helper-optimise-call-expression" "^7.10.4"
+ "@babel/traverse" "^7.10.4"
+ "@babel/types" "^7.10.4"
+
+"@babel/helper-simple-access@^7.10.4":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/helper-simple-access/-/helper-simple-access-7.10.4.tgz#0f5ccda2945277a2a7a2d3a821e15395edcf3461"
+ integrity sha512-0fMy72ej/VEvF8ULmX6yb5MtHG4uH4Dbd6I/aHDb/JVg0bbivwt9Wg+h3uMvX+QSFtwr5MeItvazbrc4jtRAXw==
+ dependencies:
+ "@babel/template" "^7.10.4"
+ "@babel/types" "^7.10.4"
+
+"@babel/helper-split-export-declaration@^7.10.4":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.10.4.tgz#2c70576eaa3b5609b24cb99db2888cc3fc4251d1"
+ integrity sha512-pySBTeoUff56fL5CBU2hWm9TesA4r/rOkI9DyJLvvgz09MB9YtfIYe3iBriVaYNaPe+Alua0vBIOVOLs2buWhg==
+ dependencies:
+ "@babel/types" "^7.10.4"
+
"@babel/helper-validator-identifier@^7.10.4":
version "7.10.4"
resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.10.4.tgz#a78c7a7251e01f616512d31b10adcf52ada5e0d2"
integrity sha512-3U9y+43hz7ZM+rzG24Qe2mufW5KhvFg/NhnNph+i9mgCtdTCtMJuI1TMkrIUiK7Ix4PYlRF9I5dhqaLYA/ADXw==
+"@babel/helpers@^7.10.4":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/helpers/-/helpers-7.10.4.tgz#2abeb0d721aff7c0a97376b9e1f6f65d7a475044"
+ integrity sha512-L2gX/XeUONeEbI78dXSrJzGdz4GQ+ZTA/aazfUsFaWjSe95kiCuOZ5HsXvkiw3iwF+mFHSRUfJU8t6YavocdXA==
+ dependencies:
+ "@babel/template" "^7.10.4"
+ "@babel/traverse" "^7.10.4"
+ "@babel/types" "^7.10.4"
+
"@babel/highlight@^7.10.4":
version "7.10.4"
resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.10.4.tgz#7d1bdfd65753538fabe6c38596cdb76d9ac60143"
@@ -23,6 +143,88 @@
chalk "^2.0.0"
js-tokens "^4.0.0"
+"@babel/parser@^7.1.0", "@babel/parser@^7.10.4", "@babel/parser@^7.10.5":
+ version "7.10.5"
+ resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.10.5.tgz#e7c6bf5a7deff957cec9f04b551e2762909d826b"
+ integrity sha512-wfryxy4bE1UivvQKSQDU4/X6dr+i8bctjUjj8Zyt3DQy7NtPizJXT8M52nqpNKL+nq2PW8lxk4ZqLj0fD4B4hQ==
+
+"@babel/plugin-syntax-async-generators@^7.8.4":
+ version "7.8.4"
+ resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-async-generators/-/plugin-syntax-async-generators-7.8.4.tgz#a983fb1aeb2ec3f6ed042a210f640e90e786fe0d"
+ integrity sha512-tycmZxkGfZaxhMRbXlPXuVFpdWlXpir2W4AMhSJgRKzk/eDlIXOhb2LHWoLpDF7TEHylV5zNhykX6KAgHJmTNw==
+ dependencies:
+ "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-bigint@^7.8.3":
+ version "7.8.3"
+ resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-bigint/-/plugin-syntax-bigint-7.8.3.tgz#4c9a6f669f5d0cdf1b90a1671e9a146be5300cea"
+ integrity sha512-wnTnFlG+YxQm3vDxpGE57Pj0srRU4sHE/mDkt1qv2YJJSeUAec2ma4WLUnUPeKjyrfntVwe/N6dCXpU+zL3Npg==
+ dependencies:
+ "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-class-properties@^7.8.3":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-class-properties/-/plugin-syntax-class-properties-7.10.4.tgz#6644e6a0baa55a61f9e3231f6c9eeb6ee46c124c"
+ integrity sha512-GCSBF7iUle6rNugfURwNmCGG3Z/2+opxAMLs1nND4bhEG5PuxTIggDBoeYYSujAlLtsupzOHYJQgPS3pivwXIA==
+ dependencies:
+ "@babel/helper-plugin-utils" "^7.10.4"
+
+"@babel/plugin-syntax-import-meta@^7.8.3":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-import-meta/-/plugin-syntax-import-meta-7.10.4.tgz#ee601348c370fa334d2207be158777496521fd51"
+ integrity sha512-Yqfm+XDx0+Prh3VSeEQCPU81yC+JWZ2pDPFSS4ZdpfZhp4MkFMaDC1UqseovEKwSUpnIL7+vK+Clp7bfh0iD7g==
+ dependencies:
+ "@babel/helper-plugin-utils" "^7.10.4"
+
+"@babel/plugin-syntax-json-strings@^7.8.3":
+ version "7.8.3"
+ resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-json-strings/-/plugin-syntax-json-strings-7.8.3.tgz#01ca21b668cd8218c9e640cb6dd88c5412b2c96a"
+ integrity sha512-lY6kdGpWHvjoe2vk4WrAapEuBR69EMxZl+RoGRhrFGNYVK8mOPAW8VfbT/ZgrFbXlDNiiaxQnAtgVCZ6jv30EA==
+ dependencies:
+ "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-logical-assignment-operators@^7.8.3":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-logical-assignment-operators/-/plugin-syntax-logical-assignment-operators-7.10.4.tgz#ca91ef46303530448b906652bac2e9fe9941f699"
+ integrity sha512-d8waShlpFDinQ5MtvGU9xDAOzKH47+FFoney2baFIoMr952hKOLp1HR7VszoZvOsV/4+RRszNY7D17ba0te0ig==
+ dependencies:
+ "@babel/helper-plugin-utils" "^7.10.4"
+
+"@babel/plugin-syntax-nullish-coalescing-operator@^7.8.3":
+ version "7.8.3"
+ resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-nullish-coalescing-operator/-/plugin-syntax-nullish-coalescing-operator-7.8.3.tgz#167ed70368886081f74b5c36c65a88c03b66d1a9"
+ integrity sha512-aSff4zPII1u2QD7y+F8oDsz19ew4IGEJg9SVW+bqwpwtfFleiQDMdzA/R+UlWDzfnHFCxxleFT0PMIrR36XLNQ==
+ dependencies:
+ "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-numeric-separator@^7.8.3":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-numeric-separator/-/plugin-syntax-numeric-separator-7.10.4.tgz#b9b070b3e33570cd9fd07ba7fa91c0dd37b9af97"
+ integrity sha512-9H6YdfkcK/uOnY/K7/aA2xpzaAgkQn37yzWUMRK7OaPOqOpGS1+n0H5hxT9AUw9EsSjPW8SVyMJwYRtWs3X3ug==
+ dependencies:
+ "@babel/helper-plugin-utils" "^7.10.4"
+
+"@babel/plugin-syntax-object-rest-spread@^7.8.3":
+ version "7.8.3"
+ resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-object-rest-spread/-/plugin-syntax-object-rest-spread-7.8.3.tgz#60e225edcbd98a640332a2e72dd3e66f1af55871"
+ integrity sha512-XoqMijGZb9y3y2XskN+P1wUGiVwWZ5JmoDRwx5+3GmEplNyVM2s2Dg8ILFQm8rWM48orGy5YpI5Bl8U1y7ydlA==
+ dependencies:
+ "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-optional-catch-binding@^7.8.3":
+ version "7.8.3"
+ resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-optional-catch-binding/-/plugin-syntax-optional-catch-binding-7.8.3.tgz#6111a265bcfb020eb9efd0fdfd7d26402b9ed6c1"
+ integrity sha512-6VPD0Pc1lpTqw0aKoeRTMiB+kWhAoT24PA+ksWSBrFtl5SIRVpZlwN3NNPQjehA2E/91FV3RjLWoVTglWcSV3Q==
+ dependencies:
+ "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-optional-chaining@^7.8.3":
+ version "7.8.3"
+ resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-optional-chaining/-/plugin-syntax-optional-chaining-7.8.3.tgz#4f69c2ab95167e0180cd5336613f8c5788f7d48a"
+ integrity sha512-KoK9ErH1MBlCPxV0VANkXW2/dw4vlbGDrFgz8bmUsBGYkFRcbRwMh6cIJubdPrkxRwuGdtCk0v/wPTKbQgBjkg==
+ dependencies:
+ "@babel/helper-plugin-utils" "^7.8.0"
+
"@babel/runtime@^7.1.2":
version "7.10.5"
resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.10.5.tgz#303d8bd440ecd5a491eae6117fd3367698674c5c"
@@ -30,6 +232,44 @@
dependencies:
regenerator-runtime "^0.13.4"
+"@babel/template@^7.10.4", "@babel/template@^7.3.3":
+ version "7.10.4"
+ resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.10.4.tgz#3251996c4200ebc71d1a8fc405fba940f36ba278"
+ integrity sha512-ZCjD27cGJFUB6nmCB1Enki3r+L5kJveX9pq1SvAUKoICy6CZ9yD8xO086YXdYhvNjBdnekm4ZnaP5yC8Cs/1tA==
+ dependencies:
+ "@babel/code-frame" "^7.10.4"
+ "@babel/parser" "^7.10.4"
+ "@babel/types" "^7.10.4"
+
+"@babel/traverse@^7.1.0", "@babel/traverse@^7.10.4", "@babel/traverse@^7.10.5":
+ version "7.10.5"
+ resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.10.5.tgz#77ce464f5b258be265af618d8fddf0536f20b564"
+ integrity sha512-yc/fyv2gUjPqzTz0WHeRJH2pv7jA9kA7mBX2tXl/x5iOE81uaVPuGPtaYk7wmkx4b67mQ7NqI8rmT2pF47KYKQ==
+ dependencies:
+ "@babel/code-frame" "^7.10.4"
+ "@babel/generator" "^7.10.5"
+ "@babel/helper-function-name" "^7.10.4"
+ "@babel/helper-split-export-declaration" "^7.10.4"
+ "@babel/parser" "^7.10.5"
+ "@babel/types" "^7.10.5"
+ debug "^4.1.0"
+ globals "^11.1.0"
+ lodash "^4.17.19"
+
+"@babel/types@^7.0.0", "@babel/types@^7.10.4", "@babel/types@^7.10.5", "@babel/types@^7.3.0", "@babel/types@^7.3.3":
+ version "7.10.5"
+ resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.10.5.tgz#d88ae7e2fde86bfbfe851d4d81afa70a997b5d15"
+ integrity sha512-ixV66KWfCI6GKoA/2H9v6bQdbfXEwwpOdQ8cRvb4F+eyvhlaHxWFMQB4+3d9QFJXZsiiiqVrewNV0DFEQpyT4Q==
+ dependencies:
+ "@babel/helper-validator-identifier" "^7.10.4"
+ lodash "^4.17.19"
+ to-fast-properties "^2.0.0"
+
+"@bcoe/v8-coverage@^0.2.3":
+ version "0.2.3"
+ resolved "https://registry.yarnpkg.com/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz#75a2e8b51cb758a7553d6804a5932d7aace75c39"
+ integrity sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==
+
"@blueprintjs/core@^3.22.2", "@blueprintjs/core@^3.30.0":
version "3.30.0"
resolved "https://registry.yarnpkg.com/@blueprintjs/core/-/core-3.30.0.tgz#d847e451741735a7b11216fb3ec1c669f201bb90"
@@ -64,11 +304,211 @@
classnames "^2.2"
tslib "~1.10.0"
+"@cnakazawa/watch@^1.0.3":
+ version "1.0.4"
+ resolved "https://registry.yarnpkg.com/@cnakazawa/watch/-/watch-1.0.4.tgz#f864ae85004d0fcab6f50be9141c4da368d1656a"
+ integrity sha512-v9kIhKwjeZThiWrLmj0y17CWoyddASLj9O2yvbZkbvw/N3rWOYy9zkV66ursAoVr0mV15bL8g0c4QZUE6cdDoQ==
+ dependencies:
+ exec-sh "^0.3.2"
+ minimist "^1.2.0"
+
"@fortawesome/fontawesome-free@^5.12.0":
version "5.14.0"
resolved "https://registry.yarnpkg.com/@fortawesome/fontawesome-free/-/fontawesome-free-5.14.0.tgz#a371e91029ebf265015e64f81bfbf7d228c9681f"
integrity sha512-OfdMsF+ZQgdKHP9jUbmDcRrP0eX90XXrsXIdyjLbkmSBzmMXPABB8eobUJtivaupucYaByz6WNe1PI1JuYm3qA==
+"@istanbuljs/load-nyc-config@^1.0.0":
+ version "1.1.0"
+ resolved "https://registry.yarnpkg.com/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz#fd3db1d59ecf7cf121e80650bb86712f9b55eced"
+ integrity sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==
+ dependencies:
+ camelcase "^5.3.1"
+ find-up "^4.1.0"
+ get-package-type "^0.1.0"
+ js-yaml "^3.13.1"
+ resolve-from "^5.0.0"
+
+"@istanbuljs/schema@^0.1.2":
+ version "0.1.2"
+ resolved "https://registry.yarnpkg.com/@istanbuljs/schema/-/schema-0.1.2.tgz#26520bf09abe4a5644cd5414e37125a8954241dd"
+ integrity sha512-tsAQNx32a8CoFhjhijUIhI4kccIAgmGhy8LZMZgGfmXcpMbPRUqn5LWmgRttILi6yeGmBJd2xsPkFMs0PzgPCw==
+
+"@jest/console@^26.1.0":
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/@jest/console/-/console-26.1.0.tgz#f67c89e4f4d04dbcf7b052aed5ab9c74f915b954"
+ integrity sha512-+0lpTHMd/8pJp+Nd4lyip+/Iyf2dZJvcCqrlkeZQoQid+JlThA4M9vxHtheyrQ99jJTMQam+es4BcvZ5W5cC3A==
+ dependencies:
+ "@jest/types" "^26.1.0"
+ chalk "^4.0.0"
+ jest-message-util "^26.1.0"
+ jest-util "^26.1.0"
+ slash "^3.0.0"
+
+"@jest/core@^26.1.0":
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/@jest/core/-/core-26.1.0.tgz#4580555b522de412a7998b3938c851e4f9da1c18"
+ integrity sha512-zyizYmDJOOVke4OO/De//aiv8b07OwZzL2cfsvWF3q9YssfpcKfcnZAwDY8f+A76xXSMMYe8i/f/LPocLlByfw==
+ dependencies:
+ "@jest/console" "^26.1.0"
+ "@jest/reporters" "^26.1.0"
+ "@jest/test-result" "^26.1.0"
+ "@jest/transform" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ ansi-escapes "^4.2.1"
+ chalk "^4.0.0"
+ exit "^0.1.2"
+ graceful-fs "^4.2.4"
+ jest-changed-files "^26.1.0"
+ jest-config "^26.1.0"
+ jest-haste-map "^26.1.0"
+ jest-message-util "^26.1.0"
+ jest-regex-util "^26.0.0"
+ jest-resolve "^26.1.0"
+ jest-resolve-dependencies "^26.1.0"
+ jest-runner "^26.1.0"
+ jest-runtime "^26.1.0"
+ jest-snapshot "^26.1.0"
+ jest-util "^26.1.0"
+ jest-validate "^26.1.0"
+ jest-watcher "^26.1.0"
+ micromatch "^4.0.2"
+ p-each-series "^2.1.0"
+ rimraf "^3.0.0"
+ slash "^3.0.0"
+ strip-ansi "^6.0.0"
+
+"@jest/environment@^26.1.0":
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/@jest/environment/-/environment-26.1.0.tgz#378853bcdd1c2443b4555ab908cfbabb851e96da"
+ integrity sha512-86+DNcGongbX7ai/KE/S3/NcUVZfrwvFzOOWX/W+OOTvTds7j07LtC+MgGydH5c8Ri3uIrvdmVgd1xFD5zt/xA==
+ dependencies:
+ "@jest/fake-timers" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ jest-mock "^26.1.0"
+
+"@jest/fake-timers@^26.1.0":
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/@jest/fake-timers/-/fake-timers-26.1.0.tgz#9a76b7a94c351cdbc0ad53e5a748789f819a65fe"
+ integrity sha512-Y5F3kBVWxhau3TJ825iuWy++BAuQzK/xEa+wD9vDH3RytW9f2DbMVodfUQC54rZDX3POqdxCgcKdgcOL0rYUpA==
+ dependencies:
+ "@jest/types" "^26.1.0"
+ "@sinonjs/fake-timers" "^6.0.1"
+ jest-message-util "^26.1.0"
+ jest-mock "^26.1.0"
+ jest-util "^26.1.0"
+
+"@jest/globals@^26.1.0":
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/@jest/globals/-/globals-26.1.0.tgz#6cc5d7cbb79b76b120f2403d7d755693cf063ab1"
+ integrity sha512-MKiHPNaT+ZoG85oMaYUmGHEqu98y3WO2yeIDJrs2sJqHhYOy3Z6F7F/luzFomRQ8SQ1wEkmahFAz2291Iv8EAw==
+ dependencies:
+ "@jest/environment" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ expect "^26.1.0"
+
+"@jest/reporters@^26.1.0":
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/@jest/reporters/-/reporters-26.1.0.tgz#08952e90c90282e14ff49e927bdf1873617dae78"
+ integrity sha512-SVAysur9FOIojJbF4wLP0TybmqwDkdnFxHSPzHMMIYyBtldCW9gG+Q5xWjpMFyErDiwlRuPyMSJSU64A67Pazg==
+ dependencies:
+ "@bcoe/v8-coverage" "^0.2.3"
+ "@jest/console" "^26.1.0"
+ "@jest/test-result" "^26.1.0"
+ "@jest/transform" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ chalk "^4.0.0"
+ collect-v8-coverage "^1.0.0"
+ exit "^0.1.2"
+ glob "^7.1.2"
+ graceful-fs "^4.2.4"
+ istanbul-lib-coverage "^3.0.0"
+ istanbul-lib-instrument "^4.0.3"
+ istanbul-lib-report "^3.0.0"
+ istanbul-lib-source-maps "^4.0.0"
+ istanbul-reports "^3.0.2"
+ jest-haste-map "^26.1.0"
+ jest-resolve "^26.1.0"
+ jest-util "^26.1.0"
+ jest-worker "^26.1.0"
+ slash "^3.0.0"
+ source-map "^0.6.0"
+ string-length "^4.0.1"
+ terminal-link "^2.0.0"
+ v8-to-istanbul "^4.1.3"
+ optionalDependencies:
+ node-notifier "^7.0.0"
+
+"@jest/source-map@^26.1.0":
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/@jest/source-map/-/source-map-26.1.0.tgz#a6a020d00e7d9478f4b690167c5e8b77e63adb26"
+ integrity sha512-XYRPYx4eEVX15cMT9mstnO7hkHP3krNtKfxUYd8L7gbtia8JvZZ6bMzSwa6IQJENbudTwKMw5R1BePRD+bkEmA==
+ dependencies:
+ callsites "^3.0.0"
+ graceful-fs "^4.2.4"
+ source-map "^0.6.0"
+
+"@jest/test-result@^26.1.0":
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/@jest/test-result/-/test-result-26.1.0.tgz#a93fa15b21ad3c7ceb21c2b4c35be2e407d8e971"
+ integrity sha512-Xz44mhXph93EYMA8aYDz+75mFbarTV/d/x0yMdI3tfSRs/vh4CqSxgzVmCps1fPkHDCtn0tU8IH9iCKgGeGpfw==
+ dependencies:
+ "@jest/console" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ "@types/istanbul-lib-coverage" "^2.0.0"
+ collect-v8-coverage "^1.0.0"
+
+"@jest/test-sequencer@^26.1.0":
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/@jest/test-sequencer/-/test-sequencer-26.1.0.tgz#41a6fc8b850c3f33f48288ea9ea517c047e7f14e"
+ integrity sha512-Z/hcK+rTq56E6sBwMoQhSRDVjqrGtj1y14e2bIgcowARaIE1SgOanwx6gvY4Q9gTKMoZQXbXvptji+q5GYxa6Q==
+ dependencies:
+ "@jest/test-result" "^26.1.0"
+ graceful-fs "^4.2.4"
+ jest-haste-map "^26.1.0"
+ jest-runner "^26.1.0"
+ jest-runtime "^26.1.0"
+
+"@jest/transform@^26.1.0":
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/@jest/transform/-/transform-26.1.0.tgz#697f48898c2a2787c9b4cb71d09d7e617464e509"
+ integrity sha512-ICPm6sUXmZJieq45ix28k0s+d/z2E8CHDsq+WwtWI6kW8m7I8kPqarSEcUN86entHQ570ZBRci5OWaKL0wlAWw==
+ dependencies:
+ "@babel/core" "^7.1.0"
+ "@jest/types" "^26.1.0"
+ babel-plugin-istanbul "^6.0.0"
+ chalk "^4.0.0"
+ convert-source-map "^1.4.0"
+ fast-json-stable-stringify "^2.0.0"
+ graceful-fs "^4.2.4"
+ jest-haste-map "^26.1.0"
+ jest-regex-util "^26.0.0"
+ jest-util "^26.1.0"
+ micromatch "^4.0.2"
+ pirates "^4.0.1"
+ slash "^3.0.0"
+ source-map "^0.6.1"
+ write-file-atomic "^3.0.0"
+
+"@jest/types@^25.5.0":
+ version "25.5.0"
+ resolved "https://registry.yarnpkg.com/@jest/types/-/types-25.5.0.tgz#4d6a4793f7b9599fc3680877b856a97dbccf2a9d"
+ integrity sha512-OXD0RgQ86Tu3MazKo8bnrkDRaDXXMGUqd+kTtLtK1Zb7CRzQcaSRPPPV37SvYTdevXEBVxe0HXylEjs8ibkmCw==
+ dependencies:
+ "@types/istanbul-lib-coverage" "^2.0.0"
+ "@types/istanbul-reports" "^1.1.1"
+ "@types/yargs" "^15.0.0"
+ chalk "^3.0.0"
+
+"@jest/types@^26.1.0":
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/@jest/types/-/types-26.1.0.tgz#f8afaaaeeb23b5cad49dd1f7779689941dcb6057"
+ integrity sha512-GXigDDsp6ZlNMhXQDeuy/iYCDsRIHJabWtDzvnn36+aqFfG14JmFV0e/iXxY4SP9vbXSiPNOWdehU5MeqrYHBQ==
+ dependencies:
+ "@types/istanbul-lib-coverage" "^2.0.0"
+ "@types/istanbul-reports" "^1.1.1"
+ "@types/yargs" "^15.0.0"
+ chalk "^4.0.0"
+
"@jupyterlab/application@^2.0.0":
version "2.2.0"
resolved "https://registry.yarnpkg.com/@jupyterlab/application/-/application-2.2.0.tgz#65d37cfc6146e28e7aa250667996024134400bfa"
@@ -433,6 +873,53 @@
"@lumino/signaling" "^1.4.2"
"@lumino/virtualdom" "^1.7.2"
+"@sinonjs/commons@^1.7.0":
+ version "1.8.1"
+ resolved "https://registry.yarnpkg.com/@sinonjs/commons/-/commons-1.8.1.tgz#e7df00f98a203324f6dc7cc606cad9d4a8ab2217"
+ integrity sha512-892K+kWUUi3cl+LlqEWIDrhvLgdL79tECi8JZUyq6IviKy/DNhuzCRlbHUjxK89f4ypPMMaFnFuR9Ie6DoIMsw==
+ dependencies:
+ type-detect "4.0.8"
+
+"@sinonjs/fake-timers@^6.0.1":
+ version "6.0.1"
+ resolved "https://registry.yarnpkg.com/@sinonjs/fake-timers/-/fake-timers-6.0.1.tgz#293674fccb3262ac782c7aadfdeca86b10c75c40"
+ integrity sha512-MZPUxrmFubI36XS1DI3qmI0YdN1gks62JtFZvxR67ljjSNCeK6U08Zx4msEWOXuofgqUt6zPHSi1H9fbjR/NRA==
+ dependencies:
+ "@sinonjs/commons" "^1.7.0"
+
+"@types/babel__core@^7.0.0", "@types/babel__core@^7.1.7":
+ version "7.1.9"
+ resolved "https://registry.yarnpkg.com/@types/babel__core/-/babel__core-7.1.9.tgz#77e59d438522a6fb898fa43dc3455c6e72f3963d"
+ integrity sha512-sY2RsIJ5rpER1u3/aQ8OFSI7qGIy8o1NEEbgb2UaJcvOtXOMpd39ko723NBpjQFg9SIX7TXtjejZVGeIMLhoOw==
+ dependencies:
+ "@babel/parser" "^7.1.0"
+ "@babel/types" "^7.0.0"
+ "@types/babel__generator" "*"
+ "@types/babel__template" "*"
+ "@types/babel__traverse" "*"
+
+"@types/babel__generator@*":
+ version "7.6.1"
+ resolved "https://registry.yarnpkg.com/@types/babel__generator/-/babel__generator-7.6.1.tgz#4901767b397e8711aeb99df8d396d7ba7b7f0e04"
+ integrity sha512-bBKm+2VPJcMRVwNhxKu8W+5/zT7pwNEqeokFOmbvVSqGzFneNxYcEBro9Ac7/N9tlsaPYnZLK8J1LWKkMsLAew==
+ dependencies:
+ "@babel/types" "^7.0.0"
+
+"@types/babel__template@*":
+ version "7.0.2"
+ resolved "https://registry.yarnpkg.com/@types/babel__template/-/babel__template-7.0.2.tgz#4ff63d6b52eddac1de7b975a5223ed32ecea9307"
+ integrity sha512-/K6zCpeW7Imzgab2bLkLEbz0+1JlFSrUMdw7KoIIu+IUdu51GWaBZpd3y1VXGVXzynvGa4DaIaxNZHiON3GXUg==
+ dependencies:
+ "@babel/parser" "^7.1.0"
+ "@babel/types" "^7.0.0"
+
+"@types/babel__traverse@*", "@types/babel__traverse@^7.0.6":
+ version "7.0.13"
+ resolved "https://registry.yarnpkg.com/@types/babel__traverse/-/babel__traverse-7.0.13.tgz#1874914be974a492e1b4cb00585cabb274e8ba18"
+ integrity sha512-i+zS7t6/s9cdQvbqKDARrcbrPvtJGlbYsMkazo03nTAK3RX9FNrLllXys22uiTGJapPOTZTQ35nHh4ISph4SLQ==
+ dependencies:
+ "@babel/types" "^7.3.0"
+
"@types/color-name@^1.1.1":
version "1.1.1"
resolved "https://registry.yarnpkg.com/@types/color-name/-/color-name-1.1.1.tgz#1c1261bbeaa10a8055bbc5d8ab84b7b2afc846a0"
@@ -448,11 +935,61 @@
resolved "https://registry.yarnpkg.com/@types/eslint-visitor-keys/-/eslint-visitor-keys-1.0.0.tgz#1ee30d79544ca84d68d4b3cdb0af4f205663dd2d"
integrity sha512-OCutwjDZ4aFS6PB1UZ988C4YgwlBHJd6wCeQqaLdmadZ/7e+w79+hbMUFC1QXDNCmdyoRfAFdm0RypzwR+Qpag==
+"@types/graceful-fs@^4.1.2":
+ version "4.1.3"
+ resolved "https://registry.yarnpkg.com/@types/graceful-fs/-/graceful-fs-4.1.3.tgz#039af35fe26bec35003e8d86d2ee9c586354348f"
+ integrity sha512-AiHRaEB50LQg0pZmm659vNBb9f4SJ0qrAnteuzhSeAUcJKxoYgEnprg/83kppCnc2zvtCKbdZry1a5pVY3lOTQ==
+ dependencies:
+ "@types/node" "*"
+
+"@types/istanbul-lib-coverage@*", "@types/istanbul-lib-coverage@^2.0.0", "@types/istanbul-lib-coverage@^2.0.1":
+ version "2.0.3"
+ resolved "https://registry.yarnpkg.com/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.3.tgz#4ba8ddb720221f432e443bd5f9117fd22cfd4762"
+ integrity sha512-sz7iLqvVUg1gIedBOvlkxPlc8/uVzyS5OwGz1cKjXzkl3FpL3al0crU8YGU1WoHkxn0Wxbw5tyi6hvzJKNzFsw==
+
+"@types/istanbul-lib-report@*":
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/@types/istanbul-lib-report/-/istanbul-lib-report-3.0.0.tgz#c14c24f18ea8190c118ee7562b7ff99a36552686"
+ integrity sha512-plGgXAPfVKFoYfa9NpYDAkseG+g6Jr294RqeqcqDixSbU34MZVJRi/P+7Y8GDpzkEwLaGZZOpKIEmeVZNtKsrg==
+ dependencies:
+ "@types/istanbul-lib-coverage" "*"
+
+"@types/istanbul-reports@^1.1.1":
+ version "1.1.2"
+ resolved "https://registry.yarnpkg.com/@types/istanbul-reports/-/istanbul-reports-1.1.2.tgz#e875cc689e47bce549ec81f3df5e6f6f11cfaeb2"
+ integrity sha512-P/W9yOX/3oPZSpaYOCQzGqgCQRXn0FFO/V8bWrCQs+wLmvVVxk6CRBXALEvNs9OHIatlnlFokfhuDo2ug01ciw==
+ dependencies:
+ "@types/istanbul-lib-coverage" "*"
+ "@types/istanbul-lib-report" "*"
+
+"@types/jest@^26.0.7":
+ version "26.0.7"
+ resolved "https://registry.yarnpkg.com/@types/jest/-/jest-26.0.7.tgz#495cb1d1818c1699dbc3b8b046baf1c86ef5e324"
+ integrity sha512-+x0077/LoN6MjqBcVOe1y9dpryWnfDZ+Xfo3EqGeBcfPRJlQp3Lw62RvNlWxuGv7kOEwlHriAa54updi3Jvvwg==
+ dependencies:
+ jest-diff "^25.2.1"
+ pretty-format "^25.2.1"
+
"@types/json-schema@^7.0.3":
version "7.0.5"
resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.5.tgz#dcce4430e64b443ba8945f0290fb564ad5bac6dd"
integrity sha512-7+2BITlgjgDhH0vvwZU/HZJVyk+2XUlvxXe8dFMedNX/aMkaOq++rMAFXc0tM7ij15QaWlbdQASBR9dihi+bDQ==
+"@types/node@*":
+ version "14.0.26"
+ resolved "https://registry.yarnpkg.com/@types/node/-/node-14.0.26.tgz#22a3b8a46510da8944b67bfc27df02c34a35331c"
+ integrity sha512-W+fpe5s91FBGE0pEa0lnqGLL4USgpLgs4nokw16SrBBco/gQxuua7KnArSEOd5iaMqbbSHV10vUDkJYJJqpXKA==
+
+"@types/normalize-package-data@^2.4.0":
+ version "2.4.0"
+ resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.0.tgz#e486d0d97396d79beedd0a6e33f4534ff6b4973e"
+ integrity sha512-f5j5b/Gf71L+dbqxIpQ4Z2WlmI/mPJ0fOkGGmFgtb6sAu97EPczzbS3/tJKxmcYDj55OX6ssqwDAWOHIYDRDGA==
+
+"@types/prettier@^2.0.0":
+ version "2.0.2"
+ resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.0.2.tgz#5bb52ee68d0f8efa9cc0099920e56be6cc4e37f3"
+ integrity sha512-IkVfat549ggtkZUthUzEX49562eGikhSYeVGX97SkMFn+sTZrgRewXjQ4tPKFPCykZHkX1Zfd9OoELGqKU2jJA==
+
"@types/prop-types@*":
version "15.7.3"
resolved "https://registry.yarnpkg.com/@types/prop-types/-/prop-types-15.7.3.tgz#2ab0d5da2e5815f94b0b9d4b95d1e5f243ab2ca7"
@@ -466,6 +1003,23 @@
"@types/prop-types" "*"
csstype "^2.2.0"
+"@types/stack-utils@^1.0.1":
+ version "1.0.1"
+ resolved "https://registry.yarnpkg.com/@types/stack-utils/-/stack-utils-1.0.1.tgz#0a851d3bd96498fa25c33ab7278ed3bd65f06c3e"
+ integrity sha512-l42BggppR6zLmpfU6fq9HEa2oGPEI8yrSPL3GITjfRInppYFahObbIQOQK3UGxEnyQpltZLaPe75046NOZQikw==
+
+"@types/yargs-parser@*":
+ version "15.0.0"
+ resolved "https://registry.yarnpkg.com/@types/yargs-parser/-/yargs-parser-15.0.0.tgz#cb3f9f741869e20cce330ffbeb9271590483882d"
+ integrity sha512-FA/BWv8t8ZWJ+gEOnLLd8ygxH/2UFbAvgEonyfN6yWGLKc7zVjbpl2Y4CTjid9h2RfgPP6SEt6uHwEOply00yw==
+
+"@types/yargs@^15.0.0":
+ version "15.0.5"
+ resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-15.0.5.tgz#947e9a6561483bdee9adffc983e91a6902af8b79"
+ integrity sha512-Dk/IDOPtOgubt/IaevIUbTgV7doaKkoorvOyYM2CMwuDyP89bekI7H4xLIwunNYiK9jhCkmc6pUrJk3cj2AB9w==
+ dependencies:
+ "@types/yargs-parser" "*"
+
"@typescript-eslint/eslint-plugin@^2.25.0":
version "2.34.0"
resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-2.34.0.tgz#6f8ce8a46c7dea4a6f1d171d2bb8fbae6dac2be9"
@@ -509,17 +1063,35 @@
semver "^7.3.2"
tsutils "^3.17.1"
+abab@^2.0.3:
+ version "2.0.3"
+ resolved "https://registry.yarnpkg.com/abab/-/abab-2.0.3.tgz#623e2075e02eb2d3f2475e49f99c91846467907a"
+ integrity sha512-tsFzPpcttalNjFBCFMqsKYQcWxxen1pgJR56by//QwvJc4/OUS3kPOOttx2tSIfjsylB0pYu7f5D3K1RCxUnUg==
+
+acorn-globals@^6.0.0:
+ version "6.0.0"
+ resolved "https://registry.yarnpkg.com/acorn-globals/-/acorn-globals-6.0.0.tgz#46cdd39f0f8ff08a876619b55f5ac8a6dc770b45"
+ integrity sha512-ZQl7LOWaF5ePqqcX4hLuv/bLXYQNfNWw2c0/yX/TsPRKamzHcTGQnlCjHT3TsmkOUVEPS3crCxiPfdzE/Trlhg==
+ dependencies:
+ acorn "^7.1.1"
+ acorn-walk "^7.1.1"
+
acorn-jsx@^5.2.0:
version "5.2.0"
resolved "https://registry.yarnpkg.com/acorn-jsx/-/acorn-jsx-5.2.0.tgz#4c66069173d6fdd68ed85239fc256226182b2ebe"
integrity sha512-HiUX/+K2YpkpJ+SzBffkM/AQ2YE03S0U1kjTLVpoJdhZMOWy8qvXVN9JdLqv2QsaQ6MPYQIuNmwD8zOiYUofLQ==
+acorn-walk@^7.1.1:
+ version "7.2.0"
+ resolved "https://registry.yarnpkg.com/acorn-walk/-/acorn-walk-7.2.0.tgz#0de889a601203909b0fbe07b8938dc21d2e967bc"
+ integrity sha512-OPdCF6GsMIP+Az+aWfAAOEt2/+iVDKE7oy6lJ098aoe59oAmK76qV6Gw60SbZ8jHuG2wH058GF4pLFbYamYrVA==
+
acorn@^7.1.1:
version "7.3.1"
resolved "https://registry.yarnpkg.com/acorn/-/acorn-7.3.1.tgz#85010754db53c3fbaf3b9ea3e083aa5c5d147ffd"
integrity sha512-tLc0wSnatxAQHVHUapaHdz72pi9KUyHjq5KyHjGg9Y8Ifdc79pTh2XvI6I1/chZbnM7QtNKzh66ooDogPZSleA==
-ajv@^6.10.0, ajv@^6.10.2:
+ajv@^6.10.0, ajv@^6.10.2, ajv@^6.5.5:
version "6.12.3"
resolved "https://registry.yarnpkg.com/ajv/-/ajv-6.12.3.tgz#18c5af38a111ddeb4f2697bd78d68abc1cabd706"
integrity sha512-4K0cK3L1hsqk9xIb2z9vs/XU+PGJZ9PNpJRDS9YLzmNdX6jmVPfamLvTJr0aDAusnHyCHO6MjzlkAsgtqp9teA==
@@ -553,7 +1125,7 @@
dependencies:
color-convert "^1.9.0"
-ansi-styles@^4.1.0:
+ansi-styles@^4.0.0, ansi-styles@^4.1.0:
version "4.2.1"
resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.2.1.tgz#90ae75c424d008d2624c5bf29ead3177ebfcf359"
integrity sha512-9VGjrMsG1vePxcSweQsN20KY/c4zN0h9fLjqAbwbPfahM3t+NL+M9HC8xeXG2I8pX5NoamTGNuomEUFI7fcUjA==
@@ -561,6 +1133,22 @@
"@types/color-name" "^1.1.1"
color-convert "^2.0.1"
+anymatch@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-2.0.0.tgz#bcb24b4f37934d9aa7ac17b4adaf89e7c76ef2eb"
+ integrity sha512-5teOsQWABXHHBFP9y3skS5P3d/WfWXpv3FUpy+LorMrNYaT9pI4oLMQX7jzQ2KklNpGpWHzdCXTDT2Y3XGlZBw==
+ dependencies:
+ micromatch "^3.1.4"
+ normalize-path "^2.1.1"
+
+anymatch@^3.0.3:
+ version "3.1.1"
+ resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-3.1.1.tgz#c55ecf02185e2469259399310c173ce31233b142"
+ integrity sha512-mM8522psRCqzV+6LhomX5wgp25YVibjh8Wj23I5RPkPppSVSjyKD2A2mBJmWGa+KN7f2D6LNh9jkBCeyLktzjg==
+ dependencies:
+ normalize-path "^3.0.0"
+ picomatch "^2.0.4"
+
argparse@^1.0.7:
version "1.0.10"
resolved "https://registry.yarnpkg.com/argparse/-/argparse-1.0.10.tgz#bcd6791ea5ae09725e17e5ad988134cd40b3d911"
@@ -568,21 +1156,158 @@
dependencies:
sprintf-js "~1.0.2"
+arr-diff@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/arr-diff/-/arr-diff-4.0.0.tgz#d6461074febfec71e7e15235761a329a5dc7c520"
+ integrity sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=
+
+arr-flatten@^1.1.0:
+ version "1.1.0"
+ resolved "https://registry.yarnpkg.com/arr-flatten/-/arr-flatten-1.1.0.tgz#36048bbff4e7b47e136644316c99669ea5ae91f1"
+ integrity sha512-L3hKV5R/p5o81R7O02IGnwpDmkp6E982XhtbuwSe3O4qOtMMMtodicASA1Cny2U+aCXcNpml+m4dPsvsJ3jatg==
+
+arr-union@^3.1.0:
+ version "3.1.0"
+ resolved "https://registry.yarnpkg.com/arr-union/-/arr-union-3.1.0.tgz#e39b09aea9def866a8f206e288af63919bae39c4"
+ integrity sha1-45sJrqne+Gao8gbiiK9jkZuuOcQ=
+
array-uniq@^1.0.2:
version "1.0.3"
resolved "https://registry.yarnpkg.com/array-uniq/-/array-uniq-1.0.3.tgz#af6ac877a25cc7f74e058894753858dfdb24fdb6"
integrity sha1-r2rId6Jcx/dOBYiUdThY39sk/bY=
+array-unique@^0.3.2:
+ version "0.3.2"
+ resolved "https://registry.yarnpkg.com/array-unique/-/array-unique-0.3.2.tgz#a894b75d4bc4f6cd679ef3244a9fd8f46ae2d428"
+ integrity sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=
+
+asn1@~0.2.3:
+ version "0.2.4"
+ resolved "https://registry.yarnpkg.com/asn1/-/asn1-0.2.4.tgz#8d2475dfab553bb33e77b54e59e880bb8ce23136"
+ integrity sha512-jxwzQpLQjSmWXgwaCZE9Nz+glAG01yF1QnWgbhGwHI5A6FRIEY6IVqtHhIepHqI7/kyEyQEagBC5mBEFlIYvdg==
+ dependencies:
+ safer-buffer "~2.1.0"
+
+assert-plus@1.0.0, assert-plus@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/assert-plus/-/assert-plus-1.0.0.tgz#f12e0f3c5d77b0b1cdd9146942e4e96c1e4dd525"
+ integrity sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=
+
+assign-symbols@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/assign-symbols/-/assign-symbols-1.0.0.tgz#59667f41fadd4f20ccbc2bb96b8d4f7f78ec0367"
+ integrity sha1-WWZ/QfrdTyDMvCu5a41Pf3jsA2c=
+
astral-regex@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/astral-regex/-/astral-regex-1.0.0.tgz#6c8c3fb827dd43ee3918f27b82782ab7658a6fd9"
integrity sha512-+Ryf6g3BKoRc7jfp7ad8tM4TtMiaWvbF/1/sQcZPkkS7ag3D5nMBCe2UfOTONtAkaG0tO0ij3C5Lwmf1EiyjHg==
+asynckit@^0.4.0:
+ version "0.4.0"
+ resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
+ integrity sha1-x57Zf380y48robyXkLzDZkdLS3k=
+
+atob@^2.1.2:
+ version "2.1.2"
+ resolved "https://registry.yarnpkg.com/atob/-/atob-2.1.2.tgz#6d9517eb9e030d2436666651e86bd9f6f13533c9"
+ integrity sha512-Wm6ukoaOGJi/73p/cl2GvLjTI5JM1k/O14isD73YML8StrH/7/lRFgmg8nICZgD3bZZvjwCGxtMOD3wWNAu8cg==
+
+aws-sign2@~0.7.0:
+ version "0.7.0"
+ resolved "https://registry.yarnpkg.com/aws-sign2/-/aws-sign2-0.7.0.tgz#b46e890934a9591f2d2f6f86d7e6a9f1b3fe76a8"
+ integrity sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg=
+
+aws4@^1.8.0:
+ version "1.10.0"
+ resolved "https://registry.yarnpkg.com/aws4/-/aws4-1.10.0.tgz#a17b3a8ea811060e74d47d306122400ad4497ae2"
+ integrity sha512-3YDiu347mtVtjpyV3u5kVqQLP242c06zwDOgpeRnybmXlYYsLbtTrUBUm8i8srONt+FWobl5aibnU1030PeeuA==
+
+babel-jest@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/babel-jest/-/babel-jest-26.1.0.tgz#b20751185fc7569a0f135730584044d1cb934328"
+ integrity sha512-Nkqgtfe7j6PxLO6TnCQQlkMm8wdTdnIF8xrdpooHCuD5hXRzVEPbPneTJKknH5Dsv3L8ip9unHDAp48YQ54Dkg==
+ dependencies:
+ "@jest/transform" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ "@types/babel__core" "^7.1.7"
+ babel-plugin-istanbul "^6.0.0"
+ babel-preset-jest "^26.1.0"
+ chalk "^4.0.0"
+ graceful-fs "^4.2.4"
+ slash "^3.0.0"
+
+babel-plugin-istanbul@^6.0.0:
+ version "6.0.0"
+ resolved "https://registry.yarnpkg.com/babel-plugin-istanbul/-/babel-plugin-istanbul-6.0.0.tgz#e159ccdc9af95e0b570c75b4573b7c34d671d765"
+ integrity sha512-AF55rZXpe7trmEylbaE1Gv54wn6rwU03aptvRoVIGP8YykoSxqdVLV1TfwflBCE/QtHmqtP8SWlTENqbK8GCSQ==
+ dependencies:
+ "@babel/helper-plugin-utils" "^7.0.0"
+ "@istanbuljs/load-nyc-config" "^1.0.0"
+ "@istanbuljs/schema" "^0.1.2"
+ istanbul-lib-instrument "^4.0.0"
+ test-exclude "^6.0.0"
+
+babel-plugin-jest-hoist@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-26.1.0.tgz#c6a774da08247a28285620a64dfadbd05dd5233a"
+ integrity sha512-qhqLVkkSlqmC83bdMhM8WW4Z9tB+JkjqAqlbbohS9sJLT5Ha2vfzuKqg5yenXrAjOPG2YC0WiXdH3a9PvB+YYw==
+ dependencies:
+ "@babel/template" "^7.3.3"
+ "@babel/types" "^7.3.3"
+ "@types/babel__core" "^7.0.0"
+ "@types/babel__traverse" "^7.0.6"
+
+babel-preset-current-node-syntax@^0.1.2:
+ version "0.1.3"
+ resolved "https://registry.yarnpkg.com/babel-preset-current-node-syntax/-/babel-preset-current-node-syntax-0.1.3.tgz#b4b547acddbf963cba555ba9f9cbbb70bfd044da"
+ integrity sha512-uyexu1sVwcdFnyq9o8UQYsXwXflIh8LvrF5+cKrYam93ned1CStffB3+BEcsxGSgagoA3GEyjDqO4a/58hyPYQ==
+ dependencies:
+ "@babel/plugin-syntax-async-generators" "^7.8.4"
+ "@babel/plugin-syntax-bigint" "^7.8.3"
+ "@babel/plugin-syntax-class-properties" "^7.8.3"
+ "@babel/plugin-syntax-import-meta" "^7.8.3"
+ "@babel/plugin-syntax-json-strings" "^7.8.3"
+ "@babel/plugin-syntax-logical-assignment-operators" "^7.8.3"
+ "@babel/plugin-syntax-nullish-coalescing-operator" "^7.8.3"
+ "@babel/plugin-syntax-numeric-separator" "^7.8.3"
+ "@babel/plugin-syntax-object-rest-spread" "^7.8.3"
+ "@babel/plugin-syntax-optional-catch-binding" "^7.8.3"
+ "@babel/plugin-syntax-optional-chaining" "^7.8.3"
+
+babel-preset-jest@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/babel-preset-jest/-/babel-preset-jest-26.1.0.tgz#612f714e5b457394acfd863793c564cbcdb7d1c1"
+ integrity sha512-na9qCqFksknlEj5iSdw1ehMVR06LCCTkZLGKeEtxDDdhg8xpUF09m29Kvh1pRbZ07h7AQ5ttLYUwpXL4tO6w7w==
+ dependencies:
+ babel-plugin-jest-hoist "^26.1.0"
+ babel-preset-current-node-syntax "^0.1.2"
+
balanced-match@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.0.tgz#89b4d199ab2bee49de164ea02b89ce462d71b767"
integrity sha1-ibTRmasr7kneFk6gK4nORi1xt2c=
+base@^0.11.1:
+ version "0.11.2"
+ resolved "https://registry.yarnpkg.com/base/-/base-0.11.2.tgz#7bde5ced145b6d551a90db87f83c558b4eb48a8f"
+ integrity sha512-5T6P4xPgpp0YDFvSWwEZ4NoE3aM4QBQXDzmVbraCkFj8zHM+mba8SyqB5DbZWyR7mYHo6Y7BdQo3MoA4m0TeQg==
+ dependencies:
+ cache-base "^1.0.1"
+ class-utils "^0.3.5"
+ component-emitter "^1.2.1"
+ define-property "^1.0.0"
+ isobject "^3.0.1"
+ mixin-deep "^1.2.0"
+ pascalcase "^0.1.1"
+
+bcrypt-pbkdf@^1.0.0:
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz#a4301d389b6a43f9b67ff3ca11a3f6637e360e9e"
+ integrity sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4=
+ dependencies:
+ tweetnacl "^0.14.3"
+
brace-expansion@^1.1.7:
version "1.1.11"
resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
@@ -591,11 +1316,95 @@
balanced-match "^1.0.0"
concat-map "0.0.1"
+braces@^2.3.1:
+ version "2.3.2"
+ resolved "https://registry.yarnpkg.com/braces/-/braces-2.3.2.tgz#5979fd3f14cd531565e5fa2df1abfff1dfaee729"
+ integrity sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==
+ dependencies:
+ arr-flatten "^1.1.0"
+ array-unique "^0.3.2"
+ extend-shallow "^2.0.1"
+ fill-range "^4.0.0"
+ isobject "^3.0.1"
+ repeat-element "^1.1.2"
+ snapdragon "^0.8.1"
+ snapdragon-node "^2.0.1"
+ split-string "^3.0.2"
+ to-regex "^3.0.1"
+
+braces@^3.0.1:
+ version "3.0.2"
+ resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.2.tgz#3454e1a462ee8d599e236df336cd9ea4f8afe107"
+ integrity sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==
+ dependencies:
+ fill-range "^7.0.1"
+
+browser-process-hrtime@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/browser-process-hrtime/-/browser-process-hrtime-1.0.0.tgz#3c9b4b7d782c8121e56f10106d84c0d0ffc94626"
+ integrity sha512-9o5UecI3GhkpM6DrXr69PblIuWxPKk9Y0jHBRhdocZ2y7YECBFCsHm79Pr3OyR2AvjhDkabFJaDJMYRazHgsow==
+
+bs-logger@0.x:
+ version "0.2.6"
+ resolved "https://registry.yarnpkg.com/bs-logger/-/bs-logger-0.2.6.tgz#eb7d365307a72cf974cc6cda76b68354ad336bd8"
+ integrity sha512-pd8DCoxmbgc7hyPKOvxtqNcjYoOsABPQdcCUjGp3d42VR2CX1ORhk2A87oqqu5R1kk+76nsxZupkmyd+MVtCog==
+ dependencies:
+ fast-json-stable-stringify "2.x"
+
+bser@2.1.1:
+ version "2.1.1"
+ resolved "https://registry.yarnpkg.com/bser/-/bser-2.1.1.tgz#e6787da20ece9d07998533cfd9de6f5c38f4bc05"
+ integrity sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==
+ dependencies:
+ node-int64 "^0.4.0"
+
+buffer-from@1.x, buffer-from@^1.0.0:
+ version "1.1.1"
+ resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.1.tgz#32713bc028f75c02fdb710d7c7bcec1f2c6070ef"
+ integrity sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==
+
+cache-base@^1.0.1:
+ version "1.0.1"
+ resolved "https://registry.yarnpkg.com/cache-base/-/cache-base-1.0.1.tgz#0a7f46416831c8b662ee36fe4e7c59d76f666ab2"
+ integrity sha512-AKcdTnFSWATd5/GCPRxr2ChwIJ85CeyrEyjRHlKxQ56d4XJMGym0uAiKn0xbLOGOl3+yRpOTi484dVCEc5AUzQ==
+ dependencies:
+ collection-visit "^1.0.0"
+ component-emitter "^1.2.1"
+ get-value "^2.0.6"
+ has-value "^1.0.0"
+ isobject "^3.0.1"
+ set-value "^2.0.0"
+ to-object-path "^0.3.0"
+ union-value "^1.0.0"
+ unset-value "^1.0.0"
+
callsites@^3.0.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/callsites/-/callsites-3.1.0.tgz#b3630abd8943432f54b3f0519238e33cd7df2f73"
integrity sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==
+camelcase@^5.0.0, camelcase@^5.3.1:
+ version "5.3.1"
+ resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-5.3.1.tgz#e3c9b31569e106811df242f715725a1f4c494320"
+ integrity sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==
+
+camelcase@^6.0.0:
+ version "6.0.0"
+ resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-6.0.0.tgz#5259f7c30e35e278f1bdc2a4d91230b37cad981e"
+ integrity sha512-8KMDF1Vz2gzOq54ONPJS65IvTUaB1cHJ2DMM7MbPmLZljDH1qpzzLsWdiN9pHh6qvkRVDTi/07+eNGch/oLU4w==
+
+capture-exit@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/capture-exit/-/capture-exit-2.0.0.tgz#fb953bfaebeb781f62898239dabb426d08a509a4"
+ integrity sha512-PiT/hQmTonHhl/HFGN+Lx3JJUznrVYJ3+AQsnthneZbvW7x+f08Tk7yLJTLEOUvBTbduLeeBkxEaYXUOUrRq6g==
+ dependencies:
+ rsvp "^4.8.4"
+
+caseless@~0.12.0:
+ version "0.12.0"
+ resolved "https://registry.yarnpkg.com/caseless/-/caseless-0.12.0.tgz#1b681c21ff84033c826543090689420d187151dc"
+ integrity sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=
+
chalk@^2.0.0, chalk@^2.1.0, chalk@^2.4.1, chalk@^2.4.2:
version "2.4.2"
resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.2.tgz#cd42541677a54333cf541a49108c1432b44c9424"
@@ -605,7 +1414,15 @@
escape-string-regexp "^1.0.5"
supports-color "^5.3.0"
-chalk@^4.1.0:
+chalk@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/chalk/-/chalk-3.0.0.tgz#3f73c2bf526591f574cc492c51e2456349f844e4"
+ integrity sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==
+ dependencies:
+ ansi-styles "^4.1.0"
+ supports-color "^7.1.0"
+
+chalk@^4.0.0, chalk@^4.1.0:
version "4.1.0"
resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.0.tgz#4e14870a618d9e2edd97dd8345fd9d9dc315646a"
integrity sha512-qwx12AxXe2Q5xQ43Ac//I6v5aXTipYrSESdOgzrN+9XjgEpyjpKuvSGaN4qE93f7TQTlerQQ8S+EQ0EyDoVL1A==
@@ -613,11 +1430,31 @@
ansi-styles "^4.1.0"
supports-color "^7.1.0"
+char-regex@^1.0.2:
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/char-regex/-/char-regex-1.0.2.tgz#d744358226217f981ed58f479b1d6bcc29545dcf"
+ integrity sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==
+
chardet@^0.7.0:
version "0.7.0"
resolved "https://registry.yarnpkg.com/chardet/-/chardet-0.7.0.tgz#90094849f0937f2eedc2425d0d28a9e5f0cbad9e"
integrity sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA==
+ci-info@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-2.0.0.tgz#67a9e964be31a51e15e5010d58e6f12834002f46"
+ integrity sha512-5tK7EtrZ0N+OLFMthtqOj4fI2Jeb88C4CAZPu25LDVUgXJ0A3Js4PMGqrn0JU1W0Mh1/Z8wZzYPxqUrXeBboCQ==
+
+class-utils@^0.3.5:
+ version "0.3.6"
+ resolved "https://registry.yarnpkg.com/class-utils/-/class-utils-0.3.6.tgz#f93369ae8b9a7ce02fd41faad0ca83033190c463"
+ integrity sha512-qOhPa/Fj7s6TY8H8esGu5QNpMMQxz79h+urzrNYN6mn+9BnxlDGf5QZ+XeCDsxSjPqsSR56XOZOJmpeurnLMeg==
+ dependencies:
+ arr-union "^3.1.0"
+ define-property "^0.2.5"
+ isobject "^3.0.0"
+ static-extend "^0.1.1"
+
classnames@^2.2:
version "2.2.6"
resolved "https://registry.yarnpkg.com/classnames/-/classnames-2.2.6.tgz#43935bffdd291f326dad0a205309b38d00f650ce"
@@ -635,11 +1472,38 @@
resolved "https://registry.yarnpkg.com/cli-width/-/cli-width-3.0.0.tgz#a2f48437a2caa9a22436e794bf071ec9e61cedf6"
integrity sha512-FxqpkPPwu1HjuN93Omfm4h8uIanXofW0RxVEW3k5RKx+mJJYSthzNhp32Kzxxy3YAEZ/Dc/EWN1vZRY0+kOhbw==
+cliui@^6.0.0:
+ version "6.0.0"
+ resolved "https://registry.yarnpkg.com/cliui/-/cliui-6.0.0.tgz#511d702c0c4e41ca156d7d0e96021f23e13225b1"
+ integrity sha512-t6wbgtoCXvAzst7QgXxJYqPt0usEfbgQdftEPbLL/cvv6HPE5VgvqCuAIDR0NgU52ds6rFwqrgakNLrHEjCbrQ==
+ dependencies:
+ string-width "^4.2.0"
+ strip-ansi "^6.0.0"
+ wrap-ansi "^6.2.0"
+
+co@^4.6.0:
+ version "4.6.0"
+ resolved "https://registry.yarnpkg.com/co/-/co-4.6.0.tgz#6ea6bdf3d853ae54ccb8e47bfa0bf3f9031fb184"
+ integrity sha1-bqa989hTrlTMuOR7+gvz+QMfsYQ=
+
codemirror@~5.53.2:
version "5.53.2"
resolved "https://registry.yarnpkg.com/codemirror/-/codemirror-5.53.2.tgz#9799121cf8c50809cca487304e9de3a74d33f428"
integrity sha512-wvSQKS4E+P8Fxn/AQ+tQtJnF1qH5UOlxtugFLpubEZ5jcdH2iXTVinb+Xc/4QjshuOxRm4fUsU2QPF1JJKiyXA==
+collect-v8-coverage@^1.0.0:
+ version "1.0.1"
+ resolved "https://registry.yarnpkg.com/collect-v8-coverage/-/collect-v8-coverage-1.0.1.tgz#cc2c8e94fc18bbdffe64d6534570c8a673b27f59"
+ integrity sha512-iBPtljfCNcTKNAto0KEtDfZ3qzjJvqE3aTGZsbhjSBlorqpXJlaWWtPO35D+ZImoC3KWejX64o+yPGxhWSTzfg==
+
+collection-visit@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/collection-visit/-/collection-visit-1.0.0.tgz#4bc0373c164bc3291b4d368c829cf1a80a59dca0"
+ integrity sha1-S8A3PBZLwykbTTaMgpzxqApZ3KA=
+ dependencies:
+ map-visit "^1.0.0"
+ object-visit "^1.0.0"
+
color-convert@^1.9.0:
version "1.9.3"
resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8"
@@ -664,11 +1528,40 @@
resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2"
integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==
+combined-stream@^1.0.6, combined-stream@~1.0.6:
+ version "1.0.8"
+ resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f"
+ integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==
+ dependencies:
+ delayed-stream "~1.0.0"
+
+component-emitter@^1.2.1:
+ version "1.3.0"
+ resolved "https://registry.yarnpkg.com/component-emitter/-/component-emitter-1.3.0.tgz#16e4070fba8ae29b679f2215853ee181ab2eabc0"
+ integrity sha512-Rd3se6QB+sO1TwqZjscQrurpEPIfO0/yYnSin6Q/rD3mOutHvUrCAhJub3r90uNb+SESBuE0QYoB90YdfatsRg==
+
concat-map@0.0.1:
version "0.0.1"
resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=
+convert-source-map@^1.4.0, convert-source-map@^1.6.0, convert-source-map@^1.7.0:
+ version "1.7.0"
+ resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-1.7.0.tgz#17a2cb882d7f77d3490585e2ce6c524424a3a442"
+ integrity sha512-4FJkXzKXEDB1snCFZlLP4gpC3JILicCpGbzG9f9G7tGqGCzETQ2hWPrcinA9oU4wtf2biUaEH5065UnMeR33oA==
+ dependencies:
+ safe-buffer "~5.1.1"
+
+copy-descriptor@^0.1.0:
+ version "0.1.1"
+ resolved "https://registry.yarnpkg.com/copy-descriptor/-/copy-descriptor-0.1.1.tgz#676f6eb3c39997c2ee1ac3a924fd6124748f578d"
+ integrity sha1-Z29us8OZl8LuGsOpJP1hJHSPV40=
+
+core-util-is@1.0.2:
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.2.tgz#b5fd54220aa2bc5ab57aab7140c940754503c1a7"
+ integrity sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=
+
create-react-context@^0.3.0:
version "0.3.0"
resolved "https://registry.yarnpkg.com/create-react-context/-/create-react-context-0.3.0.tgz#546dede9dc422def0d3fc2fe03afe0bc0f4f7d8c"
@@ -677,7 +1570,7 @@
gud "^1.0.0"
warning "^4.0.3"
-cross-spawn@^6.0.5:
+cross-spawn@^6.0.0, cross-spawn@^6.0.5:
version "6.0.5"
resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4"
integrity sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ==
@@ -688,6 +1581,32 @@
shebang-command "^1.2.0"
which "^1.2.9"
+cross-spawn@^7.0.0:
+ version "7.0.3"
+ resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
+ integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==
+ dependencies:
+ path-key "^3.1.0"
+ shebang-command "^2.0.0"
+ which "^2.0.1"
+
+cssom@^0.4.4:
+ version "0.4.4"
+ resolved "https://registry.yarnpkg.com/cssom/-/cssom-0.4.4.tgz#5a66cf93d2d0b661d80bf6a44fb65f5c2e4e0a10"
+ integrity sha512-p3pvU7r1MyyqbTk+WbNJIgJjG2VmTIaB10rI93LzVPrmDJKkzKYMtxxyAvQXR/NS6otuzveI7+7BBq3SjBS2mw==
+
+cssom@~0.3.6:
+ version "0.3.8"
+ resolved "https://registry.yarnpkg.com/cssom/-/cssom-0.3.8.tgz#9f1276f5b2b463f2114d3f2c75250af8c1a36f4a"
+ integrity sha512-b0tGHbfegbhPJpxpiBPU2sCkigAqtM9O121le6bbOlgyV+NyGyCmVfJ6QW9eRjz8CpNfWEOYBIMIGRYkLwsIYg==
+
+cssstyle@^2.2.0:
+ version "2.3.0"
+ resolved "https://registry.yarnpkg.com/cssstyle/-/cssstyle-2.3.0.tgz#ff665a0ddbdc31864b09647f34163443d90b0852"
+ integrity sha512-AZL67abkUzIuvcHqk7c09cezpGNcxUxU4Ioi/05xHk4DQeTkWmGYftIE6ctU6AEt+Gn4n1lDStOtj7FKycP71A==
+ dependencies:
+ cssom "~0.3.6"
+
csstype@2.6.9:
version "2.6.9"
resolved "https://registry.yarnpkg.com/csstype/-/csstype-2.6.9.tgz#05141d0cd557a56b8891394c1911c40c8a98d098"
@@ -698,13 +1617,51 @@
resolved "https://registry.yarnpkg.com/csstype/-/csstype-2.6.11.tgz#452f4d024149ecf260a852b025e36562a253ffc5"
integrity sha512-l8YyEC9NBkSm783PFTvh0FmJy7s5pFKrDp49ZL7zBGX3fWkO+N4EEyan1qqp8cwPLDcD0OSdyY6hAMoxp34JFw==
-debug@^4.0.1, debug@^4.1.1:
+dashdash@^1.12.0:
+ version "1.14.1"
+ resolved "https://registry.yarnpkg.com/dashdash/-/dashdash-1.14.1.tgz#853cfa0f7cbe2fed5de20326b8dd581035f6e2f0"
+ integrity sha1-hTz6D3y+L+1d4gMmuN1YEDX24vA=
+ dependencies:
+ assert-plus "^1.0.0"
+
+data-urls@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/data-urls/-/data-urls-2.0.0.tgz#156485a72963a970f5d5821aaf642bef2bf2db9b"
+ integrity sha512-X5eWTSXO/BJmpdIKCRuKUgSCgAN0OwliVK3yPKbwIWU1Tdw5BRajxlzMidvh+gwko9AfQ9zIj52pzF91Q3YAvQ==
+ dependencies:
+ abab "^2.0.3"
+ whatwg-mimetype "^2.3.0"
+ whatwg-url "^8.0.0"
+
+debug@^2.2.0, debug@^2.3.3:
+ version "2.6.9"
+ resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f"
+ integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==
+ dependencies:
+ ms "2.0.0"
+
+debug@^4.0.1, debug@^4.1.0, debug@^4.1.1:
version "4.1.1"
resolved "https://registry.yarnpkg.com/debug/-/debug-4.1.1.tgz#3b72260255109c6b589cee050f1d516139664791"
integrity sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw==
dependencies:
ms "^2.1.1"
+decamelize@^1.2.0:
+ version "1.2.0"
+ resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-1.2.0.tgz#f6534d15148269b20352e7bee26f501f9a191290"
+ integrity sha1-9lNNFRSCabIDUue+4m9QH5oZEpA=
+
+decimal.js@^10.2.0:
+ version "10.2.0"
+ resolved "https://registry.yarnpkg.com/decimal.js/-/decimal.js-10.2.0.tgz#39466113a9e036111d02f82489b5fd6b0b5ed231"
+ integrity sha512-vDPw+rDgn3bZe1+F/pyEwb1oMG2XTlRVgAa6B4KccTEpYgF8w6eQllVbQcfIJnZyvzFtFpxnpGtx8dd7DJp/Rw==
+
+decode-uri-component@^0.2.0:
+ version "0.2.0"
+ resolved "https://registry.yarnpkg.com/decode-uri-component/-/decode-uri-component-0.2.0.tgz#eb3913333458775cb84cd1a1fae062106bb87545"
+ integrity sha1-6zkTMzRYd1y4TNGh+uBiEGu4dUU=
+
deep-equal@^1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/deep-equal/-/deep-equal-1.1.1.tgz#b5c98c942ceffaf7cb051e24e1434a25a2e6076a"
@@ -722,6 +1679,11 @@
resolved "https://registry.yarnpkg.com/deep-is/-/deep-is-0.1.3.tgz#b369d6fb5dbc13eecf524f91b070feedc357cf34"
integrity sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=
+deepmerge@^4.2.2:
+ version "4.2.2"
+ resolved "https://registry.yarnpkg.com/deepmerge/-/deepmerge-4.2.2.tgz#44d2ea3679b8f4d4ffba33f03d865fc1e7bf4955"
+ integrity sha512-FJ3UgI4gIl+PHZm53knsuSFpE+nESMr7M4v9QcgB7S63Kj/6WqMiFQJpBBYz1Pt+66bZpP3Q7Lye0Oo9MPKEdg==
+
define-properties@^1.1.2, define-properties@^1.1.3:
version "1.1.3"
resolved "https://registry.yarnpkg.com/define-properties/-/define-properties-1.1.3.tgz#cf88da6cbee26fe6db7094f61d870cbd84cee9f1"
@@ -729,6 +1691,48 @@
dependencies:
object-keys "^1.0.12"
+define-property@^0.2.5:
+ version "0.2.5"
+ resolved "https://registry.yarnpkg.com/define-property/-/define-property-0.2.5.tgz#c35b1ef918ec3c990f9a5bc57be04aacec5c8116"
+ integrity sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=
+ dependencies:
+ is-descriptor "^0.1.0"
+
+define-property@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/define-property/-/define-property-1.0.0.tgz#769ebaaf3f4a63aad3af9e8d304c9bbe79bfb0e6"
+ integrity sha1-dp66rz9KY6rTr56NMEybvnm/sOY=
+ dependencies:
+ is-descriptor "^1.0.0"
+
+define-property@^2.0.2:
+ version "2.0.2"
+ resolved "https://registry.yarnpkg.com/define-property/-/define-property-2.0.2.tgz#d459689e8d654ba77e02a817f8710d702cb16e9d"
+ integrity sha512-jwK2UV4cnPpbcG7+VRARKTZPUWowwXA8bzH5NP6ud0oeAxyYPuGZUAC7hMugpCdz4BeSZl2Dl9k66CHJ/46ZYQ==
+ dependencies:
+ is-descriptor "^1.0.2"
+ isobject "^3.0.1"
+
+delayed-stream@~1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619"
+ integrity sha1-3zrhmayt+31ECqrgsp4icrJOxhk=
+
+detect-newline@^3.0.0:
+ version "3.1.0"
+ resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-3.1.0.tgz#576f5dfc63ae1a192ff192d8ad3af6308991b651"
+ integrity sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==
+
+diff-sequences@^25.2.6:
+ version "25.2.6"
+ resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-25.2.6.tgz#5f467c00edd35352b7bca46d7927d60e687a76dd"
+ integrity sha512-Hq8o7+6GaZeoFjtpgvRBUknSXNeJiCx7V9Fr94ZMljNiCr9n9L8H8aJqgWOQiDDGdyn29fRNcDdRVJ5fdyihfg==
+
+diff-sequences@^26.0.0:
+ version "26.0.0"
+ resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-26.0.0.tgz#0760059a5c287637b842bd7085311db7060e88a6"
+ integrity sha512-JC/eHYEC3aSS0vZGjuoc4vHA0yAQTzhQQldXMeMF+JlxLGJlCO38Gma82NV9gk1jGFz8mDzUMeaKXvjRRdJ2dg==
+
doctrine@^3.0.0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-3.0.0.tgz#addebead72a6574db783639dc87a121773973961"
@@ -766,6 +1770,13 @@
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.0.1.tgz#1f8bdfe91f5a78063274e803b4bdcedf6e94f94d"
integrity sha512-5HOHUDsYZWV8FGWN0Njbr/Rn7f/eWSQi1v7+HsUVwXgn8nWWlL64zKDkS0n8ZmQ3mlWOMuXOnR+7Nx/5tMO5AQ==
+domexception@^2.0.1:
+ version "2.0.1"
+ resolved "https://registry.yarnpkg.com/domexception/-/domexception-2.0.1.tgz#fb44aefba793e1574b0af6aed2801d057529f304"
+ integrity sha512-yxJ2mFy/sibVQlu5qHjOkf9J3K6zgmCxgJ94u2EdvDOV09H+32LtRswEcUsmUWN72pVLOEnTSRaIVVzVQgS0dg==
+ dependencies:
+ webidl-conversions "^5.0.0"
+
domhandler@^2.3.0:
version "2.4.2"
resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-2.4.2.tgz#8805097e933d65e85546f726d60f5eb88b44f803"
@@ -781,6 +1792,14 @@
dom-serializer "0"
domelementtype "1"
+ecc-jsbn@~0.1.1:
+ version "0.1.2"
+ resolved "https://registry.yarnpkg.com/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz#3a83a904e54353287874c564b7549386849a98c9"
+ integrity sha1-OoOpBOVDUyh4dMVkt1SThoSamMk=
+ dependencies:
+ jsbn "~0.1.0"
+ safer-buffer "^2.1.0"
+
emoji-regex@^7.0.1:
version "7.0.3"
resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-7.0.3.tgz#933a04052860c85e83c122479c4748a8e4c72156"
@@ -791,6 +1810,13 @@
resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37"
integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==
+end-of-stream@^1.1.0:
+ version "1.4.4"
+ resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0"
+ integrity sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==
+ dependencies:
+ once "^1.4.0"
+
entities@^1.1.1:
version "1.1.2"
resolved "https://registry.yarnpkg.com/entities/-/entities-1.1.2.tgz#bdfa735299664dfafd34529ed4f8522a275fea56"
@@ -801,6 +1827,13 @@
resolved "https://registry.yarnpkg.com/entities/-/entities-2.0.3.tgz#5c487e5742ab93c15abb5da22759b8590ec03b7f"
integrity sha512-MyoZ0jgnLvB2X3Lg5HqpFmn1kybDiIfEQmKzTb5apr51Rb+T3KdmMiqa70T+bhGnyv7bQ6WMj2QMHpGMmlrUYQ==
+error-ex@^1.3.1:
+ version "1.3.2"
+ resolved "https://registry.yarnpkg.com/error-ex/-/error-ex-1.3.2.tgz#b4ac40648107fdcdcfae242f428bea8a14d4f1bf"
+ integrity sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==
+ dependencies:
+ is-arrayish "^0.2.1"
+
es-abstract@^1.17.0-next.1, es-abstract@^1.17.5:
version "1.17.6"
resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.17.6.tgz#9142071707857b2cacc7b89ecb670316c3e2d52a"
@@ -832,6 +1865,23 @@
resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4"
integrity sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=
+escape-string-regexp@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz#a30304e99daa32e23b2fd20f51babd07cffca344"
+ integrity sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==
+
+escodegen@^1.14.1:
+ version "1.14.3"
+ resolved "https://registry.yarnpkg.com/escodegen/-/escodegen-1.14.3.tgz#4e7b81fba61581dc97582ed78cab7f0e8d63f503"
+ integrity sha512-qFcX0XJkdg+PB3xjZZG/wKSuT1PnQWx57+TVSjIMmILd2yC/6ByYElPwJnslDsuWuSAp4AwJGumarAAmJch5Kw==
+ dependencies:
+ esprima "^4.0.1"
+ estraverse "^4.2.0"
+ esutils "^2.0.2"
+ optionator "^0.8.1"
+ optionalDependencies:
+ source-map "~0.6.1"
+
eslint-config-prettier@^6.10.1:
version "6.11.0"
resolved "https://registry.yarnpkg.com/eslint-config-prettier/-/eslint-config-prettier-6.11.0.tgz#f6d2238c1290d01c859a8b5c1f7d352a0b0da8b1"
@@ -925,7 +1975,7 @@
acorn-jsx "^5.2.0"
eslint-visitor-keys "^1.1.0"
-esprima@^4.0.0:
+esprima@^4.0.0, esprima@^4.0.1:
version "4.0.1"
resolved "https://registry.yarnpkg.com/esprima/-/esprima-4.0.1.tgz#13b04cdb3e6c5d19df91ab6987a8695619b0aa71"
integrity sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==
@@ -944,7 +1994,7 @@
dependencies:
estraverse "^4.1.0"
-estraverse@^4.1.0, estraverse@^4.1.1:
+estraverse@^4.1.0, estraverse@^4.1.1, estraverse@^4.2.0:
version "4.3.0"
resolved "https://registry.yarnpkg.com/estraverse/-/estraverse-4.3.0.tgz#398ad3f3c5a24948be7725e83d11a7de28cdbd1d"
integrity sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==
@@ -959,6 +2009,89 @@
resolved "https://registry.yarnpkg.com/esutils/-/esutils-2.0.3.tgz#74d2eb4de0b8da1293711910d50775b9b710ef64"
integrity sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==
+exec-sh@^0.3.2:
+ version "0.3.4"
+ resolved "https://registry.yarnpkg.com/exec-sh/-/exec-sh-0.3.4.tgz#3a018ceb526cc6f6df2bb504b2bfe8e3a4934ec5"
+ integrity sha512-sEFIkc61v75sWeOe72qyrqg2Qg0OuLESziUDk/O/z2qgS15y2gWVFrI6f2Qn/qw/0/NCfCEsmNA4zOjkwEZT1A==
+
+execa@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/execa/-/execa-1.0.0.tgz#c6236a5bb4df6d6f15e88e7f017798216749ddd8"
+ integrity sha512-adbxcyWV46qiHyvSp50TKt05tB4tK3HcmF7/nxfAdhnox83seTDbwnaqKO4sXRy7roHAIFqJP/Rw/AuEbX61LA==
+ dependencies:
+ cross-spawn "^6.0.0"
+ get-stream "^4.0.0"
+ is-stream "^1.1.0"
+ npm-run-path "^2.0.0"
+ p-finally "^1.0.0"
+ signal-exit "^3.0.0"
+ strip-eof "^1.0.0"
+
+execa@^4.0.0:
+ version "4.0.3"
+ resolved "https://registry.yarnpkg.com/execa/-/execa-4.0.3.tgz#0a34dabbad6d66100bd6f2c576c8669403f317f2"
+ integrity sha512-WFDXGHckXPWZX19t1kCsXzOpqX9LWYNqn4C+HqZlk/V0imTkzJZqf87ZBhvpHaftERYknpk0fjSylnXVlVgI0A==
+ dependencies:
+ cross-spawn "^7.0.0"
+ get-stream "^5.0.0"
+ human-signals "^1.1.1"
+ is-stream "^2.0.0"
+ merge-stream "^2.0.0"
+ npm-run-path "^4.0.0"
+ onetime "^5.1.0"
+ signal-exit "^3.0.2"
+ strip-final-newline "^2.0.0"
+
+exit@^0.1.2:
+ version "0.1.2"
+ resolved "https://registry.yarnpkg.com/exit/-/exit-0.1.2.tgz#0632638f8d877cc82107d30a0fff1a17cba1cd0c"
+ integrity sha1-BjJjj42HfMghB9MKD/8aF8uhzQw=
+
+expand-brackets@^2.1.4:
+ version "2.1.4"
+ resolved "https://registry.yarnpkg.com/expand-brackets/-/expand-brackets-2.1.4.tgz#b77735e315ce30f6b6eff0f83b04151a22449622"
+ integrity sha1-t3c14xXOMPa27/D4OwQVGiJEliI=
+ dependencies:
+ debug "^2.3.3"
+ define-property "^0.2.5"
+ extend-shallow "^2.0.1"
+ posix-character-classes "^0.1.0"
+ regex-not "^1.0.0"
+ snapdragon "^0.8.1"
+ to-regex "^3.0.1"
+
+expect@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/expect/-/expect-26.1.0.tgz#8c62e31d0f8d5a8ebb186ee81473d15dd2fbf7c8"
+ integrity sha512-QbH4LZXDsno9AACrN9eM0zfnby9G+OsdNgZUohjg/P0mLy1O+/bzTAJGT6VSIjVCe8yKM6SzEl/ckEOFBT7Vnw==
+ dependencies:
+ "@jest/types" "^26.1.0"
+ ansi-styles "^4.0.0"
+ jest-get-type "^26.0.0"
+ jest-matcher-utils "^26.1.0"
+ jest-message-util "^26.1.0"
+ jest-regex-util "^26.0.0"
+
+extend-shallow@^2.0.1:
+ version "2.0.1"
+ resolved "https://registry.yarnpkg.com/extend-shallow/-/extend-shallow-2.0.1.tgz#51af7d614ad9a9f610ea1bafbb989d6b1c56890f"
+ integrity sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=
+ dependencies:
+ is-extendable "^0.1.0"
+
+extend-shallow@^3.0.0, extend-shallow@^3.0.2:
+ version "3.0.2"
+ resolved "https://registry.yarnpkg.com/extend-shallow/-/extend-shallow-3.0.2.tgz#26a71aaf073b39fb2127172746131c2704028db8"
+ integrity sha1-Jqcarwc7OfshJxcnRhMcJwQCjbg=
+ dependencies:
+ assign-symbols "^1.0.0"
+ is-extendable "^1.0.1"
+
+extend@~3.0.2:
+ version "3.0.2"
+ resolved "https://registry.yarnpkg.com/extend/-/extend-3.0.2.tgz#f8b1136b4071fbd8eb140aff858b1019ec2915fa"
+ integrity sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==
+
external-editor@^3.0.3:
version "3.1.0"
resolved "https://registry.yarnpkg.com/external-editor/-/external-editor-3.1.0.tgz#cb03f740befae03ea4d283caed2741a83f335495"
@@ -968,6 +2101,30 @@
iconv-lite "^0.4.24"
tmp "^0.0.33"
+extglob@^2.0.4:
+ version "2.0.4"
+ resolved "https://registry.yarnpkg.com/extglob/-/extglob-2.0.4.tgz#ad00fe4dc612a9232e8718711dc5cb5ab0285543"
+ integrity sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==
+ dependencies:
+ array-unique "^0.3.2"
+ define-property "^1.0.0"
+ expand-brackets "^2.1.4"
+ extend-shallow "^2.0.1"
+ fragment-cache "^0.2.1"
+ regex-not "^1.0.0"
+ snapdragon "^0.8.1"
+ to-regex "^3.0.1"
+
+extsprintf@1.3.0:
+ version "1.3.0"
+ resolved "https://registry.yarnpkg.com/extsprintf/-/extsprintf-1.3.0.tgz#96918440e3041a7a414f8c52e3c574eb3c3e1e05"
+ integrity sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=
+
+extsprintf@^1.2.0:
+ version "1.4.0"
+ resolved "https://registry.yarnpkg.com/extsprintf/-/extsprintf-1.4.0.tgz#e2689f8f356fad62cca65a3a91c5df5f9551692f"
+ integrity sha1-4mifjzVvrWLMplo6kcXfX5VRaS8=
+
fast-deep-equal@^3.1.1:
version "3.1.3"
resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz#3a7d56b559d6cbc3eb512325244e619a65c6c525"
@@ -978,7 +2135,7 @@
resolved "https://registry.yarnpkg.com/fast-diff/-/fast-diff-1.2.0.tgz#73ee11982d86caaf7959828d519cfe927fac5f03"
integrity sha512-xJuoT5+L99XlZ8twedaRf6Ax2TgQVxvgZOYoPKqZufmJib0tL2tegPBOZb1pVNgIhlqDlA0eO0c3wBvQcmzx4w==
-fast-json-stable-stringify@^2.0.0:
+fast-json-stable-stringify@2.x, fast-json-stable-stringify@^2.0.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz#874bf69c6f404c2b5d99c481341399fd55892633"
integrity sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==
@@ -988,6 +2145,13 @@
resolved "https://registry.yarnpkg.com/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz#3d8a5c66883a16a30ca8643e851f19baa7797917"
integrity sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=
+fb-watchman@^2.0.0:
+ version "2.0.1"
+ resolved "https://registry.yarnpkg.com/fb-watchman/-/fb-watchman-2.0.1.tgz#fc84fb39d2709cf3ff6d743706157bb5708a8a85"
+ integrity sha512-DkPJKQeY6kKwmuMretBhr7G6Vodr7bFwDYTXIkfG1gjvNpaxBTQV3PbXg6bR1c1UP4jPOX0jHUbbHANL9vRjVg==
+ dependencies:
+ bser "2.1.1"
+
figures@^3.0.0:
version "3.2.0"
resolved "https://registry.yarnpkg.com/figures/-/figures-3.2.0.tgz#625c18bd293c604dc4a8ddb2febf0c88341746af"
@@ -1002,6 +2166,31 @@
dependencies:
flat-cache "^2.0.1"
+fill-range@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-4.0.0.tgz#d544811d428f98eb06a63dc402d2403c328c38f7"
+ integrity sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=
+ dependencies:
+ extend-shallow "^2.0.1"
+ is-number "^3.0.0"
+ repeat-string "^1.6.1"
+ to-regex-range "^2.1.0"
+
+fill-range@^7.0.1:
+ version "7.0.1"
+ resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.0.1.tgz#1919a6a7c75fe38b2c7c77e5198535da9acdda40"
+ integrity sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==
+ dependencies:
+ to-regex-range "^5.0.1"
+
+find-up@^4.0.0, find-up@^4.1.0:
+ version "4.1.0"
+ resolved "https://registry.yarnpkg.com/find-up/-/find-up-4.1.0.tgz#97afe7d6cdc0bc5928584b7c8d7b16e8a9aa5d19"
+ integrity sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==
+ dependencies:
+ locate-path "^5.0.0"
+ path-exists "^4.0.0"
+
flat-cache@^2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/flat-cache/-/flat-cache-2.0.1.tgz#5d296d6f04bda44a4630a301413bdbc2ec085ec0"
@@ -1016,6 +2205,32 @@
resolved "https://registry.yarnpkg.com/flatted/-/flatted-2.0.2.tgz#4575b21e2bcee7434aa9be662f4b7b5f9c2b5138"
integrity sha512-r5wGx7YeOwNWNlCA0wQ86zKyDLMQr+/RB8xy74M4hTphfmjlijTSSXGuH8rnvKZnfT9i+75zmd8jcKdMR4O6jA==
+for-in@^1.0.2:
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/for-in/-/for-in-1.0.2.tgz#81068d295a8142ec0ac726c6e2200c30fb6d5e80"
+ integrity sha1-gQaNKVqBQuwKxybG4iAMMPttXoA=
+
+forever-agent@~0.6.1:
+ version "0.6.1"
+ resolved "https://registry.yarnpkg.com/forever-agent/-/forever-agent-0.6.1.tgz#fbc71f0c41adeb37f96c577ad1ed42d8fdacca91"
+ integrity sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=
+
+form-data@~2.3.2:
+ version "2.3.3"
+ resolved "https://registry.yarnpkg.com/form-data/-/form-data-2.3.3.tgz#dcce52c05f644f298c6a7ab936bd724ceffbf3a6"
+ integrity sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==
+ dependencies:
+ asynckit "^0.4.0"
+ combined-stream "^1.0.6"
+ mime-types "^2.1.12"
+
+fragment-cache@^0.2.1:
+ version "0.2.1"
+ resolved "https://registry.yarnpkg.com/fragment-cache/-/fragment-cache-0.2.1.tgz#4290fad27f13e89be7f33799c6bc5a0abfff0d19"
+ integrity sha1-QpD60n8T6Jvn8zeZxrxaCr//DRk=
+ dependencies:
+ map-cache "^0.2.2"
+
free-style@3.1.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/free-style/-/free-style-3.1.0.tgz#4e2996029534e6b1731611d843437b9e2f473f08"
@@ -1026,6 +2241,11 @@
resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f"
integrity sha1-FQStJSMVjKpA20onh8sBQRmU6k8=
+fsevents@^2.1.2:
+ version "2.1.3"
+ resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.1.3.tgz#fb738703ae8d2f9fe900c33836ddebee8b97f23e"
+ integrity sha512-Auw9a4AxqWpa9GUfj370BMPzzyncfBABW8Mab7BGWBYDj4Isgq+cDKtx0i6u9jcX9pQDnswsaaOTgTmA5pEjuQ==
+
function-bind@^1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d"
@@ -1036,11 +2256,52 @@
resolved "https://registry.yarnpkg.com/functional-red-black-tree/-/functional-red-black-tree-1.0.1.tgz#1b0ab3bd553b2a0d6399d29c0e3ea0b252078327"
integrity sha1-GwqzvVU7Kg1jmdKcDj6gslIHgyc=
+gensync@^1.0.0-beta.1:
+ version "1.0.0-beta.1"
+ resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.1.tgz#58f4361ff987e5ff6e1e7a210827aa371eaac269"
+ integrity sha512-r8EC6NO1sngH/zdD9fiRDLdcgnbayXah+mLgManTaIZJqEC1MZstmnox8KpnI2/fxQwrp5OpCOYWLp4rBl4Jcg==
+
+get-caller-file@^2.0.1:
+ version "2.0.5"
+ resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-2.0.5.tgz#4f94412a82db32f36e3b0b9741f8a97feb031f7e"
+ integrity sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==
+
+get-package-type@^0.1.0:
+ version "0.1.0"
+ resolved "https://registry.yarnpkg.com/get-package-type/-/get-package-type-0.1.0.tgz#8de2d803cff44df3bc6c456e6668b36c3926e11a"
+ integrity sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==
+
get-stdin@^6.0.0:
version "6.0.0"
resolved "https://registry.yarnpkg.com/get-stdin/-/get-stdin-6.0.0.tgz#9e09bf712b360ab9225e812048f71fde9c89657b"
integrity sha512-jp4tHawyV7+fkkSKyvjuLZswblUtz+SQKzSWnBbii16BuZksJlU1wuBYXY75r+duh/llF1ur6oNwi+2ZzjKZ7g==
+get-stream@^4.0.0:
+ version "4.1.0"
+ resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-4.1.0.tgz#c1b255575f3dc21d59bfc79cd3d2b46b1c3a54b5"
+ integrity sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==
+ dependencies:
+ pump "^3.0.0"
+
+get-stream@^5.0.0:
+ version "5.1.0"
+ resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-5.1.0.tgz#01203cdc92597f9b909067c3e656cc1f4d3c4dc9"
+ integrity sha512-EXr1FOzrzTfGeL0gQdeFEvOMm2mzMOglyiOXSTpPC+iAjAKftbr3jpCMWynogwYnM+eSj9sHGc6wjIcDvYiygw==
+ dependencies:
+ pump "^3.0.0"
+
+get-value@^2.0.3, get-value@^2.0.6:
+ version "2.0.6"
+ resolved "https://registry.yarnpkg.com/get-value/-/get-value-2.0.6.tgz#dc15ca1c672387ca76bd37ac0a395ba2042a2c28"
+ integrity sha1-3BXKHGcjh8p2vTesCjlbogQqLCg=
+
+getpass@^0.1.1:
+ version "0.1.7"
+ resolved "https://registry.yarnpkg.com/getpass/-/getpass-0.1.7.tgz#5eff8e3e684d569ae4cb2b1282604e8ba62149fa"
+ integrity sha1-Xv+OPmhNVprkyysSgmBOi6YhSfo=
+ dependencies:
+ assert-plus "^1.0.0"
+
glob-parent@^5.0.0:
version "5.1.1"
resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.1.tgz#b6c1ef417c4e5663ea498f1c45afac6916bbc229"
@@ -1048,7 +2309,7 @@
dependencies:
is-glob "^4.0.1"
-glob@^7.1.3, glob@^7.1.6:
+glob@^7.1.1, glob@^7.1.2, glob@^7.1.3, glob@^7.1.4, glob@^7.1.6:
version "7.1.6"
resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.6.tgz#141f33b81a7c2492e125594307480c46679278a6"
integrity sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==
@@ -1060,6 +2321,11 @@
once "^1.3.0"
path-is-absolute "^1.0.0"
+globals@^11.1.0:
+ version "11.12.0"
+ resolved "https://registry.yarnpkg.com/globals/-/globals-11.12.0.tgz#ab8795338868a0babd8525758018c2a7eb95c42e"
+ integrity sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==
+
globals@^12.1.0:
version "12.4.0"
resolved "https://registry.yarnpkg.com/globals/-/globals-12.4.0.tgz#a18813576a41b00a24a97e7f815918c2e19925f8"
@@ -1067,11 +2333,39 @@
dependencies:
type-fest "^0.8.1"
+graceful-fs@^4.2.4:
+ version "4.2.4"
+ resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.4.tgz#2256bde14d3632958c465ebc96dc467ca07a29fb"
+ integrity sha512-WjKPNJF79dtJAVniUlGGWHYGz2jWxT6VhN/4m1NdkbZ2nOsEF+cI1Edgql5zCRhs/VsQYRvrXctxktVXZUkixw==
+
+growly@^1.3.0:
+ version "1.3.0"
+ resolved "https://registry.yarnpkg.com/growly/-/growly-1.3.0.tgz#f10748cbe76af964b7c96c93c6bcc28af120c081"
+ integrity sha1-8QdIy+dq+WS3yWyTxrzCivEgwIE=
+
gud@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/gud/-/gud-1.0.0.tgz#a489581b17e6a70beca9abe3ae57de7a499852c0"
integrity sha512-zGEOVKFM5sVPPrYs7J5/hYEw2Pof8KCyOwyhG8sAF26mCAeUFAcYPu1mwB7hhpIP29zOIBaDqwuHdLp0jvZXjw==
+har-schema@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/har-schema/-/har-schema-2.0.0.tgz#a94c2224ebcac04782a0d9035521f24735b7ec92"
+ integrity sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI=
+
+har-validator@~5.1.3:
+ version "5.1.3"
+ resolved "https://registry.yarnpkg.com/har-validator/-/har-validator-5.1.3.tgz#1ef89ebd3e4996557675eed9893110dc350fa080"
+ integrity sha512-sNvOCzEQNr/qrvJgc3UG/kD4QtlHycrzwS+6mfTrrSq97BvaYcPZZI1ZSqGSPR73Cxn4LKTD4PttRwfU7jWq5g==
+ dependencies:
+ ajv "^6.5.5"
+ har-schema "^2.0.0"
+
+harmony-reflect@^1.4.6:
+ version "1.6.1"
+ resolved "https://registry.yarnpkg.com/harmony-reflect/-/harmony-reflect-1.6.1.tgz#c108d4f2bb451efef7a37861fdbdae72c9bdefa9"
+ integrity sha512-WJTeyp0JzGtHcuMsi7rw2VwtkvLa+JyfEKJCFyfcS0+CDkjQ5lHPu7zEhFZP+PDSRrEgXa5Ah0l1MbgbE41XjA==
+
has-flag@^3.0.0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-3.0.0.tgz#b5d454dc2199ae225699f3467e5a07f3b955bafd"
@@ -1087,6 +2381,37 @@
resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.1.tgz#9f5214758a44196c406d9bd76cebf81ec2dd31e8"
integrity sha512-PLcsoqu++dmEIZB+6totNFKq/7Do+Z0u4oT0zKOJNl3lYK6vGwwu2hjHs+68OEZbTjiUE9bgOABXbP/GvrS0Kg==
+has-value@^0.3.1:
+ version "0.3.1"
+ resolved "https://registry.yarnpkg.com/has-value/-/has-value-0.3.1.tgz#7b1f58bada62ca827ec0a2078025654845995e1f"
+ integrity sha1-ex9YutpiyoJ+wKIHgCVlSEWZXh8=
+ dependencies:
+ get-value "^2.0.3"
+ has-values "^0.1.4"
+ isobject "^2.0.0"
+
+has-value@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/has-value/-/has-value-1.0.0.tgz#18b281da585b1c5c51def24c930ed29a0be6b177"
+ integrity sha1-GLKB2lhbHFxR3vJMkw7SmgvmsXc=
+ dependencies:
+ get-value "^2.0.6"
+ has-values "^1.0.0"
+ isobject "^3.0.0"
+
+has-values@^0.1.4:
+ version "0.1.4"
+ resolved "https://registry.yarnpkg.com/has-values/-/has-values-0.1.4.tgz#6d61de95d91dfca9b9a02089ad384bff8f62b771"
+ integrity sha1-bWHeldkd/Km5oCCJrThL/49it3E=
+
+has-values@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/has-values/-/has-values-1.0.0.tgz#95b0b63fec2146619a6fe57fe75628d5a39efe4f"
+ integrity sha1-lbC2P+whRmGab+V/51Yo1aOe/k8=
+ dependencies:
+ is-number "^3.0.0"
+ kind-of "^4.0.0"
+
has@^1.0.3:
version "1.0.3"
resolved "https://registry.yarnpkg.com/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796"
@@ -1094,6 +2419,23 @@
dependencies:
function-bind "^1.1.1"
+hosted-git-info@^2.1.4:
+ version "2.8.8"
+ resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.8.8.tgz#7539bd4bc1e0e0a895815a2e0262420b12858488"
+ integrity sha512-f/wzC2QaWBs7t9IYqB4T3sR1xviIViXJRJTWBlx2Gf3g0Xi5vI7Yy4koXQ1c9OYDGHN9sBy1DQ2AB8fqZBWhUg==
+
+html-encoding-sniffer@^2.0.1:
+ version "2.0.1"
+ resolved "https://registry.yarnpkg.com/html-encoding-sniffer/-/html-encoding-sniffer-2.0.1.tgz#42a6dc4fd33f00281176e8b23759ca4e4fa185f3"
+ integrity sha512-D5JbOMBIR/TVZkubHT+OyT2705QvogUW4IBn6nHd756OwieSF9aDYFj4dv6HHEVGYbHaLETa3WggZYWWMyy3ZQ==
+ dependencies:
+ whatwg-encoding "^1.0.5"
+
+html-escaper@^2.0.0:
+ version "2.0.2"
+ resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-2.0.2.tgz#dfd60027da36a36dfcbe236262c00a5822681453"
+ integrity sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==
+
htmlparser2@^3.10.0:
version "3.10.1"
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-3.10.1.tgz#bd679dc3f59897b6a34bb10749c855bb53a9392f"
@@ -1106,13 +2448,34 @@
inherits "^2.0.1"
readable-stream "^3.1.1"
-iconv-lite@^0.4.24:
+http-signature@~1.2.0:
+ version "1.2.0"
+ resolved "https://registry.yarnpkg.com/http-signature/-/http-signature-1.2.0.tgz#9aecd925114772f3d95b65a60abb8f7c18fbace1"
+ integrity sha1-muzZJRFHcvPZW2WmCruPfBj7rOE=
+ dependencies:
+ assert-plus "^1.0.0"
+ jsprim "^1.2.2"
+ sshpk "^1.7.0"
+
+human-signals@^1.1.1:
+ version "1.1.1"
+ resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-1.1.1.tgz#c5b1cd14f50aeae09ab6c59fe63ba3395fe4dfa3"
+ integrity sha512-SEQu7vl8KjNL2eoGBLF3+wAjpsNfA9XMlXAYj/3EdaNfAlxKthD1xjEQfGOUhllCGGJVNY34bRr6lPINhNjyZw==
+
+iconv-lite@0.4.24, iconv-lite@^0.4.24:
version "0.4.24"
resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.4.24.tgz#2022b4b25fbddc21d2f524974a474aafe733908b"
integrity sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==
dependencies:
safer-buffer ">= 2.1.2 < 3"
+identity-obj-proxy@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/identity-obj-proxy/-/identity-obj-proxy-3.0.0.tgz#94d2bda96084453ef36fbc5aaec37e0f79f1fc14"
+ integrity sha1-lNK9qWCERT7zb7xarsN+D3nx/BQ=
+ dependencies:
+ harmony-reflect "^1.4.6"
+
ignore@^4.0.6:
version "4.0.6"
resolved "https://registry.yarnpkg.com/ignore/-/ignore-4.0.6.tgz#750e3db5862087b4737ebac8207ffd1ef27b25fc"
@@ -1126,6 +2489,14 @@
parent-module "^1.0.0"
resolve-from "^4.0.0"
+import-local@^3.0.2:
+ version "3.0.2"
+ resolved "https://registry.yarnpkg.com/import-local/-/import-local-3.0.2.tgz#a8cfd0431d1de4a2199703d003e3e62364fa6db6"
+ integrity sha512-vjL3+w0oulAVZ0hBHnxa/Nm5TAurf9YLQJDhqRZyqb+VKGOB6LU8t9H1Nr5CIo16vh9XfJTOoHwU0B71S557gA==
+ dependencies:
+ pkg-dir "^4.2.0"
+ resolve-cwd "^3.0.0"
+
imurmurhash@^0.1.4:
version "0.1.4"
resolved "https://registry.yarnpkg.com/imurmurhash/-/imurmurhash-0.1.4.tgz#9218b9b2b928a238b13dc4fb6b6d576f231453ea"
@@ -1163,21 +2534,106 @@
strip-ansi "^6.0.0"
through "^2.3.6"
+ip-regex@^2.1.0:
+ version "2.1.0"
+ resolved "https://registry.yarnpkg.com/ip-regex/-/ip-regex-2.1.0.tgz#fa78bf5d2e6913c911ce9f819ee5146bb6d844e9"
+ integrity sha1-+ni/XS5pE8kRzp+BnuUUa7bYROk=
+
+is-accessor-descriptor@^0.1.6:
+ version "0.1.6"
+ resolved "https://registry.yarnpkg.com/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz#a9e12cb3ae8d876727eeef3843f8a0897b5c98d6"
+ integrity sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=
+ dependencies:
+ kind-of "^3.0.2"
+
+is-accessor-descriptor@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz#169c2f6d3df1f992618072365c9b0ea1f6878656"
+ integrity sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==
+ dependencies:
+ kind-of "^6.0.0"
+
is-arguments@^1.0.4:
version "1.0.4"
resolved "https://registry.yarnpkg.com/is-arguments/-/is-arguments-1.0.4.tgz#3faf966c7cba0ff437fb31f6250082fcf0448cf3"
integrity sha512-xPh0Rmt8NE65sNzvyUmWgI1tz3mKq74lGA0mL8LYZcoIzKOzDh6HmrYm3d18k60nHerC8A9Km8kYu87zfSFnLA==
+is-arrayish@^0.2.1:
+ version "0.2.1"
+ resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.2.1.tgz#77c99840527aa8ecb1a8ba697b80645a7a926a9d"
+ integrity sha1-d8mYQFJ6qOyxqLppe4BkWnqSap0=
+
+is-buffer@^1.1.5:
+ version "1.1.6"
+ resolved "https://registry.yarnpkg.com/is-buffer/-/is-buffer-1.1.6.tgz#efaa2ea9daa0d7ab2ea13a97b2b8ad51fefbe8be"
+ integrity sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==
+
is-callable@^1.1.4, is-callable@^1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/is-callable/-/is-callable-1.2.0.tgz#83336560b54a38e35e3a2df7afd0454d691468bb"
integrity sha512-pyVD9AaGLxtg6srb2Ng6ynWJqkHU9bEM087AKck0w8QwDarTfNcpIYoU8x8Hv2Icm8u6kFJM18Dag8lyqGkviw==
+is-ci@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/is-ci/-/is-ci-2.0.0.tgz#6bc6334181810e04b5c22b3d589fdca55026404c"
+ integrity sha512-YfJT7rkpQB0updsdHLGWrvhBJfcfzNNawYDNIyQXJz0IViGf75O8EBPKSdvw2rF+LGCsX4FZ8tcr3b19LcZq4w==
+ dependencies:
+ ci-info "^2.0.0"
+
+is-data-descriptor@^0.1.4:
+ version "0.1.4"
+ resolved "https://registry.yarnpkg.com/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz#0b5ee648388e2c860282e793f1856fec3f301b56"
+ integrity sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=
+ dependencies:
+ kind-of "^3.0.2"
+
+is-data-descriptor@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz#d84876321d0e7add03990406abbbbd36ba9268c7"
+ integrity sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==
+ dependencies:
+ kind-of "^6.0.0"
+
is-date-object@^1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/is-date-object/-/is-date-object-1.0.2.tgz#bda736f2cd8fd06d32844e7743bfa7494c3bfd7e"
integrity sha512-USlDT524woQ08aoZFzh3/Z6ch9Y/EWXEHQ/AaRN0SkKq4t2Jw2R2339tSXmwuVoY7LLlBCbOIlx2myP/L5zk0g==
+is-descriptor@^0.1.0:
+ version "0.1.6"
+ resolved "https://registry.yarnpkg.com/is-descriptor/-/is-descriptor-0.1.6.tgz#366d8240dde487ca51823b1ab9f07a10a78251ca"
+ integrity sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==
+ dependencies:
+ is-accessor-descriptor "^0.1.6"
+ is-data-descriptor "^0.1.4"
+ kind-of "^5.0.0"
+
+is-descriptor@^1.0.0, is-descriptor@^1.0.2:
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/is-descriptor/-/is-descriptor-1.0.2.tgz#3b159746a66604b04f8c81524ba365c5f14d86ec"
+ integrity sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==
+ dependencies:
+ is-accessor-descriptor "^1.0.0"
+ is-data-descriptor "^1.0.0"
+ kind-of "^6.0.2"
+
+is-docker@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/is-docker/-/is-docker-2.0.0.tgz#2cb0df0e75e2d064fe1864c37cdeacb7b2dcf25b"
+ integrity sha512-pJEdRugimx4fBMra5z2/5iRdZ63OhYV0vr0Dwm5+xtW4D1FvRkB8hamMIhnWfyJeDdyr/aa7BDyNbtG38VxgoQ==
+
+is-extendable@^0.1.0, is-extendable@^0.1.1:
+ version "0.1.1"
+ resolved "https://registry.yarnpkg.com/is-extendable/-/is-extendable-0.1.1.tgz#62b110e289a471418e3ec36a617d472e301dfc89"
+ integrity sha1-YrEQ4omkcUGOPsNqYX1HLjAd/Ik=
+
+is-extendable@^1.0.1:
+ version "1.0.1"
+ resolved "https://registry.yarnpkg.com/is-extendable/-/is-extendable-1.0.1.tgz#a7470f9e426733d81bd81e1155264e3a3507cab4"
+ integrity sha512-arnXMxT1hhoKo9k1LZdmlNyJdDDfy2v0fXjFlmok4+i8ul/6WlbVge9bhM74OpNPQPMGUToDtz+KXa1PneJxOA==
+ dependencies:
+ is-plain-object "^2.0.4"
+
is-extglob@^2.1.1:
version "2.1.1"
resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2"
@@ -1193,6 +2649,11 @@
resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz#f116f8064fe90b3f7844a38997c0b75051269f1d"
integrity sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==
+is-generator-fn@^2.0.0:
+ version "2.1.0"
+ resolved "https://registry.yarnpkg.com/is-generator-fn/-/is-generator-fn-2.1.0.tgz#7d140adc389aaf3011a8f2a2a4cfa6faadffb118"
+ integrity sha512-cTIB4yPYL/Grw0EaSzASzg6bBy9gqCofvWN8okThAYIxKJZC+udlRAmGbM0XLeniEJSs8uEgHPGuHSe1XsOLSQ==
+
is-glob@^4.0.0, is-glob@^4.0.1:
version "4.0.1"
resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.1.tgz#7567dbe9f2f5e2467bc77ab83c4a29482407a5dc"
@@ -1200,6 +2661,30 @@
dependencies:
is-extglob "^2.1.1"
+is-number@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/is-number/-/is-number-3.0.0.tgz#24fd6201a4782cf50561c810276afc7d12d71195"
+ integrity sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=
+ dependencies:
+ kind-of "^3.0.2"
+
+is-number@^7.0.0:
+ version "7.0.0"
+ resolved "https://registry.yarnpkg.com/is-number/-/is-number-7.0.0.tgz#7535345b896734d5f80c4d06c50955527a14f12b"
+ integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==
+
+is-plain-object@^2.0.3, is-plain-object@^2.0.4:
+ version "2.0.4"
+ resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-2.0.4.tgz#2c163b3fafb1b606d9d17928f05c2a1c38e07677"
+ integrity sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==
+ dependencies:
+ isobject "^3.0.1"
+
+is-potential-custom-element-name@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.0.tgz#0c52e54bcca391bb2c494b21e8626d7336c6e397"
+ integrity sha1-DFLlS8yjkbssSUsh6GJtczbG45c=
+
is-regex@^1.0.4, is-regex@^1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/is-regex/-/is-regex-1.1.0.tgz#ece38e389e490df0dc21caea2bd596f987f767ff"
@@ -1207,6 +2692,16 @@
dependencies:
has-symbols "^1.0.1"
+is-stream@^1.1.0:
+ version "1.1.0"
+ resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-1.1.0.tgz#12d4a3dd4e68e0b79ceb8dbc84173ae80d91ca44"
+ integrity sha1-EtSj3U5o4Lec6428hBc66A2RykQ=
+
+is-stream@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-2.0.0.tgz#bde9c32680d6fae04129d6ac9d921ce7815f78e3"
+ integrity sha512-XCoy+WlUr7d1+Z8GgSuXmpuUFC9fOhRXglJMx+dwLKTkL44Cjd4W1Z5P+BQZpr+cR93aGP4S/s7Ftw6Nd/kiEw==
+
is-symbol@^1.0.2:
version "1.0.3"
resolved "https://registry.yarnpkg.com/is-symbol/-/is-symbol-1.0.3.tgz#38e1014b9e6329be0de9d24a414fd7441ec61937"
@@ -1214,11 +2709,466 @@
dependencies:
has-symbols "^1.0.1"
+is-typedarray@^1.0.0, is-typedarray@~1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/is-typedarray/-/is-typedarray-1.0.0.tgz#e479c80858df0c1b11ddda6940f96011fcda4a9a"
+ integrity sha1-5HnICFjfDBsR3dppQPlgEfzaSpo=
+
+is-windows@^1.0.2:
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/is-windows/-/is-windows-1.0.2.tgz#d1850eb9791ecd18e6182ce12a30f396634bb19d"
+ integrity sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA==
+
+is-wsl@^2.2.0:
+ version "2.2.0"
+ resolved "https://registry.yarnpkg.com/is-wsl/-/is-wsl-2.2.0.tgz#74a4c76e77ca9fd3f932f290c17ea326cd157271"
+ integrity sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==
+ dependencies:
+ is-docker "^2.0.0"
+
+isarray@1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11"
+ integrity sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=
+
isexe@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10"
integrity sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=
+isobject@^2.0.0:
+ version "2.1.0"
+ resolved "https://registry.yarnpkg.com/isobject/-/isobject-2.1.0.tgz#f065561096a3f1da2ef46272f815c840d87e0c89"
+ integrity sha1-8GVWEJaj8dou9GJy+BXIQNh+DIk=
+ dependencies:
+ isarray "1.0.0"
+
+isobject@^3.0.0, isobject@^3.0.1:
+ version "3.0.1"
+ resolved "https://registry.yarnpkg.com/isobject/-/isobject-3.0.1.tgz#4e431e92b11a9731636aa1f9c8d1ccbcfdab78df"
+ integrity sha1-TkMekrEalzFjaqH5yNHMvP2reN8=
+
+isstream@~0.1.2:
+ version "0.1.2"
+ resolved "https://registry.yarnpkg.com/isstream/-/isstream-0.1.2.tgz#47e63f7af55afa6f92e1500e690eb8b8529c099a"
+ integrity sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=
+
+istanbul-lib-coverage@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/istanbul-lib-coverage/-/istanbul-lib-coverage-3.0.0.tgz#f5944a37c70b550b02a78a5c3b2055b280cec8ec"
+ integrity sha512-UiUIqxMgRDET6eR+o5HbfRYP1l0hqkWOs7vNxC/mggutCMUIhWMm8gAHb8tHlyfD3/l6rlgNA5cKdDzEAf6hEg==
+
+istanbul-lib-instrument@^4.0.0, istanbul-lib-instrument@^4.0.3:
+ version "4.0.3"
+ resolved "https://registry.yarnpkg.com/istanbul-lib-instrument/-/istanbul-lib-instrument-4.0.3.tgz#873c6fff897450118222774696a3f28902d77c1d"
+ integrity sha512-BXgQl9kf4WTCPCCpmFGoJkz/+uhvm7h7PFKUYxh7qarQd3ER33vHG//qaE8eN25l07YqZPpHXU9I09l/RD5aGQ==
+ dependencies:
+ "@babel/core" "^7.7.5"
+ "@istanbuljs/schema" "^0.1.2"
+ istanbul-lib-coverage "^3.0.0"
+ semver "^6.3.0"
+
+istanbul-lib-report@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/istanbul-lib-report/-/istanbul-lib-report-3.0.0.tgz#7518fe52ea44de372f460a76b5ecda9ffb73d8a6"
+ integrity sha512-wcdi+uAKzfiGT2abPpKZ0hSU1rGQjUQnLvtY5MpQ7QCTahD3VODhcu4wcfY1YtkGaDD5yuydOLINXsfbus9ROw==
+ dependencies:
+ istanbul-lib-coverage "^3.0.0"
+ make-dir "^3.0.0"
+ supports-color "^7.1.0"
+
+istanbul-lib-source-maps@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/istanbul-lib-source-maps/-/istanbul-lib-source-maps-4.0.0.tgz#75743ce6d96bb86dc7ee4352cf6366a23f0b1ad9"
+ integrity sha512-c16LpFRkR8vQXyHZ5nLpY35JZtzj1PQY1iZmesUbf1FZHbIupcWfjgOXBY9YHkLEQ6puz1u4Dgj6qmU/DisrZg==
+ dependencies:
+ debug "^4.1.1"
+ istanbul-lib-coverage "^3.0.0"
+ source-map "^0.6.1"
+
+istanbul-reports@^3.0.2:
+ version "3.0.2"
+ resolved "https://registry.yarnpkg.com/istanbul-reports/-/istanbul-reports-3.0.2.tgz#d593210e5000683750cb09fc0644e4b6e27fd53b"
+ integrity sha512-9tZvz7AiR3PEDNGiV9vIouQ/EAcqMXFmkcA1CDFTwOB98OZVDL0PH9glHotf5Ugp6GCOTypfzGWI/OqjWNCRUw==
+ dependencies:
+ html-escaper "^2.0.0"
+ istanbul-lib-report "^3.0.0"
+
+jest-changed-files@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-changed-files/-/jest-changed-files-26.1.0.tgz#de66b0f30453bca2aff98e9400f75905da495305"
+ integrity sha512-HS5MIJp3B8t0NRKGMCZkcDUZo36mVRvrDETl81aqljT1S9tqiHRSpyoOvWg9ZilzZG9TDisDNaN1IXm54fLRZw==
+ dependencies:
+ "@jest/types" "^26.1.0"
+ execa "^4.0.0"
+ throat "^5.0.0"
+
+jest-cli@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-cli/-/jest-cli-26.1.0.tgz#eb9ec8a18cf3b6aa556d9deaa9e24be12b43ad87"
+ integrity sha512-Imumvjgi3rU7stq6SJ1JUEMaV5aAgJYXIs0jPqdUnF47N/Tk83EXfmtvNKQ+SnFVI6t6mDOvfM3aA9Sg6kQPSw==
+ dependencies:
+ "@jest/core" "^26.1.0"
+ "@jest/test-result" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ chalk "^4.0.0"
+ exit "^0.1.2"
+ graceful-fs "^4.2.4"
+ import-local "^3.0.2"
+ is-ci "^2.0.0"
+ jest-config "^26.1.0"
+ jest-util "^26.1.0"
+ jest-validate "^26.1.0"
+ prompts "^2.0.1"
+ yargs "^15.3.1"
+
+jest-config@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-config/-/jest-config-26.1.0.tgz#9074f7539acc185e0113ad6d22ed589c16a37a73"
+ integrity sha512-ONTGeoMbAwGCdq4WuKkMcdMoyfs5CLzHEkzFOlVvcDXufZSaIWh/OXMLa2fwKXiOaFcqEw8qFr4VOKJQfn4CVw==
+ dependencies:
+ "@babel/core" "^7.1.0"
+ "@jest/test-sequencer" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ babel-jest "^26.1.0"
+ chalk "^4.0.0"
+ deepmerge "^4.2.2"
+ glob "^7.1.1"
+ graceful-fs "^4.2.4"
+ jest-environment-jsdom "^26.1.0"
+ jest-environment-node "^26.1.0"
+ jest-get-type "^26.0.0"
+ jest-jasmine2 "^26.1.0"
+ jest-regex-util "^26.0.0"
+ jest-resolve "^26.1.0"
+ jest-util "^26.1.0"
+ jest-validate "^26.1.0"
+ micromatch "^4.0.2"
+ pretty-format "^26.1.0"
+
+jest-diff@^25.2.1:
+ version "25.5.0"
+ resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-25.5.0.tgz#1dd26ed64f96667c068cef026b677dfa01afcfa9"
+ integrity sha512-z1kygetuPiREYdNIumRpAHY6RXiGmp70YHptjdaxTWGmA085W3iCnXNx0DhflK3vwrKmrRWyY1wUpkPMVxMK7A==
+ dependencies:
+ chalk "^3.0.0"
+ diff-sequences "^25.2.6"
+ jest-get-type "^25.2.6"
+ pretty-format "^25.5.0"
+
+jest-diff@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-26.1.0.tgz#00a549bdc936c9691eb4dc25d1fbd78bf456abb2"
+ integrity sha512-GZpIcom339y0OXznsEKjtkfKxNdg7bVbEofK8Q6MnevTIiR1jNhDWKhRX6X0SDXJlwn3dy59nZ1z55fLkAqPWg==
+ dependencies:
+ chalk "^4.0.0"
+ diff-sequences "^26.0.0"
+ jest-get-type "^26.0.0"
+ pretty-format "^26.1.0"
+
+jest-docblock@^26.0.0:
+ version "26.0.0"
+ resolved "https://registry.yarnpkg.com/jest-docblock/-/jest-docblock-26.0.0.tgz#3e2fa20899fc928cb13bd0ff68bd3711a36889b5"
+ integrity sha512-RDZ4Iz3QbtRWycd8bUEPxQsTlYazfYn/h5R65Fc6gOfwozFhoImx+affzky/FFBuqISPTqjXomoIGJVKBWoo0w==
+ dependencies:
+ detect-newline "^3.0.0"
+
+jest-each@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-each/-/jest-each-26.1.0.tgz#e35449875009a22d74d1bda183b306db20f286f7"
+ integrity sha512-lYiSo4Igr81q6QRsVQq9LIkJW0hZcKxkIkHzNeTMPENYYDw/W/Raq28iJ0sLlNFYz2qxxeLnc5K2gQoFYlu2bA==
+ dependencies:
+ "@jest/types" "^26.1.0"
+ chalk "^4.0.0"
+ jest-get-type "^26.0.0"
+ jest-util "^26.1.0"
+ pretty-format "^26.1.0"
+
+jest-environment-jsdom@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-environment-jsdom/-/jest-environment-jsdom-26.1.0.tgz#9dc7313ffe1b59761dad1fedb76e2503e5d37c5b"
+ integrity sha512-dWfiJ+spunVAwzXbdVqPH1LbuJW/kDL+FyqgA5YzquisHqTi0g9hquKif9xKm7c1bKBj6wbmJuDkeMCnxZEpUw==
+ dependencies:
+ "@jest/environment" "^26.1.0"
+ "@jest/fake-timers" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ jest-mock "^26.1.0"
+ jest-util "^26.1.0"
+ jsdom "^16.2.2"
+
+jest-environment-node@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-environment-node/-/jest-environment-node-26.1.0.tgz#8bb387b3eefb132eab7826f9a808e4e05618960b"
+ integrity sha512-DNm5x1aQH0iRAe9UYAkZenuzuJ69VKzDCAYISFHQ5i9e+2Tbeu2ONGY7YStubCLH8a1wdKBgqScYw85+ySxqxg==
+ dependencies:
+ "@jest/environment" "^26.1.0"
+ "@jest/fake-timers" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ jest-mock "^26.1.0"
+ jest-util "^26.1.0"
+
+jest-get-type@^25.2.6:
+ version "25.2.6"
+ resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-25.2.6.tgz#0b0a32fab8908b44d508be81681487dbabb8d877"
+ integrity sha512-DxjtyzOHjObRM+sM1knti6or+eOgcGU4xVSb2HNP1TqO4ahsT+rqZg+nyqHWJSvWgKC5cG3QjGFBqxLghiF/Ig==
+
+jest-get-type@^26.0.0:
+ version "26.0.0"
+ resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-26.0.0.tgz#381e986a718998dbfafcd5ec05934be538db4039"
+ integrity sha512-zRc1OAPnnws1EVfykXOj19zo2EMw5Hi6HLbFCSjpuJiXtOWAYIjNsHVSbpQ8bDX7L5BGYGI8m+HmKdjHYFF0kg==
+
+jest-haste-map@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-26.1.0.tgz#ef31209be73f09b0d9445e7d213e1b53d0d1476a"
+ integrity sha512-WeBS54xCIz9twzkEdm6+vJBXgRBQfdbbXD0dk8lJh7gLihopABlJmIQFdWSDDtuDe4PRiObsjZSUjbJ1uhWEpA==
+ dependencies:
+ "@jest/types" "^26.1.0"
+ "@types/graceful-fs" "^4.1.2"
+ anymatch "^3.0.3"
+ fb-watchman "^2.0.0"
+ graceful-fs "^4.2.4"
+ jest-serializer "^26.1.0"
+ jest-util "^26.1.0"
+ jest-worker "^26.1.0"
+ micromatch "^4.0.2"
+ sane "^4.0.3"
+ walker "^1.0.7"
+ which "^2.0.2"
+ optionalDependencies:
+ fsevents "^2.1.2"
+
+jest-jasmine2@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-jasmine2/-/jest-jasmine2-26.1.0.tgz#4dfe349b2b2d3c6b3a27c024fd4cb57ac0ed4b6f"
+ integrity sha512-1IPtoDKOAG+MeBrKvvuxxGPJb35MTTRSDglNdWWCndCB3TIVzbLThRBkwH9P081vXLgiJHZY8Bz3yzFS803xqQ==
+ dependencies:
+ "@babel/traverse" "^7.1.0"
+ "@jest/environment" "^26.1.0"
+ "@jest/source-map" "^26.1.0"
+ "@jest/test-result" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ chalk "^4.0.0"
+ co "^4.6.0"
+ expect "^26.1.0"
+ is-generator-fn "^2.0.0"
+ jest-each "^26.1.0"
+ jest-matcher-utils "^26.1.0"
+ jest-message-util "^26.1.0"
+ jest-runtime "^26.1.0"
+ jest-snapshot "^26.1.0"
+ jest-util "^26.1.0"
+ pretty-format "^26.1.0"
+ throat "^5.0.0"
+
+jest-leak-detector@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-leak-detector/-/jest-leak-detector-26.1.0.tgz#039c3a07ebcd8adfa984b6ac015752c35792e0a6"
+ integrity sha512-dsMnKF+4BVOZwvQDlgn3MG+Ns4JuLv8jNvXH56bgqrrboyCbI1rQg6EI5rs+8IYagVcfVP2yZFKfWNZy0rK0Hw==
+ dependencies:
+ jest-get-type "^26.0.0"
+ pretty-format "^26.1.0"
+
+jest-matcher-utils@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-26.1.0.tgz#cf75a41bd413dda784f022de5a65a2a5c73a5c92"
+ integrity sha512-PW9JtItbYvES/xLn5mYxjMd+Rk+/kIt88EfH3N7w9KeOrHWaHrdYPnVHndGbsFGRJ2d5gKtwggCvkqbFDoouQA==
+ dependencies:
+ chalk "^4.0.0"
+ jest-diff "^26.1.0"
+ jest-get-type "^26.0.0"
+ pretty-format "^26.1.0"
+
+jest-message-util@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-message-util/-/jest-message-util-26.1.0.tgz#52573fbb8f5cea443c4d1747804d7a238a3e233c"
+ integrity sha512-dY0+UlldiAJwNDJ08SF0HdF32g9PkbF2NRK/+2iMPU40O6q+iSn1lgog/u0UH8ksWoPv0+gNq8cjhYO2MFtT0g==
+ dependencies:
+ "@babel/code-frame" "^7.0.0"
+ "@jest/types" "^26.1.0"
+ "@types/stack-utils" "^1.0.1"
+ chalk "^4.0.0"
+ graceful-fs "^4.2.4"
+ micromatch "^4.0.2"
+ slash "^3.0.0"
+ stack-utils "^2.0.2"
+
+jest-mock@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-mock/-/jest-mock-26.1.0.tgz#80d8286da1f05a345fbad1bfd6fa49a899465d3d"
+ integrity sha512-1Rm8EIJ3ZFA8yCIie92UbxZWj9SuVmUGcyhLHyAhY6WI3NIct38nVcfOPWhJteqSn8V8e3xOMha9Ojfazfpovw==
+ dependencies:
+ "@jest/types" "^26.1.0"
+
+jest-pnp-resolver@^1.2.1:
+ version "1.2.2"
+ resolved "https://registry.yarnpkg.com/jest-pnp-resolver/-/jest-pnp-resolver-1.2.2.tgz#b704ac0ae028a89108a4d040b3f919dfddc8e33c"
+ integrity sha512-olV41bKSMm8BdnuMsewT4jqlZ8+3TCARAXjZGT9jcoSnrfUnRCqnMoF9XEeoWjbzObpqF9dRhHQj0Xb9QdF6/w==
+
+jest-regex-util@^26.0.0:
+ version "26.0.0"
+ resolved "https://registry.yarnpkg.com/jest-regex-util/-/jest-regex-util-26.0.0.tgz#d25e7184b36e39fd466c3bc41be0971e821fee28"
+ integrity sha512-Gv3ZIs/nA48/Zvjrl34bf+oD76JHiGDUxNOVgUjh3j890sblXryjY4rss71fPtD/njchl6PSE2hIhvyWa1eT0A==
+
+jest-resolve-dependencies@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-resolve-dependencies/-/jest-resolve-dependencies-26.1.0.tgz#1ce36472f864a5dadf7dc82fa158e1c77955691b"
+ integrity sha512-fQVEPHHQ1JjHRDxzlLU/buuQ9om+hqW6Vo928aa4b4yvq4ZHBtRSDsLdKQLuCqn5CkTVpYZ7ARh2fbA8WkRE6g==
+ dependencies:
+ "@jest/types" "^26.1.0"
+ jest-regex-util "^26.0.0"
+ jest-snapshot "^26.1.0"
+
+jest-resolve@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-26.1.0.tgz#a530eaa302b1f6fa0479079d1561dd69abc00e68"
+ integrity sha512-KsY1JV9FeVgEmwIISbZZN83RNGJ1CC+XUCikf/ZWJBX/tO4a4NvA21YixokhdR9UnmPKKAC4LafVixJBrwlmfg==
+ dependencies:
+ "@jest/types" "^26.1.0"
+ chalk "^4.0.0"
+ graceful-fs "^4.2.4"
+ jest-pnp-resolver "^1.2.1"
+ jest-util "^26.1.0"
+ read-pkg-up "^7.0.1"
+ resolve "^1.17.0"
+ slash "^3.0.0"
+
+jest-runner@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-runner/-/jest-runner-26.1.0.tgz#457f7fc522afe46ca6db1dccf19f87f500b3288d"
+ integrity sha512-elvP7y0fVDREnfqit0zAxiXkDRSw6dgCkzPCf1XvIMnSDZ8yogmSKJf192dpOgnUVykmQXwYYJnCx641uLTgcw==
+ dependencies:
+ "@jest/console" "^26.1.0"
+ "@jest/environment" "^26.1.0"
+ "@jest/test-result" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ chalk "^4.0.0"
+ exit "^0.1.2"
+ graceful-fs "^4.2.4"
+ jest-config "^26.1.0"
+ jest-docblock "^26.0.0"
+ jest-haste-map "^26.1.0"
+ jest-jasmine2 "^26.1.0"
+ jest-leak-detector "^26.1.0"
+ jest-message-util "^26.1.0"
+ jest-resolve "^26.1.0"
+ jest-runtime "^26.1.0"
+ jest-util "^26.1.0"
+ jest-worker "^26.1.0"
+ source-map-support "^0.5.6"
+ throat "^5.0.0"
+
+jest-runtime@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-runtime/-/jest-runtime-26.1.0.tgz#45a37af42115f123ed5c51f126c05502da2469cb"
+ integrity sha512-1qiYN+EZLmG1QV2wdEBRf+Ci8i3VSfIYLF02U18PiUDrMbhfpN/EAMMkJtT02jgJUoaEOpHAIXG6zS3QRMzRmA==
+ dependencies:
+ "@jest/console" "^26.1.0"
+ "@jest/environment" "^26.1.0"
+ "@jest/fake-timers" "^26.1.0"
+ "@jest/globals" "^26.1.0"
+ "@jest/source-map" "^26.1.0"
+ "@jest/test-result" "^26.1.0"
+ "@jest/transform" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ "@types/yargs" "^15.0.0"
+ chalk "^4.0.0"
+ collect-v8-coverage "^1.0.0"
+ exit "^0.1.2"
+ glob "^7.1.3"
+ graceful-fs "^4.2.4"
+ jest-config "^26.1.0"
+ jest-haste-map "^26.1.0"
+ jest-message-util "^26.1.0"
+ jest-mock "^26.1.0"
+ jest-regex-util "^26.0.0"
+ jest-resolve "^26.1.0"
+ jest-snapshot "^26.1.0"
+ jest-util "^26.1.0"
+ jest-validate "^26.1.0"
+ slash "^3.0.0"
+ strip-bom "^4.0.0"
+ yargs "^15.3.1"
+
+jest-serializer@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-serializer/-/jest-serializer-26.1.0.tgz#72a394531fc9b08e173dc7d297440ac610d95022"
+ integrity sha512-eqZOQG/0+MHmr25b2Z86g7+Kzd5dG9dhCiUoyUNJPgiqi38DqbDEOlHcNijyfZoj74soGBohKBZuJFS18YTJ5w==
+ dependencies:
+ graceful-fs "^4.2.4"
+
+jest-snapshot@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-snapshot/-/jest-snapshot-26.1.0.tgz#c36ed1e0334bd7bd2fe5ad07e93a364ead7e1349"
+ integrity sha512-YhSbU7eMTVQO/iRbNs8j0mKRxGp4plo7sJ3GzOQ0IYjvsBiwg0T1o0zGQAYepza7lYHuPTrG5J2yDd0CE2YxSw==
+ dependencies:
+ "@babel/types" "^7.0.0"
+ "@jest/types" "^26.1.0"
+ "@types/prettier" "^2.0.0"
+ chalk "^4.0.0"
+ expect "^26.1.0"
+ graceful-fs "^4.2.4"
+ jest-diff "^26.1.0"
+ jest-get-type "^26.0.0"
+ jest-haste-map "^26.1.0"
+ jest-matcher-utils "^26.1.0"
+ jest-message-util "^26.1.0"
+ jest-resolve "^26.1.0"
+ natural-compare "^1.4.0"
+ pretty-format "^26.1.0"
+ semver "^7.3.2"
+
+jest-util@26.x, jest-util@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-26.1.0.tgz#80e85d4ba820decacf41a691c2042d5276e5d8d8"
+ integrity sha512-rNMOwFQevljfNGvbzNQAxdmXQ+NawW/J72dmddsK0E8vgxXCMtwQ/EH0BiWEIxh0hhMcTsxwAxINt7Lh46Uzbg==
+ dependencies:
+ "@jest/types" "^26.1.0"
+ chalk "^4.0.0"
+ graceful-fs "^4.2.4"
+ is-ci "^2.0.0"
+ micromatch "^4.0.2"
+
+jest-validate@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-26.1.0.tgz#942c85ad3d60f78250c488a7f85d8f11a29788e7"
+ integrity sha512-WPApOOnXsiwhZtmkDsxnpye+XLb/tUISP+H6cHjfUIXvlG+eKwP+isnivsxlHCPaO9Q5wvbhloIBkdF3qUn+Nw==
+ dependencies:
+ "@jest/types" "^26.1.0"
+ camelcase "^6.0.0"
+ chalk "^4.0.0"
+ jest-get-type "^26.0.0"
+ leven "^3.1.0"
+ pretty-format "^26.1.0"
+
+jest-watcher@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-watcher/-/jest-watcher-26.1.0.tgz#99812a0cd931f0cb3d153180426135ab83e4d8f2"
+ integrity sha512-ffEOhJl2EvAIki613oPsSG11usqnGUzIiK7MMX6hE4422aXOcVEG3ySCTDFLn1+LZNXGPE8tuJxhp8OBJ1pgzQ==
+ dependencies:
+ "@jest/test-result" "^26.1.0"
+ "@jest/types" "^26.1.0"
+ ansi-escapes "^4.2.1"
+ chalk "^4.0.0"
+ jest-util "^26.1.0"
+ string-length "^4.0.1"
+
+jest-worker@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-26.1.0.tgz#65d5641af74e08ccd561c240e7db61284f82f33d"
+ integrity sha512-Z9P5pZ6UC+kakMbNJn+tA2RdVdNX5WH1x+5UCBZ9MxIK24pjYtFt96fK+UwBTrjLYm232g1xz0L3eTh51OW+yQ==
+ dependencies:
+ merge-stream "^2.0.0"
+ supports-color "^7.0.0"
+
+jest@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/jest/-/jest-26.1.0.tgz#2f3aa7bcffb9bfd025473f83bbbf46a3af026263"
+ integrity sha512-LIti8jppw5BcQvmNJe4w2g1N/3V68HUfAv9zDVm7v+VAtQulGhH0LnmmiVkbNE4M4I43Bj2fXPiBGKt26k9tHw==
+ dependencies:
+ "@jest/core" "^26.1.0"
+ import-local "^3.0.2"
+ jest-cli "^26.1.0"
+
"js-tokens@^3.0.0 || ^4.0.0", js-tokens@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499"
@@ -1232,23 +3182,124 @@
argparse "^1.0.7"
esprima "^4.0.0"
+jsbn@~0.1.0:
+ version "0.1.1"
+ resolved "https://registry.yarnpkg.com/jsbn/-/jsbn-0.1.1.tgz#a5e654c2e5a2deb5f201d96cefbca80c0ef2f513"
+ integrity sha1-peZUwuWi3rXyAdls77yoDA7y9RM=
+
+jsdom@^16.2.2:
+ version "16.3.0"
+ resolved "https://registry.yarnpkg.com/jsdom/-/jsdom-16.3.0.tgz#75690b7dac36c67be49c336dcd7219bbbed0810c"
+ integrity sha512-zggeX5UuEknpdZzv15+MS1dPYG0J/TftiiNunOeNxSl3qr8Z6cIlQpN0IdJa44z9aFxZRIVqRncvEhQ7X5DtZg==
+ dependencies:
+ abab "^2.0.3"
+ acorn "^7.1.1"
+ acorn-globals "^6.0.0"
+ cssom "^0.4.4"
+ cssstyle "^2.2.0"
+ data-urls "^2.0.0"
+ decimal.js "^10.2.0"
+ domexception "^2.0.1"
+ escodegen "^1.14.1"
+ html-encoding-sniffer "^2.0.1"
+ is-potential-custom-element-name "^1.0.0"
+ nwsapi "^2.2.0"
+ parse5 "5.1.1"
+ request "^2.88.2"
+ request-promise-native "^1.0.8"
+ saxes "^5.0.0"
+ symbol-tree "^3.2.4"
+ tough-cookie "^3.0.1"
+ w3c-hr-time "^1.0.2"
+ w3c-xmlserializer "^2.0.0"
+ webidl-conversions "^6.1.0"
+ whatwg-encoding "^1.0.5"
+ whatwg-mimetype "^2.3.0"
+ whatwg-url "^8.0.0"
+ ws "^7.2.3"
+ xml-name-validator "^3.0.0"
+
+jsesc@^2.5.1:
+ version "2.5.2"
+ resolved "https://registry.yarnpkg.com/jsesc/-/jsesc-2.5.2.tgz#80564d2e483dacf6e8ef209650a67df3f0c283a4"
+ integrity sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==
+
+json-parse-better-errors@^1.0.1:
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz#bb867cfb3450e69107c131d1c514bab3dc8bcaa9"
+ integrity sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==
+
json-schema-traverse@^0.4.1:
version "0.4.1"
resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz#69f6a87d9513ab8bb8fe63bdb0979c448e684660"
integrity sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==
+json-schema@0.2.3:
+ version "0.2.3"
+ resolved "https://registry.yarnpkg.com/json-schema/-/json-schema-0.2.3.tgz#b480c892e59a2f05954ce727bd3f2a4e882f9e13"
+ integrity sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=
+
json-stable-stringify-without-jsonify@^1.0.1:
version "1.0.1"
resolved "https://registry.yarnpkg.com/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz#9db7b59496ad3f3cfef30a75142d2d930ad72651"
integrity sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=
-json5@^2.1.1:
+json-stringify-safe@~5.0.1:
+ version "5.0.1"
+ resolved "https://registry.yarnpkg.com/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz#1296a2d58fd45f19a0f6ce01d65701e2c735b6eb"
+ integrity sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=
+
+json5@2.x, json5@^2.1.1, json5@^2.1.2:
version "2.1.3"
resolved "https://registry.yarnpkg.com/json5/-/json5-2.1.3.tgz#c9b0f7fa9233bfe5807fe66fcf3a5617ed597d43"
integrity sha512-KXPvOm8K9IJKFM0bmdn8QXh7udDh1g/giieX0NLCaMnb4hEiVFqnop2ImTXCc5e0/oHz3LTqmHGtExn5hfMkOA==
dependencies:
minimist "^1.2.5"
+jsprim@^1.2.2:
+ version "1.4.1"
+ resolved "https://registry.yarnpkg.com/jsprim/-/jsprim-1.4.1.tgz#313e66bc1e5cc06e438bc1b7499c2e5c56acb6a2"
+ integrity sha1-MT5mvB5cwG5Di8G3SZwuXFastqI=
+ dependencies:
+ assert-plus "1.0.0"
+ extsprintf "1.3.0"
+ json-schema "0.2.3"
+ verror "1.10.0"
+
+kind-of@^3.0.2, kind-of@^3.0.3, kind-of@^3.2.0:
+ version "3.2.2"
+ resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-3.2.2.tgz#31ea21a734bab9bbb0f32466d893aea51e4a3c64"
+ integrity sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=
+ dependencies:
+ is-buffer "^1.1.5"
+
+kind-of@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-4.0.0.tgz#20813df3d712928b207378691a45066fae72dd57"
+ integrity sha1-IIE989cSkosgc3hpGkUGb65y3Vc=
+ dependencies:
+ is-buffer "^1.1.5"
+
+kind-of@^5.0.0:
+ version "5.1.0"
+ resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-5.1.0.tgz#729c91e2d857b7a419a1f9aa65685c4c33f5845d"
+ integrity sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==
+
+kind-of@^6.0.0, kind-of@^6.0.2:
+ version "6.0.3"
+ resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-6.0.3.tgz#07c05034a6c349fa06e24fa35aa76db4580ce4dd"
+ integrity sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==
+
+kleur@^3.0.3:
+ version "3.0.3"
+ resolved "https://registry.yarnpkg.com/kleur/-/kleur-3.0.3.tgz#a79c9ecc86ee1ce3fa6206d1216c501f147fc07e"
+ integrity sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==
+
+leven@^3.1.0:
+ version "3.1.0"
+ resolved "https://registry.yarnpkg.com/leven/-/leven-3.1.0.tgz#77891de834064cccba82ae7842bb6b14a13ed7f2"
+ integrity sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==
+
levn@^0.3.0, levn@~0.3.0:
version "0.3.0"
resolved "https://registry.yarnpkg.com/levn/-/levn-0.3.0.tgz#3b09924edf9f083c0490fdd4c0bc4421e04764ee"
@@ -1257,6 +3308,18 @@
prelude-ls "~1.1.2"
type-check "~0.3.2"
+lines-and-columns@^1.1.6:
+ version "1.1.6"
+ resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.1.6.tgz#1c00c743b433cd0a4e80758f7b64a57440d9ff00"
+ integrity sha1-HADHQ7QzzQpOgHWPe2SldEDZ/wA=
+
+locate-path@^5.0.0:
+ version "5.0.0"
+ resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-5.0.0.tgz#1afba396afd676a6d42504d0a67a3a7eb9f62aa0"
+ integrity sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==
+ dependencies:
+ p-locate "^4.1.0"
+
lodash.clonedeep@^4.5.0:
version "4.5.0"
resolved "https://registry.yarnpkg.com/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz#e23f3f9c4f8fbdde872529c1071857a086e5ccef"
@@ -1282,12 +3345,22 @@
resolved "https://registry.yarnpkg.com/lodash.isstring/-/lodash.isstring-4.0.1.tgz#d527dfb5456eca7cc9bb95d5daeaf88ba54a5451"
integrity sha1-1SfftUVuynzJu5XV2ur4i6VKVFE=
+lodash.memoize@4.x:
+ version "4.1.2"
+ resolved "https://registry.yarnpkg.com/lodash.memoize/-/lodash.memoize-4.1.2.tgz#bcc6c49a42a2840ed997f323eada5ecd182e0bfe"
+ integrity sha1-vMbEmkKihA7Zl/Mj6tpezRguC/4=
+
lodash.mergewith@^4.6.1:
version "4.6.2"
resolved "https://registry.yarnpkg.com/lodash.mergewith/-/lodash.mergewith-4.6.2.tgz#617121f89ac55f59047c7aec1ccd6654c6590f55"
integrity sha512-GK3g5RPZWTRSeLSpgP8Xhra+pnjBC56q9FZYe1d5RN3TJ35dbkGy3YqBSMbyCrlbi+CM9Z3Jk5yTL7RCsqboyQ==
-lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.16:
+lodash.sortby@^4.7.0:
+ version "4.7.0"
+ resolved "https://registry.yarnpkg.com/lodash.sortby/-/lodash.sortby-4.7.0.tgz#edd14c824e2cc9c1e0b0a1b42bb5210516a42438"
+ integrity sha1-7dFMgk4sycHgsKG0K7UhBRakJDg=
+
+lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.16, lodash@^4.17.19:
version "4.17.19"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.19.tgz#e48ddedbe30b3321783c5b4301fbd353bc1e4a4b"
integrity sha512-JNvd8XER9GQX0v2qJgsaN/mzFCNA5BRe/j8JN9d+tWyGLSodKQHKFicdwNYzWwI3wjRnaKPsGj1XkBjx/F96DQ==
@@ -1299,11 +3372,86 @@
dependencies:
js-tokens "^3.0.0 || ^4.0.0"
+make-dir@^3.0.0:
+ version "3.1.0"
+ resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-3.1.0.tgz#415e967046b3a7f1d185277d84aa58203726a13f"
+ integrity sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==
+ dependencies:
+ semver "^6.0.0"
+
+make-error@1.x:
+ version "1.3.6"
+ resolved "https://registry.yarnpkg.com/make-error/-/make-error-1.3.6.tgz#2eb2e37ea9b67c4891f684a1394799af484cf7a2"
+ integrity sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==
+
+makeerror@1.0.x:
+ version "1.0.11"
+ resolved "https://registry.yarnpkg.com/makeerror/-/makeerror-1.0.11.tgz#e01a5c9109f2af79660e4e8b9587790184f5a96c"
+ integrity sha1-4BpckQnyr3lmDk6LlYd5AYT1qWw=
+ dependencies:
+ tmpl "1.0.x"
+
+map-cache@^0.2.2:
+ version "0.2.2"
+ resolved "https://registry.yarnpkg.com/map-cache/-/map-cache-0.2.2.tgz#c32abd0bd6525d9b051645bb4f26ac5dc98a0dbf"
+ integrity sha1-wyq9C9ZSXZsFFkW7TyasXcmKDb8=
+
+map-visit@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/map-visit/-/map-visit-1.0.0.tgz#ecdca8f13144e660f1b5bd41f12f3479d98dfb8f"
+ integrity sha1-7Nyo8TFE5mDxtb1B8S80edmN+48=
+ dependencies:
+ object-visit "^1.0.0"
+
marked@^0.8.0:
version "0.8.2"
resolved "https://registry.yarnpkg.com/marked/-/marked-0.8.2.tgz#4faad28d26ede351a7a1aaa5fec67915c869e355"
integrity sha512-EGwzEeCcLniFX51DhTpmTom+dSA/MG/OBUDjnWtHbEnjAH180VzUeAw+oE4+Zv+CoYBWyRlYOTR0N8SO9R1PVw==
+merge-stream@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/merge-stream/-/merge-stream-2.0.0.tgz#52823629a14dd00c9770fb6ad47dc6310f2c1f60"
+ integrity sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==
+
+micromatch@^3.1.4:
+ version "3.1.10"
+ resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-3.1.10.tgz#70859bc95c9840952f359a068a3fc49f9ecfac23"
+ integrity sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==
+ dependencies:
+ arr-diff "^4.0.0"
+ array-unique "^0.3.2"
+ braces "^2.3.1"
+ define-property "^2.0.2"
+ extend-shallow "^3.0.2"
+ extglob "^2.0.4"
+ fragment-cache "^0.2.1"
+ kind-of "^6.0.2"
+ nanomatch "^1.2.9"
+ object.pick "^1.3.0"
+ regex-not "^1.0.0"
+ snapdragon "^0.8.1"
+ to-regex "^3.0.2"
+
+micromatch@^4.0.2:
+ version "4.0.2"
+ resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.2.tgz#4fcb0999bf9fbc2fcbdd212f6d629b9a56c39259"
+ integrity sha512-y7FpHSbMUMoyPbYUSzO6PaZ6FyRnQOpHuKwbo1G+Knck95XVU4QAiKdGEnj5wwoS7PlOgthX/09u5iFJ+aYf5Q==
+ dependencies:
+ braces "^3.0.1"
+ picomatch "^2.0.5"
+
+mime-db@1.44.0:
+ version "1.44.0"
+ resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.44.0.tgz#fa11c5eb0aca1334b4233cb4d52f10c5a6272f92"
+ integrity sha512-/NOTfLrsPBVeH7YtFPgsVWveuL+4SjjYxaQ1xtM1KMFj7HdxlBlxeyNLzhyJVx7r4rZGJAZ/6lkKCitSc/Nmpg==
+
+mime-types@^2.1.12, mime-types@~2.1.19:
+ version "2.1.27"
+ resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.27.tgz#47949f98e279ea53119f5722e0f34e529bec009f"
+ integrity sha512-JIhqnCasI9yD+SsmkquHBxTSEuZdQX5BuQnS2Vc7puQQQ+8yiP5AY5uWhpdv4YL4VM5c6iliiYWPgJ/nJQLp7w==
+ dependencies:
+ mime-db "1.44.0"
+
mimic-fn@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/mimic-fn/-/mimic-fn-2.1.0.tgz#7ed2c2ccccaf84d3ffcb7a69b57711fc2083401b"
@@ -1316,11 +3464,24 @@
dependencies:
brace-expansion "^1.1.7"
-minimist@^1.2.5, minimist@~1.2.0:
+minimist@^1.1.1, minimist@^1.2.0, minimist@^1.2.5, minimist@~1.2.0:
version "1.2.5"
resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602"
integrity sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==
+mixin-deep@^1.2.0:
+ version "1.3.2"
+ resolved "https://registry.yarnpkg.com/mixin-deep/-/mixin-deep-1.3.2.tgz#1120b43dc359a785dce65b55b82e257ccf479566"
+ integrity sha512-WRoDn//mXBiJ1H40rqa3vH0toePwSsGb45iInWlTySa+Uu4k3tYUSxa2v1KqAiLtvlrSzaExqS1gtk96A9zvEA==
+ dependencies:
+ for-in "^1.0.2"
+ is-extendable "^1.0.1"
+
+mkdirp@1.x:
+ version "1.0.4"
+ resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e"
+ integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==
+
mkdirp@^0.5.1:
version "0.5.5"
resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.5.tgz#d91cefd62d1436ca0f41620e251288d420099def"
@@ -1333,6 +3494,11 @@
resolved "https://registry.yarnpkg.com/moment/-/moment-2.27.0.tgz#8bff4e3e26a236220dfe3e36de756b6ebaa0105d"
integrity sha512-al0MUK7cpIcglMv3YF13qSgdAIqxHTO7brRtaz3DlSULbqfazqkc5kEjNrLDOM7fsjshoFIihnU8snrP7zUvhQ==
+ms@2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8"
+ integrity sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=
+
ms@^2.1.1:
version "2.1.2"
resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009"
@@ -1343,6 +3509,23 @@
resolved "https://registry.yarnpkg.com/mute-stream/-/mute-stream-0.0.8.tgz#1630c42b2251ff81e2a283de96a5497ea92e5e0d"
integrity sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA==
+nanomatch@^1.2.9:
+ version "1.2.13"
+ resolved "https://registry.yarnpkg.com/nanomatch/-/nanomatch-1.2.13.tgz#b87a8aa4fc0de8fe6be88895b38983ff265bd119"
+ integrity sha512-fpoe2T0RbHwBTBUOftAfBPaDEi06ufaUai0mE6Yn1kacc3SnTErfb/h+X94VXzI64rKFHYImXSvdwGGCmwOqCA==
+ dependencies:
+ arr-diff "^4.0.0"
+ array-unique "^0.3.2"
+ define-property "^2.0.2"
+ extend-shallow "^3.0.2"
+ fragment-cache "^0.2.1"
+ is-windows "^1.0.2"
+ kind-of "^6.0.2"
+ object.pick "^1.3.0"
+ regex-not "^1.0.0"
+ snapdragon "^0.8.1"
+ to-regex "^3.0.1"
+
natural-compare@^1.4.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7"
@@ -1358,21 +3541,98 @@
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.0.tgz#e633456386d4aa55863f676a7ab0daa8fdecb0fd"
integrity sha512-8dG4H5ujfvFiqDmVu9fQ5bOHUC15JMjMY/Zumv26oOvvVJjM67KF8koCWIabKQ1GJIa9r2mMZscBq/TbdOcmNA==
+node-int64@^0.4.0:
+ version "0.4.0"
+ resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b"
+ integrity sha1-h6kGXNs1XTGC2PlM4RGIuCXGijs=
+
+node-modules-regexp@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/node-modules-regexp/-/node-modules-regexp-1.0.0.tgz#8d9dbe28964a4ac5712e9131642107c71e90ec40"
+ integrity sha1-jZ2+KJZKSsVxLpExZCEHxx6Q7EA=
+
+node-notifier@^7.0.0:
+ version "7.0.2"
+ resolved "https://registry.yarnpkg.com/node-notifier/-/node-notifier-7.0.2.tgz#3a70b1b70aca5e919d0b1b022530697466d9c675"
+ integrity sha512-ux+n4hPVETuTL8+daJXTOC6uKLgMsl1RYfFv7DKRzyvzBapqco0rZZ9g72ZN8VS6V+gvNYHYa/ofcCY8fkJWsA==
+ dependencies:
+ growly "^1.3.0"
+ is-wsl "^2.2.0"
+ semver "^7.3.2"
+ shellwords "^0.1.1"
+ uuid "^8.2.0"
+ which "^2.0.2"
+
+normalize-package-data@^2.5.0:
+ version "2.5.0"
+ resolved "https://registry.yarnpkg.com/normalize-package-data/-/normalize-package-data-2.5.0.tgz#e66db1838b200c1dfc233225d12cb36520e234a8"
+ integrity sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA==
+ dependencies:
+ hosted-git-info "^2.1.4"
+ resolve "^1.10.0"
+ semver "2 || 3 || 4 || 5"
+ validate-npm-package-license "^3.0.1"
+
+normalize-path@^2.1.1:
+ version "2.1.1"
+ resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-2.1.1.tgz#1ab28b556e198363a8c1a6f7e6fa20137fe6aed9"
+ integrity sha1-GrKLVW4Zg2Oowab35vogE3/mrtk=
+ dependencies:
+ remove-trailing-separator "^1.0.1"
+
+normalize-path@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-3.0.0.tgz#0dcd69ff23a1c9b11fd0978316644a0388216a65"
+ integrity sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==
+
normalize.css@^8.0.1:
version "8.0.1"
resolved "https://registry.yarnpkg.com/normalize.css/-/normalize.css-8.0.1.tgz#9b98a208738b9cc2634caacbc42d131c97487bf3"
integrity sha512-qizSNPO93t1YUuUhP22btGOo3chcvDFqFaj2TRybP0DMxkHOCTYwp3n34fel4a31ORXy4m1Xq0Gyqpb5m33qIg==
+npm-run-path@^2.0.0:
+ version "2.0.2"
+ resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-2.0.2.tgz#35a9232dfa35d7067b4cb2ddf2357b1871536c5f"
+ integrity sha1-NakjLfo11wZ7TLLd8jV7GHFTbF8=
+ dependencies:
+ path-key "^2.0.0"
+
+npm-run-path@^4.0.0:
+ version "4.0.1"
+ resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-4.0.1.tgz#b7ecd1e5ed53da8e37a55e1c2269e0b97ed748ea"
+ integrity sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==
+ dependencies:
+ path-key "^3.0.0"
+
number-is-nan@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/number-is-nan/-/number-is-nan-1.0.1.tgz#097b602b53422a522c1afb8790318336941a011d"
integrity sha1-CXtgK1NCKlIsGvuHkDGDNpQaAR0=
+nwsapi@^2.2.0:
+ version "2.2.0"
+ resolved "https://registry.yarnpkg.com/nwsapi/-/nwsapi-2.2.0.tgz#204879a9e3d068ff2a55139c2c772780681a38b7"
+ integrity sha512-h2AatdwYH+JHiZpv7pt/gSX1XoRGb7L/qSIeuqA6GwYoF9w1vP1cw42TO0aI2pNyshRK5893hNSl+1//vHK7hQ==
+
+oauth-sign@~0.9.0:
+ version "0.9.0"
+ resolved "https://registry.yarnpkg.com/oauth-sign/-/oauth-sign-0.9.0.tgz#47a7b016baa68b5fa0ecf3dee08a85c679ac6455"
+ integrity sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==
+
object-assign@^4.1.1:
version "4.1.1"
resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863"
integrity sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=
+object-copy@^0.1.0:
+ version "0.1.0"
+ resolved "https://registry.yarnpkg.com/object-copy/-/object-copy-0.1.0.tgz#7e7d858b781bd7c991a41ba975ed3812754e998c"
+ integrity sha1-fn2Fi3gb18mRpBupde04EnVOmYw=
+ dependencies:
+ copy-descriptor "^0.1.0"
+ define-property "^0.2.5"
+ kind-of "^3.0.3"
+
object-inspect@^1.7.0:
version "1.8.0"
resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.8.0.tgz#df807e5ecf53a609cc6bfe93eac3cc7be5b3a9d0"
@@ -1391,6 +3651,13 @@
resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e"
integrity sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==
+object-visit@^1.0.0:
+ version "1.0.1"
+ resolved "https://registry.yarnpkg.com/object-visit/-/object-visit-1.0.1.tgz#f79c4493af0c5377b59fe39d395e41042dd045bb"
+ integrity sha1-95xEk68MU3e1n+OdOV5BBC3QRbs=
+ dependencies:
+ isobject "^3.0.0"
+
object.assign@^4.1.0:
version "4.1.0"
resolved "https://registry.yarnpkg.com/object.assign/-/object.assign-4.1.0.tgz#968bf1100d7956bb3ca086f006f846b3bc4008da"
@@ -1401,7 +3668,14 @@
has-symbols "^1.0.0"
object-keys "^1.0.11"
-once@^1.3.0:
+object.pick@^1.3.0:
+ version "1.3.0"
+ resolved "https://registry.yarnpkg.com/object.pick/-/object.pick-1.3.0.tgz#87a10ac4c1694bd2e1cbf53591a66141fb5dd747"
+ integrity sha1-h6EKxMFpS9Lhy/U1kaZhQftd10c=
+ dependencies:
+ isobject "^3.0.1"
+
+once@^1.3.0, once@^1.3.1, once@^1.4.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1"
integrity sha1-WDsap3WWHUsROsF9nFC6753Xa9E=
@@ -1415,7 +3689,7 @@
dependencies:
mimic-fn "^2.1.0"
-optionator@^0.8.3:
+optionator@^0.8.1, optionator@^0.8.3:
version "0.8.3"
resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.8.3.tgz#84fa1d036fe9d3c7e21d99884b601167ec8fb495"
integrity sha512-+IW9pACdk3XWmmTXG8m3upGUJst5XRGzxMRjXzAuJ1XnIFNvfhjjIuYkDvysnPQ7qzqVzLt78BCruntqRhWQbA==
@@ -1432,6 +3706,35 @@
resolved "https://registry.yarnpkg.com/os-tmpdir/-/os-tmpdir-1.0.2.tgz#bbe67406c79aa85c5cfec766fe5734555dfa1274"
integrity sha1-u+Z0BseaqFxc/sdm/lc0VV36EnQ=
+p-each-series@^2.1.0:
+ version "2.1.0"
+ resolved "https://registry.yarnpkg.com/p-each-series/-/p-each-series-2.1.0.tgz#961c8dd3f195ea96c747e636b262b800a6b1af48"
+ integrity sha512-ZuRs1miPT4HrjFa+9fRfOFXxGJfORgelKV9f9nNOWw2gl6gVsRaVDOQP0+MI0G0wGKns1Yacsu0GjOFbTK0JFQ==
+
+p-finally@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/p-finally/-/p-finally-1.0.0.tgz#3fbcfb15b899a44123b34b6dcc18b724336a2cae"
+ integrity sha1-P7z7FbiZpEEjs0ttzBi3JDNqLK4=
+
+p-limit@^2.2.0:
+ version "2.3.0"
+ resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-2.3.0.tgz#3dd33c647a214fdfffd835933eb086da0dc21db1"
+ integrity sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==
+ dependencies:
+ p-try "^2.0.0"
+
+p-locate@^4.1.0:
+ version "4.1.0"
+ resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-4.1.0.tgz#a3428bb7088b3a60292f66919278b7c297ad4f07"
+ integrity sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==
+ dependencies:
+ p-limit "^2.2.0"
+
+p-try@^2.0.0:
+ version "2.2.0"
+ resolved "https://registry.yarnpkg.com/p-try/-/p-try-2.2.0.tgz#cb2868540e313d61de58fafbe35ce9004d5540e6"
+ integrity sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==
+
parent-module@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/parent-module/-/parent-module-1.0.1.tgz#691d2709e78c79fae3a156622452d00762caaaa2"
@@ -1439,26 +3742,90 @@
dependencies:
callsites "^3.0.0"
+parse-json@^5.0.0:
+ version "5.0.1"
+ resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-5.0.1.tgz#7cfe35c1ccd641bce3981467e6c2ece61b3b3878"
+ integrity sha512-ztoZ4/DYeXQq4E21v169sC8qWINGpcosGv9XhTDvg9/hWvx/zrFkc9BiWxR58OJLHGk28j5BL0SDLeV2WmFZlQ==
+ dependencies:
+ "@babel/code-frame" "^7.0.0"
+ error-ex "^1.3.1"
+ json-parse-better-errors "^1.0.1"
+ lines-and-columns "^1.1.6"
+
+parse5@5.1.1:
+ version "5.1.1"
+ resolved "https://registry.yarnpkg.com/parse5/-/parse5-5.1.1.tgz#f68e4e5ba1852ac2cadc00f4555fff6c2abb6178"
+ integrity sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug==
+
+pascalcase@^0.1.1:
+ version "0.1.1"
+ resolved "https://registry.yarnpkg.com/pascalcase/-/pascalcase-0.1.1.tgz#b363e55e8006ca6fe21784d2db22bd15d7917f14"
+ integrity sha1-s2PlXoAGym/iF4TS2yK9FdeRfxQ=
+
+path-exists@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-4.0.0.tgz#513bdbe2d3b95d7762e8c1137efa195c6c61b5b3"
+ integrity sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==
+
path-is-absolute@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f"
integrity sha1-F0uSaHNVNP+8es5r9TpanhtcX18=
-path-key@^2.0.1:
+path-key@^2.0.0, path-key@^2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/path-key/-/path-key-2.0.1.tgz#411cadb574c5a140d3a4b1910d40d80cc9f40b40"
integrity sha1-QRyttXTFoUDTpLGRDUDYDMn0C0A=
+path-key@^3.0.0, path-key@^3.1.0:
+ version "3.1.1"
+ resolved "https://registry.yarnpkg.com/path-key/-/path-key-3.1.1.tgz#581f6ade658cbba65a0d3380de7753295054f375"
+ integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==
+
+path-parse@^1.0.6:
+ version "1.0.6"
+ resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c"
+ integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==
+
path-posix@~1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/path-posix/-/path-posix-1.0.0.tgz#06b26113f56beab042545a23bfa88003ccac260f"
integrity sha1-BrJhE/Vr6rBCVFojv6iAA8ysJg8=
+performance-now@^2.1.0:
+ version "2.1.0"
+ resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b"
+ integrity sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=
+
+picomatch@^2.0.4, picomatch@^2.0.5:
+ version "2.2.2"
+ resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.2.2.tgz#21f333e9b6b8eaff02468f5146ea406d345f4dad"
+ integrity sha512-q0M/9eZHzmr0AulXyPwNfZjtwZ/RBZlbN3K3CErVrk50T2ASYI7Bye0EvekFY3IP1Nt2DHu0re+V2ZHIpMkuWg==
+
+pirates@^4.0.1:
+ version "4.0.1"
+ resolved "https://registry.yarnpkg.com/pirates/-/pirates-4.0.1.tgz#643a92caf894566f91b2b986d2c66950a8e2fb87"
+ integrity sha512-WuNqLTbMI3tmfef2TKxlQmAiLHKtFhlsCZnPIpuv2Ow0RDVO8lfy1Opf4NUzlMXLjPl+Men7AuVdX6TA+s+uGA==
+ dependencies:
+ node-modules-regexp "^1.0.0"
+
+pkg-dir@^4.2.0:
+ version "4.2.0"
+ resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-4.2.0.tgz#f099133df7ede422e81d1d8448270eeb3e4261f3"
+ integrity sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==
+ dependencies:
+ find-up "^4.0.0"
+
popper.js@^1.14.4, popper.js@^1.16.1:
version "1.16.1"
resolved "https://registry.yarnpkg.com/popper.js/-/popper.js-1.16.1.tgz#2a223cb3dc7b6213d740e40372be40de43e65b1b"
integrity sha512-Wb4p1J4zyFTbM+u6WuO4XstYx4Ky9Cewe4DWrel7B0w6VVICvPwdOpotjzcf6eD8TsckVnIMNONQyPIUFOUbCQ==
+posix-character-classes@^0.1.0:
+ version "0.1.1"
+ resolved "https://registry.yarnpkg.com/posix-character-classes/-/posix-character-classes-0.1.1.tgz#01eac0fe3b5af71a2a6c02feabb8c1fef7e00eab"
+ integrity sha1-AerA/jta9xoqbAL+q7jB/vfgDqs=
+
postcss@^7.0.5:
version "7.0.32"
resolved "https://registry.yarnpkg.com/postcss/-/postcss-7.0.32.tgz#4310d6ee347053da3433db2be492883d62cec59d"
@@ -1480,16 +3847,44 @@
dependencies:
fast-diff "^1.1.2"
-prettier@1.16.4:
- version "1.16.4"
- resolved "https://registry.yarnpkg.com/prettier/-/prettier-1.16.4.tgz#73e37e73e018ad2db9c76742e2647e21790c9717"
- integrity sha512-ZzWuos7TI5CKUeQAtFd6Zhm2s6EpAD/ZLApIhsF9pRvRtM1RFo61dM/4MSRUA0SuLugA/zgrZD8m0BaY46Og7g==
+prettier@^1.19.0:
+ version "1.19.1"
+ resolved "https://registry.yarnpkg.com/prettier/-/prettier-1.19.1.tgz#f7d7f5ff8a9cd872a7be4ca142095956a60797cb"
+ integrity sha512-s7PoyDv/II1ObgQunCbB9PdLmUcBZcnWOcxDh7O0N/UwDEsHyqkW+Qh28jW+mVuCdx7gLB0BotYI1Y6uI9iyew==
+
+pretty-format@^25.2.1, pretty-format@^25.5.0:
+ version "25.5.0"
+ resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-25.5.0.tgz#7873c1d774f682c34b8d48b6743a2bf2ac55791a"
+ integrity sha512-kbo/kq2LQ/A/is0PQwsEHM7Ca6//bGPPvU6UnsdDRSKTWxT/ru/xb88v4BJf6a69H+uTytOEsTusT9ksd/1iWQ==
+ dependencies:
+ "@jest/types" "^25.5.0"
+ ansi-regex "^5.0.0"
+ ansi-styles "^4.0.0"
+ react-is "^16.12.0"
+
+pretty-format@^26.1.0:
+ version "26.1.0"
+ resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-26.1.0.tgz#272b9cd1f1a924ab5d443dc224899d7a65cb96ec"
+ integrity sha512-GmeO1PEYdM+non4BKCj+XsPJjFOJIPnsLewqhDVoqY1xo0yNmDas7tC2XwpMrRAHR3MaE2hPo37deX5OisJ2Wg==
+ dependencies:
+ "@jest/types" "^26.1.0"
+ ansi-regex "^5.0.0"
+ ansi-styles "^4.0.0"
+ react-is "^16.12.0"
progress@^2.0.0:
version "2.0.3"
resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"
integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==
+prompts@^2.0.1:
+ version "2.3.2"
+ resolved "https://registry.yarnpkg.com/prompts/-/prompts-2.3.2.tgz#480572d89ecf39566d2bd3fe2c9fccb7c4c0b068"
+ integrity sha512-Q06uKs2CkNYVID0VqwfAl9mipo99zkBv/n2JtWY89Yxa3ZabWSrs0e2KTudKVa3peLUvYXMefDqIleLPVUBZMA==
+ dependencies:
+ kleur "^3.0.3"
+ sisteransi "^1.0.4"
+
prop-types@^15.6.1, prop-types@^15.6.2:
version "15.7.2"
resolved "https://registry.yarnpkg.com/prop-types/-/prop-types-15.7.2.tgz#52c41e75b8c87e72b9d9360e0206b99dcbffa6c5"
@@ -1499,11 +3894,29 @@
object-assign "^4.1.1"
react-is "^16.8.1"
-punycode@^2.1.0:
+psl@^1.1.28:
+ version "1.8.0"
+ resolved "https://registry.yarnpkg.com/psl/-/psl-1.8.0.tgz#9326f8bcfb013adcc005fdff056acce020e51c24"
+ integrity sha512-RIdOzyoavK+hA18OGGWDqUTsCLhtA7IcZ/6NCs4fFJaHBDab+pDDmDIByWFRQJq2Cd7r1OoQxBGKOaztq+hjIQ==
+
+pump@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/pump/-/pump-3.0.0.tgz#b4a2116815bde2f4e1ea602354e8c75565107a64"
+ integrity sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==
+ dependencies:
+ end-of-stream "^1.1.0"
+ once "^1.3.1"
+
+punycode@^2.1.0, punycode@^2.1.1:
version "2.1.1"
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==
+qs@~6.5.2:
+ version "6.5.2"
+ resolved "https://registry.yarnpkg.com/qs/-/qs-6.5.2.tgz#cb3ae806e8740444584ef154ce8ee98d403f3e36"
+ integrity sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA==
+
querystringify@^2.1.1:
version "2.1.1"
resolved "https://registry.yarnpkg.com/querystringify/-/querystringify-2.1.1.tgz#60e5a5fd64a7f8bfa4d2ab2ed6fdf4c85bad154e"
@@ -1519,7 +3932,7 @@
prop-types "^15.6.2"
scheduler "^0.15.0"
-react-is@^16.8.1:
+react-is@^16.12.0, react-is@^16.8.1:
version "16.13.1"
resolved "https://registry.yarnpkg.com/react-is/-/react-is-16.13.1.tgz#789729a4dc36de2999dc156dd6c1d9c18cea56a4"
integrity sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==
@@ -1561,6 +3974,25 @@
object-assign "^4.1.1"
prop-types "^15.6.2"
+read-pkg-up@^7.0.1:
+ version "7.0.1"
+ resolved "https://registry.yarnpkg.com/read-pkg-up/-/read-pkg-up-7.0.1.tgz#f3a6135758459733ae2b95638056e1854e7ef507"
+ integrity sha512-zK0TB7Xd6JpCLmlLmufqykGE+/TlOePD6qKClNW7hHDKFh/J7/7gCWGR7joEQEW1bKq3a3yUZSObOoWLFQ4ohg==
+ dependencies:
+ find-up "^4.1.0"
+ read-pkg "^5.2.0"
+ type-fest "^0.8.1"
+
+read-pkg@^5.2.0:
+ version "5.2.0"
+ resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-5.2.0.tgz#7bf295438ca5a33e56cd30e053b34ee7250c93cc"
+ integrity sha512-Ug69mNOpfvKDAc2Q8DRpMjjzdtrnv9HcSMX+4VsZxD1aZ6ZzrIE7rlzXBtWTyhULSMKg076AW6WR5iZpD0JiOg==
+ dependencies:
+ "@types/normalize-package-data" "^2.4.0"
+ normalize-package-data "^2.5.0"
+ parse-json "^5.0.0"
+ type-fest "^0.6.0"
+
readable-stream@^3.1.1:
version "3.6.0"
resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198"
@@ -1575,6 +4007,14 @@
resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.13.5.tgz#d878a1d094b4306d10b9096484b33ebd55e26697"
integrity sha512-ZS5w8CpKFinUzOwW3c83oPeVXoNsrLsaCoLtJvAClH135j/R77RuymhiSErhm2lKcwSCIpmvIWSbDkIfAqKQlA==
+regex-not@^1.0.0, regex-not@^1.0.2:
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/regex-not/-/regex-not-1.0.2.tgz#1f4ece27e00b0b65e0247a6810e6a85d83a5752c"
+ integrity sha512-J6SDjUgDxQj5NusnOtdFxDwN/+HWykR8GELwctJ7mdqhcyy1xEc4SRFHUXvxTp661YaVKAjfRLZ9cCqS6tn32A==
+ dependencies:
+ extend-shallow "^3.0.2"
+ safe-regex "^1.1.0"
+
regexp.prototype.flags@^1.2.0:
version "1.3.0"
resolved "https://registry.yarnpkg.com/regexp.prototype.flags/-/regexp.prototype.flags-1.3.0.tgz#7aba89b3c13a64509dabcf3ca8d9fbb9bdf5cb75"
@@ -1593,6 +4033,73 @@
resolved "https://registry.yarnpkg.com/regexpp/-/regexpp-3.1.0.tgz#206d0ad0a5648cffbdb8ae46438f3dc51c9f78e2"
integrity sha512-ZOIzd8yVsQQA7j8GCSlPGXwg5PfmA1mrq0JP4nGhh54LaKN3xdai/vHUDu74pKwV8OxseMS65u2NImosQcSD0Q==
+remove-trailing-separator@^1.0.1:
+ version "1.1.0"
+ resolved "https://registry.yarnpkg.com/remove-trailing-separator/-/remove-trailing-separator-1.1.0.tgz#c24bce2a283adad5bc3f58e0d48249b92379d8ef"
+ integrity sha1-wkvOKig62tW8P1jg1IJJuSN52O8=
+
+repeat-element@^1.1.2:
+ version "1.1.3"
+ resolved "https://registry.yarnpkg.com/repeat-element/-/repeat-element-1.1.3.tgz#782e0d825c0c5a3bb39731f84efee6b742e6b1ce"
+ integrity sha512-ahGq0ZnV5m5XtZLMb+vP76kcAM5nkLqk0lpqAuojSKGgQtn4eRi4ZZGm2olo2zKFH+sMsWaqOCW1dqAnOru72g==
+
+repeat-string@^1.6.1:
+ version "1.6.1"
+ resolved "https://registry.yarnpkg.com/repeat-string/-/repeat-string-1.6.1.tgz#8dcae470e1c88abc2d600fff4a776286da75e637"
+ integrity sha1-jcrkcOHIirwtYA//Sndihtp15jc=
+
+request-promise-core@1.1.4:
+ version "1.1.4"
+ resolved "https://registry.yarnpkg.com/request-promise-core/-/request-promise-core-1.1.4.tgz#3eedd4223208d419867b78ce815167d10593a22f"
+ integrity sha512-TTbAfBBRdWD7aNNOoVOBH4pN/KigV6LyapYNNlAPA8JwbovRti1E88m3sYAwsLi5ryhPKsE9APwnjFTgdUjTpw==
+ dependencies:
+ lodash "^4.17.19"
+
+request-promise-native@^1.0.8:
+ version "1.0.9"
+ resolved "https://registry.yarnpkg.com/request-promise-native/-/request-promise-native-1.0.9.tgz#e407120526a5efdc9a39b28a5679bf47b9d9dc28"
+ integrity sha512-wcW+sIUiWnKgNY0dqCpOZkUbF/I+YPi+f09JZIDa39Ec+q82CpSYniDp+ISgTTbKmnpJWASeJBPZmoxH84wt3g==
+ dependencies:
+ request-promise-core "1.1.4"
+ stealthy-require "^1.1.1"
+ tough-cookie "^2.3.3"
+
+request@^2.88.2:
+ version "2.88.2"
+ resolved "https://registry.yarnpkg.com/request/-/request-2.88.2.tgz#d73c918731cb5a87da047e207234146f664d12b3"
+ integrity sha512-MsvtOrfG9ZcrOwAW+Qi+F6HbD0CWXEh9ou77uOb7FM2WPhwT7smM833PzanhJLsgXjN89Ir6V2PczXNnMpwKhw==
+ dependencies:
+ aws-sign2 "~0.7.0"
+ aws4 "^1.8.0"
+ caseless "~0.12.0"
+ combined-stream "~1.0.6"
+ extend "~3.0.2"
+ forever-agent "~0.6.1"
+ form-data "~2.3.2"
+ har-validator "~5.1.3"
+ http-signature "~1.2.0"
+ is-typedarray "~1.0.0"
+ isstream "~0.1.2"
+ json-stringify-safe "~5.0.1"
+ mime-types "~2.1.19"
+ oauth-sign "~0.9.0"
+ performance-now "^2.1.0"
+ qs "~6.5.2"
+ safe-buffer "^5.1.2"
+ tough-cookie "~2.5.0"
+ tunnel-agent "^0.6.0"
+ uuid "^3.3.2"
+
+require-directory@^2.1.1:
+ version "2.1.1"
+ resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42"
+ integrity sha1-jGStX9MNqxyXbiNE/+f3kqam30I=
+
+require-main-filename@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/require-main-filename/-/require-main-filename-2.0.0.tgz#d0b329ecc7cc0f61649f62215be69af54aa8989b"
+ integrity sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg==
+
requires-port@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/requires-port/-/requires-port-1.0.0.tgz#925d2601d39ac485e091cf0da5c6e694dc3dcaff"
@@ -1603,11 +4110,35 @@
resolved "https://registry.yarnpkg.com/resize-observer-polyfill/-/resize-observer-polyfill-1.5.1.tgz#0e9020dd3d21024458d4ebd27e23e40269810464"
integrity sha512-LwZrotdHOo12nQuZlHEmtuXdqGoOD0OhaxopaNFxWzInpEgaLWoVuAMbTzixuosCx2nEG58ngzW3vxdWoxIgdg==
+resolve-cwd@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/resolve-cwd/-/resolve-cwd-3.0.0.tgz#0f0075f1bb2544766cf73ba6a6e2adfebcb13f2d"
+ integrity sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==
+ dependencies:
+ resolve-from "^5.0.0"
+
resolve-from@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-4.0.0.tgz#4abcd852ad32dd7baabfe9b40e00a36db5f392e6"
integrity sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==
+resolve-from@^5.0.0:
+ version "5.0.0"
+ resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-5.0.0.tgz#c35225843df8f776df21c57557bc087e9dfdfc69"
+ integrity sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==
+
+resolve-url@^0.2.1:
+ version "0.2.1"
+ resolved "https://registry.yarnpkg.com/resolve-url/-/resolve-url-0.2.1.tgz#2c637fe77c893afd2a663fe21aa9080068e2052a"
+ integrity sha1-LGN/53yJOv0qZj/iGqkIAGjiBSo=
+
+resolve@^1.10.0, resolve@^1.17.0, resolve@^1.3.2:
+ version "1.17.0"
+ resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.17.0.tgz#b25941b54968231cc2d1bb76a79cb7f2c0bf8444"
+ integrity sha512-ic+7JYiV8Vi2yzQGFWOkiZD5Z9z7O2Zhm9XMaTxdJExKasieFCr+yXZ/WmXsckHiKl12ar0y6XiXDx3m4RHn1w==
+ dependencies:
+ path-parse "^1.0.6"
+
restore-cursor@^3.1.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/restore-cursor/-/restore-cursor-3.1.0.tgz#39f67c54b3a7a58cea5236d95cf0034239631f7e"
@@ -1616,6 +4147,11 @@
onetime "^5.1.0"
signal-exit "^3.0.2"
+ret@~0.1.10:
+ version "0.1.15"
+ resolved "https://registry.yarnpkg.com/ret/-/ret-0.1.15.tgz#b8a4825d5bdb1fc3f6f53c2bc33f81388681c7bc"
+ integrity sha512-TTlYpa+OL+vMMNG24xSlQGEJ3B/RzEfUlLct7b5G/ytav+wPrplCpVMFuwzXbkecJrb6IYo1iFb0S9v37754mg==
+
rimraf@2.6.3:
version "2.6.3"
resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-2.6.3.tgz#b2d104fe0d8fb27cf9e0a1cda8262dd3833c6cab"
@@ -1630,6 +4166,18 @@
dependencies:
glob "^7.1.3"
+rimraf@^3.0.0:
+ version "3.0.2"
+ resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a"
+ integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==
+ dependencies:
+ glob "^7.1.3"
+
+rsvp@^4.8.4:
+ version "4.8.5"
+ resolved "https://registry.yarnpkg.com/rsvp/-/rsvp-4.8.5.tgz#c8f155311d167f68f21e168df71ec5b083113734"
+ integrity sha512-nfMOlASu9OnRJo1mbEk2cz0D56a1MBNrJ7orjRZQG10XDyuvwksKbuXNp6qa+kbn839HwjwhBzhFmdsaEAfauA==
+
run-async@^2.4.0:
version "2.4.1"
resolved "https://registry.yarnpkg.com/run-async/-/run-async-2.4.1.tgz#8440eccf99ea3e70bd409d49aab88e10c189a455"
@@ -1642,16 +4190,43 @@
dependencies:
tslib "^1.9.0"
-safe-buffer@~5.2.0:
+safe-buffer@^5.0.1, safe-buffer@^5.1.2, safe-buffer@~5.2.0:
version "5.2.1"
resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"
integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
-"safer-buffer@>= 2.1.2 < 3":
+safe-buffer@~5.1.1:
+ version "5.1.2"
+ resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d"
+ integrity sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==
+
+safe-regex@^1.1.0:
+ version "1.1.0"
+ resolved "https://registry.yarnpkg.com/safe-regex/-/safe-regex-1.1.0.tgz#40a3669f3b077d1e943d44629e157dd48023bf2e"
+ integrity sha1-QKNmnzsHfR6UPURinhV91IAjvy4=
+ dependencies:
+ ret "~0.1.10"
+
+"safer-buffer@>= 2.1.2 < 3", safer-buffer@^2.0.2, safer-buffer@^2.1.0, safer-buffer@~2.1.0:
version "2.1.2"
resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a"
integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==
+sane@^4.0.3:
+ version "4.1.0"
+ resolved "https://registry.yarnpkg.com/sane/-/sane-4.1.0.tgz#ed881fd922733a6c461bc189dc2b6c006f3ffded"
+ integrity sha512-hhbzAgTIX8O7SHfp2c8/kREfEn4qO/9q8C9beyY6+tvZ87EpoZ3i1RIEvp27YBswnNbY9mWd6paKVmKbAgLfZA==
+ dependencies:
+ "@cnakazawa/watch" "^1.0.3"
+ anymatch "^2.0.0"
+ capture-exit "^2.0.0"
+ exec-sh "^0.3.2"
+ execa "^1.0.0"
+ fb-watchman "^2.0.0"
+ micromatch "^3.1.4"
+ minimist "^1.1.1"
+ walker "~1.0.5"
+
sanitize-html@~1.20.1:
version "1.20.1"
resolved "https://registry.yarnpkg.com/sanitize-html/-/sanitize-html-1.20.1.tgz#f6effdf55dd398807171215a62bfc21811bacf85"
@@ -1668,6 +4243,13 @@
srcset "^1.0.0"
xtend "^4.0.1"
+saxes@^5.0.0:
+ version "5.0.1"
+ resolved "https://registry.yarnpkg.com/saxes/-/saxes-5.0.1.tgz#eebab953fa3b7608dbe94e5dadb15c888fa6696d"
+ integrity sha512-5LBh1Tls8c9xgGjw3QrMwETmTMVk0oFgvrFSvWx62llR2hcEInrKNZ2GZCCuuy2lvWrdl5jhbpeqc5hRYKFOcw==
+ dependencies:
+ xmlchars "^2.2.0"
+
scheduler@^0.15.0:
version "0.15.0"
resolved "https://registry.yarnpkg.com/scheduler/-/scheduler-0.15.0.tgz#6bfcf80ff850b280fed4aeecc6513bc0b4f17f8e"
@@ -1676,20 +4258,35 @@
loose-envify "^1.1.0"
object-assign "^4.1.1"
-semver@^5.5.0:
+"semver@2 || 3 || 4 || 5", semver@^5.4.1, semver@^5.5.0:
version "5.7.1"
resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.1.tgz#a954f931aeba508d307bbf069eff0c01c96116f7"
integrity sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==
-semver@^6.1.2:
+semver@7.x, semver@^7.3.2:
+ version "7.3.2"
+ resolved "https://registry.yarnpkg.com/semver/-/semver-7.3.2.tgz#604962b052b81ed0786aae84389ffba70ffd3938"
+ integrity sha512-OrOb32TeeambH6UrhtShmF7CRDqhL6/5XpPNp2DuRH6+9QLw/orhp72j87v8Qa1ScDkvrrBNpZcDejAirJmfXQ==
+
+semver@^6.0.0, semver@^6.1.2, semver@^6.3.0:
version "6.3.0"
resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.0.tgz#ee0a64c8af5e8ceea67687b133761e1becbd1d3d"
integrity sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==
-semver@^7.3.2:
- version "7.3.2"
- resolved "https://registry.yarnpkg.com/semver/-/semver-7.3.2.tgz#604962b052b81ed0786aae84389ffba70ffd3938"
- integrity sha512-OrOb32TeeambH6UrhtShmF7CRDqhL6/5XpPNp2DuRH6+9QLw/orhp72j87v8Qa1ScDkvrrBNpZcDejAirJmfXQ==
+set-blocking@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7"
+ integrity sha1-BF+XgtARrppoA93TgrJDkrPYkPc=
+
+set-value@^2.0.0, set-value@^2.0.1:
+ version "2.0.1"
+ resolved "https://registry.yarnpkg.com/set-value/-/set-value-2.0.1.tgz#a18d40530e6f07de4228c7defe4227af8cad005b"
+ integrity sha512-JxHc1weCN68wRY0fhCoXpyK55m/XPHafOmK4UWD7m2CI14GMcFypt4w/0+NV5f/ZMby2F6S2wwA7fgynh9gWSw==
+ dependencies:
+ extend-shallow "^2.0.1"
+ is-extendable "^0.1.1"
+ is-plain-object "^2.0.3"
+ split-string "^3.0.1"
shebang-command@^1.2.0:
version "1.2.0"
@@ -1698,16 +4295,43 @@
dependencies:
shebang-regex "^1.0.0"
+shebang-command@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-2.0.0.tgz#ccd0af4f8835fbdc265b82461aaf0c36663f34ea"
+ integrity sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==
+ dependencies:
+ shebang-regex "^3.0.0"
+
shebang-regex@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-1.0.0.tgz#da42f49740c0b42db2ca9728571cb190c98efea3"
integrity sha1-2kL0l0DAtC2yypcoVxyxkMmO/qM=
-signal-exit@^3.0.2:
+shebang-regex@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-3.0.0.tgz#ae16f1644d873ecad843b0307b143362d4c42172"
+ integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==
+
+shellwords@^0.1.1:
+ version "0.1.1"
+ resolved "https://registry.yarnpkg.com/shellwords/-/shellwords-0.1.1.tgz#d6b9181c1a48d397324c84871efbcfc73fc0654b"
+ integrity sha512-vFwSUfQvqybiICwZY5+DAWIPLKsWO31Q91JSKl3UYv+K5c2QRPzn0qzec6QPu1Qc9eHYItiP3NdJqNVqetYAww==
+
+signal-exit@^3.0.0, signal-exit@^3.0.2:
version "3.0.3"
resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.3.tgz#a1410c2edd8f077b08b4e253c8eacfcaf057461c"
integrity sha512-VUJ49FC8U1OxwZLxIbTTrDvLnf/6TDgxZcK8wxR8zs13xpx7xbG60ndBlhNrFi2EMuFRoeDoJO7wthSLq42EjA==
+sisteransi@^1.0.4:
+ version "1.0.5"
+ resolved "https://registry.yarnpkg.com/sisteransi/-/sisteransi-1.0.5.tgz#134d681297756437cc05ca01370d3a7a571075ed"
+ integrity sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==
+
+slash@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/slash/-/slash-3.0.0.tgz#6539be870c165adbd5240220dbe361f1bc4d4634"
+ integrity sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==
+
slice-ansi@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-2.1.0.tgz#cacd7693461a637a5788d92a7dd4fba068e81636"
@@ -1717,11 +4341,108 @@
astral-regex "^1.0.0"
is-fullwidth-code-point "^2.0.0"
-source-map@^0.6.1:
+snapdragon-node@^2.0.1:
+ version "2.1.1"
+ resolved "https://registry.yarnpkg.com/snapdragon-node/-/snapdragon-node-2.1.1.tgz#6c175f86ff14bdb0724563e8f3c1b021a286853b"
+ integrity sha512-O27l4xaMYt/RSQ5TR3vpWCAB5Kb/czIcqUFOM/C4fYcLnbZUc1PkjTAMjof2pBWaSTwOUd6qUHcFGVGj7aIwnw==
+ dependencies:
+ define-property "^1.0.0"
+ isobject "^3.0.0"
+ snapdragon-util "^3.0.1"
+
+snapdragon-util@^3.0.1:
+ version "3.0.1"
+ resolved "https://registry.yarnpkg.com/snapdragon-util/-/snapdragon-util-3.0.1.tgz#f956479486f2acd79700693f6f7b805e45ab56e2"
+ integrity sha512-mbKkMdQKsjX4BAL4bRYTj21edOf8cN7XHdYUJEe+Zn99hVEYcMvKPct1IqNe7+AZPirn8BCDOQBHQZknqmKlZQ==
+ dependencies:
+ kind-of "^3.2.0"
+
+snapdragon@^0.8.1:
+ version "0.8.2"
+ resolved "https://registry.yarnpkg.com/snapdragon/-/snapdragon-0.8.2.tgz#64922e7c565b0e14204ba1aa7d6964278d25182d"
+ integrity sha512-FtyOnWN/wCHTVXOMwvSv26d+ko5vWlIDD6zoUJ7LW8vh+ZBC8QdljveRP+crNrtBwioEUWy/4dMtbBjA4ioNlg==
+ dependencies:
+ base "^0.11.1"
+ debug "^2.2.0"
+ define-property "^0.2.5"
+ extend-shallow "^2.0.1"
+ map-cache "^0.2.2"
+ source-map "^0.5.6"
+ source-map-resolve "^0.5.0"
+ use "^3.1.0"
+
+source-map-resolve@^0.5.0:
+ version "0.5.3"
+ resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.5.3.tgz#190866bece7553e1f8f267a2ee82c606b5509a1a"
+ integrity sha512-Htz+RnsXWk5+P2slx5Jh3Q66vhQj1Cllm0zvnaY98+NFx+Dv2CF/f5O/t8x+KaNdrdIAsruNzoh/KpialbqAnw==
+ dependencies:
+ atob "^2.1.2"
+ decode-uri-component "^0.2.0"
+ resolve-url "^0.2.1"
+ source-map-url "^0.4.0"
+ urix "^0.1.0"
+
+source-map-support@^0.5.6:
+ version "0.5.19"
+ resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.19.tgz#a98b62f86dcaf4f67399648c085291ab9e8fed61"
+ integrity sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw==
+ dependencies:
+ buffer-from "^1.0.0"
+ source-map "^0.6.0"
+
+source-map-url@^0.4.0:
+ version "0.4.0"
+ resolved "https://registry.yarnpkg.com/source-map-url/-/source-map-url-0.4.0.tgz#3e935d7ddd73631b97659956d55128e87b5084a3"
+ integrity sha1-PpNdfd1zYxuXZZlW1VEo6HtQhKM=
+
+source-map@^0.5.0, source-map@^0.5.6:
+ version "0.5.7"
+ resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.5.7.tgz#8a039d2d1021d22d1ea14c80d8ea468ba2ef3fcc"
+ integrity sha1-igOdLRAh0i0eoUyA2OpGi6LvP8w=
+
+source-map@^0.6.0, source-map@^0.6.1, source-map@~0.6.1:
version "0.6.1"
resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263"
integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==
+source-map@^0.7.3:
+ version "0.7.3"
+ resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.7.3.tgz#5302f8169031735226544092e64981f751750383"
+ integrity sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==
+
+spdx-correct@^3.0.0:
+ version "3.1.1"
+ resolved "https://registry.yarnpkg.com/spdx-correct/-/spdx-correct-3.1.1.tgz#dece81ac9c1e6713e5f7d1b6f17d468fa53d89a9"
+ integrity sha512-cOYcUWwhCuHCXi49RhFRCyJEK3iPj1Ziz9DpViV3tbZOwXD49QzIN3MpOLJNxh2qwq2lJJZaKMVw9qNi4jTC0w==
+ dependencies:
+ spdx-expression-parse "^3.0.0"
+ spdx-license-ids "^3.0.0"
+
+spdx-exceptions@^2.1.0:
+ version "2.3.0"
+ resolved "https://registry.yarnpkg.com/spdx-exceptions/-/spdx-exceptions-2.3.0.tgz#3f28ce1a77a00372683eade4a433183527a2163d"
+ integrity sha512-/tTrYOC7PPI1nUAgx34hUpqXuyJG+DTHJTnIULG4rDygi4xu/tfgmq1e1cIRwRzwZgo4NLySi+ricLkZkw4i5A==
+
+spdx-expression-parse@^3.0.0:
+ version "3.0.1"
+ resolved "https://registry.yarnpkg.com/spdx-expression-parse/-/spdx-expression-parse-3.0.1.tgz#cf70f50482eefdc98e3ce0a6833e4a53ceeba679"
+ integrity sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q==
+ dependencies:
+ spdx-exceptions "^2.1.0"
+ spdx-license-ids "^3.0.0"
+
+spdx-license-ids@^3.0.0:
+ version "3.0.5"
+ resolved "https://registry.yarnpkg.com/spdx-license-ids/-/spdx-license-ids-3.0.5.tgz#3694b5804567a458d3c8045842a6358632f62654"
+ integrity sha512-J+FWzZoynJEXGphVIS+XEh3kFSjZX/1i9gFBaWQcB+/tmpe2qUsSBABpcxqxnAxFdiUFEgAX1bjYGQvIZmoz9Q==
+
+split-string@^3.0.1, split-string@^3.0.2:
+ version "3.1.0"
+ resolved "https://registry.yarnpkg.com/split-string/-/split-string-3.1.0.tgz#7cb09dda3a86585705c64b39a6466038682e8fe2"
+ integrity sha512-NzNVhJDYpwceVVii8/Hu6DKfD2G+NrQHlS/V/qgv763EYudVwEcMQNxd2lh+0VrUByXN/oJkl5grOhYWvQUYiw==
+ dependencies:
+ extend-shallow "^3.0.0"
+
sprintf-js@~1.0.2:
version "1.0.3"
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c"
@@ -1735,6 +4456,49 @@
array-uniq "^1.0.2"
number-is-nan "^1.0.0"
+sshpk@^1.7.0:
+ version "1.16.1"
+ resolved "https://registry.yarnpkg.com/sshpk/-/sshpk-1.16.1.tgz#fb661c0bef29b39db40769ee39fa70093d6f6877"
+ integrity sha512-HXXqVUq7+pcKeLqqZj6mHFUMvXtOJt1uoUx09pFW6011inTMxqI8BA8PM95myrIyyKwdnzjdFjLiE6KBPVtJIg==
+ dependencies:
+ asn1 "~0.2.3"
+ assert-plus "^1.0.0"
+ bcrypt-pbkdf "^1.0.0"
+ dashdash "^1.12.0"
+ ecc-jsbn "~0.1.1"
+ getpass "^0.1.1"
+ jsbn "~0.1.0"
+ safer-buffer "^2.0.2"
+ tweetnacl "~0.14.0"
+
+stack-utils@^2.0.2:
+ version "2.0.2"
+ resolved "https://registry.yarnpkg.com/stack-utils/-/stack-utils-2.0.2.tgz#5cf48b4557becb4638d0bc4f21d23f5d19586593"
+ integrity sha512-0H7QK2ECz3fyZMzQ8rH0j2ykpfbnd20BFtfg/SqVC2+sCTtcw0aDTGB7dk+de4U4uUeuz6nOtJcrkFFLG1B0Rg==
+ dependencies:
+ escape-string-regexp "^2.0.0"
+
+static-extend@^0.1.1:
+ version "0.1.2"
+ resolved "https://registry.yarnpkg.com/static-extend/-/static-extend-0.1.2.tgz#60809c39cbff55337226fd5e0b520f341f1fb5c6"
+ integrity sha1-YICcOcv/VTNyJv1eC1IPNB8ftcY=
+ dependencies:
+ define-property "^0.2.5"
+ object-copy "^0.1.0"
+
+stealthy-require@^1.1.1:
+ version "1.1.1"
+ resolved "https://registry.yarnpkg.com/stealthy-require/-/stealthy-require-1.1.1.tgz#35b09875b4ff49f26a777e509b3090a3226bf24b"
+ integrity sha1-NbCYdbT/SfJqd35QmzCQoyJr8ks=
+
+string-length@^4.0.1:
+ version "4.0.1"
+ resolved "https://registry.yarnpkg.com/string-length/-/string-length-4.0.1.tgz#4a973bf31ef77c4edbceadd6af2611996985f8a1"
+ integrity sha512-PKyXUd0LK0ePjSOnWn34V2uD6acUWev9uy0Ft05k0E8xRW+SKcA0F7eMr7h5xlzfn+4O3N+55rduYyet3Jk+jw==
+ dependencies:
+ char-regex "^1.0.2"
+ strip-ansi "^6.0.0"
+
string-width@^3.0.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-3.1.0.tgz#22767be21b62af1081574306f69ac51b62203961"
@@ -1744,7 +4508,7 @@
is-fullwidth-code-point "^2.0.0"
strip-ansi "^5.1.0"
-string-width@^4.1.0:
+string-width@^4.1.0, string-width@^4.2.0:
version "4.2.0"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.0.tgz#952182c46cc7b2c313d1596e623992bd163b72b5"
integrity sha512-zUz5JD+tgqtuDjMhwIg5uFVV3dtqZ9yQJlZVfq4I01/K5Paj5UHj7VyrQOJvzawSVlKpObApbfD0Ed6yJc+1eg==
@@ -1790,6 +4554,21 @@
dependencies:
ansi-regex "^5.0.0"
+strip-bom@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/strip-bom/-/strip-bom-4.0.0.tgz#9c3505c1db45bcedca3d9cf7a16f5c5aa3901878"
+ integrity sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==
+
+strip-eof@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/strip-eof/-/strip-eof-1.0.0.tgz#bb43ff5598a6eb05d89b59fcd129c983313606bf"
+ integrity sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=
+
+strip-final-newline@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/strip-final-newline/-/strip-final-newline-2.0.0.tgz#89b852fb2fcbe936f6f4b3187afb0a12c1ab58ad"
+ integrity sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==
+
strip-json-comments@^3.0.1:
version "3.1.1"
resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006"
@@ -1809,13 +4588,26 @@
dependencies:
has-flag "^3.0.0"
-supports-color@^7.1.0:
+supports-color@^7.0.0, supports-color@^7.1.0:
version "7.1.0"
resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.1.0.tgz#68e32591df73e25ad1c4b49108a2ec507962bfd1"
integrity sha512-oRSIpR8pxT1Wr2FquTNnGet79b3BWljqOuoW/h4oBhxJ/HUbX5nX6JSruTkvXDCFMwDPvsaTTbvMLKZWSy0R5g==
dependencies:
has-flag "^4.0.0"
+supports-hyperlinks@^2.0.0:
+ version "2.1.0"
+ resolved "https://registry.yarnpkg.com/supports-hyperlinks/-/supports-hyperlinks-2.1.0.tgz#f663df252af5f37c5d49bbd7eeefa9e0b9e59e47"
+ integrity sha512-zoE5/e+dnEijk6ASB6/qrK+oYdm2do1hjoLWrqUC/8WEIW1gbxFcKuBof7sW8ArN6e+AYvsE8HBGiVRWL/F5CA==
+ dependencies:
+ has-flag "^4.0.0"
+ supports-color "^7.0.0"
+
+symbol-tree@^3.2.4:
+ version "3.2.4"
+ resolved "https://registry.yarnpkg.com/symbol-tree/-/symbol-tree-3.2.4.tgz#430637d248ba77e078883951fb9aa0eed7c63fa2"
+ integrity sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==
+
table@^5.2.3:
version "5.4.6"
resolved "https://registry.yarnpkg.com/table/-/table-5.4.6.tgz#1292d19500ce3f86053b05f0e8e7e4a3bb21079e"
@@ -1826,11 +4618,33 @@
slice-ansi "^2.1.0"
string-width "^3.0.0"
+terminal-link@^2.0.0:
+ version "2.1.1"
+ resolved "https://registry.yarnpkg.com/terminal-link/-/terminal-link-2.1.1.tgz#14a64a27ab3c0df933ea546fba55f2d078edc994"
+ integrity sha512-un0FmiRUQNr5PJqy9kP7c40F5BOfpGlYTrxonDChEZB7pzZxRNp/bt+ymiy9/npwXya9KH99nJ/GXFIiUkYGFQ==
+ dependencies:
+ ansi-escapes "^4.2.1"
+ supports-hyperlinks "^2.0.0"
+
+test-exclude@^6.0.0:
+ version "6.0.0"
+ resolved "https://registry.yarnpkg.com/test-exclude/-/test-exclude-6.0.0.tgz#04a8698661d805ea6fa293b6cb9e63ac044ef15e"
+ integrity sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==
+ dependencies:
+ "@istanbuljs/schema" "^0.1.2"
+ glob "^7.1.4"
+ minimatch "^3.0.4"
+
text-table@^0.2.0:
version "0.2.0"
resolved "https://registry.yarnpkg.com/text-table/-/text-table-0.2.0.tgz#7f5ee823ae805207c00af2df4a84ec3fcfa570b4"
integrity sha1-f17oI66AUgfACvLfSoTsP8+lcLQ=
+throat@^5.0.0:
+ version "5.0.0"
+ resolved "https://registry.yarnpkg.com/throat/-/throat-5.0.0.tgz#c5199235803aad18754a667d659b5e72ce16764b"
+ integrity sha512-fcwX4mndzpLQKBS1DVYhGAcYaYt7vsHNIvQV+WXMvnow5cgjPphq5CaayLaGsjRdSCKZFNGt7/GYAuXaNOiYCA==
+
through@^2.3.6:
version "2.3.8"
resolved "https://registry.yarnpkg.com/through/-/through-2.3.8.tgz#0dd4c9ffaabc357960b1b724115d7e0e86a2e1f5"
@@ -1843,6 +4657,88 @@
dependencies:
os-tmpdir "~1.0.2"
+tmpl@1.0.x:
+ version "1.0.4"
+ resolved "https://registry.yarnpkg.com/tmpl/-/tmpl-1.0.4.tgz#23640dd7b42d00433911140820e5cf440e521dd1"
+ integrity sha1-I2QN17QtAEM5ERQIIOXPRA5SHdE=
+
+to-fast-properties@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/to-fast-properties/-/to-fast-properties-2.0.0.tgz#dc5e698cbd079265bc73e0377681a4e4e83f616e"
+ integrity sha1-3F5pjL0HkmW8c+A3doGk5Og/YW4=
+
+to-object-path@^0.3.0:
+ version "0.3.0"
+ resolved "https://registry.yarnpkg.com/to-object-path/-/to-object-path-0.3.0.tgz#297588b7b0e7e0ac08e04e672f85c1f4999e17af"
+ integrity sha1-KXWIt7Dn4KwI4E5nL4XB9JmeF68=
+ dependencies:
+ kind-of "^3.0.2"
+
+to-regex-range@^2.1.0:
+ version "2.1.1"
+ resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-2.1.1.tgz#7c80c17b9dfebe599e27367e0d4dd5590141db38"
+ integrity sha1-fIDBe53+vlmeJzZ+DU3VWQFB2zg=
+ dependencies:
+ is-number "^3.0.0"
+ repeat-string "^1.6.1"
+
+to-regex-range@^5.0.1:
+ version "5.0.1"
+ resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4"
+ integrity sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==
+ dependencies:
+ is-number "^7.0.0"
+
+to-regex@^3.0.1, to-regex@^3.0.2:
+ version "3.0.2"
+ resolved "https://registry.yarnpkg.com/to-regex/-/to-regex-3.0.2.tgz#13cfdd9b336552f30b51f33a8ae1b42a7a7599ce"
+ integrity sha512-FWtleNAtZ/Ki2qtqej2CXTOayOH9bHDQF+Q48VpWyDXjbYxA4Yz8iDB31zXOBUlOHHKidDbqGVrTUvQMPmBGBw==
+ dependencies:
+ define-property "^2.0.2"
+ extend-shallow "^3.0.2"
+ regex-not "^1.0.2"
+ safe-regex "^1.1.0"
+
+tough-cookie@^2.3.3, tough-cookie@~2.5.0:
+ version "2.5.0"
+ resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-2.5.0.tgz#cd9fb2a0aa1d5a12b473bd9fb96fa3dcff65ade2"
+ integrity sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==
+ dependencies:
+ psl "^1.1.28"
+ punycode "^2.1.1"
+
+tough-cookie@^3.0.1:
+ version "3.0.1"
+ resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-3.0.1.tgz#9df4f57e739c26930a018184887f4adb7dca73b2"
+ integrity sha512-yQyJ0u4pZsv9D4clxO69OEjLWYw+jbgspjTue4lTQZLfV0c5l1VmK2y1JK8E9ahdpltPOaAThPcp5nKPUgSnsg==
+ dependencies:
+ ip-regex "^2.1.0"
+ psl "^1.1.28"
+ punycode "^2.1.1"
+
+tr46@^2.0.2:
+ version "2.0.2"
+ resolved "https://registry.yarnpkg.com/tr46/-/tr46-2.0.2.tgz#03273586def1595ae08fedb38d7733cee91d2479"
+ integrity sha512-3n1qG+/5kg+jrbTzwAykB5yRYtQCTqOGKq5U5PE3b0a1/mzo6snDhjGS0zJVJunO0NrT3Dg1MLy5TjWP/UJppg==
+ dependencies:
+ punycode "^2.1.1"
+
+ts-jest@^26.1.3:
+ version "26.1.3"
+ resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-26.1.3.tgz#aac928a05fdf13e3e6dfbc8caec3847442667894"
+ integrity sha512-beUTSvuqR9SmKQEylewqJdnXWMVGJRFqSz2M8wKJe7GBMmLZ5zw6XXKSJckbHNMxn+zdB3guN2eOucSw2gBMnw==
+ dependencies:
+ bs-logger "0.x"
+ buffer-from "1.x"
+ fast-json-stable-stringify "2.x"
+ jest-util "26.x"
+ json5 "2.x"
+ lodash.memoize "4.x"
+ make-error "1.x"
+ mkdirp "1.x"
+ semver "7.x"
+ yargs-parser "18.x"
+
tslib@^1.8.1, tslib@^1.9.0:
version "1.13.0"
resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.13.0.tgz#c881e13cc7015894ed914862d276436fa9a47043"
@@ -1860,6 +4756,18 @@
dependencies:
tslib "^1.8.1"
+tunnel-agent@^0.6.0:
+ version "0.6.0"
+ resolved "https://registry.yarnpkg.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz#27a5dea06b36b04a0a9966774b290868f0fc40fd"
+ integrity sha1-J6XeoGs2sEoKmWZ3SykIaPD8QP0=
+ dependencies:
+ safe-buffer "^5.0.1"
+
+tweetnacl@^0.14.3, tweetnacl@~0.14.0:
+ version "0.14.5"
+ resolved "https://registry.yarnpkg.com/tweetnacl/-/tweetnacl-0.14.5.tgz#5ae68177f192d4456269d108afa93ff8743f4f64"
+ integrity sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=
+
type-check@~0.3.2:
version "0.3.2"
resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.3.2.tgz#5884cab512cf1d355e3fb784f30804b2b520db72"
@@ -1867,11 +4775,21 @@
dependencies:
prelude-ls "~1.1.2"
+type-detect@4.0.8:
+ version "4.0.8"
+ resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-4.0.8.tgz#7646fb5f18871cfbb7749e69bd39a6388eb7450c"
+ integrity sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==
+
type-fest@^0.11.0:
version "0.11.0"
resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.11.0.tgz#97abf0872310fed88a5c466b25681576145e33f1"
integrity sha512-OdjXJxnCN1AvyLSzeKIgXTXxV+99ZuXl3Hpo9XpJAv9MBcHrrJOQ5kV7ypXOuQie+AmWG25hLbiKdwYTifzcfQ==
+type-fest@^0.6.0:
+ version "0.6.0"
+ resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.6.0.tgz#8d2a2370d3df886eb5c90ada1c5bf6188acf838b"
+ integrity sha512-q+MB8nYR1KDLrgr4G5yemftpMC7/QLqVndBmEEdqzmNj5dcFOO4Oo8qlwZE3ULT3+Zim1F8Kq4cBnikNhlCMlg==
+
type-fest@^0.8.1:
version "0.8.1"
resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.8.1.tgz#09e249ebde851d3b1e48d27c105444667f17b83d"
@@ -1882,6 +4800,13 @@
resolved "https://registry.yarnpkg.com/typed-styles/-/typed-styles-0.0.7.tgz#93392a008794c4595119ff62dde6809dbc40a3d9"
integrity sha512-pzP0PWoZUhsECYjABgCGQlRGL1n7tOHsgwYv3oIiEpJwGhFTuty/YNeduxQYzXXa3Ge5BdT6sHYIQYpl4uJ+5Q==
+typedarray-to-buffer@^3.1.5:
+ version "3.1.5"
+ resolved "https://registry.yarnpkg.com/typedarray-to-buffer/-/typedarray-to-buffer-3.1.5.tgz#a97ee7a9ff42691b9f783ff1bc5112fe3fca9080"
+ integrity sha512-zdu8XMNEDepKKR+XYOXAVPtWui0ly0NtohUscw+UmaHiAWT8hrV1rr//H6V+0DvJ3OQ19S979M0laLfX8rm82Q==
+ dependencies:
+ is-typedarray "^1.0.0"
+
typescript@~3.7.0:
version "3.7.5"
resolved "https://registry.yarnpkg.com/typescript/-/typescript-3.7.5.tgz#0692e21f65fd4108b9330238aac11dd2e177a1ae"
@@ -1895,6 +4820,24 @@
csstype "2.6.9"
free-style "3.1.0"
+union-value@^1.0.0:
+ version "1.0.1"
+ resolved "https://registry.yarnpkg.com/union-value/-/union-value-1.0.1.tgz#0b6fe7b835aecda61c6ea4d4f02c14221e109847"
+ integrity sha512-tJfXmxMeWYnczCVs7XAEvIV7ieppALdyepWMkHkwciRpZraG/xwT+s2JN8+pr1+8jCRf80FFzvr+MpQeeoF4Xg==
+ dependencies:
+ arr-union "^3.1.0"
+ get-value "^2.0.6"
+ is-extendable "^0.1.1"
+ set-value "^2.0.1"
+
+unset-value@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/unset-value/-/unset-value-1.0.0.tgz#8376873f7d2335179ffb1e6fc3a8ed0dfc8ab559"
+ integrity sha1-g3aHP30jNRef+x5vw6jtDfyKtVk=
+ dependencies:
+ has-value "^0.3.1"
+ isobject "^3.0.0"
+
uri-js@^4.2.2:
version "4.2.2"
resolved "https://registry.yarnpkg.com/uri-js/-/uri-js-4.2.2.tgz#94c540e1ff772956e2299507c010aea6c8838eb0"
@@ -1902,6 +4845,11 @@
dependencies:
punycode "^2.1.0"
+urix@^0.1.0:
+ version "0.1.0"
+ resolved "https://registry.yarnpkg.com/urix/-/urix-0.1.0.tgz#da937f7a62e21fec1fd18d49b35c2935067a6c72"
+ integrity sha1-2pN/emLiH+wf0Y1Js1wpNQZ6bHI=
+
url-parse@~1.4.7:
version "1.4.7"
resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.4.7.tgz#a8a83535e8c00a316e403a5db4ac1b9b853ae278"
@@ -1910,16 +4858,78 @@
querystringify "^2.1.1"
requires-port "^1.0.0"
+use@^3.1.0:
+ version "3.1.1"
+ resolved "https://registry.yarnpkg.com/use/-/use-3.1.1.tgz#d50c8cac79a19fbc20f2911f56eb973f4e10070f"
+ integrity sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ==
+
util-deprecate@^1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
integrity sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=
+uuid@^3.3.2:
+ version "3.4.0"
+ resolved "https://registry.yarnpkg.com/uuid/-/uuid-3.4.0.tgz#b23e4358afa8a202fe7a100af1f5f883f02007ee"
+ integrity sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A==
+
+uuid@^8.2.0:
+ version "8.2.0"
+ resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.2.0.tgz#cb10dd6b118e2dada7d0cd9730ba7417c93d920e"
+ integrity sha512-CYpGiFTUrmI6OBMkAdjSDM0k5h8SkkiTP4WAjQgDgNB1S3Ou9VBEvr6q0Kv2H1mMk7IWfxYGpMH5sd5AvcIV2Q==
+
v8-compile-cache@^2.0.3:
version "2.1.1"
resolved "https://registry.yarnpkg.com/v8-compile-cache/-/v8-compile-cache-2.1.1.tgz#54bc3cdd43317bca91e35dcaf305b1a7237de745"
integrity sha512-8OQ9CL+VWyt3JStj7HX7/ciTL2V3Rl1Wf5OL+SNTm0yK1KvtReVulksyeRnCANHHuUxHlQig+JJDlUhBt1NQDQ==
+v8-to-istanbul@^4.1.3:
+ version "4.1.4"
+ resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-4.1.4.tgz#b97936f21c0e2d9996d4985e5c5156e9d4e49cd6"
+ integrity sha512-Rw6vJHj1mbdK8edjR7+zuJrpDtKIgNdAvTSAcpYfgMIw+u2dPDntD3dgN4XQFLU2/fvFQdzj+EeSGfd/jnY5fQ==
+ dependencies:
+ "@types/istanbul-lib-coverage" "^2.0.1"
+ convert-source-map "^1.6.0"
+ source-map "^0.7.3"
+
+validate-npm-package-license@^3.0.1:
+ version "3.0.4"
+ resolved "https://registry.yarnpkg.com/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz#fc91f6b9c7ba15c857f4cb2c5defeec39d4f410a"
+ integrity sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==
+ dependencies:
+ spdx-correct "^3.0.0"
+ spdx-expression-parse "^3.0.0"
+
+verror@1.10.0:
+ version "1.10.0"
+ resolved "https://registry.yarnpkg.com/verror/-/verror-1.10.0.tgz#3a105ca17053af55d6e270c1f8288682e18da400"
+ integrity sha1-OhBcoXBTr1XW4nDB+CiGguGNpAA=
+ dependencies:
+ assert-plus "^1.0.0"
+ core-util-is "1.0.2"
+ extsprintf "^1.2.0"
+
+w3c-hr-time@^1.0.2:
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/w3c-hr-time/-/w3c-hr-time-1.0.2.tgz#0a89cdf5cc15822df9c360543676963e0cc308cd"
+ integrity sha512-z8P5DvDNjKDoFIHK7q8r8lackT6l+jo/Ye3HOle7l9nICP9lf1Ci25fy9vHd0JOWewkIFzXIEig3TdKT7JQ5fQ==
+ dependencies:
+ browser-process-hrtime "^1.0.0"
+
+w3c-xmlserializer@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/w3c-xmlserializer/-/w3c-xmlserializer-2.0.0.tgz#3e7104a05b75146cc60f564380b7f683acf1020a"
+ integrity sha512-4tzD0mF8iSiMiNs30BiLO3EpfGLZUT2MSX/G+o7ZywDzliWQ3OPtTZ0PTC3B3ca1UAf4cJMHB+2Bf56EriJuRA==
+ dependencies:
+ xml-name-validator "^3.0.0"
+
+walker@^1.0.7, walker@~1.0.5:
+ version "1.0.7"
+ resolved "https://registry.yarnpkg.com/walker/-/walker-1.0.7.tgz#2f7f9b8fd10d677262b18a884e28d19618e028fb"
+ integrity sha1-L3+bj9ENZ3JisYqITijRlhjgKPs=
+ dependencies:
+ makeerror "1.0.x"
+
warning@^4.0.2, warning@^4.0.3:
version "4.0.3"
resolved "https://registry.yarnpkg.com/warning/-/warning-4.0.3.tgz#16e9e077eb8a86d6af7d64aa1e05fd85b4678ca3"
@@ -1927,6 +4937,42 @@
dependencies:
loose-envify "^1.0.0"
+webidl-conversions@^5.0.0:
+ version "5.0.0"
+ resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-5.0.0.tgz#ae59c8a00b121543a2acc65c0434f57b0fc11aff"
+ integrity sha512-VlZwKPCkYKxQgeSbH5EyngOmRp7Ww7I9rQLERETtf5ofd9pGeswWiOtogpEO850jziPRarreGxn5QIiTqpb2wA==
+
+webidl-conversions@^6.1.0:
+ version "6.1.0"
+ resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-6.1.0.tgz#9111b4d7ea80acd40f5270d666621afa78b69514"
+ integrity sha512-qBIvFLGiBpLjfwmYAaHPXsn+ho5xZnGvyGvsarywGNc8VyQJUMHJ8OBKGGrPER0okBeMDaan4mNBlgBROxuI8w==
+
+whatwg-encoding@^1.0.5:
+ version "1.0.5"
+ resolved "https://registry.yarnpkg.com/whatwg-encoding/-/whatwg-encoding-1.0.5.tgz#5abacf777c32166a51d085d6b4f3e7d27113ddb0"
+ integrity sha512-b5lim54JOPN9HtzvK9HFXvBma/rnfFeqsic0hSpjtDbVxR3dJKLc+KB4V6GgiGOvl7CY/KNh8rxSo9DKQrnUEw==
+ dependencies:
+ iconv-lite "0.4.24"
+
+whatwg-mimetype@^2.3.0:
+ version "2.3.0"
+ resolved "https://registry.yarnpkg.com/whatwg-mimetype/-/whatwg-mimetype-2.3.0.tgz#3d4b1e0312d2079879f826aff18dbeeca5960fbf"
+ integrity sha512-M4yMwr6mAnQz76TbJm914+gPpB/nCwvZbJU28cUD6dR004SAxDLOOSUaB1JDRqLtaOV/vi0IC5lEAGFgrjGv/g==
+
+whatwg-url@^8.0.0:
+ version "8.1.0"
+ resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-8.1.0.tgz#c628acdcf45b82274ce7281ee31dd3c839791771"
+ integrity sha512-vEIkwNi9Hqt4TV9RdnaBPNt+E2Sgmo3gePebCRgZ1R7g6d23+53zCTnuB0amKI4AXq6VM8jj2DUAa0S1vjJxkw==
+ dependencies:
+ lodash.sortby "^4.7.0"
+ tr46 "^2.0.2"
+ webidl-conversions "^5.0.0"
+
+which-module@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/which-module/-/which-module-2.0.0.tgz#d9ef07dce77b9902b8a3a8fa4b31c3e3f7e6e87a"
+ integrity sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=
+
which@^1.2.9:
version "1.3.1"
resolved "https://registry.yarnpkg.com/which/-/which-1.3.1.tgz#a45043d54f5805316da8d62f9f50918d3da70b0a"
@@ -1934,16 +4980,42 @@
dependencies:
isexe "^2.0.0"
+which@^2.0.1, which@^2.0.2:
+ version "2.0.2"
+ resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1"
+ integrity sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==
+ dependencies:
+ isexe "^2.0.0"
+
word-wrap@~1.2.3:
version "1.2.3"
resolved "https://registry.yarnpkg.com/word-wrap/-/word-wrap-1.2.3.tgz#610636f6b1f703891bd34771ccb17fb93b47079c"
integrity sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==
+wrap-ansi@^6.2.0:
+ version "6.2.0"
+ resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-6.2.0.tgz#e9393ba07102e6c91a3b221478f0257cd2856e53"
+ integrity sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==
+ dependencies:
+ ansi-styles "^4.0.0"
+ string-width "^4.1.0"
+ strip-ansi "^6.0.0"
+
wrappy@1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f"
integrity sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=
+write-file-atomic@^3.0.0:
+ version "3.0.3"
+ resolved "https://registry.yarnpkg.com/write-file-atomic/-/write-file-atomic-3.0.3.tgz#56bd5c5a5c70481cd19c571bd39ab965a5de56e8"
+ integrity sha512-AvHcyZ5JnSfq3ioSyjrBkH9yW4m7Ayk8/9My/DD9onKeu/94fwrMocemO2QAJFAlnnDN+ZDS+ZjAR5ua1/PV/Q==
+ dependencies:
+ imurmurhash "^0.1.4"
+ is-typedarray "^1.0.0"
+ signal-exit "^3.0.2"
+ typedarray-to-buffer "^3.1.5"
+
write@1.0.3:
version "1.0.3"
resolved "https://registry.yarnpkg.com/write/-/write-1.0.3.tgz#0800e14523b923a387e415123c865616aae0f5c3"
@@ -1951,12 +5023,52 @@
dependencies:
mkdirp "^0.5.1"
-ws@^7.2.0:
+ws@^7.2.0, ws@^7.2.3:
version "7.3.1"
resolved "https://registry.yarnpkg.com/ws/-/ws-7.3.1.tgz#d0547bf67f7ce4f12a72dfe31262c68d7dc551c8"
integrity sha512-D3RuNkynyHmEJIpD2qrgVkc9DQ23OrN/moAwZX4L8DfvszsJxpjQuUq3LMx6HoYji9fbIOBY18XWBsAux1ZZUA==
+xml-name-validator@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-3.0.0.tgz#6ae73e06de4d8c6e47f9fb181f78d648ad457c6a"
+ integrity sha512-A5CUptxDsvxKJEU3yO6DuWBSJz/qizqzJKOMIfUJHETbBw/sFaDxgd6fxm1ewUaM0jZ444Fc5vC5ROYurg/4Pw==
+
+xmlchars@^2.2.0:
+ version "2.2.0"
+ resolved "https://registry.yarnpkg.com/xmlchars/-/xmlchars-2.2.0.tgz#060fe1bcb7f9c76fe2a17db86a9bc3ab894210cb"
+ integrity sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==
+
xtend@^4.0.1:
version "4.0.2"
resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54"
integrity sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==
+
+y18n@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/y18n/-/y18n-4.0.0.tgz#95ef94f85ecc81d007c264e190a120f0a3c8566b"
+ integrity sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w==
+
+yargs-parser@18.x, yargs-parser@^18.1.2:
+ version "18.1.3"
+ resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-18.1.3.tgz#be68c4975c6b2abf469236b0c870362fab09a7b0"
+ integrity sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==
+ dependencies:
+ camelcase "^5.0.0"
+ decamelize "^1.2.0"
+
+yargs@^15.3.1:
+ version "15.4.1"
+ resolved "https://registry.yarnpkg.com/yargs/-/yargs-15.4.1.tgz#0d87a16de01aee9d8bec2bfbf74f67851730f4f8"
+ integrity sha512-aePbxDmcYW++PaqBsJ+HYUFwCdv4LVvdnhBy78E57PIor8/OVvhMrADFFEDh8DHDFRv/O9i3lPhsENjO7QX0+A==
+ dependencies:
+ cliui "^6.0.0"
+ decamelize "^1.2.0"
+ find-up "^4.1.0"
+ get-caller-file "^2.0.1"
+ require-directory "^2.1.1"
+ require-main-filename "^2.0.0"
+ set-blocking "^2.0.0"
+ string-width "^4.2.0"
+ which-module "^2.0.0"
+ y18n "^4.0.0"
+ yargs-parser "^18.1.2"
diff --git a/sdks/python/apache_beam/runners/interactive/interactive_environment.py b/sdks/python/apache_beam/runners/interactive/interactive_environment.py
index 1d28517..4363d17 100644
--- a/sdks/python/apache_beam/runners/interactive/interactive_environment.py
+++ b/sdks/python/apache_beam/runners/interactive/interactive_environment.py
@@ -251,18 +251,32 @@
return self._inspector
def cleanup(self, pipeline=None):
- """Cleans up cached states for the given pipeline. Cleans up
- for all pipelines if no specific pipeline is given."""
+ """Cleans up cached states for the given pipeline. Noop if the given
+ pipeline is absent from the environment. Cleans up for all pipelines
+ if no pipeline is specified."""
if pipeline:
+ from apache_beam.runners.interactive import background_caching_job as bcj
+ bcj.attempt_to_cancel_background_caching_job(pipeline)
+ bcj.attempt_to_stop_test_stream_service(pipeline)
cache_manager = self.get_cache_manager(pipeline)
if cache_manager:
cache_manager.cleanup()
else:
+ for _, job in self._background_caching_jobs.items():
+ if job:
+ job.cancel()
+ for _, controller in self._test_stream_service_controllers.items():
+ if controller:
+ controller.stop()
for _, cache_manager in self._cache_managers.items():
- cache_manager.cleanup()
+ if cache_manager:
+ cache_manager.cleanup()
+ self.evict_background_caching_job(pipeline)
+ self.evict_test_stream_service_controller(pipeline)
self.evict_computed_pcollections(pipeline)
self.evict_cached_source_signature(pipeline)
+ self.evict_pipeline_result(pipeline)
def watch(self, watchable):
"""Watches a watchable.
@@ -343,9 +357,13 @@
'apache_beam.runners.runner.PipelineResult or its subclass')
self._main_pipeline_results[str(id(pipeline))] = result
- def evict_pipeline_result(self, pipeline):
- """Evicts the tracking of given pipeline run. Noop if absent."""
- return self._main_pipeline_results.pop(str(id(pipeline)), None)
+ def evict_pipeline_result(self, pipeline=None):
+ """Evicts the last run result of the given pipeline. Noop if the pipeline
+ is absent from the environment. If no pipeline is specified, evicts for all
+ pipelines."""
+ if pipeline:
+ return self._main_pipeline_results.pop(str(id(pipeline)), None)
+ self._main_pipeline_results.clear()
def pipeline_result(self, pipeline):
"""Gets the pipeline run result. None if absent."""
@@ -364,16 +382,24 @@
"""Gets the background caching job started from the given pipeline."""
return self._background_caching_jobs.get(str(id(pipeline)), None)
+ def evict_background_caching_job(self, pipeline=None):
+ """Evicts the background caching job started from the given pipeline. Noop
+ if the given pipeline is absent from the environment. If no pipeline is
+ specified, evicts for all pipelines."""
+ if pipeline:
+ return self._background_caching_jobs.pop(str(id(pipeline)), None)
+ self._background_caching_jobs.clear()
+
def set_test_stream_service_controller(self, pipeline, controller):
"""Sets the test stream service controller that has started a gRPC server
- serving the test stream for any job started from the given user-defined
+ serving the test stream for any job started from the given user defined
pipeline.
"""
self._test_stream_service_controllers[str(id(pipeline))] = controller
def get_test_stream_service_controller(self, pipeline):
"""Gets the test stream service controller that has started a gRPC server
- serving the test stream for any job started from the given user-defined
+ serving the test stream for any job started from the given user defined
pipeline.
"""
return self._test_stream_service_controllers.get(str(id(pipeline)), None)
@@ -381,9 +407,12 @@
def evict_test_stream_service_controller(self, pipeline):
"""Evicts and pops the test stream service controller that has started a
gRPC server serving the test stream for any job started from the given
- user-defined pipeline.
+ user defined pipeline. Noop if the given pipeline is absent from the
+ environment. If the given pipeline is None, evicts for all pipelines.
"""
- return self._test_stream_service_controllers.pop(str(id(pipeline)), None)
+ if pipeline:
+ return self._test_stream_service_controllers.pop(str(id(pipeline)), None)
+ self._test_stream_service_controllers.clear()
def is_terminated(self, pipeline):
"""Queries if the most recent job (by executing the given pipeline) state
@@ -400,13 +429,15 @@
return self._cached_source_signature.get(str(id(pipeline)), set())
def evict_cached_source_signature(self, pipeline=None):
+ """Evicts the signature generated for each recorded source of the given
+ pipeline. Noop if the given pipeline is absent from the environment. If no
+ pipeline is specified, evicts for all pipelines."""
if pipeline:
- self._cached_source_signature.pop(str(id(pipeline)), None)
- else:
- self._cached_source_signature.clear()
+ return self._cached_source_signature.pop(str(id(pipeline)), None)
+ self._cached_source_signature.clear()
def track_user_pipelines(self):
- """Record references to all user-defined pipeline instances watched in
+ """Record references to all user defined pipeline instances watched in
current environment.
Current static global singleton interactive environment holds references to
@@ -416,11 +447,17 @@
then handle them differently.
This is invoked every time a PTransform is to be applied if the current
- code execution is under ipython due to the possibility that any user-defined
+ code execution is under ipython due to the possibility that any user defined
pipeline can be re-evaluated through notebook cell re-execution at any time.
Each time this is invoked, it will check if there is a cache manager
already created for each user defined pipeline. If not, create one for it.
+
+ If a pipeline is no longer watched due to re-execution while its
+ PCollections are still in watched scope, the pipeline becomes anonymous but
+ still accessible indirectly through references to its PCollections. This
+ function also clears up internal states for those anonymous pipelines once
+ all their PCollections are anonymous.
"""
self._tracked_user_pipelines = set()
for watching in self.watching():
@@ -428,6 +465,17 @@
if isinstance(val, beam.pipeline.Pipeline):
self._tracked_user_pipelines.add(val)
_ = self.get_cache_manager(val, create_if_absent=True)
+ all_tracked_pipeline_ids = set(self._background_caching_jobs.keys()).union(
+ set(self._test_stream_service_controllers.keys()),
+ set(self._cache_managers.keys()),
+ {str(id(pcoll.pipeline))
+ for pcoll in self._computed_pcolls},
+ set(self._cached_source_signature.keys()),
+ set(self._main_pipeline_results.keys()))
+ inspectable_pipelines = self._inspector.inspectable_pipelines
+ for pipeline in all_tracked_pipeline_ids:
+ if pipeline not in inspectable_pipelines:
+ self.cleanup(pipeline)
@property
def tracked_user_pipelines(self):
diff --git a/sdks/python/apache_beam/runners/interactive/interactive_environment_test.py b/sdks/python/apache_beam/runners/interactive/interactive_environment_test.py
index 6f44dac..6650c63 100644
--- a/sdks/python/apache_beam/runners/interactive/interactive_environment_test.py
+++ b/sdks/python/apache_beam/runners/interactive/interactive_environment_test.py
@@ -236,14 +236,33 @@
@patch(
'apache_beam.runners.interactive.interactive_environment'
'.InteractiveEnvironment.cleanup')
- def test_cleanup_invoked_when_cache_manager_is_evicted(self, mocked_cleanup):
+ def test_track_user_pipeline_cleanup_non_inspectable_pipeline(
+ self, mocked_cleanup):
ie._interactive_beam_env = None
ie.new_env()
- dummy_pipeline = 'dummy'
+ dummy_pipeline_1 = beam.Pipeline()
+ dummy_pipeline_2 = beam.Pipeline()
+ dummy_pipeline_3 = beam.Pipeline()
+ dummy_pipeline_4 = beam.Pipeline()
+ dummy_pcoll = dummy_pipeline_4 | beam.Create([1])
+ dummy_pipeline_5 = beam.Pipeline()
+ dummy_non_inspectable_pipeline = 'dummy'
+ ie.current_env().watch(locals())
+ from apache_beam.runners.interactive.background_caching_job import BackgroundCachingJob
+ ie.current_env().set_background_caching_job(
+ dummy_pipeline_1,
+ BackgroundCachingJob(
+ runner.PipelineResult(runner.PipelineState.DONE), limiters=[]))
+ ie.current_env().set_test_stream_service_controller(dummy_pipeline_2, None)
ie.current_env().set_cache_manager(
- cache.FileBasedCacheManager(), dummy_pipeline)
+ cache.FileBasedCacheManager(), dummy_pipeline_3)
+ ie.current_env().mark_pcollection_computed([dummy_pcoll])
+ ie.current_env().set_cached_source_signature(
+ dummy_non_inspectable_pipeline, None)
+ ie.current_env().set_pipeline_result(
+ dummy_pipeline_5, runner.PipelineResult(runner.PipelineState.RUNNING))
mocked_cleanup.assert_not_called()
- ie.current_env().evict_cache_manager(dummy_pipeline)
+ ie.current_env().track_user_pipelines()
mocked_cleanup.assert_called_once()
diff --git a/sdks/python/apache_beam/runners/interactive/messaging/interactive_environment_inspector.py b/sdks/python/apache_beam/runners/interactive/messaging/interactive_environment_inspector.py
index 3bce182..a4a9f02 100644
--- a/sdks/python/apache_beam/runners/interactive/messaging/interactive_environment_inspector.py
+++ b/sdks/python/apache_beam/runners/interactive/messaging/interactive_environment_inspector.py
@@ -41,6 +41,7 @@
def __init__(self):
self._inspectables = {}
self._anonymous = {}
+ self._inspectable_pipelines = {}  # str(id(pipeline)) -> pipeline
@property
def inspectables(self):
@@ -49,6 +50,20 @@
self._inspectables = inspect()
return self._inspectables
+ @property
+ def inspectable_pipelines(self):
+ """Returns a dictionary of all inspectable pipelines. The keys are the
+ stringified ids of the pipeline instances; the values are the pipelines.
+
+ This includes user defined pipelines assigned to variables and anonymous
+ pipelines with inspectable PCollections.
+ If a user defined pipeline is not within the returned dict, it can be
+ considered out of scope, and all resources and memory states related to it
+ should be released.
+ """
+ _ = self.list_inspectables()
+ return self._inspectable_pipelines
+
@as_json
def list_inspectables(self):
"""Lists inspectables in JSON format.
@@ -89,6 +104,8 @@
pipeline_identifier = obfuscate(meta(pipelines[pipeline], pipeline))
listing[pipeline_identifier]['pcolls'][identifier] = inspectable[
'metadata']
+ self._inspectable_pipelines = dict(
+ (str(id(pipeline)), pipeline) for pipeline in pipelines)
return listing
def get_val(self, identifier):
diff --git a/sdks/python/apache_beam/runners/interactive/options/capture_control.py b/sdks/python/apache_beam/runners/interactive/options/capture_control.py
index 12e901f..ab877b5 100644
--- a/sdks/python/apache_beam/runners/interactive/options/capture_control.py
+++ b/sdks/python/apache_beam/runners/interactive/options/capture_control.py
@@ -29,7 +29,6 @@
from datetime import timedelta
from apache_beam.io.gcp.pubsub import ReadFromPubSub
-from apache_beam.runners.interactive import background_caching_job as bcj
from apache_beam.runners.interactive import interactive_environment as ie
from apache_beam.runners.interactive.options import capture_limiters
@@ -71,15 +70,7 @@
runs, Interactive Beam will capture fresh data."""
if ie.current_env().options.enable_capture_replay:
_LOGGER.info(
- 'You have requested Interactive Beam to evict all captured '
+ 'You have requested Interactive Beam to evict all recorded '
'data that could be deterministically replayed among multiple '
'pipeline runs.')
- ie.current_env().track_user_pipelines()
- if pipeline:
- bcj.attempt_to_cancel_background_caching_job(pipeline)
- bcj.attempt_to_stop_test_stream_service(pipeline)
- else:
- for user_pipeline in ie.current_env().tracked_user_pipelines:
- bcj.attempt_to_cancel_background_caching_job(user_pipeline)
- bcj.attempt_to_stop_test_stream_service(user_pipeline)
ie.current_env().cleanup(pipeline)
diff --git a/sdks/python/apache_beam/runners/interactive/options/capture_limiters.py b/sdks/python/apache_beam/runners/interactive/options/capture_limiters.py
index a25aba0..2c84f80 100644
--- a/sdks/python/apache_beam/runners/interactive/options/capture_limiters.py
+++ b/sdks/python/apache_beam/runners/interactive/options/capture_limiters.py
@@ -24,6 +24,8 @@
import threading
+from apache_beam.portability.api.beam_interactive_api_pb2 import TestStreamFileHeader
+from apache_beam.portability.api.beam_interactive_api_pb2 import TestStreamFileRecord
from apache_beam.runners.interactive import interactive_environment as ie
@@ -36,6 +38,20 @@
raise NotImplementedError
+class ElementLimiter(Limiter):
+ """A `Limiter` that limits reading from cache based on some property of an
+ element.
+ """
+ def update(self, e):
+ # type: (Any) -> None
+
+ """Update the internal state based on some property of an element.
+
+ This is executed on every element that is read from cache.
+ """
+ raise NotImplementedError
+
+
class SizeLimiter(Limiter):
"""Limits the cache size to a specified byte limit."""
def __init__(
@@ -71,3 +87,56 @@
def is_triggered(self):
return self._triggered
+
+
+class CountLimiter(ElementLimiter):
+ """Limits by counting the number of elements seen."""
+ def __init__(self, max_count):
+ self._max_count = max_count
+ self._count = 0
+
+ def update(self, e):
+ # A TestStreamFileRecord can contain many elements at once. If e is a file
+ # record, then count the number of elements in the bundle.
+ if isinstance(e, TestStreamFileRecord):
+ if not e.recorded_event.element_event:
+ return
+ self._count += len(e.recorded_event.element_event.elements)
+
+ # Otherwise, count everything else but the header of the file since it is
+ # not an element.
+ elif not isinstance(e, TestStreamFileHeader):
+ self._count += 1
+
+ def is_triggered(self):
+ return self._count >= self._max_count
+
+
+class ProcessingTimeLimiter(ElementLimiter):
+ """Limits by how much processing time has passed in the element stream.
+
+ The first non-zero "advance_duration" seen becomes the start of the internal
+ clock. Every processing time event then moves the internal clock forward by
+ its "advance_duration". This triggers once the time elapsed between the
+ start and the internal clock reaches the given duration.
+ """
+ def __init__(self, max_duration_secs):
+ """Initializes the limiter with the max duration given in seconds."""
+ self._max_duration_us = max_duration_secs * 1e6
+ self._start_us = 0
+ self._cur_time_us = 0
+
+ def update(self, e):
+ # Only look at TestStreamFileRecords which hold the processing time.
+ if not isinstance(e, TestStreamFileRecord):
+ return
+
+ if not e.recorded_event.processing_time_event:
+ return
+
+ if self._start_us == 0:
+ self._start_us = e.recorded_event.processing_time_event.advance_duration
+ self._cur_time_us += e.recorded_event.processing_time_event.advance_duration
+
+ def is_triggered(self):
+ return self._cur_time_us - self._start_us >= self._max_duration_us
diff --git a/sdks/python/apache_beam/runners/interactive/options/capture_limiters_test.py b/sdks/python/apache_beam/runners/interactive/options/capture_limiters_test.py
new file mode 100644
index 0000000..850c56e2c
--- /dev/null
+++ b/sdks/python/apache_beam/runners/interactive/options/capture_limiters_test.py
@@ -0,0 +1,53 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import absolute_import
+
+import unittest
+
+from apache_beam.portability.api.beam_interactive_api_pb2 import TestStreamFileRecord
+from apache_beam.runners.interactive.options.capture_limiters import CountLimiter
+from apache_beam.runners.interactive.options.capture_limiters import ProcessingTimeLimiter
+
+
+class CaptureLimitersTest(unittest.TestCase):
+ def test_count_limiter(self):
+ limiter = CountLimiter(5)
+
+ for e in range(4):
+ limiter.update(e)
+
+ self.assertFalse(limiter.is_triggered())
+ limiter.update(5)
+ self.assertTrue(limiter.is_triggered())
+
+ def test_processing_time_limiter(self):
+ limiter = ProcessingTimeLimiter(max_duration_secs=2)
+
+ r = TestStreamFileRecord()
+ r.recorded_event.processing_time_event.advance_duration = int(1 * 1e6)
+ limiter.update(r)
+ self.assertFalse(limiter.is_triggered())
+
+ r = TestStreamFileRecord()
+ r.recorded_event.processing_time_event.advance_duration = int(2 * 1e6)
+ limiter.update(r)
+ self.assertTrue(limiter.is_triggered())
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/sdks/python/apache_beam/runners/portability/flink_runner.py b/sdks/python/apache_beam/runners/portability/flink_runner.py
index fddf25d..f886a7d 100644
--- a/sdks/python/apache_beam/runners/portability/flink_runner.py
+++ b/sdks/python/apache_beam/runners/portability/flink_runner.py
@@ -110,7 +110,7 @@
return self._jar
else:
return self.path_to_beam_jar(
- 'runners:flink:%s:job-server:shadowJar' % self._flink_version)
+ ':runners:flink:%s:job-server:shadowJar' % self._flink_version)
def java_arguments(
self, job_port, artifact_port, expansion_port, artifacts_dir):
diff --git a/sdks/python/apache_beam/runners/portability/flink_runner_test.py b/sdks/python/apache_beam/runners/portability/flink_runner_test.py
index 6dd7634..eb83611 100644
--- a/sdks/python/apache_beam/runners/portability/flink_runner_test.py
+++ b/sdks/python/apache_beam/runners/portability/flink_runner_test.py
@@ -91,7 +91,7 @@
flink_job_server_jar = (
known_args.flink_job_server_jar or
job_server.JavaJarJobServer.path_to_beam_jar(
- 'runners:flink:%s:job-server:shadowJar' %
+ ':runners:flink:%s:job-server:shadowJar' %
FlinkRunnerOptions.PUBLISHED_FLINK_VERSIONS[-1]))
streaming = known_args.streaming
environment_type = known_args.environment_type.lower()
diff --git a/sdks/python/apache_beam/runners/portability/flink_uber_jar_job_server.py b/sdks/python/apache_beam/runners/portability/flink_uber_jar_job_server.py
index 4fa92cd..b10a75f 100644
--- a/sdks/python/apache_beam/runners/portability/flink_uber_jar_job_server.py
+++ b/sdks/python/apache_beam/runners/portability/flink_uber_jar_job_server.py
@@ -75,7 +75,7 @@
url = self._executable_jar
else:
url = job_server.JavaJarJobServer.path_to_beam_jar(
- 'runners:flink:%s:job-server:shadowJar' % self.flink_version())
+ ':runners:flink:%s:job-server:shadowJar' % self.flink_version())
return job_server.JavaJarJobServer.local_jar(url)
def flink_version(self):
diff --git a/sdks/python/apache_beam/runners/portability/spark_runner.py b/sdks/python/apache_beam/runners/portability/spark_runner.py
index 73a3ec4..4619ea9 100644
--- a/sdks/python/apache_beam/runners/portability/spark_runner.py
+++ b/sdks/python/apache_beam/runners/portability/spark_runner.py
@@ -87,7 +87,7 @@
self._jar)
return self._jar
else:
- return self.path_to_beam_jar('runners:spark:job-server:shadowJar')
+ return self.path_to_beam_jar(':runners:spark:job-server:shadowJar')
def java_arguments(
self, job_port, artifact_port, expansion_port, artifacts_dir):
diff --git a/sdks/python/apache_beam/runners/portability/spark_runner_test.py b/sdks/python/apache_beam/runners/portability/spark_runner_test.py
index d0c2c4e..19967c1 100644
--- a/sdks/python/apache_beam/runners/portability/spark_runner_test.py
+++ b/sdks/python/apache_beam/runners/portability/spark_runner_test.py
@@ -61,7 +61,7 @@
spark_job_server_jar = (
known_args.spark_job_server_jar or
job_server.JavaJarJobServer.path_to_beam_jar(
- 'runners:spark:job-server:shadowJar'))
+ ':runners:spark:job-server:shadowJar'))
environment_type = known_args.environment_type.lower()
environment_config = (
known_args.environment_config if known_args.environment_config else None)
diff --git a/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server.py b/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server.py
index 252f70a..6b9e6fd 100644
--- a/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server.py
+++ b/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server.py
@@ -77,7 +77,7 @@
url = self._executable_jar
else:
url = job_server.JavaJarJobServer.path_to_beam_jar(
- 'runners:spark:job-server:shadowJar')
+ ':runners:spark:job-server:shadowJar')
return job_server.JavaJarJobServer.local_jar(url)
def create_beam_job(self, job_id, job_name, pipeline, options):
diff --git a/sdks/python/apache_beam/testing/test_stream_service.py b/sdks/python/apache_beam/testing/test_stream_service.py
index d69465f..618e208 100644
--- a/sdks/python/apache_beam/testing/test_stream_service.py
+++ b/sdks/python/apache_beam/testing/test_stream_service.py
@@ -34,6 +34,8 @@
"""
def __init__(self, reader, endpoint=None, exception_handler=None):
self._server = grpc.server(ThreadPoolExecutor(max_workers=10))
+ self._server_started = False
+ self._server_stopped = False
if endpoint:
self.endpoint = endpoint
@@ -50,9 +52,18 @@
self._exception_handler = lambda _: False
def start(self):
+ # A server can only be started if never started and never stopped before.
+ if self._server_started or self._server_stopped:
+ return
+ self._server_started = True
self._server.start()
def stop(self):
+ # A server can only be stopped if already started and never stopped before.
+ if not self._server_started or self._server_stopped:
+ return
+ self._server_started = False
+ self._server_stopped = True
self._server.stop(0)
# This was introduced in grpcio 1.24 and might be gone in the future. Keep
# this check in case the runtime is on a older, current or future grpcio.
diff --git a/sdks/python/apache_beam/testing/test_stream_service_test.py b/sdks/python/apache_beam/testing/test_stream_service_test.py
index 01b16a1..7a5b403 100644
--- a/sdks/python/apache_beam/testing/test_stream_service_test.py
+++ b/sdks/python/apache_beam/testing/test_stream_service_test.py
@@ -19,6 +19,7 @@
from __future__ import absolute_import
+import sys
import unittest
import grpc
@@ -30,6 +31,13 @@
from apache_beam.portability.api.beam_runner_api_pb2 import TestStreamPayload
from apache_beam.testing.test_stream_service import TestStreamServiceController
+# TODO(BEAM-8288): clean up the work-around of nose tests using Python2 without
+# unittest.mock module.
+try:
+ from unittest.mock import patch
+except ImportError:
+ from mock import patch # type: ignore[misc]
+
# Nose automatically detects tests if they match a regex. Here, it mistakens
# these protos as tests. For more info see the Nose docs at:
# https://nose.readthedocs.io/en/latest/writing_tests.html
@@ -116,5 +124,78 @@
self.assertEqual(events_b, expected_events)
+@unittest.skipIf(
+ sys.version_info < (3, 6), 'The tests require at least Python 3.6 to work.')
+class TestStreamServiceStartStopTest(unittest.TestCase):
+
+ # grpc._server is an internal (private) module, so import it explicitly.
+ from grpc import _server
+
+ def setUp(self):
+ self.controller = TestStreamServiceController(
+ EventsReader(expected_key=[('full', EXPECTED_KEY)]))
+ self.assertFalse(self.controller._server_started)
+ self.assertFalse(self.controller._server_stopped)
+
+ def tearDown(self):
+ self.controller.stop()
+
+ def test_start_when_never_started(self):
+ with patch.object(self._server._Server,
+ 'start',
+ wraps=self.controller._server.start) as mock_start:
+ self.controller.start()
+ mock_start.assert_called_once()
+ self.assertTrue(self.controller._server_started)
+ self.assertFalse(self.controller._server_stopped)
+
+ def test_start_noop_when_already_started(self):
+ with patch.object(self._server._Server,
+ 'start',
+ wraps=self.controller._server.start) as mock_start:
+ self.controller.start()
+ mock_start.assert_called_once()
+ self.controller.start()
+ mock_start.assert_called_once()
+
+ def test_start_noop_when_already_stopped(self):
+ with patch.object(self._server._Server,
+ 'start',
+ wraps=self.controller._server.start) as mock_start:
+ self.controller.start()
+ self.controller.stop()
+ mock_start.assert_called_once()
+ self.controller.start()
+ mock_start.assert_called_once()
+
+ def test_stop_noop_when_not_started(self):
+ with patch.object(self._server._Server,
+ 'stop',
+ wraps=self.controller._server.stop) as mock_stop:
+ self.controller.stop()
+ mock_stop.assert_not_called()
+
+ def test_stop_when_already_started(self):
+ with patch.object(self._server._Server,
+ 'stop',
+ wraps=self.controller._server.stop) as mock_stop:
+ self.controller.start()
+ mock_stop.assert_not_called()
+ self.controller.stop()
+ mock_stop.assert_called_once()
+ self.assertFalse(self.controller._server_started)
+ self.assertTrue(self.controller._server_stopped)
+
+ def test_stop_noop_when_already_stopped(self):
+ with patch.object(self._server._Server,
+ 'stop',
+ wraps=self.controller._server.stop) as mock_stop:
+ self.controller.start()
+ self.controller.stop()
+ mock_stop.assert_called_once()
+ self.controller.stop()
+ mock_stop.assert_called_once()
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/sdks/python/apache_beam/transforms/ptransform.py b/sdks/python/apache_beam/transforms/ptransform.py
index 2ab121e..4a0e80f 100644
--- a/sdks/python/apache_beam/transforms/ptransform.py
+++ b/sdks/python/apache_beam/transforms/ptransform.py
@@ -68,13 +68,16 @@
from apache_beam.internal import pickler
from apache_beam.internal import util
from apache_beam.portability import python_urns
+from apache_beam.pvalue import DoOutputsTuple
from apache_beam.transforms.display import DisplayDataItem
from apache_beam.transforms.display import HasDisplayData
from apache_beam.typehints import native_type_compatibility
from apache_beam.typehints import typehints
+from apache_beam.typehints.decorators import IOTypeHints
from apache_beam.typehints.decorators import TypeCheckError
from apache_beam.typehints.decorators import WithTypeHints
from apache_beam.typehints.decorators import get_signature
+from apache_beam.typehints.decorators import get_type_hints
from apache_beam.typehints.decorators import getcallargs_forhints
from apache_beam.typehints.trivial_inference import instance_to_type
from apache_beam.typehints.typehints import validate_composite_type_param
@@ -350,6 +353,14 @@
# type: () -> str
return self.__class__.__name__
+ def default_type_hints(self):
+ fn_type_hints = IOTypeHints.from_callable(self.expand)
+ if fn_type_hints is not None:
+ fn_type_hints = fn_type_hints.strip_pcoll()
+
+ # Prefer class decorator type hints for backwards compatibility.
+ return get_type_hints(self.__class__).with_defaults(fn_type_hints)
+
def with_input_types(self, input_type_hint):
"""Annotates the input type of a :class:`PTransform` with a type-hint.
@@ -419,6 +430,8 @@
root_hint = (
arg_hints[0] if len(arg_hints) == 1 else arg_hints or kwarg_hints)
for context, pvalue_, hint in _ZipPValues().visit(pvalueish, root_hint):
+ if isinstance(pvalue_, DoOutputsTuple):
+ continue
if pvalue_.element_type is None:
# TODO(robertwb): It's a bug that we ever get here. (typecheck)
continue
diff --git a/sdks/python/apache_beam/transforms/sql.py b/sdks/python/apache_beam/transforms/sql.py
index 2092276..244cd17 100644
--- a/sdks/python/apache_beam/transforms/sql.py
+++ b/sdks/python/apache_beam/transforms/sql.py
@@ -39,16 +39,29 @@
class SqlTransform(ExternalTransform):
"""A transform that can translate a SQL query into PTransforms.
- Input PCollections must have a schema. Currently, this means the PCollection
- *must* have a NamedTuple output type, and that type must be registered to use
- RowCoder. For example::
+ Input PCollections must have a schema. Currently, there are two ways to define
+ a schema for a PCollection:
- Purchase = typing.NamedTuple('Purchase',
- [('item_name', unicode), ('price', float)])
- coders.registry.register_coder(Purchase, coders.RowCoder)
+ 1) Register a `typing.NamedTuple` type to use RowCoder, and specify it as the
+ output type. For example::
- Similarly, the output of SqlTransform is a PCollection with a generated
- NamedTuple type, and columns can be accessed as fields. For example::
+ Purchase = typing.NamedTuple('Purchase',
+ [('item_name', unicode), ('price', float)])
+ coders.registry.register_coder(Purchase, coders.RowCoder)
+ with Pipeline() as p:
+ purchases = (p | beam.io...
+ | beam.Map(..).with_output_types(Purchase))
+
+ 2) Produce `beam.Row` instances. Note this option will fail if Beam is unable
+ to infer data types for any of the fields. For example::
+
+ with Pipeline() as p:
+ purchases = (p | beam.io...
+ | beam.Map(lambda x: beam.Row(item_name=unicode(..),
+ price=float(..))))
+
+ Similarly, the output of SqlTransform is a PCollection with a schema.
+ The columns produced by the query can be accessed as attributes. For example::
purchases | SqlTransform(\"\"\"
SELECT item_name, COUNT(*) AS `count`
@@ -57,8 +70,8 @@
row.item_name))
Additional examples can be found in
- `apache_beam.examples.wordcount_xlang_sql`, and
- `apache_beam.transforms.sql_test`.
+ `apache_beam.examples.wordcount_xlang_sql`, `apache_beam.examples.sql_taxi`,
+ and `apache_beam.transforms.sql_test`.
For more details about Beam SQL in general see the `Java transform
<https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/extensions/sql/SqlTransform.html>`_,
diff --git a/sdks/python/apache_beam/typehints/decorators.py b/sdks/python/apache_beam/typehints/decorators.py
index dad0a31..4cd7681 100644
--- a/sdks/python/apache_beam/typehints/decorators.py
+++ b/sdks/python/apache_beam/typehints/decorators.py
@@ -105,6 +105,7 @@
from typing import Optional
from typing import Tuple
from typing import TypeVar
+from typing import Union
from apache_beam.typehints import native_type_compatibility
from apache_beam.typehints import typehints
@@ -378,6 +379,75 @@
self.output_types and len(self.output_types[0]) == 1 and
not self.output_types[1])
+ def strip_pcoll(self):
+ from apache_beam.pipeline import Pipeline
+ from apache_beam.pvalue import PBegin
+ from apache_beam.pvalue import PDone
+
+ return self.strip_pcoll_helper(self.input_types,
+ self._has_input_types,
+ 'input_types',
+ [Pipeline, PBegin],
+ 'This input type hint will be ignored '
+ 'and not used for type-checking purposes. '
+ 'Typically, input type hints for a '
+ 'PTransform are single (or nested) types '
+ 'wrapped by a PCollection, or PBegin.',
+ 'strip_pcoll_input()').\
+ strip_pcoll_helper(self.output_types,
+ self.has_simple_output_type,
+ 'output_types',
+ [PDone, None],
+ 'This output type hint will be ignored '
+ 'and not used for type-checking purposes. '
+ 'Typically, output type hints for a '
+ 'PTransform are single (or nested) types '
+ 'wrapped by a PCollection, PDone, or None.',
+ 'strip_pcoll_output()')
+
+ def strip_pcoll_helper(
+ self,
+ my_type, # type: any
+ has_my_type, # type: Callable[[], bool]
+ my_key, # type: str
+ special_containers, # type: List[Union[PBegin, PDone, PCollection]]
+ error_str, # type: str
+ source_str # type: str
+ ):
+ # type: (...) -> IOTypeHints
+ from apache_beam.pvalue import PCollection
+
+ if not has_my_type() or not my_type or len(my_type[0]) != 1:
+ return self
+
+ my_type = my_type[0][0]
+
+ if isinstance(my_type, typehints.AnyTypeConstraint):
+ return self
+
+ special_containers += [PCollection]
+ kwarg_dict = {}
+
+ if (my_type not in special_containers and
+ getattr(my_type, '__origin__', None) != PCollection):
+ logging.warning(error_str + ' Got: %s instead.' % my_type)
+ kwarg_dict[my_key] = None
+ return self._replace(
+ origin=self._make_origin([self], tb=False, msg=[source_str]),
+ **kwarg_dict)
+
+ if (getattr(my_type, '__args__', -1) in [-1, None] or
+ len(my_type.__args__) == 0):
+ # e.g. PCollection (or PBegin/PDone)
+ kwarg_dict[my_key] = ((typehints.Any, ), {})
+ else:
+ # e.g. PCollection[type]
+ kwarg_dict[my_key] = ((convert_to_beam_type(my_type.__args__[0]), ), {})
+
+ return self._replace(
+ origin=self._make_origin([self], tb=False, msg=[source_str]),
+ **kwarg_dict)
+
def strip_iterable(self):
# type: () -> IOTypeHints
diff --git a/sdks/python/apache_beam/typehints/schemas.py b/sdks/python/apache_beam/typehints/schemas.py
index 0cc513f..cb4cf01 100644
--- a/sdks/python/apache_beam/typehints/schemas.py
+++ b/sdks/python/apache_beam/typehints/schemas.py
@@ -178,6 +178,11 @@
return schema_pb2.FieldType(
array_type=schema_pb2.ArrayType(element_type=element_type))
+ elif _safe_issubclass(type_, Mapping):
+ key_type, value_type = map(typing_to_runner_api, _get_args(type_))
+ return schema_pb2.FieldType(
+ map_type=schema_pb2.MapType(key_type=key_type, value_type=value_type))
+
raise ValueError("Unsupported type: %s" % type_)
diff --git a/sdks/python/apache_beam/typehints/typed_pipeline_test_py3.py b/sdks/python/apache_beam/typehints/typed_pipeline_test_py3.py
index 2016871..e12930d 100644
--- a/sdks/python/apache_beam/typehints/typed_pipeline_test_py3.py
+++ b/sdks/python/apache_beam/typehints/typed_pipeline_test_py3.py
@@ -22,6 +22,7 @@
from __future__ import absolute_import
+import typing
import unittest
import apache_beam as beam
@@ -257,6 +258,135 @@
result = [1, 2, 3] | beam.FlatMap(fn) | beam.Map(fn2)
self.assertCountEqual([4, 6], result)
+ def test_typed_ptransform_with_no_error(self):
+ class StrToInt(beam.PTransform):
+ def expand(
+ self,
+ pcoll: beam.pvalue.PCollection[str]) -> beam.pvalue.PCollection[int]:
+ return pcoll | beam.Map(lambda x: int(x))
+
+ class IntToStr(beam.PTransform):
+ def expand(
+ self,
+ pcoll: beam.pvalue.PCollection[int]) -> beam.pvalue.PCollection[str]:
+ return pcoll | beam.Map(lambda x: str(x))
+
+ _ = ['1', '2', '3'] | StrToInt() | IntToStr()
+
+ def test_typed_ptransform_with_bad_typehints(self):
+ class StrToInt(beam.PTransform):
+ def expand(
+ self,
+ pcoll: beam.pvalue.PCollection[str]) -> beam.pvalue.PCollection[int]:
+ return pcoll | beam.Map(lambda x: int(x))
+
+ class IntToStr(beam.PTransform):
+ def expand(
+ self,
+ pcoll: beam.pvalue.PCollection[str]) -> beam.pvalue.PCollection[str]:
+ return pcoll | beam.Map(lambda x: str(x))
+
+ with self.assertRaisesRegex(typehints.TypeCheckError,
+ "Input type hint violation at IntToStr: "
+ "expected <class 'str'>, got <class 'int'>"):
+ _ = ['1', '2', '3'] | StrToInt() | IntToStr()
+
+ def test_typed_ptransform_with_bad_input(self):
+ class StrToInt(beam.PTransform):
+ def expand(
+ self,
+ pcoll: beam.pvalue.PCollection[str]) -> beam.pvalue.PCollection[int]:
+ return pcoll | beam.Map(lambda x: int(x))
+
+ class IntToStr(beam.PTransform):
+ def expand(
+ self,
+ pcoll: beam.pvalue.PCollection[int]) -> beam.pvalue.PCollection[str]:
+ return pcoll | beam.Map(lambda x: str(x))
+
+ with self.assertRaisesRegex(typehints.TypeCheckError,
+ "Input type hint violation at StrToInt: "
+ "expected <class 'str'>, got <class 'int'>"):
+ # Feed integers to a PTransform that expects strings
+ _ = [1, 2, 3] | StrToInt() | IntToStr()
+
+ def test_typed_ptransform_with_partial_typehints(self):
+ class StrToInt(beam.PTransform):
+ def expand(self, pcoll) -> beam.pvalue.PCollection[int]:
+ return pcoll | beam.Map(lambda x: int(x))
+
+ class IntToStr(beam.PTransform):
+ def expand(
+ self,
+ pcoll: beam.pvalue.PCollection[int]) -> beam.pvalue.PCollection[str]:
+ return pcoll | beam.Map(lambda x: str(x))
+
+ # Feed integers to a PTransform that should expect strings
+ # but has no typehints so it expects any
+ _ = [1, 2, 3] | StrToInt() | IntToStr()
+
+ def test_typed_ptransform_with_bare_wrappers(self):
+ class StrToInt(beam.PTransform):
+ def expand(
+ self, pcoll: beam.pvalue.PCollection) -> beam.pvalue.PCollection:
+ return pcoll | beam.Map(lambda x: int(x))
+
+ class IntToStr(beam.PTransform):
+ def expand(
+ self,
+ pcoll: beam.pvalue.PCollection[int]) -> beam.pvalue.PCollection[str]:
+ return pcoll | beam.Map(lambda x: str(x))
+
+ _ = [1, 2, 3] | StrToInt() | IntToStr()
+
+ def test_typed_ptransform_with_no_typehints(self):
+ class StrToInt(beam.PTransform):
+ def expand(self, pcoll):
+ return pcoll | beam.Map(lambda x: int(x))
+
+ class IntToStr(beam.PTransform):
+ def expand(
+ self,
+ pcoll: beam.pvalue.PCollection[int]) -> beam.pvalue.PCollection[str]:
+ return pcoll | beam.Map(lambda x: str(x))
+
+ # Feed integers to a PTransform that should expect strings
+ # but has no typehints so it expects any
+ _ = [1, 2, 3] | StrToInt() | IntToStr()
+
+ def test_typed_ptransform_with_generic_annotations(self):
+ T = typing.TypeVar('T')
+
+ class IntToInt(beam.PTransform):
+ def expand(
+ self,
+ pcoll: beam.pvalue.PCollection[T]) -> beam.pvalue.PCollection[T]:
+ return pcoll | beam.Map(lambda x: x)
+
+ class IntToStr(beam.PTransform):
+ def expand(
+ self,
+ pcoll: beam.pvalue.PCollection[T]) -> beam.pvalue.PCollection[str]:
+ return pcoll | beam.Map(lambda x: str(x))
+
+ _ = [1, 2, 3] | IntToInt() | IntToStr()
+
+ def test_typed_ptransform_with_do_outputs_tuple_compiles(self):
+ class MyDoFn(beam.DoFn):
+ def process(self, element: int, *args, **kwargs):
+ if element % 2:
+ yield beam.pvalue.TaggedOutput('odd', 1)
+ else:
+ yield beam.pvalue.TaggedOutput('even', 1)
+
+ class MyPTransform(beam.PTransform):
+ def expand(self, pcoll: beam.pvalue.PCollection[int]):
+ return pcoll | beam.ParDo(MyDoFn()).with_outputs('odd', 'even')
+
+ # This test fails if you remove the following line from ptransform.py
+ # if isinstance(pvalue_, DoOutputsTuple): continue
+ _ = [1, 2, 3] | MyPTransform()
+
class AnnotationsTest(unittest.TestCase):
def test_pardo_dofn(self):
diff --git a/sdks/python/apache_beam/typehints/typehints_test_py3.py b/sdks/python/apache_beam/typehints/typehints_test_py3.py
index a7c23f0..5a36330 100644
--- a/sdks/python/apache_beam/typehints/typehints_test_py3.py
+++ b/sdks/python/apache_beam/typehints/typehints_test_py3.py
@@ -23,11 +23,19 @@
from __future__ import absolute_import
from __future__ import print_function
+import typing
import unittest
+import apache_beam.typehints.typehints as typehints
+from apache_beam import Map
+from apache_beam import PTransform
+from apache_beam.pvalue import PBegin
+from apache_beam.pvalue import PCollection
+from apache_beam.pvalue import PDone
from apache_beam.transforms.core import DoFn
from apache_beam.typehints import KV
from apache_beam.typehints import Iterable
+from apache_beam.typehints.typehints import Any
class TestParDoAnnotations(unittest.TestCase):
@@ -46,11 +54,221 @@
def process(self, element: int) -> Iterable[str]:
pass
- print(MyDoFn().get_type_hints())
th = MyDoFn().get_type_hints()
self.assertEqual(th.input_types, ((int, ), {}))
self.assertEqual(th.output_types, ((str, ), {}))
+class TestPTransformAnnotations(unittest.TestCase):
+ def test_pep484_annotations(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll: PCollection[int]) -> PCollection[str]:
+ return pcoll | Map(lambda num: str(num))
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(th.input_types, ((int, ), {}))
+ self.assertEqual(th.output_types, ((str, ), {}))
+
+ def test_annotations_without_input_pcollection_wrapper(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll: int) -> PCollection[str]:
+ return pcoll | Map(lambda num: str(num))
+
+ error_str = (
+ r'This input type hint will be ignored and not used for '
+ r'type-checking purposes. Typically, input type hints for a '
+ r'PTransform are single (or nested) types wrapped by a '
+ r'PCollection, or PBegin. Got: {} instead.'.format(int))
+
+ with self.assertLogs(level='WARN') as log:
+ MyPTransform().get_type_hints()
+ self.assertIn(error_str, log.output[0])
+
+ def test_annotations_without_output_pcollection_wrapper(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll: PCollection[int]) -> str:
+ return pcoll | Map(lambda num: str(num))
+
+ error_str = (
+ r'This output type hint will be ignored and not used for '
+ r'type-checking purposes. Typically, output type hints for a '
+ r'PTransform are single (or nested) types wrapped by a '
+ r'PCollection, PDone, or None. Got: {} instead.'.format(str))
+
+ with self.assertLogs(level='WARN') as log:
+ th = MyPTransform().get_type_hints()
+ self.assertIn(error_str, log.output[0])
+ self.assertEqual(th.input_types, ((int, ), {}))
+ self.assertEqual(th.output_types, None)
+
+ def test_annotations_without_input_internal_type(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll: PCollection) -> PCollection[str]:
+ return pcoll | Map(lambda num: str(num))
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(th.input_types, ((Any, ), {}))
+ self.assertEqual(th.output_types, ((str, ), {}))
+
+ def test_annotations_without_output_internal_type(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll: PCollection[int]) -> PCollection:
+ return pcoll | Map(lambda num: str(num))
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(th.input_types, ((int, ), {}))
+ self.assertEqual(th.output_types, ((Any, ), {}))
+
+ def test_annotations_without_any_internal_type(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll: PCollection) -> PCollection:
+ return pcoll | Map(lambda num: str(num))
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(th.input_types, ((Any, ), {}))
+ self.assertEqual(th.output_types, ((Any, ), {}))
+
+ def test_annotations_without_input_typehint(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll) -> PCollection[str]:
+ return pcoll | Map(lambda num: str(num))
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(th.input_types, ((Any, ), {}))
+ self.assertEqual(th.output_types, ((str, ), {}))
+
+ def test_annotations_without_output_typehint(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll: PCollection[int]):
+ return pcoll | Map(lambda num: str(num))
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(th.input_types, ((int, ), {}))
+ self.assertEqual(th.output_types, ((Any, ), {}))
+
+ def test_annotations_without_any_typehints(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll):
+ return pcoll | Map(lambda num: str(num))
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(th.input_types, None)
+ self.assertEqual(th.output_types, None)
+
+ def test_annotations_with_pbegin(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll: PBegin):
+ return pcoll | Map(lambda num: str(num))
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(th.input_types, ((Any, ), {}))
+ self.assertEqual(th.output_types, ((Any, ), {}))
+
+ def test_annotations_with_pdone(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll) -> PDone:
+ return pcoll | Map(lambda num: str(num))
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(th.input_types, ((Any, ), {}))
+ self.assertEqual(th.output_types, ((Any, ), {}))
+
+ def test_annotations_with_none_input(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll: None) -> PCollection[str]:
+ return pcoll | Map(lambda num: str(num))
+
+ error_str = (
+ r'This input type hint will be ignored and not used for '
+ r'type-checking purposes. Typically, input type hints for a '
+ r'PTransform are single (or nested) types wrapped by a '
+ r'PCollection, or PBegin. Got: {} instead.'.format(None))
+
+ with self.assertLogs(level='WARN') as log:
+ th = MyPTransform().get_type_hints()
+ self.assertIn(error_str, log.output[0])
+ self.assertEqual(th.input_types, None)
+ self.assertEqual(th.output_types, ((str, ), {}))
+
+ def test_annotations_with_none_output(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll) -> None:
+ return pcoll | Map(lambda num: str(num))
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(th.input_types, ((Any, ), {}))
+ self.assertEqual(th.output_types, ((Any, ), {}))
+
+ def test_annotations_with_arbitrary_output(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll) -> str:
+ return pcoll | Map(lambda num: str(num))
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(th.input_types, ((Any, ), {}))
+ self.assertEqual(th.output_types, None)
+
+ def test_annotations_with_arbitrary_input_and_output(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll: int) -> str:
+ return pcoll | Map(lambda num: str(num))
+
+ input_error_str = (
+ r'This input type hint will be ignored and not used for '
+ r'type-checking purposes. Typically, input type hints for a '
+ r'PTransform are single (or nested) types wrapped by a '
+ r'PCollection, or PBegin. Got: {} instead.'.format(int))
+
+ output_error_str = (
+ r'This output type hint will be ignored and not used for '
+ r'type-checking purposes. Typically, output type hints for a '
+ r'PTransform are single (or nested) types wrapped by a '
+ r'PCollection, PDone, or None. Got: {} instead.'.format(str))
+
+ with self.assertLogs(level='WARN') as log:
+ th = MyPTransform().get_type_hints()
+ self.assertIn(input_error_str, log.output[0])
+ self.assertIn(output_error_str, log.output[1])
+ self.assertEqual(th.input_types, None)
+ self.assertEqual(th.output_types, None)
+
+ def test_typing_module_annotations_are_converted_to_beam_annotations(self):
+ class MyPTransform(PTransform):
+ def expand(
+ self, pcoll: PCollection[typing.Dict[str, str]]
+ ) -> PCollection[typing.Dict[str, str]]:
+ return pcoll
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(th.input_types, ((typehints.Dict[str, str], ), {}))
+ self.assertEqual(th.input_types, ((typehints.Dict[str, str], ), {}))
+
+ def test_nested_typing_annotations_are_converted_to_beam_annotations(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll:
+ PCollection[typing.Union[int, typing.Any, typing.Dict[str, float]]]) \
+ -> PCollection[typing.Union[int, typing.Any, typing.Dict[str, float]]]:
+ return pcoll
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(
+ th.input_types,
+ ((typehints.Union[int, typehints.Any, typehints.Dict[str,
+ float]], ), {}))
+ self.assertEqual(
+ th.input_types,
+ ((typehints.Union[int, typehints.Any, typehints.Dict[str,
+ float]], ), {}))
+
+ def test_mixed_annotations_are_converted_to_beam_annotations(self):
+ class MyPTransform(PTransform):
+ def expand(self, pcoll: typing.Any) -> typehints.Any:
+ return pcoll
+
+ th = MyPTransform().get_type_hints()
+ self.assertEqual(th.input_types, ((typehints.Any, ), {}))
+ self.assertEqual(th.input_types, ((typehints.Any, ), {}))
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/sdks/python/apache_beam/utils/shared.py b/sdks/python/apache_beam/utils/shared.py
index 31d4ed6..9f0ac94 100644
--- a/sdks/python/apache_beam/utils/shared.py
+++ b/sdks/python/apache_beam/utils/shared.py
@@ -102,10 +102,12 @@
def __init__(self):
self._lock = threading.Lock()
self._ref = None
+ self._tag = None
def acquire(
self,
- constructor_fn # type: Callable[[], Any]
+ constructor_fn, # type: Callable[[], Any]
+ tag=None # type: Any
):
# type: (...) -> Any
@@ -116,6 +118,9 @@
present in the cache. This function should take no arguments. It should
return an initialised object, or None if the object could not be
initialised / constructed.
+ tag: an optional identifier to store with the cached object. If
+ subsequent calls to acquire use different tags, the object will be
+ reloaded rather than returned from cache.
Returns:
An initialised object, either from a previous initialisation, or
@@ -124,7 +129,8 @@
with self._lock:
# self._ref is None if this is a new control block.
# self._ref() is None if the weak reference was GCed.
- if self._ref is None or self._ref() is None:
+ # self._tag != tag if user specifies a new identifier
+ if self._ref is None or self._ref() is None or self._tag != tag:
result = constructor_fn()
if result is None:
return None
@@ -205,6 +211,7 @@
self,
key, # type: Text
constructor_fn, # type: Callable[[], Any]
+ tag=None # type: Any
):
# type: (...) -> Any
@@ -216,6 +223,9 @@
present in the cache. This function should take no arguments. It should
return an initialised object, or None if the object could not be
initialised / constructed.
+ tag: an optional identifier to store with the cached object. If
+ subsequent calls to acquire use different tags, the object will be
+ reloaded rather than returned from cache.
Returns:
A reference to the initialised object, either from the cache, or
@@ -227,7 +237,7 @@
control_block = _SharedControlBlock()
self._cache_map[key] = control_block
- result = control_block.acquire(constructor_fn)
+ result = control_block.acquire(constructor_fn, tag)
# Because we release the lock in between, if we acquire multiple Shareds
# in a short time, there's no guarantee as to which one will be kept alive.
@@ -266,7 +276,8 @@
def acquire(
self,
- constructor_fn # type: Callable[[], Any]
+ constructor_fn, # type: Callable[[], Any]
+ tag=None # type: Any
):
# type: (...) -> Any
@@ -277,9 +288,12 @@
present in the cache. This function should take no arguments. It should
return an initialised object, or None if the object could not be
initialised / constructed.
+ tag: an optional identifier to store with the cached object. If
+ subsequent calls to acquire use different tags, the object will be
+ reloaded rather than returned from cache.
Returns:
A reference to an initialised object, either from the cache, or
newly-constructed.
"""
- return _shared_map.acquire(self._key, constructor_fn)
+ return _shared_map.acquire(self._key, constructor_fn, tag)
diff --git a/sdks/python/apache_beam/utils/shared_test.py b/sdks/python/apache_beam/utils/shared_test.py
index 35cdb26..28ba7c8 100644
--- a/sdks/python/apache_beam/utils/shared_test.py
+++ b/sdks/python/apache_beam/utils/shared_test.py
@@ -236,6 +236,28 @@
self.assertEqual('sequence3', f3.get_name())
self.assertEqual('sequence4', s3.get_name())
+ def testTagCacheEviction(self):
+ shared1 = shared.Shared()
+ shared2 = shared.Shared()
+
+ def acquire_fn_1():
+ return NamedObject('obj_1')
+
+ def acquire_fn_2():
+ return NamedObject('obj_2')
+
+ # with no tag, shared handle does not know when to evict objects
+ p1 = shared1.acquire(acquire_fn_1)
+ assert p1.get_name() == 'obj_1'
+ p2 = shared1.acquire(acquire_fn_2)
+ assert p2.get_name() == 'obj_1'
+
+ # cache eviction can be forced by specifying different tags
+ p1 = shared2.acquire(acquire_fn_1, tag='1')
+ assert p1.get_name() == 'obj_1'
+ p2 = shared2.acquire(acquire_fn_2, tag='2')
+ assert p2.get_name() == 'obj_2'
+
if __name__ == '__main__':
unittest.main()
diff --git a/sdks/python/apache_beam/utils/subprocess_server.py b/sdks/python/apache_beam/utils/subprocess_server.py
index 0d40181..018a837 100644
--- a/sdks/python/apache_beam/utils/subprocess_server.py
+++ b/sdks/python/apache_beam/utils/subprocess_server.py
@@ -291,11 +291,20 @@
if port:
return port
else:
- s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ try:
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ except OSError as e:
+ # [Errno 97] Address family not supported by protocol
+ # Likely indicates we are in an IPv6-only environment (BEAM-10618). Try
+ # again with AF_INET6.
+ if e.errno == 97:
+ s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+ else:
+ raise e
+
sockets.append(s)
s.bind(('localhost', 0))
- _, free_port = s.getsockname()
- return free_port
+ return s.getsockname()[1]
ports = list(map(find_free_port, ports))
# Close sockets only now to avoid the same port to be chosen twice
diff --git a/sdks/python/apache_beam/utils/subprocess_server_test.py b/sdks/python/apache_beam/utils/subprocess_server_test.py
index 4663b0f..22640bc 100644
--- a/sdks/python/apache_beam/utils/subprocess_server_test.py
+++ b/sdks/python/apache_beam/utils/subprocess_server_test.py
@@ -51,12 +51,12 @@
'https://repo.maven.apache.org/maven2/org/apache/beam/'
'beam-sdks-java-fake/VERSION/beam-sdks-java-fake-VERSION.jar',
subprocess_server.JavaJarServer.path_to_beam_jar(
- 'sdks:java:fake:fatJar', version='VERSION'))
+ ':sdks:java:fake:fatJar', version='VERSION'))
self.assertEqual(
'https://repo.maven.apache.org/maven2/org/apache/beam/'
'beam-sdks-java-fake/VERSION/beam-sdks-java-fake-A-VERSION.jar',
subprocess_server.JavaJarServer.path_to_beam_jar(
- 'sdks:java:fake:fatJar', appendix='A', version='VERSION'))
+ ':sdks:java:fake:fatJar', appendix='A', version='VERSION'))
def test_gradle_jar_dev(self):
with self.assertRaisesRegex(
@@ -69,7 +69,7 @@
'beam-sdks-java-fake-VERSION-SNAPSHOT.jar')) +
' not found.'):
subprocess_server.JavaJarServer.path_to_beam_jar(
- 'sdks:java:fake:fatJar', version='VERSION.dev')
+ ':sdks:java:fake:fatJar', version='VERSION.dev')
with self.assertRaisesRegex(
Exception,
re.escape(os.path.join('sdks',
@@ -80,7 +80,7 @@
'beam-sdks-java-fake-A-VERSION-SNAPSHOT.jar')) +
' not found.'):
subprocess_server.JavaJarServer.path_to_beam_jar(
- 'sdks:java:fake:fatJar', appendix='A', version='VERSION.dev')
+ ':sdks:java:fake:fatJar', appendix='A', version='VERSION.dev')
def test_beam_services(self):
with subprocess_server.JavaJarServer.beam_services({':some:target': 'foo'}):
diff --git a/sdks/python/container/base_image_requirements.txt b/sdks/python/container/base_image_requirements.txt
index 18e805a..3484d38 100644
--- a/sdks/python/container/base_image_requirements.txt
+++ b/sdks/python/container/base_image_requirements.txt
@@ -45,10 +45,10 @@
typing-extensions==3.7.4.1
# GCP extra features
-google-api-core==1.20.0
+google-api-core==1.21.0
google-apitools==0.5.28
google-cloud-pubsub==1.0.2
-google-cloud-bigquery==1.24.0
+google-cloud-bigquery==1.26.1
google-cloud-bigtable==1.0.0
google-cloud-core==1.1.0
google-cloud-datastore==1.7.4
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 44d846a..652301f 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -201,19 +201,20 @@
GCP_REQUIREMENTS = [
'cachetools>=3.1.0,<4',
'google-apitools>=0.5.31,<0.5.32',
- 'google-cloud-datastore>=1.7.1,<1.8.0',
- 'google-cloud-pubsub>=0.39.0,<1.1.0',
+ 'google-auth>=1.18.0,<2',
+ 'google-cloud-datastore>=1.7.1,<2',
+ 'google-cloud-pubsub>=0.39.0,<2',
# GCP packages required by tests
- 'google-cloud-bigquery>=1.6.0,<=1.24.0',
+ 'google-cloud-bigquery>=1.6.0,<2',
'google-cloud-core>=0.28.1,<2',
- 'google-cloud-bigtable>=0.31.1,<1.1.0',
- 'google-cloud-spanner>=1.13.0,<1.14.0',
+ 'google-cloud-bigtable>=0.31.1,<2',
+ 'google-cloud-spanner>=1.13.0,<2',
'grpcio-gcp>=0.2.2,<1',
# GCP Packages required by ML functionality
- 'google-cloud-dlp>=0.12.0,<=0.13.0',
+ 'google-cloud-dlp>=0.12.0,<2',
'google-cloud-language>=1.3.0,<2',
- 'google-cloud-videointelligence>=1.8.0,<1.14.0',
- 'google-cloud-vision>=0.38.0,<0.43.0',
+ 'google-cloud-videointelligence>=1.8.0,<2',
+ 'google-cloud-vision>=0.38.0,<2',
]
INTERACTIVE_BEAM = [
diff --git a/sdks/python/test-suites/portable/common.gradle b/sdks/python/test-suites/portable/common.gradle
index 2e60afa..48312a6 100644
--- a/sdks/python/test-suites/portable/common.gradle
+++ b/sdks/python/test-suites/portable/common.gradle
@@ -101,38 +101,6 @@
}
}
-task crossLanguagePythonJavaKafkaIOFlink {
- dependsOn 'setupVirtualenv'
- dependsOn ':runners:flink:1.10:job-server:shadowJar'
- dependsOn ":sdks:python:container:py${pythonVersionSuffix}:docker"
- dependsOn ':sdks:java:container:docker'
- dependsOn ':sdks:java:io:expansion-service:shadowJar'
- dependsOn ':sdks:java:testing:kafka-service:buildTestKafkaServiceJar'
-
- doLast {
- def kafkaJar = project(":sdks:java:testing:kafka-service:").buildTestKafkaServiceJar.archivePath
- def options = [
- "--runner=FlinkRunner",
- "--parallelism=2",
- "--environment_type=DOCKER",
- "--environment_cache_millis=10000",
- "--experiment=beam_fn_api",
- ]
- exec {
- environment "LOCAL_KAFKA_JAR", kafkaJar
- executable 'sh'
- args '-c', """
- . ${envdir}/bin/activate \\
- && cd ${pythonRootDir} \\
- && pip install -e .[test] \\
- && python setup.py nosetests \\
- --tests apache_beam.io.external.xlang_kafkaio_it_test:CrossLanguageKafkaIOTest \\
- --test-pipeline-options='${options.join(' ')}'
- """
- }
- }
-}
-
task createProcessWorker {
dependsOn ':sdks:python:container:build'
dependsOn 'setupVirtualenv'
@@ -223,12 +191,14 @@
':runners:flink:1.10:job-server:shadowJar',
':sdks:java:container:docker',
':sdks:java:io:expansion-service:shadowJar',
+ ':sdks:java:testing:kafka-service:buildTestKafkaServiceJar'
]
doLast {
def tests = [
"apache_beam.io.gcp.bigquery_read_it_test",
"apache_beam.io.external.xlang_jdbcio_it_test",
+ "apache_beam.io.external.xlang_kafkaio_it_test",
]
def testOpts = ["--tests=${tests.join(',')}"]
def cmdArgs = mapToArgString([
@@ -236,7 +206,9 @@
"suite": "postCommitIT-flink-py${pythonVersionSuffix}",
"pipeline_opts": "--runner=FlinkRunner --project=apache-beam-testing --environment_type=LOOPBACK --temp_location=gs://temp-storage-for-end-to-end-tests/temp-it",
])
+ def kafkaJar = project(":sdks:java:testing:kafka-service:").buildTestKafkaServiceJar.archivePath
exec {
+ environment "LOCAL_KAFKA_JAR", kafkaJar
executable 'sh'
args '-c', ". ${envdir}/bin/activate && ${pythonRootDir}/scripts/run_integration_test.sh $cmdArgs"
}
diff --git a/settings.gradle b/settings.gradle
index 8289f79..638c216 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -120,6 +120,7 @@
include ":sdks:java:io:redis"
include ":sdks:java:io:solr"
include ":sdks:java:io:snowflake"
+include ":sdks:java:io:snowflake:expansion-service"
include ":sdks:java:io:splunk"
include ":sdks:java:io:thrift"
include ":sdks:java:io:tika"
@@ -133,6 +134,7 @@
include ":sdks:java:testing:nexmark"
include ":sdks:java:testing:expansion-service"
include ":sdks:java:testing:kafka-service"
+include ":sdks:java:testing:tpcds"
include ":sdks:python"
include ":sdks:python:apache_beam:testing:load_tests"
include ":sdks:python:container"
diff --git a/website/www/site/content/en/documentation/dsls/sql/walkthrough.md b/website/www/site/content/en/documentation/dsls/sql/walkthrough.md
index a02e14c..59ecb82 100644
--- a/website/www/site/content/en/documentation/dsls/sql/walkthrough.md
+++ b/website/www/site/content/en/documentation/dsls/sql/walkthrough.md
@@ -129,7 +129,7 @@
.builder()
.addInt32Field("appId")
.addInt32Field("reviewerId")
- .withFloatField("rating")
+ .addFloatField("rating")
.addDateTimeField("rowtime")
.build();
diff --git a/website/www/site/content/en/documentation/io/built-in/snowflake.md b/website/www/site/content/en/documentation/io/built-in/snowflake.md
new file mode 100644
index 0000000..078a373
--- /dev/null
+++ b/website/www/site/content/en/documentation/io/built-in/snowflake.md
@@ -0,0 +1,364 @@
+---
+title: "Apache Snowflake I/O connector"
+---
+<!--
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+[Built-in I/O Transforms](/documentation/io/built-in/)
+
+# Snowflake I/O
+Pipeline options and general information about using and running Snowflake IO.
+
+## Authentication
+All authentication methods available for the Snowflake JDBC Driver are possible to use with the IO transforms:
+
+- Username and password
+- Key pair
+- OAuth token
+
+Passing credentials is done via Pipeline options.
+
+Passing credentials is done via Pipeline options used to instantiate `SnowflakeIO.DataSourceConfiguration`:
+{{< highlight java >}}
+SnowflakePipelineOptions options = PipelineOptionsFactory
+ .fromArgs(args)
+ .withValidation()
+ .as(SnowflakePipelineOptions.class);
+SnowflakeCredentials credentials = SnowflakeCredentialsFactory.of(options);
+
+SnowflakeIO.DataSourceConfiguration.create(credentials)
+ .(other DataSourceConfiguration options)
+{{< /highlight >}}
+### Username and password
+To use username/password authentication in SnowflakeIO, invoke your pipeline with the following Pipeline options:
+{{< highlight >}}
+--username=<USERNAME> --password=<PASSWORD>
+{{< /highlight >}}
+### Key pair
+To use this authentication method, you must first generate a key pair and associate the public key with the Snowflake user that will connect using the IO transform. For instructions, see the [Snowflake documentation](https://docs.snowflake.com/en/user-guide/jdbc-configure.html).
+
+To use key pair authentication with SnowflakeIO, invoke your pipeline with the following Pipeline options:
+{{< highlight >}}
+--username=<USERNAME> --privateKeyPath=<PATH_TO_P8_FILE> --privateKeyPassphrase=<PASSWORD_FOR_KEY>
+{{< /highlight >}}
+
+### OAuth token
+SnowflakeIO also supports OAuth token.
+
+**IMPORTANT**: SnowflakeIO requires a valid OAuth access token. It will neither be able to refresh the token nor obtain it using a web-based flow. For information on configuring an OAuth integration and obtaining the token, see the [Snowflake documentation](https://docs.snowflake.com/en/user-guide/oauth-intro.html).
+
+Once you have the token, invoke your pipeline with the following Pipeline options:
+{{< highlight >}}
+--oauthToken=<TOKEN>
+{{< /highlight >}}
+## DataSource Configuration
+DataSource configuration is required in both read and write objects for configuring Snowflake connection properties for IO purposes.
+### General usage
+Create the DataSource configuration:
+{{< highlight java >}}
+ SnowflakeIO.DataSourceConfiguration
+ .create(SnowflakeCredentialsFactory.of(options))
+ .withUrl(options.getUrl())
+ .withServerName(options.getServerName())
+ .withDatabase(options.getDatabase())
+ .withWarehouse(options.getWarehouse())
+ .withSchema(options.getSchema());
+{{< /highlight >}}
+Where parameters can be:
+
+- `.withUrl(...)`
+ - JDBC-like URL for your Snowflake account, including account name and region, without any parameters.
+ - Example: `.withUrl("jdbc:snowflake://account.snowflakecomputing.com")`
+- `.withServerName(...)`
+ - Server Name - full server name with account, zone and domain.
+ - Example: `.withServerName("account.snowflakecomputing.com")`
+- `.withDatabase(...)`
+ - Name of the Snowflake database to use.
+ - Example: `.withDatabase("MY_DATABASE")`
+- `.withWarehouse(...)`
+ - Name of the Snowflake warehouse to use. This parameter is optional. If no warehouse name is specified, the default warehouse for the user is used.
+ - Example: `.withWarehouse("MY_WAREHOUSE")`
+- `.withSchema(...)`
+ - Name of the schema in the database to use. This parameter is optional.
+ - Example: `.withSchema("PUBLIC")`
+
+
+**Note** - either `.withUrl(...)` or `.withServerName(...)` **is required**.
+
+## Pipeline options
+Use Beam’s [Pipeline options](https://beam.apache.org/releases/javadoc/2.17.0/org/apache/beam/sdk/options/PipelineOptions.html) to set options via the command line.
+### Snowflake Pipeline options
+The Snowflake IO library supports the following options, which can be passed via the [command line](https://beam.apache.org/documentation/io/built-in/snowflake/#running-main-command-with-pipeline-options) by default when a Pipeline uses them:
+
+`--url` Snowflake's JDBC-like url including account name and region without any parameters.
+
+`--serverName` Full server name with account, zone and domain.
+
+`--username` Required for username/password and Private Key authentication.
+
+`--oauthToken` Required for OAuth authentication only.
+
+`--password` Required for username/password authentication only.
+
+`--privateKeyPath` Path to Private Key file. Required for Private Key authentication only.
+
+`--privateKeyPassphrase` Private Key's passphrase. Required for Private Key authentication only.
+
+`--stagingBucketName` External bucket path ending with `/`. E.g. `gs://bucket/`. Sub-directories are allowed.
+
+`--storageIntegrationName` Storage integration name
+
+`--warehouse` Warehouse to use. Optional.
+
+`--database` Database name to connect to. Optional.
+
+`--schema` Schema to use. Optional.
+
+`--table` Table to use. Optional.
+
+`--query` Query to use. Optional.
+
+`--role` Role to use. Optional.
+
+`--authenticator` Authenticator to use. Optional.
+
+`--portNumber` Port number. Optional.
+
+`--loginTimeout` Login timeout. Optional.
+
+## Running pipelines on Dataflow
+By default, pipelines are run on [Direct Runner](https://beam.apache.org/documentation/runners/direct/) on your local machine. To run a pipeline on [Google Dataflow](https://cloud.google.com/dataflow/), you must provide the following Pipeline options:
+
+- `--runner=DataflowRunner`
+ - The Dataflow-specific runner.
+
+- `--project=<GCS PROJECT>`
+ - Name of the Google Cloud Platform project.
+
+- `--stagingBucketName=<GCS BUCKET NAME>`
+ - Google Cloud Storage bucket where the Beam files will be staged.
+
+- `--maxNumWorkers=5`
+ - (optional) Maximum number of workers.
+
+- `--appName=<JOB NAME>`
+ - (optional) Prefix for the job name in the Dataflow Dashboard.
+
+More pipeline options for Dataflow can be found [here](https://beam.apache.org/releases/javadoc/2.17.0/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.html).
+
+**Note**: To properly authenticate with Google Cloud, please use [gcloud](https://cloud.google.com/sdk/gcloud/) or follow the [Google Cloud documentation](https://cloud.google.com/docs/authentication/).
+
+**Important**: Please acknowledge [Google Dataflow pricing](https://cloud.google.com/dataflow/pricing).
+
+## Writing to Snowflake tables
+One of the functions of SnowflakeIO is writing to Snowflake tables. This transformation enables you to finish the Beam pipeline with an output operation that sends the user's [PCollection](https://beam.apache.org/releases/javadoc/2.17.0/org/apache/beam/sdk/values/PCollection.html) to your Snowflake database.
+### Batch write (from a bounded source)
+The basic `.write()` operation usage is as follows:
+{{< highlight java >}}
+data.apply(
+ SnowflakeIO.<type>write()
+ .withDataSourceConfiguration(dc)
+ .to("MY_TABLE")
+ .withStagingBucketName("BUCKET NAME")
+ .withStorageIntegrationName("STORAGE INTEGRATION NAME")
+ .withUserDataMapper(mapper)
+)
+{{< /highlight >}}
+Replace `type` with the data type of the PCollection object to write; for example, `SnowflakeIO.<String>write()` for an input PCollection of Strings.
+
+All the below parameters are required:
+
+- `.withDataSourceConfiguration()` Accepts a DatasourceConfiguration object.
+
+- `.to()` Accepts the target Snowflake table name.
+
+- `.withStagingBucketName()` Accepts a cloud bucket path ending with a slash.
+ - Example: `.withStagingBucketName("gs://mybucket/my/dir/")`
+
+- `.withStorageIntegrationName()` Accepts a name of a Snowflake storage integration object created according to Snowflake documentation. Example:
+{{< highlight >}}
+CREATE OR REPLACE STORAGE INTEGRATION test_integration
+TYPE = EXTERNAL_STAGE
+STORAGE_PROVIDER = GCS
+ENABLED = TRUE
+STORAGE_ALLOWED_LOCATIONS = ('gcs://bucket/');
+{{< /highlight >}}
+Then:
+{{< highlight >}}
+.withStorageIntegrationName(test_integration)
+{{< /highlight >}}
+
+- `.withUserDataMapper()` Accepts the UserDataMapper function that will map a user's PCollection to an array of String values `(String[])`.
+
+**Note**:
+SnowflakeIO uses COPY statements behind the scenes to write (using [COPY to table](https://docs.snowflake.net/manuals/sql-reference/sql/copy-into-table.html)). StagingBucketName will be used to save CSV files which will end up in Snowflake. Those CSV files will be saved under the “stagingBucketName” path.
+
+### UserDataMapper function
+The UserDataMapper function is required to map data from a PCollection to an array of String values before the `write()` operation saves the data to temporary .csv files. For example:
+{{< highlight java >}}
+public static SnowflakeIO.UserDataMapper<Long> getCsvMapper() {
+ return (SnowflakeIO.UserDataMapper<Long>) recordLine -> new String[] {recordLine.toString()};
+}
+{{< /highlight >}}
+
+### Additional write options
+#### Transformation query
+The `.withQueryTransformation()` option for the `write()` operation accepts a SQL query as a String value, which will be executed while transferring data staged in CSV files directly to the target Snowflake table. For information about the transformation SQL syntax, see the [Snowflake Documentation](https://docs.snowflake.net/manuals/sql-reference/sql/copy-into-table.html#transformation-parameters).
+
+Usage:
+{{< highlight java >}}
+String query = "SELECT t.$1 from YOUR_TABLE;";
+data.apply(
+ SnowflakeIO.<~>write()
+ .withDataSourceConfiguration(dc)
+ .to("MY_TABLE")
+ .withStagingBucketName("BUCKET NAME")
+ .withStorageIntegrationName("STORAGE INTEGRATION NAME")
+ .withUserDataMapper(mapper)
+ .withQueryTransformation(query)
+)
+{{< /highlight >}}
+
+#### Write disposition
+Define the write behaviour based on the table where data will be written to by specifying the `.withWriteDisposition(...)` option for the `write()` operation. The following values are supported:
+
+- APPEND - Default behaviour. Written data is added to the existing rows in the table,
+
+- EMPTY - The target table must be empty; otherwise, the write operation fails,
+
+- TRUNCATE - The write operation deletes all rows from the target table before writing to it.
+
+Example of usage:
+{{< highlight java >}}
+data.apply(
+ SnowflakeIO.<~>write()
+ .withDataSourceConfiguration(dc)
+ .to("MY_TABLE")
+ .withStagingBucketName("BUCKET NAME")
+ .withStorageIntegrationName("STORAGE INTEGRATION NAME")
+ .withUserDataMapper(mapper)
+ .withWriteDisposition(TRUNCATE)
+)
+{{< /highlight >}}
+
+#### Create disposition
+The `.withCreateDisposition()` option defines the behavior of the write operation if the target table does not exist. The following values are supported:
+
+- CREATE_IF_NEEDED - default behaviour. The write operation checks whether the specified target table exists; if it does not, the write operation attempts to create the table. Specify the schema for the target table using the `.withTableSchema()` option.
+
+- CREATE_NEVER - The write operation fails if the target table does not exist.
+
+Usage:
+{{< highlight java >}}
+data.apply(
+ SnowflakeIO.<~>write()
+ .withDataSourceConfiguration(dc)
+ .to("MY_TABLE")
+ .withStagingBucketName("BUCKET NAME")
+ .withStorageIntegrationName("STORAGE INTEGRATION NAME")
+ .withUserDataMapper(mapper)
+ .withCreateDisposition(CREATE_NEVER)
+)
+{{< /highlight >}}
+
+#### Table schema disposition
+When the `.withCreateDisposition()` option is set to `CREATE_IF_NEEDED`, the `.withTableSchema()` option enables specifying the schema for the created target table.
+A table schema is a list of `SFColumn` objects with name and type corresponding to column type for each column in the table.
+
+Usage:
+{{< highlight java >}}
+SFTableSchema tableSchema =
+ new SFTableSchema(
+ SFColumn.of("my_date", new SFDate(), true),
+ new SFColumn("id", new SFNumber()),
+ SFColumn.of("name", new SFText(), true));
+
+data.apply(
+ SnowflakeIO.<~>write()
+ .withDataSourceConfiguration(dc)
+ .to("MY_TABLE")
+ .withStagingBucketName("BUCKET NAME")
+ .withStorageIntegrationName("STORAGE INTEGRATION NAME")
+ .withUserDataMapper(mapper)
+ .withTableSchema(tableSchema)
+)
+{{< /highlight >}}
+## Reading from Snowflake
+One of the functions of SnowflakeIO is reading Snowflake tables - either full tables via table name or custom data via query. Output of the read transform is a [PCollection](https://beam.apache.org/releases/javadoc/2.17.0/org/apache/beam/sdk/values/PCollection.html) of user-defined data type.
+
+### General usage
+
+The basic `.read()` operation usage:
+{{< highlight java >}}
+PCollection<USER_DATA_TYPE> items = pipeline.apply(
+ SnowflakeIO.<USER_DATA_TYPE>read()
+ .withDataSourceConfiguration(dc)
+ .fromTable("MY_TABLE") // or .fromQuery("QUERY")
+ .withStagingBucketName("BUCKET NAME")
+ .withStorageIntegrationName("STORAGE INTEGRATION NAME")
+ .withCsvMapper(mapper)
+ .withCoder(coder)
+);
+{{< /highlight >}}
+Where all below parameters are required:
+
+- `.withDataSourceConfiguration(...)`
+ - Accepts a DataSourceConfiguration object.
+
+- `.fromTable(...) or .fromQuery(...)`
+ - Specifies a Snowflake table name or custom SQL query.
+
+- `.withStagingBucketName()`
+ - Accepts a cloud bucket name.
+
+- `.withStorageIntegrationName()`
+ - Accepts a name of a Snowflake storage integration object created according to Snowflake documentation. Example:
+{{< highlight >}}
+CREATE OR REPLACE STORAGE INTEGRATION test_integration
+TYPE = EXTERNAL_STAGE
+STORAGE_PROVIDER = GCS
+ENABLED = TRUE
+STORAGE_ALLOWED_LOCATIONS = ('gcs://bucket/');
+{{< /highlight >}}
+Then:
+{{< highlight >}}
+.withStorageIntegrationName(test_integration)
+{{< /highlight >}}
+
+- `.withCsvMapper(mapper)`
+ - Accepts a [CSVMapper](https://beam.apache.org/documentation/io/built-in/snowflake/#csvmapper) instance for mapping String[] to USER_DATA_TYPE.
+- `.withCoder(coder)`
+ - Accepts the [Coder](https://beam.apache.org/releases/javadoc/2.0.0/org/apache/beam/sdk/coders/Coder.html) for USER_DATA_TYPE.
+
+**Note**:
+SnowflakeIO uses COPY statements behind the scenes to read (using [COPY to location](https://docs.snowflake.net/manuals/sql-reference/sql/copy-into-location.html)) files staged in cloud storage. StagingBucketName will be used as a temporary location for storing CSV files. Those temporary directories will be named `sf_copy_csv_DATE_TIME_RANDOMSUFFIX` and they will be removed automatically once the Read operation finishes.
+
+### CSVMapper
+SnowflakeIO uses a [`COPY INTO <location>`](https://docs.snowflake.net/manuals/sql-reference/sql/copy-into-location.html) statement to move data from a Snowflake table to Google Cloud Storage as CSV files. These files are then downloaded via [FileIO](https://beam.apache.org/releases/javadoc/2.3.0/index.html?org/apache/beam/sdk/io/FileIO.html) and processed line by line. Each line is split into an array of Strings using the [OpenCSV](http://opencsv.sourceforge.net/) library.
+
+The CSVMapper’s job is to give the user the possibility to convert the array of Strings to a user-defined type, e.g. `GenericRecord` for Avro or Parquet files, or a custom POJO.
+
+Example implementation of CsvMapper for GenericRecord:
+{{< highlight java >}}
+static SnowflakeIO.CsvMapper<GenericRecord> getCsvMapper() {
+ return (SnowflakeIO.CsvMapper<GenericRecord>)
+ parts -> {
+ return new GenericRecordBuilder(PARQUET_SCHEMA)
+ .set("ID", Long.valueOf(parts[0]))
+ .set("NAME", parts[1])
+ [...]
+ .build();
+ };
+}
+{{< /highlight >}}
\ No newline at end of file
diff --git a/website/www/site/content/en/documentation/patterns/ai-platform.md b/website/www/site/content/en/documentation/patterns/ai-platform.md
index 905f895..b2a7b10 100644
--- a/website/www/site/content/en/documentation/patterns/ai-platform.md
+++ b/website/www/site/content/en/documentation/patterns/ai-platform.md
@@ -35,7 +35,7 @@
{{< /highlight >}}
{{< highlight java >}}
-// Java examples will be available on Beam 2.23 release.
+{{< code_sample "examples/java/src/main/java/org/apache/beam/examples/snippets/Snippets.java" NlpAnalyzeText >}}
{{< /highlight >}}
@@ -79,7 +79,7 @@
{{< /highlight >}}
{{< highlight java >}}
-// Java examples will be available on Beam 2.23 release.
+{{< code_sample "examples/java/src/main/java/org/apache/beam/examples/snippets/Snippets.java" NlpExtractSentiments >}}
{{< /highlight >}}
The snippet loops over `sentences` and, for each sentence, extracts the sentiment score.
@@ -99,7 +99,7 @@
{{< /highlight >}}
{{< highlight java >}}
-// Java examples will be available on Beam 2.23 release.
+{{< code_sample "examples/java/src/main/java/org/apache/beam/examples/snippets/Snippets.java" NlpExtractEntities >}}
{{< /highlight >}}
Entities can be found in `entities` attribute. Just like before, `entities` is a sequence, that's why list comprehension is a viable choice. The most tricky part is interpreting the types of entities. Natural Language API defines entity types as enum. In a response object, entity types are returned as integers. That's why a user has to instantiate `naturallanguageml.enums.Entity.Type` to access a human-readable name.
@@ -119,7 +119,7 @@
{{< /highlight >}}
{{< highlight java >}}
-// Java examples will be available on Beam 2.23 release.
+{{< code_sample "examples/java/src/main/java/org/apache/beam/examples/snippets/Snippets.java" NlpAnalyzeDependencyTree >}}
{{< /highlight >}}
The output is below. For better readability, indexes are replaced by text which they refer to:
diff --git a/website/www/site/content/en/documentation/runners/flink.md b/website/www/site/content/en/documentation/runners/flink.md
index 276f83b..d173f70 100644
--- a/website/www/site/content/en/documentation/runners/flink.md
+++ b/website/www/site/content/en/documentation/runners/flink.md
@@ -326,7 +326,7 @@
<th>Artifact Id</th>
</tr>
<tr>
- <td rowspan="3">2.21.0</td>
+ <td rowspan="3">≥ 2.21.0</td>
<td>1.10.x</td>
<td>beam-runners-flink-1.10</td>
</tr>
@@ -339,7 +339,7 @@
<td>beam-runners-flink-1.8</td>
</tr>
<tr>
- <td rowspan="3">2.17.0-2.20.0</td>
+ <td rowspan="3">2.17.0 - 2.20.0</td>
<td>1.9.x</td>
<td>beam-runners-flink-1.9</td>
</tr>
diff --git a/website/www/site/content/en/documentation/sdks/python-type-safety.md b/website/www/site/content/en/documentation/sdks/python-type-safety.md
index 074460c..755f795 100644
--- a/website/www/site/content/en/documentation/sdks/python-type-safety.md
+++ b/website/www/site/content/en/documentation/sdks/python-type-safety.md
@@ -71,7 +71,7 @@
Using Annotations has the added benefit of allowing use of a static type checker (such as mypy) to additionally type check your code.
If you already use a type checker, using annotations instead of decorators reduces code duplication.
However, annotations do not cover all the use cases that decorators and inline declarations do.
-Two such are the `expand` of a composite transform and lambda functions.
+For instance, they do not work for lambda functions.
### Declaring Type Hints Using Type Annotations
@@ -82,6 +82,7 @@
Annotations are currently supported on:
- `process()` methods on `DoFn` subclasses.
+ - `expand()` methods on `PTransform` subclasses.
- Functions passed to: `ParDo`, `Map`, `FlatMap`, `Filter`.
The following code declares an `int` input and a `str` output type hint on the `to_id` transform, using annotations on `my_fn`.
@@ -90,6 +91,15 @@
{{< code_sample "sdks/python/apache_beam/examples/snippets/snippets_test_py3.py" type_hints_map_annotations >}}
{{< /highlight >}}
+The following code demonstrates how to use annotations on `PTransform` subclasses.
+A valid annotation is a `PCollection` that wraps an internal (nested) type, `PBegin`, `PDone`, or `None`.
+The following code declares type hints on a custom `PTransform` that takes a `PCollection[int]` input
+and outputs a `PCollection[str]`, using annotations.
+
+{{< highlight py >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/snippets_test_py3.py" type_hints_ptransforms >}}
+{{< /highlight >}}
+
The following code declares `int` input and output type hints on `filter_evens`, using annotations on `FilterEvensDoFn.process`.
Since `process` returns a generator, the output type for a DoFn producing a `PCollection[int]` is annotated as `Iterable[int]` (`Generator[int, None, None]` would also work here).
Beam will remove the outer iterable of the return type on the `DoFn.process` method and functions passed to `FlatMap` to deduce the element type of resulting PCollection .
@@ -182,6 +192,7 @@
* `Iterable[T]`
* `Iterator[T]`
* `Generator[T]`
+* `PCollection[T]`
**Note:** The `Tuple[T, U]` type hint is a tuple with a fixed number of heterogeneously typed elements, while the `Tuple[T, ...]` type hint is a tuple with a variable of homogeneously typed elements.
diff --git a/website/www/site/content/en/documentation/transforms/python/aggregation/max.md b/website/www/site/content/en/documentation/transforms/python/aggregation/max.md
index d6d7f19..7769b81 100644
--- a/website/www/site/content/en/documentation/transforms/python/aggregation/max.md
+++ b/website/www/site/content/en/documentation/transforms/python/aggregation/max.md
@@ -17,7 +17,61 @@
# Max
-## Examples
-See [BEAM-7390](https://issues.apache.org/jira/browse/BEAM-7390) for updates.
+{{< localstorage language language-py >}}
-## Related transforms
\ No newline at end of file
+{{< button-pydoc path="apache_beam.transforms.core" class="CombineGlobally" >}}
+
+Gets the element with the maximum value within each aggregation.
+
+## Examples
+
+In the following example, we create a pipeline with a `PCollection`.
+Then, we get the element with the maximum value in different ways.
+
+### Example 1: Maximum element in a PCollection
+
+We use `CombineGlobally()` to get the maximum element from the *entire* `PCollection`.
+
+{{< highlight py >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/max.py" max_globally >}}
+{{< /highlight >}}
+
+{{< paragraph class="notebook-skip" >}}
+Output:
+{{< /paragraph >}}
+
+{{< highlight class="notebook-skip" >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/max_test.py" max_element >}}
+{{< /highlight >}}
+
+{{< buttons-code-snippet
+ py="sdks/python/apache_beam/examples/snippets/transforms/aggregation/max.py" >}}
+
+### Example 2: Maximum elements for each key
+
+We use `CombinePerKey()` to get the maximum element for each unique key in a `PCollection` of key-values.
+
+{{< highlight py >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/max.py" max_per_key >}}
+{{< /highlight >}}
+
+{{< paragraph class="notebook-skip" >}}
+Output:
+{{< /paragraph >}}
+
+{{< highlight class="notebook-skip" >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/max_test.py" elements_with_max_value_per_key >}}
+{{< /highlight >}}
+
+{{< buttons-code-snippet
+ py="sdks/python/apache_beam/examples/snippets/transforms/aggregation/max.py" >}}
+
+## Related transforms
+
+* [CombineGlobally](/documentation/transforms/python/aggregation/combineglobally)
+* [CombinePerKey](/documentation/transforms/python/aggregation/combineperkey)
+* [Mean](/documentation/transforms/python/aggregation/mean)
+* [Min](/documentation/transforms/python/aggregation/min)
+* [Sum](/documentation/transforms/python/aggregation/sum)
+
+{{< button-pydoc path="apache_beam.transforms.core" class="CombineGlobally" >}}
diff --git a/website/www/site/content/en/documentation/transforms/python/aggregation/mean.md b/website/www/site/content/en/documentation/transforms/python/aggregation/mean.md
index 28648db..69fa5ad 100644
--- a/website/www/site/content/en/documentation/transforms/python/aggregation/mean.md
+++ b/website/www/site/content/en/documentation/transforms/python/aggregation/mean.md
@@ -16,23 +16,64 @@
-->
# Mean
-<table align="left">
- <a target="_blank" class="button"
- href="https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.combiners.html?highlight=mean#apache_beam.transforms.combiners.Mean">
- <img src="https://beam.apache.org/images/logos/sdks/python.png" width="20px" height="20px"
- alt="Pydoc" />
- Pydoc
- </a>
-</table>
-<br><br>
+{{< localstorage language language-py >}}
+
+{{< button-pydoc path="apache_beam.transforms.combiners" class="Mean" >}}
Transforms for computing the arithmetic mean of the elements in a collection,
or the mean of the values associated with each key in a collection of
key-value pairs.
## Examples
-See [BEAM-7390](https://issues.apache.org/jira/browse/BEAM-7390) for updates.
-## Related transforms
-* [CombineGlobally](/documentation/transforms/python/aggregation/combineglobally) to combine elements.
\ No newline at end of file
+In the following example, we create a pipeline with a `PCollection`.
+Then, we compute the mean (average) of the elements in different ways.
+
+### Example 1: Mean of elements in a PCollection
+
+We use `Mean.Globally()` to get the average of the elements from the *entire* `PCollection`.
+
+{{< highlight py >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean.py" mean_globally >}}
+{{< /highlight >}}
+
+{{< paragraph class="notebook-skip" >}}
+Output:
+{{< /paragraph >}}
+
+{{< highlight class="notebook-skip" >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean_test.py" mean_element >}}
+{{< /highlight >}}
+
+{{< buttons-code-snippet
+ py="sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean.py" >}}
+
+### Example 2: Mean of elements for each key
+
+We use `Mean.PerKey()` to get the average of the elements for each unique key in a `PCollection` of key-values.
+
+{{< highlight py >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean.py" mean_per_key >}}
+{{< /highlight >}}
+
+{{< paragraph class="notebook-skip" >}}
+Output:
+{{< /paragraph >}}
+
+{{< highlight class="notebook-skip" >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean_test.py" elements_with_mean_value_per_key >}}
+{{< /highlight >}}
+
+{{< buttons-code-snippet
+ py="sdks/python/apache_beam/examples/snippets/transforms/aggregation/mean.py" >}}
+
+## Related transforms
+
+* [CombineGlobally](/documentation/transforms/python/aggregation/combineglobally)
+* [CombinePerKey](/documentation/transforms/python/aggregation/combineperkey)
+* [Max](/documentation/transforms/python/aggregation/max)
+* [Min](/documentation/transforms/python/aggregation/min)
+* [Sum](/documentation/transforms/python/aggregation/sum)
+
+{{< button-pydoc path="apache_beam.transforms.combiners" class="Mean" >}}
diff --git a/website/www/site/content/en/documentation/transforms/python/aggregation/min.md b/website/www/site/content/en/documentation/transforms/python/aggregation/min.md
index 7a04c19..8f385d2 100644
--- a/website/www/site/content/en/documentation/transforms/python/aggregation/min.md
+++ b/website/www/site/content/en/documentation/transforms/python/aggregation/min.md
@@ -17,7 +17,61 @@
# Min
-## Examples
-See [BEAM-7390](https://issues.apache.org/jira/browse/BEAM-7390) for updates.
+{{< localstorage language language-py >}}
-## Related transforms
\ No newline at end of file
+{{< button-pydoc path="apache_beam.transforms.core" class="CombineGlobally" >}}
+
+Gets the element with the minimum value within each aggregation.
+
+## Examples
+
+In the following example, we create a pipeline with a `PCollection`.
+Then, we get the element with the minimum value in different ways.
+
+### Example 1: Minimum element in a PCollection
+
+We use `CombineGlobally()` to get the minimum element from the *entire* `PCollection`.
+
+{{< highlight py >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/min.py" min_globally >}}
+{{< /highlight >}}
+
+{{< paragraph class="notebook-skip" >}}
+Output:
+{{< /paragraph >}}
+
+{{< highlight class="notebook-skip" >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/min_test.py" min_element >}}
+{{< /highlight >}}
+
+{{< buttons-code-snippet
+ py="sdks/python/apache_beam/examples/snippets/transforms/aggregation/min.py" >}}
+
+### Example 2: Minimum elements for each key
+
+We use `CombinePerKey()` to get the minimum element for each unique key in a `PCollection` of key-values.
+
+{{< highlight py >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/min.py" min_per_key >}}
+{{< /highlight >}}
+
+{{< paragraph class="notebook-skip" >}}
+Output:
+{{< /paragraph >}}
+
+{{< highlight class="notebook-skip" >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/min_test.py" elements_with_min_value_per_key >}}
+{{< /highlight >}}
+
+{{< buttons-code-snippet
+ py="sdks/python/apache_beam/examples/snippets/transforms/aggregation/min.py" >}}
+
+## Related transforms
+
+* [CombineGlobally](/documentation/transforms/python/aggregation/combineglobally)
+* [CombinePerKey](/documentation/transforms/python/aggregation/combineperkey)
+* [Max](/documentation/transforms/python/aggregation/max)
+* [Mean](/documentation/transforms/python/aggregation/mean)
+* [Sum](/documentation/transforms/python/aggregation/sum)
+
+{{< button-pydoc path="apache_beam.transforms.core" class="CombineGlobally" >}}
diff --git a/website/www/site/content/en/documentation/transforms/python/aggregation/sample.md b/website/www/site/content/en/documentation/transforms/python/aggregation/sample.md
index 7d10eb0..33fc8f6 100644
--- a/website/www/site/content/en/documentation/transforms/python/aggregation/sample.md
+++ b/website/www/site/content/en/documentation/transforms/python/aggregation/sample.md
@@ -14,24 +14,62 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-# Sample
-<table align="left">
- <a target="_blank" class="button"
- href="https://beam.apache.org/releases/pydoc/2.12.0/apache_beam.transforms.combiners.html?#apache_beam.transforms.combiners.Sample">
- <img src="https://beam.apache.org/images/logos/sdks/python.png" width="20px" height="20px"
- alt="Pydoc" />
- Pydoc
- </a>
-</table>
-<br><br>
+# Sample
+
+{{< localstorage language language-py >}}
+
+{{< button-pydoc path="apache_beam.transforms.combiners" class="Sample" >}}
Transforms for taking samples of the elements in a collection, or
-samples of the values associated with each key in a collection of
+samples of the values associated with each key in a collection of
key-value pairs.
## Examples
-See [BEAM-7390](https://issues.apache.org/jira/browse/BEAM-7390) for updates.
-## Related transforms
-* [Top](/documentation/transforms/python/aggregation/top) finds the largest or smallest element.
\ No newline at end of file
+In the following example, we create a pipeline with a `PCollection`.
+Then, we get a random sample of elements in different ways.
+
+### Example 1: Sample elements from a PCollection
+
+We use `Sample.FixedSizeGlobally()` to get a fixed-size random sample of elements from the *entire* `PCollection`.
+
+{{< highlight py >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample.py" sample_fixed_size_globally >}}
+{{< /highlight >}}
+
+{{< paragraph class="notebook-skip" >}}
+Output:
+{{< /paragraph >}}
+
+{{< highlight class="notebook-skip" >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample_test.py" sample >}}
+{{< /highlight >}}
+
+{{< buttons-code-snippet
+ py="sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample.py" >}}
+
+### Example 2: Sample elements for each key
+
+We use `Sample.FixedSizePerKey()` to get fixed-size random samples for each unique key in a `PCollection` of key-values.
+
+{{< highlight py >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample.py" sample_fixed_size_per_key >}}
+{{< /highlight >}}
+
+{{< paragraph class="notebook-skip" >}}
+Output:
+{{< /paragraph >}}
+
+{{< highlight class="notebook-skip" >}}
+{{< code_sample "sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample_test.py" samples_per_key >}}
+{{< /highlight >}}
+
+{{< buttons-code-snippet
+ py="sdks/python/apache_beam/examples/snippets/transforms/aggregation/sample.py" >}}
+
+## Related transforms
+
+* [Top](/documentation/transforms/python/aggregation/top) finds the largest or smallest element.
+
+{{< button-pydoc path="apache_beam.transforms.combiners" class="Sample" >}}
diff --git a/website/www/site/content/en/documentation/transforms/python/overview.md b/website/www/site/content/en/documentation/transforms/python/overview.md
index 4cf68e9..d648ac7 100644
--- a/website/www/site/content/en/documentation/transforms/python/overview.md
+++ b/website/www/site/content/en/documentation/transforms/python/overview.md
@@ -56,9 +56,9 @@
<tr><td><a href="/documentation/transforms/python/aggregation/groupbykey">GroupByKey</a></td><td>Takes a keyed collection of elements and produces a collection where each element consists of a key and all values associated with that key.</td></tr>
<tr><td><a href="/documentation/transforms/python/aggregation/groupintobatches">GroupIntoBatches</a></td><td>Batches the input into desired batch size.</td></tr>
<tr><td><a href="/documentation/transforms/python/aggregation/latest">Latest</a></td><td>Gets the element with the latest timestamp.</td></tr>
- <tr><td>Max</td><td>Not available.</td></tr>
+ <tr><td><a href="/documentation/transforms/python/aggregation/max">Max</a></td><td>Gets the element with the maximum value within each aggregation.</td></tr>
<tr><td><a href="/documentation/transforms/python/aggregation/mean">Mean</a></td><td>Computes the average within each aggregation.</td></tr>
- <tr><td>Min</td><td>Not available.</td></tr>
+ <tr><td><a href="/documentation/transforms/python/aggregation/min">Min</a></td><td>Gets the element with the minimum value within each aggregation.</td></tr>
<tr><td><a href="/documentation/transforms/python/aggregation/sample">Sample</a></td><td>Randomly select some number of elements from each aggregation.</td></tr>
<tr><td>Sum</td><td>Not available.</td></tr>
<tr><td><a href="/documentation/transforms/python/aggregation/top">Top</a></td><td>Compute the largest element(s) in each aggregation.</td></tr>
diff --git a/website/www/site/content/en/get-started/quickstart-py.md b/website/www/site/content/en/get-started/quickstart-py.md
index 1c0afda..b2a503e 100644
--- a/website/www/site/content/en/get-started/quickstart-py.md
+++ b/website/www/site/content/en/get-started/quickstart-py.md
@@ -157,19 +157,16 @@
python -m apache_beam.examples.wordcount --input /path/to/inputfile --output /path/to/write/counts
{{< /highlight >}}
-{{< highlight class="runner-flink-local" >}}
-Currently, running wordcount.py on Flink requires a full download of the Beam source code.
-See https://beam.apache.org/roadmap/portability/#python-on-flink for more information.
-{{< /highlight >}}
-
-{{< highlight class="runner-flink-cluster" >}}
-Currently, running wordcount.py on Flink requires a full download of the Beam source code.
-See https://beam.apache.org/documentation/runners/flink/ for more information.
+{{< highlight class="runner-flink" >}}
+python -m apache_beam.examples.wordcount --input /path/to/inputfile \
+ --output /path/to/write/counts \
+ --runner FlinkRunner
{{< /highlight >}}
{{< highlight class="runner-spark" >}}
-Currently, running wordcount.py on Spark requires a full download of the Beam source code.
-See https://beam.apache.org/roadmap/portability/#python-on-spark for more information.
+python -m apache_beam.examples.wordcount --input /path/to/inputfile \
+ --output /path/to/write/counts \
+ --runner SparkRunner
{{< /highlight >}}
{{< highlight class="runner-dataflow" >}}
diff --git a/website/www/site/content/en/get-started/wordcount-example.md b/website/www/site/content/en/get-started/wordcount-example.md
index cd08f5a..9be441e 100644
--- a/website/www/site/content/en/get-started/wordcount-example.md
+++ b/website/www/site/content/en/get-started/wordcount-example.md
@@ -387,18 +387,20 @@
{{< /highlight >}}
{{< highlight class="runner-flink-local" >}}
-Currently, running wordcount.py on Flink requires a full download of the Beam source code.
-See https://beam.apache.org/roadmap/portability/#python-on-flink for more information.
+python -m apache_beam.examples.wordcount --input /path/to/inputfile \
+ --output /path/to/write/counts \
+ --runner FlinkRunner
{{< /highlight >}}
{{< highlight class="runner-flink-cluster" >}}
-Currently, running wordcount.py on Flink requires a full download of the Beam source code.
-See https://beam.apache.org/documentation/runners/flink/ for more information.
+# Running Beam Python on a distributed Flink cluster requires additional configuration.
+# See https://beam.apache.org/documentation/runners/flink/ for more information.
{{< /highlight >}}
{{< highlight class="runner-spark" >}}
-Currently, running wordcount.py on Spark requires a full download of the Beam source code.
-See https://beam.apache.org/roadmap/portability/#python-on-spark for more information.
+python -m apache_beam.examples.wordcount --input /path/to/inputfile \
+ --output /path/to/write/counts \
+ --runner SparkRunner
{{< /highlight >}}
{{< highlight class="runner-dataflow" >}}
diff --git a/website/www/site/content/en/roadmap/connectors-multi-sdk.md b/website/www/site/content/en/roadmap/connectors-multi-sdk.md
index 464ad83..3b13f59 100644
--- a/website/www/site/content/en/roadmap/connectors-multi-sdk.md
+++ b/website/www/site/content/en/roadmap/connectors-multi-sdk.md
@@ -80,6 +80,7 @@
* Java KafkaIO - completed - [BEAM-7029](https://issues.apache.org/jira/browse/BEAM-7029)
* Java KinesisIO - In progress - [BEAM-10137](https://issues.apache.org/jira/browse/BEAM-10137), [BEAM-10138](https://issues.apache.org/jira/browse/BEAM-10138)
* Java PubSubIO - In progress - [BEAM-7738](https://issues.apache.org/jira/browse/BEAM-7738)
+* Java SnowflakeIO - In progress - [BEAM-9897](https://issues.apache.org/jira/browse/BEAM-9897), [BEAM-9898](https://issues.apache.org/jira/browse/BEAM-9898)
* Java SpannerIO - In progress - [BEAM-10139](https://issues.apache.org/jira/browse/BEAM-10139), [BEAM-10140](https://issues.apache.org/jira/browse/BEAM-10140)
* Java SQL - completed - [BEAM-8603](https://issues.apache.org/jira/browse/BEAM-8603)
diff --git a/website/www/site/data/io_matrix.yaml b/website/www/site/data/io_matrix.yaml
index fb12b3d..84637c6 100644
--- a/website/www/site/data/io_matrix.yaml
+++ b/website/www/site/data/io_matrix.yaml
@@ -143,7 +143,7 @@
implementations:
- language: java
name: org.apache.beam.sdk.io.aws.s3.S3FileSystemRegistrar
- url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/hdfs/package-summary.html
+ url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/aws/s3/S3FileSystemRegistrar.html
- transform: In-memory
description: "`FileSystem` implementation in memory; useful for testing."
implementations:
@@ -303,6 +303,13 @@
- language: py
name: apache_beam.io.gcp.datastore.v1new.datastoreio
url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.datastore.v1new.datastoreio.html
+ - transform: SnowflakeIO
+ description: Experimental Transforms for reading from and writing to [Snowflake](https://www.snowflake.com/).
+ docs: /documentation/io/built-in/snowflake
+ implementations:
+ - language: java
+ name: org.apache.beam.sdk.io.snowflake.SnowflakeIO
+ url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/snowflake/SnowflakeIO.html
- transform: SpannerIO
description: Experimental Transforms for reading from and writing to [Google Cloud Spanner](https://cloud.google.com/spanner).
implementations:
diff --git a/website/www/site/layouts/partials/section-menu/en/documentation.html b/website/www/site/layouts/partials/section-menu/en/documentation.html
index 600e197..fd35a21bd 100644
--- a/website/www/site/layouts/partials/section-menu/en/documentation.html
+++ b/website/www/site/layouts/partials/section-menu/en/documentation.html
@@ -74,6 +74,7 @@
<li><a href="/documentation/io/built-in/hadoop/">Hadoop Input/Output Format IO</a></li>
<li><a href="/documentation/io/built-in/hcatalog/">HCatalog IO</a></li>
<li><a href="/documentation/io/built-in/google-bigquery/">Google BigQuery I/O connector</a></li>
+ <li><a href="/documentation/io/built-in/snowflake/">Snowflake I/O connector</a></li>
</ul>
</li>
@@ -200,7 +201,9 @@
<li><a href="/documentation/transforms/python/aggregation/groupbykey/">GroupByKey</a></li>
<li><a href="/documentation/transforms/python/aggregation/groupintobatches/">GroupIntoBatches</a></li>
<li><a href="/documentation/transforms/python/aggregation/latest/">Latest</a></li>
+ <li><a href="/documentation/transforms/python/aggregation/max/">Max</a></li>
<li><a href="/documentation/transforms/python/aggregation/mean/">Mean</a></li>
+ <li><a href="/documentation/transforms/python/aggregation/min/">Min</a></li>
<li><a href="/documentation/transforms/python/aggregation/sample/">Sample</a></li>
<li><a href="/documentation/transforms/python/aggregation/top/">Top</a></li>
</ul>