#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
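"""
System test for the Google Cloud Dataproc example DAG.

Creates a GCS bucket, uploads a minimal PySpark job to it, runs the
``example_gcp_dataproc`` DAG against a real GCP project, and deletes the
bucket afterwards. As the markers below indicate, it requires real GCP
credentials and a MySQL or Postgres metadata backend.
"""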
import os

import pytest

from tests.providers.google.cloud.utils.gcp_authenticator import GCP_DATAPROC_KEY
from tests.test_utils.gcp_system_helpers import CLOUD_DAG_FOLDER, GoogleSystemTest, provide_gcp_context

BUCKET = os.environ.get("GCP_DATAPROC_BUCKET", "dataproc-system-tests")
PYSPARK_MAIN = os.environ.get("PYSPARK_MAIN", "hello_world.py")
PYSPARK_URI = "gs://{}/{}".format(BUCKET, PYSPARK_MAIN)

# Minimal PySpark job that the example DAG submits; uploaded to GCS in setUp().
pyspark_file = """
#!/usr/bin/python
import pyspark
sc = pyspark.SparkContext()
rdd = sc.parallelize(['Hello,', 'world!'])
words = sorted(rdd.collect())
print(words)
"""


@pytest.mark.backend("mysql", "postgres")
@pytest.mark.credential_file(GCP_DATAPROC_KEY)
class DataprocExampleDagsTest(GoogleSystemTest):
    @provide_gcp_context(GCP_DATAPROC_KEY)
    def setUp(self):
        super().setUp()
        self.create_gcs_bucket(BUCKET)
        # Note: ``bucket`` receives the full gs:// destination URI here, not
        # just the bucket name.
        self.upload_content_to_gcs(lines=pyspark_file, bucket=PYSPARK_URI, filename=PYSPARK_MAIN)

    @provide_gcp_context(GCP_DATAPROC_KEY)
    def tearDown(self):
        self.delete_gcs_bucket(BUCKET)
        super().tearDown()

    @provide_gcp_context(GCP_DATAPROC_KEY)
    def test_run_example_dag(self):
        self.run_dag(dag_id="example_gcp_dataproc", dag_folder=CLOUD_DAG_FOLDER)