#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
from __future__ import print_function

import argparse
import logging
import sys
import unittest
from shutil import rmtree
from tempfile import mkdtemp

from apache_beam.options.pipeline_options import DebugOptions
from apache_beam.options.pipeline_options import PortableOptions
from apache_beam.runners.portability import portable_runner
from apache_beam.runners.portability import portable_runner_test

if __name__ == '__main__':
  # Run as
  #
  # python -m apache_beam.runners.portability.spark_runner_test \
  #     --spark_job_server_jar=/path/to/job_server.jar \
  #     [SparkRunnerTest.test_method, ...]
  parser = argparse.ArgumentParser(add_help=True)
  parser.add_argument('--spark_job_server_jar',
                      help='Job server jar to submit jobs.')
  parser.add_argument('--environment_type', default='docker',
                      help='Environment type. docker or process')
  parser.add_argument('--environment_config', help='Environment config.')
  parser.add_argument('--extra_experiments', default=[], action='append',
                      help='Beam experiments config.')
  known_args, args = parser.parse_known_args(sys.argv)
  sys.argv = args
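  # Arguments argparse did not consume (e.g. individual test method names)
  # remain in sys.argv and are picked up by unittest.main() below.
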
  spark_job_server_jar = known_args.spark_job_server_jar
  environment_type = known_args.environment_type.lower()
  environment_config = (
      known_args.environment_config if known_args.environment_config else None)
  extra_experiments = known_args.extra_experiments

  # This test class is defined here so that it only runs when this file is
  # invoked explicitly.
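  # With _use_grpc and _use_subprocesses set, the base PortableRunnerTest
  # launches the job server command returned by _subprocess_command in a
  # subprocess and submits test jobs to it over gRPC.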
  class SparkRunnerTest(portable_runner_test.PortableRunnerTest):
    _use_grpc = True
    _use_subprocesses = True
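
    # Builds the command used to launch the Spark job server in local mode.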
    @classmethod
    def _subprocess_command(cls, job_port, expansion_port):
      # The artifact staging directory is deleted at the end of this method;
      # the job server recreates and uses it when it starts up.
      tmp_dir = mkdtemp(prefix='sparktest')
      try:
        return [
            'java',
            '-Dbeam.spark.test.reuseSparkContext=true',
            '-jar', spark_job_server_jar,
            '--spark-master-url', 'local',
            '--artifacts-dir', tmp_dir,
            '--job-port', str(job_port),
            '--artifact-port', '0',
            '--expansion-port', str(expansion_port),
        ]
      finally:
        rmtree(tmp_dir)

    @classmethod
    def get_runner(cls):
      return portable_runner.PortableRunner()
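
    # Extend the default pipeline options with the experiments and SDK harness
    # environment settings parsed from the command line above.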
    def create_options(self):
      options = super(SparkRunnerTest, self).create_options()
      options.view_as(DebugOptions).experiments = [
          'beam_fn_api'] + extra_experiments
      options.view_as(PortableOptions).environment_type = (
          environment_type.upper())
      if environment_config:
        options.view_as(PortableOptions).environment_config = (
            environment_config)
      return options
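
    # Each test below is skipped until the Spark runner supports the feature
    # in question; the SkipTest message names the tracking JIRA issue.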
    def test_metrics(self):
      # Skip until Spark runner supports metrics.
      raise unittest.SkipTest("BEAM-7219")

    def test_pardo_state_only(self):
      # Skip until Spark runner supports user state.
      raise unittest.SkipTest("BEAM-7044")

    def test_pardo_timers(self):
      # Skip until Spark runner supports timers.
      raise unittest.SkipTest("BEAM-7221")

    def test_pardo_state_timers(self):
      # Skip until Spark runner supports user state and timers.
      raise unittest.SkipTest("BEAM-7044, BEAM-7221")

    def test_windowed_pardo_state_timers(self):
      # Skip until Spark runner supports user state and timers.
      raise unittest.SkipTest("BEAM-7044, BEAM-7221")

    def test_sdf(self):
      # Skip until Spark runner supports SDF.
      raise unittest.SkipTest("BEAM-7222")

    def test_sdf_with_sdf_initiated_checkpointing(self):
      # Skip until Spark runner supports SDF.
      raise unittest.SkipTest("BEAM-7222")

    def test_external_transforms(self):
      # Skip until Spark runner supports external transforms.
      raise unittest.SkipTest("BEAM-7232")

    def test_callbacks_with_exception(self):
      # Skip until Spark runner supports bundle finalization.
      raise unittest.SkipTest("BEAM-7233")

    def test_register_finalizations(self):
      # Skip until Spark runner supports bundle finalization.
      raise unittest.SkipTest("BEAM-7233")

    def test_flattened_side_input(self):
      # Blocked on support for transcoding:
      # https://jira.apache.org/jira/browse/BEAM-7236
      super(SparkRunnerTest, self).test_flattened_side_input(
          with_transcoding=False)

    # Inherits all other tests from PortableRunnerTest.

  # Run the tests.
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()