blob: 6e4a9a4518c9e0c1f51f11bdf9c4f5993f4f8e46 [file] [log] [blame]
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Performance GBK streaming test that uses PubSub SyntheticSources
messages.
Test requires --test-pipeline-options with following options:
* --pubsub_topic_name=name - name of PubSub topic which is
already created
* --project=project-name
* --num_of_records=1000 - expected number of records
* --runner=TestDataflowRunner or TestDirectRunner - only
test runners support matchers
Optional pipeline options:
* --timeout=1000 - maximum time the test will run while
waiting for all messages.
* --publish_to_big_query=true/false
* --metrics_dataset=python_load_tests
* --metrics_table=gbk_stream
"""
from __future__ import absolute_import
import logging
import os
import unittest
import uuid
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.testing.load_tests.load_test import LoadTest
from apache_beam.testing.load_tests.streaming import group_by_key_streaming_pipeline
# Load tests only run when explicitly enabled through the environment;
# the boolean is computed directly instead of flag-then-reassign.
load_test_enabled = os.environ.get('LOAD_TEST_ENABLED') == 'true'

# Default maximum wait (seconds) for all messages — presumably consumed
# by the --timeout handling in the matcher; confirm against LoadTest.
DEFAULT_TIMEOUT = 800

# How long (milliseconds) the streaming pipeline is allowed to run
# before the result is waited on: 1 minute.
WAIT_UNTIL_FINISH_DURATION = 1 * 60 * 1000
class TestOptions(PipelineOptions):
  """Options holder that exposes the --test-pipeline-options flag."""

  @classmethod
  def _add_argparse_args(cls, parser):
    # Registers the flag carrying the nested test pipeline options string.
    parser.add_argument('--test-pipeline-options')
@unittest.skipIf(not load_test_enabled, 'Enabled only for phase triggering.')
class GroupByKeyStreamingTest(LoadTest):
  """Streaming GroupByKey load test driven by PubSub messages.

  setUp provisions a fresh output topic plus input/output subscriptions,
  all suffixed with a UUID so concurrent runs do not collide; tearDown
  best-effort deletes them (previously nothing was deleted, leaking the
  topic and subscriptions on every run).
  """

  # Attribute name used to label messages with an id in the pipeline.
  ID_LABEL = 'id'

  def setUp(self):
    super(GroupByKeyStreamingTest, self).setUp()
    # --pubsub_topic_name names a topic that must already exist
    # (see the module docstring).
    self.topic_short_name = self.pipeline.get_option('pubsub_topic_name')
    self.setup_pubsub()
    self.extra_opts = {
        'input_subscription': self.input_sub.name,
        'metrics_namespace': self.metrics_namespace,
        'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
    }

  def setup_pubsub(self):
    """Creates the PubSub resources the pipeline reads from and writes to."""
    self.uuid = str(uuid.uuid4())
    # Imported lazily so the module can be imported even when the
    # google-cloud-pubsub dependency is absent and the test is skipped.
    from google.cloud import pubsub
    self.pub_client = pubsub.PublisherClient()
    input_topic_full_name = "projects/{}/topics/{}".format(
        self.project_id, self.topic_short_name)
    self.input_topic = self.pub_client.get_topic(input_topic_full_name)
    self.output_topic_name = self.topic_short_name + '_out_' + self.uuid
    self.output_topic = self.pub_client.create_topic(
        self.pub_client.topic_path(self.project_id, self.output_topic_name))

    self.sub_client = pubsub.SubscriberClient()
    self.input_sub_name = self.topic_short_name + '_sub_in_' + self.uuid
    self.input_sub = self.sub_client.create_subscription(
        self.sub_client.subscription_path(self.project_id,
                                          self.input_sub_name),
        self.input_topic.name)
    self.output_sub_name = self.topic_short_name + '_sub_out_' + self.uuid
    self.output_sub = self.sub_client.create_subscription(
        self.sub_client.subscription_path(self.project_id,
                                          self.output_sub_name),
        self.output_topic.name,
        # Long ack deadline so slow matchers do not cause redelivery.
        ack_deadline_seconds=60)

  def tearDown(self):
    super(GroupByKeyStreamingTest, self).tearDown()
    self._cleanup_pubsub()

  def _cleanup_pubsub(self):
    """Best-effort deletion of the resources created in setup_pubsub.

    NOTE(review): if the test times out on Dataflow the job may still be
    consuming these resources, so deletion failures are logged, not raised.
    """
    cleanups = (
        ('input subscription',
         lambda: self.sub_client.delete_subscription(self.input_sub.name)),
        ('output subscription',
         lambda: self.sub_client.delete_subscription(self.output_sub.name)),
        ('output topic',
         lambda: self.pub_client.delete_topic(self.output_topic.name)),
    )
    for description, delete in cleanups:
      try:
        delete()
      except Exception:  # pylint: disable=broad-except
        logging.warning('Failed to clean up %s.', description)

  def testGroupByKey(self):
    # get_full_options_as_args merges --test-pipeline-options with the
    # extra options computed in setUp.
    args = self.pipeline.get_full_options_as_args(**self.extra_opts)
    self.result = group_by_key_streaming_pipeline.run(args)
if __name__ == '__main__':
  # Surface INFO-level logs when the test module is run directly.
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()