| # |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
| """End-to-end test for the hourly team score example. |
| |
| Code: beam/sdks/python/apache_beam/examples/complete/game/hourly_team_score.py |
| Usage: |
| |
| python setup.py nosetests --test-pipeline-options=" \ |
| --runner=TestDataflowRunner \ |
| --project=... \ |
| --staging_location=gs://... \ |
| --temp_location=gs://... \ |
| --output=gs://... \ |
| --sdk_location=..." |
| |
| """ |
| |
| from __future__ import absolute_import |
| |
| import logging |
| import time |
| import unittest |
| |
| from hamcrest.core.core.allof import all_of |
| from nose.plugins.attrib import attr |
| |
| from apache_beam.examples.complete.game import hourly_team_score |
| from apache_beam.io.gcp.tests import utils |
| from apache_beam.io.gcp.tests.bigquery_matcher import BigqueryMatcher |
| from apache_beam.runners.runner import PipelineState |
| from apache_beam.testing.pipeline_verifiers import PipelineStateMatcher |
| from apache_beam.testing.test_pipeline import TestPipeline |
| |
| |
class HourlyTeamScoreIT(unittest.TestCase):
  """Integration test for the hourly_team_score example pipeline.

  Each test run creates its own BigQuery dataset, invokes
  hourly_team_score.run() with that dataset and DEFAULT_INPUT_FILE passed as
  pipeline options, and attaches an on-success matcher that checks both the
  final pipeline state and a checksum of a BigQuery query result.
  """

  DEFAULT_INPUT_FILE = 'gs://dataflow-samples/game/gaming_data*'
  # SHA-1 hash generated from sorted rows reading from BigQuery table
  DEFAULT_EXPECTED_CHECKSUM = '4fa761fb5c3341ec573d5d12c6ab75e3b2957a25'
  # Prefix of the per-run dataset name; a unique suffix is appended in setUp.
  OUTPUT_DATASET = 'hourly_team_score_it_dataset'
  OUTPUT_TABLE = 'leader_board'

  def setUp(self):
    """Create the test pipeline and a uniquely named BigQuery dataset.

    The dataset cleanup is registered here, right after creation, so the
    dataset is removed even when the test body fails before it gets a chance
    to register its own cleanups.
    """
    self.test_pipeline = TestPipeline(is_integration_test=True)
    self.project = self.test_pipeline.get_option('project')

    # Set up BigQuery environment
    import random
    from google.cloud import bigquery
    client = bigquery.Client()
    # A timestamp alone only has one-second resolution; the random suffix
    # keeps runs that start within the same second from colliding on the
    # dataset name.
    unique_dataset_name = '%s%d%d' % (self.OUTPUT_DATASET,
                                      int(time.time()),
                                      random.randint(0, 10000))
    self.dataset = client.dataset(unique_dataset_name, project=self.project)
    self.dataset.create()
    # unittest runs cleanups in LIFO order and also runs them when setUp or
    # the test itself raises, so anything registered later (e.g. the table
    # deletion in the test) still executes before the dataset is deleted.
    self.addCleanup(self._cleanup_dataset)

  def _cleanup_dataset(self):
    """Delete the BigQuery dataset created in setUp."""
    self.dataset.delete()

  @attr('IT')
  def test_hourly_team_score_it(self):
    """Run the example pipeline and verify its state and BigQuery output."""
    state_verifier = PipelineStateMatcher(PipelineState.DONE)
    query = ('SELECT COUNT(*) FROM [%s:%s.%s]' % (self.project,
                                                  self.dataset.name,
                                                  self.OUTPUT_TABLE))

    bigquery_verifier = BigqueryMatcher(self.project,
                                        query,
                                        self.DEFAULT_EXPECTED_CHECKSUM)

    extra_opts = {'input': self.DEFAULT_INPUT_FILE,
                  'dataset': self.dataset.name,
                  'window_duration': 1,
                  'on_success_matcher': all_of(state_verifier,
                                               bigquery_verifier)}

    # Register table clean up before pipeline execution.
    # Cleanups run in reverse registration order, so the table is deleted
    # before the dataset cleanup that setUp registered.
    self.addCleanup(utils.delete_bq_table, self.project,
                    self.dataset.name, self.OUTPUT_TABLE)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    hourly_team_score.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
| |
| |
if __name__ == '__main__':
  # Script entry point: turn on DEBUG output for the root logger, then hand
  # control to the unittest runner.
  root_logger = logging.getLogger()
  root_logger.setLevel(logging.DEBUG)
  unittest.main()