#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""End-to-end test for the hourly team score example.
Code: beam/sdks/python/apache_beam/examples/complete/game/hourly_team_score.py
Usage:
python setup.py nosetests --test-pipeline-options=" \
--runner=TestDataflowRunner \
--project=... \
--staging_location=gs://... \
--temp_location=gs://... \
--output=gs://... \
--sdk_location=... \
"""

from __future__ import absolute_import

import logging
import time
import unittest

from hamcrest.core.core.allof import all_of
from nose.plugins.attrib import attr

from apache_beam.examples.complete.game import hourly_team_score
from apache_beam.io.gcp.tests import utils
from apache_beam.io.gcp.tests.bigquery_matcher import BigqueryMatcher
from apache_beam.runners.runner import PipelineState
from apache_beam.testing.pipeline_verifiers import PipelineStateMatcher
from apache_beam.testing.test_pipeline import TestPipeline


class HourlyTeamScoreIT(unittest.TestCase):

  DEFAULT_INPUT_FILE = 'gs://dataflow-samples/game/gaming_data*'
  # SHA-1 hash of the sorted output rows read back from the BigQuery table.
  DEFAULT_EXPECTED_CHECKSUM = '4fa761fb5c3341ec573d5d12c6ab75e3b2957a25'
  OUTPUT_DATASET = 'hourly_team_score_it_dataset'
  OUTPUT_TABLE = 'leader_board'
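
  # How a checksum like DEFAULT_EXPECTED_CHECKSUM can be derived -- a minimal
  # sketch, assuming the matcher SHA-1 hashes the sorted string forms of the
  # query result rows (BigqueryMatcher holds the authoritative logic):
  #
  #   import hashlib
  #   rows = ['team_a,120', 'team_b,95']  # hypothetical stringified rows
  #   digest = hashlib.sha1()
  #   for row in sorted(rows):
  #     digest.update(row)
  #   checksum = digest.hexdigest()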

  def setUp(self):
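    # With is_integration_test=True, TestPipeline reads its options from the
    # --test-pipeline-options flag shown in the module docstring.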
    self.test_pipeline = TestPipeline(is_integration_test=True)
    self.project = self.test_pipeline.get_option('project')

    # Set up the BigQuery environment: suffix the dataset name with a
    # timestamp so concurrent test runs do not collide.
    from google.cloud import bigquery
    client = bigquery.Client()
    unique_dataset_name = self.OUTPUT_DATASET + str(int(time.time()))
    self.dataset = client.dataset(unique_dataset_name, project=self.project)
    self.dataset.create()

  def _cleanup_dataset(self):
    # A non-empty dataset cannot be deleted, so the output table is removed
    # first (see the cleanup registration order in the test below).
    self.dataset.delete()

  @attr('IT')
  def test_hourly_team_score_it(self):
    state_verifier = PipelineStateMatcher(PipelineState.DONE)
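    # The [project:dataset.table] bracket form below is BigQuery legacy SQL
    # syntax; the standard-SQL equivalent would be `project.dataset.table`.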
    query = ('SELECT COUNT(*) FROM [%s:%s.%s]' % (self.project,
                                                  self.dataset.name,
                                                  self.OUTPUT_TABLE))
    bigquery_verifier = BigqueryMatcher(self.project,
                                        query,
                                        self.DEFAULT_EXPECTED_CHECKSUM)
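
    # window_duration is in minutes in the hourly_team_score example; 1 keeps
    # this integration test fast. on_success_matcher is consumed by the test
    # runner to verify the job after it finishes.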
    extra_opts = {'input': self.DEFAULT_INPUT_FILE,
                  'dataset': self.dataset.name,
                  'window_duration': 1,
                  'on_success_matcher': all_of(state_verifier,
                                               bigquery_verifier)}

    # Register cleanup callbacks before running the pipeline; addCleanup runs
    # them in reverse registration order, so the output table is deleted
    # before the dataset that contains it.
    self.addCleanup(self._cleanup_dataset)
    self.addCleanup(utils.delete_bq_table, self.project,
                    self.dataset.name, self.OUTPUT_TABLE)

    # Build the full argument list from --test-pipeline-options plus
    # extra_opts, then start the job via the example's main entry point.
    hourly_team_score.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.DEBUG)
  unittest.main()