# blob: ef7235936bd1d948e371f1ac6d1db66d0402a2ea [file] [log] [blame]
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pytype: skip-file
"""Unit tests for Google Cloud Natural Language API transform."""
from __future__ import absolute_import
import unittest
import mock
import apache_beam as beam
from apache_beam.metrics import MetricsFilter
from apache_beam.testing.test_pipeline import TestPipeline
# Protect against environments where Google Cloud Natural Language client
# is not available.
# pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports
try:
from google.cloud import language
except ImportError:
language = None
else:
from apache_beam.ml.gcp import naturallanguageml
# pylint: enable=wrong-import-order, wrong-import-position, ungrouped-imports
@unittest.skipIf(language is None, 'GCP dependencies are not installed')
class NaturalLanguageMlTest(unittest.TestCase):
    """Tests for ``naturallanguageml.Document`` and ``AnnotateText``.

    The whole class is skipped when ``google.cloud.language`` could not be
    imported (``language`` is then ``None``).
    """

    def assertCounterEqual(self, pipeline_result, counter_name, expected):
        """Assert that a named counter of a pipeline run equals ``expected``.

        Args:
          pipeline_result: Object whose ``metrics().query(...)`` result is
            inspected.
          counter_name: Name handed to ``MetricsFilter().with_name``.
          expected: Value compared against the ``result`` attribute of the
            first counter returned by the query.

        Raises:
          AssertionError: If the query returned no counter, or the first
            counter's ``result`` differs from ``expected``.
        """
        metrics = pipeline_result.metrics().query(
            MetricsFilter().with_name(counter_name))
        # Guard only the lookup: an IndexError raised anywhere else must not
        # be misreported as a missing counter.
        try:
            counter = metrics['counters'][0]
        except IndexError:
            raise AssertionError(
                'Counter "{}" was not found'.format(counter_name))
        self.assertEqual(expected, counter.result)

    def test_document_source(self):
        """Check which content key ``Document.to_dict`` emits per source."""
        # Document built from the text alone: only 'content' is expected.
        document = naturallanguageml.Document('Hello, world!')
        dict_ = naturallanguageml.Document.to_dict(document)
        self.assertIn('content', dict_)
        self.assertNotIn('gcs_content_uri', dict_)

        # Document built with from_gcs=True: only 'gcs_content_uri' is
        # expected.
        document = naturallanguageml.Document(
            'gs://sample/location', from_gcs=True)
        dict_ = naturallanguageml.Document.to_dict(document)
        self.assertNotIn('content', dict_)
        self.assertIn('gcs_content_uri', dict_)

    def test_annotate_test_called(self):
        """Run ``AnnotateText`` on one document and expect one 'api_calls'."""
        # _get_api_client is replaced by a mock for the whole pipeline run.
        with mock.patch('apache_beam.ml.gcp.naturallanguageml._AnnotateTextFn'
                        '._get_api_client'):
            p = TestPipeline()
            features = [
                naturallanguageml.types.AnnotateTextRequest.Features(
                    extract_syntax=True)
            ]
            _ = (
                p
                | beam.Create([naturallanguageml.Document('Hello, world!')])
                | naturallanguageml.AnnotateText(features))
            result = p.run()
            result.wait_until_finish()
            self.assertCounterEqual(result, 'api_calls', 1)
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()