tests/authorization/test_authorized_proxy.py - impala - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 from __future__ import absolute_import, division, print_function
 import json
 import os
 import time

 import pytest
 from thrift.protocol import TBinaryProtocol
 from thrift.transport.TSocket import TSocket
 from thrift.transport.TTransport import TBufferedTransport

 from impala_thrift_gen.ImpalaService import ImpalaHiveServer2Service
 from impala_thrift_gen.TCLIService import TCLIService
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.hs2.hs2_test_suite import operation_id_to_query_id

 # Name of the temporary-directory placeholder that holds impalad's audit event logs.
 AUDIT_LOG_DIR = "audit_log_dir"

 # impalad startup flags enabling Ranger authorization. The audit log directory is
 # given as a "{audit_log_dir}" placeholder (presumably expanded by the test framework
 # through with_args(tmp_dir_placeholders=...) -- confirm in CustomClusterTestSuite).
 RANGER_IMPALAD_ARGS = " ".join([
   "--server-name=server1",
   "--ranger_service_type=hive",
   "--ranger_app_id=impala",
   "--authorization_provider=ranger",
   "--abort_on_failed_audit_event=false",
   "--audit_event_log_dir={" + AUDIT_LOG_DIR + "}",
 ])
 # catalogd startup flags enabling Ranger authorization.
 RANGER_CATALOGD_ARGS = " ".join([
   "--server-name=server1",
   "--ranger_service_type=hive",
   "--ranger_app_id=impala",
   "--authorization_provider=ranger",
 ])
 # User that opens the sessions used to grant and revoke privileges.
 RANGER_ADMIN_USER = "admin"


 class TestAuthorizedProxy(CustomClusterTestSuite):
   def setup_method(self, method):
     """Runs the base-class setup, then opens an HS2 thrift connection to the first
     impalad of the cluster and stores the resulting client in self.hs2_client."""
     super(TestAuthorizedProxy, self).setup_method(method)
     impalad_service = self.cluster.impalads[0].service
     self.socket = TSocket(impalad_service.hostname, impalad_service.hs2_port)
     # Build the thrift stack bottom-up: socket -> buffered transport -> binary protocol.
     self.transport = TBufferedTransport(self.socket)
     self.transport.open()
     self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
     self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

   def teardown_method(self, method):
     """Runs the base-class teardown and then closes the HS2 socket opened by
     setup_method(). The socket is closed even when the base-class teardown raises."""
     try:
       super(TestAuthorizedProxy, self).teardown_method(method)
     finally:
       # Closing in a 'finally' keeps a failing base-class teardown from leaking the
       # connection to the impalad.
       if self.socket:
         self.socket.close()

   def _execute_hs2_stmt(self, statement, verify=True):
     """
     Runs a statement over HS2 in the session stored in self.session_handle.

     :param statement: the statement text to execute
     :param verify: if true, the response is handed to TestHS2.check_response(), which
       is expected to raise when the execution failed
     :return: the response returned by ExecuteStatement
     """
     # Imported locally so the tests in tests.hs2.test_hs2 are not pulled into this
     # test suite.
     from tests.hs2.test_hs2 import TestHS2
     request = TCLIService.TExecuteStatementReq()
     request.sessionHandle = self.session_handle
     request.statement = statement
     response = self.hs2_client.ExecuteStatement(request)
     if not verify:
       return response
     TestHS2.check_response(response)
     return response

   def _open_hs2(self, user, configuration, verify=True):
     """
     Opens an HS2 session.

     :param user: the user name the session is opened as
     :param configuration: dict of session configuration entries, e.g.
       {"impala.doas.user": ...} to request delegation
     :param verify: if true, the response is handed to TestHS2.check_response(), which
       is expected to raise when the session could not be opened
     :return: the response returned by OpenSession
     """
     # Imported locally so the tests in tests.hs2.test_hs2 are not pulled into this
     # test suite.
     from tests.hs2.test_hs2 import TestHS2
     request = TCLIService.TOpenSessionReq()
     request.username = user
     request.configuration = configuration
     response = self.hs2_client.OpenSession(request)
     if not verify:
       return response
     TestHS2.check_response(response)
     return response

   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
     impalad_args="{0} --authorized_proxy_user_config=foo=bar;hue=non_owner "
                  .format(RANGER_IMPALAD_ARGS),
     catalogd_args=RANGER_CATALOGD_ARGS,
     tmp_dir_placeholders=[AUDIT_LOG_DIR])
   def test_authorized_proxy_user_with_ranger(self):
     """Tests authorized proxy user with Ranger using HS2.

     The impalad is started with a proxy user config that pairs 'hue' with
     'non_owner'; the table privilege is granted to the user 'non_owner'."""
     self._test_authorized_proxy_with_ranger(
       test_func=self._test_authorized_proxy,
       delegated_user="non_owner",
       delegated_to_group=False)

   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
     impalad_args="{0} --authorized_proxy_user_config=hue=non_owner "
                  "--authorized_proxy_group_config=foo=bar;hue=non_owner "
                  "--use_customized_user_groups_mapper_for_ranger"
                  .format(RANGER_IMPALAD_ARGS),
     catalogd_args=RANGER_CATALOGD_ARGS,
     tmp_dir_placeholders=[AUDIT_LOG_DIR])
   def test_authorized_proxy_group_with_ranger(self):
     """Tests authorized proxy group with Ranger using HS2.

     Same scenario as the proxy-user test, except that the impalad also gets a proxy
     group config and the table privilege is granted to the group 'non_owner'."""
     self._test_authorized_proxy_with_ranger(
       test_func=self._test_authorized_proxy,
       delegated_user="non_owner",
       delegated_to_group=True)

   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
     impalad_args="{0} --authorized_proxy_user_config=foo=bar "
                  "--authorized_proxy_group_config=foo=bar".format(RANGER_IMPALAD_ARGS),
     catalogd_args=RANGER_CATALOGD_ARGS,
     tmp_dir_placeholders=[AUDIT_LOG_DIR])
   def test_no_matching_user_and_group_authorized_proxy_with_ranger(self):
     """Tests that delegation is rejected when neither the proxy user config nor the
     proxy group config has an entry for the connecting user 'hue'."""
     # Both proxy configs above only contain 'foo=bar'.
     self._test_no_matching_user_and_group_authorized_proxy()

   def _test_no_matching_user_and_group_authorized_proxy(self):
     """Opens a session as 'hue' that asks to act as 'abc' and asserts that the
     response reports the delegation as not authorized."""
     request = TCLIService.TOpenSessionReq()
     request.username = "hue"
     request.configuration = {"impala.doas.user": "abc"}
     # The response is inspected directly rather than verified: the open is expected
     # to be rejected.
     response = self.hs2_client.OpenSession(request)
     assert "User 'hue' is not authorized to delegate to 'abc'" in str(response)

   def _test_authorized_proxy_with_ranger(
       self, test_func, delegated_user, delegated_to_group):
     """Grants 'all' on tpch.lineitem to 'non_owner' as the admin user, runs
     test_func(delegated_user), and always revokes the privilege afterwards.

     :param test_func: callable taking the delegated user name
     :param delegated_user: user name passed through to test_func
     :param delegated_to_group: if true the privilege goes to the group 'non_owner',
       otherwise to the user 'non_owner'
     """
     if delegated_to_group:
       grant_stmt = "grant all on table tpch.lineitem to group non_owner"
       revoke_stmt = "revoke all on table tpch.lineitem from group non_owner"
     else:
       grant_stmt = "grant all on table tpch.lineitem to user non_owner"
       revoke_stmt = "revoke all on table tpch.lineitem from user non_owner"

     try:
       self.session_handle = self._open_hs2(RANGER_ADMIN_USER, dict()).sessionHandle
       self._execute_hs2_stmt(grant_stmt)
       test_func(delegated_user)
     finally:
       # test_func replaces self.session_handle with non-admin sessions, so a fresh
       # admin session is opened for the cleanup.
       self.session_handle = self._open_hs2(RANGER_ADMIN_USER, dict()).sessionHandle
       self._execute_hs2_stmt(revoke_stmt)

   def _test_authorized_proxy(self, delegated_user):
     """End-to-end impersonation + authorization test. Expects authorization to be
        configured before running this test.

        :param delegated_user: the user that 'hue' acts on behalf of
     """
     # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
     # the module within this test function, rather than as a top-level import. This way
     # the tests in that module will not get pulled when executing this test suite. The fix
     # is to split the utility code out of the TestHS2 class and support HS2 as a first
     # class citizen in our test framework.
     from tests.hs2.test_hs2 import TestHS2

     # 1. As 'hue' acting for the delegated user, access to an unauthorized table must
     #    be denied.
     doas_session = self._open_hs2("hue", {"impala.doas.user": delegated_user})
     self.session_handle = doas_session.sessionHandle
     denied_resp = self._execute_hs2_stmt("describe tpch_seq.lineitem", verify=False)
     denied_msg = "User '%s' does not have privileges to access" % delegated_user
     assert denied_msg in str(denied_resp)

     # 2. The denied access must show up in the audit log with both identities.
     audit_found = self._wait_for_audit_record(user=delegated_user, impersonator="hue")
     assert audit_found, "No matching audit event recorded in time window"

     # 3. The same operation on a table we are authorized to access must succeed.
     allowed_resp = self._execute_hs2_stmt("describe tpch.lineitem")
     TestHS2.check_response(allowed_resp)

     # 4. The runtime profile must carry the correct user information.
     query_id = operation_id_to_query_id(allowed_resp.operationHandle.operationId)
     profile = self.cluster.impalads[0].service.read_query_profile_page(query_id)
     self._verify_profile_user_fields(
       profile,
       effective_user=delegated_user,
       connected_user="hue",
       delegated_user=delegated_user)

     # 5. Delegating to a user we are not authorized to delegate to must be rejected.
     rejected = self._open_hs2("hue", {"impala.doas.user": "some_user"}, verify=False)
     assert "User 'hue' is not authorized to delegate to 'some_user'" in str(rejected)

     # 6. A session without a do_as_user runs a simple query as 'hue' itself.
     self.session_handle = self._open_hs2("hue", dict()).sessionHandle
     plain_resp = self._execute_hs2_stmt("select 1")

     # Since there is no do_as_user the Delegated User field should be empty.
     query_id = operation_id_to_query_id(plain_resp.operationHandle.operationId)
     profile = self.cluster.impalads[0].service.read_query_profile_page(query_id)
     self._verify_profile_user_fields(
       profile,
       effective_user="hue",
       connected_user="hue",
       delegated_user="")

   def _verify_profile_user_fields(self, profile_str, effective_user, connected_user,
                                   delegated_user):
     """Verifies the given runtime profile string contains the specified values for
        User, Connected User, and Delegated User"""
     assert "\n    User: {0}\n".format(effective_user) in profile_str
     assert "\n    Connected User: {0}\n".format(connected_user) in profile_str
     assert "\n    Delegated User: {0}\n".format(delegated_user) in profile_str

   def _wait_for_audit_record(self, user, impersonator, timeout_secs=30):
     """Polls the audit log directory about once per second until some audit log file
        contains a record for the given user and impersonator.

        :param user: expected "user" value of the audit record
        :param impersonator: expected "impersonator" value of the audit record
        :param timeout_secs: how long to keep polling before giving up
        :return: True if a matching record was found, False on timeout
     """
     # The audit event might not show up immediately (the audit logs are flushed to disk
     # on regular intervals), hence the polling.
     poll_started = time.time()
     while time.time() - poll_started < timeout_secs:
       log_file_names = os.listdir(self.get_tmp_dir(AUDIT_LOG_DIR))
       if any(self._find_matching_audit_record(name, user, impersonator)
              for name in log_file_names):
         return True
       time.sleep(1)
     return False

   def _find_matching_audit_record(self, audit_file_name, user, impersonator):
     """Scans one audit log file for a record with the given user and impersonator.

        Every non-blank line is parsed as a JSON object; the value stored under the
        object's smallest key (presumably the event timestamp -- confirm against the
        audit log format) is the record that gets inspected.

        :param audit_file_name: file name relative to the audit log directory
        :param user: expected "user" value of the record
        :param impersonator: expected "impersonator" value of the record
        :return: True if a matching record exists in the file, False otherwise
     """
     audit_path = os.path.join(self.get_tmp_dir(AUDIT_LOG_DIR), audit_file_name)
     with open(audit_path) as audit_log_file:
       for line in audit_log_file:
         if not line.strip():
           continue
         try:
           json_dict = json.loads(line)
         except ValueError:
           # The file is read while it may still be written, so a line can be
           # incomplete. Skip it; the caller polls and will read the file again.
           continue
         if not json_dict:
           continue
         record = json_dict[min(json_dict)]
         if record["user"] == user and record["impersonator"] == impersonator:
           return True
     return False
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	from __future__ import absolute_import, division, print_function
	import json
	import os
	import time

	import pytest
	from thrift.protocol import TBinaryProtocol
	from thrift.transport.TSocket import TSocket
	from thrift.transport.TTransport import TBufferedTransport

	from impala_thrift_gen.ImpalaService import ImpalaHiveServer2Service
	from impala_thrift_gen.TCLIService import TCLIService
	from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
	from tests.hs2.hs2_test_suite import operation_id_to_query_id

	# Name of the temporary-directory placeholder that holds impalad's audit event logs.
	AUDIT_LOG_DIR = "audit_log_dir"

	# impalad startup flags enabling Ranger authorization. The audit log directory is
	# given as a "{audit_log_dir}" placeholder (presumably expanded by the test framework
	# through with_args(tmp_dir_placeholders=...) -- confirm in CustomClusterTestSuite).
	RANGER_IMPALAD_ARGS = " ".join([
	  "--server-name=server1",
	  "--ranger_service_type=hive",
	  "--ranger_app_id=impala",
	  "--authorization_provider=ranger",
	  "--abort_on_failed_audit_event=false",
	  "--audit_event_log_dir={" + AUDIT_LOG_DIR + "}",
	])
	# catalogd startup flags enabling Ranger authorization.
	RANGER_CATALOGD_ARGS = " ".join([
	  "--server-name=server1",
	  "--ranger_service_type=hive",
	  "--ranger_app_id=impala",
	  "--authorization_provider=ranger",
	])
	# User that opens the sessions used to grant and revoke privileges.
	RANGER_ADMIN_USER = "admin"


	class TestAuthorizedProxy(CustomClusterTestSuite):
	  """Custom-cluster tests for authorized proxy users and groups with Ranger,
	  driven through a raw HS2 thrift client."""

	  def setup_method(self, method):
	    """Runs the base-class setup, then opens an HS2 thrift connection to the first
	    impalad of the cluster and stores the resulting client in self.hs2_client."""
	    super(TestAuthorizedProxy, self).setup_method(method)
	    host, port = (self.cluster.impalads[0].service.hostname,
	                  self.cluster.impalads[0].service.hs2_port)
	    self.socket = TSocket(host, port)
	    self.transport = TBufferedTransport(self.socket)
	    self.transport.open()
	    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
	    self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

	  def teardown_method(self, method):
	    """Runs the base-class teardown and then closes the HS2 socket opened by
	    setup_method(). The socket is closed even when the base-class teardown raises."""
	    try:
	      super(TestAuthorizedProxy, self).teardown_method(method)
	    finally:
	      # Closing in a 'finally' keeps a failing base-class teardown from leaking the
	      # connection to the impalad.
	      if self.socket:
	        self.socket.close()

	  def _execute_hs2_stmt(self, statement, verify=True):
	    """
	    Executes an hs2 statement in the session stored in self.session_handle.

	    :param statement: the statement to execute
	    :param verify: if true, the response is handed to TestHS2.check_response(), which
	      is expected to raise when the execution failed
	    :return: the result of execution
	    """
	    # Imported locally so the tests in tests.hs2.test_hs2 are not pulled into this
	    # test suite.
	    from tests.hs2.test_hs2 import TestHS2
	    execute_statement_req = TCLIService.TExecuteStatementReq()
	    execute_statement_req.sessionHandle = self.session_handle
	    execute_statement_req.statement = statement
	    result = self.hs2_client.ExecuteStatement(execute_statement_req)
	    if verify:
	      TestHS2.check_response(result)
	    return result

	  def _open_hs2(self, user, configuration, verify=True):
	    """
	    Opens a session with hs2.

	    :param user: the user to open the session
	    :param configuration: the configuration for the session
	    :param verify: if true, the response is handed to TestHS2.check_response(), which
	      is expected to raise when the session could not be opened
	    :return: the result of opening the session
	    """
	    # Imported locally so the tests in tests.hs2.test_hs2 are not pulled into this
	    # test suite.
	    from tests.hs2.test_hs2 import TestHS2
	    open_session_req = TCLIService.TOpenSessionReq()
	    open_session_req.username = user
	    open_session_req.configuration = configuration
	    resp = self.hs2_client.OpenSession(open_session_req)
	    if verify:
	      TestHS2.check_response(resp)
	    return resp

	  @pytest.mark.execute_serially
	  @CustomClusterTestSuite.with_args(
	    impalad_args="{0} --authorized_proxy_user_config=foo=bar;hue=non_owner "
	                 .format(RANGER_IMPALAD_ARGS),
	    catalogd_args=RANGER_CATALOGD_ARGS,
	    tmp_dir_placeholders=[AUDIT_LOG_DIR])
	  def test_authorized_proxy_user_with_ranger(self):
	    """Tests authorized proxy user with Ranger using HS2."""
	    self._test_authorized_proxy_with_ranger(self._test_authorized_proxy, "non_owner",
	                                            False)

	  @pytest.mark.execute_serially
	  @CustomClusterTestSuite.with_args(
	    impalad_args="{0} --authorized_proxy_user_config=hue=non_owner "
	                 "--authorized_proxy_group_config=foo=bar;hue=non_owner "
	                 "--use_customized_user_groups_mapper_for_ranger"
	                 .format(RANGER_IMPALAD_ARGS),
	    catalogd_args=RANGER_CATALOGD_ARGS,
	    tmp_dir_placeholders=[AUDIT_LOG_DIR])
	  def test_authorized_proxy_group_with_ranger(self):
	    """Tests authorized proxy group with Ranger using HS2."""
	    self._test_authorized_proxy_with_ranger(self._test_authorized_proxy, "non_owner",
	                                            True)

	  @pytest.mark.execute_serially
	  @CustomClusterTestSuite.with_args(
	    impalad_args="{0} --authorized_proxy_user_config=foo=bar "
	                 "--authorized_proxy_group_config=foo=bar".format(RANGER_IMPALAD_ARGS),
	    catalogd_args=RANGER_CATALOGD_ARGS,
	    tmp_dir_placeholders=[AUDIT_LOG_DIR])
	  def test_no_matching_user_and_group_authorized_proxy_with_ranger(self):
	    """Tests that delegation is rejected when neither the proxy user config nor the
	    proxy group config has an entry for the connecting user 'hue'."""
	    self._test_no_matching_user_and_group_authorized_proxy()

	  def _test_no_matching_user_and_group_authorized_proxy(self):
	    """Opens a session as 'hue' that asks to act as 'abc' and asserts that the
	    response reports the delegation as not authorized."""
	    open_session_req = TCLIService.TOpenSessionReq()
	    open_session_req.username = "hue"
	    open_session_req.configuration = dict()
	    open_session_req.configuration["impala.doas.user"] = "abc"
	    resp = self.hs2_client.OpenSession(open_session_req)
	    assert "User 'hue' is not authorized to delegate to 'abc'" in str(resp)

	  def _test_authorized_proxy_with_ranger(
	      self, test_func, delegated_user, delegated_to_group):
	    """Grants 'all' on tpch.lineitem to 'non_owner' as the admin user, runs
	    test_func(delegated_user), and always revokes the privilege afterwards.

	    :param test_func: callable taking the delegated user name
	    :param delegated_user: user name passed through to test_func
	    :param delegated_to_group: if true the privilege goes to the group 'non_owner',
	      otherwise to the user 'non_owner'
	    """
	    try:
	      self.session_handle = self._open_hs2(RANGER_ADMIN_USER, dict()).sessionHandle
	      if not delegated_to_group:
	        self._execute_hs2_stmt("grant all on table tpch.lineitem to user non_owner")
	      else:
	        self._execute_hs2_stmt("grant all on table tpch.lineitem to group non_owner")
	      test_func(delegated_user)
	    finally:
	      # test_func replaces self.session_handle with non-admin sessions, so a fresh
	      # admin session is opened for the cleanup.
	      self.session_handle = self._open_hs2(RANGER_ADMIN_USER, dict()).sessionHandle
	      if not delegated_to_group:
	        self._execute_hs2_stmt("revoke all on table tpch.lineitem from user non_owner")
	      else:
	        self._execute_hs2_stmt(
	          "revoke all on table tpch.lineitem from group non_owner")

	  def _test_authorized_proxy(self, delegated_user):
	    """End-to-end impersonation + authorization test. Expects authorization to be
	       configured before running this test"""
	    # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
	    # the module within this test function, rather than as a top-level import. This way
	    # the tests in that module will not get pulled when executing this test suite. The
	    # fix is to split the utility code out of the TestHS2 class and support HS2 as a
	    # first class citizen in our test framework.
	    from tests.hs2.test_hs2 import TestHS2

	    # Try to query a table we are not authorized to access.
	    self.session_handle = self._open_hs2("hue",
	                                         {"impala.doas.user": delegated_user})\
	        .sessionHandle
	    bad_resp = self._execute_hs2_stmt("describe tpch_seq.lineitem", False)
	    assert "User '%s' does not have privileges to access" % delegated_user in \
	           str(bad_resp)

	    assert self._wait_for_audit_record(user=delegated_user, impersonator="hue"), \
	           "No matching audit event recorded in time window"

	    # Now try the same operation on a table we are authorized to access.
	    good_resp = self._execute_hs2_stmt("describe tpch.lineitem")
	    TestHS2.check_response(good_resp)

	    # Verify the correct user information is in the runtime profile.
	    query_id = operation_id_to_query_id(good_resp.operationHandle.operationId)
	    profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
	    self._verify_profile_user_fields(profile_page, effective_user=delegated_user,
	                                     delegated_user=delegated_user,
	                                     connected_user="hue")

	    # Try to delegate a user we are not authorized to delegate to.
	    resp = self._open_hs2("hue", {"impala.doas.user": "some_user"}, False)
	    assert "User 'hue' is not authorized to delegate to 'some_user'" in str(resp)

	    # Create a new session which does not have a do_as_user and run a simple query.
	    self.session_handle = self._open_hs2("hue", dict()).sessionHandle
	    resp = self._execute_hs2_stmt("select 1")

	    # Verify the correct user information is in the runtime profile. Since there is
	    # no do_as_user the Delegated User field should be empty.
	    query_id = operation_id_to_query_id(resp.operationHandle.operationId)

	    profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
	    self._verify_profile_user_fields(profile_page, effective_user="hue",
	                                     delegated_user="", connected_user="hue")

	  def _verify_profile_user_fields(self, profile_str, effective_user, connected_user,
	                                  delegated_user):
	    """Verifies the given runtime profile string contains the specified values for
	       User, Connected User, and Delegated User"""
	    # The four-space indent inside these literals is part of the text being matched;
	    # with the indent collapsed to a single space the assertions cannot match a
	    # profile whose fields are indented by four spaces.
	    assert "\n    User: {0}\n".format(effective_user) in profile_str
	    assert "\n    Connected User: {0}\n".format(connected_user) in profile_str
	    assert "\n    Delegated User: {0}\n".format(delegated_user) in profile_str

	  def _wait_for_audit_record(self, user, impersonator, timeout_secs=30):
	    """Waits until an audit log record is found that contains the given user and
	       impersonator, or until the timeout is reached.

	       :return: True if a matching record was found, False on timeout
	    """
	    # The audit event might not show up immediately (the audit logs are flushed to disk
	    # on regular intervals), so poll the audit event logs until a matching record is
	    # found.
	    start_time = time.time()
	    while time.time() - start_time < timeout_secs:
	      for audit_file_name in os.listdir(self.get_tmp_dir(AUDIT_LOG_DIR)):
	        if self._find_matching_audit_record(
	            audit_file_name, user, impersonator):
	          return True
	      time.sleep(1)
	    return False

	  def _find_matching_audit_record(self, audit_file_name, user, impersonator):
	    """Scans one audit log file for a record with the given user and impersonator.

	       Every non-blank line is parsed as a JSON object; the value stored under the
	       object's smallest key (presumably the event timestamp -- confirm against the
	       audit log format) is the record that gets inspected.

	       :return: True if a matching record exists in the file, False otherwise
	    """
	    audit_path = os.path.join(self.get_tmp_dir(AUDIT_LOG_DIR), audit_file_name)
	    with open(audit_path) as audit_log_file:
	      for line in audit_log_file:
	        if not line.strip():
	          continue
	        try:
	          json_dict = json.loads(line)
	        except ValueError:
	          # The file is read while it may still be written, so a line can be
	          # incomplete. Skip it; the caller polls and will read the file again.
	          continue
	        if not json_dict:
	          continue
	        record = json_dict[min(json_dict)]
	        if record["user"] == user and record["impersonator"] == impersonator:
	          return True
	    return False