tests/custom_cluster/test_web_pages.py - impala - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import json
 import random
 import re
 import requests
 import psutil
 import pytest

 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.shell.util import run_impala_shell_cmd


 class TestWebPage(CustomClusterTestSuite):
   """Custom cluster tests for the debug web pages served by the Impala daemons.

   Every test is wrapped in the 'with_args' decorator, which receives the daemon startup
   flags for that test, and then inspects the web UI over HTTP. Ports 25000, 25010 and
   25020 are respectively the impalad, statestore and catalog web servers.
   """

   @classmethod
   def get_workload(cls):
     """Returns the name of the workload these tests run against."""
     return 'functional-query'

   @classmethod
   def setup_class(cls):
     """Skips the whole class unless the exploration strategy is 'exhaustive'."""
     if cls.exploration_strategy() != 'exhaustive':
       pytest.skip('runs only in exhaustive')
     super(TestWebPage, cls).setup_class()

   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
       impalad_args="--enable_extended_memory_metrics=true"
   )
   def test_varz_hidden_variables(self):
     """Tests that modified hidden variables show up in /varz"""
     response = requests.get("http://localhost:25000/varz?json")
     assert response.status_code == requests.codes.ok
     varz_json = json.loads(response.text)
     flag = [e for e in varz_json["flags"]
             if e["name"] == "enable_extended_memory_metrics"]
     assert len(flag) == 1
     assert flag[0]["default"] == "false"
     assert flag[0]["current"] == "true"
     assert flag[0]["experimental"]

   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
       impalad_args="--webserver_max_post_length_bytes=100"
   )
   def test_max_post_length(self):
     """Tests that a POST request longer than --webserver_max_post_length_bytes is
     rejected and that a POST request within the limit is accepted."""
     too_big_post_content = "c" * 10000
     # POST that exceeds the limit
     response = requests.post("http://localhost:25000/", too_big_post_content)
     assert response.status_code == requests.codes.request_entity_too_large

     # POST within the limit
     # This is on a URI that does not understand POST and treats it like a GET.
     ok_post_content = "c" * 100
     response = requests.post("http://localhost:25000/", ok_post_content)
     assert response.status_code == requests.codes.ok

   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args()
   def test_webserver_interface(self):
     """Tests that the webservers are reachable over every interface by default, and
     that --webserver_interface restricts the impalad webserver to the given one."""
     addrs = psutil.net_if_addrs()
     print("net_if_addrs returned: %s" % addrs)
     # Raw string so that '\d' and '\.' reach the regex engine as written instead of
     # being interpreted as (invalid) string escape sequences.
     ip_matcher = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
     ip_addrs = []
     for snics in addrs.values():
       for snic in snics:
         if ip_matcher.match(snic.address):
           ip_addrs.append(snic.address)

     # There must be at least one available interface on the machine.
     assert ip_addrs, addrs

     ports = ["25000", "25010", "25020"]
     # With default args, the webserver should be accessible over all interfaces for all
     # daemons.
     for ip in ip_addrs:
       for port in ports:
         response = requests.get("http://%s:%s/" % (ip, port))
         assert response.status_code == requests.codes.ok, ip

     # Pick a random interface and restart with the webserver on that interface.
     interface = random.choice(ip_addrs)
     self._start_impala_cluster(["--impalad_args=--webserver_interface=%s" % interface])

     # Now the webserver should only be accessible over the chosen interface.
     for ip in ip_addrs:
       # Only the request itself is expected to raise ConnectionError, so it is the only
       # statement inside the 'try'.
       try:
         response = requests.get("http://%s:25000/" % ip)
       except requests.exceptions.ConnectionError:
         assert ip != interface, \
             "webserver is not reachable over the chosen interface %s" % interface
       else:
         assert ip == interface, \
             "webserver is reachable over %s but was restricted to %s" % (ip, interface)
         assert response.status_code == requests.codes.ok, ip

   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
       impalad_args="--query_stmt_size=0"
   )
   def test_query_stmt_without_truncate(self):
     """Check if the full query string is displayed in the query list on the WebUI."""
     # The input query is a select + 450 'x ' long.
     query_select = "x " * 450
     query = 'select "{0}"'.format(query_select)
     # In the site there is an extra \ before the " so we need that in the expected
     # response too.
     expected = 'select \\"{0}\\"'.format(query_select)
     self.execute_query(query)
     response = requests.get("http://localhost:25000/queries?json")
     response_json = response.text
     assert expected in response_json, "No matching statement found in the queries site."

   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
       impalad_args="--query_stmt_size=10"
   )
   def test_query_stmt_with_custom_length(self):
     """Check if the partial query with the correct length is displayed in the query list
     on the WebUI."""
     # The input query is a select + 450 'x ' long.
     query = 'select "{0}"'.format("x " * 450)
     # Searching for the custom, 10 chars long response. In the site there is an extra \
     # before the " so we need that in the expected response too.
     expected = 'select \\"x ...'
     self.execute_query(query)
     response = requests.get("http://localhost:25000/queries?json")
     response_json = response.text
     assert expected in response_json, "No matching statement found in the queries site."

   def _validate_shell_messages(self, result_stderr, messages, should_exist=True):
     """Asserts that every entry of 'messages' is contained in 'result_stderr' if
     'should_exist' is true, or that none of them is contained in it otherwise. On
     failure 'result_stderr' is used as the assertion message."""
     for msg in messages:
       if should_exist:
         assert msg in result_stderr, result_stderr
       else:
         assert msg not in result_stderr, result_stderr

   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
       impalad_args="--ping_expose_webserver_url=false"
   )
   def test_webserver_url_not_exposed(self, vector):
     """Tests that impala-shell does not print the debug web urls when the impalad runs
     with --ping_expose_webserver_url=false, while the webserver stays accessible."""
     if vector.get_value('table_format').file_format != 'text':
       pytest.skip('runs only for text table_format')
     # If webserver url is not exposed, debug web urls shouldn't be printed out.
     shell_messages = ["Query submitted at: ", "(Coordinator: ",
         "Query progress can be monitored at: "]
     query_shell_arg = '--query=select * from functional.alltypes'
     # hs2
     results = run_impala_shell_cmd(vector, [query_shell_arg])
     self._validate_shell_messages(results.stderr, shell_messages, should_exist=False)
     # beeswax
     results = run_impala_shell_cmd(vector, ['--protocol=beeswax', query_shell_arg])
     self._validate_shell_messages(results.stderr, shell_messages, should_exist=False)
     # Even though webserver url is not exposed, it is still accessible.
     page = requests.get('http://localhost:25000')
     assert page.status_code == requests.codes.ok

   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
     impalad_args="--logtostderr=true --redirect_stdout_stderr=false",
     statestored_args="--logtostderr=true --redirect_stdout_stderr=false",
     catalogd_args="--logtostderr=true --redirect_stdout_stderr=false"
   )
   def test_webserver_hide_logs_link(self, vector):
     """Validate that there is no /logs link when we use --logtostderr=true """
     # The links that we expect to see in the navbar of each daemon, keyed by the port
     # of its webui. None of the lists contains "/logs".
     expected_links = {
       # Coordinator.
       "25000": [
         "/",
         "/admission",
         "/backends",
         "/catalog",
         "/hadoop-varz",
         "/jmx",
         "/log_level",
         "/memz",
         "/metrics",
         "/profile_docs",
         "/queries",
         "/rpcz",
         "/sessions",
         "/threadz",
         "/varz",
       ],
       # Statestore.
       "25010": [
         "/",
         "/log_level",
         "/memz",
         "/metrics",
         "/profile_docs",
         "/rpcz",
         "/subscribers",
         "/threadz",
         "/topics",
         "/varz",
       ],
       # Catalog.
       "25020": [
         "/",
         "/catalog",
         "/jmx",
         "/log_level",
         "/memz",
         "/metrics",
         "/operations",
         "/profile_docs",
         "/rpcz",
         "/threadz",
         "/varz",
       ],
     }
     # Iterate over an explicit list so that the daemons are always checked in the same
     # order.
     ports = ["25000", "25010", "25020"]
     for port in ports:
       # Get the webui home page as json.
       response = requests.get("http://localhost:%s?json" % port)
       assert response.status_code == requests.codes.ok
       home = json.loads(response.text)
       # Get the items in the navbar.
       navbar = home["__common__"]['navbar']
       found_links = [link_item['link'] for link_item in navbar]
       msg = "bad links from webui port %s" % port
       assert found_links == expected_links[port], msg

   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
     impalad_args="--disable_content_security_policy_header=true",
     statestored_args="--disable_content_security_policy_header=true",
     catalogd_args="--disable_content_security_policy_header=true"
   )
   def test_cdp_header_disabled(self):
     """Test that if servers are started with the flag
     --disable_content_security_policy_header=true then the emission of the
     Content-Security-Policy (CSP) header is disabled."""
     ports = ["25000", "25010", "25020"]  # Respectively the impalad, statestore, catalog.
     for port in ports:
       response = requests.get("http://localhost:%s" % port)
       assert 'Content-Security-Policy' not in response.headers, \
         "CSP header present despite being disabled (port %s)" % port
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import json
	import random
	import re
	import requests
	import psutil
	import pytest

	from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
	from tests.shell.util import run_impala_shell_cmd


	class TestWebPage(CustomClusterTestSuite):
	  """Custom cluster tests for the debug web pages served by the Impala daemons.

	  Every test is wrapped in the 'with_args' decorator, which receives the daemon startup
	  flags for that test, and then inspects the web UI over HTTP. Ports 25000, 25010 and
	  25020 are respectively the impalad, statestore and catalog web servers.
	  """

	  @classmethod
	  def get_workload(cls):
	    """Returns the name of the workload these tests run against."""
	    return 'functional-query'

	  @classmethod
	  def setup_class(cls):
	    """Skips the whole class unless the exploration strategy is 'exhaustive'."""
	    if cls.exploration_strategy() != 'exhaustive':
	      pytest.skip('runs only in exhaustive')
	    super(TestWebPage, cls).setup_class()

	  @pytest.mark.execute_serially
	  @CustomClusterTestSuite.with_args(
	      impalad_args="--enable_extended_memory_metrics=true"
	  )
	  def test_varz_hidden_variables(self):
	    """Tests that modified hidden variables show up in /varz"""
	    response = requests.get("http://localhost:25000/varz?json")
	    assert response.status_code == requests.codes.ok
	    varz_json = json.loads(response.text)
	    flag = [e for e in varz_json["flags"]
	            if e["name"] == "enable_extended_memory_metrics"]
	    assert len(flag) == 1
	    assert flag[0]["default"] == "false"
	    assert flag[0]["current"] == "true"
	    assert flag[0]["experimental"]

	  @pytest.mark.execute_serially
	  @CustomClusterTestSuite.with_args(
	      impalad_args="--webserver_max_post_length_bytes=100"
	  )
	  def test_max_post_length(self):
	    """Tests that a POST request longer than --webserver_max_post_length_bytes is
	    rejected and that a POST request within the limit is accepted."""
	    too_big_post_content = "c" * 10000
	    # POST that exceeds the limit
	    response = requests.post("http://localhost:25000/", too_big_post_content)
	    assert response.status_code == requests.codes.request_entity_too_large

	    # POST within the limit
	    # This is on a URI that does not understand POST and treats it like a GET.
	    ok_post_content = "c" * 100
	    response = requests.post("http://localhost:25000/", ok_post_content)
	    assert response.status_code == requests.codes.ok

	  @pytest.mark.execute_serially
	  @CustomClusterTestSuite.with_args()
	  def test_webserver_interface(self):
	    """Tests that the webservers are reachable over every interface by default, and
	    that --webserver_interface restricts the impalad webserver to the given one."""
	    addrs = psutil.net_if_addrs()
	    print("net_if_addrs returned: %s" % addrs)
	    # Raw string so that '\d' and '\.' reach the regex engine as written instead of
	    # being interpreted as (invalid) string escape sequences.
	    ip_matcher = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
	    ip_addrs = []
	    for snics in addrs.values():
	      for snic in snics:
	        if ip_matcher.match(snic.address):
	          ip_addrs.append(snic.address)

	    # There must be at least one available interface on the machine.
	    assert ip_addrs, addrs

	    ports = ["25000", "25010", "25020"]
	    # With default args, the webserver should be accessible over all interfaces for all
	    # daemons.
	    for ip in ip_addrs:
	      for port in ports:
	        response = requests.get("http://%s:%s/" % (ip, port))
	        assert response.status_code == requests.codes.ok, ip

	    # Pick a random interface and restart with the webserver on that interface.
	    interface = random.choice(ip_addrs)
	    self._start_impala_cluster(["--impalad_args=--webserver_interface=%s" % interface])

	    # Now the webserver should only be accessible over the chosen interface.
	    for ip in ip_addrs:
	      # Only the request itself is expected to raise ConnectionError, so it is the only
	      # statement inside the 'try'.
	      try:
	        response = requests.get("http://%s:25000/" % ip)
	      except requests.exceptions.ConnectionError:
	        assert ip != interface, \
	            "webserver is not reachable over the chosen interface %s" % interface
	      else:
	        assert ip == interface, \
	            "webserver is reachable over %s but was restricted to %s" % (ip, interface)
	        assert response.status_code == requests.codes.ok, ip

	  @pytest.mark.execute_serially
	  @CustomClusterTestSuite.with_args(
	      impalad_args="--query_stmt_size=0"
	  )
	  def test_query_stmt_without_truncate(self):
	    """Check if the full query string is displayed in the query list on the WebUI."""
	    # The input query is a select + 450 'x ' long.
	    query_select = "x " * 450
	    query = 'select "{0}"'.format(query_select)
	    # In the site there is an extra \ before the " so we need that in the expected
	    # response too.
	    expected = 'select \\"{0}\\"'.format(query_select)
	    self.execute_query(query)
	    response = requests.get("http://localhost:25000/queries?json")
	    response_json = response.text
	    assert expected in response_json, "No matching statement found in the queries site."

	  @pytest.mark.execute_serially
	  @CustomClusterTestSuite.with_args(
	      impalad_args="--query_stmt_size=10"
	  )
	  def test_query_stmt_with_custom_length(self):
	    """Check if the partial query with the correct length is displayed in the query list
	    on the WebUI."""
	    # The input query is a select + 450 'x ' long.
	    query = 'select "{0}"'.format("x " * 450)
	    # Searching for the custom, 10 chars long response. In the site there is an extra \
	    # before the " so we need that in the expected response too.
	    expected = 'select \\"x ...'
	    self.execute_query(query)
	    response = requests.get("http://localhost:25000/queries?json")
	    response_json = response.text
	    assert expected in response_json, "No matching statement found in the queries site."

	  def _validate_shell_messages(self, result_stderr, messages, should_exist=True):
	    """Asserts that every entry of 'messages' is contained in 'result_stderr' if
	    'should_exist' is true, or that none of them is contained in it otherwise. On
	    failure 'result_stderr' is used as the assertion message."""
	    for msg in messages:
	      if should_exist:
	        assert msg in result_stderr, result_stderr
	      else:
	        assert msg not in result_stderr, result_stderr

	  @pytest.mark.execute_serially
	  @CustomClusterTestSuite.with_args(
	      impalad_args="--ping_expose_webserver_url=false"
	  )
	  def test_webserver_url_not_exposed(self, vector):
	    """Tests that impala-shell does not print the debug web urls when the impalad runs
	    with --ping_expose_webserver_url=false, while the webserver stays accessible."""
	    if vector.get_value('table_format').file_format != 'text':
	      pytest.skip('runs only for text table_format')
	    # If webserver url is not exposed, debug web urls shouldn't be printed out.
	    shell_messages = ["Query submitted at: ", "(Coordinator: ",
	        "Query progress can be monitored at: "]
	    query_shell_arg = '--query=select * from functional.alltypes'
	    # hs2
	    results = run_impala_shell_cmd(vector, [query_shell_arg])
	    self._validate_shell_messages(results.stderr, shell_messages, should_exist=False)
	    # beeswax
	    results = run_impala_shell_cmd(vector, ['--protocol=beeswax', query_shell_arg])
	    self._validate_shell_messages(results.stderr, shell_messages, should_exist=False)
	    # Even though webserver url is not exposed, it is still accessible.
	    page = requests.get('http://localhost:25000')
	    assert page.status_code == requests.codes.ok

	  @pytest.mark.execute_serially
	  @CustomClusterTestSuite.with_args(
	    impalad_args="--logtostderr=true --redirect_stdout_stderr=false",
	    statestored_args="--logtostderr=true --redirect_stdout_stderr=false",
	    catalogd_args="--logtostderr=true --redirect_stdout_stderr=false"
	  )
	  def test_webserver_hide_logs_link(self, vector):
	    """Validate that there is no /logs link when we use --logtostderr=true """
	    # The links that we expect to see in the navbar of each daemon, keyed by the port
	    # of its webui. None of the lists contains "/logs".
	    expected_links = {
	      # Coordinator.
	      "25000": [
	        "/",
	        "/admission",
	        "/backends",
	        "/catalog",
	        "/hadoop-varz",
	        "/jmx",
	        "/log_level",
	        "/memz",
	        "/metrics",
	        "/profile_docs",
	        "/queries",
	        "/rpcz",
	        "/sessions",
	        "/threadz",
	        "/varz",
	      ],
	      # Statestore.
	      "25010": [
	        "/",
	        "/log_level",
	        "/memz",
	        "/metrics",
	        "/profile_docs",
	        "/rpcz",
	        "/subscribers",
	        "/threadz",
	        "/topics",
	        "/varz",
	      ],
	      # Catalog.
	      "25020": [
	        "/",
	        "/catalog",
	        "/jmx",
	        "/log_level",
	        "/memz",
	        "/metrics",
	        "/operations",
	        "/profile_docs",
	        "/rpcz",
	        "/threadz",
	        "/varz",
	      ],
	    }
	    # Iterate over an explicit list so that the daemons are always checked in the same
	    # order.
	    ports = ["25000", "25010", "25020"]
	    for port in ports:
	      # Get the webui home page as json.
	      response = requests.get("http://localhost:%s?json" % port)
	      assert response.status_code == requests.codes.ok
	      home = json.loads(response.text)
	      # Get the items in the navbar.
	      navbar = home["__common__"]['navbar']
	      found_links = [link_item['link'] for link_item in navbar]
	      msg = "bad links from webui port %s" % port
	      assert found_links == expected_links[port], msg

	  @pytest.mark.execute_serially
	  @CustomClusterTestSuite.with_args(
	    impalad_args="--disable_content_security_policy_header=true",
	    statestored_args="--disable_content_security_policy_header=true",
	    catalogd_args="--disable_content_security_policy_header=true"
	  )
	  def test_cdp_header_disabled(self):
	    """Test that if servers are started with the flag
	    --disable_content_security_policy_header=true then the emission of the
	    Content-Security-Policy (CSP) header is disabled."""
	    ports = ["25000", "25010", "25020"]  # Respectively the impalad, statestore, catalog.
	    for port in ports:
	      response = requests.get("http://localhost:%s" % port)
	      assert 'Content-Security-Policy' not in response.headers, \
	        "CSP header present despite being disabled (port %s)" % port