IMPALA-14634: Organize custom cluster logs into subdirectories by test name

This changes the logic for setting the log directory for custom cluster tests. It uses the test class name and (optionally) the test method name to put log files into subdirectories. For a test that restarts the cluster for each test method, this would have a directory structure of
  base_dir / test class / test method name /
For a test that starts the cluster at the class level and reuses it across test methods, this would be:
  base_dir / test class /

This directory structure can still have logs from multiple tests going to a single directory. For example, some tests are parameterized so that a test method runs multiple times with different parameter values. Those will share a directory. This should be rare and most directories will have logs from a single test.

Some test methods restart the cluster within the test itself rather than just at the beginning. This stores the test method so these restarts continue to use the same directory. The code is often in @classmethod functions, so this stores the test method name at the class level and manipulates it through @classmethod functions. This is awkward, but it works because the custom cluster tests are single threaded.

This also adjusts the logic for determining the base directory for custom cluster logs. tests/run-custom-cluster-tests.sh sets LOG_DIR to $IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR (i.e. logs/custom_cluster_tests) and this continues to respect LOG_DIR. However, in the dev environment using impala-py.test, LOG_DIR is not set and the code currently defaults to /tmp. This changes it to use $IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR so that logs go to logs/custom_cluster_tests rather than /tmp.
Testing:
 - Ran locally and verified the output goes into directories
 - Ran exhaustive custom cluster tests

Change-Id: I8a8402fed1584a99f91451a3976e7026d0deb834
Reviewed-on: http://gerrit.cloudera.org:8080/23796
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Quanlong Huang <huangquanlong@gmail.com>

commit: d5da6ee2a5b570fdb99d9da5d74885c48ec5d539 [log] [tgz]
author: Joe McDonnell <joemcdonnell@cloudera.com> Tue Dec 16 10:29:51 2025 -0800
committer: Joe McDonnell <joemcdonnell@cloudera.com> Fri Jan 02 18:17:25 2026 +0000
tree: 4f1c24130ce8c10805180d277eeb527cd110ffe7
parent: 411309acf4d3f326f05dfa04749a0ec0e2ccc801 [diff]
diff --git a/tests/common/custom_cluster_test_suite.py b/tests/common/custom_cluster_test_suite.py
index 98b3b3e..a5a1198 100644
--- a/tests/common/custom_cluster_test_suite.py
+++ b/tests/common/custom_cluster_test_suite.py

@@ -116,6 +116,15 @@
   # Args for cluster startup/teardown when sharing a single cluster for the entire class.
   SHARED_CLUSTER_ARGS = {}
 
+  # The currently executing test method. setup_method() populates this and
+  # teardown_method() clears it. This sets the log directory location when a test manually
+  # restarts the cluster during the test. This is left unset for tests that use a single
+  # cluster for multiple tests (i.e. with SHARED_CLUSTER_ARGS), as the logs will be
+  # shared across multiple tests. Since this is used from @classmethod functions, this is
+  # set and accessed via @classmethod functions set/get_current_test_method_name(). This
+  # is awkward, but it should work because custom cluster tests are single threaded.
+  CURRENT_TEST_METHOD_NAME = None
+
   @classmethod
   def add_test_dimensions(cls):
     super(CustomClusterTestSuite, cls).add_test_dimensions()
@@ -269,6 +278,14 @@
     del self.TMP_DIRS[name]
 
   @classmethod
+  def set_current_test_method_name(cls, test_method_name):
+    cls.CURRENT_TEST_METHOD_NAME = test_method_name
+
+  @classmethod
+  def get_current_test_method_name(self):
+    return self.CURRENT_TEST_METHOD_NAME
+
+  @classmethod
   def cluster_setup(cls, args):
     cluster_args = list()
     disable_log_buffering = args.get(DISABLE_LOG_BUFFERING, False)
@@ -393,6 +410,10 @@
 
   def setup_method(self, method):
     if not self.SHARED_CLUSTER_ARGS:
+      # Store the test method name so that we can put logs in different directories for
+      # different tests. This only applies if the cluster is being restarted per test
+      # method. If this cluster is used for multiple test methods, leave this unset.
+      self.set_current_test_method_name(method.__name__)
       self.cluster_setup(method.__dict__)
     elif method.__dict__.get(WITH_ARGS_METHOD):
       pytest.fail("Cannot specify with_args on both class and methods")
@@ -426,6 +447,7 @@
   def teardown_method(self, method):
     if not self.SHARED_CLUSTER_ARGS:
       self.cluster_teardown(method.__name__, method.__dict__)
+    self.set_current_test_method_name(None)
 
   def wait_for_wm_init_complete(self, timeout_s=180):
     """
@@ -554,7 +576,7 @@
   @classmethod
   def _start_impala_cluster(cls,
                             options,
-                            impala_log_dir=os.getenv('LOG_DIR', "/tmp/"),
+                            impala_log_dir=None,
                             cluster_size=DEFAULT_CLUSTER_SIZE,
                             num_coordinators=NUM_COORDINATORS,
                             use_exclusive_coordinators=False,
@@ -570,7 +592,33 @@
                             wait_for_backends=True,
                             log_symlinks=False,
                             force_restart=True):
-    cls.impala_log_dir = impala_log_dir
+    if impala_log_dir:
+      # If the test gave a specific location, use it, as the test may be parsing the logs
+      # to find certain output.
+      cls.impala_log_dir = impala_log_dir
+    else:
+      # The test didn't customize the log dir, so calculate a reasonable base directory
+      # To find the log directory, we proceed in this order:
+      # 1. LOG_DIR environment variable (used in test scripts for Jenkins jobs, etc)
+      # 2. IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR - set in impala-config.sh (used in devenvs)
+      # 3. /tmp/ - This probably shouldn't happen, but at least the logs can go somewhere
+      impala_base_log_dir = os.getenv("LOG_DIR",
+          os.getenv("IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR", "/tmp/"))
+
+      # To make it easier to find logs across multiple custom cluster tests, organize
+      # them into subdirectories based on their test class and their test method name
+      # (where applicable).
+      impala_log_dir_per_test = os.path.join(impala_base_log_dir, cls.__name__)
+      # The CURRENT_TEST_METHOD_NAME will be None when using SHARED_CLUSTER_ARGS as the
+      # cluster is not restarted for each test method
+      if cls.CURRENT_TEST_METHOD_NAME:
+        impala_log_dir_per_test = os.path.join(impala_log_dir_per_test,
+            cls.CURRENT_TEST_METHOD_NAME)
+
+      if not os.path.isdir(impala_log_dir_per_test):
+        os.makedirs(impala_log_dir_per_test)
+      cls.impala_log_dir = impala_log_dir_per_test
+
     # We ignore TEST_START_CLUSTER_ARGS here. Custom cluster tests specifically test that
     # certain custom startup arguments work and we want to keep them independent of dev
     # environments.
@@ -578,7 +626,7 @@
            '--state_store_args=%s' % DEFAULT_STATESTORE_ARGS,
            '--cluster_size=%d' % cluster_size,
            '--num_coordinators=%d' % num_coordinators,
-           '--log_dir=%s' % impala_log_dir,
+           '--log_dir=%s' % cls.impala_log_dir,
            '--log_level=%s' % log_level]
 
     if ignore_pid_on_log_rotation:

diff --git a/tests/custom_cluster/test_breakpad.py b/tests/custom_cluster/test_breakpad.py
index db3f2cc..666f384 100644
--- a/tests/custom_cluster/test_breakpad.py
+++ b/tests/custom_cluster/test_breakpad.py

@@ -264,7 +264,7 @@
     """Check that setting 'minidump_path' to a relative value results in minidump files
     written to 'log_dir'.
     """
-    minidump_base_dir = os.path.join(os.environ.get('LOG_DIR', '/tmp'), 'minidumps')
+    minidump_base_dir = os.path.join(self.impala_log_dir, 'minidumps')
     shutil.rmtree(minidump_base_dir, ignore_errors=True)
     # Omitting minidump_path as a parameter to the cluster will choose the default
     # configuration, which is a FLAGS_log_dir/minidumps.
commit	d5da6ee2a5b570fdb99d9da5d74885c48ec5d539	[log] [tgz]
author	Joe McDonnell <joemcdonnell@cloudera.com>	Tue Dec 16 10:29:51 2025 -0800
committer	Joe McDonnell <joemcdonnell@cloudera.com>	Fri Jan 02 18:17:25 2026 +0000
tree	4f1c24130ce8c10805180d277eeb527cd110ffe7
parent	411309acf4d3f326f05dfa04749a0ec0e2ccc801 [diff]