IMPALA-14634: Organize custom cluster logs into subdirectories by test name

This changes the logic for setting the log directory for
custom cluster tests. It uses the test class name and (optionally)
the test method name to put log files into subdirectories.
For a test that restarts the cluster for each test method, this
would have a directory structure of
base_dir / test class / test method name /
For a test that starts the cluster at the class level and reuses
it across test methods, this would be:
base_dir / test class /
This directory structure can still have logs from multiple tests
going to a single directory. For example, some tests are parameterized
so that a test method runs multiple times with different parameter
values. Those will share a directory. This should be rare and most
directories will have logs from a single test.

Some test methods restart the cluster within the test itself rather
than just at the beginning. This stores the test method so these
restarts continue to use the same directory. The code is often in
@classmethod functions, so this stores the test method name at the
class level and manipulates it through @classmethod functions.
This is awkward, but it works because the custom cluster tests are
single threaded.

This also adjusts the logic for determining the base directory
for custom cluster logs. tests/run-custom-cluster-tests.sh sets
LOG_DIR to $IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR (i.e.
logs/custom_cluster_tests) and this continues to respect LOG_DIR.
However, in the dev environment using impala-py.test, LOG_DIR
is not set and the code currently defaults to /tmp. This changes it
to use $IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR so that logs go to
logs/custom_cluster_tests rather than /tmp.

Testing:
 - Ran locally and verified the output goes into directories
 - Ran exhaustive custom cluster tests

Change-Id: I8a8402fed1584a99f91451a3976e7026d0deb834
Reviewed-on: http://gerrit.cloudera.org:8080/23796
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Quanlong Huang <huangquanlong@gmail.com>
diff --git a/tests/common/custom_cluster_test_suite.py b/tests/common/custom_cluster_test_suite.py
index 98b3b3e..a5a1198 100644
--- a/tests/common/custom_cluster_test_suite.py
+++ b/tests/common/custom_cluster_test_suite.py
@@ -116,6 +116,15 @@
   # Args for cluster startup/teardown when sharing a single cluster for the entire class.
   SHARED_CLUSTER_ARGS = {}
 
+  # The currently executing test method. setup_method() sets this and teardown_method()
+  # clears it. This is used to set the log directory location when a test manually
+  # restarts the cluster during the test. This is left unset for tests that use a single
+  # cluster for multiple tests (i.e. with SHARED_CLUSTER_ARGS), as the logs will be
+  # shared across multiple tests. Since this is used from @classmethod functions, this is
+  # set and read via @classmethod functions set/get_current_test_method_name(). This is
+  # awkward, but it should work because custom cluster tests are single threaded.
+  CURRENT_TEST_METHOD_NAME = None
+
   @classmethod
   def add_test_dimensions(cls):
     super(CustomClusterTestSuite, cls).add_test_dimensions()
@@ -269,6 +278,14 @@
     del self.TMP_DIRS[name]
 
   @classmethod
+  def set_current_test_method_name(cls, test_method_name):
+    cls.CURRENT_TEST_METHOD_NAME = test_method_name  # passing None clears the name
+
+  @classmethod
+  def get_current_test_method_name(cls):
+    return cls.CURRENT_TEST_METHOD_NAME  # None unless setup_method() stored a name
+
+  @classmethod
   def cluster_setup(cls, args):
     cluster_args = list()
     disable_log_buffering = args.get(DISABLE_LOG_BUFFERING, False)
@@ -393,6 +410,10 @@
 
   def setup_method(self, method):
     if not self.SHARED_CLUSTER_ARGS:
+      # Store the test method name so that we can put logs in different directories for
+      # different tests. This only applies if the cluster is being restarted per test
+      # method. If this cluster is used for multiple test methods, leave this unset.
+      self.set_current_test_method_name(method.__name__)
       self.cluster_setup(method.__dict__)
     elif method.__dict__.get(WITH_ARGS_METHOD):
       pytest.fail("Cannot specify with_args on both class and methods")
@@ -426,6 +447,7 @@
   def teardown_method(self, method):
     if not self.SHARED_CLUSTER_ARGS:
       self.cluster_teardown(method.__name__, method.__dict__)
+    self.set_current_test_method_name(None)
 
   def wait_for_wm_init_complete(self, timeout_s=180):
     """
@@ -554,7 +576,7 @@
   @classmethod
   def _start_impala_cluster(cls,
                             options,
-                            impala_log_dir=os.getenv('LOG_DIR', "/tmp/"),
+                            impala_log_dir=None,
                             cluster_size=DEFAULT_CLUSTER_SIZE,
                             num_coordinators=NUM_COORDINATORS,
                             use_exclusive_coordinators=False,
@@ -570,7 +592,33 @@
                             wait_for_backends=True,
                             log_symlinks=False,
                             force_restart=True):
-    cls.impala_log_dir = impala_log_dir
+    if impala_log_dir:
+      # If the test gave a specific location, use it, as the test may be parsing the logs
+      # to find certain output.
+      cls.impala_log_dir = impala_log_dir
+    else:
+      # The test didn't customize the log dir, so calculate a reasonable base directory.
+      # To find the log directory, we proceed in this order:
+      # 1. LOG_DIR environment variable (used in test scripts for Jenkins jobs, etc)
+      # 2. IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR - set in impala-config.sh (used in devenvs)
+      # 3. /tmp/ - This probably shouldn't happen, but at least the logs can go somewhere
+      impala_base_log_dir = os.getenv("LOG_DIR",
+          os.getenv("IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR", "/tmp/"))
+
+      # To make it easier to find logs across multiple custom cluster tests, organize
+      # them into subdirectories based on their test class and their test method name
+      # (where applicable).
+      impala_log_dir_per_test = os.path.join(impala_base_log_dir, cls.__name__)
+      # The CURRENT_TEST_METHOD_NAME will be None when using SHARED_CLUSTER_ARGS as the
+      # cluster is not restarted for each test method
+      if cls.CURRENT_TEST_METHOD_NAME:
+        impala_log_dir_per_test = os.path.join(impala_log_dir_per_test,
+            cls.CURRENT_TEST_METHOD_NAME)
+
+      if not os.path.isdir(impala_log_dir_per_test):
+        os.makedirs(impala_log_dir_per_test)
+      cls.impala_log_dir = impala_log_dir_per_test
+
     # We ignore TEST_START_CLUSTER_ARGS here. Custom cluster tests specifically test that
     # certain custom startup arguments work and we want to keep them independent of dev
     # environments.
@@ -578,7 +626,7 @@
            '--state_store_args=%s' % DEFAULT_STATESTORE_ARGS,
            '--cluster_size=%d' % cluster_size,
            '--num_coordinators=%d' % num_coordinators,
-           '--log_dir=%s' % impala_log_dir,
+           '--log_dir=%s' % cls.impala_log_dir,
            '--log_level=%s' % log_level]
 
     if ignore_pid_on_log_rotation:
diff --git a/tests/custom_cluster/test_breakpad.py b/tests/custom_cluster/test_breakpad.py
index db3f2cc..666f384 100644
--- a/tests/custom_cluster/test_breakpad.py
+++ b/tests/custom_cluster/test_breakpad.py
@@ -264,7 +264,7 @@
     """Check that setting 'minidump_path' to a relative value results in minidump files
     written to 'log_dir'.
     """
-    minidump_base_dir = os.path.join(os.environ.get('LOG_DIR', '/tmp'), 'minidumps')
+    minidump_base_dir = os.path.join(self.impala_log_dir, 'minidumps')
     shutil.rmtree(minidump_base_dir, ignore_errors=True)
     # Omitting minidump_path as a parameter to the cluster will choose the default
     # configuration, which is a FLAGS_log_dir/minidumps.