IMPALA-9129: Add a test fixture that cleans up intentional core dumps

Some negative tests produce core dumps intentionally. We should have a
way of removing these as part of test cleanup.

For custom cluster tests, the cores are likely to be generated during the
base class setup phase, i.e. before the fixture records which cores already
exist. The fixture would then treat them as pre-existing and leave them in
place, so it is not useful there. Such was the case with the test case
TestAuthorizationProvider::test_invalid_provider_flag. In this instance,
we had to add the same snapshot-and-remove steps directly to the test
class's setup_method() and teardown_method().

Testing done:
For test_invalid_provider_flag, I made sure I had pre-existing core files
in the IMPALA_HOME directory, then ran the test to confirm new cores were
removed.

-- 2019-11-06 19:53:27,303 INFO  MainThread: Removing core.impalad.61852 created by test_invalid_provider_flag
-- 2019-11-06 19:53:27,375 INFO  MainThread: Removing core.impalad.61856 created by test_invalid_provider_flag
-- 2019-11-06 19:53:27,450 INFO  MainThread: Removing core.impalad.61849 created by test_invalid_provider_flag

...and then made sure the pre-existing cores were still present.

Change-Id: I778f27e820a6983894c1294d35627ddb04f5a51a
Reviewed-on: http://gerrit.cloudera.org:8080/14640
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
diff --git a/lib/python/impala_py_lib/helpers.py b/lib/python/impala_py_lib/helpers.py
new file mode 100644
index 0000000..93a868f
--- /dev/null
+++ b/lib/python/impala_py_lib/helpers.py
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import fnmatch
+import os
+import re
+import subprocess
+
+
+def exec_local_command(cmd):
+  """
+  Run a local command and hand back whatever it wrote to stdout.
+
+  The command string is split on whitespace and executed directly, without a shell.
+
+  Args:
+    cmd: command as a string
+  """
+  argv = cmd.split()
+  return subprocess.check_output(argv)
+
+
+def find_all_files(fname_pattern, base_dir=os.getenv('IMPALA_HOME', '.')):
+  """
+  Walk a directory tree and collect every file whose name matches a unix-style glob.
+
+  Args:
+    fname_pattern: Unix glob, matched against file names only (not directories)
+    base_dir: directory to start walking from; defaults to $IMPALA_HOME (or '.'),
+      read once when this module is imported
+
+  Returns:
+    A list of matching paths, each one prefixed with the given base_dir
+  """
+  name_matches = re.compile(fnmatch.translate(fname_pattern)).match
+  found = []
+  for parent, _, names in os.walk(base_dir):
+    found.extend(os.path.join(parent, name) for name in names if name_matches(name))
+
+  return found
+
+
+def is_core_dump(file_path):
+  """
+  Determine whether given file is an ELF core file. Works on CentOS and Ubuntu.
+
+  Args:
+    file_path: path to a possible core file, passed to `file` as a single argument
+  """
+  file_type = subprocess.check_output(["file", file_path], universal_newlines=True)
+  return "core file" in file_type and 'ELF' in file_type
diff --git a/tests/authorization/test_provider.py b/tests/authorization/test_provider.py
index 4d0e671..c80db78 100644
--- a/tests/authorization/test_provider.py
+++ b/tests/authorization/test_provider.py
@@ -17,13 +17,17 @@
 #
 # Client tests for SQL statement authorization
 
+import logging
 import pytest
 import os
 import tempfile
 
+from impala_py_lib.helpers import find_all_files, is_core_dump
 from tests.common.file_utils import assert_file_in_dir_contains
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 
+LOG = logging.getLogger('test_provider')
+
 
 class TestAuthorizationProvider(CustomClusterTestSuite):
   """
@@ -38,6 +42,8 @@
   LOG_DIR = tempfile.mkdtemp(prefix="test_provider_", dir=os.getenv("LOG_DIR"))
   MINIDUMP_PATH = tempfile.mkdtemp()
 
+  pre_test_cores = None
+
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
       impala_log_dir=LOG_DIR,
@@ -59,6 +65,10 @@
                                 .format(TestAuthorizationProvider.BAD_FLAG))
 
   def setup_method(self, method):
+    # Make a note of any core files that already exist
+    possible_cores = find_all_files('*core*')
+    self.pre_test_cores = set([f for f in possible_cores if is_core_dump(f)])
+
     # Explicitly override CustomClusterTestSuite.setup_method() to
     # allow it to exception, since this testsuite is for cases where
     # startup fails
@@ -68,10 +78,18 @@
       self._stop_impala_cluster()
 
   def teardown_method(self, method):
-    # Explicitly override CustomClusterTestSuite.teardown_method() to
-    # allow it to exception, since it relies on setup_method() having
-    # completed successfully
     try:
+      # The core dumps expected to be generated by this test should be cleaned up
+      possible_cores = find_all_files('*core*')
+      post_test_cores = set([f for f in possible_cores if is_core_dump(f)])
+
+      for f in (post_test_cores - self.pre_test_cores):
+        LOG.info("Cleaned up {core} created by test_invalid_provider_flag".format(core=f))
+        os.remove(f)
+
+      # Explicitly override CustomClusterTestSuite.teardown_method() to
+      # allow it to exception, since it relies on setup_method() having
+      # completed successfully
       super(TestAuthorizationProvider, self).teardown_method(method)
     except Exception:
       self._stop_impala_cluster()
diff --git a/tests/conftest.py b/tests/conftest.py
index 8589867..21438c7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -28,6 +28,7 @@
 import pytest
 
 import tests.common
+from impala_py_lib.helpers import find_all_files, is_core_dump
 from tests.common.environ import build_flavor_timeout
 from common.test_result_verifier import QueryTestResult
 from tests.common.patterns import is_valid_impala_identifier
@@ -213,6 +214,27 @@
     metafunc.parametrize('vector', vectors, ids=vector_names)
 
 
+@pytest.yield_fixture
+def cleanup_generated_core_dumps(request):
+  """
+  Remove core dumps that a negative test produced on purpose.
+
+  The set of core files found before the test body runs is subtracted from the set
+  found afterwards, so cores that already existed are left alone for later triage.
+  """
+  cores_before = {path for path in find_all_files('*core*') if is_core_dump(path)}
+
+  yield  # Hand control to the test body
+
+  cores_after = {path for path in find_all_files('*core*') if is_core_dump(path)}
+  new_cores = cores_after.difference(cores_before)
+
+  for core_path in new_cores:
+    message = "Cleaned up {core} created by {test}"
+    LOG.info(message.format(core=core_path, test=request.node.name))
+    os.remove(core_path)
+
+
 @pytest.fixture
 def testid_checksum(request):
   """