[WIP] [BUGFIX] Fix flakey TemporaryDirectory() cleanup on Windows (#21107)
* Trigger CI to show windows-gpu job failures
* Fix TemporaryDirectory cleanup issues on Windows
* Fix PermissionError handling
* Remove temporary comments
* Fix TemporaryDirectory() to yield dir.name
* Remove temp dirs even with exceptions
diff --git a/python/mxnet/gluon/model_zoo/model_store.py b/python/mxnet/gluon/model_zoo/model_store.py
index 0ab1151..f742c30 100644
--- a/python/mxnet/gluon/model_zoo/model_store.py
+++ b/python/mxnet/gluon/model_zoo/model_store.py
@@ -21,11 +21,9 @@
import os
import zipfile
import logging
-import tempfile
import uuid
-import shutil
-from ..utils import download, check_sha1, replace_file
+from ..utils import download, check_sha1, replace_file, TemporaryDirectory
from ... import base
_model_sha1 = {name: checksum for checksum, name in [
@@ -114,11 +112,10 @@
download(_url_format.format(repo_url=repo_url, file_name=file_name),
path=temp_zip_file_path, overwrite=True)
with zipfile.ZipFile(temp_zip_file_path) as zf:
- temp_dir = tempfile.mkdtemp(dir=root)
- zf.extractall(temp_dir)
- temp_file_path = os.path.join(temp_dir, file_name+'.params')
- replace_file(temp_file_path, file_path)
- shutil.rmtree(temp_dir)
+ with TemporaryDirectory(dir=root) as temp_dir:
+ zf.extractall(temp_dir)
+ temp_file_path = os.path.join(temp_dir, file_name+'.params')
+ replace_file(temp_file_path, file_path)
os.remove(temp_zip_file_path)
if check_sha1(file_path, sha1_hash):
diff --git a/python/mxnet/gluon/utils.py b/python/mxnet/gluon/utils.py
index 267ed2c..36ce4b0 100644
--- a/python/mxnet/gluon/utils.py
+++ b/python/mxnet/gluon/utils.py
@@ -34,7 +34,7 @@
import numpy as np
from .. import ndarray
-from ..util import is_np_shape, is_np_array
+from ..util import is_np_shape, is_np_array, TemporaryDirectory
from .. import numpy as _mx_np # pylint: disable=reimported
diff --git a/python/mxnet/util.py b/python/mxnet/util.py
index f99dfd0..3a190d7 100644
--- a/python/mxnet/util.py
+++ b/python/mxnet/util.py
@@ -20,6 +20,9 @@
import functools
import inspect
import threading
+import tempfile
+import platform
+from contextlib import contextmanager
from struct import calcsize
from .base import (_LIB, check_call, c_str, py_str,
@@ -1359,3 +1362,22 @@
elif isinstance(number, _np.generic):
return number.dtype
raise TypeError('type {} not supported'.format(str(type(number))))
+
+# This wraps tempfile.TemporaryDirectory(), which is known to have cleanup issues on Windows.
+# The problem is partially handled as of Python 3.10 by the addition of an 'ignore_cleanup_errors'
+# parameter. Once MXNet's minimum Python version is >= 3.10, this function could be simplified
+# to use 'ignore_cleanup_errors'. Until the fundamental Windows issues are resolved, it is best
+# to use this routine instead of tempfile.TemporaryDirectory().
+@contextmanager
+def TemporaryDirectory(*args, **kwargs):
+ """Yield a temporary directory path, ignoring PermissionError during cleanup on Windows.
+ """
+ dir = tempfile.TemporaryDirectory(*args, **kwargs)  # all arguments are forwarded unchanged
+ try:
+ yield dir.name  # the with-block receives the directory path, not the wrapper object
+ finally:  # attempt cleanup even when the with-block raises
+ try:
+ dir.cleanup()
+ except PermissionError:  # only this exception type is intercepted
+ if platform.system() != 'Windows':
+ raise  # on non-Windows platforms the cleanup failure still propagates
diff --git a/tests/nightly/test_large_array.py b/tests/nightly/test_large_array.py
index fb1fad5..391ead3 100644
--- a/tests/nightly/test_large_array.py
+++ b/tests/nightly/test_large_array.py
@@ -23,6 +23,7 @@
import mxnet as mx
from mxnet.test_utils import rand_ndarray, assert_almost_equal, rand_coord_2d, default_device, check_symbolic_forward, create_2d_tensor
+from mxnet.util import TemporaryDirectory
from mxnet import gluon, nd
from common import with_seed
import pytest
@@ -1028,7 +1029,7 @@
def check_load_save():
x = create_2d_tensor(SMALL_Y, LARGE_X)
- with tempfile.TemporaryDirectory() as tmp:
+ with TemporaryDirectory() as tmp:
tmpfile = os.path.join(tmp, 'large_tensor')
nd.save(tmpfile, [x])
y = nd.load(tmpfile)
diff --git a/tests/nightly/test_large_vector.py b/tests/nightly/test_large_vector.py
index 01d7528..dea2e6f 100644
--- a/tests/nightly/test_large_vector.py
+++ b/tests/nightly/test_large_vector.py
@@ -17,12 +17,12 @@
import os
import sys
-import tempfile
import math
import numpy as np
import mxnet as mx
from mxnet.test_utils import rand_ndarray, assert_almost_equal, rand_coord_2d, create_vector
+from mxnet.util import TemporaryDirectory
from mxnet import gluon, nd
from common import with_seed
import pytest
@@ -374,7 +374,7 @@
def check_load_save():
x = create_vector(size=LARGE_X)
- with tempfile.TemporaryDirectory() as tmp:
+ with TemporaryDirectory() as tmp:
tmpfile = os.path.join(tmp, 'large_vector')
nd.save(tmpfile, [x])
y = nd.load(tmpfile)
diff --git a/tests/python/unittest/common.py b/tests/python/unittest/common.py
index d55bc74..963abe3 100644
--- a/tests/python/unittest/common.py
+++ b/tests/python/unittest/common.py
@@ -30,7 +30,7 @@
import models
from contextlib import contextmanager
import pytest
-from tempfile import TemporaryDirectory
+from mxnet.util import TemporaryDirectory
import locale
xfail_when_nonstandard_decimal_separator = pytest.mark.xfail(
diff --git a/tests/python/unittest/test_deferred_compute.py b/tests/python/unittest/test_deferred_compute.py
index b9d93a5..3d69151 100644
--- a/tests/python/unittest/test_deferred_compute.py
+++ b/tests/python/unittest/test_deferred_compute.py
@@ -17,13 +17,13 @@
import functools
import operator
-import tempfile
import numpy as np
import mxnet as mx
import mxnet._deferred_compute as dc
from mxnet.base import MXNetError
+from mxnet.util import TemporaryDirectory
import pytest
@@ -420,7 +420,7 @@
_all_same(ys_np, ys_hybrid_np)
- with tempfile.TemporaryDirectory() as root:
+ with TemporaryDirectory() as root:
with mx.util.np_shape(True), mx.util.np_array(True):
net.export(root)