PyTorch and Pillow-SIMD and matplot with new pillow-simd

f61a6569 · Alexandre Strube · 86bc06c1 · f61a6569 · f61a6569 · f61a6569
Commit f61a6569 authored 3 years ago by Alexandre Strube
--- a/Golden_Repo/m/matplotlib/matplotlib-3.4.3-gcccoremkl-11.2.0-2021.4.0.eb
+++ b/Golden_Repo/m/matplotlib/matplotlib-3.4.3-gcccoremkl-11.2.0-2021.4.0.eb
@@ -21,7 +21,7 @@ dependencies = [
    ('libpng', '1.6.37'),
    ('freetype', '2.11.0'),
    ('Tkinter', '%(pyver)s'),
-    ('Pillow-SIMD', '8.3.1'),
+    ('Pillow-SIMD', '9.0.1'),
    ('Qhull', '2020.2')
 ]

--- a/Golden_Repo/p/Pillow-SIMD/Pillow-SIMD-9.0.1-GCCcore-11.2.0.eb
+++ b/Golden_Repo/p/Pillow-SIMD/Pillow-SIMD-9.0.1-GCCcore-11.2.0.eb
+easyblock = 'PythonPackage'
+name = 'Pillow-SIMD'
+version = '9.0.1'
+homepage = 'https://github.com/uploadcare/pillow-simd'
+description = """Pillow is the 'friendly PIL fork' by Alex Clark and Contributors.
+ PIL is the Python Imaging Library by Fredrik Lundh and Contributors."""
+toolchain = {'name': 'GCCcore', 'version': '11.2.0'}
+source_urls = ['https://github.com/uploadcare/pillow-simd/archive/']
+sources = ['%(version)s.tar.gz']
+builddependencies = [('binutils', '2.37')]
+dependencies = [
+    ('Python', '3.9.6'),
+    ('libjpeg-turbo', '2.1.1'),
+    ('libpng', '1.6.37'),
+    ('zlib', '1.2.11'),
+    ('LibTIFF', '4.3.0'),
+    ('freetype', '2.11.0')
+]
+use_pip = True
+download_dep_fail = True
+options = {'modulename': 'PIL'}
+sanity_check_paths = {
+    'files': [],
+    'dirs': ['lib/python%(pyshortver)s/site-packages/PIL'],
+}
+sanity_pip_check = True
+moduleclass = 'vis'
--- a/Golden_Repo/p/PyTorch/PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch
+++ b/Golden_Repo/p/PyTorch/PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch
+# Author: Alexander Grund
+# Avoid test failures in CGROUP environments
+# See https://github.com/pytorch/pytorch/issues/44368 and https://github.com/pytorch/pytorch/pull/44369
+diff -Nru pytorch.orig/test/test_dataloader.py pytorch/test/test_dataloader.py
+--- pytorch.orig/test/test_dataloader.py	2021-10-28 19:19:23.284526686 +0200
+++ pytorch/test/test_dataloader.py	2021-10-28 19:21:31.860488973 +0200
+@@ -2374,22 +2374,27 @@
+         after = os.sched_getaffinity(0)
+         return iter(after)
+-
+-def worker_set_affinity(_):
+-    os.sched_setaffinity(0, [multiprocessing.cpu_count() - 1])
+-
+-
+ @unittest.skipIf(
+     not hasattr(os, 'sched_setaffinity'),
+     "os.sched_setaffinity is not available")
+ class TestSetAffinity(TestCase):
+     def test_set_affinity_in_worker_init(self):
+        # Query the current affinity mask to avoid setting a disallowed one
+        old_affinity = os.sched_getaffinity(0)
+        if not old_affinity:
+            self.skipTest("No affinity information")
+        # Choose any
+        expected_affinity = list(old_affinity)[-1]
+
+        def worker_set_affinity(_):
+            os.sched_setaffinity(0, [expected_affinity])
+
+         dataset = SetAffinityDataset()
+         dataloader = torch.utils.data.DataLoader(
+             dataset, num_workers=2, worker_init_fn=worker_set_affinity)
+         for sample in dataloader:
+-            self.assertEqual(sample, [multiprocessing.cpu_count() - 1])
+            self.assertEqual(sample, [expected_affinity])
+ class ConvDataset(Dataset):
+     def __init__(self):
--- a/Golden_Repo/p/PyTorch/PyTorch-1.10.0_skip_cmake_rpath.patch
+++ b/Golden_Repo/p/PyTorch/PyTorch-1.10.0_skip_cmake_rpath.patch
+# Author: Caspar van Leeuwen
+# PyTorch's CMAKE configuration by default sets RUNPATH on libraries if they link other libraries
+# that are outside the build tree, which is done because of the CMAKE config on 
+# https://github.com/pytorch/pytorch/blob/v1.10.0/cmake/Dependencies.cmake#L10.
+# This provides problems, since the cuda stubs library path then also gets added to the RUNPATH.
+# As a result, at runtime, the stub version of things like libcuda.so.1 gets picked up, instead of the real drivers
+# See https://github.com/easybuilders/easybuild-easyconfigs/issues/14359
+# This line https://github.com/pytorch/pytorch/blob/v1.10.0/cmake/Dependencies.cmake#L16
+# Makes sure that any path that is linked, is also added to the RUNPATH.
+# This has been reported upstream in https://github.com/pytorch/pytorch/issues/35418
+# and a fix was attempted in https://github.com/pytorch/pytorch/pull/37737 but it was reverted
+#
+# This EasyBuild patch changes behavior for the libraries that were failing, i.e. the ones in this list:
+# https://github.com/easybuilders/easybuild-easyconfigs/issues/14359#issuecomment-970479904
+# This is done by setting INSTALL_RPATH_USE_LINK_PATH to false, and instead, specifying the RPATH
+# explicitely by defining INSTALL_RPATH, but only adding directories that do not match to the "stubs" regex
+# It has been upstreamed in this PR https://github.com/pytorch/pytorch/pull/68912 (not accepted yet at the time of writing)
+diff -Nru pytorch.orig/caffe2/CMakeLists.txt pytorch/caffe2/CMakeLists.txt
+--- pytorch.orig/caffe2/CMakeLists.txt	2021-11-17 11:46:01.797337624 +0100
+++ pytorch/caffe2/CMakeLists.txt	2021-11-18 19:05:35.637707235 +0100
+@@ -630,8 +630,33 @@
+     else()
+       set(DELAY_LOAD_FLAGS "")
+     endif()
+-    target_link_libraries(caffe2_nvrtc ${CUDA_NVRTC} ${CUDA_CUDA_LIB} ${CUDA_NVRTC_LIB} ${DELAY_LOAD_FLAGS})
+    # message("CUDA_NVRTC: ${CUDA_NVRTC}")
+    # message("CUDA_NVRTC_LIB: ${CUDA_NVRTC_LIB}")
+    # message("CUDA_CUDA_LIB: ${CUDA_CUDA_LIB}")
+    # message("DELAY_LOAD_FLAGS: ${DELAY_LOAD_FLAGS}")
+    # if(CUDA_CUDA_LIB MATCHES "stubs")
+    #   message("stubs libraries found in the CUDA_CUDA_LIB: ${CUDA_CUDA_LIB}")
+    # else()
+    #   message("Stubs libs not found in CUDA_CUDA_LIB: ${CUDA_CUDA_LIB}")
+    # endif()
+    # Make sure the CUDA stubs folder doesn't end up in the RPATH of CAFFE2_NVRTC:
+    set(CAFFE2_NVRTC_LIBS ${CUDA_NVRTC} ${CUDA_CUDA_LIB} ${CUDA_NVRTC_LIB})
+    foreach(LIB IN LISTS CAFFE2_NVRTC_LIBS)
+      message("LIB: ${LIB}")
+      if(LIB MATCHES "stubs")
+        message("Filtering ${LIB} from being set in caffe2_nvrtc's RPATH, because it appears to point to the CUDA stubs directory, which should not be RPATHed.")
+      else()
+        cmake_path(GET LIB PARENT_PATH LIB_PATH)
+        message("LIBPATH: ${LIB_PATH}")
+	list(APPEND CAFFE2_NVRTC_RPATH ${LIB_PATH})
+      endif()
+    endforeach()
+    message("CAFFE2_NVRTC_RPATH: ${CAFFE2_NVRTC_RPATH}")
+    set_target_properties(caffe2_nvrtc PROPERTIES INSTALL_RPATH_USE_LINK_PATH FALSE)
+    set_target_properties(caffe2_nvrtc PROPERTIES INSTALL_RPATH "${CAFFE2_NVRTC_RPATH}")
+    target_link_libraries(caffe2_nvrtc ${CAFFE2_NVRTC_LIBS} ${DELAY_LOAD_FLAGS})
+     target_include_directories(caffe2_nvrtc PRIVATE ${CUDA_INCLUDE_DIRS})
+#    message(FATAL_ERROR "STOP HERE, we're debugging")
+     install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
+     if(USE_NCCL AND BUILD_SPLIT_CUDA)
+       list(APPEND Caffe2_GPU_SRCS_CPP
+diff -Nru pytorch.orig/test/cpp/api/CMakeLists.txt pytorch/test/cpp/api/CMakeLists.txt
+--- pytorch.orig/test/cpp/api/CMakeLists.txt	2021-11-17 11:46:02.991350652 +0100
+++ pytorch/test/cpp/api/CMakeLists.txt	2021-11-18 19:06:41.207423777 +0100
+@@ -61,6 +61,22 @@
+     ${CUDA_CUDA_LIB}
+     ${TORCH_CUDA_LIBRARIES})
+  # Make sure the CUDA stubs folder doesn't end up in the RPATH of test_api:
+  set(TEST_API_LIBS ${CUDA_LIBRARIES} ${CUDA_NVRTC_LIB} ${CUDA_CUDA_LIB} ${TORCH_CUDA_LIBRARIES})
+  foreach(LIB IN LISTS TEST_API_LIBS)
+    message("LIB: ${LIB}")
+    if(LIB MATCHES "stubs")
+      message("Filtering ${LIB} from being set in caffe2_nvrtc's RPATH, because it appears to point to the CUDA stubs directory, which should not be RPATHed.")
+    else()
+      cmake_path(GET LIB PARENT_PATH LIB_PATH)
+      message("LIBPATH: ${LIB_PATH}")
+      list(APPEND TEST_API_RPATH ${LIB_PATH})
+    endif()
+  endforeach()
+  message("TEST_API_RPATH: ${TEST_API_RPATH}")
+  set_target_properties(test_api PROPERTIES INSTALL_RPATH_USE_LINK_PATH FALSE)
+  set_target_properties(test_api PROPERTIES INSTALL_RPATH "${TEST_API_RPATH}")
+
+   target_compile_definitions(test_api PRIVATE "USE_CUDA")
+ endif()
+diff -Nru pytorch.orig/test/cpp/dist_autograd/CMakeLists.txt pytorch/test/cpp/dist_autograd/CMakeLists.txt
+--- pytorch.orig/test/cpp/dist_autograd/CMakeLists.txt	2021-11-17 11:46:02.993350674 +0100
+++ pytorch/test/cpp/dist_autograd/CMakeLists.txt	2021-11-18 19:06:18.389174421 +0100
+@@ -16,6 +16,22 @@
+       ${CUDA_CUDA_LIB}
+       ${TORCH_CUDA_LIBRARIES})
+    # Make sure the CUDA stubs folder doesn't end up in the RPATH of test_dist_autograd:
+    set(DIST_AUTOGRAD_LIBS ${CUDA_LIBRARIES} ${CUDA_NVRTC_LIB} ${CUDA_CUDA_LIB} ${TORCH_CUDA_LIBRARIES})
+    foreach(LIB IN LISTS DIST_AUTOGRAD_LIBS)
+      message("LIB: ${LIB}")
+      if(LIB MATCHES "stubs")
+        message("Filtering ${LIB} from being set in caffe2_nvrtc's RPATH, because it appears to point to the CUDA stubs directory, which should not be RPATHed.")
+      else()
+        cmake_path(GET LIB PARENT_PATH LIB_PATH)
+        message("LIBPATH: ${LIB_PATH}")
+        list(APPEND DIST_AUTOGRAD_RPATH ${LIB_PATH})
+      endif()
+    endforeach()
+    message("DIST_AUTOGRAD_RPATH: ${DIST_AUTOGRAD_RPATH}")
+    set_target_properties(test_dist_autograd PROPERTIES INSTALL_RPATH_USE_LINK_PATH FALSE)
+    set_target_properties(test_dist_autograd PROPERTIES INSTALL_RPATH "${DIST_AUTOGRAD_RPATH}")
+
+     target_compile_definitions(test_dist_autograd PRIVATE "USE_CUDA")
+   endif()
+diff -Nru pytorch.orig/test/cpp/jit/CMakeLists.txt pytorch/test/cpp/jit/CMakeLists.txt
+--- pytorch.orig/test/cpp/jit/CMakeLists.txt	2021-11-17 11:46:02.989350630 +0100
+++ pytorch/test/cpp/jit/CMakeLists.txt	2021-11-18 19:05:41.396770168 +0100
+@@ -94,6 +94,7 @@
+   list(APPEND JIT_TEST_DEPENDENCIES onnx_library)
+ endif(MSVC)
+
+ target_link_libraries(test_jit PRIVATE ${JIT_TEST_DEPENDENCIES})
+ target_include_directories(test_jit PRIVATE ${ATen_CPU_INCLUDE})
+@@ -109,6 +110,22 @@
+     ${CUDA_CUDA_LIB}
+     ${TORCH_CUDA_LIBRARIES})
+  # Make sure the CUDA stubs folder doesn't end up in the RPATH of test_jit:
+  set(TEST_JIT_LIBS ${CUDA_LIBRARIES} ${CUDA_NVRTC_LIB} ${CUDA_CUDA_LIB} ${TORCH_CUDA_LIBRARIES})
+  foreach(LIB IN LISTS TEST_JIT_LIBS)
+    message("LIB: ${LIB}")
+    if(LIB MATCHES "stubs")
+      message("Filtering ${LIB} from being set in test_jit's RPATH, because it appears to point to the CUDA stubs directory, which should not be RPATHed.")
+    else()
+      cmake_path(GET LIB PARENT_PATH LIB_PATH)
+      message("LIBPATH: ${LIB_PATH}")
+      list(APPEND TEST_JIT_RPATH ${LIB_PATH})
+    endif()
+  endforeach()
+  message("TEST_JIT_RPATH: ${TEST_JIT_RPATH}")
+  set_target_properties(test_jit PROPERTIES INSTALL_RPATH_USE_LINK_PATH FALSE)
+  set_target_properties(test_jit PROPERTIES INSTALL_RPATH "${TEST_JIT_RPATH}")
+
+   target_compile_definitions(test_jit PRIVATE USE_CUDA)
+ elseif(USE_ROCM)
+   target_link_libraries(test_jit PRIVATE
+diff -Nru pytorch.orig/test/cpp/rpc/CMakeLists.txt pytorch/test/cpp/rpc/CMakeLists.txt
+--- pytorch.orig/test/cpp/rpc/CMakeLists.txt	2021-11-17 11:46:02.991350652 +0100
+++ pytorch/test/cpp/rpc/CMakeLists.txt	2021-11-18 19:06:30.502306793 +0100
+@@ -39,6 +39,22 @@
+     ${CUDA_CUDA_LIB}
+     ${TORCH_CUDA_LIBRARIES})
+  # Make sure the CUDA stubs folder doesn't end up in the RPATH of test_cpp_rpc:
+  set(CPP_RPC_LIBS ${CUDA_LIBRARIES} ${CUDA_NVRTC_LIB} ${CUDA_CUDA_LIB} ${TORCH_CUDA_LIBRARIES})
+  foreach(LIB IN LISTS CPP_RPC_LIBS)
+    message("LIB: ${LIB}")
+    if(LIB MATCHES "stubs")
+      message("Filtering ${LIB} from being set in caffe2_nvrtc's RPATH, because it appears to point to the CUDA stubs directory, which should not be RPATHed.")
+    else()
+      cmake_path(GET LIB PARENT_PATH LIB_PATH)
+      message("LIBPATH: ${LIB_PATH}")
+      list(APPEND CPP_RPC_RPATH ${LIB_PATH})
+    endif()
+  endforeach()
+  message("CPP_RPC_RPATH: ${CPP_RPC_RPATH}")
+  set_target_properties(test_cpp_rpc PROPERTIES INSTALL_RPATH_USE_LINK_PATH FALSE)
+  set_target_properties(test_cpp_rpc PROPERTIES INSTALL_RPATH "${CPP_RPC_RPATH}")
+
+   target_compile_definitions(test_cpp_rpc PRIVATE "USE_CUDA")
+ endif()
+diff -Nru pytorch.orig/test/cpp/tensorexpr/CMakeLists.txt pytorch/test/cpp/tensorexpr/CMakeLists.txt
+--- pytorch.orig/test/cpp/tensorexpr/CMakeLists.txt	2021-11-17 11:46:02.993350674 +0100
+++ pytorch/test/cpp/tensorexpr/CMakeLists.txt	2021-11-18 19:06:00.988984273 +0100
+@@ -62,6 +62,24 @@
+     ${CUDA_CUDA_LIB}
+     ${TORCH_CUDA_LIBRARIES})
+   target_compile_definitions(tutorial_tensorexpr PRIVATE USE_CUDA)
+
+  # Make sure the CUDA stubs folder doesn't end up in the RPATH of tutorial_tensorexpr:
+  set(CUDA_LINK_LIBS ${CUDA_LIBRARIES} ${CUDA_NVRTC_LIB} ${CUDA_CUDA_LIB} ${TORCH_CUDA_LIBRARIES})
+  foreach(LIB IN LISTS CUDA_LINK_LIBS)
+    message("LIB: ${LIB}")
+    if(LIB MATCHES "stubs")
+      message("Filtering ${LIB} from being set in test_tensorexpr and tutorial_tensorexpr RPATH, because it appears to point to the CUDA stubs directory, which should not be RPATHed.")
+    else()
+      cmake_path(GET LIB PARENT_PATH LIB_PATH)
+      message("LIBPATH: ${LIB_PATH}")
+      list(APPEND TENSOREXPR_RPATH ${LIB_PATH})
+    endif()
+  endforeach()
+  message("TENSOREXPR_RPATH: ${TENSOREXPR_RPATH}")
+  set_target_properties(test_tensorexpr PROPERTIES INSTALL_RPATH_USE_LINK_PATH FALSE)
+  set_target_properties(test_tensorexpr PROPERTIES INSTALL_RPATH "${TENSOREXPR_RPATH}")
+  set_target_properties(tutorial_tensorexpr PROPERTIES INSTALL_RPATH_USE_LINK_PATH FALSE)
+  set_target_properties(tutorial_tensorexpr PROPERTIES INSTALL_RPATH "${TENSOREXPR_RPATH}")
+ elseif(USE_ROCM)
+   target_link_libraries(test_tensorexpr PRIVATE
+     ${ROCM_HIPRTC_LIB}
--- a/Golden_Repo/p/PyTorch/PyTorch-1.11-gcccoremkl-11.2.0-2021.4.0-CUDA-11.5.eb
+++ b/Golden_Repo/p/PyTorch/PyTorch-1.11-gcccoremkl-11.2.0-2021.4.0-CUDA-11.5.eb
+name = 'PyTorch'
+version = '1.11'
+versionsuffix = '-CUDA-%(cudaver)s'
+homepage = 'https://pytorch.org/'
+description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
+PyTorch is a deep learning framework that puts Python first."""
+toolchain = {'name': 'gcccoremkl', 'version': '11.2.0-2021.4.0'}
+toolchainopts = {'openmp': True}
+# toolchainopts = {'cstd': 'c++11'}
+sources = [{
+    'filename': '%(name)s-%(version)s.tar.gz',
+    'git_config': {
+        'url': 'https://github.com/pytorch',
+        'repo_name': 'pytorch',
+        'tag': 'v1.11.0',
+        'recursive': True,
+    },
+}]
+patches = [
+    'PyTorch-1.7.0_avoid-nan-in-test-torch.patch',
+    'PyTorch-1.7.0_disable-dev-shm-test.patch',
+    # 'PyTorch-1.7.1_correctly-pass-jit_opt_level.patch',
+    'PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch',
+    # 'PyTorch-1.8.1_increase-distributed-test-timeout.patch',
+    'PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch',
+    'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch',
+    # 'PyTorch-1.10.0_fix-alias-violation-in-bitwise-ops.patch',
+    # 'PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch',
+    # 'PyTorch-1.10.0_fix-test-cond-cpu.patch',
+    # 'PyTorch-1.10.0_fix-vnni-detection.patch',
+    # 'PyTorch-1.10.0_increase_zero_optimizer_test_tolerance.patch',
+    # 'PyTorch-1.10.0_skip_failing_ops_tests.patch',
+    # 'PyTorch-1.10.0_skip_nan_tests_openblas.patch',
+    'PyTorch-1.10.0_skip_cmake_rpath.patch',
+    # 'PyTorch-1.10.0_fix-gcc11-ideep.patch',
+    # 'PyTorch-1.10.0_fix_gcc11_nullpointer.patch',
+    # 'cub-lint.yaml.patch',
+    # 'cub-cub.cuh.patch',
+    #('cub-cub-definitions.patch', 1),
+    # 'cub-context_gpu.patch',
+    # 'cub-accuracy_op.patch',
+    # 'cub-affine-channel_op.patch',
+    # 'cub-arg_ops.patch',
+    # 'cub-batch_moments_op.patch',
+    # 'cub-batch_sparse_to_dense_op.patch',
+    # 'cub-boolean_mask_ops.patch',
+    # 'cub-cross_entropy.patch',
+    # 'cub-distance_op.patch',
+    # 'cub-elementwise_div_op.patch',
+    # 'cub-elementwise_linear_op.patch',
+    # 'cub-elementwise_mul_op.patch',
+    # 'cub-elementwise_ops.patch',
+    # 'cub-find_op.patch',
+    # 'cub-generate_proposals_op.patch',
+    # 'cub-normalize_ops.patch',
+    # 'cub-one_hot_ops.patch',
+    # 'cub-pack_segments.patch',
+    # 'cub-prelu_op.patch',
+    # 'cub-reduce_front_back_max_ops.patch',
+    # 'cub-reduce_front_back_sum_mean_ops.patch',
+    # 'cub-reduction_ops.patch',
+    # 'cub-rmac_regions_op.patch',
+    # 'cub-segment_reduction_op_gpu.patch',
+    # 'cub-sequence_ops.patch',
+    # 'cub-softmax_ops.patch',
+    # 'cub-spatial_batch_norm_op_impl.patch',
+    # 'cub-adagrad_fused_op_gpu.patch',
+    # 'cub-adagrad_op_gpu.patch',
+    # 'cub-adam_op_gpu.patch',
+    #('cub-cub_namespace.patch', 1),
+    # 'cub-reduce.patch',
+    # 'cub-math-gpu.patch',
+    # 'cub-CMake-Dependencies.patch',
+]
+osdependencies = [OS_PKG_IBVERBS_DEV]
+builddependencies = [
+    ('CMake', '3.21.1'),
+    ('hypothesis', '6.14.6'),
+]
+dependencies = [
+    ('CUDA', '11.5', '', True),
+    ('Ninja', '1.10.2'),  # Required for JIT compilation of C++ extensions
+    ('Python', '3.9.6'),
+    ('protobuf', '3.17.3'),
+    ('protobuf-python', '3.17.3'),
+    ('pybind11', '2.7.1'),
+    ('SciPy-bundle', '2021.10'),
+    ('typing-extensions', '3.10.0.0'),
+    ('PyYAML', '5.4.1'),
+    ('MPFR', '4.1.0'),
+    ('GMP', '6.2.1'),
+    ('numactl', '2.0.14', '', SYSTEM),
+    ('FFmpeg', '4.4.1'),
+    ('Pillow-SIMD', '9.0.1'),
+    ('cuDNN', '8.3.1.22', '-CUDA-%(cudaver)s', True),
+    ('magma', '2.6.1', '-CUDA-%(cudaver)s'),
+    ('NCCL', '2.11.4', '-CUDA-%(cudaver)s'),
+    ('expecttest', '0.1.3'),
+]
+# default CUDA compute capabilities to use (override via --cuda-compute-capabilities)
+cuda_compute_capabilities = ['6.0', '6.1', '7.0', '7.2', '7.5', '8.0', '8.6']
+custom_opts = ["USE_CUPTI_SO=1"]
+configopts = 'MKL_THREADING_LAYER=sequential CFLAGS="$CFLAGS -fopenmp" CXXFLAGS="$CXXFLAGS -fopenmp" LDFLAGS=-fopenmp'
+excluded_tests = {
+    '': [
+        # Bad tests: https://github.com/pytorch/pytorch/issues/60260
+        'distributed/elastic/utils/distributed_test',
+        'distributed/elastic/multiprocessing/api_test',
+        # These tests fail on A10s at the very least, they time out forever no matter how long the timeout is.
+        # Possibly related to NCCL 2.8.3: https://docs.nvidia.com/deeplearning/nccl/release-notes/rel_2-8-3.html
+        # 'distributed/test_distributed_fork',
+        'distributed/test_distributed_spawn',
+        # Fails on A10s: https://github.com/pytorch/pytorch/issues/63079
+        'test_optim',
+        # Test from this suite timeout often. The process group backend is deprecated anyway
+        # 'distributed/rpc/test_process_group_agent',
+        'test_jit',
+        'test_jit_cuda_fuser',
+        'test_jit_legacy',
+        'test_jit_profiling',
+        'test_xnnpack_integration',
+        'distributed/_shard/sharded_optim/test_sharded_optim',
+        'distributed/_shard/sharded_tensor/ops/test_linear',
+        'distributed/_shard/sharded_tensor/test_megatron_prototype',
+        'distributions/test_distributions',
+        'test_cpp_extensions_jit',
+        'distributed/rpc/test_tensorpipe_agent',
+        'test_ops',
+        'distributed/fsdp/test_fsdp_memory',  # fails on hdfml
+        'distributed/fsdp/test_fsdp_overlap',  # fails on hdfml
+        'test_autograd',  # fails on jureca dc and deep
+        'test_cuda',  # fails on jureca dc
+        'test_multiprocessing',  # fails on jureca dc
+        'test_nn',  # fails on jureca dc
+        'test_profiler',  # fails on jureca dc
+        'test_quantization',  # fails on jureca dc
+        'distributed/_shard/sharded_tensor/test_sharded_tensor',  # fails on deep
+        'distributed/algorithms/test_join',  # fails on deep and jureca dc
+        'distributed/fsdp/test_fsdp_checkpoint',  # fails on deep and jureca dc
+        'distributed/fsdp/test_fsdp_core',  # fails on deep and jureca dc
+        'distributed/fsdp/test_fsdp_freezing_weights',  # fails on deep and jureca dc
+        'distributed/fsdp/test_fsdp_memory',  # fails on deep
+        'distributed/fsdp/test_fsdp_multiple_forward',  # fails on deep and jureca dc
+        'distributed/fsdp/test_fsdp_multiple_wrapping',  # fails on deep and jureca dc
+        'distributed/fsdp/test_fsdp_overlap',  # fails on deep
+        'distributed/fsdp/test_fsdp_pure_fp16',  # fails on deep and jureca dc
+        'distributed/fsdp/test_fsdp_uneven',  # fails on deep and jureca dc
+        'distributed/fsdp/test_wrap',  # fails on deep and jureca dc
+        'distributed/optim/test_zero_redundancy_optimizer',  # fails on deep and jureca dc
+        'distributed/rpc/cuda/test_tensorpipe_agent',  # fails on deep
+        'distributed/rpc/test_faulty_agent',  # fails on deep
+        'distributed/test_c10d_gloo',  # fails on deep
+        'test_model_dump',  # fails on deep
+        'distributed/test_c10d_nccl',  # fails on jureca dc
+        'distributed/test_c10d_spawn_nccl',  # fails on jureca dc
+        'distributed/test_data_parallel',  # fails on jureca dc
+    ]
+}
+runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error  --verbose %(excluded_tests)s'
+# The readelf sanity check can be taken out once the TestRPATH test from https://github.com/pytorch/pytorch/pull/68912
+# is accepted, since it is then checked as part of the PyTorch test suite
+local_libcaffe2 = "$EBROOTPYTORCH/lib/python%%(pyshortver)s/site-packages/torch/lib/libcaffe2_nvrtc.%s" % SHLIB_EXT
+sanity_check_commands = [
+    "python -c 'import torch'",
+    "readelf -d %s | egrep 'RPATH|RUNPATH' | grep -v stubs" % local_libcaffe2,
+]
+tests = ['PyTorch-check-cpp-extension.py']
+moduleclass = 'devel'
--- a/Golden_Repo/p/PyTorch/PyTorch-1.7.0_avoid-nan-in-test-torch.patch
+++ b/Golden_Repo/p/PyTorch/PyTorch-1.7.0_avoid-nan-in-test-torch.patch
+This test uses in-place operations which may generate NaNs making subsequent tests fail
+See https://github.com/pytorch/pytorch/issues/48591
+Author: Alexander Grund (TU Dresden)
+diff --git a/test/test_torch.py b/test/test_torch.py
+index 1f3f568f7b..237fb030f6 100644
+--- a/test/test_torch.py
+++ b/test/test_torch.py
+@@ -15060,7 +15060,7 @@ class TestTorchDeviceType(TestCase):
+                 x_c = x.contiguous()
+                 y_c = y.contiguous()
+                 result_c = fn(x_c, y_c)
+-                result = fn(x, y)
+                result = fn(x.clone(), y)
+                 self.assertEqual(result, result_c)
+                 self.assertTrue(
+                     result.is_contiguous(memory_format=memory_format),
--- a/Golden_Repo/p/PyTorch/PyTorch-1.7.0_disable-dev-shm-test.patch
+++ b/Golden_Repo/p/PyTorch/PyTorch-1.7.0_disable-dev-shm-test.patch
+This test fails randomly. I assume some concurrent test runners or another race condition
+Should be safe even if this fails
+See https://github.com/pytorch/pytorch/issues/48579
+Author: Alexander Grund (TU Dresden)
+diff --git a/test/test_multiprocessing.py b/test/test_multiprocessing.py
+index 49e0a3cb45..e4e5b64ca1 100644
+--- a/test/test_multiprocessing.py
+++ b/test/test_multiprocessing.py
+@@ -202,7 +202,9 @@ class leak_checker(object):
+             # available_fds = self._get_next_fds(10)
+             # self.test_case.assertLessEqual(
+             #     available_fds[-1] - self.next_fds[-1], 5)
+-            self.test_case.assertFalse(self.has_shm_files())
+            # self.test_case.assertFalse(self.has_shm_files())
+            if self.has_shm_files():
+                print("WARNING: has_shm_files test would have failed!")
+         return False
+     def check_pid(self, pid):
--- a/Golden_Repo/p/PyTorch/PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch
+++ b/Golden_Repo/p/PyTorch/PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch
+Disable a part of a test which uses the current GPUs CUDA compute capability
+This will fail if the GPU is newer than what nvcc supports.
+See https://github.com/pytorch/pytorch/issues/51950
+Author: Alexander Grund (TU Dresden)
+diff --git a/test/test_cpp_extensions_jit.py b/test/test_cpp_extensions_jit.py
+index efda7cb2cf..64607346c8 100644
+--- a/test/test_cpp_extensions_jit.py
+++ b/test/test_cpp_extensions_jit.py
+@@ -181,11 +181,9 @@ class TestCppExtensionJIT(common.TestCase):
+         #   - With/without '+PTX'
+         n = torch.cuda.device_count()
+-        capabilities = {torch.cuda.get_device_capability(i) for i in range(n)}
+         # expected values is length-2 tuple: (list of ELF, list of PTX)
+         # note: there should not be more than one PTX value
+         archflags = {
+-            '': (['{}{}'.format(capability[0], capability[1]) for capability in capabilities], None),
+             "Maxwell+Tegra;6.1": (['53', '61'], None),
+             "Pascal 3.5": (['35', '60', '61'], None),
+             "Volta": (['70'], ['70']),
--- a/Golden_Repo/p/PyTorch/PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch
+++ b/Golden_Repo/p/PyTorch/PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch
+Some tests fail when run with anything but 2 GPUs and others when run with anything but 2 or 4 GPUs.
+So limit to 2 GPUs.
+See https://github.com/pytorch/pytorch/issues/59548
+Author: Alexander Grund (TU Dresden)
+diff --git a/test/distributed/optim/test_zero_redundancy_optimizer.py b/test/distributed/optim/test_zero_redundancy_optimizer.py
+index 06f1b4f484..bc82f6c304 100644
+--- a/test/distributed/optim/test_zero_redundancy_optimizer.py
+++ b/test/distributed/optim/test_zero_redundancy_optimizer.py
+@@ -233,7 +233,7 @@ class TestZeroRedundancyOptimizerSingleRank(TestZeroRedundancyOptimizer):
+ class TestZeroRedundancyOptimizerDistributed(TestZeroRedundancyOptimizer):
+     @property
+     def world_size(self):
+-        return min(4, max(2, torch.cuda.device_count()))
+        return 2
+     @skip_if_rocm
+     def test_step(self):