Commit 49297bcd authored by Alexandre Strube

PyTorch

parent 5a1a1f8d
@@ -20,7 +20,7 @@ dependencies = [
     ('SciPy-bundle', '2021.10', '', ('gcccoremkl', '11.2.0-2021.4.0')),
     ('Shapely', '1.8.0'),
     ('lxml', '4.6.3'),
-    ('Pillow-SIMD', '8.3.1'),
+    ('Pillow-SIMD', '9.0.1'),
     ('PROJ', '8.1.0'),
     ('PyYAML', '5.4.1'),
 ]
@@ -35,6 +35,7 @@ patches = [
     # 'PyTorch-1.10.0_skip_failing_ops_tests.patch',
     # 'PyTorch-1.10.0_skip_nan_tests_openblas.patch',
     'PyTorch-1.10.0_skip_cmake_rpath.patch',
+    'PyTorch-1.11.0_fix_sharded_imports.patch',
     # 'PyTorch-1.10.0_fix-gcc11-ideep.patch',
     # 'PyTorch-1.10.0_fix_gcc11_nullpointer.patch',
     # 'cub-lint.yaml.patch',
@@ -75,26 +76,8 @@ patches = [
# 'cub-math-gpu.patch',
# 'cub-CMake-Dependencies.patch',
'PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch',
'PyTorch-1.11.0_increas-distributed-test-timeout.patch',
'PyTorch-1.11.0_increase-distributed-test-timeout.patch',
'PyTorch-1.11.0_skip_failing_ops_tests.patch',
]
checksums = [
'7547d3d52ca7067f1ce82fa14d02c49e7ca9c9841cfbc1f1742ffe95c0bfd2d6', # PyTorch-1.11.tar.gz
'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18', # PyTorch-1.7.0_avoid-nan-in-test-torch.patch
'622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a', # PyTorch-1.7.0_disable-dev-shm-test.patch
'89ac7a8e9e7df2e64cf8404fe3a279f5e9b759fee41c9de3aaff9c22f385c2c6', # PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch
# PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch
'ff573660913ce055e24cfd194ce747ba5685091c631cfd443eae2a99d56b57ea',
# PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch
'313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707',
'ac05943bb205623f91ef140aa00869efc5fe844184bd666bebf5405808610448', # PyTorch-1.10.0_skip_cmake_rpath.patch
'91e67cd498918baafe3fd58e0ba04b610a3561d1d97cec2c934bfd48fffd8324', # PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch
# PyTorch-1.11.0_increas-distributed-test-timeout.patch
'bb9709590ea8bd329360ca345c70afb8ff028be80e112af7ee00abba58482316',
'88a312d4752fe72171a2292d0aa5438ada42b124be113015bb4969c83c723766', # PyTorch-1.11.0_skip_failing_ops_tests.patch
]
osdependencies = [OS_PKG_IBVERBS_DEV]
@@ -139,48 +122,49 @@ excluded_tests = {
'distributed/test_distributed_spawn',
# Fails on A10s: https://github.com/pytorch/pytorch/issues/63079
'test_optim',
# Tests from this suite time out often. The process group backend is deprecated anyway
# 'distributed/rpc/test_process_group_agent',
'test_jit',
'test_jit_cuda_fuser',
'test_jit_legacy',
'test_jit_profiling',
'test_xnnpack_integration',
'distributed/_shard/sharded_optim/test_sharded_optim',
'distributed/_shard/sharded_tensor/ops/test_linear',
'distributed/_shard/sharded_tensor/test_megatron_prototype',
'distributions/test_distributions',
'test_cpp_extensions_jit',
'distributed/rpc/test_tensorpipe_agent',
'test_ops',
'distributed/fsdp/test_fsdp_memory', # fails on hdfml
'distributed/fsdp/test_fsdp_overlap', # fails on hdfml
'test_autograd', # fails on jureca dc and deep
'test_cuda', # fails on jureca dc
'test_multiprocessing', # fails on jureca dc
'test_nn', # fails on jureca dc
'test_profiler', # fails on jureca dc
'test_quantization', # fails on jureca dc
'distributed/_shard/sharded_tensor/test_sharded_tensor', # fails on deep
'distributed/algorithms/test_join', # fails on deep and jureca dc
'distributed/fsdp/test_fsdp_checkpoint', # fails on deep and jureca dc
'distributed/fsdp/test_fsdp_core', # fails on deep and jureca dc
'distributed/fsdp/test_fsdp_freezing_weights', # fails on deep and jureca dc
'distributed/fsdp/test_fsdp_memory', # fails on deep
'distributed/fsdp/test_fsdp_multiple_forward', # fails on deep and jureca dc
'distributed/fsdp/test_fsdp_multiple_wrapping', # fails on deep and jureca dc
'distributed/fsdp/test_fsdp_overlap', # fails on deep
'distributed/fsdp/test_fsdp_pure_fp16', # fails on deep and jureca dc
'distributed/fsdp/test_fsdp_uneven', # fails on deep and jureca dc
'distributed/fsdp/test_wrap', # fails on deep and jureca dc
'distributed/optim/test_zero_redundancy_optimizer', # fails on deep and jureca dc
'distributed/rpc/cuda/test_tensorpipe_agent', # fails on deep
'distributed/rpc/test_faulty_agent', # fails on deep
'distributed/test_c10d_gloo', # fails on deep
'test_model_dump', # fails on deep
'distributed/test_c10d_nccl', # fails on jureca dc
'distributed/test_c10d_spawn_nccl', # fails on jureca dc
'distributed/test_data_parallel', # fails on jureca dc
'test_jit', # fails on all systems
'test_jit_cuda_fuser', # fails on all systems
'test_jit_legacy', # fails on all systems
'test_jit_profiling', # fails on all systems
'test_jit_fuser_te', # fails on booster and dc
# 'test_xnnpack_integration',
'distributed/_shard/sharded_optim/test_sharded_optim', # fails on booster and dc
'distributed/_shard/sharded_tensor/ops/test_linear', # fails on booster and dc
'distributed/_shard/sharded_tensor/test_megatron_prototype', # fails on booster and dc
'distributions/test_distributions', # fails on all systems
'test_cpp_extensions_jit', # fails on all systems
'test_ops', # fails on booster, dc, jusuf (works on hdfml?)
'distributed/fsdp/test_fsdp_memory', # fails on jusuf and hdfml
'distributed/fsdp/test_fsdp_overlap', # fails on jusuf and hdfml
# These tests fail when not running from a container or without the latest patches
# 'distributed/rpc/test_tensorpipe_agent',
# 'test_autograd', # fails on jureca dc and deep
# 'test_cuda', # fails on jureca dc
# 'test_multiprocessing', # fails on jureca dc
# 'test_nn', # fails on jureca dc
# 'test_profiler', # fails on jureca dc
# 'test_quantization', # fails on jureca dc
'distributed/_shard/sharded_tensor/test_sharded_tensor', # fails on juwels cluster container and deep
# 'distributed/algorithms/test_join', # fails on deep and jureca dc
# 'distributed/fsdp/test_fsdp_checkpoint', # fails on deep and jureca dc
# 'distributed/fsdp/test_fsdp_core', # fails on deep and jureca dc
# 'distributed/fsdp/test_fsdp_freezing_weights', # fails on deep and jureca dc
# 'distributed/fsdp/test_fsdp_memory', # fails on deep
# 'distributed/fsdp/test_fsdp_multiple_forward', # fails on deep and jureca dc
# 'distributed/fsdp/test_fsdp_multiple_wrapping', # fails on deep and jureca dc
# 'distributed/fsdp/test_fsdp_overlap', # fails on deep
# 'distributed/fsdp/test_fsdp_pure_fp16', # fails on deep and jureca dc
# 'distributed/fsdp/test_fsdp_uneven', # fails on deep and jureca dc
# 'distributed/fsdp/test_wrap', # fails on deep and jureca dc
# 'distributed/optim/test_zero_redundancy_optimizer', # fails on deep and jureca dc
# 'distributed/rpc/cuda/test_tensorpipe_agent', # fails on deep
# 'distributed/rpc/test_faulty_agent', # fails on deep
# 'distributed/test_c10d_gloo', # fails on deep
# 'test_model_dump', # fails on deep
# 'distributed/test_c10d_nccl', # fails on jureca dc
# 'distributed/test_c10d_spawn_nccl', # fails on jureca dc
# 'distributed/test_data_parallel', # fails on jureca dc
]
}
PyTorch-1.11.0_fix_sharded_imports.patch
# Fixes a "NameError: name 'sharded_tensor' is not defined" error
# for the test_named_params_with_sharded_tensor test
# See https://github.com/pytorch/pytorch/pull/73309
From 012d490ed76d8af8538d310a508b0e09a91b7632 Mon Sep 17 00:00:00 2001
From: wanchaol <wanchaol@devvm3348.frc0.facebook.com>
Date: Wed, 23 Feb 2022 12:10:39 -0800
Subject: [PATCH] [shard] fix some imports in tests
This fixes some imports in the sharded optimizer tests
Differential Revision: [D34427252](https://our.internmc.facebook.com/intern/diff/D34427252/)
[ghstack-poisoned]
---
.../_shard/sharded_optim/test_sharded_optim.py | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/test/distributed/_shard/sharded_optim/test_sharded_optim.py b/test/distributed/_shard/sharded_optim/test_sharded_optim.py
index 085c928985eb..d3f1468aea3c 100644
--- a/test/distributed/_shard/sharded_optim/test_sharded_optim.py
+++ b/test/distributed/_shard/sharded_optim/test_sharded_optim.py
@@ -2,7 +2,10 @@
import torch
import torch.optim as optim
-import torch.distributed._shard.sharded_tensor
+from torch.distributed._shard import (
+ sharded_tensor,
+ shard_parameter
+)
from copy import deepcopy
from torch.distributed._shard.sharding_spec import (
@@ -77,8 +80,8 @@ def shard_parameter(self):
],
)
- sharded_tensor.shard_parameter(self.linear1, "weight", rowwise_sharding_spec)
- sharded_tensor.shard_parameter(self.linear2, "weight", colwise_sharding_spec)
+ shard_parameter(self.linear1, "weight", rowwise_sharding_spec)
+ shard_parameter(self.linear2, "weight", colwise_sharding_spec)
def forward(self, inp):
return self.linear2(self.gelu(self.linear1(inp)))
\ No newline at end of file
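For context, the snippet below is a minimal sketch, not part of the commit, of the import style the patch above moves to. In Python, `import torch.distributed._shard.sharded_tensor` binds only the name `torch`, so referring to a bare `sharded_tensor` raises the NameError mentioned in the comment; importing the names directly from `torch.distributed._shard` binds them in the test module. The function and spec names come from the patch itself; the surrounding setup (an initialized two-rank process group with CUDA devices) is assumed.

# Minimal sketch of the fixed import style (assumes an initialized 2-rank process group).
import torch.nn as nn
from torch.distributed._shard import shard_parameter              # new-style import used by the patch
from torch.distributed._shard.sharding_spec import ChunkShardingSpec

def shard_linear_rowwise(linear: nn.Linear) -> None:
    # Row-wise sharding across two ranks, mirroring the specs used in the test.
    spec = ChunkShardingSpec(dim=0, placements=["rank:0/cuda:0", "rank:1/cuda:1"])
    # Pre-patch code called sharded_tensor.shard_parameter(...), but `sharded_tensor`
    # was never bound as a name in the module, hence the NameError; calling the
    # top-level helper imported above avoids it.
    shard_parameter(linear, "weight", spec)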
PyTorch-1.11.0_increase_test_tolerances_TF32.patch
# Author: Caspar van Leeuwen, SURF
# Fixes failing tests due to use of TensorFloat32
# Setting NVIDIA_TF32_OVERRIDE=0 makes these tests pass, proving that TensorFloat32 is the issue
# We increase tolerances for the asserts to make these tests pass
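To make the reasoning concrete, here is a small standalone sketch, not part of the patch, of the effect being compensated for. It assumes an Ampere-class GPU (e.g. A100/A10), where PyTorch 1.11 enables TF32 matmuls by default; flipping `torch.backends.cuda.matmul.allow_tf32` has an effect comparable to NVIDIA_TF32_OVERRIDE=0 for these matmuls, and the relaxed rtol/atol values mirror the ones added below.

# Standalone illustration (assumes an Ampere GPU; printed results are typical, not guaranteed).
import torch

a = torch.randn(1024, 1024, device="cuda")
b = torch.randn(1024, 1024, device="cuda")

ref = (a.double() @ b.double()).float()        # FP64 reference, rounded to float32

torch.backends.cuda.matmul.allow_tf32 = True   # default in PyTorch 1.11 on Ampere
out_tf32 = a @ b

torch.backends.cuda.matmul.allow_tf32 = False  # comparable to NVIDIA_TF32_OVERRIDE=0
out_fp32 = a @ b

# TF32 keeps roughly 10 mantissa bits, so strict tolerances fail while the
# relaxed tolerances used in this patch pass.
print(torch.allclose(out_tf32, ref, rtol=1e-5, atol=1e-6))   # typically False
print(torch.allclose(out_tf32, ref, rtol=0.01, atol=1e-3))   # typically True
print(torch.allclose(out_fp32, ref, rtol=1e-5, atol=1e-4))   # typically True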
diff -Nru pytorch_orig/test/distributed/_shard/sharded_tensor/ops/test_linear.py pytorch/test/distributed/_shard/sharded_tensor/ops/test_linear.py
--- pytorch_orig/test/distributed/_shard/sharded_tensor/ops/test_linear.py 2022-04-07 18:31:13.069599000 +0200
+++ pytorch/test/distributed/_shard/sharded_tensor/ops/test_linear.py 2022-04-07 18:32:32.877406000 +0200
@@ -77,7 +77,7 @@
local_output = local_linear(inp)
# Verify
- self.assertEqual(local_output, sharded_output)
+ self.assertEqual(local_output, sharded_output, rtol=0.02, atol=1e-03)
# Validate for torch.nn.functional.linear version.
local_output = torch.nn.functional.linear(
@@ -91,7 +91,7 @@
# for reshard. We need to squeeze the # of dimensions manually.
if inp.dim() == 1:
sharded_output = sharded_output.squeeze(reshard_spec.dim)
- self.assertEqual(local_output, sharded_output)
+ self.assertEqual(local_output, sharded_output, rtol=0.02, atol=1e-03)
# Compute loss and run backward pass.
local_output.sum().backward()
@@ -114,7 +114,7 @@
# Test backward gradient calculation.
self.assertEqual(sharded_linear.bias.grad, local_bias_grad)
- self.assertEqual(sharded_weight.grad, local_grad_narrowed)
+ self.assertEqual(sharded_weight.grad, local_grad_narrowed, rtol=0.01, atol=1e-03)
# Test optimizer.
previous = local_linear.weight.clone().detach()
@@ -135,7 +135,7 @@
)
self.assertEqual(sharded_weight.size(), local_weight_narrowed.size())
self.assertNotEqual(previous_sharded_weight, sharded_weight)
- self.assertEqual(sharded_weight, local_weight_narrowed)
+ self.assertEqual(sharded_weight, local_weight_narrowed, rtol=0.01, atol=1e-04)
self.assertNotEqual(previous_sharded_bias, sharded_linear.bias)
self.assertEqual(sharded_linear.bias, local_linear.bias)
diff -Nru pytorch_orig/test/distributed/_shard/sharded_tensor/test_megatron_prototype.py pytorch/test/distributed/_shard/sharded_tensor/test_megatron_prototype.py
--- pytorch_orig/test/distributed/_shard/sharded_tensor/test_megatron_prototype.py 2022-04-07 18:31:13.091710000 +0200
+++ pytorch/test/distributed/_shard/sharded_tensor/test_megatron_prototype.py 2022-04-07 18:41:03.744644000 +0200
@@ -113,7 +113,7 @@
local_output = local_megatron_lm(inp)
# Verify
- self.assertEqual(local_output, sharded_output)
+ self.assertEqual(local_output, sharded_output, rtol=0.01, atol=1e-03)
# Compute loss and run backward pass.
local_output.sum().backward()
@@ -161,9 +161,9 @@
)
# Test backward gradient calculation.
- self.assertEqual(sharded_weight_fc1.grad, local_grad_narrowed_fc1)
- self.assertEqual(sharded_weight_fc2.grad, local_grad_narrowed_fc2)
- self.assertEqual(bias_grad_fc1, local_bias_grad_fc1)
+ self.assertEqual(sharded_weight_fc1.grad, local_grad_narrowed_fc1, rtol=0.01, atol=2e-03)
+ self.assertEqual(sharded_weight_fc2.grad, local_grad_narrowed_fc2, rtol=0.01, atol=1e-03)
+ self.assertEqual(bias_grad_fc1, local_bias_grad_fc1, rtol=0.01, atol=2e-02)
self.assertEqual(bias_grad_fc2, local_bias_grad_fc2)
# Test optimizer.
@@ -171,7 +171,7 @@
local_bias_fc1, local_bias_fc2 = _get_bias(local_megatron_lm)
self.assertEqual(bias_fc1, local_bias_fc1)
self.assertEqual(bias_fc2, local_bias_fc2)
- self.assertEqual(bias_fc1.grad, local_bias_fc1.grad)
+ self.assertEqual(bias_fc1.grad, local_bias_fc1.grad, rtol=0.01, atol=2e-02)
self.assertEqual(bias_fc2.grad, local_bias_fc2.grad)
previous_sharded_weight_fc1 = sharded_weight_fc1.clone()
previous_sharded_weight_fc2 = sharded_weight_fc2.clone()
@@ -197,13 +197,13 @@
self.assertEqual(sharded_weight_fc2.size(), local_weight_fc2_narrowed.size())
self.assertNotEqual(previous_sharded_weight_fc1, sharded_weight_fc1)
self.assertNotEqual(previous_sharded_weight_fc2, sharded_weight_fc2)
- self.assertEqual(sharded_weight_fc1, local_weight_fc1_narrowed)
- self.assertEqual(sharded_weight_fc2, local_weight_fc2_narrowed)
+ self.assertEqual(sharded_weight_fc1, local_weight_fc1_narrowed, rtol=0.01, atol=1e-03)
+ self.assertEqual(sharded_weight_fc2, local_weight_fc2_narrowed, rtol=0.01, atol=1e-03)
# Test bias value after optimizer.
local_bias_fc1, local_bias_fc2 = _get_bias(local_megatron_lm)
self.assertNotEqual(previous_bias_fc1, bias_fc1)
- self.assertEqual(bias_fc1, local_bias_fc1)
+ self.assertEqual(bias_fc1, local_bias_fc1, rtol=0.01, atol=1e-03)
self.assertNotEqual(previous_bias_fc2, bias_fc2)
self.assertEqual(bias_fc2, local_bias_fc2)
diff -Nru pytorch_orig/test/test_stateless.py pytorch/test/test_stateless.py
--- pytorch_orig/test/test_stateless.py 2022-04-07 18:31:13.029968000 +0200
+++ pytorch/test/test_stateless.py 2022-04-07 18:43:46.723968000 +0200
@@ -42,7 +42,7 @@
# existing params in module. So here we expect the result to be the
# same as the input if the weight swapping went well.
res = _stateless.functional_call(module, parameters, x)
- self.assertEqual(x, res)
+ self.assertEqual(x, res, rtol=1e-04, atol=1e-04)
# check that the weight remain unmodified
cur_weight = to_check.l1.weight
cur_buffer = to_check.buffer
diff -Nru pytorch_orig/test/test_jit_fuser_te.py pytorch/test/test_jit_fuser_te.py
--- pytorch_orig/test/test_jit_fuser_te.py 2022-04-07 18:31:13.046680000 +0200
+++ pytorch/test/test_jit_fuser_te.py 2022-04-12 18:21:00.355114000 +0200
@@ -956,7 +956,7 @@
def test_lstm_traced(self):
for device in self.devices:
inputs = get_lstm_inputs(device)
- ge = self.checkTrace(LSTMCellF, inputs)
+ ge = self.checkTrace(LSTMCellF, inputs, atol=1e-4, rtol=1e-5)
graph = ge.graph_for(*inputs)
fusion_groups = self.findFusionGroups(graph)
# TODO: chunk
diff -Nru pytorch_orig/torch/testing/_internal/jit_utils.py pytorch/torch/testing/_internal/jit_utils.py
--- pytorch_orig/torch/testing/_internal/jit_utils.py 2022-04-07 18:28:54.339477000 +0200
+++ pytorch/torch/testing/_internal/jit_utils.py 2022-04-12 18:19:59.614272000 +0200
@@ -525,7 +525,7 @@
def checkTrace(self, func, reference_tensors, input_tensors=None,
drop=None, allow_unused=False, verbose=False,
inputs_require_grads=True, check_tolerance=1e-5, export_import=True,
- _force_outplace=False):
+ _force_outplace=False, rtol=None, atol=None):
# TODO: check gradients for parameters, not just inputs
def allSum(vs):
@@ -618,7 +618,10 @@
self.assertEqual(outputs, outputs_ge)
if inputs_require_grads:
- self.assertEqual(grads, grads_ge)
+ if atol is not None and rtol is not None:
+ self.assertEqual(grads, grads_ge, atol=atol, rtol=rtol)
+ else:
+ self.assertEqual(grads, grads_ge)
for g2, g2_ge in zip(grads2, grads2_ge):
if g2 is None and g2_ge is None:
continue
\ No newline at end of file