[CI] Relax per proc memory by a little bit, mark a test as serial (#125960)

Test failure: https://github.com/pytorch/pytorch/actions/runs/9036789873/job/24836020415

* OOMs etc. related to https://github.com/pytorch/pytorch/pull/125598
Pull Request resolved: https://github.com/pytorch/pytorch/pull/125960
Approved by: https://github.com/huydhn
Catherine Lee 2024-05-10 21:11:37 +00:00 committed by PyTorch MergeBot
parent c61bfd24c1
commit b08072f645
2 changed files with 3 additions and 1 deletion


@@ -30,6 +30,7 @@ from torch.testing._internal.common_utils import (
     IS_SANDCASTLE,
     NoTest,
     run_tests,
+    serialTest,
     skipCUDANonDefaultStreamIf,
     skipIfRocm,
     TEST_CUDA,
@@ -280,6 +281,7 @@ class TestCudaMultiGPU(TestCase):
         assert_change(0, reset_peak=True)

     @unittest.skipIf(TEST_CUDAMALLOCASYNC, "temporarily disabled")
+    @serialTest()
     def test_memory_stats(self):
         gc.collect()
         torch.cuda.empty_cache()
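For context, `serialTest` forces a test to run without other test processes sharing the GPU, which matters here because `test_memory_stats` asserts on global CUDA memory counters that parallel workers would perturb. Below is a minimal sketch of how such a marker decorator can work; the names `serial_test` and `_is_serial` are illustrative assumptions, not PyTorch's actual internals (the real `serialTest` lives in torch.testing._internal.common_utils).

import functools

def serial_test(condition=True):
    # Hypothetical marker decorator: tag a test so the runner executes it
    # alone instead of alongside parallel worker processes.
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            return fn(*args, **kwargs)
        wrapper._is_serial = condition  # a runner would check this attribute
        return wrapper
    return decorator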


@@ -1297,7 +1297,7 @@ if TEST_CUDA and 'NUM_PARALLEL_PROCS' in os.environ:
     num_procs = int(os.getenv("NUM_PARALLEL_PROCS", "2"))
     gb_available = torch.cuda.mem_get_info()[1] / 2 ** 30
     # other libraries take up a little under 1 GB of space per process
-    torch.cuda.set_per_process_memory_fraction(round((gb_available - num_procs * .9) / gb_available / num_procs, 2))
+    torch.cuda.set_per_process_memory_fraction(round((gb_available - num_procs * .85) / gb_available / num_procs, 2))
 requires_cuda = unittest.skipUnless(torch.cuda.is_available(), "Requires CUDA")
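To see what the .9 -> .85 change buys, here is the arithmetic of that formula on an assumed 16 GB card with two parallel procs (both values are illustrative, not measured on the CI fleet):

gb_available = 16.0   # assumed value of mem_get_info()[1] / 2 ** 30 on the runner
num_procs = 2

# same expression as the diff above, with the old and new overhead estimates
old_frac = round((gb_available - num_procs * .90) / gb_available / num_procs, 2)  # 0.44
new_frac = round((gb_available - num_procs * .85) / gb_available / num_procs, 2)  # 0.45

print(old_frac * gb_available, new_frac * gb_available)  # 7.04 GB -> 7.2 GB per proc

Reserving 0.05 GB less per process nudges each worker's memory fraction up by about one rounded percentage point, which is the "relax per proc memory by a little bit" in the title.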