mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 12:20:52 +01:00)
[CI] Relax per proc memory by a little bit, mark a test as serial (#125960)
The test failure is here: https://github.com/pytorch/pytorch/actions/runs/9036789873/job/24836020415
The OOMs and related failures are connected to https://github.com/pytorch/pytorch/pull/125598
Pull Request resolved: https://github.com/pytorch/pytorch/pull/125960
Approved by: https://github.com/huydhn
This commit is contained in:
parent c61bfd24c1
commit b08072f645
@@ -30,6 +30,7 @@ from torch.testing._internal.common_utils import (
     IS_SANDCASTLE,
     NoTest,
     run_tests,
+    serialTest,
     skipCUDANonDefaultStreamIf,
     skipIfRocm,
     TEST_CUDA,
@@ -280,6 +281,7 @@ class TestCudaMultiGPU(TestCase):
         assert_change(0, reset_peak=True)
 
     @unittest.skipIf(TEST_CUDAMALLOCASYNC, "temporarily disabled")
+    @serialTest()
     def test_memory_stats(self):
         gc.collect()
         torch.cuda.empty_cache()
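For context, marking the test with @serialTest() keeps it out of the parallel test shards so it does not compete with other test processes for GPU memory. Below is a minimal sketch of how a serial-marking decorator can work; it is not the actual torch.testing._internal.common_utils.serialTest implementation, and the attribute name and runner check are assumptions for illustration only.

import functools

def serial_test(condition=True):  # assumed signature, mirrors the @serialTest() call style
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            return fn(*args, **kwargs)
        # A test runner could inspect this tag and schedule the test outside parallel shards.
        wrapper._run_serially = condition  # assumed attribute name, illustration only
        return wrapper
    return decorator

@serial_test()
def test_memory_stats_example():
    pass

assert getattr(test_memory_stats_example, "_run_serially", False)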
@@ -1297,7 +1297,7 @@ if TEST_CUDA and 'NUM_PARALLEL_PROCS' in os.environ:
     num_procs = int(os.getenv("NUM_PARALLEL_PROCS", "2"))
     gb_available = torch.cuda.mem_get_info()[1] / 2 ** 30
     # other libraries take up about a little under 1 GB of space per process
-    torch.cuda.set_per_process_memory_fraction(round((gb_available - num_procs * .9) / gb_available / num_procs, 2))
+    torch.cuda.set_per_process_memory_fraction(round((gb_available - num_procs * .85) / gb_available / num_procs, 2))
 
 requires_cuda = unittest.skipUnless(torch.cuda.is_available(), "Requires CUDA")
 
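To see how much the change relaxes the per-process cap, here is the formula worked through for a hypothetical 16 GB GPU shared by two parallel test processes; the GPU size and process count are assumptions for illustration, and torch.cuda.mem_get_info()[1] is the total device memory in bytes.

# Worked example of the memory-fraction formula with assumed values (16 GB GPU, 2 procs).
gb_available = 16.0
num_procs = 2

old_fraction = round((gb_available - num_procs * .9) / gb_available / num_procs, 2)
new_fraction = round((gb_available - num_procs * .85) / gb_available / num_procs, 2)

print(old_fraction, old_fraction * gb_available)  # 0.44 -> about 7.04 GB per process
print(new_fraction, new_fraction * gb_available)  # 0.45 -> about 7.2 GB per process

Reserving 0.85 GB of headroom per process instead of 0.9 GB raises each process's memory fraction slightly (roughly 0.16 GB more per process in this example), which is the "relax per proc memory by a little bit" in the title.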