mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Summary:
Now that issue https://github.com/pytorch/pytorch/issues/59750 is fixed, this PR removes the workaround that was implemented for it on ROCm.
It also enables the hasPrimaryContext()-related PyTorch unit tests on ROCm.
cc: amathews-amd, jithunnair-amd
cc jeffdaily sunway513 jithunnair-amd ROCmSupport KyleCZH
Pull Request resolved: https://github.com/pytorch/pytorch/pull/71146
Reviewed By: anjali411
Differential Revision: D33754615
Pulled By: albanD
fbshipit-source-id: b3a5c65a20c6d52d5f2ffc9e6f9628c819329b5d
(cherry picked from commit cfdd12166c)
116 lines
4.4 KiB
Python
116 lines
4.4 KiB
Python
# Owner(s): ["module: cuda"]
|
|
|
|
import torch
|
|
from torch.testing._internal.common_utils import TestCase, run_tests, skipIfRocmVersionLessThan
|
|
import sys
|
|
import unittest
|
|
|
|
# NOTE: this needs to be run in a brand new process
|
|
|
|
# We cannot import TEST_CUDA and TEST_MULTIGPU from torch.testing._internal.common_cuda here:
# doing so would execute the TEST_CUDNN line in torch.testing._internal.common_cuda
# multiple times during the execution of this test suite, which causes a
# CUDA OOM error on Windows.
|
|
# Probe CUDA availability directly through torch.cuda.
TEST_CUDA = torch.cuda.is_available()

if TEST_CUDA:
    # The device count is only queried once CUDA is known to be available.
    TEST_MULTIGPU = torch.cuda.device_count() >= 2
else:
    TEST_MULTIGPU = False
    print('CUDA not available, skipping tests', file=sys.stderr)
    # Rebind the base-class name to plain `object`; a class declared afterwards
    # with `TestCase` as its base is then an ordinary class.
    TestCase = object  # noqa: F811
|
|
|
|
|
|
class TestCudaPrimaryCtx(TestCase):
    """Checks that work targeting 'cuda:1' does not create a context on device 0.

    Each test allocates a tensor on 'cuda:1', exercises one feature, and
    re-checks ``torch._C._cuda_hasPrimaryContext`` for devices 0 and 1 after
    every step.
    """

    CTX_ALREADY_CREATED_ERR_MSG = (
        "Tests defined in test_cuda_primary_ctx.py must be run in a process "
        "where CUDA contexts are never created. Use either run_test.py or add "
        "--subprocess to run each test in a different subprocess.")

    def _assert_ctx_only_on_device_1(self):
        """Assert that device 0 has no primary context while device 1 has one."""
        has_primary_ctx = torch._C._cuda_hasPrimaryContext
        self.assertFalse(has_primary_ctx(0))
        self.assertTrue(has_primary_ctx(1))

    @skipIfRocmVersionLessThan((4, 4, 21504))
    def setUp(self):
        """Fail when any visible device already holds a primary context."""
        device_count = torch.cuda.device_count()
        for ordinal in range(device_count):
            # Ensure context has not been created beforehand
            already_created = torch._C._cuda_hasPrimaryContext(ordinal)
            self.assertFalse(already_created, TestCudaPrimaryCtx.CTX_ALREADY_CREATED_ERR_MSG)

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_str_repr(self):
        """str() and repr() of a 'cuda:1' tensor must not create a context on device 0."""
        tensor = torch.randn(1, device='cuda:1')

        # Only 'cuda:1' should own a context after the allocation.
        self._assert_ctx_only_on_device_1()

        str(tensor)
        repr(tensor)

        # Formatting the tensor must not have changed that.
        self._assert_ctx_only_on_device_1()

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_copy(self):
        """Copying a 'cuda:1' tensor into a CPU tensor must not create a context on device 0."""
        source = torch.randn(1, device='cuda:1')

        # Only 'cuda:1' should own a context after the allocation.
        self._assert_ctx_only_on_device_1()

        destination = torch.randn(1, device='cpu')
        destination.copy_(source)

        # The device-to-host copy must not have changed that.
        self._assert_ctx_only_on_device_1()

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_pin_memory(self):
        """Pinned-memory queries and allocations must not create a context on device 0."""
        x = torch.randn(1, device='cuda:1')

        # Only 'cuda:1' should own a context after the allocation.
        self._assert_ctx_only_on_device_1()

        self.assertFalse(x.is_pinned())
        self._assert_ctx_only_on_device_1()

        # Pin an existing CPU tensor.
        x = torch.randn(3, device='cpu').pin_memory()
        self._assert_ctx_only_on_device_1()

        self.assertTrue(x.is_pinned())
        self._assert_ctx_only_on_device_1()

        # Allocate directly into pinned memory through each factory in turn.
        for factory in (torch.randn, torch.zeros, torch.empty):
            x = factory(3, device='cpu', pin_memory=True)
            self._assert_ctx_only_on_device_1()

        # Call pin_memory() on a tensor that was created with pin_memory=True.
        x = x.pin_memory()
        self._assert_ctx_only_on_device_1()
|
|
|
|
# Script entry point: call run_tests() only when this file is executed directly.
if __name__ == "__main__":
    run_tests()
|