pytorch/test/test_cuda_primary_ctx.py
Pritam Damania 872237c1f2 Output to stderr in distributed tests. (#42139)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/42139

A bunch of tests were failing with buck since we would output to
stdout and buck would fail parsing stdout in some cases.

Moving these print statements to stderr fixes this issue.
ghstack-source-id: 108606579

Test Plan: Run the offending unit tests.

Reviewed By: mrshenli

Differential Revision: D22779135

fbshipit-source-id: 789af3b16a03b68a6cb12377ed852e5b5091bbad
2020-07-29 19:23:34 -07:00

114 lines
4.3 KiB
Python

import torch
from torch.testing._internal.common_utils import TestCase, run_tests, skipIfRocm
import sys
import unittest
# NOTE: this needs to be run in a brand new process
# We cannot import TEST_CUDA and TEST_MULTIGPU from torch.testing._internal.common_cuda here,
# because if we do that, the TEST_CUDNN line from torch.testing._internal.common_cuda will be executed
# multiple times as well during the execution of this test suite, and it will
# cause CUDA OOM error on Windows.
TEST_CUDA = torch.cuda.is_available()
TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
if not TEST_CUDA:
print('CUDA not available, skipping tests', file=sys.stderr)
TestCase = object # noqa: F811
class TestCudaPrimaryCtx(TestCase):
CTX_ALREADY_CREATED_ERR_MSG = (
"Tests defined in test_cuda_primary_ctx.py must be run in a process "
"where CUDA contexts are never created. Use either run_test.py or add "
"--subprocess to run each test in a different subprocess.")
@skipIfRocm
def setUp(self):
for device in range(torch.cuda.device_count()):
# Ensure context has not been created beforehand
self.assertFalse(torch._C._cuda_hasPrimaryContext(device), TestCudaPrimaryCtx.CTX_ALREADY_CREATED_ERR_MSG)
@unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
def test_str_repr(self):
x = torch.randn(1, device='cuda:1')
# We should have only created context on 'cuda:1'
self.assertFalse(torch._C._cuda_hasPrimaryContext(0))
self.assertTrue(torch._C._cuda_hasPrimaryContext(1))
str(x)
repr(x)
# We should still have only created context on 'cuda:1'
self.assertFalse(torch._C._cuda_hasPrimaryContext(0))
self.assertTrue(torch._C._cuda_hasPrimaryContext(1))
@unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
def test_copy(self):
x = torch.randn(1, device='cuda:1')
# We should have only created context on 'cuda:1'
self.assertFalse(torch._C._cuda_hasPrimaryContext(0))
self.assertTrue(torch._C._cuda_hasPrimaryContext(1))
y = torch.randn(1, device='cpu')
y.copy_(x)
# We should still have only created context on 'cuda:1'
self.assertFalse(torch._C._cuda_hasPrimaryContext(0))
self.assertTrue(torch._C._cuda_hasPrimaryContext(1))
@unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
def test_pin_memory(self):
x = torch.randn(1, device='cuda:1')
# We should have only created context on 'cuda:1'
self.assertFalse(torch._C._cuda_hasPrimaryContext(0))
self.assertTrue(torch._C._cuda_hasPrimaryContext(1))
self.assertFalse(x.is_pinned())
# We should still have only created context on 'cuda:1'
self.assertFalse(torch._C._cuda_hasPrimaryContext(0))
self.assertTrue(torch._C._cuda_hasPrimaryContext(1))
x = torch.randn(3, device='cpu').pin_memory()
# We should still have only created context on 'cuda:1'
self.assertFalse(torch._C._cuda_hasPrimaryContext(0))
self.assertTrue(torch._C._cuda_hasPrimaryContext(1))
self.assertTrue(x.is_pinned())
# We should still have only created context on 'cuda:1'
self.assertFalse(torch._C._cuda_hasPrimaryContext(0))
self.assertTrue(torch._C._cuda_hasPrimaryContext(1))
x = torch.randn(3, device='cpu', pin_memory=True)
# We should still have only created context on 'cuda:1'
self.assertFalse(torch._C._cuda_hasPrimaryContext(0))
self.assertTrue(torch._C._cuda_hasPrimaryContext(1))
x = torch.zeros(3, device='cpu', pin_memory=True)
# We should still have only created context on 'cuda:1'
self.assertFalse(torch._C._cuda_hasPrimaryContext(0))
self.assertTrue(torch._C._cuda_hasPrimaryContext(1))
x = torch.empty(3, device='cpu', pin_memory=True)
# We should still have only created context on 'cuda:1'
self.assertFalse(torch._C._cuda_hasPrimaryContext(0))
self.assertTrue(torch._C._cuda_hasPrimaryContext(1))
x = x.pin_memory()
# We should still have only created context on 'cuda:1'
self.assertFalse(torch._C._cuda_hasPrimaryContext(0))
self.assertTrue(torch._C._cuda_hasPrimaryContext(1))
if __name__ == '__main__':
run_tests()