Use accelerator API in common_dtensor (#163498)

Fixes #ISSUE_NUMBER

Unify the device checks in the common_dtensor testing module via the accelerator API.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/163498
Approved by: https://github.com/albanD, https://github.com/H-Huang
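
For context, the device discovery this patch switches to looks roughly like the following. This is a minimal sketch assuming a PyTorch build recent enough to provide torch.accelerator; the variable names are illustrative, not the exact test-suite code.

    import torch

    # current_accelerator() returns a torch.device for the active accelerator
    # (cuda, xpu, hpu, a PrivateUse1 backend, ...) or None on CPU-only builds.
    acc = torch.accelerator.current_accelerator()
    if acc is not None:
        device_type = acc.type  # e.g. "cuda"
        device_count = torch.accelerator.device_count()
    else:
        device_type, device_count = "cpu", 0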
Author: dilililiwhy (2025-09-23 16:30:20 +00:00), committed by PyTorch MergeBot
Parent: ebddbe787a
Commit: 6e5dddba64


@@ -13,7 +13,6 @@ import torch
 import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
-from torch._utils import _get_device_module
 from torch.distributed.tensor import (
     DeviceMesh,
     distribute_tensor,
@@ -38,24 +37,21 @@ from torch.testing._internal.common_distributed import (
     skip_if_lt_x_gpu,
     TEST_SKIPS,
 )
-from torch.testing._internal.common_utils import TEST_CUDA, TEST_HPU, TEST_XPU
+from torch.testing._internal.common_utils import (
+    TEST_CUDA,
+    TEST_HPU,
+    TEST_PRIVATEUSE1,
+    TEST_XPU,
+)
 from torch.utils._pytree import tree_flatten, tree_unflatten, TreeSpec


 DEVICE_COUNT: int
-if TEST_CUDA:
-    DEVICE_TYPE = "cuda"
-    PG_BACKEND = "nccl"
-    DEVICE_COUNT = _get_device_module("cuda").device_count()
-elif TEST_HPU:
-    DEVICE_TYPE = "hpu"
-    PG_BACKEND = "hccl"
-    DEVICE_COUNT = _get_device_module("hpu").device_count()
-elif TEST_XPU:
-    DEVICE_TYPE = "xpu"
-    PG_BACKEND = "xccl"
-    DEVICE_COUNT = _get_device_module("xpu").device_count()
+if TEST_CUDA or TEST_XPU or TEST_HPU or TEST_PRIVATEUSE1:
+    DEVICE_TYPE = torch.accelerator.current_accelerator().type
+    DEVICE_COUNT = torch.accelerator.device_count()
+    PG_BACKEND = dist.Backend.default_device_backend_map[DEVICE_TYPE]
 else:
     DEVICE_TYPE = "cpu"
     PG_BACKEND = "gloo"
@@ -63,7 +59,7 @@ else:

 NUM_DEVICES = 4

 # We use this as a proxy for "multiple GPUs exist"
-if (TEST_CUDA or TEST_XPU or TEST_HPU) and DEVICE_COUNT > 1:
+if (TEST_CUDA or TEST_XPU or TEST_HPU or TEST_PRIVATEUSE1) and DEVICE_COUNT > 1:
     # when we actually have multiple GPUs, relax the requirement to smaller counts.
     NUM_DEVICES = min(NUM_DEVICES, DEVICE_COUNT)
@@ -341,7 +337,10 @@ class DTensorContinuousTestBase(MultiProcContinuousTest):
     @classmethod
     def device_type(cls) -> str:
         # if enough GPU/XPU/HPU we can use those devices, otherwise we fallback to CPU
-        if not (TEST_CUDA or TEST_XPU or TEST_HPU) or DEVICE_COUNT < cls.world_size:
+        if (
+            not (TEST_CUDA or TEST_XPU or TEST_HPU or TEST_PRIVATEUSE1)
+            or DEVICE_COUNT < cls.world_size
+        ):
             return "cpu"
         else:
             return DEVICE_TYPE
@@ -360,7 +359,10 @@ class DTensorTestBase(MultiProcessTestCase):
     @property
     def device_type(self) -> str:
         # if enough GPU/XPU/HPU we can use those devices, otherwise we fallback to CPU
-        if not (TEST_CUDA or TEST_XPU or TEST_HPU) or DEVICE_COUNT < self.world_size:
+        if (
+            not (TEST_CUDA or TEST_XPU or TEST_HPU or TEST_PRIVATEUSE1)
+            or DEVICE_COUNT < self.world_size
+        ):
             return "cpu"
         else:
             return DEVICE_TYPE
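
A note on the backend lookup above: rather than hard-coding nccl/xccl/hccl per device type, the new code reads dist.Backend.default_device_backend_map, which maps a device type to its default process-group backend. A quick sketch (the exact map contents depend on the build and on which backends are registered):

    import torch.distributed as dist

    # e.g. {"cpu": "gloo", "cuda": "nccl", "xpu": "xccl"} on a typical build
    backend_map = dist.Backend.default_device_backend_map
    pg_backend = backend_map.get("cuda", "gloo")  # fall back to gloo if unmapped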