Revert "Respect ROCR_VISIBLE_DEVICES on AMD GPU device discovery (#140320)"

This reverts commit add4a42ea2.

Reverted https://github.com/pytorch/pytorch/pull/140320 on behalf of https://github.com/huydhn due to Sorry for reverting your change, but test_hip_device_count is failing in trunk after this landed ([comment](https://github.com/pytorch/pytorch/pull/140320#issuecomment-2524742845))
This commit is contained in:
PyTorch MergeBot 2024-12-07 01:28:49 +00:00
parent db313c87f9
commit 40d1b5f490
2 changed files with 1 addition and 19 deletions

View File

@ -3305,8 +3305,6 @@ print(f"{torch.cuda.device_count()}")
{"CUDA_VISIBLE_DEVICES": "0", "HIP_VISIBLE_DEVICES": None},
{"CUDA_VISIBLE_DEVICES": None, "HIP_VISIBLE_DEVICES": "0"},
{"CUDA_VISIBLE_DEVICES": "0,1,2,3", "HIP_VISIBLE_DEVICES": "0"},
{"ROCR_VISIBLE_DEVICES": "1,2,3", "HIP_VISIBLE_DEVICES": "0"},
{"ROCR_VISIBLE_DEVICES": "0"},
]
for env_config in custom_envs:

View File

@ -646,24 +646,8 @@ def _parse_visible_devices() -> Union[List[int], List[str]]:
if torch.version.hip:
hip_devices = os.getenv("HIP_VISIBLE_DEVICES")
rocr_devices = os.getenv("ROCR_VISIBLE_DEVICES")
if rocr_devices is not None:
# Mostly required for ROCm to make sure ROCr visible devices
# is respected, this ensures we do not return a list of devices
# that exceeds the total available supplied via ROCR_VISIBLE_DEVICES
var = rocr_devices
if hip_devices is not None:
# If ROCr devices have been set, the hip visible devices would
# be a subset of those. HIP_VISIBLE_DEVICES can only contain
# integer indices so we can use the ROCr visible devices as a key
if rocr_devices is not None:
hip_device_list = [int(dev) for dev in hip_devices.split(",")]
rocr_device_list = rocr_devices.split(",")
var = ",".join(rocr_device_list[dev] for dev in hip_device_list)
else:
var = hip_devices
var = hip_devices
if var is None:
return list(range(64))