Smoke Test - disable pypi package validation for binaries that package cuda libs (#150194)

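Smoke Test - disable PyPI package validation for binaries that package the CUDA libs. These binaries bundle the CUDA libraries inside the torch package itself and do not install them via PyPI, so there are no PyPI package versions to compare against.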
This should resolve the following failure from `linux-binary-manywheel / manywheel-py3_11-cuda12_6-full-test / test`:
```
Traceback (most recent call last):
  File "/pytorch/.ci/pytorch/smoke_test/smoke_test.py", line 468, in <module>
    main()
  File "/pytorch/.ci/pytorch/smoke_test/smoke_test.py", line 462, in main
    smoke_test_cuda(
  File "/pytorch/.ci/pytorch/smoke_test/smoke_test.py", line 274, in smoke_test_cuda
    compare_pypi_to_torch_versions(
  File "/pytorch/.ci/pytorch/smoke_test/smoke_test.py", line 220, in compare_pypi_to_torch_versions
    raise RuntimeError(f"Can't find {package} in PyPI for Torch: {torch_version}")
RuntimeError: Can't find cudnn in PyPI for Torch: 9.5.1
```
Link: https://github.com/pytorch/pytorch/actions/runs/14101221665/job/39505479587#step:15:982
Pull Request resolved: https://github.com/pytorch/pytorch/pull/150194
Approved by: https://github.com/ZainRizvi
This commit is contained in:
atalman 2025-04-01 19:18:44 +00:00 committed by PyTorch MergeBot
parent d2ad9aa2f2
commit 295162ec3a
2 changed files with 29 additions and 6 deletions

View File

@ -227,7 +227,10 @@ def compare_pypi_to_torch_versions(
def smoke_test_cuda(
package: str, runtime_error_check: str, torch_compile_check: str
package: str,
runtime_error_check: str,
torch_compile_check: str,
pypi_pkg_check: str,
) -> None:
if not torch.cuda.is_available() and is_cuda_system:
raise RuntimeError(f"Expected CUDA {gpu_arch_ver}. However CUDA is not loaded.")
@ -268,13 +271,14 @@ def smoke_test_cuda(
print(f"cuDNN enabled? {torch.backends.cudnn.enabled}")
torch_cudnn_version = cudnn_to_version_str(torch.backends.cudnn.version())
print(f"Torch cuDNN version: {torch_cudnn_version}")
torch_nccl_version = ".".join(str(v) for v in torch.cuda.nccl.version())
print(f"Torch nccl; version: {torch_nccl_version}")
# PyPI dependencies are installed on Linux only, and nccl is available only on Linux.
if sys.platform in ["linux", "linux2"]:
if pypi_pkg_check == "enabled" and sys.platform in ["linux", "linux2"]:
compare_pypi_to_torch_versions(
"cudnn", find_pypi_package_version("nvidia-cudnn"), torch_cudnn_version
)
torch_nccl_version = ".".join(str(v) for v in torch.cuda.nccl.version())
compare_pypi_to_torch_versions(
"nccl", find_pypi_package_version("nvidia-nccl"), torch_nccl_version
)
@ -436,6 +440,13 @@ def parse_args():
choices=["enabled", "disabled"],
default="enabled",
)
parser.add_argument(
"--pypi-pkg-check",
help="Check pypi package versions cudnn and nccl",
type=str,
choices=["enabled", "disabled"],
default="enabled",
)
return parser.parse_args()
@ -460,7 +471,10 @@ def main() -> None:
smoke_test_modules()
smoke_test_cuda(
options.package, options.runtime_error_check, options.torch_compile_check
options.package,
options.runtime_error_check,
options.torch_compile_check,
options.pypi_pkg_check,
)

View File

@ -90,8 +90,17 @@ fi
/pytorch/.ci/pytorch/check_binary.sh
if [[ "\$GPU_ARCH_TYPE" != *s390x* && "\$GPU_ARCH_TYPE" != *xpu* && "\$GPU_ARCH_TYPE" != *rocm* && "$PACKAGE_TYPE" != libtorch ]]; then
# Exclude s390, xpu, rocm and libtorch builds from smoke testing
python /pytorch/.ci/pytorch/smoke_test/smoke_test.py --package=torchonly --torch-compile-check disabled
torch_pkg_size="$(ls -1 /final_pkgs/torch-* | sort |tail -1 |xargs wc -c |cut -d ' ' -f1)"
# todo: implement check for large binaries
# if the package is larger than 1.5GB, we disable the pypi check.
# this package contains all libraries packaged in torch libs folder
# example of such package is https://download.pytorch.org/whl/cu126_full/torch
if [[ "\$torch_pkg_size" -gt 1500000000 ]]; then
python /pytorch/.ci/pytorch/smoke_test/smoke_test.py --package=torchonly --torch-compile-check disabled --pypi-pkg-check disabled
else
python /pytorch/.ci/pytorch/smoke_test/smoke_test.py --package=torchonly --torch-compile-check disabled $extra_parameters
fi
fi
# Clean temp files