From b3ad8f4a9c56fd7967a18dfa2234a63d641fc9fe Mon Sep 17 00:00:00 2001
From: can-gaa-hou <jiahaochen535@gmail.com>
Date: Mon, 15 Sep 2025 05:44:15 +0000
Subject: [PATCH] [BUG] Fix nonzero_static crash on CUDA when the input is an
 empty tensor (#162578)

Fixes #162473

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162578
Approved by: https://github.com/ngimel
---
 aten/src/ATen/native/cuda/Nonzero.cu | 11 +++++++++++
 test/test_unary_ufuncs.py            |  9 +++++++++
 2 files changed, 20 insertions(+)

diff --git a/aten/src/ATen/native/cuda/Nonzero.cu b/aten/src/ATen/native/cuda/Nonzero.cu
index 2d0e32d4e8c..aa1291dc784 100644
--- a/aten/src/ATen/native/cuda/Nonzero.cu
+++ b/aten/src/ATen/native/cuda/Nonzero.cu
@@ -317,6 +317,17 @@ void nonzero_static_cuda_out_impl(
     out_temp =
         Tensor(at::detail::empty_cuda({self.dim(), size}, out.options())).t();
   }
+  // If input has zero elements, avoid kernel grid calculations (which can
+  // produce zero divisors) and just fill the output with fill_value.
+  if (self.numel() == 0) {
+    if (need_to_copy) {
+      out_temp.fill_(fill_value);
+      out.copy_(out_temp);
+    } else {
+      out.fill_(fill_value);
+    }
+    return;
+  }
   int64_t* out_data_ptr = need_to_copy ? out_temp.mutable_data_ptr<int64_t>()
                                        : out.mutable_data_ptr<int64_t>();
 
diff --git a/test/test_unary_ufuncs.py b/test/test_unary_ufuncs.py
index 9939e8e76ce..15b967e5707 100644
--- a/test/test_unary_ufuncs.py
+++ b/test/test_unary_ufuncs.py
@@ -1654,6 +1654,15 @@ class TestUnaryUfuncs(TestCase):
             ),
         )
 
+        # empty input
+        # https://github.com/pytorch/pytorch/issues/162473
+        input_tensor = torch.tensor([], device=device)
+        static_size = 1
+        self.assertEqual(
+            torch.nonzero_static(input_tensor, size=static_size),
+            torch.tensor([[-1]], device=device),
+        )
+
         # 1D input
         input_tensor = torch.tensor([0, 8], device=device)
         static_size = 1