diff --git a/test/inductor/test_torchinductor_codegen_dynamic_shapes.py b/test/inductor/test_torchinductor_codegen_dynamic_shapes.py
index 62aeaf5e99c..20620c29788 100644
--- a/test/inductor/test_torchinductor_codegen_dynamic_shapes.py
+++ b/test/inductor/test_torchinductor_codegen_dynamic_shapes.py
@@ -348,7 +348,7 @@ test_failures = {
     "test_rand_like_deterministic_dynamic_shapes": TestFailure(
         ("cpu", "cuda", "xpu"), is_skip=True
     ),
-    "test_repeat_interleave_2_dynamic_shapes": TestFailure(("cpu", "xpu")),
+    "test_repeat_interleave_2_dynamic_shapes": TestFailure(("cpu",)),
     "test_slice_mutation2_dynamic_shapes": TestFailure(
         ("cpu", "cuda", "xpu"), is_skip=True
     ),
diff --git a/test/inductor/test_torchinductor_opinfo.py b/test/inductor/test_torchinductor_opinfo.py
index 1ee24c74bb7..8e527b659ec 100644
--- a/test/inductor/test_torchinductor_opinfo.py
+++ b/test/inductor/test_torchinductor_opinfo.py
@@ -682,6 +682,14 @@ inductor_override_kwargs["xpu"] = {
     ("nn.functional.unfold", f16): {
         "reference_in_float": True,
     },
+    # Reference crash on Intel LTS2 driver.
+    ("nn.functional.interpolate.trilinear", f32): {
+        "check_gradient": False,
+    },
+    # Reference crash on Intel LTS2 driver.
+    ("nn.functional.interpolate.trilinear", f64): {
+        "check_gradient": False,
+    },
 }
 if TEST_WITH_ROCM:
     inductor_override_kwargs["cuda"].update(
diff --git a/test/test_testing.py b/test/test_testing.py
index a69fb8ac953..00fb106ac2a 100644
--- a/test/test_testing.py
+++ b/test/test_testing.py
@@ -2351,7 +2351,7 @@ class TestImports(TestCase):
             # fail, so just set CWD to this script's directory
             cwd=os.path.dirname(os.path.realpath(__file__)),).decode("utf-8")
 
-    # The test is flaky on ROCm and has been open and close multiple times
+    # The test is flaky on ROCm/XPU and has been opened and closed multiple times
     # https://github.com/pytorch/pytorch/issues/110040
     @skipIfRocm
     def test_circular_dependencies(self) -> None:
diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py
index bf0a558b4ee..4f8ab3a7cac 100644
--- a/torch/_inductor/ir.py
+++ b/torch/_inductor/ir.py
@@ -7160,10 +7160,10 @@ class DeviceCopy(ExternKernelOut):
         # x.get_stride() may be unimplemented if x's size is empty
         stride = x.get_stride()
         is_destination_pinned = (
-            x_device.type == "cuda" and device.type == "cpu" and non_blocking
+            is_gpu(x_device.type) and device.type == "cpu" and non_blocking
         )
         is_source_pinned = (
-            x_device.type == "cpu" and device.type == "cuda" and non_blocking
+            x_device.type == "cpu" and is_gpu(device.type) and non_blocking
         )
         if is_source_pinned and is_storage_and_layout(x):
             x.get_layout().is_pinned = True