Disable device check for foreach kernels (#56871)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56871

Foreach kernels fall back to the slow path when tensors are on different devices, so the generated per-operator device check is unnecessary for them.
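
The slow path simply re-dispatches the underlying op one tensor at a time, so a mixed-device list is handled there anyway. A minimal sketch of that fast/slow split, with hypothetical helper names rather than the real ATen internals:

```python
# Hedged sketch of the foreach fast/slow split; foreach_op, fused_kernel and
# per_tensor_op are illustrative names, not the real ATen helpers.
def foreach_op(tensors, per_tensor_op, fused_kernel):
    # The fused multi-tensor kernel requires every tensor to share one device
    # and dtype (roughly what the native can_use_fast_route check guards).
    homogeneous = all(
        t.device == tensors[0].device and t.dtype == tensors[0].dtype
        for t in tensors
    )
    if homogeneous:
        return fused_kernel(tensors)
    # Slow path: dispatch the op per tensor, which already copes with
    # tensors that live on different devices.
    return [per_tensor_op(t) for t in tensors]
```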

Generated by codemod:
```
fastmod '(- func: _foreach.*)' '${1}
  device_check: NoCheck   # foreach kernels fall back to slow path when tensor are on different devices'   aten/src/ATen/native/native_functions.yaml
```
ghstack-source-id: 127914017

Test Plan: autotest

Reviewed By: ezyang

Differential Revision: D27986560

fbshipit-source-id: b0cd963cdba04b4e1589bbf369eb26b48d523968
Wenlei Xie, 2021-05-01 12:00:00 -07:00 (committed by Facebook GitHub Bot)
parent 183320df96
commit 22ecb8885f
2 changed files with 95 additions and 0 deletions

aten/src/ATen/native/native_functions.yaml

@@ -6931,546 +6931,637 @@
QuantizedCPU: cat_out_quantized_cpu
- func: _foreach_add.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_add_scalar_kernel_slow
CUDA: foreach_tensor_add_scalar_kernel_cuda
- func: _foreach_add_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_add_scalar_kernel_slow_
CUDA: foreach_tensor_add_scalar_kernel_cuda_
- func: _foreach_sub.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sub_scalar_kernel_slow
CUDA: foreach_tensor_sub_scalar_kernel_cuda
- func: _foreach_sub_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sub_scalar_kernel_slow_
CUDA: foreach_tensor_sub_scalar_kernel_cuda_
- func: _foreach_mul.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_mul_scalar_kernel_slow
CUDA: foreach_tensor_mul_scalar_kernel_cuda
- func: _foreach_mul_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_mul_scalar_kernel_slow_
CUDA: foreach_tensor_mul_scalar_kernel_cuda_
- func: _foreach_div.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_div_scalar_kernel_slow
CUDA: foreach_tensor_div_scalar_kernel_cuda
- func: _foreach_div_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_div_scalar_kernel_slow_
CUDA: foreach_tensor_div_scalar_kernel_cuda_
- func: _foreach_add.List(Tensor[] tensors1, Tensor[] tensors2, *, Scalar alpha=1) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_add_list_kernel_slow
CUDA: foreach_tensor_add_list_kernel_cuda
- func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_add_list_kernel_slow_
CUDA: foreach_tensor_add_list_kernel_cuda_
- func: _foreach_sub.List(Tensor[] tensors1, Tensor[] tensors2, *, Scalar alpha=1) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sub_list_kernel_slow
CUDA: foreach_tensor_sub_list_kernel_cuda
- func: _foreach_sub_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sub_list_kernel_slow_
CUDA: foreach_tensor_sub_list_kernel_cuda_
- func: _foreach_mul.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_mul_list_kernel_slow
CUDA: foreach_tensor_mul_list_kernel_cuda
- func: _foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_mul_list_kernel_slow_
CUDA: foreach_tensor_mul_list_kernel_cuda_
- func: _foreach_div.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_div_list_kernel_slow
CUDA: foreach_tensor_div_list_kernel_cuda
- func: _foreach_div_.List(Tensor(a!)[] self, Tensor[] other) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_div_list_kernel_slow_
CUDA: foreach_tensor_div_list_kernel_cuda_
- func: _foreach_add.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_add_scalarlist_kernel_slow
CUDA: foreach_tensor_add_scalarlist_kernel_cuda
- func: _foreach_add_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_add_scalarlist_kernel_slow_
CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
- func: _foreach_sub.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sub_scalarlist_kernel_slow
CUDA: foreach_tensor_sub_scalarlist_kernel_cuda
- func: _foreach_sub_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sub_scalarlist_kernel_slow_
CUDA: foreach_tensor_sub_scalarlist_kernel_cuda_
- func: _foreach_div.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_div_scalarlist_kernel_slow
CUDA: foreach_tensor_div_scalarlist_kernel_cuda
- func: _foreach_div_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_div_scalarlist_kernel_slow_
CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
- func: _foreach_mul.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_mul_scalarlist_kernel_slow
CUDA: foreach_tensor_mul_scalarlist_kernel_cuda
- func: _foreach_mul_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_mul_scalarlist_kernel_slow_
CUDA: foreach_tensor_mul_scalarlist_kernel_cuda_
- func: _foreach_exp(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_exp_slow
CUDA: foreach_tensor_exp_cuda
- func: _foreach_zero_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_zero_slow_
CUDA: foreach_tensor_zero_cuda_
- func: _foreach_exp_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_exp_slow_
CUDA: foreach_tensor_exp_cuda_
- func: _foreach_sqrt(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sqrt_slow
CUDA: foreach_tensor_sqrt_cuda
- func: _foreach_sqrt_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sqrt_slow_
CUDA: foreach_tensor_sqrt_cuda_
- func: _foreach_abs(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_abs_slow
CUDA: foreach_tensor_abs_cuda
- func: _foreach_abs_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_abs_slow_
CUDA: foreach_tensor_abs_cuda_
- func: _foreach_acos(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_acos_slow
CUDA: foreach_tensor_acos_cuda
- func: _foreach_acos_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_acos_slow_
CUDA: foreach_tensor_acos_cuda_
- func: _foreach_asin(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_asin_slow
CUDA: foreach_tensor_asin_cuda
- func: _foreach_asin_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_asin_slow_
CUDA: foreach_tensor_asin_cuda_
- func: _foreach_atan(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_atan_slow
CUDA: foreach_tensor_atan_cuda
- func: _foreach_atan_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_atan_slow_
CUDA: foreach_tensor_atan_cuda_
- func: _foreach_ceil(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_ceil_slow
CUDA: foreach_tensor_ceil_cuda
- func: _foreach_ceil_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_ceil_slow_
CUDA: foreach_tensor_ceil_cuda_
- func: _foreach_cos(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_cos_slow
CUDA: foreach_tensor_cos_cuda
- func: _foreach_cos_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_cos_slow_
CUDA: foreach_tensor_cos_cuda_
- func: _foreach_cosh(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_cosh_slow
CUDA: foreach_tensor_cosh_cuda
- func: _foreach_cosh_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_cosh_slow_
CUDA: foreach_tensor_cosh_cuda_
- func: _foreach_erf(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_erf_slow
CUDA: foreach_tensor_erf_cuda
- func: _foreach_erf_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_erf_slow_
CUDA: foreach_tensor_erf_cuda_
- func: _foreach_erfc(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_erfc_slow
CUDA: foreach_tensor_erfc_cuda
- func: _foreach_erfc_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_erfc_slow_
CUDA: foreach_tensor_erfc_cuda_
- func: _foreach_expm1(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_expm1_slow
CUDA: foreach_tensor_expm1_cuda
- func: _foreach_expm1_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_expm1_slow_
CUDA: foreach_tensor_expm1_cuda_
- func: _foreach_floor(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_floor_slow
CUDA: foreach_tensor_floor_cuda
- func: _foreach_floor_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_floor_slow_
CUDA: foreach_tensor_floor_cuda_
- func: _foreach_log(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_log_slow
CUDA: foreach_tensor_log_cuda
- func: _foreach_log_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_log_slow_
CUDA: foreach_tensor_log_cuda_
- func: _foreach_log10(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_log10_slow
CUDA: foreach_tensor_log10_cuda
- func: _foreach_log10_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_log10_slow_
CUDA: foreach_tensor_log10_cuda_
- func: _foreach_log1p(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_log1p_slow
CUDA: foreach_tensor_log1p_cuda
- func: _foreach_log1p_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_log1p_slow_
CUDA: foreach_tensor_log1p_cuda_
- func: _foreach_log2(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_log2_slow
CUDA: foreach_tensor_log2_cuda
- func: _foreach_log2_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_log2_slow_
CUDA: foreach_tensor_log2_cuda_
- func: _foreach_neg(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_neg_slow
CUDA: foreach_tensor_neg_cuda
- func: _foreach_neg_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_neg_slow_
CUDA: foreach_tensor_neg_cuda_
- func: _foreach_tan(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_tan_slow
CUDA: foreach_tensor_tan_cuda
- func: _foreach_tan_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_tan_slow_
CUDA: foreach_tensor_tan_cuda_
- func: _foreach_tanh(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_tanh_slow
CUDA: foreach_tensor_tanh_cuda
- func: _foreach_tanh_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_tanh_slow_
CUDA: foreach_tensor_tanh_cuda_
- func: _foreach_sin(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sin_slow
CUDA: foreach_tensor_sin_cuda
- func: _foreach_sin_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sin_slow_
CUDA: foreach_tensor_sin_cuda_
- func: _foreach_sinh(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sinh_slow
CUDA: foreach_tensor_sinh_cuda
- func: _foreach_sinh_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sinh_slow_
CUDA: foreach_tensor_sinh_cuda_
- func: _foreach_round(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_round_slow
CUDA: foreach_tensor_round_cuda
- func: _foreach_round_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_round_slow_
CUDA: foreach_tensor_round_cuda_
- func: _foreach_lgamma(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_lgamma_slow
CUDA: foreach_tensor_lgamma_cuda
- func: _foreach_lgamma_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_lgamma_slow_
CUDA: foreach_tensor_lgamma_cuda_
- func: _foreach_frac(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_frac_slow
CUDA: foreach_tensor_frac_cuda
- func: _foreach_frac_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_frac_slow_
CUDA: foreach_tensor_frac_cuda_
- func: _foreach_reciprocal(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_reciprocal_slow
CUDA: foreach_tensor_reciprocal_cuda
- func: _foreach_reciprocal_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_reciprocal_slow_
CUDA: foreach_tensor_reciprocal_cuda_
- func: _foreach_sigmoid(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sigmoid_slow
CUDA: foreach_tensor_sigmoid_cuda
- func: _foreach_sigmoid_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sigmoid_slow_
CUDA: foreach_tensor_sigmoid_cuda_
- func: _foreach_trunc(Tensor[] tensors) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_trunc_slow
CUDA: foreach_tensor_trunc_cuda
- func: _foreach_trunc_(Tensor(a!)[] self) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_trunc_slow_
CUDA: foreach_tensor_trunc_cuda_
- func: _foreach_addcdiv_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_addcdiv_scalar_slow_
CUDA: foreach_tensor_addcdiv_scalar_cuda_
- func: _foreach_addcmul_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_addcmul_scalar_slow_
CUDA: foreach_tensor_addcmul_scalar_cuda_
- func: _foreach_addcdiv_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_addcdiv_scalarlist_slow_
CUDA: foreach_tensor_addcdiv_scalarlist_cuda_
- func: _foreach_addcmul_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_addcmul_scalarlist_slow_
CUDA: foreach_tensor_addcmul_scalarlist_cuda_
- func: _foreach_addcdiv.Scalar(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_addcdiv_scalar_slow
CUDA: foreach_tensor_addcdiv_scalar_cuda
- func: _foreach_addcmul.Scalar(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_addcmul_scalar_slow
CUDA: foreach_tensor_addcmul_scalar_cuda
- func: _foreach_addcdiv.ScalarList(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_addcdiv_scalarlist_slow
CUDA: foreach_tensor_addcdiv_scalarlist_cuda
- func: _foreach_addcmul.ScalarList(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_addcmul_scalarlist_slow
CUDA: foreach_tensor_addcmul_scalarlist_cuda
- func: _foreach_maximum.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_maximum_slow
CUDA: foreach_tensor_maximum_cuda
- func: _foreach_minimum.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_minimum_slow

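For the entries above, the user-visible effect is that a mixed-device tensor list is no longer rejected up front by the generated device check; it reaches the kernel and takes the slow, per-tensor fallback. A usage sketch (assumes a CUDA-enabled build; this illustrates the intended behavior of the change rather than a verified run):

```python
import torch

# One CPU tensor and one CUDA tensor in the same foreach call.
xs = [torch.randn(4), torch.randn(4, device="cuda")]

# With device_check: NoCheck the kernel is entered and falls back to the
# slow path, applying exp tensor-by-tensor on each tensor's own device.
ys = torch._foreach_exp(xs)
```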
tools/codegen/model.py

@@ -424,6 +424,10 @@ class NativeFunction:
assert k not in self.dispatch, \
f"if structured_delegate, then must not have {k} in dispatch dictionary " \
"(it is delegated!)"
if str(self.func.name).startswith('_foreach'):
assert self.device_check == DeviceCheckType.NoCheck, \
"foreach kernels fall back to slow path when tensor are on different devices, " \
"device_check not allowed to be enabled"
SchemaKind = Enum('SchemaKind', ('functional', 'inplace', 'out'))
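
The assertion added here makes the convention self-enforcing: a future `_foreach*` schema that omits `device_check: NoCheck` fails at codegen time. A stripped-down sketch of that validation, using stand-in names rather than the real codegen model:

```python
from enum import Enum

# Illustrative stand-ins for the codegen model types.
DeviceCheckType = Enum('DeviceCheckType', ('NoCheck', 'ExactSame'))

def validate_foreach_entry(op_name: str, device_check: DeviceCheckType) -> None:
    # Mirrors the NativeFunction assertion: every _foreach* operator must
    # opt out of the generated device check, because its kernels already
    # handle mixed-device inputs via the slow path.
    if op_name.startswith('_foreach'):
        assert device_check == DeviceCheckType.NoCheck, (
            f"{op_name}: foreach kernels fall back to the slow path when "
            "tensors are on different devices; device_check must be NoCheck"
        )

validate_foreach_entry('_foreach_add.Scalar', DeviceCheckType.NoCheck)      # passes
# validate_foreach_entry('_foreach_add.Scalar', DeviceCheckType.ExactSame)  # would raise
```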