Disable device check for foreach kernels (#56871)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56871

foreach kernels fall back to the slow path when tensors are on different devices.

Generated by codemod:
```
fastmod '(- func: _foreach.*)' '${1} device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices' aten/src/ATen/native/native_functions.yaml
```

ghstack-source-id: 127914017

Test Plan: autotest

Reviewed By: ezyang

Differential Revision: D27986560

fbshipit-source-id: b0cd963cdba04b4e1589bbf369eb26b48d523968
commit 22ecb8885f
parent 183320df96
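For context, a hedged usage sketch (not part of this commit) of what the relaxed check allows: the `torch._foreach_*` bindings use the fused kernel only when every tensor in the list shares one device, and the kernels themselves are expected to take the per-tensor slow path otherwise, so the dispatcher-level device check removed below was redundant. Exact runtime behavior may vary by PyTorch build.

```python
# Hedged sketch, not part of this commit: illustrates why the dispatcher-level
# device check is unnecessary for _foreach ops. Assumes a PyTorch build that
# already contains this change.
import torch

# A same-device list uses the per-tensor slow path on CPU (fused path on CUDA).
cpu_tensors = [torch.ones(3), torch.ones(3)]
print(torch._foreach_add(cpu_tensors, 1.0))

if torch.cuda.is_available():
    # A mixed-device list is no longer rejected up front by the dispatcher;
    # the foreach kernel is expected to fall back to per-tensor ops instead.
    mixed = [torch.ones(3, device="cuda"), torch.ones(3)]
    torch._foreach_add_(mixed, 1.0)
    print([t.device for t in mixed])
```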
@@ -6931,546 +6931,637 @@
    QuantizedCPU: cat_out_quantized_cpu

- func: _foreach_add.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_add_scalar_kernel_slow
    CUDA: foreach_tensor_add_scalar_kernel_cuda

- func: _foreach_add_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_add_scalar_kernel_slow_
    CUDA: foreach_tensor_add_scalar_kernel_cuda_

- func: _foreach_sub.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sub_scalar_kernel_slow
    CUDA: foreach_tensor_sub_scalar_kernel_cuda

- func: _foreach_sub_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sub_scalar_kernel_slow_
    CUDA: foreach_tensor_sub_scalar_kernel_cuda_

- func: _foreach_mul.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_mul_scalar_kernel_slow
    CUDA: foreach_tensor_mul_scalar_kernel_cuda

- func: _foreach_mul_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_mul_scalar_kernel_slow_
    CUDA: foreach_tensor_mul_scalar_kernel_cuda_

- func: _foreach_div.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_div_scalar_kernel_slow
    CUDA: foreach_tensor_div_scalar_kernel_cuda

- func: _foreach_div_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_div_scalar_kernel_slow_
    CUDA: foreach_tensor_div_scalar_kernel_cuda_

- func: _foreach_add.List(Tensor[] tensors1, Tensor[] tensors2, *, Scalar alpha=1) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_add_list_kernel_slow
    CUDA: foreach_tensor_add_list_kernel_cuda

- func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_add_list_kernel_slow_
    CUDA: foreach_tensor_add_list_kernel_cuda_

- func: _foreach_sub.List(Tensor[] tensors1, Tensor[] tensors2, *, Scalar alpha=1) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sub_list_kernel_slow
    CUDA: foreach_tensor_sub_list_kernel_cuda

- func: _foreach_sub_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sub_list_kernel_slow_
    CUDA: foreach_tensor_sub_list_kernel_cuda_

- func: _foreach_mul.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_mul_list_kernel_slow
    CUDA: foreach_tensor_mul_list_kernel_cuda

- func: _foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_mul_list_kernel_slow_
    CUDA: foreach_tensor_mul_list_kernel_cuda_

- func: _foreach_div.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_div_list_kernel_slow
    CUDA: foreach_tensor_div_list_kernel_cuda

- func: _foreach_div_.List(Tensor(a!)[] self, Tensor[] other) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_div_list_kernel_slow_
    CUDA: foreach_tensor_div_list_kernel_cuda_

- func: _foreach_add.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_add_scalarlist_kernel_slow
    CUDA: foreach_tensor_add_scalarlist_kernel_cuda

- func: _foreach_add_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_add_scalarlist_kernel_slow_
    CUDA: foreach_tensor_add_scalarlist_kernel_cuda_

- func: _foreach_sub.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sub_scalarlist_kernel_slow
    CUDA: foreach_tensor_sub_scalarlist_kernel_cuda

- func: _foreach_sub_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sub_scalarlist_kernel_slow_
    CUDA: foreach_tensor_sub_scalarlist_kernel_cuda_

- func: _foreach_div.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_div_scalarlist_kernel_slow
    CUDA: foreach_tensor_div_scalarlist_kernel_cuda

- func: _foreach_div_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_div_scalarlist_kernel_slow_
    CUDA: foreach_tensor_div_scalarlist_kernel_cuda_

- func: _foreach_mul.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_mul_scalarlist_kernel_slow
    CUDA: foreach_tensor_mul_scalarlist_kernel_cuda

- func: _foreach_mul_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_mul_scalarlist_kernel_slow_
    CUDA: foreach_tensor_mul_scalarlist_kernel_cuda_

- func: _foreach_exp(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_exp_slow
    CUDA: foreach_tensor_exp_cuda

- func: _foreach_zero_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_zero_slow_
    CUDA: foreach_tensor_zero_cuda_

- func: _foreach_exp_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_exp_slow_
    CUDA: foreach_tensor_exp_cuda_

- func: _foreach_sqrt(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sqrt_slow
    CUDA: foreach_tensor_sqrt_cuda

- func: _foreach_sqrt_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sqrt_slow_
    CUDA: foreach_tensor_sqrt_cuda_

- func: _foreach_abs(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_abs_slow
    CUDA: foreach_tensor_abs_cuda

- func: _foreach_abs_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_abs_slow_
    CUDA: foreach_tensor_abs_cuda_

- func: _foreach_acos(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_acos_slow
    CUDA: foreach_tensor_acos_cuda

- func: _foreach_acos_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_acos_slow_
    CUDA: foreach_tensor_acos_cuda_

- func: _foreach_asin(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_asin_slow
    CUDA: foreach_tensor_asin_cuda

- func: _foreach_asin_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_asin_slow_
    CUDA: foreach_tensor_asin_cuda_

- func: _foreach_atan(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_atan_slow
    CUDA: foreach_tensor_atan_cuda

- func: _foreach_atan_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_atan_slow_
    CUDA: foreach_tensor_atan_cuda_

- func: _foreach_ceil(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_ceil_slow
    CUDA: foreach_tensor_ceil_cuda

- func: _foreach_ceil_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_ceil_slow_
    CUDA: foreach_tensor_ceil_cuda_

- func: _foreach_cos(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_cos_slow
    CUDA: foreach_tensor_cos_cuda

- func: _foreach_cos_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_cos_slow_
    CUDA: foreach_tensor_cos_cuda_

- func: _foreach_cosh(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_cosh_slow
    CUDA: foreach_tensor_cosh_cuda

- func: _foreach_cosh_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_cosh_slow_
    CUDA: foreach_tensor_cosh_cuda_

- func: _foreach_erf(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_erf_slow
    CUDA: foreach_tensor_erf_cuda

- func: _foreach_erf_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_erf_slow_
    CUDA: foreach_tensor_erf_cuda_

- func: _foreach_erfc(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_erfc_slow
    CUDA: foreach_tensor_erfc_cuda

- func: _foreach_erfc_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_erfc_slow_
    CUDA: foreach_tensor_erfc_cuda_

- func: _foreach_expm1(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_expm1_slow
    CUDA: foreach_tensor_expm1_cuda

- func: _foreach_expm1_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_expm1_slow_
    CUDA: foreach_tensor_expm1_cuda_

- func: _foreach_floor(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_floor_slow
    CUDA: foreach_tensor_floor_cuda

- func: _foreach_floor_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_floor_slow_
    CUDA: foreach_tensor_floor_cuda_

- func: _foreach_log(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_log_slow
    CUDA: foreach_tensor_log_cuda

- func: _foreach_log_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_log_slow_
    CUDA: foreach_tensor_log_cuda_

- func: _foreach_log10(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_log10_slow
    CUDA: foreach_tensor_log10_cuda

- func: _foreach_log10_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_log10_slow_
    CUDA: foreach_tensor_log10_cuda_

- func: _foreach_log1p(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_log1p_slow
    CUDA: foreach_tensor_log1p_cuda

- func: _foreach_log1p_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_log1p_slow_
    CUDA: foreach_tensor_log1p_cuda_

- func: _foreach_log2(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_log2_slow
    CUDA: foreach_tensor_log2_cuda

- func: _foreach_log2_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_log2_slow_
    CUDA: foreach_tensor_log2_cuda_

- func: _foreach_neg(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_neg_slow
    CUDA: foreach_tensor_neg_cuda

- func: _foreach_neg_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_neg_slow_
    CUDA: foreach_tensor_neg_cuda_

- func: _foreach_tan(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_tan_slow
    CUDA: foreach_tensor_tan_cuda

- func: _foreach_tan_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_tan_slow_
    CUDA: foreach_tensor_tan_cuda_

- func: _foreach_tanh(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_tanh_slow
    CUDA: foreach_tensor_tanh_cuda

- func: _foreach_tanh_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_tanh_slow_
    CUDA: foreach_tensor_tanh_cuda_

- func: _foreach_sin(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sin_slow
    CUDA: foreach_tensor_sin_cuda

- func: _foreach_sin_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sin_slow_
    CUDA: foreach_tensor_sin_cuda_

- func: _foreach_sinh(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sinh_slow
    CUDA: foreach_tensor_sinh_cuda

- func: _foreach_sinh_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sinh_slow_
    CUDA: foreach_tensor_sinh_cuda_

- func: _foreach_round(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_round_slow
    CUDA: foreach_tensor_round_cuda

- func: _foreach_round_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_round_slow_
    CUDA: foreach_tensor_round_cuda_

- func: _foreach_lgamma(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_lgamma_slow
    CUDA: foreach_tensor_lgamma_cuda

- func: _foreach_lgamma_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_lgamma_slow_
    CUDA: foreach_tensor_lgamma_cuda_

- func: _foreach_frac(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_frac_slow
    CUDA: foreach_tensor_frac_cuda

- func: _foreach_frac_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_frac_slow_
    CUDA: foreach_tensor_frac_cuda_

- func: _foreach_reciprocal(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_reciprocal_slow
    CUDA: foreach_tensor_reciprocal_cuda

- func: _foreach_reciprocal_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_reciprocal_slow_
    CUDA: foreach_tensor_reciprocal_cuda_

- func: _foreach_sigmoid(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sigmoid_slow
    CUDA: foreach_tensor_sigmoid_cuda

- func: _foreach_sigmoid_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_sigmoid_slow_
    CUDA: foreach_tensor_sigmoid_cuda_

- func: _foreach_trunc(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_trunc_slow
    CUDA: foreach_tensor_trunc_cuda

- func: _foreach_trunc_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_trunc_slow_
    CUDA: foreach_tensor_trunc_cuda_

- func: _foreach_addcdiv_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_addcdiv_scalar_slow_
    CUDA: foreach_tensor_addcdiv_scalar_cuda_

- func: _foreach_addcmul_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_addcmul_scalar_slow_
    CUDA: foreach_tensor_addcmul_scalar_cuda_

- func: _foreach_addcdiv_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_addcdiv_scalarlist_slow_
    CUDA: foreach_tensor_addcdiv_scalarlist_cuda_

- func: _foreach_addcmul_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_addcmul_scalarlist_slow_
    CUDA: foreach_tensor_addcmul_scalarlist_cuda_

- func: _foreach_addcdiv.Scalar(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_addcdiv_scalar_slow
    CUDA: foreach_tensor_addcdiv_scalar_cuda

- func: _foreach_addcmul.Scalar(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_addcmul_scalar_slow
    CUDA: foreach_tensor_addcmul_scalar_cuda

- func: _foreach_addcdiv.ScalarList(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_addcdiv_scalarlist_slow
    CUDA: foreach_tensor_addcdiv_scalarlist_cuda

- func: _foreach_addcmul.ScalarList(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_addcmul_scalarlist_slow
    CUDA: foreach_tensor_addcmul_scalarlist_cuda

- func: _foreach_maximum.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_maximum_slow
    CUDA: foreach_tensor_maximum_cuda

- func: _foreach_minimum.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
  dispatch:
    CPU: foreach_tensor_minimum_slow
@@ -424,6 +424,10 @@ class NativeFunction:
                assert k not in self.dispatch, \
                    f"if structured_delegate, then must not have {k} in dispatch dictionary " \
                    "(it is delegated!)"
        if str(self.func.name).startswith('_foreach'):
            assert self.device_check == DeviceCheckType.NoCheck, \
                "foreach kernels fall back to slow path when tensor are on different devices, " \
                "device_check not allowed to be enabled"

SchemaKind = Enum('SchemaKind', ('functional', 'inplace', 'out'))
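A minimal sketch of the codegen validation added above, using simplified, hypothetical stand-ins for the real classes (the actual check lives in NativeFunction in tools/codegen/model.py): any schema whose name starts with `_foreach` must declare `device_check: NoCheck`, otherwise parsing asserts.

```python
# Hedged sketch with hypothetical stand-in classes, not the real codegen model.
from enum import Enum

DeviceCheckType = Enum('DeviceCheckType', ('NoCheck', 'ExactSame'))

class ParsedFunc:
    """Tiny stand-in for a parsed native_functions.yaml entry."""
    def __init__(self, name: str, device_check: DeviceCheckType) -> None:
        self.name = name
        self.device_check = device_check
        if name.startswith('_foreach'):
            # Mirrors the assertion introduced by this commit.
            assert device_check == DeviceCheckType.NoCheck, \
                "foreach kernels fall back to slow path when tensor are on " \
                "different devices, device_check not allowed to be enabled"

ParsedFunc('_foreach_add.Scalar', DeviceCheckType.NoCheck)      # passes
# ParsedFunc('_foreach_add.Scalar', DeviceCheckType.ExactSame)  # would assert
```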