[MPS] Move ops modifiers to testing utils so other tests can reuse (#151781)

Test collection check:
```
python -m pytest test/test_mps.py --collect-only
```
Before:
```
6390 tests collected in 8.34s
```

After:
```
6390 tests collected in 7.71s
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/151781
Approved by: https://github.com/malfet
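With the three modifiers now exposed from `torch.testing._internal.common_mps` (see the import added in the diff below), other test files can reuse them instead of reaching into `test_mps.py`. A minimal sketch of such reuse, assuming the standard `op_db` and device-type test machinery; the `TestMPSExpandedOps` class name and the chosen dtype are illustrative only, not part of this PR:
```python
# Hypothetical consumer of the relocated modifiers; only the import path below
# is established by this PR, everything else is an illustrative sketch.
import torch
from torch.testing._internal.common_mps import mps_ops_modifier
from torch.testing._internal.common_methods_invocations import op_db
from torch.testing._internal.common_device_type import (
    instantiate_device_type_tests, ops)
from torch.testing._internal.common_utils import TestCase, run_tests


class TestMPSExpandedOps(TestCase):
    # mps_ops_modifier attaches the shared skip/xfail DecorateInfos to each OpInfo,
    # so MPS-specific expectations no longer have to be copied into this file.
    @ops(mps_ops_modifier(op_db), allowed_dtypes=(torch.float32,))
    def test_forward(self, device, dtype, op):
        sample = next(iter(op.sample_inputs(device, dtype)))
        op(sample.input, *sample.args, **sample.kwargs)


instantiate_device_type_tests(TestMPSExpandedOps, globals(), only_for="mps")

if __name__ == "__main__":
    run_tests()
```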
Li-Huai (Allan) Lin authored on 2025-04-22 11:11:01 -07:00; committed by PyTorch MergeBot
parent 982062dfc4
commit fbd29527d8
2 changed files with 1078 additions and 876 deletions


@@ -24,6 +24,7 @@ from torch.testing._internal import opinfo
from torch.testing._internal.common_utils import \
(gradcheck, gradgradcheck, parametrize, run_tests, TestCase, download_file, MACOS_VERSION, IS_CI,
NoTest, skipIfSlowGradcheckEnv, suppress_warnings, serialTest, instantiate_parametrized_tests)
from torch.testing._internal.common_mps import mps_ops_modifier, mps_ops_grad_modifier, mps_ops_error_inputs_modifier
from torch.testing import make_tensor
from torch.testing._internal.common_dtype import get_all_dtypes, integral_types
import torch.backends.mps
@@ -32,7 +33,6 @@ from functools import partial
from torch.testing._internal.common_methods_invocations import (
op_db,
DecorateInfo,
UnaryUfuncInfo,
ReductionOpInfo,
SpectralFuncInfo,
@@ -79,880 +79,6 @@ def xfailIf(condition):
return func
return wrapper
def mps_ops_grad_modifier(ops):
XFAILLIST_GRAD = {
# precision issues
'special.polygammaspecial_polygamma_n_0': [torch.float16],
'polygammapolygamma_n_0': [torch.float16],
# Unimplemented ops
'__getitem__': [torch.float16],
'_segment_reduce': [torch.float16, torch.float32],
'_chunk_cat': [torch.float16, torch.float32],
'_upsample_bilinear2d_aa': None, # `_upsample_bilinear2d_aa_backward_out` not implemented for MPS
'_upsample_bicubic2d_aa': None, # `_upsample_bilinear2d_aa_backward_out` not implemented for MPS
'sparse.mmreduce': [torch.float32], # csr not supported
'unique_consecutive': [torch.float16, torch.float32],
'scalar_tensor': [torch.float16, torch.float32],
'cdist': [torch.float32],
'masked.scatter': [torch.float16, torch.float32],
'index_fill': [torch.float16, torch.float32], # missing `aten::_unique`.
'linalg.solve': [torch.float16, torch.float32], # missing `aten::lu_solve`.
'linalg.solve_ex': [torch.float16, torch.float32], # missing `aten::lu_solve`.
'linalg.tensorsolve': [torch.float16, torch.float32], # missing `aten::lu_solve`.
'linalg.det': [torch.float16, torch.float32], # missing aten::lu_solve.out
'linalg.slogdet': [torch.float16, torch.float32], # missing aten::lu_solve.out
'logdet': [torch.float16, torch.float32], # missing aten::lu_solve.out
'aminmax': [torch.float32, torch.float16],
'special.i1': [torch.float16], # "i1_backward" not implemented for 'Half'
'special.i1e': [torch.float16], # "i1e_backward" not implemented for 'Half'
# Correctness issues
'atanh': [torch.float32],
# Random output
'exponential': [torch.float16, torch.float32],
# CPU errors
# derivative for zeta is not implemented
'special.zeta': None,
# derivative for aten::nextafter is not implemented on CPU
'nextafter': None,
# derivative for aten::floor_divide is not implemented on CPU
'floor_divide': [torch.float16, torch.float32],
# derivative for aten::narrow_copy is not implemented on CPU
'narrow_copy': [torch.float16, torch.float32],
# derivative for aten::_histogramdd_from_bin_cts is not implemented on CPU
'histogramdd': [torch.float16, torch.float32],
# derivative for aten::histogram is not implemented
'histogram': [torch.float16, torch.float32],
# 'bool' object is not iterable
'allclose': [torch.float16, torch.float32],
'equal': [torch.float16, torch.float32],
# 'float' object is not iterable
'item': [torch.float16, torch.float32],
# "mse_backward_cpu_out" not implemented for 'Half'
'nn.functional.mse_loss': [torch.float16],
# "smooth_l1_backward_cpu_out" not implemented for 'Half'
'nn.functional.smooth_l1_loss': [torch.float16],
# cpu error: grad requires non-empty inputs
'randn': [torch.float16, torch.float32],
'signal.windows.bartlett': [torch.float32],
'signal.windows.blackman': [torch.float32],
'signal.windows.cosine': [torch.float32],
'signal.windows.exponential': [torch.float32],
'signal.windows.gaussian': [torch.float32],
'signal.windows.general_cosine': [torch.float32],
'signal.windows.general_hamming': [torch.float32],
'signal.windows.hamming': [torch.float32],
'signal.windows.hann': [torch.float32],
'signal.windows.kaiser': [torch.float32],
'signal.windows.nuttall': [torch.float32],
'eye': [torch.float16, torch.float32],
# trunc_tensor not working properly for float16
'divtrunc_rounding': [torch.float16],
'fmod': [torch.float16],
# round not working properly for float16
'round': [torch.float16],
# atomic operation in backward pass
'_unsafe_masked_index': [torch.float16],
'_unsafe_masked_index_put_accumulate': [torch.float16],
}
MACOS_BEFORE_13_3_XFAILLIST_GRAD = {
# Failures due to precision issues (due to fast-math). These have been fixed in MacOS 13.3+
'masked.softmin': [torch.float32, torch.float16],
'masked.softmax': [torch.float32, torch.float16],
'masked.log_softmax': [torch.float32, torch.float16],
# Unsupported Border padding mode; forward pass succeeds as it falls back to cpu
'grid_sampler_2d': [torch.float32, torch.float16, torch.bfloat16],
# Same issue as `argsort` and `sort` with duplicate elements (undefined behaviour).
# Forward pass is passing since `msort` doesn't return the indices, just the values, which match the CPU.
# On the backward pass for `sort` both are used (values and indices), thus resulting in a mismatch between CPU and MPS.
# Running `msort` with stable `sort` passes.
'msort': [torch.float16],
}
SKIPLIST_GRAD = {
'nn.functional.pairwise_distance': [torch.float16],
# failed assertion `destination datatype must be fp32'
'nn.functional.conv1d': [torch.float16],
'nn.functional.conv2d': [torch.float16],
'nn.functional.conv3d': [torch.float16],
'nn.functional.conv_transpose1d': [torch.float16],
'nn.functional.conv_transpose2d': [torch.float16],
'nn.functional.conv_transpose3d': [torch.float16],
}
MACOS_13_3_XFAILLIST_GRAD = {
# Same issue as `argsort` and `sort` with duplicate elements (undefined behaviour).
# Forward pass is passing since `msort` doesn't return the indices, just the values, which match the CPU.
# On the backward pass for `sort` both are used (values and indices), thus resulting in a mismatch between CPU and MPS.
# Running `msort` with stable `sort` passes.
'msort': [torch.float16],
}
ON_MPS_XFAILLIST = {
# Failures due to lack of implementation of downstream functions on MPS backend
# TODO: remove these once downstream function 'aten::_linalg_svd.U' has been implemented
'linalg.matrix_rank': None,
# Exception: Caused by sample input at index 3 on MPS
'nn.functional.conv3d': [torch.float32],
}
def addDecorator(op, d) -> None:
op.decorators = list(op.decorators) if op.decorators is not None else []
op.decorators.append(d)
for op in ops:
key = op.name + op.variant_test_name
if key in XFAILLIST_GRAD:
addDecorator(op, DecorateInfo(
unittest.expectedFailure,
dtypes=XFAILLIST_GRAD[key]))
if key in SKIPLIST_GRAD:
addDecorator(op, DecorateInfo(
unittest.skip,
dtypes=SKIPLIST_GRAD[key]))
if key in ON_MPS_XFAILLIST:
addDecorator(op, DecorateInfo(
unittest.expectedFailure,
dtypes=ON_MPS_XFAILLIST[key]))
if key in MACOS_BEFORE_13_3_XFAILLIST_GRAD and (torch.backends.mps.is_macos13_or_newer() and MACOS_VERSION < 13.3):
addDecorator(op, DecorateInfo(
unittest.expectedFailure,
dtypes=MACOS_BEFORE_13_3_XFAILLIST_GRAD[key]))
if key in MACOS_13_3_XFAILLIST_GRAD and (MACOS_VERSION >= 13.3):
addDecorator(op, DecorateInfo(
unittest.expectedFailure,
dtypes=MACOS_13_3_XFAILLIST_GRAD[key]))
yield op
def mps_ops_modifier(ops):
# Supported complex OPS
SUPPORTED_COMPLEX_OPS = {
'__radd__',
'__rmul__',
'__getitem__',
'_unsafe_masked_index',
'abs',
'add',
'alias_copy',
'argwhere',
'atleast_1d',
'atleast_2d',
'atleast_3d',
'as_strided',
'as_strided_copy',
'as_strided_scatter',
'broadcast_tensors',
'broadcast_to',
'chalf',
'cfloat',
'chunk',
'clone',
'conj',
'conj_physical',
'contiguous',
'diag',
'diag_embed',
'diagflat',
'diagonal',
'diagonal_copy',
'diagonal_scatter',
'dsplit',
'empty',
'empty_permuted',
'empty_strided',
'exp',
'expand',
'expand_as',
'expand_copy',
'flatten',
'fill',
'full',
'H',
'hsplit',
'imag',
'index_select',
'isfinite',
'isinf',
'isreal',
'item',
'kron',
'linalg.diagonal',
'linalg.svd',
'mH',
'mT',
'masked_fill',
'masked_scatter',
'masked_select',
'meshgridlist_of_tensors',
'meshgridvariadic_tensors',
'movedim',
'mul',
'narrow',
'narrow_copy',
'new_full',
'new_ones',
'new_zeros',
'nn.functional.conv1d',
'nn.functional.conv2d',
'nn.functional.conv_transpose1d',
'nn.functional.conv_transpose2d',
'nn.functional.feature_alpha_dropoutwithout_train',
'nn.functional.padcircular',
'nn.functional.tanhshrink',
'nn.functional.unfold',
'nonzero',
'ones',
'outer',
'permute',
'permute_copy',
'positive',
'randn',
'ravel',
'real',
'repeat_interleave',
'reshape_as',
'reshape',
'resolve_conj',
'resolve_neg',
'scalar_tensor',
'select',
'sgn',
'sinc',
'slice',
'special.spherical_bessel_j0',
'special.entr',
'special.xlog1py',
'special.zeta',
'split',
'split_with_sizes',
'split_with_sizes_copy',
'splitlist_args',
'sqrt',
'squeeze',
'squeeze_copy',
'squeezemultiple',
'sub',
'svd',
't',
't_copy',
'tanh',
'tensor_split',
'transpose',
'transpose_copy',
'T',
'unbind',
'unbind_copy',
'unflatten',
'unfold',
'unfold_copy',
'unsafe_chunk',
'unsafe_split',
'unsqueeze',
'unsqueeze_copy',
'view_as',
'view_as_real',
'view',
'view_copy',
'vsplit',
'zero_',
'zeros',
}
AFTER_MACOS_14_0_SUPPORTED_COMPLEX_OPS = {
'__rdiv__',
'__rmatmul__',
'_chunk_cat',
'acos',
'acosh',
'all',
'allclose',
'angle',
'any',
'addcdiv',
'addcmul',
'addmmdecomposed',
'addmv',
'asin',
'atan',
'atanh',
'bfloat16',
'bmm',
'bool',
'cartesian_prod',
'cat',
'char',
'column_stack',
'combinations',
'corrcoef',
'constant_pad_nd',
'cos',
'cosh',
'cov',
'count_nonzero',
'diff',
'div',
'divno_rounding_mode',
'dot',
'dstack',
'einsum',
'eq',
'equal',
'exp2',
'expm1',
'eye',
'fft.fft',
'fft.fft2',
'fft.fftn',
'fft.fftshift',
'fft.ifft',
'fft.ifft2',
'fft.ifftn',
'fft.ifftshift',
'fft.irfftn',
'fft.irfft2',
'fft.irfft',
'fft.hfftn',
'fft.hfft2',
'fft.hfft',
'flip',
'fliplr',
'flipud',
'float',
'gradient',
'half',
'hstack',
'inner',
'int',
'isclose',
'isnan',
'ldexp',
'linalg.multi_dot',
'linalg.pinv',
'linspace',
'linspacetensor_overload',
'log10',
'log1p',
'log2',
'log',
'logical_and',
'logical_not',
'logical_or',
'logical_xor',
'logsumexp',
'long',
'masked.mean',
'masked.prod',
'masked.std',
'masked.sum',
'masked.var',
'masked.logsumexp',
'matmul',
'mean',
'mm',
'mv',
'ne',
'neg',
'nn.functional.padconstant',
'nn.functional.padreflect',
'nn.functional.padreplicate',
'nn.functional.pixel_shuffle',
'nn.functional.pixel_unshuffle',
'nn.functional.rms_norm',
'nn.functional.softsign',
'pinverse',
'prod',
'reciprocal',
'roll',
'rot90',
'rsqrt',
'short',
'sigmoid',
'sin',
'sinh',
'sqrt',
'square',
'stack',
'stft',
'sum',
'sum_to_size',
'tan',
'tensordot',
'trace',
'trapz',
'trapezoid',
'tril',
'triu',
'true_divide',
'vstack',
'where',
'byte',
}
# Those ops worked on MacOS 12, but are broken on MacOS 13, see https://github.com/pytorch/pytorch/issues/85758
MACOS_BEFORE_13_3_XFAILLIST = {
# Failures due to precision issues (due to fast-math). These have been fixed in MacOS 13.3+
'tan': [torch.float32],
'cdist': [torch.float32],
# CPU Error: cpu not giving nan for x/0.0
'atan2': [torch.bool, torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
# tests below pass on macOS 12 as it falls back to cpu
# Argsort case using duplicate indices (undefined behaviour):
# - CPU output: tensor([2546, 6917, 3181, ..., 7128, 5133, 30], device='cpu')
# - MPS output: tensor([2546, 6917, 3181, ..., 7128, 30, 5133], device='mps:0')
# Elements from index 30 and 5133 are both equal.
# Since CPU is not using argsort with stable=True, these cases result in undefined behaviour.
'argsort': [torch.float16, torch.int8, torch.uint8, torch.bool],
# Same issue as `argsort` with duplicate indices. This test checks both the sorted values and the indices.
# The values of the sorted tensor match the CPU, but in case of the returned indices this results in undefined behaviour.
'sort': [torch.int8, torch.uint8, torch.bool, torch.float16],
# Unsupported dtypes
'cumsum': [torch.int64],
'cumprod': [torch.int64],
'cumulative_trapezoid': [torch.int64],
'masked.cumsum': [torch.int64],
'masked.cumprod': [torch.int64],
'linalg.vander': [torch.int64],
}
MACOS_AFTER_13_1_XFAILLIST = {
# before macOS 13.2 it falls back to cpu and passes the forward pass
'grid_sampler_2d': [torch.float32, torch.float16, torch.bfloat16], # Unsupported Border padding mode
}
MACOS_13_3_XFAILLIST = {
# Failure due to precision issue for fp16
# on both cpu and mps there are test cases that might produce inf result
# 'nn.functional.pairwise_distance': [torch.float16],
# tests below pass on macOS 12 as it falls back to cpu
# Argsort case using duplicate indices (undefined behaviour):
# - CPU output: tensor([2546, 6917, 3181, ..., 7128, 5133, 30], device='cpu')
# - MPS output: tensor([2546, 6917, 3181, ..., 7128, 30, 5133], device='mps:0')
# Elements from index 30 and 5133 are both equal.
# Since CPU is not using argsort with stable=True, these cases result in undefined behaviour.
'argsort': [torch.float16, torch.int8, torch.uint8, torch.bool, torch.bfloat16],
# Same issue as `argsort` with duplicate indices. This test checks both the sorted values and the indices.
# The values of the sorted tensor match the CPU, but in case of the returned indices this results in undefined behaviour.
'sort': [torch.int8, torch.uint8, torch.bool, torch.float16, torch.bfloat16],
}
MACOS_BEFORE_14_4_XFAILLIST = {
# These ops work fine in 14.4 but fail in 14.2 or 13.x
'fft.hfft2': [torch.complex64],
}
# Those ops are not expected to work
UNIMPLEMENTED_XFAILLIST = {
# Failures due to lack of op implementation on MPS backend
'login': None,
'logspace': None,
'logspacetensor_overload': None,
'linalg.eig': None,
'linalg.eigvals': None,
'put': None,
'nn.functional.conv_transpose3d': None,
'__rsub__': None,
'cauchy_': None,
'cauchy': None,
'cholesky_inverse': None,
'cholesky_solve': None,
'cummax': None,
'cummin': None,
'erfc': None,
'frexp': None,
'gcd': None,
'geqrf': None,
'nn.functional.grid_sample': None, # Unsupported Border padding mode
'heaviside': None,
'igamma': None,
'igammac': None,
'index_copy': None,
'index_reduceprod': None,
'index_reducemean': None,
'index_reduceamax': None,
'index_reduceamin': None,
'kthvalue': None,
'lcm': None,
'linalg.cond': None,
'linalg.eigh': None,
'linalg.eigvalsh': None,
'linalg.householder_product': None,
'linalg.ldl_factor': None,
'linalg.ldl_factor_ex': None,
'linalg.ldl_solve': None,
'linalg.lstsq': None,
'linalg.lstsqgrad_oriented': None,
'linalg.lu': None,
'linalg.lu_solve': None,
'linalg.matrix_norm': [torch.float32],
'linalg.norm': [torch.float32],
'linalg.normsubgradients_at_zero': [torch.float32],
'linalg.qr': None,
'linalg.svdvals': None,
'linalg.vecdot': None,
'logcumsumexp': None,
'lu_solve': None,
'masked.median': None,
'matrix_exp': None,
'mode': None,
'native_dropout_backward': None,
'normnuc': None,
'nn.functional.fractional_max_pool2d': None,
'nn.functional.fractional_max_pool3d': None,
'nn.functional.adaptive_avg_pool3d': None,
'nn.functional.adaptive_max_pool3d': None,
'nn.functional.interpolatearea': None,
'nn.functional.interpolatebicubic': [torch.uint8],
'nn.functional.interpolatetrilinear': None,
'nn.functional.max_unpool1dgrad': None,
'nn.functional.max_unpool2dgrad': None,
'nn.functional.max_unpool3dgrad': None,
'nn.functional.avg_pool3d': None,
'nn.functional.ctc_loss': None,
'nn.functional.embedding_bag': None,
'nn.functional.hardshrink': None,
'nn.functional.max_pool3d': None,
'nn.functional.max_unpool1d': None,
'nn.functional.max_unpool2d': None,
'nn.functional.max_unpool3d': None,
'nn.functional.multi_margin_loss': None,
'nn.functional.multilabel_margin_loss': None,
'nn.functional.pdist': None,
'nn.functional.rrelu': None,
'nn.functional.norm': None,
'ormqr': None,
'pca_lowrank': None,
'qr': None,
'rsub': None,
'scatter_reduceamax': [torch.int32, torch.int64] if MACOS_VERSION < 15.0 else [torch.int64],
'scatter_reduceamin': [torch.int32, torch.int64] if MACOS_VERSION < 15.0 else [torch.int64],
'segment_reduce': None,
'_segment.reduce': None,
'segment.reduce': None,
'segment_reduce_offsets': None,
'_segment_reduce_offsets': None,
'_segment_reduce_lengths': None,
'_segment_reducelengths': None,
'_segment_reduceoffsets': None,
'sparse.mm': None,
'sparse.mmreduce': None,
'special.airy_ai': None,
'special.erfcx': None,
'special.laguerre_polynomial_l': None,
'special.log_ndtr': None,
'special.ndtri': None,
'svd_lowrank': None,
'symeig': None,
'take': None,
'to': None,
'to_sparse': None,
'unique': None,
'vdot': None,
'segment_reduce_': None,
'_upsample_bilinear2d_aa': [torch.uint8], # uint8 is for CPU only
'_upsample_bicubic2d_aa': [torch.uint8], # uint8 is for CPU only
'geometric' : None,
'geometric_': None,
'log_normal_': None,
'log_normal': None,
'cdouble': None,
'double': None,
'nn.functional.softminwith_dtype': None,
'log_softmaxwith_dtype': None,
'softmaxwith_dtype': None,
'float_power': None,
'full_like': None,
'linalg.matrix_rankhermitian': None,
'linalg.pinvhermitian': None,
'nonzero_static': None,
# MPS: input sizes must be divisible by output sizes
'nn.functional.adaptive_avg_pool1d': None,
'nn.functional.adaptive_avg_pool2d': None,
# Unsupported dtypes
'ones_like': None,
'zeros_like': None,
# Convolution for integral types is not supported on MPS
'nn.functional.conv1d': [torch.int64],
'nn.functional.conv2d': [torch.int64],
'nn.functional.conv3d': [torch.int64],
'nn.functional.conv_transpose1d': [torch.int64],
'nn.functional.conv_transpose2d': [torch.int64, torch.bfloat16],
# Unsupported dtypes
'dot': [torch.int64] if MACOS_VERSION < 14.0 else [],
'histc': [torch.float16, torch.bfloat16],
'index_add': [torch.int64],
# Operations not supported for integral types
'special.xlog1py': [torch.bool, torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
'special.zeta': [torch.bool, torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
'special.chebyshev_polynomial_t': [torch.bool, torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
'special.chebyshev_polynomial_u': [torch.bool, torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
'special.hermite_polynomial_h': [torch.bool, torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
'special.hermite_polynomial_he': [torch.bool, torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
# entr does not support boolean types
'special.entr': [torch.bool],
# GEMM on MPS is not supported for integral types
'nn.functional.linear': [torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
'addmmdecomposed': [torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
'addbmm': [torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
'addmm': [torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
'baddbmm': [torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
'mat': [torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
'matmul': [torch.int64] if MACOS_VERSION < 14.0 else [],
'__rmatmul__': [torch.int64] if MACOS_VERSION < 14.0 else [],
'unravel_index': [torch.int32, torch.int64],
# returned output on CPU is float64
'bincount': [torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8],
# trunc_tensor not working properly for float16 and bfloat16
'divtrunc_rounding': [torch.float16, torch.bfloat16],
'fmod': [torch.float16],
# round not working properly for float16 and bfloat16
'round': [torch.float16, torch.bfloat16],
'rounddecimals_0': [torch.bfloat16],
# bfloat16 have weird issues with rounding
'divfloor_rounding': [torch.bfloat16],
'floor_divide': [torch.bfloat16],
'remainder': [torch.bfloat16],
# atomic operations not supported
'_unsafe_masked_index_put_accumulate': [torch.bool, torch.int8, torch.uint8, torch.float16, torch.int16, torch.int64,
torch.bfloat16],
}
if MACOS_VERSION < 14.0:
# FFT and BFloat16 support was added in MacOS 14
UNIMPLEMENTED_XFAILLIST.update({
'bfloat16': None,
'fft.fft': None,
'fft.fft2': None,
'fft.fftn': None,
'fft.hfft': None,
'fft.hfft2': None,
'fft.hfftn': None,
'fft.ifft': None,
'fft.ifft2': None,
'fft.ifftn': None,
'fft.ihfft': None,
'fft.ihfft2': None,
'fft.ihfftn': None,
'fft.irfft': None,
'fft.irfft2': None,
'fft.irfftn': None,
'fft.rfft': None,
'fft.rfft2': None,
'fft.rfftn': None,
'stft': None,
# Error in TestConsistencyCPU.test_output_match_isin_cpu fails for integers,
# not reproducible in later OS. Added assert to op if used in < 14.0
'isin': [torch.int64, torch.int32, torch.int16, torch.uint8, torch.int8],
'nn.functional.max_pool2d': [torch.uint8],
})
if MACOS_VERSION < 15.0:
UNIMPLEMENTED_XFAILLIST.update({
'quantile': None,
'nanquantile': None,
})
UNDEFINED_XFAILLIST = {
# Top 60 operators
# topk fails with duplicate indices
'topk': [torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8, torch.bfloat16],
# Failures due to the random output these ops generate using the
# Philox engine, causing mismatches with CPU results
'multinomial': [torch.float16, torch.float32, torch.bfloat16], # random results
'uniform': [torch.float16, torch.float32, torch.bfloat16],
'rand_like': [torch.float16, torch.float32, torch.bfloat16],
'randint': None,
'randint_like': None,
'randn': None,
'randn_like': None,
'bernoulli': [torch.float16, torch.float32, torch.bfloat16],
'exponential': [torch.float16, torch.float32, torch.bfloat16],
'nn.functional.feature_alpha_dropoutwith_train': [torch.float16, torch.float32, torch.bfloat16],
'normal': [torch.float16, torch.float32, torch.bfloat16],
'normalin_place': [torch.float16, torch.float32, torch.bfloat16],
'normalnumber_mean': [torch.float16, torch.float32, torch.bfloat16],
'nn.functional.alpha_dropout': [torch.float16, torch.float32, torch.bfloat16],
'nn.functional.dropout': [torch.float16, torch.float32, torch.bfloat16],
'nn.functional.dropout2d': [torch.float16, torch.float32, torch.bfloat16],
'nn.functional.dropout3d': [torch.float16, torch.float32, torch.bfloat16],
# See https://github.com/pytorch/pytorch/issues/111479
'nn.functional.multi_head_attention_forward': [torch.float32, torch.float16, torch.bfloat16],
'index_put': [torch.bool, torch.uint8, torch.int8, torch.int16, torch.int64, torch.float16, torch.bfloat16],
# zero to negative integer powers are undefined
'__rpow__': [torch.int8, torch.int16, torch.int32, torch.int64],
'resize_': [torch.float16, torch.float32, torch.bfloat16],
'resize_as_': [torch.float16, torch.float32, torch.bfloat16],
# CPU Errors:
'addr': [torch.bool, torch.int16, torch.int32,
torch.int64, torch.uint8, torch.int8], # "addmv_impl_cpu" not implemented for 'Half'
'as_stridedpartial_views': None, # cpu result off, showing random values
# random results
# mps vs cpu:
# Mismatched elements: 40 / 96 (41.7%)
# Greatest absolute difference: 17.892311096191406 at index (1, 0, 2) (up to 1e-05 allowed)
# Greatest relative difference: inf at index (1, 0, 0) (up to 1.3e-06 allowed)
# cuda(2.0.0.dev20230301+cu117) vs cpu:
# Mismatched elements: 56 / 96 (58.3%)
# Greatest absolute difference: 17.892311096191406 at index (1, 0, 2) (up to 1e-05 allowed)
# Greatest relative difference: inf at index (1, 0, 0) (up to 1.3e-06 allowed)
'nn.functional.scaled_dot_product_attention': [torch.float32, torch.float16, torch.bfloat16],
}
ON_MPS_XFAILLIST = {
# Failures due to lack of implementation of downstream functions on MPS backend
# TODO: remove these once downstream function 'aten::_linalg_svd.U' has been implemented
'linalg.matrix_rank': None,
# Exception: Caused by `torch.arange(-8.001, -4.0, dtype=torch.uint8, device="mps")`
'arange': [torch.uint8],
}
EMPTY_OPS_SKIPLIST = {
# Fill tensors with uninitialized data, causing mismatch with CPU.
# They occasionally match, thus skipping them.
# See https://github.com/pytorch/pytorch/issues/100175
'new_empty': None,
'new_empty_strided': None,
'empty_strided': None,
# CPU: empty is returning all 0's and there is a mismatch with MPS
# allocation (MacOS 13). According to
# https://pytorch.org/docs/2.0/generated/torch.empty.html
'empty': None,
'empty_like': None,
'empty_permuted': None,
}
SKIPLIST = {
# Unsupported
# This doesn't work on M1, but is partially working on M2 with the exception of torch.float16
'nn.functional.conv3d': None,
}
def addDecorator(op, d) -> None:
op.decorators = list(op.decorators) if op.decorators is not None else []
op.decorators.append(d)
for op in ops:
key = op.name + op.variant_test_name
if key in EMPTY_OPS_SKIPLIST:
addDecorator(op, DecorateInfo(
unittest.skip("Skipping empty ops."),
dtypes=EMPTY_OPS_SKIPLIST[key]))
if key in SKIPLIST:
addDecorator(op, DecorateInfo(unittest.skip("Skipped!"), dtypes=SKIPLIST[key]))
for xfaillist in [UNIMPLEMENTED_XFAILLIST, UNDEFINED_XFAILLIST, ON_MPS_XFAILLIST]:
if key in xfaillist:
addDecorator(op, DecorateInfo(
unittest.expectedFailure,
dtypes=xfaillist[key]))
if key in MACOS_BEFORE_14_4_XFAILLIST and (MACOS_VERSION < 14.4):
addDecorator(op, DecorateInfo(
unittest.expectedFailure,
dtypes=MACOS_BEFORE_14_4_XFAILLIST[key]))
if key in MACOS_BEFORE_13_3_XFAILLIST and (torch.backends.mps.is_macos13_or_newer() and MACOS_VERSION < 13.3):
addDecorator(op, DecorateInfo(
unittest.expectedFailure,
dtypes=MACOS_BEFORE_13_3_XFAILLIST[key]))
if key in MACOS_AFTER_13_1_XFAILLIST and torch.backends.mps.is_macos13_or_newer(2):
addDecorator(op, DecorateInfo(
unittest.expectedFailure,
dtypes=MACOS_AFTER_13_1_XFAILLIST[key]))
if key in MACOS_13_3_XFAILLIST and (MACOS_VERSION >= 13.3):
addDecorator(op, DecorateInfo(
unittest.expectedFailure,
dtypes=MACOS_13_3_XFAILLIST[key]))
# If the op is not supported for complex types, expect it to fail
if key not in SUPPORTED_COMPLEX_OPS and (key not in AFTER_MACOS_14_0_SUPPORTED_COMPLEX_OPS or MACOS_VERSION < 14.0):
addDecorator(op, DecorateInfo(unittest.expectedFailure, dtypes=[torch.complex32, torch.complex64]))
yield op
def mps_ops_error_inputs_modifier(ops):
# Error input samples do not take a dtype argument.
XFAILLIST = {
# Exceptions are not raised
'__rmod__',
'__rsub__',
'__rpow__',
'bernoulli',
'clamp_max',
'clamp_min',
'masked_scatter',
# unsupported float64 dtype
'cat',
'complex',
'multinomial',
'nn.functional.conv1d',
'nn.functional.conv2d',
'nn.functional.conv3d',
'gather',
'scatter',
'scatter_add',
# MPS does not support tensor dimensions > 16
'amax',
'amin',
'aminmax',
# memory overlapping checks
'index_select',
# unimplemented
'logcumsumexp',
}
def addDecorator(op, d) -> None:
op.decorators = list(op.decorators) if op.decorators is not None else []
op.decorators.append(d)
for op in ops:
if op.error_inputs_func is None:
continue
key = op.name + op.variant_test_name
if key in XFAILLIST:
addDecorator(op, DecorateInfo(unittest.expectedFailure))
yield op
# Same logic as test_cuda.py
if not torch.backends.mps.is_available():
@@ -12863,7 +11989,12 @@ class TestConsistency(TestCaseMPS):
class TestErrorInputs(TestCase):
_ignore_not_implemented_error = True
@ops(mps_ops_error_inputs_modifier(test_error_inputs_op_db), dtypes=OpDTypes.none)
@ops(
mps_ops_error_inputs_modifier(
[op for op in test_error_inputs_op_db if op.error_inputs_func is not None]
),
dtypes=OpDTypes.none
)
def test_error_inputs(self, device, op):
self.assertEqual(device, "mps:0")

File diff suppressed because it is too large.
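The suppressed diff is presumably the new home of these helpers, `torch/testing/_internal/common_mps.py`, implied by the import added above; its exact contents are not shown here. A rough sketch of the module's shape, under that assumption:
```python
# torch/testing/_internal/common_mps.py -- sketch only; the real file's bodies
# are the roughly thousand lines removed from test_mps.py in the hunk above.
from torch.testing._internal.common_methods_invocations import DecorateInfo
from torch.testing._internal.common_utils import MACOS_VERSION


def mps_ops_grad_modifier(ops):
    # Attach expectedFailure/skip decorators for gradient tests on MPS.
    yield from ops  # placeholder for the relocated logic


def mps_ops_modifier(ops):
    # Attach forward-pass skip/xfail decorators, including complex-dtype gating.
    yield from ops  # placeholder for the relocated logic


def mps_ops_error_inputs_modifier(ops):
    # Attach expectedFailure decorators for error-input tests.
    yield from ops  # placeholder for the relocated logic
```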