[BE][1/5] fix typos in aten/ (#157550)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/157550
Approved by: https://github.com/albanD
ghstack dependencies: #156605, #157637
parent c8d43cbc6e
commit 4c8b408d16
@@ -1162,7 +1162,6 @@ exclude_patterns = [
 # These files are all grandfathered in, feel free to remove from this list
 # as necessary
 # NOTE: remove the patterns in the order they are listed
-'aten/**',
 'aten/src/ATen/native/**',
 'aten/src/ATen/native/q*/**',
 'aten/src/ATen/native/[a-pA-P]*/**',
@@ -458,7 +458,7 @@ if(LAPACK_FOUND)
 # would not need this at all), some of our libraries (magma in particular)
 # backend to CPU BLAS/LAPACK implementations, and so it is very important
 # we get the *right* implementation, because even if the symbols are the
-# same, LAPACK implementions may have different calling conventions.
+# same, LAPACK implementations may have different calling conventions.
 # This caused https://github.com/pytorch/pytorch/issues/7353
 #
 # We do NOT do this on Linux, since we just rely on torch_cpu to
@@ -4,7 +4,7 @@
 #include <ATen/Tensor.h>
 #include <ATen/dlpack.h>
 
-// this convertor will:
+// this converter will:
 // 1) take a Tensor object and wrap it in the DLPack tensor
 // 2) take a dlpack tensor and convert it to the ATen Tensor
 
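A minimal round trip through this converter might look like the sketch below, assuming the usual `at::toDLPack` / `at::fromDLPack` entry points declared in this header (illustrative only):

```cpp
// Illustrative DLPack round trip, assuming at::toDLPack/at::fromDLPack.
#include <ATen/ATen.h>
#include <ATen/DLConvertor.h>

int main() {
  at::Tensor t = at::arange(6, at::kFloat).reshape({2, 3});
  // 1) wrap the ATen tensor in a DLPack managed tensor
  DLManagedTensor* dl = at::toDLPack(t);
  // 2) convert the DLPack tensor back to an ATen tensor; fromDLPack takes
  //    ownership of the managed tensor and the result shares its storage
  at::Tensor back = at::fromDLPack(dl);
  TORCH_CHECK(back.equal(t));
  return 0;
}
```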
@@ -233,8 +233,8 @@ Tensor FunctionalInverses::slice_Tensor_inverse(const Tensor& base, const Tensor
 
 // NOLINTNEXTLINE(performance-unnecessary-value-param)
 Tensor FunctionalInverses::split_Tensor_inverse(const Tensor& base, const Tensor& mutated_view, InverseReturnMode inverse_return_mode, int64_t mutated_view_idx, c10::SymInt split_size, int64_t dim) {
-// It would be nice if this logic could be re-used from autograd's split_backward(), but I don't think it can.
-// For functionalization, we have only have one of the tensors from the TensorList outputed by split(), and we want to layer i
+// It would be nice if this logic could be reused from autograd's split_backward(), but I don't think it can.
+// For functionalization, we have only have one of the tensors from the TensorList outputted by split(), and we want to layer i
 // on top of the base tensor.
 // For autograd, we have all of the tensors outputted by split() and we just want to stack them.
 dim = at::maybe_wrap_dim(dim, base.dim());
@@ -286,11 +286,11 @@ void FunctionalTensorWrapper::storage_resize_(const c10::SymInt& new_size) {
 // storage resizing is severely limited: we only support resizing either to zero, or from zero bytes.
 TORCH_CHECK(new_size == 0 || curr_storage_size == 0, "new_size: ", new_size, ". curr_storage_size: ", curr_storage_size);
 // The "functionalization rule" for storage resizing is a giant no-op, mainly because we don't want
-// resize_() calls to actualy emit any ops in the functional graph.
+// resize_() calls to actually emit any ops in the functional graph.
 // How does it work?
 // Resizing up (old size == 0):
 // We do nothing in this case.
-// The expection is that for the user code to be valid, the next op that should run against the current tensor "x"
+// The expectation is that for the user code to be valid, the next op that should run against the current tensor "x"
 // will be a x.copy_(y) (or similar), that will fully overwrite the data of x.
 // If there are any outstanding aliases of x, we expect them not to be used until after the copy_() call
 // (otherwise the eager code would be invalid),
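As a concrete illustration of the pattern this comment assumes (a hedged sketch): resizing up from zero bytes emits nothing into the functional graph, and the next op is expected to overwrite the tensor completely.

```cpp
// Sketch of the expected user pattern around resize_() under functionalization.
#include <ATen/ATen.h>

void resize_then_overwrite() {
  at::Tensor x = at::empty({0});   // zero-byte storage
  x.resize_({4});                  // "resize up": treated as a no-op in the graph
  x.copy_(at::ones({4}));          // the next op fully overwrites x's data
}
```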
@@ -327,7 +327,7 @@ void FunctionalTensorWrapper::maybe_replace_storage(const Tensor& other) {
 // We're also no longer re-generate "b" fully from "a" anymore, since "a" refers to a slice of "b"'s data.
 //
 // This is probably fixable in theory, but:
-// - the fix would likey complicated the functionalization logic quite a bit.
+// - the fix would likely complicated the functionalization logic quite a bit.
 // - the primary use case for resize_() today is resizing zero-sized tensors in out= variants of operators
 // - resize_() also can give you weird results today if you try to resize_() a weirdly strided tensor.
 //
@@ -344,7 +344,7 @@ void FunctionalTensorWrapper::maybe_replace_storage(const Tensor& other) {
 set_sizes_and_strides(value_.sizes(), value_.strides());
 refresh_numel();
 // (Technically we should be guaranteed that the tensor was already contiguous,
-// since it's guaranteed not to have been a view. Doesnt hurt to run though)
+// since it's guaranteed not to have been a view. Doesn't hurt to run though)
 refresh_contiguous();
 // Swapping out the storage of a tensor (aka from a resize_() call) will update the sizes and strides of the tensor,
 // so we need to record the fact that metadata was mutated.
@@ -819,7 +819,7 @@ void setFunctionalizationReapplyViewsTLS(bool reapply_views) {
 // This function will "functionalize" it.
 // That is, it will call the operator, but removing any intermediate views/mutations
 // that are performed inside of it.
-// This is useful for LTC/XLA, which would like to re-use some of our composite kernels
+// This is useful for LTC/XLA, which would like to reuse some of our composite kernels
 // from pytorch core but not have to worry about the view ops that they might call.
 // e.g. at::block_diag
 void functionalize_op_helper(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
@@ -218,7 +218,7 @@ static Tensor safeStack(TensorList tensors) {
 // is possible for the backward function to return an undefined grad for some
 // grad_input for each example. In that case, we return an undefined grad.
 //
-// It is theoretically posssible for *some* of the examples to produce an
+// It is theoretically possible for *some* of the examples to produce an
 // undefined grad (a kernel could peek at the gradient values and return an
 // undefined tensor if it determines the gradient is full of zeros). We
 // could handle this by treating the undefined grad as a zero-filled tensor
@@ -140,7 +140,7 @@ struct TORCH_API VmapPhysicalView {
 // mapping a physical tensor to a new logical tensor (BatchedTensor)
 VmapPhysicalToLogicalMap getPhysicalToLogicalMap() const;
 
-// Maps a logical shape to a physical shape by pre-pending the batch
+// Maps a logical shape to a physical shape by prepending the batch
 // sizes to the logical shape.
 VmapDimVector getPhysicalShape(IntArrayRef logical_shape) const;
 
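The mapping itself is just a prepend; a conceptual sketch with plain containers (not the internal API):

```cpp
// Conceptual version of getPhysicalShape(): prepend the batch sizes.
#include <cstdint>
#include <vector>

std::vector<int64_t> physical_shape(const std::vector<int64_t>& batch_sizes,
                                    const std::vector<int64_t>& logical_shape) {
  std::vector<int64_t> out(batch_sizes);
  out.insert(out.end(), logical_shape.begin(), logical_shape.end());
  return out;  // e.g. batch {2, 7} + logical {3, 5} -> {2, 7, 3, 5}
}
```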
@@ -299,7 +299,7 @@ MapAllocator::MapAllocator(WithFd, std::string_view filename, int fd, int flags,
 ::close(fd);
 TORCH_CHECK(false, "unable to stretch file <", filename_, "> to the right size: ", c10::utils::str_error(last_err), " (", last_err, ")");
 }
-/* on macOS write returns with errno 45 (Opperation not supported) when used
+/* on macOS write returns with errno 45 (Operation not supported) when used
 * with a file descriptor obtained via shm_open
 */
 #ifndef __APPLE__
@@ -211,7 +211,7 @@ NestedTensorImpl::NestedTensorImpl(
 }
 
 // assume contiguous, `nested_strides` and `offsets`
-// can be infered from `nested_sizes`
+// can be inferred from `nested_sizes`
 NestedTensorImpl::NestedTensorImpl(
 const at::Tensor& buffer,
 const at::Tensor& nested_sizes)
@@ -32,7 +32,7 @@ struct TORCH_API NestedTensorImpl : public c10::TensorImpl {
 at::Tensor nested_strides,
 at::Tensor storage_offsets);
 // assume contiguous, `nested_strides` and `offsets`
-// can be infered from `nested_sizes`
+// can be inferred from `nested_sizes`
 explicit NestedTensorImpl(
 const at::Tensor& buffer,
 const at::Tensor& nested_sizes);
@@ -93,12 +93,12 @@ ident: identity for binary combination function sf. sf(ident, x) needs to return
 x.
 
 f: function for reduction over a chunk. f needs to be of signature scalar_t
-f(int64_t partial_begin, int64_t partial_end, scalar_t identifiy)
+f(int64_t partial_begin, int64_t partial_end, scalar_t identify)
 
 sf: function to combine two partial results. sf needs to be of signature
 scalar_t sf(scalar_t x, scalar_t y)
 
-For example, you might have a tensor of 10000 entires and want to sum together
+For example, you might have a tensor of 10000 entries and want to sum together
 all the elements. Parallel_reduce with a grain_size of 2500 will then allocate
 an intermediate result tensor with 4 elements. Then it will execute the function
 "f" you provide and pass the beginning and end index of these chunks, so
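A usage sketch matching that description, assuming the `at::parallel_reduce(begin, end, grain_size, ident, f, sf)` overload from ATen/Parallel.h:

```cpp
// Sum the values 0..9999 in chunks of 2500: f reduces one chunk, sf combines partials.
#include <ATen/Parallel.h>
#include <cstdint>

int64_t parallel_sum() {
  return at::parallel_reduce(
      /*begin=*/int64_t(0), /*end=*/int64_t(10000), /*grain_size=*/int64_t(2500),
      /*ident=*/int64_t(0),
      // f: reduce one chunk [partial_begin, partial_end), starting from ident
      [](int64_t partial_begin, int64_t partial_end, int64_t ident) {
        int64_t acc = ident;
        for (int64_t i = partial_begin; i < partial_end; ++i) {
          acc += i;
        }
        return acc;
      },
      // sf: combine two partial results
      [](int64_t x, int64_t y) { return x + y; });
}
```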
@@ -252,7 +252,7 @@ inline Tensor applySelect(
 // Note: `size >= -index` is not equivalent to `size > -1 - index` if index
 // is INT64_MIN For std::numeric_limits<int64_t>::min() result of unary
 // minus is undefined by the standard but in practice is equal to self. On
-// the other hand, indexing wraping is valid for all negative int64_t
+// the other hand, indexing wrapping is valid for all negative int64_t
 // values, as x[INT64_MIN] is the same as x[INT64_MAX]
 TORCH_CHECK_INDEX(
 size.sym_gt(-1 - index)
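In plain arithmetic, the point of that note is that `-index` overflows for `INT64_MIN`, while `-1 - index` stays representable for every negative index:

```cpp
// size > -1 - index is safe for all negative index values, including INT64_MIN
// (where -1 - INT64_MIN == INT64_MAX); computing -index there would overflow.
#include <cstdint>

bool negative_index_in_bounds(int64_t size, int64_t index) {
  return size > -1 - index;
}
```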
@@ -208,7 +208,7 @@ bool TensorIteratorConfig::is_tensor_const(size_t idx) {
 // same strides are increasing. If dimensions are non-increasing, we move on to the next input to break the tie.
 //
 // Instead of applying rule 4 for tie breaking, we could move on to the next tensor directly. This would result in possibly
-// losing the correct permuation of the first tensor if there are permuted trivial dimensions, but could potentially
+// losing the correct permutation of the first tensor if there are permuted trivial dimensions, but could potentially
 // improve traversal order of the second tensor. We chose the former option to better propagate channels last layout
 // for example for a tensor with the sizes N1H1
 // These rules result in the intuitive behavior that in most cases recovers permutation of either the first argument (if all

@@ -244,7 +244,7 @@ void TensorIteratorBase::reorder_dimensions() {
 // initialize perm with n-1, n-2, ..., 1, 0
 std::iota(perm_.rbegin(), perm_.rend(), 0);
 
-// Reordering dimensions changes iteraton order
+// Reordering dimensions changes iteration order
 if (enforce_linear_iteration_) {
 permute_dimensions(perm_);
 return;
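For reference, the `std::iota` over reverse iterators above is what yields the reversed identity permutation; a standalone sketch:

```cpp
// Filling through rbegin()/rend() produces {n-1, n-2, ..., 1, 0}.
#include <numeric>
#include <vector>

std::vector<int> reversed_identity_perm(int n) {
  std::vector<int> perm(n);
  std::iota(perm.rbegin(), perm.rend(), 0);
  return perm;  // e.g. n = 4 -> {3, 2, 1, 0}
}
```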
@@ -388,7 +388,7 @@ struct TORCH_API TensorIteratorBase : public impl::MetaBase {
 
 /// Return scalar value from original_tensor_base if it is defined. When
 /// common_dtype is Half, casting scalar input to common_dtype might overflow.
-/// If the scalar is aleady given in the type of Half, then return scalar
+/// If the scalar is already given in the type of Half, then return scalar
 /// value from tensor_base.
 template <typename T>
 T original_scalar_value(int64_t arg) {
@@ -502,7 +502,7 @@ struct TORCH_API TensorIteratorBase : public impl::MetaBase {
 /// kernels
 bool can_use_32bit_indexing() const;
 
-/// An "iteratable" object that recursively splits this iterator into
+/// An "iterable" object that recursively splits this iterator into
 /// sub-iterators that can use 32-bit indexing.
 SplitUntil32Bit with_32bit_indexing() const;
 
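The usual consumption pattern is to recurse over the split until 32-bit indexing is safe; a hedged sketch assuming only these two member functions:

```cpp
// Recurse until every sub-iterator can use 32-bit indexing, then launch.
#include <ATen/TensorIterator.h>

void launch_with_32bit_indexing(at::TensorIteratorBase& iter) {
  if (!iter.can_use_32bit_indexing()) {
    for (auto& sub_iter : iter.with_32bit_indexing()) {
      launch_with_32bit_indexing(sub_iter);
    }
    return;
  }
  // ... launch the actual kernel on `iter`, which now fits 32-bit indexing ...
}
```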
@@ -878,7 +878,7 @@ class TORCH_API TensorIteratorConfig final {
 
 // Sets the enforce_linear_iteration_ flag, which is false by default.
 // If true, iteration goes in the same order as a C-contiguous tensor
-// is layed out in memory. i.e. last dimension iterates fastest.
+// is laid out in memory. i.e. last dimension iterates fastest.
 //
 // This iteration order can be less efficient and may even prevent
 // vectorization. So only use if the correctness of your kernel depends on it.
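A hedged configuration sketch (builder method names as commonly used on `TensorIteratorConfig`; verify against the header before relying on them):

```cpp
// Request C-contiguous (last-dimension-fastest) traversal order.
#include <ATen/TensorIterator.h>

at::TensorIterator make_linear_iter(const at::Tensor& out, const at::Tensor& in) {
  return at::TensorIteratorConfig()
      .add_output(out)
      .add_input(in)
      .enforce_linear_iteration()
      .build();
}
```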
@@ -78,7 +78,7 @@ inline bool areAnyOptionalTensorSubclassLike(
 // NOTE: This function expects a scalar tensor of boolean dtype.
 // Eg.
 // Non-Composite Compliant Pattern : (t == 0).all().item<bool>()
-// Composite Compliant Patter : is_salar_tensor_true((t == 0).all())
+// Composite Compliant Pattern : is_salar_tensor_true((t == 0).all())
 inline bool is_scalar_tensor_true(const Tensor& t) {
 TORCH_INTERNAL_ASSERT(t.dim() == 0)
 TORCH_INTERNAL_ASSERT(t.scalar_type() == kBool)
@@ -378,9 +378,9 @@ inline static std::optional<ResultVec> computeStride_impl(
 (TORCH_GUARD_OR_TRUE(sym_ne(oldshape[tensor_d - 1], 1)) &&
 TORCH_GUARD_OR_TRUE(sym_ne(oldstride[tensor_d - 1], tensor_numel * chunk_base_stride)))) {
 // We want to accumulate stuff in view_numel until view_numel == tensor_numel, if we do not
-// know if that is satisfied we keep accumalating. For example if view_numel = 1 and tensor_numel = u1,
+// know if that is satisfied we keep accumulating. For example if view_numel = 1 and tensor_numel = u1,
 // we want to take that path, view_numel will become u0. Next iteration if u0==u1 we want to stop.
-// Thats why we use TORCH_GUARD_OR_TRUE below.
+// That's why we use TORCH_GUARD_OR_TRUE below.
 
 // we use TORCH_GUARD_OR_FALSE and not TORCH_GUARD_OR_TRUE when comparing newshape[view_d] ==1 because
 // if we know view_numel < tensor_numel is false, we want to stop. Unless we know for sure newshape[view_d]==1
@@ -27,7 +27,7 @@
 // ops (ops being called by other ops). After the intermediate op call
 // finishes it's set back to the original `TracingState` object.
 //
-// The `TracingState` obect in TLS can also be read/written via its Python
+// The `TracingState` object in TLS can also be read/written via its Python
 // binding in `python_tracer.cpp`, and `get/setTracingState()` C++ APIs,
 // which are also exposed as `TORCH_API`.
 //
@@ -95,7 +95,7 @@ namespace at {
 m.impl("clone", torch::CppFunction::makeFallthrough());
 m.impl("dot", torch::CppFunction::makeFallthrough());
 m.impl("vdot", torch::CppFunction::makeFallthrough());
-// The functions in the list below have a specific registeration in native_functions.yaml and
+// The functions in the list below have a specific registration in native_functions.yaml and
 // do not use the fallback.
 // m.impl("mul.Tensor", torch::CppFunction::makeFallthrough());
 // m.impl("add.Tensor", torch::CppFunction::makeFallthrough());
@@ -377,7 +377,7 @@ Keep it simple for now by assuming only one such flag is
 present in the argument list. If I ever need a function
 with more than flag I'll figure out something else.
 The policy is:
-If the user has explicity specified a dtype, respect it.
+If the user has explicitly specified a dtype, respect it.
 Otherwise, set it to the autocast type.
 ********************************************************/
 
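That policy reduces to a one-line fallback; a hedged sketch with a hypothetical helper (not the autocast implementation itself):

```cpp
// Respect an explicitly supplied dtype, otherwise fall back to the autocast dtype.
#include <c10/core/ScalarType.h>
#include <optional>

c10::ScalarType pick_dtype(std::optional<c10::ScalarType> user_dtype,
                           c10::ScalarType autocast_dtype) {
  return user_dtype.has_value() ? *user_dtype : autocast_dtype;
}
```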
@@ -199,7 +199,7 @@ typedef struct {
 * `byte_offset` field should be used to point to the beginning of the data.
 *
 * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
-* TVM, perhaps others) do not adhere to this 256 byte aligment requirement
+* TVM, perhaps others) do not adhere to this 256 byte alignment requirement
 * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
 * (after which this note will be updated); at the moment it is recommended
 * to not rely on the data pointer being correctly aligned.
@@ -26,7 +26,7 @@ static void load_platform_library() {
 (void)run_once;
 }
 
-// NnapiCompilation functon definitions:
+// NnapiCompilation function definitions:
 
 // Could possibly call load_platform_library in constructor, but error reporting
 // can be complicated if the constructor is called during model loading.
@@ -666,7 +666,7 @@ void record_function_with_scope_and_debug_handle(
 guard, fn, debug_handle, inputs, ##__VA_ARGS__); \
 }
 
-// Helper macros to record LITE INTERPETER scope events with debug handles
+// Helper macros to record LITE INTERPRETER scope events with debug handles
 #define RECORD_EDGE_SCOPE_WITH_DEBUG_HANDLE_AND_INPUTS( \
 fn, debug_handle, inputs) \
 RECORD_WITH_SCOPE_DEBUG_HANDLE_AND_INPUTS( \
@@ -5,7 +5,7 @@
 
 // NOTE: This condition is true for all PyTorch internal libraries, it
 // just excludes external projects such as torch_xla which
-// re-use some of the PyTorch codegen machinery.
+// reuse some of the PyTorch codegen machinery.
 #if defined(CAFFE2_BUILD_MAIN_LIB) || \
 defined(TORCH_CUDA_BUILD_MAIN_LIB) || \
 defined(TORCH_HIP_BUILD_MAIN_LIB) || \
@@ -491,7 +491,7 @@ class TORCH_API Tensor: public TensorBase {
 "attribute won't be populated during autograd.backward(). If you indeed want the .grad "
 "field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. "
 "If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor "
-"instead. See github.com/pytorch/pytorch/pull/30531 for more informations.");
+"instead. See github.com/pytorch/pytorch/pull/30531 for more information.");
 }
 return maybe_grad;
 }
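The advice in that warning, as a hedged C++ frontend sketch:

```cpp
// Call retain_grad() on a non-leaf tensor if its .grad() should be populated.
#include <torch/torch.h>

void retain_grad_example() {
  auto x = torch::ones({2, 2}, torch::requires_grad());
  auto y = x * 3;      // non-leaf
  y.retain_grad();     // without this, y.grad() stays undefined after backward()
  y.sum().backward();
  auto gy = y.grad();  // now defined
  TORCH_CHECK(gy.defined());
}
```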
@@ -199,7 +199,7 @@ int main(int argc, char* argv[]) {
 
 #ifdef C10_MOBILE
 // Need to disable mkldnn for this test since it allocated memory
-// via raw_allocate inteface which requires context pointer and raw
+// via raw_allocate interface which requires context pointer and raw
 // pointer to be the same. Tis is not true for mobile allocator.
 at::globalContext().setUserEnabledMkldnn(false);
 #endif
@@ -25,7 +25,7 @@ TEST(TestHalf, Arithmetic) {
 ASSERT_EQ(one + one, 2);
 }
 
-TEST(TestHalf, Comparisions) {
+TEST(TestHalf, Comparisons) {
 Half zero = 0;
 Half one = 1;
 ASSERT_LT(zero, one);
@@ -9,7 +9,7 @@ using namespace at;
 TEST(TestUndefined, UndefinedTest) {
 manual_seed(123);
 
-// mainly test ops on undefined tensors don't segfault and give a reasonable errror message.
+// mainly test ops on undefined tensors don't segfault and give a reasonable error message.
 Tensor und;
 Tensor ft = ones({1}, CPU(kFloat));
 
@@ -5,7 +5,7 @@ namespace {
 template <typename T>
 class Memory : public ::testing::Test {};
 template <typename T>
-class Arithmetics : public ::testing::Test {};
+class Arithmetic : public ::testing::Test {};
 template <typename T>
 class Comparison : public ::testing::Test {};
 template <typename T>

@@ -92,7 +92,7 @@ namespace {
 using ComplexTypes = ::testing::Types<vcomplex, vcomplexDbl>;
 using ReducedFloatTestedTypes = ::testing::Types<vBFloat16, vHalf>;
 TYPED_TEST_SUITE(Memory, ALLTestedTypes);
-TYPED_TEST_SUITE(Arithmetics, FloatIntTestedTypes);
+TYPED_TEST_SUITE(Arithmetic, FloatIntTestedTypes);
 TYPED_TEST_SUITE(Comparison, RealFloatIntReducedFloatTestedTypes);
 TYPED_TEST_SUITE(Bitwise, FloatIntTestedTypes);
 TYPED_TEST_SUITE(MinMax, RealFloatIntTestedTypes);

@@ -691,7 +691,7 @@ namespace {
 AssertVectorized<vec>(NAME_INFO(DeInterleave FirstHalf), std::get<0>(cc), vec::loadu(vals)).check(true);
 AssertVectorized<vec>(NAME_INFO(DeInterleave SecondHalf), std::get<1>(cc), vec::loadu(vals + vec::size())).check(true);
 }
-TYPED_TEST(Arithmetics, Plus) {
+TYPED_TEST(Arithmetic, Plus) {
 using vec = TypeParam;
 using VT = ValueType<TypeParam>;
 test_binary<vec>(

@@ -703,7 +703,7 @@ namespace {
 createDefaultBinaryTestCase<vec>(TestSeed()),
 RESOLVE_OVERLOAD(filter_add_overflow));
 }
-TYPED_TEST(Arithmetics, Minus) {
+TYPED_TEST(Arithmetic, Minus) {
 using vec = TypeParam;
 using VT = ValueType<TypeParam>;
 test_binary<vec>(

@@ -715,7 +715,7 @@ namespace {
 createDefaultBinaryTestCase<vec>(TestSeed()),
 RESOLVE_OVERLOAD(filter_sub_overflow));
 }
-TYPED_TEST(Arithmetics, Multiplication) {
+TYPED_TEST(Arithmetic, Multiplication) {
 using vec = TypeParam;
 test_binary<vec>(
 NAME_INFO(mult),

@@ -724,7 +724,7 @@ namespace {
 createDefaultBinaryTestCase<vec>(TestSeed(), false, true),
 RESOLVE_OVERLOAD(filter_mult_overflow));
 }
-TYPED_TEST(Arithmetics, Division) {
+TYPED_TEST(Arithmetic, Division) {
 using vec = TypeParam;
 TestSeed seed;
 test_binary<vec>(
@@ -531,7 +531,7 @@ template <typename T>
 std::enable_if_t<is_complex<T>::value, void>
 filter_div_ub(T& val1, T& val2) {
 //missing
-//at least consdier zero division
+//at least consider zero division
 auto ret = std::abs(val2);
 if (ret == 0) {
 val2 = T(1, 2);

@@ -1291,7 +1291,7 @@ std::enable_if_t<is_complex<Complex<T>>::value, Complex<T>> local_multiply(Compl
 T y_real = y.real();
 T y_imag = y.imag();
 #if defined(CPU_CAPABILITY_VSX) || defined(CPU_CAPABILITY_ZVECTOR)
-//check multiplication considerin swap and fma
+//check multiplication considering swap and fma
 T rr = x_real * y_real;
 T ii = x_imag * y_real;
 T neg_imag = -y_imag;

@@ -1362,7 +1362,7 @@ std::enable_if_t<is_complex<Complex<T>>::value, Complex<T>> local_division(Compl
 return Complex<T>(rr, ii);
 #else /* defined(CPU_CAPABILITY_ZVECTOR) */
 #if defined(CPU_CAPABILITY_VSX)
-//check multiplication considerin swap and fma
+//check multiplication considering swap and fma
 T rr = x_real * y_real;
 T ii = x_imag * y_real;
 T neg_imag = -y_imag;
@@ -1232,7 +1232,7 @@ void test_matmul(
 }
 
 TEST_F(VulkanAPITest, DISABLED_matmul_3d_weight_vulkan) {
-// This will call at::bmm. Will crash for unknow reason.
+// This will call at::bmm. Will crash for unknown reason.
 const auto m1_cpu =
 at::rand({13, 23, 45}, at::device(at::kCPU).dtype(at::kFloat));
 const auto m2_cpu =

@@ -1241,7 +1241,7 @@ TEST_F(VulkanAPITest, DISABLED_matmul_3d_weight_vulkan) {
 }
 
 TEST_F(VulkanAPITest, DISABLED_matmul_3d_weight_cpu) {
-// This will call at::bmm. Will crash for unknow reason.
+// This will call at::bmm. Will crash for unknown reason.
 const auto m1_cpu =
 at::rand({13, 23, 45}, at::device(at::kCPU).dtype(at::kFloat));
 const auto m2_cpu =

@@ -2004,7 +2004,7 @@ TEST_F(VulkanAPITest, conv2d_pw_prepack_bc_medium) {
 1); // groups
 }
 
-// The followin 2 tests failed on Meta's CI when all tests are executed. Output
+// The following 2 tests failed on Meta's CI when all tests are executed. Output
 // has lots of nan. Cause unknown.
 // When this test is run alone (with gtest_filter), it passes.
 // The test also passes with smaller planes, see "conv2d_pw_prepack_medium".

@@ -5664,7 +5664,7 @@ TEST_F(VulkanAPITest, var_2d_unbiased) {
 test_var({3, 5}, {1}, true, true);
 test_var({3, 5}, {1}, true, false);
 
-// inpu.dim() == dim_list.size(), only keepdim == true is supported
+// input.dim() == dim_list.size(), only keepdim == true is supported
 test_var({3, 5}, {0, 1}, true, true);
 }
 

@@ -5672,7 +5672,7 @@ TEST_F(VulkanAPITest, var_2d_biased) {
 test_var({3, 5}, {1}, false, true);
 test_var({3, 5}, {1}, false, false);
 
-// inpu.dim() == dim_list.size(), only keepdim == true is supported
+// input.dim() == dim_list.size(), only keepdim == true is supported
 test_var({3, 5}, {0, 1}, false, true);
 }
 

@@ -7142,12 +7142,12 @@ TEST_F(VulkanAPITest, clone_success) {
 }
 
 TEST_F(VulkanAPITest, clone_invalidinputs_exceptions) {
-// Act: Vulkan supports Preserve and Contiguous memory foramts
+// Act: Vulkan supports Preserve and Contiguous memory formats
 EXPECT_THROW({
 clone_test({2, 3, 5, 161}, c10::MemoryFormat::ChannelsLast);
 }, ::std::exception);
 
-// Act: Vulkan supports Preserve and Contiguous memory foramts
+// Act: Vulkan supports Preserve and Contiguous memory formats
 EXPECT_THROW({
 clone_test({2, 3, 5, 161}, c10::MemoryFormat::ChannelsLast3d);
 }, ::std::exception);
@@ -2116,7 +2116,7 @@ std::tuple<double, double, int, int> produce_inputs_for_binary_op(
 input2_cpu = produce_random_tensor(input2_shape);
 
 if (compute_quantization_params) {
-// compute appropiate scale and zero point for inputs
+// compute appropriate scale and zero point for inputs
 const auto in1_quant_params = compute_quant_params(input1_cpu);
 in1_scale = std::get<0>(in1_quant_params);
 in1_zero_point = std::get<1>(in1_quant_params);

@@ -2287,7 +2287,7 @@ void test_quantized_binary_op(
 apply_cpu_quantized_binary_op(op_name, input1_cpu_deq, input2_cpu_deq);
 
 if (compute_quantization_params || random_quantization_params) {
-// compute appropiate scale and zero point for output
+// compute appropriate scale and zero point for output
 const auto out_quant_params = compute_quant_params(output_cpu);
 out_scale = std::get<0>(out_quant_params);
 out_zero_point = std::get<1>(out_quant_params);

@@ -2540,7 +2540,7 @@ void test_quantized_conv2d(
 bias_cpu = produce_random_tensor(bias_shape, 1.26, 5.97, 0.59);
 
 if (compute_quantization_params) {
-// compute appropiate scale and zero point for input, weight and bias
+// compute appropriate scale and zero point for input, weight and bias
 const auto in_quant_params = compute_quant_params(input_cpu, in_dtype);
 in_scale = std::get<0>(in_quant_params);
 in_zero_point = std::get<1>(in_quant_params);

@@ -2624,7 +2624,7 @@ void test_quantized_conv2d(
 groups);
 
 if (compute_quantization_params || random_quantization_params) {
-// compute appropiate scale and zero point for output
+// compute appropriate scale and zero point for output
 const auto out_quant_params = compute_quant_params(output_cpu, out_dtype);
 out_scale = std::get<0>(out_quant_params);
 out_zero_point = std::get<1>(out_quant_params);

@@ -3524,7 +3524,7 @@ TEST_F(VulkanAPITest, linear_4d_large) {
 test_quantized_linear({9, 13, 11, 17}, {23, 17}, {23});
 }
 
-// The following code is not directly releated to quantization. We put it here
+// The following code is not directly related to quantization. We put it here
 // since we are not able to run this test on GH's CI: for some unknown reason,
 // we are not able to reference symbols in the vulkan directory, hence the build
 // on GH fails. Moving the test here so we are still able to run it on

@@ -3566,7 +3566,7 @@ TEST_F(VulkanAPITest, extract_texel_test) {
 // is the channel count.
 // We always start a new batch on a new z. Hence, when c cannot be divided by
 // 4, there are some undefined values in the padding area. We use -1 to
-// indicate that we are not performing comparsion on those values.
+// indicate that we are not performing comparison on those values.
 std::tuple<ivec3, ivec4> test_cases[]{
 {{0, 0, 0}, {0, hw, 2 * hw, 3 * hw}},
 {{1, 0, 0}, {1, hw + 1, 2 * hw + 1, 3 * hw + 1}},

@@ -3672,7 +3672,7 @@ TEST_F(VulkanAPITest, channel_to_width_packing_test) {
 at::Tensor output = at::native::vulkan::ops::convert(v_output);
 
 // This tensor will be width-packed. Meaning that each texel represent
-// consecutive elements along the width dimension. The differece between
+// consecutive elements along the width dimension. The difference between
 // consecutive texels is 1.
 std::tuple<ivec3, ivec4> test_cases[]{
 {{0, 0, 0}, {0, 1, 2, 3}},
@@ -12,7 +12,7 @@ namespace at::xpu {
 * must match the same device.
 *
 * Currently, XPUEvent does NOT support to export an inter-process event from
-* another process via inter-process comunication(IPC). So it means that
+* another process via inter-process communication(IPC). So it means that
 * inter-process communication for event handles between different processes is
 * not available. This could impact some applications that rely on cross-process
 * synchronization and communication.
@@ -8,7 +8,7 @@ multiple variants of the library, summarized here:
 * THC = TorcH Cuda
 * THCS = TorcH Cuda Sparse (now defunct)
 * THNN = TorcH Neural Network (now defunct)
-* THS = TorcH Sparse (now defunct)
+* THS = TorcH Sparse (now defunct) <!-- codespell:ignore -->
 
 (You'll also see these abbreviations show up in symbol names.)
 