Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-06 12:20:52 +01:00
[2/N][Fix] Fix typo in test folder (#166374)
Fix typo in test folder.

_typos.toml
```bash
[default.extend-words]
nd = "nd"
arange = "arange"
Nd = "Nd"
GLOBALs = "GLOBALs"
hte = "hte"
iy = "iy"
PN = "PN"
Dout = "Dout"
optin = "optin"
gam = "gam"
PTD = "PTD"
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/166374
Approved by: https://github.com/cyyever, https://github.com/ezyang
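For context, `_typos.toml` with `[default.extend-words]` is the configuration format of the typos spell-checker (crate-ci/typos); the entries above whitelist identifiers so they are not reported as misspellings. A minimal sketch of how such a run might look, assuming the typos CLI is installed (the exact invocation is an assumption and is not part of this commit):

```bash
# Assumed usage, not part of this commit: run from the repository root so
# _typos.toml is picked up automatically, scan only the test folder, and
# apply the suggested fixes in place.
typos test/ --write-changes
```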
This commit is contained in:
parent 1764f3a9c8
commit 695cb0d342
@@ -238,7 +238,7 @@ def pytest_pycollect_makemodule(module_path, path, parent) -> Module:
 @pytest.hookimpl(hookwrapper=True)
 def pytest_report_teststatus(report, config):
-# Add the test time to the verbose output, unforunately I don't think this
+# Add the test time to the verbose output, unfortunately I don't think this
 # includes setup or teardown
 pluggy_result = yield
 if not isinstance(report, pytest.TestReport):
@@ -584,7 +584,7 @@ TEST(CustomAutogradTest, MarkDirty) {
 }
 };
-// Clone here because modifying leafs inplace is not allowed
+// Clone here because modifying leaves inplace is not allowed
 auto x = torch::randn({5, 5}, torch::requires_grad()).clone();
 auto version_before = x._version();
 auto out = MyFunction::apply(x);
@@ -264,7 +264,7 @@ TEST_F(ParallelTest, DataParallelNumericalEquivalence_MultiCUDA) {
 input += i;
 input_dp += i;
-// non-prallel training
+// non-parallel training
 torch::optim::SGD optim(model->parameters(), torch::optim::SGDOptions(0.1));
 auto output = model->forward(input);
 auto loss = torch::mse_loss(output, torch::zeros_like(output));
@@ -188,7 +188,7 @@ Please refer to [this](https://docs.pytorch.org/docs/main/accelerator/index.html
 - Device-agnostic APIs
 - Memory Management
 - Generator
-- Distrubuted
+- Distributed
 - Custom Tensor&Storage
 - ...
 - **Improve Tests**: Add more test cases related to the integration mechanism.
@@ -216,7 +216,7 @@ class TestSavePlan(TestCase):
 # Number of plans should remain unchanged
 self.assertEqual(len(all_plans), len(deduped_plans))
-# Numer of items in the deduped plans should be less than the original plans
+# Number of items in the deduped plans should be less than the original plans
 for new_plan, old_plan in zip(deduped_plans, all_plans):
 self.assertFalse(_compare_save_plans(new_plan, old_plan))
 self.assertTrue(len(new_plan.items) < len(old_plan.items))
@@ -158,7 +158,7 @@ class RingAttentionTest(DTensorTestBase):
 # parameters because when require_grad is True, resize_ is not
 # allowed. But requires_grad of cp_q, cp_k, and cp_v are False
 # now. So we can just use context_parallel() to shard q, k, v.
-# In reality, context_paralle() should be used to shard the input.
+# In reality, context_parallel() should be used to shard the input.
 # In reality, context_parallel() should only be used to shard
 # the model inputs (batch).
@@ -701,7 +701,7 @@ class CPFlexAttentionTest(DTensorTestBase):
 )
 # TODO: change this for-loop to run_subtests
-# Use a for-loop instead of run_subtests because we need to intialize the mask
+# Use a for-loop instead of run_subtests because we need to initialize the mask
 # for each subtest. This can be baked into self._test_cp_flex_attention as
 # a str argument denoting mask type.
 for batch_size, max_seq_len, lb_type in itertools.product(
@@ -4901,7 +4901,7 @@ class NCCLTraceTest(NCCLTraceTestBase):
 for p2p_op_idx, input_sizes in zip(
 range(first_op, coalesced_op, 1), op_sizes_per_coalesce
 ):
-# the indivudal ops inside the coalescing group the individual op metadata,
+# the individual ops inside the coalescing group the individual op metadata,
 # but not the timing info coming from the actual coalesced kernel
 profiling_name = (
 "nccl:recv 0<-1" if self.rank == 0 else "nccl:send 1->0"
@@ -398,7 +398,7 @@ class NVSHMEMAll2AllTest(MultiProcContinuousTest):
 nsplits, dtype=torch.int64, device=self.device
 ).copy_(inp_splits)
 # 2 rows: output splits, output offsets
-# Initiallizing all values to -1 to check if they are updated
+# Initializing all values to -1 to check if they are updated
 out_splits_offsets = symm_mem.empty(
 (2, nsplits), dtype=torch.int64, device=self.device
 ).fill_(-1)
@@ -503,7 +503,7 @@ class NVSHMEMAll2AllTest(MultiProcContinuousTest):
 (2, nsplits), dtype=torch.int64, device=self.device
 )
 # 2 rows: output splits, output offsets
-# Initiallizing all values to -1 to check if they are updated
+# Initializing all values to -1 to check if they are updated
 out_splits_offsets = symm_mem.empty(
 (2, nsplits), dtype=torch.int64, device=self.device
 ).fill_(-1)
@@ -617,7 +617,7 @@ def dispatch_then_combine(device, align: int, group) -> None:
 inp_splits
 )
 # 2 rows: output splits, output offsets
-# Initiallizing all values to -1 to check if they are updated
+# Initializing all values to -1 to check if they are updated
 out_splits_offsets = symm_mem.empty(
 (2, nsplits), dtype=torch.int64, device=device
 ).fill_(-1)
@@ -625,7 +625,7 @@ def dispatch_then_combine(device, align: int, group) -> None:
 # Buffers for combine
 combine_out = symm_mem.empty(max_out_numel, dtype=dtype, device=device).fill_(-1)
 # 2 rows: output splits, output offsets
-# Initiallizing all values to -1 to check if they are updated
+# Initializing all values to -1 to check if they are updated
 combine_out_splits_offsets = symm_mem.empty(
 (2, nsplits), dtype=torch.int64, device=device
 ).fill_(-1)
@@ -274,7 +274,7 @@ class SymmetricMemoryTest(MultiProcContinuousTest):
 self.assertTrue(buf.eq(peer_rank + world.size() // 2).all())
-# We move AsyncTP tests to a seperate test suite because 1) Async TP ops are not
+# We move AsyncTP tests to a separate test suite because 1) Async TP ops are not
 # the core symmetric memory APIs, they are more like applications, 2)
 # MultiProcContinuousTest will skip all the following tests if a test fails (
 # we should fix this too). We still want to get the test signals for the core
@@ -621,7 +621,7 @@ class AsyncTPTest(MultiProcContinuousTest):
 # [READ ME FIRST]
 # The `SymmMemEmptySetDeviceTest` suite parameterizes whether user sets the
-# device before calling symm_mem.emtpy. Either way should work.
+# device before calling symm_mem.empty. Either way should work.
 # However, since `set_device` is persistent, we cannot use the
 # `MultiProcContinuousTest` template because the next function will be
 # "contaminated", leading to flaky tests (e.g. hang). Therefore, we use
@@ -51,7 +51,7 @@ nan
 >>> INF / INF
 nan
-However unambigous operations with inf return inf:
+However unambiguous operations with inf return inf:
 >>> INF * INF
 inf
 >>> 1.5 * INF
@@ -1711,7 +1711,7 @@ class TestBasicOps(__TestCase):
 t3 = tnew(t1)
 self.assertTrue(list(t1) == list(t2) == list(t3) == list('abc'))
-# test that tee objects are weak referencable
+# test that tee objects are weak referenceable
 a, b = tee(range(10))
 p = weakref.proxy(a)
 self.assertEqual(getattr(p, '__class__'), type(b))
@@ -2243,7 +2243,7 @@ class TestPurePythonRoughEquivalents(__TestCase):
 t3 = tnew(t1)
 self.assertTrue(list(t1) == list(t2) == list(t3) == list('abc'))
-# test that tee objects are weak referencable
+# test that tee objects are weak referenceable
 a, b = tee(range(10))
 p = weakref.proxy(a)
 self.assertEqual(getattr(p, '__class__'), type(b))
@@ -5760,7 +5760,7 @@ def forward(self, s77 : torch.SymInt, s27 : torch.SymInt, L_x_ : torch.Tensor):
 self.assertEqual(func(x, 0), opt_func(x, 0))
 def test_grad(self):
-# Write to `grad` or `_grad` should reflecte in reading from the other,
+# Write to `grad` or `_grad` should reflective in reading from the other,
 # and should be codegen-ed.
 def fn(x, y):
 x._grad = y + 1
@@ -3955,7 +3955,7 @@ def forward(self, causal_mask, fill_value):
 def test_export_custom_op_lib(self):
 ops_registered_before = set(torch.ops.mylib)
-# Assert warning for CompositeImplictAutograd op
+# Assert warning for CompositeImplicitAutograd op
 with torch.library._scoped_library("mylib", "FRAGMENT") as lib:
 lib.define("foo123(Tensor x) -> Tensor")
 lib.impl("foo123", lambda x: x.sin(), "CompositeImplicitAutograd")
@@ -2000,7 +2000,7 @@ class TestSaveLoad(TestCase):
 def test_save_load_with_multiple_empty_tensors(self) -> None:
 # Test scenario where models have multiple empty tensors
-# but with differnt data types.
+# but with different data types.
 class M(torch.nn.Module):
 def __init__(self):
 super().__init__()
@@ -115,7 +115,7 @@ class BertSelfAttention(nn.Module):
 # we can then use that as an indirect index into the embedding table values to look up the features for that index
 # this is just a `gather` primitive op. The resulting tensor will
-# have all the dimensions of embeddeding_idx (query_sequence x key_sequence),
+# have all the dimensions of embedding_idx (query_sequence x key_sequence),
 # plus all the dimensions of `embed` that were not indirectly accessed (`embedding_range`).
 # this form of indirect indexing is more straightforward than either advanced indexing or torch.gather which both
 # have a lot of dependencies on the positions of indexing tensors.
@@ -36,7 +36,7 @@ def get_public_overridable_apis(pytorch_root="/raid/rzou/pt/debug-cpu"):
 for module, module_name, src in public_docs:
 with open(f"{pytorch_root}/{src}") as f:
 lines = f.readlines()
-# APIs eitehr begin with 4 spaces or ".. autofunction::"
+# APIs either begin with 4 spaces or ".. autofunction::"
 api_lines1 = [line.strip() for line in lines if line.startswith(" " * 4)]
 api_lines2 = [
 line.strip()[len(".. autofunction:: ") :]
@@ -6399,7 +6399,7 @@ def forward(self, primals_1, primals_2, primals_3):
 # Important pieces of the graph:
 # - 4 total dense outputs.
-# This corresponds to the fact that each user fwd inpt (a, b)
+# This corresponds to the fact that each user fwd input (a, b)
 # will get a gradient that is a TwoTensor subclass,
 # so (mul_2, mul_3) will be wrapped into a.grad
 # and (div_1, div_2) will be wrapped into b.grad
@@ -8395,7 +8395,7 @@ aot_autograd_module_failures = set(
 # implementation not traceable or that there is a bug in AOTAutograd.
 torch.nn.TransformerEncoder, # DataDependentOutputException: aten.eq compares a mask input
 # to a causal mask tensor, to see if Boolean is_causal should be set
-# for TrnasformerEncoder layers, MHA and sdp custom kernels
+# for TransformerEncoder layers, MHA and sdp custom kernels
 torch.nn.Transformer, # DataDependentOutputException: aten.equal compares a mask input
 # to a causal mask tensor, to see if Boolean is_causal should be set
 # for TransformerEncoder layers, MHA and sdp custom kernels
@@ -1236,7 +1236,7 @@ def forward(self, pred_1, x_1):
 from torch.fx.passes.shape_prop import _extract_tensor_metadata, TensorMetadata
 # This is a helper function that extracts the metadata from the tensor and
-# sets the requries_grad flag to false. This is needed as we compare the
+# sets the requires_grad flag to false. This is needed as we compare the
 # metadata of the operands and the gradients
 def _extract_tensor_metadata_except_requires_grad(arg):
 metadata = _extract_tensor_metadata(arg)
@@ -118,7 +118,7 @@ class TestConstParamShapeInControlFlow(TestCase):
 graph1_node_targets = [n.target for n in traced_graph.nodes]
 graph2_node_targets = [n.target for n in traced_graph2.nodes]
-# the second graph has an exta relu function call node
+# the second graph has an extra relu function call node
 assert torch.mm in graph1_node_targets and torch.mm in graph2_node_targets
 assert (
 torch.relu not in graph1_node_targets and torch.relu in graph2_node_targets
@@ -181,7 +181,7 @@ class TestGraphTransformObserver(TestCase):
 @torch._inductor.config.patch("trace.provenance_tracking_level", 1)
 def test_graph_transform_observer_replace(self):
-# the node sohuld should not be duplicated
+# the node should should not be duplicated
 class Model(torch.nn.Module):
 def forward(self, x):
 y = x + 1
@@ -1865,7 +1865,7 @@ class TestFlexAttention(InductorTestCase):
 requires_grad=True,
 )
 query, key, value = make_tensor(), make_tensor(), make_tensor()
-# floor_div is not decomposed in decompostion_table is empty
+# floor_div is not decomposed in decomposition_table is empty
 attention = functools.partial(flex_attention, score_mod=score_mod_func)
 gm = make_fx(attention, decomposition_table={})(query, key, value)
 self.assertExpectedInline(
@@ -1188,7 +1188,7 @@ class TestFlexDecoding(InductorTestCase):
 requires_grad=True,
 )
 query, key, value = make_q(), make_kv(), make_kv()
-# floor_div is not decomposed in decompostion_table is empty
+# floor_div is not decomposed in decomposition_table is empty
 attention = functools.partial(flex_attention, score_mod=score_mod_func)
 gm = make_fx(attention, decomposition_table={})(query, key, value)
 self.assertExpectedInline(
@@ -1128,7 +1128,7 @@ class TestReplaceFloorDiv(InductorTestCase):
 replaced = replace_floor_div(expr)
 # Check that all floor's were replaced.
-# We shoud have no more new FloorDiv's than floor's in the original expression,
+# We should have no more new FloorDiv's than floor's in the original expression,
 # although we can have less due to simplification.
 self.assertEqual(replaced.count(sympy.floor), 0)
 self.assertLessEqual(
@@ -231,7 +231,7 @@ class LoopOrderingTest(TestCase):
 return x.to(torch.float32)
 return x
-# Wordaround the issue that call allclose on fp8 tensor triggers error
+# Workaround the issue that call allclose on fp8 tensor triggers error
 # RuntimeError: "mul_cuda" not implemented for 'Float8_e4m3fn'
 expect = tree_map(_cast, expect)
 actual = tree_map(_cast, actual)
@@ -547,7 +547,7 @@ class LoopOrderingTest(TestCase):
 # A small amount of extra memory access for:
 # - store output for the first reduction
-# - load input for the second redution
+# - load input for the second reduction
 # - store output for the second reduction
 expected_numbytes += (M * 2 + 1) * x.itemsize
@@ -384,7 +384,7 @@ class TestMaxAutotune(TestCase):
 a[:] = torch.randn((M, K), dtype=torch.float16)
 b = torch.empty_strided((K, N), (1, K), dtype=torch.float16, device=GPU_TYPE)
 b[:] = torch.randn((K, N), dtype=torch.float16)
-# allocate an output with a stride not divisble by 16, so it can't satisfy TMA alignment checks.
+# allocate an output with a stride not divisible by 16, so it can't satisfy TMA alignment checks.
 out = torch.empty_strided((M, N), (N, 1), dtype=torch.float16, device=GPU_TYPE)
 with (
@@ -74,7 +74,7 @@ class TestUtils(TestCase):
 self.assertEqual(expr.is_integer, None)
 self.assertEqual(expr.is_nonnegative, None)
 # replace abs(x) with y
-# propagte abs(x) sympy properties.
+# propagate abs(x) sympy properties.
 result = sympy_subs(expr, {expr: Symbol("y")})
 self.assertEqual(result.name, "y")
 self.assertEqual(result.is_integer, None)
@@ -17,7 +17,7 @@ from torch.testing._internal.common_utils import (
 # hacky way to skip these tests in fbcode:
 # during test execution in fbcode, test_nnapi is available during test discovery,
 # but not during test execution. So we can't try-catch here, otherwise it'll think
-# it sees tests but then fails when it tries to actuall run them.
+# it sees tests but then fails when it tries to actually run them.
 if not IS_FBCODE:
 from test_nnapi import TestNNAPI
@@ -292,7 +292,7 @@ class TestCUDA(JitTestCase):
 default_stream_id: int
 user_stream_id: int
-# The test aims at checking different stream proporties.
+# The test aims at checking different stream properties.
 @torch.jit.script
 def test_get_stream():
 device_index = torch.cuda.current_device()
@@ -499,7 +499,7 @@ class TestCUDA(JitTestCase):
 # Record the CUDA event for operation torch.mm on the current stream
 # and then test if the elapsed time is greater than 0. This test is also
-# an adaption from eager mdoe CUDA tests available at test/test_cuda.py
+# an adaption from eager mode CUDA tests available at test/test_cuda.py
 @torch.jit.script
 def test_event():
 device_index = torch.cuda.current_device()
@@ -563,7 +563,7 @@ class TestFreezing(JitTestCase):
 self.assertTrue(mf.hasattr("sub1"))
 self.assertTrue(mf.sub1.hasattr("a"))
 self.assertFalse(mf.sub1.hasattr("b"))
-# sub2 is fully folded becasue self.sub1 and self.sub2.sub are not alias (Scripting bug)
+# sub2 is fully folded because self.sub1 and self.sub2.sub are not alias (Scripting bug)
 self.assertFalse(mf.hasattr("sub2"))
 input = torch.randn(2, 2)
 output = m.forward(input)
@@ -152,7 +152,7 @@ class TestPeephole(JitTestCase):
 self.run_pass("peephole", test.graph)
 FileCheck().check_not("prim::unchecked_cast").run(test.graph)
-# refinement not optimzied out
+# refinement not optimized out
 def is_int_tensor(x):
 scalar = x.item()
 if isinstance(scalar, int):
@@ -151,7 +151,7 @@ class TestUpgraders(JitTestCase):
 version = self._load_model_version(loaded_func)
 self.assertTrue(version == 5)
-# make sure we preserve old behaviou
+# make sure we preserve old behaviour
 torch._C._calculate_package_version_based_on_upgraders(current_flag_value)
 def test_aten_linspace(self):
@@ -195,7 +195,7 @@ def maketest(module_cls, exception_msg_pattern=None, ctxmgr=None):
 class OptimizeTest(unittest.TestCase):
 test_sub = maketest(ModuleSub)
 # Same as test_sub but force aten::sub to fallback
-# We expect an exception caught because of LTC fallabck.
+# We expect an exception caught because of LTC fallback.
 test_ltc_fallback = maketest(
 ModuleSub,
 exception_msg_pattern="fallback.*aten::sub",
@@ -164,7 +164,7 @@ class TestLazyTensor(JitTestCase):
 if mark_step:
 torch._lazy.mark_step()
-# y and x should contiue to be aliased after the mark_step call.
+# y and x should continue to be aliased after the mark_step call.
 y.add_(1)
 return x
@@ -81,7 +81,7 @@ python test/mobile/model_test/gen_test_model.py ios
 The test coverage is based on the number of root ops tested in these test models. The full list of generated ops can be found in:
 https://github.com/pytorch/pytorch/blob/master/test/mobile/model_test/coverage.yaml
-In additional, the simulator tests will also report the percentage of Meta's production ops that are covered. The list of production ops changes overtime, so a Meta employee needs to regularly udpate the list it using
+In additional, the simulator tests will also report the percentage of Meta's production ops that are covered. The list of production ops changes overtime, so a Meta employee needs to regularly update the list it using
 ```
 python test/mobile/model_test/update_production_ops.py ~/fbsource/xplat/pytorch_models/build/all_mobile_model_configs.yaml
 ```
@@ -16,10 +16,10 @@ with open(sys.argv[1]) as input_yaml_file:
 model_infos = yaml.safe_load(input_yaml_file)
 for info in model_infos:
 for op in info["root_operators"]:
-# aggregate occurance per op
+# aggregate occurrence per op
 root_operators[op] = 1 + (root_operators.get(op, 0))
 for op in info["traced_operators"]:
-# aggregate occurance per op
+# aggregate occurrence per op
 traced_operators[op] = 1 + (traced_operators.get(op, 0))
 # merge dtypes for each kernel
 for kernal, dtypes in info["kernel_metadata"].items():
@@ -1009,7 +1009,7 @@ class TestConvolutionNN(NNTestCase):
 @unittest.skipIf(not TEST_CUDNN, "needs cudnn")
 def test_conv_cudnn_memory_layout_dominance(self):
 # desired behavior here is to have the memory_layout of conv.weight to
-# dominante the layout of output.
+# dominant the layout of output.
 # which is not the same as current behavior, we'll fix this in
 # following up PRs and remove the `expectedFailure` tag
 input = torch.randint(
@@ -3599,7 +3599,7 @@ class TestConvolutionNNDeviceType(NNTestCase):
 input_format=input_format,
 weight_format=weight_format,
 )
-# test when input chanels is 1 and not converted to channels last
+# test when input channel is 1 and not converted to channels last
 helper(
 nn.Conv2d,
 2,
@@ -1395,7 +1395,7 @@ class TestNNParametrization(NNTestCase):
 eval_out0 = wrapped_m(input)
 # assert eval gives same result as last training iteration
 self.assertEqual(eval_out0, last_train_out)
-# assert doing more iteartion in eval don't change things
+# assert doing more iteration in eval don't change things
 self.assertEqual(eval_out0, wrapped_m(input))
 self.assertEqual(last_train_u, spectral_norm_m._u)
 self.assertEqual(last_train_v, spectral_norm_m._v)
@@ -1440,7 +1440,7 @@ class TestNNParametrization(NNTestCase):
 class SplitAndCat(nn.Module):
 def right_inverse(self, x):
-# split the tensor in two halfs
+# split the tensor in two halves
 return torch.split(x, x.shape[1] // 2)
 def forward(self, x0, x1):
@@ -894,14 +894,14 @@ class TestPruningNN(NNTestCase):
 prune.l1_unstructured(l, "weight_ih_l0", 0.5)
 assert sum(isinstance(p, torch.nn.Parameter) for p in l._flat_weights) == 3
-# Removing the pruning reparametrization restores the Parameter
+# Removing the pruning reparameterization restores the Parameter
 prune.remove(l, "weight_ih_l0")
 assert sum(isinstance(p, torch.nn.Parameter) for p in l._flat_weights) == 4
-# Make sure that, upon removal of the reparametrization, the
+# Make sure that, upon removal of the reparameterization, the
 # `._parameters` and `.named_parameters` contain the right params.
 # Specifically, the original weight ('weight_ih_l0') should be placed
-# back in the parameters, while the reparametrization component
+# back in the parameters, while the reparameterization component
 # ('weight_ih_l0_orig') should be removed.
 assert "weight_ih_l0" in l._parameters
 assert l._parameters["weight_ih_l0"] is not None
@@ -3,7 +3,7 @@
 import torch
-# Autograd funtion that is a replica of the autograd funtion in
+# Autograd function that is a replica of the autograd function in
 # test_utility_funs.py (test_autograd_module_name)
 class CustomFunction(torch.autograd.Function):
 @staticmethod
@@ -574,7 +574,7 @@ class TestCustomTranslationTable(common_utils.TestCase):
 def test_01_specialization_with_run_decomp_is_supported(self):
 # Phi3RMSNorm changes and redo shape inference after `run_decompositions` call
-# We ned this test to make sure everything we do on fx graph is covered by
+# We need this test to make sure everything we do on fx graph is covered by
 # backed_size_oblivious
 class Phi3RMSNorm(torch.nn.Module):
 def __init__(self, hidden_size, eps=1e-6):
@@ -1935,7 +1935,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime):
 class DivModule(torch.nn.Module):
 def forward(self, x, y):
 # Add transpose to hide shape/type information
-# Otherwise shape and type are still avaiable from input.
+# Otherwise shape and type are still available from input.
 x = x.transpose(1, 2)
 y = y.transpose(1, 2)
 return x / y, torch.true_divide(x, y)
@@ -3878,7 +3878,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime):
 def test_topk_smallest_unsorted(self):
 class MyModule(torch.nn.Module):
 def forward(self, x, k):
-# When sorted=False, order of elements in the outout tensors
+# When sorted=False, order of elements in the output tensors
 # are not expected to match between PyTorch and ORT
 topk_unsorted = torch.topk(x, k, largest=False, sorted=False)
 topk_sorted = torch.topk(x, k, largest=False, sorted=True)
@@ -4361,7 +4361,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime):
 super().__init__()
 self.weight = torch.nn.Buffer(torch.ones(5))
 # torch.nn.Embedding is converted to ONNX::Gather.
-# Constant folding will be triggerred for constant inputs.
+# Constant folding will be triggered for constant inputs.
 # This pattern is common for constant mask inputs in transformer models.
 self.embed = torch.nn.Embedding(8, 3)
@@ -5389,7 +5389,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime):
 input = torch.randn(7, 3, 5)
 self._argmin_argmax_model(input)
-# Argmin and Argmax with "select_last_index" is not supprted before opset 12
+# Argmin and Argmax with "select_last_index" is not supported before opset 12
 # "select_last_index" was added in opset 12 to deal with corner case where the
 # same value appears multiple times in the tensor
 @skipIfUnsupportedMinOpsetVersion(12)
@@ -10511,7 +10511,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime):
 amax = torch.ones(4)
 scale = amax / 127.0
 zero_point = torch.zeros_like(amax, dtype=torch.int)
-# Quantize twice to test differnet branches
+# Quantize twice to test different branches
 y = torch.fake_quantize_per_channel_affine(
 input, scale, zero_point, 1, 0, 255
 )
@@ -275,7 +275,7 @@ def _empty_input_wrangler(
 def _grid_sample_input_wrangler(
 args: list[Any], kwargs: dict[str, Any]
 ) -> tuple[list[Any], dict[str, Any]]:
-# Convert string attriute to int as input
+# Convert string attribute to int as input
 inter_mode_options = {"bilinear": 0, "nearest": 1, "bicubic": 2}
 padding_mode_options = {"zeros": 0, "border": 1, "reflection": 2}
 args.append(inter_mode_options[kwargs["mode"]])
@@ -11,7 +11,7 @@ torch.package.package_exporter._gate_torchscript_serialization = False
 def generate_bc_packages():
-"""Function to create packages for testing backwards compatiblity"""
+"""Function to create packages for testing backwards compatibility"""
 if not IS_FBCODE or IS_SANDCASTLE:
 from package_a.test_nn_module import TestNnModule
@@ -17,7 +17,7 @@ packaging_directory = f"{Path(__file__).parent}/package_bc"
 class TestLoadBCPackages(PackageTestCase):
-"""Tests for checking loading has backwards compatiblity"""
+"""Tests for checking loading has backwards compatibility"""
 @skipIf(
 IS_FBCODE or IS_SANDCASTLE,
@@ -196,7 +196,7 @@ class TestMisc(PackageTestCase):
 "Tests that use temporary files are disabled in fbcode",
 )
 def test_load_python_version_from_package(self):
-"""Tests loading a package with a python version embdded"""
+"""Tests loading a package with a python version embedded"""
 importer1 = PackageImporter(
 f"{Path(__file__).parent}/package_e/test_nn_module.pt"
 )
@@ -97,7 +97,7 @@ class ModelTest(PackageTestCase):
 # how they want to save it but the 'server' can always
 # use the same API to load the package.
-# The convension is for each model to provide a
+# The convention is for each model to provide a
 # 'model' package with a 'load' function that actual
 # reads the model out of the archive.
@@ -123,7 +123,7 @@ class ModelTest(PackageTestCase):
 import torch_package_importer as resources
 # server knows to call model.load() to get the model,
-# maybe in the future it passes options as arguments by convension
+# maybe in the future it passes options as arguments by convention
 def load():
 return resources.load_pickle('model', 'pickled')
 """
@@ -43,7 +43,7 @@ from torch.utils._triton import has_triton
 # This causes an issue in the multithreading test because we check all events
 # in that test with their tids. The events that correspond to these lingering
 # threads all have TID of (uint64_t)(-1) which is invalid.
-# The work around is turnning off monitoring thread when tqdm is loaded.
+# The work around is turning off monitoring thread when tqdm is loaded.
 # Since these are unit tests, it is safe to turn off monitor thread.
 try:
 import tqdm
@@ -82,7 +82,7 @@ if TYPE_CHECKING:
 # This causes an issue in the multithreading test because we check all events
 # in that test with their tids. The events that correspond to these lingering
 # threads all have TID of (uint64_t)(-1) which is invalid.
-# The work around is turnning off monitoring thread when tqdm is loaded.
+# The work around is turning off monitoring thread when tqdm is loaded.
 # Since these are unit tests, it is safe to turn off monitor thread.
 try:
 import tqdm
@@ -21,7 +21,7 @@ from torch.testing._internal.common_utils import run_tests, TestCase
 # This causes an issue in the multithreading test because we check all events
 # in that test with their tids. The events that correspond to these lingering
 # threads all have TID of (uint64_t)(-1) which is invalid.
-# The work around is turnning off monitoring thread when tqdm is loaded.
+# The work around is turning off monitoring thread when tqdm is loaded.
 # Since these are unit tests, it is safe to turn off monitor thread.
 try:
 import tqdm
@@ -20,7 +20,7 @@ from torch.testing._internal.common_utils import run_tests, TestCase
 # This causes an issue in the multithreading test because we check all events
 # in that test with their tids. The events that correspond to these lingering
 # threads all have TID of (uint64_t)(-1) which is invalid.
-# The work around is turnning off monitoring thread when tqdm is loaded.
+# The work around is turning off monitoring thread when tqdm is loaded.
 # Since these are unit tests, it is safe to turn off monitor thread.
 try:
 import tqdm
@@ -425,7 +425,7 @@ class TestTorchTidyProfiler(TestCase):
 self.assertEqual(state[0][0], "momentum_buffer")
 self.assertEqual(state[0][1].id, weight_momenumtum_id)
-# Check that we handle first step (lazy initalization) and steady state.
+# Check that we handle first step (lazy initialization) and steady state.
 check(cold_start=True)
 check(cold_start=False)
@@ -68,7 +68,7 @@ def get_filenames(self, subname):
 class TestSerialization(TestCase):
-"""Test backward compatiblity for serialization and numerics"""
+"""Test backward compatibility for serialization and numerics"""
 # Copy and modified from TestCase.assertExpected
 def _test_op(
@@ -14,7 +14,7 @@ class TestNonUniformObserver(unittest.TestCase):
 # weight: fp tensor
 weight = 1000 * torch.rand(4, 4)
-# activtion: fp32 tensor with ~ integer values
+# activation: fp32 tensor with ~ integer values
 activation = torch.randint(low=0, high=255, size=(4, 4), dtype=torch.float)
 # calculate result from calling linear forward method
@@ -41,7 +41,7 @@ class TestNonUniformObserver(unittest.TestCase):
 # weight: fp tensor
 weight = 1000 * torch.rand(5, 3)
-# activtion: fp32 tensor with ~ integer values
+# activation: fp32 tensor with ~ integer values
 # note: transpose of activation matrix will have dimension (3, 5)
 activation = torch.randint(low=0, high=255, size=(5, 3), dtype=torch.float)
@@ -225,7 +225,7 @@ class TestQuantizedOps(TestCase):
 `output_is_observed`: if specified and is True, we'll append extra
 output_scale/output_zero_point keyword argument when calling quantized op
 """
-# Retrives the default parameters from X.
+# Retrieves the default parameters from X.
 X, (scale, zero_point, torch_type) = X
 if not isinstance(X, torch.Tensor):
 X = torch.from_numpy(X)
@@ -3584,7 +3584,7 @@ class TestDynamicQuantizedOps(TestCase):
 def test_wrapped_fbgemm_pack_gemm_matrix_fp16_pt2_compliant(self):
 # We are not using opcheck over here because the output for the op we're testing
 # (_quantized.wrapped_fbgemm_pack_gemm_matrix_fp16) is not deterministic
-# due to the C-struct it's procuding. This would fail the check when we're trying
+# due to the C-struct it's producing. This would fail the check when we're trying
 # to match the result between compiled and eager version.
 #
 # This is only a temporary solution, long term, we should be able to support PT2
@@ -5572,7 +5572,7 @@ class TestQuantizedConv(TestCase):
 )
 act_qdtypes = [torch.quint8]
-# Only qnnpack qengine supportes qint8
+# Only qnnpack qengine supports qint8
 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled:
 act_qdtypes.append(torch.qint8)
@@ -5653,7 +5653,7 @@ class TestQuantizedConv(TestCase):
 )
 act_qdtypes = [torch.quint8]
-# Only qnnpack qengine supportes qint8
+# Only qnnpack qengine supports qint8
 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled:
 act_qdtypes.append(torch.qint8)
@@ -6084,7 +6084,7 @@ class TestQuantizedConv(TestCase):
 )
 act_qdtypes = [torch.quint8]
-# Only qnnpack qengine supportes qint8
+# Only qnnpack qengine supports qint8
 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled:
 act_qdtypes.append(torch.qint8)
@@ -6210,7 +6210,7 @@ class TestQuantizedConv(TestCase):
 bias=use_bias
 )
 act_qdtypes = [torch.quint8]
-# Only qnnpack qengine supportes qint8
+# Only qnnpack qengine supports qint8
 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled:
 act_qdtypes.append(torch.qint8)
@@ -6515,7 +6515,7 @@ class TestQuantizedConv(TestCase):
 qconv = torch.ops.quantized.conv1d
 act_qdtypes = [torch.quint8]
-# Only qnnpack qengine supportes qint8
+# Only qnnpack qengine supports qint8
 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled:
 act_qdtypes.append(torch.qint8)
@@ -6586,7 +6586,7 @@ class TestQuantizedConv(TestCase):
 qconv = torch.ops.quantized.conv1d_relu
 act_qdtypes = [torch.quint8]
-# Only qnnpack qengine supportes qint8
+# Only qnnpack qengine supports qint8
 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled:
 act_qdtypes.append(torch.qint8)
@@ -765,7 +765,7 @@ class TestQuantizedTensor(TestCase):
 qr = torch.quantize_per_tensor(r, scale, zero_point, dtype=dtype)
 qr = qr.transpose(0, 1)
 rqr = qr.dequantize()
-# compare transpose + dequantized result with orignal transposed result
+# compare transpose + dequantized result with original transposed result
 self.assertTrue(np.allclose(r.cpu().numpy().transpose([1, 0, 2, 3]), rqr.cpu().numpy(), atol=2 / scale))
 qr = torch.quantize_per_tensor(r, scale, zero_point, dtype=dtype)
@@ -1210,7 +1210,7 @@ class TestQuantizedTensor(TestCase):
 if device == 'cpu':
 self.assertFalse(torch.equal(b, c))
-# a case can't view non-contiguos Tensor
+# a case can't view non-contiguous Tensor
 a_int = torch.randint(0, 100, [1, 2, 3, 4], device=device, dtype=dtype)
 a = torch._make_per_tensor_quantized_tensor(a_int, scale=scale, zero_point=zero_point)
 b = a.transpose(1, 2) # swaps 2nd and 3rd dimension
@@ -946,7 +946,7 @@ class TestFxModelReportClass(QuantizationTestCase):
 model_report = ModelReport(model_prep, test_detector_set)
-# prepare the model for callibration
+# prepare the model for calibration
 prepared_for_callibrate_model = model_report.prepare_detailed_calibration()
 # see whether observers properly in regular nn.Module
@@ -985,7 +985,7 @@ class TestFxModelReportClass(QuantizationTestCase):
 elif isinstance(detector, DynamicStaticDetector):
 self.assertEqual(len(detector_obs_of_interest_fqns), 4)
-# ensure that we can prepare for callibration only once
+# ensure that we can prepare for calibration only once
 with self.assertRaises(ValueError):
 prepared_for_callibrate_model = model_report.prepare_detailed_calibration()
@@ -1037,7 +1037,7 @@ class TestFxModelReportClass(QuantizationTestCase):
 model_full = TwoThreeOps()
 model_single = TwoThreeOps()
-# prepare and callibrate two different instances of same model
+# prepare and calibrate two different instances of same model
 # prepare the model
 example_input = model_full.get_example_inputs()[0]
 current_backend = torch.backends.quantized.engine
@@ -1052,11 +1052,11 @@ class TestFxModelReportClass(QuantizationTestCase):
 # initialize another with a single detector set
 model_report_single = ModelReport(model_prep_single, single_detector_set)
-# prepare the models for callibration
+# prepare the models for calibration
 prepared_for_callibrate_model_full = model_report_full.prepare_detailed_calibration()
 prepared_for_callibrate_model_single = model_report_single.prepare_detailed_calibration()
-# now callibrate the two models
+# now calibrate the two models
 num_iterations = 10
 for i in range(num_iterations):
 example_input = torch.tensor(torch.randint(100, (1, 3, 3, 3)), dtype=torch.float)
@@ -1109,12 +1109,12 @@ class TestFxModelReportClass(QuantizationTestCase):
 model = TwoThreeOps()
-# get tst model and callibrate
+# get tst model and calibrate
 prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper(
 model, detector_set, model.get_example_inputs()[0]
 )
-# now we actually callibrate the model
+# now we actually calibrate the model
 example_input = model.get_example_inputs()[0]
 example_input = example_input.to(torch.float)
@@ -1162,12 +1162,12 @@ class TestFxModelReportClass(QuantizationTestCase):
 model = TwoThreeOps()
-# get tst model and callibrate
+# get tst model and calibrate
 prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper(
 model, detector_set, model.get_example_inputs()[0]
 )
-# now we actually callibrate the models
+# now we actually calibrate the models
 example_input = model.get_example_inputs()[0]
 example_input = example_input.to(torch.float)
@@ -1192,7 +1192,7 @@ class TestFxModelReportClass(QuantizationTestCase):
 self.assertEqual(len(qconfig_mapping.module_name_qconfigs), 2)
 # only two linears, make sure per channel min max for weight since fbgemm
-# also static distribution since a simple single callibration
+# also static distribution since a simple single calibration
 for key in qconfig_mapping.module_name_qconfigs:
 config = qconfig_mapping.module_name_qconfigs[key]
 self.assertEqual(config.weight, default_per_channel_weight_observer)
@@ -1220,12 +1220,12 @@ class TestFxModelReportClass(QuantizationTestCase):
 model = TwoThreeOps()
-# get tst model and callibrate
+# get tst model and calibrate
 prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper(
 model, detector_set, model.get_example_inputs()[0]
 )
-# now we actually callibrate the models
+# now we actually calibrate the models
 example_input = model.get_example_inputs()[0]
 example_input = example_input.to(torch.float)
@@ -1319,7 +1319,7 @@ class TestFxDetectInputWeightEqualization(QuantizationTestCase):
 detector_set = {InputWeightEqualizationDetector(0.5)}
-# get tst model and callibrate
+# get tst model and calibrate
 non_fused = self._get_prepped_for_calibration_model(self.TwoBlockComplexNet(), detector_set)
 fused = self._get_prepped_for_calibration_model(self.TwoBlockComplexNet(), detector_set, fused=True)
@@ -1365,12 +1365,12 @@ class TestFxDetectInputWeightEqualization(QuantizationTestCase):
 test_input_weight_detector = InputWeightEqualizationDetector(0.4)
 detector_set = {test_input_weight_detector}
 model = self.TwoBlockComplexNet()
-# prepare the model for callibration
+# prepare the model for calibration
 prepared_for_callibrate_model, model_report = self._get_prepped_for_calibration_model(
 model, detector_set
 )
-# now we actually callibrate the model
+# now we actually calibrate the model
 example_input = model.get_example_inputs()[0]
 example_input = example_input.to(torch.float)
@@ -1430,7 +1430,7 @@ class TestFxDetectInputWeightEqualization(QuantizationTestCase):
 self.assertEqual(global_max, max(dimension_max))
 input_ratio = torch.sqrt((per_channel_max - per_channel_min) / (global_max - global_min))
-# ensure comparision stat passed back is sqrt of range ratios
+# ensure comparison stat passed back is sqrt of range ratios
 # need to get the weight ratios first
 # make sure per channel min and max are as expected
@@ -1474,10 +1474,10 @@ class TestFxDetectInputWeightEqualization(QuantizationTestCase):
 test_input_weight_detector = InputWeightEqualizationDetector(0.4)
 detector_set = {test_input_weight_detector}
 model = self.ReluOnly()
-# prepare the model for callibration
+# prepare the model for calibration
 prepared_for_callibrate_model, model_report = self._get_prepped_for_calibration_model(model, detector_set)
-# now we actually callibrate the model
+# now we actually calibrate the model
 example_input = model.get_example_inputs()[0]
 example_input = example_input.to(torch.float)
@@ -1531,7 +1531,7 @@ class TestFxDetectOutliers(QuantizationTestCase):
 def _get_prepped_for_calibration_model(self, model, detector_set, use_outlier_data=False):
 r"""Returns a model that has been prepared for callibration and corresponding model_report"""
-# call the general helper function to callibrate
+# call the general helper function to calibrate
 example_input = model.get_example_inputs()[0]
 # if we specifically want to test data with outliers replace input
@@ -1550,7 +1550,7 @@ class TestFxDetectOutliers(QuantizationTestCase):
 detector_set = {OutlierDetector(reference_percentile=0.95)}
-# get tst model and callibrate
+# get tst model and calibrate
 prepared_for_callibrate_model, mod_report = self._get_prepped_for_calibration_model(
 self.LargeBatchModel(param_size=128), detector_set
 )
@@ -1594,12 +1594,12 @@ class TestFxDetectOutliers(QuantizationTestCase):
 detector_set = {outlier_detector, dynamic_static_detector}
 model = self.LargeBatchModel(param_size=param_size)
-# get tst model and callibrate
+# get tst model and calibrate
 prepared_for_callibrate_model, mod_report = self._get_prepped_for_calibration_model(
 model, detector_set
 )
-# now we actually callibrate the model
+# now we actually calibrate the model
 example_input = model.get_example_inputs()[0]
 example_input = example_input.to(torch.float)
@@ -1644,12 +1644,12 @@ class TestFxDetectOutliers(QuantizationTestCase):
 detector_set = {outlier_detector}
 model = self.LargeBatchModel(param_size=param_size)
-# get tst model and callibrate
+# get tst model and calibrate
 prepared_for_callibrate_model, mod_report = self._get_prepped_for_calibration_model(
 model, detector_set
 )
-# now we actually callibrate the model
+# now we actually calibrate the model
 example_input = model.get_example_inputs()[0]
 example_input = example_input.to(torch.float)
@@ -1694,16 +1694,16 @@ class TestFxDetectOutliers(QuantizationTestCase):
 detector_set = {outlier_detector}
 model = self.LargeBatchModel(param_size=param_size)
-# get tst model and callibrate
+# get tst model and calibrate
 prepared_for_callibrate_model, mod_report = self._get_prepped_for_calibration_model(
 model, detector_set, use_outlier_data=True
 )
-# now we actually callibrate the model
+# now we actually calibrate the model
 example_input = model.get_outlier_inputs()[0]
 example_input = example_input.to(torch.float)
-# now callibrate minimum 30 times to make it above minimum threshold
+# now calibrate minimum 30 times to make it above minimum threshold
 for i in range(30):
 example_input = model.get_outlier_inputs()[0]
 example_input = example_input.to(torch.float)
@@ -1764,7 +1764,7 @@ class TestFxModelReportVisualizer(QuantizationTestCase):
 r"""
 Callibrates the passed in model, generates report, and returns the visualizer
 """
-# now we actually callibrate the model
+# now we actually calibrate the model
 example_input = model.get_example_inputs()[0]
 example_input = example_input.to(torch.float)
@@ -1796,7 +1796,7 @@ class TestFxModelReportVisualizer(QuantizationTestCase):
 model = TwoThreeOps()
-# get tst model and callibrate
+# get tst model and calibrate
 prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper(
 model, detector_set, model.get_example_inputs()[0]
 )
@@ -1843,7 +1843,7 @@ class TestFxModelReportVisualizer(QuantizationTestCase):
 model = TwoThreeOps()
-# get tst model and callibrate
+# get tst model and calibrate
 prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper(
 model, detector_set, model.get_example_inputs()[0]
 )
@@ -1953,7 +1953,7 @@ def _get_prepped_for_calibration_model_helper(model, detector_set, example_input
 model_report = ModelReport(model_prep, detector_set)
-# prepare the model for callibration
+# prepare the model for calibration
 prepared_for_callibrate_model = model_report.prepare_detailed_calibration()
 return (prepared_for_callibrate_model, model_report)
@@ -1221,7 +1221,7 @@ class TestQuantizeFx(QuantizationTestCase):
 def checkSerDeser(model, is_dynamic):
 for module_name in ("linear", "conv"):
 if hasattr(model, module_name):
-# make sure seralization works
+# make sure serialization works
 state_dict = copy.deepcopy(model.state_dict())
 all_keys = _get_keys(module_name, is_dynamic)
 for key in all_keys:
@@ -1484,7 +1484,7 @@ class TestQuantizeFx(QuantizationTestCase):
 def checkSerDeser(model, is_dynamic):
 module_name = "deconv"
 if hasattr(model, module_name):
-# make sure seralization works
+# make sure serialization works
 state_dict = copy.deepcopy(model.state_dict())
 all_keys = _get_keys(module_name, is_dynamic)
 for key in all_keys:
@@ -1569,7 +1569,7 @@ class TestQuantizeFx(QuantizationTestCase):
 def checkSerDeser(model, is_dynamic):
 module_name = "deconv"
 if hasattr(model, module_name):
-# make sure seralization works
+# make sure serialization works
 state_dict = copy.deepcopy(model.state_dict())
 all_keys = _get_keys(module_name, is_dynamic)
 for key in all_keys:
@@ -2926,7 +2926,7 @@ class TestQuantizeJitOps(QuantizationTestCase):
 m._c, "forward", {"": qconfig}, inplace=False
 )
 )
-# Checking the model before fianlize contain unfused patterns
+# Checking the model before finalize contain unfused patterns
 # that numerically matches the model after quantize by checking
 # number of aten::quantize_per_tensor functions
 # conv has 3 quantize_per_tensor for activations and 1 for weight
@@ -1682,7 +1682,7 @@ class TestQuantizePT2E(PT2EQuantizationTestCase):
 qconfig_mapping.set_object_type(torch.nn.Linear, dynamic_qconfig)
 # Had to turn off check against fx because fx quant workflow does not seem
 # to propagate observers for permute node for this model.
-# Suprisingly it does propagate it for EmbeddingConvLinearModule
+# Surprisingly it does propagate it for EmbeddingConvLinearModule
 # TODO: Figure out the right behavior for propagation
 self._test_quantizer(
 m_eager,
@@ -2253,7 +2253,7 @@ class TestQuantizePT2E(PT2EQuantizationTestCase):
 model = prepare_qat_pt2e(model, composed_quantizer)
 cur = time.time()
 # print("prepare time:", cur - prev)
-# Without Calibraiton, scale/zero value will have an initialized value of 1.0
+# Without Calibration, scale/zero value will have an initialized value of 1.0
 # Per channel quantization needs a proper scale/zero shape/value to work properly.
 # So we need to run calibration before converting to quantized model.
 model(*example_inputs)
@@ -2464,11 +2464,11 @@ class TestQuantizePT2EX86Inductor(X86InductorQuantTestCase):
 torch.ops.quantized_decomposed.dequantize_per_channel.default: 2,
 }
 node_list = [
-# Q/DQ for first lienar
+# Q/DQ for first linear
 torch.ops.quantized_decomposed.quantize_per_tensor.default,
 torch.ops.quantized_decomposed.dequantize_per_tensor.default,
 torch.ops.aten.linear.default,
-# Q/DQ for second lienar
+# Q/DQ for second linear
 torch.ops.quantized_decomposed.quantize_per_tensor.default,
 torch.ops.quantized_decomposed.dequantize_per_tensor.default,
 torch.ops.aten.linear.default,
@@ -1062,7 +1062,7 @@ class TestXNNPACKQuantizerModels(PT2EQuantizationTestCase):
 # the result matches exactly after prepare
 # Note: this currently will always be true since we are inserting observers
 # the check becomes useful when we add qat examples
-# but we can still manully inspect the printed observers to make sure
+# but we can still manually inspect the printed observers to make sure
 # it matches
 self.assertEqual(after_prepare_result, after_prepare_result_fx)
 self.assertEqual(
@@ -1626,7 +1626,7 @@ def get_selected_tests(options) -> list[str]:
 if options.xpu:
 selected_tests = exclude_tests(XPU_BLOCKLIST, selected_tests, "on XPU")
 else:
-# Exclude all xpu specifc tests otherwise
+# Exclude all xpu specific tests otherwise
 options.exclude.extend(XPU_TEST)
 # Filter to only run onnx tests when --onnx option is specified
@@ -5896,7 +5896,7 @@ Done""",
 @staticmethod
 def backward(ctx, grad):
-# Create a sparse tensor with non-contigous indices and values
+# Create a sparse tensor with non-contiguous indices and values
 # and return as grad.
 v = torch.rand(1, 3)
 i = torch.ones(1, 1, dtype=torch.long)
@@ -204,7 +204,7 @@ def side_effect_func(x: torch.Tensor):
 class TestFX(JitTestCase):
 def setUp(self):
 super().setUp()
-# Checking for mutable operations whil tracing is feature flagged
+# Checking for mutable operations while tracing is feature flagged
 # Enable it in testing but not by default
 self.orig_tracer_mutable_flag = (
 torch.fx.proxy.TracerBase.check_mutable_operations
@@ -4198,7 +4198,7 @@ def run_getitem_target():
 class TestOperatorSignatures(JitTestCase):
 def setUp(self):
-# Checking for mutable operations whil tracing is feature flagged
+# Checking for mutable operations while tracing is feature flagged
 # Enable it in testing but not by default
 self.orig_tracer_mutable_flag = (
 torch.fx.proxy.TracerBase.check_mutable_operations
@@ -4241,7 +4241,7 @@ class TestFXAPIBackwardCompatibility(JitTestCase):
 super().setUp()
 self.maxDiff = None
-# Checking for mutable operations whil tracing is feature flagged
+# Checking for mutable operations while tracing is feature flagged
 # Enable it in testing but not by default
 self.orig_tracer_mutable_flag = (
 torch.fx.proxy.TracerBase.check_mutable_operations
@@ -4597,7 +4597,7 @@ class TestFXAPIBackwardCompatibility(JitTestCase):
 class TestFunctionalTracing(JitTestCase):
 def setUp(self):
 super().setUp()
-# Checking for mutable operations whil tracing is feature flagged
+# Checking for mutable operations while tracing is feature flagged
 # Enable it in testing but not by default
 self.orig_tracer_mutable_flag = (
 torch.fx.proxy.TracerBase.check_mutable_operations
@@ -247,7 +247,7 @@ class TestIndexing(TestCase):
 x[ri([0, 2, 4]),], torch.tensor([5, 4, 3], dtype=dtype, device=device)
 )
-# Only validates indexing and setting for Halfs
+# Only validates indexing and setting for Halves
 if dtype == torch.half:
 reference = consec((10,))
 validate_indexing(reference)

@@ -4842,7 +4842,7 @@ class TestLinalg(TestCase):
self.assertTrue(torch.cuda.tunable.record_untuned_is_enabled())

make_arg = partial(make_tensor, device=device, dtype=dtype)
# offline tuning only handles matmuls on two dimensionsal tensors
# offline tuning only handles matmuls on two dimensional tensors
# matmul that require broadcasting are
# not supported either.
# Below we check the different transA and transB combinations.

@@ -4871,7 +4871,7 @@ class TestLinalg(TestCase):
continue

# offline tuning only handles batched matmuls on
# three dimensionsal tensors
# three dimensional tensors
# matmul that require broadcasting are
# not supported either.
# Below we check the different transA and transB combinations.

@@ -1520,7 +1520,7 @@ class TestMkldnn(TestCase):
h = torch.randn(num_layers * num_directions, batch_size, hidden_size, dtype=torch.float32)
c = torch.randn(num_layers * num_directions, batch_size, hidden_size, dtype=torch.float32)
if fp16:
# TODO add traing support when oneDNN support lstm FP16 training
# TODO add training support when oneDNN support lstm FP16 training
training = False
model = torch.nn.LSTM(input_size, hidden_size, num_layers, bidirectional=bidirectional,
bias=bias, dropout=dropout, batch_first=batch_first).float()

@@ -328,7 +328,7 @@ class TestModule(TestCase):

def _retain_grad(self, obj):
# gradients needs to be retained to check for grad. This is useful when
# non-leafs are present in the graph.
# non-leaves are present in the graph.
def inner_retain_grad(obj):
if obj.requires_grad:
obj.retain_grad()

@@ -7842,7 +7842,7 @@ class TestMPS(TestCaseMPS):
shape = (2, 3, 4, 5, 6)
x = torch.rand(shape, device="mps")
self.assertNotEqual(x[0], x[1])
# Check that normal distributino is not affected by the same
# Check that normal distributions is not affected by the same
y = torch.normal(torch.zeros(shape, device="mps"), torch.ones(shape, device="mps"))
self.assertNotEqual(y[0], y[1])

@@ -12644,7 +12644,7 @@ class TestConsistency(TestCaseMPS):
self.assertEqual(out_mps, out_cpu)

def test_fmax_mixed_dtypes(self, device):
# Regression tesing for https://github.com/pytorch/pytorch/issues/149951
# Regression testing for https://github.com/pytorch/pytorch/issues/149951
# fmax and fmin are implemented as binary metal shaders and they were implemented
# with the assumption that both args have the same dtype
x = torch.rand((3, 3), device=device, dtype=torch.float32)

@@ -1809,17 +1809,17 @@ tensor(..., device='meta', size=(1,), requires_grad=True)""")
num_params - 1,
)

# Removing the weight norm reparametrization restores the Parameter
# Removing the weight norm reparameterization restores the Parameter
l = torch.nn.utils.remove_weight_norm(l, name=name)
self.assertEqual(
sum(isinstance(p, torch.nn.Parameter) for p in l._flat_weights),
num_params,
)

# Make sure that, upon removal of the reparametrization, the
# Make sure that, upon removal of the reparameterization, the
# `._parameters` and `.named_parameters` contain the right params.
# Specifically, the original weight ('weight_ih_l0') should be placed
# back in the parameters, while the reparametrization components
# back in the parameters, while the reparameterization components
# ('weight_ih_l0_v' and 'weight_ih_l0_g') should be removed.
self.assertTrue(name in l._parameters)
self.assertIsNotNone(l._parameters[name])

@@ -7308,7 +7308,7 @@ tensor(..., device='meta', size=(1,), requires_grad=True)""")
count_tensor
)

# Test batch_norm_backward_elemt gives the same answer for all
# Test batch_norm_backward_element gives the same answer for all
# combinations of contiguous as channels_last input
for a, b in [
(torch.channels_last, torch.contiguous_format),

@@ -2647,7 +2647,7 @@ fake_skips = (
"linalg.eigvals", # The tensor has a non-zero number of elements, but its data is not allocated yet
"linalg.eigvalsh", # aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend
"linalg.matrix_power", # Could not run 'aten::eye.m_out' with arguments from the 'Meta' backend
# "linalg.pinv", # Could not run 'aten::pinv.out' with arguments from the 'Meta' backen
# "linalg.pinv", # Could not run 'aten::pinv.out' with arguments from the 'Meta' backend
"linalg.matrix_rank.hermitian", # Could not run 'aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend
"linalg.pinv.hermitian", # tensor.mH is only supported on matrices or batches of matrices. Got 1-D tensor
"linalg.solve", # Could not run 'aten::linalg_solve' with arguments from the 'Meta' backend

@@ -2520,7 +2520,7 @@ def forward(self, x_1):
self.last_args = args
return func(*args, **kwargs)

# Value that could not be intepreted as signed int64
# Value that could not be interpreted as signed int64
uarg = 2**63 + 1
with DummyMode() as m:
a = torch.full((3, 3), uarg, dtype=torch.uint64)

@@ -51,7 +51,7 @@ from quantization.eager.test_quantize_eager_qat import TestQuantizeEagerQAT # n
from quantization.eager.test_quantize_eager_qat import TestQuantizeEagerQATNumerics # noqa: F401
# 3. Eager mode fusion passes
from quantization.eager.test_fuse_eager import TestFuseEager # noqa: F401
# 4. Testing model numerics between quanitzed and FP32 models
# 4. Testing model numerics between quantized and FP32 models
from quantization.eager.test_model_numerics import TestModelNumericsEager # noqa: F401
# 5. Tooling: numeric_suite
from quantization.eager.test_numeric_suite_eager import TestNumericSuiteEager # noqa: F401

@@ -426,7 +426,7 @@ def data_to_nvfp4_with_global_scale(x, block_size):
# Per-tensor max
global_max = x.abs().max()

# Contants
# Constants
# Global encoding scale for block-scales
S_enc = FP4_MAX_VAL * F8E4M3_MAX_VAL / global_max
S_dec = 1. / S_enc

@@ -262,7 +262,7 @@ class TestShapeOps(TestCase):
expected = xn.diagonal(*args)
self.assertEqual(expected.shape, result.shape)
self.assertEqual(expected, result)
# test non-continguous
# test non-contiguous
xp = x.permute(1, 2, 3, 0)
result = torch.diagonal(xp, 0, -2, -1)
expected = xp.numpy().diagonal(0, -2, -1)

@@ -1333,7 +1333,7 @@ class TestSparse(TestSparseBase):
res_sparse = t.to_sparse().index_select(0, idx_empty)
self.assertEqual(res_dense, res_sparse)

# non-contigous index
# non-contiguous index
idx = torch.randint(low=0, high=5, size=(10, 2), device=device)[:, 0]

def run_test(sizes):

@@ -186,7 +186,7 @@ class TestStatelessFunctionalAPI(TestCase):
cur_rm = module.running_mean
self.assertEqual(cur_rm, prev_rm)
self.assertEqual(rm, torch.full((10,), 12.8))
# Now run functional without reparametrization and check that the module has
# Now run functional without reparameterization and check that the module has
# been updated
functional_call(module, {}, x)
self.assertEqual(module.running_mean, torch.full((10,), 12.8))

@@ -705,7 +705,7 @@ class TestTensorExprFuser(BaseTestClass):
# d = to_bf16(to_fp32(a) + to_fp32(b) + to_fp32(c))
# Hence, we simulate NNC computation by feeding fp32 tensors and converting
# the result tensor back to bf16. The simulation could avoid the numeric
# deviation to simplify the result comprasion
# deviation to simplify the result comparison
y = warmup_and_run_forward(traced, rand_a.float(), rand_b.float())
if torch_fn not in cmp_fns:
y = y.bfloat16()

@@ -4320,8 +4320,8 @@ class TestSDPAXpuOnly(NNTestCase):
_ = F.scaled_dot_product_attention(q, k, v)

def test_default_priority_order(self, device):
# The default priority order of xpu is overrideable, math, flash, efficient, cudnn
# For xpu backend, we need to make sure that overrideable > math > flash
# The default priority order of xpu is overridable, math, flash, efficient, cudnn
# For xpu backend, we need to make sure that overridable > math > flash
dtype = torch.bfloat16
shape = SdpaShape(1, 1, 1, 1)
make_tensor = partial(torch.rand, shape, device=device, dtype=dtype)

@@ -87,7 +87,7 @@ class TestBuiltin(TestCase):
assert_raises(TypeError, np.dtype, "l8")
assert_raises(TypeError, np.dtype, "L8")

# XXX: what is 'q'? on my 64-bit ubuntu maching it's int64, same as 'l'
# XXX: what is 'q'? on my 64-bit ubuntu matching it's int64, same as 'l'
# if np.dtype('q').itemsize == 8:
# assert_raises(TypeError, np.dtype, 'q4')
# assert_raises(TypeError, np.dtype, 'Q4')

@@ -976,7 +976,7 @@ class TestEinsum(TestCase):
# Test originally added to cover broken float16 path: gh-20305
# Likely most are covered elsewhere, at least partially.
dtype = np.dtype(dtype)
# Simple test, designed to excersize most specialized code paths,
# Simple test, designed to exercise most specialized code paths,
# note the +0.5 for floats. This makes sure we use a float value
# where the results must be exact.
arr = (np.arange(7) + 0.5).astype(dtype)

@@ -1160,7 +1160,7 @@ class TestEinsum(TestCase):
@xfail # (reason="order='F' not supported")
def test_output_order(self):
# Ensure output order is respected for optimize cases, the below
# conraction should yield a reshaped tensor view
# contraction should yield a reshaped tensor view
# gh-16415

a = np.ones((2, 3, 5), order="F")

@@ -375,7 +375,7 @@ class TestIndexing(TestCase):
assert_array_equal(a[idx], idx)

# this case must not go into the fast path, note that idx is
# a non-contiuguous none 1D array here.
# a non-contiguous none 1D array here.
a[idx] = -1
res = np.arange(6)
res[0] = -1

@@ -900,7 +900,7 @@ class TestScalarIndexing(TestCase):

assert_raises(IndexError, subscript, a, (np.newaxis, 0))

# this assersion fails because 50 > NPY_MAXDIMS = 32
# this assertion fails because 50 > NPY_MAXDIMS = 32
# assert_raises(IndexError, subscript, a, (np.newaxis,)*50)

@xfail # (reason="pytorch disallows overlapping assignments")

@@ -3283,7 +3283,7 @@ class TestArgmax(TestCase):
([np.nan, 0, 1, 2, 3], 0),
([np.nan, 0, np.nan, 2, 3], 0),
# To hit the tail of SIMD multi-level(x4, x1) inner loops
# on variant SIMD widthes
# on variant SIMD widths
([1] * (2 * 5 - 1) + [np.nan], 2 * 5 - 1),
([1] * (4 * 5 - 1) + [np.nan], 4 * 5 - 1),
([1] * (8 * 5 - 1) + [np.nan], 8 * 5 - 1),

@@ -3392,7 +3392,7 @@ class TestArgmin(TestCase):
([np.nan, 0, 1, 2, 3], 0),
([np.nan, 0, np.nan, 2, 3], 0),
# To hit the tail of SIMD multi-level(x4, x1) inner loops
# on variant SIMD widthes
# on variant SIMD widths
([1] * (2 * 5 - 1) + [np.nan], 2 * 5 - 1),
([1] * (4 * 5 - 1) + [np.nan], 4 * 5 - 1),
([1] * (8 * 5 - 1) + [np.nan], 8 * 5 - 1),

@@ -30,7 +30,7 @@ skip = functools.partial(skipif, True)


@xpassIfTorchDynamo_np # (
# reason="We do not disctinguish between scalar and array types."
# reason="We do not distinguish between scalar and array types."
# " Thus, scalars can upcast arrays."
# )
class TestCommonType(TestCase):

@@ -3361,42 +3361,42 @@ class TestPercentile(TestCase):
assert_equal(np.percentile(a, 0.3), np.nan)
assert_equal(np.percentile(a, 0.3).ndim, 0)

# axis0 zerod
# axis0 zeroed
b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 0)
b[2, 3] = np.nan
b[1, 2] = np.nan
assert_equal(np.percentile(a, 0.3, 0), b)

# axis0 not zerod
# axis0 not zeroed
b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 0)
b[:, 2, 3] = np.nan
b[:, 1, 2] = np.nan
assert_equal(np.percentile(a, [0.3, 0.6], 0), b)

# axis1 zerod
# axis1 zeroed
b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 1)
b[1, 3] = np.nan
b[1, 2] = np.nan
assert_equal(np.percentile(a, 0.3, 1), b)
# axis1 not zerod
# axis1 not zeroed
b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 1)
b[:, 1, 3] = np.nan
b[:, 1, 2] = np.nan
assert_equal(np.percentile(a, [0.3, 0.6], 1), b)

# axis02 zerod
# axis02 zeroed
b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, (0, 2))
b[1] = np.nan
b[2] = np.nan
assert_equal(np.percentile(a, 0.3, (0, 2)), b)
# axis02 not zerod
# axis02 not zeroed
b = np.percentile(
np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], (0, 2)
)
b[:, 1] = np.nan
b[:, 2] = np.nan
assert_equal(np.percentile(a, [0.3, 0.6], (0, 2)), b)
# axis02 not zerod with method='nearest'
# axis02 not zeroed with method='nearest'
b = np.percentile(
np.arange(24, dtype=float).reshape(2, 3, 4),
[0.3, 0.6],

@@ -399,7 +399,7 @@ class TestArgmax(TestCase):
([np.nan, 0, 1, 2, 3], 0),
([np.nan, 0, np.nan, 2, 3], 0),
# To hit the tail of SIMD multi-level(x4, x1) inner loops
# on variant SIMD widthes
# on variant SIMD widths
([1] * (2 * 5 - 1) + [np.nan], 2 * 5 - 1),
([1] * (4 * 5 - 1) + [np.nan], 4 * 5 - 1),
([1] * (8 * 5 - 1) + [np.nan], 8 * 5 - 1),

@@ -534,7 +534,7 @@ class TestArgmin(TestCase):
([np.nan, 0, 1, 2, 3], 0),
([np.nan, 0, np.nan, 2, 3], 0),
# To hit the tail of SIMD multi-level(x4, x1) inner loops
# on variant SIMD widthes
# on variant SIMD widths
([1] * (2 * 5 - 1) + [np.nan], 2 * 5 - 1),
([1] * (4 * 5 - 1) + [np.nan], 4 * 5 - 1),
([1] * (8 * 5 - 1) + [np.nan], 8 * 5 - 1),

@@ -68,7 +68,7 @@ class TestArrayScalars(TestCase):
assert product.shape == (3,)
assert_equal(product, [42, 42 * 2, 42 * 3])

# repeat with right-mulitply
# repeat with right-multiply
product = lst * value
assert isinstance(product, np.ndarray)
assert product.shape == (3,)

@@ -19,7 +19,7 @@ assert_type(-TENSOR, Tensor)
assert_type(~TENSOR, Tensor)

#
# Binary ops that return a bolean
# Binary ops that return a boolean
#

# Operator ==