Revert "Fix global flake8 issues (#124771)"

This reverts commit f01275934b.

Reverted https://github.com/pytorch/pytorch/pull/124771 on behalf of https://github.com/jeanschmidt due to Unfortunately, I needed to revert #123735 and this one depends on it. So please check if there are no merge conflicts or breakages and feel free to merge this PR again ([comment](https://github.com/pytorch/pytorch/pull/124428#issuecomment-2078699836))
This commit is contained in:
PyTorch MergeBot 2024-04-26 06:15:17 +00:00
parent e607dc8abb
commit 1ac60484c1
55 changed files with 211 additions and 213 deletions

View File

@ -29,7 +29,7 @@ def parse_args() -> Any:
"--onto-branch", type=str, required=True, help="the target release branch"
)
parser.add_argument(
"--github-actor", type=str, required=True, help="all the world's a stage"
"--github-actor", type=str, required=True, help="all the worlds a stage"
)
parser.add_argument(
"--classification",

View File

@ -152,8 +152,8 @@ def run(
result_entry["sequence_length"] = sequence_length
result_entry["n_heads"] = num_heads
result_entry["embed_dim"] = embed_dim
result_entry["time_native_mha_slow(\u00B5s)"] = f"{time_native_mha_slow:.3f}"
result_entry["time_native_mha_fast (\u00B5s)"] = f"{time_native_mha_fast:.3f}"
result_entry["time_native_mha_slow(μs)"] = f"{time_native_mha_slow:.3f}"
result_entry["time_native_mha_fast (μs)"] = f"{time_native_mha_fast:.3f}"
result_entry["speedup flash_mha v native_mha"] = f"{speedup_fast_internal:.3f}"
result_entry["padding"] = f"{padding:.3f}"
return result_entry

View File

@ -81,10 +81,10 @@ class ExperimentResults:
@classmethod
def get_entry_names(cls) -> List[str]:
return [
"nn_mha_time (\u00B5s)",
"compiled_nn_mha_time (\u00B5s)",
"composite_mha_time (\u00B5s)",
"compiled_composite_mha_time (\u00B5s)",
"nn_mha_time (μs)",
"compiled_nn_mha_time (μs)",
"composite_mha_time (μs)",
"compiled_composite_mha_time (μs)",
]

View File

@ -28,7 +28,7 @@ import keyword
import warnings
from typing import Collection, List, Mapping, Optional, Set, Tuple, Union
_ellipsis: str = "\u2026" # NB, this is a single unicode symbol. String is used as it is not a list, but can be iterated
_ellipsis: str = "" # NB, this is a single unicode symbol. String is used as it is not a list, but can be iterated
class AnonymousAxis:

View File

@ -3752,11 +3752,11 @@ class TestDistributions(DistributionsTestCase):
@unittest.skipIf(not TEST_NUMPY, "NumPy not found")
def test_dirichlet_log_prob_zero(self):
# Specifically test the special case where x=0 and alpha=1. The PDF is
# proportional to x**(alpha-1), which in this case works out to 0**0=1.
# Specifically test the special case where x=0 and α=1. The PDF is
# proportional to x**(α-1), which in this case works out to 0**0=1.
# The log PDF of this term should therefore be 0. However, it's easy
# to accidentally introduce NaNs by calculating log(x) without regard
# for the value of alpha-1.
# for the value of α-1.
alpha = torch.tensor([1, 2])
dist = Dirichlet(alpha)
x = torch.tensor([0, 1])
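A quick eager-mode illustration of the special case this comment describes (a sketch, separate from the test):

import torch
from torch.distributions import Dirichlet

# With alpha = [1, 2] the density is proportional to x2, so the log-density
# is finite (log 2) even though x1 is exactly 0 - no NaN from log(0).
d = Dirichlet(torch.tensor([1.0, 2.0]))
print(d.log_prob(torch.tensor([0.0, 1.0])))  # ~0.6931, not NaN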

View File

@ -107,7 +107,7 @@ class TestParsedExpression(TestCase):
ParsedExpression("(a) ((b c) (d ...))")
# invalid identifiers
ParsedExpression("camelCase under_scored cApiTaLs \u00DF ...")
ParsedExpression("camelCase under_scored cApiTaLs ß ...")
with self.assertRaises(ValueError):
ParsedExpression("1a")
with self.assertRaises(ValueError):

View File

@ -308,8 +308,8 @@ class TestTemplatedSDPA(InductorTestCase):
# this means that the base for the LSE computed by ref is e while for the compiled
# version it is 2. To compare we use the change of base formula
# log_2(x_compiled) = log_e(x_ref) * log_2(e) where
# x_ref = sum(_i e^(scores[i]))
# x_compiled = sum(_i 2^(log2(e) * scores[i]))
# x_ref = ∑_i e^(scores[i])
# x_compiled = ∑_i 2^(log2(e) * scores[i])
self.assertTrue(ref_lse.dtype == torch.float32)
self.assertTrue(compiled_lse.dtype == torch.float32)
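The change-of-base relationship is easy to verify numerically; a minimal sketch (illustrative, not from the test):

import math
import torch

# log2(sum_i 2^(log2(e) * s_i)) equals log_e(sum_i e^(s_i)) * log2(e)
scores = torch.randn(8)
lse_ref = torch.logsumexp(scores, dim=0)  # base e
lse_base2 = torch.log2(torch.sum(torch.exp2(scores * math.log2(math.e))))
assert torch.allclose(lse_base2, lse_ref * math.log2(math.e))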

View File

@ -111,16 +111,16 @@ class DirectoryReaderTest(PackageTestCase):
with PackageExporter(filename) as pe:
# Layout looks like:
# package
# |-- one/
# | |-- a.txt
# | |-- b.txt
# | |-- c.txt
# | +-- three/
# | |-- d.txt
# | +-- e.txt
# +-- two/
# |-- f.txt
# +-- g.txt
# ├── one/
# │ ├── a.txt
# │ ├── b.txt
# │ ├── c.txt
# │ └── three/
# │ ├── d.txt
# │ └── e.txt
# └── two/
# ├── f.txt
# └── g.txt
pe.save_text("one", "a.txt", "hello, a!")
pe.save_text("one", "b.txt", "hello, b!")
pe.save_text("one", "c.txt", "hello, c!")

View File

@ -38,46 +38,46 @@ class TestMisc(PackageTestCase):
export_plain = dedent(
"""\
\u251c\u2500\u2500 .data
\u2502 \u251c\u2500\u2500 extern_modules
\u2502 \u251c\u2500\u2500 python_version
\u2502 \u251c\u2500\u2500 serialization_id
\u2502 \u2514\u2500\u2500 version
\u251c\u2500\u2500 main
\u2502 \u2514\u2500\u2500 main
\u251c\u2500\u2500 obj
\u2502 \u2514\u2500\u2500 obj.pkl
\u251c\u2500\u2500 package_a
\u2502 \u251c\u2500\u2500 __init__.py
\u2502 \u2514\u2500\u2500 subpackage.py
\u251c\u2500\u2500 byteorder
\u2514\u2500\u2500 module_a.py
├── .data
│   ├── extern_modules
│   ├── python_version
│   ├── serialization_id
│   └── version
├── main
│   └── main
├── obj
│   └── obj.pkl
├── package_a
│   ├── __init__.py
│   └── subpackage.py
├── byteorder
└── module_a.py
"""
)
export_include = dedent(
"""\
\u251c\u2500\u2500 obj
\u2502 \u2514\u2500\u2500 obj.pkl
\u2514\u2500\u2500 package_a
\u2514\u2500\u2500 subpackage.py
├── obj
│   └── obj.pkl
└── package_a
    └── subpackage.py
"""
)
import_exclude = dedent(
"""\
\u251c\u2500\u2500 .data
\u2502 \u251c\u2500\u2500 extern_modules
\u2502 \u251c\u2500\u2500 python_version
\u2502 \u251c\u2500\u2500 serialization_id
\u2502 \u2514\u2500\u2500 version
\u251c\u2500\u2500 main
\u2502 \u2514\u2500\u2500 main
\u251c\u2500\u2500 obj
\u2502 \u2514\u2500\u2500 obj.pkl
\u251c\u2500\u2500 package_a
\u2502 \u251c\u2500\u2500 __init__.py
\u2502 \u2514\u2500\u2500 subpackage.py
\u251c\u2500\u2500 byteorder
\u2514\u2500\u2500 module_a.py
├── .data
│   ├── extern_modules
│   ├── python_version
│   ├── serialization_id
│   └── version
├── main
│   └── main
├── obj
│   └── obj.pkl
├── package_a
│   ├── __init__.py
│   └── subpackage.py
├── byteorder
└── module_a.py
"""
)

View File

@ -25,16 +25,16 @@ class TestResources(PackageTestCase):
with PackageExporter(buffer) as pe:
# Layout looks like:
# package
# |-- one/
# | |-- a.txt
# | |-- b.txt
# | |-- c.txt
# | +-- three/
# | |-- d.txt
# | +-- e.txt
# +-- two/
# |-- f.txt
# +-- g.txt
# ├── one/
# │ ├── a.txt
# │ ├── b.txt
# │ ├── c.txt
# │ └── three/
# │ ├── d.txt
# │ └── e.txt
# └── two/
# ├── f.txt
# └── g.txt
pe.save_text("one", "a.txt", "hello, a!")
pe.save_text("one", "b.txt", "hello, b!")
pe.save_text("one", "c.txt", "hello, c!")

View File

@ -15679,7 +15679,7 @@ dedent """
def test_unicode_comments(self):
@torch.jit.script
def test(self, a):
# shrug
# 🤷🤷🤷🤷
return torch.nn.functional.relu(a)
def test_get_set_state_with_tensors(self):

View File

@ -70,7 +70,7 @@ class TestFuser(JitTestCase):
@unittest.skipIf(IS_SANDCASTLE, "NYI: fuser CPU support for Sandcastle")
@enable_cpu_fuser
def test_abs_cpu_unicode_temp_dir(self):
with TemporaryDirectoryName(suffix='\u4e2d\u6587') as dname:
with TemporaryDirectoryName(suffix='中文') as dname:
shell_env = os.environ.copy()
shell_env['TMP'] = dname
cmd = [sys.executable, os.path.basename(__file__), type(self).__name__ + '.test_abs_cpu']

View File

@ -1950,7 +1950,7 @@ class TestLinalg(TestCase):
# if out tensor with floating dtype is passed for complex output an error is thrown
if not dtype.is_complex:
# The characteristic equation is p(lambda) = lambda^2 - 2lambda + 5 = 0, with roots lambda = 1[+-]2i
# The characteristic equation is p(λ) = λ^2 − 2λ + 5 = 0, with roots λ = 1±2i
a = torch.tensor([[3., -2.], [4., -1.]], dtype=dtype, device=device)
out0 = torch.empty(0, device=device, dtype=dtype)
out1 = torch.empty(0, device=device, dtype=dtype)
@ -2117,7 +2117,7 @@ class TestLinalg(TestCase):
# if out tensor with floating dtype is passed for complex output an error is thrown
if not dtype.is_complex:
# The characteristic equation is p(lambda) = lambda^2 - 2lambda + 5 = 0, with roots lambda = 1[+-]2i
# The characteristic equation is p(λ) = λ^2 − 2λ + 5 = 0, with roots λ = 1±2i
a = torch.tensor([[3., -2.], [4., -1.]], dtype=dtype, device=device)
out = torch.empty(0, device=device, dtype=dtype)
with self.assertRaisesRegex(RuntimeError, "Expected eigenvalues to be safely castable"):
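The quoted roots are easy to confirm with the public API (a quick check, independent of the test):

import torch

a = torch.tensor([[3., -2.], [4., -1.]])
# det(a - lambda*I) = lambda^2 - 2*lambda + 5, so the eigenvalues are complex
print(torch.linalg.eigvals(a))  # tensor([1.+2.j, 1.-2.j]) up to ordering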

View File

@ -428,7 +428,7 @@ class TestPublicBindings(TestCase):
def test_correct_module_names(self):
'''
An API is considered public, if its `__module__` starts with `torch.`
and there is no name in `__module__` or the object itself that starts with "_".
and there is no name in `__module__` or the object itself that starts with “_”.
Each public package should either:
- (preferred) Define `__all__` and all callables and classes in there must have their
`__module__` start with the current submodule's path. Things not in `__all__` should

View File

@ -924,7 +924,7 @@ class TestSerialization(TestCase, SerializationMixin):
test(fname)
if IS_FILESYSTEM_UTF8_ENCODING:
with TemporaryDirectoryName(suffix='\u975eASCII\u30d1\u30b9') as dname:
with TemporaryDirectoryName(suffix='非ASCIIパス') as dname:
with TemporaryFileName(dir=dname) as fname:
test(fname)

View File

@ -8046,7 +8046,7 @@ class TestTorch(TestCase):
assert_with_filename(fname)
if IS_FILESYSTEM_UTF8_ENCODING:
with TemporaryDirectoryName(suffix='\u4e2d\u6587') as dname, TemporaryFileName(dir=dname) as fname:
with TemporaryDirectoryName(suffix='中文') as dname, TemporaryFileName(dir=dname) as fname:
assert_with_filename(fname)
def test_torch_from_file(self):
@ -8077,7 +8077,7 @@ class TestTorch(TestCase):
assert_with_filename(fname)
if IS_FILESYSTEM_UTF8_ENCODING:
with TemporaryDirectoryName(suffix='\u4e2d\u6587') as dname, TemporaryFileName(dir=dname) as fname:
with TemporaryDirectoryName(suffix='中文') as dname, TemporaryFileName(dir=dname) as fname:
assert_with_filename(fname)
def test_print(self):

View File

@ -744,7 +744,7 @@ def slice_forward(
raise RuntimeError("slice step must be positive")
start_val = start if start is not None else 0
end_val = end if end is not None else sys.maxsize # 2^63 - 1
end_val = end if end is not None else sys.maxsize # 2^63 − 1
if start_val < 0:
start_val += sizes[dim]

View File

@ -57,7 +57,7 @@ def list_cmp(op: Callable[[Any, Any], bool], left: Sequence[Any], right: Sequenc
def dropwhile(predicate, iterable):
# dropwhile(lambda x: x<5, [1,4,6,4,1]) -> 6 4 1
# dropwhile(lambda x: x<5, [1,4,6,4,1]) → 6 4 1
iterable = iter(iterable)
for x in iterable:
if not predicate(x):
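The doc-comment matches the standard itertools semantics, e.g.:

from itertools import dropwhile

# Drops the leading run of items where the predicate holds, keeps the rest.
assert list(dropwhile(lambda x: x < 5, [1, 4, 6, 4, 1])) == [6, 4, 1]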

View File

@ -5,13 +5,13 @@ class ExportErrorType(Enum):
# User providing invalid inputs to either tracer, or other public facing APIs
INVALID_INPUT_TYPE = 1
# User returning values from their models that we don't support.
# User returning values from their models that we don’t support.
INVALID_OUTPUT_TYPE = 2
# Generated IR does not conform to Export IR Specification.
VIOLATION_OF_SPEC = 3
# User's code contains types and functionalities we don't support.
# User’s code contains types and functionalities we don’t support.
NOT_SUPPORTED = 4
# User's code didn't provide necessary details for us to successfully trace and export.

View File

@ -498,7 +498,7 @@ def get_tangents_in_dims(input_dims, tangents):
# in_dims = 0
# vmap(Sum.apply, in_dims)(x)
#
# Let's assume for a moment that we didn't vmap setup_context in VmappedSum:
# Let’s assume for a moment that we didn’t vmap setup_context in VmappedSum:
#
# class VmappedSum(torch.autograd.Function):
# @staticmethod
@ -519,7 +519,7 @@ def get_tangents_in_dims(input_dims, tangents):
# return gx
#
# We end up saving [B, 4] as x_shape. In the backward, gy has shape [B],
# and we're doing:
# and we’re doing:
#
# def backward_no_context(gy):
# return gy.expand([B, 4])

View File

@ -62,8 +62,8 @@ class LiveRange:
Invariant: begin <= end
"""
begin: float # int | +/-inf
end: float # int | +/-inf
begin: float # int | ±inf
end: float # int | ±inf
def contains(self, other: LiveRange):
"""Is other entirely within self"""

View File

@ -5373,7 +5373,7 @@ def meta__scaled_dot_product_flash_attention_for_cpu_backward(
scale: Optional[float] = None,
):
# cpus's grad layout is different from cuda's,
# i.e. (batch_size, seq_len,num_heads, head_dim)
# i.e. (batch_size, seq_len，num_heads, head_dim）
batch_size = query.size(0)
num_heads = query.size(1)
head_dim = query.size(3)

View File

@ -2008,7 +2008,7 @@ def min_scalar_type(a: ArrayLike, /):
from ._dtypes import DType
if a.numel() > 1:
# numpy docs: "For non-scalar array a, returns the vector's dtype unmodified."
# numpy docs: "For non-scalar array a, returns the vectors dtype unmodified."
return DType(a.dtype)
if a.dtype == torch.bool:

View File

@ -485,7 +485,7 @@ def _make_alias(fn, name):
"""
This function defines an alias of another function and sets its __name__ argument.
It also sets its __module__ argument to the module of the caller.
Note that when naively doing `alias = fn`, we have that `alias.__name__ == "fn"`, and
Note that when naïvely doing `alias = fn`, we have that `alias.__name__ == "fn"`, and
`alias.__module__ == fn.__module__`.
"""

View File

@ -600,7 +600,7 @@ def margin_ranking_loss(
margin: float = 0.0,
reduction: str = "mean",
) -> TensorLikeType:
# loss_without_reduction = max(0, -target * (input1 - input2) + margin)
# loss_without_reduction = max(0, −target * (input1 − input2) + margin)
if input1.ndim != input2.ndim or input1.ndim != target.ndim:
raise RuntimeError(
"margin_ranking_loss : All input tensors should have same dimension but got sizes: "

View File

@ -116,7 +116,7 @@ def i1e(a: TensorLikeType) -> TensorLikeType:
type_promotion_kind=utils.ELEMENTWISE_TYPE_PROMOTION_KIND.INT_TO_FLOAT,
)
def log_ndtr(a: TensorLikeType) -> TensorLikeType:
# Note: M_SQRT1_2 is the value of 1 / sqrt(2)
# Note: M_SQRT1_2 is the value of 1 / √2
M_SQRT1_2 = 0.707106781186547524400844362104849039
t = a * M_SQRT1_2
return torch.where(
@ -185,7 +185,7 @@ def multigammaln(a: TensorLikeType, p: int) -> TensorLikeType:
type_promotion_kind=utils.ELEMENTWISE_TYPE_PROMOTION_KIND.INT_TO_FLOAT,
)
def ndtr(a: TensorLikeType) -> TensorLikeType:
# Note: M_SQRT1_2 is the value of 1 / sqrt(2)
# Note: M_SQRT1_2 is the value of 1 / √2
M_SQRT1_2 = 0.707106781186547524400844362104849039
a_sqrt_2 = a * M_SQRT1_2
return (1 + torch.erf(a_sqrt_2)) * 0.5
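A numeric sanity check of the erf-based formula (a sketch):

import math
import torch

a = torch.tensor([-1.0, 0.0, 1.0])
manual = (1 + torch.erf(a / math.sqrt(2))) * 0.5
assert torch.allclose(manual, torch.special.ndtr(a))  # ndtr(0) == 0.5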

View File

@ -2305,8 +2305,8 @@ Keyword Args:
times each observation should be repeated. Its numel must equal the number of columns of :attr:`input`.
Must have integral dtype. Ignored if ``None``. Defaults to ``None``.
aweights (tensor, optional): A Scalar or 1D array of observation vector weights.
These relative weights are typically large for observations considered "important" and smaller for
observations considered less "important". Its numel must equal the number of columns of :attr:`input`.
These relative weights are typically large for observations considered “important” and smaller for
observations considered less “important”. Its numel must equal the number of columns of :attr:`input`.
Must have floating point dtype. Ignored if ``None``. Defaults to ``None``.
Returns:
@ -4773,7 +4773,7 @@ This is detailed in the "Keyword Arguments" section below.
The gradient is estimated by estimating each partial derivative of :math:`g` independently. This estimation is
accurate if :math:`g` is in :math:`C^3` (it has at least 3 continuous derivatives), and the estimation can be
improved by providing closer samples. Mathematically, the value at each interior point of a partial derivative
is estimated using `Taylor's theorem with remainder <https://en.wikipedia.org/wiki/Taylor%27s_theorem>`_.
is estimated using `Taylor’s theorem with remainder <https://en.wikipedia.org/wiki/Taylor%27s_theorem>`_.
Letting :math:`x` be an interior point with :math:`x-h_l` and :math:`x+h_r` be points neighboring
it to the left and right respectively, :math:`f(x+h_r)` and :math:`f(x-h_l)` can be estimated using:
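A small illustration of this estimation with the public API (grid values chosen arbitrarily):

import torch

x = torch.tensor([0.0, 0.5, 1.5, 3.0])  # non-uniform sample points
y = x ** 2
(dy_dx,) = torch.gradient(y, spacing=(x,))
# Interior estimates are exact for a quadratic: close to 2*x there.
print(dy_dx)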

View File

@ -79,12 +79,12 @@ class DTypeWithConstraints:
* `quant_min_lower_bound` and `quant_max_upper_bound`: Lower and upper
bounds for the minimum and maximum quantized values respectively. If
the QConfig's `quant_min` and `quant_max` fall outside this range,
the QConfig’s `quant_min` and `quant_max` fall outside this range,
then the QConfig will be ignored.
* `scale_min_lower_bound` and `scale_max_upper_bound`: Lower and upper
bounds for the minimum and maximum scale values respectively. If the
QConfig's minimum scale value (currently exposed as `eps`) falls below
QConfig’s minimum scale value (currently exposed as `eps`) falls below
the lower bound, then the QConfig will be ignored. Note that the upper
bound is currently not enforced.
@ -130,7 +130,7 @@ class DTypeConfig:
dtypes here are the same as the semantics of the dtypes specified in
the observers.
These dtypes are matched against the ones specified in the user's
These dtypes are matched against the ones specified in the user’s
QConfig. If there is a match, and the QConfig satisfies the constraints
specified in the DTypeConfig (if any), then we will quantize the given
pattern using this DTypeConfig. Otherwise, the QConfig is ignored and

View File

@ -187,7 +187,7 @@ def full(sharding_spec: ShardingSpec,
process_group=None,
init_rrefs=False) -> ShardedTensor:
"""
Creates a :class:`ShardedTensor` filled with fill_value. The tensor's dtype
Creates a :class:`ShardedTensor` filled with fill_value. The tensor’s dtype
is inferred from fill_value. If dtype is specified, it will override the
inferred type from fill_value. Needs to be called on all ranks in an SPMD fashion.
Args:
@ -195,7 +195,7 @@ def full(sharding_spec: ShardingSpec,
describing how to shard the Tensor.
size (int...): a list, tuple, or `torch.Size` of integers defining the shape of the
output tensor.
fill_value (Scalar) - the value to fill the output tensor with.
fill_value (Scalar) – the value to fill the output tensor with.
Keyword args:
dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
Default: if ``None``, uses a global default (see :func:`torch.set_default_dtype`).

View File

@ -117,7 +117,7 @@ def _handle_col_wise_sharding_base(
padding_idx: If specified, the entries at padding_idx do
not contribute to the gradient; therefore, the embedding
vector at padding_idx is not updated during training,
i.e. it remains as a fixed "pad".
i.e. it remains as a fixed “pad”.
Note that the embedding vector at padding_idx is
excluded from the reduction.
@ -312,7 +312,7 @@ def _handle_row_wise_mask(gather_inp, padding_idx, weight, world_size, rank):
padding_idx: If specified, the entries at padding_idx do
not contribute to the gradient; therefore, the embedding
vector at padding_idx is not updated during training,
i.e. it remains as a fixed "pad".
i.e. it remains as a fixed “pad”.
Note that the embedding vector at padding_idx is
excluded from the reduction.
weight: weight tensor of Embedding look-up table.

View File

@ -202,7 +202,7 @@ def _handle_col_wise_sharding(
padding_idx: If specified, the entries at padding_idx do
not contribute to the gradient; therefore, the embedding
vector at padding_idx is not updated during training,
i.e. it remains as a fixed "pad".
i.e. it remains as a fixed “pad”.
pg: process group.
Returns: final result of lookup.
@ -250,7 +250,7 @@ def _handle_row_wise_sharding(
padding_idx: If specified, the entries at padding_idx do
not contribute to the gradient; therefore, the embedding
vector at padding_idx is not updated during training,
i.e. it remains as a fixed "pad".
i.e. it remains as a fixed “pad”.
rank: # of cuda process.
pg: process group.

View File

@ -268,7 +268,7 @@ def _handle_col_wise_sharding(
padding_idx: If specified, the entries at padding_idx do
not contribute to the gradient; therefore, the embedding
vector at padding_idx is not updated during training,
i.e. it remains as a fixed "pad".
i.e. it remains as a fixed “pad”.
Note that the embedding vector at padding_idx is
excluded from the reduction.
pg: process group.
@ -342,7 +342,7 @@ def _handle_row_wise_sharding(
padding_idx: If specified, the entries at padding_idx do
not contribute to the gradient; therefore, the embedding
vector at padding_idx is not updated during training,
i.e. it remains as a fixed "pad".
i.e. it remains as a fixed “pad”.
Note that the embedding vector at padding_idx is
excluded from the reduction.
rank: # of cuda process.

View File

@ -124,7 +124,7 @@ class EtcdRendezvousHandler(RendezvousHandler):
| | (default 600s) |
+--------------------------------------------+--------------------------+
| last_call_timeout | additional wait amount |
| | ("last call") after min |
| | (“last call”) after min |
| | number of workers has |
| | been reached (defaults |
| | to 30s) |

View File

@ -4,7 +4,7 @@
#
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.
"""Implements "Block Partitions of Sequences" by Imre B\u00e1r\u00e1ny et al.
"""Implements "Block Partitions of Sequences" by Imre Bárány et al.
Paper: https://arxiv.org/pdf/1308.2452.pdf
@ -18,7 +18,7 @@ def solve(sequence: List[int], partitions: int = 1) -> List[List[int]]:
"""Splits a sequence into several partitions to minimize variance for each
partition.
The result might not be optimal. However, it can be done only in O(kn\u00b3),
The result might not be optimal. However, it can be done only in O(kn³),
where k is the number of partitions and n is the length of the sequence.
"""
@ -51,14 +51,14 @@ def solve(sequence: List[int], partitions: int = 1) -> List[List[int]]:
while True:
"""
(1) Fix p element-of [k] with M(P) = bp. So Bp is a maximal block of P.
(1) Fix p ∈ [k] with M(P) = bp. So Bp is a maximal block of P.
"""
# max_size: M(P)
max_size, p = max(leaderboard())
while True:
"""
(2) If M(P) <= m(P) + 1, then stop.
(2) If M(P) ≤ m(P) + 1, then stop.
"""
# min_size: m(P)
min_size, q = min(leaderboard())
@ -67,7 +67,7 @@ def solve(sequence: List[int], partitions: int = 1) -> List[List[int]]:
return [sequence[i:j] for i, j in zip([0] + splits[:-1], splits)]
"""
(3) If M(P) > m(P) + 1, then let m(P) = bq for the q element-of [k] which is
(3) If M(P) > m(P) + 1, then let m(P) = bq for the q ∈ [k] which is
closest to p (ties broken arbitrarily). Thus Bq is a minimal block
of P. Let Bh be the block next to Bq between Bp and Bq. (Note that
Bh is a non-empty block: if it were, then m(P) = 0 and we should
@ -75,21 +75,21 @@ def solve(sequence: List[int], partitions: int = 1) -> List[List[int]]:
"""
if p < q:
"""
So either p < q and then h = q-1 and we define P * by moving
the last element from Bh = Bq-1 to Bq,
So either p < q and then h = q−1 and we define P ∗ by moving
the last element from Bh = Bq−1 to Bq,
"""
h = q - 1
splits[h] -= 1
else:
"""
or q < p, and then h = q + 1 and P * is obtained by moving the
or q < p, and then h = q + 1 and P ∗ is obtained by moving the
first element of Bh = Bq+1 to Bq.
"""
h = q + 1
splits[q] += 1
"""
Set P = P * . If p = h, then go to (1), else go to (2).
Set P = P ∗ . If p = h, then go to (1), else go to (2).
"""
if p == h:
break
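A usage sketch of the solver this docstring describes (the import path is assumed from this file's location in the tree):

from torch.distributed.pipeline.sync._balance.blockpartition import solve

# Partition six block sizes into two contiguous groups with balanced sums:
print(solve([1, 2, 3, 4, 5, 6], partitions=2))  # e.g. [[1, 2, 3, 4], [5, 6]]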

View File

@ -157,30 +157,30 @@ class Pipeline:
exc_info: Optional[ExcInfo] = None
# With checkpointing, the autograd graph looks like this diagram:
# +-----+------+
# | Copy |
# +-----+------+ (fence)
# - - - + - - - - - - - - -
# | (compute)
# +-----+------+
# | Wait | [1] Synchronize the current stream with the copy stream.
# +-----+------+
# +-----+------+
# | Checkpoint | [2] Compute a partition within checkpointing.
# +-----+------+
# +-----+------+
# | Wait | [3] Synchronize the copy stream with the current stream.
# +-----+------+
# + - - - +
# | +-----+-----+
# | | Recompute | [4] Schedule the recomputation at backpropagation.
# | +-----+-----+
# + - - - +
# |
# - - - + - - - - - - - - -
# +-----+------+ (fence)
# | Copy |
# +-----+------+
# ┌─────┸──────┐
# │ Copy │
# └─────┰──────┘ (fence)
# ─ ─ ─ ╂ ─ ─ ─ ─ ─ ─ ─ ─ ─
# ┃ (compute)
# ┌─────┸──────┐
# │ Wait │ [1] Synchronize the current stream with the copy stream.
# └─────┰──────┘
# ┌─────┸──────┐
# │ Checkpoint │ [2] Compute a partition within checkpointing.
# └─────┰──────┘
# ┌─────┸──────┐
# │ Wait │ [3] Synchronize the copy stream with the current stream.
# └─────┰──────┘
# ┠ ─ ─ ─ ┐
# ┃ ┌─────┴─────┐
# ┃ │ Recompute │ [4] Schedule the recomputation at backpropagation.
# ┃ └─────┬─────┘
# ┠ ─ ─ ─ ┘
# ┃
# ─ ─ ─ ╂ ─ ─ ─ ─ ─ ─ ─ ─ ─
# ┌─────┸──────┐ (fence)
# │ Copy │
# └─────┰──────┘
for i, j in schedule:
batch = batches[i]
partition = partitions[j]

View File

@ -9,7 +9,7 @@ autograd engine. The shared context of three functions (:class:`PortalBlue`,
:class:`PortalOrange`, and :class:`PortalCopy`) out of the computation graph is
one of the most important feature of :mod:`torchpipe.skip`.
The metaphor is inspired by Portal(tm) from Valve.
The metaphor is inspired by Portal™ from Valve.
"""
from typing import List, Optional, Tuple

View File

@ -362,16 +362,16 @@ def verify_skippables(module: nn.Sequential) -> None:
# Layer3 pops "1to3".
nn.Sequential(Layer1(), Layer2())
# +---- ?
# └──── ?
nn.Sequential(Layer2(), Layer3())
# ? ----+
# ? ────┘
nn.Sequential(Layer1(), Layer2(), Layer3(), Layer3())
# +-------------------+ ^^^^^^
# └───────────────────┘ ^^^^^^
nn.Sequential(Layer1(), Layer1(), Layer2(), Layer3())
# ^^^^^^ +-------------------+
# ^^^^^^ └───────────────────┘
To use the same name for multiple skip tensors, they must be isolated by
different namespaces. See :meth:`isolate()

View File

@ -152,7 +152,7 @@ class TGreatestUpperBound(Constraint):
self.rhs2 = rhs2
def __repr__(self):
return f'{self.res} = {self.rhs1}\u2294*{self.rhs2}'
return f'{self.res} = {self.rhs1}⊔*{self.rhs2}'
def __eq__(self, other):
if isinstance(other, TGreatestUpperBound):
@ -180,7 +180,7 @@ class DGreatestUpperBound(Constraint):
self.rhs2 = rhs2
def __repr__(self):
return f'{self.res} = {self.rhs1}\u2294{self.rhs2}'
return f'{self.res} = {self.rhs1}⊔{self.rhs2}'
def __eq__(self, other):
if isinstance(other, DGreatestUpperBound):

View File

@ -5,10 +5,10 @@ op_div = '/'
op_eq = '='
op_neq = '!='
op_imp = '=>'
op_matching = '\u22b3' # (contains)
op_matching = '⊳'
op_consistency = '~'
op_precision = '\u2291' # (square image of or equal to)
op_leq = '\u2264' # less-than or equal to
op_precision = '⊑'
op_leq = '≤'
op_lt = '<'
op_gt = '>'
op_mod = '%'

View File

@ -1450,7 +1450,7 @@ Keyword args:
out (Tensor, optional): output tensor. Ignored if `None`. Default: `None`.
dtype (:class:`torch.dtype`, optional): type used to perform the accumulation and the return.
If specified, :attr:`x` is cast to :attr:`dtype` before performing the operation,
and the returned tensor's type will be :attr:`dtype` if real and of its real counterpart if complex.
and the returned tensor’s type will be :attr:`dtype` if real and of its real counterpart if complex.
:attr:`dtype` may be complex if :attr:`x` is complex, otherwise it must be real.
:attr:`x` should be convertible without narrowing to :attr:`dtype`. Default: None

View File

@ -1012,7 +1012,7 @@ Args:
input (Tensor): the input tensor
dim (int or tuple of ints, optional): the dimension or dimensions to reduce.
Default: None that is equivalent to ``tuple(range(input.ndim))``.
unbiased (bool): when True, use Bessel's correction, otherwise, compute
unbiased (bool): when True, use Bessel’s correction, otherwise, compute
the uncorrected sample variance.
Keyword args:
@ -1148,7 +1148,7 @@ Args:
input (Tensor): the input tensor
dim (int or tuple of ints, optional): the dimension or dimensions to reduce.
Default: None that is equivalent to ``tuple(range(input.ndim))``.
unbiased (bool): when True, use Bessel's correction, otherwise, compute
unbiased (bool): when True, use Bessel’s correction, otherwise, compute
the uncorrected sample variance.
Keyword args:

View File

@ -210,7 +210,7 @@ ord (int, float, optional): the order of vector norm. Default: 2.
ord (int, float): the order of vector norm. Default: 2.
See :func:`torch.linalg.vector_norm` for a list of supported norms.""",
unbiased="""\
unbiased (bool): when True, use Bessel's correction, otherwise, compute
unbiased (bool): when True, use Bessel’s correction, otherwise, compute
the uncorrected sample variance.""",
eps="""\
eps (float, optional): small value to avoid division by zero. Default: {default}.""",

View File

@ -186,7 +186,7 @@ Example::
def nested_tensor(tensor_list, *, dtype=None, layout=None, device=None, requires_grad=False, pin_memory=False) -> Tensor:
r"""
Constructs a nested tensor with no autograd history (also known as a "leaf tensor", see
Constructs a nested tensor with no autograd history (also known as a “leaf tensor”, see
:ref:`Autograd mechanics <autograd-mechanics>`) from :attr:`tensor_list` a list of tensors.
Args:

View File

@ -20,7 +20,7 @@ class AdaptiveLogSoftmaxWithLoss(Module):
As described in
`Efficient softmax approximation for GPUs by Edouard Grave, Armand Joulin,
Moustapha Ciss\u00e9, David Grangier, and Herv\u00e9 J\u00e9gou
Moustapha Cissé, David Grangier, and Hervé Jégou
<https://arxiv.org/abs/1609.04309>`__.
Adaptive softmax is an approximate strategy for training models with large

View File

@ -204,7 +204,7 @@ class Conv1d(_ConvNd):
amount of implicit padding applied on both sides.
* :attr:`dilation` controls the spacing between the kernel points; also
known as the \u00e0 trous algorithm. It is harder to describe, but this `link`_
known as the à trous algorithm. It is harder to describe, but this `link`_
has a nice visualization of what :attr:`dilation` does.
{groups_note}
@ -341,7 +341,7 @@ class Conv2d(_ConvNd):
amount of implicit padding applied on both sides.
* :attr:`dilation` controls the spacing between the kernel points; also
known as the \u00e0 trous algorithm. It is harder to describe, but this `link`_
known as the à trous algorithm. It is harder to describe, but this `link`_
has a nice visualization of what :attr:`dilation` does.
{groups_note}
@ -483,7 +483,7 @@ class Conv3d(_ConvNd):
can be either a string {{'valid', 'same'}} or a tuple of ints giving the
amount of implicit padding applied on both sides.
* :attr:`dilation` controls the spacing between the kernel points; also known as the \u00e0 trous algorithm.
* :attr:`dilation` controls the spacing between the kernel points; also known as the à trous algorithm.
It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
{groups_note}
@ -690,7 +690,7 @@ class ConvTranspose1d(_ConvTransposeNd):
* :attr:`output_padding` controls the additional size added to one side
of the output shape. See note below for details.
* :attr:`dilation` controls the spacing between the kernel points; also known as the \u00e0 trous algorithm.
* :attr:`dilation` controls the spacing between the kernel points; also known as the à trous algorithm.
It is harder to describe, but the link `here`_ has a nice visualization of what :attr:`dilation` does.
{groups_note}
@ -821,7 +821,7 @@ class ConvTranspose2d(_ConvTransposeNd):
* :attr:`output_padding` controls the additional size added to one side
of the output shape. See note below for details.
* :attr:`dilation` controls the spacing between the kernel points; also known as the \u00e0 trous algorithm.
* :attr:`dilation` controls the spacing between the kernel points; also known as the à trous algorithm.
It is harder to describe, but the link `here`_ has a nice visualization of what :attr:`dilation` does.
{groups_note}
@ -978,7 +978,7 @@ class ConvTranspose3d(_ConvTransposeNd):
* :attr:`output_padding` controls the additional size added to one side
of the output shape. See note below for details.
* :attr:`dilation` controls the spacing between the kernel points; also known as the \u00e0 trous algorithm.
* :attr:`dilation` controls the spacing between the kernel points; also known as the à trous algorithm.
It is harder to describe, but the link `here`_ has a nice visualization of what :attr:`dilation` does.
{groups_note}

View File

@ -41,7 +41,7 @@ class Fold(Module):
sides for :attr:`padding` number of points for each dimension before
reshaping.
* :attr:`dilation` controls the spacing between the kernel points; also known as the \u00e0 trous algorithm.
* :attr:`dilation` controls the spacing between the kernel points; also known as the à trous algorithm.
It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
Args:
@ -186,7 +186,7 @@ class Unfold(Module):
sides for :attr:`padding` number of points for each dimension before
reshaping.
* :attr:`dilation` controls the spacing between the kernel points; also known as the \u00e0 trous algorithm.
* :attr:`dilation` controls the spacing between the kernel points; also known as the à trous algorithm.
It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
Args:

View File

@ -49,7 +49,7 @@ class Tag(enum.Enum):
class PatchedPropertyBag(sarif.PropertyBag):
"""Key/value pairs that provide additional information about the object.
The definition of PropertyBag via SARIF spec is "A property bag is an object (section 3.6)
The definition of PropertyBag via SARIF spec is "A property bag is an object (§3.6)
containing an unordered set of properties with arbitrary names." However it is not
reflected in the json file, and therefore not captured by the python representation.
This patch adds additional **kwargs to the `__init__` method to allow recording

View File

@ -26,13 +26,13 @@ def export_as_test_case(
is as follows:
dir
\u251c\u2500\u2500 test_<name>
\u2502 \u251c\u2500\u2500 model.onnx
\u2502 \u2514\u2500\u2500 test_data_set_0
\u2502 \u251c\u2500\u2500 input_0.pb
\u2502 \u251c\u2500\u2500 input_1.pb
\u2502 \u251c\u2500\u2500 output_0.pb
\u2502 \u2514\u2500\u2500 output_1.pb
├── test_<name>
│   ├── model.onnx
│   └── test_data_set_0
│       ├── input_0.pb
│       ├── input_1.pb
│       ├── output_0.pb
│       └── output_1.pb
Args:
model_bytes: The ONNX model in bytes.
@ -80,13 +80,13 @@ def load_test_case(dir: str) -> Tuple[bytes, Any, Any]:
should be as follows:
dir
\u251c\u2500\u2500 test_<name>
\u2502 \u251c\u2500\u2500 model.onnx
\u2502 \u2514\u2500\u2500 test_data_set_0
\u2502 \u251c\u2500\u2500 input_0.pb
\u2502 \u251c\u2500\u2500 input_1.pb
\u2502 \u251c\u2500\u2500 output_0.pb
\u2502 \u2514\u2500\u2500 output_1.pb
├── test_<name>
│   ├── model.onnx
│   └── test_data_set_0
│       ├── input_0.pb
│       ├── input_1.pb
│       ├── output_0.pb
│       └── output_1.pb
Args:
dir: The directory containing the test case.

View File

@ -785,7 +785,7 @@ def nan_to_num(g: jit_utils.GraphContext, input, nan, posinf, neginf):
)
# For None values of posinf, neginf we use the greatest/lowest finite
# value representable by input's dtype.
# value representable by input’s dtype.
finfo = torch.finfo(input_dtype)
if posinf is None:
posinf = finfo.max
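The same defaults, shown with the eager op for comparison:

import torch

t = torch.tensor([float("nan"), float("inf"), -float("inf")])
# nan -> 0.0, +inf -> finfo.max, -inf -> finfo.min for the input's dtype
print(torch.nan_to_num(t))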

View File

@ -1379,10 +1379,10 @@ def normal(
pin_memory=None,
):
# If you can sample from a given distribution with mean 0 and variance 1, then you can easily sample from a
# scale-location transformation of that distribution, which has mean mu and variance sigma's square. If x is a sample
# scale-location transformation of that distribution, which has mean μ and variance σ's square. If x is a sample
# from a mean 0 and variance 1 distribution then
# sigma x+mu
# is a sample with mean mu and variance sigma's square.
# σx+μ
# is a sample with mean μ and variance σ's square.
if sizes is not None and not symbolic_helper._is_none(sizes):
mean = opset9.expand(g, mean, sizes, None)
result = opset9.mul(g, std, g.op("RandomNormalLike", mean))
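The scale-location transform in the comment, checked empirically (a sketch):

import torch

mu, sigma = 3.0, 2.0
x = torch.randn(100_000)         # x ~ N(0, 1)
sample = sigma * x + mu          # sample ~ N(mu, sigma^2)
print(sample.mean().item(), sample.std().item())  # ~3.0, ~2.0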

View File

@ -1020,7 +1020,7 @@ class GraphInfoPrettyPrinter:
else ""
)
return f"{node_count} {'X' if has_mismatch else chr(0x2713)} {error_node_kind}"
return f"{node_count} {'X' if has_mismatch else ''} {error_node_kind}"
@_beartype.beartype
def _graph_id_segment_str(self) -> str:
@ -1148,13 +1148,13 @@ class OnnxTestCaseRepro:
structure is as follows:
dir
\u251c\u2500\u2500 test_<name>
\u2502 \u251c\u2500\u2500 model.onnx
\u2502 \u2514\u2500\u2500 test_data_set_0
\u2502 \u251c\u2500\u2500 input_0.pb
\u2502 \u251c\u2500\u2500 input_1.pb
\u2502 \u251c\u2500\u2500 output_0.pb
\u2502 \u2514\u2500\u2500 output_1.pb
├── test_<name>
│   ├── model.onnx
│   └── test_data_set_0
│       ├── input_0.pb
│       ├── input_1.pb
│       ├── output_0.pb
│       └── output_1.pb
Args:
proto: ONNX model proto.
@ -1244,19 +1244,19 @@ class GraphInfo:
Example::
==================================== Tree: =====================================
5 X __2 X __1 \u2713
5 X __2 X __1 ✓
id: | id: 0 | id: 00
| |
| |__1 X (aten::relu)
| id: 01
|
|__3 X __1 \u2713
|__3 X __1 ✓
id: 1 | id: 10
|
|__2 X __1 X (aten::relu)
id: 11 | id: 110
|
|__1 \u2713
|__1 ✓
id: 111
=========================== Mismatch leaf subgraphs: ===========================
['01', '110']
@ -1354,13 +1354,13 @@ class GraphInfo:
The repro directory will contain the following files::
dir
\u251c\u2500\u2500 test_<name>
\u2502 \u251c\u2500\u2500 model.onnx
\u2502 \u2514\u2500\u2500 test_data_set_0
\u2502 \u251c\u2500\u2500 input_0.pb
\u2502 \u251c\u2500\u2500 input_1.pb
\u2502 \u251c\u2500\u2500 output_0.pb
\u2502 \u2514\u2500\u2500 output_1.pb
├── test_<name>
│   ├── model.onnx
│   └── test_data_set_0
│       ├── input_0.pb
│       ├── input_1.pb
│       ├── output_0.pb
│       └── output_1.pb
Args:
repro_dir: The directory to export the repro files to. Defaults to current
@ -1825,19 +1825,19 @@ def find_mismatch(
Greatest absolute difference: 0.2328854203224182 at index (1, 2) (up to 1e-07 allowed)
Greatest relative difference: 0.699536174352349 at index (1, 3) (up to 0.001 allowed)
==================================== Tree: =====================================
5 X __2 X __1 \u2713
5 X __2 X __1 ✓
id: | id: 0 | id: 00
| |
| |__1 X (aten::relu)
| id: 01
|
|__3 X __1 \u2713
|__3 X __1 ✓
id: 1 | id: 10
|
|__2 X __1 X (aten::relu)
id: 11 | id: 110
|
|__1 \u2713
|__1 ✓
id: 111
=========================== Mismatch leaf subgraphs: ===========================
['01', '110']

View File

@ -67,16 +67,13 @@ class Directory:
return "".join(str_list)
def _stringify_tree(
self,
str_list: List[str],
preamble: str = "",
dir_ptr: str = "\u2500\u2500\u2500 ",
self, str_list: List[str], preamble: str = "", dir_ptr: str = "─── "
):
"""Recursive method to generate print-friendly version of a Directory."""
space = " "
branch = "\u2502 "
tee = "\u251c\u2500\u2500 "
last = "\u2514\u2500\u2500 "
branch = " "
tee = "├── "
last = "└── "
# add this directory's representation
str_list.append(f"{preamble}{dir_ptr}{self.name}\n")

View File

@ -748,7 +748,7 @@ Computes the minimum 4-term Blackman-Harris window according to Nuttall.
.. math::
w_n = 1 - 0.36358 \cos{(z_n)} + 0.48917 \cos{(2z_n)} - 0.13659 \cos{(3z_n)} + 0.01064 \cos{(4z_n)}
where ``z_n = 2 \u03c0 n/ M``.
where ``z_n = 2 π n/ M``.
""",
"""
@ -766,12 +766,12 @@ Keyword args:
References::
- A. Nuttall, "Some windows with very good sidelobe behavior,"
- A. Nuttall, “Some windows with very good sidelobe behavior,”
IEEE Transactions on Acoustics, Speech, and Signal Processing, vol. 29, no. 1, pp. 84-91,
Feb 1981. https://doi.org/10.1109/TASSP.1981.1163506
- Heinzel G. et al., "Spectrum and spectral density estimation by the Discrete Fourier transform (DFT),
including a comprehensive list of window functions and some new flat-top windows",
- Heinzel G. et al., “Spectrum and spectral density estimation by the Discrete Fourier transform (DFT),
including a comprehensive list of window functions and some new flat-top windows”,
February 15, 2002 https://holometer.fnal.gov/GH_FFT.pdf
Examples::

View File

@ -1036,7 +1036,7 @@ hermite_polynomial_h = _add_docstr(_special.special_hermite_polynomial_h,
r"""
hermite_polynomial_h(input, n, *, out=None) -> Tensor
Physicist's Hermite polynomial :math:`H_{n}(\text{input})`.
Physicist’s Hermite polynomial :math:`H_{n}(\text{input})`.
If :math:`n = 0`, :math:`1` is returned. If :math:`n = 1`, :math:`\text{input}`
is returned. Otherwise, the recursion:
@ -1059,7 +1059,7 @@ hermite_polynomial_he = _add_docstr(_special.special_hermite_polynomial_he,
r"""
hermite_polynomial_he(input, n, *, out=None) -> Tensor
Probabilist's Hermite polynomial :math:`He_{n}(\text{input})`.
Probabilist’s Hermite polynomial :math:`He_{n}(\text{input})`.
If :math:`n = 0`, :math:`1` is returned. If :math:`n = 1`, :math:`\text{input}`
is returned. Otherwise, the recursion:

View File

@ -0,0 +1 @@