mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[BE]: Update ruff to 0.4.1 (#124549)
Update ruff to 0.4.1 . This version fixes a lot false negatives/false positives, is 20-40% faster, and has various other bug fixes. Below is a before and after table showing the execution time of ruff lint and ruff format in milliseconds courtesy of https://astral.sh/blog/ruff-v0.4.0 | Repository | Linter (v0.3) | Linter (v0.4) | Formatter (v0.3) | Formatter (v0.4) | |----------------------------------------------------|---------------|---------------|------------------|------------------| | [pytorch/pytorch](https://github.com/pytorch/pytorch) | 328.7 | 251.8 | 351.1 | 274.9 | Pull Request resolved: https://github.com/pytorch/pytorch/pull/124549 Approved by: https://github.com/ezyang
This commit is contained in:
parent
f34905f61d
commit
5a1216bb2e
|
|
@ -2168,7 +2168,7 @@ init_command = [
|
|||
'python3',
|
||||
'tools/linter/adapters/pip_init.py',
|
||||
'--dry-run={{DRYRUN}}',
|
||||
'ruff==0.3.0',
|
||||
'ruff==0.4.1',
|
||||
]
|
||||
is_formatter = true
|
||||
|
||||
|
|
|
|||
|
|
@ -187,9 +187,7 @@ def train(lm_dataloader, model, criterion, optimizer, vocab_size, args):
|
|||
cur_loss = total_loss / log_interval
|
||||
elapsed = time.time() - start_time
|
||||
print(
|
||||
"| batch {:5d} | wps {:5.2f} | loss {:5.2f} | ppl {:8.2f}".format(
|
||||
i, word_counter / elapsed, cur_loss, math.exp(cur_loss)
|
||||
)
|
||||
f"| batch {i:5d} | wps {word_counter / elapsed:5.2f} | loss {cur_loss:5.2f} | ppl {math.exp(cur_loss):8.2f}"
|
||||
)
|
||||
word_counter = 0
|
||||
total_loss = 0
|
||||
|
|
|
|||
|
|
@ -185,9 +185,7 @@ def generate_c2_test_from_ops(ops_metadata, bench_op, tags):
|
|||
op = bench_op()
|
||||
op.init(**test_attrs)
|
||||
test_name = op.test_name("short")
|
||||
input_config = "Shapes: {}, Type: {}, Args: {}".format(
|
||||
op_metadata.input_dims, op_metadata.input_types, str(op_metadata.args)
|
||||
)
|
||||
input_config = f"Shapes: {op_metadata.input_dims}, Type: {op_metadata.input_types}, Args: {str(op_metadata.args)}"
|
||||
test_config = TestConfig(test_name, input_config, tags, run_backward=False)
|
||||
if op is not None:
|
||||
create_caffe2_op_test_case(op, test_config)
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ hash = subprocess.check_output(
|
|||
|
||||
out.writerow([hash, args.log_url, ""])
|
||||
|
||||
with open(args.file, "r") as f:
|
||||
with open(args.file) as f:
|
||||
reader = csv.DictReader(f)
|
||||
for row in reader:
|
||||
if row["status"] not in {"failed", "error"}:
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ except ImportError:
|
|||
warnings.warn(
|
||||
"lxml was not found. `pip install lxml` to make this script run much faster"
|
||||
)
|
||||
from download_reports import download_reports
|
||||
|
||||
|
||||
def open_test_results(directory):
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
import enum
|
||||
import json
|
||||
import os
|
||||
import pprint
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ def patch_file(
|
|||
|
||||
# These are hand written skips
|
||||
extra_dynamo_skips = set()
|
||||
with open(filename, "r") as f:
|
||||
with open(filename) as f:
|
||||
start = False
|
||||
for text in f.readlines():
|
||||
text = text.strip()
|
||||
|
|
|
|||
|
|
@ -71,13 +71,11 @@ protobuf via:
|
|||
https://github.com/google/protobuf/releases/
|
||||
"""
|
||||
|
||||
VERSION_MISMATCH = """
|
||||
Your python protobuf is of version {py_ver} but your native protoc version is of
|
||||
version {native_ver}. This will cause the installation to produce incompatible
|
||||
VERSION_MISMATCH = f"""
|
||||
Your python protobuf is of version {python_version} but your native protoc version is of
|
||||
version {native_version}. This will cause the installation to produce incompatible
|
||||
protobuf files. This is bad in general - consider installing the same version.
|
||||
""".format(
|
||||
py_ver=python_version, native_ver=native_version
|
||||
)
|
||||
"""
|
||||
|
||||
# Now, give actual recommendations
|
||||
if not python_protobuf_installed:
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ if __name__ == "__main__":
|
|||
if next_version is not None and next_version != commit.result["SCHEMA_VERSION"]:
|
||||
raise RuntimeError(
|
||||
f"Schema version is not updated from {commit.base['SCHEMA_VERSION']} to {next_version}.\n"
|
||||
+ f"Please either:\n"
|
||||
+ "Please either:\n"
|
||||
+ " 1. update schema.py to not break compatibility.\n"
|
||||
+ " or 2. bump the schema version to the expected value.\n"
|
||||
+ " or 3. use --force-unsafe to override schema.yaml (not recommended).\n "
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ def generate_models():
|
|||
caffe2_model_dir = sc._caffe2_model_dir(model)
|
||||
onnx_model_dir, onnx_models_dir = sc._onnx_model_dir(model)
|
||||
subprocess.check_call(["echo", model])
|
||||
with open(os.path.join(caffe2_model_dir, "value_info.json"), "r") as f:
|
||||
with open(os.path.join(caffe2_model_dir, "value_info.json")) as f:
|
||||
value_info = f.read()
|
||||
subprocess.check_call(
|
||||
[
|
||||
|
|
|
|||
|
|
@ -90,9 +90,7 @@ def download_onnx_model(model_name, zoo_dir, use_cache=True, only_local=False):
|
|||
try:
|
||||
download_file.close()
|
||||
print(
|
||||
"Downloading ONNX model {} from {} and save in {} ...\n".format(
|
||||
model_name, url, download_file.name
|
||||
)
|
||||
f"Downloading ONNX model {model_name} from {url} and save in {download_file.name} ...\n"
|
||||
)
|
||||
urlretrieve(url, download_file.name)
|
||||
with tarfile.open(download_file.name) as t:
|
||||
|
|
@ -300,9 +298,7 @@ if __name__ == "__main__":
|
|||
)
|
||||
|
||||
print(f"Deleteing old ONNX {onnx_model_name} model...")
|
||||
for f in glob.glob(
|
||||
os.path.join(onnx_model_dir, "model*".format(onnx_model_name))
|
||||
):
|
||||
for f in glob.glob(os.path.join(onnx_model_dir, "model*".format())):
|
||||
os.remove(f)
|
||||
|
||||
print(f"Serializing generated ONNX {onnx_model_name} model ...")
|
||||
|
|
|
|||
|
|
@ -9,12 +9,12 @@ import commitlist
|
|||
category_csv = "results/category_data.csv"
|
||||
commitlist_csv = "results/commitlist.csv"
|
||||
|
||||
with open(category_csv, "r") as category_data:
|
||||
with open(category_csv) as category_data:
|
||||
reader = csv.DictReader(category_data, commitlist.commit_fields)
|
||||
rows = list(reader)
|
||||
category_map = {row["commit_hash"]: row["category"] for row in rows}
|
||||
|
||||
with open(commitlist_csv, "r") as commitlist_data:
|
||||
with open(commitlist_csv) as commitlist_data:
|
||||
reader = csv.DictReader(commitlist_data, commitlist.commit_fields)
|
||||
commitlist_rows = list(reader)
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ import common
|
|||
import pandas as pd
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torchtext
|
||||
from torchtext.functional import to_tensor
|
||||
from tqdm import tqdm
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ from common import (
|
|||
features_to_dict,
|
||||
frontend_categories,
|
||||
get_commit_data_cache,
|
||||
get_features,
|
||||
run,
|
||||
topics,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -321,7 +321,7 @@ class _CommitDataCache:
|
|||
return self.data[commit]
|
||||
|
||||
def read_from_disk(self):
|
||||
with open(self.path, "r") as f:
|
||||
with open(self.path) as f:
|
||||
data = json.load(f)
|
||||
data = {commit: dict_to_features(dct) for commit, dct in data.items()}
|
||||
return data
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ from os import path
|
|||
import torch
|
||||
|
||||
# Import all utils so that getattr below can find them
|
||||
from torch.utils import bottleneck, checkpoint, model_zoo
|
||||
|
||||
all_submod_list = [
|
||||
"",
|
||||
|
|
@ -69,10 +68,10 @@ def run(args, submod):
|
|||
if not path.exists(new_filename):
|
||||
raise RuntimeError("New version data not collected")
|
||||
|
||||
with open(prev_filename, "r") as f:
|
||||
with open(prev_filename) as f:
|
||||
prev_content = set(json.load(f))
|
||||
|
||||
with open(new_filename, "r") as f:
|
||||
with open(new_filename) as f:
|
||||
new_content = set(json.load(f))
|
||||
|
||||
if not args.show_all:
|
||||
|
|
|
|||
|
|
@ -158,9 +158,7 @@ def compute_functional_name(test_params_dict):
|
|||
return test_params_dict["cpp_function_call"].split("(")[0].replace("F::", "")
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"`cpp_options_args` or `cpp_function_call` entry must be present in test params dict:\n{}".format(
|
||||
pprint.pformat(test_params_dict)
|
||||
)
|
||||
f"`cpp_options_args` or `cpp_function_call` entry must be present in test params dict:\n{pprint.pformat(test_params_dict)}" # noqa: B950
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -181,9 +179,7 @@ def compute_cpp_function_call(test_params_dict, arg_dict, functional_name):
|
|||
)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"`cpp_options_args` or `cpp_function_call` entry must be present in test params dict:\n{}".format(
|
||||
pprint.pformat(test_params_dict)
|
||||
)
|
||||
f"`cpp_options_args` or `cpp_function_call` entry must be present in test params dict:\n{pprint.pformat(test_params_dict)}" # noqa: B950
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -221,12 +217,10 @@ def write_test_to_test_class(
|
|||
or "cpp_function_call" in test_params_dict
|
||||
), (
|
||||
"To enable C++ API parity test, "
|
||||
"`cpp_options_args` or `cpp_function_call` entry must be present in test params dict:\n{}. \n"
|
||||
f"`cpp_options_args` or `cpp_function_call` entry must be present in test params dict:\n{pprint.pformat(test_params_dict)}. \n" # noqa: B950
|
||||
"If you are interested in adding the C++ API parity test, please see:\n"
|
||||
"NOTE [How to check NN module / functional API parity between Python and C++ frontends]. \n"
|
||||
"If not, please add `test_cpp_api_parity=False` to the test params dict and file an issue about this."
|
||||
).format(
|
||||
pprint.pformat(test_params_dict)
|
||||
)
|
||||
|
||||
assert not (
|
||||
|
|
@ -241,16 +235,14 @@ def write_test_to_test_class(
|
|||
|
||||
assert hasattr(
|
||||
torch.nn.functional, functional_name
|
||||
), "`torch.nn.functional` doesn't have function `{}`. (Discovered while processing\n{}.)".format(
|
||||
functional_name, pprint.pformat(test_params_dict)
|
||||
)
|
||||
), f"`torch.nn.functional` doesn't have function `{functional_name}`. (Discovered while processing\n{pprint.pformat(test_params_dict)}.)" # noqa: B950
|
||||
|
||||
functional_full_name = "F::" + functional_name
|
||||
|
||||
assert functional_full_name in parity_table["torch::nn::functional"], (
|
||||
"Please add `{}` entry to `torch::nn::functional` section of `test/cpp_api_parity/parity-tracker.md`. "
|
||||
"(Discovered while processing\n{}.)"
|
||||
).format(functional_full_name, pprint.pformat(test_params_dict))
|
||||
f"Please add `{functional_full_name}` entry to `torch::nn::functional` section of `test/cpp_api_parity/parity-tracker.md`. "
|
||||
f"(Discovered while processing\n{pprint.pformat(test_params_dict)}.)"
|
||||
)
|
||||
|
||||
for device in devices:
|
||||
test_params = process_test_params_for_functional(
|
||||
|
|
|
|||
|
|
@ -376,9 +376,9 @@ NOTE [How to check NN module / functional API parity between Python and C++ fron
|
|||
|
||||
def generate_error_msg(name, cpp_value, python_value):
|
||||
return (
|
||||
"Parity test failed: {} in C++ has value: {}, "
|
||||
"which does not match the corresponding value in Python: {}.\n{}"
|
||||
).format(name, cpp_value, python_value, MESSAGE_HOW_TO_FIX_CPP_PARITY_TEST_FAILURE)
|
||||
f"Parity test failed: {name} in C++ has value: {cpp_value}, "
|
||||
f"which does not match the corresponding value in Python: {python_value}.\n{MESSAGE_HOW_TO_FIX_CPP_PARITY_TEST_FAILURE}"
|
||||
)
|
||||
|
||||
|
||||
def try_remove_folder(folder_path):
|
||||
|
|
|
|||
|
|
@ -4741,9 +4741,7 @@ class TestDistributionShapes(DistributionsTestCase):
|
|||
expected_shape = (
|
||||
dist.batch_shape if dist.batch_shape else torch.Size()
|
||||
)
|
||||
message = "{} example {}/{}, shape mismatch. expected {}, actual {}".format(
|
||||
Dist.__name__, i + 1, len(params), expected_shape, actual_shape
|
||||
)
|
||||
message = f"{Dist.__name__} example {i + 1}/{len(params)}, shape mismatch. expected {expected_shape}, actual {actual_shape}" # noqa: B950
|
||||
self.assertEqual(actual_shape, expected_shape, msg=message)
|
||||
except NotImplementedError:
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -924,18 +924,13 @@ class TestFakeQuantizeOps(TestCase):
|
|||
|
||||
self.assertTrue(
|
||||
torch.allclose(dX_expected, dX_actual, rtol=tolerance, atol=tolerance),
|
||||
"Expected dX={} to match X.grad={}, X={}, s={}, z={}, dout={}, n_bits={}".format(
|
||||
dX_expected, dX_actual, X_curr, scale_curr, zero_point_curr, dout, n_bits))
|
||||
f"Expected dX={dX_expected} to match X.grad={dX_actual}, X={X_curr}, s={scale_curr}, z={zero_point_curr}, dout={dout}, n_bits={n_bits}") # noqa: B950
|
||||
self.assertTrue(
|
||||
torch.allclose(dScale_expected * grad_factor, dScale_actual, rtol=tolerance, atol=tolerance),
|
||||
"Expected dScale={} to match scale.grad={}, X={}, s={}, z={}, dout={}, n_bits={}".format(
|
||||
dScale_expected * grad_factor, dScale_actual,
|
||||
X_curr, scale_curr, zero_point_curr, dout, n_bits))
|
||||
f"Expected dScale={dScale_expected * grad_factor} to match scale.grad={dScale_actual}, X={X_curr}, s={scale_curr}, z={zero_point_curr}, dout={dout}, n_bits={n_bits}") # noqa: B950
|
||||
self.assertTrue(
|
||||
torch.allclose(dZeroPoint_expected * grad_factor, dZeroPoint_actual, rtol=tolerance, atol=tolerance),
|
||||
"Expected dZeroPoint={} to match zero_point.grad={}, X={}, s={}, z={}, dout={}, n_bits={}".format(
|
||||
dZeroPoint_expected * grad_factor, dZeroPoint_actual,
|
||||
X_curr, scale_curr, zero_point_curr, dout, n_bits))
|
||||
f"Expected dZeroPoint={dZeroPoint_expected * grad_factor} to match zero_point.grad={dZeroPoint_actual}, X={X_curr}, s={scale_curr}, z={zero_point_curr}, dout={dout}, n_bits={n_bits}") # noqa: B950
|
||||
X_curr.grad.data.zero_()
|
||||
scale_curr.grad.data.zero_()
|
||||
zero_point_curr.grad.data.zero_()
|
||||
|
|
|
|||
|
|
@ -4696,9 +4696,7 @@ Done""",
|
|||
) # make it us which is profiler default
|
||||
print("Total time based on python measurements: ", _format_time(total_time_us))
|
||||
print(
|
||||
"CPU time measurement python side overhead: {:.2f}%".format(
|
||||
(total_time_us / prof.self_cpu_time_total - 1.0) * 100.0
|
||||
)
|
||||
f"CPU time measurement python side overhead: {(total_time_us / prof.self_cpu_time_total - 1.0) * 100.0:.2f}%"
|
||||
)
|
||||
|
||||
if sys.platform != "win32":
|
||||
|
|
|
|||
|
|
@ -1359,8 +1359,7 @@ torch.cuda.synchronize()
|
|||
output_method = getattr(args[0], op)(*args[1:], **add_kwargs)
|
||||
if isinstance(output_method, torch.Tensor):
|
||||
self.assertTrue(out_type == output_method.dtype,
|
||||
"autocast for torch.{} produced {}, should produce torch.{}"
|
||||
.format(op, output_method.dtype, out_type))
|
||||
f"autocast for torch.{op} produced {output_method.dtype}, should produce torch.{out_type}")
|
||||
|
||||
self.assertTrue((output is not None) or (output_method is not None),
|
||||
f"{op} not found as an attribute on either Tensor or the requested module {module}")
|
||||
|
|
|
|||
|
|
@ -2135,8 +2135,7 @@ except RuntimeError as e:
|
|||
elif exit_method == 'worker_kill':
|
||||
if isinstance(loader_p.exception, RuntimeError):
|
||||
if 'DataLoader worker (pid' not in str(loader_p.exception):
|
||||
fail('loader process did not raise expected exception, but had {}'.format(
|
||||
loader_p.exception))
|
||||
fail(f'loader process did not raise expected exception, but had {loader_p.exception}')
|
||||
elif isinstance(loader_p.exception, ConnectionRefusedError):
|
||||
# Sometimes, when the worker is being killed and is freeing its
|
||||
# resources, the unpickling in loader process will be met an
|
||||
|
|
|
|||
|
|
@ -1158,19 +1158,17 @@ class MpsMemoryLeakCheck:
|
|||
if caching_allocator_discrepancy and not driver_discrepancy:
|
||||
# Just raises a warning if the leak is not validated by the driver API
|
||||
msg = ("MPS caching allocator reports a memory leak not "
|
||||
"verified by the driver API in {}! "
|
||||
"Caching allocator allocated memory was {} and is now reported as {}. "
|
||||
"MPS driver allocated memory was {} and is now {}.").format(
|
||||
self.name, self.caching_allocator_before,
|
||||
caching_allocator_mem_allocated, self.driver_before, driver_mem_allocated)
|
||||
f"verified by the driver API in {self.name}! "
|
||||
f"Caching allocator allocated memory was {self.caching_allocator_before} "
|
||||
f"and is now reported as {caching_allocator_mem_allocated}. "
|
||||
f"MPS driver allocated memory was {self.driver_before} and is now {driver_mem_allocated}.")
|
||||
warnings.warn(msg)
|
||||
elif caching_allocator_discrepancy and driver_discrepancy:
|
||||
# A caching allocator discrepancy validated by the driver API is a failure
|
||||
msg = ("MPS driver API confirmed a leak in {}! "
|
||||
"Caching allocator allocated memory was {} and is now reported as {}. "
|
||||
"MPS driver allocated memory was {} and is now {}.").format(
|
||||
self.name, self.caching_allocator_before, caching_allocator_mem_allocated,
|
||||
self.driver_before, driver_mem_allocated)
|
||||
msg = (f"MPS driver API confirmed a leak in {self.name}! "
|
||||
f"Caching allocator allocated memory was {self.caching_allocator_before} "
|
||||
f"and is now reported as {caching_allocator_mem_allocated}. "
|
||||
f"MPS driver allocated memory was {self.driver_before} and is now {driver_mem_allocated}.")
|
||||
|
||||
raise RuntimeError(msg)
|
||||
|
||||
|
|
|
|||
|
|
@ -819,8 +819,9 @@ class TestCommon(TestCase):
|
|||
self.assertEqual(expected, out)
|
||||
|
||||
if compare_strides_and_data_ptrs:
|
||||
stride_msg = "Strides are not the same! Original strides were {} and strides are now {}".format(
|
||||
original_strides, final_strides
|
||||
stride_msg = (
|
||||
f"Strides are not the same! Original strides were {original_strides} "
|
||||
f"and strides are now {final_strides}"
|
||||
)
|
||||
self.assertEqual(original_strides, final_strides, msg=stride_msg)
|
||||
self.assertEqual(original_ptrs, final_ptrs)
|
||||
|
|
@ -944,8 +945,9 @@ class TestCommon(TestCase):
|
|||
self.assertEqual(expected, out)
|
||||
|
||||
if compare_strides_and_data_ptrs:
|
||||
stride_msg = "Strides are not the same! Original strides were {} and strides are now {}".format(
|
||||
original_strides, final_strides
|
||||
stride_msg = (
|
||||
"Strides are not the same! "
|
||||
f"Original strides were {original_strides} and strides are now {final_strides}"
|
||||
)
|
||||
self.assertEqual(original_strides, final_strides, msg=stride_msg)
|
||||
self.assertEqual(original_ptrs, final_ptrs)
|
||||
|
|
@ -1516,16 +1518,12 @@ class TestCommon(TestCase):
|
|||
if len(partially_supported_forward) > 0:
|
||||
msg = (
|
||||
msg
|
||||
+ "The following dtypes only worked on some samples during forward: {}.\n".format(
|
||||
partially_supported_forward
|
||||
)
|
||||
+ f"The following dtypes only worked on some samples during forward: {partially_supported_forward}.\n"
|
||||
)
|
||||
if len(partially_supported_backward) > 0:
|
||||
msg = (
|
||||
msg
|
||||
+ "The following dtypes only worked on some samples during backward: {}.\n".format(
|
||||
partially_supported_backward
|
||||
)
|
||||
+ f"The following dtypes only worked on some samples during backward: {partially_supported_backward}.\n"
|
||||
)
|
||||
print(msg)
|
||||
|
||||
|
|
@ -1550,30 +1548,26 @@ class TestCommon(TestCase):
|
|||
if len(supported_but_unclaimed_forward) > 0:
|
||||
msg = (
|
||||
msg
|
||||
+ "The following dtypes worked in forward but are not listed by the OpInfo: {}.\n".format(
|
||||
supported_but_unclaimed_forward
|
||||
)
|
||||
+ "The following dtypes worked in forward but are not listed by the OpInfo: "
|
||||
+ f"{supported_but_unclaimed_forward}.\n"
|
||||
)
|
||||
if len(supported_but_unclaimed_backward) > 0:
|
||||
msg = (
|
||||
msg
|
||||
+ "The following dtypes worked in backward but are not listed by the OpInfo: {}.\n".format(
|
||||
supported_but_unclaimed_backward
|
||||
)
|
||||
+ "The following dtypes worked in backward but are not listed by the OpInfo: "
|
||||
+ f"{supported_but_unclaimed_backward}.\n"
|
||||
)
|
||||
if len(claimed_but_unsupported_forward) > 0:
|
||||
msg = (
|
||||
msg
|
||||
+ "The following dtypes did not work in forward but are listed by the OpInfo: {}.\n".format(
|
||||
claimed_but_unsupported_forward
|
||||
)
|
||||
+ "The following dtypes did not work in forward but are listed by the OpInfo: "
|
||||
+ f"{claimed_but_unsupported_forward}.\n"
|
||||
)
|
||||
if len(claimed_but_unsupported_backward) > 0:
|
||||
msg = (
|
||||
msg
|
||||
+ "The following dtypes did not work in backward but are listed by the OpInfo: {}.\n".format(
|
||||
claimed_but_unsupported_backward
|
||||
)
|
||||
+ "The following dtypes did not work in backward "
|
||||
+ f"but are listed by the OpInfo: {claimed_but_unsupported_backward}.\n"
|
||||
)
|
||||
|
||||
all_claimed_but_unsupported = set.union(
|
||||
|
|
|
|||
|
|
@ -4350,8 +4350,7 @@ class TestSparseMeta(TestCase):
|
|||
)
|
||||
|
||||
printed = []
|
||||
printed.append("########## {}/{}/size={}+{}+{}+{} ##########".format(
|
||||
dtype, index_dtype, batchsize, sparsesize, blocksize, densesize))
|
||||
printed.append(f"########## {dtype}/{index_dtype}/size={batchsize}+{sparsesize}+{blocksize}+{densesize} ##########")
|
||||
printed.append("# sparse meta tensor")
|
||||
printed.append(str(x))
|
||||
|
||||
|
|
|
|||
|
|
@ -433,8 +433,7 @@ class TestSparseCompressed(TestCase):
|
|||
basesize = size[batch_ndim:batch_ndim + base_ndim]
|
||||
densesize = size[batch_ndim + base_ndim:]
|
||||
assert len(densesize) == dense_ndim
|
||||
printed.append("########## {}/{}/size={}+{}+{} ##########".format(
|
||||
dtype, index_dtype, batchsize, basesize, densesize))
|
||||
printed.append(f"########## {dtype}/{index_dtype}/size={batchsize}+{basesize}+{densesize} ##########")
|
||||
x = torch.sparse_compressed_tensor(compressed_indices,
|
||||
plain_indices,
|
||||
values, size, dtype=dtype, layout=layout, device=device)
|
||||
|
|
|
|||
|
|
@ -945,7 +945,7 @@ class TestTensorCreation(TestCase):
|
|||
@onlyNativeDeviceTypes
|
||||
@dtypes(*all_types_and_complex_and(torch.half))
|
||||
def test_vstack_row_stack(self, device, dtype):
|
||||
ops = ((torch.vstack, np.vstack), (torch.row_stack, np.row_stack))
|
||||
ops = ((torch.vstack, np.vstack), (torch.row_stack, np.vstack))
|
||||
for torch_op, np_op in ops:
|
||||
self._test_special_stacks(0, 2, torch_op, np_op, device, dtype)
|
||||
for i in range(5):
|
||||
|
|
|
|||
|
|
@ -1973,8 +1973,7 @@ class TestTestParametrizationDeviceType(TestCase):
|
|||
for op in op_db:
|
||||
for dtype in op.supported_dtypes(torch.device(device).type):
|
||||
for flag_part in ('flag_disabled', 'flag_enabled'):
|
||||
expected_name = '{}.test_op_parametrized_{}_{}_{}_{}'.format(
|
||||
device_cls.__name__, op.formatted_name, flag_part, device, dtype_name(dtype))
|
||||
expected_name = f'{device_cls.__name__}.test_op_parametrized_{op.formatted_name}_{flag_part}_{device}_{dtype_name(dtype)}' # noqa: B950
|
||||
expected_test_names.append(expected_name)
|
||||
|
||||
test_names = _get_test_names_for_test_class(device_cls)
|
||||
|
|
|
|||
|
|
@ -9126,8 +9126,8 @@ tensor([[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j],
|
|||
for seed, expected_initial_seed in test_cases:
|
||||
torch.manual_seed(seed)
|
||||
actual_initial_seed = torch.initial_seed()
|
||||
msg = "expected initial_seed() = {:x} after calling manual_seed({:x}), but got {:x} instead".format(
|
||||
expected_initial_seed, seed, actual_initial_seed)
|
||||
msg = (f"expected initial_seed() = {expected_initial_seed:x} "
|
||||
f"after calling manual_seed({seed:x}), but got {actual_initial_seed:x} instead")
|
||||
self.assertEqual(expected_initial_seed, actual_initial_seed, msg=msg)
|
||||
for invalid_seed in [min_int64 - 1, max_uint64 + 1]:
|
||||
with self.assertRaisesRegex(RuntimeError, r'Overflow when unpacking long'):
|
||||
|
|
|
|||
|
|
@ -1377,9 +1377,7 @@ class VariableBuilder:
|
|||
if not is_constant_source(self.get_source()):
|
||||
if self.tx.export and not isinstance(self.get_source(), LocalSource):
|
||||
raise AssertionError(
|
||||
"Dynamo attempts to add additional input during export: value={}, source={}".format(
|
||||
wrapped_value, self.get_source()
|
||||
)
|
||||
f"Dynamo attempts to add additional input during export: value={wrapped_value}, source={self.get_source()}"
|
||||
)
|
||||
fake_tensor_value = None
|
||||
if isinstance(unspec_var, ConstantVariable):
|
||||
|
|
|
|||
|
|
@ -1665,8 +1665,9 @@ def _split_dim_meta(a: TensorLikeType, dim: int, outer_length: int) -> TensorLik
|
|||
inner_length = a.shape[dim] // outer_length
|
||||
|
||||
if (a.shape[dim] % outer_length) != 0:
|
||||
msg = "Attempting to split dimension of length {}, but outer length of {} divides it with a remainder!".format(
|
||||
a.shape[dim], outer_length
|
||||
msg = (
|
||||
f"Attempting to split dimension of length {a.shape[dim]}, "
|
||||
f"but outer length of {outer_length} divides it with a remainder!"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
|
|
@ -1744,9 +1745,7 @@ squeeze = _make_prim(
|
|||
|
||||
def _transpose_meta(a: TensorLikeType, permutation: DimsSequenceType) -> TensorLikeType:
|
||||
if a.ndim != len(permutation):
|
||||
msg = "Attempting to permute a tensor of rank {}, but received a permutation of length {}!".format(
|
||||
a.ndim, len(permutation)
|
||||
)
|
||||
msg = f"Attempting to permute a tensor of rank {a.ndim}, but received a permutation of length {len(permutation)}!"
|
||||
raise ValueError(msg)
|
||||
|
||||
if not utils.is_valid_permutation(a.ndim, permutation):
|
||||
|
|
|
|||
|
|
@ -1774,10 +1774,9 @@ def check_in_bounds_for_storage(
|
|||
required_length = compute_required_storage_length(shape, strides, storage_offset)
|
||||
if a.size() < required_length:
|
||||
msg = (
|
||||
"Can't view a storage of size {} with an offset of {}, shape of {}, and strides of {}, "
|
||||
"which requires a storage of size {}".format(
|
||||
a.size(), storage_offset, str(shape), str(strides), required_length
|
||||
)
|
||||
f"Can't view a storage of size {a.size()} with an offset of {storage_offset}, "
|
||||
f"shape of {str(shape)}, and strides of {str(strides)}, "
|
||||
f"which requires a storage of size {required_length}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
|
|
|
|||
|
|
@ -182,8 +182,9 @@ def _safe_copy_out(
|
|||
):
|
||||
# Checks same device
|
||||
if copy_from.device != copy_to.device:
|
||||
msg = "Attempting to copy from device {} to device {}, but cross-device copies are not allowed!".format(
|
||||
copy_from.device, copy_to.device
|
||||
msg = (
|
||||
f"Attempting to copy from device {copy_from.device} "
|
||||
f"to device {copy_to.device}, but cross-device copies are not allowed!"
|
||||
)
|
||||
raise RuntimeError(msg)
|
||||
|
||||
|
|
|
|||
|
|
@ -1146,9 +1146,7 @@ def copysign(
|
|||
if isinstance(b, Number) and isinstance(a, Tensor):
|
||||
b = scalar_tensor(b, dtype=a.dtype, device=a.device)
|
||||
elif isinstance(a, Tensor) and isinstance(b, Tensor) and a.device != b.device:
|
||||
msg = "Expected divisor (b) to be on the same device ({}) as dividend (a), but it is found on {}!".format(
|
||||
a.device, b.device
|
||||
)
|
||||
msg = f"Expected divisor (b) to be on the same device ({a.device}) as dividend (a), but it is found on {b.device}!"
|
||||
raise RuntimeError(msg)
|
||||
return where(signbit(b), neg(abs(a)), abs(a))
|
||||
|
||||
|
|
@ -1288,9 +1286,7 @@ def floor_divide(
|
|||
a = scalar_tensor(a, dtype=b.dtype, device=b.device)
|
||||
elif isinstance(a, Tensor) and isinstance(b, Tensor) and a.device != b.device:
|
||||
if a.device == torch.device("cpu"):
|
||||
msg = "Expected divisor (b) to be on the same device ({}) as dividend (a), but it is found on {}!".format(
|
||||
a.device, b.device
|
||||
)
|
||||
msg = f"Expected divisor (b) to be on the same device ({a.device}) as dividend (a), but it is found on {b.device}!"
|
||||
raise RuntimeError(msg)
|
||||
else:
|
||||
b = prims.device_put(b, device=a.device)
|
||||
|
|
@ -1936,9 +1932,7 @@ def clone(
|
|||
|
||||
def copy_to(a: Tensor, b: Tensor, *, allow_cross_device=True):
|
||||
if not allow_cross_device and a.device != b.device:
|
||||
msg = "Attempting to copy from device {} to device {}, but cross-device copies are not allowed!".format(
|
||||
b.device, a.device
|
||||
)
|
||||
msg = f"Attempting to copy from device {b.device} to device {a.device}, but cross-device copies are not allowed!"
|
||||
raise RuntimeError(msg)
|
||||
|
||||
return prims.copy_to(a, b)
|
||||
|
|
@ -3708,9 +3702,7 @@ def _reshape_view_helper(a: TensorLikeType, *shape, allow_copy: bool) -> TensorL
|
|||
if allow_copy:
|
||||
return prims.reshape(a, shape)
|
||||
|
||||
msg = "Cannot view a tensor with shape {} and strides {} as a tensor with shape {}!".format(
|
||||
a.shape, a.stride(), shape
|
||||
)
|
||||
msg = f"Cannot view a tensor with shape {a.shape} and strides {a.stride()} as a tensor with shape {shape}!"
|
||||
raise ValueError(msg)
|
||||
|
||||
a_ = flatten(a_, idx, end)
|
||||
|
|
@ -4070,8 +4062,9 @@ def tensor_split(
|
|||
# If indices_or_sections is a tensor, it must be a CPU Long tensor
|
||||
if isinstance(indices_or_sections, TensorLike):
|
||||
if not indices_or_sections.device.type == "cpu":
|
||||
msg = "tensor_split: if indices_or_sections is a tensor it must be on the CPU, but received one on {}".format(
|
||||
indices_or_sections.device
|
||||
msg = (
|
||||
f"tensor_split: if indices_or_sections is a tensor it must be on the CPU, "
|
||||
f"but received one on {indices_or_sections.device}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
if indices_or_sections.dtype != torch.long:
|
||||
|
|
|
|||
|
|
@ -39,8 +39,7 @@ class LinearReLU(_FusedModule):
|
|||
During quantization this will be replaced with the corresponding fused module."""
|
||||
def __init__(self, linear, relu):
|
||||
assert type_before_parametrizations(linear) == Linear and type_before_parametrizations(relu) == ReLU, \
|
||||
'Incorrect types for input modules{}{}'.format(
|
||||
type_before_parametrizations(linear), type_before_parametrizations(relu))
|
||||
f'Incorrect types for input modules{type_before_parametrizations(linear)}{type_before_parametrizations(relu)}'
|
||||
super().__init__(linear, relu)
|
||||
|
||||
class ConvBn1d(_FusedModule):
|
||||
|
|
@ -64,8 +63,7 @@ class ConvBnReLU1d(_FusedModule):
|
|||
During quantization this will be replaced with the corresponding fused module."""
|
||||
def __init__(self, conv, bn, relu):
|
||||
assert type_before_parametrizations(conv) == Conv1d and type_before_parametrizations(bn) == BatchNorm1d and \
|
||||
type_before_parametrizations(relu) == ReLU, 'Incorrect types for input modules{}{}{}' \
|
||||
.format(type_before_parametrizations(conv), type_before_parametrizations(bn), type_before_parametrizations(relu))
|
||||
type_before_parametrizations(relu) == ReLU, f'Incorrect types for input modules{type_before_parametrizations(conv)}{type_before_parametrizations(bn)}{type_before_parametrizations(relu)}' # noqa: B950
|
||||
super().__init__(conv, bn, relu)
|
||||
|
||||
class ConvBnReLU2d(_FusedModule):
|
||||
|
|
@ -73,8 +71,7 @@ class ConvBnReLU2d(_FusedModule):
|
|||
During quantization this will be replaced with the corresponding fused module."""
|
||||
def __init__(self, conv, bn, relu):
|
||||
assert type_before_parametrizations(conv) == Conv2d and type_before_parametrizations(bn) == BatchNorm2d and \
|
||||
type_before_parametrizations(relu) == ReLU, 'Incorrect types for input modules{}{}{}' \
|
||||
.format(type_before_parametrizations(conv), type_before_parametrizations(bn), type_before_parametrizations(relu))
|
||||
type_before_parametrizations(relu) == ReLU, f'Incorrect types for input modules{type_before_parametrizations(conv)}{type_before_parametrizations(bn)}{type_before_parametrizations(relu)}' # noqa: B950
|
||||
super().__init__(conv, bn, relu)
|
||||
|
||||
class ConvBn3d(_FusedModule):
|
||||
|
|
@ -90,8 +87,7 @@ class ConvBnReLU3d(_FusedModule):
|
|||
During quantization this will be replaced with the corresponding fused module."""
|
||||
def __init__(self, conv, bn, relu):
|
||||
assert type_before_parametrizations(conv) == Conv3d and type_before_parametrizations(bn) == BatchNorm3d and \
|
||||
type_before_parametrizations(relu) == ReLU, 'Incorrect types for input modules{}{}{}' \
|
||||
.format(type_before_parametrizations(conv), type_before_parametrizations(bn), type_before_parametrizations(relu))
|
||||
type_before_parametrizations(relu) == ReLU, f'Incorrect types for input modules{type_before_parametrizations(conv)}{type_before_parametrizations(bn)}{type_before_parametrizations(relu)}' # noqa: B950
|
||||
super().__init__(conv, bn, relu)
|
||||
|
||||
|
||||
|
|
@ -100,8 +96,7 @@ class BNReLU2d(_FusedModule):
|
|||
During quantization this will be replaced with the corresponding fused module."""
|
||||
def __init__(self, batch_norm, relu):
|
||||
assert type_before_parametrizations(batch_norm) == BatchNorm2d and type_before_parametrizations(relu) == ReLU, \
|
||||
'Incorrect types for input modules{}{}'.format(
|
||||
type_before_parametrizations(batch_norm), type_before_parametrizations(relu))
|
||||
f'Incorrect types for input modules{type_before_parametrizations(batch_norm)}{type_before_parametrizations(relu)}'
|
||||
super().__init__(batch_norm, relu)
|
||||
|
||||
class BNReLU3d(_FusedModule):
|
||||
|
|
@ -109,8 +104,7 @@ class BNReLU3d(_FusedModule):
|
|||
During quantization this will be replaced with the corresponding fused module."""
|
||||
def __init__(self, batch_norm, relu):
|
||||
assert type_before_parametrizations(batch_norm) == BatchNorm3d and type_before_parametrizations(relu) == ReLU, \
|
||||
'Incorrect types for input modules{}{}'.format(
|
||||
type_before_parametrizations(batch_norm), type_before_parametrizations(relu))
|
||||
f'Incorrect types for input modules{type_before_parametrizations(batch_norm)}{type_before_parametrizations(relu)}'
|
||||
super().__init__(batch_norm, relu)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -56,9 +56,7 @@ class Conv1d(nnq.Conv1d):
|
|||
dtype=None,
|
||||
reduce_range=True):
|
||||
warnings.warn(
|
||||
"The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format(
|
||||
self._get_name()
|
||||
)
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended" # noqa: B950
|
||||
)
|
||||
factory_kwargs = {'device': device, 'dtype': dtype}
|
||||
kernel_size = _single(kernel_size)
|
||||
|
|
@ -121,9 +119,8 @@ class Conv2d(nnq.Conv2d):
|
|||
padding=0, dilation=1, groups=1, bias=True,
|
||||
padding_mode='zeros', device=None, dtype=None):
|
||||
warnings.warn(
|
||||
"The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format(
|
||||
self._get_name()
|
||||
)
|
||||
f"The current implementation of the {self._get_name()} module "
|
||||
"has poor numerical accuracy and its use is not recommended"
|
||||
)
|
||||
factory_kwargs = {'device': device, 'dtype': dtype}
|
||||
kernel_size = _pair(kernel_size)
|
||||
|
|
@ -186,9 +183,7 @@ class Conv3d(nnq.Conv3d):
|
|||
padding=0, dilation=1, groups=1, bias=True,
|
||||
padding_mode='zeros', device=None, dtype=None):
|
||||
warnings.warn(
|
||||
"The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format(
|
||||
self._get_name()
|
||||
)
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended" # noqa: B950
|
||||
)
|
||||
assert padding_mode != 'reflect', "Conv3d does not support reflection padding"
|
||||
factory_kwargs = {'device': device, 'dtype': dtype}
|
||||
|
|
@ -256,9 +251,7 @@ class ConvTranspose1d(nnq.ConvTranspose1d):
|
|||
padding=0, output_padding=0, groups=1, bias=True,
|
||||
dilation=1, padding_mode='zeros', device=None, dtype=None):
|
||||
warnings.warn(
|
||||
"The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format(
|
||||
self._get_name()
|
||||
)
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended" # noqa: B950
|
||||
)
|
||||
factory_kwargs = {'device': device, 'dtype': dtype}
|
||||
super().__init__(
|
||||
|
|
@ -317,9 +310,7 @@ class ConvTranspose2d(nnq.ConvTranspose2d):
|
|||
padding=0, output_padding=0, groups=1, bias=True,
|
||||
dilation=1, padding_mode='zeros', device=None, dtype=None):
|
||||
warnings.warn(
|
||||
"The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format(
|
||||
self._get_name()
|
||||
)
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended" # noqa: B950
|
||||
)
|
||||
factory_kwargs = {'device': device, 'dtype': dtype}
|
||||
super().__init__(
|
||||
|
|
@ -378,9 +369,7 @@ class ConvTranspose3d(nnq.ConvTranspose3d):
|
|||
padding=0, output_padding=0, groups=1, bias=True,
|
||||
dilation=1, padding_mode='zeros', device=None, dtype=None):
|
||||
warnings.warn(
|
||||
"The current implementation of the {} module has poor numerical accuracy and its use is not recommended".format(
|
||||
self._get_name()
|
||||
)
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended" # noqa: B950
|
||||
)
|
||||
factory_kwargs = {'device': device, 'dtype': dtype}
|
||||
super().__init__(
|
||||
|
|
|
|||
|
|
@ -64,9 +64,7 @@ class Linear(nnq.Linear):
|
|||
return 'DynamicQuantizedLinear'
|
||||
|
||||
def extra_repr(self):
|
||||
extra_repr_str = 'in_features={}, out_features={}, dtype={}'.format(
|
||||
self.in_features, self.out_features, self._packed_params.dtype
|
||||
)
|
||||
extra_repr_str = f'in_features={self.in_features}, out_features={self.out_features}, dtype={self._packed_params.dtype}'
|
||||
if self._packed_params.dtype == torch.qint8:
|
||||
extra_repr_str += f', qscheme={self.weight().qscheme()}'
|
||||
return extra_repr_str
|
||||
|
|
|
|||
|
|
@ -125,9 +125,8 @@ class Embedding(torch.nn.Module):
|
|||
return _hide_packed_params_repr(self, EmbeddingPackedParams)
|
||||
|
||||
def extra_repr(self):
|
||||
extra_repr_str = 'num_embeddings={}, embedding_dim={}, dtype={}, qscheme={}'.format(
|
||||
self.num_embeddings, self.embedding_dim, self._packed_params.dtype, self.weight().qscheme()
|
||||
)
|
||||
extra_repr_str = (f'num_embeddings={self.num_embeddings}, embedding_dim={self.embedding_dim}, '
|
||||
f'dtype={self._packed_params.dtype}, qscheme={self.weight().qscheme()}')
|
||||
|
||||
return extra_repr_str
|
||||
|
||||
|
|
|
|||
|
|
@ -157,9 +157,8 @@ class Linear(WeightedQuantizedModule):
|
|||
return 'QuantizedLinear'
|
||||
|
||||
def extra_repr(self):
|
||||
return 'in_features={}, out_features={}, scale={}, zero_point={}, qscheme={}'.format(
|
||||
self.in_features, self.out_features, self.scale, self.zero_point, self.weight().qscheme()
|
||||
)
|
||||
return f'in_features={self.in_features}, out_features={self.out_features}, scale={self.scale}, ' \
|
||||
f'zero_point={self.zero_point}, qscheme={self.weight().qscheme()}'
|
||||
|
||||
def __repr__(self):
|
||||
return _hide_packed_params_repr(self, LinearPackedParams)
|
||||
|
|
|
|||
|
|
@ -99,9 +99,8 @@ class Linear(torch.nn.Module):
|
|||
return 'SparseQuantizedLinear'
|
||||
|
||||
def extra_repr(self):
|
||||
return 'in_features={}, out_features={}, scale={}, zero_point={}, qscheme={}'.format(
|
||||
self.in_features, self.out_features, self.scale, self.zero_point, self.weight().qscheme()
|
||||
)
|
||||
return (f'in_features={self.in_features}, out_features={self.out_features}, scale={self.scale}, '
|
||||
f'zero_point={self.zero_point}, qscheme={self.weight().qscheme()}')
|
||||
|
||||
def __repr__(self):
|
||||
return _hide_packed_params_repr(self, LinearPackedParams)
|
||||
|
|
|
|||
|
|
@ -30,8 +30,7 @@ class BaseDataScheduler:
|
|||
def __init__(self, data_sparsifier, schedule_param: str, last_epoch=-1, verbose=False):
|
||||
# Attach sparsifier
|
||||
if not isinstance(data_sparsifier, BaseDataSparsifier):
|
||||
raise TypeError('{} is not an instance of torch.ao.pruning.BaseDataSparsifier'.format(
|
||||
type(data_sparsifier).__name__))
|
||||
raise TypeError(f'{type(data_sparsifier).__name__} is not an instance of torch.ao.pruning.BaseDataSparsifier')
|
||||
self.data_sparsifier = data_sparsifier
|
||||
self.schedule_param = schedule_param
|
||||
|
||||
|
|
|
|||
|
|
@ -217,12 +217,10 @@ class FakeQuantize(FakeQuantizeBase):
|
|||
|
||||
@torch.jit.export
|
||||
def extra_repr(self):
|
||||
return 'fake_quant_enabled={}, observer_enabled={}, ' \
|
||||
'quant_min={}, quant_max={}, dtype={}, qscheme={}, ch_axis={}, ' \
|
||||
'scale={}, zero_point={}'.format(
|
||||
self.fake_quant_enabled, self.observer_enabled,
|
||||
self.activation_post_process.quant_min, self.activation_post_process.quant_max,
|
||||
self.dtype, self.qscheme, self.ch_axis, self.scale, self.zero_point)
|
||||
return f'fake_quant_enabled={self.fake_quant_enabled}, observer_enabled={self.observer_enabled}, ' \
|
||||
f'quant_min={self.activation_post_process.quant_min}, quant_max={self.activation_post_process.quant_max}, ' \
|
||||
f'dtype={self.dtype}, qscheme={self.qscheme}, ch_axis={self.ch_axis}, ' \
|
||||
f'scale={self.scale}, zero_point={self.zero_point}'
|
||||
|
||||
def _save_to_state_dict(self, destination, prefix, keep_vars):
|
||||
# We cannot currently register scalar values as buffers, so need to manually
|
||||
|
|
@ -289,11 +287,10 @@ class FixedQParamsFakeQuantize(FakeQuantize):
|
|||
@torch.jit.export
|
||||
def extra_repr(self):
|
||||
"""Define a string representation of the object's attributes."""
|
||||
return 'fake_quant_enabled={}, observer_enabled={}, scale={}, zero_point={}, ' \
|
||||
'dtype={}, quant_min={}, quant_max={}, qscheme={}'.format(
|
||||
self.fake_quant_enabled, self.observer_enabled,
|
||||
self.scale, self.zero_point, self.dtype,
|
||||
self.activation_post_process.quant_min, self.activation_post_process.quant_max, self.qscheme)
|
||||
return f'fake_quant_enabled={self.fake_quant_enabled}, observer_enabled={self.observer_enabled}, ' \
|
||||
f'scale={self.scale}, zero_point={self.zero_point}, ' \
|
||||
f'dtype={self.dtype}, quant_min={self.activation_post_process.quant_min}, ' \
|
||||
f'quant_max={self.activation_post_process.quant_max}, qscheme={self.qscheme}'
|
||||
|
||||
|
||||
class FusedMovingAvgObsFakeQuantize(FakeQuantize):
|
||||
|
|
@ -335,18 +332,10 @@ class FusedMovingAvgObsFakeQuantize(FakeQuantize):
|
|||
@torch.jit.export
|
||||
def extra_repr(self) -> str:
|
||||
return (
|
||||
"fake_quant_enabled={}, observer_enabled={}, scale={}, zero_point={}, "
|
||||
"dtype={}, quant_min={}, quant_max={}, qscheme={}, reduce_range={}".format(
|
||||
self.fake_quant_enabled,
|
||||
self.observer_enabled,
|
||||
self.scale,
|
||||
self.zero_point,
|
||||
self.dtype,
|
||||
self.activation_post_process.quant_min,
|
||||
self.activation_post_process.quant_max,
|
||||
self.qscheme,
|
||||
self.activation_post_process.reduce_range,
|
||||
)
|
||||
f"fake_quant_enabled={self.fake_quant_enabled}, observer_enabled={self.observer_enabled}, "
|
||||
f"scale={self.scale}, zero_point={self.zero_point}, dtype={self.dtype}, "
|
||||
f"quant_min={self.activation_post_process.quant_min}, quant_max={self.activation_post_process.quant_max}, "
|
||||
f"qscheme={self.qscheme}, reduce_range={self.activation_post_process.reduce_range}"
|
||||
)
|
||||
|
||||
def forward(self, X: torch.Tensor) -> torch.Tensor:
|
||||
|
|
|
|||
|
|
@ -988,9 +988,8 @@ class InputWeightEqualizationDetector(DetectorBase):
|
|||
if global_range == 0:
|
||||
range_zero_explanation = "We recommend removing this channel as it doesn't provide any useful information."
|
||||
raise ValueError(
|
||||
"The range of the {} data for module {} is 0, which means you have a constant value channel. {}".format(
|
||||
info_str, module_fqn, range_zero_explanation
|
||||
)
|
||||
f"The range of the {info_str} data for module {module_fqn} is 0, "
|
||||
f"which means you have a constant value channel. {range_zero_explanation}"
|
||||
)
|
||||
|
||||
ratio = per_channel_range / global_range
|
||||
|
|
|
|||
|
|
@ -633,30 +633,12 @@ class FunctionEvent(FormattedTimesMixin):
|
|||
else self.privateuse1_memory_usage
|
||||
)
|
||||
return (
|
||||
"<FunctionEvent id={} name={} device_type={} node_id={} cpu_time={} start_us={} end_us={} "
|
||||
"cpu_children={} {}_time={} name={} thread={} input_shapes={} "
|
||||
"cpu_memory_usage={} {}_memory_usage={} is_async={} is_remote={} seq_nr={} is_legacy={}>".format(
|
||||
self.id,
|
||||
self.name,
|
||||
self.device_type,
|
||||
self.node_id,
|
||||
self.cpu_time_str,
|
||||
self.time_range.start,
|
||||
self.time_range.end,
|
||||
str([child.id for child in self.cpu_children]),
|
||||
device_name,
|
||||
device_time,
|
||||
self.name,
|
||||
self.thread,
|
||||
str(self.input_shapes),
|
||||
self.cpu_memory_usage,
|
||||
device_name,
|
||||
device_memory_usage,
|
||||
self.is_async,
|
||||
self.is_remote,
|
||||
self.sequence_nr,
|
||||
self.is_legacy,
|
||||
)
|
||||
f"<FunctionEvent id={self.id} name={self.name} device_type={self.device_type} node_id={self.node_id} "
|
||||
f"cpu_time={self.cpu_time_str} start_us={self.time_range.start} end_us={self.time_range.end} "
|
||||
f"cpu_children={str([child.id for child in self.cpu_children])} {device_name}_time={device_time} "
|
||||
f"name={self.name} thread={self.thread} input_shapes={str(self.input_shapes)} "
|
||||
f"cpu_memory_usage={self.cpu_memory_usage} {device_name}_memory_usage={device_memory_usage} "
|
||||
f"is_async={self.is_async} is_remote={self.is_remote} seq_nr={self.sequence_nr} is_legacy={self.is_legacy}>"
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -749,21 +731,9 @@ class FunctionEventAvg(FormattedTimesMixin):
|
|||
else self.privateuse1_memory_usage
|
||||
)
|
||||
return (
|
||||
"<FunctionEventAvg key={} self_cpu_time={} cpu_time={} "
|
||||
" self_{}_time={} {}_time={} input_shapes={} "
|
||||
"cpu_memory_usage={} {}_memory_usage={}>".format(
|
||||
self.key,
|
||||
self.self_cpu_time_total_str,
|
||||
self.cpu_time_str,
|
||||
device_name,
|
||||
self_device_time,
|
||||
device_name,
|
||||
device_time,
|
||||
str(self.input_shapes),
|
||||
self.cpu_memory_usage,
|
||||
device_name,
|
||||
device_memory,
|
||||
)
|
||||
f"<FunctionEventAvg key={self.key} self_cpu_time={self.self_cpu_time_total_str} cpu_time={self.cpu_time_str} "
|
||||
f" self_{device_name}_time={self_device_time} {device_name}_time={device_time} input_shapes={str(self.input_shapes)} "
|
||||
f"cpu_memory_usage={self.cpu_memory_usage} {device_name}_memory_usage={device_memory}>"
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -54,13 +54,9 @@ def train(log_interval, model, device, train_loader, optimizer, epoch):
|
|||
|
||||
if batch_idx % log_interval == 0:
|
||||
print(
|
||||
"Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
|
||||
epoch,
|
||||
batch_idx * len(data),
|
||||
len(train_loader.dataset),
|
||||
100.0 * batch_idx / len(train_loader),
|
||||
loss.item(),
|
||||
)
|
||||
f"Train Epoch: {epoch} "
|
||||
f"[{batch_idx * len(data)}/{len(train_loader.dataset)} ({100.0 * batch_idx / len(train_loader):.0f}%)]"
|
||||
f"\tLoss: {loss.item():.6f}"
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -566,13 +566,8 @@ def memory_summary(device: Union[Device, int] = None, abbreviated: bool = False)
|
|||
freed_prefval = freed
|
||||
|
||||
lines.append(
|
||||
" {:<21} | {} | {} | {} | {} ".format(
|
||||
submetric_name,
|
||||
formatter(current, current_prefval),
|
||||
formatter(peak, peak_prefval),
|
||||
formatter(allocated, allocated_prefval),
|
||||
formatter(freed, freed_prefval),
|
||||
),
|
||||
f" {submetric_name:<21} | {formatter(current, current_prefval)} | {formatter(peak, peak_prefval)} | "
|
||||
f"{formatter(allocated, allocated_prefval)} | {formatter(freed, freed_prefval)} ",
|
||||
)
|
||||
|
||||
metrics_to_display = [
|
||||
|
|
@ -591,13 +586,8 @@ def memory_summary(device: Union[Device, int] = None, abbreviated: bool = False)
|
|||
freed = stats[prefix + "freed"]
|
||||
|
||||
lines.append(
|
||||
" {:<21} | {} | {} | {} | {} ".format(
|
||||
metric_name,
|
||||
formatter(current, current),
|
||||
formatter(peak, peak),
|
||||
formatter(allocated, allocated),
|
||||
formatter(freed, freed),
|
||||
),
|
||||
f" {metric_name:<21} | {formatter(current, current)} | {formatter(peak, peak)} | "
|
||||
f"{formatter(allocated, allocated)} | {formatter(freed, freed)} ",
|
||||
)
|
||||
|
||||
lines.append("=" * 75)
|
||||
|
|
|
|||
|
|
@ -739,9 +739,8 @@ def _store_based_barrier(rank, store, group_name, rendezvous_count, timeout, log
|
|||
if timedelta(seconds=(time.time() - start)) > timeout:
|
||||
raise DistStoreError( # noqa: TRY200
|
||||
"Timed out initializing process group in store based barrier on "
|
||||
"rank {}, for key: {} (world_size={}, num_workers_joined={}, timeout={} error={})".format(
|
||||
rank, store_key, world_size, worker_count, timeout, e
|
||||
)
|
||||
f"rank {rank}, for key: {store_key} (world_size={world_size}, "
|
||||
f"num_workers_joined={worker_count}, timeout={timeout} error={e})"
|
||||
)
|
||||
|
||||
logger.info(
|
||||
|
|
|
|||
|
|
@ -128,9 +128,8 @@ def _dispatch_kl(type_p, type_q):
|
|||
right_fun = _KL_REGISTRY[right_p, right_q]
|
||||
if left_fun is not right_fun:
|
||||
warnings.warn(
|
||||
"Ambiguous kl_divergence({}, {}). Please register_kl({}, {})".format(
|
||||
type_p.__name__, type_q.__name__, left_p.__name__, right_q.__name__
|
||||
),
|
||||
f"Ambiguous kl_divergence({type_p.__name__}, {type_q.__name__}). "
|
||||
f"Please register_kl({left_p.__name__}, {right_q.__name__})",
|
||||
RuntimeWarning,
|
||||
)
|
||||
return left_fun
|
||||
|
|
|
|||
|
|
@ -67,9 +67,7 @@ class TransformedDistribution(Distribution):
|
|||
transform = ComposeTransform(self.transforms)
|
||||
if len(base_shape) < transform.domain.event_dim:
|
||||
raise ValueError(
|
||||
"base_distribution needs to have shape with size at least {}, but got {}.".format(
|
||||
transform.domain.event_dim, base_shape
|
||||
)
|
||||
f"base_distribution needs to have shape with size at least {transform.domain.event_dim}, but got {base_shape}."
|
||||
)
|
||||
forward_shape = transform.forward_shape(base_shape)
|
||||
expanded_base_shape = transform.inverse_shape(forward_shape)
|
||||
|
|
|
|||
|
|
@ -1294,11 +1294,7 @@ class DynamicDimConstraintPrinter(StrPrinter):
|
|||
return self.print_source(self.symbol_to_source[expr][0])
|
||||
|
||||
def _print_Relational(self, expr):
|
||||
return '{} {} {}'.format(
|
||||
self.parenthesize(expr.lhs, precedence(expr)),
|
||||
expr.rel_op,
|
||||
self.parenthesize(expr.rhs, precedence(expr))
|
||||
)
|
||||
return f'{self.parenthesize(expr.lhs, precedence(expr))} {expr.rel_op} {self.parenthesize(expr.rhs, precedence(expr))}'
|
||||
|
||||
|
||||
class DimConstraints:
|
||||
|
|
|
|||
|
|
@ -824,16 +824,12 @@ def check_module_initialized(mod):
|
|||
for name, param in mod._parameters.items():
|
||||
if param is not None and torch.nn.parameter.is_lazy(param):
|
||||
raise RuntimeError(
|
||||
"'{}' has uninitialized parameters {}. Did you forget to run a forward pass?".format(
|
||||
torch.typename(type(mod)), name
|
||||
)
|
||||
f"'{torch.typename(type(mod))}' has uninitialized parameters {name}. Did you forget to run a forward pass?"
|
||||
)
|
||||
for name, buf in mod._buffers.items():
|
||||
if buf is not None and torch.nn.parameter.is_lazy(buf):
|
||||
raise RuntimeError(
|
||||
"'{}' has uninitialized buffers {}. Did you forget to run a forward pass?".format(
|
||||
torch.typename(type(mod)), name
|
||||
)
|
||||
f"'{torch.typename(type(mod))}' has uninitialized buffers {name}. Did you forget to run a forward pass?"
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -46,10 +46,10 @@ def _emit_schema(mod, name, schema, arg_start=0, padding=4):
|
|||
qualified_name = name
|
||||
else:
|
||||
qualified_name = f"{mod}.{name}"
|
||||
schema_str = "{}({}) -> {}".format(
|
||||
qualified_name,
|
||||
_emit_args(len(qualified_name) + 1 + padding, schema.arguments[arg_start:]),
|
||||
_emit_rets(schema.returns),
|
||||
schema_str = (
|
||||
f"{qualified_name}"
|
||||
f"({_emit_args(len(qualified_name) + 1 + padding, schema.arguments[arg_start:])}) "
|
||||
f"-> {_emit_rets(schema.returns)}"
|
||||
)
|
||||
return schema_str
|
||||
|
||||
|
|
|
|||
|
|
@ -3160,8 +3160,8 @@ def binary_cross_entropy(
|
|||
reduction_enum = _Reduction.get_enum(reduction)
|
||||
if target.size() != input.size():
|
||||
raise ValueError(
|
||||
"Using a target size ({}) that is different to the input size ({}) is deprecated. "
|
||||
"Please ensure they have the same size.".format(target.size(), input.size())
|
||||
f"Using a target size ({target.size()}) that is different to the input size ({input.size()}) is deprecated. "
|
||||
"Please ensure they have the same size."
|
||||
)
|
||||
|
||||
if weight is not None:
|
||||
|
|
|
|||
|
|
@ -638,9 +638,8 @@ class _ConvTransposeNd(_ConvNd):
|
|||
output_size = output_size[num_non_spatial_dims:]
|
||||
if len(output_size) != num_spatial_dims:
|
||||
raise ValueError(
|
||||
"ConvTranspose{}D: for {}D input, output_size must have {} or {} elements (got {})"
|
||||
.format(num_spatial_dims, input.dim(), num_spatial_dims,
|
||||
num_non_spatial_dims + num_spatial_dims, len(output_size)))
|
||||
f"ConvTranspose{num_spatial_dims}D: for {input.dim()}D input, output_size must have {num_spatial_dims} "
|
||||
f"or {num_non_spatial_dims + num_spatial_dims} elements (got {len(output_size)})")
|
||||
|
||||
min_sizes = torch.jit.annotate(List[int], [])
|
||||
max_sizes = torch.jit.annotate(List[int], [])
|
||||
|
|
@ -1176,9 +1175,9 @@ class _LazyConvXdMixin(LazyModuleMixin):
|
|||
num_dims_no_batch = num_spatial_dims + 1 # +1 for channels dim
|
||||
num_dims_batch = num_dims_no_batch + 1
|
||||
if input.dim() not in (num_dims_no_batch, num_dims_batch):
|
||||
raise RuntimeError("Expected {}D (unbatched) or {}D (batched) input to {}, but "
|
||||
"got input of size: {}".format(num_dims_no_batch, num_dims_batch,
|
||||
self.__class__.__name__, input.shape))
|
||||
raise RuntimeError(f"Expected {num_dims_no_batch}D (unbatched) or {num_dims_batch}D (batched) input "
|
||||
f"to {self.__class__.__name__}, but "
|
||||
f"got input of size: {input.shape}")
|
||||
return input.shape[1] if input.dim() == num_dims_batch else input.shape[0]
|
||||
|
||||
# Function to return the number of spatial dims expected for inputs to the module.
|
||||
|
|
|
|||
|
|
@ -200,9 +200,8 @@ class Bilinear(Module):
|
|||
return F.bilinear(input1, input2, self.weight, self.bias)
|
||||
|
||||
def extra_repr(self) -> str:
|
||||
return 'in1_features={}, in2_features={}, out_features={}, bias={}'.format(
|
||||
self.in1_features, self.in2_features, self.out_features, self.bias is not None
|
||||
)
|
||||
return (f'in1_features={self.in1_features}, in2_features={self.in2_features}, '
|
||||
f'out_features={self.out_features}, bias={self.bias is not None}')
|
||||
|
||||
|
||||
class LazyLinear(LazyModuleMixin, Linear):
|
||||
|
|
|
|||
|
|
@ -431,8 +431,8 @@ class Module:
|
|||
|
||||
# Backward compatibility: no args used to be allowed when call_super_init=False
|
||||
if self.call_super_init is False and bool(kwargs):
|
||||
raise TypeError("{}.__init__() got an unexpected keyword argument '{}'"
|
||||
"".format(type(self).__name__, next(iter(kwargs))))
|
||||
raise TypeError(f"{type(self).__name__}.__init__() got an unexpected keyword argument '{next(iter(kwargs))}'"
|
||||
"")
|
||||
|
||||
if self.call_super_init is False and bool(args):
|
||||
raise TypeError(f"{type(self).__name__}.__init__() takes 1 positional argument but {len(args) + 1} were"
|
||||
|
|
@ -2036,9 +2036,8 @@ class Module:
if not is_param_lazy and input_param.shape != param.shape:
# local shape should match the one in checkpoint
error_msgs.append('size mismatch for {}: copying a param with shape {} from checkpoint, '
'the shape in current model is {}.'
.format(key, input_param.shape, param.shape))
error_msgs.append(f'size mismatch for {key}: copying a param with shape {input_param.shape} from checkpoint, '
f'the shape in current model is {param.shape}.')
continue

if param.is_meta and not input_param.is_meta and not assign_to_params_buffers:

@ -736,11 +736,8 @@ class DistributedDataParallel(Module, Joinable):
ValueError,
"DistributedDataParallel device_ids and output_device arguments "
"only work with single-device/multiple-device GPU modules or CPU modules, "
"but got device_ids {}, output_device {}, and module parameters {}.".format(
device_ids,
output_device,
{p.device for p in self._module_parameters},
),
f"but got device_ids {device_ids}, output_device {output_device}, "
f"and module parameters {({p.device for p in self._module_parameters})}.",
)

self.device_ids = None

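One detail in the DistributedDataParallel hunk above is easy to miss: the set comprehension is wrapped in parentheses inside the f-string. Without them, the comprehension's opening brace would sit directly against the replacement-field brace and `{{` would be parsed as an escaped literal brace. A tiny sketch, illustrative only and not from the commit:

```python
# Illustrative only -- `params` is invented for this sketch.
params = [1, 2]

# f"devices: {{x for x in params}}" would print literal braces, not a set,
# so the comprehension is parenthesised, as in the hunk above.
print(f"devices: {({x for x in params})}")  # -> devices: {1, 2}
```
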
@ -304,9 +304,7 @@ def requires_nccl_version(version, msg):
else:
return skip_but_pass_in_sandcastle_if(
torch.cuda.nccl.version() < version,
"Requires NCCL version greater than or equal to: {}, found: {}, reason: {}".format(
version, torch.cuda.nccl.version(), msg
),
f"Requires NCCL version greater than or equal to: {version}, found: {torch.cuda.nccl.version()}, reason: {msg}",
)

@ -798,9 +796,8 @@ class MultiProcessTestCase(TestCase):
# Get error from pipe.
error_message = self.pid_to_pipe[process.pid].recv()
error += (
"Process {} exited with error code {} and exception:\n{}\n".format(
i, MultiProcessTestCase.TEST_ERROR_EXIT_CODE, error_message
)
f"Process {i} exited with error code {MultiProcessTestCase.TEST_ERROR_EXIT_CODE} "
f"and exception:\n{error_message}\n"
)

raise RuntimeError(error)

@ -814,9 +811,7 @@ class MultiProcessTestCase(TestCase):
self.assertEqual(
p.exitcode,
first_process.exitcode,
msg="Expect process {} exit code to match Process 0 exit code of {}, but got {}".format(
i, first_process.exitcode, p.exitcode
),
msg=f"Expect process {i} exit code to match Process 0 exit code of {first_process.exitcode}, but got {p.exitcode}",
)
for skip in TEST_SKIPS.values():
if first_process.exitcode == skip.exit_code:

@ -3333,8 +3333,7 @@ class TestBase:
if name in {'constructor_args', 'extra_args'}:
kwargs[name] = tuple()
else:
raise ValueError("{}: Specify {} by a value, a function to generate it, or it's size!"
.format(self.get_name(), name))
raise ValueError(f"{self.get_name()}: Specify {name} by a value, a function to generate it, or it's size!")
self._extra_kwargs = kwargs
self._arg_cache = {}

@ -838,10 +838,8 @@ class QuantizationTestCase(TestCase):
(exp_type_end_b is act_type_end_b)
self.assertTrue(
types_match,
'Type mismatch at {}: expected {}, got {}'.format(
k,
(exp_type_start_a, exp_type_end_a, exp_type_start_b, exp_type_end_b),
(act_type_start_a, act_type_end_a, act_type_start_b, act_type_end_b))
f'Type mismatch at {k}: expected {(exp_type_start_a, exp_type_end_a, exp_type_start_b, exp_type_end_b)}, '
f'got {(act_type_start_a, act_type_end_a, act_type_start_b, act_type_end_b)}'
)

def assert_ns_compare_dict_valid(

@ -395,9 +395,8 @@ def compose_parametrize_fns(old_parametrize_fn, new_parametrize_fn):
redundant_params = set(old_param_kwargs.keys()).intersection(new_param_kwargs.keys())
if redundant_params:
raise RuntimeError('Parametrization over the same parameter by multiple parametrization '
'decorators is not supported. For test "{}", the following parameters '
'are handled multiple times: {}'.format(
test.__name__, redundant_params))
f'decorators is not supported. For test "{test.__name__}", the following parameters '
f'are handled multiple times: {redundant_params}')
full_param_kwargs = {**old_param_kwargs, **new_param_kwargs}
merged_test_name = '{}{}{}'.format(new_test_name,
'_' if old_test_name != '' and new_test_name != '' else '',

@ -2175,30 +2174,20 @@ class CudaMemoryLeakCheck:
# statistics or a leak too small to trigger the allocation of an
# additional block of memory by the CUDA driver
msg = ("CUDA caching allocator reports a memory leak not "
"verified by the driver API in {}! "
"Caching allocator allocated memory was {} and is now reported as {} "
"on device {}. "
"CUDA driver allocated memory was {} and is now {}.").format(
self.name,
self.caching_allocator_befores[i],
caching_allocator_mem_allocated,
i,
self.driver_befores[i],
driver_mem_allocated)
f"verified by the driver API in {self.name}! "
f"Caching allocator allocated memory was {self.caching_allocator_befores[i]} "
f"and is now reported as {caching_allocator_mem_allocated} "
f"on device {i}. "
f"CUDA driver allocated memory was {self.driver_befores[i]} and is now {driver_mem_allocated}.")
warnings.warn(msg)
elif caching_allocator_discrepancy and driver_discrepancy:
# A caching allocator discrepancy validated by the driver API is a
# failure (except on ROCm, see below)
msg = ("CUDA driver API confirmed a leak in {}! "
"Caching allocator allocated memory was {} and is now reported as {} "
"on device {}. "
"CUDA driver allocated memory was {} and is now {}.").format(
self.name,
self.caching_allocator_befores[i],
caching_allocator_mem_allocated,
i,
self.driver_befores[i],
driver_mem_allocated)
msg = (f"CUDA driver API confirmed a leak in {self.name}! "
f"Caching allocator allocated memory was {self.caching_allocator_befores[i]} "
f"and is now reported as {caching_allocator_mem_allocated} "
f"on device {i}. "
f"CUDA driver allocated memory was {self.driver_befores[i]} and is now {driver_mem_allocated}.")

raise RuntimeError(msg)

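The CudaMemoryLeakCheck hunk above also shows that the converted f-strings embed attribute access and indexing directly, e.g. `{self.caching_allocator_befores[i]}`, with no intermediate variables. A standalone sketch of that capability, illustrative only and with invented names:

```python
# Illustrative only -- `Totals` and `before` are invented for this sketch.
class Totals:
    def __init__(self):
        self.before = [10, 20, 30]

t, i = Totals(), 1
# Attribute access and indexing happen inside the replacement field.
print(f"allocated before was {t.before[i]} on device {i}")  # -> allocated before was 20 on device 1
```
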
@ -131,9 +131,8 @@ def wait_until_pending_futures_and_users_flushed(timeout: int = 20) -> None:
time.sleep(0.1)
if time.time() - start > timeout:
raise ValueError(
"Timed out waiting to flush pending futures and users, had {} pending futures and {} pending users".format(
num_pending_futures, num_pending_users
)
f"Timed out waiting to flush pending futures and users, "
f"had {num_pending_futures} pending futures and {num_pending_users} pending users"
)

@ -167,13 +166,8 @@ def wait_until_owners_and_forks_on_rank(
time.sleep(1)
if time.time() - start > timeout:
raise ValueError(
"Timed out waiting {} sec for {} owners and {} forks on rank, had {} owners and {} forks".format(
timeout,
num_owners,
num_forks,
num_owners_on_rank,
num_forks_on_rank,
)
f"Timed out waiting {timeout} sec for {num_owners} owners and {num_forks} forks on rank,"
f" had {num_owners_on_rank} owners and {num_forks_on_rank} forks"
)

@ -3162,9 +3162,8 @@ class RpcTest(RpcAgentTestFixture, RpcTestCommon):
)
self.assertEqual(
rref2.__str__(),
"UserRRef(RRefId = {0}(created_on={1}, local_id=1), ForkId = {0}(created_on={1}, local_id=2))".format(
id_class, self.rank
),
f"UserRRef(RRefId = {id_class}(created_on={self.rank}, local_id=1), "
f"ForkId = {id_class}(created_on={self.rank}, local_id=2))",
)

@dist_init

@ -271,16 +271,13 @@ def augment_many_model_functions_with_bundled_inputs(
if hasattr(model, "_generate_bundled_inputs_for_" + function_name):
if input_list is not None:
raise Exception(
"inputs[{name}] is not None, but _generate_bundled_inputs_for_{name} is already defined".format(
name=function_name
)
f"inputs[{function_name}] is not None, but _generate_bundled_inputs_for_{function_name} is already defined"
)
# Model author already defined _generate_bundled_inputs_for_<function_name>.
elif input_list is None or len(input_list) == 0:
raise Exception(
"inputs for {name} must be specified if _generate_bundled_inputs_for_{name} is not already defined".format(
name=function_name,
)
f"inputs for {function_name} must be specified if "
f"_generate_bundled_inputs_for_{function_name} is not already defined"
)
else:
# Iterate over the inputs and args in each input.

@ -521,11 +521,9 @@ class DataLoader(Generic[T_co]):
"DataLoader is not able to compute a suggested max number of worker in current system.")

warn_msg = (
"This DataLoader will create {} worker processes in total. {} "
f"This DataLoader will create {num_worker_created} worker processes in total. {suggested_max_worker_msg} "
"Please be aware that excessive worker creation might get DataLoader running slow or even freeze, "
"lower the worker number to avoid potential slowness/freeze if necessary.").format(
num_worker_created,
suggested_max_worker_msg)
"lower the worker number to avoid potential slowness/freeze if necessary.")
return warn_msg

if not self.num_workers or self.num_workers == 0:

@ -633,9 +631,8 @@ class _BaseDataLoaderIter:
if self._dataset_kind == _DatasetKind.Iterable and \
self._IterableDataset_len_called is not None and \
self._num_yielded > self._IterableDataset_len_called:
warn_msg = ("Length of IterableDataset {} was reported to be {} (when accessing len(dataloader)), but {} "
"samples have been fetched. ").format(self._dataset, self._IterableDataset_len_called,
self._num_yielded)
warn_msg = (f"Length of IterableDataset {self._dataset} was reported to be {self._IterableDataset_len_called}"
f"(when accessing len(dataloader)), but {self._num_yielded} samples have been fetched. ")
if self._num_workers > 0:
warn_msg += ("For multiprocessing data-loading, this could be caused by not properly configuring the "
"IterableDataset replica at each worker. Please see "

@ -89,9 +89,9 @@ class non_deterministic:
# Decorate IterDataPipe
if self.cls is not None:
if _determinism:
raise TypeError("{} is non-deterministic, but you set 'guaranteed_datapipes_determinism'. "
raise TypeError(f"{self.cls.__name__} is non-deterministic, but you set 'guaranteed_datapipes_determinism'. "
"You can turn off determinism for this DataPipe if that is acceptable "
"for your application".format(self.cls.__name__))
"for your application")
return self.cls(*args, **kwargs) # type: ignore[call-arg]

# Decorate with a functional argument

@ -408,8 +408,8 @@ def _dp_init_subclass(sub_cls, *args, **kwargs):
", but found {}".format(sub_cls.__name__, _type_repr(hints['return'])))
data_type = return_hint.__args__[0]
if not issubtype(data_type, sub_cls.type.param):
raise TypeError("Expected return type of '__iter__' as a subtype of {}, but found {}"
" for {}".format(sub_cls.type, _type_repr(data_type), sub_cls.__name__))
raise TypeError(f"Expected return type of '__iter__' as a subtype of {sub_cls.type},"
f" but found {_type_repr(data_type)} for {sub_cls.__name__}")


def reinforce_type(self, expected_type):

@ -49,8 +49,7 @@ class HipifyResult:
self.status = ""

def __str__(self):
return ("HipifyResult:: current_state: {}, hipified_path : {}, status: {}".format(self.current_state,
self.hipified_path, self.status))
return (f"HipifyResult:: current_state: {self.current_state}, hipified_path : {self.hipified_path}, status: {self.status}")

HipifyFinalResult = Dict[str, HipifyResult]
HIPIFY_C_BREADCRUMB = "// !!! This is a file automatically generated by hipify!!!\n"

@ -104,13 +104,15 @@ def generate_mobile_module_lints(script_module: torch.jit.ScriptModule):
op_names = torch.jit.export_opnames(script_module)
for op_name in op_names:
if "dropout" in op_name:
lint_list.append({"name": LintCode.DROPOUT.name, "message": "Operator {} exists, remember to call eval() before "
lint_list.append({"name": LintCode.DROPOUT.name,
"message": f"Operator {op_name} exists, remember to call eval() before "
"saving the module.and call torch.utils.mobile_optimizer.optimize_for_mobile to drop dropout "
"operator.".format(op_name)})
"operator."})
if "batch_norm" in op_name:
lint_list.append({"name": LintCode.BATCHNORM.name, "message": "Operator {} exists, remember to call eval() before "
lint_list.append({"name": LintCode.BATCHNORM.name,
"message": f"Operator {op_name} exists, remember to call eval() before "
"saving the module and call torch.utils.mobile_optimizer.optimize_for_mobile to drop batch_norm "
"operator.".format(op_name)})
"operator."})

return lint_list