diff --git a/benchmarks/compare-fastrnn-results.py b/benchmarks/compare-fastrnn-results.py index 64c8d8dd98d..35d907d0820 100644 --- a/benchmarks/compare-fastrnn-results.py +++ b/benchmarks/compare-fastrnn-results.py @@ -2,6 +2,7 @@ import argparse import json from collections import namedtuple + Result = namedtuple("Result", ["name", "base_time", "diff_time"]) diff --git a/benchmarks/distributed/rpc/parameter_server/data/__init__.py b/benchmarks/distributed/rpc/parameter_server/data/__init__.py index cf96abd4c68..3b37129e9a5 100644 --- a/benchmarks/distributed/rpc/parameter_server/data/__init__.py +++ b/benchmarks/distributed/rpc/parameter_server/data/__init__.py @@ -1,3 +1,4 @@ from .DummyData import DummyData + data_map = {"DummyData": DummyData} diff --git a/benchmarks/distributed/rpc/parameter_server/models/__init__.py b/benchmarks/distributed/rpc/parameter_server/models/__init__.py index cfeeaaf4c77..0b53de4dd1b 100644 --- a/benchmarks/distributed/rpc/parameter_server/models/__init__.py +++ b/benchmarks/distributed/rpc/parameter_server/models/__init__.py @@ -1,3 +1,4 @@ from .DummyModel import DummyModel + model_map = {"DummyModel": DummyModel} diff --git a/benchmarks/distributed/rpc/parameter_server/server/__init__.py b/benchmarks/distributed/rpc/parameter_server/server/__init__.py index 54d8983ea9f..d204b3d03c2 100644 --- a/benchmarks/distributed/rpc/parameter_server/server/__init__.py +++ b/benchmarks/distributed/rpc/parameter_server/server/__init__.py @@ -1,5 +1,6 @@ from .server import AverageBatchParameterServer, AverageParameterServer + server_map = { "AverageParameterServer": AverageParameterServer, "AverageBatchParameterServer": AverageBatchParameterServer, diff --git a/benchmarks/distributed/rpc/parameter_server/trainer/__init__.py b/benchmarks/distributed/rpc/parameter_server/trainer/__init__.py index c1f51adec32..12d6f9e7f05 100644 --- a/benchmarks/distributed/rpc/parameter_server/trainer/__init__.py +++ b/benchmarks/distributed/rpc/parameter_server/trainer/__init__.py @@ -6,6 +6,7 @@ from .iteration_steps import basic_iteration_step from .preprocess_data import preprocess_dummy_data from .trainer import DdpTrainer + criterion_map = {"cel": cel} ddp_hook_map = { diff --git a/benchmarks/distributed/rpc/parameter_server/utils.py b/benchmarks/distributed/rpc/parameter_server/utils.py index efa470bd44c..baa4a9a9e9e 100644 --- a/benchmarks/distributed/rpc/parameter_server/utils.py +++ b/benchmarks/distributed/rpc/parameter_server/utils.py @@ -1,5 +1,6 @@ import torch + RPC_SPARSE = "rpc_sparse" RPC_DENSE = "rpc_dense" diff --git a/benchmarks/distributed/rpc/rl/coordinator.py b/benchmarks/distributed/rpc/rl/coordinator.py index 75b1780a34f..8dff633d2e1 100644 --- a/benchmarks/distributed/rpc/rl/coordinator.py +++ b/benchmarks/distributed/rpc/rl/coordinator.py @@ -1,13 +1,13 @@ import time import numpy as np - from agent import AgentBase from observer import ObserverBase import torch import torch.distributed.rpc as rpc + COORDINATOR_NAME = "coordinator" AGENT_NAME = "agent" OBSERVER_NAME = "observer{}" diff --git a/benchmarks/distributed/rpc/rl/launcher.py b/benchmarks/distributed/rpc/rl/launcher.py index 22ae8962975..7c6f74524d7 100644 --- a/benchmarks/distributed/rpc/rl/launcher.py +++ b/benchmarks/distributed/rpc/rl/launcher.py @@ -1,5 +1,4 @@ import argparse - import json import os import time @@ -9,6 +8,7 @@ from coordinator import CoordinatorBase import torch.distributed.rpc as rpc import torch.multiprocessing as mp + COORDINATOR_NAME = "coordinator" AGENT_NAME = "agent" OBSERVER_NAME = "observer{}" diff --git a/benchmarks/dynamo/benchmarks.py b/benchmarks/dynamo/benchmarks.py index c209781ffd6..6ed845a9fcb 100755 --- a/benchmarks/dynamo/benchmarks.py +++ b/benchmarks/dynamo/benchmarks.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 + import argparse import os import sys - from typing import Set diff --git a/benchmarks/dynamo/ci_expected_accuracy/update_expected.py b/benchmarks/dynamo/ci_expected_accuracy/update_expected.py index 1f3a02b69a5..29d204c02e2 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/update_expected.py +++ b/benchmarks/dynamo/ci_expected_accuracy/update_expected.py @@ -30,6 +30,7 @@ from zipfile import ZipFile import pandas as pd import requests + # Note: the public query url targets this rockset lambda: # https://console.rockset.com/lambdas/details/commons.artifacts ARTIFACTS_QUERY_URL = "https://api.usw2a1.rockset.com/v1/public/shared_lambdas/4ca0033e-0117-41f5-b043-59cde19eff35" diff --git a/benchmarks/dynamo/combine_csv.py b/benchmarks/dynamo/combine_csv.py index 560b8a3cf24..e4c2e0f0951 100644 --- a/benchmarks/dynamo/combine_csv.py +++ b/benchmarks/dynamo/combine_csv.py @@ -6,6 +6,7 @@ import csv import sys from collections import defaultdict + assert len(sys.argv) == 3 RESULTS = defaultdict(dict) diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py index 76ca81fb2c5..8069ca11520 100644 --- a/benchmarks/dynamo/common.py +++ b/benchmarks/dynamo/common.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 + from __future__ import annotations import abc diff --git a/benchmarks/dynamo/dist_util.py b/benchmarks/dynamo/dist_util.py index 98cc274a4c6..c1036bbb1e4 100644 --- a/benchmarks/dynamo/dist_util.py +++ b/benchmarks/dynamo/dist_util.py @@ -15,6 +15,7 @@ from torch.distributed.algorithms._checkpoint.checkpoint_wrapper import ( from torch.distributed.fsdp import FullyShardedDataParallel as FSDP from torch.distributed.fsdp.wrap import ModuleWrapPolicy + try: from .torchbench import setup_torchbench_cwd except ImportError: diff --git a/benchmarks/dynamo/distributed.py b/benchmarks/dynamo/distributed.py index 9d99c4fcb6e..d2af630c8bf 100644 --- a/benchmarks/dynamo/distributed.py +++ b/benchmarks/dynamo/distributed.py @@ -10,6 +10,7 @@ from torch._dynamo.testing import reduce_to_scalar_loss from torch.nn.parallel import DistributedDataParallel as DDP from torch.profiler import profile, ProfilerActivity, record_function + try: from .common import timed from .dist_util import apply_fsdp, cleanup, get_model, model_iter_fn, setup diff --git a/benchmarks/dynamo/huggingface.py b/benchmarks/dynamo/huggingface.py index 46492cc2dfc..32708ccfbf9 100755 --- a/benchmarks/dynamo/huggingface.py +++ b/benchmarks/dynamo/huggingface.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 + import importlib import logging import os @@ -7,16 +8,17 @@ import subprocess import sys import warnings + try: from .common import BenchmarkRunner, download_retry_decorator, main, reset_rng_state except ImportError: from common import BenchmarkRunner, download_retry_decorator, main, reset_rng_state import torch - from torch._dynamo.testing import collect_results from torch._dynamo.utils import clone_inputs + log = logging.getLogger(__name__) # Enable FX graph caching diff --git a/benchmarks/dynamo/microbenchmarks/bench_mm_fusion.py b/benchmarks/dynamo/microbenchmarks/bench_mm_fusion.py index 424edac2b8f..b0e083b0007 100644 --- a/benchmarks/dynamo/microbenchmarks/bench_mm_fusion.py +++ b/benchmarks/dynamo/microbenchmarks/bench_mm_fusion.py @@ -1,12 +1,13 @@ # flake8: noqa + import triton from prettytable import PrettyTable import torch - import torch._dynamo import torch._inductor.config + # torch._inductor.config.debug = True torch._inductor.config.triton.dense_indexing = True torch.manual_seed(0) diff --git a/benchmarks/dynamo/microbenchmarks/fx_microbenchmarks.py b/benchmarks/dynamo/microbenchmarks/fx_microbenchmarks.py index 2c575c69418..0f957dc4aaf 100644 --- a/benchmarks/dynamo/microbenchmarks/fx_microbenchmarks.py +++ b/benchmarks/dynamo/microbenchmarks/fx_microbenchmarks.py @@ -2,6 +2,7 @@ import timeit import torch.fx + N = 100000 K = 1000 diff --git a/benchmarks/dynamo/microbenchmarks/inductor_bmm.py b/benchmarks/dynamo/microbenchmarks/inductor_bmm.py index 5c5326beef6..3e97680a3ba 100644 --- a/benchmarks/dynamo/microbenchmarks/inductor_bmm.py +++ b/benchmarks/dynamo/microbenchmarks/inductor_bmm.py @@ -1,7 +1,6 @@ from benchmark_helper import time_with_torch_timer import torch - import torch._dynamo import torch._dynamo.config import torch._inductor.config as config diff --git a/benchmarks/dynamo/microbenchmarks/inductor_mm.py b/benchmarks/dynamo/microbenchmarks/inductor_mm.py index 2db6fc0e722..6d26c9d029e 100644 --- a/benchmarks/dynamo/microbenchmarks/inductor_mm.py +++ b/benchmarks/dynamo/microbenchmarks/inductor_mm.py @@ -2,11 +2,11 @@ import triton from benchmark_helper import time_with_torch_timer import torch - import torch._dynamo import torch._dynamo.config import torch._inductor.config as config + # The flag below controls whether to allow TF32 on matmul. This flag defaults to True. torch.backends.cuda.matmul.allow_tf32 = True # The flag below controls whether to allow TF32 on cuDNN. This flag defaults to True. diff --git a/benchmarks/dynamo/microbenchmarks/matmul_relu.py b/benchmarks/dynamo/microbenchmarks/matmul_relu.py index 9baf893af96..bc975b6455f 100644 --- a/benchmarks/dynamo/microbenchmarks/matmul_relu.py +++ b/benchmarks/dynamo/microbenchmarks/matmul_relu.py @@ -1,10 +1,10 @@ from benchmark_helper import time_with_torch_timer import torch - import torch._dynamo import torch._inductor.config as inductor_config + inductor_config.triton.mm = "triton" diff --git a/benchmarks/dynamo/microbenchmarks/microbench.py b/benchmarks/dynamo/microbenchmarks/microbench.py index fa01824afcf..302f6857e77 100755 --- a/benchmarks/dynamo/microbenchmarks/microbench.py +++ b/benchmarks/dynamo/microbenchmarks/microbench.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 + import argparse import inspect import sys @@ -7,13 +8,13 @@ import numpy as np import tabulate import torch - import torch._inductor from torch._dynamo.backends.cudagraphs import cudagraphs_inner from torch._dynamo.testing import same from torch._inductor.compile_fx import compile_fx from torch._inductor.utils import timed + aten = torch.ops.aten try: diff --git a/benchmarks/dynamo/microbenchmarks/operator_inp_utils.py b/benchmarks/dynamo/microbenchmarks/operator_inp_utils.py index 28139e93390..602c3bc516f 100644 --- a/benchmarks/dynamo/microbenchmarks/operator_inp_utils.py +++ b/benchmarks/dynamo/microbenchmarks/operator_inp_utils.py @@ -12,6 +12,7 @@ from torch.utils import _pytree as pytree from torch.utils._python_dispatch import TorchDispatchMode from torch.utils._pytree import tree_map + log = logging.getLogger(__name__) OP_INP_DIRECTORY = os.path.join(os.path.dirname(__file__), "operator_inp_logs") diff --git a/benchmarks/dynamo/microbenchmarks/operatorbench.py b/benchmarks/dynamo/microbenchmarks/operatorbench.py index dcc2b428d2e..7965e4ff01d 100644 --- a/benchmarks/dynamo/microbenchmarks/operatorbench.py +++ b/benchmarks/dynamo/microbenchmarks/operatorbench.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 + import click import numpy as np from operator_inp_utils import OperatorInputsLoader import torch - from torch._dynamo.backends.cudagraphs import cudagraphs_inner from torch._dynamo.testing import same from torch._inductor.compile_fx import compile_fx @@ -13,6 +13,7 @@ from torch._inductor.lowering import lowerings from torch._inductor.utils import gen_gm_and_inputs from torch.utils._pytree import tree_map_only + aten = torch.ops.aten diff --git a/benchmarks/dynamo/parse_logs.py b/benchmarks/dynamo/parse_logs.py index 8ae27289790..9aed78b2e35 100644 --- a/benchmarks/dynamo/parse_logs.py +++ b/benchmarks/dynamo/parse_logs.py @@ -3,6 +3,7 @@ import os import re import sys + # This script takes the logs produced by the benchmark scripts (e.g., # torchbench.py) and parses it into a CSV file that summarizes what # is failing and why. It is kept separate from the benchmark script diff --git a/benchmarks/dynamo/runner.py b/benchmarks/dynamo/runner.py index bc42b656670..747a8e06e63 100755 --- a/benchmarks/dynamo/runner.py +++ b/benchmarks/dynamo/runner.py @@ -23,7 +23,6 @@ If you want to test float16 """ - import argparse import dataclasses import functools @@ -44,7 +43,6 @@ from os.path import abspath, exists from random import randint import matplotlib.pyplot as plt - import numpy as np import pandas as pd from matplotlib import rcParams @@ -52,9 +50,9 @@ from scipy.stats import gmean from tabulate import tabulate import torch - import torch._dynamo + rcParams.update({"figure.autolayout": True}) plt.rc("axes", axisbelow=True) diff --git a/benchmarks/dynamo/test.py b/benchmarks/dynamo/test.py index b2b72b01d97..396f4037f5c 100644 --- a/benchmarks/dynamo/test.py +++ b/benchmarks/dynamo/test.py @@ -2,9 +2,9 @@ import os import unittest from .common import parse_args, run - from .torchbench import setup_torchbench_cwd, TorchBenchmarkRunner + try: # fbcode only from aiplatform.utils.sanitizer_status import is_asan_or_tsan diff --git a/benchmarks/dynamo/timm_models.py b/benchmarks/dynamo/timm_models.py index 650af586d51..f624e5cd9e8 100755 --- a/benchmarks/dynamo/timm_models.py +++ b/benchmarks/dynamo/timm_models.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 + import importlib import logging import os @@ -7,16 +8,17 @@ import subprocess import sys import warnings + try: from .common import BenchmarkRunner, download_retry_decorator, main except ImportError: from common import BenchmarkRunner, download_retry_decorator, main import torch - from torch._dynamo.testing import collect_results, reduce_to_scalar_loss from torch._dynamo.utils import clone_inputs + # Enable FX graph caching if "TORCHINDUCTOR_FX_GRAPH_CACHE" not in os.environ: torch._inductor.config.fx_graph_cache = True diff --git a/benchmarks/dynamo/torchbench.py b/benchmarks/dynamo/torchbench.py index bdb5b1a3990..43c2962f9ff 100755 --- a/benchmarks/dynamo/torchbench.py +++ b/benchmarks/dynamo/torchbench.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 + import functools import gc import importlib @@ -14,6 +15,7 @@ import yaml import torch + try: from .common import BenchmarkRunner, main except ImportError: @@ -22,6 +24,7 @@ except ImportError: from torch._dynamo.testing import collect_results, reduce_to_scalar_loss from torch._dynamo.utils import clone_inputs + # We are primarily interested in tf32 datatype torch.backends.cuda.matmul.allow_tf32 = True diff --git a/benchmarks/dynamo/training_loss.py b/benchmarks/dynamo/training_loss.py index 99e65a45f2a..ff797f07d97 100644 --- a/benchmarks/dynamo/training_loss.py +++ b/benchmarks/dynamo/training_loss.py @@ -9,10 +9,10 @@ from datasets import load_dataset, load_metric from transformers import AutoModelForSequenceClassification, AutoTokenizer import torch - import torch._dynamo from torch.utils.data import DataLoader + torch.backends.cuda.matmul.allow_tf32 = True # You will download around 84G dataset if you run this end to end training/evaluation example. diff --git a/benchmarks/fastrnns/__init__.py b/benchmarks/fastrnns/__init__.py index 10678d46f03..4b59ea2123d 100644 --- a/benchmarks/fastrnns/__init__.py +++ b/benchmarks/fastrnns/__init__.py @@ -1,6 +1,7 @@ from .cells import * # noqa: F403 from .factory import * # noqa: F403 + # (output, next_state) = cell(input, state) seqLength = 100 numLayers = 2 diff --git a/benchmarks/fastrnns/conftest.py b/benchmarks/fastrnns/conftest.py index 1d4e8bbdff7..1ef551e848a 100644 --- a/benchmarks/fastrnns/conftest.py +++ b/benchmarks/fastrnns/conftest.py @@ -1,5 +1,6 @@ import pytest # noqa: F401 + default_rnns = [ "cudnn", "aten", diff --git a/benchmarks/fastrnns/custom_lstms.py b/benchmarks/fastrnns/custom_lstms.py index 39584624619..0e5643bbeda 100644 --- a/benchmarks/fastrnns/custom_lstms.py +++ b/benchmarks/fastrnns/custom_lstms.py @@ -9,6 +9,7 @@ import torch.nn as nn from torch import Tensor from torch.nn import Parameter + """ Some helper classes for writing custom TorchScript LSTMs. diff --git a/benchmarks/fastrnns/scratch.py b/benchmarks/fastrnns/scratch.py index 10e5390bfb7..fbd0ebde9e2 100644 --- a/benchmarks/fastrnns/scratch.py +++ b/benchmarks/fastrnns/scratch.py @@ -45,6 +45,7 @@ recurrent_scaleshift.graph_for(x, scale, shift) import torch + x = torch.tensor([]) x.requires_grad = True x.mean().backward() # no error triggered diff --git a/benchmarks/framework_overhead_benchmark/framework_overhead_benchmark.py b/benchmarks/framework_overhead_benchmark/framework_overhead_benchmark.py index 826c4d283ee..4a3638e5628 100644 --- a/benchmarks/framework_overhead_benchmark/framework_overhead_benchmark.py +++ b/benchmarks/framework_overhead_benchmark/framework_overhead_benchmark.py @@ -1,10 +1,11 @@ import argparse from pt_wrapper_module import WrapperModule - from SimpleAddModule import add_tensors_loop, SimpleAddModule + from utils import benchmark_module, BenchmarkConfig, ModuleConfig, ms_to_us + """ Framework overhead benchmark script. Benchmark framework overhead. Currently supported ops: add. diff --git a/benchmarks/framework_overhead_benchmark/utils.py b/benchmarks/framework_overhead_benchmark/utils.py index abd8233d5a5..a62f5d4632b 100644 --- a/benchmarks/framework_overhead_benchmark/utils.py +++ b/benchmarks/framework_overhead_benchmark/utils.py @@ -3,6 +3,7 @@ from collections import namedtuple from torch.utils import ThroughputBenchmark + NUM_LOOP_ITERS = 1000 BenchmarkConfig = namedtuple("BenchmarkConfig", "num_warmup_iters num_iters") ModuleConfig = namedtuple("ModuleConfig", "pt_fn c2_op num_params graph_mode") diff --git a/benchmarks/functional_autograd_benchmark/audio_text_models.py b/benchmarks/functional_autograd_benchmark/audio_text_models.py index 3add9b7e3d5..64b132ab63f 100644 --- a/benchmarks/functional_autograd_benchmark/audio_text_models.py +++ b/benchmarks/functional_autograd_benchmark/audio_text_models.py @@ -1,5 +1,4 @@ import torchaudio_models as models - from utils import check_for_functorch, extract_weights, GetterReturnType, load_weights import torch diff --git a/benchmarks/functional_autograd_benchmark/functional_autograd_benchmark.py b/benchmarks/functional_autograd_benchmark/functional_autograd_benchmark.py index 3a8e90684e7..a78e4d6816e 100644 --- a/benchmarks/functional_autograd_benchmark/functional_autograd_benchmark.py +++ b/benchmarks/functional_autograd_benchmark/functional_autograd_benchmark.py @@ -6,6 +6,7 @@ from typing import Any, Callable, List, NamedTuple import torch from torch.autograd import functional + try: import functorch as ft diff --git a/benchmarks/functional_autograd_benchmark/torchaudio_models.py b/benchmarks/functional_autograd_benchmark/torchaudio_models.py index 04dc8969e32..aab5fef96cd 100644 --- a/benchmarks/functional_autograd_benchmark/torchaudio_models.py +++ b/benchmarks/functional_autograd_benchmark/torchaudio_models.py @@ -9,6 +9,7 @@ import torch import torch.nn.functional as F from torch import nn, Tensor + __all__ = ["Wav2Letter"] diff --git a/benchmarks/functional_autograd_benchmark/torchvision_models.py b/benchmarks/functional_autograd_benchmark/torchvision_models.py index 66eb23cf4e5..a9ef7e8444b 100644 --- a/benchmarks/functional_autograd_benchmark/torchvision_models.py +++ b/benchmarks/functional_autograd_benchmark/torchvision_models.py @@ -4,10 +4,10 @@ from collections import OrderedDict import torch from torch import nn - from torch.jit.annotations import Dict from torch.nn import functional as F + try: from scipy.optimize import linear_sum_assignment diff --git a/benchmarks/functional_autograd_benchmark/utils.py b/benchmarks/functional_autograd_benchmark/utils.py index b5f79a91f09..87d676f4fb3 100644 --- a/benchmarks/functional_autograd_benchmark/utils.py +++ b/benchmarks/functional_autograd_benchmark/utils.py @@ -2,9 +2,9 @@ from collections import defaultdict from typing import Callable, Dict, List, Tuple, Union import torch - from torch import nn, Tensor + # Type helpers InputsType = Union[Tensor, Tuple[Tensor, ...]] # A Getter takes in a device and returns a callable and the inputs to that callable diff --git a/benchmarks/functional_autograd_benchmark/vision_models.py b/benchmarks/functional_autograd_benchmark/vision_models.py index fdb06cfcf47..a33ac09da43 100644 --- a/benchmarks/functional_autograd_benchmark/vision_models.py +++ b/benchmarks/functional_autograd_benchmark/vision_models.py @@ -1,12 +1,12 @@ from typing import cast import torchvision_models as models - from utils import check_for_functorch, extract_weights, GetterReturnType, load_weights import torch from torch import Tensor + has_functorch = check_for_functorch() diff --git a/benchmarks/fuser/plot_speedups.py b/benchmarks/fuser/plot_speedups.py index 64dfb0f50d0..c8e4ebfa632 100644 --- a/benchmarks/fuser/plot_speedups.py +++ b/benchmarks/fuser/plot_speedups.py @@ -1,5 +1,6 @@ import pandas + df = pandas.read_csv("perf.csv") ops = pandas.unique(df["operator"]) @@ -11,6 +12,7 @@ pivot_speedups = (pivot_op_shape.T / pivot_op_shape["eager"]).T import matplotlib.pyplot as plt + plt.rcParams["figure.figsize"] = (20, 100) fig, axs = plt.subplots(nops) plt.subplots_adjust(hspace=0.5) diff --git a/benchmarks/fuser/run_benchmarks.py b/benchmarks/fuser/run_benchmarks.py index 40d1b5e9d0f..64b517e1961 100644 --- a/benchmarks/fuser/run_benchmarks.py +++ b/benchmarks/fuser/run_benchmarks.py @@ -7,6 +7,7 @@ import click import torch + torch.set_num_threads(1) torch._C._debug_set_fusion_group_inlining(False) diff --git a/benchmarks/gpt_fast/benchmark.py b/benchmarks/gpt_fast/benchmark.py index 5ff277e8f60..cfa1a5424e9 100644 --- a/benchmarks/gpt_fast/benchmark.py +++ b/benchmarks/gpt_fast/benchmark.py @@ -10,6 +10,7 @@ import torch import torch.nn as nn from torch.utils.flop_counter import FlopCounterMode + WARMUP_ITER = 5 A100_40G_BF16_TFLOPS = 312 diff --git a/benchmarks/gpt_fast/generate.py b/benchmarks/gpt_fast/generate.py index c7d0cc6fb0f..c94e1192348 100644 --- a/benchmarks/gpt_fast/generate.py +++ b/benchmarks/gpt_fast/generate.py @@ -14,6 +14,7 @@ from quantize import WeightOnlyInt8QuantHandler as LLaMAWeightOnlyInt8QuantHandl import torch import torch._inductor.config + torch._inductor.config.coordinate_descent_tuning = True torch._inductor.config.triton.unique_kernel_names = True torch._inductor.config.fx_graph_cache = True # Experimental feature to reduce compilation times, will be on by default in future diff --git a/benchmarks/gpt_fast/mixtral_moe_quantize.py b/benchmarks/gpt_fast/mixtral_moe_quantize.py index 6dfac4a9938..c1840330c02 100644 --- a/benchmarks/gpt_fast/mixtral_moe_quantize.py +++ b/benchmarks/gpt_fast/mixtral_moe_quantize.py @@ -5,6 +5,7 @@ import torch import torch.nn as nn import torch.nn.functional as F + ##### Quantization Primitives ###### diff --git a/benchmarks/inference/process_metrics.py b/benchmarks/inference/process_metrics.py index acdd1dffda7..6bd8b5e4faa 100644 --- a/benchmarks/inference/process_metrics.py +++ b/benchmarks/inference/process_metrics.py @@ -10,6 +10,7 @@ import os import pandas as pd + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Parse output files") parser.add_argument("--csv", type=str, help="Path to csv file") diff --git a/benchmarks/inference/server.py b/benchmarks/inference/server.py index 40ef2fd3729..706811b134e 100644 --- a/benchmarks/inference/server.py +++ b/benchmarks/inference/server.py @@ -1,5 +1,4 @@ import argparse - import asyncio import os.path import subprocess diff --git a/benchmarks/instruction_counts/core/api.py b/benchmarks/instruction_counts/core/api.py index 1b7cb48efef..c598e726e97 100644 --- a/benchmarks/instruction_counts/core/api.py +++ b/benchmarks/instruction_counts/core/api.py @@ -8,6 +8,7 @@ from typing import Dict, List, Optional, Set, Tuple, TYPE_CHECKING, Union from worker.main import WorkerTimerArgs + if TYPE_CHECKING: # Benchmark utils are only partially strict compliant, so MyPy won't follow # imports using the public namespace. (Due to an exclusion rule in diff --git a/benchmarks/instruction_counts/core/expand.py b/benchmarks/instruction_counts/core/expand.py index 1944aaf00a5..c91b2311c40 100644 --- a/benchmarks/instruction_counts/core/expand.py +++ b/benchmarks/instruction_counts/core/expand.py @@ -13,6 +13,7 @@ from typing import List, Optional, Tuple, TYPE_CHECKING import torch + if TYPE_CHECKING: # See the note in api.py for why this is necessary. from torch.utils.benchmark.utils.timer import Language diff --git a/benchmarks/instruction_counts/execution/work.py b/benchmarks/instruction_counts/execution/work.py index c745fde88fa..ab076854e41 100644 --- a/benchmarks/instruction_counts/execution/work.py +++ b/benchmarks/instruction_counts/execution/work.py @@ -20,6 +20,7 @@ from worker.main import ( WorkerUnpickler, ) + if TYPE_CHECKING: PopenType = subprocess.Popen[bytes] else: diff --git a/benchmarks/operator_benchmark/benchmark_all_other_test.py b/benchmarks/operator_benchmark/benchmark_all_other_test.py index f53e8b9d97c..05022e8407f 100644 --- a/benchmarks/operator_benchmark/benchmark_all_other_test.py +++ b/benchmarks/operator_benchmark/benchmark_all_other_test.py @@ -1,4 +1,4 @@ -from pt import ( # noqa: F401 # noqa: F401 +from pt import ( # noqa: F401 add_test, ao_sparsifier_test, as_strided_test, @@ -31,5 +31,6 @@ from pt import ( # noqa: F401 # noqa: F401 import operator_benchmark as op_bench + if __name__ == "__main__": op_bench.benchmark_runner.main() diff --git a/benchmarks/operator_benchmark/benchmark_all_test.py b/benchmarks/operator_benchmark/benchmark_all_test.py index 4a364867fbc..f7d967c2c26 100644 --- a/benchmarks/operator_benchmark/benchmark_all_test.py +++ b/benchmarks/operator_benchmark/benchmark_all_test.py @@ -4,5 +4,6 @@ from pt import unary_test # noqa: F401 import operator_benchmark as op_bench + if __name__ == "__main__": op_bench.benchmark_runner.main() diff --git a/benchmarks/operator_benchmark/benchmark_core.py b/benchmarks/operator_benchmark/benchmark_core.py index 241fce8ef5e..bc340ae17f6 100644 --- a/benchmarks/operator_benchmark/benchmark_core.py +++ b/benchmarks/operator_benchmark/benchmark_core.py @@ -6,6 +6,7 @@ import timeit from collections import namedtuple import benchmark_utils + import numpy as np import torch diff --git a/benchmarks/operator_benchmark/benchmark_runner.py b/benchmarks/operator_benchmark/benchmark_runner.py index 6abbc566820..0695e4847e7 100644 --- a/benchmarks/operator_benchmark/benchmark_runner.py +++ b/benchmarks/operator_benchmark/benchmark_runner.py @@ -1,10 +1,12 @@ import argparse import benchmark_core + import benchmark_utils import torch + """Performance microbenchmarks's main binary. This is the main function for running performance microbenchmark tests. diff --git a/benchmarks/operator_benchmark/common/repeat_benchmark.py b/benchmarks/operator_benchmark/common/repeat_benchmark.py index 8f8f5fffb8a..8c0558e3483 100644 --- a/benchmarks/operator_benchmark/common/repeat_benchmark.py +++ b/benchmarks/operator_benchmark/common/repeat_benchmark.py @@ -4,6 +4,7 @@ import numpy as np import torch + """Microbenchmarks for Tensor repeat operator. Supports PyTorch.""" input_shapes = ( diff --git a/benchmarks/operator_benchmark/common/tests/jit_forward_test.py b/benchmarks/operator_benchmark/common/tests/jit_forward_test.py index 339b2856c70..65a86a6c292 100644 --- a/benchmarks/operator_benchmark/common/tests/jit_forward_test.py +++ b/benchmarks/operator_benchmark/common/tests/jit_forward_test.py @@ -2,6 +2,7 @@ import operator_benchmark as op_bench import torch + intraop_bench_configs = op_bench.config_list( attrs=[ [8, 16], diff --git a/benchmarks/operator_benchmark/common/tests/pt_configs_list_test.py b/benchmarks/operator_benchmark/common/tests/pt_configs_list_test.py index 8e92f5bca18..e5bf5c7800a 100644 --- a/benchmarks/operator_benchmark/common/tests/pt_configs_list_test.py +++ b/benchmarks/operator_benchmark/common/tests/pt_configs_list_test.py @@ -2,6 +2,7 @@ import operator_benchmark as op_bench import torch + """Microbenchmarks for element-wise Add operator. Supports both Caffe2/PyTorch.""" add_short_configs = op_bench.config_list( diff --git a/benchmarks/operator_benchmark/operator_benchmark.py b/benchmarks/operator_benchmark/operator_benchmark.py index 7121c519fdd..b3c6678420f 100644 --- a/benchmarks/operator_benchmark/operator_benchmark.py +++ b/benchmarks/operator_benchmark/operator_benchmark.py @@ -2,4 +2,5 @@ import benchmark_runner # noqa: F401 from benchmark_pytorch import TorchBenchmarkBase # noqa: F401 from benchmark_test_generator import * # noqa: F401,F403 + from benchmark_utils import * # noqa: F401,F403 diff --git a/benchmarks/operator_benchmark/pt/add_test.py b/benchmarks/operator_benchmark/pt/add_test.py index 8431e4d398d..54504c4f300 100644 --- a/benchmarks/operator_benchmark/pt/add_test.py +++ b/benchmarks/operator_benchmark/pt/add_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + """Microbenchmarks for add_ operator. Supports both Caffe2/PyTorch.""" # Configs for PT add operator diff --git a/benchmarks/operator_benchmark/pt/ao_sparsifier_test.py b/benchmarks/operator_benchmark/pt/ao_sparsifier_test.py index a00e0dc3b21..38324edbf3f 100644 --- a/benchmarks/operator_benchmark/pt/ao_sparsifier_test.py +++ b/benchmarks/operator_benchmark/pt/ao_sparsifier_test.py @@ -1,7 +1,7 @@ import operator_benchmark as op_bench + import torch from torch import nn - from torch.ao import pruning diff --git a/benchmarks/operator_benchmark/pt/batchnorm_test.py b/benchmarks/operator_benchmark/pt/batchnorm_test.py index 4524c307797..a9f5ffa0b06 100644 --- a/benchmarks/operator_benchmark/pt/batchnorm_test.py +++ b/benchmarks/operator_benchmark/pt/batchnorm_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch import torch.nn.functional as F diff --git a/benchmarks/operator_benchmark/pt/binary_test.py b/benchmarks/operator_benchmark/pt/binary_test.py index a8c9256610d..4a4144a96ee 100644 --- a/benchmarks/operator_benchmark/pt/binary_test.py +++ b/benchmarks/operator_benchmark/pt/binary_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/bmm_test.py b/benchmarks/operator_benchmark/pt/bmm_test.py index 3fcdb70cc6a..8ff5d0b5e1b 100644 --- a/benchmarks/operator_benchmark/pt/bmm_test.py +++ b/benchmarks/operator_benchmark/pt/bmm_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + """Microbenchmarks for add_ operator. Supports both Caffe2/PyTorch.""" diff --git a/benchmarks/operator_benchmark/pt/channel_shuffle_test.py b/benchmarks/operator_benchmark/pt/channel_shuffle_test.py index 40742b45d3f..a45c790c9f8 100644 --- a/benchmarks/operator_benchmark/pt/channel_shuffle_test.py +++ b/benchmarks/operator_benchmark/pt/channel_shuffle_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/chunk_test.py b/benchmarks/operator_benchmark/pt/chunk_test.py index d6c203efe42..7ab456ecb9a 100644 --- a/benchmarks/operator_benchmark/pt/chunk_test.py +++ b/benchmarks/operator_benchmark/pt/chunk_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/clip_ranges_test.py b/benchmarks/operator_benchmark/pt/clip_ranges_test.py index 0e4889af44f..5d69c90ba41 100644 --- a/benchmarks/operator_benchmark/pt/clip_ranges_test.py +++ b/benchmarks/operator_benchmark/pt/clip_ranges_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/configs.py b/benchmarks/operator_benchmark/pt/configs.py index ccfa62cc364..807182dd592 100644 --- a/benchmarks/operator_benchmark/pt/configs.py +++ b/benchmarks/operator_benchmark/pt/configs.py @@ -1,5 +1,6 @@ import operator_benchmark as op_bench + """ Configs shared by multiple benchmarks """ diff --git a/benchmarks/operator_benchmark/pt/conv_test.py b/benchmarks/operator_benchmark/pt/conv_test.py index ad315d8a0bb..93b4942cea2 100644 --- a/benchmarks/operator_benchmark/pt/conv_test.py +++ b/benchmarks/operator_benchmark/pt/conv_test.py @@ -1,9 +1,11 @@ from pt import configs import operator_benchmark as op_bench + import torch import torch.nn as nn + """ Microbenchmarks for Conv1d and ConvTranspose1d operators. """ diff --git a/benchmarks/operator_benchmark/pt/diag_test.py b/benchmarks/operator_benchmark/pt/diag_test.py index bb128a56bfa..9eab6f862bb 100644 --- a/benchmarks/operator_benchmark/pt/diag_test.py +++ b/benchmarks/operator_benchmark/pt/diag_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/embeddingbag_test.py b/benchmarks/operator_benchmark/pt/embeddingbag_test.py index f73fbc5425b..d82d6970759 100644 --- a/benchmarks/operator_benchmark/pt/embeddingbag_test.py +++ b/benchmarks/operator_benchmark/pt/embeddingbag_test.py @@ -2,8 +2,10 @@ import numpy from pt import configs import operator_benchmark as op_bench + import torch + """Embedding and EmbeddingBag Operator Benchmark""" diff --git a/benchmarks/operator_benchmark/pt/fill_test.py b/benchmarks/operator_benchmark/pt/fill_test.py index 1252e81982c..4572983a186 100644 --- a/benchmarks/operator_benchmark/pt/fill_test.py +++ b/benchmarks/operator_benchmark/pt/fill_test.py @@ -1,8 +1,9 @@ import operator_benchmark as op_bench -import torch +import torch from torch.testing._internal.common_device_type import get_all_device_types + """Microbenchmark for Fill_ operator.""" fill_short_configs = op_bench.config_list( diff --git a/benchmarks/operator_benchmark/pt/gather_test.py b/benchmarks/operator_benchmark/pt/gather_test.py index 8a4db487b75..67be175f9c4 100644 --- a/benchmarks/operator_benchmark/pt/gather_test.py +++ b/benchmarks/operator_benchmark/pt/gather_test.py @@ -1,6 +1,7 @@ import numpy import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/gelu_test.py b/benchmarks/operator_benchmark/pt/gelu_test.py index ab83f6659e1..6c48d53738d 100644 --- a/benchmarks/operator_benchmark/pt/gelu_test.py +++ b/benchmarks/operator_benchmark/pt/gelu_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/groupnorm_test.py b/benchmarks/operator_benchmark/pt/groupnorm_test.py index ead09808ece..6f033e050c9 100644 --- a/benchmarks/operator_benchmark/pt/groupnorm_test.py +++ b/benchmarks/operator_benchmark/pt/groupnorm_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch import torch.nn.functional as F diff --git a/benchmarks/operator_benchmark/pt/hardsigmoid_test.py b/benchmarks/operator_benchmark/pt/hardsigmoid_test.py index 7e9b51a0c84..27bc02dd2c2 100644 --- a/benchmarks/operator_benchmark/pt/hardsigmoid_test.py +++ b/benchmarks/operator_benchmark/pt/hardsigmoid_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch import torch.nn as nn diff --git a/benchmarks/operator_benchmark/pt/hardswish_test.py b/benchmarks/operator_benchmark/pt/hardswish_test.py index d93fe298d2e..82a251a459f 100644 --- a/benchmarks/operator_benchmark/pt/hardswish_test.py +++ b/benchmarks/operator_benchmark/pt/hardswish_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch import torch.nn as nn diff --git a/benchmarks/operator_benchmark/pt/index_select_test.py b/benchmarks/operator_benchmark/pt/index_select_test.py index c4a0c233e91..870610071c1 100644 --- a/benchmarks/operator_benchmark/pt/index_select_test.py +++ b/benchmarks/operator_benchmark/pt/index_select_test.py @@ -1,6 +1,7 @@ import numpy import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/instancenorm_test.py b/benchmarks/operator_benchmark/pt/instancenorm_test.py index 47f8243817c..fc74f91bf0f 100644 --- a/benchmarks/operator_benchmark/pt/instancenorm_test.py +++ b/benchmarks/operator_benchmark/pt/instancenorm_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch import torch.nn.functional as F diff --git a/benchmarks/operator_benchmark/pt/interpolate_test.py b/benchmarks/operator_benchmark/pt/interpolate_test.py index 0ae487bc832..c65f0b9c241 100644 --- a/benchmarks/operator_benchmark/pt/interpolate_test.py +++ b/benchmarks/operator_benchmark/pt/interpolate_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + """Microbenchmarks for interpolate operator.""" diff --git a/benchmarks/operator_benchmark/pt/layernorm_test.py b/benchmarks/operator_benchmark/pt/layernorm_test.py index 82f12cbaf58..f837a521565 100644 --- a/benchmarks/operator_benchmark/pt/layernorm_test.py +++ b/benchmarks/operator_benchmark/pt/layernorm_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch import torch.nn.functional as F diff --git a/benchmarks/operator_benchmark/pt/linear_prepack_fp16_test.py b/benchmarks/operator_benchmark/pt/linear_prepack_fp16_test.py index e2c72bec92a..582598051e7 100644 --- a/benchmarks/operator_benchmark/pt/linear_prepack_fp16_test.py +++ b/benchmarks/operator_benchmark/pt/linear_prepack_fp16_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + """Microbenchmarks for linear_prepack_fp16_ operator. Supports both Caffe2/PyTorch.""" # Configs for PT linear_prepack_fp16 operator diff --git a/benchmarks/operator_benchmark/pt/linear_test.py b/benchmarks/operator_benchmark/pt/linear_test.py index cf7a87f0ec0..6f5e2397545 100644 --- a/benchmarks/operator_benchmark/pt/linear_test.py +++ b/benchmarks/operator_benchmark/pt/linear_test.py @@ -1,6 +1,7 @@ from pt import configs import operator_benchmark as op_bench + import torch import torch.nn as nn diff --git a/benchmarks/operator_benchmark/pt/linear_unpack_fp16_test.py b/benchmarks/operator_benchmark/pt/linear_unpack_fp16_test.py index 3b2ef5bc134..f75642c1f22 100644 --- a/benchmarks/operator_benchmark/pt/linear_unpack_fp16_test.py +++ b/benchmarks/operator_benchmark/pt/linear_unpack_fp16_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + """Microbenchmarks for linear_unpack_fp16_ operator. Supports both Caffe2/PyTorch.""" # Configs for PT linear_unpack_fp16 operator diff --git a/benchmarks/operator_benchmark/pt/matmul_test.py b/benchmarks/operator_benchmark/pt/matmul_test.py index 5a2d52e0520..e92728e9ebd 100644 --- a/benchmarks/operator_benchmark/pt/matmul_test.py +++ b/benchmarks/operator_benchmark/pt/matmul_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + """Microbenchmarks for MatMul operator""" # Configs for PT Matmul operator diff --git a/benchmarks/operator_benchmark/pt/matrix_mult_test.py b/benchmarks/operator_benchmark/pt/matrix_mult_test.py index f95c28c663c..c905b566192 100644 --- a/benchmarks/operator_benchmark/pt/matrix_mult_test.py +++ b/benchmarks/operator_benchmark/pt/matrix_mult_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + """ Microbenchmarks for batch matrix mult with einsum and torch.bmm. """ diff --git a/benchmarks/operator_benchmark/pt/pool_test.py b/benchmarks/operator_benchmark/pt/pool_test.py index 6ac67cece9b..7e842ef649c 100644 --- a/benchmarks/operator_benchmark/pt/pool_test.py +++ b/benchmarks/operator_benchmark/pt/pool_test.py @@ -1,7 +1,9 @@ import operator_benchmark as op_bench + import torch import torch.nn as nn + """ Microbenchmarks for MaxPool1d and AvgPool1d operators. """ diff --git a/benchmarks/operator_benchmark/pt/qactivation_test.py b/benchmarks/operator_benchmark/pt/qactivation_test.py index 65e8516a252..9b7280530bc 100644 --- a/benchmarks/operator_benchmark/pt/qactivation_test.py +++ b/benchmarks/operator_benchmark/pt/qactivation_test.py @@ -1,7 +1,9 @@ import operator_benchmark as op_bench + import torch import torch.ao.nn.quantized.functional as qF + r"""Microbenchmarks for the quantized activations.""" qactivation_long_configs = op_bench.cross_product_configs( diff --git a/benchmarks/operator_benchmark/pt/qarithmetic_test.py b/benchmarks/operator_benchmark/pt/qarithmetic_test.py index 7a8def81bc5..a65ae561870 100644 --- a/benchmarks/operator_benchmark/pt/qarithmetic_test.py +++ b/benchmarks/operator_benchmark/pt/qarithmetic_test.py @@ -1,7 +1,9 @@ import operator_benchmark as op_bench + import torch from torch._ops import ops + qarithmetic_binary_configs = op_bench.cross_product_configs( N=(2, 8, 64, 512), dtype=(torch.quint8, torch.qint8, torch.qint32), diff --git a/benchmarks/operator_benchmark/pt/qatembedding_ops_test.py b/benchmarks/operator_benchmark/pt/qatembedding_ops_test.py index 2e72dbe1422..03fc5339610 100644 --- a/benchmarks/operator_benchmark/pt/qatembedding_ops_test.py +++ b/benchmarks/operator_benchmark/pt/qatembedding_ops_test.py @@ -2,10 +2,12 @@ import numpy from pt import configs import operator_benchmark as op_bench + import torch import torch.ao.nn.qat as nnqat from torch.ao.quantization import default_embedding_qat_qconfig + """ Microbenchmarks for QAT Embedding + EmbeddingBag operators. """ diff --git a/benchmarks/operator_benchmark/pt/qbatchnorm_test.py b/benchmarks/operator_benchmark/pt/qbatchnorm_test.py index b8cee3ae02b..218e133b937 100644 --- a/benchmarks/operator_benchmark/pt/qbatchnorm_test.py +++ b/benchmarks/operator_benchmark/pt/qbatchnorm_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/qcomparators_test.py b/benchmarks/operator_benchmark/pt/qcomparators_test.py index 9a251d6417a..37ee6f60796 100644 --- a/benchmarks/operator_benchmark/pt/qcomparators_test.py +++ b/benchmarks/operator_benchmark/pt/qcomparators_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + qcomparators_configs = op_bench.cross_product_configs( N=(8, 64), dtype=(torch.quint8, torch.qint8, torch.qint32), diff --git a/benchmarks/operator_benchmark/pt/qconv_test.py b/benchmarks/operator_benchmark/pt/qconv_test.py index 52374acf95b..540480c297c 100644 --- a/benchmarks/operator_benchmark/pt/qconv_test.py +++ b/benchmarks/operator_benchmark/pt/qconv_test.py @@ -1,9 +1,11 @@ from pt import configs import operator_benchmark as op_bench + import torch import torch.ao.nn.quantized as nnq + """ Microbenchmarks for qConv operators. """ diff --git a/benchmarks/operator_benchmark/pt/qembedding_bag_lookups_test.py b/benchmarks/operator_benchmark/pt/qembedding_bag_lookups_test.py index 7ee0792d1ef..f5fd9dc4ff3 100644 --- a/benchmarks/operator_benchmark/pt/qembedding_bag_lookups_test.py +++ b/benchmarks/operator_benchmark/pt/qembedding_bag_lookups_test.py @@ -3,10 +3,11 @@ from typing import Optional import numpy as np import operator_benchmark as op_bench -import torch +import torch from torch.testing._internal.common_quantization import lengths_to_offsets + torch.ops.load_library("//caffe2/torch/fb/sparsenn:sparsenn_operators") diff --git a/benchmarks/operator_benchmark/pt/qembedding_pack_test.py b/benchmarks/operator_benchmark/pt/qembedding_pack_test.py index cfc6c51024a..d63b880dc0d 100644 --- a/benchmarks/operator_benchmark/pt/qembedding_pack_test.py +++ b/benchmarks/operator_benchmark/pt/qembedding_pack_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + embeddingbag_conversion_short_configs = op_bench.cross_product_configs( num_embeddings=(80,), embedding_dim=(128, 256, 512), tags=("short",) ) diff --git a/benchmarks/operator_benchmark/pt/qembeddingbag_test.py b/benchmarks/operator_benchmark/pt/qembeddingbag_test.py index fe563af0bc8..adc8c3b2769 100644 --- a/benchmarks/operator_benchmark/pt/qembeddingbag_test.py +++ b/benchmarks/operator_benchmark/pt/qembeddingbag_test.py @@ -2,9 +2,11 @@ import numpy from pt import configs import operator_benchmark as op_bench + import torch import torch.ao.nn.quantized as nnq + """ Microbenchmarks for qEmbeddingBag operators. """ diff --git a/benchmarks/operator_benchmark/pt/qgroupnorm_test.py b/benchmarks/operator_benchmark/pt/qgroupnorm_test.py index 0148800450e..73489965e38 100644 --- a/benchmarks/operator_benchmark/pt/qgroupnorm_test.py +++ b/benchmarks/operator_benchmark/pt/qgroupnorm_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/qinstancenorm_test.py b/benchmarks/operator_benchmark/pt/qinstancenorm_test.py index 4dcec4f2a88..196cdea2a11 100644 --- a/benchmarks/operator_benchmark/pt/qinstancenorm_test.py +++ b/benchmarks/operator_benchmark/pt/qinstancenorm_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/qinterpolate_test.py b/benchmarks/operator_benchmark/pt/qinterpolate_test.py index c3afc308f6b..2410266cda8 100644 --- a/benchmarks/operator_benchmark/pt/qinterpolate_test.py +++ b/benchmarks/operator_benchmark/pt/qinterpolate_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + """Microbenchmarks for the quantized interpolate op. Note: We are not benchmarking `upsample` as it is being deprecated, and calls diff --git a/benchmarks/operator_benchmark/pt/qlayernorm_test.py b/benchmarks/operator_benchmark/pt/qlayernorm_test.py index c2015b920d6..0f81497ef60 100644 --- a/benchmarks/operator_benchmark/pt/qlayernorm_test.py +++ b/benchmarks/operator_benchmark/pt/qlayernorm_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/qlinear_test.py b/benchmarks/operator_benchmark/pt/qlinear_test.py index 1e96b3a75d1..d90b4e8af64 100644 --- a/benchmarks/operator_benchmark/pt/qlinear_test.py +++ b/benchmarks/operator_benchmark/pt/qlinear_test.py @@ -1,10 +1,12 @@ from pt import configs import operator_benchmark as op_bench + import torch import torch.ao.nn.quantized as nnq import torch.ao.nn.quantized.dynamic as nnqd + """ Microbenchmarks for Quantized Linear operators. """ diff --git a/benchmarks/operator_benchmark/pt/qobserver_test.py b/benchmarks/operator_benchmark/pt/qobserver_test.py index c4ab625f936..0685a5f01cd 100644 --- a/benchmarks/operator_benchmark/pt/qobserver_test.py +++ b/benchmarks/operator_benchmark/pt/qobserver_test.py @@ -1,7 +1,9 @@ import operator_benchmark as op_bench + import torch import torch.ao.quantization.observer as obs + qobserver_short_configs_dict = { "attr_names": ("C", "M", "N", "dtype", "device"), "attrs": ( diff --git a/benchmarks/operator_benchmark/pt/qpool_test.py b/benchmarks/operator_benchmark/pt/qpool_test.py index b2e1118ac89..964a8b54b5c 100644 --- a/benchmarks/operator_benchmark/pt/qpool_test.py +++ b/benchmarks/operator_benchmark/pt/qpool_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + # 2D pooling will have input matrix of rank 3 or 4 qpool2d_long_configs = op_bench.config_list( attrs=( diff --git a/benchmarks/operator_benchmark/pt/qrnn_test.py b/benchmarks/operator_benchmark/pt/qrnn_test.py index 5c4600683f7..6d140464e96 100644 --- a/benchmarks/operator_benchmark/pt/qrnn_test.py +++ b/benchmarks/operator_benchmark/pt/qrnn_test.py @@ -1,7 +1,9 @@ import operator_benchmark as op_bench + import torch from torch import nn + """ Microbenchmarks for RNNs. """ diff --git a/benchmarks/operator_benchmark/pt/qtensor_method_test.py b/benchmarks/operator_benchmark/pt/qtensor_method_test.py index 3e545c36874..966c69dd051 100644 --- a/benchmarks/operator_benchmark/pt/qtensor_method_test.py +++ b/benchmarks/operator_benchmark/pt/qtensor_method_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + # Configs for pointwise and reduction unary ops qmethods_configs_short = op_bench.config_list( attr_names=["M", "N"], diff --git a/benchmarks/operator_benchmark/pt/quantization_test.py b/benchmarks/operator_benchmark/pt/quantization_test.py index 73b0ef09dba..e0d3483963a 100644 --- a/benchmarks/operator_benchmark/pt/quantization_test.py +++ b/benchmarks/operator_benchmark/pt/quantization_test.py @@ -1,9 +1,11 @@ import operator_benchmark as op_bench + import torch import torch.ao.nn.quantized as nnq import torch.ao.quantization as tq import torch.nn as nn + """Microbenchmarks for general quantization operations.""" # mode is used to show the direction of the benchmark: diff --git a/benchmarks/operator_benchmark/pt/qunary_test.py b/benchmarks/operator_benchmark/pt/qunary_test.py index f1f92cbf72c..1f4d08ff579 100644 --- a/benchmarks/operator_benchmark/pt/qunary_test.py +++ b/benchmarks/operator_benchmark/pt/qunary_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/remainder_test.py b/benchmarks/operator_benchmark/pt/remainder_test.py index 7b069d4386b..5de8d7ca08d 100644 --- a/benchmarks/operator_benchmark/pt/remainder_test.py +++ b/benchmarks/operator_benchmark/pt/remainder_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/softmax_test.py b/benchmarks/operator_benchmark/pt/softmax_test.py index d7c6e170f51..b7fb4bc6dc9 100644 --- a/benchmarks/operator_benchmark/pt/softmax_test.py +++ b/benchmarks/operator_benchmark/pt/softmax_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch import torch.nn as nn diff --git a/benchmarks/operator_benchmark/pt/split_test.py b/benchmarks/operator_benchmark/pt/split_test.py index 74c388e4248..4b55626eb9c 100644 --- a/benchmarks/operator_benchmark/pt/split_test.py +++ b/benchmarks/operator_benchmark/pt/split_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt/sum_test.py b/benchmarks/operator_benchmark/pt/sum_test.py index 493c950569c..ec9c80222cf 100644 --- a/benchmarks/operator_benchmark/pt/sum_test.py +++ b/benchmarks/operator_benchmark/pt/sum_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + """Microbenchmarks for sum reduction operator.""" # Configs for PT add operator diff --git a/benchmarks/operator_benchmark/pt/tensor_to_test.py b/benchmarks/operator_benchmark/pt/tensor_to_test.py index dfd682e0246..621e58212cb 100644 --- a/benchmarks/operator_benchmark/pt/tensor_to_test.py +++ b/benchmarks/operator_benchmark/pt/tensor_to_test.py @@ -1,6 +1,8 @@ import operator_benchmark as op_bench + import torch + tensor_conversion_short_configs = op_bench.cross_product_configs( M=( 8, diff --git a/benchmarks/operator_benchmark/pt/unary_test.py b/benchmarks/operator_benchmark/pt/unary_test.py index a5f617fd50c..e605c731396 100644 --- a/benchmarks/operator_benchmark/pt/unary_test.py +++ b/benchmarks/operator_benchmark/pt/unary_test.py @@ -1,4 +1,5 @@ import operator_benchmark as op_bench + import torch diff --git a/benchmarks/operator_benchmark/pt_extension/setup.py b/benchmarks/operator_benchmark/pt_extension/setup.py index 53db50f4436..06bd8bdf235 100644 --- a/benchmarks/operator_benchmark/pt_extension/setup.py +++ b/benchmarks/operator_benchmark/pt_extension/setup.py @@ -2,6 +2,7 @@ from setuptools import setup from torch.utils.cpp_extension import BuildExtension, CppExtension + setup( name="benchmark_cpp_extension", ext_modules=[CppExtension("benchmark_cpp_extension", ["extension.cpp"])], diff --git a/benchmarks/overrides_benchmark/bench.py b/benchmarks/overrides_benchmark/bench.py index 72dcf89bedb..1052f252219 100644 --- a/benchmarks/overrides_benchmark/bench.py +++ b/benchmarks/overrides_benchmark/bench.py @@ -5,6 +5,7 @@ from common import SubTensor, SubWithTorchFunction, WithTorchFunction import torch + NUM_REPEATS = 1000 NUM_REPEAT_OF_REPEATS = 1000 diff --git a/benchmarks/overrides_benchmark/common.py b/benchmarks/overrides_benchmark/common.py index fe594bad214..257760ea195 100644 --- a/benchmarks/overrides_benchmark/common.py +++ b/benchmarks/overrides_benchmark/common.py @@ -1,5 +1,6 @@ import torch + NUM_REPEATS = 1000 NUM_REPEAT_OF_REPEATS = 1000 diff --git a/benchmarks/overrides_benchmark/pyspybench.py b/benchmarks/overrides_benchmark/pyspybench.py index 69745162dfe..e7d26660b61 100644 --- a/benchmarks/overrides_benchmark/pyspybench.py +++ b/benchmarks/overrides_benchmark/pyspybench.py @@ -4,6 +4,7 @@ from common import SubTensor, SubWithTorchFunction, WithTorchFunction # noqa: F import torch + Tensor = torch.tensor NUM_REPEATS = 1000000 diff --git a/benchmarks/profiler_benchmark/profiler_bench.py b/benchmarks/profiler_benchmark/profiler_bench.py index e29d45f0649..ced82a501cb 100644 --- a/benchmarks/profiler_benchmark/profiler_bench.py +++ b/benchmarks/profiler_benchmark/profiler_bench.py @@ -3,9 +3,9 @@ import sys import timeit import torch - from torch.utils.benchmark import Timer + PARALLEL_TASKS_NUM = 4 INTERNAL_ITER = None diff --git a/benchmarks/profiler_benchmark/resnet_memory_profiler.py b/benchmarks/profiler_benchmark/resnet_memory_profiler.py index 85212b98218..c18d456c83c 100644 --- a/benchmarks/profiler_benchmark/resnet_memory_profiler.py +++ b/benchmarks/profiler_benchmark/resnet_memory_profiler.py @@ -1,9 +1,9 @@ import torchvision.models as models import torch - import torch.autograd.profiler as profiler + for with_cuda in [False, True]: model = models.resnet18() inputs = torch.randn(5, 3, 224, 224) diff --git a/benchmarks/serialization/nested_annotation_str.py b/benchmarks/serialization/nested_annotation_str.py index a7b6cf49cfd..9d14c292031 100644 --- a/benchmarks/serialization/nested_annotation_str.py +++ b/benchmarks/serialization/nested_annotation_str.py @@ -1,6 +1,7 @@ import torch import torch.utils.benchmark as benchmark + MEMO = {} diff --git a/benchmarks/serialization/simple_measurement.py b/benchmarks/serialization/simple_measurement.py index 9cc12ca1329..ee75acd5a85 100644 --- a/benchmarks/serialization/simple_measurement.py +++ b/benchmarks/serialization/simple_measurement.py @@ -2,6 +2,7 @@ from pyarkbench import Benchmark, default_args, Timer import torch + use_new = True diff --git a/benchmarks/tensorexpr/__main__.py b/benchmarks/tensorexpr/__main__.py index 3c80a6e39fe..95e6bd6825a 100644 --- a/benchmarks/tensorexpr/__main__.py +++ b/benchmarks/tensorexpr/__main__.py @@ -5,7 +5,7 @@ import os # from . import conv # noqa: F401 # from . import normalization # noqa: F401 # from . import pooling # noqa: F401 -from . import ( # noqa: F401 # noqa: F401 # noqa: F401 # noqa: F401 # noqa: F401 # noqa: F401 # noqa: F401 # noqa: F401 # noqa: F401 +from . import ( # noqa: F401 attention, benchmark, broadcast, diff --git a/benchmarks/transformer/better_transformer_vs_mha_functional.py b/benchmarks/transformer/better_transformer_vs_mha_functional.py index 44e8105ef1e..3aa2e6c214c 100644 --- a/benchmarks/transformer/better_transformer_vs_mha_functional.py +++ b/benchmarks/transformer/better_transformer_vs_mha_functional.py @@ -12,7 +12,6 @@ import argparse import itertools import json import random - import warnings from collections import defaultdict, OrderedDict from pathlib import Path @@ -20,12 +19,12 @@ from pprint import pprint from typing import Optional import numpy as np - from prettytable import PrettyTable from tqdm import tqdm import torch + warnings.filterwarnings("ignore") error_dict = defaultdict(int) diff --git a/benchmarks/transformer/score_mod.py b/benchmarks/transformer/score_mod.py index 040838d0786..d8a88f2a2b3 100644 --- a/benchmarks/transformer/score_mod.py +++ b/benchmarks/transformer/score_mod.py @@ -13,6 +13,7 @@ import torch import torch.nn.functional as F from torch.nn.attention.flex_attention import flex_attention + torch._dynamo.config.automatic_dynamic_shapes = False # Needed since changing args to function causes recompiles torch._dynamo.config.cache_size_limit = 1000 diff --git a/benchmarks/transformer/sdp.py b/benchmarks/transformer/sdp.py index eacf6cf0f00..3edda07b309 100644 --- a/benchmarks/transformer/sdp.py +++ b/benchmarks/transformer/sdp.py @@ -1,7 +1,6 @@ import argparse import itertools import random - import warnings from dataclasses import dataclass from pathlib import Path @@ -16,6 +15,7 @@ import torch import torch.utils.benchmark as benchmark from torch.backends.cuda import sdp_kernel + warnings.filterwarnings("ignore") diff --git a/tools/linter/adapters/ufmt_linter.py b/tools/linter/adapters/ufmt_linter.py index 203811bc6e4..851d80afbbe 100644 --- a/tools/linter/adapters/ufmt_linter.py +++ b/tools/linter/adapters/ufmt_linter.py @@ -32,7 +32,6 @@ ISORT_WHITELIST = re.compile( # .ci/** # .github/** # benchmarks/** - "benchmarks/**", # functorch/** "functorch/**", # tools/**