[CacheBench] Refactor code to prepare for mode benchmarks (#147641)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/147641
Approved by: https://github.com/huydhn
This commit is contained in:
parent 5d26b7108f
commit 1c334893dc
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -494,7 +494,8 @@ test_cachebench() {
   TEST_REPORTS_DIR=$(pwd)/test/test-reports
   mkdir -p "$TEST_REPORTS_DIR"
 
-  $TASKSET python "benchmarks/dynamo/cachebench.py" --output "$TEST_REPORTS_DIR/cachebench.json"
+  $TASKSET python "benchmarks/dynamo/cachebench.py" --mode training --benchmark torchbench --output "$TEST_REPORTS_DIR/cachebench_training.json"
+  $TASKSET python "benchmarks/dynamo/cachebench.py" --mode inference --benchmark torchbench --output "$TEST_REPORTS_DIR/cachebench_inference.json"
 }
 
 test_perf_for_dashboard() {
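Note: the CI step now produces one report per mode rather than a single cachebench.json. A minimal Python sketch of the equivalent loop (the report directory here is an illustrative stand-in for $TEST_REPORTS_DIR):

import subprocess

REPORTS_DIR = "test/test-reports"  # illustrative stand-in for $TEST_REPORTS_DIR

# One cachebench run per mode, each writing its own JSON report,
# mirroring the two $TASKSET invocations added above.
for mode in ("training", "inference"):
    subprocess.run(
        [
            "python",
            "benchmarks/dynamo/cachebench.py",
            "--mode", mode,
            "--benchmark", "torchbench",
            "--output", f"{REPORTS_DIR}/cachebench_{mode}.json",
        ],
        check=True,  # fail the step if the benchmark exits non-zero
    )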
--- a/benchmarks/dynamo/cachebench.py
+++ b/benchmarks/dynamo/cachebench.py
@@ -6,6 +6,7 @@ import os
 import subprocess
 import sys
 import tempfile
+from typing import Callable
 
 from torch._inductor.utils import fresh_inductor_cache
 
@@ -14,18 +15,22 @@ logger: logging.Logger = logging.getLogger(__name__)
 
 TIMEOUT: int = 2000
 
-MODELS: list[str] = ["nanogpt", "BERT_pytorch", "resnet50"]
+# Keep in sync with .ci/pytorch/test.sh
+TORCHBENCH_MODELS: list[str] = ["nanogpt", "BERT_pytorch", "resnet50"]
 
 
 @dataclasses.dataclass
 class RunResult:
     model: str
     mode: str  # inference or training
+    benchmark: str
     dynamic: bool
     device: str  # cuda or cpu
     cold_compile_s: float
     warm_compile_s: float
     speedup: float
+    speedup_pct: float
 
 
 def get_compile_time(file: tempfile._TemporaryFileWrapper) -> float:
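Note: the new speedup_pct field expresses the same cold/warm comparison as speedup, but as percent compile time saved. A worked example with made-up timings, using the exact formulas from the RunResult construction later in this diff:

cold_compile_s = 120.0  # compile time with an empty cache (made-up value)
warm_compile_s = 30.0   # compile time with a warm cache (made-up value)

speedup = cold_compile_s / warm_compile_s                    # 4.0, i.e. 4x faster
speedup_pct = (1 - (warm_compile_s / cold_compile_s)) * 100  # 75.0, i.e. 75% saved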
@@ -56,7 +61,15 @@ def _run_torchbench_from_args(model: str, args: list[str]) -> tuple[float, float]:
     return cold_compile_time, warm_compile_time
 
 
-def _run_torchbench_model(results: list[RunResult], model: str, device: str) -> None:
+MODE_ARGS_DICT = {
+    "inference": ["--inference", "--bfloat16"],
+    "training": ["--training", "--amp"],
+}
+
+
+def _run_torchbench_model(
+    results: list[RunResult], model: str, device: str, mode: str
+) -> None:
     cur_file = os.path.abspath(__file__)
     torchbench_file = os.path.join(os.path.dirname(cur_file), "torchbench.py")
     assert os.path.exists(
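Note: hoisting the per-mode flags into MODE_ARGS_DICT lets the caller pick one mode per invocation instead of looping over both. A standalone sketch of how the flag list composes (base_args is trimmed here; the real list in the file carries more flags):

MODE_ARGS_DICT = {
    "inference": ["--inference", "--bfloat16"],
    "training": ["--training", "--amp"],
}

mode = "inference"
device = "cuda"
base_args = [
    "--performance",
    "--backend=inductor",
    f"--device={device}",
] + MODE_ARGS_DICT[mode]

print(base_args)
# ['--performance', '--backend=inductor', '--device=cuda', '--inference', '--bfloat16']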
@@ -71,50 +84,49 @@ def _run_torchbench_model(results: list[RunResult], model: str, device: str) ->
         "--performance",
         "--backend=inductor",
         f"--device={device}",
-    ]
-    for mode, mode_args in [
-        ("inference", ["--inference", "--bfloat16"]),
-        ("training", ["--training", "--amp"]),
-    ]:
-        for dynamic, dynamic_args in [
-            (False, []),
-            (True, ["--dynamic-shapes", "--dynamic-batch-only"]),
-        ]:
-            args = list(base_args)
-            args.extend(mode_args)
-            args.extend(dynamic_args)
-
-            logger.info(f"Command: {args}")  # noqa: G004
-            try:
-                cold_compile_t, warm_compile_t = _run_torchbench_from_args(model, args)
-                results.append(
-                    RunResult(
-                        "model",
-                        mode,
-                        dynamic,
-                        device,
-                        cold_compile_t,
-                        warm_compile_t,
-                        cold_compile_t / warm_compile_t,
-                    )
-                )
-            except Exception as e:
-                print(e)
-                return None
+    ] + MODE_ARGS_DICT[mode]
+    for dynamic, dynamic_args in [
+        (False, []),
+        (True, ["--dynamic-shapes", "--dynamic-batch-only"]),
+    ]:
+        args = list(base_args)
+        args.extend(dynamic_args)
+
+        logger.info(f"Command: {args}")  # noqa: G004
+        try:
+            cold_compile_t, warm_compile_t = _run_torchbench_from_args(model, args)
+            results.append(
+                RunResult(
+                    model,
+                    mode,
+                    "torchbench",
+                    dynamic,
+                    device,
+                    cold_compile_t,
+                    warm_compile_t,
+                    cold_compile_t / warm_compile_t,
+                    (1 - (warm_compile_t / cold_compile_t)) * 100,
+                )
+            )
+        except Exception:
+            logger.info("fail", exc_info=True)
+            return None
 
 
 def _write_results_to_json(results: list[RunResult], output_filename: str) -> None:
     records = []
     for result in results:
         for metric_name, value in [
-            ("cold_compile_time(s)", result.cold_compile_s),
-            ("warm_compile_time(s)", result.warm_compile_s),
-            ("speedup", result.speedup),
+            ("Cold compile time (s)", result.cold_compile_s),
+            ("Warm compile time (s)", result.warm_compile_s),
+            ("Speedup", result.speedup),
+            ("Speedup (%)", result.speedup_pct),
         ]:
             records.append(
                 {
                     "benchmark": {
-                        "name": "cache_benchmarks",
+                        "name": "TorchCache Benchmark",
                         "mode": result.mode,
                         "extra_info": {
                             "is_dynamic": result.dynamic,
@@ -124,6 +136,7 @@ def _write_results_to_json(results: list[RunResult], output_filename: str) -> None:
                     "model": {
                         "name": result.model,
                         "backend": "inductor",
+                        "origins": [result.benchmark],
                     },
                     "metric": {
                         "name": metric_name,
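Note: assembled from the fragments visible in these hunks, one emitted record now looks roughly like the sketch below; fields the hunks do not show are marked rather than guessed:

record = {
    "benchmark": {
        "name": "TorchCache Benchmark",
        "mode": "inference",
        "extra_info": {
            "is_dynamic": False,
            # further extra_info keys are not shown in this diff
        },
    },
    "model": {
        "name": "nanogpt",
        "backend": "inductor",
        "origins": ["torchbench"],  # the new field tagging the source suite
    },
    "metric": {
        "name": "Cold compile time (s)",
        # the metric value fields are not shown in this diff
    },
}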
@@ -137,15 +150,28 @@ def _write_results_to_json(results: list[RunResult], output_filename: str) -> None:
 
 
 def parse_cmd_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(
-        description="Run a TorchBench ServiceLab benchmark."
-    )
+    parser = argparse.ArgumentParser(description="Run a TorchCache benchmark.")
     parser.add_argument(
         "-m",
         "--model",
         help="Name of the model to run",
     )
-    parser.add_argument("-d", "--device", default="cuda", help="cpu or cuda")
+    parser.add_argument(
+        "--benchmark",
+        choices=["torchbench"],
+        required=True,
+        help="Name of benchmark suite to run",
+    )
+    parser.add_argument(
+        "--mode",
+        choices=["inference", "training"],
+        default="training",
+    )
+    parser.add_argument(
+        "--device",
+        default="cuda",
+        choices=["cuda", "cpu"],
+    )
     parser.add_argument(
         "--output",
         required=True,
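Note: --benchmark is now required, while --mode and --device fall back to defaults. A quick standalone check of that behavior, rebuilding just the new flags (the real parse_cmd_args() also defines -m/--model and --output):

import argparse

parser = argparse.ArgumentParser(description="Run a TorchCache benchmark.")
parser.add_argument("--benchmark", choices=["torchbench"], required=True)
parser.add_argument("--mode", choices=["inference", "training"], default="training")
parser.add_argument("--device", default="cuda", choices=["cuda", "cpu"])

args = parser.parse_args(["--benchmark", "torchbench"])
print(args.mode, args.device)  # "training cuda" -- the defaults apply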
@@ -158,13 +184,18 @@ def parse_cmd_args() -> argparse.Namespace:
 def main() -> None:
     args = parse_cmd_args()
 
-    results: list[RunResult] = []
+    dispatcher: dict[str, tuple[Callable[..., None], list[str]]] = {
+        "torchbench": (_run_torchbench_model, TORCHBENCH_MODELS)
+    }
+    fn, models = dispatcher[args.benchmark]
+
+    results: list[RunResult] = []
     if args.model is not None:
-        _run_torchbench_model(results, args.model, args.device)
+        fn(results, args.model, args.device, args.mode)
     else:
-        for model in MODELS:
-            _run_torchbench_model(results, model, args.device)
+        for model in models:
+            fn(results, model, args.device, args.mode)
 
     _write_results_to_json(results, args.output)
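Note: routing through a dispatcher means a new benchmark suite only needs one new entry mapping its name to a runner callable and a model list. A standalone sketch of the pattern (the "huggingface" entry is hypothetical, shown only to illustrate the extension point):

from typing import Callable

def run_torchbench(results: list, model: str, device: str, mode: str) -> None:
    ...  # stand-in for _run_torchbench_model

TORCHBENCH_MODELS = ["nanogpt", "BERT_pytorch", "resnet50"]

dispatcher: dict[str, tuple[Callable[..., None], list[str]]] = {
    "torchbench": (run_torchbench, TORCHBENCH_MODELS),
    # "huggingface": (run_huggingface, HUGGINGFACE_MODELS),  # hypothetical future entry
}

fn, models = dispatcher["torchbench"]
for model in models:
    fn([], model, "cuda", "inference")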