Add cachebench (#147537)

This PR adds a new benchmark called cachebench in order to measure/demonstrate the prowess of PT2 caching.

```
python benchmarks/dynamo/cachebench.py --output="result.json"
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/147537
Approved by: https://github.com/jamesjwu
2025-12-06 12:20:52 +01:00 · 2025-02-20 11:20:02 -08:00 · 2025-02-20 11:20:02 -08:00 · a8ce4d1846
commit a8ce4d1846
parent af1072ffb6
1 changed files with 172 additions and 0 deletions
--- a/benchmarks/dynamo/cachebench.py
+++ b/benchmarks/dynamo/cachebench.py
@@ -0,0 +1,172 @@
+import argparse
+import dataclasses
+import json
+import logging
+import os
+import subprocess
+import sys
+import tempfile
+
+from torch._inductor.utils import fresh_inductor_cache
+
+
+# Module-level logger, named after this module.
+logger: logging.Logger = logging.getLogger(__name__)
+
+# Timeout handed to every benchmark subprocess call (subprocess timeouts are
+# expressed in seconds).
+TIMEOUT: int = 2000
+
+# Models benchmarked when no --model is given on the command line.
+MODELS: list[str] = ["nanogpt", "BERT_pytorch", "resnet50"]
+
+
+@dataclasses.dataclass
+class RunResult:
+    """Cold vs. warm compile-time measurement for one benchmark configuration."""
+
+    # Model identifier recorded for this run.
+    model: str
+    mode: str  # inference or training
+    # Whether the run was launched with the dynamic-shape flags.
+    dynamic: bool
+    device: str  # cuda or cpu
+    # Compilation latency reported by the first (cold-start) run.
+    cold_compile_s: float
+    # Compilation latency reported by the second (warm-start) run.
+    warm_compile_s: float
+    # Ratio cold_compile_s / warm_compile_s, as computed by the caller.
+    speedup: float
+
+
+def get_compile_time(file: tempfile._TemporaryFileWrapper) -> float:
+    """Extract the compilation latency from a benchmark CSV report.
+
+    The report is expected to hold a header row followed by at least one data
+    row; the value is read from the ``compilation_latency`` column of the
+    first data row.
+
+    Args:
+        file: Binary file object positioned at the start of the CSV report.
+
+    Returns:
+        The compilation latency parsed as a float.
+
+    Raises:
+        ValueError: If the report has no data row, lacks the
+            ``compilation_latency`` column, the first data row is too short,
+            or the value cannot be parsed as a number.
+    """
+    import csv  # local import: the module-level import block is left untouched
+
+    column = "compilation_latency"
+
+    # The handle yields bytes, so decode before handing the lines to csv.
+    text_lines = [raw.decode("utf-8") for raw in file.readlines()]
+    # csv.reader copes with quoted fields that a plain split(",") would break.
+    rows = [[cell.strip() for cell in row] for row in csv.reader(text_lines)]
+
+    if len(rows) < 2:
+        raise ValueError(
+            f"Expected a header and a data row in the benchmark output, got {len(rows)} row(s)"
+        )
+    header, first_row = rows[0], rows[1]
+    if column not in header:
+        raise ValueError(f"Column {column!r} not found in benchmark output header")
+
+    column_idx = header.index(column)
+    if column_idx >= len(first_row):
+        raise ValueError(f"First data row has no value for column {column!r}")
+    return float(first_row[column_idx])
+
+
+def _run_torchbench_from_args(model: str, args: list[str]) -> tuple[float, float]:
+    """Run the benchmark command twice and return both compilation latencies.
+
+    Both runs happen inside a single ``fresh_inductor_cache()`` context, the
+    first labelled "cold" and the second "warm".
+    NOTE(review): presumably the context starts from an empty compile cache
+    that the first run populates and the second run reuses -- confirm against
+    ``torch._inductor.utils.fresh_inductor_cache``.
+
+    Args:
+        model: Model name, used only for log messages.
+        args: Base command line (interpreter, script and flags). It is not
+            modified; a per-run ``--output=<csv>`` flag is appended to a copy.
+
+    Returns:
+        ``(cold_compile_time, warm_compile_time)`` as reported by the two runs.
+
+    Raises:
+        subprocess.CalledProcessError: If a run exits with a non-zero status.
+        subprocess.TimeoutExpired: If a run exceeds ``TIMEOUT``.
+    """
+    with fresh_inductor_cache():
+        # Snapshot the environment inside the cache context so that both
+        # subprocesses are launched with the same environment.
+        env = os.environ.copy()
+        compile_times: list[float] = []
+        for phase in ("cold", "warm"):
+            # Each run writes its report to its own temporary CSV file.
+            with tempfile.NamedTemporaryFile(suffix=".csv") as file:
+                # Build a fresh command list instead of mutating the caller's.
+                command = [*args, "--output=" + file.name]
+                logger.info("Performing %s-start run for %s", phase, model)
+                subprocess.check_call(command, timeout=TIMEOUT, env=env)
+                compile_times.append(get_compile_time(file))
+
+        cold_compile_time, warm_compile_time = compile_times
+        return cold_compile_time, warm_compile_time
+
+
+def _run_torchbench_model(results: list[RunResult], model: str, device: str) -> None:
+    """Benchmark one model across every mode/dynamic-shape combination.
+
+    For each of inference/training crossed with static/dynamic shapes, the
+    sibling ``torchbench.py`` script is run cold and warm, and a ``RunResult``
+    is appended to ``results``.
+
+    Args:
+        results: Output list; one entry is appended per successful configuration.
+        model: Name of the model, passed to the script as ``--only``.
+        device: Device string, passed to the script as ``--device``.
+
+    Note:
+        The first failing configuration is printed and aborts the remaining
+        configurations for this model; results gathered so far are kept.
+    """
+    cur_file = os.path.abspath(__file__)
+    torchbench_file = os.path.join(os.path.dirname(cur_file), "torchbench.py")
+    assert os.path.exists(
+        torchbench_file
+    ), f"Torchbench does not exist at {torchbench_file}"
+
+    base_args = [
+        sys.executable,
+        torchbench_file,
+        f"--only={model}",
+        "--repeat=1",
+        "--performance",
+        "--backend=inductor",
+        f"--device={device}",
+    ]
+    for mode, mode_args in [
+        ("inference", ["--inference", "--bfloat16"]),
+        ("training", ["--training", "--amp"]),
+    ]:
+        for dynamic, dynamic_args in [
+            (False, []),
+            (True, ["--dynamic-shapes", "--dynamic-batch-only"]),
+        ]:
+            args = [*base_args, *mode_args, *dynamic_args]
+
+            logger.info("Command: %s", args)
+            try:
+                cold_compile_t, warm_compile_t = _run_torchbench_from_args(model, args)
+                results.append(
+                    RunResult(
+                        # Record the actual model name (previously the string
+                        # literal "model" was stored for every run).
+                        model,
+                        mode,
+                        dynamic,
+                        device,
+                        cold_compile_t,
+                        warm_compile_t,
+                        cold_compile_t / warm_compile_t,
+                    )
+                )
+            except Exception as e:
+                # Best effort: report the failure and skip the rest of this model.
+                print(e)
+                return None
+
+
+def _write_results_to_json(results: list[RunResult], output_filename: str) -> None:
+    """Serialize benchmark results to ``output_filename`` as a JSON list.
+
+    Every result expands into three records, one per metric (cold compile
+    time, warm compile time, speedup), each carrying the benchmark, model and
+    metric descriptions.
+
+    Args:
+        results: Measurements to serialize.
+        output_filename: Path of the JSON file to (over)write.
+    """
+
+    def _metrics(run: RunResult) -> tuple[tuple[str, float], ...]:
+        # Metric name / value pairs emitted for a single run, in output order.
+        return (
+            ("cold_compile_time(s)", run.cold_compile_s),
+            ("warm_compile_time(s)", run.warm_compile_s),
+            ("speedup", run.speedup),
+        )
+
+    payload = [
+        {
+            "benchmark": {
+                "name": "cache_benchmarks",
+                "mode": run.mode,
+                "extra_info": {
+                    "is_dynamic": run.dynamic,
+                    "device": run.device,
+                },
+            },
+            "model": {
+                "name": run.model,
+                "backend": "inductor",
+            },
+            "metric": {
+                "name": metric_name,
+                "type": "OSS model",
+                "benchmark_values": [metric_value],
+            },
+        }
+        for run in results
+        for metric_name, metric_value in _metrics(run)
+    ]
+
+    with open(output_filename, "w") as out:
+        json.dump(payload, out)
+
+
+def parse_cmd_args() -> argparse.Namespace:
+    """Parse the command line of this script.
+
+    Recognized options are ``-m/--model`` (optional), ``-d/--device``
+    (defaults to ``cuda``) and the required ``--output``. Unrecognized
+    arguments are tolerated and discarded.
+
+    Returns:
+        Namespace holding ``model``, ``device`` and ``output``.
+    """
+    cli = argparse.ArgumentParser(description="Run a TorchBench ServiceLab benchmark.")
+    cli.add_argument("-m", "--model", help="Name of the model to run")
+    cli.add_argument("-d", "--device", default="cuda", help="cpu or cuda")
+    cli.add_argument("--output", required=True, help="The output filename (json)")
+
+    # parse_known_args returns (namespace, leftovers); the leftovers are ignored.
+    known, _unknown = cli.parse_known_args()
+    return known
+
+
+def main() -> None:
+    """Entry point: benchmark the selected model(s) and write the JSON report."""
+    cli_args = parse_cmd_args()
+    collected: list[RunResult] = []
+
+    # An explicit --model restricts the run to that model; otherwise every
+    # model in MODELS is benchmarked.
+    selected_models = MODELS if cli_args.model is None else [cli_args.model]
+    for model_name in selected_models:
+        _run_torchbench_model(collected, model_name, cli_args.device)
+
+    _write_results_to_json(collected, cli_args.output)
+
+
+if __name__ == "__main__":
+    main()