mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
The `usort` config in `pyproject.toml` has no effect due to a typo. Fixing the typo makes `usort` do more and generates the changes in this PR. Except for `pyproject.toml`, all changes are generated by `lintrunner -a --take UFMT --all-files`. Pull Request resolved: https://github.com/pytorch/pytorch/pull/127126 Approved by: https://github.com/kit1980
172 lines
5.2 KiB
Python
172 lines
5.2 KiB
Python
# Sparse benchmarks
|
|
|
|
# This benchmark is for sparse matmul performance test.
|
|
# They exist for comparing the performance of sparse matrix routines
|
|
# `sparse @ vector`, `sparse @ sparse` and `sparse @ dense` with different backends (CPU/CUDA)
|
|
# and with other frameworks such as scipy.
|
|
|
|
import argparse
|
|
import os
|
|
import sys
|
|
|
|
from scipy.sparse import isspmatrix
|
|
|
|
import torch
|
|
import torch.utils.benchmark as benchmark_utils
|
|
|
|
from .utils import load_dlmc_dataset
|
|
|
|
|
|
def scipy_matmul(mat1, mat2):
    """Multiply two matrices with scipy/numpy ``dot``.

    When both operands are scipy sparse matrices, the product is
    converted to COO format; otherwise the raw ``dot`` result is
    returned unchanged.
    """
    sparse_inputs = isspmatrix(mat1) and isspmatrix(mat2)
    product = mat1.dot(mat2)
    return product.tocoo() if sparse_inputs else product
|
|
|
|
|
|
def matmul_backward(a_dense, b_dense, grad_output):
    """Dense matmul forward pass followed by backward with ``grad_output``."""
    out = a_dense.matmul(b_dense)
    out.backward(grad_output)
|
|
|
|
|
|
def sparse_matmul_backward(a, b, grad_output):
    """Sparse matmul forward pass (``torch.sparse.mm``) followed by backward."""
    out = torch.sparse.mm(a, b)
    out.backward(grad_output)
|
|
|
|
|
|
# Maps each supported benchmark operation name to the torch entry point
# used in the generated forward-pass statement strings.
OPS_MAP = {
    "sparse@sparse": "torch.sparse.mm",
    "sparse@dense": "torch.matmul",
    "sparse@vector": "torch.matmul",
}


# also get the arguments as input from the user using `argparse`
def parse_args():
    """Build the CLI parser for the sparse matmul benchmark.

    Returns:
        The configured ``argparse.ArgumentParser``; callers invoke
        ``.parse_args()`` on the result themselves.
    """
    parser = argparse.ArgumentParser(description="matmul benchmark")
    parser.add_argument("--path", type=str, help="DLMC dataset path")
    parser.add_argument("--dataset", type=str, default="magnitude_pruning")
    parser.add_argument("--hidden-size", "--hidden_size", default=2048, type=int)
    parser.add_argument("--backward-test", "--backward_test", action="store_true")
    parser.add_argument(
        "--operation",
        type=str,
        help="|".join(OPS_MAP.keys()),
        default=next(iter(OPS_MAP)),
    )
    parser.add_argument("--with-cuda", "--with_cuda", action="store_true")
    parser.add_argument(
        "--timer-min-run-time", "--timer_min_run_time", default=1, type=float
    )
    return parser


def get_tasks(op, backward_test, device):
    """Return the ``(label, device, sub_label, stmt)`` benchmark tasks for ``op``.

    Args:
        op: one of the ``OPS_MAP`` keys, e.g. ``"sparse@dense"``.
        backward_test: when True, generate backward-pass statements
            instead of forward-pass ones.
        device: ``"cpu"`` or ``"cuda"``; the scipy comparison task is
            only emitted on CPU.

    Raises:
        KeyError: if ``op`` is not a supported operation (same failure
            mode as the original lookup-table implementation).
    """
    # Previously this built the task lists for all three operations and
    # then indexed one of them; build only the requested op's tasks.
    if op not in OPS_MAP:
        raise KeyError(op)

    # sub_label for the dense baseline, e.g. "torch:dense@dense".
    dense_variant = "torch:" + op.replace("sparse", "dense")

    if backward_test:
        test_name = device + ":matmul-backward"
        return [
            (
                test_name,
                device,
                dense_variant,
                "matmul_backward(dx, dy, grad_output)",
            ),
            (
                test_name,
                device,
                "torch:" + op,
                "sparse_matmul_backward(x, y, sparse_grad_output)",
            ),
        ]

    test_name = device + ":matmul-forward"
    tasks = [
        (test_name, device, dense_variant, f"{OPS_MAP[op]}(dx, dy)"),
        (test_name, device, "torch:" + op, f"{OPS_MAP[op]}(x, y)"),
    ]
    if device == "cpu":
        # scipy has no CUDA backend, so the scipy baseline is CPU-only.
        tasks.append((test_name, device, "scipy:" + op, "scipy_matmul(sx, sy)"))
    return tasks
|
|
|
|
|
|
if __name__ == "__main__":
    # Entry point: benchmark sparse matmul variants over the DLMC dataset
    # and print a benchmark_utils.Compare table of the timings.
    parser = parse_args()
    args = parser.parse_args()

    if args.with_cuda and not torch.cuda.is_available():
        raise RuntimeError("No CUDA available")

    # Dataset root is <path>/<dataset>, e.g. <path>/magnitude_pruning.
    dataset_path = args.path
    dataset_name = args.dataset
    dataset_path = os.path.join(dataset_path, dataset_name)
    device = "cuda" if args.with_cuda else "cpu"

    tasks = get_tasks(args.operation, args.backward_test, device)
    # Each timer is run `repeats` times so Compare can show spread.
    repeats = 3
    # One Timer per (sparsity, task, dataset sample). NOTE: the
    # `for label, device, sub_label, stmt in tasks` clause rebinds
    # `device` inside the comprehension with the per-task device from the
    # task tuple — that rebound value is what reaches load_dlmc_dataset
    # and env= below (same value as the outer `device`, since get_tasks
    # embedded it into every tuple).
    timers = [
        benchmark_utils.Timer(
            stmt=stmt,
            # The statement strings reference these names plus the
            # per-sample tensors (x, y, dx, dy, sx, sy, grad_output, ...)
            # supplied by load_dlmc_dataset via **variables.
            globals={
                "scipy_matmul": scipy_matmul,
                "matmul_backward": matmul_backward,
                "sparse_matmul_backward": sparse_matmul_backward,
                **variables,
            },
            label=label,
            sub_label=sub_label,
            description=f"{sparsity}",
            env=device,
        )
        for sparsity in [0.5, 0.7, 0.8, 0.9, 0.95, 0.98]
        for label, device, sub_label, stmt in tasks
        for variables in load_dlmc_dataset(
            dataset_path,
            args.operation,
            args.hidden_size,
            sparsity,
            device,
            args.backward_test,
        )
    ]
    measurements = []

    # Run every timer `repeats` times, printing a \r-updating progress
    # counter on a single line.
    for i, timer in enumerate(timers * repeats):
        m = timer.blocked_autorange(min_run_time=args.timer_min_run_time)
        # Tag each measurement with its device, recovered from the env
        # string set above, for downstream grouping.
        m.metadata = {"device": "cuda" if m.task_spec.env.find("cuda") >= 0 else "cpu"}
        measurements.append(m)
        print(f"\r{i + 1} / {len(timers) * repeats}", end="")
        sys.stdout.flush()
    print()

    comparison = benchmark_utils.Compare(measurements)

    print("== Results " + "=" * 80 + "\n" + "/" * 95 + "\n")
    comparison.print()
|