pytorch/torch/cpu/__init__.py
CaoE 9e14d86573 [Inductor][CPP] Add oneDNN BRGEMM config for Half cpp gemm template (#136255)
`kernel_micro_gemm` generated using BRGEMM:
```cpp
template <bool accum>
inline void kernel_micro_gemm(
    const half* __restrict__ A,
    const half* __restrict__ B,
    float* __restrict__ C,
    int64_t M,
    int64_t N,
    int64_t K,
    int64_t lda,
    int64_t ldb,
    int64_t ldc
) {
    at::native::cpublas::brgemm(
      M, N, K,
      lda, ldb, ldc,
      1.f, accum ? 1.f : 0.f,
      A,
      B,
      C);
}
```
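
For context, here is a minimal sketch of how this code path can be exercised from Python. The Inductor config knobs and the AMX-FP16 gating below are illustrative assumptions, not part of this PR; whether the BRGEMM micro-kernel is actually picked depends on the template's own heuristics and the CPU's ISA support (see the capability helpers in `torch/cpu/__init__.py`):
```python
import torch
from torch._inductor import config as inductor_config

# Assumption: route GEMMs to the C++ template via max-autotune. The exact
# conditions for selecting the BRGEMM micro-kernel live in the template
# heuristics, not in this sketch.
inductor_config.max_autotune = True
inductor_config.max_autotune_gemm_backends = "CPP,ATEN"

if torch.cpu._is_amx_fp16_supported():  # Half BRGEMM targets AMX-FP16 CPUs
    linear = torch.nn.Linear(64, 64).to(torch.half).eval()
    x = torch.randn(128, 64, dtype=torch.half)
    with torch.no_grad():
        # May lower to the BRGEMM-based kernel_micro_gemm shown above.
        y = torch.compile(linear)(x)
```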

Pull Request resolved: https://github.com/pytorch/pytorch/pull/136255
Approved by: https://github.com/jgong5, https://github.com/jansel
2024-11-05 05:33:29 +00:00


# mypy: allow-untyped-defs
r"""
This package implements abstractions found in ``torch.cuda``
to facilitate writing device-agnostic code.
"""
from contextlib import AbstractContextManager
from typing import Any, Optional, Union
import torch
from .. import device as _device
from . import amp
__all__ = [
"is_available",
"synchronize",
"current_device",
"current_stream",
"stream",
"set_device",
"device_count",
"Stream",
"StreamContext",
"Event",
]
_device_t = Union[_device, str, int, None]


def _is_avx2_supported() -> bool:
    r"""Returns a bool indicating if CPU supports AVX2."""
    return torch._C._cpu._is_avx2_supported()


def _is_avx512_supported() -> bool:
    r"""Returns a bool indicating if CPU supports AVX512."""
    return torch._C._cpu._is_avx512_supported()


def _is_avx512_bf16_supported() -> bool:
    r"""Returns a bool indicating if CPU supports AVX512_BF16."""
    return torch._C._cpu._is_avx512_bf16_supported()


def _is_vnni_supported() -> bool:
    r"""Returns a bool indicating if CPU supports VNNI."""
    # Note: currently this only checks avx512_vnni; avx2_vnni support will be added later.
    return torch._C._cpu._is_avx512_vnni_supported()


def _is_amx_tile_supported() -> bool:
    r"""Returns a bool indicating if CPU supports AMX_TILE."""
    return torch._C._cpu._is_amx_tile_supported()


def _is_amx_fp16_supported() -> bool:
    r"""Returns a bool indicating if CPU supports AMX FP16."""
    return torch._C._cpu._is_amx_fp16_supported()


def _init_amx() -> bool:
    r"""Initializes AMX instructions."""
    return torch._C._cpu._init_amx()


def _is_arm_sve_supported() -> bool:
    r"""Returns a bool indicating if CPU supports Arm SVE."""
    return torch._C._cpu._is_arm_sve_supported()
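

# Minimal usage sketch (illustrative comments only, not part of the module):
# these private helpers can gate ISA-specific fast paths at runtime. The branch
# bodies below are placeholders, not real kernels.
#
#     if torch.cpu._is_amx_tile_supported() and torch.cpu._init_amx():
#         ...  # AMX tile state is initialized; take an AMX-based kernel path
#     elif torch.cpu._is_avx512_supported():
#         ...  # otherwise fall back to an AVX-512 path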


def is_available() -> bool:
    r"""Returns a bool indicating if CPU is currently available.

    N.B. This function only exists to facilitate device-agnostic code
    """
    return True


def synchronize(device: _device_t = None) -> None:
    r"""Waits for all kernels in all streams on the CPU device to complete.

    Args:
        device (torch.device or int, optional): ignored; there is only one CPU device.

    N.B. This function only exists to facilitate device-agnostic code.
    """


class Stream:
    """
    N.B. This class only exists to facilitate device-agnostic code
    """

    def __init__(self, priority: int = -1) -> None:
        pass

    def wait_stream(self, stream) -> None:
        pass


class Event:
    """
    N.B. This class only exists to facilitate device-agnostic code
    """

    def query(self) -> bool:
        return True

    def record(self, stream=None) -> None:
        pass

    def synchronize(self) -> None:
        pass

    def wait(self, stream=None) -> None:
        pass


_default_cpu_stream = Stream()
_current_stream = _default_cpu_stream


def current_stream(device: _device_t = None) -> Stream:
    r"""Returns the currently selected :class:`Stream` for a given device.

    Args:
        device (torch.device or int, optional): Ignored.

    N.B. This function only exists to facilitate device-agnostic code
    """
    return _current_stream


class StreamContext(AbstractContextManager):
    r"""Context-manager that selects a given stream.

    N.B. This class only exists to facilitate device-agnostic code
    """

    cur_stream: Optional[Stream]

    def __init__(self, stream):
        self.stream = stream
        self.prev_stream = _default_cpu_stream

    def __enter__(self):
        cur_stream = self.stream
        if cur_stream is None:
            return
        global _current_stream
        self.prev_stream = _current_stream
        _current_stream = cur_stream

    def __exit__(self, type: Any, value: Any, traceback: Any) -> None:
        cur_stream = self.stream
        if cur_stream is None:
            return
        global _current_stream
        _current_stream = self.prev_stream


def stream(stream: Stream) -> AbstractContextManager:
    r"""Wrapper around the Context-manager StreamContext that selects a given stream.

    N.B. This function only exists to facilitate device-agnostic code
    """
    return StreamContext(stream)


def device_count() -> int:
    r"""Returns number of CPU devices (not cores). Always 1.

    N.B. This function only exists to facilitate device-agnostic code
    """
    return 1


def set_device(device: _device_t) -> None:
    r"""Sets the current device. For CPU this is a no-op.

    N.B. This function only exists to facilitate device-agnostic code
    """


def current_device() -> str:
    r"""Returns current device for cpu. Always 'cpu'.

    N.B. This function only exists to facilitate device-agnostic code
    """
    return "cpu"