[Intel GPU] skip a cuda api call in amp to save some host overhead on xpu (#151111)
This can save ~0.2 ms on non-CUDA devices by skipping the call to `amp_definitely_not_available()`. It improves small TorchBench models such as lennard_jones on xpu by about 10% on both eager and inductor in the dynamo benchmarks.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/151111
Approved by: https://github.com/soulitzer
This commit is contained in:
parent 1c5619ef9c, commit b59f3d3ae0
@@ -260,8 +260,8 @@ class autocast:
         self._cache_enabled = torch.is_autocast_cache_enabled()
         if (
             enabled
-            and torch.cuda.amp.common.amp_definitely_not_available()
             and self.device == "cuda"
+            and torch.cuda.amp.common.amp_definitely_not_available()
         ):
             warnings.warn(
                 "User provided device_type of 'cuda', but CUDA is not available. Disabling"
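The saving comes from Python's left-to-right short-circuit evaluation of `and`: once `self.device == "cuda"` is placed before the availability check, the CUDA query is never evaluated for xpu or cpu devices. Below is a minimal, self-contained sketch of that effect; it is not the PyTorch source, and `expensive_cuda_query` / `maybe_warn` are hypothetical stand-ins for illustration only.

import warnings


def expensive_cuda_query() -> bool:
    # Stand-in for torch.cuda.amp.common.amp_definitely_not_available(),
    # which touches the CUDA runtime and adds host-side overhead per call.
    print("CUDA runtime queried")
    return True


def maybe_warn(enabled: bool, device: str) -> None:
    # With the reordered condition, the expensive query is only reached
    # when the user actually asked for "cuda"; on "xpu" the `and` chain
    # short-circuits at `device == "cuda"` and the query never runs.
    if enabled and device == "cuda" and expensive_cuda_query():
        warnings.warn(
            "User provided device_type of 'cuda', but CUDA is not available. Disabling"
        )


maybe_warn(enabled=True, device="xpu")   # short-circuits: no CUDA query, no warning
maybe_warn(enabled=True, device="cuda")  # query runs; warning emitted (stub returns True)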