Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/13342

This PR introduces a few new concepts:

- DeviceGuardImplInterface, with implementations for CPU and CUDA, which provides a generic interface for manipulating device and stream state without requiring a direct dependency on the code in question.
- InlineDeviceGuard, a general template for generating both specialized and dynamically dispatched device guard implementations. Dynamic dispatch is done by specializing it on a VirtualGuardImpl.
- A device-independent DeviceGuard class, which can be used even from CPU code. It uses the aforementioned dynamic dispatch.
- A CUDA-specialized CUDAGuard class, which has no dynamic dispatch but can only be used from CUDA code.
- StreamGuard, which is the same as above, but for streams rather than devices.
- Optional variants of all the aforementioned guards, which are a no-op if no device/stream is specified.
- CUDAMultiStreamGuard, specifically for the case when we want to set a stream on every device.

There are some subtle semantic changes, which have been thoroughly documented in the class definitions.

BC-breaking changes:

- Move constructor/assignment have been removed from all device guard implementations.
- In some cases where you previously wrote 'set_device' (or 'set_stream'), you must now write 'reset_device', because if you switch devices or device types, the stream/device on the previous device is unset. This differs from the previous behavior.
- CUDAGuard no longer handles streams, or multiple streams. Use CUDAStreamGuard or CUDAMultiStreamGuard as appropriate for your use case.

Reviewed By: dzhulgakov

Differential Revision: D12849620

fbshipit-source-id: f61956256f0b12be754b3234fcc73c2abc1be04e
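To make the new guard semantics concrete, here is a minimal usage sketch, not taken from the PR itself. It assumes the guards are reachable via <ATen/DeviceGuard.h> (the header the file below includes) and that DeviceGuard exposes the reset_device member described above; the function and variable names are hypothetical.

#include <ATen/ATen.h>
#include <ATen/DeviceGuard.h>

void example(const at::Tensor& t, at::Device target) {
  // Device-independent guard: usable even from CPU code, via the
  // dynamically dispatched (VirtualGuardImpl) path.
  at::DeviceGuard guard(target);

  // Switching devices mid-scope is now spelled reset_device, not
  // set_device (see the BC-breaking notes above).
  guard.reset_device(at::Device(at::kCUDA, 1));

  // Optional variant: a no-op when no device is specified, exactly
  // as input_buffer.cpp uses it below.
  at::OptionalDeviceGuard maybe_guard(at::device_of(t));
}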
48 lines
1.0 KiB
C++
#include "torch/csrc/autograd/input_buffer.h"
|
|
|
|
#include "torch/csrc/autograd/functions/basic_ops.h"
|
|
|
|
#include <ATen/DeviceGuard.h>
|
|
|
|
#include <cstddef>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
namespace torch { namespace autograd {
|
|
|
|
|
|
void InputBuffer::add(size_t pos, Variable var) {
  AT_ASSERT(pos < buffer.size());
  if (!var.defined()) {
    return;
  }
  auto& old_var = buffer[pos];
  if (!old_var.defined()) {
    // First gradient for this slot: take ownership without an add.
    buffer[pos] = std::move(var);
  } else {
    // Accumulate on the device of the incoming gradient.
    at::OptionalDeviceGuard device_guard(device_of(var));
    // ATen doesn't route sparse additions correctly...
    if (old_var.is_sparse()) {
      buffer[pos] = var + old_var;
    } else {
      buffer[pos] = old_var + var;
    }
  }
}

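// Usage illustration (hypothetical, not part of this file): grad_a and
// grad_b are placeholder Variables, and the slot-count constructor is the
// one declared in input_buffer.h.
//
//   InputBuffer buf(1);   // a single input slot
//   buf.add(0, grad_a);   // slot empty: grad_a is moved in
//   buf.add(0, grad_b);   // slot occupied: stores grad_a + grad_b,
//                         // computed under grad_b's device guard
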
auto InputBuffer::device() const -> int {
  // Return the device index of the first CUDA gradient, or -1 if
  // every defined gradient lives on the CPU.
  for (auto& var : buffer) {
    if (var.defined() && var.is_cuda()) {
      return var.get_device();
    }
  }
  return -1;
}

auto InputBuffer::variables(InputBuffer&& g) -> std::vector<Variable> {
  // Move the accumulated gradients out, leaving the buffer empty.
  std::vector<Variable> result = std::move(g.buffer);
  return result;
}

}} // namespace torch::autograd
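A hedged usage sketch of the two accessors above; the drain helper and its name are hypothetical, and only the InputBuffer methods are from this file:

// Hypothetical consumer of InputBuffer, sketched outside the file proper.
std::vector<torch::autograd::Variable> drain(torch::autograd::InputBuffer&& buf) {
  int dev = buf.device();  // first CUDA device index, or -1 if all gradients are CPU
  (void)dev;               // e.g. used to choose a device/stream for downstream work
  return torch::autograd::InputBuffer::variables(std::move(buf));  // consumes the buffer
}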