How the old retains_grad hooks were implemented:
- retains_grad hooks are stored on the autograd_meta, as entries in a vector
- upon registration, a wrapper hook CppFunctionTensorPreHook is created to wrap that vector, and that wrapper hook is registered to the grad_fn, i.e., by appending it to a vector of retains_grad hooks on the grad_fn
- upon in-place, for the old grad_fn we set the retains_grad hook to nullptr, so that even though the old grad_fn still references the vector, the vector contains a single nullptr; for the new grad_fn, we create a new wrapper hook around the vector on autograd_meta (which stores the single retains_grad hook)

The new retains_grad hook implementation:
- we store the std::function by value, and we store it on the grad_fn rather than on the autograd_meta
- a single grad_fn can have multiple outputs, so it can potentially hold multiple retains_grad hooks; we use an unordered_map (previously a vector)
- on in-place we remove the hook from the old grad_fn and put it on the new grad_fn (a small implication of this change is that we now need access to both the old and the new grad_fn, which isn't a problem); see the sketch after this list

Other details:
- CppFunctionTensorPreHook took a shared_ptr to a vector of std::function. In the new implementation, we add a new wrapper hook, CppFunctionSingleTensorPreHook, which takes a single std::function.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/92604
Approved by: https://github.com/albanD
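The sketch below is a minimal standalone mock of the bookkeeping described above, not the actual autograd classes: Node, retain_grad, and move_retains_grad_on_inplace are hypothetical stand-ins for the grad_fn and its helpers, and Tensor stands in for at::TensorBase.

#include <cstdio>
#include <functional>
#include <unordered_map>
#include <utility>

// Hypothetical stand-in for a tensor; only carries a gradient value.
struct Tensor {
  double grad = 0.0;
};

// In the new scheme the retains_grad hook is a plain std::function stored by value.
using SingleHook = std::function<void(const Tensor&)>;

// Hypothetical stand-in for a grad_fn: it owns one retains_grad hook per output index.
struct Node {
  std::unordered_map<int, SingleHook> retains_grad_hooks;
};

// Registration: store the hook on the grad_fn, keyed by the output it belongs to.
void retain_grad(Node& grad_fn, int output_idx, SingleHook hook) {
  grad_fn.retains_grad_hooks[output_idx] = std::move(hook);
}

// In-place update: move the hook from the old grad_fn to the new one, instead of
// nulling out an entry in a shared vector as the old implementation did.
void move_retains_grad_on_inplace(Node& old_fn, Node& new_fn, int output_idx) {
  auto it = old_fn.retains_grad_hooks.find(output_idx);
  if (it != old_fn.retains_grad_hooks.end()) {
    new_fn.retains_grad_hooks[output_idx] = std::move(it->second);
    old_fn.retains_grad_hooks.erase(it);
  }
}

int main() {
  Node old_fn;
  Node new_fn;
  retain_grad(old_fn, 0, [](const Tensor& g) { std::printf("retained grad: %f\n", g.grad); });
  move_retains_grad_on_inplace(old_fn, new_fn, 0);  // simulate an in-place op
  Tensor g;
  g.grad = 1.5;
  new_fn.retains_grad_hooks.at(0)(g);  // the hook now fires from the new grad_fn
  return 0;
}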
70 lines · 1.9 KiB · C++
#include <c10/util/irange.h>

#include <torch/csrc/autograd/cpp_hook.h>
#include <torch/csrc/autograd/custom_function.h>
#include <torch/csrc/autograd/variable.h>

namespace {

using torch::autograd::Variable;

void check_single_result(
    const at::TensorBase& value,
    const at::TensorBase& result,
    std::string hook_name) {
  if (!value.defined()) {
    throw std::runtime_error(
        "can't replace a empty gradient with a non-empty value");
  }
  torch::autograd::check_variable_result(value, result, hook_name);
}

} // namespace

namespace torch {
namespace autograd {

// NOLINTNEXTLINE(modernize-pass-by-value)
CppFunctionTensorPreHook::CppFunctionTensorPreHook(
    const std::shared_ptr<hooks_list>& hooks,
    int value_idx)
    : hooks_(hooks), value_idx_(value_idx) {}

variable_list CppFunctionTensorPreHook::operator()(
    const variable_list& values) {
  auto value = values[value_idx_];
  for (const auto i : c10::irange(hooks_->size())) {
    auto& hook = (*hooks_)[i];
    if (!hook) {
      // hook was removed
      continue;
    }
    auto res = hook(value);
    if (!res.defined()) {
      // Don't change gradient
      continue;
    }
    check_single_result(value, res, c10::to_string(i));
    value = std::move(res);
  }
  variable_list results(values);
  results[value_idx_] = value;
  return results;
}

// NOLINTNEXTLINE(modernize-pass-by-value)
CppFunctionSingleTensorPreHook::CppFunctionSingleTensorPreHook(
    std::function<at::TensorBase(const at::TensorBase&)> hook,
    int value_idx)
    : hook_(hook), value_idx_(value_idx) {}

variable_list CppFunctionSingleTensorPreHook::operator()(
    const variable_list& values) {
  auto value = values[value_idx_];
  auto res = hook_(value);
  TORCH_INTERNAL_ASSERT(
      !res.defined(),
      "CppFunctionSingleTensorPreHook currently only supports hooks that don't return");
  variable_list results(values);
  return results;
}

} // namespace autograd
} // namespace torch
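For reference, the loop in CppFunctionTensorPreHook::operator() above implements a simple chaining rule: entries that were removed (and are now nullptr) are skipped, a hook that returns an undefined tensor leaves the gradient unchanged, and a defined return value replaces the gradient passed to the following hooks. Below is a minimal standalone sketch of that rule; FakeTensor, FakeHook, and run_hooks are made-up stand-ins for illustration, not PyTorch API.

#include <cassert>
#include <functional>
#include <optional>
#include <vector>

// Stand-ins: an "undefined tensor" is std::nullopt, and a removed hook is a
// null std::function, mirroring the nullptr check in the real loop.
using FakeTensor = std::optional<double>;
using FakeHook = std::function<FakeTensor(const FakeTensor&)>;

FakeTensor run_hooks(const std::vector<FakeHook>& hooks, FakeTensor value) {
  for (const auto& hook : hooks) {
    if (!hook) {
      continue;  // hook was removed
    }
    auto res = hook(value);
    if (!res.has_value()) {
      continue;  // hook returned nothing; keep the current gradient
    }
    value = res;  // a defined result replaces the gradient seen by later hooks
  }
  return value;
}

int main() {
  std::vector<FakeHook> hooks = {
      [](const FakeTensor& v) { return FakeTensor{*v * 2}; },  // rescales the gradient
      FakeHook{},                                               // removed hook, skipped
      [](const FakeTensor&) { return FakeTensor{}; },           // observes only, returns "undefined"
  };
  FakeTensor out = run_hooks(hooks, FakeTensor{3.0});
  assert(out.has_value() && *out == 6.0);
  return 0;
}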