pytorch/torch/csrc/jit/graph_executor.h
Zachary DeVito f74207c99f
Allow autograd to work even when the shape of values cannot be determined (#8641)
This commit implements the solution proposed in https://github.com/pytorch/pytorch/issues/8410
to work around the need to create zero tensors with the same shape as inputs.
It introduces the concept of a LinearBlock, which marks places in the code
where we know that if all inputs to a node are zero, then its outputs
are also zero. Autodiff introduces LinearBlocks around backward functions,
which have this property. specializeUndef then propagates Undef nodes
using this information.

Notes:
* Since we do not always specialize, we have a pass LowerLinearBlocks
that replaces the block with an if statement that dynamically guards
the Undef case.
* We introduce AutogradAdd, an addition op that still works when its
inputs might be undefined (see the sketch below this commit message).
In cases where we specialize, it is removed in favor of a normal add,
but there are cases where gradient graphs do not specialize (e.g. when
they are not differentiable but a derivative is required), so it is
important for this op to be executable.
2018-06-25 18:40:04 -07:00
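
A minimal sketch of the AutogradAdd semantics described above, written as a
hypothetical standalone helper (autograd_add is illustrative only, not the
actual op implementation): an undefined tensor stands in for a zero gradient,
so the addition degenerates to returning the other operand.

#include <ATen/ATen.h>

// Hypothetical helper illustrating the AutogradAdd semantics (sketch only).
at::Tensor autograd_add(const at::Tensor& a, const at::Tensor& b) {
  if (!a.defined()) return b;  // an undefined input acts as zero
  if (!b.defined()) return a;
  return a + b;                // both defined: an ordinary add
}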

#pragma once
#include <memory>
#include "torch/csrc/jit/ir.h"
#include "torch/csrc/jit/variable_tensor_list.h"
#include "torch/csrc/jit/interpreter.h"
#include "torch/csrc/jit/autodiff.h"
#include "torch/csrc/jit/argument_spec.h"
namespace torch { namespace jit {
struct GraphExecutorState;
// Note that these structs do not manage the lifetime of their members.
// They are only valid right after you call getDebugState() and must never
// be used again once another GraphExecutor function is called.
struct ExecutionPlanState {
  Code* f;
  Graph* graph;
  // These two fields are optional
  Gradient* grad;
  std::shared_ptr<GraphExecutorState> grad_executor; // shared_ptr to break the cycle...
};
struct GraphExecutorState {
  Graph* graph;
  std::unordered_map<ArgumentSpec, ExecutionPlanState> execution_plans;
  // These two fields are optional
  Code* autograd_fallback;
  Graph* autograd_fallback_graph;
};
struct GraphExecutorImpl;
struct GraphExecutor {
  GraphExecutor() {}
  GraphExecutor(std::shared_ptr<Graph> graph, bool optimize = true);
  // note: if not specified, symbolically_differentiable is computed from the graph.
  GraphExecutor(std::shared_ptr<Graph> graph, bool optimize, bool symbolically_differentiable);
  variable_tensor_list run(variable_tensor_list && inputs);
  operator bool() const {
    return pImpl != nullptr;
  }
  std::shared_ptr<Graph> graph() const;
  std::shared_ptr<Graph> graphFor(const variable_tensor_list& inputs) const;
  GraphExecutorState getDebugState();
private:
  std::shared_ptr<GraphExecutorImpl> pImpl;
};
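
// Typical usage (a sketch under assumptions: `g` is a std::shared_ptr<Graph>
// built elsewhere and `inputs` holds the input Variables):
//   GraphExecutor executor(g, /*optimize=*/true);
//   variable_tensor_list outputs = executor.run(std::move(inputs));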
// These passes need to run before it is valid to pass the graph to the
// interpreter, regardless of whether sizes have been specialized or not.
void runRequiredPasses(const std::shared_ptr<Graph>& g);
// Specialize 'graph' to the types, sizes, and other properties described in 'spec'.
// This prepares the graph for execution, including running runRequiredPasses,
// but the result only remains valid for tensors whose properties match 'spec';
// otherwise running the graph has undefined results.
void specializeToSpec(const std::shared_ptr<Graph>& graph, const ArgumentSpec& spec);
// Apply standard optimizations. If graphMustSupportVariables=false, then
// the passes are allowed to modify the graph in ways that make it no longer
// work with tensors that have requires_grad=True.
void runOptimization(std::shared_ptr<Graph> & graph, bool graphMustSupportVariables);
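
// Sketch of how these passes might fit together (an assumption based on the
// comments above, not an exact driver):
//   specializeToSpec(graph, spec);  // includes runRequiredPasses
//   runOptimization(graph, /*graphMustSupportVariables=*/false);
// or, when input properties are not specialized:
//   runRequiredPasses(graph);
//   runOptimization(graph, /*graphMustSupportVariables=*/true);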
}}