#pragma once #include #include #include #include namespace torch { namespace jit { struct TORCH_API GraphFunction : public Function { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) GraphFunction( c10::QualifiedName name, std::shared_ptr graph, std::function function_creator) : name_(std::move(name)), graph_(std::move(graph)), function_creator_(std::move(function_creator)) {} bool isGraphFunction() const override { return true; } void run(Stack& stack) override; c10::intrusive_ptr runAsync( Stack& stack, TaskLauncher taskLauncher = at::launch) override; std::shared_ptr graph() const { return graph_; } std::shared_ptr optimized_graph() const { std::lock_guard lock(compile_mutex); auto& optimized_graph = optimized_graphs_[currentSpecialization()]; if (optimized_graph) { return *optimized_graph; } optimized_graph = graph_->copy(); if (getGraphExecutorOptimize()) { preoptimizeGraph(*optimized_graph); } return *optimized_graph; } const c10::QualifiedName& qualname() const override { return name_; } // if this isn't yet defined, run its method_creator function void ensure_defined() override; size_t num_inputs() const override { return graph()->inputs().size(); } Function& setSchema(FunctionSchema schema) override { schema_ = make_unique(std::move(schema)); return *this; } const FunctionSchema& getSchema() const override; GraphExecutorState getDebugState() { return get_executor().getDebugState(); } bool is_optimized() const { TORCH_WARN( "GraphFunction::is_optimized() is deprecated and always returns true. " "Please use getGraphExecutorOptimize()"); return true; } void check_single_output() { TORCH_CHECK( graph()->outputs().size() == 1, "Method (but not graphs in general) require a single output. Use None/Tuple for 0 or 2+ outputs"); } GraphExecutor& get_executor() override { ensure_defined(); std::lock_guard lock(compile_mutex); auto& executor = executors_[currentSpecialization()]; if (executor) { return executor; } check_single_output(); executor = GraphExecutor(optimized_graph(), name_.name()); return executor; } private: enum SpecializationKey { AutocastOff, CpuAutocastOn, GpuAutocastOn, CpuGpuAutocastOn, // This provides the number of specializations // (Must be last entry) TotalCount }; SpecializationKey currentSpecialization() const; private: c10::QualifiedName name_; // The original, non-optimized graph std::shared_ptr graph_; // for debugging and for inlining // Optimized graph, computed lazily. Used for inlining. mutable std::array< c10::optional>, SpecializationKey::TotalCount> optimized_graphs_; // GraphFunctions are invokable from multiple threads, so this lock needs to // be held when we're initializing graph executor for the first time or // computing the optimized graph. We're using reentrant mutex so that we don't // need to worry about causing a deadlock by calling one method from another // (e.g. optimized_graph() from get_executor()). mutable std::recursive_mutex compile_mutex; // executor_[0] - autocast off // executor_[1] - autocast on std::array executors_; // an optional function that actually creates the method when // ensure_defined() is called. This is used by the compiler so // that it can construct methods out of order std::function function_creator_; // if absent, then we generate a default schema based on the graph // mutable because getSchema caches the default schema if one is requested // before a call to setSchema mutable std::unique_ptr schema_; }; // Short hands for dynamic_cast. TORCH_API GraphFunction* tryToGraphFunction(Function&) noexcept; TORCH_API GraphFunction& toGraphFunction(Function&); TORCH_API const GraphFunction& toGraphFunction(const Function&); } // namespace jit } // namespace torch