Move the loop over getCustomPasses() in runNondiffOptimization from the
start of the function to the end, after FuseGraph(graph).

diff --git a/torch/csrc/jit/graph_executor.cpp b/torch/csrc/jit/graph_executor.cpp
index 4abb884d31c..a98b564945c 100644
--- a/torch/csrc/jit/graph_executor.cpp
+++ b/torch/csrc/jit/graph_executor.cpp
@@ -713,10 +713,6 @@ bool needsGradient(const std::shared_ptr<const Graph>& graph) {
 }
 
 void runNondiffOptimization(std::shared_ptr<Graph>& graph) {
-  // run custom passes that different backends can register
-  for (const auto& pass : getCustomPasses()) {
-    pass(graph);
-  }
   // decomposition pass, decompose certain ops that will be used in the
   // following passes (like batchmm and jit fusion)
   DecomposeOps(graph);
@@ -732,6 +728,12 @@ void runNondiffOptimization(std::shared_ptr<Graph>& graph) {
   QuantFusion(graph);
 
   FuseGraph(graph);
+
+  // Run custom passes that different backends can register.
+  // This is done last to give internal optimization passes priority.
+  for (const auto& pass : getCustomPasses()) {
+    pass(graph);
+  }
 }
 
 void runOptimization(std::shared_ptr<Graph>& graph) {