Enabled MKL auto mixed precision in the single binary build

2025-12-06 12:20:11 +01:00 · 2021-04-02 16:20:01 -07:00 · 2021-04-02 16:20:01 -07:00 · bf3bba114c
commit bf3bba114c
parent fb37439d64
3 changed files with 19 additions and 12 deletions
--- a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc
+++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc
@@ -1986,7 +1986,7 @@ Status AutoMixedPrecision::Optimize(Cluster* cluster, const GrapplerItem& item,
    return errors::InvalidArgument("cluster == nullptr");
  }

-#if !defined(ENABLE_MKL)
+#if !defined(INTEL_MKL)
  if (mode_ == AutoMixedPrecisionMode::MKL) {
    return errors::Unimplemented(
        "The auto_mixed_precision_mkl optimizer cannot be used since "
@@ -1996,7 +1996,7 @@ Status AutoMixedPrecision::Optimize(Cluster* cluster, const GrapplerItem& item,
        "https://software.intel.com/en-us/articles/intel-optimization-for-"
        "tensorflow-installation-guide");
  }
-#endif  // ENABLE_MKL
+#endif  // INTEL_MKL

  // Start by copying input graph to output.
  *output = item.graph;
--- a/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc
+++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM || ENABLE_MKL
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM || INTEL_MKL

 #include "tensorflow/core/grappler/optimizers/auto_mixed_precision.h"

@@ -1176,7 +1176,7 @@ TEST_F(AutoMixedPrecisionTest, TanhOp) {

 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

-#if ENABLE_MKL
+#if INTEL_MKL

 class AutoMixedPrecisionMklTest : public GrapplerTest {
 protected:
@@ -1190,7 +1190,8 @@ class AutoMixedPrecisionMklTest : public GrapplerTest {
 };

 TEST_F(AutoMixedPrecisionMklTest, AlreadyBf16) {
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope().WithDevice(
+      "/job:localhost/replica:0/task:0/device:CPU:0");
  Output input = ops::Const(s.WithOpName("input"), 1.f, {32, 32});
  Output cst1 = ops::Cast(s.WithOpName("cst1"), input, DT_BFLOAT16);
  Output allow1 = ops::MatMul(s.WithOpName("allow1"), cst1, cst1);
@@ -1228,7 +1229,8 @@ TEST_F(AutoMixedPrecisionMklTest, AlreadyBf16) {
 }

 TEST_F(AutoMixedPrecisionMklTest, Simple) {
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope().WithDevice(
+      "/job:localhost/replica:0/task:0/device:CPU:0");
  Output input = ops::Const(s.WithOpName("input"), 1.f / 32, {32, 32});
  Output deny1 = ops::Exp(s.WithOpName("deny1"), input);
  Output clr1 = ops::Relu(s.WithOpName("clr1"), deny1);
@@ -1277,7 +1279,8 @@ TEST_F(AutoMixedPrecisionMklTest, Simple) {
 }

 TEST_F(AutoMixedPrecisionMklTest, TensorListSetGet) {
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope().WithDevice(
+      "/job:localhost/replica:0/task:0/device:CPU:0");
  tensorflow::Input shape = {32, 32};
  auto tl1 = ops::TensorListReserve(s.WithOpName("tl1"), {32, 32}, 8, DT_FLOAT);
  Output input = ops::Const(s.WithOpName("input"), 1.f / 32, {32, 32});
@@ -1352,10 +1355,10 @@ TEST_F(AutoMixedPrecisionMklTest, TensorListSetGet) {
  }
 }

-#endif  // ENABLE_MKL
+#endif  // INTEL_MKL

 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow

-#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM || ENABLE_MKL
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM || INTEL_MKL
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -55,6 +55,7 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/util/dump_graph.h"
 #include "tensorflow/core/util/ptr_util.h"
+#include "tensorflow/core/util/util.h"
 #include "tensorflow/core/util/xla_config_registry.h"

 namespace tensorflow {
@@ -218,8 +219,10 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
             /*CPU layout conversion*/ cfg_.cpu_layout_conversion()));
  MK_OPT("auto_mixed_precision", "auto_mixed_precision",
         new AutoMixedPrecision(AutoMixedPrecisionMode::CUDA));
-  MK_OPT("auto_mixed_precision_mkl", "auto_mixed_precision_mkl",
-         new AutoMixedPrecision(AutoMixedPrecisionMode::MKL));
+  if (IsMKLEnabled()) {
+    MK_OPT("auto_mixed_precision_mkl", "auto_mixed_precision_mkl",
+           new AutoMixedPrecision(AutoMixedPrecisionMode::MKL));
+  }
  MK_OPT("memory", "memory_optimization",
         new MemoryOptimizer(RewriterConfig::MANUAL));
  MK_OPT("common_subgraph_elimination", "common_subgraph_elimination",
@@ -309,7 +312,8 @@ Status MetaOptimizer::InitializeOptimizers(
  }
  if (AutoMixedPrecisionEnabled(cfg_.auto_mixed_precision_mkl()) &&
      AutoMixedPrecisionEnabled(
-          plugin_configs.toggle_config["auto_mixed_precision_mkl"])) {
+          plugin_configs.toggle_config["auto_mixed_precision_mkl"]) &&
+      IsMKLEnabled()) {
    optimizers->push_back(
        MakeUnique<AutoMixedPrecision>(AutoMixedPrecisionMode::MKL));
  }