Enabled MKL auto mixed precision in the single-binary build

This commit is contained in:
Mahmoud Abuzaina 2021-04-02 16:20:01 -07:00 committed by Penporn Koanantakool
parent fb37439d64
commit bf3bba114c
3 changed files with 19 additions and 12 deletions

View File

@ -1986,7 +1986,7 @@ Status AutoMixedPrecision::Optimize(Cluster* cluster, const GrapplerItem& item,
return errors::InvalidArgument("cluster == nullptr");
}
#if !defined(ENABLE_MKL)
#if !defined(INTEL_MKL)
if (mode_ == AutoMixedPrecisionMode::MKL) {
return errors::Unimplemented(
"The auto_mixed_precision_mkl optimizer cannot be used since "
@ -1996,7 +1996,7 @@ Status AutoMixedPrecision::Optimize(Cluster* cluster, const GrapplerItem& item,
"https://software.intel.com/en-us/articles/intel-optimization-for-"
"tensorflow-installation-guide");
}
#endif // ENABLE_MKL
#endif // INTEL_MKL
// Start by copying input graph to output.
*output = item.graph;

View File

@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM || ENABLE_MKL
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM || INTEL_MKL
#include "tensorflow/core/grappler/optimizers/auto_mixed_precision.h"
@ -1176,7 +1176,7 @@ TEST_F(AutoMixedPrecisionTest, TanhOp) {
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if ENABLE_MKL
#if INTEL_MKL
class AutoMixedPrecisionMklTest : public GrapplerTest {
protected:
@ -1190,7 +1190,8 @@ class AutoMixedPrecisionMklTest : public GrapplerTest {
};
TEST_F(AutoMixedPrecisionMklTest, AlreadyBf16) {
tensorflow::Scope s = tensorflow::Scope::NewRootScope();
tensorflow::Scope s = tensorflow::Scope::NewRootScope().WithDevice(
"/job:localhost/replica:0/task:0/device:CPU:0");
Output input = ops::Const(s.WithOpName("input"), 1.f, {32, 32});
Output cst1 = ops::Cast(s.WithOpName("cst1"), input, DT_BFLOAT16);
Output allow1 = ops::MatMul(s.WithOpName("allow1"), cst1, cst1);
@ -1228,7 +1229,8 @@ TEST_F(AutoMixedPrecisionMklTest, AlreadyBf16) {
}
TEST_F(AutoMixedPrecisionMklTest, Simple) {
tensorflow::Scope s = tensorflow::Scope::NewRootScope();
tensorflow::Scope s = tensorflow::Scope::NewRootScope().WithDevice(
"/job:localhost/replica:0/task:0/device:CPU:0");
Output input = ops::Const(s.WithOpName("input"), 1.f / 32, {32, 32});
Output deny1 = ops::Exp(s.WithOpName("deny1"), input);
Output clr1 = ops::Relu(s.WithOpName("clr1"), deny1);
@ -1277,7 +1279,8 @@ TEST_F(AutoMixedPrecisionMklTest, Simple) {
}
TEST_F(AutoMixedPrecisionMklTest, TensorListSetGet) {
tensorflow::Scope s = tensorflow::Scope::NewRootScope();
tensorflow::Scope s = tensorflow::Scope::NewRootScope().WithDevice(
"/job:localhost/replica:0/task:0/device:CPU:0");
tensorflow::Input shape = {32, 32};
auto tl1 = ops::TensorListReserve(s.WithOpName("tl1"), {32, 32}, 8, DT_FLOAT);
Output input = ops::Const(s.WithOpName("input"), 1.f / 32, {32, 32});
@ -1352,10 +1355,10 @@ TEST_F(AutoMixedPrecisionMklTest, TensorListSetGet) {
}
}
#endif // ENABLE_MKL
#endif // INTEL_MKL
} // namespace
} // namespace grappler
} // namespace tensorflow
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM || ENABLE_MKL
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM || INTEL_MKL

View File

@ -55,6 +55,7 @@ limitations under the License.
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/util/dump_graph.h"
#include "tensorflow/core/util/ptr_util.h"
#include "tensorflow/core/util/util.h"
#include "tensorflow/core/util/xla_config_registry.h"
namespace tensorflow {
@ -218,8 +219,10 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
/*CPU layout conversion*/ cfg_.cpu_layout_conversion()));
MK_OPT("auto_mixed_precision", "auto_mixed_precision",
new AutoMixedPrecision(AutoMixedPrecisionMode::CUDA));
MK_OPT("auto_mixed_precision_mkl", "auto_mixed_precision_mkl",
new AutoMixedPrecision(AutoMixedPrecisionMode::MKL));
if (IsMKLEnabled()) {
MK_OPT("auto_mixed_precision_mkl", "auto_mixed_precision_mkl",
new AutoMixedPrecision(AutoMixedPrecisionMode::MKL));
}
MK_OPT("memory", "memory_optimization",
new MemoryOptimizer(RewriterConfig::MANUAL));
MK_OPT("common_subgraph_elimination", "common_subgraph_elimination",
@ -309,7 +312,8 @@ Status MetaOptimizer::InitializeOptimizers(
}
if (AutoMixedPrecisionEnabled(cfg_.auto_mixed_precision_mkl()) &&
AutoMixedPrecisionEnabled(
plugin_configs.toggle_config["auto_mixed_precision_mkl"])) {
plugin_configs.toggle_config["auto_mixed_precision_mkl"]) &&
IsMKLEnabled()) {
optimizers->push_back(
MakeUnique<AutoMixedPrecision>(AutoMixedPrecisionMode::MKL));
}