mirror of
https://github.com/zebrajr/tensorflow.git
synced 2025-12-06 12:20:11 +01:00
Enabled MKL auto mixed precision in the single binary build
This commit is contained in:
parent
fb37439d64
commit
bf3bba114c
|
|
@ -1986,7 +1986,7 @@ Status AutoMixedPrecision::Optimize(Cluster* cluster, const GrapplerItem& item,
|
|||
return errors::InvalidArgument("cluster == nullptr");
|
||||
}
|
||||
|
||||
#if !defined(ENABLE_MKL)
|
||||
#if !defined(INTEL_MKL)
|
||||
if (mode_ == AutoMixedPrecisionMode::MKL) {
|
||||
return errors::Unimplemented(
|
||||
"The auto_mixed_precision_mkl optimizer cannot be used since "
|
||||
|
|
@ -1996,7 +1996,7 @@ Status AutoMixedPrecision::Optimize(Cluster* cluster, const GrapplerItem& item,
|
|||
"https://software.intel.com/en-us/articles/intel-optimization-for-"
|
||||
"tensorflow-installation-guide");
|
||||
}
|
||||
#endif // ENABLE_MKL
|
||||
#endif // INTEL_MKL
|
||||
|
||||
// Start by copying input graph to output.
|
||||
*output = item.graph;
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
|||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM || ENABLE_MKL
|
||||
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM || INTEL_MKL
|
||||
|
||||
#include "tensorflow/core/grappler/optimizers/auto_mixed_precision.h"
|
||||
|
||||
|
|
@ -1176,7 +1176,7 @@ TEST_F(AutoMixedPrecisionTest, TanhOp) {
|
|||
|
||||
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
||||
|
||||
#if ENABLE_MKL
|
||||
#if INTEL_MKL
|
||||
|
||||
class AutoMixedPrecisionMklTest : public GrapplerTest {
|
||||
protected:
|
||||
|
|
@ -1190,7 +1190,8 @@ class AutoMixedPrecisionMklTest : public GrapplerTest {
|
|||
};
|
||||
|
||||
TEST_F(AutoMixedPrecisionMklTest, AlreadyBf16) {
|
||||
tensorflow::Scope s = tensorflow::Scope::NewRootScope();
|
||||
tensorflow::Scope s = tensorflow::Scope::NewRootScope().WithDevice(
|
||||
"/job:localhost/replica:0/task:0/device:CPU:0");
|
||||
Output input = ops::Const(s.WithOpName("input"), 1.f, {32, 32});
|
||||
Output cst1 = ops::Cast(s.WithOpName("cst1"), input, DT_BFLOAT16);
|
||||
Output allow1 = ops::MatMul(s.WithOpName("allow1"), cst1, cst1);
|
||||
|
|
@ -1228,7 +1229,8 @@ TEST_F(AutoMixedPrecisionMklTest, AlreadyBf16) {
|
|||
}
|
||||
|
||||
TEST_F(AutoMixedPrecisionMklTest, Simple) {
|
||||
tensorflow::Scope s = tensorflow::Scope::NewRootScope();
|
||||
tensorflow::Scope s = tensorflow::Scope::NewRootScope().WithDevice(
|
||||
"/job:localhost/replica:0/task:0/device:CPU:0");
|
||||
Output input = ops::Const(s.WithOpName("input"), 1.f / 32, {32, 32});
|
||||
Output deny1 = ops::Exp(s.WithOpName("deny1"), input);
|
||||
Output clr1 = ops::Relu(s.WithOpName("clr1"), deny1);
|
||||
|
|
@ -1277,7 +1279,8 @@ TEST_F(AutoMixedPrecisionMklTest, Simple) {
|
|||
}
|
||||
|
||||
TEST_F(AutoMixedPrecisionMklTest, TensorListSetGet) {
|
||||
tensorflow::Scope s = tensorflow::Scope::NewRootScope();
|
||||
tensorflow::Scope s = tensorflow::Scope::NewRootScope().WithDevice(
|
||||
"/job:localhost/replica:0/task:0/device:CPU:0");
|
||||
tensorflow::Input shape = {32, 32};
|
||||
auto tl1 = ops::TensorListReserve(s.WithOpName("tl1"), {32, 32}, 8, DT_FLOAT);
|
||||
Output input = ops::Const(s.WithOpName("input"), 1.f / 32, {32, 32});
|
||||
|
|
@ -1352,10 +1355,10 @@ TEST_F(AutoMixedPrecisionMklTest, TensorListSetGet) {
|
|||
}
|
||||
}
|
||||
|
||||
#endif // ENABLE_MKL
|
||||
#endif // INTEL_MKL
|
||||
|
||||
} // namespace
|
||||
} // namespace grappler
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM || ENABLE_MKL
|
||||
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM || INTEL_MKL
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ limitations under the License.
|
|||
#include "tensorflow/core/lib/gtl/map_util.h"
|
||||
#include "tensorflow/core/util/dump_graph.h"
|
||||
#include "tensorflow/core/util/ptr_util.h"
|
||||
#include "tensorflow/core/util/util.h"
|
||||
#include "tensorflow/core/util/xla_config_registry.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
|
@ -218,8 +219,10 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
|
|||
/*CPU layout conversion*/ cfg_.cpu_layout_conversion()));
|
||||
MK_OPT("auto_mixed_precision", "auto_mixed_precision",
|
||||
new AutoMixedPrecision(AutoMixedPrecisionMode::CUDA));
|
||||
MK_OPT("auto_mixed_precision_mkl", "auto_mixed_precision_mkl",
|
||||
new AutoMixedPrecision(AutoMixedPrecisionMode::MKL));
|
||||
if (IsMKLEnabled()) {
|
||||
MK_OPT("auto_mixed_precision_mkl", "auto_mixed_precision_mkl",
|
||||
new AutoMixedPrecision(AutoMixedPrecisionMode::MKL));
|
||||
}
|
||||
MK_OPT("memory", "memory_optimization",
|
||||
new MemoryOptimizer(RewriterConfig::MANUAL));
|
||||
MK_OPT("common_subgraph_elimination", "common_subgraph_elimination",
|
||||
|
|
@ -309,7 +312,8 @@ Status MetaOptimizer::InitializeOptimizers(
|
|||
}
|
||||
if (AutoMixedPrecisionEnabled(cfg_.auto_mixed_precision_mkl()) &&
|
||||
AutoMixedPrecisionEnabled(
|
||||
plugin_configs.toggle_config["auto_mixed_precision_mkl"])) {
|
||||
plugin_configs.toggle_config["auto_mixed_precision_mkl"]) &&
|
||||
IsMKLEnabled()) {
|
||||
optimizers->push_back(
|
||||
MakeUnique<AutoMixedPrecision>(AutoMixedPrecisionMode::MKL));
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user