pytorch/caffe2/python/mkl/mkl_fc_speed_test.py
intel b3b66e3d00 MKL related files with review comments incorporated
Summary:
This PR is based on commit "977c6b3" as this version allows MKL to use all the cores available.
All MKL-related files are added here after incorporating review comments. Major changes include:

1. usage of clang-format (linter) with --style=Google
2. usage of macros for checking input and filter dimension in the mkl operators
3. merged Max and Average pooling functions
4. created a new folder for mkl related python scripts in Python folder and moved them there
5. mkl_alexnet_test.py is not included, as it was redundant: convnet_benchmark.py does the same thing
Closes https://github.com/caffe2/caffe2/pull/270

Differential Revision: D4905219

Pulled By: Yangqing

fbshipit-source-id: e5f5b189714a835b93b9ebda24c52e09572dfca7
2017-04-25 00:31:29 -07:00

97 lines
3.9 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import unittest
import numpy as np
from caffe2.proto import caffe2_pb2
from caffe2.python import cnn, core, workspace, test_util
@unittest.skipIf(
    not workspace.C.has_mkldnn, "Skipping as we do not have mkldnn.")
class TestMKLBasic(test_util.TestCase):
    """Compare default-device and MKLDNN results for FC-based nets.

    Each test feeds the same random inputs twice (once without a device
    option, once with the MKLDNN device option), runs both operator chains
    inside a single net, asserts that the two outputs agree within
    tolerance, and finally benchmarks the net and prints the timings.
    """

    def testFCSpeed(self):
        """Check one FC operator on both devices and print its timings."""
        # We randomly select a shape to test the speed. Intentionally we
        # test a batch size of 1 since this may be the most frequent use
        # case for MKL during deployment time.
        # 256 * 6 * 6 == 9216, which matches the second dimension of W.
        X = np.random.rand(1, 256, 6, 6).astype(np.float32) - 0.5
        W = np.random.rand(4096, 9216).astype(np.float32) - 0.5
        b = np.random.rand(4096).astype(np.float32) - 0.5

        mkl_do = core.DeviceOption(caffe2_pb2.MKLDNN)

        # Makes sure that feed works.
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("W", W)
        workspace.FeedBlob("b", b)
        workspace.FeedBlob("X_mkl", X, device_option=mkl_do)
        workspace.FeedBlob("W_mkl", W, device_option=mkl_do)
        workspace.FeedBlob("b_mkl", b, device_option=mkl_do)

        net = core.Net("test")
        # Makes sure that we can run FC on both devices.
        net.FC(["X", "W", "b"], "Y")
        net.FC(["X_mkl", "W_mkl", "b_mkl"], "Y_mkl", device_option=mkl_do)

        workspace.CreateNet(net)
        workspace.RunNet(net)

        # Makes sure that the results are good.
        np.testing.assert_allclose(
            workspace.FetchBlob("Y"),
            workspace.FetchBlob("Y_mkl"),
            atol=1e-2,
            rtol=1e-2)

        # The net has two operators: entry 1 is reported as the CPU FC
        # timing and entry 2 as the MKL FC timing.
        runtime = workspace.BenchmarkNet(net.Proto().name, 1, 100, True)
        print("FC CPU runtime {}, MKL runtime {}.".format(
            runtime[1], runtime[2]))

    def testConvReluMaxPoolFcSpeed(self):
        """Check a Conv -> Relu -> MaxPool -> FC chain on both devices.

        The combined timings of the four default-device operators and of
        the four MKLDNN operators are printed after the benchmark.
        """
        # We randomly select a shape to test the speed. Intentionally we
        # test a batch size of 1 since this may be the most frequent use
        # case for MKL during deployment time.
        X = np.random.rand(1, 256, 13, 13).astype(np.float32) - 0.5
        W = np.random.rand(256, 256, 3, 3).astype(np.float32) - 0.5
        b = np.random.rand(256).astype(np.float32) - 0.5
        w_fc = np.random.rand(4096, 9216).astype(np.float32) - 0.5
        b_fc = np.random.rand(4096).astype(np.float32) - 0.5

        mkl_do = core.DeviceOption(caffe2_pb2.MKLDNN)

        # Makes sure that feed works.
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("W", W)
        workspace.FeedBlob("b", b)
        workspace.FeedBlob("w_fc", w_fc)
        workspace.FeedBlob("b_fc", b_fc)
        workspace.FeedBlob("X_mkl", X, device_option=mkl_do)
        workspace.FeedBlob("W_mkl", W, device_option=mkl_do)
        workspace.FeedBlob("b_mkl", b, device_option=mkl_do)
        workspace.FeedBlob("w_fc_mkl", w_fc, device_option=mkl_do)
        workspace.FeedBlob("b_fc_mkl", b_fc, device_option=mkl_do)

        net = core.Net("test")

        # Default-device chain.
        net.Conv(["X", "W", "b"], "C", pad=1, stride=1, kernel=3)
        net.Relu("C", "R")
        net.MaxPool("R", "P", stride=2, kernel=3)
        net.FC(["P", "w_fc", "b_fc"], "Y")

        # The same chain on the MKLDNN device.
        net.Conv(["X_mkl", "W_mkl", "b_mkl"], "C_mkl",
                 pad=1, stride=1, kernel=3, device_option=mkl_do)
        net.Relu("C_mkl", "R_mkl", device_option=mkl_do)
        net.MaxPool("R_mkl", "P_mkl",
                    stride=2, kernel=3, device_option=mkl_do)
        net.FC(["P_mkl", "w_fc_mkl", "b_fc_mkl"], "Y_mkl",
               device_option=mkl_do)

        workspace.CreateNet(net)
        workspace.RunNet(net)

        # Makes sure that the results are good.
        np.testing.assert_allclose(
            workspace.FetchBlob("Y"),
            workspace.FetchBlob("Y_mkl"),
            atol=1e-2,
            rtol=1e-2)

        runtime = workspace.BenchmarkNet(net.Proto().name, 1, 100, True)
        # Report the benchmark instead of discarding it. The net holds four
        # default-device operators followed by their four MKLDNN twins.
        # NOTE(review): assumes, as testFCSpeed does, that entries 1..N of
        # the result are per-operator timings in net order -- confirm.
        ops_per_device = 4
        cpu_runtime = sum(runtime[1:1 + ops_per_device])
        mkl_runtime = sum(
            runtime[1 + ops_per_device:1 + 2 * ops_per_device])
        print("ConvReluMaxPoolFC CPU runtime {}, MKL runtime {}.".format(
            cpu_runtime, mkl_runtime))
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()