from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import numpy as np import time from caffe2.python import workspace, cnn, memonger, core import caffe2.python.models.resnet as resnet import hypothesis.strategies as st from hypothesis import given, settings import caffe2.python.hypothesis_test_util as hu def has_blob(proto, needle): for op in proto.op: for inp in op.input: if inp == needle: return True for outp in op.output: if outp == needle: return True return False def count_blobs(proto): blobs = set() for op in proto.op: blobs = blobs.union(set(op.input)).union(set(op.output)) return len(blobs) def count_shared_blobs(proto): blobs = set() for op in proto.op: blobs = blobs.union(set(op.input)).union(set(op.output)) return len([b for b in blobs if "_shared" in b]) class ResnetMemongerTest(hu.HypothesisTestCase): @given(with_shapes=st.booleans(), **hu.gcs_cpu_only) @settings(max_examples=2, timeout=120) def test_resnet_shared_grads(self, with_shapes, gc, dc): model = cnn.CNNModelHelper( order="NCHW", name="test", cudnn_exhaustive_search=True, ) with core.NameScope("gpu_0"): data = model.net.AddExternalInput("gpu_0/data") label = model.net.AddExternalInput("gpu_0/label") (_softmax, loss) = resnet.create_resnet50( model, data, num_input_channels=3, num_labels=1000, label=label, is_test=False, ) param_to_grad = model.AddGradientOperators([loss]) (shapes, types) = workspace.InferShapesAndTypes( [model.param_init_net, model.net], {'gpu_0/data': [4, 3, 227, 227], 'gpu_0/label': [4]}, ) count_before = count_blobs(model.net.Proto()) optim_proto = memonger.share_grad_blobs( model.net, ["gpu_0/loss"], set(model.param_to_grad.values()), "gpu_0/", share_activations=True, dont_share_blobs=set([str(param_to_grad["gpu_0/conv1_w"])]), blob_shapes=shapes if with_shapes else None, ) count_after = count_blobs(optim_proto) self.assertTrue(count_after < count_before) # Run model and compare results. We check that the loss is same # and also that the final gradient (conv1_w_grad is same) workspace.RunNetOnce(model.param_init_net) data = np.random.rand(4, 3, 227, 227).astype(np.float32) label = (np.random.rand(4) * 1000).astype(np.int32) workspace.FeedBlob("gpu_0/data", data) workspace.FeedBlob("gpu_0/label", label) workspace.RunNetOnce(model.net) model.net.Proto().type = 'dag' model.net.Proto().num_workers = 4 loss1 = workspace.FetchBlob("gpu_0/last_out_L1000") conv1_w_grad = workspace.FetchBlob(param_to_grad["gpu_0/conv1_w"]) workspace.FeedBlob(param_to_grad["gpu_0/conv1_w"], np.array([0.0])) workspace.RunNetOnce(optim_proto) optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000") optim_conv1_w_grad = workspace.FetchBlob(param_to_grad["gpu_0/conv1_w"]) print("before: {} after: {}".format(count_before, count_after)) np.testing.assert_almost_equal(loss1, optimized_loss1) np.testing.assert_almost_equal(conv1_w_grad, optim_conv1_w_grad) def test_resnet_forward_only(self): model = cnn.CNNModelHelper( order="NCHW", name="test", cudnn_exhaustive_search=True, ) with core.NameScope("gpu_0"): data = model.net.AddExternalInput("gpu_0/data") resnet.create_resnet50( model, data, num_input_channels=3, num_labels=1000, is_test=True ) count_before = count_blobs(model.net.Proto()) optim_proto = memonger.optimize_inference_for_dag( model.net, ["gpu_0/data"], "gpu_0/" ) count_after = count_blobs(optim_proto) num_shared_blobs = count_shared_blobs(optim_proto) # Run model and compare results workspace.RunNetOnce(model.param_init_net) data = np.random.rand(4, 3, 227, 227).astype(np.float32) workspace.FeedBlob("gpu_0/data", data) workspace.RunNetOnce(model.net) model.net.Proto().type = 'dag' model.net.Proto().num_workers = 4 loss1 = workspace.FetchBlob("gpu_0/last_out_L1000") workspace.RunNetOnce(optim_proto) optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000") self.assertTrue(count_after < count_before) self.assertTrue(num_shared_blobs < 7 and num_shared_blobs > 0) np.testing.assert_almost_equal(loss1, optimized_loss1) def test_resnet_forward_only_fast_simplenet(self): ''' Test C++ memonger that is only for simple nets ''' model = cnn.CNNModelHelper( order="NCHW", name="test", cudnn_exhaustive_search=True, ) with core.NameScope("gpu_0"): data = model.net.AddExternalInput("gpu_0/data") resnet.create_resnet50( model, data, num_input_channels=3, num_labels=1000, is_test=True ) count_before = count_blobs(model.net.Proto()) t = time.time() optim_proto = memonger.optimize_inference_fast( model.net.Proto(), set(["gpu_0/data", "gpu_0/last_out_L1000"]).union( set(model.net.Proto().external_input)) ) print("Optimization took {} secs".format(time.time() - t)) count_after = count_blobs(optim_proto) num_shared_blobs = count_shared_blobs(optim_proto) self.assertTrue(count_after < count_before) print(count_after, count_before, num_shared_blobs) self.assertTrue(num_shared_blobs < 7 and num_shared_blobs > 0) # Run model and compare results workspace.RunNetOnce(model.param_init_net) data = np.random.rand(4, 3, 227, 227).astype(np.float32) workspace.FeedBlob("gpu_0/data", data) model.net.Proto().type = 'simple' workspace.RunNetOnce(model.net) loss1 = workspace.FetchBlob("gpu_0/last_out_L1000") workspace.RunNetOnce(optim_proto) optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000") np.testing.assert_almost_equal(loss1, optimized_loss1) if __name__ == "__main__": import unittest import random random.seed(2603) workspace.GlobalInit([ 'caffe2', '--caffe2_log_level=0', '--caffe2_print_blob_sizes_at_exit=0', '--caffe2_gpu_memory_tracking=1']) unittest.main()