import time

import numpy as np

from caffe2.python import core
from caffe2.python import cnn
from caffe2.python import memonger
from caffe2.python import workspace


def _blob_names(proto):
    """Return the set of every blob name used as an op input or output."""
    names = set()
    for op in proto.op:
        # In-place update keeps this linear in the number of blob references
        # instead of rebuilding the set for every operator.
        names.update(op.input)
        names.update(op.output)
    return names


def _new_model():
    """Create the CNNModelHelper configuration shared by the drivers below."""
    return cnn.CNNModelHelper(
        order="NCHW",
        name="test",
        cudnn_exhaustive_search=True,
    )


def has_blob(proto, needle):
    """Return True if `needle` is an input or output of any op in `proto`."""
    return any(
        name == needle
        for op in proto.op
        for names in (op.input, op.output)
        for name in names
    )


def count_blobs(proto):
    """Return the number of distinct blob names referenced by `proto`'s ops."""
    return len(_blob_names(proto))


def count_shared_blobs(proto):
    """Return how many distinct blob names in `proto` contain "_shared"."""
    return sum(1 for name in _blob_names(proto) if "_shared" in name)


def test_shared_grads(
    with_shapes,
    create_model,
    conv_blob,
    last_out_blob,
    data_blob='gpu_0/data',
    label_blob='gpu_0/label',
    num_labels=1000,
):
    """Run a training net before and after memonger.share_grad_blobs.

    Args:
        with_shapes: when truthy, the inferred blob shapes are passed to
            share_grad_blobs as `blob_shapes`; otherwise None is passed.
        create_model: callable invoked as
            create_model(model, data, num_input_channels=3,
                         num_labels=..., label=..., is_test=False);
            it must return a 2-item sequence whose second item is the loss.
        conv_blob: key into the param -> gradient mapping returned by
            AddGradientOperators; its gradient blob is excluded from sharing
            and is fetched for comparison.
        last_out_blob: name of the blob fetched after each run.
        data_blob: name of the external input that receives the data.
        label_blob: name of the external input that receives the labels.
        num_labels: forwarded to create_model and used to scale the random
            labels.

    Returns:
        A list of three tuples:
        [(count_after, count_before),
         (loss1, optimized_loss1),
         (conv1_w_grad, optim_conv1_w_grad)].
    """
    model = _new_model()
    with core.NameScope("gpu_0"):
        data = model.net.AddExternalInput(data_blob)
        label = model.net.AddExternalInput(label_blob)
        (_softmax, loss) = create_model(
            model,
            data,
            num_input_channels=3,
            num_labels=num_labels,
            label=label,
            is_test=False,
        )

    param_to_grad = model.AddGradientOperators([loss])

    # Only the shapes are used below; the types are inferred but ignored.
    (shapes, _types) = workspace.InferShapesAndTypes(
        [model.param_init_net, model.net],
        {data_blob: [4, 3, 227, 227], label_blob: [4]},
    )

    count_before = count_blobs(model.net.Proto())
    optim_proto = memonger.share_grad_blobs(
        model.net,
        ["gpu_0/loss"],
        set(model.param_to_grad.values()),
        "gpu_0/",
        share_activations=True,
        dont_share_blobs={str(param_to_grad[conv_blob])},
        blob_shapes=shapes if with_shapes else None,
    )
    count_after = count_blobs(optim_proto)

    # Run both nets and collect the values to compare: the loss and the
    # gradient of `conv_blob` should be the same before and after sharing.
    workspace.RunNetOnce(model.param_init_net)
    data = np.random.rand(4, 3, 227, 227).astype(np.float32)
    label = (np.random.rand(4) * num_labels).astype(np.int32)

    workspace.FeedBlob(data_blob, data)
    workspace.FeedBlob(label_blob, label)

    workspace.RunNetOnce(model.net)
    # The net type is changed only after the reference run above, so that run
    # is not affected by it.
    # NOTE(review): whether this setting reaches `optim_proto` depends on
    # memonger internals not visible here -- confirm the intended ordering.
    model.net.Proto().type = 'dag'
    model.net.Proto().num_workers = 4
    loss1 = workspace.FetchBlob(last_out_blob)
    conv1_w_grad = workspace.FetchBlob(param_to_grad[conv_blob])

    # Overwrite the stored gradient before the second run -- presumably so a
    # stale value from the first run cannot pass for the optimized result.
    workspace.FeedBlob(param_to_grad[conv_blob], np.array([0.0]))

    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob(last_out_blob)
    optim_conv1_w_grad = workspace.FetchBlob(param_to_grad[conv_blob])

    return [
        (count_after, count_before),
        (loss1, optimized_loss1),
        (conv1_w_grad, optim_conv1_w_grad),
    ]


def test_forward_only(
    create_model,
    last_out_blob,
    data_blob='gpu_0/data',
    num_labels=1000,
):
    """Run an inference net before and after optimize_inference_for_dag.

    Args:
        create_model: callable invoked as
            create_model(model, data, num_input_channels=3,
                         num_labels=..., is_test=True).
        last_out_blob: name of the blob fetched after each run.
        data_blob: name of the external input that receives the data.
        num_labels: forwarded to create_model.

    Returns:
        [(count_after, count_before), num_shared_blobs,
         (loss1, optimized_loss1)]. The middle element is a plain int, not a
        tuple.
    """
    model = _new_model()
    with core.NameScope("gpu_0"):
        data = model.net.AddExternalInput(data_blob)
        create_model(
            model,
            data,
            num_input_channels=3,
            num_labels=num_labels,
            is_test=True
        )

    count_before = count_blobs(model.net.Proto())
    optim_proto = memonger.optimize_inference_for_dag(
        model.net, [data_blob], "gpu_0/"
    )
    count_after = count_blobs(optim_proto)
    num_shared_blobs = count_shared_blobs(optim_proto)

    # Run both nets on the same random input and collect the outputs.
    workspace.RunNetOnce(model.param_init_net)
    data = np.random.rand(4, 3, 227, 227).astype(np.float32)

    workspace.FeedBlob(data_blob, data)
    workspace.RunNetOnce(model.net)
    # Set after the reference run above, so that run is not affected.
    # NOTE(review): confirm whether this ordering is intentional.
    model.net.Proto().type = 'dag'
    model.net.Proto().num_workers = 4
    loss1 = workspace.FetchBlob(last_out_blob)

    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob(last_out_blob)

    return [
        (count_after, count_before),
        num_shared_blobs,
        (loss1, optimized_loss1),
    ]


def test_forward_only_fast_simplenet(
    create_model,
    last_out_blob,
    data_blob="gpu_0/data",
    num_labels=1000,
):
    """Run an inference net before and after optimize_inference_fast.

    The optimization call is timed and the elapsed time, together with the
    blob counts, is printed.

    Args:
        create_model: callable invoked as
            create_model(model, data, num_input_channels=3,
                         num_labels=..., is_test=True).
        last_out_blob: name of the blob fetched after each run; it is also
            passed to the optimizer together with `data_blob` and the net's
            external inputs.
        data_blob: name of the external input that receives the data.
        num_labels: forwarded to create_model.

    Returns:
        [(count_after, count_before), num_shared_blobs,
         (loss1, optimized_loss1)]. The middle element is a plain int, not a
        tuple.
    """
    model = _new_model()
    with core.NameScope("gpu_0"):
        data = model.net.AddExternalInput(data_blob)
        create_model(
            model,
            data,
            num_input_channels=3,
            num_labels=num_labels,
            is_test=True
        )

    count_before = count_blobs(model.net.Proto())
    t = time.time()
    optim_proto = memonger.optimize_inference_fast(
        model.net.Proto(),
        {data_blob, last_out_blob}.union(
            set(model.net.Proto().external_input))
    )
    print("Optimization took {} secs".format(time.time() - t))
    count_after = count_blobs(optim_proto)
    num_shared_blobs = count_shared_blobs(optim_proto)

    print(count_after, count_before, num_shared_blobs)

    # Run both nets on the same random input and collect the outputs.
    workspace.RunNetOnce(model.param_init_net)
    data = np.random.rand(4, 3, 227, 227).astype(np.float32)

    workspace.FeedBlob(data_blob, data)
    # Unlike the other drivers, the net type is set before the reference run.
    model.net.Proto().type = 'simple'

    workspace.RunNetOnce(model.net)
    loss1 = workspace.FetchBlob(last_out_blob)

    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob(last_out_blob)

    return [
        (count_after, count_before),
        num_shared_blobs,
        (loss1, optimized_loss1),
    ]