from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from caffe2.python.optimizer import (
    build_sgd, build_multi_precision_sgd, build_ftrl, build_adagrad,
    build_adam, add_weight_decay, SgdOptimizer)
from caffe2.python.optimizer_context import UseOptimizer
from caffe2.python.optimizer_test_util import OptimizerTestBase
from caffe2.python.test_util import TestCase
from caffe2.python import workspace
from caffe2.python.core import DataType
import numpy as np
import unittest


class TestSgd(OptimizerTestBase, TestCase):
    def build_optimizer(self, model):
        self._skip_gpu = False
        return build_sgd(model, base_learning_rate=0.1)

    def check_optimizer(self, optimizer):
        self.assertTrue(optimizer.get_auxiliary_parameters().shared)
        self.assertFalse(optimizer.get_auxiliary_parameters().local)
        for param in optimizer.get_auxiliary_parameters().shared:
            tensor = workspace.FetchBlob(param)
            np.testing.assert_allclose(np.array([1.0]), tensor, atol=1e-5)


class TestMultiPrecisionSgd(OptimizerTestBase, TestCase):
    def build_optimizer(self, model):
        self._skip_gpu = False
        return build_multi_precision_sgd(model, base_learning_rate=0.1)

    def check_optimizer(self, optimizer):
        self.assertTrue(optimizer.get_auxiliary_parameters().shared)
        self.assertFalse(optimizer.get_auxiliary_parameters().local)
        for param in optimizer.get_auxiliary_parameters().shared:
            tensor = workspace.FetchBlob(param)
            np.testing.assert_allclose(np.array([1.0]), tensor, atol=1e-5)

    @unittest.skipIf(not workspace.has_gpu_support, "No GPU support")
    def testGPUDense(self):
        super(TestMultiPrecisionSgd, self).testGPUDense(DataType.FLOAT16)


class TestFtrl(OptimizerTestBase, TestCase):
    def build_optimizer(self, model):
        self._skip_gpu = True
        return build_ftrl(
            model,
            engine=None,
            alpha=1.0,
            beta=0.1,
            lambda1=0.0,
            lambda2=0.0)

    def check_optimizer(self, optimizer):
        self.assertFalse(optimizer.get_auxiliary_parameters().shared)
        self.assertTrue(optimizer.get_auxiliary_parameters().local)
        for param in optimizer.get_auxiliary_parameters().local:
            workspace.FetchBlob(param)


class TestAdagrad(OptimizerTestBase, TestCase):
    def build_optimizer(self, model):
        self._skip_gpu = False
        return build_adagrad(model, base_learning_rate=1.0)

    def check_optimizer(self, optimizer):
        self.assertFalse(optimizer.get_auxiliary_parameters().shared)
        self.assertTrue(optimizer.get_auxiliary_parameters().local)
        for param in optimizer.get_auxiliary_parameters().local:
            workspace.FetchBlob(param)


class TestAdam(OptimizerTestBase, TestCase):
    def build_optimizer(self, model):
        self._skip_gpu = False
        return build_adam(model, base_learning_rate=0.1)

    def check_optimizer(self, optimizer):
        self.assertTrue(optimizer.get_auxiliary_parameters().shared)
        self.assertTrue(optimizer.get_auxiliary_parameters().local)
        self.assertTrue(workspace.HasBlob("optimizer_iteration"))
        iteration_tensor = workspace.FetchBlob("optimizer_iteration")
        np.testing.assert_allclose(np.array([2000]),
                                   iteration_tensor,
                                   atol=1e-5)
        for param in optimizer.get_auxiliary_parameters().shared:
            workspace.FetchBlob(param)
        for param in optimizer.get_auxiliary_parameters().local:
            workspace.FetchBlob(param)


class TestMultiOptimizers(TestCase):
    def test_multiple_optimizers(self):
        from caffe2.python import brew, core, optimizer
        from caffe2.python.model_helper import ModelHelper

        model = ModelHelper(name="test")
        fc1 = brew.fc(model, 'data', 'fc1', 100, 50)
        fc2 = brew.fc(model, fc1, 'fc2', 50, 25)
        pred = brew.fc(model, fc2, 'fc3', 25, 10)
        (softmax, loss) = model.SoftmaxWithLoss(
            [pred, 'label'],
            ['softmax', 'loss'],
        )
        model.AddGradientOperators([loss])

        param_to_device = optimizer._get_param_to_device(model)

        def infer_blob_device(blob_name):
            return optimizer.get_param_device(
                blob_name, "{}_grad".format(blob_name), param_to_device
            )

        sgd_1 = optimizer.SgdOptimizer(base_learning_rate=0.1)
        sgd_2 = optimizer.SgdOptimizer(base_learning_rate=0.2)
        adagrad = optimizer.AdagradOptimizer()

        # Check that params handled by the same optimizer instance share one
        # learning rate blob.
        with core.DeviceScope(infer_blob_device("fc1_w")):
            sgd_1(model.net, model.param_init_net, "fc1_w", "fc1_w_grad")
        with core.DeviceScope(infer_blob_device("fc1_b")):
            sgd_1(model.net, model.param_init_net, "fc1_b", "fc1_b_grad")
        fc1_lr_blobs = []
        for op in model.net.Proto().op:
            if op.type == 'WeightedSum' and \
                    op.input[0] in ('fc1_w', 'fc1_b'):
                fc1_lr_blobs.append(op.input[3])
        self.assertEqual(fc1_lr_blobs[0], fc1_lr_blobs[1])

        # Check that different instances of the same optimizer type get
        # different learning rate blobs.
        with core.DeviceScope(infer_blob_device("fc2_w")):
            sgd_2(model.net, model.param_init_net, "fc2_w", "fc2_w_grad")
        with core.DeviceScope(infer_blob_device("fc2_b")):
            sgd_2(model.net, model.param_init_net, "fc2_b", "fc2_b_grad")
        fc2_lr_blobs = []
        for op in model.net.Proto().op:
            if op.type == 'WeightedSum' and \
                    op.input[0] in ('fc2_w', 'fc2_b'):
                self.assertTrue(op.input[3] not in fc1_lr_blobs)
                fc2_lr_blobs.append(op.input[3])
        self.assertEqual(fc2_lr_blobs[0], fc2_lr_blobs[1])

        # Check the case of a different optimizer type.
        with core.DeviceScope(infer_blob_device("fc3_w")):
            adagrad(model.net, model.param_init_net, "fc3_w", "fc3_w_grad")
        with core.DeviceScope(infer_blob_device("fc3_b")):
            adagrad(model.net, model.param_init_net, "fc3_b", "fc3_b_grad")
        fc3_lr_blobs = []
        for op in model.net.Proto().op:
            if op.type == 'Adagrad' and \
                    op.input[0] in ('fc3_w', 'fc3_b'):
                self.assertTrue(op.input[3] not in fc2_lr_blobs)
                self.assertTrue(op.input[3] not in fc1_lr_blobs)
                fc3_lr_blobs.append(op.input[3])
        self.assertEqual(fc3_lr_blobs[0], fc3_lr_blobs[1])


class TestWeightDecay(TestCase):
    def test_weight_decay(self):
        from caffe2.python import brew
        from caffe2.python.model_helper import ModelHelper

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
        a = brew.fc(model, cnv, 'a', 100, 200)
        pred = brew.fc(model, a, 'b', 200, 5)
        (softmax, loss) = model.SoftmaxWithLoss(
            [pred, 'label'],
            ['softmax', 'loss'],
        )
        model.AddGradientOperators([loss])

        add_weight_decay(model, weight_decay=1e-4)
        build_sgd(model, 0.11)

        expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}

        # Check in the proto that all weights are decayed and that
        # non-weights are not decayed.
        for op in model.net.Proto().op:
            if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
                if op.output[0] not in expected_weight_grad:
                    print(
                        "Unexpected param for weight_decay: {}".format(
                            op.output[0])
                    )
                self.assertTrue(op.output[0] in expected_weight_grad)
                expected_weight_grad.remove(op.output[0])

        self.assertEqual(
            expected_weight_grad,
            set(),
            "Not all weights were decayed: {}".format(expected_weight_grad)
        )


class TestOptimizerContext(TestCase):
    def test_optimizer_context(self):
        from caffe2.python import brew
        from caffe2.python.model_helper import ModelHelper

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})

        cnv_optim = SgdOptimizer(0.15)
        weight_optim = SgdOptimizer(0.2)
        bias_optim = SgdOptimizer(0.1)

        with UseOptimizer(cnv_optim):
            cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
        with UseOptimizer({'WEIGHT': weight_optim, 'BIAS': bias_optim}):
            a = brew.fc(model, cnv, 'a', 100, 200)
        pred = brew.fc(model, a, 'b', 200, 5)
        (softmax, loss) = model.SoftmaxWithLoss(
            [pred, 'label'],
            ['softmax', 'loss'],
        )
        model.AddGradientOperators([loss])

        add_weight_decay(model, weight_decay=1e-4)
        # Use this optimizer for any param with no optimizer in its param_info.
        build_sgd(model, 0.11)

        expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}
        expected_learning_rate = {
            'cnv_w_lr': -0.15,
            'cnv_b_lr': -0.15,
            'a_w_lr': -0.2,
            'a_b_lr': -0.1,
            'b_w_lr': -0.11,
            'b_b_lr': -0.11
        }

        for op in model.net.Proto().op:
            # Check in the proto that all weights are decayed and that
            # non-weights are not decayed.
            if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
                if op.output[0] not in expected_weight_grad:
                    print(
                        "Unexpected param for weight_decay: {}".format(
                            op.output[0])
                    )
                self.assertTrue(op.output[0] in expected_weight_grad)
                expected_weight_grad.remove(op.output[0])
            # Check the learning rate used for each parameter.
            if op.type == 'LearningRate':
                val = 0
                for arg in op.arg:
                    if arg.name == 'base_lr':
                        val = arg.f
                self.assertEqual(
                    val,
                    expected_learning_rate[op.output[0]]
                )

        self.assertEqual(
            expected_weight_grad,
            set(),
            "Not all weights were decayed: {}".format(expected_weight_grad)
        )
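

# Minimal entry point, assuming this module may be run directly rather than
# through a separate test runner that discovers the TestCase subclasses above
# on its own; unittest.main() collects and runs them with default settings.
if __name__ == '__main__':
    unittest.main()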