pytorch/caffe2/python/optimizer_test.py
Aapo Kyrola 401908d570 add_weight_decay + restore weight decay to resnet50_trainer
Summary:
Add add_weight_decay to optimizer + test.

In D5142973 I accidentally removed weight decay from the resnet50 trainer, so this restores it.
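For context, a minimal usage sketch, mirroring the new test below (the 1e-4 decay and the 0.11 learning rate are just the test's example values); add_weight_decay is applied to the model before the optimizer is built:

    model.AddGradientOperators([loss])
    add_weight_decay(model, weight_decay=1e-4)
    build_sgd(model, 0.11)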

Reviewed By: asaadaldien

Differential Revision: D5173594

fbshipit-source-id: c736d8955eddff151632ae6be11afde0883f7531
2017-06-02 14:16:56 -07:00


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from caffe2.python.optimizer import (
    build_sgd, build_multi_precision_sgd,
    build_ftrl, build_adagrad, build_adam, add_weight_decay)
from caffe2.python.optimizer_test_util import OptimizerTestBase
from caffe2.python.test_util import TestCase
from caffe2.python import workspace
from caffe2.python.core import DataType
import numpy as np
import unittest


class TestSgd(OptimizerTestBase, TestCase):
    def build_optimizer(self, model):
        self._skip_gpu = False
        return build_sgd(model, base_learning_rate=0.1)

    def check_optimizer(self, optimizer):
        self.assertTrue(optimizer.get_auxiliary_parameters().shared)
        self.assertFalse(optimizer.get_auxiliary_parameters().local)
        for param in optimizer.get_auxiliary_parameters().shared:
            tensor = workspace.FetchBlob(param)
            np.testing.assert_allclose(np.array([1.0]), tensor, atol=1e-5)


class TestMultiPrecisionSgd(OptimizerTestBase, TestCase):
    def build_optimizer(self, model):
        self._skip_gpu = False
        return build_multi_precision_sgd(model, base_learning_rate=0.1)

    def check_optimizer(self, optimizer):
        self.assertTrue(optimizer.get_auxiliary_parameters().shared)
        self.assertFalse(optimizer.get_auxiliary_parameters().local)
        for param in optimizer.get_auxiliary_parameters().shared:
            tensor = workspace.FetchBlob(param)
            np.testing.assert_allclose(np.array([1.0]), tensor, atol=1e-5)

    @unittest.skipIf(not workspace.has_gpu_support, "No GPU support")
    def testGPUDense(self):
        super(TestMultiPrecisionSgd, self).testGPUDense(DataType.FLOAT16)


class TestFtrl(OptimizerTestBase, TestCase):
    def build_optimizer(self, model):
        self._skip_gpu = True
        return build_ftrl(
            model, engine=None, alpha=1.0, beta=0.1, lambda1=0.0, lambda2=0.0)

    def check_optimizer(self, optimizer):
        self.assertFalse(optimizer.get_auxiliary_parameters().shared)
        self.assertTrue(optimizer.get_auxiliary_parameters().local)
        for param in optimizer.get_auxiliary_parameters().local:
            workspace.FetchBlob(param)


class TestAdagrad(OptimizerTestBase, TestCase):
    def build_optimizer(self, model):
        self._skip_gpu = False
        return build_adagrad(model, base_learning_rate=1.0)

    def check_optimizer(self, optimizer):
        self.assertFalse(optimizer.get_auxiliary_parameters().shared)
        self.assertTrue(optimizer.get_auxiliary_parameters().local)
        for param in optimizer.get_auxiliary_parameters().local:
            workspace.FetchBlob(param)


class TestAdam(OptimizerTestBase, TestCase):
    def build_optimizer(self, model):
        self._skip_gpu = False
        return build_adam(model, base_learning_rate=0.1)

    def check_optimizer(self, optimizer):
        self.assertTrue(optimizer.get_auxiliary_parameters().shared)
        self.assertTrue(optimizer.get_auxiliary_parameters().local)
        self.assertTrue(workspace.HasBlob("optimizer_iteration"))
        iteration_tensor = workspace.FetchBlob("optimizer_iteration")
        np.testing.assert_allclose(np.array([2000]),
                                   iteration_tensor,
                                   atol=1e-5)
        for param in optimizer.get_auxiliary_parameters().shared:
            workspace.FetchBlob(param)
        for param in optimizer.get_auxiliary_parameters().local:
            workspace.FetchBlob(param)


class TestWeightDecay(TestCase):

    def test_weight_decay(self):
        from caffe2.python import brew
        from caffe2.python.model_helper import ModelHelper

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
        a = brew.fc(model, cnv, 'a', 100, 200)
        pred = brew.fc(model, a, 'b', 200, 5)
        (softmax, loss) = model.SoftmaxWithLoss(
            [pred, 'label'],
            ['softmax', 'loss'],
        )
        model.AddGradientOperators([loss])

        add_weight_decay(model, weight_decay=1e-4)
        build_sgd(model, 0.11)

        expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}

        # Check from the proto that all weights are decayed and that
        # non-weights are not.
        for op in model.net.Proto().op:
            if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
                if op.output[0] not in expected_weight_grad:
                    print(
                        "Unexpected param for weight_decay: {}".
                        format(op.output[0])
                    )
                self.assertTrue(op.output[0] in expected_weight_grad)
                expected_weight_grad.remove(op.output[0])

        self.assertEqual(
            expected_weight_grad,
            set(),
            "Not all weights were decayed: {}".format(expected_weight_grad)
        )