Enable testing the GPU implementations of Adagrad and Adam

Summary:
Enable testing the GPU implementations of Adagrad and Adam, including their sparse versions.
Closes https://github.com/caffe2/caffe2/pull/607

Reviewed By: dzhulgakov

Differential Revision: D5121552

Pulled By: Yangqing

fbshipit-source-id: da6b7dde456237c94cf74d00860e7327b2267eab
Pooya Davoodi 2017-06-01 18:07:19 -07:00 committed by Facebook Github Bot
parent fc4d118e6b
commit 2c97c98ca7
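
The hunk below removes the CPU-only copies of these optimizer tests from hypothesis_test.py. As a rough sketch of what enabling GPU testing amounts to in this harness (assuming the relocated tests follow the usual hypothesis_test_util pattern), a test picks up GPU device configs by switching from hu.gcs_cpu_only to hu.gcs, for example:

    # minimal sketch, not the actual relocated test
    @given(inputs=hu.tensors(n=3),
           lr=st.floats(min_value=0.1, max_value=0.9),
           epsilon=st.floats(min_value=1e-5, max_value=1e-2),
           **hu.gcs)  # includes CUDA device options when a GPU is available
    def test_adagrad_sgd(self, inputs, lr, epsilon, gc, dc):
        ...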

@@ -597,154 +597,6 @@ class TestOperators(hu.HypothesisTestCase):
            return (grad_o, ms_o, mom_o)

        self.assertReferenceChecks(gc, op, [grad, ms, mom, lr], rmsprop)

    # Reference
    @staticmethod
    def _dense_adagrad(epsilon, w, h, grad, lr):
        lr = lr[0]
        h_o = h + np.square(grad)
        grad_o = lr * grad / (np.sqrt(h_o) + epsilon)
        w_o = w + grad_o
        return (w_o, h_o)

    # Reference
    @staticmethod
    def _dense_adam(epsilon, beta1, beta2, w, m1, m2, grad, lr, iters):
        lr = lr[0]
        iters = iters[0]
        t = iters + 1
        corrected_local_rate = lr * np.sqrt(1. - np.power(beta2, t)) / \
            (1. - np.power(beta1, t))
        m1_o = (beta1 * m1) + (1. - beta1) * grad
        m2_o = (beta2 * m2) + (1. - beta2) * np.square(grad)
        grad_o = corrected_local_rate * m1_o / \
            (np.sqrt(m2_o) + epsilon)
        w_o = w + grad_o
        return (w_o, m1_o, m2_o)
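
    # Restated from the code above, the reference updates are:
    #   Adagrad: h_o = h + grad**2
    #            w_o = w + lr * grad / (sqrt(h_o) + epsilon)
    #   Adam (t = iters + 1):
    #            m1_o = beta1 * m1 + (1 - beta1) * grad
    #            m2_o = beta2 * m2 + (1 - beta2) * grad**2
    #            w_o  = w + lr * sqrt(1 - beta2**t) / (1 - beta1**t)
    #                     * m1_o / (sqrt(m2_o) + epsilon)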

    @given(inputs=hu.tensors(n=3),
           in_place=st.booleans(),
           lr=st.floats(min_value=0.1, max_value=0.9),
           epsilon=st.floats(min_value=1e-5, max_value=1e-2),
           engine=st.sampled_from([None, "SIMD"]),
           **hu.gcs_cpu_only)
    def test_adagrad_sgd(self, inputs, in_place, lr, epsilon, engine,
                         gc, dc):
        w, grad, h = inputs
        h = np.abs(h) + 0.01
        lr = np.asarray([lr], dtype=np.float32)
        op = core.CreateOperator(
            "Adagrad",
            ["w", "h", "grad", "lr"],
            ["w" if in_place else "grad_o",
             "h" if in_place else "h_o"],
            epsilon=epsilon, engine=engine, device_option=gc)
        self.assertDeviceChecks(dc, op, [w, h, grad, lr], [0])

        self.assertReferenceChecks(gc, op, [w, h, grad, lr],
                                   partial(self._dense_adagrad, epsilon))
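
    # The sampled parameters above exercise both the in-place and out-of-place
    # output bindings as well as the default and "SIMD" engine variants of the
    # Adagrad operator.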

    @given(inputs=hu.tensors(n=3),
           lr=st.floats(min_value=0.1, max_value=0.9),
           epsilon=st.floats(min_value=1e-5, max_value=1e-2),
           engine=st.sampled_from([None, "SIMD"]),
           **hu.gcs_cpu_only)
    def test_sparse_adagrad_sgd(self, inputs, lr, epsilon,
                                engine, gc, dc):
        w, grad, h = inputs
        indices = np.arange(h.shape[0])
        indices = indices[indices % 2 == 0]
        grad = grad[indices]
        h = np.abs(h)
        lr = np.asarray([lr], dtype=np.float32)
        op = core.CreateOperator(
            "SparseAdagrad",
            ["param", "h", "indices", "grad", "lr"],
            ["param", "h"],
            epsilon=epsilon,
            engine=engine,
            device_option=gc)
        self.assertDeviceChecks(
            dc, op, [w, h, indices, grad, lr], [0])

        def adagrad(param, h, i, grad, lr):
            sw, sh = self._dense_adagrad(epsilon, param[i], h[i], grad, lr)
            h[i] = sh
            param[i] = sw
            return (param, h)

        self.assertReferenceChecks(gc, op, [w, h, indices, grad, lr], adagrad)
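
    # The sparse test only feeds gradients for the even-numbered rows, so the
    # reference scatter above updates exactly those rows of param and h and
    # leaves the odd rows untouched.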

    @given(inputs=hu.tensors(n=4),
           in_place=st.booleans(),
           beta1=st.floats(min_value=0.1, max_value=0.9),
           beta2=st.floats(min_value=0.1, max_value=0.9),
           lr=st.floats(min_value=0.1, max_value=0.9),
           iters=st.integers(min_value=1, max_value=10000),
           epsilon=st.floats(min_value=1e-5, max_value=1e-2),
           **hu.gcs_cpu_only)
    def test_adam_sgd(self, inputs, in_place, beta1, beta2, lr, iters, epsilon,
                      gc, dc):
        w, grad, m1, m2 = inputs
        m2 += np.abs(m2) + 0.01
        lr = np.asarray([lr], dtype=np.float32)
        iters = np.asarray([iters], dtype=np.int64)
        op = core.CreateOperator(
            "Adam",
            ["w", "m1", "m2", "grad", "lr", "iters"],
            ["w" if in_place else "w_o",
             "m1" if in_place else "m1_o",
             "m2" if in_place else "m2_o"],
            beta1=beta1, beta2=beta2, epsilon=epsilon,
            device_option=gc)

        input_device_options = {"iters": hu.cpu_do}
        inputs = [w, m1, m2, grad, lr, iters]
        self.assertDeviceChecks(
            dc, op, inputs, [0], input_device_options=input_device_options)
        self.assertReferenceChecks(gc, op, inputs,
                                   partial(self._dense_adam,
                                           epsilon, beta1, beta2),
                                   input_device_options=input_device_options)
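
    # input_device_options pins the "iters" blob to CPU, presumably so the
    # iteration counter stays in CPU memory even when the Adam op itself is
    # checked on other devices.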

    @given(inputs=hu.tensors(n=4),
           beta1=st.floats(min_value=0.1, max_value=0.9),
           beta2=st.floats(min_value=0.1, max_value=0.9),
           lr=st.floats(min_value=0.1, max_value=0.9),
           iters=st.integers(min_value=1, max_value=10000),
           epsilon=st.floats(min_value=1e-5, max_value=1e-2),
           **hu.gcs_cpu_only)
    def test_sparse_adam_sgd(self, inputs, beta1, beta2, lr, iters,
                             epsilon, gc, dc):
        w, grad, m1, m2 = inputs
        indices = np.arange(m1.shape[0])
        indices = indices[indices % 2 == 0]
        grad = grad[indices]
        m2 += np.abs(m2) + 0.01
        lr = np.asarray([lr], dtype=np.float32)
        iters = np.asarray([iters], dtype=np.int64)
        op = core.CreateOperator(
            "SparseAdam",
            ["w", "m1", "m2", "indices", "grad", "lr", "iters"],
            ["w", "m1", "m2"],
            beta1=beta1, beta2=beta2, epsilon=epsilon,
            device_option=gc)

        input_device_options = {"iters": hu.cpu_do}
        inputs = [w, m1, m2, indices, grad, lr, iters]
        self.assertDeviceChecks(
            dc, op, inputs, [0], input_device_options=input_device_options)

        def adam(w, m1, m2, i, grad, lr, iters):
            nw, nm1, nm2 = self._dense_adam(epsilon, beta1, beta2, w[i],
                                            m1[i], m2[i], grad, lr, iters)
            w[i] = nw
            m1[i] = nm1
            m2[i] = nm2
            return (w, m1, m2)

        self.assertReferenceChecks(gc, op, inputs, adam)

    # Reference
    @staticmethod
    def _dense_ftrl(alpha, beta, lambda1, lambda2, w, nz, g):