Enable testing the GPU implementations of Adagrad and Adam
Summary: Enable testing the GPU implementations of Adagrad and Adam, including the sparse versions.
Closes https://github.com/caffe2/caffe2/pull/607
Reviewed By: dzhulgakov
Differential Revision: D5121552
Pulled By: Yangqing
fbshipit-source-id: da6b7dde456237c94cf74d00860e7327b2267eab
This commit is contained in:
parent
fc4d118e6b
commit
2c97c98ca7
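The hunk below removes the CPU-only Adagrad/Adam reference tests from TestOperators; those tests drew device options from hu.gcs_cpu_only, so they never exercised the CUDA kernels. For context, here is a minimal sketch of what a GPU-enabled variant of the dense Adagrad test could look like. It assumes the relevant change is drawing device options from hu.gcs (CPU and CUDA) instead of hu.gcs_cpu_only, drops the CPU-only "SIMD" engine parameter, and uses an illustrative class name; it is not the exact code added by this commit.

# Sketch only (not the actual commit contents): the dense Adagrad test from the
# hunk below, but drawing device options from hu.gcs so assertDeviceChecks also
# compares the CUDA implementation against the CPU one.
from functools import partial

import numpy as np
from hypothesis import given
import hypothesis.strategies as st

from caffe2.python import core
import caffe2.python.hypothesis_test_util as hu


class TestAdagradGPU(hu.HypothesisTestCase):  # illustrative class name
    @staticmethod
    def _dense_adagrad(epsilon, w, h, grad, lr):
        # NumPy reference: accumulate squared gradients, then scale the step.
        lr = lr[0]
        h_o = h + np.square(grad)
        grad_o = lr * grad / (np.sqrt(h_o) + epsilon)
        return (w + grad_o, h_o)

    @given(inputs=hu.tensors(n=3),
           lr=st.floats(min_value=0.1, max_value=0.9),
           epsilon=st.floats(min_value=1e-5, max_value=1e-2),
           **hu.gcs)  # CPU and GPU device options, unlike hu.gcs_cpu_only
    def test_adagrad(self, inputs, lr, epsilon, gc, dc):
        w, grad, h = inputs
        h = np.abs(h) + 0.01
        lr = np.asarray([lr], dtype=np.float32)
        op = core.CreateOperator(
            "Adagrad",
            ["w", "h", "grad", "lr"],
            ["w", "h"],
            epsilon=epsilon,
            device_option=gc)
        # Cross-device check (CPU vs. GPU), then check against the reference.
        self.assertDeviceChecks(dc, op, [w, h, grad, lr], [0])
        self.assertReferenceChecks(gc, op, [w, h, grad, lr],
                                   partial(self._dense_adagrad, epsilon))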
@@ -597,154 +597,6 @@ class TestOperators(hu.HypothesisTestCase):
            return (grad_o, ms_o, mom_o)

        self.assertReferenceChecks(gc, op, [grad, ms, mom, lr], rmsprop)

    # Reference
    @staticmethod
    def _dense_adagrad(epsilon, w, h, grad, lr):
        lr = lr[0]
        h_o = h + np.square(grad)
        grad_o = lr * grad / (np.sqrt(h_o) + epsilon)
        w_o = w + grad_o
        return (w_o, h_o)

    # Reference
    @staticmethod
    def _dense_adam(epsilon, beta1, beta2, w, m1, m2, grad, lr, iters):
        lr = lr[0]
        iters = iters[0]
        t = iters + 1
        corrected_local_rate = lr * np.sqrt(1. - np.power(beta2, t)) / \
            (1. - np.power(beta1, t))

        m1_o = (beta1 * m1) + (1. - beta1) * grad
        m2_o = (beta2 * m2) + (1. - beta2) * np.square(grad)
        grad_o = corrected_local_rate * m1_o / \
            (np.sqrt(m2_o) + epsilon)
        w_o = w + grad_o
        return (w_o, m1_o, m2_o)

    @given(inputs=hu.tensors(n=3),
           in_place=st.booleans(),
           lr=st.floats(min_value=0.1, max_value=0.9),
           epsilon=st.floats(min_value=1e-5, max_value=1e-2),
           engine=st.sampled_from([None, "SIMD"]),
           **hu.gcs_cpu_only)
    def test_adagrad_sgd(self, inputs, in_place, lr, epsilon, engine,
                         gc, dc):
        w, grad, h = inputs
        h = np.abs(h) + 0.01
        lr = np.asarray([lr], dtype=np.float32)
        op = core.CreateOperator(
            "Adagrad",
            ["w", "h", "grad", "lr"],
            ["w" if in_place else "grad_o",
             "h" if in_place else "h_o"],
            epsilon=epsilon, engine=engine, device_option=gc)
        self.assertDeviceChecks(dc, op, [w, h, grad, lr], [0])

        self.assertReferenceChecks(gc, op, [w, h, grad, lr],
                                   partial(self._dense_adagrad, epsilon))

    @given(inputs=hu.tensors(n=3),
           lr=st.floats(min_value=0.1, max_value=0.9),
           epsilon=st.floats(min_value=1e-5, max_value=1e-2),
           engine=st.sampled_from([None, "SIMD"]),
           **hu.gcs_cpu_only)
    def test_sparse_adagrad_sgd(self, inputs, lr, epsilon,
                                engine, gc, dc):
        w, grad, h = inputs
        indices = np.arange(h.shape[0])
        indices = indices[indices % 2 == 0]
        grad = grad[indices]
        h = np.abs(h)
        lr = np.asarray([lr], dtype=np.float32)
        op = core.CreateOperator(
            "SparseAdagrad",
            ["param", "h", "indices", "grad", "lr"],
            ["param", "h"],
            epsilon=epsilon,
            engine=engine,
            device_option=gc)
        self.assertDeviceChecks(
            dc, op, [w, h, indices, grad, lr], [0])

        def adagrad(param, h, i, grad, lr):
            sw, sh = self._dense_adagrad(epsilon, param[i], h[i], grad, lr)
            h[i] = sh
            param[i] = sw
            return (param, h)

        self.assertReferenceChecks(gc, op, [w, h, indices, grad, lr], adagrad)

    @given(inputs=hu.tensors(n=4),
           in_place=st.booleans(),
           beta1=st.floats(min_value=0.1, max_value=0.9),
           beta2=st.floats(min_value=0.1, max_value=0.9),
           lr=st.floats(min_value=0.1, max_value=0.9),
           iters=st.integers(min_value=1, max_value=10000),
           epsilon=st.floats(min_value=1e-5, max_value=1e-2),
           **hu.gcs_cpu_only)
    def test_adam_sgd(self, inputs, in_place, beta1, beta2, lr, iters, epsilon,
                      gc, dc):
        w, grad, m1, m2 = inputs
        m2 += np.abs(m2) + 0.01
        lr = np.asarray([lr], dtype=np.float32)
        iters = np.asarray([iters], dtype=np.int64)

        op = core.CreateOperator(
            "Adam",
            ["w", "m1", "m2", "grad", "lr", "iters"],
            ["w" if in_place else "w_o",
             "m1" if in_place else "m1_o",
             "m2" if in_place else "m2_o"],
            beta1=beta1, beta2=beta2, epsilon=epsilon,
            device_option=gc)
        input_device_options = {"iters": hu.cpu_do}
        inputs = [w, m1, m2, grad, lr, iters]
        self.assertDeviceChecks(
            dc, op, inputs, [0], input_device_options=input_device_options)

        self.assertReferenceChecks(gc, op, inputs,
                                   partial(self._dense_adam,
                                           epsilon, beta1, beta2),
                                   input_device_options=input_device_options)

    @given(inputs=hu.tensors(n=4),
           beta1=st.floats(min_value=0.1, max_value=0.9),
           beta2=st.floats(min_value=0.1, max_value=0.9),
           lr=st.floats(min_value=0.1, max_value=0.9),
           iters=st.integers(min_value=1, max_value=10000),
           epsilon=st.floats(min_value=1e-5, max_value=1e-2),
           **hu.gcs_cpu_only)
    def test_sparse_adam_sgd(self, inputs, beta1, beta2, lr, iters,
                             epsilon, gc, dc):

        w, grad, m1, m2 = inputs
        indices = np.arange(m1.shape[0])
        indices = indices[indices % 2 == 0]
        grad = grad[indices]
        m2 += np.abs(m2) + 0.01
        lr = np.asarray([lr], dtype=np.float32)
        iters = np.asarray([iters], dtype=np.int64)
        op = core.CreateOperator(
            "SparseAdam",
            ["w", "m1", "m2", "indices", "grad", "lr", "iters"],
            ["w", "m1", "m2"],
            beta1=beta1, beta2=beta2, epsilon=epsilon,
            device_option=gc)
        input_device_options = {"iters": hu.cpu_do}
        inputs = [w, m1, m2, indices, grad, lr, iters]
        self.assertDeviceChecks(
            dc, op, inputs, [0], input_device_options=input_device_options)

        def adam(w, m1, m2, i, grad, lr, iters):
            nw, nm1, nm2 = self._dense_adam(epsilon, beta1, beta2, w[i],
                                            m1[i], m2[i], grad, lr, iters)
            w[i] = nw
            m1[i] = nm1
            m2[i] = nm2
            return (w, m1, m2)

        self.assertReferenceChecks(gc, op, inputs, adam)

    # Reference
    @staticmethod
    def _dense_ftrl(alpha, beta, lambda1, lambda2, w, nz, g):