# pytorch/caffe2/python/operator_test/conv_test.py
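"""Hypothesis-driven tests for the Caffe2 convolution operators.

Exercises 1D/2D/3D convolution in NCHW and NHWC layouts, with and without
groups, dilation and bias, across the available engines ("", EIGEN,
CUDNN/MIOpen, MKLDNN), and checks gradients as well as cross-layout and
cross-engine consistency.
"""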
import collections
import functools
import unittest
import caffe2.python._import_c_extension as C
import caffe2.python.hip_test_util as hiputl
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.serialized_test.serialized_test_util as serial
import hypothesis.strategies as st
import numpy as np
from caffe2.proto import caffe2_pb2
from caffe2.python import brew, core, utils, workspace
from caffe2.python.model_helper import ModelHelper
from hypothesis import assume, given, settings
def _cudnn_supports(dilation=False, nhwc=False, backward=False):
"""Return True if cuDNN supports this configuration."""
v = workspace.GetCuDNNVersion()
if backward:
if nhwc:
# nhwc isn't supported in backward ops.
return False
else:
# Forward mode.
if dilation and v < 6000:
# Dilation not supported until v6
return False
if dilation and nhwc:
# Dilation and NHWC not supported together
return False
return True
def _cudnn_convolution_algo_count(direction):
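    # The algo-count symbols are only exposed by the C extension when Caffe2 is
    # built with cuDNN support; if they are missing, fall back to -1, i.e. do
    # not force any particular convolution algorithm.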
try:
if direction == "fwd":
return st.integers(0, C.cudnn_convolution_fwd_algo_count - 1)
elif direction == "dgrad":
return st.integers(0, C.cudnn_convolution_bwd_data_algo_count - 1)
elif direction == "wgrad":
return st.integers(0, C.cudnn_convolution_bwd_filter_algo_count - 1)
else:
assert False
except Exception:
return st.sampled_from([-1])
class TestConvolution(serial.SerializedTestCase):
    # cuDNN does not support different padding values per side, so the CUDNN engine is skipped here.
@given(
op_type=st.sampled_from(["Conv", "Conv2D"]),
stride_h=st.integers(1, 3),
stride_w=st.integers(1, 3),
pad_t=st.integers(0, 3),
pad_l=st.integers(0, 3),
pad_b=st.integers(0, 3),
pad_r=st.integers(0, 3),
kernel=st.integers(3, 5),
size=st.integers(1, 8),
input_channels=st.integers(1, 3),
output_channels=st.integers(1, 3),
batch_size=st.integers(0, 3),
group=st.integers(1, 2),
order=st.sampled_from(["NCHW", "NHWC"]),
engine=st.sampled_from(["", "EIGEN"]),
shared_buffer=st.booleans(),
use_bias=st.booleans(),
**hu.gcs
)
@settings(deadline=None, max_examples=50)
def test_convolution_separate_stride_pad_gradients(
self,
op_type,
stride_h,
stride_w,
pad_t,
pad_l,
pad_b,
pad_r,
kernel,
size,
input_channels,
output_channels,
batch_size,
group,
order,
engine,
shared_buffer,
use_bias,
gc,
dc,
):
# TODO: Group conv in NHWC not implemented for GPU yet.
assume(group == 1 or order == "NCHW" or gc.device_type == caffe2_pb2.CPU)
if group != 1 and order == "NHWC":
dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]
# Group conv not implemented with EIGEN engine.
assume(group == 1 or engine != "EIGEN")
input_channels *= group
output_channels *= group
op = core.CreateOperator(
op_type,
["X", "w", "b"] if use_bias else ["X", "w"],
["Y"],
stride_h=stride_h,
stride_w=stride_w,
pad_t=pad_t,
pad_l=pad_l,
pad_b=pad_b,
pad_r=pad_r,
kernel=kernel,
group=group,
order=order,
engine=engine,
shared_buffer=int(shared_buffer),
)
X = (
np.random.rand(batch_size, size, size, input_channels).astype(np.float32)
- 0.5
)
w = (
np.random.rand(
output_channels, kernel, kernel, int(input_channels / group)
).astype(np.float32)
- 0.5
)
b = np.random.rand(output_channels).astype(np.float32) - 0.5
if order == "NCHW":
X = utils.NHWC2NCHW(X)
w = utils.NHWC2NCHW(w)
inputs = [X, w, b] if use_bias else [X, w]
# Error handling path.
if size + pad_r + pad_l < kernel or size + pad_t + pad_b < kernel:
with self.assertRaises(RuntimeError):
self.assertDeviceChecks(dc, op, inputs, [0])
return
self.assertDeviceChecks(dc, op, inputs, [0])
for i in range(len(inputs)):
self.assertGradientChecks(gc, op, inputs, i, [0])
    # cuDNN does not support different padding values per side, so the CUDNN engine is skipped here.
@given(
op_type=st.sampled_from(["Conv", "Conv2D"]),
stride_h=st.integers(1, 3),
stride_w=st.integers(1, 3),
pad_t=st.integers(0, 3),
pad_l=st.integers(0, 3),
pad_b=st.integers(0, 3),
pad_r=st.integers(0, 3),
kernel=st.integers(1, 5),
size=st.integers(7, 10),
input_channels=st.integers(1, 8),
output_channels=st.integers(1, 8),
batch_size=st.integers(0, 3),
engine=st.sampled_from(["", "EIGEN"]),
use_bias=st.booleans(),
**hu.gcs
)
@settings(deadline=None)
def test_convolution_separate_stride_pad_layout(
self,
op_type,
stride_h,
stride_w,
pad_t,
pad_l,
pad_b,
pad_r,
kernel,
size,
input_channels,
output_channels,
batch_size,
engine,
use_bias,
gc,
dc,
):
X = (
np.random.rand(batch_size, size, size, input_channels).astype(np.float32)
- 0.5
)
w = (
np.random.rand(output_channels, kernel, kernel, input_channels).astype(
np.float32
)
- 0.5
)
b = np.random.rand(output_channels).astype(np.float32) - 0.5
outputs = {}
for order in ["NCHW", "NHWC"]:
op = core.CreateOperator(
op_type,
["X", "w", "b"] if use_bias else ["X", "w"],
["Y"],
stride_h=stride_h,
stride_w=stride_w,
kernel=kernel,
pad_t=pad_t,
pad_l=pad_l,
pad_b=pad_b,
pad_r=pad_r,
order=order,
engine=engine,
device_option=gc,
)
if order == "NCHW":
X_f = utils.NHWC2NCHW(X)
w_f = utils.NHWC2NCHW(w)
else:
X_f = X
w_f = w
self.ws.create_blob("X").feed(X_f, device_option=gc)
self.ws.create_blob("w").feed(w_f, device_option=gc)
self.ws.create_blob("b").feed(b, device_option=gc)
self.ws.run(op)
outputs[order] = self.ws.blobs["Y"].fetch()
np.testing.assert_allclose(
outputs["NCHW"], utils.NHWC2NCHW(outputs["NHWC"]), atol=1e-4, rtol=1e-4
)
@given(
op_type=st.sampled_from(["Conv", "Conv2D"]),
stride=st.integers(1, 3),
pad=st.integers(0, 3),
kernel=st.integers(1, 5),
dilation=st.integers(1, 3),
size=st.integers(7, 10),
input_channels=st.integers(1, 8),
output_channels=st.integers(1, 8),
batch_size=st.integers(0, 3),
group=st.integers(1, 2),
order=st.sampled_from(["NCHW", "NHWC"]),
engine=st.sampled_from(["", "CUDNN", "MKLDNN"]),
use_bias=st.booleans(),
force_algo_fwd=_cudnn_convolution_algo_count("fwd"),
force_algo_dgrad=_cudnn_convolution_algo_count("dgrad"),
force_algo_wgrad=_cudnn_convolution_algo_count("wgrad"),
**hu.gcs
)
@settings(max_examples=20, deadline=None)
def test_convolution_gradients(
self,
op_type,
stride,
pad,
kernel,
dilation,
size,
input_channels,
output_channels,
batch_size,
group,
order,
engine,
use_bias,
force_algo_fwd,
force_algo_dgrad,
force_algo_wgrad,
gc,
dc,
):
# TODO: Group conv in NHWC not implemented for GPU yet.
assume(
group == 1
or (order == "NCHW" or gc.device_type == caffe2_pb2.CPU)
and engine != "MKLDNN"
)
if group != 1 and order == "NHWC":
dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]
input_channels *= group
output_channels *= group
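        # Effective extent of the dilated kernel along each spatial dimension.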
dkernel = dilation * (kernel - 1) + 1
if engine == "CUDNN":
if hiputl.run_in_hip(gc, dc):
assume((order == "NCHW") and not (dilation > 1 and group > 1))
else:
assume(
_cudnn_supports(
dilation=(dilation > 1), nhwc=(order == "NHWC"), backward=True
)
)
assume(engine != "MKLDNN" or use_bias is True)
op = core.CreateOperator(
op_type,
["X", "w", "b"] if use_bias else ["X", "w"],
["Y"],
stride=stride,
kernel=kernel,
dilation=dilation,
pad=pad,
group=group,
order=order,
engine=engine,
force_algo_fwd=force_algo_fwd,
force_algo_dgrad=force_algo_dgrad,
force_algo_wgrad=force_algo_wgrad,
)
X = (
np.random.rand(batch_size, size, size, input_channels).astype(np.float32)
- 0.5
)
w = (
np.random.rand(
output_channels, kernel, kernel, int(input_channels / group)
).astype(np.float32)
- 0.5
)
b = np.random.rand(output_channels).astype(np.float32) - 0.5
if order == "NCHW":
X = utils.NHWC2NCHW(X)
w = utils.NHWC2NCHW(w)
inputs = [X, w, b] if use_bias else [X, w]
# Error handling path.
        if size + pad + pad < dkernel:
with self.assertRaises(RuntimeError):
self.assertDeviceChecks(dc, op, inputs, [0])
return
try:
self.assertDeviceChecks(dc, op, inputs, [0])
except RuntimeError as e:
es = str(e)
            # CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM should always have an
            # implementation.
if (
"status == CUDNN_STATUS_SUCCESS" not in es
or "CUDNN_STATUS_NOT_SUPPORTED" not in es
or force_algo_fwd == 0
):
raise e
for i in range(len(inputs)):
try:
self.assertGradientChecks(gc, op, inputs, i, [0])
except RuntimeError as e:
es = str(e)
if (
"status == CUDNN_STATUS_SUCCESS" not in es
or "CUDNN_STATUS_NOT_SUPPORTED" not in es
):
raise e
def _nd_convolution(
self,
n,
input_channels_per_group,
output_channels_per_group,
batch_size,
stride,
size,
kernel,
dilation,
pad,
group,
order,
use_bias,
engine,
force_algo_fwd,
force_algo_dgrad,
force_algo_wgrad,
gc,
dc,
):
# TODO: Group conv in NHWC not implemented for GPU yet.
# TODO: Group 1D conv in NCHW not implemented for GPU yet.
assume(
group == 1
or (n != 1 and order == "NCHW")
or gc.device_type == caffe2_pb2.CPU
)
if group != 1 and (n == 1 or order == "NHWC"):
dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]
input_channels = group * input_channels_per_group
output_channels = group * output_channels_per_group
dkernel = dilation * (kernel - 1) + 1
for op_type in ["Conv", "Conv" + str(n) + "D"]:
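            # The ND form of the op takes per-dimension lists: n strides, kernels
            # and dilations, plus 2 * n pads (a begin and an end pad per spatial dim).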
op = core.CreateOperator(
op_type,
["X", "w", "b"] if use_bias else ["X", "w"],
["Y"],
strides=[stride] * n,
kernels=[kernel] * n,
dilations=[dilation] * n,
pads=[pad] * n * 2,
group=group,
order=order,
engine=engine,
force_algo_fwd=force_algo_fwd,
force_algo_dgrad=force_algo_dgrad,
force_algo_wgrad=force_algo_wgrad,
)
input_dims = [batch_size, input_channels]
input_dims.extend([size] * n)
filter_dims = [output_channels, input_channels // group]
filter_dims.extend([kernel] * n)
X = np.random.rand(*input_dims).astype(np.float32) - 0.5
w = np.random.rand(*filter_dims).astype(np.float32) - 0.5
b = np.random.rand(output_channels).astype(np.float32) - 0.5
if order == "NHWC":
X = utils.NCHW2NHWC(X)
w = utils.NCHW2NHWC(w)
inputs = [X, w, b] if use_bias else [X, w]
            if size + pad + pad < dkernel:
with self.assertRaises(RuntimeError):
self.assertDeviceChecks(dc, op, inputs, [0])
return
self.assertDeviceChecks(dc, op, inputs, [0])
for i in range(len(inputs)):
self.assertGradientChecks(gc, op, inputs, i, [0])
@given(
input_channels=st.integers(1, 3),
output_channels=st.integers(1, 2),
batch_size=st.integers(0, 3),
stride=st.integers(1, 3),
size=st.integers(7, 10),
kernel=st.integers(1, 2),
dilation=st.integers(1, 3),
pad=st.integers(0, 3),
group=st.integers(1, 2),
order=st.sampled_from(["NCHW", "NHWC"]),
use_bias=st.booleans(),
engine=st.sampled_from(["", "CUDNN"]),
force_algo_fwd=_cudnn_convolution_algo_count("fwd"),
force_algo_dgrad=_cudnn_convolution_algo_count("dgrad"),
force_algo_wgrad=_cudnn_convolution_algo_count("wgrad"),
**hu.gcs
)
@settings(deadline=10000)
def test_1d_convolution(
self,
input_channels,
output_channels,
batch_size,
stride,
size,
kernel,
dilation,
pad,
group,
order,
use_bias,
engine,
force_algo_fwd,
force_algo_dgrad,
force_algo_wgrad,
gc,
dc,
):
if hiputl.run_in_hip(gc, dc):
            # Currently MIOpen only supports 2D conv.
assume(engine != "CUDNN") # CUDNN is aliased to MIOPEN for HIP
# TODO: 1D conv in NHWC not implemented for GPU yet.
assume(order == "NCHW" or gc.device_type == caffe2_pb2.CPU)
if order == "NHWC":
dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]
self._nd_convolution(
1,
input_channels,
output_channels,
batch_size,
stride,
size,
kernel,
dilation,
pad,
group,
order,
use_bias,
engine,
force_algo_fwd,
force_algo_dgrad,
force_algo_wgrad,
gc,
dc,
)
@given(
input_channels=st.integers(1, 2),
output_channels=st.integers(1, 2),
batch_size=st.integers(0, 2),
stride=st.integers(1, 2),
size=st.integers(4, 5),
kernel=st.integers(1, 2),
dilation=st.integers(1, 2),
pad=st.integers(0, 2),
group=st.integers(1, 2),
order=st.sampled_from(["NCHW", "NHWC"]),
use_bias=st.booleans(),
engine=st.sampled_from(["", "MIOPEN"]), # TODO: add "CUDNN"
force_algo_fwd=_cudnn_convolution_algo_count("fwd"),
force_algo_dgrad=_cudnn_convolution_algo_count("dgrad"),
force_algo_wgrad=_cudnn_convolution_algo_count("wgrad"),
**hu.gcs
)
@settings(max_examples=20, deadline=None)
def test_3d_convolution(
self,
input_channels,
output_channels,
batch_size,
stride,
size,
kernel,
dilation,
pad,
group,
order,
use_bias,
engine,
force_algo_fwd,
force_algo_dgrad,
force_algo_wgrad,
gc,
dc,
):
# TODO: 3D conv in NHWC not implemented for GPU yet.
assume(order == "NCHW" or gc.device_type == caffe2_pb2.CPU)
if order == "NHWC":
dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]
self._nd_convolution(
3,
input_channels,
output_channels,
batch_size,
stride,
size,
kernel,
dilation,
pad,
group,
order,
use_bias,
engine,
force_algo_fwd,
force_algo_dgrad,
force_algo_wgrad,
gc,
dc,
)
@given(
op_type=st.sampled_from(["Conv", "Conv3D"]),
batch_size=st.integers(0, 2),
stride=st.integers(1, 2),
size=st.integers(3, 5),
kernel=st.integers(1, 2),
dilation=st.integers(1, 2),
pad=st.integers(0, 2),
use_bias=st.booleans(),
force_algo_fwd=_cudnn_convolution_algo_count("fwd"),
force_algo_dgrad=_cudnn_convolution_algo_count("dgrad"),
force_algo_wgrad=_cudnn_convolution_algo_count("wgrad"),
**hu.gcs_no_hip
) # MIOPEN doesn't support 3D conv yet
@settings(deadline=10000)
def test_3d_convolution_cudnn_nchw(
self,
op_type,
batch_size,
stride,
size,
kernel,
dilation,
pad,
use_bias,
force_algo_fwd,
force_algo_dgrad,
force_algo_wgrad,
gc,
dc,
):
input_channels = 1
output_channels = 1
n = 3
dkernel = dilation * (kernel - 1) + 1
order = "NCHW"
op = core.CreateOperator(
op_type,
["X", "w", "b"] if use_bias else ["X", "w"],
["Y"],
strides=[stride] * n,
kernels=[kernel] * n,
dilations=[dilation] * n,
pads=[pad] * n * 2,
order=order,
engine="CUDNN",
force_algo_fwd=force_algo_fwd,
force_algo_dgrad=force_algo_dgrad,
force_algo_wgrad=force_algo_wgrad,
)
input_dims = [batch_size, input_channels]
input_dims.extend([size] * n)
filter_dims = [output_channels, input_channels]
filter_dims.extend([kernel] * n)
X = np.random.rand(*input_dims).astype(np.float32) - 0.5
w = np.random.rand(*filter_dims).astype(np.float32) - 0.5
b = np.random.rand(output_channels).astype(np.float32) - 0.5
inputs = [X, w, b] if use_bias else [X, w]
        if size + pad + pad < dkernel:
with self.assertRaises(RuntimeError):
self.assertDeviceChecks(dc, op, inputs, [0])
return
try:
self.assertDeviceChecks(dc, op, inputs, [0])
except RuntimeError as e:
es = str(e)
            # CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM should always have an
            # implementation.
if (
"status == CUDNN_STATUS_SUCCESS" not in es
or "CUDNN_STATUS_NOT_SUPPORTED" not in es
or force_algo_fwd == 0
):
raise e
for i in range(len(inputs)):
try:
self.assertGradientChecks(gc, op, inputs, i, [0])
except RuntimeError as e:
es = str(e)
if (
"status == CUDNN_STATUS_SUCCESS" not in es
or "CUDNN_STATUS_NOT_SUPPORTED" not in es
):
raise e
@given(
op_type=st.sampled_from(["Conv", "Conv2D"]),
stride=st.integers(1, 3),
pad=st.integers(0, 3),
kernel=st.integers(1, 5),
dilation=st.integers(1, 3),
size=st.integers(7, 10),
input_channels=st.integers(1, 8),
output_channels=st.integers(1, 8),
batch_size=st.integers(0, 3),
use_bias=st.booleans(),
**hu.gcs
)
@settings(deadline=None, max_examples=50)
def test_convolution_layout(
self,
op_type,
stride,
pad,
kernel,
dilation,
size,
input_channels,
output_channels,
batch_size,
use_bias,
gc,
dc,
):
assume(size >= dilation * (kernel - 1) + 1)
X = (
np.random.rand(batch_size, size, size, input_channels).astype(np.float32)
- 0.5
)
w = (
np.random.rand(output_channels, kernel, kernel, input_channels).astype(
np.float32
)
- 0.5
)
b = np.random.rand(output_channels).astype(np.float32) - 0.5
Output = collections.namedtuple("Output", ["Y", "engine", "order"])
outputs = []
for order in ["NCHW", "NHWC"]:
engine_list = [""]
if hiputl.run_in_hip(gc, dc):
if order == "NCHW":
engine_list.append("MIOPEN")
else:
if _cudnn_supports(dilation=(dilation > 1), nhwc=(order == "NHWC")):
engine_list.append("CUDNN")
for engine in engine_list:
op = core.CreateOperator(
op_type,
["X", "w", "b"] if use_bias else ["X", "w"],
["Y"],
stride=stride,
kernel=kernel,
dilation=dilation,
pad=pad,
order=order,
engine=engine,
device_option=gc,
exhaustive_search=True,
)
if order == "NCHW":
X_f = utils.NHWC2NCHW(X)
w_f = utils.NHWC2NCHW(w)
else:
X_f = X
w_f = w
self.assertDeviceChecks(
dc, op, [X_f, w_f, b] if use_bias else [X_f, w_f], [0]
)
self.ws.create_blob("X").feed(X_f, device_option=gc)
self.ws.create_blob("w").feed(w_f, device_option=gc)
self.ws.create_blob("b").feed(b, device_option=gc)
self.ws.run(op)
outputs.append(
Output(Y=self.ws.blobs["Y"].fetch(), engine=engine, order=order)
)
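        # All engine/layout combinations should agree: convert every output to
        # NCHW and compare it against the first one collected.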
def canonical(o):
if o.order == "NHWC":
return utils.NHWC2NCHW(o.Y)
else:
return o.Y
for o in outputs:
np.testing.assert_allclose(
canonical(outputs[0]), canonical(o), atol=1e-4, rtol=1e-4
)
@given(
num_workers=st.integers(1, 4),
net_type=st.sampled_from(
["simple", "dag"]
+ (
["async_dag"]
if workspace.has_gpu_support
else []
)
),
engine=st.sampled_from(["CUDNN", ""]),
**hu.gcs_no_hip
)
@settings(deadline=None)
def test_convolution_sync(self, net_type, num_workers, engine, gc, dc):
m = ModelHelper(name="test_model")
n = 1
d = 2
depth = 3
iters = 5
h = 5
w = 5
workspace.ResetWorkspace()
use_cudnn = engine == "CUDNN"
np.random.seed(1701)
# Build a binary tree of conv layers, summing at each node.
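        # Leaf blobs are named "{depth}_{j}"; each node "i_j" sums the conv outputs
        # of its children "i+1_{2j}" and "i+1_{2j+1}", ending at the root "0_0".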
for i in reversed(range(depth)):
for j in range(2 ** i):
bottom_1 = "{}_{}".format(i + 1, 2 * j)
bottom_2 = "{}_{}".format(i + 1, 2 * j + 1)
mid_1 = "{}_{}_m".format(i + 1, 2 * j)
mid_2 = "{}_{}_m".format(i + 1, 2 * j + 1)
top = "{}_{}".format(i, j)
w1, b1, w2, b2 = np.random.randn(4).tolist()
brew.conv(
m,
bottom_1,
mid_1,
dim_in=d,
dim_out=d,
kernel=3,
weight_init=("ConstantFill", {"value": w1}),
bias_init=("ConstantFill", {"value": b1}),
cudnn_state=np.random.randint(0, 3),
stride=1,
pad=1,
deterministic=1,
use_cudnn=use_cudnn,
engine=engine,
)
brew.conv(
m,
bottom_2,
mid_2,
dim_in=d,
dim_out=d,
kernel=3,
stride=1,
pad=1,
weight_init=("ConstantFill", {"value": w2}),
bias_init=("ConstantFill", {"value": b2}),
deterministic=1,
cudnn_state=np.random.randint(0, 3),
use_cudnn=use_cudnn,
engine=engine,
)
m.net.Sum([mid_1, mid_2], top)
m.net.Flatten(["0_0"], ["0_0_flat"])
m.net.SquaredL2Distance(["0_0_flat", "label"], "xent")
m.net.AveragedLoss("xent", "loss")
input_to_grad = m.AddGradientOperators(["loss"])
m.Proto().device_option.CopyFrom(gc)
m.param_init_net.Proto().device_option.CopyFrom(gc)
m.Proto().type = net_type
m.Proto().num_workers = num_workers
self.ws.run(m.param_init_net)
def run():
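            # Re-seed so every call feeds identical random inputs; with
            # deterministic=1 on the conv ops, repeated runs must produce
            # identical gradients.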
import numpy as np
np.random.seed(1701)
input_blobs = ["{}_{}".format(depth, j) for j in range(2 ** depth)]
for input_blob in input_blobs:
self.ws.create_blob(input_blob).feed(
np.random.randn(n, d, h, w).astype(np.float32), device_option=gc
)
self.ws.create_blob("label").feed(
np.random.randn(n, d * h * w).astype(np.float32), device_option=gc
)
self.ws.run(m.net)
gradients = [
self.ws.blobs[str(input_to_grad[input_blob])].fetch()
for input_blob in input_blobs
]
return gradients
outputs = [run() for _ in range(iters)]
for output in outputs[1:]:
np.testing.assert_array_equal(outputs[0], output)
np.testing.assert_allclose(
np.sum(np.square(output)), 1763719461732352.0, rtol=1e-5
)
def test_use_cudnn_engine_interactions(self):
"""Make sure the use_cudnn and engine kwargs work as expected."""
for model_default in [None, True, False]:
arg_scope = {}
if model_default is not None:
arg_scope["use_cudnn"] = model_default
else:
model_default = True # the default
model = ModelHelper(arg_scope=arg_scope)
self.assertEqual(model.arg_scope["use_cudnn"], model_default)
f = functools.partial(brew.conv, model, "conv_in", "conv_out", 10, 10, 5)
for op_cudnn in [None, True, False]:
for op_engine in [None, "", "CUDNN"]:
kwargs = {}
if op_cudnn is not None:
kwargs["use_cudnn"] = op_cudnn
else:
op_cudnn = False # the default
if op_engine is not None:
kwargs["engine"] = op_engine
calculated_cudnn = kwargs.get("use_cudnn", model_default)
expected_engine = kwargs.get(
"engine", "CUDNN" if calculated_cudnn else ""
)
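                    # An effective use_cudnn=False with engine="CUDNN", or
                    # use_cudnn=True with engine="", is contradictory and must raise.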
if (calculated_cudnn is False and op_engine == "CUDNN") or (
calculated_cudnn is True and op_engine == ""
):
with self.assertRaises(ValueError):
f(**kwargs)
else:
f(**kwargs)
self.assertEqual(model.Proto().op[-1].engine, expected_engine)
@given(
op_type=st.sampled_from(["Conv", "Conv2D"]),
N=st.integers(0, 3),
G=st.integers(1, 3),
DX=st.integers(1, 3),
DY=st.integers(1, 3),
H=st.integers(1, 3),
W=st.integers(1, 3),
use_bias=st.booleans(),
order=st.sampled_from(["NCHW", "NHWC"]),
force_algo_fwd=_cudnn_convolution_algo_count("fwd"),
force_algo_dgrad=_cudnn_convolution_algo_count("dgrad"),
force_algo_wgrad=_cudnn_convolution_algo_count("wgrad"),
**hu.gcs
)
@settings(deadline=10000)
def test_1x1_conv(
self,
op_type,
N,
G,
DX,
DY,
H,
W,
use_bias,
order,
force_algo_fwd,
force_algo_dgrad,
force_algo_wgrad,
gc,
dc,
):
if hiputl.run_in_hip(gc, dc):
assume(order == "NCHW")
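        # Grouped 1x1 conv is only exercised in NCHW here; NHWC runs with a single group.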
if order == "NHWC":
G = 1
C = G * DX
M = G * DY
op = core.CreateOperator(
op_type,
["X", "filter", "bias"] if use_bias else ["X", "filter"],
["Y"],
stride_h=1,
stride_w=1,
pad_t=0,
pad_l=0,
pad_b=0,
pad_r=0,
kernel=1,
order=order,
group=G,
force_algo_fwd=force_algo_fwd,
force_algo_dgrad=force_algo_dgrad,
force_algo_wgrad=force_algo_wgrad,
)
if order == "NCHW":
X = np.random.randn(N, C, H, W).astype(np.float32)
filter = np.random.randn(M, DX, 1, 1).astype(np.float32)
else:
X = np.random.randn(N, H, W, C).astype(np.float32)
filter = np.random.randn(M, 1, 1, DX).astype(np.float32)
bias = np.random.randn(M).astype(np.float32)
inputs = [X, filter, bias] if use_bias else [X, filter]
def conv_1x1_nchw_ref(X, filter, bias=None):
if N == 0:
Y = np.zeros(shape=(N, M, H, W), dtype=np.float32)
return [Y]
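            # A 1x1 convolution is a per-group matrix multiply: flatten the spatial
            # dims and contract each group's (DY, DX) filter block with its DX
            # input channels.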
X = X.reshape(N, G, DX, -1)
filter = filter.reshape(G, DY, DX)
Y = np.zeros(shape=(N, G, DY, H * W), dtype=np.float32)
for i in range(N):
for j in range(G):
Y[i, j, :, :] = np.dot(filter[j, :, :], X[i, j, :, :])
Y = Y.reshape(N, M, H, W)
if bias is not None:
bias = bias.reshape(1, M, 1, 1)
Y = np.add(Y, bias)
return [Y]
def conv_1x1_nhwc_ref(X, filter, bias=None):
if N == 0:
Y = np.zeros(shape=(N, H, W, M), dtype=np.float32)
return [Y]
X = X.reshape(N, -1, G, DX)
filter = filter.reshape(G, DY, DX)
Y = np.zeros(shape=(N, H * W, G, DY), dtype=np.float32)
for i in range(N):
for j in range(G):
Y[i, :, j, :] = np.dot(X[i, :, j, :], filter[j, :, :].transpose())
Y = Y.reshape(N, H, W, M)
if bias is not None:
bias = bias.reshape(1, 1, 1, M)
Y = np.add(Y, bias)
return [Y]
if order == "NCHW":
conv_1x1_ref = conv_1x1_nchw_ref
else:
conv_1x1_ref = conv_1x1_nhwc_ref
self.assertReferenceChecks(
device_option=gc, op=op, inputs=inputs, reference=conv_1x1_ref
)
self.assertDeviceChecks(dc, op, inputs, [0])
for i in range(len(inputs)):
self.assertGradientChecks(gc, op, inputs, i, [0])
if __name__ == "__main__":
unittest.main()