Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/19118

A bug introduced by D14700576 and reported by Yufei (fixed by D14778810 and D14785256) was not detected by our unit tests. This diff improves the unit tests to catch such errors (with this diff and without D14778810, we can reproduce the bug Yufei reported). The improved tests also revealed a bug that affects accuracy when the weight and bias are pre-packed together and the pre-packed weight/bias are reused by multiple nets: we were modifying the pre-packed bias in place, even though it was supposed to be constant.

Reviewed By: csummersea

Differential Revision: D14806077

fbshipit-source-id: aa9049c74b6ea98d21fbd097de306447a662a46d
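To make the failure mode concrete, here is a minimal, hypothetical numpy-only sketch of the in-place bias bug described above (all names are made up; in the actual tests below, the shared pre-packed blob is the "W_packed" output of Int8ConvPackWeight):

import numpy as np

bias_fp32 = np.array([0.5, -1.25, 2.0], dtype=np.float32)
packed = {"bias": bias_fp32}  # stands in for a shared pre-packed weight/bias blob

def run_net_buggy(packed, in_scale, w_scale):
    # BUG: requantizes the shared pre-packed bias in place, so the next net
    # that reuses the blob starts from an already-scaled bias.
    packed["bias"] *= 1.0 / (in_scale * w_scale)
    return packed["bias"].round()

def run_net_fixed(packed, in_scale, w_scale):
    # FIX: requantize a copy and leave the packed constant untouched.
    return (packed["bias"] / (in_scale * w_scale)).round()

print(run_net_buggy(packed, 0.1, 0.05))  # first net: correct
print(run_net_buggy(packed, 0.1, 0.05))  # second net: wrong, the shared bias was already mutated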
from __future__ import absolute_import, division, print_function, unicode_literals

import collections

import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
from caffe2.python import core, dyndep, utils, workspace
from caffe2.quantization.server import utils as dnnlowp_utils
from dnnlowp_test_utils import (
    check_quantized_results_close,
    generate_conv_inputs,
    generate_convnd_inputs,
    run_conv_or_fc,
)
from hypothesis import assume, given


dyndep.InitOpsLibrary("//caffe2/caffe2/quantization/server:dnnlowp_ops")
workspace.GlobalInit(["caffe2", "--caffe2_omp_num_threads=11"])


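# Each test below builds the same convolution under several (op_type, engine)
# pairs. The plain ("Conv", "") run serves as the fp32 reference; when weights
# are pre-quantized or pre-packed, its observed output range is reused via
# dnnlowp_utils.add_quantization_param_args, and all results are compared with
# check_quantized_results_close.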
class DNNLowPOpConvTest(hu.HypothesisTestCase):
    # correctness test with no quantization error in inputs
    @given(
        stride=st.integers(1, 2),
        pad=st.integers(0, 2),
        kernel=st.integers(1, 5),
        dilation=st.integers(1, 2),
        size=st.integers(10, 16),
        group=st.integers(1, 4),
        input_channels_per_group=st.sampled_from([2, 3, 4, 5, 8, 16, 32]),
        output_channels_per_group=st.integers(2, 16),
        batch_size=st.integers(1, 3),
        order=st.sampled_from(["NCHW", "NHWC"]),
        weight_quantized=st.booleans(),
        prepack_weight=st.booleans(),
        share_col_buffer=st.booleans(),
        preserve_activation_sparsity=st.booleans(),
        preserve_weight_sparsity=st.booleans(),
        **hu.gcs_cpu_only
    )
    def test_dnnlowp_conv_int(
        self,
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        weight_quantized,
        prepack_weight,
        share_col_buffer,
        preserve_activation_sparsity,
        preserve_weight_sparsity,
        gc,
        dc,
    ):
        assume(group == 1 or dilation == 1)
        assume((not prepack_weight) or order == "NHWC")

        X, W, b = generate_conv_inputs(
            stride,
            pad,
            kernel,
            dilation,
            size,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
            preserve_activation_sparsity=preserve_activation_sparsity,
            preserve_weight_sparsity=preserve_weight_sparsity,
        )

        Output = collections.namedtuple("Output", ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [
            ("Conv", ""),
            ("Conv", "DNNLOWP"),
            ("Conv", "DNNLOWP_16"),
            ("Int8Conv", "DNNLOWP"),
        ]

        for op_type, engine in op_engine_list:
            init_net = core.Net("test_init_net")
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine
            do_dequantize = "DNNLOWP" in engine
            # If output scale/zp aren't set, they get computed from the ref fp32 op
            # in DNNLOWP, which isn't possible when we quantize input weights.
            # Make sure at least one output is collected to compute output
            # scale/zp.
            do_quantize_weight = (
                engine == "DNNLOWP" and weight_quantized and len(outputs) > 0
            )
            do_prepack_weight = engine == "DNNLOWP" and prepack_weight

            if do_quantize:
                quantize = core.CreateOperator(
                    "Quantize",
                    ["X"],
                    ["X_q"],
                    preserve_activation_sparsity=preserve_activation_sparsity,
                    engine=engine,
                    device_option=gc,
                )
                net.Proto().op.extend([quantize])

            x_q_param = dnnlowp_utils.choose_quantization_params(
                X.min(), X.max(), preserve_activation_sparsity
            )
            if do_quantize_weight:
                int8_given_tensor_fill, w_q_param = dnnlowp_utils.create_int8_given_tensor_fill(
                    W, "W_q", preserve_weight_sparsity
                )
                init_net.Proto().op.extend([int8_given_tensor_fill])

                # Bias
                int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
                    b, "b_q", x_q_param, w_q_param
                )
                init_net.Proto().op.extend([int8_bias_tensor_fill])

            if do_prepack_weight:
                inputs = ["W_q" if do_quantize_weight else "W"]
                if do_dequantize:
                    inputs += ["b_q" if do_quantize_weight else "b"]
                pack = core.CreateOperator(
                    "Int8ConvPackWeight",
                    inputs,
                    ["W_packed"],
                    group=group,
                    preserve_weight_sparsity=preserve_weight_sparsity,
                    in_scale=x_q_param.scale,
                    engine=engine,
                )
                init_net.Proto().op.extend([pack])

            conv = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "W_packed"
                    if do_prepack_weight
                    else ("W_q" if do_quantize_weight else "W"),
                    "b_q" if do_quantize_weight else "b",
                ],
                ["Y_q" if do_dequantize else "Y"],
                stride=stride,
                kernel=kernel,
                dilation=dilation,
                pad=pad,
                order=order,
                shared_buffer=(1 if share_col_buffer else 0),
                preserve_activation_sparsity=preserve_activation_sparsity,
                preserve_weight_sparsity=preserve_weight_sparsity,
                engine=engine,
                group=group,
                device_option=gc,
            )
            if do_quantize_weight or do_prepack_weight:
                # When a quantized weight is provided, we can't rescale the
                # output dynamically by looking at the range of output of each
                # batch, so here we provide the range of output observed from
                # the fp32 reference implementation
                dnnlowp_utils.add_quantization_param_args(
                    conv, outputs[0][0], preserve_activation_sparsity
                )
            net.Proto().op.extend([conv])

            if do_dequantize:
                dequantize = core.CreateOperator(
                    "Dequantize", ["Y_q"], ["Y"], engine=engine, device_option=gc
                )
                net.Proto().op.extend([dequantize])

            run_conv_or_fc(
                self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
            )

        check_quantized_results_close(outputs, symmetric=preserve_activation_sparsity)

    # correctness test with no quantization error in inputs
    @given(
        stride=st.integers(1, 2),
        pad=st.integers(0, 2),
        kernel=st.integers(1, 5),
        dilation=st.integers(1, 2),
        size=st.integers(10, 16),
        group=st.integers(1, 4),
        input_channels_per_group=st.sampled_from([2, 3, 4, 5, 8, 16, 32]),
        output_channels_per_group=st.integers(2, 16),
        batch_size=st.integers(1, 3),
        order=st.sampled_from(["NCHW", "NHWC"]),
        share_col_buffer=st.booleans(),
        **hu.gcs_cpu_only
    )
    def test_dnnlowp_conv_relu_int(
        self,
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        share_col_buffer,
        gc,
        dc,
    ):
        assume(group == 1 or dilation == 1)

        X, W, b = generate_conv_inputs(
            stride,
            pad,
            kernel,
            dilation,
            size,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
        )

        Output = collections.namedtuple("Output", ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [
            ("Conv", ""),
            ("ConvRelu", "DNNLOWP"),
            ("ConvRelu", "DNNLOWP_16"),
            ("Int8ConvRelu", "DNNLOWP"),
        ]

        for op_type, engine in op_engine_list:
            net = core.Net("test_net")

            if "DNNLOWP" in engine:
                quantize = core.CreateOperator(
                    "Quantize", ["X"], ["X_q"], engine=engine, device_option=gc
                )
                net.Proto().op.extend([quantize])

                conv = core.CreateOperator(
                    op_type,
                    ["X_q", "W", "b"],
                    ["Y_q"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    order=order,
                    engine=engine,
                    shared_buffer=(1 if share_col_buffer else 0),
                    group=group,
                    device_option=gc,
                )
                net.Proto().op.extend([conv])

                dequantize = core.CreateOperator(
                    "Dequantize", ["Y_q"], ["Y"], engine=engine, device_option=gc
                )
                net.Proto().op.extend([dequantize])
            else:
                conv = core.CreateOperator(
                    op_type,
                    ["X", "W", "b"],
                    ["Y"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    order=order,
                    shared_buffer=(1 if share_col_buffer else 0),
                    engine=engine,
                    group=group,
                    device_option=gc,
                )
                net.Proto().op.extend([conv])

                relu = core.CreateOperator(
                    "Relu", ["Y"], ["Y"], engine=engine, device_option=gc
                )
                net.Proto().op.extend([relu])

            run_conv_or_fc(
                self, None, net, X, W, b, op_type, engine, order, gc, outputs
            )

        check_quantized_results_close(outputs)

    def _test_dnnlowp_nd_int(
        self,
        stride,
        pad,
        kernels,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        prepack_weight,
        gc,
        dc,
    ):
        assume(group == 1 or dilation == 1)
        assume((not prepack_weight) or order == "NHWC")
        ndim = len(kernels)

        X, W, b = generate_convnd_inputs(
            (stride,) * ndim,
            (pad,) * ndim,
            kernels,
            (dilation,) * ndim,
            (size,) * ndim,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
        )

        Output = collections.namedtuple("Output", ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [("Conv", ""), ("Conv", "DNNLOWP_16"), ("Int8Conv", "DNNLOWP")]

        for op_type, engine in op_engine_list:
            init_net = core.Net("test_init_net")
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine
            do_dequantize = "DNNLOWP" in engine
            # If output scale/zp aren't set, they get computed from the ref fp32 op
            # in DNNLOWP, which isn't possible when we quantize input weights.
            # Make sure at least one output is collected to compute output
            # scale/zp.
            do_quantize_weight = engine == "DNNLOWP" and len(outputs) > 0
            do_prepack_weight = engine == "DNNLOWP" and prepack_weight

            if do_quantize:
                quantize = core.CreateOperator(
                    "Quantize", ["X"], ["X_q"], engine=engine, device_option=gc
                )
                net.Proto().op.extend([quantize])

            x_q_param = dnnlowp_utils.choose_quantization_params(X.min(), X.max())
            if do_quantize_weight:
                int8_given_tensor_fill, w_q_param = dnnlowp_utils.create_int8_given_tensor_fill(
                    W, "W_q"
                )
                init_net.Proto().op.extend([int8_given_tensor_fill])

                # Bias
                int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
                    b, "b_q", x_q_param, w_q_param
                )
                init_net.Proto().op.extend([int8_bias_tensor_fill])

            if do_prepack_weight:
                inputs = ["W_q" if do_quantize_weight else "W"]
                if do_dequantize:
                    inputs += ["b_q" if do_quantize_weight else "b"]
                pack = core.CreateOperator(
                    "Int8ConvPackWeight",
                    inputs,
                    ["W_packed"],
                    group=group,
                    in_scale=x_q_param.scale,
                    engine=engine,
                )
                init_net.Proto().op.extend([pack])

            conv = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "W_packed"
                    if do_prepack_weight
                    else ("W_q" if do_quantize_weight else "W"),
                    "b_q" if do_quantize_weight else "b",
                ],
                ["Y_q" if do_dequantize else "Y"],
                strides=[stride] * ndim,
                kernels=kernels,
                dilations=[dilation] * ndim,
                pads=[pad] * (ndim * 2),
                order=order,
                dequantize_output=not do_dequantize,
                engine=engine,
                group=group,
                device_option=gc,
            )
            if do_quantize_weight or do_prepack_weight:
                # When a quantized weight is provided, we can't rescale the
                # output dynamically by looking at the range of output of each
                # batch, so here we provide the range of output observed from
                # the fp32 reference implementation
                dnnlowp_utils.add_quantization_param_args(conv, outputs[0][0])
            net.Proto().op.extend([conv])

            if do_dequantize:
                dequantize = core.CreateOperator(
                    "Dequantize", ["Y_q"], ["Y"], engine=engine, device_option=gc
                )
                net.Proto().op.extend([dequantize])

            run_conv_or_fc(
                self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
            )

        check_quantized_results_close(outputs)

    @given(
        stride=st.integers(1, 2),
        pad=st.integers(0, 2),
        temporal_kernels=st.sampled_from([1, 5]),
        spatial_kernels=st.sampled_from([1, 3]),
        dilation=st.integers(1, 1),
        size=st.sampled_from([5, 8]),
        group=st.integers(1, 2),
        input_channels_per_group=st.sampled_from([2, 3]),
        output_channels_per_group=st.sampled_from([2, 3]),
        batch_size=st.integers(1, 2),
        order=st.sampled_from(["NCHW", "NHWC"]),
        prepack_weight=st.booleans(),
        **hu.gcs_cpu_only
    )
    def test_dnnlowp_conv3d_int(
        self,
        stride,
        pad,
        temporal_kernels,
        spatial_kernels,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        prepack_weight,
        gc,
        dc,
    ):
        self._test_dnnlowp_nd_int(
            stride,
            pad,
            (temporal_kernels,) + (spatial_kernels,) * 2,
            dilation,
            size,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
            prepack_weight,
            gc,
            dc,
        )

    @given(
        stride=st.integers(1, 2),
        pad=st.integers(0, 2),
        kernels=st.sampled_from([1, 3]),
        dilation=st.integers(1, 1),
        size=st.sampled_from([5, 8]),
        group=st.integers(1, 2),
        input_channels_per_group=st.sampled_from([2, 3]),
        output_channels_per_group=st.sampled_from([2, 3]),
        batch_size=st.integers(1, 2),
        order=st.sampled_from(["NCHW", "NHWC"]),
        prepack_weight=st.booleans(),
        **hu.gcs_cpu_only
    )
    def test_dnnlowp_conv1d_int(
        self,
        stride,
        pad,
        kernels,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        prepack_weight,
        gc,
        dc,
    ):
        self._test_dnnlowp_nd_int(
            stride,
            pad,
            (kernels,),
            dilation,
            size,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
            prepack_weight,
            gc,
            dc,
        )