r"""Importing this file includes common utility methods for checking quantized
tensors and modules.
"""
from __future__ import absolute_import, division, print_function, unicode_literals

from contextlib import contextmanager

import numpy as np
import torch


def _conv_output_shape(input_size, kernel_size, padding, stride, dilation,
                       output_padding=0):
    """Computes the output shape given convolution parameters.

    Evaluates, along a single spatial dimension::

        floor((input_size + 2 * padding - kernel_size
               - (kernel_size - 1) * (dilation - 1)) / stride)
        + 2 * output_padding + 1

    The result comes from ``np.floor`` and is therefore a NumPy floating
    point value (or array, if array arguments are given), not a Python int.
    """
    # Extent added to the kernel by dilation.
    dilation_extra = (kernel_size - 1) * (dilation - 1)
    span = input_size + 2 * padding - kernel_size - dilation_extra
    return np.floor(span / stride) + 2 * output_padding + 1


# Quantization references
def _quantize(x, scale, zero_point, qmin=None, qmax=None, dtype=np.uint8):
    """Quantizes a numpy array.

    Computes ``round(x / scale + zero_point)``, clips the result to
    ``[qmin, qmax]`` and casts it to ``dtype``. When ``qmin`` / ``qmax`` are
    ``None`` they default to the limits reported by ``np.iinfo(dtype)``.
    """
    if qmin is None:
        qmin = np.iinfo(dtype).min
    if qmax is None:
        qmax = np.iinfo(dtype).max
    # Round in a wide integer type first so clipping happens before the
    # narrowing cast to ``dtype``.
    qx = np.round(x / scale + zero_point).astype(np.int64)
    qx = np.clip(qx, qmin, qmax)
    return qx.astype(dtype)


def _dequantize(qx, scale, zero_point):
    """Dequantizes a numpy array.

    Returns ``(qx - zero_point) * scale`` computed in float64.
    """
    # ``np.float`` (an alias of the builtin ``float``) was removed from NumPy;
    # ``np.float64`` is the same type and works on every NumPy version.
    return (qx.astype(np.float64) - zero_point) * scale


def _requantize(x, multiplier, zero_point, qmin=0, qmax=255, qtype=np.uint8):
    """Requantizes a numpy array, i.e., intermediate int32 or int16 values are
    converted back to given type.

    Computes ``round(x * multiplier) + zero_point``, clips it to
    ``[qmin, qmax]`` and casts the result to ``qtype``.
    """
    qx = (x * multiplier).round() + zero_point
    return np.clip(qx, qmin, qmax).astype(qtype)


def _calculate_dynamic_qparams(X, dtype):
    """Calculate the dynamic quantization parameters (scale, zero_point)
    according to the min and max element of the tensor.

    ``X`` may be a ``torch.Tensor`` (converted with ``.numpy()``) or an object
    exposing ``min()`` / ``max()`` such as a NumPy array. ``torch.qint8``
    selects the range ``[-128, 127]``; any other ``dtype`` is treated as
    ``torch.quint8`` with range ``[0, 255]``.

    Returns a two-element list ``[scale, zero_point]`` holding a Python float
    and a Python int. If all elements of ``X`` are equal the result is
    ``[1.0, 0]``.
    """
    if isinstance(X, torch.Tensor):
        X = X.numpy()
    if dtype == torch.qint8:
        qmin, qmax = -128, 127
    else:  # dtype == torch.quint8
        qmin, qmax = 0, 255
    n_levels = 255.0
    min_val = X.min()
    max_val = X.max()
    if min_val == max_val:
        scale = 1.0
        zero_point = 0
    else:
        # Extend the range so that it always contains zero.
        max_val = max(max_val, 0.0)
        min_val = min(min_val, 0.0)
        scale = (max_val - min_val) / n_levels
        # Keep the scale away from zero.
        scale = max(scale, np.finfo(np.float32).eps)
        zero_point = qmin - round(min_val / scale)
        # Clamp the zero point into the representable range.
        zero_point = max(qmin, zero_point)
        zero_point = min(qmax, zero_point)
    return [float(scale), int(zero_point)]
def _calculate_dynamic_per_channel_qparams(X, dtype):
    """Calculate the dynamic quantization parameters (scale, zero_point)
    for each channel according to the min and max element of that channel.

    Channels are the slices ``X[i]`` along the first dimension. ``X`` may be
    a ``torch.Tensor`` (converted with ``.numpy()``) or a NumPy array. The
    quantized range ``[qmin, qmax]`` is taken from ``torch.iinfo(dtype)``.

    Returns a tuple ``(scale, zero_point)`` of NumPy arrays of length
    ``X.shape[0]`` with dtypes float64 and int64 respectively. A channel whose
    elements are all equal gets ``scale == 1.0`` and ``zero_point == 0``.
    """
    if isinstance(X, torch.Tensor):
        X = X.numpy()
    limits = torch.iinfo(dtype)
    qmin, qmax = limits.min, limits.max
    n_levels = qmax - qmin
    num_channels = X.shape[0]
    scale = np.zeros(num_channels, dtype=np.float64)
    zero_point = np.zeros(num_channels, dtype=np.int64)
    min_scale = np.finfo(np.float32).eps
    for i in range(num_channels):
        # Reduce over this channel only; reducing over all of X would give
        # every channel the same parameters.
        channel = X[i]
        min_val = channel.min()
        max_val = channel.max()
        if min_val == max_val:
            scale[i] = 1.0
            zero_point[i] = 0
            continue
        # Extend the range so that it always contains zero.
        max_val = max(max_val, 0.0)
        min_val = min(min_val, 0.0)
        # Keep the scale away from zero.
        scale[i] = max((max_val - min_val) / n_levels, min_scale)
        zp = qmin - round(min_val / scale[i])
        # Clamp the zero point into the representable range.
        zero_point[i] = min(qmax, max(qmin, zp))
    return scale, zero_point


@contextmanager
def override_quantized_engine(qengine):
    """Context manager that temporarily sets ``torch.backends.quantized.engine``.

    The engine is set to ``qengine`` on entry and the value that was active
    before entry is restored on exit, including when the body raises.
    """
    previous = torch.backends.quantized.engine
    torch.backends.quantized.engine = qengine
    try:
        yield
    finally:
        torch.backends.quantized.engine = previous