#!/usr/bin/env python3 """ MTCNN Face detection plugin """ from __future__ import absolute_import, division, print_function import os from six import string_types, iteritems import cv2 import numpy as np from lib.multithreading import MultiThread from ._base import Detector, dlib, logger # Must import tensorflow inside the spawned process # for Windows machines tf = None # pylint: disable = invalid-name def import_tensorflow(): """ Import tensorflow from inside spawned process """ global tf # pylint: disable = invalid-name,global-statement import tensorflow as tflow tf = tflow class Detect(Detector): """ MTCNN detector for face recognition """ def __init__(self, **kwargs): super().__init__(**kwargs) self.kwargs = self.validate_kwargs() self.name = "mtcnn" self.target = 2073600 # Uses approx 1.30 GB of VRAM self.vram = 1408 def validate_kwargs(self): """ Validate that config options are correct. If not reset to default """ valid = True threshold = [self.config["threshold_1"], self.config["threshold_2"], self.config["threshold_3"]] kwargs = {"minsize": self.config["minsize"], "threshold": threshold, "factor": self.config["scalefactor"]} if kwargs["minsize"] < 10: valid = False elif not all(0.0 < threshold <= 1.0 for threshold in kwargs['threshold']): valid = False elif not 0.0 < kwargs['factor'] < 1.0: valid = False if not valid: kwargs = {"minsize": 20, # minimum size of face "threshold": [0.6, 0.7, 0.7], # three steps threshold "factor": 0.709} # scale factor logger.warning("Invalid MTCNN options in config. Running with defaults") logger.debug("Using mtcnn kwargs: %s", kwargs) return kwargs def set_model_path(self): """ Load the mtcnn models """ for model in ("det1.npy", "det2.npy", "det3.npy"): model_path = os.path.join(self.cachepath, model) if not os.path.exists(model_path): raise Exception("Error: Unable to find {}, reinstall " "the lib!".format(model_path)) logger.debug("Loading model: '%s'", model_path) return self.cachepath def initialize(self, *args, **kwargs): """ Create the mtcnn detector """ super().initialize(*args, **kwargs) logger.info("Initializing MTCNN Detector...") is_gpu = False # Must import tensorflow inside the spawned process # for Windows machines import_tensorflow() vram_free = self.get_vram_free() mtcnn_graph = tf.Graph() # Windows machines sometimes misreport available vram, and overuse # causing OOM. Allow growth fixes that config = tf.ConfigProto() config.gpu_options.allow_growth = True # pylint: disable=no-member with mtcnn_graph.as_default(): # pylint: disable=not-context-manager sess = tf.Session(config=config) with sess.as_default(): # pylint: disable=not-context-manager pnet, rnet, onet = create_mtcnn(sess, self.model_path) if any("gpu" in str(device).lower() for device in sess.list_devices()): logger.debug("Using GPU") is_gpu = True mtcnn_graph.finalize() if not is_gpu: alloc = 2048 logger.warning("Using CPU") else: alloc = vram_free logger.debug("Allocated for Tensorflow: %sMB", alloc) self.batch_size = int(alloc / self.vram) if self.batch_size < 1: raise ValueError("Insufficient VRAM available to continue " "({}MB)".format(int(alloc))) logger.verbose("Processing in %s threads", self.batch_size) self.kwargs["pnet"] = pnet self.kwargs["rnet"] = rnet self.kwargs["onet"] = onet self.init.set() logger.info("Initialized MTCNN Detector.") def detect_faces(self, *args, **kwargs): """ Detect faces in Multiple Threads """ super().detect_faces(*args, **kwargs) workers = MultiThread(target=self.detect_thread, thread_count=self.batch_size) workers.start() workers.join() sentinel = self.queues["in"].get() self.queues["out"].put(sentinel) logger.debug("Detecting Faces complete") def detect_thread(self): """ Detect faces in rgb image """ logger.debug("Launching Detect") while True: item = self.get_item() if item == "EOF": break logger.trace("Detecting faces: '%s'", item["filename"]) [detect_image, scale] = self.compile_detection_image(item["image"], False, False) for angle in self.rotation: current_image, rotmat = self.rotate_image(detect_image, angle) faces, points = detect_face(current_image, **self.kwargs) if angle != 0 and faces.any(): logger.verbose("found face(s) by rotating image %s degrees", angle) if faces.any(): break detected_faces = self.process_output(faces, points, rotmat, scale) item["detected_faces"] = detected_faces self.finalize(item) logger.debug("Thread Completed Detect") def process_output(self, faces, points, rotation_matrix, scale): """ Compile found faces for output """ logger.trace("Processing Output: (faces: %s, points: %s, rotation_matrix: %s)", faces, points, rotation_matrix) faces = self.recalculate_bounding_box(faces, points) faces = [dlib.rectangle( # pylint: disable=c-extension-no-member int(face[0]), int(face[1]), int(face[2]), int(face[3])) for face in faces] if isinstance(rotation_matrix, np.ndarray): faces = [self.rotate_rect(face, rotation_matrix) for face in faces] detected = [dlib.rectangle( # pylint: disable=c-extension-no-member int(face.left() / scale), int(face.top() / scale), int(face.right() / scale), int(face.bottom() / scale)) for face in faces] logger.trace("Processed Output: %s", detected) return detected @staticmethod def recalculate_bounding_box(faces, landmarks): """ Recalculate the bounding box for Face Alignment. Face Alignment was built to expect a DLIB bounding box and calculates center and scale based on that. Resize the bounding box around features to present a better box to Face Alignment. Helps its chances on edge cases and helps remove 'jitter' """ logger.trace("Recalculating Bounding Boxes: (faces: %s, landmarks: %s)", faces, landmarks) retval = list() no_faces = len(faces) if no_faces == 0: return retval face_landmarks = np.hsplit(landmarks, no_faces) for idx in range(no_faces): pts = np.reshape(face_landmarks[idx], (5, 2), order="F") nose = pts[2] minmax = (np.amin(pts, axis=0), np.amax(pts, axis=0)) padding = [(minmax[1][0] - minmax[0][0]) / 2, (minmax[1][1] - minmax[0][1]) / 2] center = (minmax[1][0] - padding[0], minmax[1][1] - padding[1]) offset = (center[0] - nose[0], nose[1] - center[1]) center = (center[0] + offset[0], center[1] + offset[1]) padding[0] += padding[0] padding[1] += padding[1] bounding = [center[0] - padding[0], center[1] - padding[1], center[0] + padding[0], center[1] + padding[1]] retval.append(bounding) logger.trace("Recalculated Bounding Boxes: %s", retval) return retval # MTCNN Detector for face alignment # Code adapted from: https://github.com/davidsandberg/facenet # Tensorflow implementation of the face detection / alignment algorithm # found at # https://github.com/kpzhang93/MTCNN_face_detection_alignment # MIT License # # Copyright (c) 2016 David Sandberg # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. def layer(operator): """Decorator for composable network layers.""" def layer_decorated(self, *args, **kwargs): # Automatically set a name if not provided. name = kwargs.setdefault('name', self.get_unique_name(operator.__name__)) # Figure out the layer inputs. if len(self.terminals) == 0: # pylint: disable=len-as-condition raise RuntimeError('No input variables found for layer %s.' % name) elif len(self.terminals) == 1: layer_input = self.terminals[0] else: layer_input = list(self.terminals) # Perform the operation and get the output. layer_output = operator(self, layer_input, *args, **kwargs) # Add to layer LUT. self.layers[name] = layer_output # This output is now the input for the next layer. self.feed(layer_output) # Return self for chained calls. return self return layer_decorated class Network(): """ Tensorflow Network """ def __init__(self, inputs, trainable=True): # The input nodes for this network self.inputs = inputs # The current list of terminal nodes self.terminals = [] # Mapping from layer names to layers self.layers = dict(inputs) # If true, the resulting variables are set as trainable self.trainable = trainable self.setup() def setup(self): """Construct the network. """ raise NotImplementedError('Must be implemented by the subclass.') @staticmethod def load(model_path, session, ignore_missing=False): """Load network weights. model_path: The path to the numpy-serialized network weights session: The current TensorFlow session ignore_missing: If true, serialized weights for missing layers are ignored. """ # pylint: disable=no-member data_dict = np.load(model_path, encoding='latin1').item() for op_name in data_dict: with tf.variable_scope(op_name, reuse=True): for param_name, data in iteritems(data_dict[op_name]): try: var = tf.get_variable(param_name) session.run(var.assign(data)) except ValueError: if not ignore_missing: raise def feed(self, *args): """Set the input(s) for the next operation by replacing the terminal nodes. The arguments can be either layer names or the actual layers. """ assert len(args) != 0 # pylint: disable=len-as-condition self.terminals = [] for fed_layer in args: if isinstance(fed_layer, string_types): try: fed_layer = self.layers[fed_layer] except KeyError: raise KeyError('Unknown layer name fed: %s' % fed_layer) self.terminals.append(fed_layer) return self def get_output(self): """Returns the current network output.""" return self.terminals[-1] def get_unique_name(self, prefix): """Returns an index-suffixed unique name for the given prefix. This is used for auto-generating layer names based on the type-prefix. """ ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 return '%s_%d' % (prefix, ident) def make_var(self, name, shape): """Creates a new TensorFlow variable.""" return tf.get_variable(name, shape, trainable=self.trainable) @staticmethod def validate_padding(padding): """Verifies that the padding is one of the supported ones.""" assert padding in ('SAME', 'VALID') @layer def conv(self, # pylint: disable=too-many-arguments inp, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding='SAME', group=1, biased=True): """ Conv Layer """ # pylint: disable=too-many-locals # Verify that the padding is acceptable self.validate_padding(padding) # Get the number of channels in the input c_i = int(inp.get_shape()[-1]) # Verify that the grouping parameter is valid assert c_i % group == 0 assert c_o % group == 0 # Convolution for a given input and kernel convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) # noqa with tf.variable_scope(name) as scope: kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) # This is the common-case. Convolve the input without any # further complications. output = convolve(inp, kernel) # Add the biases if biased: biases = self.make_var('biases', [c_o]) output = tf.nn.bias_add(output, biases) if relu: # ReLU non-linearity output = tf.nn.relu(output, name=scope.name) return output @layer def prelu(self, inp, name): """ Prelu Layer """ with tf.variable_scope(name): i = int(inp.get_shape()[-1]) alpha = self.make_var('alpha', shape=(i,)) output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp)) return output @layer def max_pool(self, inp, k_h, k_w, # pylint: disable=too-many-arguments s_h, s_w, name, padding='SAME'): """ Max Pool Layer """ self.validate_padding(padding) return tf.nn.max_pool(inp, ksize=[1, k_h, k_w, 1], strides=[1, s_h, s_w, 1], padding=padding, name=name) @layer def fc(self, inp, num_out, name, relu=True): # pylint: disable=invalid-name """ FC Layer """ with tf.variable_scope(name): input_shape = inp.get_shape() if input_shape.ndims == 4: # The input is spatial. Vectorize it first. dim = 1 for this_dim in input_shape[1:].as_list(): dim *= int(this_dim) feed_in = tf.reshape(inp, [-1, dim]) else: feed_in, dim = (inp, input_shape[-1].value) weights = self.make_var('weights', shape=[dim, num_out]) biases = self.make_var('biases', [num_out]) operator = tf.nn.relu_layer if relu else tf.nn.xw_plus_b fc = operator(feed_in, weights, biases, name=name) # pylint: disable=invalid-name return fc @layer def softmax(self, target, axis, name=None): # pylint: disable=no-self-use """ Multi dimensional softmax, refer to https://github.com/tensorflow/tensorflow/issues/210 compute softmax along the dimension of target the native softmax only supports batch_size x dimension """ max_axis = tf.reduce_max(target, axis, keepdims=True) target_exp = tf.exp(target-max_axis) normalize = tf.reduce_sum(target_exp, axis, keepdims=True) softmax = tf.div(target_exp, normalize, name) return softmax class PNet(Network): """ Tensorflow PNet """ def setup(self): (self.feed('data') # pylint: disable=no-value-for-parameter, no-member .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') .prelu(name='PReLU1') .max_pool(2, 2, 2, 2, name='pool1') .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') .prelu(name='PReLU2') .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') .prelu(name='PReLU3') .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') .softmax(3, name='prob1')) (self.feed('PReLU3') # pylint: disable=no-value-for-parameter .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) class RNet(Network): """ Tensorflow RNet """ def setup(self): (self.feed('data') # pylint: disable=no-value-for-parameter, no-member .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') .prelu(name='prelu1') .max_pool(3, 3, 2, 2, name='pool1') .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') .prelu(name='prelu2') .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') .prelu(name='prelu3') .fc(128, relu=False, name='conv4') .prelu(name='prelu4') .fc(2, relu=False, name='conv5-1') .softmax(1, name='prob1')) (self.feed('prelu4') # pylint: disable=no-value-for-parameter .fc(4, relu=False, name='conv5-2')) class ONet(Network): """ Tensorflow ONet """ def setup(self): (self.feed('data') # pylint: disable=no-value-for-parameter, no-member .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') .prelu(name='prelu1') .max_pool(3, 3, 2, 2, name='pool1') .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') .prelu(name='prelu2') .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') .prelu(name='prelu3') .max_pool(2, 2, 2, 2, name='pool3') .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') .prelu(name='prelu4') .fc(256, relu=False, name='conv5') .prelu(name='prelu5') .fc(2, relu=False, name='conv6-1') .softmax(1, name='prob1')) (self.feed('prelu5') # pylint: disable=no-value-for-parameter .fc(4, relu=False, name='conv6-2')) (self.feed('prelu5') # pylint: disable=no-value-for-parameter .fc(10, relu=False, name='conv6-3')) def create_mtcnn(sess, model_path): """ Create the network """ if not model_path: model_path, _ = os.path.split(os.path.realpath(__file__)) with tf.variable_scope('pnet'): data = tf.placeholder(tf.float32, (None, None, None, 3), 'input') pnet = PNet({'data': data}) pnet.load(os.path.join(model_path, 'det1.npy'), sess) with tf.variable_scope('rnet'): data = tf.placeholder(tf.float32, (None, 24, 24, 3), 'input') rnet = RNet({'data': data}) rnet.load(os.path.join(model_path, 'det2.npy'), sess) with tf.variable_scope('onet'): data = tf.placeholder(tf.float32, (None, 48, 48, 3), 'input') onet = ONet({'data': data}) onet.load(os.path.join(model_path, 'det3.npy'), sess) pnet_fun = lambda img: sess.run(('pnet/conv4-2/BiasAdd:0', # noqa 'pnet/prob1:0'), feed_dict={'pnet/input:0': img}) rnet_fun = lambda img: sess.run(('rnet/conv5-2/conv5-2:0', # noqa 'rnet/prob1:0'), feed_dict={'rnet/input:0': img}) onet_fun = lambda img: sess.run(('onet/conv6-2/conv6-2:0', # noqa 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0': img}) return pnet_fun, rnet_fun, onet_fun def detect_face(img, minsize, pnet, rnet, # pylint: disable=too-many-arguments onet, threshold, factor): """Detects faces in an image, and returns bounding boxes and points for them. img: input image minsize: minimum faces' size pnet, rnet, onet: caffemodel threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold factor: the factor used to create a scaling pyramid of face sizes to detect in the image. """ # pylint: disable=too-many-locals,too-many-statements,too-many-branches factor_count = 0 total_boxes = np.empty((0, 9)) points = np.empty(0) height = img.shape[0] width = img.shape[1] minl = np.amin([height, width]) var_m = 12.0 / minsize minl = minl * var_m # create scale pyramid scales = [] while minl >= 12: scales += [var_m * np.power(factor, factor_count)] minl = minl * factor factor_count += 1 # # # # # # # # # # # # # # first stage - fast proposal network (pnet) to obtain face candidates # # # # # # # # # # # # # for scale in scales: height_scale = int(np.ceil(height * scale)) width_scale = int(np.ceil(width * scale)) im_data = imresample(img, (height_scale, width_scale)) im_data = (im_data - 127.5) * 0.0078125 img_x = np.expand_dims(im_data, 0) img_y = np.transpose(img_x, (0, 2, 1, 3)) out = pnet(img_y) out0 = np.transpose(out[0], (0, 2, 1, 3)) out1 = np.transpose(out[1], (0, 2, 1, 3)) boxes, _ = generate_bounding_box(out1[0, :, :, 1].copy(), out0[0, :, :, :].copy(), scale, threshold[0]) # inter-scale nms pick = nms(boxes.copy(), 0.5, 'Union') if boxes.size > 0 and pick.size > 0: boxes = boxes[pick, :] total_boxes = np.append(total_boxes, boxes, axis=0) numbox = total_boxes.shape[0] if numbox > 0: pick = nms(total_boxes.copy(), 0.7, 'Union') total_boxes = total_boxes[pick, :] regw = total_boxes[:, 2]-total_boxes[:, 0] regh = total_boxes[:, 3]-total_boxes[:, 1] qq_1 = total_boxes[:, 0]+total_boxes[:, 5] * regw qq_2 = total_boxes[:, 1]+total_boxes[:, 6] * regh qq_3 = total_boxes[:, 2]+total_boxes[:, 7] * regw qq_4 = total_boxes[:, 3]+total_boxes[:, 8] * regh total_boxes = np.transpose(np.vstack([qq_1, qq_2, qq_3, qq_4, total_boxes[:, 4]])) total_boxes = rerec(total_boxes.copy()) total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32) d_y, ed_y, d_x, ed_x, var_y, e_y, var_x, e_x, tmpw, tmph = pad(total_boxes.copy(), width, height) numbox = total_boxes.shape[0] # # # # # # # # # # # # # # second stage - refinement of face candidates with rnet # # # # # # # # # # # # # if numbox > 0: tempimg = np.zeros((24, 24, 3, numbox)) for k in range(0, numbox): tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) tmp[d_y[k] - 1:ed_y[k], d_x[k] - 1:ed_x[k], :] = img[var_y[k] - 1:e_y[k], var_x[k]-1:e_x[k], :] if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: tempimg[:, :, :, k] = imresample(tmp, (24, 24)) else: return np.empty() tempimg = (tempimg-127.5)*0.0078125 tempimg1 = np.transpose(tempimg, (3, 1, 0, 2)) out = rnet(tempimg1) out0 = np.transpose(out[0]) out1 = np.transpose(out[1]) score = out1[1, :] ipass = np.where(score > threshold[1]) total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)]) m_v = out0[:, ipass[0]] if total_boxes.shape[0] > 0: pick = nms(total_boxes, 0.7, 'Union') total_boxes = total_boxes[pick, :] total_boxes = bbreg(total_boxes.copy(), np.transpose(m_v[:, pick])) total_boxes = rerec(total_boxes.copy()) numbox = total_boxes.shape[0] # # # # # # # # # # # # # # third stage - further refinement and facial landmarks positions with onet # NB: Facial landmarks code commented out for faceswap # # # # # # # # # # # # # if numbox > 0: # third stage total_boxes = np.fix(total_boxes).astype(np.int32) d_y, ed_y, d_x, ed_x, var_y, e_y, var_x, e_x, tmpw, tmph = pad(total_boxes.copy(), width, height) tempimg = np.zeros((48, 48, 3, numbox)) for k in range(0, numbox): tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) tmp[d_y[k] - 1:ed_y[k], d_x[k] - 1:ed_x[k], :] = img[var_y[k] - 1:e_y[k], var_x[k] - 1:e_x[k], :] if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: tempimg[:, :, :, k] = imresample(tmp, (48, 48)) else: return np.empty() tempimg = (tempimg-127.5)*0.0078125 tempimg1 = np.transpose(tempimg, (3, 1, 0, 2)) out = onet(tempimg1) out0 = np.transpose(out[0]) out1 = np.transpose(out[1]) out2 = np.transpose(out[2]) score = out2[1, :] points = out1 ipass = np.where(score > threshold[2]) points = points[:, ipass[0]] total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)]) m_v = out0[:, ipass[0]] width = total_boxes[:, 2] - total_boxes[:, 0] + 1 height = total_boxes[:, 3] - total_boxes[:, 1] + 1 points[0:5, :] = (np.tile(width, (5, 1)) * points[0:5, :] + np.tile(total_boxes[:, 0], (5, 1)) - 1) points[5:10, :] = (np.tile(height, (5, 1)) * points[5:10, :] + np.tile(total_boxes[:, 1], (5, 1)) - 1) if total_boxes.shape[0] > 0: total_boxes = bbreg(total_boxes.copy(), np.transpose(m_v)) pick = nms(total_boxes.copy(), 0.7, 'Min') total_boxes = total_boxes[pick, :] points = points[:, pick] return total_boxes, points # function [boundingbox] = bbreg(boundingbox,reg) def bbreg(boundingbox, reg): """Calibrate bounding boxes""" if reg.shape[1] == 1: reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) width = boundingbox[:, 2] - boundingbox[:, 0] + 1 height = boundingbox[:, 3] - boundingbox[:, 1] + 1 b_1 = boundingbox[:, 0] + reg[:, 0] * width b_2 = boundingbox[:, 1] + reg[:, 1] * height b_3 = boundingbox[:, 2] + reg[:, 2] * width b_4 = boundingbox[:, 3] + reg[:, 3] * height boundingbox[:, 0:4] = np.transpose(np.vstack([b_1, b_2, b_3, b_4])) return boundingbox def generate_bounding_box(imap, reg, scale, threshold): """Use heatmap to generate bounding boxes""" # pylint: disable=too-many-locals stride = 2 cellsize = 12 imap = np.transpose(imap) d_x1 = np.transpose(reg[:, :, 0]) d_y1 = np.transpose(reg[:, :, 1]) d_x2 = np.transpose(reg[:, :, 2]) d_y2 = np.transpose(reg[:, :, 3]) dim_y, dim_x = np.where(imap >= threshold) if dim_y.shape[0] == 1: d_x1 = np.flipud(d_x1) d_y1 = np.flipud(d_y1) d_x2 = np.flipud(d_x2) d_y2 = np.flipud(d_y2) score = imap[(dim_y, dim_x)] reg = np.transpose(np.vstack([d_x1[(dim_y, dim_x)], d_y1[(dim_y, dim_x)], d_x2[(dim_y, dim_x)], d_y2[(dim_y, dim_x)]])) if reg.size == 0: reg = np.empty((0, 3)) bbox = np.transpose(np.vstack([dim_y, dim_x])) q_1 = np.fix((stride * bbox + 1) / scale) q_2 = np.fix((stride * bbox + cellsize - 1 + 1) / scale) boundingbox = np.hstack([q_1, q_2, np.expand_dims(score, 1), reg]) return boundingbox, reg # function pick = nms(boxes,threshold,type) def nms(boxes, threshold, method): """ Non_Max Suppression """ # pylint: disable=too-many-locals if boxes.size == 0: return np.empty((0, 3)) x_1 = boxes[:, 0] y_1 = boxes[:, 1] x_2 = boxes[:, 2] y_2 = boxes[:, 3] var_s = boxes[:, 4] area = (x_2 - x_1 + 1) * (y_2 - y_1 + 1) s_sort = np.argsort(var_s) pick = np.zeros_like(var_s, dtype=np.int16) counter = 0 while s_sort.size > 0: i = s_sort[-1] pick[counter] = i counter += 1 idx = s_sort[0:-1] xx_1 = np.maximum(x_1[i], x_1[idx]) yy_1 = np.maximum(y_1[i], y_1[idx]) xx_2 = np.minimum(x_2[i], x_2[idx]) yy_2 = np.minimum(y_2[i], y_2[idx]) width = np.maximum(0.0, xx_2-xx_1+1) height = np.maximum(0.0, yy_2-yy_1+1) inter = width * height if method == 'Min': var_o = inter / np.minimum(area[i], area[idx]) else: var_o = inter / (area[i] + area[idx] - inter) s_sort = s_sort[np.where(var_o <= threshold)] pick = pick[0:counter] return pick # function [d_y ed_y d_x ed_x y e_y x e_x tmp_width tmp_height] = pad(total_boxes,width,height) def pad(total_boxes, width, height): """Compute the padding coordinates (pad the bounding boxes to square)""" tmp_width = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32) tmp_height = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32) numbox = total_boxes.shape[0] d_x = np.ones((numbox), dtype=np.int32) d_y = np.ones((numbox), dtype=np.int32) ed_x = tmp_width.copy().astype(np.int32) ed_y = tmp_height.copy().astype(np.int32) dim_x = total_boxes[:, 0].copy().astype(np.int32) dim_y = total_boxes[:, 1].copy().astype(np.int32) e_x = total_boxes[:, 2].copy().astype(np.int32) e_y = total_boxes[:, 3].copy().astype(np.int32) tmp = np.where(e_x > width) ed_x.flat[tmp] = np.expand_dims(-e_x[tmp] + width + tmp_width[tmp], 1) e_x[tmp] = width tmp = np.where(e_y > height) ed_y.flat[tmp] = np.expand_dims(-e_y[tmp] + height + tmp_height[tmp], 1) e_y[tmp] = height tmp = np.where(dim_x < 1) d_x.flat[tmp] = np.expand_dims(2 - dim_x[tmp], 1) dim_x[tmp] = 1 tmp = np.where(dim_y < 1) d_y.flat[tmp] = np.expand_dims(2 - dim_y[tmp], 1) dim_y[tmp] = 1 return d_y, ed_y, d_x, ed_x, dim_y, e_y, dim_x, e_x, tmp_width, tmp_height # function [bbox_a] = rerec(bbox_a) def rerec(bbox_a): """Convert bbox_a to square.""" height = bbox_a[:, 3]-bbox_a[:, 1] width = bbox_a[:, 2]-bbox_a[:, 0] length = np.maximum(width, height) bbox_a[:, 0] = bbox_a[:, 0] + width * 0.5 - length * 0.5 bbox_a[:, 1] = bbox_a[:, 1] + height * 0.5 - length * 0.5 bbox_a[:, 2:4] = bbox_a[:, 0:2] + np.transpose(np.tile(length, (2, 1))) return bbox_a def imresample(img, size): """ Resample image """ # pylint: disable=no-member im_data = cv2.resize(img, (size[1], size[0]), interpolation=cv2.INTER_AREA) # @UndefinedVariable return im_data