Adding tools.py as main script for using tools, as well as integrating all feature requests from #255 and #278 (#298)

* Add tools.py command and control script for use as the main interface for the various tools. The structure and approach are the same as faceswap.py.
Add many new features to tools/sort.py: several new sorting methods, grouping by folders, logging of file renaming/moving, keeping the original files in the input directory, and improved CLI option documentation. Argument parsing has been rewritten to interface with tools.py.
Add an empty __init__.py file in the tools directory so that Python registers it as a package and sort.py and future tools can be imported easily.

* Fix various bugs where the correct sorting method would not get called.
Add new sorting method: face-cnn-dissim.
Update help documentation for face-cnn-dissim.
Change the default grouping to rename.
Update the initial print statement in all sorting/grouping methods to state precisely which method is being used.

* Major refactor and redesign.
Use dynamic method allocation to avoid large numbers of nested if-elif statements in the process() function and to allow sort and group methods to be combined easily.
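A minimal, runnable sketch of this dispatch pattern (class and method names here are illustrative stand-ins; the real implementation is SortProcessor.process() and set_process_method() in the diff below):

    import re

    class Processor:
        # Toy stand-in showing getattr-based dispatch instead of if-elif chains.
        def sort_hist(self):
            # Placeholder for a real sorting method; returns an ordered image list.
            return ["00002.png", "00001.png"]

        def final_process_rename(self, img_list):
            print("Would rename, in this order:", img_list)

        @staticmethod
        def resolve(prefix, method):
            # "face-cnn" -> "face_cnn", then prefixed: "sort_face_cnn", "group_face_cnn", ...
            return prefix + "_" + re.sub(r"-", r"_", method)

        def process(self, sort_method, final_process):
            img_list = getattr(self, self.resolve("sort", sort_method))()
            getattr(self, self.resolve("final_process", final_process))(img_list)

    Processor().process("hist", "rename")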

Change cli arguments to make them more intuitive and work with the new design.
Previous: '-g/--grouping' -> '-f/--final-processing' {folders,rename}
Previous: '-by/--by' -> '-s/--sort-by' {blur,face,face-cnn,face-cnn-dissim,face-dissim,hist,hist-dissim}
New: '-g/--group-by' {blur,face,face-cnn,hist}
Add: '--logfile' -> '-lg/--logfile' PATH_TO_LOGFILE

Greatly improve grouping performance.
Grouping now sorts first using one of the sorting methods, which makes the grouping stable and no longer dependent on how well the target files are already sorted.
Sorting and grouping methods can be combined in any way. If no -g/--group-by method is specified by the user, it defaults to the non-'-dissim' version of the sort method.
Different combinations of sorting and grouping methods work well for different sets of data.
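For example, a minimal sketch of invoking one such combination through the new tools.py entry point (the input directory name and threshold value are illustrative, not recommendations):

    import subprocess

    # Sort by histogram dissimilarity, then group the sorted result into folders
    # by plain histogram similarity, logging the file moves to the default sort_log.json.
    subprocess.run([
        "python", "tools.py", "sort",
        "-i", "aligned_faces",   # hypothetical input directory of aligned faces
        "-s", "hist-dissim",     # sort method
        "-f", "folders",         # final processing: organize into group folders
        "-g", "hist",            # grouping method
        "-t", "0.3",             # hist grouping threshold (the documented default)
        "-l",                    # log the file copies/moves
    ], check=True)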

Fixes
Fix progress updates not showing properly by setting them to print to stdout instead of stderr.
Fix bug in grouping by face-cnn where wrong score method was being called.

Misc
Add documentation for the reload_images() and splice_lists() methods because it is not obvious what they do.
Add warning message to tools.py to tell users to make sure they understand how the tool they want to use works before using it.
Add warning message to tools/sort.py to tell users to make sure they understand how the sort tool works before using it.
Update help documentation to reflect new functionality and options.
Set defaults for grouping by face-cnn so that it works properly with the correct score method.
Amend commit in order to sign it.

* Run unit tests for all options and combinations of sort and group methods: everything OK.
Fix typos in help documentation.
AbysmalBiscuit 2018-03-21 11:12:12 +01:00 committed by Clorr
parent 6ff64ef1c3
commit e0fa8c071e
3 changed files with 739 additions and 86 deletions

tools.py  Executable file  (+32)

@@ -0,0 +1,32 @@
#!/usr/bin/env python3
import sys
from lib.cli import FullHelpArgumentParser
# Importing the various tools
from tools.sort import SortProcessor
# Python version check
if sys.version_info[0] < 3:
raise Exception("This program requires at least python3.2")
if sys.version_info[0] == 3 and sys.version_info[1] < 2:
raise Exception("This program requires at least python3.2")
def bad_args(args):
parser.print_help()
exit(0)
if __name__ == "__main__":
_tools_warning = "Please backup your data and/or test the tool you want "
_tools_warning += "to use with a smaller data set to make sure you "
_tools_warning += "understand how it works."
print(_tools_warning)
parser = FullHelpArgumentParser()
subparser = parser.add_subparsers()
sort = SortProcessor(
subparser, "sort", "This command lets you sort images using various "
"methods.")
parser.set_defaults(func=bad_args)
arguments = parser.parse_args()
arguments.func(arguments)

tools/__init__.py  Normal file  (+0)

tools/sort.py

@@ -1,3 +1,4 @@
#!/usr/bin/env python3
import argparse
import os
import sys
@@ -6,85 +7,240 @@ import numpy as np
import cv2
from tqdm import tqdm
import face_recognition
from shutil import copyfile
import json
import re
if sys.version_info[0] < 3:
raise Exception("This program requires at least python3.2")
if sys.version_info[0] == 3 and sys.version_info[1] < 2:
raise Exception("This program requires at least python3.2")
class SortProcessor(object):
def __init__(self, subparser, command, description='default'):
self.arguments = None
self.changes = None
self.parse_arguments(description, subparser, command)
def __init__(self, parser):
self.init_parser_arguments(parser)
def parse_arguments(self, description, subparser, command):
parser = subparser.add_parser(
command,
help="This command lets you sort images using various methods."
" Please backup your data and/or test this tool with a "
"smaller data set to make sure you understand how it"
"works.",
description=description,
epilog="Questions and feedback: \
https://github.com/deepfakes/faceswap-playground"
)
def process_arguments(self, arguments):
self.arguments = arguments
self.process()
def init_parser_arguments(self, parser):
parser.add_argument('-i', '--input',
dest="input_dir",
default="input_dir",
help="Input directory of aligned faces.",
required=True)
parser.add_argument('-by', '--by',
parser.add_argument('-o', '--output',
dest="output_dir",
default="__default",
help="Output directory for sorted aligned faces.")
parser.add_argument('-f', '--final-process',
type=str,
choices=("blur", "hist", "face"),
dest='method',
choices=("folders", "rename"),
dest='final_process',
default="rename",
help="'folders': files are sorted using the "
"-s/--sort-by method, then they are "
"organized into folders using the "
"-g/--group-by grouping method. "
"'rename': files are sorted using the "
"-s/--sort-by method, then they are renamed. "
"Default: rename")
parser.add_argument('-t', '--ref_threshold',
type=float,
dest='min_threshold',
default=-1.0,
help="Float value. "
"Minimum threshold to use for grouping "
"comparison with 'face' and 'hist' methods. "
"The lower the value the more discriminating "
"the grouping is. "
"For face 0.6 should be enough, with 0.5 "
"being very discriminating. "
"For face-cnn 7.2 should be enough, with 4 "
"being very discriminating. "
"For hist 0.3 should be enough, with 0.2 "
"being very discriminating. "
"Be careful setting a value that's too "
"low in a directory with many images, as "
"this could result in a lot of directories "
" being created. "
"Defaults: face 0.6, face-cnn 7.2, hist 0.3")
parser.add_argument('-b', '--bins',
type=int,
dest='num_bins',
default=5,
help="Integer value. "
"Number of folders that will be used to "
"group by blur. Folder 0 will be the least "
"blurry, while the last folder will be the "
"blurriest. If the number of images doesn't "
"divide evenly into the number of bins, the "
"remaining images get put in the last bin as "
"they will be the blurriest by definition. "
"Default value: 5")
parser.add_argument('-k', '--keep',
action='store_true',
dest='keep_original',
default=False,
help="Keeps the original files in the input "
"directory. Be careful when using this with "
"rename grouping and no specified output "
"directory as this would keep the original "
"and renamed files in the same directory.")
parser.add_argument('-l', '--log-changes',
action='store_true',
dest='log_changes',
default=False,
help="Logs file renaming changes if grouping by "
"renaming, or it logs the file "
"copying/movement if grouping by folders. "
"If no log file is specified with "
"'--log-file', then a 'sort_log.json' file "
"will be created in the input directory.")
parser.add_argument('-lf', '--log-file',
dest='log_file',
default='__default',
help="Specify a log file to use for saving the "
"renaming or grouping information. "
"Default: sort_log.json")
parser.add_argument('-s', '--sort-by',
type=str,
choices=("blur", "face", "face-cnn",
"face-cnn-dissim", "face-dissim", "hist",
"hist-dissim"),
dest='sort_method',
default="hist",
help="Sort by method.")
help="Sort by method. "
"Choose how images are sorted. "
"Default: hist")
parser.add_argument('-g', '--group-by',
type=str,
choices=("blur", "face", "face-cnn", "hist"),
dest='group_method',
default="__default",
help="Group by method. "
"When -f/--final-process is set to folders, "
"choose how the images are grouped after "
"sorting. "
"Default: non-dissim version of "
"-s/--sort-by method")
parser = self.add_optional_arguments(parser)
parser.set_defaults(func=self.process_arguments)
def add_optional_arguments(self, parser):
# Override this for custom arguments
return parser
def process_arguments(self, arguments):
self.arguments = arguments
# Setting default argument values that cannot be set by argparse
# Set output dir to the same value as input dir
# if the user didn't specify it.
if self.arguments.output_dir.lower() == "__default":
self.arguments.output_dir = self.arguments.input_dir
# Set final_process to group if folders was chosen
if self.arguments.final_process.lower() == "folders":
self.arguments.final_process = "group"
# Assign default group_method if not set by user
if self.arguments.group_method == '__default':
self.arguments.group_method = self.arguments.sort_method.replace('-dissim', '')
# Assigning default threshold values based on grouping method
if self.arguments.min_threshold == -1.0 and self.arguments.final_process == "group":
method = self.arguments.group_method.lower()
if method == 'face':
self.arguments.min_threshold = 0.6
elif method == 'face-cnn':
self.arguments.min_threshold = 7.2
elif method == 'hist':
self.arguments.min_threshold = 0.3
# If logging is enabled, prepare container
if self.arguments.log_changes:
self.changes = dict()
# Assign default sort_log.json value if user didn't specify one
if self.arguments.log_file.lower() == '__default':
self.arguments.log_file = os.path.join(self.arguments.input_dir, 'sort_log.json')
self.process()
def process(self):
if self.arguments.method.lower() == 'blur':
self.process_blur()
elif self.arguments.method.lower() == 'hist':
self.process_hist()
elif self.arguments.method.lower() == 'face':
self.process_face()
"""
This method dynamically assigns the functions that will be used to run
the core process of sorting, optionally grouping, renaming/moving into
folders. After the functions are assigned they are executed.
"""
__sort_method = self.arguments.sort_method.lower()
__group_method = self.arguments.group_method.lower()
final_process = self.arguments.final_process.lower()
def process_blur(self):
# Assign the methods that will be used for processing the files
sort_method = self.set_process_method("sort", __sort_method)
group_method = self.set_process_method("group", __group_method)
final_method = self.set_process_method("final_process", final_process)
img_list = getattr(self, sort_method)()
if "group" in final_process:
# Check if non-dissim sort method and group method are not the same
if __sort_method.replace('-dissim', '') != __group_method:
img_list = self.reload_images(group_method, img_list)
img_list = getattr(self, group_method)(img_list)
else:
img_list = getattr(self, group_method)(img_list)
getattr(self, final_method)(img_list)
print ("Done.")
# Methods for sorting
def sort_blur(self):
input_dir = self.arguments.input_dir
print ("Sorting by blur...")
img_list = [ [x, self.estimate_blur(cv2.imread(x))] for x in tqdm(self.find_images(input_dir), desc="Loading") ]
img_list = [ [x, self.estimate_blur(cv2.imread(x))] for x in tqdm(self.find_images(input_dir), desc="Loading", file=sys.stdout) ]
print ("Sorting...")
img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True)
self.process_final_rename(input_dir, img_list)
print ("Done.")
def process_hist(self):
input_dir = self.arguments.input_dir
return img_list
print ("Sorting by histogram similarity...")
img_list = [ [x, cv2.calcHist([cv2.imread(x)], [0], None, [256], [0, 256]) ] for x in tqdm( self.find_images(input_dir), desc="Loading") ]
img_list_len = len(img_list)
for i in tqdm ( range(0, img_list_len-1), desc="Sorting"):
min_score = 9999.9
j_min_score = i+1
for j in range(i+1,len(img_list)):
score = cv2.compareHist(img_list[i][1], img_list[j][1], cv2.HISTCMP_BHATTACHARYYA)
if score < min_score:
min_score = score
j_min_score = j
img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1]
self.process_final_rename (input_dir, img_list)
print ("Done.")
def process_face(self):
def sort_face(self):
input_dir = self.arguments.input_dir
print ("Sorting by face similarity...")
img_list = [ [x, face_recognition.face_encodings(cv2.imread(x)) ] for x in tqdm( self.find_images(input_dir), desc="Loading") ]
img_list = [ [x, face_recognition.face_encodings(cv2.imread(x)) ] for x in tqdm( self.find_images(input_dir), desc="Loading", file=sys.stdout) ]
img_list_len = len(img_list)
for i in tqdm ( range(0, img_list_len-1), desc="Sorting"):
min_score = 9999.9
for i in tqdm ( range(0, img_list_len-1), desc="Sorting", file=sys.stdout):
min_score = float("inf")
j_min_score = i+1
for j in range(i+1,len(img_list)):
@@ -93,40 +249,415 @@ class SortProcessor(object):
if f1encs is not None and f2encs is not None and len(f1encs) > 0 and len(f2encs) > 0:
score = face_recognition.face_distance(f1encs[0], f2encs)[0]
else:
score = 9999.9
score = float("inf")
if score < min_score:
min_score = score
j_min_score = j
img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1]
self.process_final_rename (input_dir, img_list)
return img_list
print ("Done.")
def sort_face_dissim(self):
input_dir = self.arguments.input_dir
def process_final_rename(self, input_dir, img_list):
for i in tqdm( range(0,len(img_list)), desc="Renaming" , leave=False):
print ("Sorting by face dissimilarity...")
img_list = [ [x, face_recognition.face_encodings(cv2.imread(x)), 0 ] for x in tqdm( self.find_images(input_dir), desc="Loading", file=sys.stdout) ]
img_list_len = len(img_list)
for i in tqdm ( range(0, img_list_len), desc="Sorting", file=sys.stdout):
score_total = 0
for j in range( 0, img_list_len):
if i == j:
continue
try:
score_total += face_recognition.face_distance([img_list[i][1]], [img_list[j][1]])
except:
pass
img_list[i][2] = score_total
print ("Sorting...")
img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True)
return img_list
def sort_face_cnn(self):
from lib import FaceLandmarksExtractor
input_dir = self.arguments.input_dir
print ("Sorting by face-cnn similarity...")
img_list = []
for x in tqdm( self.find_images(input_dir), desc="Loading", file=sys.stdout):
d = FaceLandmarksExtractor.extract(cv2.imread(x), 'cnn', True)
img_list.append( [x, np.array(d[0][1]) if len(d) > 0 else np.zeros ( (68,2) ) ] )
img_list_len = len(img_list)
for i in tqdm ( range(0, img_list_len-1), desc="Sorting", file=sys.stdout):
min_score = float("inf")
j_min_score = i+1
for j in range(i+1,len(img_list)):
fl1 = img_list[i][1]
fl2 = img_list[j][1]
score = np.sum ( np.absolute ( (fl2 - fl1).flatten() ) )
if score < min_score:
min_score = score
j_min_score = j
img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1]
return img_list
def sort_face_cnn_dissim(self):
from lib import FaceLandmarksExtractor
input_dir = self.arguments.input_dir
print ("Sorting by face-cnn dissimilarity...")
img_list = []
for x in tqdm( self.find_images(input_dir), desc="Loading", file=sys.stdout):
d = FaceLandmarksExtractor.extract(cv2.imread(x), 'cnn', True)
img_list.append( [x, np.array(d[0][1]) if len(d) > 0 else np.zeros ( (68,2) ), 0 ] )
img_list_len = len(img_list)
for i in tqdm( range(0, img_list_len-1), desc="Sorting", file=sys.stdout):
score_total = 0
for j in range(i+1,len(img_list)):
if i == j:
continue
fl1 = img_list[i][1]
fl2 = img_list[j][1]
score_total += np.sum ( np.absolute ( (fl2 - fl1).flatten() ) )
img_list[i][2] = score_total
print ("Sorting...")
img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True)
return img_list
def sort_hist(self):
input_dir = self.arguments.input_dir
print ("Sorting by histogram similarity...")
img_list = [ [x, cv2.calcHist([cv2.imread(x)], [0], None, [256], [0, 256]) ] for x in tqdm( self.find_images(input_dir), desc="Loading", file=sys.stdout) ]
img_list_len = len(img_list)
for i in tqdm( range(0, img_list_len-1), desc="Sorting", file=sys.stdout):
min_score = float("inf")
j_min_score = i+1
for j in range(i+1,len(img_list)):
score = cv2.compareHist(img_list[i][1], img_list[j][1], cv2.HISTCMP_BHATTACHARYYA)
if score < min_score:
min_score = score
j_min_score = j
img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1]
return img_list
def sort_hist_dissim(self):
input_dir = self.arguments.input_dir
print ("Sorting by histogram dissimilarity...")
img_list = [ [x, cv2.calcHist([cv2.imread(x)], [0], None, [256], [0, 256]), 0] for x in tqdm( self.find_images(input_dir), desc="Loading", file=sys.stdout) ]
img_list_len = len(img_list)
for i in tqdm ( range(0, img_list_len), desc="Sorting", file=sys.stdout):
score_total = 0
for j in range( 0, img_list_len):
if i == j:
continue
score_total += cv2.compareHist(img_list[i][1], img_list[j][1], cv2.HISTCMP_BHATTACHARYYA)
img_list[i][2] = score_total
print ("Sorting...")
img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True)
return img_list
# Methods for grouping
def group_blur(self, img_list):
# Starting the binning process
num_bins = self.arguments.num_bins
# The last bin will get all extra images if it's
# not possible to distribute them evenly
num_per_bin = len(img_list) // num_bins
remainder = len(img_list) % num_bins
print ("Grouping by blur...")
bins = [ [] for _ in range(num_bins) ]
image_index = 0
for i in range(num_bins):
for j in range(num_per_bin):
bins[i].append(img_list[image_index][0])
image_index += 1
# If remainder is 0, nothing gets added to the last bin.
for i in range(1, remainder + 1):
bins[-1].append(img_list[-i][0])
return bins
def group_face(self, img_list):
print ("Grouping by face similarity...")
# Groups are of the form: group_num -> reference face
reference_groups = dict()
# Bins array, where index is the group number and value is
# an array containing the file paths to the images in that group.
# The first group (0), is always the non-face group.
bins = [[]]
# Comparison threshold used to decide how similar
# faces have to be to be grouped together.
min_threshold = self.arguments.min_threshold
img_list_len = len(img_list)
for i in tqdm(range(1, img_list_len), desc="Grouping", file=sys.stdout):
f1encs = img_list[i][1]
# Check if current image is a face, if not then
# add it immediately to the non-face list.
if f1encs is None or len(f1encs) <= 0:
bins[0].append(img_list[i][0])
else:
current_best = [-1, float("inf")]
for key, references in reference_groups.items():
# Non-faces are not added to reference_groups dict, thus
# removing the need to check that f2encs is a face.
# The try-catch block is to handle the first face that gets
# processed, as the first value is None.
try:
score = self.get_avg_score_faces(f1encs, references)
except TypeError:
score = float("inf")
except ZeroDivisionError:
score = float("inf")
if score < current_best[1]:
current_best[0], current_best[1] = key, score
if current_best[1] < min_threshold:
reference_groups[current_best[0]].append(f1encs[0])
bins[current_best[0]].append(img_list[i][0])
else:
reference_groups[len(reference_groups)] = img_list[i][1]
bins.append([img_list[i][0]])
return bins
def group_face_cnn(self, img_list):
print ("Grouping by face-cnn similarity...")
# Groups are of the form: group_num -> reference faces
reference_groups = dict()
# Bins array, where index is the group number and value is
# an array containing the file paths to the images in that group.
bins = []
# Comparison threshold used to decide how similar
# faces have to be to be grouped together.
# It is multiplied by 1000 here to allow the cli option to use smaller
# numbers.
min_threshold = self.arguments.min_threshold * 1000
img_list_len = len(img_list)
for i in tqdm ( range(0, img_list_len - 1), desc="Grouping", file=sys.stdout):
fl1 = img_list[i][1]
current_best = [-1, float("inf")]
for key, references in reference_groups.items():
try:
score = self.get_avg_score_faces_cnn(fl1, references)
except TypeError:
score = float("inf")
except ZeroDivisionError:
score = float("inf")
if score < current_best[1]:
current_best[0], current_best[1] = key, score
if current_best[1] < min_threshold:
reference_groups[current_best[0]].append(fl1[0])
bins[current_best[0]].append(img_list[i][0])
else:
reference_groups[len(reference_groups)] = [img_list[i][1]]
bins.append([img_list[i][0]])
return bins
def group_hist(self, img_list):
print ("Grouping by histogram...")
# Groups are of the form: group_num -> reference histogram
reference_groups = dict()
# Bins array, where index is the group number and value is
# an array containing the file paths to the images in that group
bins = []
min_threshold = self.arguments.min_threshold
img_list_len = len(img_list)
reference_groups[0] = [img_list[0][1]]
bins.append([img_list[0][0]])
for i in tqdm(range(1, img_list_len), desc="Grouping", file=sys.stdout):
current_best = [-1, float("inf")]
for key, value in reference_groups.items():
score = self.get_avg_score_hist(img_list[i][1], value)
if score < current_best[1]:
current_best[0], current_best[1] = key, score
if current_best[1] < min_threshold:
reference_groups[current_best[0]].append(img_list[i][1])
bins[current_best[0]].append(img_list[i][0])
else:
reference_groups[len(reference_groups)] = [img_list[i][1]]
bins.append([img_list[i][0]])
return bins
# Final process methods
def final_process_rename(self, img_list):
output_dir = self.arguments.output_dir
process_file = self.set_process_file_method(self.arguments.log_changes, self.arguments.keep_original)
# Make sure output directory exists
if not os.path.exists (output_dir):
os.makedirs (output_dir)
description = ("Copying and Renaming" if self.arguments.keep_original else "Moving and Renaming")
for i in tqdm(range(0, len(img_list)), desc=description, leave=False, file=sys.stdout):
src = img_list[i][0]
src_basename = os.path.basename(src)
dst = os.path.join (input_dir, '%.5d_%s' % (i, src_basename ) )
dst = os.path.join (output_dir, '%.5d_%s' % (i, src_basename ) )
try:
os.rename (src, dst)
except:
process_file (src, dst, self.changes)
except FileNotFoundError as e:
print(e)
print ('fail to rename %s' % (src) )
for i in tqdm( range(0,len(img_list)) , desc="Renaming" ):
src = img_list[i][0]
for i in tqdm( range(0,len(img_list)) , desc=description, file=sys.stdout):
renaming = self.set_renaming_method(self.arguments.log_changes)
src, dst = renaming(img_list[i][0], output_dir, i, self.changes)
try:
os.rename (src, dst)
except FileNotFoundError as e:
print(e)
print ('fail to rename %s' % (src) )
if self.arguments.log_changes:
self.write_to_log(self.arguments.log_file, self.changes)
def final_process_group(self, bins):
output_dir = self.arguments.output_dir
process_file = self.set_process_file_method(self.arguments.log_changes, self.arguments.keep_original)
# First create new directories to avoid checking
# for directory existence in the moving loop
print ("Creating group directories.")
for i in range(len(bins)):
directory = os.path.join (output_dir, str(i))
if not os.path.exists (directory):
os.makedirs (directory)
description = ("Copying into Groups" if self.arguments.keep_original else "Moving into Groups")
print ("Total groups found: {}".format(len(bins)))
for i in tqdm(range(len(bins)), desc=description, file=sys.stdout):
for j in range(len(bins[i])):
src = bins[i][j]
src_basename = os.path.basename (src)
src = os.path.join (input_dir, '%.5d_%s' % (i, src_basename) )
dst = os.path.join (input_dir, '%.5d%s' % (i, os.path.splitext(src_basename)[1] ) )
dst = os.path.join (output_dir, str(i), src_basename)
try:
os.rename (src, dst)
except:
print ('fail to rename %s' % (src) )
process_file (src, dst, self.changes)
except FileNotFoundError as e:
print (e)
print ('Failed to move {0} to {1}'.format(src, dst))
def find_images(self, input_dir):
if self.arguments.log_changes:
self.write_to_log(self.arguments.log_file, self.changes)
# Various helper methods
def reload_images(self, group_method, img_list):
"""
Reloads the image list by replacing the comparative values with those
that the chosen grouping method expects.
:param group_method: str name of the grouping method that will be used.
:param img_list: image list that has been sorted by one of the sort
methods.
:return: img_list but with the comparative values that the chosen
grouping method expects.
"""
input_dir = self.arguments.input_dir
print("Preparing to group...")
if group_method == 'group_blur':
temp_list = [[x, self.estimate_blur(cv2.imread(x))] for x in tqdm(self.find_images(input_dir), desc="Reloading", file=sys.stdout)]
elif group_method == 'group_face':
temp_list = [[x, face_recognition.face_encodings(cv2.imread(x))] for x in tqdm(self.find_images(input_dir), desc="Reloading", file=sys.stdout)]
elif group_method == 'group_face_cnn':
from lib import FaceLandmarksExtractor
temp_list = []
for x in tqdm(self.find_images(input_dir), desc="Reloading", file=sys.stdout):
d = FaceLandmarksExtractor.extract(cv2.imread(x), 'cnn', True)
temp_list.append([x, np.array(d[0][1]) if len(d) > 0 else np.zeros((68, 2))])
elif group_method == 'group_hist':
temp_list = [[x, cv2.calcHist([cv2.imread(x)], [0], None, [256], [0, 256])] for x in tqdm(self.find_images(input_dir), desc="Reloading", file=sys.stdout)]
else:
raise ValueError("{} group_method not found.".format(group_method))
return self.splice_lists(img_list, temp_list)
@staticmethod
def splice_lists(sorted_list, new_vals_list):
"""
This method replaces the value at index 1 in each sub-list in the
sorted_list with the value that is calculated for the same img_path,
but found in new_vals_list.
Format of lists: [[img_path, value], [img_path2, value2], ...]
:param sorted_list: list that has been sorted by one of the sort
methods.
:param new_vals_list: list that has been loaded by a different method
than the sorted_list.
:return: list that is sorted in the same way as the input sorted list
but the values corresponding to each image are from new_vals_list.
"""
new_list = []
# Make new list of just image paths to serve as an index
val_index_list = [i[0] for i in new_vals_list]
for i in tqdm(range(len(sorted_list)), desc="Splicing", file=sys.stdout):
current_image = sorted_list[i][0]
new_val_index = val_index_list.index(current_image)
new_list.append([current_image, new_vals_list[new_val_index][1]])
return new_list
@staticmethod
def find_images(input_dir):
result = []
extensions = [".jpg", ".png", ".jpeg"]
for root, dirs, files in os.walk(input_dir):
@@ -135,7 +666,8 @@ class SortProcessor(object):
result.append (os.path.join(root, file))
return result
def estimate_blur(self, image):
@staticmethod
def estimate_blur(image):
if image.ndim == 3:
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
@@ -143,20 +675,109 @@ class SortProcessor(object):
score = np.var(blur_map)
return score
def error(self, message):
self.print_help(sys.stderr)
args = {'prog': self.prog, 'message': message}
self.exit(2, '%(prog)s: error: %(message)s\n' % args)
@staticmethod
def set_process_method(prefix, method):
_method = re.sub(r'-', r'_', method)
return prefix + "_" + _method
@staticmethod
def set_process_file_method(log_changes, keep_original):
"""
Assigns the final file processing method based on whether changes are
being logged and whether the original files are being kept in the
input directory.
Relevant cli arguments: -k, -l
:return: function reference
"""
if log_changes:
if keep_original:
def process_file(src, dst, changes):
copyfile(src, dst)
changes[src] = dst
return process_file
else:
def process_file(src, dst, changes):
os.rename(src, dst)
changes[src] = dst
return process_file
else:
if keep_original:
def process_file(src, dst, changes):
copyfile(src, dst)
return process_file
else:
def process_file(src, dst, changes):
os.rename(src, dst)
return process_file
@staticmethod
def set_renaming_method(log_changes):
if log_changes:
def renaming(src, output_dir, i, changes):
src_basename = os.path.basename(src)
__src = os.path.join (output_dir, '%.5d_%s' % (i, src_basename) )
dst = os.path.join (output_dir, '%.5d%s' % (i, os.path.splitext(src_basename)[1] ) )
changes[src] = dst
return __src, dst
return renaming
else:
def renaming(src, output_dir, i, changes):
src_basename = os.path.basename(src)
src = os.path.join (output_dir, '%.5d_%s' % (i, src_basename) )
dst = os.path.join (output_dir, '%.5d%s' % (i, os.path.splitext(src_basename)[1] ) )
return src, dst
return renaming
@staticmethod
def get_avg_score_hist(img1, references):
scores = []
for img2 in references:
score = cv2.compareHist(img1, img2, cv2.HISTCMP_BHATTACHARYYA)
scores.append(score)
return sum(scores)/len(scores)
@staticmethod
def get_avg_score_faces(f1encs, references):
scores = []
for f2encs in references:
score = face_recognition.face_distance(f1encs, f2encs)[0]
scores.append(score)
return sum(scores)/len(scores)
@staticmethod
def get_avg_score_faces_cnn(fl1, references):
scores = []
for fl2 in references:
score = np.sum ( np.absolute ( (fl2 - fl1).flatten() ) )
scores.append(score)
return sum(scores)/len(scores)
@staticmethod
def write_to_log(log_file, changes):
with open(log_file, 'w') as lf:
json.dump(changes, lf, sort_keys=True, indent=4)
def bad_args(args):
parser.print_help()
exit(0)
if __name__ == "__main__":
__warning_string = "Important: face-cnn method will cause an error when "
__warning_string += "this tool is called directly instead of through the "
__warning_string += "tools.py command script."
print (__warning_string)
print ("Images sort tool.\n")
parser = argparse.ArgumentParser()
parser.set_defaults(func=bad_args)
subparser = parser.add_subparsers()
sort = SortProcessor(
subparser, "sort", "Sort images using various methods.")
sort = SortProcessor(parser)
sort.process_arguments(parser.parse_args())
parser.set_defaults(func=bad_args)
arguments = parser.parse_args()
arguments.func(arguments)
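A small illustrative follow-up (the directory name is hypothetical): when -l/--log-changes is enabled, write_to_log() above dumps a mapping of each original path to its new path, which can be inspected like this:

    import json

    # Read back the change log written by write_to_log() (default: sort_log.json
    # in the input directory) and print each original -> new path pair.
    with open("aligned_faces/sort_log.json") as log_file:
        changes = json.load(log_file)

    for src, dst in sorted(changes.items()):
        print("{} -> {}".format(src, dst))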