From d7f5a3a6919f5c36d8d027a50665c849ba5f428a Mon Sep 17 00:00:00 2001 From: David Sandberg Date: Sat, 31 Mar 2018 00:16:03 +0200 Subject: [PATCH 01/12] Added a new input pipeline with the possibility to control flip, crop etc per image --- src/facenet.py | 137 +++++++++++++++++++++++++++++++------------ src/lfw.py | 6 +- src/train_softmax.py | 112 +++++++++++++++++++---------------- 3 files changed, 161 insertions(+), 94 deletions(-) diff --git a/src/facenet.py b/src/facenet.py index e958e303b..884626b3e 100644 --- a/src/facenet.py +++ b/src/facenet.py @@ -30,7 +30,6 @@ import os from subprocess import Popen, PIPE import tensorflow as tf -from tensorflow.python.framework import ops import numpy as np from scipy import misc from sklearn.model_selection import KFold @@ -39,6 +38,7 @@ import random import re from tensorflow.python.platform import gfile +import math from six import iteritems def triplet_loss(anchor, positive, negative, alpha): @@ -118,38 +118,75 @@ def random_rotate_image(image): angle = np.random.uniform(low=-10.0, high=10.0) return misc.imrotate(image, angle, 'bicubic') -def read_and_augment_data(image_list, label_list, image_size, batch_size, max_nrof_epochs, - random_crop, random_flip, random_rotate, nrof_preprocess_threads, shuffle=True): - - images = ops.convert_to_tensor(image_list, dtype=tf.string) - labels = ops.convert_to_tensor(label_list, dtype=tf.int32) - - # Makes an input queue - input_queue = tf.train.slice_input_producer([images, labels], - num_epochs=max_nrof_epochs, shuffle=shuffle) - - images_and_labels = [] +# def read_and_augment_data(image_list, label_list, image_size, batch_size, max_nrof_epochs, +# random_crop, random_flip, random_rotate, nrof_preprocess_threads, shuffle=True): +# +# images = ops.convert_to_tensor(image_list, dtype=tf.string) +# labels = ops.convert_to_tensor(label_list, dtype=tf.int32) +# +# # Makes an input queue +# input_queue = tf.train.slice_input_producer([images, labels], +# num_epochs=max_nrof_epochs, shuffle=shuffle) +# +# images_and_labels = [] +# for _ in range(nrof_preprocess_threads): +# image, label = read_images_from_disk(input_queue) +# if random_rotate: +# image = tf.py_func(random_rotate_image, [image], tf.uint8) +# if random_crop: +# image = tf.random_crop(image, [image_size, image_size, 3]) +# else: +# image = tf.image.resize_image_with_crop_or_pad(image, image_size, image_size) +# if random_flip: +# image = tf.image.random_flip_left_right(image) +# #pylint: disable=no-member +# image.set_shape((image_size, image_size, 3)) +# image = tf.image.per_image_standardization(image) +# images_and_labels.append([image, label]) +# +# image_batch, label_batch = tf.train.batch_join( +# images_and_labels, batch_size=batch_size, +# capacity=4 * nrof_preprocess_threads * batch_size, +# allow_smaller_final_batch=True) +# +# return image_batch, label_batch + +# 1: Random rotate 2: Random crop 4: Random flip 8: Fixed image standardization 16: Flip +RANDOM_ROTATE = 1 +RANDOM_CROP = 2 +RANDOM_FLIP = 4 +FIXED_STANDARDIZATION = 8 +FLIP = 16 +def create_input_pipeline(images_and_labels_list, input_queue, image_size, nrof_preprocess_threads): for _ in range(nrof_preprocess_threads): - image, label = read_images_from_disk(input_queue) - if random_rotate: - image = tf.py_func(random_rotate_image, [image], tf.uint8) - if random_crop: - image = tf.random_crop(image, [image_size, image_size, 3]) - else: - image = tf.image.resize_image_with_crop_or_pad(image, image_size, image_size) - if random_flip: - image = tf.image.random_flip_left_right(image) - #pylint: disable=no-member - image.set_shape((image_size, image_size, 3)) - image = tf.image.per_image_standardization(image) - images_and_labels.append([image, label]) + filenames, label, control = input_queue.dequeue() + images = [] + for filename in tf.unstack(filenames): + file_contents = tf.read_file(filename) + image = tf.image.decode_image(file_contents, 3) + image = tf.cond(get_control_flag(control[0], RANDOM_ROTATE), + lambda:tf.py_func(random_rotate_image, [image], tf.uint8), + lambda:tf.identity(image)) + image = tf.cond(get_control_flag(control[0], RANDOM_CROP), + lambda:tf.random_crop(image, image_size + (3,)), + lambda:tf.image.resize_image_with_crop_or_pad(image, image_size[0], image_size[1])) + image = tf.cond(get_control_flag(control[0], RANDOM_FLIP), + lambda:tf.image.random_flip_left_right(image), + lambda:tf.identity(image)) + image = tf.cond(get_control_flag(control[0], FIXED_STANDARDIZATION), + lambda:(tf.cast(image, tf.float32) - 127.5)/128.0, + lambda:tf.image.per_image_standardization(image)) + image = tf.cond(get_control_flag(control[0], FLIP), + lambda:tf.image.flip_left_right(image), + lambda:tf.identity(image)) + #pylint: disable=no-member + image.set_shape(image_size + (3,)) + images.append(image) + images_and_labels_list.append([images, label]) + return images_and_labels_list - image_batch, label_batch = tf.train.batch_join( - images_and_labels, batch_size=batch_size, - capacity=4 * nrof_preprocess_threads * batch_size, - allow_smaller_final_batch=True) - - return image_batch, label_batch +def get_control_flag(control, field): + return tf.equal(tf.mod(tf.floor_div(control, field), 2), 1) def _add_loss_summaries(total_loss): """Add summaries for losses. @@ -412,8 +449,24 @@ def get_model_filenames(model_dir): max_step = step ckpt_file = step_str.groups()[0] return meta_file, ckpt_file + +def distance(embeddings1, embeddings2, distance_metric=0): + if distance_metric==0: + # Euclidian distance + diff = np.subtract(embeddings1, embeddings2) + dist = np.sum(np.square(diff),1) + elif distance_metric==1: + # Distance based on cosine similarity + dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1) + norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1) + similarity = dot / norm + dist = np.arccos(similarity) / math.pi + else: + raise 'Undefined distance metric %d' % distance_metric + + return dist -def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10): +def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False): assert(embeddings1.shape[0] == embeddings2.shape[0]) assert(embeddings1.shape[1] == embeddings2.shape[1]) nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) @@ -424,11 +477,14 @@ def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_fold fprs = np.zeros((nrof_folds,nrof_thresholds)) accuracy = np.zeros((nrof_folds)) - diff = np.subtract(embeddings1, embeddings2) - dist = np.sum(np.square(diff),1) indices = np.arange(nrof_pairs) for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): + if subtract_mean: + mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) + else: + mean = 0.0 + dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) # Find the best threshold for the fold acc_train = np.zeros((nrof_thresholds)) @@ -439,8 +495,8 @@ def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_fold tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set]) _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) - tpr = np.mean(tprs,0) - fpr = np.mean(fprs,0) + tpr = np.mean(tprs,0) + fpr = np.mean(fprs,0) return tpr, fpr, accuracy def calculate_accuracy(threshold, dist, actual_issame): @@ -457,7 +513,7 @@ def calculate_accuracy(threshold, dist, actual_issame): -def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10): +def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False): assert(embeddings1.shape[0] == embeddings2.shape[0]) assert(embeddings1.shape[1] == embeddings2.shape[1]) nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) @@ -467,11 +523,14 @@ def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_targe val = np.zeros(nrof_folds) far = np.zeros(nrof_folds) - diff = np.subtract(embeddings1, embeddings2) - dist = np.sum(np.square(diff),1) indices = np.arange(nrof_pairs) for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): + if subtract_mean: + mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) + else: + mean = 0.0 + dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) # Find the threshold that gives FAR = far_target far_train = np.zeros(nrof_thresholds) diff --git a/src/lfw.py b/src/lfw.py index a44e4b778..8539206d6 100644 --- a/src/lfw.py +++ b/src/lfw.py @@ -31,16 +31,16 @@ import numpy as np import facenet -def evaluate(embeddings, actual_issame, nrof_folds=10): +def evaluate(embeddings, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False): # Calculate evaluation metrics thresholds = np.arange(0, 4, 0.01) embeddings1 = embeddings[0::2] embeddings2 = embeddings[1::2] tpr, fpr, accuracy = facenet.calculate_roc(thresholds, embeddings1, embeddings2, - np.asarray(actual_issame), nrof_folds=nrof_folds) + np.asarray(actual_issame), nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) thresholds = np.arange(0, 4, 0.001) val, val_std, far = facenet.calculate_val(thresholds, embeddings1, embeddings2, - np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds) + np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) return tpr, fpr, accuracy, val, val_std, far def get_paths(lfw_dir, pairs, file_ext): diff --git a/src/train_softmax.py b/src/train_softmax.py index c3326af68..7be9dca43 100644 --- a/src/train_softmax.py +++ b/src/train_softmax.py @@ -46,6 +46,7 @@ def main(args): network = importlib.import_module(args.model_def) + image_size = (args.image_size, args.image_size) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) @@ -101,46 +102,25 @@ def main(args): index_dequeue_op = index_queue.dequeue_many(args.batch_size*args.epoch_size, 'index_dequeue') learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') - batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') - phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') - image_paths_placeholder = tf.placeholder(tf.string, shape=(None,1), name='image_paths') - - labels_placeholder = tf.placeholder(tf.int64, shape=(None,1), name='labels') + labels_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='labels') + control_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='control') - input_queue = data_flow_ops.FIFOQueue(capacity=100000, - dtypes=[tf.string, tf.int64], - shapes=[(1,), (1,)], - shared_name=None, name=None) - enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder], name='enqueue_op') - - nrof_preprocess_threads = 4 images_and_labels = [] - for _ in range(nrof_preprocess_threads): - filenames, label = input_queue.dequeue() - images = [] - for filename in tf.unstack(filenames): - file_contents = tf.read_file(filename) - image = tf.image.decode_image(file_contents, channels=3) - if args.random_rotate: - image = tf.py_func(facenet.random_rotate_image, [image], tf.uint8) - if args.random_crop: - image = tf.random_crop(image, [args.image_size, args.image_size, 3]) - else: - image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size) - if args.random_flip: - image = tf.image.random_flip_left_right(image) - - #pylint: disable=no-member - image.set_shape((args.image_size, args.image_size, 3)) - images.append(tf.image.per_image_standardization(image)) - images_and_labels.append([images, label]) - + nrof_preprocess_threads = 4 + + input_queue = data_flow_ops.FIFOQueue(capacity=2000000, + dtypes=[tf.string, tf.int32, tf.int32], + shapes=[(1,), (1,), (1,)], + shared_name=None, name=None) + enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder, control_placeholder], name='enqueue_op') + images_and_labels = facenet.create_input_pipeline(images_and_labels, input_queue, image_size, nrof_preprocess_threads) + image_batch, label_batch = tf.train.batch_join( images_and_labels, batch_size=batch_size_placeholder, - shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, + shapes=[image_size + (3,), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch = tf.identity(image_batch, 'image_batch') @@ -215,16 +195,18 @@ def main(args): epoch = step // args.epoch_size # Train for one epoch train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, - learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, global_step, - total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file) + learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, global_step, + total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file, + args.random_rotate, args.random_crop, args.random_flip) # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) # Evaluate on LFW if args.lfw_dir: - evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, - embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer) + evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, + embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, + args.lfw_distance_metric, args.lfw_subtract_mean, args.lfw_use_flipped_images) return model_dir def find_threshold(var, percentile): @@ -259,8 +241,8 @@ def filter_dataset(dataset, data_filename, percentile, min_nrof_images_per_class return filtered_dataset def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, - learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, global_step, - loss, train_op, summary_op, summary_writer, regularization_losses, learning_rate_schedule_file): + learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, global_step, + loss, train_op, summary_op, summary_writer, regularization_losses, learning_rate_schedule_file, random_rotate, random_crop, random_flip): batch_number = 0 if args.learning_rate>0.0: @@ -275,7 +257,9 @@ def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_o # Enqueue one epoch of image paths and labels labels_array = np.expand_dims(np.array(label_epoch),1) image_paths_array = np.expand_dims(np.array(image_epoch),1) - sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array}) + control_value = facenet.RANDOM_ROTATE * random_rotate + facenet.RANDOM_CROP * random_crop + facenet.RANDOM_FLIP * random_flip + control_array = np.ones_like(labels_array) * control_value + sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array, control_placeholder: control_array}) # Training loop train_time = 0 @@ -299,33 +283,51 @@ def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_o summary_writer.add_summary(summary, step) return step -def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, - embeddings, labels, image_paths, actual_issame, batch_size, nrof_folds, log_dir, step, summary_writer): +def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, + embeddings, labels, image_paths, actual_issame, batch_size, nrof_folds, log_dir, step, summary_writer, distance_metric, subtract_mean, use_flipped_images): start_time = time.time() # Run forward pass to calculate embeddings print('Runnning forward pass on LFW images') # Enqueue one epoch of image paths and labels - labels_array = np.expand_dims(np.arange(0,len(image_paths)),1) - image_paths_array = np.expand_dims(np.array(image_paths),1) - sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array}) + nrof_embeddings = len(actual_issame)*2 # nrof_pairs * nrof_images_per_pair + nrof_flips = 2 if use_flipped_images else 1 + nrof_images = nrof_embeddings * nrof_flips + labels_array = np.expand_dims(np.arange(0,nrof_images),1) + image_paths_array = np.expand_dims(np.repeat(np.array(image_paths),nrof_flips),1) + if use_flipped_images: + # Flip every second image + control_array = (labels_array % 2)*16 + else: + control_array = np.zeros_like(labels_array) + sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array, control_placeholder: control_array}) - embedding_size = embeddings.get_shape()[1] - nrof_images = len(actual_issame)*2 + embedding_size = int(embeddings.get_shape()[1]) assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size' nrof_batches = nrof_images // batch_size emb_array = np.zeros((nrof_images, embedding_size)) lab_array = np.zeros((nrof_images,)) - for _ in range(nrof_batches): + for i in range(nrof_batches): feed_dict = {phase_train_placeholder:False, batch_size_placeholder:batch_size} emb, lab = sess.run([embeddings, labels], feed_dict=feed_dict) lab_array[lab] = lab - emb_array[lab] = emb - + emb_array[lab, :] = emb + if i % 10 == 9: + print('.', end='') + sys.stdout.flush() + print('') + embeddings = np.zeros((nrof_embeddings, embedding_size*nrof_flips)) + if use_flipped_images: + # Concatenate embeddings for flipped and non flipped iversion of the images + embeddings[:,:embedding_size] = emb_array[0::2,:] + embeddings[:,embedding_size:] = emb_array[1::2,:] + else: + embeddings = emb_array + assert np.array_equal(lab_array, np.arange(nrof_images))==True, 'Wrong labels used for evaluation, possibly caused by training examples left in the input pipeline' - _, _, accuracy, val, val_std, far = lfw.evaluate(emb_array, actual_issame, nrof_folds=nrof_folds) + _, _, accuracy, val, val_std, far = lfw.evaluate(embeddings, actual_issame, nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) - print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy), np.std(accuracy))) + print('Accuracy: %2.5f+-%2.5f' % (np.mean(accuracy), np.std(accuracy))) print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far)) lfw_time = time.time() - start_time # Add validation loss and accuracy to summary @@ -439,6 +441,12 @@ def parse_arguments(argv): help='Number of images to process in a batch in the LFW test set.', default=100) parser.add_argument('--lfw_nrof_folds', type=int, help='Number of folds to use for cross validation. Mainly used for testing.', default=10) + parser.add_argument('--lfw_distance_metric', type=int, + help='Type of distance metric to use. 0: Euclidian, 1:Cosine similarity distance.', default=0) + parser.add_argument('--lfw_use_flipped_images', + help='Concatenates embeddings for the image and its horizontally flipped counterpart.', action='store_true') + parser.add_argument('--lfw_subtract_mean', + help='Subtract feature mean before calculating distance.', action='store_true') return parser.parse_args(argv) From 4b98361edf086838169663cd4f85e0b3d24a9475 Mon Sep 17 00:00:00 2001 From: David Sandberg Date: Sat, 31 Mar 2018 10:46:31 +0200 Subject: [PATCH 02/12] Added automatic detection of LFW file extensions --- src/lfw.py | 18 +++++++++++++----- src/train_softmax.py | 4 +--- src/train_tripletloss.py | 4 +--- src/validate_on_lfw.py | 4 +--- tmp/test_invariance_on_lfw.py | 4 +--- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/lfw.py b/src/lfw.py index 8539206d6..91944332d 100644 --- a/src/lfw.py +++ b/src/lfw.py @@ -43,18 +43,18 @@ def evaluate(embeddings, actual_issame, nrof_folds=10, distance_metric=0, subtra np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) return tpr, fpr, accuracy, val, val_std, far -def get_paths(lfw_dir, pairs, file_ext): +def get_paths(lfw_dir, pairs): nrof_skipped_pairs = 0 path_list = [] issame_list = [] for pair in pairs: if len(pair) == 3: - path0 = os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1])+'.'+file_ext) - path1 = os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[2])+'.'+file_ext) + path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1]))) + path1 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[2]))) issame = True elif len(pair) == 4: - path0 = os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1])+'.'+file_ext) - path1 = os.path.join(lfw_dir, pair[2], pair[2] + '_' + '%04d' % int(pair[3])+'.'+file_ext) + path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1]))) + path1 = add_extension(os.path.join(lfw_dir, pair[2], pair[2] + '_' + '%04d' % int(pair[3]))) issame = False if os.path.exists(path0) and os.path.exists(path1): # Only add the pair if both paths exist path_list += (path0,path1) @@ -65,6 +65,14 @@ def get_paths(lfw_dir, pairs, file_ext): print('Skipped %d image pairs' % nrof_skipped_pairs) return path_list, issame_list + +def add_extension(path): + if os.path.exists(path+'.jpg'): + return path+'.jpg' + elif os.path.exists(path+'.png'): + return path+'.png' + else: + raise RuntimeError('No file "%s" with extension png or jpg.' % path) def read_pairs(pairs_filename): pairs = [] diff --git a/src/train_softmax.py b/src/train_softmax.py index 7be9dca43..efa09ea20 100644 --- a/src/train_softmax.py +++ b/src/train_softmax.py @@ -83,7 +83,7 @@ def main(args): # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images - lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext) + lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) with tf.Graph().as_default(): tf.set_random_seed(args.seed) @@ -433,8 +433,6 @@ def parse_arguments(argv): # Parameters for validation on LFW parser.add_argument('--lfw_pairs', type=str, help='The file containing the pairs to use for validation.', default='data/pairs.txt') - parser.add_argument('--lfw_file_ext', type=str, - help='The file extension for the LFW dataset.', default='png', choices=['jpg', 'png']) parser.add_argument('--lfw_dir', type=str, help='Path to the data directory containing aligned face patches.', default='') parser.add_argument('--lfw_batch_size', type=int, diff --git a/src/train_tripletloss.py b/src/train_tripletloss.py index c5c9e9937..8bb2d66fa 100644 --- a/src/train_tripletloss.py +++ b/src/train_tripletloss.py @@ -75,7 +75,7 @@ def main(args): # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images - lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext) + lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) with tf.Graph().as_default(): @@ -474,8 +474,6 @@ def parse_arguments(argv): # Parameters for validation on LFW parser.add_argument('--lfw_pairs', type=str, help='The file containing the pairs to use for validation.', default='data/pairs.txt') - parser.add_argument('--lfw_file_ext', type=str, - help='The file extension for the LFW dataset.', default='png', choices=['jpg', 'png']) parser.add_argument('--lfw_dir', type=str, help='Path to the data directory containing aligned face patches.', default='') parser.add_argument('--lfw_nrof_folds', type=int, diff --git a/src/validate_on_lfw.py b/src/validate_on_lfw.py index 69b693f00..6ff250f24 100644 --- a/src/validate_on_lfw.py +++ b/src/validate_on_lfw.py @@ -51,7 +51,7 @@ def main(args): pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images - paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext) + paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) # Load the model facenet.load_model(args.model) @@ -103,8 +103,6 @@ def parse_arguments(argv): help='Image size (height, width) in pixels.', default=160) parser.add_argument('--lfw_pairs', type=str, help='The file containing the pairs to use for validation.', default='data/pairs.txt') - parser.add_argument('--lfw_file_ext', type=str, - help='The file extension for the LFW dataset.', default='png', choices=['jpg', 'png']) parser.add_argument('--lfw_nrof_folds', type=int, help='Number of folds to use for cross validation. Mainly used for testing.', default=10) return parser.parse_args(argv) diff --git a/tmp/test_invariance_on_lfw.py b/tmp/test_invariance_on_lfw.py index c307dc967..3bbbde00a 100644 --- a/tmp/test_invariance_on_lfw.py +++ b/tmp/test_invariance_on_lfw.py @@ -41,7 +41,7 @@ def main(args): pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) - paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext) + paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) result_dir = '../data/' plt.ioff() # Disable interactive plotting mode @@ -198,8 +198,6 @@ def parse_arguments(argv): help='Number of scales to evaluate.', default=21) parser.add_argument('--lfw_pairs', type=str, help='The file containing the pairs to use for validation.', default='../data/pairs.txt') - parser.add_argument('--lfw_file_ext', type=str, - help='The file extension for the LFW dataset.', default='png', choices=['jpg', 'png']) parser.add_argument('--lfw_dir', type=str, help='Path to the data directory containing aligned face patches.', default='~/datasets/lfw/lfw_realigned/') parser.add_argument('--orig_image_size', type=int, From 832344e43caaee3e7a9922745dc4779efe80cf9e Mon Sep 17 00:00:00 2001 From: David Sandberg Date: Sat, 31 Mar 2018 10:49:07 +0200 Subject: [PATCH 03/12] Removed decov loss --- src/facenet.py | 13 ------------- src/generative/calculate_dataset_normalization.py | 2 -- 2 files changed, 15 deletions(-) diff --git a/src/facenet.py b/src/facenet.py index 884626b3e..17bfdaf60 100644 --- a/src/facenet.py +++ b/src/facenet.py @@ -61,19 +61,6 @@ def triplet_loss(anchor, positive, negative, alpha): return loss -def decov_loss(xs): - """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf - 'Reducing Overfitting In Deep Networks by Decorrelating Representation' - """ - x = tf.reshape(xs, [int(xs.get_shape()[0]), -1]) - m = tf.reduce_mean(x, 0, True) - z = tf.expand_dims(x-m, 2) - corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0,2,1])), 0) - corr_frob_sqr = tf.reduce_sum(tf.square(corr)) - corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr))) - loss = 0.5*(corr_frob_sqr - corr_diag_sqr) - return loss - def center_loss(features, label, alfa, nrof_classes): """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" (http://ydwen.github.io/papers/WenECCV16.pdf) diff --git a/src/generative/calculate_dataset_normalization.py b/src/generative/calculate_dataset_normalization.py index 791756a9e..9674ef702 100644 --- a/src/generative/calculate_dataset_normalization.py +++ b/src/generative/calculate_dataset_normalization.py @@ -140,8 +140,6 @@ def parse_arguments(argv): help='Keep probability of dropout for the fully connected layer(s).', default=1.0) parser.add_argument('--weight_decay', type=float, help='L2 weight regularization.', default=0.0) - parser.add_argument('--decov_loss_factor', type=float, - help='DeCov loss factor.', default=0.0) parser.add_argument('--center_loss_factor', type=float, help='Center loss factor.', default=0.0) parser.add_argument('--center_loss_alfa', type=float, From 81c5fb0b6b426ec8f1f14ec0b38447eef63309d2 Mon Sep 17 00:00:00 2001 From: David Sandberg Date: Sat, 31 Mar 2018 10:52:12 +0200 Subject: [PATCH 04/12] Removed test case for decov loss --- test/decov_loss_test.py | 65 ----------------------------------------- 1 file changed, 65 deletions(-) delete mode 100644 test/decov_loss_test.py diff --git a/test/decov_loss_test.py b/test/decov_loss_test.py deleted file mode 100644 index 2c39cff0a..000000000 --- a/test/decov_loss_test.py +++ /dev/null @@ -1,65 +0,0 @@ -# MIT License -# -# Copyright (c) 2016 David Sandberg -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import unittest -import tensorflow as tf -import numpy as np -import numpy.testing as testing -import facenet - -class DecovLossTest(unittest.TestCase): - - def testDecovLoss(self): - batch_size = 7 - image_size = 4 - channels = 3 - - with tf.Graph().as_default(): - - xs = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, channels), name='input') - loss = facenet.decov_loss(xs) - - sess = tf.Session() - with sess.as_default(): - np.random.seed(seed=666) - xs_ = np.random.normal(loc=0.0, scale=0.1, size=(batch_size,image_size,image_size,channels)) - xflat = xs_.reshape([batch_size,image_size*image_size*channels]) - ui = np.mean(xflat,0) - nd = image_size*image_size*channels - corr_ref = np.zeros((nd,nd)) - for i in range(nd): - for j in range(nd): - corr_ref[i,j] = 0.0 - for n in range(batch_size): - corr_ref[i,j] += (xflat[n,i]-ui[i]) * (xflat[n,j]-ui[j]) / batch_size - - corr_frob_sqr_ref = np.trace(np.matmul(corr_ref.T, corr_ref)) - corr_diag_sqr_ref = np.sum(np.square(np.diag(corr_ref))) - loss_ref = 0.5*(corr_frob_sqr_ref - corr_diag_sqr_ref) - - loss_ = sess.run(loss, feed_dict={xs:xs_}) - - testing.assert_almost_equal(loss_ref, loss_, 6, - 'Tensorflow implementation gives a different result compared to reference') - -if __name__ == "__main__": - unittest.main() From 2ac16d5913ed272f78dd0ce881ba59e32466136d Mon Sep 17 00:00:00 2001 From: David Sandberg Date: Sat, 31 Mar 2018 10:52:44 +0200 Subject: [PATCH 05/12] Removed dataset normalization module --- .../calculate_dataset_normalization.py | 188 ------------------ 1 file changed, 188 deletions(-) delete mode 100644 src/generative/calculate_dataset_normalization.py diff --git a/src/generative/calculate_dataset_normalization.py b/src/generative/calculate_dataset_normalization.py deleted file mode 100644 index 9674ef702..000000000 --- a/src/generative/calculate_dataset_normalization.py +++ /dev/null @@ -1,188 +0,0 @@ -"""Calculate the mean and standard deviation (per channel) over all images in a dataset -""" -# MIT License -# -# Copyright (c) 2017 David Sandberg -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os.path -import sys -import random -import tensorflow as tf -import numpy as np -import argparse -import facenet - -def main(args): - - np.random.seed(seed=args.seed) - random.seed(args.seed) - train_set = facenet.get_dataset(args.data_dir) - result_filename = os.path.join(os.path.expanduser(args.data_dir), 'statistics.txt') - - with tf.Graph().as_default(): - tf.set_random_seed(args.seed) - - # Get a list of image paths and their labels - image_list, _ = facenet.get_image_paths_and_labels(train_set) - nrof_images = len(image_list) - assert nrof_images>0, 'The dataset should not be empty' - - input_queue = tf.train.string_input_producer(image_list, num_epochs=None, - shuffle=False, seed=None, capacity=32) - - - nrof_preprocess_threads = 4 - images = [] - for _ in range(nrof_preprocess_threads): - filename = input_queue.dequeue() - file_contents = tf.read_file(filename) - image = tf.image.decode_image(file_contents) - image = tf.image.resize_image_with_crop_or_pad(image, 160, 160) - - #pylint: disable=no-member - image.set_shape((args.image_size, args.image_size, 3)) - image = tf.cast(image, tf.float32) - images.append((image,)) - - image_batch = tf.train.batch_join(images, batch_size=100, allow_smaller_final_batch=True) - #mean = tf.reduce_mean(image_batch, reduction_indices=[0,1,2]) - m, v = tf.nn.moments(image_batch, [1,2]) - mean = tf.reduce_mean(m, 0) - variance = tf.reduce_mean(v, 0) - - - - # Start running operations on the Graph. - gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) - sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) - sess.run(tf.global_variables_initializer()) - tf.train.start_queue_runners(sess=sess) - - with sess.as_default(): - - # Training and validation loop - print('Running training') - nrof_batches = nrof_images // args.batch_size - #nrof_batches = 20 - means = np.zeros(shape=(nrof_batches, 3), dtype=np.float32) - variances = np.zeros(shape=(nrof_batches, 3), dtype=np.float32) - for i in range(nrof_batches): - means[i,:], variances[i,:] = sess.run([mean, variance]) - if (i+1)%10==0: - print('Batch: %5d/%5d, Mean: %s, Variance: %s' % (i+1, nrof_batches, np.array_str(np.mean(means[:i,:],axis=0)), np.array_str(np.mean(variances[:i,:],axis=0)))) - dataset_mean = np.mean(means,axis=0) - dataset_variance = np.mean(variances,axis=0) - print('Final mean: %s' % np.array_str(dataset_mean)) - print('Final variance: %s' % np.array_str(dataset_variance)) - with open(result_filename, 'w') as text_file: - print('Writing result to %s' % result_filename) - text_file.write('Mean: %.5f, %.5f, %.5f\n' % (dataset_mean[0], dataset_mean[1], dataset_mean[2])) - text_file.write('Variance: %.5f, %.5f, %.5f\n' % (dataset_variance[0], dataset_variance[1], dataset_variance[2])) - - - -def parse_arguments(argv): - parser = argparse.ArgumentParser() - - parser.add_argument('--logs_base_dir', type=str, - help='Directory where to write event logs.', default='~/logs/facenet') - parser.add_argument('--models_base_dir', type=str, - help='Directory where to write trained models and checkpoints.', default='~/models/facenet') - parser.add_argument('--gpu_memory_fraction', type=float, - help='Upper bound on the amount of GPU memory that will be used by the process.', default=1.0) - parser.add_argument('--pretrained_model', type=str, - help='Load a pretrained model before training starts.') - parser.add_argument('--data_dir', type=str, - help='Path to the data directory containing aligned face patches.', - default='~/datasets/casia/casia_maxpy_mtcnnalign_182_160') - parser.add_argument('--model_def', type=str, - help='Model definition. Points to a module containing the definition of the inference graph.', default='models.inception_resnet_v1') - parser.add_argument('--max_nrof_epochs', type=int, - help='Number of epochs to run.', default=500) - parser.add_argument('--batch_size', type=int, - help='Number of images to process in a batch.', default=90) - parser.add_argument('--image_size', type=int, - help='Image size (height, width) in pixels.', default=160) - parser.add_argument('--epoch_size', type=int, - help='Number of batches per epoch.', default=1000) - parser.add_argument('--embedding_size', type=int, - help='Dimensionality of the embedding.', default=128) - parser.add_argument('--random_crop', - help='Performs random cropping of training images. If false, the center image_size pixels from the training images are used. ' + - 'If the size of the images in the data directory is equal to image_size no cropping is performed', action='store_true') - parser.add_argument('--random_flip', - help='Performs random horizontal flipping of training images.', action='store_true') - parser.add_argument('--random_rotate', - help='Performs random rotations of training images.', action='store_true') - parser.add_argument('--keep_probability', type=float, - help='Keep probability of dropout for the fully connected layer(s).', default=1.0) - parser.add_argument('--weight_decay', type=float, - help='L2 weight regularization.', default=0.0) - parser.add_argument('--center_loss_factor', type=float, - help='Center loss factor.', default=0.0) - parser.add_argument('--center_loss_alfa', type=float, - help='Center update rate for center loss.', default=0.95) - parser.add_argument('--optimizer', type=str, choices=['ADAGRAD', 'ADADELTA', 'ADAM', 'RMSPROP', 'MOM'], - help='The optimization algorithm to use', default='ADAGRAD') - parser.add_argument('--learning_rate', type=float, - help='Initial learning rate. If set to a negative value a learning rate ' + - 'schedule can be specified in the file "learning_rate_schedule.txt"', default=0.1) - parser.add_argument('--learning_rate_decay_epochs', type=int, - help='Number of epochs between learning rate decay.', default=100) - parser.add_argument('--learning_rate_decay_factor', type=float, - help='Learning rate decay factor.', default=1.0) - parser.add_argument('--moving_average_decay', type=float, - help='Exponential decay for tracking of training parameters.', default=0.9999) - parser.add_argument('--seed', type=int, - help='Random seed.', default=666) - parser.add_argument('--nrof_preprocess_threads', type=int, - help='Number of preprocessing (data loading and augmentation) threads.', default=4) - parser.add_argument('--log_histograms', - help='Enables logging of weight/bias histograms in tensorboard.', action='store_true') - parser.add_argument('--learning_rate_schedule_file', type=str, - help='File containing the learning rate schedule that is used when learning_rate is set to to -1.', default='data/learning_rate_schedule.txt') - parser.add_argument('--filter_filename', type=str, - help='File containing image data used for dataset filtering', default='') - parser.add_argument('--filter_percentile', type=float, - help='Keep only the percentile images closed to its class center', default=100.0) - parser.add_argument('--filter_min_nrof_images_per_class', type=int, - help='Keep only the classes with this number of examples or more', default=0) - - # Parameters for validation on LFW - parser.add_argument('--lfw_pairs', type=str, - help='The file containing the pairs to use for validation.', default='data/pairs.txt') - parser.add_argument('--lfw_file_ext', type=str, - help='The file extension for the LFW dataset.', default='png', choices=['jpg', 'png']) - parser.add_argument('--lfw_dir', type=str, - help='Path to the data directory containing aligned face patches.', default='') - parser.add_argument('--lfw_batch_size', type=int, - help='Number of images to process in a batch in the LFW test set.', default=100) - parser.add_argument('--lfw_nrof_folds', type=int, - help='Number of folds to use for cross validation. Mainly used for testing.', default=10) - return parser.parse_args(argv) - - -if __name__ == '__main__': - main(parse_arguments(sys.argv[1:])) From efc2f94dd1c6806aa249ba418aa118ce6f1165df Mon Sep 17 00:00:00 2001 From: David Sandberg Date: Sat, 31 Mar 2018 12:00:49 +0200 Subject: [PATCH 06/12] Fixed update of centers for center loss --- src/facenet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/facenet.py b/src/facenet.py index 17bfdaf60..e2ff391aa 100644 --- a/src/facenet.py +++ b/src/facenet.py @@ -72,7 +72,8 @@ def center_loss(features, label, alfa, nrof_classes): centers_batch = tf.gather(centers, label) diff = (1 - alfa) * (centers_batch - features) centers = tf.scatter_sub(centers, label, diff) - loss = tf.reduce_mean(tf.square(features - centers_batch)) + with tf.control_dependencies([centers]): + loss = tf.reduce_mean(tf.square(features - centers_batch)) return loss, centers def get_image_paths_and_labels(dataset): From 8ebebe405f8cf8aff31ff9a69f2dcea7c64a1125 Mon Sep 17 00:00:00 2001 From: David Sandberg Date: Sat, 31 Mar 2018 16:17:24 +0200 Subject: [PATCH 07/12] Added validation on a subset of the dataset --- src/facenet.py | 17 +++++------ src/train_softmax.py | 67 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 71 insertions(+), 13 deletions(-) diff --git a/src/facenet.py b/src/facenet.py index e2ff391aa..863b20d2a 100644 --- a/src/facenet.py +++ b/src/facenet.py @@ -370,26 +370,27 @@ def get_image_paths(facedir): image_paths = [os.path.join(facedir,img) for img in images] return image_paths -def split_dataset(dataset, split_ratio, mode): +def split_dataset(dataset, split_ratio, min_nrof_images_per_class, mode): if mode=='SPLIT_CLASSES': nrof_classes = len(dataset) class_indices = np.arange(nrof_classes) np.random.shuffle(class_indices) - split = int(round(nrof_classes*split_ratio)) + split = int(round(nrof_classes*(1-split_ratio))) train_set = [dataset[i] for i in class_indices[0:split]] test_set = [dataset[i] for i in class_indices[split:-1]] elif mode=='SPLIT_IMAGES': train_set = [] test_set = [] - min_nrof_images = 2 for cls in dataset: paths = cls.image_paths np.random.shuffle(paths) - split = int(round(len(paths)*split_ratio)) - if split=min_nrof_images_per_class and nrof_images_in_class-split>=1: + train_set.append(ImageClass(cls.name, paths[:split])) + test_set.append(ImageClass(cls.name, paths[split:])) else: raise ValueError('Invalid train/test split mode "%s"' % mode) return train_set, test_set diff --git a/src/train_softmax.py b/src/train_softmax.py index efa09ea20..11a5d0f71 100644 --- a/src/train_softmax.py +++ b/src/train_softmax.py @@ -65,10 +65,16 @@ def main(args): np.random.seed(seed=args.seed) random.seed(args.seed) - train_set = facenet.get_dataset(args.data_dir) + dataset = facenet.get_dataset(args.data_dir) if args.filter_filename: - train_set = filter_dataset(train_set, os.path.expanduser(args.filter_filename), + dataset = filter_dataset(dataset, os.path.expanduser(args.filter_filename), args.filter_percentile, args.filter_min_nrof_images_per_class) + + if args.validation_set_split_ratio>0.0: + train_set, val_set = facenet.split_dataset(dataset, args.validation_set_split_ratio, args.min_nrof_val_images_per_class, 'SPLIT_IMAGES') + else: + train_set, val_set = dataset, [] + nrof_classes = len(train_set) print('Model directory: %s' % model_dir) @@ -91,8 +97,10 @@ def main(args): # Get a list of image paths and their labels image_list, label_list = facenet.get_image_paths_and_labels(train_set) - assert len(image_list)>0, 'The dataset should not be empty' + assert len(image_list)>0, 'The training set should not be empty' + val_image_list, val_label_list = facenet.get_image_paths_and_labels(val_set) + # Create a queue that produces indices into the image_list and label_list labels = ops.convert_to_tensor(label_list, dtype=tf.int32) range_size = array_ops.shape(labels)[0] @@ -127,8 +135,11 @@ def main(args): image_batch = tf.identity(image_batch, 'input') label_batch = tf.identity(label_batch, 'label_batch') - print('Total number of classes: %d' % nrof_classes) - print('Total number of examples: %d' % len(image_list)) + print('Number of classes in training set: %d' % nrof_classes) + print('Number of examples in training set: %d' % len(image_list)) + + print('Number of classes in validation set: %d' % len(val_set)) + print('Number of examples in validation set: %d' % len(val_image_list)) print('Building training graph') @@ -199,6 +210,11 @@ def main(args): total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file, args.random_rotate, args.random_crop, args.random_flip) + if len(val_image_list)>0 and ((epoch-1) % args.validate_every_n_epochs == args.validate_every_n_epochs-1 or epoch==args.max_nrof_epochs): + validate(args, sess, epoch, val_image_list, val_label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, + learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, + total_loss, regularization_losses, cross_entropy_mean, args.validate_every_n_epochs) + # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) @@ -283,6 +299,41 @@ def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_o summary_writer.add_summary(summary, step) return step +def validate(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, + learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, + loss, regularization_losses, cross_entropy_mean, validate_every_n_epochs): + + print('Running forward pass on validation set') + + nrof_batches = len(label_list) // args.lfw_batch_size + nrof_images = nrof_batches * args.lfw_batch_size + + # Enqueue one epoch of image paths and labels + labels_array = np.expand_dims(np.array(label_list[:nrof_images]),1) + image_paths_array = np.expand_dims(np.array(image_list[:nrof_images]),1) + control_array = np.zeros_like(labels_array, np.int32) + sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array, control_placeholder: control_array}) + + loss_array = np.zeros((nrof_batches,), np.float32) + xent_array = np.zeros((nrof_batches,), np.float32) + + # Training loop + start_time = time.time() + for i in range(nrof_batches): + feed_dict = {phase_train_placeholder:False, batch_size_placeholder:args.lfw_batch_size} + err, cross_entropy_mean_ = sess.run([loss, cross_entropy_mean], feed_dict=feed_dict) + loss_array[i], xent_array[i] = (err, cross_entropy_mean_) + if i % 10 == 9: + print('.', end='') + sys.stdout.flush() + print('') + + duration = time.time() - start_time + + print('Validation Epoch: %d\tTime %.3f\tLoss %2.3f\tXent %2.3f' % + (epoch, duration, np.mean(loss_array), np.mean(xent_array))) + + def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, embeddings, labels, image_paths, actual_issame, batch_size, nrof_folds, log_dir, step, summary_writer, distance_metric, subtract_mean, use_flipped_images): start_time = time.time() @@ -429,6 +480,12 @@ def parse_arguments(argv): help='Keep only the percentile images closed to its class center', default=100.0) parser.add_argument('--filter_min_nrof_images_per_class', type=int, help='Keep only the classes with this number of examples or more', default=0) + parser.add_argument('--validate_every_n_epochs', type=int, + help='Number of epoch between validation', default=5) + parser.add_argument('--validation_set_split_ratio', type=float, + help='The ratio of the total dataset to use for validation', default=0.0) + parser.add_argument('--min_nrof_val_images_per_class', type=float, + help='Classes with fewer images will be removed from the validation set', default=0) # Parameters for validation on LFW parser.add_argument('--lfw_pairs', type=str, From 1ab3e263e08484c93036e530bc184382f5afe0c8 Mon Sep 17 00:00:00 2001 From: David Sandberg Date: Sat, 31 Mar 2018 21:53:40 +0200 Subject: [PATCH 08/12] Added storing of statistics to hdf file --- src/train_softmax.py | 121 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 96 insertions(+), 25 deletions(-) diff --git a/src/train_softmax.py b/src/train_softmax.py index 11a5d0f71..b31699d56 100644 --- a/src/train_softmax.py +++ b/src/train_softmax.py @@ -38,6 +38,7 @@ import facenet import lfw import h5py +import math import tensorflow.contrib.slim as slim from tensorflow.python.ops import data_flow_ops from tensorflow.python.framework import ops @@ -56,6 +57,8 @@ def main(args): if not os.path.isdir(model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) + stat_file_name = os.path.join(log_dir, 'stat.h5') + # Write arguments to a text file facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) @@ -154,10 +157,14 @@ def main(args): embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') + # Norm for the prelogits + eps = 1e-4 + prelogits_norm = tf.reduce_mean(tf.norm(tf.abs(prelogits)+eps, ord=args.prelogits_norm_p, axis=1)) + tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_norm * args.prelogits_norm_loss_factor) + # Add center loss - if args.center_loss_factor>0.0: - prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch, args.center_loss_alfa, nrof_classes) - tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor) + prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch, args.center_loss_alfa, nrof_classes) + tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor) learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step, args.learning_rate_decay_epochs*args.epoch_size, args.learning_rate_decay_factor, staircase=True) @@ -169,6 +176,9 @@ def main(args): cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') tf.add_to_collection('losses', cross_entropy_mean) + correct_prediction = tf.cast(tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)), tf.float32) + accuracy = tf.reduce_mean(correct_prediction) + # Calculate the total losses regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') @@ -200,29 +210,62 @@ def main(args): # Training and validation loop print('Running training') + nrof_steps = args.max_nrof_epochs*args.epoch_size + nrof_val_samples = int(math.ceil(args.max_nrof_epochs / args.validate_every_n_epochs)) # Validate every validate_every_n_epochs as well as in the last epoch + stat = { + 'loss': np.zeros((nrof_steps,), np.float32), + 'center_loss': np.zeros((nrof_steps,), np.float32), + 'reg_loss': np.zeros((nrof_steps,), np.float32), + 'xent_loss': np.zeros((nrof_steps,), np.float32), + 'prelogits_norm': np.zeros((nrof_steps,), np.float32), + 'accuracy': np.zeros((nrof_steps,), np.float32), + 'val_loss': np.zeros((nrof_val_samples,), np.float32), + 'val_xent_loss': np.zeros((nrof_val_samples,), np.float32), + 'val_accuracy': np.zeros((nrof_val_samples,), np.float32), + 'lfw_accuracy': np.zeros((args.max_nrof_epochs,), np.float32), + 'lfw_valrate': np.zeros((args.max_nrof_epochs,), np.float32), + 'learning_rate': np.zeros((args.max_nrof_epochs,), np.float32), + 'time_train': np.zeros((args.max_nrof_epochs,), np.float32), + 'time_validate': np.zeros((args.max_nrof_epochs,), np.float32), + 'time_evaluate': np.zeros((args.max_nrof_epochs,), np.float32), + 'prelogits_hist': np.zeros((args.max_nrof_epochs, 1000), np.float32), + } epoch = 0 while epoch < args.max_nrof_epochs: step = sess.run(global_step, feed_dict=None) epoch = step // args.epoch_size # Train for one epoch + t = time.time() train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, global_step, total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file, - args.random_rotate, args.random_crop, args.random_flip) + stat, cross_entropy_mean, accuracy, learning_rate, + prelogits, prelogits_center_loss, args.random_rotate, args.random_crop, args.random_flip, prelogits_norm, args.prelogits_hist_max) + stat['time_train'][epoch-1] = time.time() - t + t = time.time() if len(val_image_list)>0 and ((epoch-1) % args.validate_every_n_epochs == args.validate_every_n_epochs-1 or epoch==args.max_nrof_epochs): - validate(args, sess, epoch, val_image_list, val_label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, - learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, - total_loss, regularization_losses, cross_entropy_mean, args.validate_every_n_epochs) + validate(args, sess, epoch, val_image_list, val_label_list, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, + phase_train_placeholder, batch_size_placeholder, + stat, total_loss, regularization_losses, cross_entropy_mean, accuracy, args.validate_every_n_epochs) + stat['time_validate'][epoch-1] = time.time() - t # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) # Evaluate on LFW + t = time.time() if args.lfw_dir: evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, - embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, + embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, stat, epoch, args.lfw_distance_metric, args.lfw_subtract_mean, args.lfw_use_flipped_images) + stat['time_evaluate'][epoch-1] = time.time() - t + + print('Saving statistics') + with h5py.File(stat_file_name, 'w') as f: + for key, value in stat.iteritems(): + f.create_dataset(key, data=value) + return model_dir def find_threshold(var, percentile): @@ -257,8 +300,10 @@ def filter_dataset(dataset, data_filename, percentile, min_nrof_images_per_class return filtered_dataset def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, - learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, global_step, - loss, train_op, summary_op, summary_writer, regularization_losses, learning_rate_schedule_file, random_rotate, random_crop, random_flip): + learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, step, + loss, train_op, summary_op, summary_writer, reg_losses, learning_rate_schedule_file, + stat, cross_entropy_mean, accuracy, + learning_rate, prelogits, prelogits_center_loss, random_rotate, random_crop, random_flip, prelogits_norm, prelogits_hist_max): batch_number = 0 if args.learning_rate>0.0: @@ -282,26 +327,38 @@ def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_o while batch_number < args.epoch_size: start_time = time.time() feed_dict = {learning_rate_placeholder: lr, phase_train_placeholder:True, batch_size_placeholder:args.batch_size} - if (batch_number % 100 == 0): - err, _, step, reg_loss, summary_str = sess.run([loss, train_op, global_step, regularization_losses, summary_op], feed_dict=feed_dict) - summary_writer.add_summary(summary_str, global_step=step) + tensor_list = [loss, train_op, step, reg_losses, prelogits, cross_entropy_mean, learning_rate, prelogits_norm, accuracy, prelogits_center_loss] + if batch_number % 100 == 0: + loss_, _, step_, reg_losses_, prelogits_, cross_entropy_mean_, lr_, prelogits_norm_, accuracy_, center_loss_, summary_str = sess.run(tensor_list + [summary_op], feed_dict=feed_dict) + summary_writer.add_summary(summary_str, global_step=step_) else: - err, _, step, reg_loss = sess.run([loss, train_op, global_step, regularization_losses], feed_dict=feed_dict) + loss_, _, step_, reg_losses_, prelogits_, cross_entropy_mean_, lr_, prelogits_norm_, accuracy_, center_loss_ = sess.run(tensor_list, feed_dict=feed_dict) + duration = time.time() - start_time - print('Epoch: [%d][%d/%d]\tTime %.3f\tLoss %2.3f\tRegLoss %2.3f' % - (epoch, batch_number+1, args.epoch_size, duration, err, np.sum(reg_loss))) + stat['loss'][step_-1] = loss_ + stat['center_loss'][step_-1] = center_loss_ + stat['reg_loss'][step_-1] = np.sum(reg_losses_) + stat['xent_loss'][step_-1] = cross_entropy_mean_ + stat['prelogits_norm'][step_-1] = prelogits_norm_ + stat['learning_rate'][epoch-1] = lr_ + stat['accuracy'][step_-1] = accuracy_ + stat['prelogits_hist'][epoch-1,:] += np.histogram(np.minimum(np.abs(prelogits_), prelogits_hist_max), bins=1000, range=(0.0, prelogits_hist_max))[0] + + duration = time.time() - start_time + print('Epoch: [%d][%d/%d]\tTime %.3f\tLoss %2.3f\tXent %2.3f\tRegLoss %2.3f\tAccuracy %2.3f\tLr %2.5f\tCl %2.3f' % + (epoch, batch_number+1, args.epoch_size, duration, loss_, cross_entropy_mean_, np.sum(reg_losses_), accuracy_, lr_, center_loss_)) batch_number += 1 train_time += duration # Add validation loss and accuracy to summary summary = tf.Summary() #pylint: disable=maybe-no-member summary.value.add(tag='time/total', simple_value=train_time) - summary_writer.add_summary(summary, step) + summary_writer.add_summary(summary, global_step=step_) return step -def validate(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, - learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, - loss, regularization_losses, cross_entropy_mean, validate_every_n_epochs): +def validate(args, sess, epoch, image_list, label_list, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, + phase_train_placeholder, batch_size_placeholder, + stat, loss, regularization_losses, cross_entropy_mean, accuracy, validate_every_n_epochs): print('Running forward pass on validation set') @@ -316,13 +373,14 @@ def validate(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueu loss_array = np.zeros((nrof_batches,), np.float32) xent_array = np.zeros((nrof_batches,), np.float32) + accuracy_array = np.zeros((nrof_batches,), np.float32) # Training loop start_time = time.time() for i in range(nrof_batches): feed_dict = {phase_train_placeholder:False, batch_size_placeholder:args.lfw_batch_size} - err, cross_entropy_mean_ = sess.run([loss, cross_entropy_mean], feed_dict=feed_dict) - loss_array[i], xent_array[i] = (err, cross_entropy_mean_) + loss_, cross_entropy_mean_, accuracy_ = sess.run([loss, cross_entropy_mean, accuracy], feed_dict=feed_dict) + loss_array[i], xent_array[i], accuracy_array[i] = (loss_, cross_entropy_mean_, accuracy_) if i % 10 == 9: print('.', end='') sys.stdout.flush() @@ -330,12 +388,17 @@ def validate(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueu duration = time.time() - start_time - print('Validation Epoch: %d\tTime %.3f\tLoss %2.3f\tXent %2.3f' % - (epoch, duration, np.mean(loss_array), np.mean(xent_array))) + val_index = (epoch-1)//validate_every_n_epochs + stat['val_loss'][val_index] = np.mean(loss_array) + stat['val_xent_loss'][val_index] = np.mean(xent_array) + stat['val_accuracy'][val_index] = np.mean(accuracy_array) + + print('Validation Epoch: %d\tTime %.3f\tLoss %2.3f\tXent %2.3f\tAccuracy %2.3f' % + (epoch, duration, np.mean(loss_array), np.mean(xent_array), np.mean(accuracy_array))) def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, - embeddings, labels, image_paths, actual_issame, batch_size, nrof_folds, log_dir, step, summary_writer, distance_metric, subtract_mean, use_flipped_images): + embeddings, labels, image_paths, actual_issame, batch_size, nrof_folds, log_dir, step, summary_writer, stat, epoch, distance_metric, subtract_mean, use_flipped_images): start_time = time.time() # Run forward pass to calculate embeddings print('Runnning forward pass on LFW images') @@ -390,6 +453,8 @@ def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phas summary_writer.add_summary(summary, step) with open(os.path.join(log_dir,'lfw_result.txt'),'at') as f: f.write('%d\t%.5f\t%.5f\n' % (step, np.mean(accuracy), val)) + stat['lfw_accuracy'][epoch-1] = np.mean(accuracy) + stat['lfw_valrate'][epoch-1] = val def save_variables_and_metagraph(sess, saver, summary_writer, model_dir, model_name, step): # Save the model checkpoint @@ -455,6 +520,12 @@ def parse_arguments(argv): help='Center loss factor.', default=0.0) parser.add_argument('--center_loss_alfa', type=float, help='Center update rate for center loss.', default=0.95) + parser.add_argument('--prelogits_norm_loss_factor', type=float, + help='Loss based on the norm of the activations in the prelogits layer.', default=0.0) + parser.add_argument('--prelogits_norm_p', type=float, + help='Norm to use for prelogits norm loss.', default=1.0) + parser.add_argument('--prelogits_hist_max', type=float, + help='The max value for the prelogits histogram.', default=10.0) parser.add_argument('--optimizer', type=str, choices=['ADAGRAD', 'ADADELTA', 'ADAM', 'RMSPROP', 'MOM'], help='The optimization algorithm to use', default='ADAGRAD') parser.add_argument('--learning_rate', type=float, From f4bcc12c2b5c115a7dff0371d16388339483952d Mon Sep 17 00:00:00 2001 From: David Sandberg Date: Sat, 31 Mar 2018 22:06:48 +0200 Subject: [PATCH 09/12] Madified epoch indexing --- src/train_softmax.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/train_softmax.py b/src/train_softmax.py index b31699d56..745bcf98f 100644 --- a/src/train_softmax.py +++ b/src/train_softmax.py @@ -230,10 +230,8 @@ def main(args): 'time_evaluate': np.zeros((args.max_nrof_epochs,), np.float32), 'prelogits_hist': np.zeros((args.max_nrof_epochs, 1000), np.float32), } - epoch = 0 - while epoch < args.max_nrof_epochs: + for epoch in range(1,args.max_nrof_epochs+1): step = sess.run(global_step, feed_dict=None) - epoch = step // args.epoch_size # Train for one epoch t = time.time() train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, @@ -251,7 +249,7 @@ def main(args): stat['time_validate'][epoch-1] = time.time() - t # Save variables and the metagraph if it doesn't exist already - save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) + save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, epoch) # Evaluate on LFW t = time.time() From 5ff441d47bb4a2876cd0c385c17034874177ace4 Mon Sep 17 00:00:00 2001 From: David Sandberg Date: Sat, 31 Mar 2018 22:22:32 +0200 Subject: [PATCH 10/12] Added the possibility to stop training from learning rate schedule file --- src/facenet.py | 5 ++++- src/train_softmax.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/facenet.py b/src/facenet.py index 863b20d2a..82a9fcbf7 100644 --- a/src/facenet.py +++ b/src/facenet.py @@ -330,7 +330,10 @@ def get_learning_rate_from_file(filename, epoch): if line: par = line.strip().split(':') e = int(par[0]) - lr = float(par[1]) + if par[1]=='-': + lr = -1 + else: + lr = float(par[1]) if e <= epoch: learning_rate = lr else: diff --git a/src/train_softmax.py b/src/train_softmax.py index 745bcf98f..e80277604 100644 --- a/src/train_softmax.py +++ b/src/train_softmax.py @@ -234,13 +234,16 @@ def main(args): step = sess.run(global_step, feed_dict=None) # Train for one epoch t = time.time() - train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, + cont = train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, global_step, total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file, stat, cross_entropy_mean, accuracy, learning_rate, prelogits, prelogits_center_loss, args.random_rotate, args.random_crop, args.random_flip, prelogits_norm, args.prelogits_hist_max) stat['time_train'][epoch-1] = time.time() - t - + + if not cont: + break + t = time.time() if len(val_image_list)>0 and ((epoch-1) % args.validate_every_n_epochs == args.validate_every_n_epochs-1 or epoch==args.max_nrof_epochs): validate(args, sess, epoch, val_image_list, val_label_list, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, @@ -308,6 +311,9 @@ def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_o lr = args.learning_rate else: lr = facenet.get_learning_rate_from_file(learning_rate_schedule_file, epoch) + + if lr<=0: + return False index_epoch = sess.run(index_dequeue_op) label_epoch = np.array(label_list)[index_epoch] @@ -352,7 +358,7 @@ def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_o #pylint: disable=maybe-no-member summary.value.add(tag='time/total', simple_value=train_time) summary_writer.add_summary(summary, global_step=step_) - return step + return True def validate(args, sess, epoch, image_list, label_list, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, phase_train_placeholder, batch_size_placeholder, From 22f2eb160048b4b3710ed91ff999b451e5f88954 Mon Sep 17 00:00:00 2001 From: David Sandberg Date: Sat, 31 Mar 2018 23:07:01 +0200 Subject: [PATCH 11/12] Changed to Xavier initialization on classification layer --- src/train_softmax.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/train_softmax.py b/src/train_softmax.py index e80277604..8d6968996 100644 --- a/src/train_softmax.py +++ b/src/train_softmax.py @@ -151,7 +151,7 @@ def main(args): phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) logits = slim.fully_connected(prelogits, len(train_set), activation_fn=None, - weights_initializer=tf.truncated_normal_initializer(stddev=0.1), + weights_initializer=slim.initializers.xavier_initializer(), weights_regularizer=slim.l2_regularizer(args.weight_decay), scope='Logits', reuse=False) From 24132775cb7909c1f1131cf57946b7c24059fe7e Mon Sep 17 00:00:00 2001 From: David Sandberg Date: Sat, 31 Mar 2018 23:11:50 +0200 Subject: [PATCH 12/12] Changed to Xavier initialization in inception resnets --- src/models/inception_resnet_v1.py | 2 +- src/models/inception_resnet_v2.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/models/inception_resnet_v1.py b/src/models/inception_resnet_v1.py index c54be90e0..475e81bb4 100644 --- a/src/models/inception_resnet_v1.py +++ b/src/models/inception_resnet_v1.py @@ -141,7 +141,7 @@ def inference(images, keep_probability, phase_train=True, } with slim.arg_scope([slim.conv2d, slim.fully_connected], - weights_initializer=tf.truncated_normal_initializer(stddev=0.1), + weights_initializer=slim.initializers.xavier_initializer(), weights_regularizer=slim.l2_regularizer(weight_decay), normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params): diff --git a/src/models/inception_resnet_v2.py b/src/models/inception_resnet_v2.py index d2b76f062..0fb176fd0 100644 --- a/src/models/inception_resnet_v2.py +++ b/src/models/inception_resnet_v2.py @@ -26,7 +26,7 @@ import tensorflow as tf import tensorflow.contrib.slim as slim -# Inception-Renset-A +# Inception-Resnet-A def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): """Builds the 35x35 resnet block.""" with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): @@ -47,7 +47,7 @@ def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): net = activation_fn(net) return net -# Inception-Renset-B +# Inception-Resnet-B def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): """Builds the 17x17 resnet block.""" with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): @@ -101,7 +101,7 @@ def inference(images, keep_probability, phase_train=True, 'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ], } with slim.arg_scope([slim.conv2d, slim.fully_connected], - weights_initializer=tf.truncated_normal_initializer(stddev=0.1), + weights_initializer=slim.initializers.xavier_initializer(), weights_regularizer=slim.l2_regularizer(weight_decay), normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params):