From 635e029b3067ceab5f90c3b964182f6e7051ce13 Mon Sep 17 00:00:00 2001
From: Octavio Arriaga
Date: Fri, 15 Oct 2021 11:44:44 +0200
Subject: [PATCH 001/101] Ignore .h5 files from repo

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 410ac728f..6d12a3065 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 *.pyc
 *.hdf5
+*.h5
 *.log
 *.pkl
 *.json

From 0017ce8a474d89788abef54d8995a5464c0eb570 Mon Sep 17 00:00:00 2001
From: Octavio Arriaga
Date: Fri, 15 Oct 2021 13:26:22 +0200
Subject: [PATCH 002/101] Start refactoring model generator

---
 examples/pix2pose/pix2pose/generator.py   | 74 +++++++++++++++++++++++
 examples/pix2pose/{ => pix2pose}/model.py |  6 +-
 examples/pix2pose/pix2pose/test.py        |  0
 3 files changed, 78 insertions(+), 2 deletions(-)
 create mode 100644 examples/pix2pose/pix2pose/generator.py
 rename examples/pix2pose/{ => pix2pose}/model.py (96%)
 create mode 100644 examples/pix2pose/pix2pose/test.py

diff --git a/examples/pix2pose/pix2pose/generator.py b/examples/pix2pose/pix2pose/generator.py
new file mode 100644
index 000000000..5270bcf58
--- /dev/null
+++ b/examples/pix2pose/pix2pose/generator.py
@@ -0,0 +1,74 @@
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import (
+    Input, Conv2D, Activation, Dense, Reshape, Conv2DTranspose, Flatten,
+    LeakyReLU, BatchNormalization, Concatenate)
+
+
+def encoder_convolution_block(x, filters, strides=(2, 2)):
+    x = Conv2D(filters, (5, 5), strides=strides, padding='same')(x)
+    x = BatchNormalization()(x)
+    x = LeakyReLU()(x)
+    return x
+
+
+def encoder_block(x, filters):
+    x_stem = encoder_convolution_block(x, filters)
+    x_skip = encoder_convolution_block(x, filters)
+    x_stem = Concatenate()([x_stem, x_skip])
+    return x_stem, x_skip
+
+
+def encoder(x):
+    x, skip_1 = encoder_block(x, 64)
+    x, skip_2 = encoder_block(x, 128)
+    x, skip_3 = encoder_block(x, 128)
+    x, skip_4 = encoder_block(x, 256)
+    return x, [skip_1, skip_2, skip_3]
+
+
+def decoder_convolution_block(x, filters, strides=(2, 2)):
+    x = Conv2DTranspose(filters, (5, 5), strides=strides, padding='same')(x)
+    x = BatchNormalization()(x)
+    x = LeakyReLU()(x)
+    return x
+
+
+def decoder_block(x, x_skip, filters):
+    filters_1, filters_2 = filters
+    x = decoder_convolution_block(x, filters_1, (1, 1))
+    x = decoder_convolution_block(x, filters_2)
+    x = Concatenate()([x, x_skip])
+    return x
+
+
+def decoder(x, skip_connections):
+    skip_1, skip_2, skip_3 = skip_connections
+    x = decoder_convolution_block(x, 256)
+    x = Concatenate()([x, skip_3])
+    x = decoder_block(x, skip_2, [256, 128])
+    x = decoder_block(x, skip_1, [256, 64])
+    x = decoder_convolution_block(x, 128, (1, 1))
+    return x
+
+
+def Generator(input_shape=(128, 128, 3), latent_dimension=256,
+              name='PIX2POSE_GENERATOR'):
+    input_image = Input(input_shape, name='input_image')
+    x, skip_connections = encoder(input_image)
+    x = Flatten()(x)
+    x = Dense(latent_dimension)(x)
+    x = Dense(8 * 8 * latent_dimension)(x)
+    x = Reshape((8, 8, latent_dimension))(x)
+    x = decoder(x, skip_connections)
+    RGB_mask = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(x)
+    RGB_mask = Activation('tanh', name='RGB_mask')(RGB_mask)
+    error_mask = Conv2DTranspose(1, (5, 5), (2, 2), padding='same')(x)
+    error_mask = Activation('sigmoid', name='error_mask')(error_mask)
+    model = Model([input_image], [RGB_mask, error_mask], name=name)
+    return model
+
+
+model = Generator()
+assert model.count_params() == 25740356
+assert model.output_shape == [(None, 128, 128, 3), (None, 128, 128, 
1)] +assert model.input_shape == (None, 128, 128, 3) diff --git a/examples/pix2pose/model.py b/examples/pix2pose/pix2pose/model.py similarity index 96% rename from examples/pix2pose/model.py rename to examples/pix2pose/pix2pose/model.py index f8093300a..71bc6fa6e 100644 --- a/examples/pix2pose/model.py +++ b/examples/pix2pose/pix2pose/model.py @@ -1,7 +1,9 @@ import numpy as np -from tensorflow.keras.layers import Conv2D, Activation, UpSampling2D, Dense, Conv2DTranspose, Dropout, Input, Flatten, Reshape, LeakyReLU, BatchNormalization, Concatenate from tensorflow.keras.models import Model +from tensorflow.keras.layers import (Input, Conv2D, Activation, Dense, Reshape, + Conv2DTranspose, Flatten, LeakyReLU, + BatchNormalization, Concatenate) import tensorflow as tf @@ -205,4 +207,4 @@ def Discriminator(): flatten = Flatten()(d7) output = Dense(1, activation='sigmoid', name='discriminator_output')(flatten) discriminator_model = Model(inputs=input, outputs=[output]) - return discriminator_model \ No newline at end of file + return discriminator_model diff --git a/examples/pix2pose/pix2pose/test.py b/examples/pix2pose/pix2pose/test.py new file mode 100644 index 000000000..e69de29bb From 403c86a4066c905215b6a50b084baae71b657bbf Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 15 Oct 2021 13:43:46 +0200 Subject: [PATCH 003/101] Refactor discriminator --- examples/pix2pose/pix2pose/discriminator.py | 27 +++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 examples/pix2pose/pix2pose/discriminator.py diff --git a/examples/pix2pose/pix2pose/discriminator.py b/examples/pix2pose/pix2pose/discriminator.py new file mode 100644 index 000000000..b1b95063f --- /dev/null +++ b/examples/pix2pose/pix2pose/discriminator.py @@ -0,0 +1,27 @@ +from tensorflow.keras.models import Model +from tensorflow.keras.layers import (Conv2D, BatchNormalization, LeakyReLU, + Input, Flatten, Dense) + + +def convolution_block(x, filters): + x = Conv2D(filters, (3, 3), strides=(2, 2), padding='same')(x) + x = BatchNormalization()(x) + x = LeakyReLU(0.2)(x) + return x + + +def Discriminator(input_shape=(128, 128, 3), name='PIX2POSE_DISCRIMINATOR'): + input_image = Input(input_shape, name='input_image') + x = convolution_block(input_image, 64) + for filters in [128, 256, 512, 512, 512, 512]: + x = convolution_block(x, filters) + flatten = Flatten()(x) + x = Dense(1, activation='sigmoid', name='discriminator_output')(flatten) + model = Model(input_image, x, name=name) + return model + + +model = Discriminator() +assert model.count_params() == 8640897 +assert model.output_shape == (None, 1) +assert model.input_shape == (None, 128, 128, 3) From 263b64653977c26064f770af3802d1344ba7ef5c Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 15 Oct 2021 13:46:27 +0200 Subject: [PATCH 004/101] Split model and loss utils --- examples/pix2pose/pix2pose/model.py | 210 ---------------------------- examples/pix2pose/pix2pose/test.py | 0 examples/pix2pose/utils.py | 58 ++++++++ 3 files changed, 58 insertions(+), 210 deletions(-) delete mode 100644 examples/pix2pose/pix2pose/model.py delete mode 100644 examples/pix2pose/pix2pose/test.py create mode 100644 examples/pix2pose/utils.py diff --git a/examples/pix2pose/pix2pose/model.py b/examples/pix2pose/pix2pose/model.py deleted file mode 100644 index 71bc6fa6e..000000000 --- a/examples/pix2pose/pix2pose/model.py +++ /dev/null @@ -1,210 +0,0 @@ -import numpy as np - -from tensorflow.keras.models import Model -from tensorflow.keras.layers import (Input, Conv2D, 
Activation, Dense, Reshape, - Conv2DTranspose, Flatten, LeakyReLU, - BatchNormalization, Concatenate) -import tensorflow as tf - - -def loss_color_wrapped(rotation_matrices): - def loss_color_unwrapped(color_image, predicted_color_image): - min_loss = tf.float32.max - - # Bring the image in the range between 0 and 1 - color_image = (color_image + 1) * 0.5 - - # Calculate masks for the object and the background (they are independent of the rotation) - mask_object = tf.repeat(tf.expand_dims(tf.math.reduce_max(tf.math.ceil(color_image), axis=-1), axis=-1), - repeats=3, axis=-1) - mask_background = tf.ones(tf.shape(mask_object)) - mask_object - - # Bring the image again in the range between -1 and 1 - color_image = (color_image * 2) - 1 - - # Iterate over all possible rotations - for rotation_matrix in rotation_matrices: - - real_color_image = tf.identity(color_image) - - # Add a small epsilon value to avoid the discontinuity problem - real_color_image = real_color_image + tf.ones_like(real_color_image) * 0.0001 - - # Rotate the object - real_color_image = tf.einsum('ij,mklj->mkli', tf.convert_to_tensor(np.array(rotation_matrix), dtype=tf.float32), real_color_image) - #real_color_image = tf.where(tf.math.less(real_color_image, 0), tf.ones_like(real_color_image) + real_color_image, real_color_image) - - # Set the background to be all -1 - real_color_image *= mask_object - real_color_image += (mask_background*tf.constant(-1.)) - - # Get the number of pixels - num_pixels = tf.math.reduce_prod(tf.shape(real_color_image)[1:3]) - beta = 3 - - # Calculate the difference between the real and predicted images including the mask - diff_object = tf.math.abs(predicted_color_image*mask_object - real_color_image*mask_object) - diff_background = tf.math.abs(predicted_color_image*mask_background - real_color_image*mask_background) - - # Calculate the total loss - loss_colors = tf.cast((1/num_pixels), dtype=tf.float32)*(beta*tf.math.reduce_sum(diff_object, axis=[1, 2, 3]) + tf.math.reduce_sum(diff_background, axis=[1, 2, 3])) - min_loss = tf.math.minimum(loss_colors, min_loss) - return min_loss - - return loss_color_unwrapped - - -def loss_error(real_error_image, predicted_error_image): - # Get the number of pixels - num_pixels = tf.math.reduce_prod(tf.shape(real_error_image)[1:3]) - loss_error = tf.cast((1/num_pixels), dtype=tf.float32)*(tf.math.reduce_sum(tf.math.square(predicted_error_image - tf.clip_by_value(tf.math.abs(real_error_image), tf.float32.min, 1.)), axis=[1, 2, 3])) - - return loss_error - - -def Generator(): - bn_axis = 3 - - input = Input((128, 128, 3), name='input_image') - - # First layer of the encoder - e1_1 = Conv2D(64, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_1_1')(input) - e1_1 = BatchNormalization(bn_axis)(e1_1) - e1_1 = LeakyReLU()(e1_1) - - e1_2 = Conv2D(64, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_1_2')(input) - e1_2 = BatchNormalization(bn_axis)(e1_2) - e1_2 = LeakyReLU()(e1_2) - - e1 = Concatenate()([e1_1, e1_2]) - - # Second layer of the encoder - e2_1 = Conv2D(128, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_2_1')(e1) - e2_1 = BatchNormalization(bn_axis)(e2_1) - e2_1 = LeakyReLU()(e2_1) - - e2_2 = Conv2D(128, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_2_2')(e1) - e2_2 = BatchNormalization(bn_axis)(e2_2) - e2_2 = LeakyReLU()(e2_2) - - e2 = Concatenate()([e2_1, e2_2]) - - # Third layer of the encoder - e3_1 = Conv2D(128, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_3_1')(e2) - e3_1 = 
BatchNormalization(bn_axis)(e3_1) - e3_1 = LeakyReLU()(e3_1) - - e3_2 = Conv2D(128, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_3_2')(e2) - e3_2 = BatchNormalization(bn_axis)(e3_2) - e3_2 = LeakyReLU()(e3_2) - - e3 = Concatenate()([e3_1, e3_2]) - - # Fourth layer of the encoder - e4_1 = Conv2D(256, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_4_1')(e3) - e4_1 = BatchNormalization(bn_axis)(e4_1) - e4_1 = LeakyReLU()(e4_1) - - e4_2 = Conv2D(256, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_4_2')(e3) - e4_2 = BatchNormalization(bn_axis)(e4_2) - e4_2 = LeakyReLU()(e4_2) - - e4 = Concatenate()([e4_1, e4_2]) - - # Latent dimension - x = Flatten()(e4) - x = Dense(256)(x) - x = Dense(8*8*256)(x) - x = Reshape((8, 8, 256))(x) - - # First layer of the decoder - d1_1 = Conv2DTranspose(256, (5, 5), strides=(2, 2), padding='same', name='decoder_conv2D_1_1')(x) - d1_1 = BatchNormalization(bn_axis)(d1_1) - d1_1 = LeakyReLU()(d1_1) - - d1 = Concatenate()([d1_1, e3_2]) - - # Second layer of the decoder - d2_1 = Conv2D(256, (5, 5), strides=(1, 1), padding='same', name='decoder_conv2D_2_1')(d1) - d2_1 = BatchNormalization(bn_axis)(d2_1) - d2_1 = LeakyReLU()(d2_1) - - d2_2 = Conv2DTranspose(128, (5, 5), strides=(2, 2), padding='same', name='decoder_conv2D_2_2')(d2_1) - d2_2 = BatchNormalization(bn_axis)(d2_2) - d2_2 = LeakyReLU()(d2_2) - - d2 = Concatenate()([d2_2, e2_2]) - - # Third layer of the decoder - d3_1 = Conv2D(256, (5, 5), strides=(1, 1), padding='same', name='decoder_conv2D_3_1')(d2) - d3_1 = BatchNormalization(bn_axis)(d3_1) - d3_1 = LeakyReLU()(d3_1) - - d3_2 = Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', name='decoder_conv2D_3_2')(d3_1) - d3_2 = BatchNormalization(bn_axis)(d3_2) - d3_2 = LeakyReLU()(d3_2) - - d3 = Concatenate()([d3_2, e1_2]) - - # Fourth layer - d4_1 = Conv2D(128, (5, 5), strides=(1, 1), padding='same', name='decoder_conv2D_4_1')(d3) - d4_1 = BatchNormalization(bn_axis)(d4_1) - d4_1 = LeakyReLU()(d4_1) - - # Define the two outputs - color_output = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(d4_1) - color_output = Activation('tanh', name='color_output')(color_output) - - error_output = Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same')(d4_1) - error_output = Activation('sigmoid', name='error_output')(error_output) - - # Define model - model = Model(inputs=[input], outputs=[color_output, error_output]) - - return model - - -def Discriminator(): - bn_axis = 3 - - input = Input((128, 128, 3), name='input_image') - - # First layer of the discriminator - d1 = Conv2D(64, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_1_1')(input) - d1 = BatchNormalization(bn_axis)(d1) - d1 = LeakyReLU(0.2)(d1) - - # Second layer of the discriminator - d2 = Conv2D(128, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_2_1')(d1) - d2 = BatchNormalization(bn_axis)(d2) - d2 = LeakyReLU(0.2)(d2) - - # Third layer of the discriminator - d3 = Conv2D(256, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_3_1')(d2) - d3 = BatchNormalization(bn_axis)(d3) - d3 = LeakyReLU(0.2)(d3) - - # Fourth layer of the discriminator - d4 = Conv2D(512, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_4_1')(d3) - d4 = BatchNormalization(bn_axis)(d4) - d4 = LeakyReLU(0.2)(d4) - - # Fifth layer of the discriminator - d5 = Conv2D(512, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_5_1')(d4) - d5 = BatchNormalization(bn_axis)(d5) - d5 = LeakyReLU(0.2)(d5) 
- - # Sixth layer of the discriminator - d6 = Conv2D(512, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_6_1')(d5) - d6 = BatchNormalization(bn_axis)(d6) - d6 = LeakyReLU(0.2)(d6) - - # Seventh layer of the discriminator - d7 = Conv2D(512, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_7_1')(d6) - d7 = BatchNormalization(bn_axis)(d7) - d7 = LeakyReLU(0.2)(d7) - - flatten = Flatten()(d7) - output = Dense(1, activation='sigmoid', name='discriminator_output')(flatten) - discriminator_model = Model(inputs=input, outputs=[output]) - return discriminator_model diff --git a/examples/pix2pose/pix2pose/test.py b/examples/pix2pose/pix2pose/test.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/pix2pose/utils.py b/examples/pix2pose/utils.py new file mode 100644 index 000000000..1a8a9b8c3 --- /dev/null +++ b/examples/pix2pose/utils.py @@ -0,0 +1,58 @@ +import tensorflow as tf + + +def loss_color_wrapped(rotation_matrices): + def loss_color_unwrapped(color_image, predicted_color_image): + min_loss = tf.float32.max + + # Bring the image in the range between 0 and 1 + color_image = (color_image + 1) * 0.5 + + # Calculate masks for the object and the background (they are independent of the rotation) + mask_object = tf.repeat(tf.expand_dims(tf.math.reduce_max(tf.math.ceil(color_image), axis=-1), axis=-1), + repeats=3, axis=-1) + mask_background = tf.ones(tf.shape(mask_object)) - mask_object + + # Bring the image again in the range between -1 and 1 + color_image = (color_image * 2) - 1 + + # Iterate over all possible rotations + for rotation_matrix in rotation_matrices: + + real_color_image = tf.identity(color_image) + + # Add a small epsilon value to avoid the discontinuity problem + real_color_image = real_color_image + tf.ones_like(real_color_image) * 0.0001 + + # Rotate the object + real_color_image = tf.einsum('ij,mklj->mkli', tf.convert_to_tensor(np.array(rotation_matrix), dtype=tf.float32), real_color_image) + #real_color_image = tf.where(tf.math.less(real_color_image, 0), tf.ones_like(real_color_image) + real_color_image, real_color_image) + + # Set the background to be all -1 + real_color_image *= mask_object + real_color_image += (mask_background*tf.constant(-1.)) + + # Get the number of pixels + num_pixels = tf.math.reduce_prod(tf.shape(real_color_image)[1:3]) + beta = 3 + + # Calculate the difference between the real and predicted images including the mask + diff_object = tf.math.abs(predicted_color_image*mask_object - real_color_image*mask_object) + diff_background = tf.math.abs(predicted_color_image*mask_background - real_color_image*mask_background) + + # Calculate the total loss + loss_colors = tf.cast((1/num_pixels), dtype=tf.float32)*(beta*tf.math.reduce_sum(diff_object, axis=[1, 2, 3]) + tf.math.reduce_sum(diff_background, axis=[1, 2, 3])) + min_loss = tf.math.minimum(loss_colors, min_loss) + return min_loss + + return loss_color_unwrapped + + +def loss_error(real_error_image, predicted_error_image): + # Get the number of pixels + num_pixels = tf.math.reduce_prod(tf.shape(real_error_image)[1:3]) + loss_error = tf.cast((1/num_pixels), dtype=tf.float32)*(tf.math.reduce_sum(tf.math.square(predicted_error_image - tf.clip_by_value(tf.math.abs(real_error_image), tf.float32.min, 1.)), axis=[1, 2, 3])) + + return loss_error + + From 45da37fcba9a9f5c7a85291dc564258d347c9504 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 15 Oct 2021 16:48:09 +0200 Subject: [PATCH 005/101] Refactor domain randomization processor --- 
examples/pix2pose/__init__.py | 0 .../{pipelines.py => old_pipelines.py} | 36 ++--------- examples/pix2pose/pipeline.py | 18 ++++++ examples/pix2pose/pix2pose/generator.py | 10 +-- examples/pix2pose/processors.py | 21 +++++++ examples/pix2pose/test.py | 63 +++++++++++++++++++ examples/pix2pose/utils.py | 35 ++++++----- 7 files changed, 133 insertions(+), 50 deletions(-) create mode 100644 examples/pix2pose/__init__.py rename examples/pix2pose/{pipelines.py => old_pipelines.py} (89%) create mode 100644 examples/pix2pose/pipeline.py create mode 100644 examples/pix2pose/processors.py create mode 100644 examples/pix2pose/test.py diff --git a/examples/pix2pose/__init__.py b/examples/pix2pose/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/old_pipelines.py similarity index 89% rename from examples/pix2pose/pipelines.py rename to examples/pix2pose/old_pipelines.py index ee4299670..f484c6a44 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/old_pipelines.py @@ -11,8 +11,7 @@ class GeneratedImageProcessor(Processor): - """ - Loads pre-generated images + """Loads pre-generated images """ def __init__(self, path_images, background_images_paths, num_occlusions=1, split=pr.TRAIN, no_ambiguities=False): super(GeneratedImageProcessor, self).__init__() @@ -38,18 +37,17 @@ def __init__(self, path_images, background_images_paths, num_occlusions=1, split self.alpha_original = [np.load(os.path.join(path_images, "alpha_original/alpha_original_{}.npy".format(str(i).zfill(7)))) for i in range(self.num_images)] - def call(self): - index = random.randint(0, self.num_images-1) - image_original = self.images_original[index] - image_colors = self.images_colors[index] - alpha_original = self.alpha_original[index] + def call(self, input_image, label_image): + # index = random.randint(0, self.num_images-1) + # image_original = self.images_original[index] + # image_colors = self.images_colors[index] + # alpha_original = self.alpha_original[index] if self.split == pr.TRAIN: image_original = self.augment(image_original, alpha_original) image_original = self.preprocess_input(image_original) image_colors = self.preprocess_output(image_colors) - return image_original, image_colors @@ -150,25 +148,3 @@ def process_batch(self, inputs, labels, batch_index): self._place_sample(sample['labels'], sample_arg, labels) return inputs, labels - - -class NormalizeImageTanh(Processor): - """ - Normalize image so that the values are between -1 and 1 - """ - def __init__(self): - super(NormalizeImageTanh, self).__init__() - - def call(self, image): - return (image/127.5)-1 - - -class DenormalizeImageTanh(Processor): - """ - Transforms an image from the value range -1 to 1 back to 0 to 255 - """ - def __init__(self): - super(DenormalizeImageTanh, self).__init__() - - def call(self, image): - return (image + 1.0)*127.5 diff --git a/examples/pix2pose/pipeline.py b/examples/pix2pose/pipeline.py new file mode 100644 index 000000000..4e3ffdcf6 --- /dev/null +++ b/examples/pix2pose/pipeline.py @@ -0,0 +1,18 @@ +from paz.abstract import SequentialProcessor +from paz.pipelines import RandomizeRenderedImage as RandomizeRender +from paz import processors as pr +from .processors import ImageToClosedOneBall + + +class AutoEncoderDomainRandomization(SequentialProcessor): + """Performs domain randomization on a rendered image + """ + def __init__(self, image_shape, image_paths, num_occlusions=1): + super(AutoEncoderDomainRandomization, self).__init__() + 
self.add(pr.Render()) + self.add(pr.ControlMap(pr.Copy(), [0], [2], keep={0: 0})) + self.add(pr.ControlMap(RandomizeRender(image_paths), [0, 1], [0])) + self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) + self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) + self.add(pr.SequenceWrapper({0: {'input_image': image_shape}}, + {1: {'label_image': image_shape}})) diff --git a/examples/pix2pose/pix2pose/generator.py b/examples/pix2pose/pix2pose/generator.py index 5270bcf58..720586a33 100644 --- a/examples/pix2pose/pix2pose/generator.py +++ b/examples/pix2pose/pix2pose/generator.py @@ -60,11 +60,11 @@ def Generator(input_shape=(128, 128, 3), latent_dimension=256, x = Dense(8 * 8 * latent_dimension)(x) x = Reshape((8, 8, latent_dimension))(x) x = decoder(x, skip_connections) - RGB_mask = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(x) - RGB_mask = Activation('tanh', name='RGB_mask')(RGB_mask) - error_mask = Conv2DTranspose(1, (5, 5), (2, 2), padding='same')(x) - error_mask = Activation('sigmoid', name='error_mask')(error_mask) - model = Model([input_image], [RGB_mask, error_mask], name=name) + label_image = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(x) + label_image = Activation('tanh', name='label_image')(label_image) + error_image = Conv2DTranspose(1, (5, 5), (2, 2), padding='same')(x) + error_image = Activation('sigmoid', name='error_image')(error_image) + model = Model([input_image], [label_image, error_image], name=name) return model diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py new file mode 100644 index 000000000..550229cf1 --- /dev/null +++ b/examples/pix2pose/processors.py @@ -0,0 +1,21 @@ +from paz.abstract import Processor + + +class ImageToClosedOneBall(Processor): + """Map image value from [0, 255] -> [-1, 1]. + """ + def __init__(self): + super(ImageToClosedOneBall, self).__init__() + + def call(self, image): + return (image / 127.5) - 1 + + +class ClosedOneBallToImage(Processor): + """Map normalized value from [-1, 1] -> [0, 255]. 
+ """ + def __init__(self): + super(ClosedOneBallToImage, self).__init__() + + def call(self, image): + return (image + 1.0) * 127.5 diff --git a/examples/pix2pose/test.py b/examples/pix2pose/test.py new file mode 100644 index 000000000..d23077c90 --- /dev/null +++ b/examples/pix2pose/test.py @@ -0,0 +1,63 @@ +from paz.abstract import SequentialProcessor, Processor +from paz import processors as pr +import numpy as np + + +class PipelineWithTwoChannels(SequentialProcessor): + def __init__(self): + super(PipelineWithTwoChannels, self).__init__() + self.add(lambda x: x) + self.add(pr.ControlMap(pr.Copy(), [0], [1], keep={0: 0})) + + +class PipelineWithThreeChannels(SequentialProcessor): + def __init__(self): + super(PipelineWithThreeChannels, self).__init__() + self.add(lambda a, b: (a, b)) + self.add(pr.ControlMap(pr.Copy(), [0], [2], keep={0: 0})) + + +class PipelineWithThreeChannelsPlus(SequentialProcessor): + def __init__(self): + super(PipelineWithThreeChannelsPlus, self).__init__() + self.add(lambda a, b: (a, b)) + self.add(pr.ControlMap(pr.Copy(), [0], [2], keep={0: 0})) + self.add(pr.ControlMap(SumTwoValues(), [0, 1], [0])) + + +class SumTwoValues(Processor): + def __init__(self): + super(SumTwoValues, self).__init__() + + def call(self, A, B): + return A + B + + +def test_copy_with_controlmap_using_2_channels(): + pipeline = PipelineWithTwoChannels() + random_values = np.random.random((128, 128)) + values = pipeline(random_values) + assert len(values) == 2 + assert np.allclose(values[0], random_values) + assert np.allclose(values[1], random_values) + + +def test_copy_with_controlmap_using_3_channels(): + pipeline = PipelineWithThreeChannels() + A_random_values = np.random.random((128, 128)) + B_random_values = np.random.random((128, 128)) + values = pipeline(A_random_values, B_random_values) + assert len(values) == 3 + assert np.allclose(values[0], A_random_values) + assert np.allclose(values[1], B_random_values) + assert np.allclose(values[2], A_random_values) + + +def test_copy_with_controlmap_using_3_channels_plus(): + pipeline = PipelineWithThreeChannelsPlus() + A_random_values = np.random.random((128, 128)) + B_random_values = np.random.random((128, 128)) + values = pipeline(A_random_values, B_random_values) + assert len(values) == 2 + assert np.allclose(values[0], A_random_values + B_random_values) + assert np.allclose(values[1], A_random_values) diff --git a/examples/pix2pose/utils.py b/examples/pix2pose/utils.py index 1a8a9b8c3..67f73e312 100644 --- a/examples/pix2pose/utils.py +++ b/examples/pix2pose/utils.py @@ -1,8 +1,25 @@ import tensorflow as tf +from tensorflow.keras.losses import Loss -def loss_color_wrapped(rotation_matrices): - def loss_color_unwrapped(color_image, predicted_color_image): +class Pix2PoseLoss(Loss): + def __init__(self): + super(Pix2PoseLoss, self).__init__() + + def call(self, y_true, y_pred): + y_true = tf.clip_by_value(tf.math.abs(y_true), tf.float32.min, 1.0) + squared_error = tf.square(y_pred - y_true) + squared_error = tf.reduce_sum(squared_error, axis=3) + squared_error = tf.reduce_mean(squared_error, axis=[1, 2]) + return squared_error + + +class Pix2PoseColor(Loss): + def __init__(self, rotation_matrices): + super(Pix2PoseColor, self).__init__() + self.rotation_matrices = rotation_matrices + + def call(self, color_image, predicted_color_image): min_loss = tf.float32.max # Bring the image in the range between 0 and 1 @@ -17,7 +34,7 @@ def loss_color_unwrapped(color_image, predicted_color_image): color_image = (color_image * 2) - 1 # Iterate 
over all possible rotations - for rotation_matrix in rotation_matrices: + for rotation_matrix in self.rotation_matrices: real_color_image = tf.identity(color_image) @@ -44,15 +61,3 @@ def loss_color_unwrapped(color_image, predicted_color_image): loss_colors = tf.cast((1/num_pixels), dtype=tf.float32)*(beta*tf.math.reduce_sum(diff_object, axis=[1, 2, 3]) + tf.math.reduce_sum(diff_background, axis=[1, 2, 3])) min_loss = tf.math.minimum(loss_colors, min_loss) return min_loss - - return loss_color_unwrapped - - -def loss_error(real_error_image, predicted_error_image): - # Get the number of pixels - num_pixels = tf.math.reduce_prod(tf.shape(real_error_image)[1:3]) - loss_error = tf.cast((1/num_pixels), dtype=tf.float32)*(tf.math.reduce_sum(tf.math.square(predicted_error_image - tf.clip_by_value(tf.math.abs(real_error_image), tf.float32.min, 1.)), axis=[1, 2, 3])) - - return loss_error - - From 42e85dc424d1bf4e010746b29cb7b0ccc4d74414 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Sat, 16 Oct 2021 17:17:28 +0200 Subject: [PATCH 006/101] Ignore .iml file in complete repository --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6d12a3065..607a7ce7e 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ checkpoint *.npy *.p *.zip +*.iml !.github/manifest.xml From ac2230759d8a61148a2f11478699d5ec03d07392 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Sat, 16 Oct 2021 17:17:57 +0200 Subject: [PATCH 007/101] Add quaternion backend and basic coloring scheme --- examples/pix2pose/backend.py | 76 +++++++++++++++++++++++++++++++++++ examples/pix2pose/coloring.py | 49 ++++++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 examples/pix2pose/backend.py create mode 100644 examples/pix2pose/coloring.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py new file mode 100644 index 000000000..3c2e35454 --- /dev/null +++ b/examples/pix2pose/backend.py @@ -0,0 +1,76 @@ +import numpy as np + + +def homogenous_quaternion_to_rotation_matrix(quaternion): + # w0, q1, q2, q3 = quaternion + q1, q2, q3, w0 = quaternion + + r11 = w0**2 + q1**2 - q2**2 - q3**2 + r12 = 2 * ((q1 * q2) - (w0 * q3)) + r13 = 2 * ((w0 * q2) + (q1 * q3)) + + r21 = 2 * ((w0 * q3) + (q1 * q2)) + r22 = w0**2 - q1**2 + q2**2 - q3**2 + r23 = 2 * ((q2 * q3) - (w0 * q1)) + + r31 = 2 * ((q1 * q3) - (w0 * q2)) + r32 = 2 * ((w0 * q1) + (q2 * q3)) + r33 = w0**2 - q1**2 - q2**2 + q3**2 + + rotation_matrix = np.array([[r11, r12, r13], + [r21, r22, r23], + [r31, r32, r33]]) + return rotation_matrix + + +def inhomogenous_quaternion_to_rotation_matrix(q): + """Transforms quaternion into a rotation matrix + # Arguments + q: quarternion, Numpy array of shape ``[4]`` + # Returns + Numpy array representing a rotation vector having a shape ``[3]``. + """ + # quaternion + # q = q[::-1] + r11 = 1 - (2 * (q[1]**2 + q[2]**2)) + r12 = 2 * (q[0] * q[1] - q[3] * q[2]) + r13 = 2 * (q[3] * q[1] + q[0] * q[2]) + + r21 = 2 * (q[0] * q[1] + q[3] * q[2]) + r22 = 1 - (2 * (q[0]**2 + q[2]**2)) + r23 = 2 * (q[1] * q[2] - q[3] * q[0]) + + r31 = 2 * (q[0] * q[2] - q[3] * q[1]) + r32 = 2 * (q[3] * q[0] + q[1] * q[2]) + r33 = 1 - (2 * (q[0]**2 + q[1]**2)) + + rotation_matrix = np.array([[r11, r12, r13], + [r21, r22, r23], + [r31, r32, r33]]) + + return rotation_matrix + # return np.squeeze(rotation_matrix) + + +def multiply_quaternions(quaternion_0, quaternion_1): + """Multiplies two quaternions. 
+ + # Reference: + Code extracted from [here](https://stackoverflow.com/questions/ + 39000758/how-to-multiply-two-quaternions-by-python-or-numpy) + """ + x0, y0, z0, w0 = quaternion_0 + x1, y1, z1, w1 = quaternion_1 + x2 = +(x1 * w0) + (y1 * z0) - (z1 * y0) + (w1 * x0) + y2 = -(x1 * z0) + (y1 * w0) + (z1 * x0) + (w1 * y0) + z2 = +(x1 * y0) - (y1 * x0) + (z1 * w0) + (w1 * z0) + w2 = -(x1 * x0) - (y1 * y0) - (z1 * z0) + (w1 * w0) + return np.array([x2, y2, z2, w2]) + + +# quaternion = (1 / np.sqrt(30)) * np.array([1, 2, 3, 4]) +# theta = np.deg2rad(0) +# quaternion = np.array([1, 0, 0, 0]) +# a = homogenous_quaternion_to_rotation_matrix(quaternion) +# quaternion = (1 / np.sqrt(30)) * np.array([2, 3, 4, 1]) +# b = inhomogenous_quaternion_to_rotation_matrix(quaternion) diff --git a/examples/pix2pose/coloring.py b/examples/pix2pose/coloring.py new file mode 100644 index 000000000..63d19c93f --- /dev/null +++ b/examples/pix2pose/coloring.py @@ -0,0 +1,49 @@ +import os +import numpy as np +import trimesh +from pyrender import Mesh, Scene, Viewer +from pyrender.constants import RenderFlags + + +def normalize_min_max(x, x_min, x_max): + return (x - x_min) / (x_max - x_min) + + +def load_obj(path): + mesh = trimesh.load(path) + return mesh + + +def extract_corners3D(vertices): + point3D_min = np.min(vertices, axis=0) + point3D_max = np.max(vertices, axis=0) + return point3D_min, point3D_max + + +def compute_vertices_colors(vertices): + corner3D_min, corner3D_max = extract_corners3D(vertices) + normalized_colors = normalize_min_max(vertices, corner3D_min, corner3D_max) + colors = (255 * normalized_colors).astype('uint8') + return colors + + +def color_object(path): + mesh = load_obj(path) + colors = compute_vertices_colors(mesh.vertices) + mesh.visual = mesh.visual.to_color() + mesh.visual.vertex_colors = colors + return mesh + + +if __name__ == "__main__": + scene = Scene(bg_color=[0, 0, 0]) + root = os.path.expanduser('~') + mesh_path = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' + path = os.path.join(root, mesh_path) + mesh = color_object(path) + mesh = Mesh.from_trimesh(mesh, smooth=False) + mesh.primitives[0].material.metallicFactor = 0.0 + mesh.primitives[0].material.roughnessFactor = 1.0 + mesh.primitives[0].material.alphaMode = 'OPAQUE' + scene.add(mesh) + Viewer(scene, use_raymond_lighting=True, flags=RenderFlags.FLAT) From d49e9f94f2ca1bbd808fce2b6d6f741da57c6fba Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 18 Oct 2021 13:19:35 +0200 Subject: [PATCH 008/101] Add scene for rendering pixel and normal image --- examples/pix2pose/coloring.py | 8 +- examples/pix2pose/old_train.py | 137 ++++++++++++++ .../pix2pose/{pipeline.py => pipelines.py} | 14 +- examples/pix2pose/scenes.py | 67 +++++++ examples/pix2pose/train.py | 173 ++++-------------- 5 files changed, 253 insertions(+), 146 deletions(-) create mode 100644 examples/pix2pose/old_train.py rename examples/pix2pose/{pipeline.py => pipelines.py} (55%) create mode 100644 examples/pix2pose/scenes.py diff --git a/examples/pix2pose/coloring.py b/examples/pix2pose/coloring.py index 63d19c93f..29259800d 100644 --- a/examples/pix2pose/coloring.py +++ b/examples/pix2pose/coloring.py @@ -32,6 +32,10 @@ def color_object(path): colors = compute_vertices_colors(mesh.vertices) mesh.visual = mesh.visual.to_color() mesh.visual.vertex_colors = colors + mesh = Mesh.from_trimesh(mesh, smooth=False) + mesh.primitives[0].material.metallicFactor = 0.0 + mesh.primitives[0].material.roughnessFactor = 1.0 + 
mesh.primitives[0].material.alphaMode = 'OPAQUE' return mesh @@ -41,9 +45,5 @@ def color_object(path): mesh_path = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' path = os.path.join(root, mesh_path) mesh = color_object(path) - mesh = Mesh.from_trimesh(mesh, smooth=False) - mesh.primitives[0].material.metallicFactor = 0.0 - mesh.primitives[0].material.roughnessFactor = 1.0 - mesh.primitives[0].material.alphaMode = 'OPAQUE' scene.add(mesh) Viewer(scene, use_raymond_lighting=True, flags=RenderFlags.FLAT) diff --git a/examples/pix2pose/old_train.py b/examples/pix2pose/old_train.py new file mode 100644 index 000000000..c7adce3f1 --- /dev/null +++ b/examples/pix2pose/old_train.py @@ -0,0 +1,137 @@ +import os +import glob +import argparse +import numpy as np +import time + +from tensorflow.keras.callbacks import CSVLogger +from tensorflow.keras.optimizers import Adam +from tensorflow.keras.layers import Input +from tensorflow.keras.models import Model + +from paz.abstract import GeneratingSequence +from paz.abstract.sequence import GeneratingSequence + +from pipelines import GeneratingSequencePix2Pose, GeneratedImageGenerator, make_batch_discriminator +from model import Generator, Discriminator, loss_color_wrapped, loss_error + + +description = 'Training script Pix2Pose model' +root_path = os.path.join(os.path.expanduser('~'), '.keras/') +parser = argparse.ArgumentParser(description=description) +parser.add_argument('-cl', '--class_name', default='tless05', type=str, + help='Class name to be added to model save path') +parser.add_argument('-id', '--background_images_directory', type=str, + help='Path to directory containing background images') +parser.add_argument('-pi', '--images_directory', type=str, + help='Path to pre-generated images (npy format)') +parser.add_argument('-bs', '--batch_size', default=4, type=int, + help='Batch size for training') +parser.add_argument('-lr', '--learning_rate', default=0.001, type=float, + help='Initial learning rate for Adam') +parser.add_argument('-ld', '--image_size', default=128, type=int, + help='Size of the side of a square image e.g. 
64') +parser.add_argument('-e', '--max_num_epochs', default=10000, type=int, + help='Maximum number of epochs before finishing') +parser.add_argument('-st', '--steps_per_epoch', default=5, type=int, + help='Steps per epoch') +parser.add_argument('-oc', '--num_occlusions', default=2, type=int, + help='Number of occlusions') +parser.add_argument('-sa', '--save_path', + default=os.path.join( + os.path.expanduser('~'), '.keras/paz/models'), + type=str, help='Path for writing model weights and logs') +parser.add_argument('-rm', '--rotation_matrices', + type=str, help='Path to npy file with a list of rotation matrices', required=True) +parser.add_argument('-de', '--description', + type=str, help='Description of the model') +args = parser.parse_args() + +# Building the whole GAN model +dcgan_input = Input(shape=(128, 128, 3)) +discriminator = Discriminator() +generator = Generator() +color_output, error_output = generator(dcgan_input) +discriminator.trainable = False +discriminator_output = discriminator(color_output) +dcgan = Model(inputs=[dcgan_input], outputs={"color_output": color_output, "error_output": error_output, "discriminator_output": discriminator_output}) + +# For the loss function pix2pose needs to know all the rotations under which the pose looks the same +rotation_matrices = np.load(args.rotation_matrices) +loss_color = loss_color_wrapped(rotation_matrices) + +# Set the loss +optimizer = Adam(args.learning_rate, amsgrad=True) +losses = {"color_output": loss_color, + "error_output": loss_error, + "discriminator_output": "binary_crossentropy"} +lossWeights = {"color_output": 100.0, "error_output": 50.0, "discriminator_output": 1.0} +dcgan.compile(optimizer=optimizer, loss=losses, loss_weights=lossWeights, run_eagerly=True) + +discriminator.trainable = True +discriminator.compile(loss=['binary_crossentropy'], optimizer=optimizer) + +# Creating sequencer +background_image_paths = glob.glob(os.path.join(args.background_images_directory, '*.jpg')) +processor_train = GeneratedImageGenerator(os.path.join(args.images_directory, "train"), args.image_size, background_image_paths, num_occlusions=0) +processor_test = GeneratedImageGenerator(os.path.join(args.images_directory, "test"), args.image_size, background_image_paths, num_occlusions=0) +sequence_train = GeneratingSequencePix2Pose(processor_train, dcgan, args.batch_size, args.steps_per_epoch, rotation_matrices=rotation_matrices) +sequence_test = GeneratingSequencePix2Pose(processor_test, dcgan, args.batch_size, args.steps_per_epoch, rotation_matrices=rotation_matrices) + +# Making directory for saving model weights and logs +model_name = '_'.join([dcgan.name, args.class_name]) +save_path = os.path.join(args.save_path, model_name) +if not os.path.exists(save_path): + os.makedirs(save_path) + +# Setting callbacks +log = CSVLogger(os.path.join(save_path, '%s.log' % model_name)) +log.model = dcgan + +callbacks=[log] + +for callback in callbacks: + callback.on_train_begin() + +for num_epoch in range(args.max_num_epochs): + sequence_iterator_train = sequence_train.__iter__() + sequence_iterator_test = sequence_test.__iter__() + + for callback in callbacks: + callback.on_epoch_begin(num_epoch) + + for num_batch in range(args.steps_per_epoch): + # Train the discriminator + discriminator.trainable = True + batch = next(sequence_iterator_train) + + X_discriminator_real, y_discriminator_real = make_batch_discriminator(generator, batch[0]['input_image'], batch[1]['color_output'], 1) + loss_discriminator_real = 
discriminator.train_on_batch(X_discriminator_real, y_discriminator_real) + + X_discriminator_fake, y_discriminator_fake = make_batch_discriminator(generator, batch[0]['input_image'], batch[1]['color_output'], 0) + loss_discriminator_fake = discriminator.train_on_batch(X_discriminator_fake, y_discriminator_fake) + + loss_discriminator = (loss_discriminator_real + loss_discriminator_fake)/2. + + # Train the generator + discriminator.trainable = False + loss_dcgan, loss_color_output, loss_dcgan_discriminator, loss_error_output = dcgan.train_on_batch(batch[0]['input_image'], {"color_output": batch[1]['color_output'], "error_output": batch[1]['error_output'], "discriminator_output": np.ones((args.batch_size, 1))}) + + # Test the network + batch_test = next(sequence_iterator_test) + loss_dcgan_test, loss_color_output_test, loss_dcgan_discriminator_test, loss_error_output_test = dcgan.test_on_batch(batch_test[0]['input_image'], {"color_output": batch_test[1]['color_output'], "error_output": batch_test[1]['error_output'], "discriminator_output": np.ones((args.batch_size, 1))}) + + print("Loss DCGAN: {}".format(loss_dcgan)) + for callback in callbacks: + callback.on_epoch_end(num_epoch, logs={'loss_discriminator': loss_discriminator, + 'loss_dcgan': loss_dcgan, 'loss_color_output': loss_color_output, + 'loss_dcgan_discriminator': loss_dcgan_discriminator, + 'loss_error_output': loss_error_output, + 'loss_dcgan_test': loss_dcgan_test, 'loss_color_output_test': loss_color_output_test, + 'loss_dcgan_discriminator_test': loss_dcgan_discriminator_test, + 'loss_error_output_test': loss_error_output_test + }) + + +for callback in callbacks: + callback.on_train_end() \ No newline at end of file diff --git a/examples/pix2pose/pipeline.py b/examples/pix2pose/pipelines.py similarity index 55% rename from examples/pix2pose/pipeline.py rename to examples/pix2pose/pipelines.py index 4e3ffdcf6..f345a9014 100644 --- a/examples/pix2pose/pipeline.py +++ b/examples/pix2pose/pipelines.py @@ -1,18 +1,18 @@ from paz.abstract import SequentialProcessor from paz.pipelines import RandomizeRenderedImage as RandomizeRender from paz import processors as pr -from .processors import ImageToClosedOneBall +# from processors import ImageToClosedOneBall -class AutoEncoderDomainRandomization(SequentialProcessor): +class DomainRandomization(SequentialProcessor): """Performs domain randomization on a rendered image """ - def __init__(self, image_shape, image_paths, num_occlusions=1): - super(AutoEncoderDomainRandomization, self).__init__() - self.add(pr.Render()) - self.add(pr.ControlMap(pr.Copy(), [0], [2], keep={0: 0})) + def __init__(self, renderer, image_shape, image_paths, num_occlusions=1): + super(DomainRandomization, self).__init__() + self.add(pr.Render(renderer)) self.add(pr.ControlMap(RandomizeRender(image_paths), [0, 1], [0])) self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) - self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) + # self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) + self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) self.add(pr.SequenceWrapper({0: {'input_image': image_shape}}, {1: {'label_image': image_shape}})) diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py new file mode 100644 index 000000000..6ce85ab16 --- /dev/null +++ b/examples/pix2pose/scenes.py @@ -0,0 +1,67 @@ +import numpy as np +from paz.backend.render import (sample_uniformly, split_alpha_channel, + random_perturbation, sample_point_in_sphere, + compute_modelview_matrices) +from pyrender import 
(PerspectiveCamera, OffscreenRenderer, DirectionalLight, + RenderFlags, Mesh, Scene) +import trimesh +from coloring import color_object + + +class PixelMask(): + """Render-ready scene composed of a single object and a single moving camera. + + # Arguments + path_OBJ: String containing the path to an OBJ file. + viewport_size: List, specifying [H, W] of rendered image. + y_fov: Float indicating the vertical field of view in radians. + distance: List of floats indicating [max_distance, min_distance] + light: List of floats indicating [max_light, min_light] + top_only: Boolean. If True images are only take from the top. + roll: Float, to sample [-roll, roll] rolls of the Z OpenGL camera axis. + shift: Float, to sample [-shift, shift] to move in X, Y OpenGL axes. + """ + def __init__(self, path_OBJ, viewport_size=(128, 128), y_fov=3.14159 / 4.0, + distance=[0.3, 0.5], light=[0.5, 30], top_only=False, + roll=None, shift=None): + self.distance, self.roll, self.shift = distance, roll, shift + self.light_intensity, self.top_only = light, top_only + self._build_scene(path_OBJ, viewport_size, light, y_fov) + self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) + self.flags_RGBA = RenderFlags.RGBA + self.flags_FLAT = RenderFlags.FLAT + self.epsilon = 0.01 + + def _build_scene(self, path, size, light, y_fov): + self.scene = Scene(bg_color=[0, 0, 0, 0]) + self.light = self.scene.add( + DirectionalLight([1.0, 1.0, 1.0], np.mean(light))) + self.camera = self.scene.add( + PerspectiveCamera(y_fov, aspectRatio=np.divide(*size))) + self.pixel_mesh = self.scene.add(color_object(path)) + self.mesh = self.scene.add( + Mesh.from_trimesh(trimesh.load(path), smooth=True)) + self.world_origin = self.mesh.mesh.centroid + + def _sample_parameters(self): + distance = sample_uniformly(self.distance) + camera_origin = sample_point_in_sphere(distance, self.top_only) + camera_origin = random_perturbation(camera_origin, self.epsilon) + light_intensity = sample_uniformly(self.light_intensity) + return camera_origin, light_intensity + + def render(self): + camera_origin, intensity = self._sample_parameters() + camera_to_world, world_to_camera = compute_modelview_matrices( + camera_origin, self.world_origin, self.roll, self.shift) + self.light.light.intensity = intensity + self.scene.set_pose(self.camera, camera_to_world) + self.scene.set_pose(self.light, camera_to_world) + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + return image, alpha, RGB_mask diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index c7adce3f1..b19f9d4bc 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -1,137 +1,40 @@ import os import glob -import argparse -import numpy as np -import time - -from tensorflow.keras.callbacks import CSVLogger -from tensorflow.keras.optimizers import Adam -from tensorflow.keras.layers import Input -from tensorflow.keras.models import Model - -from paz.abstract import GeneratingSequence -from paz.abstract.sequence import GeneratingSequence - -from pipelines import GeneratingSequencePix2Pose, GeneratedImageGenerator, make_batch_discriminator -from model import Generator, Discriminator, loss_color_wrapped, loss_error - - -description = 'Training script Pix2Pose model' -root_path = 
os.path.join(os.path.expanduser('~'), '.keras/') -parser = argparse.ArgumentParser(description=description) -parser.add_argument('-cl', '--class_name', default='tless05', type=str, - help='Class name to be added to model save path') -parser.add_argument('-id', '--background_images_directory', type=str, - help='Path to directory containing background images') -parser.add_argument('-pi', '--images_directory', type=str, - help='Path to pre-generated images (npy format)') -parser.add_argument('-bs', '--batch_size', default=4, type=int, - help='Batch size for training') -parser.add_argument('-lr', '--learning_rate', default=0.001, type=float, - help='Initial learning rate for Adam') -parser.add_argument('-ld', '--image_size', default=128, type=int, - help='Size of the side of a square image e.g. 64') -parser.add_argument('-e', '--max_num_epochs', default=10000, type=int, - help='Maximum number of epochs before finishing') -parser.add_argument('-st', '--steps_per_epoch', default=5, type=int, - help='Steps per epoch') -parser.add_argument('-oc', '--num_occlusions', default=2, type=int, - help='Number of occlusions') -parser.add_argument('-sa', '--save_path', - default=os.path.join( - os.path.expanduser('~'), '.keras/paz/models'), - type=str, help='Path for writing model weights and logs') -parser.add_argument('-rm', '--rotation_matrices', - type=str, help='Path to npy file with a list of rotation matrices', required=True) -parser.add_argument('-de', '--description', - type=str, help='Description of the model') -args = parser.parse_args() - -# Building the whole GAN model -dcgan_input = Input(shape=(128, 128, 3)) -discriminator = Discriminator() -generator = Generator() -color_output, error_output = generator(dcgan_input) -discriminator.trainable = False -discriminator_output = discriminator(color_output) -dcgan = Model(inputs=[dcgan_input], outputs={"color_output": color_output, "error_output": error_output, "discriminator_output": discriminator_output}) - -# For the loss function pix2pose needs to know all the rotations under which the pose looks the same -rotation_matrices = np.load(args.rotation_matrices) -loss_color = loss_color_wrapped(rotation_matrices) - -# Set the loss -optimizer = Adam(args.learning_rate, amsgrad=True) -losses = {"color_output": loss_color, - "error_output": loss_error, - "discriminator_output": "binary_crossentropy"} -lossWeights = {"color_output": 100.0, "error_output": 50.0, "discriminator_output": 1.0} -dcgan.compile(optimizer=optimizer, loss=losses, loss_weights=lossWeights, run_eagerly=True) - -discriminator.trainable = True -discriminator.compile(loss=['binary_crossentropy'], optimizer=optimizer) - -# Creating sequencer -background_image_paths = glob.glob(os.path.join(args.background_images_directory, '*.jpg')) -processor_train = GeneratedImageGenerator(os.path.join(args.images_directory, "train"), args.image_size, background_image_paths, num_occlusions=0) -processor_test = GeneratedImageGenerator(os.path.join(args.images_directory, "test"), args.image_size, background_image_paths, num_occlusions=0) -sequence_train = GeneratingSequencePix2Pose(processor_train, dcgan, args.batch_size, args.steps_per_epoch, rotation_matrices=rotation_matrices) -sequence_test = GeneratingSequencePix2Pose(processor_test, dcgan, args.batch_size, args.steps_per_epoch, rotation_matrices=rotation_matrices) - -# Making directory for saving model weights and logs -model_name = '_'.join([dcgan.name, args.class_name]) -save_path = os.path.join(args.save_path, model_name) -if not 
os.path.exists(save_path): - os.makedirs(save_path) - -# Setting callbacks -log = CSVLogger(os.path.join(save_path, '%s.log' % model_name)) -log.model = dcgan - -callbacks=[log] - -for callback in callbacks: - callback.on_train_begin() - -for num_epoch in range(args.max_num_epochs): - sequence_iterator_train = sequence_train.__iter__() - sequence_iterator_test = sequence_test.__iter__() - - for callback in callbacks: - callback.on_epoch_begin(num_epoch) - - for num_batch in range(args.steps_per_epoch): - # Train the discriminator - discriminator.trainable = True - batch = next(sequence_iterator_train) - - X_discriminator_real, y_discriminator_real = make_batch_discriminator(generator, batch[0]['input_image'], batch[1]['color_output'], 1) - loss_discriminator_real = discriminator.train_on_batch(X_discriminator_real, y_discriminator_real) - - X_discriminator_fake, y_discriminator_fake = make_batch_discriminator(generator, batch[0]['input_image'], batch[1]['color_output'], 0) - loss_discriminator_fake = discriminator.train_on_batch(X_discriminator_fake, y_discriminator_fake) - - loss_discriminator = (loss_discriminator_real + loss_discriminator_fake)/2. - - # Train the generator - discriminator.trainable = False - loss_dcgan, loss_color_output, loss_dcgan_discriminator, loss_error_output = dcgan.train_on_batch(batch[0]['input_image'], {"color_output": batch[1]['color_output'], "error_output": batch[1]['error_output'], "discriminator_output": np.ones((args.batch_size, 1))}) - - # Test the network - batch_test = next(sequence_iterator_test) - loss_dcgan_test, loss_color_output_test, loss_dcgan_discriminator_test, loss_error_output_test = dcgan.test_on_batch(batch_test[0]['input_image'], {"color_output": batch_test[1]['color_output'], "error_output": batch_test[1]['error_output'], "discriminator_output": np.ones((args.batch_size, 1))}) - - print("Loss DCGAN: {}".format(loss_dcgan)) - for callback in callbacks: - callback.on_epoch_end(num_epoch, logs={'loss_discriminator': loss_discriminator, - 'loss_dcgan': loss_dcgan, 'loss_color_output': loss_color_output, - 'loss_dcgan_discriminator': loss_dcgan_discriminator, - 'loss_error_output': loss_error_output, - 'loss_dcgan_test': loss_dcgan_test, 'loss_color_output_test': loss_color_output_test, - 'loss_dcgan_discriminator_test': loss_dcgan_discriminator_test, - 'loss_error_output_test': loss_error_output_test - }) - - -for callback in callbacks: - callback.on_train_end() \ No newline at end of file +from scenes import PixelMask +from pipelines import DomainRandomization +from paz.backend.image import show_image + + +image_shape = [128, 128, 3] +root_path = os.path.expanduser('~') +background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' +background_wildcard = os.path.join(root_path, background_wildcard) +image_paths = glob.glob(background_wildcard) +path_OBJ = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' +path_OBJ = os.path.join(root_path, path_OBJ) +num_occlusions = 1 +viewport_size = image_shape[:2] +y_fov = 3.14159 / 4.0 +distance = [0.3, 0.5] +light = [1.0, 30] +top_only = False +roll = 3.14159 +shift = 0.05 + + +renderer = PixelMask(path_OBJ, viewport_size, y_fov, distance, + light, top_only, roll, shift) + +# for _ in range(100): +image, alpha, RGB_mask = renderer.render() +show_image(image) +show_image(RGB_mask) + +processor = DomainRandomization(renderer, image_shape, + image_paths, num_occlusions) + +for _ in range(100): + sample = processor() + inputs, labels = sample['inputs'], sample['labels'] + 
show_image((inputs['input_image'] * 255).astype('uint8')) + show_image((labels['label_image'] * 255).astype('uint8')) From 5c1c726da2673af899426a1cce4960cc8d948f5b Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 18 Oct 2021 15:22:04 +0200 Subject: [PATCH 009/101] Start refactoring loss --- examples/pix2pose/loss.py | 68 +++++++++++++++++++++ examples/pix2pose/pix2pose/pix2pose.py | 81 ++++++++++++++++++++++++++ examples/pix2pose/scenes.py | 2 +- examples/pix2pose/train.py | 2 + examples/pix2pose/utils.py | 19 +++--- 5 files changed, 161 insertions(+), 11 deletions(-) create mode 100644 examples/pix2pose/loss.py create mode 100644 examples/pix2pose/pix2pose/pix2pose.py diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py new file mode 100644 index 000000000..8b856689c --- /dev/null +++ b/examples/pix2pose/loss.py @@ -0,0 +1,68 @@ +from tensorflow.keras.losses import Loss +import tensorflow as tf + + +class WeightedRGBMask(Loss): + def __init__(self, beta=3.0, epsilon=1e-4): + super(WeightedRGBMask, self).__init__() + self.beta, self.epsilon = beta, epsilon + + def _extract_masks(RGBA_mask): + # TODO this should be an additional input or extracted from alpha mask + # mask_object = tf.math.ceil(RGB_mask) + # mask_object = tf.math.reduce_max(mask_object, axis=-1, keepdims=True) + # mask_object = tf.repeat(mask_object, repeats=3, axis=-1) + # mask_background = tf.ones(tf.shape(mask_object)) - mask_object + # return mask_object, mask_background + return None + + def _extract_alpha_mask(self, RGBA_mask): + alpha_mask = RGBA_mask[:, :, :, 3:4] + color_mask = RGBA_mask[:, :, :, 0:3] + return color_mask, alpha_mask + + def _compute_masks(self, alpha_mask): + alpha_mask, 1.0 - alpha_mask + + def _unitball_to_normalized(x): + # [-1, 1] -> [0, 1] + return (x + 1) * 0.5 + + def _normalized_to_unitball(x): + # [0, 1] -> [-1, 1] + return (2.0 * x) - 1.0 + + def call(self, RGBA_mask_true, RGB_mask_pred): + # Loss that penalizes more object color mismatch + # Loss that penalizes less background color not being "0" + # RGB_mask_true = self._unitball_to_normalized(RGB_mask_true) + # mask_object, mask_background = self._extract_masks(RGB_mask_true) + # RGB_mask_true = self._normalized_to_unitball(RGB_mask_true) + # RGB_mask_true = RGB_mask_true + self.epsilon + + # Set the background to be all -1 + RGB_mask_true, alpha_mask = self._extract_alpha_mask(RGBA_mask_true) + # object_mask, background_mask = self._compute_masks(alpha_mask) + + foreground_true = RGB_mask_true * alpha_mask + foreground_pred = RGB_mask_pred * alpha_mask + background_true = RGB_mask_true * (1.0 - alpha_mask) + background_pred = RGB_mask_true * (1.0 - alpha_mask) + foreground_loss = tf.abs(foreground_true - foreground_pred) + background_loss = tf.abs(background_true - background_pred) + loss = (self.beta * foreground_loss) + background_loss + loss = tf.reduce_mean(loss, axis[1, 2, 3]) + # RGB_mask_true = RGB_mask_true * mask_object + # RGB_mask_true = RGB_mask_true + (mask_background * tf.constant(-1.)) + + # Calculate the difference between the real and predicted images including the mask + # object_error = tf.abs(RGB_mask_pred * mask_object - RGB_mask_true * mask_object) + # background_error = tf.abs(RGB_mask_pred * mask_background - RGB_mask_true * mask_background) + + object_error = tf.reduce_sum(object_error, axis=-1) + background_error = tf.reduce_sum(background_error, axis=-1) + + loss = (self.beta * object_error) + background_error + loss = tf.reduce_mean(loss, axis=[1, 2, 3]) + loss = tf.math.minimum(loss, 
tf.float32.max) + return loss diff --git a/examples/pix2pose/pix2pose/pix2pose.py b/examples/pix2pose/pix2pose/pix2pose.py new file mode 100644 index 000000000..bc69b2516 --- /dev/null +++ b/examples/pix2pose/pix2pose/pix2pose.py @@ -0,0 +1,81 @@ +import tensorflow as tf +from tensorflow.keras.models import Model +from tensorflow.keras.metrics import Mean + + +class Pix2PoseGAN(Model): + def __init__(self, image_shape, discriminator, generator, latent_dim): + super(Pix2PoseGAN, self).__init__() + self.image_shape = image_shape + self.discriminator = discriminator + self.generator = generator + self.latent_dim = latent_dim + self.generator_loss_tracker = Mean(name='generator_loss') + self.discriminator_loss_tracker = Mean(name='discriminator_loss') + + @property + def metrics(self): + return [self.generator_loss_tracker, self.discriminator_loss_tracker] + + def compile(self, d_optimizer, g_optimizer, loss_fn): + super(Pix2PoseGAN, self).compile() + self.d_optimizer = d_optimizer + self.g_optimizer = g_optimizer + self.loss_fn = loss_fn + + def train_step(self, data): + real_images, one_hot_labels = data + + # Add dummy dimensions to the labels so that they can be concatenated with + # the images. This is for the discriminator. + image_one_hot_labels = one_hot_labels[:, :, None, None] + image_one_hot_labels = tf.repeat(image_one_hot_labels, repeats=[image_size * image_size]) + image_one_hot_labels = tf.reshape(image_one_hot_labels, (-1, image_size, image_size, num_classes)) + + # Sample random points in the latent space and concatenate the labels. + # This is for the generator. + batch_size = tf.shape(real_images)[0] + random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) + random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) + + # Decode the noise (guided by labels) to fake images. + generated_images = self.generator(random_vector_labels) + + # Combine them with real images. Note that we are concatenating the labels + # with these images here. + fake_image_and_labels = tf.concat([generated_images, image_one_hot_labels], -1) + real_image_and_labels = tf.concat([real_images, image_one_hot_labels], -1) + combined_images = tf.concat([fake_image_and_labels, real_image_and_labels], axis=0) + + # Assemble labels discriminating real from fake images. + labels = tf.concat([tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0) + + # Train the discriminator. + with tf.GradientTape() as tape: + predictions = self.discriminator(combined_images) + d_loss = self.loss_fn(labels, predictions) + grads = tape.gradient(d_loss, self.discriminator.trainable_weights) + self.d_optimizer.apply_gradients(zip(grads, self.discriminator.trainable_weights)) + + # Sample random points in the latent space. + random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) + random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) + + # Assemble labels that say "all real images". + misleading_labels = tf.zeros((batch_size, 1)) + + # Train the generator (note that we should *not* update the weights + # of the discriminator)! 
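+        # Gradients below are taken only w.r.t. self.generator.trainable_weights,
+        # so even though this forward pass runs through the discriminator, the
+        # discriminator weights are not updated in this step.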
+ with tf.GradientTape() as tape: + fake_images = self.generator(random_vector_labels) + fake_image_and_labels = tf.concat([fake_images, image_one_hot_labels], -1) + predictions = self.discriminator(fake_image_and_labels) + g_loss = self.loss_fn(misleading_labels, predictions) + grads = tape.gradient(g_loss, self.generator.trainable_weights) + self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights)) + + # Monitor loss. + self.generator_loss_tracker.update_state(g_loss) + self.discriminator_loss_tracker.update_state(d_loss) + return {'generator_loss': self.generator_loss_tracker.result(), + 'discrminator_loss': self.discriminator_loss_tracker.result()} diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index 6ce85ab16..f6f99ea08 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -29,7 +29,7 @@ def __init__(self, path_OBJ, viewport_size=(128, 128), y_fov=3.14159 / 4.0, self._build_scene(path_OBJ, viewport_size, light, y_fov) self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) self.flags_RGBA = RenderFlags.RGBA - self.flags_FLAT = RenderFlags.FLAT + self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT self.epsilon = 0.01 def _build_scene(self, path, size, light, y_fov): diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index b19f9d4bc..494e2d53a 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -33,8 +33,10 @@ processor = DomainRandomization(renderer, image_shape, image_paths, num_occlusions) +""" for _ in range(100): sample = processor() inputs, labels = sample['inputs'], sample['labels'] show_image((inputs['input_image'] * 255).astype('uint8')) show_image((labels['label_image'] * 255).astype('uint8')) +""" diff --git a/examples/pix2pose/utils.py b/examples/pix2pose/utils.py index 67f73e312..7aaadf344 100644 --- a/examples/pix2pose/utils.py +++ b/examples/pix2pose/utils.py @@ -2,9 +2,9 @@ from tensorflow.keras.losses import Loss -class Pix2PoseLoss(Loss): +class LossError(Loss): def __init__(self): - super(Pix2PoseLoss, self).__init__() + super(LossError, self).__init__() def call(self, y_true, y_pred): y_true = tf.clip_by_value(tf.math.abs(y_true), tf.float32.min, 1.0) @@ -14,23 +14,23 @@ def call(self, y_true, y_pred): return squared_error -class Pix2PoseColor(Loss): +class LossColor(Loss): def __init__(self, rotation_matrices): - super(Pix2PoseColor, self).__init__() + super(LossColor, self).__init__() self.rotation_matrices = rotation_matrices + def call(self, color_image, predicted_color_image): min_loss = tf.float32.max - # Bring the image in the range between 0 and 1 + # [-1, 1] -> [0, 1] color_image = (color_image + 1) * 0.5 # Calculate masks for the object and the background (they are independent of the rotation) - mask_object = tf.repeat(tf.expand_dims(tf.math.reduce_max(tf.math.ceil(color_image), axis=-1), axis=-1), - repeats=3, axis=-1) + mask_object = tf.repeat(tf.expand_dims(tf.math.reduce_max(tf.math.ceil(color_image), axis=-1), axis=-1), repeats=3, axis=-1) mask_background = tf.ones(tf.shape(mask_object)) - mask_object - # Bring the image again in the range between -1 and 1 + # [0, 1] -> [-1, 1] color_image = (color_image * 2) - 1 # Iterate over all possible rotations @@ -43,11 +43,10 @@ def call(self, color_image, predicted_color_image): # Rotate the object real_color_image = tf.einsum('ij,mklj->mkli', tf.convert_to_tensor(np.array(rotation_matrix), dtype=tf.float32), real_color_image) - #real_color_image = 
tf.where(tf.math.less(real_color_image, 0), tf.ones_like(real_color_image) + real_color_image, real_color_image) # Set the background to be all -1 real_color_image *= mask_object - real_color_image += (mask_background*tf.constant(-1.)) + real_color_image += (mask_background * tf.constant(-1.)) # Get the number of pixels num_pixels = tf.math.reduce_prod(tf.shape(real_color_image)[1:3]) From e49f96d4aae0d7b81ca367013eafeae1cdf97fa1 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 18 Oct 2021 15:24:54 +0200 Subject: [PATCH 010/101] Refactor weighted foreground loss --- examples/pix2pose/loss.py | 46 ++------------------------------------- 1 file changed, 2 insertions(+), 44 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index 8b856689c..0a5457b25 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -2,48 +2,18 @@ import tensorflow as tf -class WeightedRGBMask(Loss): +class WeightedForeground(Loss): def __init__(self, beta=3.0, epsilon=1e-4): - super(WeightedRGBMask, self).__init__() + super(WeightedForeground, self).__init__() self.beta, self.epsilon = beta, epsilon - def _extract_masks(RGBA_mask): - # TODO this should be an additional input or extracted from alpha mask - # mask_object = tf.math.ceil(RGB_mask) - # mask_object = tf.math.reduce_max(mask_object, axis=-1, keepdims=True) - # mask_object = tf.repeat(mask_object, repeats=3, axis=-1) - # mask_background = tf.ones(tf.shape(mask_object)) - mask_object - # return mask_object, mask_background - return None - def _extract_alpha_mask(self, RGBA_mask): alpha_mask = RGBA_mask[:, :, :, 3:4] color_mask = RGBA_mask[:, :, :, 0:3] return color_mask, alpha_mask - def _compute_masks(self, alpha_mask): - alpha_mask, 1.0 - alpha_mask - - def _unitball_to_normalized(x): - # [-1, 1] -> [0, 1] - return (x + 1) * 0.5 - - def _normalized_to_unitball(x): - # [0, 1] -> [-1, 1] - return (2.0 * x) - 1.0 - def call(self, RGBA_mask_true, RGB_mask_pred): - # Loss that penalizes more object color mismatch - # Loss that penalizes less background color not being "0" - # RGB_mask_true = self._unitball_to_normalized(RGB_mask_true) - # mask_object, mask_background = self._extract_masks(RGB_mask_true) - # RGB_mask_true = self._normalized_to_unitball(RGB_mask_true) - # RGB_mask_true = RGB_mask_true + self.epsilon - - # Set the background to be all -1 RGB_mask_true, alpha_mask = self._extract_alpha_mask(RGBA_mask_true) - # object_mask, background_mask = self._compute_masks(alpha_mask) - foreground_true = RGB_mask_true * alpha_mask foreground_pred = RGB_mask_pred * alpha_mask background_true = RGB_mask_true * (1.0 - alpha_mask) @@ -51,18 +21,6 @@ def call(self, RGBA_mask_true, RGB_mask_pred): foreground_loss = tf.abs(foreground_true - foreground_pred) background_loss = tf.abs(background_true - background_pred) loss = (self.beta * foreground_loss) + background_loss - loss = tf.reduce_mean(loss, axis[1, 2, 3]) - # RGB_mask_true = RGB_mask_true * mask_object - # RGB_mask_true = RGB_mask_true + (mask_background * tf.constant(-1.)) - - # Calculate the difference between the real and predicted images including the mask - # object_error = tf.abs(RGB_mask_pred * mask_object - RGB_mask_true * mask_object) - # background_error = tf.abs(RGB_mask_pred * mask_background - RGB_mask_true * mask_background) - - object_error = tf.reduce_sum(object_error, axis=-1) - background_error = tf.reduce_sum(background_error, axis=-1) - - loss = (self.beta * object_error) + background_error loss = tf.reduce_mean(loss, axis=[1, 2, 3]) 
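+        # Clamp to the largest finite float32 so an occasional infinite
+        # per-sample loss cannot blow up the parameter update.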
loss = tf.math.minimum(loss, tf.float32.max) return loss From b71bf3b82305180571d64671b18cd922536eaa4d Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 18 Oct 2021 15:46:02 +0200 Subject: [PATCH 011/101] Change directory name to hold generic models --- examples/pix2pose/{pix2pose => models}/discriminator.py | 0 examples/pix2pose/{pix2pose => models}/generator.py | 0 examples/pix2pose/{pix2pose => models}/pix2pose.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename examples/pix2pose/{pix2pose => models}/discriminator.py (100%) rename examples/pix2pose/{pix2pose => models}/generator.py (100%) rename examples/pix2pose/{pix2pose => models}/pix2pose.py (100%) diff --git a/examples/pix2pose/pix2pose/discriminator.py b/examples/pix2pose/models/discriminator.py similarity index 100% rename from examples/pix2pose/pix2pose/discriminator.py rename to examples/pix2pose/models/discriminator.py diff --git a/examples/pix2pose/pix2pose/generator.py b/examples/pix2pose/models/generator.py similarity index 100% rename from examples/pix2pose/pix2pose/generator.py rename to examples/pix2pose/models/generator.py diff --git a/examples/pix2pose/pix2pose/pix2pose.py b/examples/pix2pose/models/pix2pose.py similarity index 100% rename from examples/pix2pose/pix2pose/pix2pose.py rename to examples/pix2pose/models/pix2pose.py From 3e1186fb78567abcfa91b845e9ad34b4571c891b Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 18 Oct 2021 15:46:32 +0200 Subject: [PATCH 012/101] Fix bug with pipeline incorrect output shape --- examples/pix2pose/loss.py | 4 ++-- examples/pix2pose/pipelines.py | 5 +++-- examples/pix2pose/scenes.py | 2 +- examples/pix2pose/train.py | 29 ++++++++++++++++++++--------- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index 0a5457b25..c98161445 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -3,9 +3,9 @@ class WeightedForeground(Loss): - def __init__(self, beta=3.0, epsilon=1e-4): + def __init__(self, beta=3.0): super(WeightedForeground, self).__init__() - self.beta, self.epsilon = beta, epsilon + self.beta = beta def _extract_alpha_mask(self, RGBA_mask): alpha_mask = RGBA_mask[:, :, :, 3:4] diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index f345a9014..8cacc034f 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -9,10 +9,11 @@ class DomainRandomization(SequentialProcessor): """ def __init__(self, renderer, image_shape, image_paths, num_occlusions=1): super(DomainRandomization, self).__init__() + H, W = image_shape[:2] self.add(pr.Render(renderer)) self.add(pr.ControlMap(RandomizeRender(image_paths), [0, 1], [0])) self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) # self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) - self.add(pr.SequenceWrapper({0: {'input_image': image_shape}}, - {1: {'label_image': image_shape}})) + self.add(pr.SequenceWrapper({0: {'input_image': [H, W, 3]}}, + {1: {'label_image': [H, W, 4]}})) diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index f6f99ea08..ea03b04ae 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -8,7 +8,7 @@ from coloring import color_object -class PixelMask(): +class PixelMaskRenderer(): """Render-ready scene composed of a single object and a single moving camera. 
# Arguments diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 494e2d53a..8522bb566 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -1,9 +1,11 @@ import os import glob -from scenes import PixelMask -from pipelines import DomainRandomization +from paz.abstract import GeneratingSequence from paz.backend.image import show_image +from scenes import PixelMaskRenderer +from pipelines import DomainRandomization +from loss import WeightedForeground image_shape = [128, 128, 3] root_path = os.path.expanduser('~') @@ -20,19 +22,28 @@ top_only = False roll = 3.14159 shift = 0.05 +num_steps = 1000 +batch_size = 32 +beta = 3.0 -renderer = PixelMask(path_OBJ, viewport_size, y_fov, distance, - light, top_only, roll, shift) - -# for _ in range(100): -image, alpha, RGB_mask = renderer.render() -show_image(image) -show_image(RGB_mask) +renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, + light, top_only, roll, shift) processor = DomainRandomization(renderer, image_shape, image_paths, num_occlusions) +sequence = GeneratingSequence(processor, batch_size, num_steps) + +weighted_foreground = WeightedForeground(beta) + +# batch = sequence.__getitem__(0) +# for _ in range(100): +# image, alpha, RGB_mask = renderer.render() +# show_image(image) +# show_image(RGB_mask) + + """ for _ in range(100): sample = processor() From 50645dbce87e9c817d055e382307b1428e198681 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 18 Oct 2021 16:01:42 +0200 Subject: [PATCH 013/101] Add a fully convolutional neural network based on KeypointNet2D --- .../models/fully_convolutional_net.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 examples/pix2pose/models/fully_convolutional_net.py diff --git a/examples/pix2pose/models/fully_convolutional_net.py b/examples/pix2pose/models/fully_convolutional_net.py new file mode 100644 index 000000000..57d10e102 --- /dev/null +++ b/examples/pix2pose/models/fully_convolutional_net.py @@ -0,0 +1,34 @@ +from tensorflow.keras.models import Model +from tensorflow.keras.layers import Input, Conv2D, Activation, LeakyReLU + + +def block(x, filters, dilation_rate, alpha): + x = Conv2D(filters, (3, 3), dilation_rate=dilation_rate, padding='same')(x) + # x = BatchNormalization()(x) + x = LeakyReLU(alpha)(x) + return x + + +def FullyConvolutionalNet(num_classes, input_shape, filters=64, alpha=0.1): + """Fully convolutional network for segmentation. + + # Arguments + num_classes: Int. Number of output channels. + input_shape: List of integers indicating ``[H, W, num_channels]``. + filters: Int. Number of filters used in convolutional layers. + alpha: Float. Alpha parameter of leaky relu. 
+ + # Returns + Keras/tensorflow model + + # References + - [Discovery of Latent 3D Keypoints via End-to-end + Geometric Reasoning](https://arxiv.org/abs/1807.03146) + """ + base = inputs = Input(input_shape, name='image') + for base_arg, rate in enumerate([1, 1, 2, 4, 8, 16, 1, 2, 4, 8, 16, 1]): + base = block(base, filters, (rate, rate), alpha) + x = Conv2D(num_classes, (3, 3), padding='same')(base) + outputs = Activation('softmax', name='masks')(x) + model = Model(inputs, outputs, name='FULLY_CONVOLUTIONAL_NET') + return model From 524ecfc5e8da28d8c2ee0f532756e6fa9735c406 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 21 Oct 2021 08:55:54 +0200 Subject: [PATCH 014/101] Fix bug with predict weighted foreground loss --- examples/pix2pose/loss.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index c98161445..3b57dcc7d 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -14,13 +14,23 @@ def _extract_alpha_mask(self, RGBA_mask): def call(self, RGBA_mask_true, RGB_mask_pred): RGB_mask_true, alpha_mask = self._extract_alpha_mask(RGBA_mask_true) + foreground_true = RGB_mask_true * alpha_mask foreground_pred = RGB_mask_pred * alpha_mask - background_true = RGB_mask_true * (1.0 - alpha_mask) - background_pred = RGB_mask_true * (1.0 - alpha_mask) foreground_loss = tf.abs(foreground_true - foreground_pred) + + background_true = RGB_mask_true * (1.0 - alpha_mask) + background_pred = RGB_mask_pred * (1.0 - alpha_mask) background_loss = tf.abs(background_true - background_pred) + loss = (self.beta * foreground_loss) + background_loss + loss = tf.reduce_mean(loss, axis=[1, 2, 3]) - loss = tf.math.minimum(loss, tf.float32.max) + # loss = tf.math.minimum(loss, tf.float32.max) + # loss = tf.losses.mean_squared_error(RGB_mask_true, RGB_mask_pred) return loss + + +def MSE_with_alpha_channel(y_true, y_pred): + squared_difference = tf.square(y_true[:, :, :, 0:3] - y_pred) + return tf.reduce_mean(squared_difference, axis=-1) # Note the `axis=-1` From f4a8659796f046d6905aec1ad97c94fb14eb02d3 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:27:08 +0200 Subject: [PATCH 015/101] Add backend functions for prediction --- examples/pix2pose/backend.py | 148 +++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 3c2e35454..1831d9f3b 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -1,4 +1,7 @@ import numpy as np +from paz.backend.image.draw import GREEN +from paz.backend.image import draw_line, draw_dot +import cv2 def homogenous_quaternion_to_rotation_matrix(quaternion): @@ -74,3 +77,148 @@ def multiply_quaternions(quaternion_0, quaternion_1): # a = homogenous_quaternion_to_rotation_matrix(quaternion) # quaternion = (1 / np.sqrt(30)) * np.array([2, 3, 4, 1]) # b = inhomogenous_quaternion_to_rotation_matrix(quaternion) + +def build_cube_points3D(width, height, depth): + """ Build the 3D points of a cube in the openCV coordinate system: + 4--------1 + /| /| + / | / | + 3--------2 | + | 8_____|__5 + | / | / + |/ |/ + 7--------6 + + Z (depth) + / + /_____X (width) + | + | + Y (height) + + # Arguments + height: float, height of the 3D box. + width: float, width of the 3D box. + depth: float, width of the 3D box. 
+ + # Returns + Numpy array of shape ``(8, 3)'' corresponding to 3D keypoints of a cube + """ + half_height, half_width, half_depth = height / 2., width / 2., depth / 2. + point_1 = [+half_width, -half_height, +half_depth] + point_2 = [+half_width, -half_height, -half_depth] + point_3 = [-half_width, -half_height, -half_depth] + point_4 = [-half_width, -half_height, +half_depth] + point_5 = [+half_width, +half_height, +half_depth] + point_6 = [+half_width, +half_height, -half_depth] + point_7 = [-half_width, +half_height, -half_depth] + point_8 = [-half_width, +half_height, +half_depth] + return np.array([point_1, point_2, point_3, point_4, + point_5, point_6, point_7, point_8]) + + +def _preprocess_image_points2D(image_points2D): + num_points = len(image_points2D) + image_points2D = image_points2D.reshape(num_points, 1, 2) + image_points2D = image_points2D.astype(np.float64) + image_points2D = np.ascontiguousarray(image_points2D) + return image_points2D + + +def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, + inlier_threshold=5, num_iterations=100): + image_points2D = _preprocess_image_points2D(image_points2D) + success, rotation_vector, translation, inliers = cv2.solvePnPRansac( + object_points3D, image_points2D, camera_intrinsics, None, + flags=cv2.SOLVEPNP_EPNP, reprojectionError=inlier_threshold, + iterationsCount=num_iterations) + if success is False: + rotation_vector, translation = None, None + return rotation_vector, translation + + +def project_to_image(rotation, translation, points3D, camera_intrinsics): + """Project points3D to image plane using a perspective transformation + """ + if rotation.shape != (3, 3): + raise ValueError('Rotation matrix is not of shape (3, 3)') + if len(translation) != 3: + raise ValueError('Translation vector is not of length 3') + if len(points3D.shape) != 2: + raise ValueError('points3D should have a shape (N, 3)') + if points3D.shape[1] != 3: + raise ValueError('points3D should have a shape (N, 3)') + # TODO missing checks for camera intrinsics conditions + points3D = np.matmul(rotation, points3D.T).T + translation + x, y, z = np.split(points3D, 3, axis=1) + x_focal_length = camera_intrinsics[0, 0] + y_focal_length = camera_intrinsics[1, 1] + x_image_center = camera_intrinsics[0, 2] + y_image_center = camera_intrinsics[1, 2] + x_points = (x_focal_length * (x / z)) + x_image_center + y_points = (y_focal_length * (y / z)) + y_image_center + projected_points2D = np.concatenate([x_points, y_points], axis=1) + return projected_points2D + + +def draw_cube(image, points, color=GREEN, thickness=2, radius=5): + """ Draws a cube in image. + + # Arguments + image: Numpy array of shape ``[H, W, 3]``. + points: List of length 8 having each element a list + of length two indicating ``(y, x)`` openCV coordinates. + color: List of length three indicating RGB color of point. + thickness: Integer indicating the thickness of the line to be drawn. + radius: Integer indicating the radius of corner points to be drawn. + + # Returns + Numpy array with shape ``[H, W, 3]``. Image with cube. 
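+
+    # Example
+        A minimal sketch (the sizes, pose and intrinsics below are purely
+        illustrative) chaining ``build_cube_points3D``, ``project_to_image``
+        and ``draw_cube``:
+
+            points3D = build_cube_points3D(0.2, 0.2, 0.1)
+            intrinsics = np.array([[128.0, 0.0, 64.0],
+                                   [0.0, 128.0, 64.0],
+                                   [0.0, 0.0, 1.0]])
+            points2D = project_to_image(
+                np.eye(3), np.array([0.0, 0.0, 1.0]), points3D, intrinsics)
+            image = np.zeros((128, 128, 3))
+            image = draw_cube(image, points2D.astype(np.int32))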
+ """ + if points.shape != (8, 2): + raise ValueError('Cube points 2D must be of shape (8, 2)') + + # draw bottom + draw_line(image, points[0], points[1], color, thickness) + draw_line(image, points[1], points[2], color, thickness) + draw_line(image, points[3], points[2], color, thickness) + draw_line(image, points[3], points[0], color, thickness) + + # draw top + draw_line(image, points[4], points[5], color, thickness) + draw_line(image, points[6], points[5], color, thickness) + draw_line(image, points[6], points[7], color, thickness) + draw_line(image, points[4], points[7], color, thickness) + + # draw sides + draw_line(image, points[0], points[4], color, thickness) + draw_line(image, points[7], points[3], color, thickness) + draw_line(image, points[5], points[1], color, thickness) + draw_line(image, points[2], points[6], color, thickness) + + # draw X mark on top + draw_line(image, points[4], points[6], color, thickness) + draw_line(image, points[5], points[7], color, thickness) + + # draw dots + [draw_dot(image, np.squeeze(point), color, radius) for point in points] + return image + + +def replace_lower_than_threshold(source, threshold=1e-3, replacement=0.0): + lower_than_epsilon = source < threshold + source[lower_than_epsilon] = replacement + return source + + +def arguments_to_image_points2D(row_args, col_args): + row_args = row_args.reshape(-1, 1) + col_args = col_args.reshape(-1, 1) + image_points2D = np.concatenate([col_args, row_args], axis=1) + return image_points2D + + +def rotation_vector_to_rotation_matrix(rotation_vector): + rotation_matrix = np.eye(3) + cv2.Rodrigues(rotation_vector, rotation_matrix) + return rotation_matrix From cab9eb1fcf64267b71dbe01e016b86505186ec26 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:27:24 +0200 Subject: [PATCH 016/101] Add small comment on how to get object 3D shape --- examples/pix2pose/coloring.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/pix2pose/coloring.py b/examples/pix2pose/coloring.py index 29259800d..f21e02990 100644 --- a/examples/pix2pose/coloring.py +++ b/examples/pix2pose/coloring.py @@ -47,3 +47,4 @@ def color_object(path): mesh = color_object(path) scene.add(mesh) Viewer(scene, use_raymond_lighting=True, flags=RenderFlags.FLAT) + # mesh_extents = np.array([0.184, 0.187, 0.052]) From f8c0dcca08d46f84b5c25edc5dc0efca5a38d974 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:27:40 +0200 Subject: [PATCH 017/101] Add partially tested pipeline for full inference --- examples/pix2pose/pipelines.py | 72 ++++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 8cacc034f..a4e9b67fe 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -1,7 +1,14 @@ +import numpy as np from paz.abstract import SequentialProcessor from paz.pipelines import RandomizeRenderedImage as RandomizeRender from paz import processors as pr -# from processors import ImageToClosedOneBall +from processors import ( + GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, + ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC, + RotationVectorToRotationMatrix, ReplaceLowerThanThreshold) +from backend import build_cube_points3D, project_to_image, draw_cube +from processors import CropImage +from paz.backend.image import show_image class DomainRandomization(SequentialProcessor): @@ -15,5 +22,64 @@ def __init__(self, renderer, image_shape, image_paths, 
num_occlusions=1): self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) # self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) - self.add(pr.SequenceWrapper({0: {'input_image': [H, W, 3]}}, - {1: {'label_image': [H, W, 4]}})) + self.add(pr.SequenceWrapper({0: {'input_1': [H, W, 3]}}, + {1: {'masks': [H, W, 4]}})) + + +class PredictRGBMask(SequentialProcessor): + def __init__(self, model, epsilon=0.15): + super(PredictRGBMask, self).__init__() + self.add(CropImage()) + self.add(pr.ResizeImage((128, 128))) + self.add(pr.NormalizeImage()) + self.add(pr.ExpandDims(0)) + self.add(pr.Predict(model)) + self.add(pr.Squeeze(0)) + self.add(ReplaceLowerThanThreshold(epsilon)) + self.add(pr.DenormalizeImage()) + self.add(pr.CastImage('uint8')) + + +class RGBMaskToObjectPoints3D(SequentialProcessor): + def __init__(self, object_sizes): + super(RGBMaskToObjectPoints3D, self).__init__() + self.add(GetNonZeroValues()) + self.add(ImageToClosedOneBall()) + self.add(Scale(object_sizes / 2.0)) + + +class RGBMaskToImagePoints2D(SequentialProcessor): + def __init__(self): + super(RGBMaskToImagePoints2D, self).__init__() + self.add(GetNonZeroArguments()) + self.add(ArgumentsToImagePoints2D()) + + +class Pix2Pose(pr.Processor): + def __init__(self, model, object_sizes, camera, epsilon=0.15): + self.camera = camera + self.object_sizes = object_sizes + self.predict_RGBMask = PredictRGBMask(model, epsilon) + self.RGBMask_to_object_points3D = RGBMaskToObjectPoints3D( + self.object_sizes) + self.RGBMask_to_image_points2D = RGBMaskToImagePoints2D() + self.predict_pose = SolveChangingObjectPnPRANSAC(camera.intrinsics) + self.vector_to_matrix = RotationVectorToRotationMatrix() + + def call(self, image): + show_image(image, wait=False) + RGBMask = self.predict_RGBMask(image) + print(RGBMask.shape) + return {'image': RGBMask} + points3D = self.RGBMask_to_object_points3D(RGBMask) + points2D = self.RGBMask_to_image_points2D(RGBMask) + rotation_vector, translation = self.predict_pose(points3D, points2D) + rotation_matrix = self.vector_to_matrix(rotation_vector) + translation = np.squeeze(translation, 1) + points3D = build_cube_points3D(*self.object_sizes) + points2D = project_to_image( + rotation_matrix, translation, points3D, self.camera.intrinsics) + points2D = points2D.astype(np.int32) + image = draw_cube(image.astype(float), points2D) + image = image.astype('uint8') + return {'image', image} From 5251fe99b3f5c570e8a74aa9894100482afe1596 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:28:01 +0200 Subject: [PATCH 018/101] Add simple processors for pix2pose inference --- examples/pix2pose/processors.py | 121 ++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py index 550229cf1..148d601b4 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -1,4 +1,13 @@ +import numpy as np from paz.abstract import Processor +from paz.backend.keypoints import project_points3D +from paz.backend.image import draw_cube + +from backend import build_cube_points3D +from backend import replace_lower_than_threshold +from backend import arguments_to_image_points2D +from backend import solve_PnP_RANSAC +from backend import rotation_vector_to_rotation_matrix class ImageToClosedOneBall(Processor): @@ -19,3 +28,115 @@ def __init__(self): def call(self, image): return (image + 1.0) * 127.5 + + +class DrawBoxes3D(Processor): + def __init__(self, 
camera, class_to_dimensions, thickness=1): + """Draw boxes 3D of multiple objects + + # Arguments + camera: Instance of ``paz.backend.camera.Camera''. + class_to_dimensions: Dictionary that has as keys the + class names and as value a list [model_height, model_width] + thickness: Int. Thickness of 3D box + """ + super(DrawBoxes3D, self).__init__() + self.camera = camera + self.class_to_dimensions = class_to_dimensions + self.class_to_points = self._build_points(self.class_to_dimensions) + self.thickness = thickness + + def _build_points(self, class_to_dimensions): + class_to_cube3D = {} + print(class_to_dimensions) + for class_name, dimensions in class_to_dimensions.items(): + width, height, depth = dimensions + cube_points3D = build_cube_points3D(width, height, depth) + class_to_cube3D[class_name] = cube_points3D + return class_to_cube3D + + def call(self, image, pose6D): + points3D = self.class_to_points[pose6D.class_name] + points2D = project_points3D(points3D, pose6D, self.camera) + points2D = points2D.astype(np.int32) + # points2D = np.squeeze(points2D) + # return points2D + draw_cube(image, points2D, thickness=self.thickness) + return image + + +class ReplaceLowerThanThreshold(Processor): + def __init__(self, threshold=1e-8, replacement=0.0): + super(ReplaceLowerThanThreshold, self).__init__() + self.threshold = threshold + self.replacement = replacement + + def call(self, image): + return replace_lower_than_threshold( + image, self.threshold, self.replacement) + + +class GetNonZeroValues(Processor): + def __init__(self): + super(GetNonZeroValues, self).__init__() + + def call(self, array): + non_zero_arguments = np.nonzero(array) + return array[non_zero_arguments] + + +class GetNonZeroArguments(Processor): + def __init__(self): + super(GetNonZeroArguments, self).__init__() + + def call(self, array): + non_zero_rows, non_zero_columns = np.nonzero(array) + return non_zero_rows, non_zero_columns + + +class ArgumentsToImagePoints2D(Processor): + def __init__(self): + super(ArgumentsToImagePoints2D, self).__init__() + + def call(self, row_args, col_args): + image_points2D = arguments_to_image_points2D(row_args, col_args) + return image_points2D + + +class Scale(Processor): + def __init__(self, object_sizes): + super(Scale, self).__init__() + self.object_sizes = object_sizes + + def call(self, values): + return self.object_sizes * values + + +class SolveChangingObjectPnPRANSAC(Processor): + def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): + super(SolveChangingObjectPnPRANSAC, self).__init__() + self.camera_intrinsics = camera_intrinsics + self.inlier_thresh = inlier_thresh + self.num_iterations = num_iterations + + def call(self, object_points3D, image_points2D): + rotation_vector, translation = solve_PnP_RANSAC( + object_points3D, image_points2D, self.camera_intrinsics, + self.inlier_thresh, self.num_iterations) + return rotation_vector, translation + + +class RotationVectorToRotationMatrix(Processor): + def __init__(self): + super(RotationVectorToRotationMatrix, self).__init__() + + def call(self, rotation_vector): + return rotation_vector_to_rotation_matrix(rotation_vector) + + +class CropImage(Processor): + def __init__(self): + super(CropImage, self).__init__() + + def call(self, image): + return image[:128, :128, :] From cb610c02dced36f1b639d22ad6bf9b499f615b9b Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:28:23 +0200 Subject: [PATCH 019/101] Changed train script to use UNET-VGG --- examples/pix2pose/train.py | 50 
++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 8522bb566..1c4b13e30 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -1,11 +1,15 @@ import os import glob +from tensorflow.keras.optimizers import Adam from paz.abstract import GeneratingSequence -from paz.backend.image import show_image +from paz.models.segmentation import UNET_VGG16 +from paz.backend.image import show_image, resize_image +import numpy as np from scenes import PixelMaskRenderer from pipelines import DomainRandomization -from loss import WeightedForeground +from loss import WeightedForeground, MSE_with_alpha_channel +from models.fully_convolutional_net import FullyConvolutionalNet image_shape = [128, 128, 3] root_path = os.path.expanduser('~') @@ -25,6 +29,13 @@ num_steps = 1000 batch_size = 32 beta = 3.0 +alpha = 0.1 +filters = 16 +num_classes = 3 +learning_rate = 0.001 +# steps_per_epoch +max_num_epochs = 10 +steps_per_epoch = num_steps renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, @@ -35,8 +46,22 @@ sequence = GeneratingSequence(processor, batch_size, num_steps) +beta = 3.0 weighted_foreground = WeightedForeground(beta) +# model = FullyConvolutionalNet(num_classes, image_shape, filters, alpha) +model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +# model. +optimizer = Adam(learning_rate) +# model.load_weights('UNET_weights_MSE.hdf5') +model.compile(optimizer, weighted_foreground, metrics=MSE_with_alpha_channel) +model.fit( + sequence, + # steps_per_epoch=args.steps_per_epoch, + epochs=max_num_epochs, + # callbacks=[stop, log, save, plateau, draw], + verbose=1, + workers=0) # batch = sequence.__getitem__(0) # for _ in range(100): # image, alpha, RGB_mask = renderer.render() @@ -44,6 +69,27 @@ # show_image(RGB_mask) +def normalize(image): + return (image * 255.0).astype('uint8') + + +def show_results(): + # image, alpha, pixel_mask_true = renderer.render() + sample = processor() + image = sample['inputs']['input_1'] + pixel_mask_true = sample['labels']['masks'] + image = np.expand_dims(image, 0) + pixel_mask_pred = model.predict(image) + pixel_mask_pred = normalize(np.squeeze(pixel_mask_pred, axis=0)) + image = normalize(np.squeeze(image, axis=0)) + results = np.concatenate( + [image, normalize(pixel_mask_true[..., 0:3]), pixel_mask_pred], axis=1) + H, W = results.shape[:2] + scale = 6 + results = resize_image(results, (scale * W, scale * H)) + show_image(results) + + """ for _ in range(100): sample = processor() From 8876c11d9d388cd5a4ff63e92c483fd917c83b24 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:28:53 +0200 Subject: [PATCH 020/101] Add structure with video player --- examples/pix2pose/demo.py | 98 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 examples/pix2pose/demo.py diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py new file mode 100644 index 000000000..fd4a2cd3f --- /dev/null +++ b/examples/pix2pose/demo.py @@ -0,0 +1,98 @@ +import os +import cv2 +import numpy as np +from paz.models import UNET_VGG16 +from paz.backend.image import show_image +from paz import processors as pr +from paz.backend.camera import Camera +from scenes import PixelMaskRenderer +from processors import DrawBoxes3D +from backend import inhomogenous_quaternion_to_rotation_matrix as quaternion_to_rotation_matrix +# from backend import homogenous_quaternion_to_rotation_matrix 
+from backend import solve_PnP_RANSAC +from backend import project_to_image +from backend import build_cube_points3D +from backend import draw_cube +from pipelines import Pix2Pose +from paz.backend.camera import VideoPlayer + + +root_path = os.path.expanduser('~') +path_OBJ = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' +path_OBJ = os.path.join(root_path, path_OBJ) +image_shape = (128, 128, 3) +viewport_size = image_shape[:2] +num_classes = 3 +y_fov = 3.14159 / 4.0 +distance = [0.3, 0.5] +light = [1.0, 30] +top_only = False +roll = 3.14159 +shift = 0.05 + + +model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +model.load_weights('UNET_weights_epochs-10_beta-3.hdf5') +renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, + light, top_only, roll, shift) + + +camera = Camera(device_id=4) +focal_length = 179 # 128 +image_center = (128 / 2.0, 128 / 2.0) +# building camera parameters +camera.distortion = np.zeros((4)) +camera.intrinsics = np.array([[focal_length, 0, image_center[0]], + [0, focal_length, image_center[1]], + [0, 0, 1]]) + +object_sizes = np.array([0.184, 0.187, 0.052]) +# object_size = np.array([0.20, 0.20, 0.08]) +# camera.intrinsics = renderer.camera.camera.get_projection_matrix()[:3, :3] +epsilon = 0.15 +# object_keypoints3D = renderer.mesh.mesh.primitives[0].positions +# solve_PNP = pr.SolvePNP(object_keypoints3D, camera) +pipeline = Pix2Pose(model, object_sizes, camera, epsilon) +# image_size = (640, 480) +image_size = (128, 128) +player = VideoPlayer(image_size, pipeline, camera) +player.run() +""" +def show_results(): + image, alpha, RGB_mask_true = renderer.render() + normalized_image = np.expand_dims(image / 255.0, 0) + RGB_mask_pred = model.predict(normalized_image) + RGB_mask_pred = np.squeeze(RGB_mask_pred, 0) + RGB_mask_pred[RGB_mask_pred < epsilon] = 0.0 + show_image((RGB_mask_pred * 255.0).astype('uint8')) + + mask_pred = np.sum(RGB_mask_pred, axis=2) + non_zero_arguments = np.nonzero(mask_pred) + RGB_mask_pred = RGB_mask_pred[non_zero_arguments] + RGB_mask_pred = (2.0 * RGB_mask_pred) - 1.0 + # this RGB mask scaling is good since you are scaling in RGB space + object_points3D = (object_size / 2.0) * RGB_mask_pred + num_points = len(object_points3D) + + row_args, col_args = non_zero_arguments + row_args = row_args.reshape(-1, 1) + col_args = col_args.reshape(-1, 1) + image_points2D = np.concatenate([col_args, row_args], axis=1) + image_points2D = image_points2D.reshape(num_points, 1, 2) + image_points2D = image_points2D.astype(np.float64) + image_points2D = np.ascontiguousarray(image_points2D) + + rotation_vector, translation = solve_PnP_RANSAC( + object_points3D, image_points2D, camera.intrinsics) + rotation_matrix = np.eye(3) + cv2.Rodrigues(rotation_vector, rotation_matrix) + translation = np.squeeze(translation, 1) + points3D = build_cube_points3D(0.184, 0.187, 0.052) + points2D = project_to_image( + rotation_matrix, translation, points3D, camera.intrinsics) + points2D = points2D.astype(np.int32) + image = draw_cube(image.astype(float), points2D) + image = image.astype('uint8') + show_image(image) +""" + From 66e8ac507712932d53f5792888c76769b60fefe2 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:29:45 +0200 Subject: [PATCH 021/101] Add simple ICP computation --- examples/pix2pose/icp.py | 102 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 examples/pix2pose/icp.py diff --git a/examples/pix2pose/icp.py b/examples/pix2pose/icp.py new file mode 
100644 index 000000000..61ca4352c --- /dev/null +++ b/examples/pix2pose/icp.py @@ -0,0 +1,102 @@ +import numpy as np +from sklearn.neighbors import NearestNeighbors + + +def calculate_affine_matrix(pointcloud_A, pointcloud_B): + '''Calculates affine transform with the best least-squares fit transforming + keypoints A to keypoints B. + + # Argument: + pointcloud_A: Array of shape (num_keypoints, 3). + pointcloud_B: Array of shape (num_keypoints, 3). + + # Returns: + T: (m+1)x(m+1) homogeneous transformation matrix that maps A on to B + R: mxm rotation matrix + t: mx1 translation vector + ''' + assert pointcloud_A.shape == pointcloud_B.shape + # translate points to their centroids + centroid3D_A = np.mean(pointcloud_A, axis=0) + centroid3D_B = np.mean(pointcloud_B, axis=0) + centered_keypoints3D_A = pointcloud_A - centroid3D_A + centered_keypoints3D_B = pointcloud_B - centroid3D_B + + covariance = np.dot(centered_keypoints3D_A.T, centered_keypoints3D_B) + U, S, Vt = np.linalg.svd(covariance) + # compute rotation matrix + rotation_matrix = np.dot(Vt.T, U.T) + + # resolve special reflection case + if np.linalg.det(rotation_matrix) < 0: + Vt[3 - 1, :] *= -1 + rotation_matrix = np.dot(Vt.T, U.T) + + # compute translation + translation3D = centroid3D_B.T - np.dot(rotation_matrix, centroid3D_A.T) + + affine_matrix = to_affine_matrix(rotation_matrix, translation3D) + return affine_matrix + + +def to_affine_matrix(rotation_matrix, translation_vector): + translation_vector = translation_vector.reshape(3, 1) + affine = np.concatenate([rotation_matrix, translation_vector], axis=0) + affine = np.concatenate([affine, np.array([[0.0, 0.0, 0.0, 1.0]])], axis=1) + return affine + + +def nearest_neighbor(pointcloud_A, pointcloud_B): + '''Find the nearest (Euclidean) neighbor in dst for each point in src + # Arguments: + src: Nxm array of points + dst: Nxm array of points + # Returns: + distances: Euclidean distances of the nearest neighbor + indices: dst indices of the nearest neighbor + ''' + assert pointcloud_A.shape == pointcloud_B.shape + model = NearestNeighbors(n_neighbors=1) + model.fit(pointcloud_B) + distances, indices = model.kneighbors(pointcloud_A, return_distance=True) + return distances.ravel(), indices.ravel() + + +def add_homogenous_coordinate(keypoints3D): + num_keypoints = len(keypoints3D) + ones = np.ones_like(num_keypoints).reshape(-1, 1) + homogenous_keypoints3D = np.concatenate([keypoints3D, ones], axis=1) + return homogenous_keypoints3D + + +def iterative_closes_point(pointcloud_A, pointcloud_B, initial_pose=None, + max_iterations=20, tolerance=1e-3): + '''Find best least square fit that transforms pointcloud A to pointcloud B. 
+ Input: + A: Nxm numpy array of source mD points + B: Nxm numpy array of destination mD point + initial_pose: (m+1)x(m+1) homogeneous transformation + max_iterations: exit algorithm after max_iterations + tolerance: convergence criteria + Output: + T: final homogeneous transformation that maps A on to B + distances: Euclidean distances (errors) of the nearest neighbor + i: number of iterations to converge + ''' + assert pointcloud_A.shape == pointcloud_B.shape + pointcloud_A = add_homogenous_coordinate(pointcloud_A) + pointcloud_B = add_homogenous_coordinate(pointcloud_B) + pointcloud_A_0 = np.copy(pointcloud_A) + if initial_pose is not None: + pointcloud_A = np.dot(initial_pose, pointcloud_A.T).T + previous_error = 0 + for iteration_arg in range(max_iterations): + distances, indices = nearest_neighbor(pointcloud_A, pointcloud_B) + affine_matrix = calculate_affine_matrix(pointcloud_A, pointcloud_B) + pointcloud_A = np.dot(affine_matrix, pointcloud_A.T).T + mean_error = np.mean(distances) + if np.abs(previous_error - mean_error) < tolerance: + break + previous_error = mean_error + affine_transform = calculate_affine_matrix(pointcloud_A_0, pointcloud_A) + return affine_transform, distances, iteration_arg From bf385b40c72c29f3bdb7e317f814afcb1ff80e68 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 27 Oct 2021 16:53:10 +0200 Subject: [PATCH 022/101] Add working demo --- examples/pix2pose/backend.py | 71 ++++++++++++++- examples/pix2pose/calibrate_camera.py | 60 +++++++++++++ examples/pix2pose/demo.py | 52 +++++------ examples/pix2pose/pipelines.py | 122 ++++++++++++++++++++++---- examples/pix2pose/processors.py | 42 ++++++++- 5 files changed, 298 insertions(+), 49 deletions(-) create mode 100644 examples/pix2pose/calibrate_camera.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 1831d9f3b..a172b7461 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -1,6 +1,6 @@ import numpy as np from paz.backend.image.draw import GREEN -from paz.backend.image import draw_line, draw_dot +from paz.backend.image import draw_line, draw_dot, draw_circle import cv2 @@ -55,6 +55,14 @@ def inhomogenous_quaternion_to_rotation_matrix(q): # return np.squeeze(rotation_matrix) +def quaternion_to_rotation_matrix(quaternion, homogenous=True): + if homogenous: + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) + else: + matrix = inhomogenous_quaternion_to_rotation_matrix(quaternion) + return matrix + + def multiply_quaternions(quaternion_0, quaternion_1): """Multiplies two quaternions. 
@@ -137,6 +145,33 @@ def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, return rotation_vector, translation +def apply_affine_transform(affine_matrix, vectors): + return np.matmul(affine_matrix, vectors.T).T + + +def project_to_image2(affine_matrix, points3D, camera_intrinsics): + """Project points3D to image plane using a perspective transformation + """ + if affine_matrix.shape != (4, 4): + raise ValueError('Affine matrix is not of shape (4, 4)') + if len(points3D.shape) != 2: + raise ValueError('points3D should have a shape (N, 3)') + if points3D.shape[1] != 3: + raise ValueError('points3D should have a shape (N, 3)') + # TODO missing checks for camera intrinsics conditions + points3D = apply_affine_transform(affine_matrix, points3D) + # points3D = np.matmul(rotation, points3D.T).T + translation + x, y, z = np.split(points3D, 3, axis=1) + x_focal_length = camera_intrinsics[0, 0] + y_focal_length = camera_intrinsics[1, 1] + x_image_center = camera_intrinsics[0, 2] + y_image_center = camera_intrinsics[1, 2] + x_points = (x_focal_length * (x / z)) + x_image_center + y_points = (y_focal_length * (y / z)) + y_image_center + projected_points2D = np.concatenate([x_points, y_points], axis=1) + return projected_points2D + + def project_to_image(rotation, translation, points3D, camera_intrinsics): """Project points3D to image plane using a perspective transformation """ @@ -222,3 +257,37 @@ def rotation_vector_to_rotation_matrix(rotation_vector): rotation_matrix = np.eye(3) cv2.Rodrigues(rotation_vector, rotation_matrix) return rotation_matrix + + +def draw_keypoints(image, keypoints, colors, radius): + for keypoint, color in zip(keypoints, colors): + R, G, B = color + color = (int(R), int(G), int(B)) + draw_circle(image, keypoint.astype('int'), color, radius) + return image + + +def draw_mask(image, keypoints, colors, radius): + for keypoint, color in zip(keypoints, colors): + R, G, B = color + color = (int(R), int(G), int(B)) + draw_circle(image, keypoint.astype('int'), color, radius) + return image + + +def rotation_matrix_to_quaternion(rotation_matrix): + qw = np.sqrt(1 + np.trace(rotation_matrix)) / 2.0 + + m21 = rotation_matrix[2, 1] + m12 = rotation_matrix[1, 2] + + m02 = rotation_matrix[0, 2] + m20 = rotation_matrix[2, 0] + + m10 = rotation_matrix[1, 0] + m01 = rotation_matrix[0, 1] + + qx = (m21 - m12) / (4.0 * qw) + qy = (m02 - m20) / (4.0 * qw) + qz = (m10 - m01) / (4.0 * qw) + return qx, qy, qz, qw diff --git a/examples/pix2pose/calibrate_camera.py b/examples/pix2pose/calibrate_camera.py new file mode 100644 index 000000000..bfc7a3e40 --- /dev/null +++ b/examples/pix2pose/calibrate_camera.py @@ -0,0 +1,60 @@ +from paz.backend.image import show_image +import numpy as np +import cv2 + + +# def calibrate_camera(square_size, pattern_shape=(5, 5)): + +pattern_size = (5, 7) +square_size_mm = 35 +window_size, zero_zone = (11, 11), (-1, -1) + +# constructing default 3D points +point3D = np.zeros((np.prod(pattern_size), 3), np.float32) +xy_coordinates = np.mgrid[0:pattern_size[0], 0:pattern_size[1]].T +point3D[:, :2] = xy_coordinates.reshape(-1, 2) * square_size_mm + +camera = cv2.VideoCapture(0) +cv2.namedWindow('camera_window') +# 2D points in image plane, 3D points in real world space, images, counter +image_points, points3D, images, image_counter = [], [], [], 0 +criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001) +print('Press `Escape` to quit') +while True: + + frame = camera.read()[1] + image_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + 
show_image(image_gray, wait=False) + chessboard_found, corners = cv2.findChessboardCorners( + image_gray, pattern_size, None) + print(chessboard_found) + if chessboard_found: + points3D.append(point3D) + refined_corners = cv2.cornerSubPix( + image_gray, corners, window_size, zero_zone, criteria) + image_points.append(refined_corners) + frame = cv2.drawChessboardCorners( + frame, pattern_size, refined_corners, chessboard_found) + show_image(frame) + image_counter = image_counter + 1 + + cv2.imshow('camera_window', frame) + keystroke = cv2.waitKey(1) + + if keystroke % 256 == 27: + print('`Escape` key hit, closing...') + break + +camera.release() +cv2.destroyAllWindows() + +ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera( + points3D, image_points, image_gray.shape[::-1], None, None) +print(ret, mtx, dist, rvecs, tvecs) +print(mtx) +# fx = 659.10 +# fy = 668.76 +# cx = 276.76 +# cy = 252.35 +# ret = 0.6814 +# dist = [9.86e-3, 1.41, 1.08e-2, 2.431e-3, -7.05] diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index fd4a2cd3f..48928abc1 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -2,61 +2,58 @@ import cv2 import numpy as np from paz.models import UNET_VGG16 -from paz.backend.image import show_image +from paz.backend.image import show_image, load_image from paz import processors as pr from paz.backend.camera import Camera from scenes import PixelMaskRenderer from processors import DrawBoxes3D -from backend import inhomogenous_quaternion_to_rotation_matrix as quaternion_to_rotation_matrix # from backend import homogenous_quaternion_to_rotation_matrix from backend import solve_PnP_RANSAC from backend import project_to_image from backend import build_cube_points3D from backend import draw_cube from pipelines import Pix2Pose +from pipelines import EstimatePoseMasks from paz.backend.camera import VideoPlayer +from paz.applications import SSD300FAT -root_path = os.path.expanduser('~') -path_OBJ = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' -path_OBJ = os.path.join(root_path, path_OBJ) image_shape = (128, 128, 3) -viewport_size = image_shape[:2] num_classes = 3 -y_fov = 3.14159 / 4.0 -distance = [0.3, 0.5] -light = [1.0, 30] -top_only = False -roll = 3.14159 -shift = 0.05 - model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) model.load_weights('UNET_weights_epochs-10_beta-3.hdf5') -renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, - light, top_only, roll, shift) +# approximating intrinsic camera parameters +camera = Camera(device_id=0) +# camera.start() +# image_size = camera.read().shape[0:2] +# camera.stop() -camera = Camera(device_id=4) -focal_length = 179 # 128 -image_center = (128 / 2.0, 128 / 2.0) -# building camera parameters +image = load_image('test_image.jpg') +image_size = image.shape[0:2] +focal_length = image_size[1] +image_center = (image_size[1] / 2.0, image_size[0] / 2.0) camera.distortion = np.zeros((4)) camera.intrinsics = np.array([[focal_length, 0, image_center[0]], [0, focal_length, image_center[1]], [0, 0, 1]]) + object_sizes = np.array([0.184, 0.187, 0.052]) -# object_size = np.array([0.20, 0.20, 0.08]) -# camera.intrinsics = renderer.camera.camera.get_projection_matrix()[:3, :3] epsilon = 0.15 -# object_keypoints3D = renderer.mesh.mesh.primitives[0].positions -# solve_PNP = pr.SolvePNP(object_keypoints3D, camera) -pipeline = Pix2Pose(model, object_sizes, camera, epsilon) +detect = SSD300FAT(draw=False) +offsets = [0.1, 0.1] +estimate_keypoints = Pix2Pose(model, object_sizes) 
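+# EstimatePoseMasks (see pipelines.py) chains detection, box cropping,
+# Pix2Pose color regression and PnP-RANSAC into a single pose-estimation
+# pipeline that maps an input image to drawn cubes and 6D poses.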
+pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets, None) + +results = pipeline(image) +predicted_image = results['image'] +show_image(predicted_image) + # image_size = (640, 480) -image_size = (128, 128) -player = VideoPlayer(image_size, pipeline, camera) -player.run() +# player = VideoPlayer(image_size, pipeline, camera) +# player.run() """ def show_results(): image, alpha, RGB_mask_true = renderer.render() @@ -95,4 +92,3 @@ def show_results(): image = image.astype('uint8') show_image(image) """ - diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index a4e9b67fe..050ff0496 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -1,14 +1,17 @@ import numpy as np -from paz.abstract import SequentialProcessor +from paz.abstract import SequentialProcessor, Processor from paz.pipelines import RandomizeRenderedImage as RandomizeRender +from paz.abstract.messages import Pose6D from paz import processors as pr from processors import ( GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC, RotationVectorToRotationMatrix, ReplaceLowerThanThreshold) -from backend import build_cube_points3D, project_to_image, draw_cube -from processors import CropImage +from backend import build_cube_points3D, project_to_image, draw_cube, draw_keypoints, project_to_image2 +from processors import CropImage, UnwrapDictionary, ToAffineMatrix, RotationVectorToQuaternion from paz.backend.image import show_image +from backend import solve_PnP_RANSAC, rotation_matrix_to_quaternion +from backend import rotation_vector_to_rotation_matrix class DomainRandomization(SequentialProcessor): @@ -29,8 +32,7 @@ def __init__(self, renderer, image_shape, image_paths, num_occlusions=1): class PredictRGBMask(SequentialProcessor): def __init__(self, model, epsilon=0.15): super(PredictRGBMask, self).__init__() - self.add(CropImage()) - self.add(pr.ResizeImage((128, 128))) + # self.add(pr.ResizeImage((128, 128))) self.add(pr.NormalizeImage()) self.add(pr.ExpandDims(0)) self.add(pr.Predict(model)) @@ -55,24 +57,38 @@ def __init__(self): self.add(ArgumentsToImagePoints2D()) +class SolveChangingObjectPnP(SequentialProcessor): + def __init__(self, camera_intrinsics): + super(SolveChangingObjectPnP, self).__init__() + self.add(SolveChangingObjectPnPRANSAC(camera_intrinsics)) + self.add(pr.ControlMap(RotationVectorToRotationMatrix())) + # self.add(pr.ControlMap(RotationVectorToQuaternion())) + self.add(pr.ControlMap(pr.Squeeze(1), [1], [1])) + # self.add(ToAffineMatrix()) + + class Pix2Pose(pr.Processor): - def __init__(self, model, object_sizes, camera, epsilon=0.15): - self.camera = camera + def __init__(self, model, object_sizes, epsilon=0.15): self.object_sizes = object_sizes + H, W = model.input_shape[1:3] + self.resize = pr.ResizeImage((W, H)) self.predict_RGBMask = PredictRGBMask(model, epsilon) - self.RGBMask_to_object_points3D = RGBMaskToObjectPoints3D( - self.object_sizes) - self.RGBMask_to_image_points2D = RGBMaskToImagePoints2D() - self.predict_pose = SolveChangingObjectPnPRANSAC(camera.intrinsics) - self.vector_to_matrix = RotationVectorToRotationMatrix() + self.RGBMask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) + self.RGBMask_to_points2D = RGBMaskToImagePoints2D() + self.wrap = pr.WrapOutput(['points3D', 'points2D', 'RGB_mask']) def call(self, image): - show_image(image, wait=False) - RGBMask = self.predict_RGBMask(image) - print(RGBMask.shape) - return {'image': RGBMask} - 
points3D = self.RGBMask_to_object_points3D(RGBMask) - points2D = self.RGBMask_to_image_points2D(RGBMask) + # show_image(image, wait=False) + print(image.shape) + image = self.resize(image) + print(image.shape) + RGB_mask = self.predict_RGBMask(image) + print(RGB_mask.shape) + points3D = self.RGBMask_to_points3D(RGB_mask) + # points3D = points3D * 100 + points2D = self.RGBMask_to_points2D(RGB_mask) + return self.wrap(points3D, points2D, RGB_mask) + """ rotation_vector, translation = self.predict_pose(points3D, points2D) rotation_matrix = self.vector_to_matrix(rotation_vector) translation = np.squeeze(translation, 1) @@ -83,3 +99,73 @@ def call(self, image): image = draw_cube(image.astype(float), points2D) image = image.astype('uint8') return {'image', image} + """ + + +class EstimatePoseMasks(Processor): + def __init__(self, detect, estimate_keypoints, camera, offsets, + class_to_dimensions, radius=3, thickness=1): + """Pose estimation pipeline using keypoints. + """ + super(EstimatePoseMasks, self).__init__() + self.detect = detect + self.camera = camera + self.estimate_keypoints = estimate_keypoints + self.square = SequentialProcessor( + [pr.SquareBoxes2D(), pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.unwrap = UnwrapDictionary(['points3D', 'points2D', 'RGB_mask']) + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'RGB_mask', 'poses6D']) + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.denormalize_keypoints = pr.DenormalizeKeypoints() + self.cube_points3D = build_cube_points3D(0.2, 0.2, 0.07) + + def call(self, image): + boxes2D = self.detect(image)['boxes2D'] + boxes2D = self.square(boxes2D) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, RGB_masks, cubes_points2D = [], [], [] + for cropped_image, box2D in zip(cropped_images, boxes2D): + if box2D.class_name != '035_power_drill': + continue + keypoints = self.estimate_keypoints(cropped_image) + points3D, points2D, RGB_mask = self.unwrap(keypoints) + # Change keypoints coordinates + points2D = (2 * points2D / 128.0) - 1.0 + x, y = np.split(points2D, 2, axis=1) + points2D = np.concatenate([x, -y], axis=1) + points2D = self.denormalize_keypoints(points2D, cropped_image) + points2D = self.change_coordinates(points2D, box2D) + # ---------------------------- + + rotation, translation = self.predict_pose(points3D, points2D) + # quaternion = rotation_matrix_to_quaternion(rotation) + # pose6D = Pose6D(quaternion, translation, box2D.class_name) + cube_points2D = project_to_image( + rotation, translation, self.cube_points3D, + self.camera.intrinsics) + cube_points2D = cube_points2D.astype(np.int32) + + # draw mask on image + object_sizes = np.array([0.184, 0.187, 0.052]) + colors = points3D / (object_sizes / 2.0) + colors = (colors + 1.0) * 127.5 + colors = colors.astype('int') + print(colors.min(), colors.max()) + draw_keypoints(image, points2D, colors, radius=3) + # ----------------------------------- + poses6D.append(None), RGB_masks.append(RGB_mask) + cubes_points2D.append(cube_points2D) + + image = self.draw_boxes2D(image, boxes2D) + # draw cube + image = image.astype(float) + for cube_points2D in cubes_points2D: + image = draw_cube(image, cube_points2D) + image = image.astype('uint8') + + return self.wrap(image, boxes2D, RGB_masks, poses6D) diff --git a/examples/pix2pose/processors.py 
b/examples/pix2pose/processors.py index 148d601b4..d0dff4b85 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -2,6 +2,7 @@ from paz.abstract import Processor from paz.backend.keypoints import project_points3D from paz.backend.image import draw_cube +from paz.backend.quaternion import rotation_vector_to_quaternion from backend import build_cube_points3D from backend import replace_lower_than_threshold @@ -81,7 +82,8 @@ def __init__(self): super(GetNonZeroValues, self).__init__() def call(self, array): - non_zero_arguments = np.nonzero(array) + channel_wise_sum = np.sum(array, axis=2) + non_zero_arguments = np.nonzero(channel_wise_sum) return array[non_zero_arguments] @@ -90,7 +92,8 @@ def __init__(self): super(GetNonZeroArguments, self).__init__() def call(self, array): - non_zero_rows, non_zero_columns = np.nonzero(array) + channel_wise_sum = np.sum(array, axis=2) + non_zero_rows, non_zero_columns = np.nonzero(channel_wise_sum) return non_zero_rows, non_zero_columns @@ -140,3 +143,38 @@ def __init__(self): def call(self, image): return image[:128, :128, :] + + +class UnwrapDictionary(Processor): + def __init__(self, keys): + super(UnwrapDictionary, self).__init__() + self.keys = keys + + def call(self, dictionary): + return [dictionary[key] for key in self.keys] + + +class ToAffineMatrix(Processor): + def __init__(self): + super(ToAffineMatrix, self).__init__() + + def call(self, rotation_matrix, translation): + if len(translation) != 3: + raise ValueError('Translation should be of lenght 3') + if rotation_matrix.shape != (3, 3): + raise ValueError('Rotation matrix should be of shape (3, 3)') + translation = translation.reshape(3, 1) + affine_matrix = np.concatenate([rotation_matrix, translation], axis=1) + affine_row = np.array([[0.0, 0.0, 0.0, 1.0]]) + affine_matrix = np.concatenate([affine_matrix, affine_row], axis=0) + print(affine_matrix.shape) + return affine_matrix + + +class RotationVectorToQuaternion(Processor): + def __init__(self): + super(RotationVectorToQuaternion, self).__init__() + + def call(self, rotation_vector): + quaternion = rotation_vector_to_quaternion(rotation_vector) + return quaternion From 1906c4a403ea4583936d0a8e6d452ea40002a87b Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 12:16:02 +0200 Subject: [PATCH 023/101] Refactor main pipeline --- examples/pix2pose/backend.py | 111 +++++++++++++++++++++++++++++++- examples/pix2pose/demo.py | 1 + examples/pix2pose/pipelines.py | 90 +++++++++++++------------- examples/pix2pose/processors.py | 22 ++++++- 4 files changed, 177 insertions(+), 47 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index a172b7461..a90c06597 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -1,6 +1,8 @@ +from collections import Iterable import numpy as np from paz.backend.image.draw import GREEN from paz.backend.image import draw_line, draw_dot, draw_circle +from paz.abstract import Pose6D import cv2 @@ -267,11 +269,14 @@ def draw_keypoints(image, keypoints, colors, radius): return image -def draw_mask(image, keypoints, colors, radius): +def draw_maski(image, keypoints, colors, radius=5): for keypoint, color in zip(keypoints, colors): R, G, B = color color = (int(R), int(G), int(B)) - draw_circle(image, keypoint.astype('int'), color, radius) + x, y = keypoint + x = int(x) + y = int(y) + draw_dot(image, (x, y), color, radius) return image @@ -291,3 +296,105 @@ def rotation_matrix_to_quaternion(rotation_matrix): qy = (m02 - 
m20) / (4.0 * qw) qz = (m10 - m01) / (4.0 * qw) return qx, qy, qz, qw + + +def to_pose6D(quaternion, translation, class_name=None): + return Pose6D(quaternion, translation, class_name) + + +class MultiList(Iterable): + def __init__(self, num_lists): + self.num_lists = num_lists + self.lists = [[] for list_arg in range(self.num_lists)] + + def append(self, *args): + if len(args) != self.num_lists: + raise ValueError('Arguments should have equal lenght as num_lists') + for arg, arg_list in zip(args, self.lists): + arg_list.append(arg) + + def __iter__(self): + return iter(self.lists) + + +def draw_mask2(image, points3D, object_sizes): + if len(object_sizes) != 3: + raise ValueError('Object sizes must contain 3 values') + colors = points3D / (object_sizes / 2.0) + colors = (colors + 1.0) * 127.5 + colors = colors.astype('int') + # draw_keypoints(image, points2D, colors, radius=3) + + +def normalize_points2D(points2D, height, width): + """Transform points2D in image coordinates to normalized coordinates. + + # Arguments + points2D: Numpy array of shape ``(num_keypoints, 2)``. + height: Int. Height of the image + width: Int. Width of the image + + # Returns + Numpy array of shape ``(num_keypoints, 2)``. + """ + image_shape = np.array([width, height]) + points2D = points2D / image_shape # [0, W], [0, H] -> [0, 1], [0, 1] + points2D = 2.0 * points2D # [0, 1], [0, 1] -> [0, 2], [0, 2] + points2D = points2D - 1.0 # [0, 2], [0, 2] -> [-1, 1], [-1, 1] + return points2D + + +def denormalize_points2D(points2D, height, width): + image_shape = np.array([width, height]) + points2D = points2D + 1.0 # [-1, 1], [-1, 1] -> [0, 2], [0, 2] + points2D = points2D / 2.0 # [0 , 2], [0 , 2] -> [0, 1], [0, 1] + points2D = points2D * image_shape # [0 , 1], [0 , 1] -> [0, W], [0, H] + return points2D + + +def flip_y_axis(points2D): + x, y = np.split(points2D, 2, axis=1) + points2D = np.concatenate([x, -y], axis=1) + return points2D + + +def denormalize_keypoints2(keypoints, height, width): + # [-1, 1] -> [-127.5, 127.5] -> [0, 255] + half_sizes = np.array([width, height]) / 2.0 + return (half_sizes * keypoints) + half_sizes + + +def translate_points2D(points2D, translation): + if len(points2D.shape) != 2: + raise ValueError('Invalid points2D shape') + if len(translation) != 2: + raise ValueError('Invalid translation lenght') + num_keypoints = len(points2D) + height, width = translation + x_translation = np.full((num_keypoints, 1), width) + y_translation = np.full((num_keypoints, 1), height) + translation = np.concatenate([x_translation, y_translation], axis=1) + translated_points2D = translation - points2D + return translated_points2D + + +def denormalize_keypoints(keypoints, height, width): + """Transform normalized keypoint coordinates into image coordinates + + # Arguments + keypoints: Numpy array of shape ``(num_keypoints, 2)``. + height: Int. Height of the image + width: Int. Width of the image + + # Returns + Numpy array of shape ``(num_keypoints, 2)``. 
+ """ + for keypoint_arg, keypoint in enumerate(keypoints): + x, y = keypoint[:2] + # transform key-point coordinates to image coordinates + x = (min(max(x, -1), 1) * width / 2 + width / 2) - 0.5 + # flip since the image coordinates for y are flipped + y = height - 0.5 - (min(max(y, -1), 1) * height / 2 + height / 2) + x, y = int(round(x)), int(round(y)) + keypoints[keypoint_arg][:2] = [x, y] + return keypoints diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 48928abc1..1b66d8941 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -41,6 +41,7 @@ object_sizes = np.array([0.184, 0.187, 0.052]) +# epsilon = 0.005 epsilon = 0.15 detect = SSD300FAT(draw=False) offsets = [0.1, 0.1] diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 050ff0496..088bcefe8 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -6,12 +6,14 @@ from processors import ( GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC, - RotationVectorToRotationMatrix, ReplaceLowerThanThreshold) -from backend import build_cube_points3D, project_to_image, draw_cube, draw_keypoints, project_to_image2 -from processors import CropImage, UnwrapDictionary, ToAffineMatrix, RotationVectorToQuaternion -from paz.backend.image import show_image -from backend import solve_PnP_RANSAC, rotation_matrix_to_quaternion -from backend import rotation_vector_to_rotation_matrix + ReplaceLowerThanThreshold) +from backend import (build_cube_points3D, project_to_image, draw_cube, + draw_keypoints) +from processors import UnwrapDictionary, RotationVectorToQuaternion +# from paz.backend.image import show_image +from backend import quaternion_to_rotation_matrix, draw_maski +from backend import normalize_points2D, flip_y_axis +from backend import denormalize_points2D class DomainRandomization(SequentialProcessor): @@ -61,9 +63,10 @@ class SolveChangingObjectPnP(SequentialProcessor): def __init__(self, camera_intrinsics): super(SolveChangingObjectPnP, self).__init__() self.add(SolveChangingObjectPnPRANSAC(camera_intrinsics)) - self.add(pr.ControlMap(RotationVectorToRotationMatrix())) - # self.add(pr.ControlMap(RotationVectorToQuaternion())) + # self.add(pr.ControlMap(RotationVectorToRotationMatrix())) + self.add(pr.ControlMap(RotationVectorToQuaternion())) self.add(pr.ControlMap(pr.Squeeze(1), [1], [1])) + # self.add(ToPose6D()) # self.add(ToAffineMatrix()) @@ -78,14 +81,9 @@ def __init__(self, model, object_sizes, epsilon=0.15): self.wrap = pr.WrapOutput(['points3D', 'points2D', 'RGB_mask']) def call(self, image): - # show_image(image, wait=False) - print(image.shape) image = self.resize(image) - print(image.shape) RGB_mask = self.predict_RGBMask(image) - print(RGB_mask.shape) points3D = self.RGBMask_to_points3D(RGB_mask) - # points3D = points3D * 100 points2D = self.RGBMask_to_points2D(RGB_mask) return self.wrap(points3D, points2D, RGB_mask) """ @@ -117,10 +115,9 @@ def __init__(self, detect, estimate_keypoints, camera, offsets, self.crop = pr.CropBoxes2D() self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.unwrap = UnwrapDictionary(['points3D', 'points2D', 'RGB_mask']) - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'RGB_mask', 'poses6D']) + self.unwrap = UnwrapDictionary(['points2D', 'points3D']) + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) self.draw_boxes2D = 
pr.DrawBoxes2D(detect.class_names) - self.denormalize_keypoints = pr.DenormalizeKeypoints() self.cube_points3D = build_cube_points3D(0.2, 0.2, 0.07) def call(self, image): @@ -128,44 +125,49 @@ def call(self, image): boxes2D = self.square(boxes2D) boxes2D = self.clip(image, boxes2D) cropped_images = self.crop(image, boxes2D) - poses6D, RGB_masks, cubes_points2D = [], [], [] - for cropped_image, box2D in zip(cropped_images, boxes2D): + poses6D, points = [], [] + for crop, box2D in zip(cropped_images, boxes2D): if box2D.class_name != '035_power_drill': continue - keypoints = self.estimate_keypoints(cropped_image) - points3D, points2D, RGB_mask = self.unwrap(keypoints) - # Change keypoints coordinates - points2D = (2 * points2D / 128.0) - 1.0 - x, y = np.split(points2D, 2, axis=1) - points2D = np.concatenate([x, -y], axis=1) - points2D = self.denormalize_keypoints(points2D, cropped_image) + points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) + + points2D = normalize_points2D(points2D, 128.0, 128.0) + crop_H, crop_W = crop.shape[:2] + points2D = denormalize_points2D(points2D, crop_H, crop_W) points2D = self.change_coordinates(points2D, box2D) - # ---------------------------- - rotation, translation = self.predict_pose(points3D, points2D) - # quaternion = rotation_matrix_to_quaternion(rotation) - # pose6D = Pose6D(quaternion, translation, box2D.class_name) - cube_points2D = project_to_image( - rotation, translation, self.cube_points3D, - self.camera.intrinsics) - cube_points2D = cube_points2D.astype(np.int32) + quaternion, translation = self.predict_pose(points3D, points2D) + pose6D = Pose6D(quaternion, translation, box2D.class_name) + + poses6D.append(pose6D), points.append([points2D, points3D]) - # draw mask on image + # draw boxes + new_boxes2D = [] + for box2D in boxes2D: + if box2D.class_name == '035_power_drill': + new_boxes2D.append(box2D) + image = self.draw_boxes2D(image, new_boxes2D) + + # draw masks + for points2D, points3D in points: object_sizes = np.array([0.184, 0.187, 0.052]) colors = points3D / (object_sizes / 2.0) colors = (colors + 1.0) * 127.5 colors = colors.astype('int') - print(colors.min(), colors.max()) - draw_keypoints(image, points2D, colors, radius=3) - # ----------------------------------- - poses6D.append(None), RGB_masks.append(RGB_mask) - cubes_points2D.append(cube_points2D) - - image = self.draw_boxes2D(image, boxes2D) - # draw cube + draw_maski(image, points2D, colors) + + # draw cubes image = image.astype(float) - for cube_points2D in cubes_points2D: + for pose6D in poses6D: + rotation = quaternion_to_rotation_matrix(pose6D.quaternion) + rotation = np.squeeze(rotation, axis=2) + cube_points2D = project_to_image( + rotation, + pose6D.translation, + self.cube_points3D, + self.camera.intrinsics) + cube_points2D = cube_points2D.astype(np.int32) image = draw_cube(image, cube_points2D) image = image.astype('uint8') - return self.wrap(image, boxes2D, RGB_masks, poses6D) + return self.wrap(image, boxes2D, poses6D) diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py index d0dff4b85..e31f3bddc 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -9,6 +9,7 @@ from backend import arguments_to_image_points2D from backend import solve_PnP_RANSAC from backend import rotation_vector_to_rotation_matrix +from backend import translate_points2D class ImageToClosedOneBall(Processor): @@ -18,7 +19,7 @@ def __init__(self): super(ImageToClosedOneBall, self).__init__() def call(self, image): - return (image / 
127.5) - 1 + return (image / 127.5) - 1.0 class ClosedOneBallToImage(Processor): @@ -178,3 +179,22 @@ def __init__(self): def call(self, rotation_vector): quaternion = rotation_vector_to_quaternion(rotation_vector) return quaternion + + +class TranslatePoints2D(Processor): + def __init__(self): + super(TranslatePoints2D, self).__init__() + + def call(points2D, image): + height, width = image.shape[:2] + translated_points2D = translate_points2D(points2D, (height, width)) + return translated_points2D + + +class FlipYAxisPoints2D(Processor): + def __init__(self): + super(FlipYAxisPoints2D, self).__init__() + + def call(self, points2D, image): + height = image.shape[0] + translate_points2D(points2D, (0, height)) From a512a091a1eb8586f53631bbced8b07b33fdcf15 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 15:06:45 +0200 Subject: [PATCH 024/101] Refactor pipelines --- examples/pix2pose/pipelines.py | 81 +++++++++------------------------- 1 file changed, 20 insertions(+), 61 deletions(-) diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 088bcefe8..b60c4cbd8 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -7,13 +7,12 @@ GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC, ReplaceLowerThanThreshold) -from backend import (build_cube_points3D, project_to_image, draw_cube, - draw_keypoints) +from backend import build_cube_points3D, project_to_image, draw_cube from processors import UnwrapDictionary, RotationVectorToQuaternion -# from paz.backend.image import show_image +from processors import NormalizePoints2D from backend import quaternion_to_rotation_matrix, draw_maski -from backend import normalize_points2D, flip_y_axis from backend import denormalize_points2D +from backend import draw_poses6D class DomainRandomization(SequentialProcessor): @@ -34,7 +33,7 @@ def __init__(self, renderer, image_shape, image_paths, num_occlusions=1): class PredictRGBMask(SequentialProcessor): def __init__(self, model, epsilon=0.15): super(PredictRGBMask, self).__init__() - # self.add(pr.ResizeImage((128, 128))) + self.add(pr.ResizeImage(model.input_shape[1:3])) self.add(pr.NormalizeImage()) self.add(pr.ExpandDims(0)) self.add(pr.Predict(model)) @@ -53,51 +52,33 @@ def __init__(self, object_sizes): class RGBMaskToImagePoints2D(SequentialProcessor): - def __init__(self): + def __init__(self, output_shape): super(RGBMaskToImagePoints2D, self).__init__() self.add(GetNonZeroArguments()) self.add(ArgumentsToImagePoints2D()) + self.add(NormalizePoints2D(output_shape)) class SolveChangingObjectPnP(SequentialProcessor): def __init__(self, camera_intrinsics): super(SolveChangingObjectPnP, self).__init__() self.add(SolveChangingObjectPnPRANSAC(camera_intrinsics)) - # self.add(pr.ControlMap(RotationVectorToRotationMatrix())) self.add(pr.ControlMap(RotationVectorToQuaternion())) - self.add(pr.ControlMap(pr.Squeeze(1), [1], [1])) - # self.add(ToPose6D()) - # self.add(ToAffineMatrix()) class Pix2Pose(pr.Processor): def __init__(self, model, object_sizes, epsilon=0.15): self.object_sizes = object_sizes - H, W = model.input_shape[1:3] - self.resize = pr.ResizeImage((W, H)) self.predict_RGBMask = PredictRGBMask(model, epsilon) - self.RGBMask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) - self.RGBMask_to_points2D = RGBMaskToImagePoints2D() + self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) + self.mask_to_points2D = 
RGBMaskToImagePoints2D(model.output_shape[1:3]) self.wrap = pr.WrapOutput(['points3D', 'points2D', 'RGB_mask']) def call(self, image): - image = self.resize(image) RGB_mask = self.predict_RGBMask(image) - points3D = self.RGBMask_to_points3D(RGB_mask) - points2D = self.RGBMask_to_points2D(RGB_mask) + points3D = self.mask_to_points3D(RGB_mask) + points2D = self.mask_to_points2D(RGB_mask) return self.wrap(points3D, points2D, RGB_mask) - """ - rotation_vector, translation = self.predict_pose(points3D, points2D) - rotation_matrix = self.vector_to_matrix(rotation_vector) - translation = np.squeeze(translation, 1) - points3D = build_cube_points3D(*self.object_sizes) - points2D = project_to_image( - rotation_matrix, translation, points3D, self.camera.intrinsics) - points2D = points2D.astype(np.int32) - image = draw_cube(image.astype(float), points2D) - image = image.astype('uint8') - return {'image', image} - """ class EstimatePoseMasks(Processor): @@ -109,8 +90,11 @@ def __init__(self, detect, estimate_keypoints, camera, offsets, self.detect = detect self.camera = camera self.estimate_keypoints = estimate_keypoints - self.square = SequentialProcessor( - [pr.SquareBoxes2D(), pr.OffsetBoxes2D(offsets)]) + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(['035_power_drill']), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) self.clip = pr.ClipBoxes2D() self.crop = pr.CropBoxes2D() self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() @@ -121,32 +105,18 @@ def __init__(self, detect, estimate_keypoints, camera, offsets, self.cube_points3D = build_cube_points3D(0.2, 0.2, 0.07) def call(self, image): - boxes2D = self.detect(image)['boxes2D'] - boxes2D = self.square(boxes2D) + boxes2D = self.postprocess_boxes(self.detect(image)) boxes2D = self.clip(image, boxes2D) cropped_images = self.crop(image, boxes2D) poses6D, points = [], [] for crop, box2D in zip(cropped_images, boxes2D): - if box2D.class_name != '035_power_drill': - continue points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) - - points2D = normalize_points2D(points2D, 128.0, 128.0) - crop_H, crop_W = crop.shape[:2] - points2D = denormalize_points2D(points2D, crop_H, crop_W) + points2D = denormalize_points2D(points2D, *crop.shape[0:2]) points2D = self.change_coordinates(points2D, box2D) - quaternion, translation = self.predict_pose(points3D, points2D) pose6D = Pose6D(quaternion, translation, box2D.class_name) - poses6D.append(pose6D), points.append([points2D, points3D]) - - # draw boxes - new_boxes2D = [] - for box2D in boxes2D: - if box2D.class_name == '035_power_drill': - new_boxes2D.append(box2D) - image = self.draw_boxes2D(image, new_boxes2D) + image = self.draw_boxes2D(image, boxes2D) # draw masks for points2D, points3D in points: @@ -157,17 +127,6 @@ def call(self, image): draw_maski(image, points2D, colors) # draw cubes - image = image.astype(float) - for pose6D in poses6D: - rotation = quaternion_to_rotation_matrix(pose6D.quaternion) - rotation = np.squeeze(rotation, axis=2) - cube_points2D = project_to_image( - rotation, - pose6D.translation, - self.cube_points3D, - self.camera.intrinsics) - cube_points2D = cube_points2D.astype(np.int32) - image = draw_cube(image, cube_points2D) - image = image.astype('uint8') - + image = draw_poses6D( + image, poses6D, self.cube_points3D, self.camera.intrinsics) return self.wrap(image, boxes2D, poses6D) From bfd1ebda129dbf3bddd7d2a4ad7abe70404ab39e Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 
15:07:01 +0200 Subject: [PATCH 025/101] Refactor backend --- examples/pix2pose/backend.py | 16 ++++++++++++++++ examples/pix2pose/demo.py | 1 - 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index a90c06597..25419ea89 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -142,6 +142,7 @@ def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, object_points3D, image_points2D, camera_intrinsics, None, flags=cv2.SOLVEPNP_EPNP, reprojectionError=inlier_threshold, iterationsCount=num_iterations) + translation = np.squeeze(translation, 1) if success is False: rotation_vector, translation = None, None return rotation_vector, translation @@ -352,6 +353,7 @@ def denormalize_points2D(points2D, height, width): return points2D + def flip_y_axis(points2D): x, y = np.split(points2D, 2, axis=1) points2D = np.concatenate([x, -y], axis=1) @@ -398,3 +400,17 @@ def denormalize_keypoints(keypoints, height, width): x, y = int(round(x)), int(round(y)) keypoints[keypoint_arg][:2] = [x, y] return keypoints + + +def draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics): + image = image.astype(float) + for pose6D in poses6D: + rotation = quaternion_to_rotation_matrix(pose6D.quaternion) + rotation = np.squeeze(rotation, axis=2) + cube_points2D = project_to_image( + rotation, pose6D.translation, + cube_points3D, camera_intrinsics) + cube_points2D = cube_points2D.astype(np.int32) + image = draw_cube(image, cube_points2D) + image = image.astype('uint8') + return image diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 1b66d8941..48928abc1 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -41,7 +41,6 @@ object_sizes = np.array([0.184, 0.187, 0.052]) -# epsilon = 0.005 epsilon = 0.15 detect = SSD300FAT(draw=False) offsets = [0.1, 0.1] From 858cf268df4dc742bf008fa27f135ec7ac3d86e4 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 15:07:15 +0200 Subject: [PATCH 026/101] Add basic processor --- examples/pix2pose/processors.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py index e31f3bddc..a3452323f 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -10,6 +10,7 @@ from backend import solve_PnP_RANSAC from backend import rotation_vector_to_rotation_matrix from backend import translate_points2D +from backend import normalize_points2D class ImageToClosedOneBall(Processor): @@ -198,3 +199,12 @@ def __init__(self): def call(self, points2D, image): height = image.shape[0] translate_points2D(points2D, (0, height)) + + +class NormalizePoints2D(Processor): + def __init__(self, image_shape): + self.height, self.width = image_shape[:2] + + def call(self, points2D): + points2D = normalize_points2D(points2D, self.height, self.width) + return points2D From a40233135a80eef6d94aea0c38d02dfd9a55bb7c Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 15:07:44 +0200 Subject: [PATCH 027/101] Start ObjectHypothesis example --- examples/pix2pose/messages.py | 50 +++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 examples/pix2pose/messages.py diff --git a/examples/pix2pose/messages.py b/examples/pix2pose/messages.py new file mode 100644 index 000000000..1c50c176d --- /dev/null +++ b/examples/pix2pose/messages.py @@ -0,0 +1,50 @@ +from paz.abstract.messages import Box2D, Pose6D + + +class 
ObjectHypothesis(object): + # TODO: Check if class_name, score is the same + def __init__(self, score=None, class_name=None, box2D=None, pose6D=None): + self.score = score + self.class_name = class_name + self.box2D = box2D + self.pose6D = pose6D + + @property + def box2D(self): + return self._box2D + + @box2D.setter + def box2D(self, value): + if not isinstance(value, Box2D): + raise ValueError('Value must be a Box2D class') + + if self.score is None: + if value.score is not None: + self.score = value.score + else: + if self.score != value.score: + raise ValueError('Mismatch score between Hypothesis and Box2D') + + + if self.score is None and (value.score is not None): + self.score = value.score + elif (self.score is not None) and (value.score is not None): + if self.score != value.score: + raise ValueError('Mismatch score between Hypothesis and Box2D') + if self.class_name is None and (value.class_name is not None): + self.class_name = value.class_name + self._box2D = value + + @property + def pose6D(self): + return self._pose6D + + @pose6D.setter + def pose6D(self, value): + if not isinstance(value, Pose6D): + raise ValueError('Value must be a Pose6D class') + if (self.score is None) and (value.score is not None): + self.score = value.score + if self.class_name is None and (value.class_name is not None): + self.class_name = value.class_name + self._pose6D = value From 9810748ad021f72f98260a7e8318bed1d5a4c2a6 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 15:09:30 +0200 Subject: [PATCH 028/101] Remove comments --- examples/pix2pose/demo.py | 51 +-------------------------------------- 1 file changed, 1 insertion(+), 50 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 48928abc1..2cb953dfd 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -1,20 +1,10 @@ -import os -import cv2 import numpy as np from paz.models import UNET_VGG16 from paz.backend.image import show_image, load_image -from paz import processors as pr from paz.backend.camera import Camera -from scenes import PixelMaskRenderer -from processors import DrawBoxes3D -# from backend import homogenous_quaternion_to_rotation_matrix -from backend import solve_PnP_RANSAC -from backend import project_to_image -from backend import build_cube_points3D -from backend import draw_cube from pipelines import Pix2Pose from pipelines import EstimatePoseMasks -from paz.backend.camera import VideoPlayer +# from paz.backend.camera import VideoPlayer from paz.applications import SSD300FAT @@ -39,7 +29,6 @@ [0, focal_length, image_center[1]], [0, 0, 1]]) - object_sizes = np.array([0.184, 0.187, 0.052]) epsilon = 0.15 detect = SSD300FAT(draw=False) @@ -54,41 +43,3 @@ # image_size = (640, 480) # player = VideoPlayer(image_size, pipeline, camera) # player.run() -""" -def show_results(): - image, alpha, RGB_mask_true = renderer.render() - normalized_image = np.expand_dims(image / 255.0, 0) - RGB_mask_pred = model.predict(normalized_image) - RGB_mask_pred = np.squeeze(RGB_mask_pred, 0) - RGB_mask_pred[RGB_mask_pred < epsilon] = 0.0 - show_image((RGB_mask_pred * 255.0).astype('uint8')) - - mask_pred = np.sum(RGB_mask_pred, axis=2) - non_zero_arguments = np.nonzero(mask_pred) - RGB_mask_pred = RGB_mask_pred[non_zero_arguments] - RGB_mask_pred = (2.0 * RGB_mask_pred) - 1.0 - # this RGB mask scaling is good since you are scaling in RGB space - object_points3D = (object_size / 2.0) * RGB_mask_pred - num_points = len(object_points3D) - - row_args, col_args = non_zero_arguments - 
row_args = row_args.reshape(-1, 1) - col_args = col_args.reshape(-1, 1) - image_points2D = np.concatenate([col_args, row_args], axis=1) - image_points2D = image_points2D.reshape(num_points, 1, 2) - image_points2D = image_points2D.astype(np.float64) - image_points2D = np.ascontiguousarray(image_points2D) - - rotation_vector, translation = solve_PnP_RANSAC( - object_points3D, image_points2D, camera.intrinsics) - rotation_matrix = np.eye(3) - cv2.Rodrigues(rotation_vector, rotation_matrix) - translation = np.squeeze(translation, 1) - points3D = build_cube_points3D(0.184, 0.187, 0.052) - points2D = project_to_image( - rotation_matrix, translation, points3D, camera.intrinsics) - points2D = points2D.astype(np.int32) - image = draw_cube(image.astype(float), points2D) - image = image.astype('uint8') - show_image(image) -""" From 501f43456f618754abf43efc8b1dfbbd23239189 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 15:40:47 +0200 Subject: [PATCH 029/101] Refactor code --- examples/pix2pose/backend.py | 12 +++++++++++- examples/pix2pose/demo.py | 25 +++++++++++++------------ examples/pix2pose/pipelines.py | 28 +++++++++++----------------- 3 files changed, 35 insertions(+), 30 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 25419ea89..fccf619d3 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -270,7 +270,17 @@ def draw_keypoints(image, keypoints, colors, radius): return image -def draw_maski(image, keypoints, colors, radius=5): +def draw_masks(image, points): + for points2D, points3D in points: + object_sizes = np.array([0.184, 0.187, 0.052]) + colors = points3D / (object_sizes / 2.0) + colors = (colors + 1.0) * 127.5 + colors = colors.astype('int') + image = draw_maski(image, points2D, colors) + return image + + +def draw_maski(image, keypoints, colors, radius=1): for keypoint, color in zip(keypoints, colors): R, G, B = color color = (int(R), int(G), int(B)) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 2cb953dfd..8dd50ab67 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -4,7 +4,7 @@ from paz.backend.camera import Camera from pipelines import Pix2Pose from pipelines import EstimatePoseMasks -# from paz.backend.camera import VideoPlayer +from paz.backend.camera import VideoPlayer from paz.applications import SSD300FAT @@ -15,20 +15,21 @@ model.load_weights('UNET_weights_epochs-10_beta-3.hdf5') # approximating intrinsic camera parameters -camera = Camera(device_id=0) -# camera.start() -# image_size = camera.read().shape[0:2] -# camera.stop() +camera = Camera(device_id=4) +camera.start() +image_size = camera.read().shape[0:2] +camera.stop() +""" image = load_image('test_image.jpg') image_size = image.shape[0:2] +""" focal_length = image_size[1] image_center = (image_size[1] / 2.0, image_size[0] / 2.0) camera.distortion = np.zeros((4)) camera.intrinsics = np.array([[focal_length, 0, image_center[0]], [0, focal_length, image_center[1]], [0, 0, 1]]) - object_sizes = np.array([0.184, 0.187, 0.052]) epsilon = 0.15 detect = SSD300FAT(draw=False) @@ -36,10 +37,10 @@ estimate_keypoints = Pix2Pose(model, object_sizes) pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets, None) -results = pipeline(image) -predicted_image = results['image'] -show_image(predicted_image) +# results = pipeline(image) +# predicted_image = results['image'] +# show_image(predicted_image) -# image_size = (640, 480) -# player = VideoPlayer(image_size, pipeline, camera) 
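# A minimal numeric sketch of the pinhole approximation used in demo.py above,
# assuming a 640x480 frame (the demo itself takes the size from the loaded
# image): the focal length is set to the image width and the principal point
# to the image center.
import numpy as np

W, H = 640, 480
camera_intrinsics = np.array([[W, 0.0, W / 2.0],
                              [0.0, W, H / 2.0],
                              [0.0, 0.0, 1.0]])
# focal length = 640 pixels, principal point = (320, 240)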
-# player.run() +image_size = (640, 480) +player = VideoPlayer(image_size, pipeline, camera) +player.run() diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index b60c4cbd8..30ff49d97 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -7,12 +7,13 @@ GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC, ReplaceLowerThanThreshold) -from backend import build_cube_points3D, project_to_image, draw_cube +from backend import build_cube_points3D from processors import UnwrapDictionary, RotationVectorToQuaternion from processors import NormalizePoints2D -from backend import quaternion_to_rotation_matrix, draw_maski +from backend import draw_maski from backend import denormalize_points2D from backend import draw_poses6D +from backend import draw_masks class DomainRandomization(SequentialProcessor): @@ -83,13 +84,14 @@ def call(self, image): class EstimatePoseMasks(Processor): def __init__(self, detect, estimate_keypoints, camera, offsets, - class_to_dimensions, radius=3, thickness=1): + class_to_dimensions, radius=3, thickness=1, draw=True): """Pose estimation pipeline using keypoints. """ super(EstimatePoseMasks, self).__init__() self.detect = detect - self.camera = camera self.estimate_keypoints = estimate_keypoints + self.camera = camera + self.draw = draw self.postprocess_boxes = SequentialProcessor( [pr.UnpackDictionary(['boxes2D']), pr.FilterClassBoxes2D(['035_power_drill']), @@ -116,17 +118,9 @@ def call(self, image): quaternion, translation = self.predict_pose(points3D, points2D) pose6D = Pose6D(quaternion, translation, box2D.class_name) poses6D.append(pose6D), points.append([points2D, points3D]) - image = self.draw_boxes2D(image, boxes2D) - - # draw masks - for points2D, points3D in points: - object_sizes = np.array([0.184, 0.187, 0.052]) - colors = points3D / (object_sizes / 2.0) - colors = (colors + 1.0) * 127.5 - colors = colors.astype('int') - draw_maski(image, points2D, colors) - - # draw cubes - image = draw_poses6D( - image, poses6D, self.cube_points3D, self.camera.intrinsics) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + image = draw_masks(image, points) + image = draw_poses6D( + image, poses6D, self.cube_points3D, self.camera.intrinsics) return self.wrap(image, boxes2D, poses6D) From 46c4e2d8fbb975289efed8cfa9d2392ed536f010 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 1 Nov 2021 13:35:01 +0100 Subject: [PATCH 030/101] Refactor code to train GAN --- examples/pix2pose/demo.py | 16 ++-- examples/pix2pose/loss.py | 88 ++++++++++++++----- examples/pix2pose/metrics.py | 15 ++++ examples/pix2pose/models/generator.py | 18 ++-- examples/pix2pose/pipelines.py | 7 +- examples/pix2pose/train.py | 7 +- examples/pix2pose/train_gan.py | 117 ++++++++++++++++++++++++++ 7 files changed, 227 insertions(+), 41 deletions(-) create mode 100644 examples/pix2pose/metrics.py create mode 100644 examples/pix2pose/train_gan.py diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 8dd50ab67..def06b71d 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -16,14 +16,12 @@ # approximating intrinsic camera parameters camera = Camera(device_id=4) -camera.start() -image_size = camera.read().shape[0:2] -camera.stop() +# camera.start() +# image_size = camera.read().shape[0:2] +# camera.stop() -""" image = load_image('test_image.jpg') image_size = image.shape[0:2] -""" focal_length = image_size[1] image_center = 
(image_size[1] / 2.0, image_size[0] / 2.0) camera.distortion = np.zeros((4)) @@ -37,10 +35,12 @@ estimate_keypoints = Pix2Pose(model, object_sizes) pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets, None) -# results = pipeline(image) -# predicted_image = results['image'] -# show_image(predicted_image) +results = pipeline(image) +predicted_image = results['image'] +show_image(predicted_image) +""" image_size = (640, 480) player = VideoPlayer(image_size, pipeline, camera) player.run() +""" diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index 3b57dcc7d..741f64dc3 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -1,36 +1,82 @@ from tensorflow.keras.losses import Loss +from tensorflow.keras.losses import mean_squared_error import tensorflow as tf -class WeightedForeground(Loss): - def __init__(self, beta=3.0): - super(WeightedForeground, self).__init__() - self.beta = beta +def extract_alpha_mask(RGBA_mask): + color_mask = RGBA_mask[:, :, :, 0:3] + alpha_mask = RGBA_mask[:, :, :, 3:4] + return color_mask, alpha_mask + + +def extract_error_mask(RGBE_mask): + color_mask = RGBE_mask[:, :, :, 0:3] + error_mask = RGBE_mask[:, :, :, 3:4] + return color_mask, error_mask + + +def compute_foreground_loss(RGB_true, RGB_pred, alpha_mask): + foreground_true = RGB_true * alpha_mask + foreground_pred = RGB_pred * alpha_mask + foreground_loss = tf.abs(foreground_true - foreground_pred) + return foreground_loss + + +def compute_background_loss(RGB_true, RGB_pred, alpha_mask): + background_true = RGB_true * (1.0 - alpha_mask) + background_pred = RGB_pred * (1.0 - alpha_mask) + background_loss = tf.abs(background_true - background_pred) + return background_loss + + +def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0): + RGB_true, alpha_mask = extract_alpha_mask(RGBA_true) + foreground_loss = compute_foreground_loss(RGB_true, RGB_pred, alpha_mask) + background_loss = compute_background_loss(RGB_true, RGB_pred, alpha_mask) + reconstruction_loss = (beta * foreground_loss) + background_loss + return tf.reduce_mean(reconstruction_loss, axis=-1, keepdims=True) - def _extract_alpha_mask(self, RGBA_mask): - alpha_mask = RGBA_mask[:, :, :, 3:4] - color_mask = RGBA_mask[:, :, :, 0:3] - return color_mask, alpha_mask - def call(self, RGBA_mask_true, RGB_mask_pred): - RGB_mask_true, alpha_mask = self._extract_alpha_mask(RGBA_mask_true) +def compute_weighted_reconstruction_loss_with_error(RGBA_true, RGBE_pred, + beta=3.0): + RGB_pred, error_mask = extract_error_mask(RGBE_pred) + loss = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) + return loss - foreground_true = RGB_mask_true * alpha_mask - foreground_pred = RGB_mask_pred * alpha_mask - foreground_loss = tf.abs(foreground_true - foreground_pred) - background_true = RGB_mask_true * (1.0 - alpha_mask) - background_pred = RGB_mask_pred * (1.0 - alpha_mask) - background_loss = tf.abs(background_true - background_pred) +def compute_error_prediction_loss(RGBA_true, RGBE_pred): + RGB_pred, error_pred = extract_error_mask(RGBE_pred) + error_true = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, 1.0) + error_true = tf.minimum(error_true, 1.0) + error_loss = mean_squared_error(error_true, error_pred) + error_loss = tf.expand_dims(error_loss, axis=-1) + return error_loss - loss = (self.beta * foreground_loss) + background_loss - loss = tf.reduce_mean(loss, axis=[1, 2, 3]) - # loss = tf.math.minimum(loss, tf.float32.max) - # loss = 
tf.losses.mean_squared_error(RGB_mask_true, RGB_mask_pred) +class WeightedReconstructionWithError(Loss): + def __init__(self, beta=3.0): + super(WeightedReconstructionWithError, self).__init__() + self.beta = beta + + def call(self, RGBA_true, RGBE_pred): + reconstruction = compute_weighted_reconstruction_loss_with_error( + RGBA_true, RGBE_pred, self.beta) + error_prediction = compute_error_prediction_loss(RGBA_true, RGBE_pred) + loss = reconstruction + error_prediction + return loss + + +class WeightedReconstruction(Loss): + def __init__(self, beta=3.0): + super(WeightedReconstruction, self).__init__() + self.beta = beta + + def call(self, RGBA_true, RGB_pred): + loss = compute_weighted_reconstruction_loss( + RGBA_true, RGB_pred, self.beta) return loss -def MSE_with_alpha_channel(y_true, y_pred): +def MSE_without_last_channel(y_true, y_pred): squared_difference = tf.square(y_true[:, :, :, 0:3] - y_pred) return tf.reduce_mean(squared_difference, axis=-1) # Note the `axis=-1` diff --git a/examples/pix2pose/metrics.py b/examples/pix2pose/metrics.py new file mode 100644 index 000000000..27e79bba2 --- /dev/null +++ b/examples/pix2pose/metrics.py @@ -0,0 +1,15 @@ +from loss import compute_weighted_reconstruction_loss_with_error +from loss import compute_error_prediction_loss +from loss import compute_weighted_reconstruction_loss + + +def weighted_reconstruction(RGBA_true, RGBE_pred, beta=3.0, with_error=False): + if with_error: + loss_function = compute_weighted_reconstruction_loss_with_error(RGBA_true, RGBE_pred, beta) + else: + loss_function = compute_weighted_reconstruction_loss(RGBA_true, RGBE_pred, beta) + return loss_function + + +def error_prediction(RGBA_true, RGBE_pred, beta=3.0): + return compute_error_prediction_loss(RGBA_true, RGBE_pred) diff --git a/examples/pix2pose/models/generator.py b/examples/pix2pose/models/generator.py index 720586a33..357d3b646 100644 --- a/examples/pix2pose/models/generator.py +++ b/examples/pix2pose/models/generator.py @@ -53,22 +53,24 @@ def decoder(x, skip_connections): def Generator(input_shape=(128, 128, 3), latent_dimension=256, name='PIX2POSE_GENERATOR'): - input_image = Input(input_shape, name='input_image') - x, skip_connections = encoder(input_image) + RGB_input = Input(input_shape, name='RGB_input') + x, skip_connections = encoder(RGB_input) x = Flatten()(x) x = Dense(latent_dimension)(x) x = Dense(8 * 8 * latent_dimension)(x) x = Reshape((8, 8, latent_dimension))(x) x = decoder(x, skip_connections) - label_image = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(x) - label_image = Activation('tanh', name='label_image')(label_image) - error_image = Conv2DTranspose(1, (5, 5), (2, 2), padding='same')(x) - error_image = Activation('sigmoid', name='error_image')(error_image) - model = Model([input_image], [label_image, error_image], name=name) + RGB = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(x) + RGB = Activation('tanh', name='RGB')(RGB) + error = Conv2DTranspose(1, (5, 5), (2, 2), padding='same')(x) + error = Activation('sigmoid', name='error')(error) + RGB_with_error = Concatenate(axis=-1, name='RGB_with_error')([RGB, error]) + model = Model(RGB_input, RGB_with_error, name=name) return model model = Generator() assert model.count_params() == 25740356 -assert model.output_shape == [(None, 128, 128, 3), (None, 128, 128, 1)] +# assert model.output_shape == [(None, 128, 128, 3), (None, 128, 128, 1)] +assert model.output_shape == (None, 128, 128, 4) assert model.input_shape == (None, 128, 128, 3) diff --git 
a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 30ff49d97..bf48f7082 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -19,7 +19,8 @@ class DomainRandomization(SequentialProcessor): """Performs domain randomization on a rendered image """ - def __init__(self, renderer, image_shape, image_paths, num_occlusions=1): + def __init__(self, renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions=1): super(DomainRandomization, self).__init__() H, W = image_shape[:2] self.add(pr.Render(renderer)) @@ -27,8 +28,12 @@ def __init__(self, renderer, image_shape, image_paths, num_occlusions=1): self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) # self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) + """ self.add(pr.SequenceWrapper({0: {'input_1': [H, W, 3]}}, {1: {'masks': [H, W, 4]}})) + """ + self.add(pr.SequenceWrapper({0: inputs_to_shape}, + {1: labels_to_shape})) class PredictRGBMask(SequentialProcessor): diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 1c4b13e30..42195bafc 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -8,7 +8,7 @@ from scenes import PixelMaskRenderer from pipelines import DomainRandomization -from loss import WeightedForeground, MSE_with_alpha_channel +from loss import WeightedReconstruction, MSE_with_alpha_channel from models.fully_convolutional_net import FullyConvolutionalNet image_shape = [128, 128, 3] @@ -47,14 +47,15 @@ sequence = GeneratingSequence(processor, batch_size, num_steps) beta = 3.0 -weighted_foreground = WeightedForeground(beta) +weighted_reconstruction = WeightedReconstruction(beta) # model = FullyConvolutionalNet(num_classes, image_shape, filters, alpha) model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) # model. 
optimizer = Adam(learning_rate) # model.load_weights('UNET_weights_MSE.hdf5') -model.compile(optimizer, weighted_foreground, metrics=MSE_with_alpha_channel) +model.compile( + optimizer, weighted_reconstruction, metrics=MSE_with_alpha_channel) model.fit( sequence, # steps_per_epoch=args.steps_per_epoch, diff --git a/examples/pix2pose/train_gan.py b/examples/pix2pose/train_gan.py new file mode 100644 index 000000000..65dfeca28 --- /dev/null +++ b/examples/pix2pose/train_gan.py @@ -0,0 +1,117 @@ +import os +import glob +from tensorflow.keras.optimizers import Adam +from paz.abstract import GeneratingSequence +from paz.models.segmentation import UNET_VGG16 +from models.generator import Generator +from paz.backend.image import show_image, resize_image +import numpy as np + +from scenes import PixelMaskRenderer +from pipelines import DomainRandomization +from loss import WeightedReconstruction +from loss import WeightedReconstructionWithError +from metrics import error_prediction, weighted_reconstruction +# from models.fully_convolutional_net import FullyConvolutionalNet + +H, W, num_channels = image_shape = [128, 128, 3] +root_path = os.path.expanduser('~') +background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' +background_wildcard = os.path.join(root_path, background_wildcard) +image_paths = glob.glob(background_wildcard) +path_OBJ = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' +path_OBJ = os.path.join(root_path, path_OBJ) +num_occlusions = 1 +viewport_size = image_shape[:2] +y_fov = 3.14159 / 4.0 +distance = [0.3, 0.5] +light = [1.0, 30] +top_only = False +roll = 3.14159 +shift = 0.05 +num_steps = 1000 +batch_size = 32 +beta = 3.0 +alpha = 0.1 +filters = 16 +num_classes = 3 +learning_rate = 0.001 +# steps_per_epoch +model_names = ['PIX2POSE', 'UNET_VGG16'] +model_name = 'UNET_VGG16' +# model_name = 'PIX2POSE' +max_num_epochs = 1 +latent_dimension = 128 +beta = 3.0 + + +renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, + light, top_only, roll, shift) + + +# model = FullyConvolutionalNet(num_classes, image_shape, filters, alpha) +# name_to_model = dict(zip(model_names, [Generator, UNET_VGG16]) +# model = name_to_model[model_name] + +if model_name == 'UNET_VGG16': + model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) + loss = WeightedReconstruction(beta) + inputs_to_shape = {'input_1': [H, W, num_channels]} + labels_to_shape = {'masks': [H, W, 4]} + metrics = weighted_reconstruction +if model_name == 'PIX2POSE': + model = Generator(image_shape, latent_dimension) + reconstruction_loss = WeightedReconstructionWithError(beta) + # error_prediction_loss = ErrorPrediction() + # loss = {'RGB_with_error': [reconstruction_loss, error_prediction_loss]} + loss = WeightedReconstructionWithError() + H, W, num_channels = image_shape + inputs_to_shape = {'RGB_input': [H, W, num_channels]} + labels_to_shape = {'RGB_with_error': [H, W, 4]} + metrics = {'RGB_with_error': [weighted_reconstruction, error_prediction]} + + +processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) + +sequence = GeneratingSequence(processor, batch_size, num_steps) + +optimizer = Adam(learning_rate) + +# inputs, labels = sequence.__getitem__(0) +# preds = model(inputs) +# error_prediction = ErrorPrediction() +# losses = error_prediction(preds, labels['RGB_with_error']) + +# model.compile(optimizer, loss, metrics=mean_squared_error) +model.compile(optimizer, loss, metrics) + +model.fit( + sequence, + 
epochs=max_num_epochs, + # callbacks=[stop, log, save, plateau, draw], + verbose=1, + workers=0) + +""" +def normalize(image): + return (image * 255.0).astype('uint8') + + +def show_results(): + # image, alpha, pixel_mask_true = renderer.render() + sample = processor() + image = sample['inputs']['input_1'] + pixel_mask_true = sample['labels']['masks'] + image = np.expand_dims(image, 0) + pixel_mask_pred = model.predict(image) + pixel_mask_pred = normalize(np.squeeze(pixel_mask_pred, axis=0)) + image = normalize(np.squeeze(image, axis=0)) + results = np.concatenate( + [image, normalize(pixel_mask_true[..., 0:3]), pixel_mask_pred], axis=1) + H, W = results.shape[:2] + scale = 6 + results = resize_image(results, (scale * W, scale * H)) + show_image(results) +""" From 5810d8ef6311fd6f31bb2b47253253a71866ef50 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 1 Nov 2021 14:11:19 +0100 Subject: [PATCH 031/101] Add training for UNET and GAN --- examples/pix2pose/metrics.py | 37 ++++++++++++++++++++++++++++------ examples/pix2pose/train_gan.py | 24 ++++++++++------------ 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/examples/pix2pose/metrics.py b/examples/pix2pose/metrics.py index 27e79bba2..4f4b451af 100644 --- a/examples/pix2pose/metrics.py +++ b/examples/pix2pose/metrics.py @@ -1,15 +1,40 @@ from loss import compute_weighted_reconstruction_loss_with_error from loss import compute_error_prediction_loss from loss import compute_weighted_reconstruction_loss +import tensorflow as tf -def weighted_reconstruction(RGBA_true, RGBE_pred, beta=3.0, with_error=False): - if with_error: - loss_function = compute_weighted_reconstruction_loss_with_error(RGBA_true, RGBE_pred, beta) - else: - loss_function = compute_weighted_reconstruction_loss(RGBA_true, RGBE_pred, beta) - return loss_function +def weighted_reconstruction_with_error(RGBA_true, RGBE_pred, beta=3.0): + return compute_weighted_reconstruction_loss_with_error( + RGBA_true, RGBE_pred, beta) + + +def weighted_reconstruction(RGBA_true, RGB_pred, beta=3.0): + return compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) def error_prediction(RGBA_true, RGBE_pred, beta=3.0): return compute_error_prediction_loss(RGBA_true, RGBE_pred) + + +def mean_squared_error(y_true, y_pred): + squared_difference = tf.square(y_true[:, :, :, 0:3] - y_pred[:, :, :, 0:3]) + return tf.reduce_mean(squared_difference, axis=-1) + + +def weighted_reconstruction2(y_true, y_pred, beta=3.0, with_error=False): + if with_error: + return compute_weighted_reconstruction_loss_with_error(y_true, y_pred, beta) + else: + return compute_error_prediction_loss(y_true, y_pred, beta) + + +def weighted_reconstruction_wrapper(beta=3.0, with_error=False): + if with_error: + def weighted_reconstruction(y_true, y_pred): + return compute_weighted_reconstruction_loss_with_error( + y_true, y_pred, beta) + else: + def weighted_reconstruction(y_true, y_pred): + return compute_weighted_reconstruction_loss(y_true, y_pred, beta) + return weighted_reconstruction diff --git a/examples/pix2pose/train_gan.py b/examples/pix2pose/train_gan.py index 65dfeca28..2f64d4f3b 100644 --- a/examples/pix2pose/train_gan.py +++ b/examples/pix2pose/train_gan.py @@ -4,14 +4,17 @@ from paz.abstract import GeneratingSequence from paz.models.segmentation import UNET_VGG16 from models.generator import Generator -from paz.backend.image import show_image, resize_image -import numpy as np +# from paz.backend.image import show_image, resize_image +# import numpy as np from scenes import 
PixelMaskRenderer from pipelines import DomainRandomization from loss import WeightedReconstruction from loss import WeightedReconstructionWithError -from metrics import error_prediction, weighted_reconstruction +# from metrics import error_prediction, weighted_reconstruction +# from metrics import weighted_reconstruction_with_error +from metrics import mean_squared_error, error_prediction +from metrics import weighted_reconstruction_wrapper # from models.fully_convolutional_net import FullyConvolutionalNet H, W, num_channels = image_shape = [128, 128, 3] @@ -58,17 +61,18 @@ loss = WeightedReconstruction(beta) inputs_to_shape = {'input_1': [H, W, num_channels]} labels_to_shape = {'masks': [H, W, 4]} - metrics = weighted_reconstruction + weighted_reconstruction = weighted_reconstruction_wrapper(beta, False) + metrics = {'masks': [weighted_reconstruction, mean_squared_error]} if model_name == 'PIX2POSE': model = Generator(image_shape, latent_dimension) reconstruction_loss = WeightedReconstructionWithError(beta) - # error_prediction_loss = ErrorPrediction() - # loss = {'RGB_with_error': [reconstruction_loss, error_prediction_loss]} loss = WeightedReconstructionWithError() H, W, num_channels = image_shape inputs_to_shape = {'RGB_input': [H, W, num_channels]} labels_to_shape = {'RGB_with_error': [H, W, 4]} - metrics = {'RGB_with_error': [weighted_reconstruction, error_prediction]} + weighted_reconstruction = weighted_reconstruction_wrapper(beta, True) + metrics = {'RGB_with_error': + [weighted_reconstruction, error_prediction, mean_squared_error]} processor = DomainRandomization( @@ -79,12 +83,6 @@ optimizer = Adam(learning_rate) -# inputs, labels = sequence.__getitem__(0) -# preds = model(inputs) -# error_prediction = ErrorPrediction() -# losses = error_prediction(preds, labels['RGB_with_error']) - -# model.compile(optimizer, loss, metrics=mean_squared_error) model.compile(optimizer, loss, metrics) model.fit( From d2b014f9842b2ed7f9b3bb0d3c498ddfae6c1bc3 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 1 Nov 2021 14:13:09 +0100 Subject: [PATCH 032/101] Remove unecessary metrics --- examples/pix2pose/metrics.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/examples/pix2pose/metrics.py b/examples/pix2pose/metrics.py index 4f4b451af..7304ab7d8 100644 --- a/examples/pix2pose/metrics.py +++ b/examples/pix2pose/metrics.py @@ -4,15 +4,6 @@ import tensorflow as tf -def weighted_reconstruction_with_error(RGBA_true, RGBE_pred, beta=3.0): - return compute_weighted_reconstruction_loss_with_error( - RGBA_true, RGBE_pred, beta) - - -def weighted_reconstruction(RGBA_true, RGB_pred, beta=3.0): - return compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) - - def error_prediction(RGBA_true, RGBE_pred, beta=3.0): return compute_error_prediction_loss(RGBA_true, RGBE_pred) @@ -22,13 +13,6 @@ def mean_squared_error(y_true, y_pred): return tf.reduce_mean(squared_difference, axis=-1) -def weighted_reconstruction2(y_true, y_pred, beta=3.0, with_error=False): - if with_error: - return compute_weighted_reconstruction_loss_with_error(y_true, y_pred, beta) - else: - return compute_error_prediction_loss(y_true, y_pred, beta) - - def weighted_reconstruction_wrapper(beta=3.0, with_error=False): if with_error: def weighted_reconstruction(y_true, y_pred): From 3b85afaad4219d9757be4677f2d76f91b12f419d Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 2 Nov 2021 10:35:06 +0100 Subject: [PATCH 033/101] Add available GAN training --- examples/pix2pose/models/gan_example.py | 81 
+++++++++++++++ examples/pix2pose/models/keras_example.py | 67 ++++++++++++ examples/pix2pose/models/pix2pose.py | 118 +++++++++++----------- examples/pix2pose/old_train.py | 7 +- examples/pix2pose/train_gan.py | 36 +++++-- 5 files changed, 237 insertions(+), 72 deletions(-) create mode 100644 examples/pix2pose/models/gan_example.py create mode 100644 examples/pix2pose/models/keras_example.py diff --git a/examples/pix2pose/models/gan_example.py b/examples/pix2pose/models/gan_example.py new file mode 100644 index 000000000..8472a9462 --- /dev/null +++ b/examples/pix2pose/models/gan_example.py @@ -0,0 +1,81 @@ +import tensorflow as tf +from tensorflow.keras.models import Model +from tensorflow.keras.metrics import Mean + + +class Pix2PoseGAN(Model): + def __init__(self, image_shape, discriminator, generator, latent_dim): + super(Pix2PoseGAN, self).__init__() + self.image_shape = image_shape + self.discriminator = discriminator + self.generator = generator + self.latent_dim = latent_dim + self.generator_loss_tracker = Mean(name='generator_loss') + self.discriminator_loss_tracker = Mean(name='discriminator_loss') + + @property + def metrics(self): + return [self.generator_loss_tracker, self.discriminator_loss_tracker] + + def compile(self, d_optimizer, g_optimizer, loss_fn): + super(Pix2PoseGAN, self).compile() + self.d_optimizer = d_optimizer + self.g_optimizer = g_optimizer + self.loss_fn = loss_fn + + def train_step(self, data): + RGB_inputs, RGB_labels = data + RGB_generated = self.generator(RGB_inputs) + RGB_combined = tf.concat([RGB_generated, RGB_labels], axis=0) + """ + # Add dummy dimensions to the labels so that they can be concatenated with + # the images. This is for the discriminator. + image_one_hot_labels = one_hot_labels[:, :, None, None] + image_one_hot_labels = tf.repeat(image_one_hot_labels, repeats=[image_size * image_size]) + image_one_hot_labels = tf.reshape(image_one_hot_labels, (-1, image_size, image_size, num_classes)) + + # Sample random points in the latent space and concatenate the labels. + # This is for the generator. + batch_size = tf.shape(real_images)[0] + random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) + random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) + + # Decode the noise (guided by labels) to fake images. + generated_images = self.generator(random_vector_labels) + """ + + # Combine them with real images. Note that we are concatenating the labels + # with these images here. + + # Assemble labels discriminating real from fake images. + labels = tf.concat([tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0) + + # Train the discriminator. + with tf.GradientTape() as tape: + predictions = self.discriminator(combined_images) + d_loss = self.loss_fn(labels, predictions) + grads = tape.gradient(d_loss, self.discriminator.trainable_weights) + self.d_optimizer.apply_gradients(zip(grads, self.discriminator.trainable_weights)) + + # Sample random points in the latent space. + random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) + random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) + + # Assemble labels that say "all real images". + misleading_labels = tf.zeros((batch_size, 1)) + + # Train the generator (note that we should *not* update the weights + # of the discriminator)! 
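# A tiny worked example of the targets built above, assuming batch_size=2:
# RGB_combined stacks the generated masks first and the real ones last, so
#
#     labels            = [[1.], [1.], [0.], [0.]]   # 1 marks generated, 0 marks real
#     misleading_labels = [[0.], [0.]]
#
# The discriminator step above learns to separate the two halves, while the
# generator step below is rewarded when the discriminator assigns the "real"
# label (0) to the generated samples.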
+ with tf.GradientTape() as tape: + fake_images = self.generator(random_vector_labels) + fake_image_and_labels = tf.concat([fake_images, image_one_hot_labels], -1) + predictions = self.discriminator(fake_image_and_labels) + g_loss = self.loss_fn(misleading_labels, predictions) + grads = tape.gradient(g_loss, self.generator.trainable_weights) + self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights)) + + # Monitor loss. + self.generator_loss_tracker.update_state(g_loss) + self.discriminator_loss_tracker.update_state(d_loss) + return {'generator_loss': self.generator_loss_tracker.result(), + 'discrminator_loss': self.discriminator_loss_tracker.result()} diff --git a/examples/pix2pose/models/keras_example.py b/examples/pix2pose/models/keras_example.py new file mode 100644 index 000000000..c3f016cfa --- /dev/null +++ b/examples/pix2pose/models/keras_example.py @@ -0,0 +1,67 @@ +class GAN(keras.Model): + def __init__(self, discriminator, generator, latent_dim): + super(GAN, self).__init__() + self.discriminator = discriminator + self.generator = generator + self.latent_dim = latent_dim + + def compile(self, d_optimizer, g_optimizer, loss_fn): + super(GAN, self).compile() + self.d_optimizer = d_optimizer + self.g_optimizer = g_optimizer + self.loss_fn = loss_fn + self.d_loss_metric = keras.metrics.Mean(name="d_loss") + self.g_loss_metric = keras.metrics.Mean(name="g_loss") + + @property + def metrics(self): + return [self.d_loss_metric, self.g_loss_metric] + + def train_step(self, real_images): + # Sample random points in the latent space + batch_size = tf.shape(real_images)[0] + random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) + + # Decode them to fake images + generated_images = self.generator(random_latent_vectors) + + # Combine them with real images + combined_images = tf.concat([generated_images, real_images], axis=0) + + # Assemble labels discriminating real from fake images + labels = tf.concat( + [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0 + ) + # Add random noise to the labels - important trick! + labels += 0.05 * tf.random.uniform(tf.shape(labels)) + + # Train the discriminator + with tf.GradientTape() as tape: + predictions = self.discriminator(combined_images) + d_loss = self.loss_fn(labels, predictions) + grads = tape.gradient(d_loss, self.discriminator.trainable_weights) + self.d_optimizer.apply_gradients( + zip(grads, self.discriminator.trainable_weights) + ) + + # Sample random points in the latent space + random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) + + # Assemble labels that say "all real images" + misleading_labels = tf.zeros((batch_size, 1)) + + # Train the generator (note that we should *not* update the weights + # of the discriminator)! 
+ with tf.GradientTape() as tape: + predictions = self.discriminator(self.generator(random_latent_vectors)) + g_loss = self.loss_fn(misleading_labels, predictions) + grads = tape.gradient(g_loss, self.generator.trainable_weights) + self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights)) + + # Update metrics + self.d_loss_metric.update_state(d_loss) + self.g_loss_metric.update_state(g_loss) + return { + "d_loss": self.d_loss_metric.result(), + "g_loss": self.g_loss_metric.result(), + } diff --git a/examples/pix2pose/models/pix2pose.py b/examples/pix2pose/models/pix2pose.py index bc69b2516..37cb9ce39 100644 --- a/examples/pix2pose/models/pix2pose.py +++ b/examples/pix2pose/models/pix2pose.py @@ -1,81 +1,77 @@ -import tensorflow as tf from tensorflow.keras.models import Model from tensorflow.keras.metrics import Mean +import tensorflow as tf -class Pix2PoseGAN(Model): +class Pix2Pose(Model): def __init__(self, image_shape, discriminator, generator, latent_dim): - super(Pix2PoseGAN, self).__init__() + super(Pix2Pose, self).__init__() self.image_shape = image_shape - self.discriminator = discriminator - self.generator = generator + self.D = discriminator + self.G = generator self.latent_dim = latent_dim - self.generator_loss_tracker = Mean(name='generator_loss') - self.discriminator_loss_tracker = Mean(name='discriminator_loss') @property def metrics(self): - return [self.generator_loss_tracker, self.discriminator_loss_tracker] - - def compile(self, d_optimizer, g_optimizer, loss_fn): - super(Pix2PoseGAN, self).compile() - self.d_optimizer = d_optimizer - self.g_optimizer = g_optimizer - self.loss_fn = loss_fn - - def train_step(self, data): - real_images, one_hot_labels = data + return [self.G_loss_metric, self.D_loss_metric] - # Add dummy dimensions to the labels so that they can be concatenated with - # the images. This is for the discriminator. - image_one_hot_labels = one_hot_labels[:, :, None, None] - image_one_hot_labels = tf.repeat(image_one_hot_labels, repeats=[image_size * image_size]) - image_one_hot_labels = tf.reshape(image_one_hot_labels, (-1, image_size, image_size, num_classes)) + def compile(self, optimizer_D, optimizer_G, loss): + super(Pix2Pose, self).compile() + self.optimizer_G = optimizer_G + self.optimizer_D = optimizer_D + self.loss = loss + self.G_loss_metric = Mean(name='generator_loss') + self.D_loss_metric = Mean(name='discriminator_loss') - # Sample random points in the latent space and concatenate the labels. - # This is for the generator. - batch_size = tf.shape(real_images)[0] - random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) - random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) + def _build_discriminator_labels(self, batch_size): + return tf.concat([tf.ones(batch_size, 1), tf.zeros(batch_size, 1)], 0) - # Decode the noise (guided by labels) to fake images. - generated_images = self.generator(random_vector_labels) + def _add_noise_to_labels(self, labels): + noise = tf.random.uniform(tf.shape(labels)) + labels = labels + 0.05 * noise + return labels - # Combine them with real images. Note that we are concatenating the labels - # with these images here. - fake_image_and_labels = tf.concat([generated_images, image_one_hot_labels], -1) - real_image_and_labels = tf.concat([real_images, image_one_hot_labels], -1) - combined_images = tf.concat([fake_image_and_labels, real_image_and_labels], axis=0) - - # Assemble labels discriminating real from fake images. 
- labels = tf.concat([tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0) + def _train_D(self, y_true, x_combined): + with tf.GradientTape() as tape: + y_pred = self.D(x_combined) + D_loss = self.loss(y_true, y_pred) + grads = tape.gradient(D_loss, self.D.trainable_weights) + self.optimizer_D.apply_gradients(zip(grads, self.D.trainable_weights)) + return D_loss - # Train the discriminator. + def _train_G(self, RGB_inputs): + batch_size = tf.shape(RGB_inputs)[0] + y_misleading = tf.zeros((batch_size, 1)) with tf.GradientTape() as tape: - predictions = self.discriminator(combined_images) - d_loss = self.loss_fn(labels, predictions) - grads = tape.gradient(d_loss, self.discriminator.trainable_weights) - self.d_optimizer.apply_gradients(zip(grads, self.discriminator.trainable_weights)) + y_pred = self.D(self.G(RGB_inputs)[:, :, :, 0:3]) + G_loss = self.loss(y_misleading, y_pred) + grads = tape.gradient(G_loss, self.G.trainable_weights) + self.optimizer_G.apply_gradients(zip(grads, self.G.trainable_weights)) + return G_loss - # Sample random points in the latent space. - random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) - random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) + def _update_metrics(self, D_loss, G_loss): + self.D_loss_metric.update_state(D_loss) + self.G_loss_metric.update_state(G_loss) - # Assemble labels that say "all real images". - misleading_labels = tf.zeros((batch_size, 1)) + def train_step(self, data): + RGB_inputs, RGB_labels = data + RGB_inputs = RGB_inputs['RGB_input'][:, :, :, 0:3] + RGB_labels = RGB_labels['RGB_with_error'][:, :, :, 0:3] + RGB_generated = self.G(RGB_inputs)[:, :, :, 0:3] - # Train the generator (note that we should *not* update the weights - # of the discriminator)! - with tf.GradientTape() as tape: - fake_images = self.generator(random_vector_labels) - fake_image_and_labels = tf.concat([fake_images, image_one_hot_labels], -1) - predictions = self.discriminator(fake_image_and_labels) - g_loss = self.loss_fn(misleading_labels, predictions) - grads = tape.gradient(g_loss, self.generator.trainable_weights) - self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights)) + combined_images = tf.concat([RGB_generated, RGB_labels], axis=0) + batch_size = tf.shape(RGB_inputs)[0] + y_true = self._build_discriminator_labels(batch_size) + y_true = self._add_noise_to_labels(y_true) - # Monitor loss. 
- self.generator_loss_tracker.update_state(g_loss) - self.discriminator_loss_tracker.update_state(d_loss) - return {'generator_loss': self.generator_loss_tracker.result(), - 'discrminator_loss': self.discriminator_loss_tracker.result()} + D_loss = self._train_D(y_true, combined_images) + G_loss = self._train_G(RGB_inputs) + self._update_metrics(D_loss, G_loss) + return {"discriminator_loss": self.D_loss_metric.result(), + "generator_loss": self.G_loss_metric.result()} + """ + def call(self, data): + generated = self.G(data) + predictions = self.D(generated) + return generated , predictions + """ diff --git a/examples/pix2pose/old_train.py b/examples/pix2pose/old_train.py index c7adce3f1..481457ad3 100644 --- a/examples/pix2pose/old_train.py +++ b/examples/pix2pose/old_train.py @@ -115,7 +115,10 @@ # Train the generator discriminator.trainable = False - loss_dcgan, loss_color_output, loss_dcgan_discriminator, loss_error_output = dcgan.train_on_batch(batch[0]['input_image'], {"color_output": batch[1]['color_output'], "error_output": batch[1]['error_output'], "discriminator_output": np.ones((args.batch_size, 1))}) + loss_dcgan, loss_color_output, loss_dcgan_discriminator, loss_error_output = dcgan.train_on_batch(batch[0]['input_image'], + {"color_output": batch[1]['color_output'], + "error_output": batch[1]['error_output'], + "discriminator_output": np.ones((args.batch_size, 1))}) # Test the network batch_test = next(sequence_iterator_test) @@ -134,4 +137,4 @@ for callback in callbacks: - callback.on_train_end() \ No newline at end of file + callback.on_train_end() diff --git a/examples/pix2pose/train_gan.py b/examples/pix2pose/train_gan.py index 2f64d4f3b..de9ebb7df 100644 --- a/examples/pix2pose/train_gan.py +++ b/examples/pix2pose/train_gan.py @@ -4,6 +4,9 @@ from paz.abstract import GeneratingSequence from paz.models.segmentation import UNET_VGG16 from models.generator import Generator +from models.discriminator import Discriminator +from models.pix2pose import Pix2Pose +from tensorflow.keras.losses import BinaryCrossentropy # from paz.backend.image import show_image, resize_image # import numpy as np @@ -40,9 +43,10 @@ num_classes = 3 learning_rate = 0.001 # steps_per_epoch -model_names = ['PIX2POSE', 'UNET_VGG16'] -model_name = 'UNET_VGG16' -# model_name = 'PIX2POSE' +model_names = ['PIX2POSE', 'PIX2POSE_GENERATOR', 'UNET_VGG16'] +# model_name = 'UNET_VGG16' +# model_name = 'PIX2POSE_GENERATOR' +model_name = 'PIX2POSE' max_num_epochs = 1 latent_dimension = 128 beta = 3.0 @@ -63,7 +67,9 @@ labels_to_shape = {'masks': [H, W, 4]} weighted_reconstruction = weighted_reconstruction_wrapper(beta, False) metrics = {'masks': [weighted_reconstruction, mean_squared_error]} -if model_name == 'PIX2POSE': + optimizer = Adam(learning_rate) + model.compile(optimizer, loss, metrics) +if model_name == 'PIX2POSE_GENERATOR': model = Generator(image_shape, latent_dimension) reconstruction_loss = WeightedReconstructionWithError(beta) loss = WeightedReconstructionWithError() @@ -73,7 +79,23 @@ weighted_reconstruction = weighted_reconstruction_wrapper(beta, True) metrics = {'RGB_with_error': [weighted_reconstruction, error_prediction, mean_squared_error]} - + optimizer = Adam(learning_rate) + model.compile(optimizer, loss, metrics) +if model_name == 'PIX2POSE': + discriminator = Discriminator(image_shape) + generator = Generator(image_shape, latent_dimension) + model = Pix2Pose(image_shape, discriminator, generator, latent_dimension) + # reconstruction_loss = WeightedReconstructionWithError(beta) + # loss = 
WeightedReconstructionWithError() + H, W, num_channels = image_shape + inputs_to_shape = {'RGB_input': [H, W, num_channels]} + labels_to_shape = {'RGB_with_error': [H, W, 4]} + # weighted_reconstruction = weighted_reconstruction_wrapper(beta, True) + # metrics = {'RGB_with_error': + # [weighted_reconstruction,error_prediction, mean_squared_error]} + optimizer_D = Adam(learning_rate) + optimizer_G = Adam(learning_rate) + model.compile(optimizer_D, optimizer_G, BinaryCrossentropy()) processor = DomainRandomization( renderer, image_shape, image_paths, inputs_to_shape, @@ -81,10 +103,6 @@ sequence = GeneratingSequence(processor, batch_size, num_steps) -optimizer = Adam(learning_rate) - -model.compile(optimizer, loss, metrics) - model.fit( sequence, epochs=max_num_epochs, From b9901b8e096604afd4bac1bce82c1b380a84fccc Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 8 Nov 2021 13:42:24 +0100 Subject: [PATCH 034/101] Add additional losses to pix2pose --- examples/pix2pose/models/pix2pose.py | 102 +++++++++++++++++++-------- examples/pix2pose/train_gan.py | 1 + 2 files changed, 72 insertions(+), 31 deletions(-) diff --git a/examples/pix2pose/models/pix2pose.py b/examples/pix2pose/models/pix2pose.py index 37cb9ce39..efe3ae195 100644 --- a/examples/pix2pose/models/pix2pose.py +++ b/examples/pix2pose/models/pix2pose.py @@ -1,27 +1,33 @@ from tensorflow.keras.models import Model from tensorflow.keras.metrics import Mean import tensorflow as tf +from loss import compute_weighted_reconstruction_loss_with_error +from loss import compute_error_prediction_loss class Pix2Pose(Model): def __init__(self, image_shape, discriminator, generator, latent_dim): super(Pix2Pose, self).__init__() self.image_shape = image_shape - self.D = discriminator - self.G = generator + self.discriminator = discriminator + self.generator = generator self.latent_dim = latent_dim @property def metrics(self): - return [self.G_loss_metric, self.D_loss_metric] + return [self.generator_loss, self.discriminator_loss] - def compile(self, optimizer_D, optimizer_G, loss): + def compile(self, optimizer_D, optimizer_G, gan_loss): super(Pix2Pose, self).compile() self.optimizer_G = optimizer_G self.optimizer_D = optimizer_D - self.loss = loss - self.G_loss_metric = Mean(name='generator_loss') - self.D_loss_metric = Mean(name='discriminator_loss') + self.gan_loss = gan_loss + # self.reconstruction = reconstruction + # self.error_prediction = error_prediction + self.generator_loss = Mean(name='generator_loss') + self.discriminator_loss = Mean(name='discriminator_loss') + self.reconstruction_loss = Mean(name='weighted_reconstruction') + self.error_prediction_loss = Mean(name='error_prediction') def _build_discriminator_labels(self, batch_size): return tf.concat([tf.ones(batch_size, 1), tf.zeros(batch_size, 1)], 0) @@ -33,45 +39,79 @@ def _add_noise_to_labels(self, labels): def _train_D(self, y_true, x_combined): with tf.GradientTape() as tape: - y_pred = self.D(x_combined) - D_loss = self.loss(y_true, y_pred) - grads = tape.gradient(D_loss, self.D.trainable_weights) - self.optimizer_D.apply_gradients(zip(grads, self.D.trainable_weights)) - return D_loss + y_pred = self.discriminator(x_combined) + discriminator_loss = self.gan_loss(y_true, y_pred) + grads = tape.gradient( + discriminator_loss, self.discriminator.trainable_weights) + self.optimizer_D.apply_gradients( + zip(grads, self.discriminator.trainable_weights)) + return discriminator_loss def _train_G(self, RGB_inputs): batch_size = tf.shape(RGB_inputs)[0] y_misleading = 
tf.zeros((batch_size, 1)) with tf.GradientTape() as tape: - y_pred = self.D(self.G(RGB_inputs)[:, :, :, 0:3]) - G_loss = self.loss(y_misleading, y_pred) - grads = tape.gradient(G_loss, self.G.trainable_weights) - self.optimizer_G.apply_gradients(zip(grads, self.G.trainable_weights)) - return G_loss + y_pred = self.discriminator( + self.generator(RGB_inputs)[:, :, :, 0:3]) + generator_loss = self.gan_loss(y_misleading, y_pred) + grads = tape.gradient(generator_loss, self.generator.trainable_weights) + self.optimizer_G.apply_gradients( + zip(grads, self.generator.trainable_weights)) + return generator_loss - def _update_metrics(self, D_loss, G_loss): - self.D_loss_metric.update_state(D_loss) - self.G_loss_metric.update_state(G_loss) + def _train_G_reconstruction(self, RGB_inputs, RGBA_true): + with tf.GradientTape() as tape: + RGBE_pred = self.generator(RGB_inputs) + loss = compute_weighted_reconstruction_loss_with_error( + RGBA_true, RGBE_pred, beta=3.0) + grads = tape.gradient(loss, self.generator.trainable_weights) + self.optimizer_G.apply_gradients( + zip(grads, self.generator.trainable_weights)) + return loss + + def _train_G_error_prediction(self, RGB_inputs, RGBA_true): + with tf.GradientTape() as tape: + RGBE_pred = self.generator(RGB_inputs) + loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) + grads = tape.gradient(loss, self.generator.trainable_weights) + self.optimizer_G.apply_gradients( + zip(grads, self.generator.trainable_weights)) + return loss + + def _update_metrics(self, discriminator_loss, generator_loss): + self.discriminator_loss.update_state(discriminator_loss) + self.generator_loss.update_state(generator_loss) def train_step(self, data): - RGB_inputs, RGB_labels = data - RGB_inputs = RGB_inputs['RGB_input'][:, :, :, 0:3] - RGB_labels = RGB_labels['RGB_with_error'][:, :, :, 0:3] - RGB_generated = self.G(RGB_inputs)[:, :, :, 0:3] + inputs, labels = data + RGB_inputs, RGBA_true = inputs['RGB_input'], labels['RGB_with_error'] + + reconstruction_loss = self._train_G_reconstruction(RGB_inputs, RGBA_true) + self.reconstruction_loss.update_state(reconstruction_loss) + + error_prediction_loss = self._train_G_error_prediction(RGB_inputs, RGBA_true) + self.error_prediction_loss.update_state(error_prediction_loss) + # reconstruction_loss = self.error_prediction(RGBA_true, RGBE_pred, beta) + + RGB_labels = RGBA_true[:, :, :, 0:3] + RGB_generated = self.generator(RGB_inputs)[:, :, :, 0:3] combined_images = tf.concat([RGB_generated, RGB_labels], axis=0) batch_size = tf.shape(RGB_inputs)[0] y_true = self._build_discriminator_labels(batch_size) y_true = self._add_noise_to_labels(y_true) - D_loss = self._train_D(y_true, combined_images) - G_loss = self._train_G(RGB_inputs) - self._update_metrics(D_loss, G_loss) - return {"discriminator_loss": self.D_loss_metric.result(), - "generator_loss": self.G_loss_metric.result()} + discriminator_loss = self._train_D(y_true, combined_images) + generator_loss = self._train_G(RGB_inputs) + self._update_metrics(discriminator_loss, generator_loss) + return {'discriminator_loss': self.discriminator_loss.result(), + 'generator_loss': self.generator_loss.result(), + 'reconstruction_loss': self.reconstruction_loss.result(), + 'error_prediction_loss': self.error_prediction_loss.result()} + """ def call(self, data): - generated = self.G(data) - predictions = self.D(generated) + generated = self.generator(data) + predictions = self.discriminator(generated) return generated , predictions """ diff --git a/examples/pix2pose/train_gan.py 
b/examples/pix2pose/train_gan.py index de9ebb7df..1872c181d 100644 --- a/examples/pix2pose/train_gan.py +++ b/examples/pix2pose/train_gan.py @@ -110,6 +110,7 @@ verbose=1, workers=0) +model.save_weights('PIX2POSE_GAN.hdf5') """ def normalize(image): return (image * 255.0).astype('uint8') From 7d72dde665bcbd08fd44335618945261b36b48a6 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 8 Nov 2021 15:53:03 +0100 Subject: [PATCH 035/101] Add basic training with full GAN model --- examples/pix2pose/loss.py | 19 ++-- examples/pix2pose/models/generator.py | 6 +- examples/pix2pose/models/pix2pose.py | 130 ++++++++++++++------------ examples/pix2pose/train_gan.py | 24 +++-- 4 files changed, 99 insertions(+), 80 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index 741f64dc3..bfe7e90ea 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -37,8 +37,8 @@ def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0): return tf.reduce_mean(reconstruction_loss, axis=-1, keepdims=True) -def compute_weighted_reconstruction_loss_with_error(RGBA_true, RGBE_pred, - beta=3.0): +def compute_weighted_reconstruction_loss_with_error( + RGBA_true, RGBE_pred, beta=3.0): RGB_pred, error_mask = extract_error_mask(RGBE_pred) loss = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) return loss @@ -53,17 +53,24 @@ def compute_error_prediction_loss(RGBA_true, RGBE_pred): return error_loss +class ErrorPrediction(Loss): + def __init__(self): + super(ErrorPrediction, self).__init__() + + def call(self, RGBA_true, RGBE_pred): + error_loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) + return error_loss + + class WeightedReconstructionWithError(Loss): def __init__(self, beta=3.0): super(WeightedReconstructionWithError, self).__init__() self.beta = beta def call(self, RGBA_true, RGBE_pred): - reconstruction = compute_weighted_reconstruction_loss_with_error( + reconstruction_loss = compute_weighted_reconstruction_loss_with_error( RGBA_true, RGBE_pred, self.beta) - error_prediction = compute_error_prediction_loss(RGBA_true, RGBE_pred) - loss = reconstruction + error_prediction - return loss + return reconstruction_loss class WeightedReconstruction(Loss): diff --git a/examples/pix2pose/models/generator.py b/examples/pix2pose/models/generator.py index 357d3b646..2d7766e58 100644 --- a/examples/pix2pose/models/generator.py +++ b/examples/pix2pose/models/generator.py @@ -52,7 +52,7 @@ def decoder(x, skip_connections): def Generator(input_shape=(128, 128, 3), latent_dimension=256, - name='PIX2POSE_GENERATOR'): + activation='sigmoid', name='PIX2POSE_GENERATOR'): RGB_input = Input(input_shape, name='RGB_input') x, skip_connections = encoder(RGB_input) x = Flatten()(x) @@ -61,9 +61,9 @@ def Generator(input_shape=(128, 128, 3), latent_dimension=256, x = Reshape((8, 8, latent_dimension))(x) x = decoder(x, skip_connections) RGB = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(x) - RGB = Activation('tanh', name='RGB')(RGB) + RGB = Activation(activation, name='RGB')(RGB) error = Conv2DTranspose(1, (5, 5), (2, 2), padding='same')(x) - error = Activation('sigmoid', name='error')(error) + error = Activation(activation, name='error')(error) RGB_with_error = Concatenate(axis=-1, name='RGB_with_error')([RGB, error]) model = Model(RGB_input, RGB_with_error, name=name) return model diff --git a/examples/pix2pose/models/pix2pose.py b/examples/pix2pose/models/pix2pose.py index efe3ae195..b120a4fa4 100644 --- a/examples/pix2pose/models/pix2pose.py +++ 
b/examples/pix2pose/models/pix2pose.py @@ -1,8 +1,8 @@ from tensorflow.keras.models import Model from tensorflow.keras.metrics import Mean import tensorflow as tf -from loss import compute_weighted_reconstruction_loss_with_error -from loss import compute_error_prediction_loss +# from loss import compute_weighted_reconstruction_loss_with_error +# from loss import compute_error_prediction_loss class Pix2Pose(Model): @@ -17,17 +17,20 @@ def __init__(self, image_shape, discriminator, generator, latent_dim): def metrics(self): return [self.generator_loss, self.discriminator_loss] - def compile(self, optimizer_D, optimizer_G, gan_loss): + def compile(self, optimizers, losses, loss_weights): super(Pix2Pose, self).compile() - self.optimizer_G = optimizer_G - self.optimizer_D = optimizer_D - self.gan_loss = gan_loss - # self.reconstruction = reconstruction - # self.error_prediction = error_prediction + self.optimizer_generator = optimizers['generator'] + self.optimizer_discriminator = optimizers['discriminator'] + self.compute_reconstruction_loss = losses['weighted_reconstruction'] + self.compute_error_prediction_loss = losses['error_prediction'] + self.compute_discriminator_loss = losses['discriminator'] + self.generator_loss = Mean(name='generator_loss') self.discriminator_loss = Mean(name='discriminator_loss') self.reconstruction_loss = Mean(name='weighted_reconstruction') self.error_prediction_loss = Mean(name='error_prediction') + self.reconstruction_weight = loss_weights['weighted_reconstruction'] + self.error_prediction_weight = loss_weights['error_prediction'] def _build_discriminator_labels(self, batch_size): return tf.concat([tf.ones(batch_size, 1), tf.zeros(batch_size, 1)], 0) @@ -37,81 +40,84 @@ def _add_noise_to_labels(self, labels): labels = labels + 0.05 * noise return labels - def _train_D(self, y_true, x_combined): + def _get_batch_size(self, values): + return tf.shape(values)[0] + + def _train_discriminator(self, RGB_inputs, RGBA_true): + RGB_true = RGBA_true[:, :, :, 0:3] + RGB_fake = self.generator(RGB_inputs)[:, :, :, 0:3] + RGB_fake_true = tf.concat([RGB_fake, RGB_true], axis=0) + + batch_size = self._get_batch_size(RGB_inputs) + y_true = self._build_discriminator_labels(batch_size) + y_true = self._add_noise_to_labels(y_true) + with tf.GradientTape() as tape: - y_pred = self.discriminator(x_combined) - discriminator_loss = self.gan_loss(y_true, y_pred) - grads = tape.gradient( - discriminator_loss, self.discriminator.trainable_weights) - self.optimizer_D.apply_gradients( - zip(grads, self.discriminator.trainable_weights)) + y_pred = self.discriminator(RGB_fake_true) + discriminator_loss = self.compute_discriminator_loss( + y_true, y_pred) + gradients = tape.gradient(discriminator_loss, + self.discriminator.trainable_weights) + self.optimizer_discriminator.apply_gradients( + zip(gradients, self.discriminator.trainable_weights)) return discriminator_loss - def _train_G(self, RGB_inputs): + def _train_generator(self, RGB_inputs): batch_size = tf.shape(RGB_inputs)[0] y_misleading = tf.zeros((batch_size, 1)) with tf.GradientTape() as tape: - y_pred = self.discriminator( - self.generator(RGB_inputs)[:, :, :, 0:3]) - generator_loss = self.gan_loss(y_misleading, y_pred) - grads = tape.gradient(generator_loss, self.generator.trainable_weights) - self.optimizer_G.apply_gradients( - zip(grads, self.generator.trainable_weights)) + RGBE_preds = self.generator(RGB_inputs) + y_pred = self.discriminator(RGBE_preds[..., 0:3]) + generator_loss = self.compute_discriminator_loss( + y_misleading, 
y_pred) + gradients = tape.gradient(generator_loss, + self.generator.trainable_weights) + self.optimizer_generator.apply_gradients( + zip(gradients, self.generator.trainable_weights)) return generator_loss - def _train_G_reconstruction(self, RGB_inputs, RGBA_true): + def _train_reconstruction(self, RGB_inputs, RGBA_true): with tf.GradientTape() as tape: RGBE_pred = self.generator(RGB_inputs) - loss = compute_weighted_reconstruction_loss_with_error( - RGBA_true, RGBE_pred, beta=3.0) - grads = tape.gradient(loss, self.generator.trainable_weights) - self.optimizer_G.apply_gradients( - zip(grads, self.generator.trainable_weights)) - return loss - - def _train_G_error_prediction(self, RGB_inputs, RGBA_true): + reconstruction_loss = self.compute_reconstruction_loss( + RGBA_true, RGBE_pred) + reconstruction_loss = ( + self.reconstruction_weight * reconstruction_loss) + gradients = tape.gradient(reconstruction_loss, + self.generator.trainable_weights) + self.optimizer_generator.apply_gradients( + zip(gradients, self.generator.trainable_weights)) + return reconstruction_loss + + def _train_error_prediction(self, RGB_inputs, RGBA_true): with tf.GradientTape() as tape: RGBE_pred = self.generator(RGB_inputs) - loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) - grads = tape.gradient(loss, self.generator.trainable_weights) - self.optimizer_G.apply_gradients( - zip(grads, self.generator.trainable_weights)) - return loss - - def _update_metrics(self, discriminator_loss, generator_loss): - self.discriminator_loss.update_state(discriminator_loss) - self.generator_loss.update_state(generator_loss) + error_prediction_loss = self.compute_error_prediction_loss( + RGBA_true, RGBE_pred) + error_prediction_loss = ( + self.error_prediction_weight * error_prediction_loss) + gradients = tape.gradient( + error_prediction_loss, self.generator.trainable_weights) + self.optimizer_generator.apply_gradients( + zip(gradients, self.generator.trainable_weights)) + return error_prediction_loss def train_step(self, data): - inputs, labels = data - RGB_inputs, RGBA_true = inputs['RGB_input'], labels['RGB_with_error'] + RGB_inputs, RGBA_true = data[0]['RGB_input'], data[1]['RGB_with_error'] - reconstruction_loss = self._train_G_reconstruction(RGB_inputs, RGBA_true) + reconstruction_loss = self._train_reconstruction(RGB_inputs, RGBA_true) self.reconstruction_loss.update_state(reconstruction_loss) - error_prediction_loss = self._train_G_error_prediction(RGB_inputs, RGBA_true) - self.error_prediction_loss.update_state(error_prediction_loss) - # reconstruction_loss = self.error_prediction(RGBA_true, RGBE_pred, beta) + error_loss = self._train_error_prediction(RGB_inputs, RGBA_true) + self.error_prediction_loss.update_state(error_loss) - RGB_labels = RGBA_true[:, :, :, 0:3] - RGB_generated = self.generator(RGB_inputs)[:, :, :, 0:3] + discriminator_loss = self._train_discriminator(RGB_inputs, RGBA_true) + self.discriminator_loss.update_state(discriminator_loss) - combined_images = tf.concat([RGB_generated, RGB_labels], axis=0) - batch_size = tf.shape(RGB_inputs)[0] - y_true = self._build_discriminator_labels(batch_size) - y_true = self._add_noise_to_labels(y_true) + generator_loss = self._train_generator(RGB_inputs) + self.generator_loss.update_state(generator_loss) - discriminator_loss = self._train_D(y_true, combined_images) - generator_loss = self._train_G(RGB_inputs) - self._update_metrics(discriminator_loss, generator_loss) return {'discriminator_loss': self.discriminator_loss.result(), 'generator_loss': 
self.generator_loss.result(), 'reconstruction_loss': self.reconstruction_loss.result(), 'error_prediction_loss': self.error_prediction_loss.result()} - - """ - def call(self, data): - generated = self.generator(data) - predictions = self.discriminator(generated) - return generated , predictions - """ diff --git a/examples/pix2pose/train_gan.py b/examples/pix2pose/train_gan.py index 1872c181d..2d0e32fa5 100644 --- a/examples/pix2pose/train_gan.py +++ b/examples/pix2pose/train_gan.py @@ -14,6 +14,7 @@ from pipelines import DomainRandomization from loss import WeightedReconstruction from loss import WeightedReconstructionWithError +from loss import ErrorPrediction # from metrics import error_prediction, weighted_reconstruction # from metrics import weighted_reconstruction_with_error from metrics import mean_squared_error, error_prediction @@ -69,6 +70,9 @@ metrics = {'masks': [weighted_reconstruction, mean_squared_error]} optimizer = Adam(learning_rate) model.compile(optimizer, loss, metrics) + +# TODO this is not working at the moment because the loss does not include +# the error prediction loss. if model_name == 'PIX2POSE_GENERATOR': model = Generator(image_shape, latent_dimension) reconstruction_loss = WeightedReconstructionWithError(beta) @@ -81,28 +85,29 @@ [weighted_reconstruction, error_prediction, mean_squared_error]} optimizer = Adam(learning_rate) model.compile(optimizer, loss, metrics) + if model_name == 'PIX2POSE': discriminator = Discriminator(image_shape) generator = Generator(image_shape, latent_dimension) model = Pix2Pose(image_shape, discriminator, generator, latent_dimension) - # reconstruction_loss = WeightedReconstructionWithError(beta) - # loss = WeightedReconstructionWithError() H, W, num_channels = image_shape inputs_to_shape = {'RGB_input': [H, W, num_channels]} labels_to_shape = {'RGB_with_error': [H, W, 4]} - # weighted_reconstruction = weighted_reconstruction_wrapper(beta, True) - # metrics = {'RGB_with_error': - # [weighted_reconstruction,error_prediction, mean_squared_error]} - optimizer_D = Adam(learning_rate) - optimizer_G = Adam(learning_rate) - model.compile(optimizer_D, optimizer_G, BinaryCrossentropy()) + optimizers = {'discriminator': Adam(learning_rate), + 'generator': Adam(learning_rate)} + losses = {'discriminator': BinaryCrossentropy(), + 'weighted_reconstruction': WeightedReconstructionWithError(), + 'error_prediction': ErrorPrediction()} + loss_weights = {'weighted_reconstruction': 100, 'error_prediction': 50} + model.compile(optimizers, losses, loss_weights) processor = DomainRandomization( renderer, image_shape, image_paths, inputs_to_shape, labels_to_shape, num_occlusions) sequence = GeneratingSequence(processor, batch_size, num_steps) - +model.load_weights('PIX2POSE_GAN.hdf5') +""" model.fit( sequence, epochs=max_num_epochs, @@ -112,6 +117,7 @@ model.save_weights('PIX2POSE_GAN.hdf5') """ +""" def normalize(image): return (image * 255.0).astype('uint8') From 7472cea9b6de09508dae684457f3a7800f17731a Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 17 Nov 2021 11:22:44 +0100 Subject: [PATCH 036/101] Add fix to PnP having to solve for less than 4 mask points --- examples/pix2pose/backend.py | 2 ++ examples/pix2pose/pipelines.py | 2 ++ examples/pix2pose/processors.py | 2 ++ 3 files changed, 6 insertions(+) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index fccf619d3..847117868 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -137,6 +137,8 @@ def 
_preprocess_image_points2D(image_points2D): def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, inlier_threshold=5, num_iterations=100): + if ((len(object_points3D) < 4) or (len(image_points2D) < 4)): + return None, None image_points2D = _preprocess_image_points2D(image_points2D) success, rotation_vector, translation, inliers = cv2.solvePnPRansac( object_points3D, image_points2D, camera_intrinsics, None, diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index bf48f7082..688ea9d03 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -121,6 +121,8 @@ def call(self, image): points2D = denormalize_points2D(points2D, *crop.shape[0:2]) points2D = self.change_coordinates(points2D, box2D) quaternion, translation = self.predict_pose(points3D, points2D) + if (quaternion is None) or (translation is None): + continue pose6D = Pose6D(quaternion, translation, box2D.class_name) poses6D.append(pose6D), points.append([points2D, points3D]) if self.draw: diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py index a3452323f..e51137201 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -178,6 +178,8 @@ def __init__(self): super(RotationVectorToQuaternion, self).__init__() def call(self, rotation_vector): + if rotation_vector is None: + return None quaternion = rotation_vector_to_quaternion(rotation_vector) return quaternion From 74c4888970b5dfb362ef664c2c29d237f3acc168 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 17 Nov 2021 11:22:58 +0100 Subject: [PATCH 037/101] Change demo for image processing --- examples/pix2pose/demo.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index def06b71d..44167d034 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -15,10 +15,10 @@ model.load_weights('UNET_weights_epochs-10_beta-3.hdf5') # approximating intrinsic camera parameters -camera = Camera(device_id=4) -# camera.start() -# image_size = camera.read().shape[0:2] -# camera.stop() +camera = Camera(device_id=0) +camera.start() +image_size = camera.read().shape[0:2] +camera.stop() image = load_image('test_image.jpg') image_size = image.shape[0:2] @@ -30,8 +30,9 @@ [0, 0, 1]]) object_sizes = np.array([0.184, 0.187, 0.052]) epsilon = 0.15 -detect = SSD300FAT(draw=False) -offsets = [0.1, 0.1] +score_thresh = 0.50 +detect = SSD300FAT(score_thresh, draw=False) +offsets = [0.2, 0.2] estimate_keypoints = Pix2Pose(model, object_sizes) pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets, None) @@ -39,8 +40,6 @@ predicted_image = results['image'] show_image(predicted_image) -""" -image_size = (640, 480) -player = VideoPlayer(image_size, pipeline, camera) -player.run() -""" +# image_size = (640, 480) +# player = VideoPlayer(image_size, pipeline, camera) +# player.run() From b6aeb0bb9b96c80bf16a278215f10cd3d0db695e Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 17 Nov 2021 12:11:54 +0100 Subject: [PATCH 038/101] Add accessible state of failure and success of internal PnP solution --- examples/pix2pose/backend.py | 6 ++---- examples/pix2pose/pipelines.py | 16 ++++++++++------ examples/pix2pose/processors.py | 6 ++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 847117868..513a3bb88 100644 --- a/examples/pix2pose/backend.py +++ 
b/examples/pix2pose/backend.py @@ -138,16 +138,14 @@ def _preprocess_image_points2D(image_points2D): def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, inlier_threshold=5, num_iterations=100): if ((len(object_points3D) < 4) or (len(image_points2D) < 4)): - return None, None + raise ValueError('Solve PnP requires at least 4 3D and 2D points') image_points2D = _preprocess_image_points2D(image_points2D) success, rotation_vector, translation, inliers = cv2.solvePnPRansac( object_points3D, image_points2D, camera_intrinsics, None, flags=cv2.SOLVEPNP_EPNP, reprojectionError=inlier_threshold, iterationsCount=num_iterations) translation = np.squeeze(translation, 1) - if success is False: - rotation_vector, translation = None, None - return rotation_vector, translation + return success, rotation_vector, translation def apply_affine_transform(affine_matrix, vectors): diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 688ea9d03..547f2ec0b 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -1,4 +1,3 @@ -import numpy as np from paz.abstract import SequentialProcessor, Processor from paz.pipelines import RandomizeRenderedImage as RandomizeRender from paz.abstract.messages import Pose6D @@ -8,12 +7,12 @@ ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC, ReplaceLowerThanThreshold) from backend import build_cube_points3D -from processors import UnwrapDictionary, RotationVectorToQuaternion +from processors import UnwrapDictionary from processors import NormalizePoints2D -from backend import draw_maski from backend import denormalize_points2D from backend import draw_poses6D from backend import draw_masks +from paz.backend.quaternion import rotation_vector_to_quaternion class DomainRandomization(SequentialProcessor): @@ -68,8 +67,9 @@ def __init__(self, output_shape): class SolveChangingObjectPnP(SequentialProcessor): def __init__(self, camera_intrinsics): super(SolveChangingObjectPnP, self).__init__() + self.MINIMUM_REQUIRED_POINTS = 4 self.add(SolveChangingObjectPnPRANSAC(camera_intrinsics)) - self.add(pr.ControlMap(RotationVectorToQuaternion())) + # self.add(pr.ControlMap(RotationVectorToQuaternion())) class Pix2Pose(pr.Processor): @@ -120,9 +120,13 @@ def call(self, image): points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) points2D = denormalize_points2D(points2D, *crop.shape[0:2]) points2D = self.change_coordinates(points2D, box2D) - quaternion, translation = self.predict_pose(points3D, points2D) - if (quaternion is None) or (translation is None): + if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: continue + success, rotation, translation = self.predict_pose( + points3D, points2D) + if success is False: + continue + quaternion = rotation_vector_to_quaternion(rotation) pose6D = Pose6D(quaternion, translation, box2D.class_name) poses6D.append(pose6D), points.append([points2D, points3D]) if self.draw: diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py index e51137201..03164771c 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -125,10 +125,10 @@ def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): self.num_iterations = num_iterations def call(self, object_points3D, image_points2D): - rotation_vector, translation = solve_PnP_RANSAC( + success, rotation_vector, translation = solve_PnP_RANSAC( object_points3D, image_points2D, self.camera_intrinsics, self.inlier_thresh, self.num_iterations) - return 
rotation_vector, translation + return success, rotation_vector, translation class RotationVectorToRotationMatrix(Processor): @@ -178,8 +178,6 @@ def __init__(self): super(RotationVectorToQuaternion, self).__init__() def call(self, rotation_vector): - if rotation_vector is None: - return None quaternion = rotation_vector_to_quaternion(rotation_vector) return quaternion From ca3b26c7950b29bee1efde29f0e28070edec0ea5 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 17 Nov 2021 13:03:25 +0100 Subject: [PATCH 039/101] Remove unsued functions and processors --- examples/pix2pose/backend.py | 295 ++++++++------------------------ examples/pix2pose/pipelines.py | 1 - examples/pix2pose/processors.py | 77 +++------ 3 files changed, 94 insertions(+), 279 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 513a3bb88..be696aaa1 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -1,93 +1,11 @@ -from collections import Iterable +# from collections import Iterable import numpy as np from paz.backend.image.draw import GREEN -from paz.backend.image import draw_line, draw_dot, draw_circle -from paz.abstract import Pose6D +from paz.backend.image import draw_line, draw_dot +# from paz.abstract import Pose6D import cv2 -def homogenous_quaternion_to_rotation_matrix(quaternion): - # w0, q1, q2, q3 = quaternion - q1, q2, q3, w0 = quaternion - - r11 = w0**2 + q1**2 - q2**2 - q3**2 - r12 = 2 * ((q1 * q2) - (w0 * q3)) - r13 = 2 * ((w0 * q2) + (q1 * q3)) - - r21 = 2 * ((w0 * q3) + (q1 * q2)) - r22 = w0**2 - q1**2 + q2**2 - q3**2 - r23 = 2 * ((q2 * q3) - (w0 * q1)) - - r31 = 2 * ((q1 * q3) - (w0 * q2)) - r32 = 2 * ((w0 * q1) + (q2 * q3)) - r33 = w0**2 - q1**2 - q2**2 + q3**2 - - rotation_matrix = np.array([[r11, r12, r13], - [r21, r22, r23], - [r31, r32, r33]]) - return rotation_matrix - - -def inhomogenous_quaternion_to_rotation_matrix(q): - """Transforms quaternion into a rotation matrix - # Arguments - q: quarternion, Numpy array of shape ``[4]`` - # Returns - Numpy array representing a rotation vector having a shape ``[3]``. - """ - # quaternion - # q = q[::-1] - r11 = 1 - (2 * (q[1]**2 + q[2]**2)) - r12 = 2 * (q[0] * q[1] - q[3] * q[2]) - r13 = 2 * (q[3] * q[1] + q[0] * q[2]) - - r21 = 2 * (q[0] * q[1] + q[3] * q[2]) - r22 = 1 - (2 * (q[0]**2 + q[2]**2)) - r23 = 2 * (q[1] * q[2] - q[3] * q[0]) - - r31 = 2 * (q[0] * q[2] - q[3] * q[1]) - r32 = 2 * (q[3] * q[0] + q[1] * q[2]) - r33 = 1 - (2 * (q[0]**2 + q[1]**2)) - - rotation_matrix = np.array([[r11, r12, r13], - [r21, r22, r23], - [r31, r32, r33]]) - - return rotation_matrix - # return np.squeeze(rotation_matrix) - - -def quaternion_to_rotation_matrix(quaternion, homogenous=True): - if homogenous: - matrix = homogenous_quaternion_to_rotation_matrix(quaternion) - else: - matrix = inhomogenous_quaternion_to_rotation_matrix(quaternion) - return matrix - - -def multiply_quaternions(quaternion_0, quaternion_1): - """Multiplies two quaternions. 
- - # Reference: - Code extracted from [here](https://stackoverflow.com/questions/ - 39000758/how-to-multiply-two-quaternions-by-python-or-numpy) - """ - x0, y0, z0, w0 = quaternion_0 - x1, y1, z1, w1 = quaternion_1 - x2 = +(x1 * w0) + (y1 * z0) - (z1 * y0) + (w1 * x0) - y2 = -(x1 * z0) + (y1 * w0) + (z1 * x0) + (w1 * y0) - z2 = +(x1 * y0) - (y1 * x0) + (z1 * w0) + (w1 * z0) - w2 = -(x1 * x0) - (y1 * y0) - (z1 * z0) + (w1 * w0) - return np.array([x2, y2, z2, w2]) - - -# quaternion = (1 / np.sqrt(30)) * np.array([1, 2, 3, 4]) -# theta = np.deg2rad(0) -# quaternion = np.array([1, 0, 0, 0]) -# a = homogenous_quaternion_to_rotation_matrix(quaternion) -# quaternion = (1 / np.sqrt(30)) * np.array([2, 3, 4, 1]) -# b = inhomogenous_quaternion_to_rotation_matrix(quaternion) - def build_cube_points3D(width, height, depth): """ Build the 3D points of a cube in the openCV coordinate system: 4--------1 @@ -148,33 +66,6 @@ def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, return success, rotation_vector, translation -def apply_affine_transform(affine_matrix, vectors): - return np.matmul(affine_matrix, vectors.T).T - - -def project_to_image2(affine_matrix, points3D, camera_intrinsics): - """Project points3D to image plane using a perspective transformation - """ - if affine_matrix.shape != (4, 4): - raise ValueError('Affine matrix is not of shape (4, 4)') - if len(points3D.shape) != 2: - raise ValueError('points3D should have a shape (N, 3)') - if points3D.shape[1] != 3: - raise ValueError('points3D should have a shape (N, 3)') - # TODO missing checks for camera intrinsics conditions - points3D = apply_affine_transform(affine_matrix, points3D) - # points3D = np.matmul(rotation, points3D.T).T + translation - x, y, z = np.split(points3D, 3, axis=1) - x_focal_length = camera_intrinsics[0, 0] - y_focal_length = camera_intrinsics[1, 1] - x_image_center = camera_intrinsics[0, 2] - y_image_center = camera_intrinsics[1, 2] - x_points = (x_focal_length * (x / z)) + x_image_center - y_points = (y_focal_length * (y / z)) + y_image_center - projected_points2D = np.concatenate([x_points, y_points], axis=1) - return projected_points2D - - def project_to_image(rotation, translation, points3D, camera_intrinsics): """Project points3D to image plane using a perspective transformation """ @@ -256,20 +147,6 @@ def arguments_to_image_points2D(row_args, col_args): return image_points2D -def rotation_vector_to_rotation_matrix(rotation_vector): - rotation_matrix = np.eye(3) - cv2.Rodrigues(rotation_vector, rotation_matrix) - return rotation_matrix - - -def draw_keypoints(image, keypoints, colors, radius): - for keypoint, color in zip(keypoints, colors): - R, G, B = color - color = (int(R), int(G), int(B)) - draw_circle(image, keypoint.astype('int'), color, radius) - return image - - def draw_masks(image, points): for points2D, points3D in points: object_sizes = np.array([0.184, 0.187, 0.052]) @@ -291,52 +168,6 @@ def draw_maski(image, keypoints, colors, radius=1): return image -def rotation_matrix_to_quaternion(rotation_matrix): - qw = np.sqrt(1 + np.trace(rotation_matrix)) / 2.0 - - m21 = rotation_matrix[2, 1] - m12 = rotation_matrix[1, 2] - - m02 = rotation_matrix[0, 2] - m20 = rotation_matrix[2, 0] - - m10 = rotation_matrix[1, 0] - m01 = rotation_matrix[0, 1] - - qx = (m21 - m12) / (4.0 * qw) - qy = (m02 - m20) / (4.0 * qw) - qz = (m10 - m01) / (4.0 * qw) - return qx, qy, qz, qw - - -def to_pose6D(quaternion, translation, class_name=None): - return Pose6D(quaternion, translation, class_name) - - -class 
MultiList(Iterable): - def __init__(self, num_lists): - self.num_lists = num_lists - self.lists = [[] for list_arg in range(self.num_lists)] - - def append(self, *args): - if len(args) != self.num_lists: - raise ValueError('Arguments should have equal lenght as num_lists') - for arg, arg_list in zip(args, self.lists): - arg_list.append(arg) - - def __iter__(self): - return iter(self.lists) - - -def draw_mask2(image, points3D, object_sizes): - if len(object_sizes) != 3: - raise ValueError('Object sizes must contain 3 values') - colors = points3D / (object_sizes / 2.0) - colors = (colors + 1.0) * 127.5 - colors = colors.astype('int') - # draw_keypoints(image, points2D, colors, radius=3) - - def normalize_points2D(points2D, height, width): """Transform points2D in image coordinates to normalized coordinates. @@ -363,55 +194,6 @@ def denormalize_points2D(points2D, height, width): return points2D - -def flip_y_axis(points2D): - x, y = np.split(points2D, 2, axis=1) - points2D = np.concatenate([x, -y], axis=1) - return points2D - - -def denormalize_keypoints2(keypoints, height, width): - # [-1, 1] -> [-127.5, 127.5] -> [0, 255] - half_sizes = np.array([width, height]) / 2.0 - return (half_sizes * keypoints) + half_sizes - - -def translate_points2D(points2D, translation): - if len(points2D.shape) != 2: - raise ValueError('Invalid points2D shape') - if len(translation) != 2: - raise ValueError('Invalid translation lenght') - num_keypoints = len(points2D) - height, width = translation - x_translation = np.full((num_keypoints, 1), width) - y_translation = np.full((num_keypoints, 1), height) - translation = np.concatenate([x_translation, y_translation], axis=1) - translated_points2D = translation - points2D - return translated_points2D - - -def denormalize_keypoints(keypoints, height, width): - """Transform normalized keypoint coordinates into image coordinates - - # Arguments - keypoints: Numpy array of shape ``(num_keypoints, 2)``. - height: Int. Height of the image - width: Int. Width of the image - - # Returns - Numpy array of shape ``(num_keypoints, 2)``. 
- """ - for keypoint_arg, keypoint in enumerate(keypoints): - x, y = keypoint[:2] - # transform key-point coordinates to image coordinates - x = (min(max(x, -1), 1) * width / 2 + width / 2) - 0.5 - # flip since the image coordinates for y are flipped - y = height - 0.5 - (min(max(y, -1), 1) * height / 2 + height / 2) - x, y = int(round(x)), int(round(y)) - keypoints[keypoint_arg][:2] = [x, y] - return keypoints - - def draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics): image = image.astype(float) for pose6D in poses6D: @@ -424,3 +206,74 @@ def draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics): image = draw_cube(image, cube_points2D) image = image.astype('uint8') return image + + +# NOT USED +def homogenous_quaternion_to_rotation_matrix(quaternion): + # w0, q1, q2, q3 = quaternion + q1, q2, q3, w0 = quaternion + + r11 = w0**2 + q1**2 - q2**2 - q3**2 + r12 = 2 * ((q1 * q2) - (w0 * q3)) + r13 = 2 * ((w0 * q2) + (q1 * q3)) + + r21 = 2 * ((w0 * q3) + (q1 * q2)) + r22 = w0**2 - q1**2 + q2**2 - q3**2 + r23 = 2 * ((q2 * q3) - (w0 * q1)) + + r31 = 2 * ((q1 * q3) - (w0 * q2)) + r32 = 2 * ((w0 * q1) + (q2 * q3)) + r33 = w0**2 - q1**2 - q2**2 + q3**2 + + rotation_matrix = np.array([[r11, r12, r13], + [r21, r22, r23], + [r31, r32, r33]]) + return rotation_matrix + + +def inhomogenous_quaternion_to_rotation_matrix(q): + # quaternion + # q = q[::-1] + r11 = 1 - (2 * (q[1]**2 + q[2]**2)) + r12 = 2 * (q[0] * q[1] - q[3] * q[2]) + r13 = 2 * (q[3] * q[1] + q[0] * q[2]) + + r21 = 2 * (q[0] * q[1] + q[3] * q[2]) + r22 = 1 - (2 * (q[0]**2 + q[2]**2)) + r23 = 2 * (q[1] * q[2] - q[3] * q[0]) + + r31 = 2 * (q[0] * q[2] - q[3] * q[1]) + r32 = 2 * (q[3] * q[0] + q[1] * q[2]) + r33 = 1 - (2 * (q[0]**2 + q[1]**2)) + + rotation_matrix = np.array([[r11, r12, r13], + [r21, r22, r23], + [r31, r32, r33]]) + + return rotation_matrix + + +def quaternion_to_rotation_matrix(quaternion, homogenous=True): + if homogenous: + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) + else: + matrix = inhomogenous_quaternion_to_rotation_matrix(quaternion) + return matrix + + +def rotation_vector_to_rotation_matrix(rotation_vector): + rotation_matrix = np.eye(3) + cv2.Rodrigues(rotation_vector, rotation_matrix) + return rotation_matrix + + +def to_affine_matrix(rotation_matrix, translation): + if len(translation) != 3: + raise ValueError('Translation should be of lenght 3') + if rotation_matrix.shape != (3, 3): + raise ValueError('Rotation matrix should be of shape (3, 3)') + translation = translation.reshape(3, 1) + affine_top = np.concatenate([rotation_matrix, translation], axis=1) + affine_row = np.array([[0.0, 0.0, 0.0, 1.0]]) + affine_matrix = np.concatenate([affine_top, affine_row], axis=0) + return affine_matrix diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 547f2ec0b..16d35ad62 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -69,7 +69,6 @@ def __init__(self, camera_intrinsics): super(SolveChangingObjectPnP, self).__init__() self.MINIMUM_REQUIRED_POINTS = 4 self.add(SolveChangingObjectPnPRANSAC(camera_intrinsics)) - # self.add(pr.ControlMap(RotationVectorToQuaternion())) class Pix2Pose(pr.Processor): diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py index 03164771c..c47acd2e9 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -8,9 +8,9 @@ from backend import replace_lower_than_threshold from backend import arguments_to_image_points2D from backend import 
solve_PnP_RANSAC -from backend import rotation_vector_to_rotation_matrix -from backend import translate_points2D from backend import normalize_points2D +from backend import rotation_vector_to_rotation_matrix +from backend import to_affine_matrix class ImageToClosedOneBall(Processor): @@ -62,8 +62,6 @@ def call(self, image, pose6D): points3D = self.class_to_points[pose6D.class_name] points2D = project_points3D(points3D, pose6D, self.camera) points2D = points2D.astype(np.int32) - # points2D = np.squeeze(points2D) - # return points2D draw_cube(image, points2D, thickness=self.thickness) return image @@ -74,9 +72,9 @@ def __init__(self, threshold=1e-8, replacement=0.0): self.threshold = threshold self.replacement = replacement - def call(self, image): + def call(self, values): return replace_lower_than_threshold( - image, self.threshold, self.replacement) + values, self.threshold, self.replacement) class GetNonZeroValues(Processor): @@ -131,22 +129,6 @@ def call(self, object_points3D, image_points2D): return success, rotation_vector, translation -class RotationVectorToRotationMatrix(Processor): - def __init__(self): - super(RotationVectorToRotationMatrix, self).__init__() - - def call(self, rotation_vector): - return rotation_vector_to_rotation_matrix(rotation_vector) - - -class CropImage(Processor): - def __init__(self): - super(CropImage, self).__init__() - - def call(self, image): - return image[:128, :128, :] - - class UnwrapDictionary(Processor): def __init__(self, keys): super(UnwrapDictionary, self).__init__() @@ -156,23 +138,6 @@ def call(self, dictionary): return [dictionary[key] for key in self.keys] -class ToAffineMatrix(Processor): - def __init__(self): - super(ToAffineMatrix, self).__init__() - - def call(self, rotation_matrix, translation): - if len(translation) != 3: - raise ValueError('Translation should be of lenght 3') - if rotation_matrix.shape != (3, 3): - raise ValueError('Rotation matrix should be of shape (3, 3)') - translation = translation.reshape(3, 1) - affine_matrix = np.concatenate([rotation_matrix, translation], axis=1) - affine_row = np.array([[0.0, 0.0, 0.0, 1.0]]) - affine_matrix = np.concatenate([affine_matrix, affine_row], axis=0) - print(affine_matrix.shape) - return affine_matrix - - class RotationVectorToQuaternion(Processor): def __init__(self): super(RotationVectorToQuaternion, self).__init__() @@ -182,29 +147,27 @@ def call(self, rotation_vector): return quaternion -class TranslatePoints2D(Processor): - def __init__(self): - super(TranslatePoints2D, self).__init__() +class NormalizePoints2D(Processor): + def __init__(self, image_shape): + self.height, self.width = image_shape[:2] - def call(points2D, image): - height, width = image.shape[:2] - translated_points2D = translate_points2D(points2D, (height, width)) - return translated_points2D + def call(self, points2D): + points2D = normalize_points2D(points2D, self.height, self.width) + return points2D -class FlipYAxisPoints2D(Processor): +class RotationVectorToRotationMatrix(Processor): def __init__(self): - super(FlipYAxisPoints2D, self).__init__() + super(RotationVectorToRotationMatrix, self).__init__() - def call(self, points2D, image): - height = image.shape[0] - translate_points2D(points2D, (0, height)) + def call(self, rotation_vector): + return rotation_vector_to_rotation_matrix(rotation_vector) -class NormalizePoints2D(Processor): - def __init__(self, image_shape): - self.height, self.width = image_shape[:2] +class ToAffineMatrix(Processor): + def __init__(self): + super(ToAffineMatrix, 
self).__init__() - def call(self, points2D): - points2D = normalize_points2D(points2D, self.height, self.width) - return points2D + def call(self, rotation_matrix, translation): + affine_matrix = to_affine_matrix(rotation_matrix, translation) + return affine_matrix From 6266d3af545aa1374af6f3dc88c81ed94c93c8be Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 17 Nov 2021 14:04:32 +0100 Subject: [PATCH 040/101] Remove unnecessary files --- examples/pix2pose/old_pipelines.py | 150 ----------------------------- examples/pix2pose/old_train.py | 140 --------------------------- examples/pix2pose/pix2pose.sh | 1 - examples/pix2pose/utils.py | 62 ------------ 4 files changed, 353 deletions(-) delete mode 100644 examples/pix2pose/old_pipelines.py delete mode 100644 examples/pix2pose/old_train.py delete mode 100644 examples/pix2pose/pix2pose.sh delete mode 100644 examples/pix2pose/utils.py diff --git a/examples/pix2pose/old_pipelines.py b/examples/pix2pose/old_pipelines.py deleted file mode 100644 index f484c6a44..000000000 --- a/examples/pix2pose/old_pipelines.py +++ /dev/null @@ -1,150 +0,0 @@ -import numpy as np -import os -import glob -import random -from tensorflow.keras.utils import Sequence - -from paz.abstract import SequentialProcessor, Processor -from paz.abstract.sequence import SequenceExtra -from paz.pipelines import RandomizeRenderedImage -from paz import processors as pr - - -class GeneratedImageProcessor(Processor): - """Loads pre-generated images - """ - def __init__(self, path_images, background_images_paths, num_occlusions=1, split=pr.TRAIN, no_ambiguities=False): - super(GeneratedImageProcessor, self).__init__() - self.copy = pr.Copy() - self.augment = RandomizeRenderedImage(background_images_paths, num_occlusions) - preprocessors_input = [pr.NormalizeImage()] - preprocessors_output = [NormalizeImageTanh()] - self.preprocess_input = SequentialProcessor(preprocessors_input) - self.preprocess_output = SequentialProcessor(preprocessors_output) - self.split = split - - # Total number of images - self.num_images = len(glob.glob(os.path.join(path_images, "image_original/*"))) - - # Load all images into memory to save time - self.images_original = [np.load(os.path.join(path_images, "image_original/image_original_{}.npy".format(str(i).zfill(7)))) for i in range(self.num_images)] - - if no_ambiguities: - self.images_colors = [np.load(os.path.join(path_images, "image_colors_no_ambiguities/image_colors_no_ambiguities_{}.npy".format(str(i).zfill(7)))) for i in range(self.num_images)] - else: - self.images_colors = [np.load(os.path.join(path_images, "image_colors/image_colors_{}.npy".format(str(i).zfill(7)))) for i in range(self.num_images)] - - self.alpha_original = [np.load(os.path.join(path_images, "alpha_original/alpha_original_{}.npy".format(str(i).zfill(7)))) for i in range(self.num_images)] - - - def call(self, input_image, label_image): - # index = random.randint(0, self.num_images-1) - # image_original = self.images_original[index] - # image_colors = self.images_colors[index] - # alpha_original = self.alpha_original[index] - - if self.split == pr.TRAIN: - image_original = self.augment(image_original, alpha_original) - - image_original = self.preprocess_input(image_original) - image_colors = self.preprocess_output(image_colors) - return image_original, image_colors - - -class GeneratedImageGenerator(SequentialProcessor): - def __init__(self, path_images, size, background_images_paths, num_occlusions=1, split=pr.TRAIN): - super(GeneratedImageGenerator, self).__init__() - 
self.add(GeneratedImageProcessor( - path_images, background_images_paths, num_occlusions, split)) - self.add(pr.SequenceWrapper( - {0: {'input_image': [size, size, 3]}}, - {1: {'color_output': [size, size, 3]}, 0: {'error_output': [size, size, 1]}})) - -""" -Creates a batch of train data for the discriminator. For real images the label is 1, -for fake images the label is 0 -""" -def make_batch_discriminator(generator, input_images, color_output_images, label): - if label == 1: - return color_output_images, np.ones(len(color_output_images)) - elif label == 0: - predictions = generator.predict(input_images) - return predictions[0], np.zeros(len(predictions[0])) - - -class GeneratingSequencePix2Pose(SequenceExtra): - """Sequence generator used for generating samples. - Unfortunately the GeneratingSequence class from paz.abstract cannot be used here. Reason: not all of - the training data is available right at the start. The error images depend on the predicted color images, - so that they have to be generated on-the-fly during training. This is done here. - - # Arguments - processor: Function used for generating and processing ``samples``. - model: Keras model - batch_size: Int. - num_steps: Int. Number of steps for each epoch. - as_list: Bool, if True ``inputs`` and ``labels`` are dispatched as - lists. If false ``inputs`` and ``labels`` are dispatched as - dictionaries. - """ - def __init__(self, processor, model, batch_size, num_steps, as_list=False, rotation_matrices=None): - self.num_steps = num_steps - self.model = model - self.rotation_matrices = rotation_matrices - super(GeneratingSequencePix2Pose, self).__init__( - processor, batch_size, as_list) - - def __len__(self): - return self.num_steps - - def rotate_image(self, image, rotation_matrix): - mask_image = np.ma.masked_not_equal(np.sum(image, axis=-1), -1.*3).mask.astype(float) - mask_image = np.repeat(mask_image[..., np.newaxis], 3, axis=-1) - mask_background = np.ones_like(mask_image) - mask_image - - # Rotate the object - image_rotated = np.einsum('ij,klj->kli', rotation_matrix, image) - image_rotated *= mask_image - image_rotated += (mask_background * -1.) - - return image_rotated - - def process_batch(self, inputs, labels, batch_index): - input_images, samples = list(), list() - for sample_arg in range(self.batch_size): - sample = self.pipeline() - samples.append(sample) - input_image = sample['inputs'][self.ordered_input_names[0]] - input_images.append(input_image) - - input_images = np.asarray(input_images) - # This line is very important. If model.predict(...) is used instead the results are wrong. - # Reason: BatchNormalization behaves differently, depending on whether it is in train or - # inference mode. model.predict(...) 
is the inference mode, so the predictions here will - # be different from the predictions the model is trained on --> Result: the error images - # generated here are also wrong - predictions = self.model(input_images, training=True) - - # Calculate the errors between the target output and the predicted output - for sample_arg in range(self.batch_size): - sample = samples[sample_arg] - - # List of tuples of the form (error, error_image) - stored_errors = [] - - # Iterate over all rotation matrices to find the object position - # with the smallest error - for rotation_matrix in self.rotation_matrices: - color_image_rotated = self.rotate_image(sample['labels']['color_output'], rotation_matrix) - error_image = np.sum(predictions['color_output'][sample_arg] - color_image_rotated, axis=-1, keepdims=True) - - error_value = np.sum(np.abs(error_image)) - stored_errors.append((error_value, error_image)) - - # Select the error image with the smallest error - minimal_error_pair = min(stored_errors, key=lambda t: t[0]) - sample['labels'][self.ordered_label_names[0]] = minimal_error_pair[1] - self._place_sample(sample['inputs'], sample_arg, inputs) - self._place_sample(sample['labels'], sample_arg, labels) - - return inputs, labels diff --git a/examples/pix2pose/old_train.py b/examples/pix2pose/old_train.py deleted file mode 100644 index 481457ad3..000000000 --- a/examples/pix2pose/old_train.py +++ /dev/null @@ -1,140 +0,0 @@ -import os -import glob -import argparse -import numpy as np -import time - -from tensorflow.keras.callbacks import CSVLogger -from tensorflow.keras.optimizers import Adam -from tensorflow.keras.layers import Input -from tensorflow.keras.models import Model - -from paz.abstract import GeneratingSequence -from paz.abstract.sequence import GeneratingSequence - -from pipelines import GeneratingSequencePix2Pose, GeneratedImageGenerator, make_batch_discriminator -from model import Generator, Discriminator, loss_color_wrapped, loss_error - - -description = 'Training script Pix2Pose model' -root_path = os.path.join(os.path.expanduser('~'), '.keras/') -parser = argparse.ArgumentParser(description=description) -parser.add_argument('-cl', '--class_name', default='tless05', type=str, - help='Class name to be added to model save path') -parser.add_argument('-id', '--background_images_directory', type=str, - help='Path to directory containing background images') -parser.add_argument('-pi', '--images_directory', type=str, - help='Path to pre-generated images (npy format)') -parser.add_argument('-bs', '--batch_size', default=4, type=int, - help='Batch size for training') -parser.add_argument('-lr', '--learning_rate', default=0.001, type=float, - help='Initial learning rate for Adam') -parser.add_argument('-ld', '--image_size', default=128, type=int, - help='Size of the side of a square image e.g. 
64') -parser.add_argument('-e', '--max_num_epochs', default=10000, type=int, - help='Maximum number of epochs before finishing') -parser.add_argument('-st', '--steps_per_epoch', default=5, type=int, - help='Steps per epoch') -parser.add_argument('-oc', '--num_occlusions', default=2, type=int, - help='Number of occlusions') -parser.add_argument('-sa', '--save_path', - default=os.path.join( - os.path.expanduser('~'), '.keras/paz/models'), - type=str, help='Path for writing model weights and logs') -parser.add_argument('-rm', '--rotation_matrices', - type=str, help='Path to npy file with a list of rotation matrices', required=True) -parser.add_argument('-de', '--description', - type=str, help='Description of the model') -args = parser.parse_args() - -# Building the whole GAN model -dcgan_input = Input(shape=(128, 128, 3)) -discriminator = Discriminator() -generator = Generator() -color_output, error_output = generator(dcgan_input) -discriminator.trainable = False -discriminator_output = discriminator(color_output) -dcgan = Model(inputs=[dcgan_input], outputs={"color_output": color_output, "error_output": error_output, "discriminator_output": discriminator_output}) - -# For the loss function pix2pose needs to know all the rotations under which the pose looks the same -rotation_matrices = np.load(args.rotation_matrices) -loss_color = loss_color_wrapped(rotation_matrices) - -# Set the loss -optimizer = Adam(args.learning_rate, amsgrad=True) -losses = {"color_output": loss_color, - "error_output": loss_error, - "discriminator_output": "binary_crossentropy"} -lossWeights = {"color_output": 100.0, "error_output": 50.0, "discriminator_output": 1.0} -dcgan.compile(optimizer=optimizer, loss=losses, loss_weights=lossWeights, run_eagerly=True) - -discriminator.trainable = True -discriminator.compile(loss=['binary_crossentropy'], optimizer=optimizer) - -# Creating sequencer -background_image_paths = glob.glob(os.path.join(args.background_images_directory, '*.jpg')) -processor_train = GeneratedImageGenerator(os.path.join(args.images_directory, "train"), args.image_size, background_image_paths, num_occlusions=0) -processor_test = GeneratedImageGenerator(os.path.join(args.images_directory, "test"), args.image_size, background_image_paths, num_occlusions=0) -sequence_train = GeneratingSequencePix2Pose(processor_train, dcgan, args.batch_size, args.steps_per_epoch, rotation_matrices=rotation_matrices) -sequence_test = GeneratingSequencePix2Pose(processor_test, dcgan, args.batch_size, args.steps_per_epoch, rotation_matrices=rotation_matrices) - -# Making directory for saving model weights and logs -model_name = '_'.join([dcgan.name, args.class_name]) -save_path = os.path.join(args.save_path, model_name) -if not os.path.exists(save_path): - os.makedirs(save_path) - -# Setting callbacks -log = CSVLogger(os.path.join(save_path, '%s.log' % model_name)) -log.model = dcgan - -callbacks=[log] - -for callback in callbacks: - callback.on_train_begin() - -for num_epoch in range(args.max_num_epochs): - sequence_iterator_train = sequence_train.__iter__() - sequence_iterator_test = sequence_test.__iter__() - - for callback in callbacks: - callback.on_epoch_begin(num_epoch) - - for num_batch in range(args.steps_per_epoch): - # Train the discriminator - discriminator.trainable = True - batch = next(sequence_iterator_train) - - X_discriminator_real, y_discriminator_real = make_batch_discriminator(generator, batch[0]['input_image'], batch[1]['color_output'], 1) - loss_discriminator_real = 
discriminator.train_on_batch(X_discriminator_real, y_discriminator_real) - - X_discriminator_fake, y_discriminator_fake = make_batch_discriminator(generator, batch[0]['input_image'], batch[1]['color_output'], 0) - loss_discriminator_fake = discriminator.train_on_batch(X_discriminator_fake, y_discriminator_fake) - - loss_discriminator = (loss_discriminator_real + loss_discriminator_fake)/2. - - # Train the generator - discriminator.trainable = False - loss_dcgan, loss_color_output, loss_dcgan_discriminator, loss_error_output = dcgan.train_on_batch(batch[0]['input_image'], - {"color_output": batch[1]['color_output'], - "error_output": batch[1]['error_output'], - "discriminator_output": np.ones((args.batch_size, 1))}) - - # Test the network - batch_test = next(sequence_iterator_test) - loss_dcgan_test, loss_color_output_test, loss_dcgan_discriminator_test, loss_error_output_test = dcgan.test_on_batch(batch_test[0]['input_image'], {"color_output": batch_test[1]['color_output'], "error_output": batch_test[1]['error_output'], "discriminator_output": np.ones((args.batch_size, 1))}) - - print("Loss DCGAN: {}".format(loss_dcgan)) - for callback in callbacks: - callback.on_epoch_end(num_epoch, logs={'loss_discriminator': loss_discriminator, - 'loss_dcgan': loss_dcgan, 'loss_color_output': loss_color_output, - 'loss_dcgan_discriminator': loss_dcgan_discriminator, - 'loss_error_output': loss_error_output, - 'loss_dcgan_test': loss_dcgan_test, 'loss_color_output_test': loss_color_output_test, - 'loss_dcgan_discriminator_test': loss_dcgan_discriminator_test, - 'loss_error_output_test': loss_error_output_test - }) - - -for callback in callbacks: - callback.on_train_end() diff --git a/examples/pix2pose/pix2pose.sh b/examples/pix2pose/pix2pose.sh deleted file mode 100644 index fb315cb9f..000000000 --- a/examples/pix2pose/pix2pose.sh +++ /dev/null @@ -1 +0,0 @@ -python3 train.py --images_directory /home/fabian/.keras/tless_obj05/pix2pose/normal_coloring --background_images_directory /home/fabian/.keras/backgrounds --batch_size 4 --steps_per_epoch 5 --image_size 128 --rotation_matrices /home/fabian/Uni/masterarbeit/src/paz/examples/pix2pose/rotation_matrices/2_fold_symmetry_rotation_matrices.npy \ No newline at end of file diff --git a/examples/pix2pose/utils.py b/examples/pix2pose/utils.py deleted file mode 100644 index 7aaadf344..000000000 --- a/examples/pix2pose/utils.py +++ /dev/null @@ -1,62 +0,0 @@ -import tensorflow as tf -from tensorflow.keras.losses import Loss - - -class LossError(Loss): - def __init__(self): - super(LossError, self).__init__() - - def call(self, y_true, y_pred): - y_true = tf.clip_by_value(tf.math.abs(y_true), tf.float32.min, 1.0) - squared_error = tf.square(y_pred - y_true) - squared_error = tf.reduce_sum(squared_error, axis=3) - squared_error = tf.reduce_mean(squared_error, axis=[1, 2]) - return squared_error - - -class LossColor(Loss): - def __init__(self, rotation_matrices): - super(LossColor, self).__init__() - self.rotation_matrices = rotation_matrices - - - def call(self, color_image, predicted_color_image): - min_loss = tf.float32.max - - # [-1, 1] -> [0, 1] - color_image = (color_image + 1) * 0.5 - - # Calculate masks for the object and the background (they are independent of the rotation) - mask_object = tf.repeat(tf.expand_dims(tf.math.reduce_max(tf.math.ceil(color_image), axis=-1), axis=-1), repeats=3, axis=-1) - mask_background = tf.ones(tf.shape(mask_object)) - mask_object - - # [0, 1] -> [-1, 1] - color_image = (color_image * 2) - 1 - - # Iterate over all possible 
rotations - for rotation_matrix in self.rotation_matrices: - - real_color_image = tf.identity(color_image) - - # Add a small epsilon value to avoid the discontinuity problem - real_color_image = real_color_image + tf.ones_like(real_color_image) * 0.0001 - - # Rotate the object - real_color_image = tf.einsum('ij,mklj->mkli', tf.convert_to_tensor(np.array(rotation_matrix), dtype=tf.float32), real_color_image) - - # Set the background to be all -1 - real_color_image *= mask_object - real_color_image += (mask_background * tf.constant(-1.)) - - # Get the number of pixels - num_pixels = tf.math.reduce_prod(tf.shape(real_color_image)[1:3]) - beta = 3 - - # Calculate the difference between the real and predicted images including the mask - diff_object = tf.math.abs(predicted_color_image*mask_object - real_color_image*mask_object) - diff_background = tf.math.abs(predicted_color_image*mask_background - real_color_image*mask_background) - - # Calculate the total loss - loss_colors = tf.cast((1/num_pixels), dtype=tf.float32)*(beta*tf.math.reduce_sum(diff_object, axis=[1, 2, 3]) + tf.math.reduce_sum(diff_background, axis=[1, 2, 3])) - min_loss = tf.math.minimum(loss_colors, min_loss) - return min_loss From d9fe2c075a61b83c225160a67832ad05dca672ac Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 18 Nov 2021 13:42:56 +0100 Subject: [PATCH 041/101] Add comments to functions --- examples/pix2pose/backend.py | 161 +++++++++++++++++++++++++++++++---- 1 file changed, 144 insertions(+), 17 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index be696aaa1..7b0c4b109 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -7,7 +7,7 @@ def build_cube_points3D(width, height, depth): - """ Build the 3D points of a cube in the openCV coordinate system: + """Build the 3D points of a cube in the openCV coordinate system: 4--------1 /| /| / | / | @@ -46,6 +46,14 @@ def build_cube_points3D(width, height, depth): def _preprocess_image_points2D(image_points2D): + """Preprocessing image points for PnPRANSAC + + # Arguments + image_points2D: Array of shape (num_points, 2) + + # Returns + Contiguous float64 array of shape (num_points, 1, 2) + """ num_points = len(image_points2D) image_points2D = image_points2D.reshape(num_points, 1, 2) image_points2D = image_points2D.astype(np.float64) @@ -55,6 +63,39 @@ def _preprocess_image_points2D(image_points2D): def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, inlier_threshold=5, num_iterations=100): + """Returns rotation (Roc) and translation (Toc) vectors that transform + 3D points in object frame to camera frame. + + O------------O + /| /| + / | / | + O------------O | + | | z | | + | O____|____|__O + | / |___y| / object + | / / | / coordinates + |/ x |/ + O------------O + ___ + Z | + / | Rco, Tco + /_____X <------| + | + | camera + Y coordinates + + # Arguments + object_points3D: Array (num_points, 3). Points 3D in object reference + frame. Represented as (0) in image above. + image_points2D: Array (num_points, 2). Points in 2D in camera UV space. + camera_intrinsics: Array of shape (3, 3). Diagonal elements represent + focal lenghts and last column the image center translation. + inlier_threshold: Number of inliers for RANSAC method. + num_iterations: Maximum number of iterations. + + # Returns + Rotation vector in axis-angle form (3) and translation vector (3). 
+ """ if ((len(object_points3D) < 4) or (len(image_points2D) < 4)): raise ValueError('Solve PnP requires at least 4 3D and 2D points') image_points2D = _preprocess_image_points2D(image_points2D) @@ -67,16 +108,36 @@ def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, def project_to_image(rotation, translation, points3D, camera_intrinsics): - """Project points3D to image plane using a perspective transformation + """Project points3D to image plane using a perspective transformation. + + Image plane + + (0,0)--------> (U) + | + | + | + v + + (V) + + # Arguments + rotation: Array (3, 3). Rotation matrix (Rco). + translation: Array (3). Translation (Tco). + points3D: Array (num_points, 3). Points 3D in object frame. + camera_intrinsics: Array of shape (3, 3). Diagonal elements represent + focal lengths and last column the image center translation. + + # Returns + Array (num_points, 2) in UV image space. """ if rotation.shape != (3, 3): raise ValueError('Rotation matrix is not of shape (3, 3)') if len(translation) != 3: raise ValueError('Translation vector is not of length 3') if len(points3D.shape) != 2: - raise ValueError('points3D should have a shape (N, 3)') + raise ValueError('Points3D should have a shape (num_points, 3)') if points3D.shape[1] != 3: - raise ValueError('points3D should have a shape (N, 3)') + raise ValueError('Points3D should have a shape (num_points, 3)') # TODO missing checks for camera intrinsics conditions points3D = np.matmul(rotation, points3D.T).T + translation x, y, z = np.split(points3D, 3, axis=1) @@ -91,18 +152,18 @@ def project_to_image(rotation, translation, points3D, camera_intrinsics): def draw_cube(image, points, color=GREEN, thickness=2, radius=5): - """ Draws a cube in image. + """Draws a cube in image. # Arguments - image: Numpy array of shape ``[H, W, 3]``. + image: Numpy array of shape (H, W, 3). points: List of length 8 having each element a list - of length two indicating ``(y, x)`` openCV coordinates. + of length two indicating (U, V) openCV coordinates. color: List of length three indicating RGB color of point. thickness: Integer indicating the thickness of the line to be drawn. radius: Integer indicating the radius of corner points to be drawn. # Returns - Numpy array with shape ``[H, W, 3]``. Image with cube. + Numpy array with shape (H, W, 3). Image with cube. """ if points.shape != (8, 2): raise ValueError('Cube points 2D must be of shape (8, 2)') @@ -135,12 +196,46 @@ def draw_cube(image, points, color=GREEN, thickness=2, radius=5): def replace_lower_than_threshold(source, threshold=1e-3, replacement=0.0): + """Replace values from source that are lower than the given threshold. + + # Arguments + source: Array. + threshold: Float. Values lower than this value will be replaced. + replacement: Float. Value taken by elements lower than threshold. + + # Returns + Array of same shape as source. + """ lower_than_epsilon = source < threshold source[lower_than_epsilon] = replacement return source def arguments_to_image_points2D(row_args, col_args): + """Convert array arguments into UV coordinates. + + Image plane + + (0,0)--------> (U) + | + | + | + v + + (V) + + # Arguments + row_args: Array (num_rows). + col_args: Array (num_cols). + + # Returns + Array (num_points, 2) representing points2D in UV space. + + # Notes + Arguments are row args (V) and col args (U). Image points are in UV + coordinates; thus, we concatenate them in that order + i.e.
[col_args, row_args] + """ row_args = row_args.reshape(-1, 1) col_args = col_args.reshape(-1, 1) image_points2D = np.concatenate([col_args, row_args], axis=1) return image_points2D @@ -169,28 +264,60 @@ def draw_maski(image, keypoints, colors, radius=1): def normalize_points2D(points2D, height, width): - """Transform points2D in image coordinates to normalized coordinates. + """Transform points2D in image coordinates to normalized coordinates i.e. + [U, V] -> [-1, 1]. UV have maximum values of [W, H] respectively. + + Image plane + + (0,0)--------> (U) + | + | + | + v + + (V) # Arguments - points2D: Numpy array of shape ``(num_keypoints, 2)``. + points2D: Numpy array of shape (num_keypoints, 2). height: Int. Height of the image width: Int. Width of the image # Returns - Numpy array of shape ``(num_keypoints, 2)``. + Numpy array of shape (num_keypoints, 2). """ image_shape = np.array([width, height]) - points2D = points2D / image_shape # [0, W], [0, H] -> [0, 1], [0, 1] - points2D = 2.0 * points2D # [0, 1], [0, 1] -> [0, 2], [0, 2] - points2D = points2D - 1.0 # [0, 2], [0, 2] -> [-1, 1], [-1, 1] + points2D = points2D / image_shape # [W, 0], [0, H] -> [1, 0], [0, 1] + points2D = 2.0 * points2D # [1, 0], [0, 1] -> [2, 0], [0, 2] + points2D = points2D - 1.0 # [2, 0], [0, 2] -> [-1, 1], [-1, 1] return points2D def denormalize_points2D(points2D, height, width): + """Transform normalized points2D to image UV coordinates i.e. + [-1, 1] -> [U, V]. UV have maximum values of [W, H] respectively. + + Image plane + + (0,0)--------> (U) + | + | + | + v + + (V) + + # Arguments + points2D: Numpy array of shape (num_keypoints, 2). + height: Int. Height of the image + width: Int. Width of the image + + # Returns + Numpy array of shape (num_keypoints, 2). + """ image_shape = np.array([width, height]) - points2D = points2D + 1.0 # [-1, 1], [-1, 1] -> [2, 0], [0, 2] - points2D = points2D / 2.0 # [0 , 2], [0 , 2] -> [0, 1], [0, 1] - points2D = points2D * image_shape # [0 , 1], [0 , 1] -> [0, W], [0, H] + points2D = points2D + 1.0 # [-1, 1], [-1, 1] -> [2, 0], [0, 2] + points2D = points2D / 2.0 # [2 , 0], [0 , 2] -> [1, 0], [0, 1] + points2D = points2D * image_shape # [1 , 0], [0 , 1] -> [W, 0], [0, H] return points2D From acde9da8ebe2f29c7875bf7afb78d6829cd9b7bc Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 18 Nov 2021 15:58:21 +0100 Subject: [PATCH 042/101] Add backend function comments --- examples/pix2pose/backend.py | 58 ++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 7b0c4b109..f12d3276e 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -1,8 +1,6 @@ -# from collections import Iterable import numpy as np from paz.backend.image.draw import GREEN from paz.backend.image import draw_line, draw_dot -# from paz.abstract import Pose6D import cv2 @@ -214,7 +212,7 @@ def replace_lower_than_threshold(source, threshold=1e-3, replacement=0.0): def arguments_to_image_points2D(row_args, col_args): """Convert array arguments into UV coordinates. - Image plane + Image plane (0,0)--------> (U) | @@ -242,24 +240,52 @@ def arguments_to_image_points2D(row_args, col_args): return image_points2D -def draw_masks(image, points): +def points3D_to_RGB(points3D, object_sizes): + """Transforms points3D in object frame to RGB color space. + # Arguments + points3D: Array (num_points, 3). Points3D in object frame. + object_sizes: List (3) indicating the + (width, height, depth) of object.
+ + # Returns + Array of ints (num_points, 3) in RGB space. + """ + colors = points3D / (0.5 * object_sizes) + colors = colors + 1.0 + colors = colors * 127.5 + colors = colors.astype(np.uint8) + return colors + + +def draw_masks(image, points, object_sizes): for points2D, points3D in points: - object_sizes = np.array([0.184, 0.187, 0.052]) - colors = points3D / (object_sizes / 2.0) - colors = (colors + 1.0) * 127.5 - colors = colors.astype('int') - image = draw_maski(image, points2D, colors) + colors = points3D_to_RGB(points3D, object_sizes) + image = draw_points2D(image, points2D, colors) + return image + + +def draw_points2D(image, points2D, colors): + """Draws mask using points2D in UV space using only numpy. + + # Arguments + image: Array (H, W). + points2D: Array (num_points, 2). Points in image UV space. + colors: Array (num_points, 3). Colors in RGB space. + + # Returns + Array with drawn points. + """ + keypoints = points2D.astype(int) + U = keypoints[:, 0] + V = keypoints[:, 1] + image[V, U, :] = colors return image -def draw_maski(image, keypoints, colors, radius=1): - for keypoint, color in zip(keypoints, colors): - R, G, B = color +def draw_points2D_(image, keypoints, colors, radius=1): + for (u, v), (R, G, B) in zip(keypoints, colors): color = (int(R), int(G), int(B)) - x, y = keypoint - x = int(x) - y = int(y) - draw_dot(image, (x, y), color, radius) + draw_dot(image, (u, v), color, radius) return image From 2535574828a3d603e250459c44f48a451a005f85 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 18 Nov 2021 15:59:00 +0100 Subject: [PATCH 043/101] Remove unnecessary values from pipelines --- examples/pix2pose/demo.py | 2 +- examples/pix2pose/pipelines.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 44167d034..18a18d2cf 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -34,7 +34,7 @@ detect = SSD300FAT(score_thresh, draw=False) offsets = [0.2, 0.2] estimate_keypoints = Pix2Pose(model, object_sizes) -pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets, None) +pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) results = pipeline(image) predicted_image = results['image'] diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 16d35ad62..9ef05220b 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -87,8 +87,7 @@ def call(self, image): class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_keypoints, camera, offsets, - class_to_dimensions, radius=3, thickness=1, draw=True): + def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): """Pose estimation pipeline using keypoints.
""" super(EstimatePoseMasks, self).__init__() @@ -108,7 +107,9 @@ def __init__(self, detect, estimate_keypoints, camera, offsets, self.unwrap = UnwrapDictionary(['points2D', 'points3D']) self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.cube_points3D = build_cube_points3D(0.2, 0.2, 0.07) + self.object_sizes = self.estimate_keypoints.object_sizes + # self.cube_points3D = build_cube_points3D(0.2, 0.2, 0.07) + self.cube_points3D = build_cube_points3D(*self.object_sizes) def call(self, image): boxes2D = self.postprocess_boxes(self.detect(image)) @@ -130,7 +131,7 @@ def call(self, image): poses6D.append(pose6D), points.append([points2D, points3D]) if self.draw: image = self.draw_boxes2D(image, boxes2D) - image = draw_masks(image, points) + image = draw_masks(image, points, self.object_sizes) image = draw_poses6D( image, poses6D, self.cube_points3D, self.camera.intrinsics) return self.wrap(image, boxes2D, poses6D) From 2f37602a3627135f2001795bcf88ecdc46a3f439 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 18 Nov 2021 16:06:53 +0100 Subject: [PATCH 044/101] Remove unecessary files --- examples/pix2pose/calibrate_camera.py | 60 --------------- examples/pix2pose/icp.py | 102 -------------------------- examples/pix2pose/messages.py | 50 ------------- 3 files changed, 212 deletions(-) delete mode 100644 examples/pix2pose/calibrate_camera.py delete mode 100644 examples/pix2pose/icp.py delete mode 100644 examples/pix2pose/messages.py diff --git a/examples/pix2pose/calibrate_camera.py b/examples/pix2pose/calibrate_camera.py deleted file mode 100644 index bfc7a3e40..000000000 --- a/examples/pix2pose/calibrate_camera.py +++ /dev/null @@ -1,60 +0,0 @@ -from paz.backend.image import show_image -import numpy as np -import cv2 - - -# def calibrate_camera(square_size, pattern_shape=(5, 5)): - -pattern_size = (5, 7) -square_size_mm = 35 -window_size, zero_zone = (11, 11), (-1, -1) - -# constructing default 3D points -point3D = np.zeros((np.prod(pattern_size), 3), np.float32) -xy_coordinates = np.mgrid[0:pattern_size[0], 0:pattern_size[1]].T -point3D[:, :2] = xy_coordinates.reshape(-1, 2) * square_size_mm - -camera = cv2.VideoCapture(0) -cv2.namedWindow('camera_window') -# 2D points in image plane, 3D points in real world space, images, counter -image_points, points3D, images, image_counter = [], [], [], 0 -criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001) -print('Press `Escape` to quit') -while True: - - frame = camera.read()[1] - image_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - show_image(image_gray, wait=False) - chessboard_found, corners = cv2.findChessboardCorners( - image_gray, pattern_size, None) - print(chessboard_found) - if chessboard_found: - points3D.append(point3D) - refined_corners = cv2.cornerSubPix( - image_gray, corners, window_size, zero_zone, criteria) - image_points.append(refined_corners) - frame = cv2.drawChessboardCorners( - frame, pattern_size, refined_corners, chessboard_found) - show_image(frame) - image_counter = image_counter + 1 - - cv2.imshow('camera_window', frame) - keystroke = cv2.waitKey(1) - - if keystroke % 256 == 27: - print('`Escape` key hit, closing...') - break - -camera.release() -cv2.destroyAllWindows() - -ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera( - points3D, image_points, image_gray.shape[::-1], None, None) -print(ret, mtx, dist, rvecs, tvecs) -print(mtx) -# fx = 659.10 -# fy = 668.76 -# cx = 276.76 -# cy = 252.35 -# ret = 0.6814 -# dist = [9.86e-3, 
1.41, 1.08e-2, 2.431e-3, -7.05] diff --git a/examples/pix2pose/icp.py b/examples/pix2pose/icp.py deleted file mode 100644 index 61ca4352c..000000000 --- a/examples/pix2pose/icp.py +++ /dev/null @@ -1,102 +0,0 @@ -import numpy as np -from sklearn.neighbors import NearestNeighbors - - -def calculate_affine_matrix(pointcloud_A, pointcloud_B): - '''Calculates affine transform with the best least-squares fit transforming - keypoints A to keypoints B. - - # Argument: - pointcloud_A: Array of shape (num_keypoints, 3). - pointcloud_B: Array of shape (num_keypoints, 3). - - # Returns: - T: (m+1)x(m+1) homogeneous transformation matrix that maps A on to B - R: mxm rotation matrix - t: mx1 translation vector - ''' - assert pointcloud_A.shape == pointcloud_B.shape - # translate points to their centroids - centroid3D_A = np.mean(pointcloud_A, axis=0) - centroid3D_B = np.mean(pointcloud_B, axis=0) - centered_keypoints3D_A = pointcloud_A - centroid3D_A - centered_keypoints3D_B = pointcloud_B - centroid3D_B - - covariance = np.dot(centered_keypoints3D_A.T, centered_keypoints3D_B) - U, S, Vt = np.linalg.svd(covariance) - # compute rotation matrix - rotation_matrix = np.dot(Vt.T, U.T) - - # resolve special reflection case - if np.linalg.det(rotation_matrix) < 0: - Vt[3 - 1, :] *= -1 - rotation_matrix = np.dot(Vt.T, U.T) - - # compute translation - translation3D = centroid3D_B.T - np.dot(rotation_matrix, centroid3D_A.T) - - affine_matrix = to_affine_matrix(rotation_matrix, translation3D) - return affine_matrix - - -def to_affine_matrix(rotation_matrix, translation_vector): - translation_vector = translation_vector.reshape(3, 1) - affine = np.concatenate([rotation_matrix, translation_vector], axis=0) - affine = np.concatenate([affine, np.array([[0.0, 0.0, 0.0, 1.0]])], axis=1) - return affine - - -def nearest_neighbor(pointcloud_A, pointcloud_B): - '''Find the nearest (Euclidean) neighbor in dst for each point in src - # Arguments: - src: Nxm array of points - dst: Nxm array of points - # Returns: - distances: Euclidean distances of the nearest neighbor - indices: dst indices of the nearest neighbor - ''' - assert pointcloud_A.shape == pointcloud_B.shape - model = NearestNeighbors(n_neighbors=1) - model.fit(pointcloud_B) - distances, indices = model.kneighbors(pointcloud_A, return_distance=True) - return distances.ravel(), indices.ravel() - - -def add_homogenous_coordinate(keypoints3D): - num_keypoints = len(keypoints3D) - ones = np.ones_like(num_keypoints).reshape(-1, 1) - homogenous_keypoints3D = np.concatenate([keypoints3D, ones], axis=1) - return homogenous_keypoints3D - - -def iterative_closes_point(pointcloud_A, pointcloud_B, initial_pose=None, - max_iterations=20, tolerance=1e-3): - '''Find best least square fit that transforms pointcloud A to pointcloud B. 
- Input: - A: Nxm numpy array of source mD points - B: Nxm numpy array of destination mD point - initial_pose: (m+1)x(m+1) homogeneous transformation - max_iterations: exit algorithm after max_iterations - tolerance: convergence criteria - Output: - T: final homogeneous transformation that maps A on to B - distances: Euclidean distances (errors) of the nearest neighbor - i: number of iterations to converge - ''' - assert pointcloud_A.shape == pointcloud_B.shape - pointcloud_A = add_homogenous_coordinate(pointcloud_A) - pointcloud_B = add_homogenous_coordinate(pointcloud_B) - pointcloud_A_0 = np.copy(pointcloud_A) - if initial_pose is not None: - pointcloud_A = np.dot(initial_pose, pointcloud_A.T).T - previous_error = 0 - for iteration_arg in range(max_iterations): - distances, indices = nearest_neighbor(pointcloud_A, pointcloud_B) - affine_matrix = calculate_affine_matrix(pointcloud_A, pointcloud_B) - pointcloud_A = np.dot(affine_matrix, pointcloud_A.T).T - mean_error = np.mean(distances) - if np.abs(previous_error - mean_error) < tolerance: - break - previous_error = mean_error - affine_transform = calculate_affine_matrix(pointcloud_A_0, pointcloud_A) - return affine_transform, distances, iteration_arg diff --git a/examples/pix2pose/messages.py b/examples/pix2pose/messages.py deleted file mode 100644 index 1c50c176d..000000000 --- a/examples/pix2pose/messages.py +++ /dev/null @@ -1,50 +0,0 @@ -from paz.abstract.messages import Box2D, Pose6D - - -class ObjectHypothesis(object): - # TODO: Check if class_name, score is the same - def __init__(self, score=None, class_name=None, box2D=None, pose6D=None): - self.score = score - self.class_name = class_name - self.box2D = box2D - self.pose6D = pose6D - - @property - def box2D(self): - return self._box2D - - @box2D.setter - def box2D(self, value): - if not isinstance(value, Box2D): - raise ValueError('Value must be a Box2D class') - - if self.score is None: - if value.score is not None: - self.score = value.score - else: - if self.score != value.score: - raise ValueError('Mismatch score between Hypothesis and Box2D') - - - if self.score is None and (value.score is not None): - self.score = value.score - elif (self.score is not None) and (value.score is not None): - if self.score != value.score: - raise ValueError('Mismatch score between Hypothesis and Box2D') - if self.class_name is None and (value.class_name is not None): - self.class_name = value.class_name - self._box2D = value - - @property - def pose6D(self): - return self._pose6D - - @pose6D.setter - def pose6D(self, value): - if not isinstance(value, Pose6D): - raise ValueError('Value must be a Pose6D class') - if (self.score is None) and (value.score is not None): - self.score = value.score - if self.class_name is None and (value.class_name is not None): - self.class_name = value.class_name - self._pose6D = value From 22ea0a1ba138669a764247015da28fb2158b17fd Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 18 Nov 2021 17:35:19 +0100 Subject: [PATCH 045/101] Add resize option for augmenting keypoints based on interpolation --- examples/pix2pose/backend.py | 2 ++ examples/pix2pose/pipelines.py | 22 +++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index f12d3276e..003db374d 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -237,6 +237,7 @@ def arguments_to_image_points2D(row_args, col_args): row_args = row_args.reshape(-1, 1) col_args = col_args.reshape(-1, 
1) image_points2D = np.concatenate([col_args, row_args], axis=1) + # image_points2D = np.concatenate([row_args, col_args], axis=1) return image_points2D @@ -275,6 +276,7 @@ def draw_points2D(image, points2D, colors): # Returns Array with drawn points. """ + # print(np.max(points2D, axis=0)) keypoints = points2D.astype(int) U = keypoints[:, 0] V = keypoints[:, 1] diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 9ef05220b..5e1e1c348 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -13,6 +13,7 @@ from backend import draw_poses6D from backend import draw_masks from paz.backend.quaternion import rotation_vector_to_quaternion +from paz.backend.image import resize_image, show_image class DomainRandomization(SequentialProcessor): @@ -61,7 +62,7 @@ def __init__(self, output_shape): super(RGBMaskToImagePoints2D, self).__init__() self.add(GetNonZeroArguments()) self.add(ArgumentsToImagePoints2D()) - self.add(NormalizePoints2D(output_shape)) + # self.add(NormalizePoints2D(output_shape)) class SolveChangingObjectPnP(SequentialProcessor): @@ -72,17 +73,25 @@ def __init__(self, camera_intrinsics): class Pix2Pose(pr.Processor): - def __init__(self, model, object_sizes, epsilon=0.15): + def __init__(self, model, object_sizes, epsilon=0.15, with_resize=True): self.object_sizes = object_sizes self.predict_RGBMask = PredictRGBMask(model, epsilon) self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) self.wrap = pr.WrapOutput(['points3D', 'points2D', 'RGB_mask']) + self.with_resize = with_resize def call(self, image): RGB_mask = self.predict_RGBMask(image) + if self.with_resize: + print(image.shape, RGB_mask.shape) + RGB_mask = resize_image(RGB_mask, image.shape[:2][::-1]) + print(RGB_mask.shape) + show_image(RGB_mask) points3D = self.mask_to_points3D(RGB_mask) points2D = self.mask_to_points2D(RGB_mask) + from backend import normalize_points2D + points2D = normalize_points2D(points2D, *image.shape[:2][::-1]) return self.wrap(points3D, points2D, RGB_mask) @@ -108,7 +117,6 @@ def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) self.object_sizes = self.estimate_keypoints.object_sizes - # self.cube_points3D = build_cube_points3D(0.2, 0.2, 0.07) self.cube_points3D = build_cube_points3D(*self.object_sizes) def call(self, image): @@ -119,7 +127,11 @@ def call(self, image): for crop, box2D in zip(cropped_images, boxes2D): points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) points2D = denormalize_points2D(points2D, *crop.shape[0:2]) + print(box2D.coordinates) points2D = self.change_coordinates(points2D, box2D) + import numpy as np + print(np.max(points2D, axis=0)) + print(points2D.shape) if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: continue success, rotation, translation = self.predict_pose( @@ -132,6 +144,6 @@ def call(self, image): if self.draw: image = self.draw_boxes2D(image, boxes2D) image = draw_masks(image, points, self.object_sizes) - image = draw_poses6D( - image, poses6D, self.cube_points3D, self.camera.intrinsics) + image = draw_poses6D(image, poses6D, self.cube_points3D, + self.camera.intrinsics) return self.wrap(image, boxes2D, poses6D) From bc30c791f2f50923769d3771b29f358d0bcc7475 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 19 Nov 2021 11:06:16 +0100 Subject: [PATCH 046/101] 
Add comments to missing functions --- examples/pix2pose/backend.py | 124 ++++++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 39 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 003db374d..f8c73fac3 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -237,7 +237,6 @@ def arguments_to_image_points2D(row_args, col_args): row_args = row_args.reshape(-1, 1) col_args = col_args.reshape(-1, 1) image_points2D = np.concatenate([col_args, row_args], axis=1) - # image_points2D = np.concatenate([row_args, col_args], axis=1) return image_points2D @@ -251,6 +250,7 @@ def points3D_to_RGB(points3D, object_sizes): # Returns Array of ints (num_points, 3) in RGB space. """ + # TODO add domain and codomain transform as comments colors = points3D / (0.5 * object_sizes) colors = colors + 1.0 colors = colors * 127.5 @@ -258,6 +258,7 @@ def points3D_to_RGB(points3D, object_sizes): return colors +# TODO change to processor def draw_masks(image, points, object_sizes): for points2D, points3D in points: colors = points3D_to_RGB(points3D, object_sizes) @@ -266,7 +267,7 @@ def draw_masks(image, points, object_sizes): def draw_points2D(image, points2D, colors): - """Draws mask using points2D in UV space using only numpy. + """Draws a pixel for all points2D in UV space using only numpy. # Arguments image: Array (H, W). @@ -276,7 +277,6 @@ def draw_points2D(image, points2D, colors): # Returns Array with drawn points. """ - # print(np.max(points2D, axis=0)) keypoints = points2D.astype(int) U = keypoints[:, 0] V = keypoints[:, 1] @@ -349,23 +349,61 @@ def denormalize_points2D(points2D, height, width): return points2D +def draw_pose6D(image, pose6D, cube_points3D, camera_intrinsics): + """Draws pose6D by projecting cube3D to image space with camera intrinsics. + + # Arguments + image: Array (H, W, 3) + pose6D: paz message Pose6D with quaternion and translation values. + cube_points3D: Array (8, 3). Cube 3D points in object frame. + camera_intrinsics: Array of shape (3, 3). Diagonal elements represent + focal lengths and last column the image center translation. + + # Returns + Original image array (H, W, 3) with drawn cube points. + """ + quaternion, translation = pose6D.quaternion, pose6D.translation + rotation = quaternion_to_rotation_matrix(quaternion) + rotation = np.squeeze(rotation, axis=2) + cube_points2D = project_to_image( + rotation, translation, cube_points3D, camera_intrinsics) + cube_points2D = cube_points2D.astype(np.int32) + image = draw_cube(image, cube_points2D) + return image + + def draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics): - image = image.astype(float) + """Draws pose6D by projecting cube3D to image space with camera intrinsics. + + # Arguments + image: Array (H, W, 3) + poses6D: List of paz messages Pose6D with quaternions and translations. + cube_points3D: Array (8, 3). Cube 3D points in object frame. + camera_intrinsics: Array of shape (3, 3). Diagonal elements represent + focal lengths and last column the image center translation. + + # Returns + Original image array (H, W, 3) with drawn cube points for all poses6D.
+ """ for pose6D in poses6D: - rotation = quaternion_to_rotation_matrix(pose6D.quaternion) - rotation = np.squeeze(rotation, axis=2) - cube_points2D = project_to_image( - rotation, pose6D.translation, - cube_points3D, camera_intrinsics) - cube_points2D = cube_points2D.astype(np.int32) - image = draw_cube(image, cube_points2D) - image = image.astype('uint8') + image = draw_pose6D(image, pose6D, cube_points3D, camera_intrinsics) return image -# NOT USED def homogenous_quaternion_to_rotation_matrix(quaternion): - # w0, q1, q2, q3 = quaternion + """Transforms quaternion to rotation matrix. + + # Arguments + quaternion: Array containing quaternion value [q1, q2, q3, w0]. + + # Returns + Rotation matrix [3, 3]. + + # Note + If quaternion is not a unit quaternion the output is not a valid + rotation matrix but a scalar multiple of one i.e. the rotation matrix + scaled by the squared norm of the quaternion. + """ q1, q2, q3, w0 = quaternion r11 = w0**2 + q1**2 - q2**2 - q3**2 @@ -386,43 +424,51 @@ def homogenous_quaternion_to_rotation_matrix(quaternion): return rotation_matrix -def inhomogenous_quaternion_to_rotation_matrix(q): - # quaternion - # q = q[::-1] - r11 = 1 - (2 * (q[1]**2 + q[2]**2)) - r12 = 2 * (q[0] * q[1] - q[3] * q[2]) - r13 = 2 * (q[3] * q[1] + q[0] * q[2]) - - r21 = 2 * (q[0] * q[1] + q[3] * q[2]) - r22 = 1 - (2 * (q[0]**2 + q[2]**2)) - r23 = 2 * (q[1] * q[2] - q[3] * q[0]) - - r31 = 2 * (q[0] * q[2] - q[3] * q[1]) - r32 = 2 * (q[3] * q[0] + q[1] * q[2]) - r33 = 1 - (2 * (q[0]**2 + q[1]**2)) - - rotation_matrix = np.array([[r11, r12, r13], - [r21, r22, r23], - [r31, r32, r33]]) - - return rotation_matrix +def quaternion_to_rotation_matrix(quaternion): + """Transforms quaternion to rotation matrix. + # Arguments + quaternion: Array containing quaternion value [q1, q2, q3, w0]. -def quaternion_to_rotation_matrix(quaternion, homogenous=True): - if homogenous: - matrix = homogenous_quaternion_to_rotation_matrix(quaternion) - else: - matrix = inhomogenous_quaternion_to_rotation_matrix(quaternion) + # Returns + Rotation matrix [3, 3]. + + # Note + "If the quaternion is not a unit quaternion then the homogeneous form + is still a scalar multiple of a rotation matrix, while the + inhomogeneous form is in general no longer an orthogonal matrix. + This is why in numerical work the homogeneous form is to be preferred + if distortion is to be avoided." [wikipedia](https://en.wikipedia.org/ + wiki/Conversion_between_quaternions_and_Euler_angles) + """ + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) return matrix def rotation_vector_to_rotation_matrix(rotation_vector): + """Transforms rotation vector (axis-angle) form to rotation matrix. + + # Arguments + rotation_vector: Array (3). Rotation vector in axis-angle form. + + # Returns + Array (3, 3) rotation matrix. + """ rotation_matrix = np.eye(3) cv2.Rodrigues(rotation_vector, rotation_matrix) return rotation_matrix def to_affine_matrix(rotation_matrix, translation): + """Builds affine matrix from rotation matrix and translation vector. + + # Arguments + rotation_matrix: Array (3, 3). Representing a rotation matrix. + translation: Array (3). Translation vector. + + # Returns + Array (4, 4) representing an affine matrix.
+ """ if len(translation) != 3: raise ValueError('Translation should be of lenght 3') if rotation_matrix.shape != (3, 3): From a8066a492d0976d91922663112d04b3853bb3bb6 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 19 Nov 2021 11:07:01 +0100 Subject: [PATCH 047/101] Remove unnecessary flags for estimate keypoints pipeline --- examples/pix2pose/demo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 18a18d2cf..00aec2636 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -20,6 +20,7 @@ image_size = camera.read().shape[0:2] camera.stop() +# image = load_image('test_image2.jpg') image = load_image('test_image.jpg') image_size = image.shape[0:2] focal_length = image_size[1] @@ -29,11 +30,11 @@ [0, focal_length, image_center[1]], [0, 0, 1]]) object_sizes = np.array([0.184, 0.187, 0.052]) -epsilon = 0.15 +epsilon = 0.001 score_thresh = 0.50 detect = SSD300FAT(score_thresh, draw=False) offsets = [0.2, 0.2] -estimate_keypoints = Pix2Pose(model, object_sizes) +estimate_keypoints = Pix2Pose(model, object_sizes, epsilon, True) pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) results = pipeline(image) From 646d733b0f98039320870cb534792804653176a9 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 19 Nov 2021 11:07:36 +0100 Subject: [PATCH 048/101] Add reminder to debug incorrect shape management --- examples/pix2pose/pipelines.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 5e1e1c348..affa419af 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -12,6 +12,7 @@ from backend import denormalize_points2D from backend import draw_poses6D from backend import draw_masks +from backend import normalize_points2D from paz.backend.quaternion import rotation_vector_to_quaternion from paz.backend.image import resize_image, show_image @@ -66,10 +67,11 @@ def __init__(self, output_shape): class SolveChangingObjectPnP(SequentialProcessor): - def __init__(self, camera_intrinsics): + def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): super(SolveChangingObjectPnP, self).__init__() self.MINIMUM_REQUIRED_POINTS = 4 - self.add(SolveChangingObjectPnPRANSAC(camera_intrinsics)) + self.add(SolveChangingObjectPnPRANSAC( + camera_intrinsics, inlier_thresh, num_iterations)) class Pix2Pose(pr.Processor): @@ -84,13 +86,9 @@ def __init__(self, model, object_sizes, epsilon=0.15, with_resize=True): def call(self, image): RGB_mask = self.predict_RGBMask(image) if self.with_resize: - print(image.shape, RGB_mask.shape) RGB_mask = resize_image(RGB_mask, image.shape[:2][::-1]) - print(RGB_mask.shape) - show_image(RGB_mask) points3D = self.mask_to_points3D(RGB_mask) points2D = self.mask_to_points2D(RGB_mask) - from backend import normalize_points2D points2D = normalize_points2D(points2D, *image.shape[:2][::-1]) return self.wrap(points3D, points2D, RGB_mask) @@ -127,18 +125,16 @@ def call(self, image): for crop, box2D in zip(cropped_images, boxes2D): points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) points2D = denormalize_points2D(points2D, *crop.shape[0:2]) - print(box2D.coordinates) points2D = self.change_coordinates(points2D, box2D) - import numpy as np - print(np.max(points2D, axis=0)) - print(points2D.shape) if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: continue success, rotation, translation = 
self.predict_pose( points3D, points2D) if success is False: continue + print('ROTATION', rotation.shape) quaternion = rotation_vector_to_quaternion(rotation) + print('QUATERNION', quaternion.shape) pose6D = Pose6D(quaternion, translation, box2D.class_name) poses6D.append(pose6D), points.append([points2D, points3D]) From cf8a1d7157c1fbfbfc9122e0e829e65d16f98108 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 19 Nov 2021 11:42:54 +0100 Subject: [PATCH 049/101] Add comments to loss functions --- examples/pix2pose/loss.py | 139 +++++++++++++++++++++++++++++++++----- examples/pix2pose/test.py | 6 ++ 2 files changed, 129 insertions(+), 16 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index bfe7e90ea..b27118b7f 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -3,19 +3,47 @@ import tensorflow as tf -def extract_alpha_mask(RGBA_mask): +def split_alpha_mask(RGBA_mask): + """Splits alpha mask and RGB image. + + # Arguments + RGBA_mask: Tensor [batch, H, W, 4] + + # Returns + Color tensor [batch, H, W, 3] and alpha tensor [batch, H, W, 1] + """ color_mask = RGBA_mask[:, :, :, 0:3] alpha_mask = RGBA_mask[:, :, :, 3:4] return color_mask, alpha_mask -def extract_error_mask(RGBE_mask): +def split_error_mask(RGBE_mask): + """Splits error mask and RGB image. + + # Arguments + RGBE_mask: Tensor [batch, H, W, 4] + + # Returns + Color tensor [batch, H, W, 3] and error tensor [batch, H, W, 1] + + """ color_mask = RGBE_mask[:, :, :, 0:3] error_mask = RGBE_mask[:, :, :, 3:4] return color_mask, error_mask def compute_foreground_loss(RGB_true, RGB_pred, alpha_mask): + """Computes foreground reconstruction L1 loss by using only positive alpha + mask values. + + # Arguments + RGB_true: Tensor [batch, H, W, 3]. True RGB label values. + RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. + alpha_mask: Tensor [batch, H, W, 1]. True normalized alpha mask values. + + # Returns + Tensor [batch, H, W, 3] with foreground loss values. + """ foreground_true = RGB_true * alpha_mask foreground_pred = RGB_pred * alpha_mask foreground_loss = tf.abs(foreground_true - foreground_pred) @@ -23,6 +51,17 @@ def compute_foreground_loss(RGB_true, RGB_pred, alpha_mask): def compute_background_loss(RGB_true, RGB_pred, alpha_mask): + """Computes background reconstruction L1 loss by using the inverted alpha + mask values. + + # Arguments + RGB_true: Tensor [batch, H, W, 3]. True RGB label values. + RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. + alpha_mask: Tensor [batch, H, W, 1]. True normalized alpha mask values. + + # Returns + Tensor [batch, H, W, 3] with background loss values. + """ background_true = RGB_true * (1.0 - alpha_mask) background_pred = RGB_pred * (1.0 - alpha_mask) background_loss = tf.abs(background_true - background_pred) @@ -30,7 +69,19 @@ def compute_background_loss(RGB_true, RGB_pred, alpha_mask): def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0): - RGB_true, alpha_mask = extract_alpha_mask(RGBA_true) + """Computes L1 reconstruction loss by multiplying positive alpha mask + by beta. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. + beta: Float. Value used to multiply positive alpha mask values. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values.
+ + """ + RGB_true, alpha_mask = split_alpha_mask(RGBA_true) foreground_loss = compute_foreground_loss(RGB_true, RGB_pred, alpha_mask) background_loss = compute_background_loss(RGB_true, RGB_pred, alpha_mask) reconstruction_loss = (beta * foreground_loss) + background_loss @@ -39,13 +90,35 @@ def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0): def compute_weighted_reconstruction_loss_with_error( RGBA_true, RGBE_pred, beta=3.0): - RGB_pred, error_mask = extract_error_mask(RGBE_pred) + """Computes L1 reconstruction loss by multiplying positive alpha mask + by beta. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGBE_pred: Tensor [batch, H, W, 4]. Predicted RGB and error mask. + beta: Float. Value used to multiply positive alpha mask values. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values. + + """ + RGB_pred, error_mask = split_error_mask(RGBE_pred) loss = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) return loss def compute_error_prediction_loss(RGBA_true, RGBE_pred): - RGB_pred, error_pred = extract_error_mask(RGBE_pred) + """Computes L2 reconstruction loss of predicted error mask. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGBE_pred: Tensor [batch, H, W, 4]. Predicted RGB and error mask. + + # Returns + Tensor [batch, H, W] with error prediction loss values. + + """ + RGB_pred, error_pred = split_error_mask(RGBE_pred) error_true = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, 1.0) error_true = tf.minimum(error_true, 1.0) error_loss = mean_squared_error(error_true, error_pred) @@ -53,7 +126,40 @@ def compute_error_prediction_loss(RGBA_true, RGBE_pred): +class WeightedReconstruction(Loss): + """Computes L1 reconstruction loss by multiplying positive alpha mask + by beta. + + # Arguments + beta: Float. Value used to multiply positive alpha mask values. + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values. + + """ + def __init__(self, beta=3.0): + super(WeightedReconstruction, self).__init__() + self.beta = beta + + def call(self, RGBA_true, RGB_pred): + loss = compute_weighted_reconstruction_loss( + RGBA_true, RGB_pred, self.beta) + return loss + + class ErrorPrediction(Loss): + """Computes L2 reconstruction loss of predicted error mask. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGBE_pred: Tensor [batch, H, W, 4]. Predicted RGB and error mask. + + # Returns + Tensor [batch, H, W] with error prediction loss values. + + """ def __init__(self): super(ErrorPrediction, self).__init__() @@ -63,6 +169,18 @@ def call(self, RGBA_true, RGBE_pred): class WeightedReconstructionWithError(Loss): + """Computes L1 reconstruction loss by multiplying positive alpha mask + by beta. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGBE_pred: Tensor [batch, H, W, 4]. Predicted RGB and error mask. + beta: Float. Value used to multiply positive alpha mask values. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values.
+ + """ def __init__(self, beta=3.0): super(WeightedReconstructionWithError, self).__init__() self.beta = beta @@ -73,17 +191,6 @@ def call(self, RGBA_true, RGBE_pred): return reconstruction_loss -class WeightedReconstruction(Loss): - def __init__(self, beta=3.0): - super(WeightedReconstruction, self).__init__() - self.beta = beta - - def call(self, RGBA_true, RGB_pred): - loss = compute_weighted_reconstruction_loss( - RGBA_true, RGB_pred, self.beta) - return loss - - def MSE_without_last_channel(y_true, y_pred): squared_difference = tf.square(y_true[:, :, :, 0:3] - y_pred) return tf.reduce_mean(squared_difference, axis=-1) # Note the `axis=-1` diff --git a/examples/pix2pose/test.py b/examples/pix2pose/test.py index d23077c90..d9effe5b6 100644 --- a/examples/pix2pose/test.py +++ b/examples/pix2pose/test.py @@ -1,6 +1,8 @@ from paz.abstract import SequentialProcessor, Processor from paz import processors as pr import numpy as np +from backend import build_cube_points3D +# import pytest class PipelineWithTwoChannels(SequentialProcessor): @@ -61,3 +63,7 @@ def test_copy_with_controlmap_using_3_channels_plus(): assert len(values) == 2 assert np.allclose(values[0], A_random_values + B_random_values) assert np.allclose(values[1], A_random_values) + + +def test_build_cube_points3D(width, height, depth): + cube_points3D = build_cube_points3D(width, height, depth) From f3005843be7edd49dc1dcdf35cfd74c6ff88c7e1 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 19 Nov 2021 11:47:51 +0100 Subject: [PATCH 050/101] Removed keras GAN examples --- .../models/fully_convolutional_net.py | 5 +- examples/pix2pose/models/gan_example.py | 81 ------------------- examples/pix2pose/models/keras_example.py | 67 --------------- 3 files changed, 3 insertions(+), 150 deletions(-) delete mode 100644 examples/pix2pose/models/gan_example.py delete mode 100644 examples/pix2pose/models/keras_example.py diff --git a/examples/pix2pose/models/fully_convolutional_net.py b/examples/pix2pose/models/fully_convolutional_net.py index 57d10e102..8a9da517e 100644 --- a/examples/pix2pose/models/fully_convolutional_net.py +++ b/examples/pix2pose/models/fully_convolutional_net.py @@ -1,10 +1,11 @@ from tensorflow.keras.models import Model -from tensorflow.keras.layers import Input, Conv2D, Activation, LeakyReLU +from tensorflow.keras.layers import ( + Input, Conv2D, Activation, LeakyReLU, BatchNormalization) def block(x, filters, dilation_rate, alpha): x = Conv2D(filters, (3, 3), dilation_rate=dilation_rate, padding='same')(x) - # x = BatchNormalization()(x) + x = BatchNormalization()(x) x = LeakyReLU(alpha)(x) return x diff --git a/examples/pix2pose/models/gan_example.py b/examples/pix2pose/models/gan_example.py deleted file mode 100644 index 8472a9462..000000000 --- a/examples/pix2pose/models/gan_example.py +++ /dev/null @@ -1,81 +0,0 @@ -import tensorflow as tf -from tensorflow.keras.models import Model -from tensorflow.keras.metrics import Mean - - -class Pix2PoseGAN(Model): - def __init__(self, image_shape, discriminator, generator, latent_dim): - super(Pix2PoseGAN, self).__init__() - self.image_shape = image_shape - self.discriminator = discriminator - self.generator = generator - self.latent_dim = latent_dim - self.generator_loss_tracker = Mean(name='generator_loss') - self.discriminator_loss_tracker = Mean(name='discriminator_loss') - - @property - def metrics(self): - return [self.generator_loss_tracker, self.discriminator_loss_tracker] - - def compile(self, d_optimizer, g_optimizer, loss_fn): - super(Pix2PoseGAN, 
self).compile() - self.d_optimizer = d_optimizer - self.g_optimizer = g_optimizer - self.loss_fn = loss_fn - - def train_step(self, data): - RGB_inputs, RGB_labels = data - RGB_generated = self.generator(RGB_inputs) - RGB_combined = tf.concat([RGB_generated, RGB_labels], axis=0) - """ - # Add dummy dimensions to the labels so that they can be concatenated with - # the images. This is for the discriminator. - image_one_hot_labels = one_hot_labels[:, :, None, None] - image_one_hot_labels = tf.repeat(image_one_hot_labels, repeats=[image_size * image_size]) - image_one_hot_labels = tf.reshape(image_one_hot_labels, (-1, image_size, image_size, num_classes)) - - # Sample random points in the latent space and concatenate the labels. - # This is for the generator. - batch_size = tf.shape(real_images)[0] - random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) - random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) - - # Decode the noise (guided by labels) to fake images. - generated_images = self.generator(random_vector_labels) - """ - - # Combine them with real images. Note that we are concatenating the labels - # with these images here. - - # Assemble labels discriminating real from fake images. - labels = tf.concat([tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0) - - # Train the discriminator. - with tf.GradientTape() as tape: - predictions = self.discriminator(combined_images) - d_loss = self.loss_fn(labels, predictions) - grads = tape.gradient(d_loss, self.discriminator.trainable_weights) - self.d_optimizer.apply_gradients(zip(grads, self.discriminator.trainable_weights)) - - # Sample random points in the latent space. - random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) - random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) - - # Assemble labels that say "all real images". - misleading_labels = tf.zeros((batch_size, 1)) - - # Train the generator (note that we should *not* update the weights - # of the discriminator)! - with tf.GradientTape() as tape: - fake_images = self.generator(random_vector_labels) - fake_image_and_labels = tf.concat([fake_images, image_one_hot_labels], -1) - predictions = self.discriminator(fake_image_and_labels) - g_loss = self.loss_fn(misleading_labels, predictions) - grads = tape.gradient(g_loss, self.generator.trainable_weights) - self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights)) - - # Monitor loss. 
- self.generator_loss_tracker.update_state(g_loss) - self.discriminator_loss_tracker.update_state(d_loss) - return {'generator_loss': self.generator_loss_tracker.result(), - 'discrminator_loss': self.discriminator_loss_tracker.result()} diff --git a/examples/pix2pose/models/keras_example.py b/examples/pix2pose/models/keras_example.py deleted file mode 100644 index c3f016cfa..000000000 --- a/examples/pix2pose/models/keras_example.py +++ /dev/null @@ -1,67 +0,0 @@ -class GAN(keras.Model): - def __init__(self, discriminator, generator, latent_dim): - super(GAN, self).__init__() - self.discriminator = discriminator - self.generator = generator - self.latent_dim = latent_dim - - def compile(self, d_optimizer, g_optimizer, loss_fn): - super(GAN, self).compile() - self.d_optimizer = d_optimizer - self.g_optimizer = g_optimizer - self.loss_fn = loss_fn - self.d_loss_metric = keras.metrics.Mean(name="d_loss") - self.g_loss_metric = keras.metrics.Mean(name="g_loss") - - @property - def metrics(self): - return [self.d_loss_metric, self.g_loss_metric] - - def train_step(self, real_images): - # Sample random points in the latent space - batch_size = tf.shape(real_images)[0] - random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) - - # Decode them to fake images - generated_images = self.generator(random_latent_vectors) - - # Combine them with real images - combined_images = tf.concat([generated_images, real_images], axis=0) - - # Assemble labels discriminating real from fake images - labels = tf.concat( - [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0 - ) - # Add random noise to the labels - important trick! - labels += 0.05 * tf.random.uniform(tf.shape(labels)) - - # Train the discriminator - with tf.GradientTape() as tape: - predictions = self.discriminator(combined_images) - d_loss = self.loss_fn(labels, predictions) - grads = tape.gradient(d_loss, self.discriminator.trainable_weights) - self.d_optimizer.apply_gradients( - zip(grads, self.discriminator.trainable_weights) - ) - - # Sample random points in the latent space - random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) - - # Assemble labels that say "all real images" - misleading_labels = tf.zeros((batch_size, 1)) - - # Train the generator (note that we should *not* update the weights - # of the discriminator)! 
-        with tf.GradientTape() as tape:
-            predictions = self.discriminator(self.generator(random_latent_vectors))
-            g_loss = self.loss_fn(misleading_labels, predictions)
-        grads = tape.gradient(g_loss, self.generator.trainable_weights)
-        self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights))
-
-        # Update metrics
-        self.d_loss_metric.update_state(d_loss)
-        self.g_loss_metric.update_state(g_loss)
-        return {
-            "d_loss": self.d_loss_metric.result(),
-            "g_loss": self.g_loss_metric.result(),
-        }
From 317793049a5279c513c15122b70cd591c28edb2e Mon Sep 17 00:00:00 2001
From: Octavio Arriaga
Date: Fri, 19 Nov 2021 11:49:28 +0100
Subject: [PATCH 051/101] Remove unnecessary metric function

---
 examples/pix2pose/loss.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py
index b27118b7f..8e28f8a6c 100644
--- a/examples/pix2pose/loss.py
+++ b/examples/pix2pose/loss.py
@@ -189,8 +189,3 @@ def call(self, RGBA_true, RGBE_pred):
         reconstruction_loss = compute_weighted_reconstruction_loss_with_error(
             RGBA_true, RGBE_pred, self.beta)
         return reconstruction_loss
-
-
-def MSE_without_last_channel(y_true, y_pred):
-    squared_difference = tf.square(y_true[:, :, :, 0:3] - y_pred)
-    return tf.reduce_mean(squared_difference, axis=-1)  # Note the `axis=-1`
From 0338f70b1e21c3d5fd68da4b333b9eda2e13c028 Mon Sep 17 00:00:00 2001
From: Octavio Arriaga
Date: Fri, 19 Nov 2021 23:33:36 +0100
Subject: [PATCH 052/101] Add untested symmetric weighted loss

---
 examples/pix2pose/loss.py | 60 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 56 insertions(+), 4 deletions(-)

diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py
index 8e28f8a6c..50332258c 100644
--- a/examples/pix2pose/loss.py
+++ b/examples/pix2pose/loss.py
@@ -51,8 +51,8 @@ def compute_foreground_loss(RGB_true, RGB_pred, alpha_mask):


 def compute_background_loss(RGB_true, RGB_pred, alpha_mask):
-    """Computes background reconstruction L1 loss by using the inverted alpha
-    mask values.
+    """Computes the L1 reconstruction loss of the background by masking the
+    true and predicted RGB values with the inverted alpha mask.

     # Arguments
         RGB_true: Tensor [batch, H, W, 3]. True RGB label values.
@@ -69,8 +69,8 @@ def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0):
-    """Computes L1 reconstruction loss by multiplying positive alpha mask
-    by beta.
+    """Computes the L1 reconstruction loss, weighting the positive alpha
+    mask values in the predicted RGB image by beta.

     # Arguments
         RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values.
@@ -88,6 +88,44 @@ def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0):
     return tf.reduce_mean(reconstruction_loss, axis=-1, keepdims=True)


+def to_normalized_device_coordinates(image):
+    """Map image value from [0, 1] -> [-1, 1].
+    """
+    return (image * 2) - 1.0
+
+
+def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0):
+    """Computes the minimum of all rotated L1 reconstruction losses weighting
+    the positive alpha mask values in the predicted RGB image by beta.
+
+    # Arguments
+        RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values.
+        RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values.
+        rotations: Array (num_symmetries, 3, 3). Rotation matrices
+            that when applied lead to the same object view.
+
+    # Returns
+        Tensor [batch, H, W] with weighted reconstruction loss values.
+
+    """
+    # alpha mask is invariant to rotations that leave the shape symmetric.
+    RGB_true, alpha = split_alpha_mask(RGBA_true)
+    RGB_original_shape = tf.shape(RGBA_true)
+    RGB_true = tf.reshape(RGB_true, [-1, 3])
+    RGB_true = to_normalized_device_coordinates(RGB_true)
+    symmetric_losses = []
+    for rotation in rotations:
+        RGB_true_symmetric = tf.matmul(rotation, RGB_true.T).T
+        RGB_true_symmetric = tf.reshape(RGB_true_symmetric, RGB_original_shape)
+        RGBA_true_symmetric = tf.concat([RGB_true_symmetric, alpha], axis=3)
+        symmetric_loss = compute_weighted_reconstruction_loss(
+            RGBA_true_symmetric, RGB_pred, beta)
+        symmetric_loss = tf.expand_dims(symmetric_loss, -1)
+        symmetric_losses.append(symmetric_loss)
+    symmetric_losses = tf.concat(symmetric_losses, axis=-1)
+    minimum_symmetric_loss = tf.reduce_min(symmetric_losses, axis=-1)
+    return minimum_symmetric_loss
+
+
 def compute_weighted_reconstruction_loss_with_error(
         RGBA_true, RGBE_pred, beta=3.0):
     """Computes L1 reconstruction loss by multiplying positive alpha mask
@@ -149,6 +187,20 @@ def call(self, RGBA_true, RGB_pred):
         return loss


+class WeightedSymmetricReconstruction(Loss):
+    """Computes the minimum of all rotated L1 reconstruction losses weighting
+    the positive alpha mask values in the predicted RGB image by beta.
+    """
+    def __init__(self, rotations, beta=3.0):
+        self.rotations = rotations
+        self.beta = beta
+
+    def call(self, RGBA_true, RGB_pred):
+        loss = compute_weighted_symmetric_loss(
+            RGBA_true, RGB_pred, self.rotations, self.beta)
+        return loss
+
+
 class ErrorPrediction(Loss):
     """Computes L2 reconstruction loss of predicted error mask.

     # Arguments
         RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values.
         RGBE_pred: Tensor [batch, H, W, 4]. Predicted RGB and error mask.

From db21312357e38cf80154d4ed26706009b473aa5d Mon Sep 17 00:00:00 2001
From: Octavio Arriaga
Date: Fri, 19 Nov 2021 23:34:27 +0100
Subject: [PATCH 053/101] Change space name to computer graphics convention
 i.e. NDC

---
 examples/pix2pose/pipelines.py  |  4 ++--
 examples/pix2pose/processors.py | 14 ++++++++------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py
index affa419af..e8f1e4a9d 100644
--- a/examples/pix2pose/pipelines.py
+++ b/examples/pix2pose/pipelines.py
@@ -4,7 +4,7 @@
 from paz import processors as pr
 from processors import (
     GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D,
-    ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC,
+    ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC,
     ReplaceLowerThanThreshold)
 from backend import build_cube_points3D
 from processors import UnwrapDictionary
@@ -54,7 +54,7 @@ class RGBMaskToObjectPoints3D(SequentialProcessor):
     def __init__(self, object_sizes):
         super(RGBMaskToObjectPoints3D, self).__init__()
         self.add(GetNonZeroValues())
-        self.add(ImageToClosedOneBall())
+        self.add(ImageToNormalizedDeviceCoordinates())
         self.add(Scale(object_sizes / 2.0))


diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py
index c47acd2e9..1a6255626 100644
--- a/examples/pix2pose/processors.py
+++ b/examples/pix2pose/processors.py
@@ -11,26 +11,28 @@
 from backend import normalize_points2D
 from backend import rotation_vector_to_rotation_matrix
 from backend import to_affine_matrix
+from backend import image_to_normalized_device_coordinates
+from backend import normalized_device_coordinates_to_image


-class ImageToClosedOneBall(Processor):
+class ImageToNormalizedDeviceCoordinates(Processor):
     """Map image value from [0, 255] -> [-1, 1].
""" def __init__(self): - super(ImageToClosedOneBall, self).__init__() + super(ImageToNormalizedDeviceCoordinates, self).__init__() def call(self, image): - return (image / 127.5) - 1.0 + return image_to_normalized_device_coordinates(image) -class ClosedOneBallToImage(Processor): +class NormalizedDeviceCoordinatesToImage(Processor): """Map normalized value from [-1, 1] -> [0, 255]. """ def __init__(self): - super(ClosedOneBallToImage, self).__init__() + super(NormalizedDeviceCoordinatesToImage, self).__init__() def call(self, image): - return (image + 1.0) * 127.5 + return normalized_device_coordinates_to_image(image) class DrawBoxes3D(Processor): From 34f1565b86437ae55516816bee3f50302ef179c2 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 19 Nov 2021 23:35:01 +0100 Subject: [PATCH 054/101] Add NDC transforms and rotation matrix builds --- examples/pix2pose/backend.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index f8c73fac3..4ef476610 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -478,3 +478,30 @@ def to_affine_matrix(rotation_matrix, translation): affine_row = np.array([[0.0, 0.0, 0.0, 1.0]]) affine_matrix = np.concatenate([affine_top, affine_row], axis=0) return affine_matrix + + +def image_to_normalized_device_coordinates(image): + """Map image value from [0, 255] -> [-1, 1]. + """ + return (image / 127.5) - 1.0 + + +def normalized_device_coordinates_to_image(image): + """Map normalized value from [-1, 1] -> [0, 255]. + """ + return (image + 1.0) * 127.5 + + +def build_rotation_matrix_z(angle): + """Builds rotation matrix in Z axis. + # Arguments + angle: Float. Angle in radians. + # Return + Array (3, 3) rotation matrix in Z axis. + """ + cos_angle = np.cos(angle) + sin_angle = np.cos(angle) + rotation_matrix_z = np.array([[+cos_angle, -sin_angle, 0.0], + [+sin_angle, +cos_angle, 0.0], + [0.0, 0.0, 1.0]]) + return rotation_matrix_z From a5fd6a0214313c7fe5a626abe30db9f4dc6b323d Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Sat, 20 Nov 2021 00:14:09 +0100 Subject: [PATCH 055/101] Add untested symmetric loss training script --- examples/pix2pose/loss.py | 12 +++-- examples/pix2pose/train_symmetric.py | 72 ++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 examples/pix2pose/train_symmetric.py diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index 50332258c..88d4e0596 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -109,13 +109,16 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): """ # alpha mask is invariant to rotations that leave the shape symmetric. 
RGB_true, alpha = split_alpha_mask(RGBA_true) - RGB_original_shape = tf.shape(RGBA_true) - RGB_true = tf.reshape(RGB_true, [-1, 3]) + # RGB_original_shape = tf.shape(RGBA_true) + batch_size, H, W, num_channels = RGB_true.shape + batch_size, H, W, num_channels = 32, 128, 128, 3 + RGB_true = tf.reshape(RGB_true, [batch_size, -1, 3]) RGB_true = to_normalized_device_coordinates(RGB_true) symmetric_losses = [] for rotation in rotations: - RGB_true_symmetric = tf.matmul(rotation, RGB_true.T).T - RGB_true_symmetric = tf.reshape(RGB_true_symmetric, RGB_original_shape) + # RGB_true_symmetric = tf.matmul(rotation, RGB_true.T).T + RGB_true_symmetric = tf.einsum('ij,bpj->bpi', rotation, RGB_true) + RGB_true_symmetric = tf.reshape(RGB_true_symmetric, (batch_size, H, W, num_channels)) RGBA_true_symmetric = tf.concat([RGB_true_symmetric, alpha], axis=3) symmetric_loss = compute_weighted_reconstruction_loss( RGBA_true_symmetric, RGB_pred, beta) @@ -192,6 +195,7 @@ class WeightedSymmetricReconstruction(Loss): the positive alpha mask values in the predicted RGB image by beta. """ def __init__(self, rotations, beta=3.0): + super(WeightedSymmetricReconstruction, self).__init__() self.rotations = rotations self.beta = beta diff --git a/examples/pix2pose/train_symmetric.py b/examples/pix2pose/train_symmetric.py new file mode 100644 index 000000000..ae9f078ea --- /dev/null +++ b/examples/pix2pose/train_symmetric.py @@ -0,0 +1,72 @@ +import os +import glob +import numpy as np +from tensorflow.keras.optimizers import Adam +from paz.abstract import GeneratingSequence +from paz.models.segmentation import UNET_VGG16 +from backend import build_rotation_matrix_z + +from scenes import PixelMaskRenderer +from pipelines import DomainRandomization +from loss import WeightedSymmetricReconstruction +from metrics import mean_squared_error + +image_shape = [128, 128, 3] +root_path = os.path.expanduser('~') +background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' +background_wildcard = os.path.join(root_path, background_wildcard) +image_paths = glob.glob(background_wildcard) +# path_OBJ = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' +path_OBJ = 'single_solar_panel_02.obj' +path_OBJ = os.path.join(root_path, path_OBJ) +num_occlusions = 1 +viewport_size = image_shape[:2] +y_fov = 3.14159 / 4.0 +distance = [0.3, 0.5] +light = [1.0, 30] +top_only = False +roll = 3.14159 +shift = 0.05 +batch_size = 32 +beta = 3.0 +alpha = 0.1 +filters = 16 +num_classes = 3 +learning_rate = 0.001 +max_num_epochs = 10 +beta = 3.0 +steps_per_epoch = 1000 +H, W, num_channels = image_shape = [128, 128, 3] + + +renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, + light, top_only, roll, shift) + +inputs_to_shape = {'input_1': [H, W, num_channels]} +labels_to_shape = {'masks': [H, W, 4]} +processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) + + +sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) + +angles = np.linspace(0, 2 * np.pi, 6) +rotations = [] +for angle in angles: + rotations.append(build_rotation_matrix_z(angle)) +rotations = np.array(rotations) + + +loss = WeightedSymmetricReconstruction(rotations, beta) + +model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +optimizer = Adam(learning_rate) + +model.compile(optimizer, loss, mean_squared_error) + +model.fit( + sequence, + epochs=max_num_epochs, + verbose=1, + workers=0) From a9f2a27a5cfa5ab5a5a1336cc76b5cd3c7880322 Mon Sep 17 00:00:00 2001 From: 
Octavio Arriaga Date: Tue, 23 Nov 2021 09:31:58 +0100 Subject: [PATCH 056/101] Add predictions transformation --- examples/pix2pose/loss.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index 88d4e0596..c408ab882 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -114,6 +114,7 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): batch_size, H, W, num_channels = 32, 128, 128, 3 RGB_true = tf.reshape(RGB_true, [batch_size, -1, 3]) RGB_true = to_normalized_device_coordinates(RGB_true) + RGB_pred = to_normalized_device_coordinates(RGB_pred) symmetric_losses = [] for rotation in rotations: # RGB_true_symmetric = tf.matmul(rotation, RGB_true.T).T From b2aca585624897cc1e88fade891f4b92bb2eca42 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 23 Nov 2021 11:27:09 +0100 Subject: [PATCH 057/101] Fix bug with rotation matrix creation --- examples/pix2pose/backend.py | 25 ++++++++- examples/pix2pose/test_rotated_image.py | 71 +++++++++++++++++++++++++ examples/pix2pose/train_symmetric.py | 4 +- 3 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 examples/pix2pose/test_rotated_image.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 4ef476610..3617a7eec 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -500,8 +500,31 @@ def build_rotation_matrix_z(angle): Array (3, 3) rotation matrix in Z axis. """ cos_angle = np.cos(angle) - sin_angle = np.cos(angle) + sin_angle = np.sin(angle) rotation_matrix_z = np.array([[+cos_angle, -sin_angle, 0.0], [+sin_angle, +cos_angle, 0.0], [0.0, 0.0, 1.0]]) return rotation_matrix_z + + +def rotate_image(image, rotation_matrix, epsilon=1e-4): + """Rotates an image with a symmetry. + # Arguments + image: Array (H, W, 3) with domain [0, 255]. + rotation_matrix: Array (3, 3). 
+ """ + mask_image = np.sum(image, axis=-1, keepdims=True) + mask_image = mask_image != 0 + # mask_image = np.repeat(mask_image, 3, axis=-1) + + image = image_to_normalized_device_coordinates(image) + # image_colors = (image * 2) - 1 + + # rotated_image = image + epsilon + rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) + image = normalized_device_coordinates_to_image(rotated_image) + # rotated_image = (rotated_image + 1) / 2 + + # rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) + # rotated_image = rotated_image * mask_image + return rotated_image diff --git a/examples/pix2pose/test_rotated_image.py b/examples/pix2pose/test_rotated_image.py new file mode 100644 index 000000000..c71343a79 --- /dev/null +++ b/examples/pix2pose/test_rotated_image.py @@ -0,0 +1,71 @@ +import numpy as np +import os +import glob +from paz.backend.image import show_image + +from backend import build_rotation_matrix_z +from backend import normalized_device_coordinates_to_image +from backend import image_to_normalized_device_coordinates +from scenes import PixelMaskRenderer + +scale = 4 +image_shape = [128 * scale, 128 * scale, 3] +root_path = os.path.expanduser('~') +background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' +background_wildcard = os.path.join(root_path, background_wildcard) +image_paths = glob.glob(background_wildcard) + +path_OBJ = 'single_solar_panel_02.obj' +path_OBJ = os.path.join(root_path, path_OBJ) +num_occlusions = 1 +viewport_size = image_shape[:2] +y_fov = 3.14159 / 4.0 +distance = [1.0, 1.0] +light = [1.0, 30] +top_only = False +roll = 3.14159 +shift = 0.05 + +renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, + light, top_only, roll, shift) + + +def rotate_image(image, rotation_matrix, epsilon=1e-4): + mask_image = np.sum(image, axis=-1, keepdims=True) + mask_image = mask_image != 0 + + image = image_to_normalized_device_coordinates(image) + # image = image / 255.0 + print(image.min(), image.max()) + # image = (image * 2) - 1 + + # rotated_image = image + epsilon + rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) + rotated_image = normalized_device_coordinates_to_image(rotated_image) + # rotated_image = (rotated_image + 1) / 2 + # print(rotated_image.min(), rotated_image.max()) + + # rotated_image = np.clip(rotated_image, a_min=0.0, a_max=1.0) + rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) + # rotated_image = rotated_image * 255.0 + rotated_image = rotated_image * mask_image + return rotated_image + + +image, alpha, RGB_mask = renderer.render() +RGB_mask = RGB_mask[..., 0:3] +show_image(image) +show_image(RGB_mask) +angles = np.linspace(0, 2 * np.pi, 7) +images = [] +for angle in angles: + print('-' * 40) + print('angle', angle) + rotation_matrix = build_rotation_matrix_z(angle) + print(rotation_matrix) + rotated_image = rotate_image(RGB_mask, rotation_matrix) + rotated_image = rotated_image.astype('uint8') + images.append(rotated_image) + # show_image(rotated_image) +images = np.concatenate(images, axis=1) +show_image(images) diff --git a/examples/pix2pose/train_symmetric.py b/examples/pix2pose/train_symmetric.py index ae9f078ea..4475ffc83 100644 --- a/examples/pix2pose/train_symmetric.py +++ b/examples/pix2pose/train_symmetric.py @@ -33,7 +33,7 @@ filters = 16 num_classes = 3 learning_rate = 0.001 -max_num_epochs = 10 +max_num_epochs = 5 beta = 3.0 steps_per_epoch = 1000 H, W, num_channels = image_shape = [128, 128, 3] @@ -70,3 +70,5 @@ epochs=max_num_epochs, verbose=1, workers=0) + 
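+# Persist the trained weights of the symmetric-loss UNET so a separate
+# evaluation or inference script can reload them later without retraining.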
+model.save_weights('UNET_VGG_symmetric_weights.hdf5') From e9ac4bb54273c710db625d29c777aef1b53cf462 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 23 Nov 2021 14:59:08 +0100 Subject: [PATCH 058/101] Add python rotate image function --- examples/pix2pose/backend.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 3617a7eec..7a2190219 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -507,24 +507,19 @@ def build_rotation_matrix_z(angle): return rotation_matrix_z -def rotate_image(image, rotation_matrix, epsilon=1e-4): +def rotate_image(image, rotation_matrix): """Rotates an image with a symmetry. # Arguments image: Array (H, W, 3) with domain [0, 255]. rotation_matrix: Array (3, 3). - """ - mask_image = np.sum(image, axis=-1, keepdims=True) - mask_image = mask_image != 0 - # mask_image = np.repeat(mask_image, 3, axis=-1) + # Returns + Array (H, W, 3) with domain [0, 255] + """ + mask_image = np.sum(image, axis=-1, keepdims=True) != 0 image = image_to_normalized_device_coordinates(image) - # image_colors = (image * 2) - 1 - - # rotated_image = image + epsilon rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) - image = normalized_device_coordinates_to_image(rotated_image) - # rotated_image = (rotated_image + 1) / 2 - - # rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) - # rotated_image = rotated_image * mask_image + rotated_image = normalized_device_coordinates_to_image(rotated_image) + rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) + rotated_image = rotated_image * mask_image return rotated_image From 31b063b73ace10509987b75da082585f7d6cc5e8 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 23 Nov 2021 14:59:44 +0100 Subject: [PATCH 059/101] Refactor symmetric loss with based on rotate_image backend function --- examples/pix2pose/loss.py | 40 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index c408ab882..d22325c1c 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -88,10 +88,16 @@ def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0): return tf.reduce_mean(reconstruction_loss, axis=-1, keepdims=True) -def to_normalized_device_coordinates(image): +def normalized_image_to_normalized_device_coordinates(image): """Map image value from [0, 1] -> [-1, 1]. """ - return (image * 2) - 1.0 + return (image * 2.0) - 1.0 + + +def normalized_device_coordinates_to_normalized_image(image): + """Map image value from [0, 1] -> [-1, 1]. + """ + return (image + 1.0) / 2.0 def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): @@ -104,6 +110,34 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): rotations: Array (num_symmetries, 3, 3). Rotation matrices that when applied lead to the same object view. + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values. 
+ """ + RGB_true, alpha = split_alpha_mask(RGBA_true) + RGB_true = normalized_image_to_normalized_device_coordinates(RGB_true) + symmetric_losses = [] + for rotation in rotations: + RGB_true = tf.einsum('ij,bklj->bkli', rotation, RGB_true) + RGB_true = normalized_device_coordinates_to_normalized_image(RGB_true) + RGB_true = tf.concat([RGB_true, alpha], axis=3) + loss = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) + loss = tf.expand_dims(loss, -1) + symmetric_losses.append(loss) + symmetric_losses = tf.concat(symmetric_losses, axis=-1) + minimum_symmetric_loss = tf.reduce_min(symmetric_losses, axis=-1) + return minimum_symmetric_loss + + +def compute_weighted_symmetric_loss2(RGBA_true, RGB_pred, rotations, beta=3.0): + """Computes the mininum of all rotated L1 reconstruction losses weighting + the positive alpha mask values in the predicted RGB image by beta. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. + rotations: Array (num_symmetries, 3, 3). Rotation matrices + that when applied lead to the same object view. + # Returns Tensor [batch, H, W] with weighted reconstruction loss values. """ @@ -118,7 +152,7 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): symmetric_losses = [] for rotation in rotations: # RGB_true_symmetric = tf.matmul(rotation, RGB_true.T).T - RGB_true_symmetric = tf.einsum('ij,bpj->bpi', rotation, RGB_true) + RGB_true_symmetric = tf.einsum('ij,klj->kli', rotation, RGB_true) RGB_true_symmetric = tf.reshape(RGB_true_symmetric, (batch_size, H, W, num_channels)) RGBA_true_symmetric = tf.concat([RGB_true_symmetric, alpha], axis=3) symmetric_loss = compute_weighted_reconstruction_loss( From 0ec6aa09d87d398bb8e780f34fd217c995dfa697 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 23 Nov 2021 15:00:21 +0100 Subject: [PATCH 060/101] Update training scripts --- examples/pix2pose/test_rotated_image.py | 59 +++++++------------------ examples/pix2pose/train_symmetric.py | 13 +++--- 2 files changed, 23 insertions(+), 49 deletions(-) diff --git a/examples/pix2pose/test_rotated_image.py b/examples/pix2pose/test_rotated_image.py index c71343a79..8383744c2 100644 --- a/examples/pix2pose/test_rotated_image.py +++ b/examples/pix2pose/test_rotated_image.py @@ -3,7 +3,7 @@ import glob from paz.backend.image import show_image -from backend import build_rotation_matrix_z +from backend import build_rotation_matrix_z, rotate_image from backend import normalized_device_coordinates_to_image from backend import image_to_normalized_device_coordinates from scenes import PixelMaskRenderer @@ -28,44 +28,19 @@ renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, light, top_only, roll, shift) - - -def rotate_image(image, rotation_matrix, epsilon=1e-4): - mask_image = np.sum(image, axis=-1, keepdims=True) - mask_image = mask_image != 0 - - image = image_to_normalized_device_coordinates(image) - # image = image / 255.0 - print(image.min(), image.max()) - # image = (image * 2) - 1 - - # rotated_image = image + epsilon - rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) - rotated_image = normalized_device_coordinates_to_image(rotated_image) - # rotated_image = (rotated_image + 1) / 2 - # print(rotated_image.min(), rotated_image.max()) - - # rotated_image = np.clip(rotated_image, a_min=0.0, a_max=1.0) - rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) - # rotated_image = rotated_image * 255.0 - 
rotated_image = rotated_image * mask_image - return rotated_image - - -image, alpha, RGB_mask = renderer.render() -RGB_mask = RGB_mask[..., 0:3] -show_image(image) -show_image(RGB_mask) -angles = np.linspace(0, 2 * np.pi, 7) -images = [] -for angle in angles: - print('-' * 40) - print('angle', angle) - rotation_matrix = build_rotation_matrix_z(angle) - print(rotation_matrix) - rotated_image = rotate_image(RGB_mask, rotation_matrix) - rotated_image = rotated_image.astype('uint8') - images.append(rotated_image) - # show_image(rotated_image) -images = np.concatenate(images, axis=1) -show_image(images) +renderer.scene.ambient_light = [1.0, 1.0, 1.0] + +for _ in range(3): + image, alpha, RGB_mask = renderer.render() + RGB_mask = RGB_mask[..., 0:3] + show_image(image) + show_image(RGB_mask) + angles = np.linspace(0, 2 * np.pi, 7)[0:6] + images = [] + for angle in angles: + rotation_matrix = build_rotation_matrix_z(angle) + rotated_image = rotate_image(RGB_mask, rotation_matrix) + rotated_image = rotated_image.astype('uint8') + images.append(rotated_image) + images = np.concatenate(images, axis=1) + show_image(images) diff --git a/examples/pix2pose/train_symmetric.py b/examples/pix2pose/train_symmetric.py index 4475ffc83..cd6f85376 100644 --- a/examples/pix2pose/train_symmetric.py +++ b/examples/pix2pose/train_symmetric.py @@ -41,22 +41,21 @@ renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, light, top_only, roll, shift) +# check why this is needed in this object +renderer.scene.ambient_light = [1.0, 1.0, 1.0] inputs_to_shape = {'input_1': [H, W, num_channels]} labels_to_shape = {'masks': [H, W, 4]} + processor = DomainRandomization( renderer, image_shape, image_paths, inputs_to_shape, labels_to_shape, num_occlusions) - sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) -angles = np.linspace(0, 2 * np.pi, 6) -rotations = [] -for angle in angles: - rotations.append(build_rotation_matrix_z(angle)) -rotations = np.array(rotations) - +# build all symmetric rotations for solar pannel +angles = np.linspace(0, 2 * np.pi, 7)[:6] +rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) loss = WeightedSymmetricReconstruction(rotations, beta) From a36763b2d1b952a661047b89b9a469aabe17fd79 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 25 Nov 2021 16:43:50 +0100 Subject: [PATCH 061/101] Add canonical coloring scheme scene --- examples/pix2pose/canonical_coloring.py | 254 ++++++++++++++++++++++++ 1 file changed, 254 insertions(+) create mode 100644 examples/pix2pose/canonical_coloring.py diff --git a/examples/pix2pose/canonical_coloring.py b/examples/pix2pose/canonical_coloring.py new file mode 100644 index 000000000..bc2f23202 --- /dev/null +++ b/examples/pix2pose/canonical_coloring.py @@ -0,0 +1,254 @@ +import numpy as np +from backend import build_rotation_matrix_y +from paz.backend.render import sample_uniformly, split_alpha_channel +from pyrender import (PerspectiveCamera, OffscreenRenderer, DirectionalLight, + RenderFlags, Mesh, Scene) +import trimesh +from coloring import color_object +from backend import quaternion_to_rotation_matrix +from backend import to_affine_matrix + + +def sample_uniform(min_value, max_value): + """Samples values inside segment [min_value, max_value) + + # Arguments + segment_limits: List (2) containing min and max segment values. 
+ + # Returns + Float inside segment [min_value, max_value] + """ + if min_value > max_value: + raise ValueError('First value must be lower than second value') + value = np.random.uniform(min_value, max_value) + return value + + +def sample_inside_box3D(min_W, min_H, min_D, max_W, max_H, max_D): + """ Samples points inside a 3D box defined by the + width, height and depth limits. + ________ + / /| + / / | + / / | + /_______/ / + | | | / / + height | | / depth + | |_______|/ / + + --widht-- + + # Arguments + width_limits: List (2) with [min_value_width, max_value_width]. + height_limits: List (2) with [min_value_height, max_value_height]. + depth_limits: List (2) with [min_value_depth, max_value_depth]. + + # Returns + Array (3) of point inside the 3D box. + """ + W = sample_uniform(min_W, max_W) + H = sample_uniform(min_H, max_H) + D = sample_uniform(min_D, max_D) + box_point3D = np.array([W, H, D]) + return box_point3D + + +def sample_random_rotation_matrix2(): + """Samples SO3 in rotation matrix form. + + # Return + Array (3, 3). + + # References + [Lost in my terminal](http://blog.lostinmyterminal.com/python/2015/05/ + 12/random-rotation-matrix.html) + [real-time rendering](from http://www.realtimerendering.com/resources/ + GraphicsGems/gemsiii/rand_rotation.c) + """ + theta = 2.0 * np.pi * np.random.uniform() + phi = 2.0 * np.pi * np.random.uniform() + z = 2.0 * np.random.uniform() + # Compute a vector V used for distributing points over the sphere via the + # reflection I - V Transpose(V). + # This formulation of V will guarantee that if x[1] and x[2] are uniformly + # distributed, the reflected points will be uniform on the sphere. + # random_vector has length sqrt(2) to eliminate 2 in the Householder matrix + r = np.sqrt(z) + random_vector = np.array([np.sin(phi) * r, + np.cos(phi) * r, + np.sqrt(2.0 - z)]) + sin_theta = np.sin(theta) + cos_theta = np.cos(theta) + R = np.array([[+cos_theta, +sin_theta, 0.0], + [-sin_theta, +cos_theta, 0.0], + [0.0, 0.0, 1.0]]) + random_rotation_matrix = ( + np.outer(random_vector, random_vector) - np.eye(3)).dot(R) + return random_rotation_matrix + + +def sample_random_rotation_matrix(): + quaternion = np.random.rand(4) + quaternion = quaternion / np.linalg.norm(quaternion) + rotation_matrix = quaternion_to_rotation_matrix(quaternion) + return rotation_matrix + + +def sample_random_rotation_matrix3(): + epsilon = 0.1 + x_angle = np.random.uniform((-np.pi / 2.0) + epsilon, (np.pi / 2.0) - epsilon) + y_angle = np.random.uniform((-np.pi / 2.0) + epsilon, (np.pi / 2.0) - epsilon) + z_angle = np.random.uniform(np.pi, -np.pi) + + x_matrix = build_rotation_matrix_x(x_angle) + y_matrix = build_rotation_matrix_y(y_angle) + z_matrix = build_rotation_matrix_z(z_angle) + + rotation_matrix = np.dot(z_matrix, np.dot(y_matrix, x_matrix)) + return rotation_matrix + + +def sample_affine_transform(min_corner, max_corner): + min_W, min_H, min_D = min_corner + max_W, max_H, max_D = max_corner + translation = sample_inside_box3D(min_W, min_H, min_D, max_W, max_H, max_D) + rotation_matrix = sample_random_rotation_matrix3() + affine_matrix = to_affine_matrix(rotation_matrix, translation) + return affine_matrix + + +class CanonicalScene(): + def __init__(self, path_OBJ, camera_pose, min_corner, max_corner, + symmetric_transforms, + viewport_size=(128, 128), y_fov=3.14159 / 4.0, + light_intensity=[0.5, 30]): + self.light_intensity = light_intensity + self.symmetric_transforms = symmetric_transforms + self.min_corner, self.max_corner = min_corner, max_corner + self.scene = 
Scene(bg_color=[0, 0, 0, 0]) + self.light = self._build_light(light_intensity, camera_pose) + self.camera = self._build_camera(y_fov, viewport_size, camera_pose) + self.pixel_mesh = self.scene.add(color_object(path_OBJ)) + self.mesh = self.scene.add( + Mesh.from_trimesh(trimesh.load(path_OBJ), smooth=True)) + + self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) + + self.flags_RGBA = RenderFlags.RGBA + self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT + + def _build_light(self, light, pose): + directional_light = DirectionalLight([1.0, 1.0, 1.0], np.mean(light)) + directional_light = self.scene.add(directional_light, pose=pose) + return directional_light + + def _build_camera(self, y_fov, viewport_size, pose): + aspect_ratio = np.divide(*viewport_size) + camera = PerspectiveCamera(y_fov, aspectRatio=aspect_ratio) + camera = self.scene.add(camera, pose=pose) + return camera + + def _sample_parameters(self, min_corner, max_corner): + mesh_transform = sample_affine_transform(min_corner, max_corner) + light_intensity = sample_uniformly(self.light_intensity) + return mesh_transform, light_intensity + + def render(self): + mesh_transform, light_intensity = self._sample_parameters( + self.min_corner, self.max_corner) + mesh_rotation = mesh_transform[0:3, 0:3] + canonical_rotation = calculate_canonical_rotation( + mesh_rotation, self.symmetric_transforms) + # mesh_rotation[0:3, 0:3] = canonical_rotation + canonical_rotation = np.dot(mesh_rotation, canonical_rotation) + mesh_rotation[0:3, 0:3] = canonical_rotation + self.scene.set_pose(self.mesh, mesh_transform) + self.scene.set_pose(self.pixel_mesh, mesh_transform) + self.light.light.intensity = light_intensity + + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + return image, alpha, RGB_mask + + def render_symmetries(self): + images, alphas, RGB_masks = [], [], [] + for rotation in self.symmetric_transforms: + symmetric_transform = to_affine_matrix(rotation, np.zeros(3)) + self.scene.set_pose(self.mesh, symmetric_transform) + self.scene.set_pose(self.pixel_mesh, symmetric_transform) + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + images.append(image) + alphas.append(alpha) + RGB_masks.append(RGB_mask[..., 0:3]) + images = np.concatenate(images, axis=1) + RGB_masks = np.concatenate(RGB_masks, axis=1) + print(images.shape) + print(RGB_masks.shape) + images = np.concatenate([images, RGB_masks], axis=0) + return images + + +def compute_norm_SO3(rotation_mesh, rotation): + difference = np.dot(np.linalg.inv(rotation), rotation_mesh) - np.eye(3) + distance = np.linalg.norm(difference, ord='fro') + return distance + + +def calculate_canonical_rotation(rotation_mesh, rotations): + norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] + closest_rotation_arg = np.argmin(norms) + print(closest_rotation_arg) + closest_rotation = rotations[closest_rotation_arg] + canonical_rotation = np.linalg.inv(closest_rotation) + return canonical_rotation + + +if __name__ == 
"__main__": + import os + from paz.backend.image import show_image + from backend import build_rotation_matrix_z + from backend import build_rotation_matrix_x + from backend import build_rotation_matrix_y + path_OBJ = 'single_solar_panel_02.obj' + root_path = os.path.expanduser('~') + path_OBJ = os.path.join(root_path, path_OBJ) + num_occlusions = 1 + image_shape = (128, 128, 3) + viewport_size = image_shape[:2] + y_fov = 3.14159 / 4.0 + distance = [1.0, 1.0] + light = [1.0, 30] + + # min_corner = [-0.1, -0.1, -0.0] + # max_corner = [+0.1, +0.1, +0.4] + angles = np.linspace(0, 2 * np.pi, 7)[:6] + symmetric_rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) + min_corner = [0.0, 0.0, -0.4] + max_corner = [0.0, 0.0, +0.0] + # translation = np.array([0.0, 0.0, 1.0]) + translation = np.array([0.0, 0.0, 1.0]) + camera_rotation = np.eye(3) + camera_rotation = build_rotation_matrix_x(np.pi) + translation = np.array([0.0, 0.0, -1.0]) + camera_pose = to_affine_matrix(camera_rotation, translation) + scene = CanonicalScene(path_OBJ, camera_pose, min_corner, max_corner, symmetric_rotations) + from pyrender import Viewer + Viewer(scene.scene) + scene.scene.ambient_light = [1.0, 1.0, 1.0] + image = scene.render_symmetries() + show_image(image) + for _ in range(100): + image, alpha, RGB_mask = scene.render() + show_image(image) + show_image(RGB_mask[:, :, 0:3]) From 2a86373db94ce4c7889db7cda9aa7164c6f7906e Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 25 Nov 2021 16:44:11 +0100 Subject: [PATCH 062/101] Add rotation build matrices --- examples/pix2pose/backend.py | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 7a2190219..350947db9 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -494,8 +494,10 @@ def normalized_device_coordinates_to_image(image): def build_rotation_matrix_z(angle): """Builds rotation matrix in Z axis. + # Arguments angle: Float. Angle in radians. + # Return Array (3, 3) rotation matrix in Z axis. """ @@ -507,8 +509,43 @@ def build_rotation_matrix_z(angle): return rotation_matrix_z +def build_rotation_matrix_x(angle): + """Builds rotation matrix in X axis. + + # Arguments + angle: Float. Angle in radians. + + # Return + Array (3, 3) rotation matrix in Z axis. + """ + cos_angle = np.cos(angle) + sin_angle = np.sin(angle) + rotation_matrix_x = np.array([[1.0, 0.0, 0.0], + [0.0, +cos_angle, -sin_angle], + [0.0, +sin_angle, +cos_angle]]) + return rotation_matrix_x + + +def build_rotation_matrix_y(angle): + """Builds rotation matrix in Y axis. + + # Arguments + angle: Float. Angle in radians. + + # Return + Array (3, 3) rotation matrix in Z axis. + """ + cos_angle = np.cos(angle) + sin_angle = np.sin(angle) + rotation_matrix_y = np.array([[+cos_angle, 0.0, +sin_angle], + [0.0, 1.0, 0.0], + [-sin_angle, 0.0, +cos_angle]]) + return rotation_matrix_y + + def rotate_image(image, rotation_matrix): """Rotates an image with a symmetry. + # Arguments image: Array (H, W, 3) with domain [0, 255]. rotation_matrix: Array (3, 3). 
From 86bec271787a6af0a229435a2e78a42386d50be1 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 25 Nov 2021 19:58:45 +0100 Subject: [PATCH 063/101] Add training script for canonical pose estimation --- examples/pix2pose/canonical_coloring.py | 75 +++++++++++++++++++++---- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/examples/pix2pose/canonical_coloring.py b/examples/pix2pose/canonical_coloring.py index bc2f23202..0a9f74f7e 100644 --- a/examples/pix2pose/canonical_coloring.py +++ b/examples/pix2pose/canonical_coloring.py @@ -193,8 +193,8 @@ def render_symmetries(self): RGB_masks.append(RGB_mask[..., 0:3]) images = np.concatenate(images, axis=1) RGB_masks = np.concatenate(RGB_masks, axis=1) - print(images.shape) - print(RGB_masks.shape) + # print(images.shape) + # print(RGB_masks.shape) images = np.concatenate([images, RGB_masks], axis=0) return images @@ -208,7 +208,7 @@ def compute_norm_SO3(rotation_mesh, rotation): def calculate_canonical_rotation(rotation_mesh, rotations): norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] closest_rotation_arg = np.argmin(norms) - print(closest_rotation_arg) + # print(closest_rotation_arg) closest_rotation = rotations[closest_rotation_arg] canonical_rotation = np.linalg.inv(closest_rotation) return canonical_rotation @@ -219,7 +219,7 @@ def calculate_canonical_rotation(rotation_mesh, rotations): from paz.backend.image import show_image from backend import build_rotation_matrix_z from backend import build_rotation_matrix_x - from backend import build_rotation_matrix_y + # from backend import build_rotation_matrix_y path_OBJ = 'single_solar_panel_02.obj' root_path = os.path.expanduser('~') path_OBJ = os.path.join(root_path, path_OBJ) @@ -233,7 +233,8 @@ def calculate_canonical_rotation(rotation_mesh, rotations): # min_corner = [-0.1, -0.1, -0.0] # max_corner = [+0.1, +0.1, +0.4] angles = np.linspace(0, 2 * np.pi, 7)[:6] - symmetric_rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) + symmetric_rotations = np.array( + [build_rotation_matrix_z(angle) for angle in angles]) min_corner = [0.0, 0.0, -0.4] max_corner = [0.0, 0.0, +0.0] # translation = np.array([0.0, 0.0, 1.0]) @@ -242,13 +243,63 @@ def calculate_canonical_rotation(rotation_mesh, rotations): camera_rotation = build_rotation_matrix_x(np.pi) translation = np.array([0.0, 0.0, -1.0]) camera_pose = to_affine_matrix(camera_rotation, translation) - scene = CanonicalScene(path_OBJ, camera_pose, min_corner, max_corner, symmetric_rotations) - from pyrender import Viewer - Viewer(scene.scene) - scene.scene.ambient_light = [1.0, 1.0, 1.0] - image = scene.render_symmetries() + renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, + max_corner, symmetric_rotations) + # from pyrender import Viewer + # Viewer(scene.scene) + renderer.scene.ambient_light = [1.0, 1.0, 1.0] + image = renderer.render_symmetries() show_image(image) - for _ in range(100): - image, alpha, RGB_mask = scene.render() + for _ in range(0): + image, alpha, RGB_mask = renderer.render() show_image(image) show_image(RGB_mask[:, :, 0:3]) + + from pipelines import DomainRandomization + from paz.abstract.sequence import GeneratingSequence + from loss import WeightedReconstruction + from paz.models import UNET_VGG16 + from tensorflow.keras.optimizers import Adam + from metrics import mean_squared_error + import glob + + background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' + background_wildcard = os.path.join(root_path, background_wildcard) + image_paths = 
glob.glob(background_wildcard) + + H, W, num_channels = image_shape + batch_size = 32 + steps_per_epoch = 1000 + beta = 3.0 + num_classes = 3 + learning_rate = 0.001 + max_num_epochs = 5 + + inputs_to_shape = {'input_1': [H, W, num_channels]} + labels_to_shape = {'masks': [H, W, 4]} + + processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) + + sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) + + # build all symmetric rotations for solar pannel + angles = np.linspace(0, 2 * np.pi, 7)[:6] + rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) + + # loss = WeightedSymmetricReconstruction(rotations, beta) + loss = WeightedReconstruction(beta) + + model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) + optimizer = Adam(learning_rate) + + model.compile(optimizer, loss, mean_squared_error) + + model.fit( + sequence, + epochs=max_num_epochs, + verbose=1, + workers=0) + model.save_weights('UNET-VGG_solar_panel_canonical.hdf5') + From 0aa168bfcaa94ff466196ff4045cd472ab400515 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 26 Nov 2021 19:43:38 +0100 Subject: [PATCH 064/101] Move canonical functions to backend --- examples/pix2pose/backend.py | 111 +++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 350947db9..4fd5b4bc8 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -560,3 +560,114 @@ def rotate_image(image, rotation_matrix): rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) rotated_image = rotated_image * mask_image return rotated_image + + +def sample_uniform(min_value, max_value): + """Samples values inside segment [min_value, max_value) + + # Arguments + segment_limits: List (2) containing min and max segment values. + + # Returns + Float inside segment [min_value, max_value] + """ + if min_value > max_value: + raise ValueError('First value must be lower than second value') + value = np.random.uniform(min_value, max_value) + return value + + +def sample_inside_box3D(min_W, min_H, min_D, max_W, max_H, max_D): + """ Samples points inside a 3D box defined by the + width, height and depth limits. + ________ + / /| + / / | + / / | + /_______/ / + | | | / / + height | | / depth + | |_______|/ / + + --widht-- + + # Arguments + width_limits: List (2) with [min_value_width, max_value_width]. + height_limits: List (2) with [min_value_height, max_value_height]. + depth_limits: List (2) with [min_value_depth, max_value_depth]. + + # Returns + Array (3) of point inside the 3D box. 
+ """ + W = sample_uniform(min_W, max_W) + H = sample_uniform(min_H, max_H) + D = sample_uniform(min_D, max_D) + box_point3D = np.array([W, H, D]) + return box_point3D + + +def sample_front_rotation_matrix(epsilon=0.1): + x_angle = np.random.uniform((-np.pi / 2.0) + epsilon, + (np.pi / 2.0) - epsilon) + y_angle = np.random.uniform((-np.pi / 2.0) + epsilon, + (np.pi / 2.0) - epsilon) + z_angle = np.random.uniform(np.pi, -np.pi) + + x_matrix = build_rotation_matrix_x(x_angle) + y_matrix = build_rotation_matrix_y(y_angle) + z_matrix = build_rotation_matrix_z(z_angle) + + rotation_matrix = np.dot(z_matrix, np.dot(y_matrix, x_matrix)) + return rotation_matrix + + +def sample_affine_transform(min_corner, max_corner): + min_W, min_H, min_D = min_corner + max_W, max_H, max_D = max_corner + translation = sample_inside_box3D(min_W, min_H, min_D, max_W, max_H, max_D) + rotation_matrix = sample_front_rotation_matrix() + affine_matrix = to_affine_matrix(rotation_matrix, translation) + return affine_matrix + + +def sample_random_rotation_matrix(): + """Samples SO3 in rotation matrix form. + + # Return + Array (3, 3). + + # References + [Lost in my terminal](http://blog.lostinmyterminal.com/python/2015/05/ + 12/random-rotation-matrix.html) + [real-time rendering](from http://www.realtimerendering.com/resources/ + GraphicsGems/gemsiii/rand_rotation.c) + """ + theta = 2.0 * np.pi * np.random.uniform() + phi = 2.0 * np.pi * np.random.uniform() + z = 2.0 * np.random.uniform() + # random_vector has length sqrt(2) to eliminate 2 in the Householder matrix + r = np.sqrt(z) + random_vector = np.array( + [np.sin(phi) * r, np.cos(phi) * r, np.sqrt(2.0 - z)]) + sin_theta = np.sin(theta) + cos_theta = np.cos(theta) + R = np.array([[+cos_theta, +sin_theta, 0.0], + [-sin_theta, +cos_theta, 0.0], + [0.0, 0.0, 1.0]]) + random_rotation_matrix = ( + np.outer(random_vector, random_vector) - np.eye(3)).dot(R) + return random_rotation_matrix + + +def compute_norm_SO3(rotation_mesh, rotation): + difference = np.dot(np.linalg.inv(rotation), rotation_mesh) - np.eye(3) + distance = np.linalg.norm(difference, ord='fro') + return distance + + +def calculate_canonical_rotation(rotation_mesh, rotations): + norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] + closest_rotation_arg = np.argmin(norms) + closest_rotation = rotations[closest_rotation_arg] + canonical_rotation = np.linalg.inv(closest_rotation) + return canonical_rotation From 049f3dfa99eb8d9e455b6994a993c50682bdf895 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 26 Nov 2021 19:44:21 +0100 Subject: [PATCH 065/101] Fix bug with tensor name being overwritten --- examples/pix2pose/loss.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index d22325c1c..d171df124 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -117,10 +117,12 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): RGB_true = normalized_image_to_normalized_device_coordinates(RGB_true) symmetric_losses = [] for rotation in rotations: - RGB_true = tf.einsum('ij,bklj->bkli', rotation, RGB_true) - RGB_true = normalized_device_coordinates_to_normalized_image(RGB_true) - RGB_true = tf.concat([RGB_true, alpha], axis=3) - loss = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) + RGB_true_rotated = tf.einsum('ij,bklj->bkli', rotation, RGB_true) + RGB_true_rotated = normalized_device_coordinates_to_normalized_image(RGB_true_rotated) + RGB_true_rotated 
= tf.clip_by_value(RGB_true_rotated, 0.0, 1.0) + RGB_true_rotated = RGB_true_rotated * alpha + RGBA_true_rotated = tf.concat([RGB_true_rotated, alpha], axis=3) + loss = compute_weighted_reconstruction_loss(RGBA_true_rotated, RGB_pred, beta) loss = tf.expand_dims(loss, -1) symmetric_losses.append(loss) symmetric_losses = tf.concat(symmetric_losses, axis=-1) From 6dac02b5ca26a515c26d9aaecc3bd200cca04ffa Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 26 Nov 2021 19:44:49 +0100 Subject: [PATCH 066/101] Add scene for canonical discrete transformations --- examples/pix2pose/scenes.py | 82 +++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index ea03b04ae..3b94a655c 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -6,6 +6,9 @@ RenderFlags, Mesh, Scene) import trimesh from coloring import color_object +from backend import to_affine_matrix +from backend import sample_affine_transform +from backend import calculate_canonical_rotation class PixelMaskRenderer(): @@ -65,3 +68,82 @@ def render(self): RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) self.mesh.mesh.is_visible = True return image, alpha, RGB_mask + + +class CanonicalScene(): + def __init__(self, path_OBJ, camera_pose, min_corner, max_corner, + symmetric_transforms, viewport_size=(128, 128), + y_fov=3.14159 / 4.0, light_intensity=[0.5, 30]): + self.light_intensity = light_intensity + self.symmetric_transforms = symmetric_transforms + self.min_corner, self.max_corner = min_corner, max_corner + self.scene = Scene(bg_color=[0, 0, 0, 0]) + self.light = self._build_light(light_intensity, camera_pose) + self.camera = self._build_camera(y_fov, viewport_size, camera_pose) + self.pixel_mesh = self.scene.add(color_object(path_OBJ)) + self.mesh = self.scene.add( + Mesh.from_trimesh(trimesh.load(path_OBJ), smooth=True)) + + self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) + + self.flags_RGBA = RenderFlags.RGBA + self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT + + def _build_light(self, light, pose): + directional_light = DirectionalLight([1.0, 1.0, 1.0], np.mean(light)) + directional_light = self.scene.add(directional_light, pose=pose) + return directional_light + + def _build_camera(self, y_fov, viewport_size, pose): + aspect_ratio = np.divide(*viewport_size) + camera = PerspectiveCamera(y_fov, aspectRatio=aspect_ratio) + camera = self.scene.add(camera, pose=pose) + return camera + + def _sample_parameters(self, min_corner, max_corner): + mesh_transform = sample_affine_transform(min_corner, max_corner) + light_intensity = sample_uniformly(self.light_intensity) + return mesh_transform, light_intensity + + def render(self): + mesh_transform, light_intensity = self._sample_parameters( + self.min_corner, self.max_corner) + mesh_rotation = mesh_transform[0:3, 0:3] + canonical_rotation = calculate_canonical_rotation( + mesh_rotation, self.symmetric_transforms) + # mesh_rotation[0:3, 0:3] = canonical_rotation + canonical_rotation = np.dot(mesh_rotation, canonical_rotation) + mesh_rotation[0:3, 0:3] = canonical_rotation + self.scene.set_pose(self.mesh, mesh_transform) + self.scene.set_pose(self.pixel_mesh, mesh_transform) + self.light.light.intensity = light_intensity + + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = 
False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + return image, alpha, RGB_mask + + def render_symmetries(self): + images, alphas, RGB_masks = [], [], [] + for rotation in self.symmetric_transforms: + symmetric_transform = to_affine_matrix(rotation, np.zeros(3)) + self.scene.set_pose(self.mesh, symmetric_transform) + self.scene.set_pose(self.pixel_mesh, symmetric_transform) + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + images.append(image) + alphas.append(alpha) + RGB_masks.append(RGB_mask[..., 0:3]) + images = np.concatenate(images, axis=1) + RGB_masks = np.concatenate(RGB_masks, axis=1) + images = np.concatenate([images, RGB_masks], axis=0) + return images From 5a8083df88f8dffe2a41d82f828870d2609178aa Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 26 Nov 2021 19:47:53 +0100 Subject: [PATCH 067/101] Add training script for canonical transformation --- .../pix2pose/train_canonical_transform.py | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 examples/pix2pose/train_canonical_transform.py diff --git a/examples/pix2pose/train_canonical_transform.py b/examples/pix2pose/train_canonical_transform.py new file mode 100644 index 000000000..c9387e77c --- /dev/null +++ b/examples/pix2pose/train_canonical_transform.py @@ -0,0 +1,87 @@ +import os +import glob + +import numpy as np +from tensorflow.keras.optimizers import Adam +from paz.backend.image import show_image +from paz.models import UNET_VGG16 +from paz.abstract.sequence import GeneratingSequence + +from scenes import CanonicalScene +from backend import build_rotation_matrix_z +from backend import build_rotation_matrix_x +from backend import to_affine_matrix +from pipelines import DomainRandomization +from loss import WeightedReconstruction +from metrics import mean_squared_error + + +path_OBJ = 'single_solar_panel_02.obj' +root_path = os.path.expanduser('~') +path_OBJ = os.path.join(root_path, path_OBJ) +num_occlusions = 1 +image_shape = (128, 128, 3) +viewport_size = image_shape[:2] +y_fov = 3.14159 / 4.0 +distance = [1.0, 1.0] +light = [1.0, 30] + +angles = np.linspace(0, 2 * np.pi, 7)[:6] +symmetric_rotations = np.array( + [build_rotation_matrix_z(angle) for angle in angles]) +min_corner = [0.0, 0.0, -0.4] +max_corner = [0.0, 0.0, +0.0] +camera_rotation = build_rotation_matrix_x(np.pi) +translation = np.array([0.0, 0.0, -1.0]) +camera_pose = to_affine_matrix(camera_rotation, translation) +renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, + max_corner, symmetric_rotations) +# from pyrender import Viewer +# Viewer(scene.scene) +renderer.scene.ambient_light = [1.0, 1.0, 1.0] +image = renderer.render_symmetries() +show_image(image) +for _ in range(100): + image, alpha, RGB_mask = renderer.render() + show_image(image) + show_image(RGB_mask[:, :, 0:3]) + +background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' +background_wildcard = os.path.join(root_path, background_wildcard) +image_paths = glob.glob(background_wildcard) + +H, W, num_channels = image_shape +batch_size = 32 +steps_per_epoch = 1000 +beta = 3.0 +num_classes = 3 +learning_rate = 0.001 +max_num_epochs = 5 + +inputs_to_shape = {'input_1': [H, W, num_channels]} +labels_to_shape = 
{'masks': [H, W, 4]} + +processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) + +sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) + +# build all symmetric rotations for solar pannel +angles = np.linspace(0, 2 * np.pi, 7)[:6] +rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) + +loss = WeightedReconstruction(beta) + +model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +optimizer = Adam(learning_rate) + +model.compile(optimizer, loss, mean_squared_error) +""" +model.fit( + sequence, + epochs=max_num_epochs, + verbose=1, + workers=0) +model.save_weights('UNET-VGG_solar_panel_canonical.hdf5') +""" From 9134957258e04f96f79eb5a45197a003ecd1e985 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 29 Nov 2021 09:59:17 +0100 Subject: [PATCH 068/101] Update training canonical scripts --- examples/pix2pose/backend.py | 1 + examples/pix2pose/canonical_coloring.py | 305 ------------------ examples/pix2pose/scenes.py | 86 +++++ examples/pix2pose/test_rotated_image.py | 110 +++++-- .../pix2pose/train_canonical_transform.py | 53 +-- examples/pix2pose/train_symmetric.py | 38 ++- 6 files changed, 244 insertions(+), 349 deletions(-) delete mode 100644 examples/pix2pose/canonical_coloring.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 4fd5b4bc8..5555d33fc 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -668,6 +668,7 @@ def compute_norm_SO3(rotation_mesh, rotation): def calculate_canonical_rotation(rotation_mesh, rotations): norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] closest_rotation_arg = np.argmin(norms) + # print(closest_rotation_arg) closest_rotation = rotations[closest_rotation_arg] canonical_rotation = np.linalg.inv(closest_rotation) return canonical_rotation diff --git a/examples/pix2pose/canonical_coloring.py b/examples/pix2pose/canonical_coloring.py deleted file mode 100644 index 0a9f74f7e..000000000 --- a/examples/pix2pose/canonical_coloring.py +++ /dev/null @@ -1,305 +0,0 @@ -import numpy as np -from backend import build_rotation_matrix_y -from paz.backend.render import sample_uniformly, split_alpha_channel -from pyrender import (PerspectiveCamera, OffscreenRenderer, DirectionalLight, - RenderFlags, Mesh, Scene) -import trimesh -from coloring import color_object -from backend import quaternion_to_rotation_matrix -from backend import to_affine_matrix - - -def sample_uniform(min_value, max_value): - """Samples values inside segment [min_value, max_value) - - # Arguments - segment_limits: List (2) containing min and max segment values. - - # Returns - Float inside segment [min_value, max_value] - """ - if min_value > max_value: - raise ValueError('First value must be lower than second value') - value = np.random.uniform(min_value, max_value) - return value - - -def sample_inside_box3D(min_W, min_H, min_D, max_W, max_H, max_D): - """ Samples points inside a 3D box defined by the - width, height and depth limits. - ________ - / /| - / / | - / / | - /_______/ / - | | | / / - height | | / depth - | |_______|/ / - - --widht-- - - # Arguments - width_limits: List (2) with [min_value_width, max_value_width]. - height_limits: List (2) with [min_value_height, max_value_height]. - depth_limits: List (2) with [min_value_depth, max_value_depth]. - - # Returns - Array (3) of point inside the 3D box. 
- """ - W = sample_uniform(min_W, max_W) - H = sample_uniform(min_H, max_H) - D = sample_uniform(min_D, max_D) - box_point3D = np.array([W, H, D]) - return box_point3D - - -def sample_random_rotation_matrix2(): - """Samples SO3 in rotation matrix form. - - # Return - Array (3, 3). - - # References - [Lost in my terminal](http://blog.lostinmyterminal.com/python/2015/05/ - 12/random-rotation-matrix.html) - [real-time rendering](from http://www.realtimerendering.com/resources/ - GraphicsGems/gemsiii/rand_rotation.c) - """ - theta = 2.0 * np.pi * np.random.uniform() - phi = 2.0 * np.pi * np.random.uniform() - z = 2.0 * np.random.uniform() - # Compute a vector V used for distributing points over the sphere via the - # reflection I - V Transpose(V). - # This formulation of V will guarantee that if x[1] and x[2] are uniformly - # distributed, the reflected points will be uniform on the sphere. - # random_vector has length sqrt(2) to eliminate 2 in the Householder matrix - r = np.sqrt(z) - random_vector = np.array([np.sin(phi) * r, - np.cos(phi) * r, - np.sqrt(2.0 - z)]) - sin_theta = np.sin(theta) - cos_theta = np.cos(theta) - R = np.array([[+cos_theta, +sin_theta, 0.0], - [-sin_theta, +cos_theta, 0.0], - [0.0, 0.0, 1.0]]) - random_rotation_matrix = ( - np.outer(random_vector, random_vector) - np.eye(3)).dot(R) - return random_rotation_matrix - - -def sample_random_rotation_matrix(): - quaternion = np.random.rand(4) - quaternion = quaternion / np.linalg.norm(quaternion) - rotation_matrix = quaternion_to_rotation_matrix(quaternion) - return rotation_matrix - - -def sample_random_rotation_matrix3(): - epsilon = 0.1 - x_angle = np.random.uniform((-np.pi / 2.0) + epsilon, (np.pi / 2.0) - epsilon) - y_angle = np.random.uniform((-np.pi / 2.0) + epsilon, (np.pi / 2.0) - epsilon) - z_angle = np.random.uniform(np.pi, -np.pi) - - x_matrix = build_rotation_matrix_x(x_angle) - y_matrix = build_rotation_matrix_y(y_angle) - z_matrix = build_rotation_matrix_z(z_angle) - - rotation_matrix = np.dot(z_matrix, np.dot(y_matrix, x_matrix)) - return rotation_matrix - - -def sample_affine_transform(min_corner, max_corner): - min_W, min_H, min_D = min_corner - max_W, max_H, max_D = max_corner - translation = sample_inside_box3D(min_W, min_H, min_D, max_W, max_H, max_D) - rotation_matrix = sample_random_rotation_matrix3() - affine_matrix = to_affine_matrix(rotation_matrix, translation) - return affine_matrix - - -class CanonicalScene(): - def __init__(self, path_OBJ, camera_pose, min_corner, max_corner, - symmetric_transforms, - viewport_size=(128, 128), y_fov=3.14159 / 4.0, - light_intensity=[0.5, 30]): - self.light_intensity = light_intensity - self.symmetric_transforms = symmetric_transforms - self.min_corner, self.max_corner = min_corner, max_corner - self.scene = Scene(bg_color=[0, 0, 0, 0]) - self.light = self._build_light(light_intensity, camera_pose) - self.camera = self._build_camera(y_fov, viewport_size, camera_pose) - self.pixel_mesh = self.scene.add(color_object(path_OBJ)) - self.mesh = self.scene.add( - Mesh.from_trimesh(trimesh.load(path_OBJ), smooth=True)) - - self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) - - self.flags_RGBA = RenderFlags.RGBA - self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT - - def _build_light(self, light, pose): - directional_light = DirectionalLight([1.0, 1.0, 1.0], np.mean(light)) - directional_light = self.scene.add(directional_light, pose=pose) - return directional_light - - def _build_camera(self, y_fov, viewport_size, pose): - aspect_ratio = 
np.divide(*viewport_size) - camera = PerspectiveCamera(y_fov, aspectRatio=aspect_ratio) - camera = self.scene.add(camera, pose=pose) - return camera - - def _sample_parameters(self, min_corner, max_corner): - mesh_transform = sample_affine_transform(min_corner, max_corner) - light_intensity = sample_uniformly(self.light_intensity) - return mesh_transform, light_intensity - - def render(self): - mesh_transform, light_intensity = self._sample_parameters( - self.min_corner, self.max_corner) - mesh_rotation = mesh_transform[0:3, 0:3] - canonical_rotation = calculate_canonical_rotation( - mesh_rotation, self.symmetric_transforms) - # mesh_rotation[0:3, 0:3] = canonical_rotation - canonical_rotation = np.dot(mesh_rotation, canonical_rotation) - mesh_rotation[0:3, 0:3] = canonical_rotation - self.scene.set_pose(self.mesh, mesh_transform) - self.scene.set_pose(self.pixel_mesh, mesh_transform) - self.light.light.intensity = light_intensity - - self.pixel_mesh.mesh.is_visible = False - image, depth = self.renderer.render(self.scene, self.flags_RGBA) - self.pixel_mesh.mesh.is_visible = True - image, alpha = split_alpha_channel(image) - self.mesh.mesh.is_visible = False - RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) - self.mesh.mesh.is_visible = True - return image, alpha, RGB_mask - - def render_symmetries(self): - images, alphas, RGB_masks = [], [], [] - for rotation in self.symmetric_transforms: - symmetric_transform = to_affine_matrix(rotation, np.zeros(3)) - self.scene.set_pose(self.mesh, symmetric_transform) - self.scene.set_pose(self.pixel_mesh, symmetric_transform) - self.pixel_mesh.mesh.is_visible = False - image, depth = self.renderer.render(self.scene, self.flags_RGBA) - self.pixel_mesh.mesh.is_visible = True - image, alpha = split_alpha_channel(image) - self.mesh.mesh.is_visible = False - RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) - self.mesh.mesh.is_visible = True - images.append(image) - alphas.append(alpha) - RGB_masks.append(RGB_mask[..., 0:3]) - images = np.concatenate(images, axis=1) - RGB_masks = np.concatenate(RGB_masks, axis=1) - # print(images.shape) - # print(RGB_masks.shape) - images = np.concatenate([images, RGB_masks], axis=0) - return images - - -def compute_norm_SO3(rotation_mesh, rotation): - difference = np.dot(np.linalg.inv(rotation), rotation_mesh) - np.eye(3) - distance = np.linalg.norm(difference, ord='fro') - return distance - - -def calculate_canonical_rotation(rotation_mesh, rotations): - norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] - closest_rotation_arg = np.argmin(norms) - # print(closest_rotation_arg) - closest_rotation = rotations[closest_rotation_arg] - canonical_rotation = np.linalg.inv(closest_rotation) - return canonical_rotation - - -if __name__ == "__main__": - import os - from paz.backend.image import show_image - from backend import build_rotation_matrix_z - from backend import build_rotation_matrix_x - # from backend import build_rotation_matrix_y - path_OBJ = 'single_solar_panel_02.obj' - root_path = os.path.expanduser('~') - path_OBJ = os.path.join(root_path, path_OBJ) - num_occlusions = 1 - image_shape = (128, 128, 3) - viewport_size = image_shape[:2] - y_fov = 3.14159 / 4.0 - distance = [1.0, 1.0] - light = [1.0, 30] - - # min_corner = [-0.1, -0.1, -0.0] - # max_corner = [+0.1, +0.1, +0.4] - angles = np.linspace(0, 2 * np.pi, 7)[:6] - symmetric_rotations = np.array( - [build_rotation_matrix_z(angle) for angle in angles]) - min_corner = [0.0, 0.0, -0.4] - max_corner = [0.0, 0.0, +0.0] - # 
translation = np.array([0.0, 0.0, 1.0]) - translation = np.array([0.0, 0.0, 1.0]) - camera_rotation = np.eye(3) - camera_rotation = build_rotation_matrix_x(np.pi) - translation = np.array([0.0, 0.0, -1.0]) - camera_pose = to_affine_matrix(camera_rotation, translation) - renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, - max_corner, symmetric_rotations) - # from pyrender import Viewer - # Viewer(scene.scene) - renderer.scene.ambient_light = [1.0, 1.0, 1.0] - image = renderer.render_symmetries() - show_image(image) - for _ in range(0): - image, alpha, RGB_mask = renderer.render() - show_image(image) - show_image(RGB_mask[:, :, 0:3]) - - from pipelines import DomainRandomization - from paz.abstract.sequence import GeneratingSequence - from loss import WeightedReconstruction - from paz.models import UNET_VGG16 - from tensorflow.keras.optimizers import Adam - from metrics import mean_squared_error - import glob - - background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' - background_wildcard = os.path.join(root_path, background_wildcard) - image_paths = glob.glob(background_wildcard) - - H, W, num_channels = image_shape - batch_size = 32 - steps_per_epoch = 1000 - beta = 3.0 - num_classes = 3 - learning_rate = 0.001 - max_num_epochs = 5 - - inputs_to_shape = {'input_1': [H, W, num_channels]} - labels_to_shape = {'masks': [H, W, 4]} - - processor = DomainRandomization( - renderer, image_shape, image_paths, inputs_to_shape, - labels_to_shape, num_occlusions) - - sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) - - # build all symmetric rotations for solar pannel - angles = np.linspace(0, 2 * np.pi, 7)[:6] - rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) - - # loss = WeightedSymmetricReconstruction(rotations, beta) - loss = WeightedReconstruction(beta) - - model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) - optimizer = Adam(learning_rate) - - model.compile(optimizer, loss, mean_squared_error) - - model.fit( - sequence, - epochs=max_num_epochs, - verbose=1, - workers=0) - model.save_weights('UNET-VGG_solar_panel_canonical.hdf5') - diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index 3b94a655c..1cbb9b9a7 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -9,6 +9,7 @@ from backend import to_affine_matrix from backend import sample_affine_transform from backend import calculate_canonical_rotation +from paz.models import UNET_VGG16 class PixelMaskRenderer(): @@ -147,3 +148,88 @@ def render_symmetries(self): RGB_masks = np.concatenate(RGB_masks, axis=1) images = np.concatenate([images, RGB_masks], axis=0) return images + + +if __name__ == "__main__": + import os + from paz.backend.image import show_image + from backend import build_rotation_matrix_x + from backend import build_rotation_matrix_z + from backend import build_rotation_matrix_y + from paz.backend.render import compute_modelview_matrices + from pipelines import DomainRandomization + import glob + + # generic parameters + root_path = os.path.expanduser('~') + num_occlusions = 1 + image_shape = (128, 128, 3) + viewport_size = image_shape[:2] + y_fov = 3.14159 / 4.0 + light = [1.0, 30] + + # solar panel parameters + """ + OBJ_name = 'single_solar_panel_02.obj' + path_OBJ = os.path.join(root_path, OBJ_name) + angles = np.linspace(0, 2 * np.pi, 7)[:6] + symmetries = np.array([build_rotation_matrix_z(angle) for angle in angles]) + camera_rotation = build_rotation_matrix_x(np.pi) + translation = np.array([0.0, 
0.0, -1.0]) + camera_pose = to_affine_matrix(camera_rotation, translation) + min_corner = [0.0, 0.0, -0.4] + max_corner = [0.0, 0.0, +0.0] + """ + + # large clamp parameters + # REMEMBER TO CHANGE THE Ns coefficient to values between [0, 1] in + # textured.mtl. For example change 96.07 to .967 + OBJ_name = '.keras/paz/datasets/ycb_models/051_large_clamp/textured.obj' + path_OBJ = os.path.join(root_path, OBJ_name) + translation = np.array([0.0, 0.0, 0.25]) + camera_pose, y = compute_modelview_matrices(translation, np.zeros((3))) + align_z = build_rotation_matrix_z(np.pi / 20) + camera_pose[:3, :3] = np.matmul(align_z, camera_pose[:3, :3]) + min_corner = [-0.05, -0.02, -0.05] + max_corner = [+0.05, +0.02, +0.01] + + angles = [0.0, np.pi] + symmetries = np.array([build_rotation_matrix_y(angle) for angle in angles]) + renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, + max_corner, symmetries) + renderer.scene.ambient_light = [1.0, 1.0, 1.0] + image = renderer.render_symmetries() + show_image(image) + for arg in range(0): + image, alpha, RGB_mask = renderer.render() + show_image(RGB_mask[:, :, 0:3]) + + model = UNET_VGG16(3, image_shape, freeze_backbone=True) + model.load_weights('UNET-VGG_large_clamp_canonical_10.hdf5') + + background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' + background_wildcard = os.path.join(root_path, background_wildcard) + image_paths = glob.glob(background_wildcard) + + H, W, num_channels = image_shape = (128, 128, 3) + inputs_to_shape = {'input_1': [H, W, num_channels]} + labels_to_shape = {'masks': [H, W, 4]} + processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) + + for arg in range(100): + sample = processor() + image = sample['inputs']['input_1'] + image = (image * 255.0).astype('uint8') + RGB_mask = sample['labels']['masks'] + # image, alpha, RGB_mask = renderer.render() + RGB_mask_true = (RGB_mask[:, :, 0:3] * 255.0).astype('uint8') + RGB_mask_pred = model.predict(np.expand_dims(image / 255.0, 0)) + RGB_mask_pred = np.squeeze(RGB_mask_pred * 255.0, 0) + # error = np.square(RGB_mask_true - RGB_mask_pred) + # error = RGB_mask_pred - RGB_mask + RGB_mask_pred = RGB_mask_pred.astype('uint8') + print(image.dtype, RGB_mask_pred.dtype, RGB_mask_true.dtype) + images = np.concatenate([image, RGB_mask_pred, RGB_mask_true], axis=1) + show_image(images) diff --git a/examples/pix2pose/test_rotated_image.py b/examples/pix2pose/test_rotated_image.py index 8383744c2..9c383edea 100644 --- a/examples/pix2pose/test_rotated_image.py +++ b/examples/pix2pose/test_rotated_image.py @@ -1,15 +1,29 @@ import numpy as np import os import glob -from paz.backend.image import show_image +from paz.backend.image import show_image, resize_image +from paz.models import UNET_VGG16 +from paz.abstract import GeneratingSequence +from paz.backend.camera import Camera +from pipelines import Pix2Pose +from pipelines import EstimatePoseMasks +from pipelines import DomainRandomization from backend import build_rotation_matrix_z, rotate_image -from backend import normalized_device_coordinates_to_image -from backend import image_to_normalized_device_coordinates from scenes import PixelMaskRenderer +from backend import build_rotation_matrix_x, build_rotation_matrix_y +from backend import denormalize_points2D +from processors import SolveChangingObjectPnPRANSAC +from paz.backend.quaternion import rotation_vector_to_quaternion +from paz.abstract.messages import Pose6D +from backend import build_cube_points3D +from 
backend import draw_poses6D +from paz.backend.image import load_image +from backend import draw_masks + scale = 4 -image_shape = [128 * scale, 128 * scale, 3] +H, W, num_channels = image_shape = [128, 128, 3] root_path = os.path.expanduser('~') background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' background_wildcard = os.path.join(root_path, background_wildcard) @@ -24,23 +38,81 @@ light = [1.0, 30] top_only = False roll = 3.14159 -shift = 0.05 +shift = 0 # %0.05 +batch_size = 32 +steps_per_epoch = 1000 + +image_size = [128, 128] +focal_length = image_size[1] +image_center = (image_size[1] / 2.0, image_size[0] / 2.0) +camera_intrinsics = np.array([[focal_length, 0, image_center[0]], + [0, focal_length, image_center[1]], + [0, 0, 1]]) + + +image_shape = (128, 128, 3) +num_classes = 3 +model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +model.load_weights('weights/UNET-VGG_solar_panel_canonical_13.hdf5') +object_sizes_list = [15000, 15000, 2000] +object_sizes = np.array(object_sizes_list) +cube_points = object_sizes +cube_points3D = build_cube_points3D(*object_sizes) +epsilon = 0.15 +estimate_keypoints = Pix2Pose(model, object_sizes, epsilon, True) +print(object_sizes) +predict_pose = SolveChangingObjectPnPRANSAC(camera_intrinsics, 5, 100) + + +def quick_pose(image): + image = resize_image(image, (128, 128)) + keypoints = estimate_keypoints(image) + points2D = keypoints['points2D'] + points3D = keypoints['points3D'] + # points3D[:, 2:3] = 0.0 + points2D = denormalize_points2D(points2D, 128, 128) + success, rotation, translation = predict_pose(points3D, points2D) + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, 'solar_panel') + poses6D = [pose6D] + # show_image(image) + points = [[points2D, points3D]] + image = draw_masks(image, points, object_sizes) + image = image.astype('float') + image = draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics) + image = image.astype('uint8') + image = resize_image(image, (256, 256)) + show_image(image) + + +image = load_image('zed_left_1011.png') +image = image[250:800, 250:850, :] +quick_pose(image) + +image = load_image('MicrosoftTeams-image.png') +quick_pose(image) + +image = load_image('zed_left_705.png') +image = image[250:1080, 250:1400, :] +quick_pose(image) renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, light, top_only, roll, shift) renderer.scene.ambient_light = [1.0, 1.0, 1.0] -for _ in range(3): - image, alpha, RGB_mask = renderer.render() - RGB_mask = RGB_mask[..., 0:3] - show_image(image) - show_image(RGB_mask) - angles = np.linspace(0, 2 * np.pi, 7)[0:6] - images = [] - for angle in angles: - rotation_matrix = build_rotation_matrix_z(angle) - rotated_image = rotate_image(RGB_mask, rotation_matrix) - rotated_image = rotated_image.astype('uint8') - images.append(rotated_image) - images = np.concatenate(images, axis=1) - show_image(images) +inputs_to_shape = {'input_1': [H, W, num_channels]} +labels_to_shape = {'masks': [H, W, 4]} + +processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) + +for _ in range(100): + sample = processor() + image = sample['inputs']['input_1'] + masks = sample['labels']['masks'] + image = (image * 255).astype('uint8') + # image, alpha, RGB_mask = renderer.render() + # show_image((image * 255).astype('uint8')) + quick_pose(image) + # show_image(images) diff --git a/examples/pix2pose/train_canonical_transform.py 
b/examples/pix2pose/train_canonical_transform.py index c9387e77c..49de71cef 100644 --- a/examples/pix2pose/train_canonical_transform.py +++ b/examples/pix2pose/train_canonical_transform.py @@ -6,26 +6,36 @@ from paz.backend.image import show_image from paz.models import UNET_VGG16 from paz.abstract.sequence import GeneratingSequence +from paz.backend.render import compute_modelview_matrices from scenes import CanonicalScene from backend import build_rotation_matrix_z from backend import build_rotation_matrix_x +from backend import build_rotation_matrix_y from backend import to_affine_matrix from pipelines import DomainRandomization from loss import WeightedReconstruction from metrics import mean_squared_error -path_OBJ = 'single_solar_panel_02.obj' root_path = os.path.expanduser('~') -path_OBJ = os.path.join(root_path, path_OBJ) num_occlusions = 1 image_shape = (128, 128, 3) viewport_size = image_shape[:2] y_fov = 3.14159 / 4.0 -distance = [1.0, 1.0] light = [1.0, 30] +# training parameters +H, W, num_channels = image_shape +batch_size = 32 +steps_per_epoch = 1000 +beta = 3.0 +num_classes = 3 +learning_rate = 0.001 +max_num_epochs = 5 + +""" +path_OBJ = 'single_solar_panel_02.obj' angles = np.linspace(0, 2 * np.pi, 7)[:6] symmetric_rotations = np.array( [build_rotation_matrix_z(angle) for angle in angles]) @@ -34,30 +44,35 @@ camera_rotation = build_rotation_matrix_x(np.pi) translation = np.array([0.0, 0.0, -1.0]) camera_pose = to_affine_matrix(camera_rotation, translation) +""" + +# large clamp parameters +# REMEMBER TO CHANGE THE Ns coefficient to values between [0, 1] in +# textured.mtl. For example change 96.07 to .967 +OBJ_name = '.keras/paz/datasets/ycb_models/051_large_clamp/textured.obj' +translation = np.array([0.0, 0.0, 0.25]) +camera_pose, y = compute_modelview_matrices(translation, np.zeros((3))) +align_z = build_rotation_matrix_z(np.pi / 20) +camera_pose[:3, :3] = np.matmul(align_z, camera_pose[:3, :3]) +min_corner = [-0.05, -0.02, -0.05] +max_corner = [+0.05, +0.02, +0.01] + +angles = [0.0, np.pi] +symmetric_rotations = np.array( + [build_rotation_matrix_y(angle) for angle in angles]) + + +path_OBJ = os.path.join(root_path, OBJ_name) renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, max_corner, symmetric_rotations) -# from pyrender import Viewer -# Viewer(scene.scene) renderer.scene.ambient_light = [1.0, 1.0, 1.0] image = renderer.render_symmetries() show_image(image) -for _ in range(100): - image, alpha, RGB_mask = renderer.render() - show_image(image) - show_image(RGB_mask[:, :, 0:3]) background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' background_wildcard = os.path.join(root_path, background_wildcard) image_paths = glob.glob(background_wildcard) -H, W, num_channels = image_shape -batch_size = 32 -steps_per_epoch = 1000 -beta = 3.0 -num_classes = 3 -learning_rate = 0.001 -max_num_epochs = 5 - inputs_to_shape = {'input_1': [H, W, num_channels]} labels_to_shape = {'masks': [H, W, 4]} @@ -77,11 +92,9 @@ optimizer = Adam(learning_rate) model.compile(optimizer, loss, mean_squared_error) -""" model.fit( sequence, epochs=max_num_epochs, verbose=1, workers=0) -model.save_weights('UNET-VGG_solar_panel_canonical.hdf5') -""" +model.save_weights('UNET-VGG_large_clamp_canonical.hdf5') diff --git a/examples/pix2pose/train_symmetric.py b/examples/pix2pose/train_symmetric.py index cd6f85376..313658ff8 100644 --- a/examples/pix2pose/train_symmetric.py +++ b/examples/pix2pose/train_symmetric.py @@ -9,6 +9,7 @@ from scenes import PixelMaskRenderer from pipelines import 
DomainRandomization from loss import WeightedSymmetricReconstruction +from loss import WeightedReconstruction from metrics import mean_squared_error image_shape = [128, 128, 3] @@ -22,7 +23,8 @@ num_occlusions = 1 viewport_size = image_shape[:2] y_fov = 3.14159 / 4.0 -distance = [0.3, 0.5] +distance = [1.0, 1.0] +# distance = [0.3, 0.5] light = [1.0, 30] top_only = False roll = 3.14159 @@ -32,9 +34,9 @@ alpha = 0.1 filters = 16 num_classes = 3 -learning_rate = 0.001 +learning_rate = 0.0001 max_num_epochs = 5 -beta = 3.0 +beta = 10.0 steps_per_epoch = 1000 H, W, num_channels = image_shape = [128, 128, 3] @@ -43,6 +45,29 @@ light, top_only, roll, shift) # check why this is needed in this object renderer.scene.ambient_light = [1.0, 1.0, 1.0] +# pose = np.eye(4) +# pose[1, 1] = +np.cos(np.deg2rad(90)) +# pose[1, 2] = -np.sin(np.deg2rad(90)) +# pose[2, 2] = +np.cos(np.deg2rad(90)) +# pose[2, 1] = +np.sin(np.deg2rad(90)) +# renderer.scene.set_pose(renderer.mesh, pose) +# renderer.scene.set_pose(renderer.pixel_mesh, pose) +from paz.backend.image import show_image +from backend import rotate_image +for _ in range(0): + image, alpha, RGB_mask = renderer.render() + RGB_mask = RGB_mask[..., 0:3] + show_image(RGB_mask) + angles = np.linspace(0, 2 * np.pi, 7)[0:6] + images = [] + for angle in angles: + rotation_matrix = build_rotation_matrix_z(angle) + rotated_image = rotate_image(RGB_mask, rotation_matrix) + rotated_image = rotated_image.astype('uint8') + images.append(rotated_image) + images = np.concatenate(images, axis=1) + show_image(images) + inputs_to_shape = {'input_1': [H, W, num_channels]} labels_to_shape = {'masks': [H, W, 4]} @@ -58,16 +83,19 @@ rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) loss = WeightedSymmetricReconstruction(rotations, beta) +# loss = WeightedReconstruction(beta) -model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +model = UNET_VGG16(num_classes, image_shape, freeze_backbone=False) optimizer = Adam(learning_rate) model.compile(optimizer, loss, mean_squared_error) +model.load_weights('UNET_solar_panel_weights_notsym_04.hdf5') model.fit( sequence, epochs=max_num_epochs, verbose=1, workers=0) -model.save_weights('UNET_VGG_symmetric_weights.hdf5') +model.save_weights('UNET-VGG_solar_panel_weights_not_and_symmetric.hdf5') + From 8111c1194a66aaac4841d4ddce0431eb86f182a2 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 30 Nov 2021 13:15:11 +0100 Subject: [PATCH 069/101] Add single drawing mask function --- examples/pix2pose/backend.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 5555d33fc..bbc3fcdb2 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -258,6 +258,12 @@ def points3D_to_RGB(points3D, object_sizes): return colors +def draw_mask(image, points2D, points3D, object_sizes): + colors = points3D_to_RGB(points3D, object_sizes) + image = draw_points2D(image, points2D, colors) + return image + + # TODO change to processor def draw_masks(image, points, object_sizes): for points2D, points3D in points: From 042630f69e885d44fc9c992f1886eeeb5fb0f398 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 30 Nov 2021 13:15:40 +0100 Subject: [PATCH 070/101] Add additional pipelines for single inference visualization --- examples/pix2pose/pipelines2.py | 294 ++++++++++++++++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 examples/pix2pose/pipelines2.py diff --git a/examples/pix2pose/pipelines2.py 
b/examples/pix2pose/pipelines2.py new file mode 100644 index 000000000..06357bc17 --- /dev/null +++ b/examples/pix2pose/pipelines2.py @@ -0,0 +1,294 @@ +from paz.abstract import SequentialProcessor, Processor +from paz.pipelines import RandomizeRenderedImage as RandomizeRender +from paz.abstract.messages import Pose6D +from paz import processors as pr +from processors import ( + GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, + ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC, + ReplaceLowerThanThreshold) +from backend import build_cube_points3D +from processors import UnwrapDictionary +from processors import NormalizePoints2D +from backend import denormalize_points2D +from backend import draw_poses6D +from backend import draw_masks +from backend import draw_mask +from backend import draw_pose6D +from backend import normalize_points2D +from paz.backend.quaternion import rotation_vector_to_quaternion +from paz.backend.image import resize_image, show_image + + +class DomainRandomization(SequentialProcessor): + """Performs domain randomization on a rendered image + """ + def __init__(self, renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions=1): + super(DomainRandomization, self).__init__() + H, W = image_shape[:2] + self.add(pr.Render(renderer)) + self.add(pr.ControlMap(RandomizeRender(image_paths), [0, 1], [0])) + self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) + self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) + self.add(pr.SequenceWrapper({0: inputs_to_shape}, + {1: labels_to_shape})) + + +class PredictRGBMask(SequentialProcessor): + def __init__(self, model, epsilon=0.15): + super(PredictRGBMask, self).__init__() + self.add(pr.ResizeImage(model.input_shape[1:3])) + self.add(pr.NormalizeImage()) + self.add(pr.ExpandDims(0)) + self.add(pr.Predict(model)) + self.add(pr.Squeeze(0)) + self.add(ReplaceLowerThanThreshold(epsilon)) + self.add(pr.DenormalizeImage()) + self.add(pr.CastImage('uint8')) + + +class RGBMaskToObjectPoints3D(SequentialProcessor): + def __init__(self, object_sizes): + super(RGBMaskToObjectPoints3D, self).__init__() + self.add(GetNonZeroValues()) + self.add(ImageToNormalizedDeviceCoordinates()) + self.add(Scale(object_sizes / 2.0)) + + +class RGBMaskToImagePoints2D(SequentialProcessor): + def __init__(self, output_shape): + super(RGBMaskToImagePoints2D, self).__init__() + self.add(GetNonZeroArguments()) + self.add(ArgumentsToImagePoints2D()) + # self.add(NormalizePoints2D(output_shape)) + + +class SolveChangingObjectPnP(SequentialProcessor): + def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): + super(SolveChangingObjectPnP, self).__init__() + self.MINIMUM_REQUIRED_POINTS = 4 + self.add(SolveChangingObjectPnPRANSAC( + camera_intrinsics, inlier_thresh, num_iterations)) + + +class Pix2Points(pr.Processor): + def __init__(self, model, object_sizes, epsilon=0.15, + resize=True, draw=True): + self.object_sizes = object_sizes + self.predict_RGBMask = PredictRGBMask(model, epsilon) + self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) + self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) + self.resize, self.draw = resize, draw + self.wrap = pr.WrapOutput( + ['image', 'points2D', 'points3D', 'RGB_mask']) + + def call(self, image): + RGB_mask = self.predict_RGBMask(image) + H, W, num_channels = image.shape + if self.resize: + RGB_mask = resize_image(RGB_mask, (W, H)) + points3D = self.mask_to_points3D(RGB_mask) + points2D = 
self.mask_to_points2D(RGB_mask) + points2D = normalize_points2D(points2D, W, H) + if self.draw: + image = draw_mask(image, points2D, points3D, self.object_sizes) + return self.wrap(image, points2D, points3D, RGB_mask) + + +class Pix2Pose(pr.Processor): + def __init__(self, model, object_sizes, camera, epsilon=0.15, + class_name=None, resize=True, draw=True): + self.pix2points = Pix2Points( + model, object_sizes, epsilon, resize, False) + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.MIN_REQUIRED_POINTS = self.predict_pose.MINIMUM_REQUIRED_POINTS + self.class_name = str(class_name) if class_name is None else class_name + self.object_sizes = object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.camera = camera + self.draw = draw + + def call(self, image, box2D=None): + results = self.pix2points(image) + points2D, points3D = results['points2D'], results['points3D'] + H, W, num_channels = image.shape + points2D = denormalize_points2D(points2D, H, W) + if box2D is not None: + points2D = self.change_coordinates(points2D, box2D) + + valid_num_points = len(points3D) > self.MIN_REQUIRED_POINTS + if valid_num_points: + success, rotation, translation = self.predict_pose(points3D, + points2D) + if success and valid_num_points: + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, self.class_name) + else: + pose6D = None + + if self.draw: + image = draw_mask(image, points2D, points3D, self.object_sizes) + image = draw_pose6D(image, pose6D, self.cube_points3D, + self.camera.intrinsics) + results['pose6D'], results['image'] = pose6D, image + return results + + +class EstimatePoseMasks(Processor): + def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): + """Pose estimation pipeline using keypoints. 
+ """ + super(EstimatePoseMasks, self).__init__() + self.detect = detect + self.estimate_keypoints = estimate_keypoints + self.camera = camera + self.draw = draw + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(['035_power_drill']), + # pr.FilterClassBoxes2D(['solar_panel']), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.unwrap = UnwrapDictionary(['points2D', 'points3D']) + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + # self.draw_boxes2D = pr.DrawBoxes2D(['solar_panel']) + self.object_sizes = self.estimate_keypoints.object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + # affine_matrix = build_rotation_matrix_z(3.14156 / 6) + # self.cube_points3D = np.matmul(affine_matrix, self.cube_points3D.T).T + # 25000, + # self.cube_points3D = self.cube_points3D + np.array([5000, 5000, 0]) + + def call(self, image): + from paz.abstract.messages import Box2D + detections = self.detect(image) + # detections = {'boxes2D': [Box2D([320, 280, 1300, 1060], 1.0, 'solar_panel')]} + boxes2D = self.postprocess_boxes(detections) + # boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points = [], [] + for crop, box2D in zip(cropped_images, boxes2D): + points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) + points2D = denormalize_points2D(points2D, *crop.shape[0:2]) + points2D = self.change_coordinates(points2D, box2D) + if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: + continue + success, rotation, translation = self.predict_pose( + points3D, points2D) + if success is False: + continue + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, box2D.class_name) + poses6D.append(pose6D), points.append([points2D, points3D]) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + image = draw_masks(image, points, self.object_sizes) + image = draw_poses6D(image, poses6D, self.cube_points3D, + self.camera.intrinsics) + return self.wrap(image, boxes2D, poses6D) + + + +class Pix2Pose2(pr.Processor): + def __init__(self, model, object_sizes, epsilon=0.15, + class_name=None, with_resize=True, draw=True): + self.object_sizes = object_sizes + self.predict_RGBMask = PredictRGBMask(model, epsilon) + self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) + self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.wrap = pr.WrapOutput(['image', 'points3D', 'points2D', 'RGB_mask']) + self.with_resize = with_resize + self.class_name = str(class_name) if class_name is None else class_name + self.draw = draw + + def call(self, image): + RGB_mask = self.predict_RGBMask(image) + H, W, num_channels = image.shape + if self.with_resize: + RGB_mask = resize_image(RGB_mask, (W, H)) + points3D = self.mask_to_points3D(RGB_mask) + points2D = self.mask_to_points2D(RGB_mask) + points2D = normalize_points2D(points2D, (W, H)) + if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: + pose6D = None + success, rotation, translation = self.predict_pose(points3D, points2D) + if success is False: + pose6D = None + quaternion = 
rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, self.class_name) + if self.draw: + image = draw_mask(image, points2D, points3D, self.object_sizes) + image = draw_pose6D(image, pose6D, self.cube_points3D, self.camera.intrinsics) + return self.wrap(image, points3D, points2D, RGB_mask) + + +class EstimatePoseMasks(Processor): + def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): + """Pose estimation pipeline using keypoints. + """ + super(EstimatePoseMasks, self).__init__() + self.detect = detect + self.estimate_keypoints = estimate_keypoints + self.camera = camera + self.draw = draw + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + # pr.FilterClassBoxes2D(['035_power_drill']), + pr.FilterClassBoxes2D(['solar_panel']), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.unwrap = UnwrapDictionary(['points2D', 'points3D']) + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + # self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.draw_boxes2D = pr.DrawBoxes2D(['solar_panel']) + self.object_sizes = self.estimate_keypoints.object_sizes + from backend import build_rotation_matrix_z + import numpy as np + self.cube_points3D = build_cube_points3D(*self.object_sizes) + affine_matrix = build_rotation_matrix_z(3.14156 / 6) + self.cube_points3D = np.matmul(affine_matrix, self.cube_points3D.T).T + # 25000, + # self.cube_points3D = self.cube_points3D + np.array([5000, 5000, 0]) + + def call(self, image): + from paz.abstract.messages import Box2D + detections = self.detect(image) + detections = {'boxes2D': [Box2D([320, 280, 1300, 1060], 1.0, 'solar_panel')]} + boxes2D = self.postprocess_boxes(detections) + # boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points = [], [] + for crop, box2D in zip(cropped_images, boxes2D): + points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) + points2D = denormalize_points2D(points2D, *crop.shape[0:2]) + points2D = self.change_coordinates(points2D, box2D) + if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: + continue + success, rotation, translation = self.predict_pose( + points3D, points2D) + if success is False: + continue + print('ROTATION', rotation.shape) + quaternion = rotation_vector_to_quaternion(rotation) + print('QUATERNION', quaternion.shape) + pose6D = Pose6D(quaternion, translation, box2D.class_name) + poses6D.append(pose6D), points.append([points2D, points3D]) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + image = draw_masks(image, points, self.object_sizes) + image = draw_poses6D(image, poses6D, self.cube_points3D, + self.camera.intrinsics) + return self.wrap(image, boxes2D, poses6D) From 3cbba0d927ff77b5735c71889df0429d38dbae67 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 30 Nov 2021 16:38:01 +0100 Subject: [PATCH 071/101] Refactor pipelines to work independent from detector --- examples/pix2pose/pipelines3.py | 120 ++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 examples/pix2pose/pipelines3.py diff --git a/examples/pix2pose/pipelines3.py b/examples/pix2pose/pipelines3.py new file mode 100644 index 000000000..0fe383eb7 --- /dev/null +++ 
b/examples/pix2pose/pipelines3.py @@ -0,0 +1,120 @@ +from paz.abstract import SequentialProcessor, Processor +from paz.pipelines import RandomizeRenderedImage as RandomizeRender +from paz.abstract.messages import Pose6D +from paz import processors as pr +from processors import ( + GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, + ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC, + ReplaceLowerThanThreshold) +from backend import build_cube_points3D +from processors import UnwrapDictionary +from processors import NormalizePoints2D +from backend import denormalize_points2D +from backend import draw_poses6D +from backend import draw_masks +from backend import draw_mask +from backend import normalize_points2D +from backend import draw_pose6D +from paz.backend.quaternion import rotation_vector_to_quaternion +from paz.backend.image import resize_image, show_image +from pipelines import SolveChangingObjectPnP +from pipelines import RGBMaskToImagePoints2D, RGBMaskToObjectPoints3D, PredictRGBMask + + +class Pix2Points(pr.Processor): + def __init__(self, model, object_sizes, epsilon=0.15, resize=True): + self.object_sizes = object_sizes + self.predict_RGBMask = PredictRGBMask(model, epsilon) + self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) + self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) + self.resize = resize + self.wrap = pr.WrapOutput(['points2D', 'points3D', 'RGB_mask']) + + def call(self, image): + RGB_mask = self.predict_RGBMask(image) + H, W, num_channels = image.shape + if self.resize: + RGB_mask = resize_image(RGB_mask, (W, H)) + points3D = self.mask_to_points3D(RGB_mask) + points2D = self.mask_to_points2D(RGB_mask) + points2D = normalize_points2D(points2D, H, W) + return self.wrap(points2D, points3D, RGB_mask) + + +class Pix2Pose(pr.Processor): + def __init__(self, model, object_sizes, camera, + epsilon=0.15, class_name=None, draw=True): + + self.pix2points = Pix2Points(model, object_sizes, epsilon, True) + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.class_name = str(class_name) if class_name is None else class_name + self.object_sizes = object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.camera = camera + self.draw = draw + + def call(self, image, box2D=None): + results = self.pix2points(image) + points2D, points3D = results['points2D'], results['points3D'] + H, W, num_channels = image.shape + points2D = denormalize_points2D(points2D, H, W) + if box2D is not None: + points2D = self.change_coordinates(points2D, box2D) + self.class_name = box2D.class_name + + min_num_points = len(points3D) > self.predict_pose.MIN_REQUIRED_POINTS + if min_num_points: + pose_results = self.predict_pose(points3D, points2D) + success, rotation, translation = pose_results + if success and min_num_points: + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, self.class_name) + else: + pose6D = None + if self.draw: + topic = 'image_crop' if box2D is not None else 'image' + image = draw_mask(image, points2D, points3D, self.object_sizes) + image = draw_pose6D(image, pose6D, self.cube_points3D, + self.camera.intrinsics) + results[topic] = image + results['points2D'], results['pose6D'] = points2D, pose6D + return results + + +class EstimatePoseMasks(Processor): + def __init__(self, detect, estimate_pose, offsets, draw=True): + """Pose estimation pipeline using keypoints. 
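        Given a full image, `detect` proposes boxes2D; these are filtered to
        the configured class ('035_power_drill' here), squared, offset,
        clipped and cropped, and `estimate_pose` is run on every crop to
        recover points2D, points3D and a pose6D. With `draw` enabled the
        boxes, colored masks and projected cube poses are drawn back onto
        the full image before wrapping 'image', 'boxes2D' and 'poses6D'.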
+ """ + super(EstimatePoseMasks, self).__init__() + self.detect = detect + self.estimate_pose = estimate_pose + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(['035_power_drill']), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.object_sizes = self.estimate_pose.object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.draw = draw + + def call(self, image): + boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points = [], [] + for crop, box2D in zip(cropped_images, boxes2D): + results = self.estimate_pose(crop, box2D) + pose6D, points2D, points3D = self.unwrap(results) + poses6D.append(pose6D), points.append([points2D, points3D]) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + image = draw_masks(image, points, self.object_sizes) + image = draw_poses6D(image, poses6D, self.cube_points3D, + self.estimate_pose.camera.intrinsics) + return self.wrap(image, boxes2D, poses6D) From 0a8b1f3bbe7ec6e36ef0b69ea1a5861d36e2a5f9 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 30 Nov 2021 17:21:26 +0100 Subject: [PATCH 072/101] Refactor demo to work with new pipelines --- examples/pix2pose/backend.py | 6 +++--- examples/pix2pose/demo.py | 37 ++++++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index bbc3fcdb2..e19f9e948 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -283,9 +283,9 @@ def draw_points2D(image, points2D, colors): # Returns Array with drawn points. 
""" - keypoints = points2D.astype(int) - U = keypoints[:, 0] - V = keypoints[:, 1] + points2D = points2D.astype(int) + U = points2D[:, 0] + V = points2D[:, 1] image[V, U, :] = colors return image diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 00aec2636..a76dc1af8 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -2,26 +2,29 @@ from paz.models import UNET_VGG16 from paz.backend.image import show_image, load_image from paz.backend.camera import Camera -from pipelines import Pix2Pose -from pipelines import EstimatePoseMasks from paz.backend.camera import VideoPlayer from paz.applications import SSD300FAT +# from pipelines import Pix2Pose +# from pipelines import EstimatePoseMasks +from pipelines3 import Pix2Pose +from pipelines3 import EstimatePoseMasks + image_shape = (128, 128, 3) num_classes = 3 model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) -model.load_weights('UNET_weights_epochs-10_beta-3.hdf5') +model.load_weights('weights/UNET_weights_epochs-10_beta-3.hdf5') # approximating intrinsic camera parameters camera = Camera(device_id=0) -camera.start() -image_size = camera.read().shape[0:2] -camera.stop() +# camera.start() +# image_size = camera.read().shape[0:2] +# camera.stop() # image = load_image('test_image2.jpg') -image = load_image('test_image.jpg') +image = load_image('images/test_image.jpg') image_size = image.shape[0:2] focal_length = image_size[1] image_center = (image_size[1] / 2.0, image_size[0] / 2.0) @@ -29,17 +32,31 @@ camera.intrinsics = np.array([[focal_length, 0, image_center[0]], [0, focal_length, image_center[1]], [0, 0, 1]]) -object_sizes = np.array([0.184, 0.187, 0.052]) +# object_sizes = np.array([0.184, 0.187, 0.052]) epsilon = 0.001 score_thresh = 0.50 detect = SSD300FAT(score_thresh, draw=False) offsets = [0.2, 0.2] -estimate_keypoints = Pix2Pose(model, object_sizes, epsilon, True) -pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) +# estimate_keypoints = Pix2Pose(model, object_sizes, epsilon, True) +# pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) + +object_sizes = np.array([1840, 1870, 520]) +# object_sizes = np.array([0.184, 0.187, 0.052]) +estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=False) +# image = image[50:320, 60:320] +# show_image(estimate_pose(image)['image']) +pipeline = EstimatePoseMasks(detect, estimate_pose, offsets, True) +results = pipeline(image) +predicted_image = results['image'] +show_image(predicted_image) + +""" +estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=False) results = pipeline(image) predicted_image = results['image'] show_image(predicted_image) +""" # image_size = (640, 480) # player = VideoPlayer(image_size, pipeline, camera) From 63bc57cf881cf128bc112b98608947c426cd55d8 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 1 Dec 2021 08:19:38 +0100 Subject: [PATCH 073/101] Found bug with mask drawing when box2D is given --- examples/pix2pose/pipelines3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/pix2pose/pipelines3.py b/examples/pix2pose/pipelines3.py index 0fe383eb7..8c70fca96 100644 --- a/examples/pix2pose/pipelines3.py +++ b/examples/pix2pose/pipelines3.py @@ -72,7 +72,8 @@ def call(self, image, box2D=None): pose6D = Pose6D(quaternion, translation, self.class_name) else: pose6D = None - if self.draw: + # change_coordinates puts points2D outside image. 
+ if (self.draw and (box2D is None)): topic = 'image_crop' if box2D is not None else 'image' image = draw_mask(image, points2D, points3D, self.object_sizes) image = draw_pose6D(image, pose6D, self.cube_points3D, From a54c3c19c4fef53fbc3f6de8c13b257d57239224 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 1 Dec 2021 08:21:13 +0100 Subject: [PATCH 074/101] Refactor pipelines for better modularity --- examples/pix2pose/demo.py | 15 +- examples/pix2pose/pipelines.py | 101 ++++++----- examples/pix2pose/pipelines2.py | 294 -------------------------------- examples/pix2pose/pipelines3.py | 121 ------------- 4 files changed, 65 insertions(+), 466 deletions(-) delete mode 100644 examples/pix2pose/pipelines2.py delete mode 100644 examples/pix2pose/pipelines3.py diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index a76dc1af8..5ab9e6416 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -7,8 +7,8 @@ # from pipelines import Pix2Pose # from pipelines import EstimatePoseMasks -from pipelines3 import Pix2Pose -from pipelines3 import EstimatePoseMasks +from pipelines import Pix2Pose +from pipelines import EstimatePoseMasks image_shape = (128, 128, 3) @@ -41,8 +41,7 @@ # pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) object_sizes = np.array([1840, 1870, 520]) -# object_sizes = np.array([0.184, 0.187, 0.052]) -estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=False) +estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=True) # image = image[50:320, 60:320] # show_image(estimate_pose(image)['image']) pipeline = EstimatePoseMasks(detect, estimate_pose, offsets, True) @@ -50,14 +49,6 @@ predicted_image = results['image'] show_image(predicted_image) -""" -estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=False) - -results = pipeline(image) -predicted_image = results['image'] -show_image(predicted_image) -""" - # image_size = (640, 480) # player = VideoPlayer(image_size, pipeline, camera) # player.run() diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index e8f1e4a9d..7383e3571 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -8,13 +8,14 @@ ReplaceLowerThanThreshold) from backend import build_cube_points3D from processors import UnwrapDictionary -from processors import NormalizePoints2D from backend import denormalize_points2D from backend import draw_poses6D +from backend import draw_pose6D from backend import draw_masks +from backend import draw_mask from backend import normalize_points2D from paz.backend.quaternion import rotation_vector_to_quaternion -from paz.backend.image import resize_image, show_image +from paz.backend.image import resize_image class DomainRandomization(SequentialProcessor): @@ -27,12 +28,7 @@ def __init__(self, renderer, image_shape, image_paths, inputs_to_shape, self.add(pr.Render(renderer)) self.add(pr.ControlMap(RandomizeRender(image_paths), [0, 1], [0])) self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) - # self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) - """ - self.add(pr.SequenceWrapper({0: {'input_1': [H, W, 3]}}, - {1: {'masks': [H, W, 4]}})) - """ self.add(pr.SequenceWrapper({0: inputs_to_shape}, {1: labels_to_shape})) @@ -63,45 +59,84 @@ def __init__(self, output_shape): super(RGBMaskToImagePoints2D, self).__init__() self.add(GetNonZeroArguments()) self.add(ArgumentsToImagePoints2D()) - # 
self.add(NormalizePoints2D(output_shape)) class SolveChangingObjectPnP(SequentialProcessor): def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): super(SolveChangingObjectPnP, self).__init__() - self.MINIMUM_REQUIRED_POINTS = 4 + self.MIN_REQUIRED_POINTS = 4 self.add(SolveChangingObjectPnPRANSAC( camera_intrinsics, inlier_thresh, num_iterations)) -class Pix2Pose(pr.Processor): - def __init__(self, model, object_sizes, epsilon=0.15, with_resize=True): +class Pix2Points(pr.Processor): + def __init__(self, model, object_sizes, epsilon=0.15, resize=True): self.object_sizes = object_sizes self.predict_RGBMask = PredictRGBMask(model, epsilon) self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) - self.wrap = pr.WrapOutput(['points3D', 'points2D', 'RGB_mask']) - self.with_resize = with_resize + self.resize = resize + self.wrap = pr.WrapOutput(['points2D', 'points3D', 'RGB_mask']) def call(self, image): RGB_mask = self.predict_RGBMask(image) - if self.with_resize: - RGB_mask = resize_image(RGB_mask, image.shape[:2][::-1]) + H, W, num_channels = image.shape + if self.resize: + RGB_mask = resize_image(RGB_mask, (W, H)) points3D = self.mask_to_points3D(RGB_mask) points2D = self.mask_to_points2D(RGB_mask) - points2D = normalize_points2D(points2D, *image.shape[:2][::-1]) - return self.wrap(points3D, points2D, RGB_mask) + points2D = normalize_points2D(points2D, H, W) + return self.wrap(points2D, points3D, RGB_mask) + + +class Pix2Pose(pr.Processor): + def __init__(self, model, object_sizes, camera, + epsilon=0.15, class_name=None, draw=True): + self.pix2points = Pix2Points(model, object_sizes, epsilon, True) + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.class_name = str(class_name) if class_name is None else class_name + self.object_sizes = object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.camera = camera + self.draw = draw + + def call(self, image, box2D=None): + results = self.pix2points(image) + points2D, points3D = results['points2D'], results['points3D'] + H, W, num_channels = image.shape + points2D = denormalize_points2D(points2D, H, W) + if box2D is not None: + points2D = self.change_coordinates(points2D, box2D) + self.class_name = box2D.class_name + + min_num_points = len(points3D) > self.predict_pose.MIN_REQUIRED_POINTS + if min_num_points: + pose_results = self.predict_pose(points3D, points2D) + success, rotation, translation = pose_results + if success and min_num_points: + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, self.class_name) + else: + pose6D = None + # change_coordinates puts points2D outside image. + if (self.draw and (box2D is None)): + topic = 'image_crop' if box2D is not None else 'image' + image = draw_mask(image, points2D, points3D, self.object_sizes) + image = draw_pose6D(image, pose6D, self.cube_points3D, + self.camera.intrinsics) + results[topic] = image + results['points2D'], results['pose6D'] = points2D, pose6D + return results class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): + def __init__(self, detect, estimate_pose, offsets, draw=True): """Pose estimation pipeline using keypoints. 
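        `detect` is a detection pipeline (SSD300FAT in demo.py),
        `estimate_pose` a Pix2Pose instance that already carries the model,
        object_sizes and camera, `offsets` the box2D offset factors, and
        `draw` toggles drawing of boxes, masks and cube poses.

        Intended wiring, mirroring demo.py from this same patch (model
        weights, object_sizes, camera intrinsics and epsilon are set up
        there; the values shown are illustrative, not prescriptive):

            model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True)
            model.load_weights('weights/UNET_weights_epochs-10_beta-3.hdf5')
            detect = SSD300FAT(score_thresh, draw=False)
            estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon)
            pipeline = EstimatePoseMasks(detect, estimate_pose, offsets=[0.2, 0.2])
            results = pipeline(image)  # keys: 'image', 'boxes2D', 'poses6D'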
""" super(EstimatePoseMasks, self).__init__() self.detect = detect - self.estimate_keypoints = estimate_keypoints - self.camera = camera - self.draw = draw + self.estimate_pose = estimate_pose self.postprocess_boxes = SequentialProcessor( [pr.UnpackDictionary(['boxes2D']), pr.FilterClassBoxes2D(['035_power_drill']), @@ -109,13 +144,12 @@ def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): pr.OffsetBoxes2D(offsets)]) self.clip = pr.ClipBoxes2D() self.crop = pr.CropBoxes2D() - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.unwrap = UnwrapDictionary(['points2D', 'points3D']) self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.object_sizes = self.estimate_keypoints.object_sizes + self.object_sizes = self.estimate_pose.object_sizes self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.draw = draw def call(self, image): boxes2D = self.postprocess_boxes(self.detect(image)) @@ -123,23 +157,12 @@ def call(self, image): cropped_images = self.crop(image, boxes2D) poses6D, points = [], [] for crop, box2D in zip(cropped_images, boxes2D): - points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) - points2D = denormalize_points2D(points2D, *crop.shape[0:2]) - points2D = self.change_coordinates(points2D, box2D) - if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: - continue - success, rotation, translation = self.predict_pose( - points3D, points2D) - if success is False: - continue - print('ROTATION', rotation.shape) - quaternion = rotation_vector_to_quaternion(rotation) - print('QUATERNION', quaternion.shape) - pose6D = Pose6D(quaternion, translation, box2D.class_name) + results = self.estimate_pose(crop, box2D) + pose6D, points2D, points3D = self.unwrap(results) poses6D.append(pose6D), points.append([points2D, points3D]) if self.draw: image = self.draw_boxes2D(image, boxes2D) image = draw_masks(image, points, self.object_sizes) image = draw_poses6D(image, poses6D, self.cube_points3D, - self.camera.intrinsics) + self.estimate_pose.camera.intrinsics) return self.wrap(image, boxes2D, poses6D) diff --git a/examples/pix2pose/pipelines2.py b/examples/pix2pose/pipelines2.py deleted file mode 100644 index 06357bc17..000000000 --- a/examples/pix2pose/pipelines2.py +++ /dev/null @@ -1,294 +0,0 @@ -from paz.abstract import SequentialProcessor, Processor -from paz.pipelines import RandomizeRenderedImage as RandomizeRender -from paz.abstract.messages import Pose6D -from paz import processors as pr -from processors import ( - GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, - ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC, - ReplaceLowerThanThreshold) -from backend import build_cube_points3D -from processors import UnwrapDictionary -from processors import NormalizePoints2D -from backend import denormalize_points2D -from backend import draw_poses6D -from backend import draw_masks -from backend import draw_mask -from backend import draw_pose6D -from backend import normalize_points2D -from paz.backend.quaternion import rotation_vector_to_quaternion -from paz.backend.image import resize_image, show_image - - -class DomainRandomization(SequentialProcessor): - """Performs domain randomization on a rendered image - """ - def __init__(self, renderer, image_shape, image_paths, inputs_to_shape, - 
labels_to_shape, num_occlusions=1): - super(DomainRandomization, self).__init__() - H, W = image_shape[:2] - self.add(pr.Render(renderer)) - self.add(pr.ControlMap(RandomizeRender(image_paths), [0, 1], [0])) - self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) - self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) - self.add(pr.SequenceWrapper({0: inputs_to_shape}, - {1: labels_to_shape})) - - -class PredictRGBMask(SequentialProcessor): - def __init__(self, model, epsilon=0.15): - super(PredictRGBMask, self).__init__() - self.add(pr.ResizeImage(model.input_shape[1:3])) - self.add(pr.NormalizeImage()) - self.add(pr.ExpandDims(0)) - self.add(pr.Predict(model)) - self.add(pr.Squeeze(0)) - self.add(ReplaceLowerThanThreshold(epsilon)) - self.add(pr.DenormalizeImage()) - self.add(pr.CastImage('uint8')) - - -class RGBMaskToObjectPoints3D(SequentialProcessor): - def __init__(self, object_sizes): - super(RGBMaskToObjectPoints3D, self).__init__() - self.add(GetNonZeroValues()) - self.add(ImageToNormalizedDeviceCoordinates()) - self.add(Scale(object_sizes / 2.0)) - - -class RGBMaskToImagePoints2D(SequentialProcessor): - def __init__(self, output_shape): - super(RGBMaskToImagePoints2D, self).__init__() - self.add(GetNonZeroArguments()) - self.add(ArgumentsToImagePoints2D()) - # self.add(NormalizePoints2D(output_shape)) - - -class SolveChangingObjectPnP(SequentialProcessor): - def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): - super(SolveChangingObjectPnP, self).__init__() - self.MINIMUM_REQUIRED_POINTS = 4 - self.add(SolveChangingObjectPnPRANSAC( - camera_intrinsics, inlier_thresh, num_iterations)) - - -class Pix2Points(pr.Processor): - def __init__(self, model, object_sizes, epsilon=0.15, - resize=True, draw=True): - self.object_sizes = object_sizes - self.predict_RGBMask = PredictRGBMask(model, epsilon) - self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) - self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) - self.resize, self.draw = resize, draw - self.wrap = pr.WrapOutput( - ['image', 'points2D', 'points3D', 'RGB_mask']) - - def call(self, image): - RGB_mask = self.predict_RGBMask(image) - H, W, num_channels = image.shape - if self.resize: - RGB_mask = resize_image(RGB_mask, (W, H)) - points3D = self.mask_to_points3D(RGB_mask) - points2D = self.mask_to_points2D(RGB_mask) - points2D = normalize_points2D(points2D, W, H) - if self.draw: - image = draw_mask(image, points2D, points3D, self.object_sizes) - return self.wrap(image, points2D, points3D, RGB_mask) - - -class Pix2Pose(pr.Processor): - def __init__(self, model, object_sizes, camera, epsilon=0.15, - class_name=None, resize=True, draw=True): - self.pix2points = Pix2Points( - model, object_sizes, epsilon, resize, False) - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.MIN_REQUIRED_POINTS = self.predict_pose.MINIMUM_REQUIRED_POINTS - self.class_name = str(class_name) if class_name is None else class_name - self.object_sizes = object_sizes - self.cube_points3D = build_cube_points3D(*self.object_sizes) - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.camera = camera - self.draw = draw - - def call(self, image, box2D=None): - results = self.pix2points(image) - points2D, points3D = results['points2D'], results['points3D'] - H, W, num_channels = image.shape - points2D = denormalize_points2D(points2D, H, W) - if box2D is not None: - points2D = self.change_coordinates(points2D, box2D) - - valid_num_points = len(points3D) > 
self.MIN_REQUIRED_POINTS - if valid_num_points: - success, rotation, translation = self.predict_pose(points3D, - points2D) - if success and valid_num_points: - quaternion = rotation_vector_to_quaternion(rotation) - pose6D = Pose6D(quaternion, translation, self.class_name) - else: - pose6D = None - - if self.draw: - image = draw_mask(image, points2D, points3D, self.object_sizes) - image = draw_pose6D(image, pose6D, self.cube_points3D, - self.camera.intrinsics) - results['pose6D'], results['image'] = pose6D, image - return results - - -class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): - """Pose estimation pipeline using keypoints. - """ - super(EstimatePoseMasks, self).__init__() - self.detect = detect - self.estimate_keypoints = estimate_keypoints - self.camera = camera - self.draw = draw - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(['035_power_drill']), - # pr.FilterClassBoxes2D(['solar_panel']), - pr.SquareBoxes2D(), - pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.unwrap = UnwrapDictionary(['points2D', 'points3D']) - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - # self.draw_boxes2D = pr.DrawBoxes2D(['solar_panel']) - self.object_sizes = self.estimate_keypoints.object_sizes - self.cube_points3D = build_cube_points3D(*self.object_sizes) - # affine_matrix = build_rotation_matrix_z(3.14156 / 6) - # self.cube_points3D = np.matmul(affine_matrix, self.cube_points3D.T).T - # 25000, - # self.cube_points3D = self.cube_points3D + np.array([5000, 5000, 0]) - - def call(self, image): - from paz.abstract.messages import Box2D - detections = self.detect(image) - # detections = {'boxes2D': [Box2D([320, 280, 1300, 1060], 1.0, 'solar_panel')]} - boxes2D = self.postprocess_boxes(detections) - # boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points = [], [] - for crop, box2D in zip(cropped_images, boxes2D): - points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) - points2D = denormalize_points2D(points2D, *crop.shape[0:2]) - points2D = self.change_coordinates(points2D, box2D) - if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: - continue - success, rotation, translation = self.predict_pose( - points3D, points2D) - if success is False: - continue - quaternion = rotation_vector_to_quaternion(rotation) - pose6D = Pose6D(quaternion, translation, box2D.class_name) - poses6D.append(pose6D), points.append([points2D, points3D]) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - image = draw_masks(image, points, self.object_sizes) - image = draw_poses6D(image, poses6D, self.cube_points3D, - self.camera.intrinsics) - return self.wrap(image, boxes2D, poses6D) - - - -class Pix2Pose2(pr.Processor): - def __init__(self, model, object_sizes, epsilon=0.15, - class_name=None, with_resize=True, draw=True): - self.object_sizes = object_sizes - self.predict_RGBMask = PredictRGBMask(model, epsilon) - self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) - self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.wrap = 
pr.WrapOutput(['image', 'points3D', 'points2D', 'RGB_mask']) - self.with_resize = with_resize - self.class_name = str(class_name) if class_name is None else class_name - self.draw = draw - - def call(self, image): - RGB_mask = self.predict_RGBMask(image) - H, W, num_channels = image.shape - if self.with_resize: - RGB_mask = resize_image(RGB_mask, (W, H)) - points3D = self.mask_to_points3D(RGB_mask) - points2D = self.mask_to_points2D(RGB_mask) - points2D = normalize_points2D(points2D, (W, H)) - if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: - pose6D = None - success, rotation, translation = self.predict_pose(points3D, points2D) - if success is False: - pose6D = None - quaternion = rotation_vector_to_quaternion(rotation) - pose6D = Pose6D(quaternion, translation, self.class_name) - if self.draw: - image = draw_mask(image, points2D, points3D, self.object_sizes) - image = draw_pose6D(image, pose6D, self.cube_points3D, self.camera.intrinsics) - return self.wrap(image, points3D, points2D, RGB_mask) - - -class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): - """Pose estimation pipeline using keypoints. - """ - super(EstimatePoseMasks, self).__init__() - self.detect = detect - self.estimate_keypoints = estimate_keypoints - self.camera = camera - self.draw = draw - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - # pr.FilterClassBoxes2D(['035_power_drill']), - pr.FilterClassBoxes2D(['solar_panel']), - pr.SquareBoxes2D(), - pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.unwrap = UnwrapDictionary(['points2D', 'points3D']) - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - # self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.draw_boxes2D = pr.DrawBoxes2D(['solar_panel']) - self.object_sizes = self.estimate_keypoints.object_sizes - from backend import build_rotation_matrix_z - import numpy as np - self.cube_points3D = build_cube_points3D(*self.object_sizes) - affine_matrix = build_rotation_matrix_z(3.14156 / 6) - self.cube_points3D = np.matmul(affine_matrix, self.cube_points3D.T).T - # 25000, - # self.cube_points3D = self.cube_points3D + np.array([5000, 5000, 0]) - - def call(self, image): - from paz.abstract.messages import Box2D - detections = self.detect(image) - detections = {'boxes2D': [Box2D([320, 280, 1300, 1060], 1.0, 'solar_panel')]} - boxes2D = self.postprocess_boxes(detections) - # boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points = [], [] - for crop, box2D in zip(cropped_images, boxes2D): - points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) - points2D = denormalize_points2D(points2D, *crop.shape[0:2]) - points2D = self.change_coordinates(points2D, box2D) - if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: - continue - success, rotation, translation = self.predict_pose( - points3D, points2D) - if success is False: - continue - print('ROTATION', rotation.shape) - quaternion = rotation_vector_to_quaternion(rotation) - print('QUATERNION', quaternion.shape) - pose6D = Pose6D(quaternion, translation, box2D.class_name) - poses6D.append(pose6D), points.append([points2D, points3D]) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - image = 
draw_masks(image, points, self.object_sizes) - image = draw_poses6D(image, poses6D, self.cube_points3D, - self.camera.intrinsics) - return self.wrap(image, boxes2D, poses6D) diff --git a/examples/pix2pose/pipelines3.py b/examples/pix2pose/pipelines3.py deleted file mode 100644 index 8c70fca96..000000000 --- a/examples/pix2pose/pipelines3.py +++ /dev/null @@ -1,121 +0,0 @@ -from paz.abstract import SequentialProcessor, Processor -from paz.pipelines import RandomizeRenderedImage as RandomizeRender -from paz.abstract.messages import Pose6D -from paz import processors as pr -from processors import ( - GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, - ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC, - ReplaceLowerThanThreshold) -from backend import build_cube_points3D -from processors import UnwrapDictionary -from processors import NormalizePoints2D -from backend import denormalize_points2D -from backend import draw_poses6D -from backend import draw_masks -from backend import draw_mask -from backend import normalize_points2D -from backend import draw_pose6D -from paz.backend.quaternion import rotation_vector_to_quaternion -from paz.backend.image import resize_image, show_image -from pipelines import SolveChangingObjectPnP -from pipelines import RGBMaskToImagePoints2D, RGBMaskToObjectPoints3D, PredictRGBMask - - -class Pix2Points(pr.Processor): - def __init__(self, model, object_sizes, epsilon=0.15, resize=True): - self.object_sizes = object_sizes - self.predict_RGBMask = PredictRGBMask(model, epsilon) - self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) - self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) - self.resize = resize - self.wrap = pr.WrapOutput(['points2D', 'points3D', 'RGB_mask']) - - def call(self, image): - RGB_mask = self.predict_RGBMask(image) - H, W, num_channels = image.shape - if self.resize: - RGB_mask = resize_image(RGB_mask, (W, H)) - points3D = self.mask_to_points3D(RGB_mask) - points2D = self.mask_to_points2D(RGB_mask) - points2D = normalize_points2D(points2D, H, W) - return self.wrap(points2D, points3D, RGB_mask) - - -class Pix2Pose(pr.Processor): - def __init__(self, model, object_sizes, camera, - epsilon=0.15, class_name=None, draw=True): - - self.pix2points = Pix2Points(model, object_sizes, epsilon, True) - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.class_name = str(class_name) if class_name is None else class_name - self.object_sizes = object_sizes - self.cube_points3D = build_cube_points3D(*self.object_sizes) - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.camera = camera - self.draw = draw - - def call(self, image, box2D=None): - results = self.pix2points(image) - points2D, points3D = results['points2D'], results['points3D'] - H, W, num_channels = image.shape - points2D = denormalize_points2D(points2D, H, W) - if box2D is not None: - points2D = self.change_coordinates(points2D, box2D) - self.class_name = box2D.class_name - - min_num_points = len(points3D) > self.predict_pose.MIN_REQUIRED_POINTS - if min_num_points: - pose_results = self.predict_pose(points3D, points2D) - success, rotation, translation = pose_results - if success and min_num_points: - quaternion = rotation_vector_to_quaternion(rotation) - pose6D = Pose6D(quaternion, translation, self.class_name) - else: - pose6D = None - # change_coordinates puts points2D outside image. 
- if (self.draw and (box2D is None)): - topic = 'image_crop' if box2D is not None else 'image' - image = draw_mask(image, points2D, points3D, self.object_sizes) - image = draw_pose6D(image, pose6D, self.cube_points3D, - self.camera.intrinsics) - results[topic] = image - results['points2D'], results['pose6D'] = points2D, pose6D - return results - - -class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_pose, offsets, draw=True): - """Pose estimation pipeline using keypoints. - """ - super(EstimatePoseMasks, self).__init__() - self.detect = detect - self.estimate_pose = estimate_pose - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(['035_power_drill']), - pr.SquareBoxes2D(), - pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) - self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.object_sizes = self.estimate_pose.object_sizes - self.cube_points3D = build_cube_points3D(*self.object_sizes) - self.draw = draw - - def call(self, image): - boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points = [], [] - for crop, box2D in zip(cropped_images, boxes2D): - results = self.estimate_pose(crop, box2D) - pose6D, points2D, points3D = self.unwrap(results) - poses6D.append(pose6D), points.append([points2D, points3D]) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - image = draw_masks(image, points, self.object_sizes) - image = draw_poses6D(image, poses6D, self.cube_points3D, - self.estimate_pose.camera.intrinsics) - return self.wrap(image, boxes2D, poses6D) From 0555b9436b35e4eee5602e83d32690c6629bdd53 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 2 Dec 2021 18:10:25 +0100 Subject: [PATCH 075/101] Add MultiPoseEstimation pipeline --- examples/pix2pose/demo_image.py | 59 ++++++++++++++++++ examples/pix2pose/pipelines.py | 104 +++++++++++++++++++++++++++++++- 2 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 examples/pix2pose/demo_image.py diff --git a/examples/pix2pose/demo_image.py b/examples/pix2pose/demo_image.py new file mode 100644 index 000000000..b6ec18aed --- /dev/null +++ b/examples/pix2pose/demo_image.py @@ -0,0 +1,59 @@ +import numpy as np +from paz.models import UNET_VGG16 +from paz.backend.image import show_image, load_image +from paz.backend.camera import Camera +from paz.pipelines import DetectSingleShot +from paz.models import SSD300 + +from pipelines import MultiPix2Pose + + +image_path = 'images/lab_condition.png' +epsilon = 0.001 +score_thresh = 0.50 +offsets = [0.2, 0.2] +nms_thresh = 0.45 + +image_shape = (128, 128, 3) +num_classes = 3 +camera = Camera(device_id=0) +image = load_image(image_path) +image_size = image.shape[0:2] +focal_length = image_size[1] +image_center = (image_size[1] / 2.0, image_size[0] / 2.0) +camera.distortion = np.zeros((4)) +camera.intrinsics = np.array([[focal_length, 0, image_center[0]], + [0, focal_length, image_center[1]], + [0, 0, 1]]) + +class_names = ['background', 'Large_clamp', 'flat_screwdriver', + 'hammer', 'Solar_panel', 'power_drill'] +detection = SSD300(len(class_names), head_weights=None) +detection.load_weights('weights/SSD300_weights_.53-1.40.hdf5') +detect = DetectSingleShot(detection, class_names, score_thresh, + nms_thresh, draw=False) + 
+name_to_sizes = { + 'power_drill': np.array([1840, 1870, 520]), + 'Solar_panel': np.array([15000, 15000, 2000]), + 'Large_clamp': np.array([12000, 17100, 3900]), + 'hammer': np.array([18210, 33272, 3280])} + + +name_to_weights = { + 'power_drill': 'weights/UNET_weights_epochs-10_beta-3.hdf5', + 'Solar_panel': 'weights/UNET-VGG_solar_panel_canonical_13.hdf5', + 'Large_clamp': 'weights/UNET-VGG_large_clamp_canonical_10.hdf5', + 'hammer': 'weights/UNET-VGG16_weights_hammer_10.hdf5'} + + +segment = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +valid_class_names = ['power_drill', 'Solar_panel', 'Large_clamp', 'hammer'] + +pipeline = MultiPix2Pose(detect, segment, camera, name_to_weights, + name_to_sizes, valid_class_names, offsets, + epsilon, draw=True) + +results = pipeline(image) +predicted_image = results['image'] +show_image(predicted_image) diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 7383e3571..7ebe0d7a3 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -14,8 +14,11 @@ from backend import draw_masks from backend import draw_mask from backend import normalize_points2D +from backend import points3D_to_RGB +from backend import draw_points2D from paz.backend.quaternion import rotation_vector_to_quaternion from paz.backend.image import resize_image +import numpy as np class DomainRandomization(SequentialProcessor): @@ -71,6 +74,7 @@ def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): class Pix2Points(pr.Processor): def __init__(self, model, object_sizes, epsilon=0.15, resize=True): + self.model = model self.object_sizes = object_sizes self.predict_RGBMask = PredictRGBMask(model, epsilon) self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) @@ -92,6 +96,7 @@ def call(self, image): class Pix2Pose(pr.Processor): def __init__(self, model, object_sizes, camera, epsilon=0.15, class_name=None, draw=True): + self.model = model self.pix2points = Pix2Points(model, object_sizes, epsilon, True) self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) self.class_name = str(class_name) if class_name is None else class_name @@ -111,6 +116,7 @@ def call(self, image, box2D=None): self.class_name = box2D.class_name min_num_points = len(points3D) > self.predict_pose.MIN_REQUIRED_POINTS + success = False if min_num_points: pose_results = self.predict_pose(points3D, points2D) success, rotation, translation = pose_results @@ -130,8 +136,102 @@ def call(self, image, box2D=None): return results +class MultiPix2Pose(Processor): + def __init__(self, detect, segment, camera, name_to_weights, name_to_sizes, + valid_class_names, offsets=[0.2, 0.2], epsilon=0.15, draw=True): + self.detect = detect + self.name_to_weights = name_to_weights + self.name_to_sizes = name_to_sizes + self.valid_class_names = valid_class_names + self.pix2points = Pix2Points(segment, np.zeros((3)), epsilon) + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.camera = camera + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(valid_class_names), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.draw = draw + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.name_to_cube_points3D = {} + self.mask_to_points2D = RGBMaskToImagePoints2D( + 
segment.output_shape[1:3]) + for name in self.name_to_sizes: + W, H, D = self.name_to_sizes[name] + cube_points3D = build_cube_points3D(W, H, D) + self.name_to_cube_points3D[name] = cube_points3D + + self.predict_RGBMask = PredictRGBMask(segment, epsilon) + + def call(self, image): + boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points2D, points3D = [], [], [] + for crop, box2D in zip(cropped_images, boxes2D): + class_name = box2D.class_name + name_to_weights = self.name_to_weights[class_name] + self.pix2points.model.load_weights(name_to_weights) + object_sizes = self.name_to_sizes[class_name] + # self.pix2points.object_sizes = object_sizes + # points = self.pix2points(crop) + + RGB_mask = self.predict_RGBMask(crop) + H, W, num_channels = crop.shape + RGB_mask = resize_image(RGB_mask, (W, H)) + + self.mask_to_points3D = RGBMaskToObjectPoints3D(object_sizes) + class_points3D = self.mask_to_points3D(RGB_mask) + class_points2D = self.mask_to_points2D(RGB_mask) + class_points2D = normalize_points2D(class_points2D, H, W) + + # from paz.backend.image import show_image + # show_image((points['RGB_mask'] * 255).astype('uint8')) + # class_points2D = points['points2D'] + # class_points3D = points['points3D'] + H, W, num_channels = crop.shape + class_points2D = denormalize_points2D(class_points2D, H, W) + class_points2D = self.change_coordinates(class_points2D, box2D) + print(len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS) + print(len(class_points3D), len(class_points2D)) + if len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS: + pose_results = self.predict_pose(class_points3D, class_points2D) + success, rotation, translation = pose_results + print('solver success', success) + # success = True + else: + success = False + if success: + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, class_name) + else: + pose6D = None + print(success) + points2D.append(class_points2D) + points3D.append(class_points3D) + poses6D.append(pose6D) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + for class_points2D, class_points3D, pose6D in zip(points2D, points3D, poses6D): + class_name = pose6D.class_name + object_sizes = self.name_to_sizes[class_name] + colors = points3D_to_RGB(class_points3D, object_sizes) + image = draw_points2D(image, class_points2D, colors) + + for pose6D in poses6D: + class_name = pose6D.class_name + cube_points3D = self.name_to_cube_points3D[class_name] + image = draw_pose6D(image, pose6D, cube_points3D, + self.camera.intrinsics) + return {'image': image, 'boxes2D': boxes2D, 'poses6D': poses6D} + + class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_pose, offsets, draw=True): + def __init__(self, detect, estimate_pose, offsets, draw=True, valid_class_names=['035_power_drill']): """Pose estimation pipeline using keypoints. 
""" super(EstimatePoseMasks, self).__init__() @@ -139,7 +239,7 @@ def __init__(self, detect, estimate_pose, offsets, draw=True): self.estimate_pose = estimate_pose self.postprocess_boxes = SequentialProcessor( [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(['035_power_drill']), + pr.FilterClassBoxes2D(valid_class_names), pr.SquareBoxes2D(), pr.OffsetBoxes2D(offsets)]) self.clip = pr.ClipBoxes2D() From 424430b132b8b546212c830ee32c3f80aa355de1 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 2 Dec 2021 18:10:40 +0100 Subject: [PATCH 076/101] Add multi samples in demo --- examples/pix2pose/demo.py | 60 +++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 5ab9e6416..c972f7165 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -15,7 +15,9 @@ num_classes = 3 model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) -model.load_weights('weights/UNET_weights_epochs-10_beta-3.hdf5') +# model.load_weights('weights/UNET_weights_epochs-10_beta-3.hdf5') +# model.load_weights('weights/UNET-VGG_solar_panel_canonical_13.hdf5') +# model.load_weights('weights/UNET-VGG_large_clamp_canonical_10.hdf5') # approximating intrinsic camera parameters camera = Camera(device_id=0) @@ -24,7 +26,7 @@ # camera.stop() # image = load_image('test_image2.jpg') -image = load_image('images/test_image.jpg') +image = load_image('images/lab_condition.png') image_size = image.shape[0:2] focal_length = image_size[1] image_center = (image_size[1] / 2.0, image_size[0] / 2.0) @@ -40,14 +42,54 @@ # estimate_keypoints = Pix2Pose(model, object_sizes, epsilon, True) # pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) -object_sizes = np.array([1840, 1870, 520]) + +object_sizes = np.array([1840, 1870, 520]) # power drill +object_sizes = np.array([15000, 15000, 2000]) # solar panel +object_sizes = np.array([15000, 15000, 2000]) # solar panel estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=True) -# image = image[50:320, 60:320] -# show_image(estimate_pose(image)['image']) -pipeline = EstimatePoseMasks(detect, estimate_pose, offsets, True) -results = pipeline(image) -predicted_image = results['image'] -show_image(predicted_image) +# image = image[768:1324, 622:784] +# image = image[622:784, 768:1324] + + +# image_hammer = image[460:1030, 740:1340] +# model.load_weights('weights/UNET-VGG16_weights_hammer_10.hdf5') +# show_image(estimate_pose(image_hammer)['image']) + +# show_image(image) +image_clamp = image[670:1000, 1000:1400] +# image_hammer = image[460:1030, 740:1340] +model.load_weights('weights/UNET-VGG_large_clamp_canonical_10.hdf5') +show_image(estimate_pose(image_clamp)['image']) + +""" +image = load_image('images/zed_left_1011.png') +image = image[250:800, 250:850, :] +H, W, num_channels = image.shape +show_image(estimate_pose(image)['image']) + +image = load_image('images/MicrosoftTeams-image.png') +show_image(estimate_pose(image)['image']) + +image = load_image('images/zed_left_705.png') +image = image[250:1080, 250:1400, :] +show_image(estimate_pose(image)['image']) + + +image = load_image('images/zed_left_792.png') +image = image[30:1400, 280:1060, :] +show_image(estimate_pose(image)['image']) +""" + +# image = load_image('images/large_clamp.jpeg') +# show_image(image[1]) +# results = estimate_pose(image) +# show_image(results['image']) + + +# pipeline = EstimatePoseMasks(detect, estimate_pose, offsets, True) +# results = 
pipeline(image) +# predicted_image = results['image'] +# show_image(predicted_image) # image_size = (640, 480) # player = VideoPlayer(image_size, pipeline, camera) From be139c270d83f979a755bfdd746167dc07929969 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 2 Dec 2021 18:12:05 +0100 Subject: [PATCH 077/101] Added parameters for multiple objects in the scene --- examples/pix2pose/scenes.py | 61 +++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index 1cbb9b9a7..79d8da90c 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -168,8 +168,9 @@ def render_symmetries(self): y_fov = 3.14159 / 4.0 light = [1.0, 30] + # model = UNET_VGG16(3, image_shape, freeze_backbone=True) + # solar panel parameters - """ OBJ_name = 'single_solar_panel_02.obj' path_OBJ = os.path.join(root_path, OBJ_name) angles = np.linspace(0, 2 * np.pi, 7)[:6] @@ -179,8 +180,14 @@ def render_symmetries(self): camera_pose = to_affine_matrix(camera_rotation, translation) min_corner = [0.0, 0.0, -0.4] max_corner = [0.0, 0.0, +0.0] - """ + # model.load_weights('weights/UNET-VGG_solar_panel_canonical_13.hdf5') + renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, + max_corner, symmetries) + renderer.scene.ambient_light = [1.0, 1.0, 1.0] + image = renderer.render_symmetries() + show_image(image) + """ # large clamp parameters # REMEMBER TO CHANGE THE Ns coefficient to values between [0, 1] in # textured.mtl. For example change 96.07 to .967 @@ -192,6 +199,7 @@ def render_symmetries(self): camera_pose[:3, :3] = np.matmul(align_z, camera_pose[:3, :3]) min_corner = [-0.05, -0.02, -0.05] max_corner = [+0.05, +0.02, +0.01] + # model.load_weights('weights/UNET-VGG_large_clamp_canonical_10.hdf5') angles = [0.0, np.pi] symmetries = np.array([build_rotation_matrix_y(angle) for angle in angles]) @@ -200,13 +208,52 @@ def render_symmetries(self): renderer.scene.ambient_light = [1.0, 1.0, 1.0] image = renderer.render_symmetries() show_image(image) + """ + """ + # ------------------------------------------------------------- + # Training scene for hammer + # -------------------------------------------------------------- + OBJ_name = '.keras/paz/datasets/ycb_models/048_hammer/textured.obj' + path_OBJ = os.path.join(root_path, OBJ_name) + distance = [0.5, 0.6] + top_only = False + roll = 3.14159 + shift = 0.05 + renderer = PixelMaskRenderer( + path_OBJ, viewport_size, y_fov, distance, light, top_only, roll, shift) + for arg in range(100): + image, alpha, RGBA_mask = renderer.render() + image = np.concatenate([image, RGBA_mask[..., 0:3]], axis=1) + show_image(image) + """ + """ + translation = np.array([0.0, 0.0, 0.50]) + camera_pose, y = compute_modelview_matrices(translation, np.zeros((3))) + align_z = build_rotation_matrix_z(np.pi / 8) + camera_pose[:3, :3] = np.matmul(align_z, camera_pose[:3, :3]) + min_corner = [-0.05, -0.02, -0.05] + max_corner = [+0.05, +0.02, +0.01] + + symmetries, angles = [], [0.0, np.pi] + for angle in angles: + symmetry = build_rotation_matrix_y(angle) + symmetries.append(symmetry) + symmetries = np.array(symmetries) + + renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, + max_corner, symmetries) + renderer.scene.ambient_light = [1.0, 1.0, 1.0] + image = renderer.render_symmetries() + show_image(image) + """ + + + """ + show_image(image) for arg in range(0): image, alpha, RGB_mask = renderer.render() show_image(RGB_mask[:, :, 0:3]) - model = UNET_VGG16(3, 
image_shape, freeze_backbone=True) - model.load_weights('UNET-VGG_large_clamp_canonical_10.hdf5') - background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' background_wildcard = os.path.join(root_path, background_wildcard) image_paths = glob.glob(background_wildcard) @@ -231,5 +278,7 @@ def render_symmetries(self): # error = RGB_mask_pred - RGB_mask RGB_mask_pred = RGB_mask_pred.astype('uint8') print(image.dtype, RGB_mask_pred.dtype, RGB_mask_true.dtype) - images = np.concatenate([image, RGB_mask_pred, RGB_mask_true], axis=1) + # images = np.concatenate([image, RGB_mask_pred, RGB_mask_true], axis=1) + images = np.concatenate([image, RGB_mask_pred], axis=1) show_image(images) + """ From 46492c309c4957d4b2f8ced7abc7136559f2f10c Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 2 Dec 2021 18:12:37 +0100 Subject: [PATCH 078/101] Refactor training script for multiple objects --- examples/pix2pose/train.py | 97 ++++++++++++++------------------------ 1 file changed, 35 insertions(+), 62 deletions(-) diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 42195bafc..12c48a715 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -3,98 +3,71 @@ from tensorflow.keras.optimizers import Adam from paz.abstract import GeneratingSequence from paz.models.segmentation import UNET_VGG16 -from paz.backend.image import show_image, resize_image -import numpy as np from scenes import PixelMaskRenderer from pipelines import DomainRandomization -from loss import WeightedReconstruction, MSE_with_alpha_channel -from models.fully_convolutional_net import FullyConvolutionalNet +from loss import WeightedReconstruction +from metrics import mean_squared_error as MSE -image_shape = [128, 128, 3] +# global training parameters +H, W, num_channels = image_shape = [128, 128, 3] +beta = 3.0 +batch_size = 32 +num_classes = 3 +learning_rate = 0.001 +max_num_epochs = 10 +steps_per_epoch = 1000 +inputs_to_shape = {'input_1': [H, W, 3]} +labels_to_shape = {'masks': [H, W, 4]} + +# global rendering parameters root_path = os.path.expanduser('~') background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' background_wildcard = os.path.join(root_path, background_wildcard) image_paths = glob.glob(background_wildcard) -path_OBJ = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' -path_OBJ = os.path.join(root_path, path_OBJ) num_occlusions = 1 viewport_size = image_shape[:2] +light = [1.0, 30] y_fov = 3.14159 / 4.0 + +# power drill parameters +""" +OBJ_name = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' distance = [0.3, 0.5] -light = [1.0, 30] top_only = False roll = 3.14159 shift = 0.05 -num_steps = 1000 -batch_size = 32 -beta = 3.0 -alpha = 0.1 -filters = 16 -num_classes = 3 -learning_rate = 0.001 -# steps_per_epoch -max_num_epochs = 10 -steps_per_epoch = num_steps +""" +# hammer parameters +OBJ_name = '.keras/paz/datasets/ycb_models/048_hammer/textured.obj' +distance = [0.5, 0.6] +top_only = False +roll = 3.14159 +shift = 0.05 + +path_OBJ = os.path.join(root_path, OBJ_name) renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, light, top_only, roll, shift) -processor = DomainRandomization(renderer, image_shape, - image_paths, num_occlusions) +processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) -sequence = GeneratingSequence(processor, batch_size, num_steps) +sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) -beta = 3.0 
weighted_reconstruction = WeightedReconstruction(beta) -# model = FullyConvolutionalNet(num_classes, image_shape, filters, alpha) model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) -# model. optimizer = Adam(learning_rate) -# model.load_weights('UNET_weights_MSE.hdf5') -model.compile( - optimizer, weighted_reconstruction, metrics=MSE_with_alpha_channel) +model.compile(optimizer, weighted_reconstruction, metrics=MSE) + model.fit( sequence, - # steps_per_epoch=args.steps_per_epoch, epochs=max_num_epochs, # callbacks=[stop, log, save, plateau, draw], verbose=1, workers=0) -# batch = sequence.__getitem__(0) -# for _ in range(100): -# image, alpha, RGB_mask = renderer.render() -# show_image(image) -# show_image(RGB_mask) - -def normalize(image): - return (image * 255.0).astype('uint8') - - -def show_results(): - # image, alpha, pixel_mask_true = renderer.render() - sample = processor() - image = sample['inputs']['input_1'] - pixel_mask_true = sample['labels']['masks'] - image = np.expand_dims(image, 0) - pixel_mask_pred = model.predict(image) - pixel_mask_pred = normalize(np.squeeze(pixel_mask_pred, axis=0)) - image = normalize(np.squeeze(image, axis=0)) - results = np.concatenate( - [image, normalize(pixel_mask_true[..., 0:3]), pixel_mask_pred], axis=1) - H, W = results.shape[:2] - scale = 6 - results = resize_image(results, (scale * W, scale * H)) - show_image(results) - - -""" -for _ in range(100): - sample = processor() - inputs, labels = sample['inputs'], sample['labels'] - show_image((inputs['input_image'] * 255).astype('uint8')) - show_image((labels['label_image'] * 255).astype('uint8')) -""" +model.save_weights('UNET-VGG16_weights_hammer_10.hdf5') From efa75f7b783d1990fa9e3136224cdfac732bedff Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 2 Dec 2021 18:13:30 +0100 Subject: [PATCH 079/101] Remove jpegs from repository --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 607a7ce7e..beeee88dc 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ checkpoint *.p *.zip *.iml +*.jpeg !.github/manifest.xml From 0587a9e60cf437103b6eb47bf95ea60096f71569 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 2 Dec 2021 18:13:50 +0100 Subject: [PATCH 080/101] Add rotated image --- examples/pix2pose/test_rotated_image.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/examples/pix2pose/test_rotated_image.py b/examples/pix2pose/test_rotated_image.py index 9c383edea..6e48a3658 100644 --- a/examples/pix2pose/test_rotated_image.py +++ b/examples/pix2pose/test_rotated_image.py @@ -66,6 +66,7 @@ def quick_pose(image): image = resize_image(image, (128, 128)) + # show_image(resize_image(image, (256 * 3, 256 * 3))) keypoints = estimate_keypoints(image) points2D = keypoints['points2D'] points3D = keypoints['points3D'] @@ -81,21 +82,31 @@ def quick_pose(image): image = image.astype('float') image = draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics) image = image.astype('uint8') - image = resize_image(image, (256, 256)) + image = resize_image(image, (256 * 3, 256 * 3)) show_image(image) -image = load_image('zed_left_1011.png') +image = load_image('images/zed_left_1011.png') image = image[250:800, 250:850, :] +H, W, num_channels = image.shape +# image = resize_image(image, (W * 20, H * 20)) quick_pose(image) -image = load_image('MicrosoftTeams-image.png') +image = load_image('images/MicrosoftTeams-image.png') quick_pose(image) -image = load_image('zed_left_705.png') +image = 
load_image('images/zed_left_705.png') image = image[250:1080, 250:1400, :] quick_pose(image) + +image = load_image('images/zed_left_792.png') +# image = image[280:1060, 320:1060, :] +image = image[320:1300, 280:1060, :] +quick_pose(image) + + + renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, light, top_only, roll, shift) renderer.scene.ambient_light = [1.0, 1.0, 1.0] From 181802448659bfa4a6939d17176790c0b043563e Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 19 Jan 2022 12:54:32 +0100 Subject: [PATCH 081/101] Remove unused loss backend function --- examples/pix2pose/loss.py | 42 ++++----------------------------------- 1 file changed, 4 insertions(+), 38 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index d171df124..ead212f09 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -118,11 +118,13 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): symmetric_losses = [] for rotation in rotations: RGB_true_rotated = tf.einsum('ij,bklj->bkli', rotation, RGB_true) - RGB_true_rotated = normalized_device_coordinates_to_normalized_image(RGB_true_rotated) + RGB_true_rotated = normalized_device_coordinates_to_normalized_image( + RGB_true_rotated) RGB_true_rotated = tf.clip_by_value(RGB_true_rotated, 0.0, 1.0) RGB_true_rotated = RGB_true_rotated * alpha RGBA_true_rotated = tf.concat([RGB_true_rotated, alpha], axis=3) - loss = compute_weighted_reconstruction_loss(RGBA_true_rotated, RGB_pred, beta) + loss = compute_weighted_reconstruction_loss( + RGBA_true_rotated, RGB_pred, beta) loss = tf.expand_dims(loss, -1) symmetric_losses.append(loss) symmetric_losses = tf.concat(symmetric_losses, axis=-1) @@ -130,42 +132,6 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): return minimum_symmetric_loss -def compute_weighted_symmetric_loss2(RGBA_true, RGB_pred, rotations, beta=3.0): - """Computes the mininum of all rotated L1 reconstruction losses weighting - the positive alpha mask values in the predicted RGB image by beta. - - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. - rotations: Array (num_symmetries, 3, 3). Rotation matrices - that when applied lead to the same object view. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. - """ - # alpha mask is invariant to rotations that leave the shape symmetric. 
- RGB_true, alpha = split_alpha_mask(RGBA_true) - # RGB_original_shape = tf.shape(RGBA_true) - batch_size, H, W, num_channels = RGB_true.shape - batch_size, H, W, num_channels = 32, 128, 128, 3 - RGB_true = tf.reshape(RGB_true, [batch_size, -1, 3]) - RGB_true = to_normalized_device_coordinates(RGB_true) - RGB_pred = to_normalized_device_coordinates(RGB_pred) - symmetric_losses = [] - for rotation in rotations: - # RGB_true_symmetric = tf.matmul(rotation, RGB_true.T).T - RGB_true_symmetric = tf.einsum('ij,klj->kli', rotation, RGB_true) - RGB_true_symmetric = tf.reshape(RGB_true_symmetric, (batch_size, H, W, num_channels)) - RGBA_true_symmetric = tf.concat([RGB_true_symmetric, alpha], axis=3) - symmetric_loss = compute_weighted_reconstruction_loss( - RGBA_true_symmetric, RGB_pred, beta) - symmetric_loss = tf.expand_dims(symmetric_loss, -1) - symmetric_losses.append(symmetric_loss) - symmetric_losses = tf.concat(symmetric_losses, axis=-1) - minimum_symmetric_loss = tf.reduce_min(symmetric_losses, axis=-1) - return minimum_symmetric_loss - - def compute_weighted_reconstruction_loss_with_error( RGBA_true, RGBE_pred, beta=3.0): """Computes L1 reconstruction loss by multiplying positive alpha mask From 1c9d5209988f3756a86c3d0283795ca925ceb1b7 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 25 Jan 2022 16:22:17 +0100 Subject: [PATCH 082/101] Add basic tests for loss functions --- .../{loss.py => weighted_reconstruction.py} | 0 .../pix2pose/weighted_reconstruction_test.py | 116 ++++++++++++++++++ 2 files changed, 116 insertions(+) rename examples/pix2pose/{loss.py => weighted_reconstruction.py} (100%) create mode 100644 examples/pix2pose/weighted_reconstruction_test.py diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/weighted_reconstruction.py similarity index 100% rename from examples/pix2pose/loss.py rename to examples/pix2pose/weighted_reconstruction.py diff --git a/examples/pix2pose/weighted_reconstruction_test.py b/examples/pix2pose/weighted_reconstruction_test.py new file mode 100644 index 000000000..73a02d7d7 --- /dev/null +++ b/examples/pix2pose/weighted_reconstruction_test.py @@ -0,0 +1,116 @@ +import pytest +import numpy as np + +from .weighted_reconstruction import split_alpha_mask +from .weighted_reconstruction import compute_foreground_loss +from .weighted_reconstruction import compute_background_loss +from .weighted_reconstruction import compute_error_prediction_loss +from .weighted_reconstruction import compute_weighted_reconstruction_loss +from .weighted_reconstruction import ( + compute_weighted_reconstruction_loss_with_error) +from .weighted_reconstruction import ( + normalized_image_to_normalized_device_coordinates, + normalized_device_coordinates_to_normalized_image) +from .weighted_reconstruction import WeightedReconstruction + + +@pytest.fixture +def RGBA_mask(): + return np.ones((32, 128, 128, 4), dtype=np.float32) + + +@pytest.fixture +def RGB_true(): + return np.ones((32, 128, 128, 3), dtype=np.float32) + + +@pytest.fixture +def RGBA_true(): + return np.ones((32, 128, 128, 4), dtype=np.float32) + + +@pytest.fixture +def RGB_pred(): + return 0.5 * np.ones((32, 128, 128, 3), dtype=np.float32) + + +@pytest.fixture +def RGBE_pred(): + return 0.5 * np.ones((32, 128, 128, 4), dtype=np.float32) + + +@pytest.fixture +def alpha_mask(): + return np.ones((32, 128, 128, 1), dtype=np.float32) + + +def test_split_alpha_mask(RGBA_mask): + batch_size, H, W, num_channels = RGBA_mask.shape + color_mask, alpha_mask = split_alpha_mask(RGBA_mask) + assert color_mask.shape == 
(batch_size, H, W, 3) + assert alpha_mask.shape == (batch_size, H, W, 1) + + +def test_split_error_mask(RGBA_mask): + batch_size, H, W, num_channels = RGBA_mask.shape + color_mask, alpha_mask = split_alpha_mask(RGBA_mask) + assert color_mask.shape == (batch_size, H, W, 3) + assert alpha_mask.shape == (batch_size, H, W, 1) + + +def test_compute_foreground_loss(RGB_true, RGB_pred, alpha_mask): + foreground_loss = compute_foreground_loss(RGB_true, RGB_pred, alpha_mask) + assert np.allclose(foreground_loss, 0.5) + + +def test_compute_background_loss(RGB_true, RGB_pred, alpha_mask): + alpha_mask = 1.0 - alpha_mask + background_loss = compute_background_loss(RGB_true, RGB_pred, alpha_mask) + assert np.allclose(background_loss, 0.5) + + +def test_compute_weighted_reconstruction_loss(RGBA_true, RGB_pred): + loss = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, 3.0) + assert np.allclose(loss, 1.5) + + +def test_normalized_image_to_normalized_device_coordinates(RGB_true): + value = normalized_image_to_normalized_device_coordinates(RGB_true) + assert np.max(value) == 1.0 + + +def test_normalized_image_to_normalized_device_coordinates_segment(): + image = np.array([0, 0.5, 1.0]) + value = normalized_image_to_normalized_device_coordinates(image) + assert ((np.min(value) == -1.0) and (np.max(value) == 1.0)) + + +def test_normalized_device_coordinates_to_normalized_image(): + image = np.array([-1.0, 0.0, 1.0]) + value = normalized_device_coordinates_to_normalized_image(image) + assert ((np.min(value) == 0.0) and (np.max(value) == 1.0)) + + +def test_weighted_reconstruction_loss(RGBA_true, RGB_pred): + compute_loss = WeightedReconstruction(beta=3.0) + loss = compute_loss(RGBA_true, RGB_pred) + assert np.allclose(loss, 1.5) + + +def test_weighted_reconstruction_loss_with_error(RGBA_true, RGBE_pred): + loss = compute_weighted_reconstruction_loss_with_error( + RGBA_true, RGBE_pred, beta=3.0) + assert np.allclose(loss, 1.5) + + +def test_error_prediction_loss(RGBA_true, RGBE_pred): + # TODO change RGBE_pred + loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) + print(loss) + assert True + +# test_WeightedReconstructionWithError +# test_ErrorPrediction + +# test_WeightedSymmetricReconstruction +# test_compute_weighted_symmetric_loss From 8a50045ec05cdfa18700aecf408d86cee07e677f Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 26 Jan 2022 13:25:29 +0100 Subject: [PATCH 083/101] Start test for backend --- examples/pix2pose/backend_test.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 examples/pix2pose/backend_test.py diff --git a/examples/pix2pose/backend_test.py b/examples/pix2pose/backend_test.py new file mode 100644 index 000000000..1c3dd1d8f --- /dev/null +++ b/examples/pix2pose/backend_test.py @@ -0,0 +1,17 @@ +import pytest +import numpy as np + +from .backend import build_cube_points3D + + +@pytest.fixture +def unit_cube(): + return np.array([[-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5] + [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5]]) + + +def test_build_cube_points3D(unit_cube): + cube_points = build_cube_points3D(1, 1, 1) + print(cube_points.shape) + print(cube_points) + assert np.allclose(unit_cube, cube_points) From 28cf4c7ec746e922572d04e51e6c4829f11885e5 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 12:30:18 +0100 Subject: [PATCH 084/101] Add tests and refactor backend --- examples/pix2pose/backend.py | 39 +-- examples/pix2pose/backend_test.py | 271 +++++++++++++++++- examples/pix2pose/legacy.py | 118 
++++++++ examples/pix2pose/pipelines.py | 3 +- examples/pix2pose/weighted_reconstruction.py | 86 ------ .../pix2pose/weighted_reconstruction_test.py | 16 +- 6 files changed, 403 insertions(+), 130 deletions(-) create mode 100644 examples/pix2pose/legacy.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index e19f9e948..1f3f3524f 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -43,8 +43,8 @@ def build_cube_points3D(width, height, depth): point_5, point_6, point_7, point_8]) -def _preprocess_image_points2D(image_points2D): - """Preprocessing image points for PnPRANSAC +def preprocess_image_points2D(image_points2D): + """Preprocessing image points for openCV's PnPRANSAC # Arguments image_points2D: Array of shape (num_points, 2) @@ -96,7 +96,7 @@ def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, """ if ((len(object_points3D) < 4) or (len(image_points2D) < 4)): raise ValueError('Solve PnP requires at least 4 3D and 2D points') - image_points2D = _preprocess_image_points2D(image_points2D) + image_points2D = preprocess_image_points2D(image_points2D) success, rotation_vector, translation, inliers = cv2.solvePnPRansac( object_points3D, image_points2D, camera_intrinsics, None, flags=cv2.SOLVEPNP_EPNP, reprojectionError=inlier_threshold, @@ -195,6 +195,7 @@ def draw_cube(image, points, color=GREEN, thickness=2, radius=5): def replace_lower_than_threshold(source, threshold=1e-3, replacement=0.0): """Replace values from source that are lower than the given threshold. + This function doesn't create a new array but does replacement in place. # Arguments source: Array. @@ -230,7 +231,7 @@ def arguments_to_image_points2D(row_args, col_args): Array (num_cols, num_rows) representing points2D in UV space. # Notes - Arguments are row args (V) and col args (U). Iamge points are in UV + Arguments are row args (V) and col args (U). Image points are in UV coordinates; thus, we concatenate them in that order i.e. [col_args, row_args] """ @@ -290,22 +291,16 @@ def draw_points2D(image, points2D, colors): return image -def draw_points2D_(image, keypoints, colors, radius=1): - for (u, v), (R, G, B) in zip(keypoints, colors): - color = (int(R), int(G), int(B)) - draw_dot(image, (u, v), color, radius) - return image - - def normalize_points2D(points2D, height, width): """Transform points2D in image coordinates to normalized coordinates i.e. [U, V] -> [-1, 1]. UV have maximum values of [W, H] respectively. Image plane + width (0,0)--------> (U) | - | + height | | v @@ -549,25 +544,6 @@ def build_rotation_matrix_y(angle): return rotation_matrix_y -def rotate_image(image, rotation_matrix): - """Rotates an image with a symmetry. - - # Arguments - image: Array (H, W, 3) with domain [0, 255]. - rotation_matrix: Array (3, 3). 
- - # Returns - Array (H, W, 3) with domain [0, 255] - """ - mask_image = np.sum(image, axis=-1, keepdims=True) != 0 - image = image_to_normalized_device_coordinates(image) - rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) - rotated_image = normalized_device_coordinates_to_image(rotated_image) - rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) - rotated_image = rotated_image * mask_image - return rotated_image - - def sample_uniform(min_value, max_value): """Samples values inside segment [min_value, max_value) @@ -674,7 +650,6 @@ def compute_norm_SO3(rotation_mesh, rotation): def calculate_canonical_rotation(rotation_mesh, rotations): norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] closest_rotation_arg = np.argmin(norms) - # print(closest_rotation_arg) closest_rotation = rotations[closest_rotation_arg] canonical_rotation = np.linalg.inv(closest_rotation) return canonical_rotation diff --git a/examples/pix2pose/backend_test.py b/examples/pix2pose/backend_test.py index 1c3dd1d8f..c36e64f25 100644 --- a/examples/pix2pose/backend_test.py +++ b/examples/pix2pose/backend_test.py @@ -2,16 +2,279 @@ import numpy as np from .backend import build_cube_points3D +from .backend import preprocess_image_points2D +from .backend import replace_lower_than_threshold +from .backend import arguments_to_image_points2D +# from .backend import points3D_to_RGB +from .backend import normalize_points2D +from .backend import denormalize_points2D +from .backend import homogenous_quaternion_to_rotation_matrix +from .backend import quaternion_to_rotation_matrix +from .backend import rotation_vector_to_rotation_matrix +from .backend import to_affine_matrix +from .backend import image_to_normalized_device_coordinates +from .backend import normalized_device_coordinates_to_image +from .backend import build_rotation_matrix_x +from .backend import build_rotation_matrix_y +from .backend import build_rotation_matrix_z +from .backend import compute_norm_SO3 + + +@pytest.fixture +def rotation_matrix_X_HALF_PI(): + rotation_matrix = np.array([[1.0, 0.0, 0.0], + [0.0, 0.0, -1.0], + [0.0, 1.0, 0.0]]) + return rotation_matrix + + +@pytest.fixture +def rotation_matrix_Y_HALF_PI(): + rotation_matrix = np.array([[0.0, 0.0, 1.0], + [0.0, 1.0, 0.0], + [-1.0, 0.0, 0.0]]) + return rotation_matrix + +@pytest.fixture +def rotation_matrix_Z_HALF_PI(): + rotation_matrix = np.array([[0.0, -1.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 0.0, 1.0]]) + return rotation_matrix @pytest.fixture def unit_cube(): - return np.array([[-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5] - [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5]]) + return np.array([[0.5, -0.5, 0.5], + [0.5, -0.5, -0.5], + [-0.5, -0.5, -0.5], + [-0.5, -0.5, 0.5], + [0.5, 0.5, 0.5], + [0.5, 0.5, -0.5], + [-0.5, 0.5, -0.5], + [-0.5, 0.5, 0.5]]) + + +@pytest.fixture +def points2D(): + return np.array([[10, 301], + [145, 253], + [203, 5], + [214, 244], + [23, 67], + [178, 48], + [267, 310]]) def test_build_cube_points3D(unit_cube): cube_points = build_cube_points3D(1, 1, 1) - print(cube_points.shape) - print(cube_points) assert np.allclose(unit_cube, cube_points) + + +def test_preprocess_image_point2D(points2D): + image_points2D = preprocess_image_points2D(points2D) + num_points = len(points2D) + assert image_points2D.shape == (num_points, 1, 2) + assert image_points2D.data.contiguous + assert np.allclose(np.squeeze(image_points2D, 1), points2D) + + +# def test_solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, +# def 
test_project_to_image(rotation, translation, points3D, camera_intrisincs) +# def draw_cube + +def test_replace_lower_than_threshold(): + source = np.ones((128, 128, 3)) + target = replace_lower_than_threshold(source, 2.0, 5.0) + assert np.allclose(target, 5.0) + + source = np.ones((128, 128, 3)) + target = replace_lower_than_threshold(source, 0.0, -1.0) + assert np.allclose(target, 1.0) + + +def test_arguments_to_image_points2D(): + col_args = np.array([3, 44, 6]) + row_args = np.array([66, 0, 5]) + image_points2D = arguments_to_image_points2D(row_args, col_args) + assert np.allclose(image_points2D, np.array([[3, 66], [44, 0], [6, 5]])) + + +# def test_points3D_to_RGB(points3D): +# def draw_mask +# def draw_masks +# def draw_points2D + +def test_normalize_points2D(): + height, width = 480, 640 + points2D = np.array([[0, 0], [320, 240], [640, 480]]) + normalized_points = normalize_points2D(points2D, height, width) + assert np.allclose(normalized_points, np.array([[-1, -1], [0, 0], [1, 1]])) + + +def test_denormalize_points2D(): + height, width = 480, 640 + normalized_points = np.array([[-1, -1], [0, 0], [1, 1]]) + points2D = denormalize_points2D(normalized_points, height, width) + assert np.allclose(points2D, np.array([[0, 0], [320, 240], [640, 480]])) + +# def draw_pose6D +# def draw_poses6D + + +def test_homogenous_quaternion_to_rotation_matrix_identity(): + quaternion = np.array([0.0, 0.0, 0.0, 1.0]) + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) + assert np.allclose(np.eye(3), matrix) + + +def test_homogenous_quaternion_to_rotation_matrix_Z(rotation_matrix_Z_HALF_PI): + quaternion = np.array([0, 0, 0.7071068, 0.7071068]) + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) + assert np.allclose(rotation_matrix_Z_HALF_PI, matrix) + + +def test_homogenous_quaternion_to_rotation_matrix_Y(rotation_matrix_Y_HALF_PI): + quaternion = np.array([0, 0.7071068, 0.0, 0.7071068]) + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) + assert np.allclose(rotation_matrix_Y_HALF_PI, matrix) + + +def test_homogenous_quaternion_to_rotation_matrix_X(rotation_matrix_X_HALF_PI): + quaternion = np.array([0.7071068, 0.0, 0.0, 0.7071068]) + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) + assert np.allclose(rotation_matrix_X_HALF_PI, matrix) + + +def test_quaternion_to_rotation_matrix_identity(): + quaternion = np.array([0.0, 0.0, 0.0, 1.0]) + matrix = quaternion_to_rotation_matrix(quaternion) + assert np.allclose(np.eye(3), matrix) + + +def test_quaternion_to_rotation_matrix_Z(rotation_matrix_Z_HALF_PI): + quaternion = np.array([0, 0, 0.7071068, 0.7071068]) + matrix = quaternion_to_rotation_matrix(quaternion) + assert np.allclose(rotation_matrix_Z_HALF_PI, matrix) + + +def test_quaternion_to_rotation_matrix_Y(rotation_matrix_Y_HALF_PI): + quaternion = np.array([0, 0.7071068, 0.0, 0.7071068]) + matrix = quaternion_to_rotation_matrix(quaternion) + assert np.allclose(rotation_matrix_Y_HALF_PI, matrix) + + +def test_quaternion_to_rotation_matrix_X(rotation_matrix_X_HALF_PI): + quaternion = np.array([0.7071068, 0.0, 0.0, 0.7071068]) + matrix = quaternion_to_rotation_matrix(quaternion) + assert np.allclose(rotation_matrix_X_HALF_PI, matrix) + + +def test_rotation_vector_to_rotation_matrix_identity(): + rotation_vector = np.array([0.0, 0.0, 0.0]) + matrix = rotation_vector_to_rotation_matrix(rotation_vector) + assert np.allclose(np.eye(3), matrix) + + +def test_rotation_vector_to_rotation_matrix_Z(rotation_matrix_Z_HALF_PI): + rotation_vector = np.array([0.0, 0.0, 
np.pi / 2.0]) + matrix = rotation_vector_to_rotation_matrix(rotation_vector) + assert np.allclose(rotation_matrix_Z_HALF_PI, matrix) + + +def test_rotation_vector_to_rotation_matrix_Y(rotation_matrix_Y_HALF_PI): + rotation_vector = np.array([0.0, np.pi / 2.0, 0.0]) + matrix = rotation_vector_to_rotation_matrix(rotation_vector) + assert np.allclose(rotation_matrix_Y_HALF_PI, matrix) + + +def test_rotation_vector_to_rotation_matrix_X(rotation_matrix_X_HALF_PI): + rotation_vector = np.array([np.pi / 2.0, 0.0, 0.0]) + matrix = rotation_vector_to_rotation_matrix(rotation_vector) + assert np.allclose(rotation_matrix_X_HALF_PI, matrix) + + +def test_to_affine_matrix_identity(): + rotation_matrix = np.eye(3) + translation = np.zeros(3) + matrix = to_affine_matrix(rotation_matrix, translation) + assert np.allclose(matrix, np.eye(4)) + + +def test_to_affine_matrix(): + rotation_matrix = np.array([[1.0, 0.0, 0.0], + [0.0, 0.0, -1.0], + [0.0, 1.0, 0.0]]) + translation = np.array([3.0, 1.2, 3.0]) + matrix = to_affine_matrix(rotation_matrix, translation) + affine_matrix = np.array([[1.0, 0.0, 0.0, 3.0], + [0.0, 0.0, -1.0, 1.2], + [0.0, 1.0, 0.0, 3.0], + [0.0, 0.0, 0.0, 1.0]]) + assert np.allclose(affine_matrix, matrix) + + +def test_image_to_normalized_device_coordinates(): + image = np.array([[0, 127.5, 255]]) + values = image_to_normalized_device_coordinates(image) + assert np.allclose(values, np.array([[-1.0, 0.0, 1.0]])) + + +def test_normalized_device_coordinates_to_image(): + coordinates = np.array([[-1.0, 0.0, 1.0]]) + values = normalized_device_coordinates_to_image(coordinates) + assert np.allclose(values, np.array([[0.0, 127.5, 255.0]])) + + +def test_build_rotation_matrix_x(rotation_matrix_X_HALF_PI): + angle = np.pi / 2.0 + matrix = build_rotation_matrix_x(angle) + assert np.allclose(matrix, rotation_matrix_X_HALF_PI) + + +def test_build_rotation_matrix_y(rotation_matrix_Y_HALF_PI): + angle = np.pi / 2.0 + matrix = build_rotation_matrix_y(angle) + assert np.allclose(matrix, rotation_matrix_Y_HALF_PI) + + +def test_build_rotation_matrix_z(rotation_matrix_Z_HALF_PI): + angle = np.pi / 2.0 + matrix = build_rotation_matrix_z(angle) + assert np.allclose(matrix, rotation_matrix_Z_HALF_PI) + + +# test_sample_uniform +# test_sample_inside_box3D +# test_sample_front_rotation_matrix +# test_sample_afine_transform +# test_sample_random_rotation_matrix + +def test_compute_norm_SO3_X(rotation_matrix_X_HALF_PI): + norm = compute_norm_SO3(np.eye(3), rotation_matrix_X_HALF_PI) + assert np.allclose(norm, 2.0) + + +def test_compute_norm_SO3_Y(rotation_matrix_Y_HALF_PI): + norm = compute_norm_SO3(np.eye(3), rotation_matrix_Y_HALF_PI) + assert np.allclose(norm, 2.0) + + +def test_compute_norm_SO3_Z(rotation_matrix_Z_HALF_PI): + norm = compute_norm_SO3(np.eye(3), rotation_matrix_Z_HALF_PI) + assert np.allclose(norm, 2.0) + + +def test_compute_norm_SO3_identity(): + norm = compute_norm_SO3(np.eye(3), np.eye(3)) + assert np.allclose(norm, 0.0) + + +def test_compute_norm_SO3_X_to_Z(rotation_matrix_X_HALF_PI, + rotation_matrix_Z_HALF_PI): + norm = compute_norm_SO3(rotation_matrix_X_HALF_PI, + rotation_matrix_Z_HALF_PI) + assert np.allclose(norm, 2.449489742783178) + + +# calculate_canonical_rotation diff --git a/examples/pix2pose/legacy.py b/examples/pix2pose/legacy.py new file mode 100644 index 000000000..6069ada13 --- /dev/null +++ b/examples/pix2pose/legacy.py @@ -0,0 +1,118 @@ +from tensorflow.keras.losses import Loss +from tensorflow.keras.losses import mean_squared_error +import tensorflow as tf + + +def 
compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): + """Computes the mininum of all rotated L1 reconstruction losses weighting + the positive alpha mask values in the predicted RGB image by beta. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. + rotations: Array (num_symmetries, 3, 3). Rotation matrices + that when applied lead to the same object view. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values. + """ + RGB_true, alpha = split_alpha_mask(RGBA_true) + RGB_true = normalized_image_to_normalized_device_coordinates(RGB_true) + symmetric_losses = [] + for rotation in rotations: + RGB_true_rotated = tf.einsum('ij,bklj->bkli', rotation, RGB_true) + RGB_true_rotated = normalized_device_coordinates_to_normalized_image( + RGB_true_rotated) + RGB_true_rotated = tf.clip_by_value(RGB_true_rotated, 0.0, 1.0) + RGB_true_rotated = RGB_true_rotated * alpha + RGBA_true_rotated = tf.concat([RGB_true_rotated, alpha], axis=3) + loss = compute_weighted_reconstruction_loss( + RGBA_true_rotated, RGB_pred, beta) + loss = tf.expand_dims(loss, -1) + symmetric_losses.append(loss) + symmetric_losses = tf.concat(symmetric_losses, axis=-1) + minimum_symmetric_loss = tf.reduce_min(symmetric_losses, axis=-1) + return minimum_symmetric_loss + + +class WeightedSymmetricReconstruction(Loss): + """Computes the mininum of all rotated L1 reconstruction losses weighting + the positive alpha mask values in the predicted RGB image by beta. + """ + def __init__(self, rotations, beta=3.0): + super(WeightedSymmetricReconstruction, self).__init__() + self.rotations = rotations + self.beta = beta + + def call(self, RGBA_true, RGB_pred): + loss = compute_weighted_symmetric_loss( + RGBA_true, RGB_pred, self.rotations, self.beta) + return loss + + +def compute_error_prediction_loss(RGBA_true, RGBE_pred): + """Computes L2 reconstruction loss of predicted error mask. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGBE_pred: Tensor [batch, H, W, 3]. Predicted RGB and error mask. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values. + + """ + RGB_pred, error_pred = split_error_mask(RGBE_pred) + error_true = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, 1.0) + # TODO check we need to set minimum to 1.0? + error_true = tf.minimum(error_true, 1.0) + error_loss = mean_squared_error(error_true, error_pred) + error_loss = tf.expand_dims(error_loss, axis=-1) + return error_loss + + +class ErrorPrediction(Loss): + """Computes L2 reconstruction loss of predicted error mask. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGBE_pred: Tensor [batch, H, W, 3]. Predicted RGB and error mask. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values. + + """ + def __init__(self): + super(ErrorPrediction, self).__init__() + + def call(self, RGBA_true, RGBE_pred): + error_loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) + return error_loss + + +from paz.backend.image import draw_dot + + +def draw_points2D_(image, keypoints, colors, radius=1): + for (u, v), (R, G, B) in zip(keypoints, colors): + color = (int(R), int(G), int(B)) + draw_dot(image, (u, v), color, radius) + return image + + +def rotate_image(image, rotation_matrix): + """Rotates an image with a symmetry. + + # Arguments + image: Array (H, W, 3) with domain [0, 255]. 
+ rotation_matrix: Array (3, 3). + + # Returns + Array (H, W, 3) with domain [0, 255] + """ + mask_image = np.sum(image, axis=-1, keepdims=True) != 0 + image = image_to_normalized_device_coordinates(image) + rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) + rotated_image = normalized_device_coordinates_to_image(rotated_image) + rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) + rotated_image = rotated_image * mask_image + return rotated_image diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 7ebe0d7a3..7d6abf446 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -231,7 +231,8 @@ def call(self, image): class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_pose, offsets, draw=True, valid_class_names=['035_power_drill']): + def __init__(self, detect, estimate_pose, offsets, draw=True, + valid_class_names=['035_power_drill']): """Pose estimation pipeline using keypoints. """ super(EstimatePoseMasks, self).__init__() diff --git a/examples/pix2pose/weighted_reconstruction.py b/examples/pix2pose/weighted_reconstruction.py index ead212f09..099257b17 100644 --- a/examples/pix2pose/weighted_reconstruction.py +++ b/examples/pix2pose/weighted_reconstruction.py @@ -1,5 +1,4 @@ from tensorflow.keras.losses import Loss -from tensorflow.keras.losses import mean_squared_error import tensorflow as tf @@ -100,38 +99,6 @@ def normalized_device_coordinates_to_normalized_image(image): return (image + 1.0) / 2.0 -def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): - """Computes the mininum of all rotated L1 reconstruction losses weighting - the positive alpha mask values in the predicted RGB image by beta. - - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. - rotations: Array (num_symmetries, 3, 3). Rotation matrices - that when applied lead to the same object view. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. - """ - RGB_true, alpha = split_alpha_mask(RGBA_true) - RGB_true = normalized_image_to_normalized_device_coordinates(RGB_true) - symmetric_losses = [] - for rotation in rotations: - RGB_true_rotated = tf.einsum('ij,bklj->bkli', rotation, RGB_true) - RGB_true_rotated = normalized_device_coordinates_to_normalized_image( - RGB_true_rotated) - RGB_true_rotated = tf.clip_by_value(RGB_true_rotated, 0.0, 1.0) - RGB_true_rotated = RGB_true_rotated * alpha - RGBA_true_rotated = tf.concat([RGB_true_rotated, alpha], axis=3) - loss = compute_weighted_reconstruction_loss( - RGBA_true_rotated, RGB_pred, beta) - loss = tf.expand_dims(loss, -1) - symmetric_losses.append(loss) - symmetric_losses = tf.concat(symmetric_losses, axis=-1) - minimum_symmetric_loss = tf.reduce_min(symmetric_losses, axis=-1) - return minimum_symmetric_loss - - def compute_weighted_reconstruction_loss_with_error( RGBA_true, RGBE_pred, beta=3.0): """Computes L1 reconstruction loss by multiplying positive alpha mask @@ -151,25 +118,6 @@ def compute_weighted_reconstruction_loss_with_error( return loss -def compute_error_prediction_loss(RGBA_true, RGBE_pred): - """Computes L2 reconstruction loss of predicted error mask. - - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGBE_pred: Tensor [batch, H, W, 3]. Predicted RGB and error mask. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. 
- - """ - RGB_pred, error_pred = split_error_mask(RGBE_pred) - error_true = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, 1.0) - error_true = tf.minimum(error_true, 1.0) - error_loss = mean_squared_error(error_true, error_pred) - error_loss = tf.expand_dims(error_loss, axis=-1) - return error_loss - - class WeightedReconstruction(Loss): """Computes L1 reconstruction loss by multiplying positive alpha mask by beta. @@ -193,40 +141,6 @@ def call(self, RGBA_true, RGB_pred): return loss -class WeightedSymmetricReconstruction(Loss): - """Computes the mininum of all rotated L1 reconstruction losses weighting - the positive alpha mask values in the predicted RGB image by beta. - """ - def __init__(self, rotations, beta=3.0): - super(WeightedSymmetricReconstruction, self).__init__() - self.rotations = rotations - self.beta = beta - - def call(self, RGBA_true, RGB_pred): - loss = compute_weighted_symmetric_loss( - RGBA_true, RGB_pred, self.rotations, self.beta) - return loss - - -class ErrorPrediction(Loss): - """Computes L2 reconstruction loss of predicted error mask. - - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGBE_pred: Tensor [batch, H, W, 3]. Predicted RGB and error mask. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. - - """ - def __init__(self): - super(ErrorPrediction, self).__init__() - - def call(self, RGBA_true, RGBE_pred): - error_loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) - return error_loss - - class WeightedReconstructionWithError(Loss): """Computes L1 reconstruction loss by multiplying positive alpha mask by beta. diff --git a/examples/pix2pose/weighted_reconstruction_test.py b/examples/pix2pose/weighted_reconstruction_test.py index 73a02d7d7..bbfd8c09f 100644 --- a/examples/pix2pose/weighted_reconstruction_test.py +++ b/examples/pix2pose/weighted_reconstruction_test.py @@ -4,7 +4,6 @@ from .weighted_reconstruction import split_alpha_mask from .weighted_reconstruction import compute_foreground_loss from .weighted_reconstruction import compute_background_loss -from .weighted_reconstruction import compute_error_prediction_loss from .weighted_reconstruction import compute_weighted_reconstruction_loss from .weighted_reconstruction import ( compute_weighted_reconstruction_loss_with_error) @@ -12,6 +11,7 @@ normalized_image_to_normalized_device_coordinates, normalized_device_coordinates_to_normalized_image) from .weighted_reconstruction import WeightedReconstruction +from .weighted_reconstruction import WeightedReconstructionWithError @pytest.fixture @@ -103,13 +103,15 @@ def test_weighted_reconstruction_loss_with_error(RGBA_true, RGBE_pred): assert np.allclose(loss, 1.5) -def test_error_prediction_loss(RGBA_true, RGBE_pred): - # TODO change RGBE_pred - loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) - print(loss) - assert True +def test_WeightedReconstructionWithError(RGBA_true, RGBE_pred): + compute_loss = WeightedReconstructionWithError(beta=3.0) + loss = compute_loss(RGBA_true, RGBE_pred) + assert np.allclose(loss, 1.5) + + -# test_WeightedReconstructionWithError +# def test_error_prediction_loss(RGBA_true, RGBE_pred): +# def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0) # test_ErrorPrediction # test_WeightedSymmetricReconstruction From 0c4700e6a7f28716d7f41987289fc6e95c21d548 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 12:44:40 +0100 Subject: [PATCH 085/101] Removed mulitple pix2pose pipeline --- 
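[Editorial note, not part of the original patch] This commit moves the EstimatePoseMasks and MultiPix2Pose pipelines out of pipelines.py and into legacy.py essentially verbatim. Both pipelines share the same 2D-box post-processing chain; the snippet below is a minimal, self-contained sketch of that chain in isolation, assuming only that paz is installed. The box coordinates, score, class name and offsets are made-up illustration values.

import numpy as np
from paz.abstract import SequentialProcessor
from paz.abstract.messages import Box2D
from paz import processors as pr

# Same processor chain used by EstimatePoseMasks and MultiPix2Pose: keep only boxes of
# the valid classes, square them, and apply the given offsets.
postprocess_boxes = SequentialProcessor(
    [pr.UnpackDictionary(['boxes2D']),
     pr.FilterClassBoxes2D(['035_power_drill']),
     pr.SquareBoxes2D(),
     pr.OffsetBoxes2D([0.2, 0.2])])

# Hypothetical detection output with a single box given as (x_min, y_min, x_max, y_max).
detections = {'boxes2D': [Box2D(np.array([30, 40, 120, 200]), 1.0, '035_power_drill')]}
print(postprocess_boxes(detections))
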
examples/pix2pose/legacy.py | 133 ++++++++++++++++++++++++++++ examples/pix2pose/pipelines.py | 154 +++------------------------------ 2 files changed, 145 insertions(+), 142 deletions(-) diff --git a/examples/pix2pose/legacy.py b/examples/pix2pose/legacy.py index 6069ada13..76b2b6973 100644 --- a/examples/pix2pose/legacy.py +++ b/examples/pix2pose/legacy.py @@ -116,3 +116,136 @@ def rotate_image(image, rotation_matrix): rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) rotated_image = rotated_image * mask_image return rotated_image + + +class EstimatePoseMasks(Processor): + def __init__(self, detect, estimate_pose, offsets, draw=True, + valid_class_names=['035_power_drill']): + """Pose estimation pipeline using keypoints. + """ + super(EstimatePoseMasks, self).__init__() + self.detect = detect + self.estimate_pose = estimate_pose + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(valid_class_names), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.object_sizes = self.estimate_pose.object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.draw = draw + + def call(self, image): + boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points = [], [] + for crop, box2D in zip(cropped_images, boxes2D): + results = self.estimate_pose(crop, box2D) + pose6D, points2D, points3D = self.unwrap(results) + poses6D.append(pose6D), points.append([points2D, points3D]) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + image = draw_masks(image, points, self.object_sizes) + image = draw_poses6D(image, poses6D, self.cube_points3D, + self.estimate_pose.camera.intrinsics) + return self.wrap(image, boxes2D, poses6D) + + +class MultiPix2Pose(Processor): + def __init__(self, detect, segment, camera, name_to_weights, name_to_sizes, + valid_class_names, offsets=[0.2, 0.2], epsilon=0.15, draw=True): + self.detect = detect + self.name_to_weights = name_to_weights + self.name_to_sizes = name_to_sizes + self.valid_class_names = valid_class_names + self.pix2points = Pix2Points(segment, np.zeros((3)), epsilon) + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.camera = camera + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(valid_class_names), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.draw = draw + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.name_to_cube_points3D = {} + self.mask_to_points2D = RGBMaskToImagePoints2D( + segment.output_shape[1:3]) + for name in self.name_to_sizes: + W, H, D = self.name_to_sizes[name] + cube_points3D = build_cube_points3D(W, H, D) + self.name_to_cube_points3D[name] = cube_points3D + + self.predict_RGBMask = PredictRGBMask(segment, epsilon) + + def call(self, image): + boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points2D, points3D = [], [], [] 
+ for crop, box2D in zip(cropped_images, boxes2D): + class_name = box2D.class_name + name_to_weights = self.name_to_weights[class_name] + self.pix2points.model.load_weights(name_to_weights) + object_sizes = self.name_to_sizes[class_name] + # self.pix2points.object_sizes = object_sizes + # points = self.pix2points(crop) + + RGB_mask = self.predict_RGBMask(crop) + H, W, num_channels = crop.shape + RGB_mask = resize_image(RGB_mask, (W, H)) + + self.mask_to_points3D = RGBMaskToObjectPoints3D(object_sizes) + class_points3D = self.mask_to_points3D(RGB_mask) + class_points2D = self.mask_to_points2D(RGB_mask) + class_points2D = normalize_points2D(class_points2D, H, W) + + # from paz.backend.image import show_image + # show_image((points['RGB_mask'] * 255).astype('uint8')) + # class_points2D = points['points2D'] + # class_points3D = points['points3D'] + H, W, num_channels = crop.shape + class_points2D = denormalize_points2D(class_points2D, H, W) + class_points2D = self.change_coordinates(class_points2D, box2D) + print(len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS) + print(len(class_points3D), len(class_points2D)) + if len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS: + pose_results = self.predict_pose(class_points3D, class_points2D) + success, rotation, translation = pose_results + print('solver success', success) + # success = True + else: + success = False + if success: + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, class_name) + else: + pose6D = None + print(success) + points2D.append(class_points2D) + points3D.append(class_points3D) + poses6D.append(pose6D) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + for class_points2D, class_points3D, pose6D in zip(points2D, points3D, poses6D): + class_name = pose6D.class_name + object_sizes = self.name_to_sizes[class_name] + colors = points3D_to_RGB(class_points3D, object_sizes) + image = draw_points2D(image, class_points2D, colors) + + for pose6D in poses6D: + class_name = pose6D.class_name + cube_points3D = self.name_to_cube_points3D[class_name] + image = draw_pose6D(image, pose6D, cube_points3D, + self.camera.intrinsics) + return {'image': image, 'boxes2D': boxes2D, 'poses6D': poses6D} diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 7d6abf446..2e6fb2a97 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -1,24 +1,27 @@ -from paz.abstract import SequentialProcessor, Processor +from paz.abstract import SequentialProcessor from paz.pipelines import RandomizeRenderedImage as RandomizeRender from paz.abstract.messages import Pose6D +from paz.backend.quaternion import rotation_vector_to_quaternion +from paz.backend.image import resize_image from paz import processors as pr + from processors import ( GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC, ReplaceLowerThanThreshold) + from backend import build_cube_points3D -from processors import UnwrapDictionary from backend import denormalize_points2D -from backend import draw_poses6D from backend import draw_pose6D -from backend import draw_masks from backend import draw_mask from backend import normalize_points2D -from backend import points3D_to_RGB -from backend import draw_points2D -from paz.backend.quaternion import rotation_vector_to_quaternion -from paz.backend.image import resize_image -import numpy as np + +# from processors import UnwrapDictionary +# from backend 
import draw_poses6D +# from backend import draw_masks +# from backend import points3D_to_RGB +# from backend import draw_points2D +# import numpy as np class DomainRandomization(SequentialProcessor): @@ -134,136 +137,3 @@ def call(self, image, box2D=None): results[topic] = image results['points2D'], results['pose6D'] = points2D, pose6D return results - - -class MultiPix2Pose(Processor): - def __init__(self, detect, segment, camera, name_to_weights, name_to_sizes, - valid_class_names, offsets=[0.2, 0.2], epsilon=0.15, draw=True): - self.detect = detect - self.name_to_weights = name_to_weights - self.name_to_sizes = name_to_sizes - self.valid_class_names = valid_class_names - self.pix2points = Pix2Points(segment, np.zeros((3)), epsilon) - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.camera = camera - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(valid_class_names), - pr.SquareBoxes2D(), - pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.draw = draw - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - self.name_to_cube_points3D = {} - self.mask_to_points2D = RGBMaskToImagePoints2D( - segment.output_shape[1:3]) - for name in self.name_to_sizes: - W, H, D = self.name_to_sizes[name] - cube_points3D = build_cube_points3D(W, H, D) - self.name_to_cube_points3D[name] = cube_points3D - - self.predict_RGBMask = PredictRGBMask(segment, epsilon) - - def call(self, image): - boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points2D, points3D = [], [], [] - for crop, box2D in zip(cropped_images, boxes2D): - class_name = box2D.class_name - name_to_weights = self.name_to_weights[class_name] - self.pix2points.model.load_weights(name_to_weights) - object_sizes = self.name_to_sizes[class_name] - # self.pix2points.object_sizes = object_sizes - # points = self.pix2points(crop) - - RGB_mask = self.predict_RGBMask(crop) - H, W, num_channels = crop.shape - RGB_mask = resize_image(RGB_mask, (W, H)) - - self.mask_to_points3D = RGBMaskToObjectPoints3D(object_sizes) - class_points3D = self.mask_to_points3D(RGB_mask) - class_points2D = self.mask_to_points2D(RGB_mask) - class_points2D = normalize_points2D(class_points2D, H, W) - - # from paz.backend.image import show_image - # show_image((points['RGB_mask'] * 255).astype('uint8')) - # class_points2D = points['points2D'] - # class_points3D = points['points3D'] - H, W, num_channels = crop.shape - class_points2D = denormalize_points2D(class_points2D, H, W) - class_points2D = self.change_coordinates(class_points2D, box2D) - print(len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS) - print(len(class_points3D), len(class_points2D)) - if len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS: - pose_results = self.predict_pose(class_points3D, class_points2D) - success, rotation, translation = pose_results - print('solver success', success) - # success = True - else: - success = False - if success: - quaternion = rotation_vector_to_quaternion(rotation) - pose6D = Pose6D(quaternion, translation, class_name) - else: - pose6D = None - print(success) - points2D.append(class_points2D) - points3D.append(class_points3D) - poses6D.append(pose6D) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - for 
class_points2D, class_points3D, pose6D in zip(points2D, points3D, poses6D): - class_name = pose6D.class_name - object_sizes = self.name_to_sizes[class_name] - colors = points3D_to_RGB(class_points3D, object_sizes) - image = draw_points2D(image, class_points2D, colors) - - for pose6D in poses6D: - class_name = pose6D.class_name - cube_points3D = self.name_to_cube_points3D[class_name] - image = draw_pose6D(image, pose6D, cube_points3D, - self.camera.intrinsics) - return {'image': image, 'boxes2D': boxes2D, 'poses6D': poses6D} - - -class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_pose, offsets, draw=True, - valid_class_names=['035_power_drill']): - """Pose estimation pipeline using keypoints. - """ - super(EstimatePoseMasks, self).__init__() - self.detect = detect - self.estimate_pose = estimate_pose - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(valid_class_names), - pr.SquareBoxes2D(), - pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) - self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.object_sizes = self.estimate_pose.object_sizes - self.cube_points3D = build_cube_points3D(*self.object_sizes) - self.draw = draw - - def call(self, image): - boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points = [], [] - for crop, box2D in zip(cropped_images, boxes2D): - results = self.estimate_pose(crop, box2D) - pose6D, points2D, points3D = self.unwrap(results) - poses6D.append(pose6D), points.append([points2D, points3D]) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - image = draw_masks(image, points, self.object_sizes) - image = draw_poses6D(image, poses6D, self.cube_points3D, - self.estimate_pose.camera.intrinsics) - return self.wrap(image, boxes2D, poses6D) From 24db9d04ade3a6cd0b68b97b129e5cc9bb88d8e2 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 12:52:04 +0100 Subject: [PATCH 086/101] Move scene to legacy --- examples/pix2pose/legacy.py | 59 +++++++++++++++++++++++++++++++ examples/pix2pose/scenes.py | 69 ++----------------------------------- 2 files changed, 61 insertions(+), 67 deletions(-) diff --git a/examples/pix2pose/legacy.py b/examples/pix2pose/legacy.py index 76b2b6973..e8140fb48 100644 --- a/examples/pix2pose/legacy.py +++ b/examples/pix2pose/legacy.py @@ -249,3 +249,62 @@ def call(self, image): image = draw_pose6D(image, pose6D, cube_points3D, self.camera.intrinsics) return {'image': image, 'boxes2D': boxes2D, 'poses6D': poses6D} + + +class PixelMaskRenderer(): + """Render-ready scene composed of a single object and a single moving camera. + + # Arguments + path_OBJ: String containing the path to an OBJ file. + viewport_size: List, specifying [H, W] of rendered image. + y_fov: Float indicating the vertical field of view in radians. + distance: List of floats indicating [max_distance, min_distance] + light: List of floats indicating [max_light, min_light] + top_only: Boolean. If True images are only take from the top. + roll: Float, to sample [-roll, roll] rolls of the Z OpenGL camera axis. + shift: Float, to sample [-shift, shift] to move in X, Y OpenGL axes. 
+ """ + def __init__(self, path_OBJ, viewport_size=(128, 128), y_fov=3.14159 / 4.0, + distance=[0.3, 0.5], light=[0.5, 30], top_only=False, + roll=None, shift=None): + self.distance, self.roll, self.shift = distance, roll, shift + self.light_intensity, self.top_only = light, top_only + self._build_scene(path_OBJ, viewport_size, light, y_fov) + self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) + self.flags_RGBA = RenderFlags.RGBA + self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT + self.epsilon = 0.01 + + def _build_scene(self, path, size, light, y_fov): + self.scene = Scene(bg_color=[0, 0, 0, 0]) + self.light = self.scene.add( + DirectionalLight([1.0, 1.0, 1.0], np.mean(light))) + self.camera = self.scene.add( + PerspectiveCamera(y_fov, aspectRatio=np.divide(*size))) + self.pixel_mesh = self.scene.add(color_object(path)) + self.mesh = self.scene.add( + Mesh.from_trimesh(trimesh.load(path), smooth=True)) + self.world_origin = self.mesh.mesh.centroid + + def _sample_parameters(self): + distance = sample_uniformly(self.distance) + camera_origin = sample_point_in_sphere(distance, self.top_only) + camera_origin = random_perturbation(camera_origin, self.epsilon) + light_intensity = sample_uniformly(self.light_intensity) + return camera_origin, light_intensity + + def render(self): + camera_origin, intensity = self._sample_parameters() + camera_to_world, world_to_camera = compute_modelview_matrices( + camera_origin, self.world_origin, self.roll, self.shift) + self.light.light.intensity = intensity + self.scene.set_pose(self.camera, camera_to_world) + self.scene.set_pose(self.light, camera_to_world) + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + return image, alpha, RGB_mask diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index 79d8da90c..53730deca 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -1,74 +1,13 @@ import numpy as np -from paz.backend.render import (sample_uniformly, split_alpha_channel, - random_perturbation, sample_point_in_sphere, - compute_modelview_matrices) +from paz.backend.render import sample_uniformly, split_alpha_channel from pyrender import (PerspectiveCamera, OffscreenRenderer, DirectionalLight, RenderFlags, Mesh, Scene) import trimesh + from coloring import color_object from backend import to_affine_matrix from backend import sample_affine_transform from backend import calculate_canonical_rotation -from paz.models import UNET_VGG16 - - -class PixelMaskRenderer(): - """Render-ready scene composed of a single object and a single moving camera. - - # Arguments - path_OBJ: String containing the path to an OBJ file. - viewport_size: List, specifying [H, W] of rendered image. - y_fov: Float indicating the vertical field of view in radians. - distance: List of floats indicating [max_distance, min_distance] - light: List of floats indicating [max_light, min_light] - top_only: Boolean. If True images are only take from the top. - roll: Float, to sample [-roll, roll] rolls of the Z OpenGL camera axis. - shift: Float, to sample [-shift, shift] to move in X, Y OpenGL axes. 
- """ - def __init__(self, path_OBJ, viewport_size=(128, 128), y_fov=3.14159 / 4.0, - distance=[0.3, 0.5], light=[0.5, 30], top_only=False, - roll=None, shift=None): - self.distance, self.roll, self.shift = distance, roll, shift - self.light_intensity, self.top_only = light, top_only - self._build_scene(path_OBJ, viewport_size, light, y_fov) - self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) - self.flags_RGBA = RenderFlags.RGBA - self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT - self.epsilon = 0.01 - - def _build_scene(self, path, size, light, y_fov): - self.scene = Scene(bg_color=[0, 0, 0, 0]) - self.light = self.scene.add( - DirectionalLight([1.0, 1.0, 1.0], np.mean(light))) - self.camera = self.scene.add( - PerspectiveCamera(y_fov, aspectRatio=np.divide(*size))) - self.pixel_mesh = self.scene.add(color_object(path)) - self.mesh = self.scene.add( - Mesh.from_trimesh(trimesh.load(path), smooth=True)) - self.world_origin = self.mesh.mesh.centroid - - def _sample_parameters(self): - distance = sample_uniformly(self.distance) - camera_origin = sample_point_in_sphere(distance, self.top_only) - camera_origin = random_perturbation(camera_origin, self.epsilon) - light_intensity = sample_uniformly(self.light_intensity) - return camera_origin, light_intensity - - def render(self): - camera_origin, intensity = self._sample_parameters() - camera_to_world, world_to_camera = compute_modelview_matrices( - camera_origin, self.world_origin, self.roll, self.shift) - self.light.light.intensity = intensity - self.scene.set_pose(self.camera, camera_to_world) - self.scene.set_pose(self.light, camera_to_world) - self.pixel_mesh.mesh.is_visible = False - image, depth = self.renderer.render(self.scene, self.flags_RGBA) - self.pixel_mesh.mesh.is_visible = True - image, alpha = split_alpha_channel(image) - self.mesh.mesh.is_visible = False - RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) - self.mesh.mesh.is_visible = True - return image, alpha, RGB_mask class CanonicalScene(): @@ -155,10 +94,6 @@ def render_symmetries(self): from paz.backend.image import show_image from backend import build_rotation_matrix_x from backend import build_rotation_matrix_z - from backend import build_rotation_matrix_y - from paz.backend.render import compute_modelview_matrices - from pipelines import DomainRandomization - import glob # generic parameters root_path = os.path.expanduser('~') From 4472df90ec72a0848d2b0fa11c91842c4c99e970 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 12:53:18 +0100 Subject: [PATCH 087/101] Remove test --- examples/pix2pose/test.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/examples/pix2pose/test.py b/examples/pix2pose/test.py index d9effe5b6..4304c95fd 100644 --- a/examples/pix2pose/test.py +++ b/examples/pix2pose/test.py @@ -1,7 +1,6 @@ from paz.abstract import SequentialProcessor, Processor from paz import processors as pr import numpy as np -from backend import build_cube_points3D # import pytest @@ -63,7 +62,3 @@ def test_copy_with_controlmap_using_3_channels_plus(): assert len(values) == 2 assert np.allclose(values[0], A_random_values + B_random_values) assert np.allclose(values[1], A_random_values) - - -def test_build_cube_points3D(width, height, depth): - cube_points3D = build_cube_points3D(width, height, depth) From 8aba37db75f1ac9733f9afaab760d45556793a6a Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 12:58:46 +0100 Subject: [PATCH 088/101] Rearranged structure to include tested functionality --- 
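[Editorial note, not part of the original patch] This commit only relocates files: the legacy-only scripts move under examples/pix2pose/legacy/ and test.py becomes abstract_test.py, all with unchanged content (100% similarity renames). The unfinished test_build_cube_points3D stub dropped in the previous commit was redundant, since the helper is already exercised in backend_test.py; a minimal sketch of that usage follows, with illustrative extents taken from the power-drill mesh_extents comment in this example and assumed to be run from examples/pix2pose/.

import numpy as np
from backend import build_cube_points3D

# Corner points of an axis-aligned box with the given (width, height, depth) extents.
cube_points3D = build_cube_points3D(0.184, 0.187, 0.052)
assert cube_points3D.shape == (8, 3)
# Expected to be centred at the origin, mirroring the unit-cube fixture in backend_test.py.
assert np.allclose(cube_points3D.mean(axis=0), 0.0)
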
examples/pix2pose/{test.py => abstract_test.py} | 0 examples/pix2pose/{ => legacy}/demo_image.py | 0 examples/pix2pose/{ => legacy}/legacy.py | 0 examples/pix2pose/{ => legacy}/test_rotated_image.py | 0 examples/pix2pose/{ => legacy}/train_gan.py | 0 examples/pix2pose/{ => legacy}/train_symmetric.py | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename examples/pix2pose/{test.py => abstract_test.py} (100%) rename examples/pix2pose/{ => legacy}/demo_image.py (100%) rename examples/pix2pose/{ => legacy}/legacy.py (100%) rename examples/pix2pose/{ => legacy}/test_rotated_image.py (100%) rename examples/pix2pose/{ => legacy}/train_gan.py (100%) rename examples/pix2pose/{ => legacy}/train_symmetric.py (100%) diff --git a/examples/pix2pose/test.py b/examples/pix2pose/abstract_test.py similarity index 100% rename from examples/pix2pose/test.py rename to examples/pix2pose/abstract_test.py diff --git a/examples/pix2pose/demo_image.py b/examples/pix2pose/legacy/demo_image.py similarity index 100% rename from examples/pix2pose/demo_image.py rename to examples/pix2pose/legacy/demo_image.py diff --git a/examples/pix2pose/legacy.py b/examples/pix2pose/legacy/legacy.py similarity index 100% rename from examples/pix2pose/legacy.py rename to examples/pix2pose/legacy/legacy.py diff --git a/examples/pix2pose/test_rotated_image.py b/examples/pix2pose/legacy/test_rotated_image.py similarity index 100% rename from examples/pix2pose/test_rotated_image.py rename to examples/pix2pose/legacy/test_rotated_image.py diff --git a/examples/pix2pose/train_gan.py b/examples/pix2pose/legacy/train_gan.py similarity index 100% rename from examples/pix2pose/train_gan.py rename to examples/pix2pose/legacy/train_gan.py diff --git a/examples/pix2pose/train_symmetric.py b/examples/pix2pose/legacy/train_symmetric.py similarity index 100% rename from examples/pix2pose/train_symmetric.py rename to examples/pix2pose/legacy/train_symmetric.py From c5939cb228540f7a63731a84705bbd1ed27b30de Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 13:59:09 +0100 Subject: [PATCH 089/101] Refactor coloring to be in backend and add pixel mask rendering for simple segmentation training --- examples/pix2pose/backend.py | 38 ++++++++++++ examples/pix2pose/backend_test.py | 39 +++++++++++++ examples/pix2pose/coloring.py | 50 ---------------- examples/pix2pose/scenes.py | 97 +++++++++++++++++++++++++++++-- 4 files changed, 170 insertions(+), 54 deletions(-) delete mode 100644 examples/pix2pose/coloring.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 1f3f3524f..775b17ce2 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -642,6 +642,8 @@ def sample_random_rotation_matrix(): def compute_norm_SO3(rotation_mesh, rotation): + """Computes norm between SO3 elements. 
+ """ difference = np.dot(np.linalg.inv(rotation), rotation_mesh) - np.eye(3) distance = np.linalg.norm(difference, ord='fro') return distance @@ -653,3 +655,39 @@ def calculate_canonical_rotation(rotation_mesh, rotations): closest_rotation = rotations[closest_rotation_arg] canonical_rotation = np.linalg.inv(closest_rotation) return canonical_rotation + + +def normalize_min_max(x, x_min, x_max): + """Normalized data using it's maximum and minimum values + + # Arguments + x: array + x_min: minimum value of x + x_max: maximum value of x + + # Returns + min-max normalized data + """ + return (x - x_min) / (x_max - x_min) + + +def extract_bounding_box_corners(points3D): + """Extracts the (x_min, y_min, z_min) and the (x_max, y_max, z_max) + coordinates from an array of points3D + # Arguments + points3D: Array (num_points, 3) + + # Returns + Left-down-bottom corner (x_min, y_min, z_min) and right-up-top + (x_max, y_max, z_max) corner. + """ + XYZ_min = np.min(points3D, axis=0) + XYZ_max = np.max(points3D, axis=0) + return XYZ_min, XYZ_max + + +def compute_vertices_colors(vertices): + corner3D_min, corner3D_max = extract_bounding_box_corners(vertices) + normalized_colors = normalize_min_max(vertices, corner3D_min, corner3D_max) + colors = (255 * normalized_colors).astype('uint8') + return colors diff --git a/examples/pix2pose/backend_test.py b/examples/pix2pose/backend_test.py index c36e64f25..28a944a2f 100644 --- a/examples/pix2pose/backend_test.py +++ b/examples/pix2pose/backend_test.py @@ -18,6 +18,9 @@ from .backend import build_rotation_matrix_y from .backend import build_rotation_matrix_z from .backend import compute_norm_SO3 +from .backend import normalize_min_max +from .backend import extract_bounding_box_corners +from .backend import compute_vertices_colors @pytest.fixture @@ -35,6 +38,7 @@ def rotation_matrix_Y_HALF_PI(): [-1.0, 0.0, 0.0]]) return rotation_matrix + @pytest.fixture def rotation_matrix_Z_HALF_PI(): rotation_matrix = np.array([[0.0, -1.0, 0.0], @@ -66,6 +70,17 @@ def points2D(): [267, 310]]) +@pytest.fixture +def points3D(): + return np.array([[10, 301, 30], + [145, 253, 12], + [203, 5, 299], + [214, 244, 98], + [23, 67, 16], + [178, 48, 234], + [267, 310, 2]]) + + def test_build_cube_points3D(unit_cube): cube_points = build_cube_points3D(1, 1, 1) assert np.allclose(unit_cube, cube_points) @@ -278,3 +293,27 @@ def test_compute_norm_SO3_X_to_Z(rotation_matrix_X_HALF_PI, # calculate_canonical_rotation + + +def test_normalize_min_max(): + x = np.array([-1.0, 0.0, 1.0]) + values = normalize_min_max(x, np.min(x), np.max(x)) + assert np.allclose(values, np.array([0.0, 0.5, 1.0])) + + +def test_extract_corners3D(points3D): + bottom_left, top_right = extract_bounding_box_corners(points3D) + assert np.allclose(bottom_left, np.array([10, 5, 2])) + assert np.allclose(top_right, np.array([267, 310, 299])) + + +def test_compute_vertices_colors(points3D): + values = compute_vertices_colors(points3D) + colors = np.array([[0, 247, 24], + [133, 207, 8], + [191, 0, 255], + [202, 199, 82], + [12, 51, 12], + [166, 35, 199], + [255, 255, 0]]) + assert np.allclose(values, colors) diff --git a/examples/pix2pose/coloring.py b/examples/pix2pose/coloring.py deleted file mode 100644 index f21e02990..000000000 --- a/examples/pix2pose/coloring.py +++ /dev/null @@ -1,50 +0,0 @@ -import os -import numpy as np -import trimesh -from pyrender import Mesh, Scene, Viewer -from pyrender.constants import RenderFlags - - -def normalize_min_max(x, x_min, x_max): - return (x - x_min) / (x_max - x_min) - - -def 
load_obj(path): - mesh = trimesh.load(path) - return mesh - - -def extract_corners3D(vertices): - point3D_min = np.min(vertices, axis=0) - point3D_max = np.max(vertices, axis=0) - return point3D_min, point3D_max - - -def compute_vertices_colors(vertices): - corner3D_min, corner3D_max = extract_corners3D(vertices) - normalized_colors = normalize_min_max(vertices, corner3D_min, corner3D_max) - colors = (255 * normalized_colors).astype('uint8') - return colors - - -def color_object(path): - mesh = load_obj(path) - colors = compute_vertices_colors(mesh.vertices) - mesh.visual = mesh.visual.to_color() - mesh.visual.vertex_colors = colors - mesh = Mesh.from_trimesh(mesh, smooth=False) - mesh.primitives[0].material.metallicFactor = 0.0 - mesh.primitives[0].material.roughnessFactor = 1.0 - mesh.primitives[0].material.alphaMode = 'OPAQUE' - return mesh - - -if __name__ == "__main__": - scene = Scene(bg_color=[0, 0, 0]) - root = os.path.expanduser('~') - mesh_path = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' - path = os.path.join(root, mesh_path) - mesh = color_object(path) - scene.add(mesh) - Viewer(scene, use_raymond_lighting=True, flags=RenderFlags.FLAT) - # mesh_extents = np.array([0.184, 0.187, 0.052]) diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index 53730deca..07e79e2e9 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -1,13 +1,102 @@ import numpy as np from paz.backend.render import sample_uniformly, split_alpha_channel +from paz.backend.render import ( + sample_point_in_sphere, random_perturbation, compute_modelview_matrices) from pyrender import (PerspectiveCamera, OffscreenRenderer, DirectionalLight, - RenderFlags, Mesh, Scene) + RenderFlags, Mesh, Scene, Viewer) import trimesh -from coloring import color_object from backend import to_affine_matrix from backend import sample_affine_transform from backend import calculate_canonical_rotation +from backend import compute_vertices_colors + + +def load_obj(path): + mesh = trimesh.load(path) + return mesh + + +def color_object(path): + mesh = load_obj(path) + colors = compute_vertices_colors(mesh.vertices) + mesh.visual = mesh.visual.to_color() + mesh.visual.vertex_colors = colors + mesh = Mesh.from_trimesh(mesh, smooth=False) + mesh.primitives[0].material.metallicFactor = 0.0 + mesh.primitives[0].material.roughnessFactor = 1.0 + mesh.primitives[0].material.alphaMode = 'OPAQUE' + return mesh + + +def quick_color_visualize(): + scene = Scene(bg_color=[0, 0, 0]) + root = os.path.expanduser('~') + mesh_path = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' + path = os.path.join(root, mesh_path) + mesh = color_object(path) + scene.add(mesh) + Viewer(scene, use_raymond_lighting=True, flags=RenderFlags.FLAT) + # mesh_extents = np.array([0.184, 0.187, 0.052]) + + +class PixelMaskRenderer(): + """Render-ready scene composed of a single object and a single moving camera. + + # Arguments + path_OBJ: String containing the path to an OBJ file. + viewport_size: List, specifying [H, W] of rendered image. + y_fov: Float indicating the vertical field of view in radians. + distance: List of floats indicating [max_distance, min_distance] + light: List of floats indicating [max_light, min_light] + top_only: Boolean. If True images are only take from the top. + roll: Float, to sample [-roll, roll] rolls of the Z OpenGL camera axis. + shift: Float, to sample [-shift, shift] to move in X, Y OpenGL axes. 
+ """ + def __init__(self, path_OBJ, viewport_size=(128, 128), y_fov=3.14159 / 4.0, + distance=[0.3, 0.5], light=[0.5, 30], top_only=False, + roll=None, shift=None): + self.distance, self.roll, self.shift = distance, roll, shift + self.light_intensity, self.top_only = light, top_only + self._build_scene(path_OBJ, viewport_size, light, y_fov) + self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) + self.flags_RGBA = RenderFlags.RGBA + self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT + self.epsilon = 0.01 + + def _build_scene(self, path, size, light, y_fov): + self.scene = Scene(bg_color=[0, 0, 0, 0]) + self.light = self.scene.add( + DirectionalLight([1.0, 1.0, 1.0], np.mean(light))) + self.camera = self.scene.add( + PerspectiveCamera(y_fov, aspectRatio=np.divide(*size))) + self.pixel_mesh = self.scene.add(color_object(path)) + self.mesh = self.scene.add( + Mesh.from_trimesh(trimesh.load(path), smooth=True)) + self.world_origin = self.mesh.mesh.centroid + + def _sample_parameters(self): + distance = sample_uniformly(self.distance) + camera_origin = sample_point_in_sphere(distance, self.top_only) + camera_origin = random_perturbation(camera_origin, self.epsilon) + light_intensity = sample_uniformly(self.light_intensity) + return camera_origin, light_intensity + + def render(self): + camera_origin, intensity = self._sample_parameters() + camera_to_world, world_to_camera = compute_modelview_matrices( + camera_origin, self.world_origin, self.roll, self.shift) + self.light.light.intensity = intensity + self.scene.set_pose(self.camera, camera_to_world) + self.scene.set_pose(self.light, camera_to_world) + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + return image, alpha, RGB_mask class CanonicalScene(): @@ -182,7 +271,6 @@ def render_symmetries(self): show_image(image) """ - """ show_image(image) for arg in range(0): @@ -213,7 +301,8 @@ def render_symmetries(self): # error = RGB_mask_pred - RGB_mask RGB_mask_pred = RGB_mask_pred.astype('uint8') print(image.dtype, RGB_mask_pred.dtype, RGB_mask_true.dtype) - # images = np.concatenate([image, RGB_mask_pred, RGB_mask_true], axis=1) + # images = np.concatenate( + [image, RGB_mask_pred, RGB_mask_true], axis=1) images = np.concatenate([image, RGB_mask_pred], axis=1) show_image(images) """ From 1959b5fe31f7ee6780c53bb7b116ed1f6ddca2a4 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 19:37:38 +0100 Subject: [PATCH 090/101] Fix bug with demo importing legacy pipeline --- examples/pix2pose/demo.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index c972f7165..e1a9a046b 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -5,10 +5,7 @@ from paz.backend.camera import VideoPlayer from paz.applications import SSD300FAT -# from pipelines import Pix2Pose -# from pipelines import EstimatePoseMasks from pipelines import Pix2Pose -from pipelines import EstimatePoseMasks image_shape = (128, 128, 3) From 3b0cfb696b9a4e67e9b6f54cf4d92c60fadb0d43 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 2 Feb 2022 23:58:59 +0100 Subject: [PATCH 091/101] Refactor basic training script for pix2pose RGB mask --- examples/pix2pose/train.py | 157 
+++++++++++++++++++++++++------------ 1 file changed, 107 insertions(+), 50 deletions(-) diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 12c48a715..6eccd230a 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -1,6 +1,14 @@ import os import glob +import json +import argparse +from datetime import datetime + +from tensorflow.keras.utils import get_file from tensorflow.keras.optimizers import Adam +from tensorflow.keras.callbacks import ( + EarlyStopping, CSVLogger, ModelCheckpoint, ReduceLROnPlateau) + from paz.abstract import GeneratingSequence from paz.models.segmentation import UNET_VGG16 @@ -9,65 +17,114 @@ from loss import WeightedReconstruction from metrics import mean_squared_error as MSE -# global training parameters -H, W, num_channels = image_shape = [128, 128, 3] -beta = 3.0 -batch_size = 32 -num_classes = 3 -learning_rate = 0.001 -max_num_epochs = 10 -steps_per_epoch = 1000 -inputs_to_shape = {'input_1': [H, W, 3]} -labels_to_shape = {'masks': [H, W, 4]} - -# global rendering parameters +MTL_FILE = 'textured.mtl' +OBJ_FILE = 'textured.obj' +PNG_FILE = 'texture_map.png' +cache_subdir = 'paz/datasets/ycb_video/035_power_drill' +URL = 'https://github.com/oarriaga/altamira-data/releases/download/v0.12/' + +MTL_FILEPATH = get_file(MTL_FILE, URL + MTL_FILE, cache_subdir=cache_subdir) +OBJ_FILEPATH = get_file(OBJ_FILE, URL + OBJ_FILE, cache_subdir=cache_subdir) +PNG_FILEPATH = get_file(PNG_FILE, URL + PNG_FILE, cache_subdir=cache_subdir) + root_path = os.path.expanduser('~') -background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' -background_wildcard = os.path.join(root_path, background_wildcard) -image_paths = glob.glob(background_wildcard) -num_occlusions = 1 -viewport_size = image_shape[:2] -light = [1.0, 30] -y_fov = 3.14159 / 4.0 - -# power drill parameters -""" -OBJ_name = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' -distance = [0.3, 0.5] -top_only = False -roll = 3.14159 -shift = 0.05 -""" - -# hammer parameters -OBJ_name = '.keras/paz/datasets/ycb_models/048_hammer/textured.obj' -distance = [0.5, 0.6] -top_only = False -roll = 3.14159 -shift = 0.05 - -path_OBJ = os.path.join(root_path, OBJ_name) - -renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, - light, top_only, roll, shift) +description = 'Training script for pix2pose model' +parser = argparse.ArgumentParser(description=description) +parser.add_argument('--obj_path', default=OBJ_FILEPATH, type=str, + help='Path to OBJ model') +parser.add_argument('--save_path', default='experiments', type=str, + help='Path for saving evaluations') +parser.add_argument('--model', default='UNET_VGG16', type=str, + choices=['UNET_VGG16']) +parser.add_argument('--batch_size', default=32, type=int, + help='Batch size used during optimization') +parser.add_argument('--learning_rate', default=0.001, type=float, + help='Initial learning rate for Adam') +parser.add_argument('--beta', default=3.0, type=float, + help='Loss Weight for pixels in object') +parser.add_argument('--max_num_epochs', default=100, type=int, + help='Number of epochs before finishing') +parser.add_argument('--steps_per_epoch', default=1000, type=int, + help='Steps per epoch') +parser.add_argument('--stop_patience', default=5, type=int, + help='Early stop patience') +parser.add_argument('--reduce_patience', default=2, type=int, + help='Reduce learning rate patience') +parser.add_argument('--run_label', default='RUN_00', type=str, + help='Label used to distinguish between 
different runs') +parser.add_argument('--time', type=str, + default=datetime.now().strftime("%d/%m/%Y %H:%M:%S")) +parser.add_argument('--light', nargs='+', type=float, default=[1.0, 30]) +parser.add_argument('--y_fov', default=3.14159 / 4.0, type=float, + help='Field of view angle in radians') +parser.add_argument('--distance', nargs='+', type=float, default=[0.3, 0.5], + help='Distance from camera to origin in meters') +parser.add_argument('--top_only', default=0, choices=[0, 1], type=int, + help='Flag for full sphere or top half for rendering') +parser.add_argument('--roll', default=3.14159, type=float, + help='Threshold for camera roll in radians') +parser.add_argument('--shift', default=0.05, type=float, + help='Threshold of random shift of camera') +parser.add_argument('--num_occlusions', default=1, type=int, + help='Number of occlusions added to image') +parser.add_argument('--image_size', default=128, type=int, + help='Size of the side of a square image e.g. 64') +parser.add_argument('--background_wildcard', type=str, + help='Wildcard for backgroun images', default=os.path.join( + root_path, + '.keras/paz/datasets/voc-backgrounds/*.png')) +args = parser.parse_args() + + +# loading background image paths +image_paths = glob.glob(args.background_wildcard) +if len(image_paths) == 0: + raise ValueError('Background images not found. Provide path to png images') + +# setting rendering function +H, W, num_channels = image_shape = [args.image_size, args.image_size, 3] +renderer = PixelMaskRenderer( + args.obj_path, [H, W], args.y_fov, args.distance, args.light, + args.top_only, args.roll, args.shift) + +# building full processor +inputs_to_shape = {'input_1': [H, W, num_channels]} # inputs RGB +labels_to_shape = {'masks': [H, W, num_channels + 1]} # labels RGBMask + alpha processor = DomainRandomization( renderer, image_shape, image_paths, inputs_to_shape, - labels_to_shape, num_occlusions) + labels_to_shape, args.num_occlusions) + +# building python generator +sequence = GeneratingSequence(processor, args.batch_size, args.steps_per_epoch) -sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) +# instantiating the model and loss +model = UNET_VGG16(num_channels, image_shape, freeze_backbone=True) +optimizer = Adam(args.learning_rate) +loss = WeightedReconstruction(args.beta) +model.compile(optimizer, loss, metrics=MSE) -weighted_reconstruction = WeightedReconstruction(beta) +# building experiment path +experiment_label = '_'.join([model.name, args.run_label]) +experiment_path = os.path.join(args.save_path, experiment_label) -model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) -optimizer = Adam(learning_rate) -model.compile(optimizer, weighted_reconstruction, metrics=MSE) +# setting additional callbacks +log = CSVLogger(os.path.join(experiment_path, 'optimization.log')) +stop = EarlyStopping('loss', patience=args.stop_patience, verbose=1) +plateau = ReduceLROnPlateau('loss', patience=args.reduce_patience, verbose=1) +save_filename = os.path.join(experiment_path, 'model_weights.hdf5') +save = ModelCheckpoint(save_filename, 'loss', verbose=1, save_best_only=True, + save_weights_only=True) +callbacks = [log, stop, save, plateau] + +# saving hyper-parameters and model summary +with open(os.path.join(experiment_path, 'hyperparameters.json'), 'w') as filer: + json.dump(args.__dict__, filer, indent=4) +with open(os.path.join(experiment_path, 'model_summary.txt'), 'w') as filer: + model.summary(print_fn=lambda x: filer.write(x + '\n')) model.fit( sequence, - 
epochs=max_num_epochs, - # callbacks=[stop, log, save, plateau, draw], + epochs=args.max_num_epochs, verbose=1, workers=0) - -model.save_weights('UNET-VGG16_weights_hammer_10.hdf5') From 3be885f36dddb082f52f558441cba429dfdfdc7a Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 3 Feb 2022 11:04:38 +0100 Subject: [PATCH 092/101] Move metrics to legacy dir --- examples/pix2pose/{ => legacy}/metrics.py | 0 examples/pix2pose/train.py | 13 ++++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) rename examples/pix2pose/{ => legacy}/metrics.py (100%) diff --git a/examples/pix2pose/metrics.py b/examples/pix2pose/legacy/metrics.py similarity index 100% rename from examples/pix2pose/metrics.py rename to examples/pix2pose/legacy/metrics.py diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 6eccd230a..35a873481 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -4,6 +4,7 @@ import argparse from datetime import datetime +import tensorflow as tf from tensorflow.keras.utils import get_file from tensorflow.keras.optimizers import Adam from tensorflow.keras.callbacks import ( @@ -14,8 +15,7 @@ from scenes import PixelMaskRenderer from pipelines import DomainRandomization -from loss import WeightedReconstruction -from metrics import mean_squared_error as MSE +from weighted_reconstruction import WeightedReconstruction MTL_FILE = 'textured.mtl' OBJ_FILE = 'textured.obj' @@ -98,11 +98,18 @@ # building python generator sequence = GeneratingSequence(processor, args.batch_size, args.steps_per_epoch) + +# metric for labels with alpha mask +def mean_squared_error(y_true, y_pred): + squared_difference = tf.square(y_true[:, :, :, 0:3] - y_pred[:, :, :, 0:3]) + return tf.reduce_mean(squared_difference, axis=-1) + + # instantiating the model and loss model = UNET_VGG16(num_channels, image_shape, freeze_backbone=True) optimizer = Adam(args.learning_rate) loss = WeightedReconstruction(args.beta) -model.compile(optimizer, loss, metrics=MSE) +model.compile(optimizer, loss, mean_squared_error) # building experiment path experiment_label = '_'.join([model.name, args.run_label]) From 788de8808e0168c0af096e1bd2aa2018b6cdd0b3 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Sat, 5 Feb 2022 15:42:39 +0100 Subject: [PATCH 093/101] Refactor tests --- examples/pix2pose/backend_test.py | 18 ++++- examples/pix2pose/legacy/legacy.py | 59 --------------- examples/pix2pose/pipelines.py | 61 +++++++++++---- examples/pix2pose/processors_test.py | 108 +++++++++++++++++++++++++++ 4 files changed, 172 insertions(+), 74 deletions(-) create mode 100644 examples/pix2pose/processors_test.py diff --git a/examples/pix2pose/backend_test.py b/examples/pix2pose/backend_test.py index 28a944a2f..2e7f8b128 100644 --- a/examples/pix2pose/backend_test.py +++ b/examples/pix2pose/backend_test.py @@ -5,7 +5,6 @@ from .backend import preprocess_image_points2D from .backend import replace_lower_than_threshold from .backend import arguments_to_image_points2D -# from .backend import points3D_to_RGB from .backend import normalize_points2D from .backend import denormalize_points2D from .backend import homogenous_quaternion_to_rotation_matrix @@ -21,6 +20,7 @@ from .backend import normalize_min_max from .backend import extract_bounding_box_corners from .backend import compute_vertices_colors +from .backend import project_to_image @pytest.fixture @@ -317,3 +317,19 @@ def test_compute_vertices_colors(points3D): [166, 35, 199], [255, 255, 0]]) assert np.allclose(values, colors) + + +def 
test_project_to_image(): + points3D = np.array([[1.0, 1.0, 1.0]]) + translation = np.array([0.0, 0.0, -3.0]) + rotation = np.array([[0.0, 0.0, -1.0], + [0.0, 1.0, 0.0], + [1.0, 0.0, 0.0]]) + fx = 1.0 + fy = 1.0 + tx = 0.0 + ty = 0.0 + camera_intrinsics = np.array([[fx, 0.0, tx], [0.0, fy, ty]]) + points2D = project_to_image(rotation, translation, + points3D, camera_intrinsics) + assert np.allclose(points2D, np.array([0.5, -0.5])) diff --git a/examples/pix2pose/legacy/legacy.py b/examples/pix2pose/legacy/legacy.py index e8140fb48..76b2b6973 100644 --- a/examples/pix2pose/legacy/legacy.py +++ b/examples/pix2pose/legacy/legacy.py @@ -249,62 +249,3 @@ def call(self, image): image = draw_pose6D(image, pose6D, cube_points3D, self.camera.intrinsics) return {'image': image, 'boxes2D': boxes2D, 'poses6D': poses6D} - - -class PixelMaskRenderer(): - """Render-ready scene composed of a single object and a single moving camera. - - # Arguments - path_OBJ: String containing the path to an OBJ file. - viewport_size: List, specifying [H, W] of rendered image. - y_fov: Float indicating the vertical field of view in radians. - distance: List of floats indicating [max_distance, min_distance] - light: List of floats indicating [max_light, min_light] - top_only: Boolean. If True images are only take from the top. - roll: Float, to sample [-roll, roll] rolls of the Z OpenGL camera axis. - shift: Float, to sample [-shift, shift] to move in X, Y OpenGL axes. - """ - def __init__(self, path_OBJ, viewport_size=(128, 128), y_fov=3.14159 / 4.0, - distance=[0.3, 0.5], light=[0.5, 30], top_only=False, - roll=None, shift=None): - self.distance, self.roll, self.shift = distance, roll, shift - self.light_intensity, self.top_only = light, top_only - self._build_scene(path_OBJ, viewport_size, light, y_fov) - self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) - self.flags_RGBA = RenderFlags.RGBA - self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT - self.epsilon = 0.01 - - def _build_scene(self, path, size, light, y_fov): - self.scene = Scene(bg_color=[0, 0, 0, 0]) - self.light = self.scene.add( - DirectionalLight([1.0, 1.0, 1.0], np.mean(light))) - self.camera = self.scene.add( - PerspectiveCamera(y_fov, aspectRatio=np.divide(*size))) - self.pixel_mesh = self.scene.add(color_object(path)) - self.mesh = self.scene.add( - Mesh.from_trimesh(trimesh.load(path), smooth=True)) - self.world_origin = self.mesh.mesh.centroid - - def _sample_parameters(self): - distance = sample_uniformly(self.distance) - camera_origin = sample_point_in_sphere(distance, self.top_only) - camera_origin = random_perturbation(camera_origin, self.epsilon) - light_intensity = sample_uniformly(self.light_intensity) - return camera_origin, light_intensity - - def render(self): - camera_origin, intensity = self._sample_parameters() - camera_to_world, world_to_camera = compute_modelview_matrices( - camera_origin, self.world_origin, self.roll, self.shift) - self.light.light.intensity = intensity - self.scene.set_pose(self.camera, camera_to_world) - self.scene.set_pose(self.light, camera_to_world) - self.pixel_mesh.mesh.is_visible = False - image, depth = self.renderer.render(self.scene, self.flags_RGBA) - self.pixel_mesh.mesh.is_visible = True - image, alpha = split_alpha_channel(image) - self.mesh.mesh.is_visible = False - RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) - self.mesh.mesh.is_visible = True - return image, alpha, RGB_mask diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 
2e6fb2a97..067c2d658 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -1,6 +1,5 @@ -from paz.abstract import SequentialProcessor +from paz.abstract import SequentialProcessor, Processor, Pose6D from paz.pipelines import RandomizeRenderedImage as RandomizeRender -from paz.abstract.messages import Pose6D from paz.backend.quaternion import rotation_vector_to_quaternion from paz.backend.image import resize_image from paz import processors as pr @@ -8,20 +7,15 @@ from processors import ( GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC, - ReplaceLowerThanThreshold) + ReplaceLowerThanThreshold, UnwrapDictionary) -from backend import build_cube_points3D -from backend import denormalize_points2D -from backend import draw_pose6D -from backend import draw_mask -from backend import normalize_points2D +# TODO replace draw_pose6D with draw_poses6D +# TODO replace draw_mask with draw_masks +from backend import draw_pose6D, draw_mask -# from processors import UnwrapDictionary -# from backend import draw_poses6D -# from backend import draw_masks -# from backend import points3D_to_RGB -# from backend import draw_points2D -# import numpy as np +from backend import ( + build_cube_points3D, denormalize_points2D, normalize_points2D, + draw_masks, draw_poses6D) class DomainRandomization(SequentialProcessor): @@ -137,3 +131,42 @@ def call(self, image, box2D=None): results[topic] = image results['points2D'], results['pose6D'] = points2D, pose6D return results + + +class EstimatePoseMasks(Processor): + def __init__(self, detect, estimate_pose, offsets, draw=True, + valid_class_names=['035_power_drill']): + """Pose estimation pipeline using keypoints. + """ + super(EstimatePoseMasks, self).__init__() + self.detect = detect + self.estimate_pose = estimate_pose + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(valid_class_names), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.object_sizes = self.estimate_pose.object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.draw = draw + + def call(self, image): + boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points = [], [] + for crop, box2D in zip(cropped_images, boxes2D): + results = self.estimate_pose(crop, box2D) + pose6D, points2D, points3D = self.unwrap(results) + poses6D.append(pose6D), points.append([points2D, points3D]) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + image = draw_masks(image, points, self.object_sizes) + image = draw_poses6D(image, poses6D, self.cube_points3D, + self.estimate_pose.camera.intrinsics) + return self.wrap(image, boxes2D, poses6D) diff --git a/examples/pix2pose/processors_test.py b/examples/pix2pose/processors_test.py new file mode 100644 index 000000000..31da496fe --- /dev/null +++ b/examples/pix2pose/processors_test.py @@ -0,0 +1,108 @@ +import pytest +import numpy as np + +from .processors import ImageToNormalizedDeviceCoordinates +from .processors import NormalizedDeviceCoordinatesToImage +from .processors import ReplaceLowerThanThreshold +from .processors 
import NormalizePoints2D +from .processors import ToAffineMatrix +from .processors import ArgumentsToImagePoints2D +from .processors import UnwrapDictionary +# from .processors import GetNonZeroArguments +# from .processors import GetNonZeroValues +# from .processors import Scale +# from .processors import SolveChangingObjectPnPRANSAC + + +@pytest.fixture +def rotation_matrix_X_HALF_PI(): + rotation_matrix = np.array([[1.0, 0.0, 0.0], + [0.0, 0.0, -1.0], + [0.0, 1.0, 0.0]]) + return rotation_matrix + + +@pytest.fixture +def rotation_matrix_Y_HALF_PI(): + rotation_matrix = np.array([[0.0, 0.0, 1.0], + [0.0, 1.0, 0.0], + [-1.0, 0.0, 0.0]]) + return rotation_matrix + + +@pytest.fixture +def rotation_matrix_Z_HALF_PI(): + rotation_matrix = np.array([[0.0, -1.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 0.0, 1.0]]) + return rotation_matrix + + +def test_ImageToNormalizedDeviceCoordinates(): + image = np.array([[0, 127.5, 255]]) + image_to_NDC = ImageToNormalizedDeviceCoordinates() + values = image_to_NDC(image) + assert np.allclose(values, np.array([[-1.0, 0.0, 1.0]])) + + +def test_NormalizedDeviceCoordinatesToImage(): + coordinates = np.array([[-1.0, 0.0, 1.0]]) + NDC_to_image = NormalizedDeviceCoordinatesToImage() + values = NDC_to_image(coordinates) + assert np.allclose(values, np.array([[0.0, 127.5, 255.0]])) + + +def test_ReplaceLowerThanThreshold(): + source = np.ones((128, 128, 3)) + replace_lower_than_threshold = ReplaceLowerThanThreshold(2.0, 5.0) + target = replace_lower_than_threshold(source) + assert np.allclose(target, 5.0) + + source = np.ones((128, 128, 3)) + replace_lower_than_threshold = ReplaceLowerThanThreshold(0.0, -1.0) + target = replace_lower_than_threshold(source) + assert np.allclose(target, 1.0) + + +def test_NormalizePoints2D(): + height, width = 480, 640 + points2D = np.array([[0, 0], [320, 240], [640, 480]]) + normalize_points2D = NormalizePoints2D((height, width)) + normalized_points = normalize_points2D(points2D, height, width) + assert np.allclose(normalized_points, np.array([[-1, -1], [0, 0], [1, 1]])) + + +def test_ToAffineMarixIdentity(): + rotation_matrix = np.eye(3) + translation = np.zeros(3) + to_affine_matrix = ToAffineMatrix() + matrix = to_affine_matrix(rotation_matrix, translation) + assert np.allclose(matrix, np.eye(4)) + + +def test_ToAffineMatrix(): + rotation_matrix = np.array([[1.0, 0.0, 0.0], + [0.0, 0.0, -1.0], + [0.0, 1.0, 0.0]]) + translation = np.array([3.0, 1.2, 3.0]) + to_affine_matrix = ToAffineMatrix() + matrix = to_affine_matrix(rotation_matrix, translation) + affine_matrix = np.array([[1.0, 0.0, 0.0, 3.0], + [0.0, 0.0, -1.0, 1.2], + [0.0, 1.0, 0.0, 3.0], + [0.0, 0.0, 0.0, 1.0]]) + assert np.allclose(affine_matrix, matrix) + + +def test_ArgumentsToImagePoints2D(): + col_args = np.array([3, 44, 6]) + row_args = np.array([66, 0, 5]) + arguments_to_image_points2D = ArgumentsToImagePoints2D() + image_points2D = arguments_to_image_points2D(row_args, col_args) + assert np.allclose(image_points2D, np.array([[3, 66], [44, 0], [6, 5]])) + + +def test_UnwrapDictionary(): + dictionary = {'a': 1, 'b': 2, 'c': 3} + unwrap = UnwrapDictionary(['b', 'a', 'c']) + assert unwrap(dictionary) == [2, 1, 3] From 99c19543925c2cbcfe7da314ece9002a7e1a2375 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 8 Feb 2022 10:58:54 +0100 Subject: [PATCH 094/101] Add backend and test processors --- examples/pix2pose/backend.py | 135 +++++++++++---------------- examples/pix2pose/backend_test.py | 32 +++++++ examples/pix2pose/legacy/icp.py | 102 ++++++++++++++++++++ 
examples/pix2pose/legacy/legacy.py | 30 ++++++ examples/pix2pose/processors_test.py | 16 +++- 5 files changed, 231 insertions(+), 84 deletions(-) create mode 100644 examples/pix2pose/legacy/icp.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 775b17ce2..4f9ae19ce 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -245,7 +245,7 @@ def points3D_to_RGB(points3D, object_sizes): """Transforms points3D in object frame to RGB color space. # Arguments points3D: Array (num_points, 3). Points3D a - object_sizes: List (3) indicating the + object_sizes: Array (3) indicating the (width, height, depth) of object. # Returns @@ -493,6 +493,58 @@ def normalized_device_coordinates_to_image(image): return (image + 1.0) * 127.5 +def compute_norm_SO3(rotation_mesh, rotation): + """Computes norm between SO3 elements. + """ + difference = np.dot(np.linalg.inv(rotation), rotation_mesh) - np.eye(3) + distance = np.linalg.norm(difference, ord='fro') + return distance + + +def calculate_canonical_rotation(rotation_mesh, rotations): + norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] + closest_rotation_arg = np.argmin(norms) + closest_rotation = rotations[closest_rotation_arg] + canonical_rotation = np.linalg.inv(closest_rotation) + return canonical_rotation + + +def normalize_min_max(x, x_min, x_max): + """Normalized data using it's maximum and minimum values + + # Arguments + x: array + x_min: minimum value of x + x_max: maximum value of x + + # Returns + min-max normalized data + """ + return (x - x_min) / (x_max - x_min) + + +def extract_bounding_box_corners(points3D): + """Extracts the (x_min, y_min, z_min) and the (x_max, y_max, z_max) + coordinates from an array of points3D + # Arguments + points3D: Array (num_points, 3) + + # Returns + Left-down-bottom corner (x_min, y_min, z_min) and right-up-top + (x_max, y_max, z_max) corner. + """ + XYZ_min = np.min(points3D, axis=0) + XYZ_max = np.max(points3D, axis=0) + return XYZ_min, XYZ_max + + +def compute_vertices_colors(vertices): + corner3D_min, corner3D_max = extract_bounding_box_corners(vertices) + normalized_colors = normalize_min_max(vertices, corner3D_min, corner3D_max) + colors = (255 * normalized_colors).astype('uint8') + return colors + + def build_rotation_matrix_z(angle): """Builds rotation matrix in Z axis. @@ -610,84 +662,3 @@ def sample_affine_transform(min_corner, max_corner): rotation_matrix = sample_front_rotation_matrix() affine_matrix = to_affine_matrix(rotation_matrix, translation) return affine_matrix - - -def sample_random_rotation_matrix(): - """Samples SO3 in rotation matrix form. - - # Return - Array (3, 3). 
- - # References - [Lost in my terminal](http://blog.lostinmyterminal.com/python/2015/05/ - 12/random-rotation-matrix.html) - [real-time rendering](from http://www.realtimerendering.com/resources/ - GraphicsGems/gemsiii/rand_rotation.c) - """ - theta = 2.0 * np.pi * np.random.uniform() - phi = 2.0 * np.pi * np.random.uniform() - z = 2.0 * np.random.uniform() - # random_vector has length sqrt(2) to eliminate 2 in the Householder matrix - r = np.sqrt(z) - random_vector = np.array( - [np.sin(phi) * r, np.cos(phi) * r, np.sqrt(2.0 - z)]) - sin_theta = np.sin(theta) - cos_theta = np.cos(theta) - R = np.array([[+cos_theta, +sin_theta, 0.0], - [-sin_theta, +cos_theta, 0.0], - [0.0, 0.0, 1.0]]) - random_rotation_matrix = ( - np.outer(random_vector, random_vector) - np.eye(3)).dot(R) - return random_rotation_matrix - - -def compute_norm_SO3(rotation_mesh, rotation): - """Computes norm between SO3 elements. - """ - difference = np.dot(np.linalg.inv(rotation), rotation_mesh) - np.eye(3) - distance = np.linalg.norm(difference, ord='fro') - return distance - - -def calculate_canonical_rotation(rotation_mesh, rotations): - norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] - closest_rotation_arg = np.argmin(norms) - closest_rotation = rotations[closest_rotation_arg] - canonical_rotation = np.linalg.inv(closest_rotation) - return canonical_rotation - - -def normalize_min_max(x, x_min, x_max): - """Normalized data using it's maximum and minimum values - - # Arguments - x: array - x_min: minimum value of x - x_max: maximum value of x - - # Returns - min-max normalized data - """ - return (x - x_min) / (x_max - x_min) - - -def extract_bounding_box_corners(points3D): - """Extracts the (x_min, y_min, z_min) and the (x_max, y_max, z_max) - coordinates from an array of points3D - # Arguments - points3D: Array (num_points, 3) - - # Returns - Left-down-bottom corner (x_min, y_min, z_min) and right-up-top - (x_max, y_max, z_max) corner. 
- """ - XYZ_min = np.min(points3D, axis=0) - XYZ_max = np.max(points3D, axis=0) - return XYZ_min, XYZ_max - - -def compute_vertices_colors(vertices): - corner3D_min, corner3D_max = extract_bounding_box_corners(vertices) - normalized_colors = normalize_min_max(vertices, corner3D_min, corner3D_max) - colors = (255 * normalized_colors).astype('uint8') - return colors diff --git a/examples/pix2pose/backend_test.py b/examples/pix2pose/backend_test.py index 2e7f8b128..2cb53f52f 100644 --- a/examples/pix2pose/backend_test.py +++ b/examples/pix2pose/backend_test.py @@ -17,10 +17,12 @@ from .backend import build_rotation_matrix_y from .backend import build_rotation_matrix_z from .backend import compute_norm_SO3 +from .backend import calculate_canonical_rotation from .backend import normalize_min_max from .backend import extract_bounding_box_corners from .backend import compute_vertices_colors from .backend import project_to_image +from .backend import points3D_to_RGB @pytest.fixture @@ -81,6 +83,23 @@ def points3D(): [267, 310, 2]]) +@pytest.fixture +def object_colors(): + return np.array([[136, 166, 159], + [3, 119, 140], + [56, 132, 189], + [66, 110, 231], + [148, 193, 144], + [33, 174, 120], + [114, 175, 129]]) + + +@pytest.fixture +def object_sizes(): + object_sizes = np.array([280, 260, 240]) + return object_sizes + + def test_build_cube_points3D(unit_cube): cube_points = build_cube_points3D(1, 1, 1) assert np.allclose(unit_cube, cube_points) @@ -333,3 +352,16 @@ def test_project_to_image(): points2D = project_to_image(rotation, translation, points3D, camera_intrinsics) assert np.allclose(points2D, np.array([0.5, -0.5])) + + +def test_calculate_canonical_rotation(rotation_matrix_X_HALF_PI): + X_PI = np.matmul(rotation_matrix_X_HALF_PI, rotation_matrix_X_HALF_PI) + rotations = [X_PI, rotation_matrix_X_HALF_PI] + canonical_rotation = calculate_canonical_rotation(np.eye(3), rotations) + assert np.allclose( + canonical_rotation, np.linalg.inv(rotation_matrix_X_HALF_PI)) + + +def test_points3D_to_RGB(points3D, object_sizes, object_colors): + values = points3D_to_RGB(points3D, object_sizes) + assert np.allclose(values, object_colors) diff --git a/examples/pix2pose/legacy/icp.py b/examples/pix2pose/legacy/icp.py new file mode 100644 index 000000000..61ca4352c --- /dev/null +++ b/examples/pix2pose/legacy/icp.py @@ -0,0 +1,102 @@ +import numpy as np +from sklearn.neighbors import NearestNeighbors + + +def calculate_affine_matrix(pointcloud_A, pointcloud_B): + '''Calculates affine transform with the best least-squares fit transforming + keypoints A to keypoints B. + + # Argument: + pointcloud_A: Array of shape (num_keypoints, 3). + pointcloud_B: Array of shape (num_keypoints, 3). 
+ + # Returns: + T: (m+1)x(m+1) homogeneous transformation matrix that maps A on to B + R: mxm rotation matrix + t: mx1 translation vector + ''' + assert pointcloud_A.shape == pointcloud_B.shape + # translate points to their centroids + centroid3D_A = np.mean(pointcloud_A, axis=0) + centroid3D_B = np.mean(pointcloud_B, axis=0) + centered_keypoints3D_A = pointcloud_A - centroid3D_A + centered_keypoints3D_B = pointcloud_B - centroid3D_B + + covariance = np.dot(centered_keypoints3D_A.T, centered_keypoints3D_B) + U, S, Vt = np.linalg.svd(covariance) + # compute rotation matrix + rotation_matrix = np.dot(Vt.T, U.T) + + # resolve special reflection case + if np.linalg.det(rotation_matrix) < 0: + Vt[3 - 1, :] *= -1 + rotation_matrix = np.dot(Vt.T, U.T) + + # compute translation + translation3D = centroid3D_B.T - np.dot(rotation_matrix, centroid3D_A.T) + + affine_matrix = to_affine_matrix(rotation_matrix, translation3D) + return affine_matrix + + +def to_affine_matrix(rotation_matrix, translation_vector): + translation_vector = translation_vector.reshape(3, 1) + affine = np.concatenate([rotation_matrix, translation_vector], axis=0) + affine = np.concatenate([affine, np.array([[0.0, 0.0, 0.0, 1.0]])], axis=1) + return affine + + +def nearest_neighbor(pointcloud_A, pointcloud_B): + '''Find the nearest (Euclidean) neighbor in dst for each point in src + # Arguments: + src: Nxm array of points + dst: Nxm array of points + # Returns: + distances: Euclidean distances of the nearest neighbor + indices: dst indices of the nearest neighbor + ''' + assert pointcloud_A.shape == pointcloud_B.shape + model = NearestNeighbors(n_neighbors=1) + model.fit(pointcloud_B) + distances, indices = model.kneighbors(pointcloud_A, return_distance=True) + return distances.ravel(), indices.ravel() + + +def add_homogenous_coordinate(keypoints3D): + num_keypoints = len(keypoints3D) + ones = np.ones_like(num_keypoints).reshape(-1, 1) + homogenous_keypoints3D = np.concatenate([keypoints3D, ones], axis=1) + return homogenous_keypoints3D + + +def iterative_closes_point(pointcloud_A, pointcloud_B, initial_pose=None, + max_iterations=20, tolerance=1e-3): + '''Find best least square fit that transforms pointcloud A to pointcloud B. 
+ Input: + A: Nxm numpy array of source mD points + B: Nxm numpy array of destination mD point + initial_pose: (m+1)x(m+1) homogeneous transformation + max_iterations: exit algorithm after max_iterations + tolerance: convergence criteria + Output: + T: final homogeneous transformation that maps A on to B + distances: Euclidean distances (errors) of the nearest neighbor + i: number of iterations to converge + ''' + assert pointcloud_A.shape == pointcloud_B.shape + pointcloud_A = add_homogenous_coordinate(pointcloud_A) + pointcloud_B = add_homogenous_coordinate(pointcloud_B) + pointcloud_A_0 = np.copy(pointcloud_A) + if initial_pose is not None: + pointcloud_A = np.dot(initial_pose, pointcloud_A.T).T + previous_error = 0 + for iteration_arg in range(max_iterations): + distances, indices = nearest_neighbor(pointcloud_A, pointcloud_B) + affine_matrix = calculate_affine_matrix(pointcloud_A, pointcloud_B) + pointcloud_A = np.dot(affine_matrix, pointcloud_A.T).T + mean_error = np.mean(distances) + if np.abs(previous_error - mean_error) < tolerance: + break + previous_error = mean_error + affine_transform = calculate_affine_matrix(pointcloud_A_0, pointcloud_A) + return affine_transform, distances, iteration_arg diff --git a/examples/pix2pose/legacy/legacy.py b/examples/pix2pose/legacy/legacy.py index 76b2b6973..4335d2e8d 100644 --- a/examples/pix2pose/legacy/legacy.py +++ b/examples/pix2pose/legacy/legacy.py @@ -1,6 +1,36 @@ from tensorflow.keras.losses import Loss from tensorflow.keras.losses import mean_squared_error import tensorflow as tf +import numpy as np + + +def sample_random_rotation_matrix(): + """Samples SO3 in rotation matrix form. + + # Return + Array (3, 3). + + # References + [Lost in my terminal](http://blog.lostinmyterminal.com/python/2015/05/ + 12/random-rotation-matrix.html) + [real-time rendering](from http://www.realtimerendering.com/resources/ + GraphicsGems/gemsiii/rand_rotation.c) + """ + theta = 2.0 * np.pi * np.random.uniform() + phi = 2.0 * np.pi * np.random.uniform() + z = 2.0 * np.random.uniform() + # random_vector has length sqrt(2) to eliminate 2 in the Householder matrix + r = np.sqrt(z) + random_vector = np.array( + [np.sin(phi) * r, np.cos(phi) * r, np.sqrt(2.0 - z)]) + sin_theta = np.sin(theta) + cos_theta = np.cos(theta) + R = np.array([[+cos_theta, +sin_theta, 0.0], + [-sin_theta, +cos_theta, 0.0], + [0.0, 0.0, 1.0]]) + random_rotation_matrix = ( + np.outer(random_vector, random_vector) - np.eye(3)).dot(R) + return random_rotation_matrix def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): diff --git a/examples/pix2pose/processors_test.py b/examples/pix2pose/processors_test.py index 31da496fe..a06b94664 100644 --- a/examples/pix2pose/processors_test.py +++ b/examples/pix2pose/processors_test.py @@ -10,8 +10,7 @@ from .processors import UnwrapDictionary # from .processors import GetNonZeroArguments # from .processors import GetNonZeroValues -# from .processors import Scale -# from .processors import SolveChangingObjectPnPRANSAC +from .processors import Scale @pytest.fixture @@ -38,6 +37,12 @@ def rotation_matrix_Z_HALF_PI(): return rotation_matrix +@pytest.fixture +def object_sizes(): + object_sizes = np.array([280, 260, 240]) + return object_sizes + + def test_ImageToNormalizedDeviceCoordinates(): image = np.array([[0, 127.5, 255]]) image_to_NDC = ImageToNormalizedDeviceCoordinates() @@ -106,3 +111,10 @@ def test_UnwrapDictionary(): dictionary = {'a': 1, 'b': 2, 'c': 3} unwrap = UnwrapDictionary(['b', 'a', 'c']) assert 
unwrap(dictionary) == [2, 1, 3] + + +def test_Scale(object_sizes): + scale = Scale(object_sizes) + values = np.array([1.0, 0.5, 0.25]) + scaled_values = scale(values) + assert np.allclose(scaled_values, values * object_sizes) From 7b174c43ef28a47d42c5d938f979d83f32c19227 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 8 Feb 2022 11:36:31 +0100 Subject: [PATCH 095/101] Add comment to function --- examples/pix2pose/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 4f9ae19ce..52b011654 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -237,7 +237,7 @@ def arguments_to_image_points2D(row_args, col_args): """ row_args = row_args.reshape(-1, 1) col_args = col_args.reshape(-1, 1) - image_points2D = np.concatenate([col_args, row_args], axis=1) + image_points2D = np.concatenate([col_args, row_args], axis=1) # (U, V) return image_points2D From 59a3ea5b40c37891ad68d3e935f794579a380205 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 8 Feb 2022 12:22:05 +0100 Subject: [PATCH 096/101] Add drawing callback --- examples/pix2pose/train.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 35a873481..0b57e4ff3 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -4,6 +4,7 @@ import argparse from datetime import datetime +import numpy as np import tensorflow as tf from tensorflow.keras.utils import get_file from tensorflow.keras.optimizers import Adam @@ -12,9 +13,11 @@ from paz.abstract import GeneratingSequence from paz.models.segmentation import UNET_VGG16 +from paz.optimization.callbacks import DrawInferences +from paz.backend.camera import Camera from scenes import PixelMaskRenderer -from pipelines import DomainRandomization +from pipelines import DomainRandomization, Pix2Pose from weighted_reconstruction import WeightedReconstruction MTL_FILE = 'textured.mtl' @@ -45,7 +48,7 @@ help='Loss Weight for pixels in object') parser.add_argument('--max_num_epochs', default=100, type=int, help='Number of epochs before finishing') -parser.add_argument('--steps_per_epoch', default=1000, type=int, +parser.add_argument('--steps_per_epoch', default=10, type=int, help='Steps per epoch') parser.add_argument('--stop_patience', default=5, type=int, help='Early stop patience') @@ -54,7 +57,7 @@ parser.add_argument('--run_label', default='RUN_00', type=str, help='Label used to distinguish between different runs') parser.add_argument('--time', type=str, - default=datetime.now().strftime("%d/%m/%Y %H:%M:%S")) + default=datetime.now().strftime("%d-%m-%Y_%H-%M-%S")) parser.add_argument('--light', nargs='+', type=float, default=[1.0, 30]) parser.add_argument('--y_fov', default=3.14159 / 4.0, type=float, help='Field of view angle in radians') @@ -68,6 +71,8 @@ help='Threshold of random shift of camera') parser.add_argument('--num_occlusions', default=1, type=int, help='Number of occlusions added to image') +parser.add_argument('--num_test_images', default=100, type=int, + help='Number of test images') parser.add_argument('--image_size', default=128, type=int, help='Size of the side of a square image e.g. 
64') parser.add_argument('--background_wildcard', type=str, @@ -95,6 +100,7 @@ renderer, image_shape, image_paths, inputs_to_shape, labels_to_shape, args.num_occlusions) + # building python generator sequence = GeneratingSequence(processor, args.batch_size, args.steps_per_epoch) @@ -112,7 +118,7 @@ def mean_squared_error(y_true, y_pred): model.compile(optimizer, loss, mean_squared_error) # building experiment path -experiment_label = '_'.join([model.name, args.run_label]) +experiment_label = '_'.join([model.name, args.run_label, args.time]) experiment_path = os.path.join(args.save_path, experiment_label) # setting additional callbacks @@ -122,7 +128,15 @@ def mean_squared_error(y_true, y_pred): save_filename = os.path.join(experiment_path, 'model_weights.hdf5') save = ModelCheckpoint(save_filename, 'loss', verbose=1, save_best_only=True, save_weights_only=True) -callbacks = [log, stop, save, plateau] +images = [np.copy(renderer.render()[0]) for _ in range(args.num_test_images)] +# setting drawing callback +camera = Camera() +camera.distortion = np.zeros((4)) +object_sizes = renderer.mesh.mesh.extents * 100 # from meters to milimiters +camera.intrinsics = renderer.camera.camera.get_projection_matrix()[:3, :3] +draw_pipeline = Pix2Pose(model, object_sizes, camera, draw=True) +draw = DrawInferences(experiment_path, images, draw_pipeline) +callbacks = [log, stop, save, plateau, draw] # saving hyper-parameters and model summary with open(os.path.join(experiment_path, 'hyperparameters.json'), 'w') as filer: @@ -133,5 +147,6 @@ def mean_squared_error(y_true, y_pred): model.fit( sequence, epochs=args.max_num_epochs, + callbacks=callbacks, verbose=1, workers=0) From 305861b079e8a14b3c42b2204a592ad9af849606 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 8 Feb 2022 13:57:42 +0100 Subject: [PATCH 097/101] Remove drawing of pose6D in Pix2Pose pipeline and save original images of training --- examples/pix2pose/pipelines.py | 8 ++++---- examples/pix2pose/train.py | 23 ++++++++++++++++++++++- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 067c2d658..8cfcac23b 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -124,11 +124,11 @@ def call(self, image, box2D=None): pose6D = None # change_coordinates puts points2D outside image. 
if (self.draw and (box2D is None)): - topic = 'image_crop' if box2D is not None else 'image' image = draw_mask(image, points2D, points3D, self.object_sizes) - image = draw_pose6D(image, pose6D, self.cube_points3D, - self.camera.intrinsics) - results[topic] = image + # TODO: commented it out for DrawInfferences callback + # image = draw_pose6D(image, pose6D, self.cube_points3D, + # self.camera.intrinsics) + results['image'] = image results['points2D'], results['pose6D'] = points2D, pose6D return results diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 0b57e4ff3..5f3aab1cd 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -15,6 +15,7 @@ from paz.models.segmentation import UNET_VGG16 from paz.optimization.callbacks import DrawInferences from paz.backend.camera import Camera +from paz.backend.image import write_image from scenes import PixelMaskRenderer from pipelines import DomainRandomization, Pix2Pose @@ -48,7 +49,7 @@ help='Loss Weight for pixels in object') parser.add_argument('--max_num_epochs', default=100, type=int, help='Number of epochs before finishing') -parser.add_argument('--steps_per_epoch', default=10, type=int, +parser.add_argument('--steps_per_epoch', default=250, type=int, help='Steps per epoch') parser.add_argument('--stop_patience', default=5, type=int, help='Early stop patience') @@ -129,6 +130,26 @@ def mean_squared_error(y_true, y_pred): save = ModelCheckpoint(save_filename, 'loss', verbose=1, save_best_only=True, save_weights_only=True) images = [np.copy(renderer.render()[0]) for _ in range(args.num_test_images)] +images = [] + + +image_directory = os.path.join(experiment_path, 'original_images') +if not os.path.exists(image_directory): + os.makedirs(image_directory) + +for image_arg in range(args.num_test_images): + image, alpha, masks = renderer.render() + image = np.copy(image) # TODO: renderer outputs unwritable numpy arrays + masks = np.copy(masks) # TODO: renderer outputs unwritable numpy arrays + image_filename = 'image_%03d.png' % image_arg + masks_filename = 'masks_%03d.png' % image_arg + image_directory = os.path.join(experiment_path, 'original_images') + image_filename = os.path.join(image_directory, image_filename) + masks_filename = os.path.join(image_directory, masks_filename) + write_image(image_filename, image) + write_image(masks_filename, masks) + images.append(image) + # setting drawing callback camera = Camera() camera.distortion = np.zeros((4)) From d3f2f81ac08def5befab3c004eeac5f795163367 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 9 Feb 2022 09:22:00 +0100 Subject: [PATCH 098/101] Add thickness option for drawing poses --- examples/pix2pose/backend.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 52b011654..98a26a963 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -350,7 +350,7 @@ def denormalize_points2D(points2D, height, width): return points2D -def draw_pose6D(image, pose6D, cube_points3D, camera_intrinsics): +def draw_pose6D(image, pose6D, cube_points3D, camera_intrinsics, thickness=2): """Draws pose6D by projecting cube3D to image space with camera intrinsics. 
# Arguments @@ -369,11 +369,12 @@ def draw_pose6D(image, pose6D, cube_points3D, camera_intrinsics): cube_points2D = project_to_image( rotation, translation, cube_points3D, camera_intrinsics) cube_points2D = cube_points2D.astype(np.int32) - image = draw_cube(image, cube_points2D) + image = draw_cube(image, cube_points2D, thickness=thickness) return image -def draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics): +def draw_poses6D(image, poses6D, cube_points3D, + camera_intrinsics, thickness=2): """Draws pose6D by projecting cube3D to image space with camera intrinsics. # Arguments @@ -387,7 +388,8 @@ def draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics): Original image array (H, W, 3) with drawn cube points for all poses6D. """ for pose6D in poses6D: - image = draw_pose6D(image, pose6D, cube_points3D, camera_intrinsics) + image = draw_pose6D(image, pose6D, cube_points3D, + camera_intrinsics, thickness) return image From 41edbf04ac1f9f6626e6c34b2a3558b484db4940 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 9 Feb 2022 09:22:39 +0100 Subject: [PATCH 099/101] Change resize interpolation --- examples/pix2pose/pipelines.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 8cfcac23b..40e9ec23c 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -3,6 +3,7 @@ from paz.backend.quaternion import rotation_vector_to_quaternion from paz.backend.image import resize_image from paz import processors as pr +import cv2 from processors import ( GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, @@ -83,7 +84,7 @@ def call(self, image): RGB_mask = self.predict_RGBMask(image) H, W, num_channels = image.shape if self.resize: - RGB_mask = resize_image(RGB_mask, (W, H)) + RGB_mask = cv2.resize(RGB_mask, (W, H), cv2.INTER_CUBIC) points3D = self.mask_to_points3D(RGB_mask) points2D = self.mask_to_points2D(RGB_mask) points2D = normalize_points2D(points2D, H, W) @@ -168,5 +169,6 @@ def call(self, image): image = self.draw_boxes2D(image, boxes2D) image = draw_masks(image, points, self.object_sizes) image = draw_poses6D(image, poses6D, self.cube_points3D, - self.estimate_pose.camera.intrinsics) + self.estimate_pose.camera.intrinsics, + thickness=2) return self.wrap(image, boxes2D, poses6D) From c081d6affdcdb4d91be76ee185052140f5488a3f Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 9 Feb 2022 09:22:57 +0100 Subject: [PATCH 100/101] Revert demo for single image --- examples/pix2pose/demo.py | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index e1a9a046b..67acd545a 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -5,13 +5,14 @@ from paz.backend.camera import VideoPlayer from paz.applications import SSD300FAT -from pipelines import Pix2Pose +from pipelines import Pix2Pose, EstimatePoseMasks image_shape = (128, 128, 3) num_classes = 3 model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +model.load_weights('experiments/UNET-VGG16_RUN_00_08-02-2022_14-39-55/weights.hdf5') # model.load_weights('weights/UNET_weights_epochs-10_beta-3.hdf5') # model.load_weights('weights/UNET-VGG_solar_panel_canonical_13.hdf5') # model.load_weights('weights/UNET-VGG_large_clamp_canonical_10.hdf5') @@ -22,8 +23,8 @@ # image_size = camera.read().shape[0:2] # camera.stop() -# image = load_image('test_image2.jpg') -image = 
load_image('images/lab_condition.png') +image = load_image('images/test_image2.jpg') +# image = load_image('images/lab_condition.png') image_size = image.shape[0:2] focal_length = image_size[1] image_center = (image_size[1] / 2.0, image_size[0] / 2.0) @@ -32,18 +33,23 @@ [0, focal_length, image_center[1]], [0, 0, 1]]) # object_sizes = np.array([0.184, 0.187, 0.052]) -epsilon = 0.001 +# object_sizes = np.array([184, 187, 52]) +object_sizes = np.array([1840, 1870, 520]) # power drill +epsilon = 0.015 score_thresh = 0.50 detect = SSD300FAT(score_thresh, draw=False) -offsets = [0.2, 0.2] -# estimate_keypoints = Pix2Pose(model, object_sizes, epsilon, True) -# pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) - - -object_sizes = np.array([1840, 1870, 520]) # power drill -object_sizes = np.array([15000, 15000, 2000]) # solar panel -object_sizes = np.array([15000, 15000, 2000]) # solar panel -estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=True) +offsets = [0.5, 0.5] +estimate_keypoints = Pix2Pose(model, object_sizes, camera, epsilon, draw=False) +pipeline = EstimatePoseMasks(detect, estimate_keypoints, offsets) +predicted_image = pipeline(image)['image'] +show_image(predicted_image) +from paz.backend.image import write_image +write_image('images/predicted_power_drill.png', predicted_image) + +# object_sizes = np.array([1840, 1870, 520]) # power drill +# object_sizes = np.array([15000, 15000, 2000]) # solar panel +# object_sizes = np.array([15000, 15000, 2000]) # solar panel +# estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=True) # image = image[768:1324, 622:784] # image = image[622:784, 768:1324] @@ -53,10 +59,10 @@ # show_image(estimate_pose(image_hammer)['image']) # show_image(image) -image_clamp = image[670:1000, 1000:1400] +# image_clamp = image[670:1000, 1000:1400] # image_hammer = image[460:1030, 740:1340] -model.load_weights('weights/UNET-VGG_large_clamp_canonical_10.hdf5') -show_image(estimate_pose(image_clamp)['image']) +# model.load_weights('weights/UNET-VGG_large_clamp_canonical_10.hdf5') +# show_image(estimate_pose(image_clamp)['image']) """ image = load_image('images/zed_left_1011.png') From 72d14000486ce1fc929419b1555759ba9934f40c Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 9 Feb 2022 09:30:57 +0100 Subject: [PATCH 101/101] Delete legacy file to keep pep8 master --- examples/pix2pose/legacy/legacy.py | 281 ----------------------------- 1 file changed, 281 deletions(-) delete mode 100644 examples/pix2pose/legacy/legacy.py diff --git a/examples/pix2pose/legacy/legacy.py b/examples/pix2pose/legacy/legacy.py deleted file mode 100644 index 4335d2e8d..000000000 --- a/examples/pix2pose/legacy/legacy.py +++ /dev/null @@ -1,281 +0,0 @@ -from tensorflow.keras.losses import Loss -from tensorflow.keras.losses import mean_squared_error -import tensorflow as tf -import numpy as np - - -def sample_random_rotation_matrix(): - """Samples SO3 in rotation matrix form. - - # Return - Array (3, 3). 
- - # References - [Lost in my terminal](http://blog.lostinmyterminal.com/python/2015/05/ - 12/random-rotation-matrix.html) - [real-time rendering](from http://www.realtimerendering.com/resources/ - GraphicsGems/gemsiii/rand_rotation.c) - """ - theta = 2.0 * np.pi * np.random.uniform() - phi = 2.0 * np.pi * np.random.uniform() - z = 2.0 * np.random.uniform() - # random_vector has length sqrt(2) to eliminate 2 in the Householder matrix - r = np.sqrt(z) - random_vector = np.array( - [np.sin(phi) * r, np.cos(phi) * r, np.sqrt(2.0 - z)]) - sin_theta = np.sin(theta) - cos_theta = np.cos(theta) - R = np.array([[+cos_theta, +sin_theta, 0.0], - [-sin_theta, +cos_theta, 0.0], - [0.0, 0.0, 1.0]]) - random_rotation_matrix = ( - np.outer(random_vector, random_vector) - np.eye(3)).dot(R) - return random_rotation_matrix - - -def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): - """Computes the mininum of all rotated L1 reconstruction losses weighting - the positive alpha mask values in the predicted RGB image by beta. - - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. - rotations: Array (num_symmetries, 3, 3). Rotation matrices - that when applied lead to the same object view. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. - """ - RGB_true, alpha = split_alpha_mask(RGBA_true) - RGB_true = normalized_image_to_normalized_device_coordinates(RGB_true) - symmetric_losses = [] - for rotation in rotations: - RGB_true_rotated = tf.einsum('ij,bklj->bkli', rotation, RGB_true) - RGB_true_rotated = normalized_device_coordinates_to_normalized_image( - RGB_true_rotated) - RGB_true_rotated = tf.clip_by_value(RGB_true_rotated, 0.0, 1.0) - RGB_true_rotated = RGB_true_rotated * alpha - RGBA_true_rotated = tf.concat([RGB_true_rotated, alpha], axis=3) - loss = compute_weighted_reconstruction_loss( - RGBA_true_rotated, RGB_pred, beta) - loss = tf.expand_dims(loss, -1) - symmetric_losses.append(loss) - symmetric_losses = tf.concat(symmetric_losses, axis=-1) - minimum_symmetric_loss = tf.reduce_min(symmetric_losses, axis=-1) - return minimum_symmetric_loss - - -class WeightedSymmetricReconstruction(Loss): - """Computes the mininum of all rotated L1 reconstruction losses weighting - the positive alpha mask values in the predicted RGB image by beta. - """ - def __init__(self, rotations, beta=3.0): - super(WeightedSymmetricReconstruction, self).__init__() - self.rotations = rotations - self.beta = beta - - def call(self, RGBA_true, RGB_pred): - loss = compute_weighted_symmetric_loss( - RGBA_true, RGB_pred, self.rotations, self.beta) - return loss - - -def compute_error_prediction_loss(RGBA_true, RGBE_pred): - """Computes L2 reconstruction loss of predicted error mask. - - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGBE_pred: Tensor [batch, H, W, 3]. Predicted RGB and error mask. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. - - """ - RGB_pred, error_pred = split_error_mask(RGBE_pred) - error_true = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, 1.0) - # TODO check we need to set minimum to 1.0? - error_true = tf.minimum(error_true, 1.0) - error_loss = mean_squared_error(error_true, error_pred) - error_loss = tf.expand_dims(error_loss, axis=-1) - return error_loss - - -class ErrorPrediction(Loss): - """Computes L2 reconstruction loss of predicted error mask. 
- - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGBE_pred: Tensor [batch, H, W, 3]. Predicted RGB and error mask. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. - - """ - def __init__(self): - super(ErrorPrediction, self).__init__() - - def call(self, RGBA_true, RGBE_pred): - error_loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) - return error_loss - - -from paz.backend.image import draw_dot - - -def draw_points2D_(image, keypoints, colors, radius=1): - for (u, v), (R, G, B) in zip(keypoints, colors): - color = (int(R), int(G), int(B)) - draw_dot(image, (u, v), color, radius) - return image - - -def rotate_image(image, rotation_matrix): - """Rotates an image with a symmetry. - - # Arguments - image: Array (H, W, 3) with domain [0, 255]. - rotation_matrix: Array (3, 3). - - # Returns - Array (H, W, 3) with domain [0, 255] - """ - mask_image = np.sum(image, axis=-1, keepdims=True) != 0 - image = image_to_normalized_device_coordinates(image) - rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) - rotated_image = normalized_device_coordinates_to_image(rotated_image) - rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) - rotated_image = rotated_image * mask_image - return rotated_image - - -class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_pose, offsets, draw=True, - valid_class_names=['035_power_drill']): - """Pose estimation pipeline using keypoints. - """ - super(EstimatePoseMasks, self).__init__() - self.detect = detect - self.estimate_pose = estimate_pose - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(valid_class_names), - pr.SquareBoxes2D(), - pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) - self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.object_sizes = self.estimate_pose.object_sizes - self.cube_points3D = build_cube_points3D(*self.object_sizes) - self.draw = draw - - def call(self, image): - boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points = [], [] - for crop, box2D in zip(cropped_images, boxes2D): - results = self.estimate_pose(crop, box2D) - pose6D, points2D, points3D = self.unwrap(results) - poses6D.append(pose6D), points.append([points2D, points3D]) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - image = draw_masks(image, points, self.object_sizes) - image = draw_poses6D(image, poses6D, self.cube_points3D, - self.estimate_pose.camera.intrinsics) - return self.wrap(image, boxes2D, poses6D) - - -class MultiPix2Pose(Processor): - def __init__(self, detect, segment, camera, name_to_weights, name_to_sizes, - valid_class_names, offsets=[0.2, 0.2], epsilon=0.15, draw=True): - self.detect = detect - self.name_to_weights = name_to_weights - self.name_to_sizes = name_to_sizes - self.valid_class_names = valid_class_names - self.pix2points = Pix2Points(segment, np.zeros((3)), epsilon) - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.camera = camera - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(valid_class_names), - pr.SquareBoxes2D(), - 
pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.draw = draw - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - self.name_to_cube_points3D = {} - self.mask_to_points2D = RGBMaskToImagePoints2D( - segment.output_shape[1:3]) - for name in self.name_to_sizes: - W, H, D = self.name_to_sizes[name] - cube_points3D = build_cube_points3D(W, H, D) - self.name_to_cube_points3D[name] = cube_points3D - - self.predict_RGBMask = PredictRGBMask(segment, epsilon) - - def call(self, image): - boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points2D, points3D = [], [], [] - for crop, box2D in zip(cropped_images, boxes2D): - class_name = box2D.class_name - name_to_weights = self.name_to_weights[class_name] - self.pix2points.model.load_weights(name_to_weights) - object_sizes = self.name_to_sizes[class_name] - # self.pix2points.object_sizes = object_sizes - # points = self.pix2points(crop) - - RGB_mask = self.predict_RGBMask(crop) - H, W, num_channels = crop.shape - RGB_mask = resize_image(RGB_mask, (W, H)) - - self.mask_to_points3D = RGBMaskToObjectPoints3D(object_sizes) - class_points3D = self.mask_to_points3D(RGB_mask) - class_points2D = self.mask_to_points2D(RGB_mask) - class_points2D = normalize_points2D(class_points2D, H, W) - - # from paz.backend.image import show_image - # show_image((points['RGB_mask'] * 255).astype('uint8')) - # class_points2D = points['points2D'] - # class_points3D = points['points3D'] - H, W, num_channels = crop.shape - class_points2D = denormalize_points2D(class_points2D, H, W) - class_points2D = self.change_coordinates(class_points2D, box2D) - print(len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS) - print(len(class_points3D), len(class_points2D)) - if len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS: - pose_results = self.predict_pose(class_points3D, class_points2D) - success, rotation, translation = pose_results - print('solver success', success) - # success = True - else: - success = False - if success: - quaternion = rotation_vector_to_quaternion(rotation) - pose6D = Pose6D(quaternion, translation, class_name) - else: - pose6D = None - print(success) - points2D.append(class_points2D) - points3D.append(class_points3D) - poses6D.append(pose6D) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - for class_points2D, class_points3D, pose6D in zip(points2D, points3D, poses6D): - class_name = pose6D.class_name - object_sizes = self.name_to_sizes[class_name] - colors = points3D_to_RGB(class_points3D, object_sizes) - image = draw_points2D(image, class_points2D, colors) - - for pose6D in poses6D: - class_name = pose6D.class_name - cube_points3D = self.name_to_cube_points3D[class_name] - image = draw_pose6D(image, pose6D, cube_points3D, - self.camera.intrinsics) - return {'image': image, 'boxes2D': boxes2D, 'poses6D': poses6D}
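
The pinhole projection convention exercised by test_project_to_image in patch 094 can be summarized with a short NumPy sketch: points are rotated and translated into the camera frame, perspective-divided by depth, and then scaled and shifted by the focal lengths and principal point read from the 2x3 intrinsics array. This is a minimal sketch written only to reproduce the values asserted in that test; it is an assumed re-implementation, not the backend module's actual project_to_image code.

    import numpy as np


    def project_to_image(rotation, translation, points3D, camera_intrinsics):
        """Sketch of pinhole projection consistent with test_project_to_image.
        Assumes camera_intrinsics has the form [[fx, 0, tx], [0, fy, ty]].
        Hypothetical re-implementation for illustration only.
        """
        # transform points from object frame to camera frame
        camera_points = np.matmul(rotation, points3D.T).T + translation
        # perspective divide by depth
        x = camera_points[:, 0] / camera_points[:, 2]
        y = camera_points[:, 1] / camera_points[:, 2]
        # apply focal lengths and principal point
        fx, tx = camera_intrinsics[0, 0], camera_intrinsics[0, 2]
        fy, ty = camera_intrinsics[1, 1], camera_intrinsics[1, 2]
        u = (fx * x) + tx
        v = (fy * y) + ty
        return np.concatenate([u[:, None], v[:, None]], axis=1)


    # reproduces the values asserted in the test: [[0.5, -0.5]]
    points3D = np.array([[1.0, 1.0, 1.0]])
    translation = np.array([0.0, 0.0, -3.0])
    rotation = np.array([[0.0, 0.0, -1.0],
                         [0.0, 1.0, 0.0],
                         [1.0, 0.0, 0.0]])
    camera_intrinsics = np.array([[1.0, 0.0, 0.0],
                                  [0.0, 1.0, 0.0]])
    print(project_to_image(rotation, translation, points3D, camera_intrinsics))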