From 635e029b3067ceab5f90c3b964182f6e7051ce13 Mon Sep 17 00:00:00 2001
From: Octavio Arriaga
Date: Fri, 15 Oct 2021 11:44:44 +0200
Subject: [PATCH 001/101] Ignore .h5 files from repo

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 410ac728f..6d12a3065 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 *.pyc
 *.hdf5
+*.h5
 *.log
 *.pkl
 *.json

From 0017ce8a474d89788abef54d8995a5464c0eb570 Mon Sep 17 00:00:00 2001
From: Octavio Arriaga
Date: Fri, 15 Oct 2021 13:26:22 +0200
Subject: [PATCH 002/101] Start refactoring model generator

---
 examples/pix2pose/pix2pose/generator.py   | 74 +++++++++++++++++++++++
 examples/pix2pose/{ => pix2pose}/model.py |  6 +-
 examples/pix2pose/pix2pose/test.py        |  0
 3 files changed, 78 insertions(+), 2 deletions(-)
 create mode 100644 examples/pix2pose/pix2pose/generator.py
 rename examples/pix2pose/{ => pix2pose}/model.py (96%)
 create mode 100644 examples/pix2pose/pix2pose/test.py

diff --git a/examples/pix2pose/pix2pose/generator.py b/examples/pix2pose/pix2pose/generator.py
new file mode 100644
index 000000000..5270bcf58
--- /dev/null
+++ b/examples/pix2pose/pix2pose/generator.py
@@ -0,0 +1,74 @@
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import (
+    Input, Conv2D, Activation, Dense, Reshape, Conv2DTranspose, Flatten,
+    LeakyReLU, BatchNormalization, Concatenate)
+
+
+def encoder_convolution_block(x, filters, strides=(2, 2)):
+    x = Conv2D(filters, (5, 5), strides=strides, padding='same')(x)
+    x = BatchNormalization()(x)
+    x = LeakyReLU()(x)
+    return x
+
+
+def encoder_block(x, filters):
+    x_stem = encoder_convolution_block(x, filters)
+    x_skip = encoder_convolution_block(x, filters)
+    x_stem = Concatenate()([x_stem, x_skip])
+    return x_stem, x_skip
+
+
+def encoder(x):
+    x, skip_1 = encoder_block(x, 64)
+    x, skip_2 = encoder_block(x, 128)
+    x, skip_3 = encoder_block(x, 128)
+    x, skip_4 = encoder_block(x, 256)
+    return x, [skip_1, skip_2, skip_3]
+
+
+def decoder_convolution_block(x, filters, strides=(2, 2)):
+    x = Conv2DTranspose(filters, (5, 5), strides=strides, padding='same')(x)
+    x = BatchNormalization()(x)
+    x = LeakyReLU()(x)
+    return x
+
+
+def decoder_block(x, x_skip, filters):
+    filters_1, filters_2 = filters
+    x = decoder_convolution_block(x, filters_1, (1, 1))
+    x = decoder_convolution_block(x, filters_2)
+    x = Concatenate()([x, x_skip])
+    return x
+
+
+def decoder(x, skip_connections):
+    skip_1, skip_2, skip_3 = skip_connections
+    x = decoder_convolution_block(x, 256)
+    x = Concatenate()([x, skip_3])
+    x = decoder_block(x, skip_2, [256, 128])
+    x = decoder_block(x, skip_1, [256, 64])
+    x = decoder_convolution_block(x, 128, (1, 1))
+    return x
+
+
+def Generator(input_shape=(128, 128, 3), latent_dimension=256,
+              name='PIX2POSE_GENERATOR'):
+    input_image = Input(input_shape, name='input_image')
+    x, skip_connections = encoder(input_image)
+    x = Flatten()(x)
+    x = Dense(latent_dimension)(x)
+    x = Dense(8 * 8 * latent_dimension)(x)
+    x = Reshape((8, 8, latent_dimension))(x)
+    x = decoder(x, skip_connections)
+    RGB_mask = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(x)
+    RGB_mask = Activation('tanh', name='RGB_mask')(RGB_mask)
+    error_mask = Conv2DTranspose(1, (5, 5), (2, 2), padding='same')(x)
+    error_mask = Activation('sigmoid', name='error_mask')(error_mask)
+    model = Model([input_image], [RGB_mask, error_mask], name=name)
+    return model
+
+
+model = Generator()
+assert model.count_params() == 25740356
+assert model.output_shape == [(None, 128, 128, 3), (None, 128, 128, 
1)] +assert model.input_shape == (None, 128, 128, 3) diff --git a/examples/pix2pose/model.py b/examples/pix2pose/pix2pose/model.py similarity index 96% rename from examples/pix2pose/model.py rename to examples/pix2pose/pix2pose/model.py index f8093300a..71bc6fa6e 100644 --- a/examples/pix2pose/model.py +++ b/examples/pix2pose/pix2pose/model.py @@ -1,7 +1,9 @@ import numpy as np -from tensorflow.keras.layers import Conv2D, Activation, UpSampling2D, Dense, Conv2DTranspose, Dropout, Input, Flatten, Reshape, LeakyReLU, BatchNormalization, Concatenate from tensorflow.keras.models import Model +from tensorflow.keras.layers import (Input, Conv2D, Activation, Dense, Reshape, + Conv2DTranspose, Flatten, LeakyReLU, + BatchNormalization, Concatenate) import tensorflow as tf @@ -205,4 +207,4 @@ def Discriminator(): flatten = Flatten()(d7) output = Dense(1, activation='sigmoid', name='discriminator_output')(flatten) discriminator_model = Model(inputs=input, outputs=[output]) - return discriminator_model \ No newline at end of file + return discriminator_model diff --git a/examples/pix2pose/pix2pose/test.py b/examples/pix2pose/pix2pose/test.py new file mode 100644 index 000000000..e69de29bb From 403c86a4066c905215b6a50b084baae71b657bbf Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 15 Oct 2021 13:43:46 +0200 Subject: [PATCH 003/101] Refactor discriminator --- examples/pix2pose/pix2pose/discriminator.py | 27 +++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 examples/pix2pose/pix2pose/discriminator.py diff --git a/examples/pix2pose/pix2pose/discriminator.py b/examples/pix2pose/pix2pose/discriminator.py new file mode 100644 index 000000000..b1b95063f --- /dev/null +++ b/examples/pix2pose/pix2pose/discriminator.py @@ -0,0 +1,27 @@ +from tensorflow.keras.models import Model +from tensorflow.keras.layers import (Conv2D, BatchNormalization, LeakyReLU, + Input, Flatten, Dense) + + +def convolution_block(x, filters): + x = Conv2D(filters, (3, 3), strides=(2, 2), padding='same')(x) + x = BatchNormalization()(x) + x = LeakyReLU(0.2)(x) + return x + + +def Discriminator(input_shape=(128, 128, 3), name='PIX2POSE_DISCRIMINATOR'): + input_image = Input(input_shape, name='input_image') + x = convolution_block(input_image, 64) + for filters in [128, 256, 512, 512, 512, 512]: + x = convolution_block(x, filters) + flatten = Flatten()(x) + x = Dense(1, activation='sigmoid', name='discriminator_output')(flatten) + model = Model(input_image, x, name=name) + return model + + +model = Discriminator() +assert model.count_params() == 8640897 +assert model.output_shape == (None, 1) +assert model.input_shape == (None, 128, 128, 3) From 263b64653977c26064f770af3802d1344ba7ef5c Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 15 Oct 2021 13:46:27 +0200 Subject: [PATCH 004/101] Split model and loss utils --- examples/pix2pose/pix2pose/model.py | 210 ---------------------------- examples/pix2pose/pix2pose/test.py | 0 examples/pix2pose/utils.py | 58 ++++++++ 3 files changed, 58 insertions(+), 210 deletions(-) delete mode 100644 examples/pix2pose/pix2pose/model.py delete mode 100644 examples/pix2pose/pix2pose/test.py create mode 100644 examples/pix2pose/utils.py diff --git a/examples/pix2pose/pix2pose/model.py b/examples/pix2pose/pix2pose/model.py deleted file mode 100644 index 71bc6fa6e..000000000 --- a/examples/pix2pose/pix2pose/model.py +++ /dev/null @@ -1,210 +0,0 @@ -import numpy as np - -from tensorflow.keras.models import Model -from tensorflow.keras.layers import (Input, Conv2D, 
Activation, Dense, Reshape, - Conv2DTranspose, Flatten, LeakyReLU, - BatchNormalization, Concatenate) -import tensorflow as tf - - -def loss_color_wrapped(rotation_matrices): - def loss_color_unwrapped(color_image, predicted_color_image): - min_loss = tf.float32.max - - # Bring the image in the range between 0 and 1 - color_image = (color_image + 1) * 0.5 - - # Calculate masks for the object and the background (they are independent of the rotation) - mask_object = tf.repeat(tf.expand_dims(tf.math.reduce_max(tf.math.ceil(color_image), axis=-1), axis=-1), - repeats=3, axis=-1) - mask_background = tf.ones(tf.shape(mask_object)) - mask_object - - # Bring the image again in the range between -1 and 1 - color_image = (color_image * 2) - 1 - - # Iterate over all possible rotations - for rotation_matrix in rotation_matrices: - - real_color_image = tf.identity(color_image) - - # Add a small epsilon value to avoid the discontinuity problem - real_color_image = real_color_image + tf.ones_like(real_color_image) * 0.0001 - - # Rotate the object - real_color_image = tf.einsum('ij,mklj->mkli', tf.convert_to_tensor(np.array(rotation_matrix), dtype=tf.float32), real_color_image) - #real_color_image = tf.where(tf.math.less(real_color_image, 0), tf.ones_like(real_color_image) + real_color_image, real_color_image) - - # Set the background to be all -1 - real_color_image *= mask_object - real_color_image += (mask_background*tf.constant(-1.)) - - # Get the number of pixels - num_pixels = tf.math.reduce_prod(tf.shape(real_color_image)[1:3]) - beta = 3 - - # Calculate the difference between the real and predicted images including the mask - diff_object = tf.math.abs(predicted_color_image*mask_object - real_color_image*mask_object) - diff_background = tf.math.abs(predicted_color_image*mask_background - real_color_image*mask_background) - - # Calculate the total loss - loss_colors = tf.cast((1/num_pixels), dtype=tf.float32)*(beta*tf.math.reduce_sum(diff_object, axis=[1, 2, 3]) + tf.math.reduce_sum(diff_background, axis=[1, 2, 3])) - min_loss = tf.math.minimum(loss_colors, min_loss) - return min_loss - - return loss_color_unwrapped - - -def loss_error(real_error_image, predicted_error_image): - # Get the number of pixels - num_pixels = tf.math.reduce_prod(tf.shape(real_error_image)[1:3]) - loss_error = tf.cast((1/num_pixels), dtype=tf.float32)*(tf.math.reduce_sum(tf.math.square(predicted_error_image - tf.clip_by_value(tf.math.abs(real_error_image), tf.float32.min, 1.)), axis=[1, 2, 3])) - - return loss_error - - -def Generator(): - bn_axis = 3 - - input = Input((128, 128, 3), name='input_image') - - # First layer of the encoder - e1_1 = Conv2D(64, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_1_1')(input) - e1_1 = BatchNormalization(bn_axis)(e1_1) - e1_1 = LeakyReLU()(e1_1) - - e1_2 = Conv2D(64, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_1_2')(input) - e1_2 = BatchNormalization(bn_axis)(e1_2) - e1_2 = LeakyReLU()(e1_2) - - e1 = Concatenate()([e1_1, e1_2]) - - # Second layer of the encoder - e2_1 = Conv2D(128, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_2_1')(e1) - e2_1 = BatchNormalization(bn_axis)(e2_1) - e2_1 = LeakyReLU()(e2_1) - - e2_2 = Conv2D(128, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_2_2')(e1) - e2_2 = BatchNormalization(bn_axis)(e2_2) - e2_2 = LeakyReLU()(e2_2) - - e2 = Concatenate()([e2_1, e2_2]) - - # Third layer of the encoder - e3_1 = Conv2D(128, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_3_1')(e2) - e3_1 = 
BatchNormalization(bn_axis)(e3_1) - e3_1 = LeakyReLU()(e3_1) - - e3_2 = Conv2D(128, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_3_2')(e2) - e3_2 = BatchNormalization(bn_axis)(e3_2) - e3_2 = LeakyReLU()(e3_2) - - e3 = Concatenate()([e3_1, e3_2]) - - # Fourth layer of the encoder - e4_1 = Conv2D(256, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_4_1')(e3) - e4_1 = BatchNormalization(bn_axis)(e4_1) - e4_1 = LeakyReLU()(e4_1) - - e4_2 = Conv2D(256, (5, 5), strides=(2, 2), padding='same', name='encoder_conv2D_4_2')(e3) - e4_2 = BatchNormalization(bn_axis)(e4_2) - e4_2 = LeakyReLU()(e4_2) - - e4 = Concatenate()([e4_1, e4_2]) - - # Latent dimension - x = Flatten()(e4) - x = Dense(256)(x) - x = Dense(8*8*256)(x) - x = Reshape((8, 8, 256))(x) - - # First layer of the decoder - d1_1 = Conv2DTranspose(256, (5, 5), strides=(2, 2), padding='same', name='decoder_conv2D_1_1')(x) - d1_1 = BatchNormalization(bn_axis)(d1_1) - d1_1 = LeakyReLU()(d1_1) - - d1 = Concatenate()([d1_1, e3_2]) - - # Second layer of the decoder - d2_1 = Conv2D(256, (5, 5), strides=(1, 1), padding='same', name='decoder_conv2D_2_1')(d1) - d2_1 = BatchNormalization(bn_axis)(d2_1) - d2_1 = LeakyReLU()(d2_1) - - d2_2 = Conv2DTranspose(128, (5, 5), strides=(2, 2), padding='same', name='decoder_conv2D_2_2')(d2_1) - d2_2 = BatchNormalization(bn_axis)(d2_2) - d2_2 = LeakyReLU()(d2_2) - - d2 = Concatenate()([d2_2, e2_2]) - - # Third layer of the decoder - d3_1 = Conv2D(256, (5, 5), strides=(1, 1), padding='same', name='decoder_conv2D_3_1')(d2) - d3_1 = BatchNormalization(bn_axis)(d3_1) - d3_1 = LeakyReLU()(d3_1) - - d3_2 = Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', name='decoder_conv2D_3_2')(d3_1) - d3_2 = BatchNormalization(bn_axis)(d3_2) - d3_2 = LeakyReLU()(d3_2) - - d3 = Concatenate()([d3_2, e1_2]) - - # Fourth layer - d4_1 = Conv2D(128, (5, 5), strides=(1, 1), padding='same', name='decoder_conv2D_4_1')(d3) - d4_1 = BatchNormalization(bn_axis)(d4_1) - d4_1 = LeakyReLU()(d4_1) - - # Define the two outputs - color_output = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(d4_1) - color_output = Activation('tanh', name='color_output')(color_output) - - error_output = Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same')(d4_1) - error_output = Activation('sigmoid', name='error_output')(error_output) - - # Define model - model = Model(inputs=[input], outputs=[color_output, error_output]) - - return model - - -def Discriminator(): - bn_axis = 3 - - input = Input((128, 128, 3), name='input_image') - - # First layer of the discriminator - d1 = Conv2D(64, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_1_1')(input) - d1 = BatchNormalization(bn_axis)(d1) - d1 = LeakyReLU(0.2)(d1) - - # Second layer of the discriminator - d2 = Conv2D(128, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_2_1')(d1) - d2 = BatchNormalization(bn_axis)(d2) - d2 = LeakyReLU(0.2)(d2) - - # Third layer of the discriminator - d3 = Conv2D(256, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_3_1')(d2) - d3 = BatchNormalization(bn_axis)(d3) - d3 = LeakyReLU(0.2)(d3) - - # Fourth layer of the discriminator - d4 = Conv2D(512, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_4_1')(d3) - d4 = BatchNormalization(bn_axis)(d4) - d4 = LeakyReLU(0.2)(d4) - - # Fifth layer of the discriminator - d5 = Conv2D(512, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_5_1')(d4) - d5 = BatchNormalization(bn_axis)(d5) - d5 = LeakyReLU(0.2)(d5) 
- - # Sixth layer of the discriminator - d6 = Conv2D(512, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_6_1')(d5) - d6 = BatchNormalization(bn_axis)(d6) - d6 = LeakyReLU(0.2)(d6) - - # Seventh layer of the discriminator - d7 = Conv2D(512, (3, 3), strides=(2, 2), padding='same', name='discriminator_conv2D_7_1')(d6) - d7 = BatchNormalization(bn_axis)(d7) - d7 = LeakyReLU(0.2)(d7) - - flatten = Flatten()(d7) - output = Dense(1, activation='sigmoid', name='discriminator_output')(flatten) - discriminator_model = Model(inputs=input, outputs=[output]) - return discriminator_model diff --git a/examples/pix2pose/pix2pose/test.py b/examples/pix2pose/pix2pose/test.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/pix2pose/utils.py b/examples/pix2pose/utils.py new file mode 100644 index 000000000..1a8a9b8c3 --- /dev/null +++ b/examples/pix2pose/utils.py @@ -0,0 +1,58 @@ +import tensorflow as tf + + +def loss_color_wrapped(rotation_matrices): + def loss_color_unwrapped(color_image, predicted_color_image): + min_loss = tf.float32.max + + # Bring the image in the range between 0 and 1 + color_image = (color_image + 1) * 0.5 + + # Calculate masks for the object and the background (they are independent of the rotation) + mask_object = tf.repeat(tf.expand_dims(tf.math.reduce_max(tf.math.ceil(color_image), axis=-1), axis=-1), + repeats=3, axis=-1) + mask_background = tf.ones(tf.shape(mask_object)) - mask_object + + # Bring the image again in the range between -1 and 1 + color_image = (color_image * 2) - 1 + + # Iterate over all possible rotations + for rotation_matrix in rotation_matrices: + + real_color_image = tf.identity(color_image) + + # Add a small epsilon value to avoid the discontinuity problem + real_color_image = real_color_image + tf.ones_like(real_color_image) * 0.0001 + + # Rotate the object + real_color_image = tf.einsum('ij,mklj->mkli', tf.convert_to_tensor(np.array(rotation_matrix), dtype=tf.float32), real_color_image) + #real_color_image = tf.where(tf.math.less(real_color_image, 0), tf.ones_like(real_color_image) + real_color_image, real_color_image) + + # Set the background to be all -1 + real_color_image *= mask_object + real_color_image += (mask_background*tf.constant(-1.)) + + # Get the number of pixels + num_pixels = tf.math.reduce_prod(tf.shape(real_color_image)[1:3]) + beta = 3 + + # Calculate the difference between the real and predicted images including the mask + diff_object = tf.math.abs(predicted_color_image*mask_object - real_color_image*mask_object) + diff_background = tf.math.abs(predicted_color_image*mask_background - real_color_image*mask_background) + + # Calculate the total loss + loss_colors = tf.cast((1/num_pixels), dtype=tf.float32)*(beta*tf.math.reduce_sum(diff_object, axis=[1, 2, 3]) + tf.math.reduce_sum(diff_background, axis=[1, 2, 3])) + min_loss = tf.math.minimum(loss_colors, min_loss) + return min_loss + + return loss_color_unwrapped + + +def loss_error(real_error_image, predicted_error_image): + # Get the number of pixels + num_pixels = tf.math.reduce_prod(tf.shape(real_error_image)[1:3]) + loss_error = tf.cast((1/num_pixels), dtype=tf.float32)*(tf.math.reduce_sum(tf.math.square(predicted_error_image - tf.clip_by_value(tf.math.abs(real_error_image), tf.float32.min, 1.)), axis=[1, 2, 3])) + + return loss_error + + From 45da37fcba9a9f5c7a85291dc564258d347c9504 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 15 Oct 2021 16:48:09 +0200 Subject: [PATCH 005/101] Refactor domain randomization processor --- 
examples/pix2pose/__init__.py | 0 .../{pipelines.py => old_pipelines.py} | 36 ++--------- examples/pix2pose/pipeline.py | 18 ++++++ examples/pix2pose/pix2pose/generator.py | 10 +-- examples/pix2pose/processors.py | 21 +++++++ examples/pix2pose/test.py | 63 +++++++++++++++++++ examples/pix2pose/utils.py | 35 ++++++----- 7 files changed, 133 insertions(+), 50 deletions(-) create mode 100644 examples/pix2pose/__init__.py rename examples/pix2pose/{pipelines.py => old_pipelines.py} (89%) create mode 100644 examples/pix2pose/pipeline.py create mode 100644 examples/pix2pose/processors.py create mode 100644 examples/pix2pose/test.py diff --git a/examples/pix2pose/__init__.py b/examples/pix2pose/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/old_pipelines.py similarity index 89% rename from examples/pix2pose/pipelines.py rename to examples/pix2pose/old_pipelines.py index ee4299670..f484c6a44 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/old_pipelines.py @@ -11,8 +11,7 @@ class GeneratedImageProcessor(Processor): - """ - Loads pre-generated images + """Loads pre-generated images """ def __init__(self, path_images, background_images_paths, num_occlusions=1, split=pr.TRAIN, no_ambiguities=False): super(GeneratedImageProcessor, self).__init__() @@ -38,18 +37,17 @@ def __init__(self, path_images, background_images_paths, num_occlusions=1, split self.alpha_original = [np.load(os.path.join(path_images, "alpha_original/alpha_original_{}.npy".format(str(i).zfill(7)))) for i in range(self.num_images)] - def call(self): - index = random.randint(0, self.num_images-1) - image_original = self.images_original[index] - image_colors = self.images_colors[index] - alpha_original = self.alpha_original[index] + def call(self, input_image, label_image): + # index = random.randint(0, self.num_images-1) + # image_original = self.images_original[index] + # image_colors = self.images_colors[index] + # alpha_original = self.alpha_original[index] if self.split == pr.TRAIN: image_original = self.augment(image_original, alpha_original) image_original = self.preprocess_input(image_original) image_colors = self.preprocess_output(image_colors) - return image_original, image_colors @@ -150,25 +148,3 @@ def process_batch(self, inputs, labels, batch_index): self._place_sample(sample['labels'], sample_arg, labels) return inputs, labels - - -class NormalizeImageTanh(Processor): - """ - Normalize image so that the values are between -1 and 1 - """ - def __init__(self): - super(NormalizeImageTanh, self).__init__() - - def call(self, image): - return (image/127.5)-1 - - -class DenormalizeImageTanh(Processor): - """ - Transforms an image from the value range -1 to 1 back to 0 to 255 - """ - def __init__(self): - super(DenormalizeImageTanh, self).__init__() - - def call(self, image): - return (image + 1.0)*127.5 diff --git a/examples/pix2pose/pipeline.py b/examples/pix2pose/pipeline.py new file mode 100644 index 000000000..4e3ffdcf6 --- /dev/null +++ b/examples/pix2pose/pipeline.py @@ -0,0 +1,18 @@ +from paz.abstract import SequentialProcessor +from paz.pipelines import RandomizeRenderedImage as RandomizeRender +from paz import processors as pr +from .processors import ImageToClosedOneBall + + +class AutoEncoderDomainRandomization(SequentialProcessor): + """Performs domain randomization on a rendered image + """ + def __init__(self, image_shape, image_paths, num_occlusions=1): + super(AutoEncoderDomainRandomization, self).__init__() + 
self.add(pr.Render()) + self.add(pr.ControlMap(pr.Copy(), [0], [2], keep={0: 0})) + self.add(pr.ControlMap(RandomizeRender(image_paths), [0, 1], [0])) + self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) + self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) + self.add(pr.SequenceWrapper({0: {'input_image': image_shape}}, + {1: {'label_image': image_shape}})) diff --git a/examples/pix2pose/pix2pose/generator.py b/examples/pix2pose/pix2pose/generator.py index 5270bcf58..720586a33 100644 --- a/examples/pix2pose/pix2pose/generator.py +++ b/examples/pix2pose/pix2pose/generator.py @@ -60,11 +60,11 @@ def Generator(input_shape=(128, 128, 3), latent_dimension=256, x = Dense(8 * 8 * latent_dimension)(x) x = Reshape((8, 8, latent_dimension))(x) x = decoder(x, skip_connections) - RGB_mask = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(x) - RGB_mask = Activation('tanh', name='RGB_mask')(RGB_mask) - error_mask = Conv2DTranspose(1, (5, 5), (2, 2), padding='same')(x) - error_mask = Activation('sigmoid', name='error_mask')(error_mask) - model = Model([input_image], [RGB_mask, error_mask], name=name) + label_image = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(x) + label_image = Activation('tanh', name='label_image')(label_image) + error_image = Conv2DTranspose(1, (5, 5), (2, 2), padding='same')(x) + error_image = Activation('sigmoid', name='error_image')(error_image) + model = Model([input_image], [label_image, error_image], name=name) return model diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py new file mode 100644 index 000000000..550229cf1 --- /dev/null +++ b/examples/pix2pose/processors.py @@ -0,0 +1,21 @@ +from paz.abstract import Processor + + +class ImageToClosedOneBall(Processor): + """Map image value from [0, 255] -> [-1, 1]. + """ + def __init__(self): + super(ImageToClosedOneBall, self).__init__() + + def call(self, image): + return (image / 127.5) - 1 + + +class ClosedOneBallToImage(Processor): + """Map normalized value from [-1, 1] -> [0, 255]. 
+ """ + def __init__(self): + super(ClosedOneBallToImage, self).__init__() + + def call(self, image): + return (image + 1.0) * 127.5 diff --git a/examples/pix2pose/test.py b/examples/pix2pose/test.py new file mode 100644 index 000000000..d23077c90 --- /dev/null +++ b/examples/pix2pose/test.py @@ -0,0 +1,63 @@ +from paz.abstract import SequentialProcessor, Processor +from paz import processors as pr +import numpy as np + + +class PipelineWithTwoChannels(SequentialProcessor): + def __init__(self): + super(PipelineWithTwoChannels, self).__init__() + self.add(lambda x: x) + self.add(pr.ControlMap(pr.Copy(), [0], [1], keep={0: 0})) + + +class PipelineWithThreeChannels(SequentialProcessor): + def __init__(self): + super(PipelineWithThreeChannels, self).__init__() + self.add(lambda a, b: (a, b)) + self.add(pr.ControlMap(pr.Copy(), [0], [2], keep={0: 0})) + + +class PipelineWithThreeChannelsPlus(SequentialProcessor): + def __init__(self): + super(PipelineWithThreeChannelsPlus, self).__init__() + self.add(lambda a, b: (a, b)) + self.add(pr.ControlMap(pr.Copy(), [0], [2], keep={0: 0})) + self.add(pr.ControlMap(SumTwoValues(), [0, 1], [0])) + + +class SumTwoValues(Processor): + def __init__(self): + super(SumTwoValues, self).__init__() + + def call(self, A, B): + return A + B + + +def test_copy_with_controlmap_using_2_channels(): + pipeline = PipelineWithTwoChannels() + random_values = np.random.random((128, 128)) + values = pipeline(random_values) + assert len(values) == 2 + assert np.allclose(values[0], random_values) + assert np.allclose(values[1], random_values) + + +def test_copy_with_controlmap_using_3_channels(): + pipeline = PipelineWithThreeChannels() + A_random_values = np.random.random((128, 128)) + B_random_values = np.random.random((128, 128)) + values = pipeline(A_random_values, B_random_values) + assert len(values) == 3 + assert np.allclose(values[0], A_random_values) + assert np.allclose(values[1], B_random_values) + assert np.allclose(values[2], A_random_values) + + +def test_copy_with_controlmap_using_3_channels_plus(): + pipeline = PipelineWithThreeChannelsPlus() + A_random_values = np.random.random((128, 128)) + B_random_values = np.random.random((128, 128)) + values = pipeline(A_random_values, B_random_values) + assert len(values) == 2 + assert np.allclose(values[0], A_random_values + B_random_values) + assert np.allclose(values[1], A_random_values) diff --git a/examples/pix2pose/utils.py b/examples/pix2pose/utils.py index 1a8a9b8c3..67f73e312 100644 --- a/examples/pix2pose/utils.py +++ b/examples/pix2pose/utils.py @@ -1,8 +1,25 @@ import tensorflow as tf +from tensorflow.keras.losses import Loss -def loss_color_wrapped(rotation_matrices): - def loss_color_unwrapped(color_image, predicted_color_image): +class Pix2PoseLoss(Loss): + def __init__(self): + super(Pix2PoseLoss, self).__init__() + + def call(self, y_true, y_pred): + y_true = tf.clip_by_value(tf.math.abs(y_true), tf.float32.min, 1.0) + squared_error = tf.square(y_pred - y_true) + squared_error = tf.reduce_sum(squared_error, axis=3) + squared_error = tf.reduce_mean(squared_error, axis=[1, 2]) + return squared_error + + +class Pix2PoseColor(Loss): + def __init__(self, rotation_matrices): + super(Pix2PoseColor, self).__init__() + self.rotation_matrices = rotation_matrices + + def call(self, color_image, predicted_color_image): min_loss = tf.float32.max # Bring the image in the range between 0 and 1 @@ -17,7 +34,7 @@ def loss_color_unwrapped(color_image, predicted_color_image): color_image = (color_image * 2) - 1 # Iterate 
over all possible rotations - for rotation_matrix in rotation_matrices: + for rotation_matrix in self.rotation_matrices: real_color_image = tf.identity(color_image) @@ -44,15 +61,3 @@ def loss_color_unwrapped(color_image, predicted_color_image): loss_colors = tf.cast((1/num_pixels), dtype=tf.float32)*(beta*tf.math.reduce_sum(diff_object, axis=[1, 2, 3]) + tf.math.reduce_sum(diff_background, axis=[1, 2, 3])) min_loss = tf.math.minimum(loss_colors, min_loss) return min_loss - - return loss_color_unwrapped - - -def loss_error(real_error_image, predicted_error_image): - # Get the number of pixels - num_pixels = tf.math.reduce_prod(tf.shape(real_error_image)[1:3]) - loss_error = tf.cast((1/num_pixels), dtype=tf.float32)*(tf.math.reduce_sum(tf.math.square(predicted_error_image - tf.clip_by_value(tf.math.abs(real_error_image), tf.float32.min, 1.)), axis=[1, 2, 3])) - - return loss_error - - From 42e85dc424d1bf4e010746b29cb7b0ccc4d74414 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Sat, 16 Oct 2021 17:17:28 +0200 Subject: [PATCH 006/101] Ignore .iml file in complete repository --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6d12a3065..607a7ce7e 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ checkpoint *.npy *.p *.zip +*.iml !.github/manifest.xml From ac2230759d8a61148a2f11478699d5ec03d07392 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Sat, 16 Oct 2021 17:17:57 +0200 Subject: [PATCH 007/101] Add quaternion backend and basic coloring scheme --- examples/pix2pose/backend.py | 76 +++++++++++++++++++++++++++++++++++ examples/pix2pose/coloring.py | 49 ++++++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 examples/pix2pose/backend.py create mode 100644 examples/pix2pose/coloring.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py new file mode 100644 index 000000000..3c2e35454 --- /dev/null +++ b/examples/pix2pose/backend.py @@ -0,0 +1,76 @@ +import numpy as np + + +def homogenous_quaternion_to_rotation_matrix(quaternion): + # w0, q1, q2, q3 = quaternion + q1, q2, q3, w0 = quaternion + + r11 = w0**2 + q1**2 - q2**2 - q3**2 + r12 = 2 * ((q1 * q2) - (w0 * q3)) + r13 = 2 * ((w0 * q2) + (q1 * q3)) + + r21 = 2 * ((w0 * q3) + (q1 * q2)) + r22 = w0**2 - q1**2 + q2**2 - q3**2 + r23 = 2 * ((q2 * q3) - (w0 * q1)) + + r31 = 2 * ((q1 * q3) - (w0 * q2)) + r32 = 2 * ((w0 * q1) + (q2 * q3)) + r33 = w0**2 - q1**2 - q2**2 + q3**2 + + rotation_matrix = np.array([[r11, r12, r13], + [r21, r22, r23], + [r31, r32, r33]]) + return rotation_matrix + + +def inhomogenous_quaternion_to_rotation_matrix(q): + """Transforms quaternion into a rotation matrix + # Arguments + q: quarternion, Numpy array of shape ``[4]`` + # Returns + Numpy array representing a rotation vector having a shape ``[3]``. + """ + # quaternion + # q = q[::-1] + r11 = 1 - (2 * (q[1]**2 + q[2]**2)) + r12 = 2 * (q[0] * q[1] - q[3] * q[2]) + r13 = 2 * (q[3] * q[1] + q[0] * q[2]) + + r21 = 2 * (q[0] * q[1] + q[3] * q[2]) + r22 = 1 - (2 * (q[0]**2 + q[2]**2)) + r23 = 2 * (q[1] * q[2] - q[3] * q[0]) + + r31 = 2 * (q[0] * q[2] - q[3] * q[1]) + r32 = 2 * (q[3] * q[0] + q[1] * q[2]) + r33 = 1 - (2 * (q[0]**2 + q[1]**2)) + + rotation_matrix = np.array([[r11, r12, r13], + [r21, r22, r23], + [r31, r32, r33]]) + + return rotation_matrix + # return np.squeeze(rotation_matrix) + + +def multiply_quaternions(quaternion_0, quaternion_1): + """Multiplies two quaternions. 
+ + # Reference: + Code extracted from [here](https://stackoverflow.com/questions/ + 39000758/how-to-multiply-two-quaternions-by-python-or-numpy) + """ + x0, y0, z0, w0 = quaternion_0 + x1, y1, z1, w1 = quaternion_1 + x2 = +(x1 * w0) + (y1 * z0) - (z1 * y0) + (w1 * x0) + y2 = -(x1 * z0) + (y1 * w0) + (z1 * x0) + (w1 * y0) + z2 = +(x1 * y0) - (y1 * x0) + (z1 * w0) + (w1 * z0) + w2 = -(x1 * x0) - (y1 * y0) - (z1 * z0) + (w1 * w0) + return np.array([x2, y2, z2, w2]) + + +# quaternion = (1 / np.sqrt(30)) * np.array([1, 2, 3, 4]) +# theta = np.deg2rad(0) +# quaternion = np.array([1, 0, 0, 0]) +# a = homogenous_quaternion_to_rotation_matrix(quaternion) +# quaternion = (1 / np.sqrt(30)) * np.array([2, 3, 4, 1]) +# b = inhomogenous_quaternion_to_rotation_matrix(quaternion) diff --git a/examples/pix2pose/coloring.py b/examples/pix2pose/coloring.py new file mode 100644 index 000000000..63d19c93f --- /dev/null +++ b/examples/pix2pose/coloring.py @@ -0,0 +1,49 @@ +import os +import numpy as np +import trimesh +from pyrender import Mesh, Scene, Viewer +from pyrender.constants import RenderFlags + + +def normalize_min_max(x, x_min, x_max): + return (x - x_min) / (x_max - x_min) + + +def load_obj(path): + mesh = trimesh.load(path) + return mesh + + +def extract_corners3D(vertices): + point3D_min = np.min(vertices, axis=0) + point3D_max = np.max(vertices, axis=0) + return point3D_min, point3D_max + + +def compute_vertices_colors(vertices): + corner3D_min, corner3D_max = extract_corners3D(vertices) + normalized_colors = normalize_min_max(vertices, corner3D_min, corner3D_max) + colors = (255 * normalized_colors).astype('uint8') + return colors + + +def color_object(path): + mesh = load_obj(path) + colors = compute_vertices_colors(mesh.vertices) + mesh.visual = mesh.visual.to_color() + mesh.visual.vertex_colors = colors + return mesh + + +if __name__ == "__main__": + scene = Scene(bg_color=[0, 0, 0]) + root = os.path.expanduser('~') + mesh_path = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' + path = os.path.join(root, mesh_path) + mesh = color_object(path) + mesh = Mesh.from_trimesh(mesh, smooth=False) + mesh.primitives[0].material.metallicFactor = 0.0 + mesh.primitives[0].material.roughnessFactor = 1.0 + mesh.primitives[0].material.alphaMode = 'OPAQUE' + scene.add(mesh) + Viewer(scene, use_raymond_lighting=True, flags=RenderFlags.FLAT) From d49e9f94f2ca1bbd808fce2b6d6f741da57c6fba Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 18 Oct 2021 13:19:35 +0200 Subject: [PATCH 008/101] Add scene for rendering pixel and normal image --- examples/pix2pose/coloring.py | 8 +- examples/pix2pose/old_train.py | 137 ++++++++++++++ .../pix2pose/{pipeline.py => pipelines.py} | 14 +- examples/pix2pose/scenes.py | 67 +++++++ examples/pix2pose/train.py | 173 ++++-------------- 5 files changed, 253 insertions(+), 146 deletions(-) create mode 100644 examples/pix2pose/old_train.py rename examples/pix2pose/{pipeline.py => pipelines.py} (55%) create mode 100644 examples/pix2pose/scenes.py diff --git a/examples/pix2pose/coloring.py b/examples/pix2pose/coloring.py index 63d19c93f..29259800d 100644 --- a/examples/pix2pose/coloring.py +++ b/examples/pix2pose/coloring.py @@ -32,6 +32,10 @@ def color_object(path): colors = compute_vertices_colors(mesh.vertices) mesh.visual = mesh.visual.to_color() mesh.visual.vertex_colors = colors + mesh = Mesh.from_trimesh(mesh, smooth=False) + mesh.primitives[0].material.metallicFactor = 0.0 + mesh.primitives[0].material.roughnessFactor = 1.0 + 
mesh.primitives[0].material.alphaMode = 'OPAQUE' return mesh @@ -41,9 +45,5 @@ def color_object(path): mesh_path = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' path = os.path.join(root, mesh_path) mesh = color_object(path) - mesh = Mesh.from_trimesh(mesh, smooth=False) - mesh.primitives[0].material.metallicFactor = 0.0 - mesh.primitives[0].material.roughnessFactor = 1.0 - mesh.primitives[0].material.alphaMode = 'OPAQUE' scene.add(mesh) Viewer(scene, use_raymond_lighting=True, flags=RenderFlags.FLAT) diff --git a/examples/pix2pose/old_train.py b/examples/pix2pose/old_train.py new file mode 100644 index 000000000..c7adce3f1 --- /dev/null +++ b/examples/pix2pose/old_train.py @@ -0,0 +1,137 @@ +import os +import glob +import argparse +import numpy as np +import time + +from tensorflow.keras.callbacks import CSVLogger +from tensorflow.keras.optimizers import Adam +from tensorflow.keras.layers import Input +from tensorflow.keras.models import Model + +from paz.abstract import GeneratingSequence +from paz.abstract.sequence import GeneratingSequence + +from pipelines import GeneratingSequencePix2Pose, GeneratedImageGenerator, make_batch_discriminator +from model import Generator, Discriminator, loss_color_wrapped, loss_error + + +description = 'Training script Pix2Pose model' +root_path = os.path.join(os.path.expanduser('~'), '.keras/') +parser = argparse.ArgumentParser(description=description) +parser.add_argument('-cl', '--class_name', default='tless05', type=str, + help='Class name to be added to model save path') +parser.add_argument('-id', '--background_images_directory', type=str, + help='Path to directory containing background images') +parser.add_argument('-pi', '--images_directory', type=str, + help='Path to pre-generated images (npy format)') +parser.add_argument('-bs', '--batch_size', default=4, type=int, + help='Batch size for training') +parser.add_argument('-lr', '--learning_rate', default=0.001, type=float, + help='Initial learning rate for Adam') +parser.add_argument('-ld', '--image_size', default=128, type=int, + help='Size of the side of a square image e.g. 
64') +parser.add_argument('-e', '--max_num_epochs', default=10000, type=int, + help='Maximum number of epochs before finishing') +parser.add_argument('-st', '--steps_per_epoch', default=5, type=int, + help='Steps per epoch') +parser.add_argument('-oc', '--num_occlusions', default=2, type=int, + help='Number of occlusions') +parser.add_argument('-sa', '--save_path', + default=os.path.join( + os.path.expanduser('~'), '.keras/paz/models'), + type=str, help='Path for writing model weights and logs') +parser.add_argument('-rm', '--rotation_matrices', + type=str, help='Path to npy file with a list of rotation matrices', required=True) +parser.add_argument('-de', '--description', + type=str, help='Description of the model') +args = parser.parse_args() + +# Building the whole GAN model +dcgan_input = Input(shape=(128, 128, 3)) +discriminator = Discriminator() +generator = Generator() +color_output, error_output = generator(dcgan_input) +discriminator.trainable = False +discriminator_output = discriminator(color_output) +dcgan = Model(inputs=[dcgan_input], outputs={"color_output": color_output, "error_output": error_output, "discriminator_output": discriminator_output}) + +# For the loss function pix2pose needs to know all the rotations under which the pose looks the same +rotation_matrices = np.load(args.rotation_matrices) +loss_color = loss_color_wrapped(rotation_matrices) + +# Set the loss +optimizer = Adam(args.learning_rate, amsgrad=True) +losses = {"color_output": loss_color, + "error_output": loss_error, + "discriminator_output": "binary_crossentropy"} +lossWeights = {"color_output": 100.0, "error_output": 50.0, "discriminator_output": 1.0} +dcgan.compile(optimizer=optimizer, loss=losses, loss_weights=lossWeights, run_eagerly=True) + +discriminator.trainable = True +discriminator.compile(loss=['binary_crossentropy'], optimizer=optimizer) + +# Creating sequencer +background_image_paths = glob.glob(os.path.join(args.background_images_directory, '*.jpg')) +processor_train = GeneratedImageGenerator(os.path.join(args.images_directory, "train"), args.image_size, background_image_paths, num_occlusions=0) +processor_test = GeneratedImageGenerator(os.path.join(args.images_directory, "test"), args.image_size, background_image_paths, num_occlusions=0) +sequence_train = GeneratingSequencePix2Pose(processor_train, dcgan, args.batch_size, args.steps_per_epoch, rotation_matrices=rotation_matrices) +sequence_test = GeneratingSequencePix2Pose(processor_test, dcgan, args.batch_size, args.steps_per_epoch, rotation_matrices=rotation_matrices) + +# Making directory for saving model weights and logs +model_name = '_'.join([dcgan.name, args.class_name]) +save_path = os.path.join(args.save_path, model_name) +if not os.path.exists(save_path): + os.makedirs(save_path) + +# Setting callbacks +log = CSVLogger(os.path.join(save_path, '%s.log' % model_name)) +log.model = dcgan + +callbacks=[log] + +for callback in callbacks: + callback.on_train_begin() + +for num_epoch in range(args.max_num_epochs): + sequence_iterator_train = sequence_train.__iter__() + sequence_iterator_test = sequence_test.__iter__() + + for callback in callbacks: + callback.on_epoch_begin(num_epoch) + + for num_batch in range(args.steps_per_epoch): + # Train the discriminator + discriminator.trainable = True + batch = next(sequence_iterator_train) + + X_discriminator_real, y_discriminator_real = make_batch_discriminator(generator, batch[0]['input_image'], batch[1]['color_output'], 1) + loss_discriminator_real = 
discriminator.train_on_batch(X_discriminator_real, y_discriminator_real) + + X_discriminator_fake, y_discriminator_fake = make_batch_discriminator(generator, batch[0]['input_image'], batch[1]['color_output'], 0) + loss_discriminator_fake = discriminator.train_on_batch(X_discriminator_fake, y_discriminator_fake) + + loss_discriminator = (loss_discriminator_real + loss_discriminator_fake)/2. + + # Train the generator + discriminator.trainable = False + loss_dcgan, loss_color_output, loss_dcgan_discriminator, loss_error_output = dcgan.train_on_batch(batch[0]['input_image'], {"color_output": batch[1]['color_output'], "error_output": batch[1]['error_output'], "discriminator_output": np.ones((args.batch_size, 1))}) + + # Test the network + batch_test = next(sequence_iterator_test) + loss_dcgan_test, loss_color_output_test, loss_dcgan_discriminator_test, loss_error_output_test = dcgan.test_on_batch(batch_test[0]['input_image'], {"color_output": batch_test[1]['color_output'], "error_output": batch_test[1]['error_output'], "discriminator_output": np.ones((args.batch_size, 1))}) + + print("Loss DCGAN: {}".format(loss_dcgan)) + for callback in callbacks: + callback.on_epoch_end(num_epoch, logs={'loss_discriminator': loss_discriminator, + 'loss_dcgan': loss_dcgan, 'loss_color_output': loss_color_output, + 'loss_dcgan_discriminator': loss_dcgan_discriminator, + 'loss_error_output': loss_error_output, + 'loss_dcgan_test': loss_dcgan_test, 'loss_color_output_test': loss_color_output_test, + 'loss_dcgan_discriminator_test': loss_dcgan_discriminator_test, + 'loss_error_output_test': loss_error_output_test + }) + + +for callback in callbacks: + callback.on_train_end() \ No newline at end of file diff --git a/examples/pix2pose/pipeline.py b/examples/pix2pose/pipelines.py similarity index 55% rename from examples/pix2pose/pipeline.py rename to examples/pix2pose/pipelines.py index 4e3ffdcf6..f345a9014 100644 --- a/examples/pix2pose/pipeline.py +++ b/examples/pix2pose/pipelines.py @@ -1,18 +1,18 @@ from paz.abstract import SequentialProcessor from paz.pipelines import RandomizeRenderedImage as RandomizeRender from paz import processors as pr -from .processors import ImageToClosedOneBall +# from processors import ImageToClosedOneBall -class AutoEncoderDomainRandomization(SequentialProcessor): +class DomainRandomization(SequentialProcessor): """Performs domain randomization on a rendered image """ - def __init__(self, image_shape, image_paths, num_occlusions=1): - super(AutoEncoderDomainRandomization, self).__init__() - self.add(pr.Render()) - self.add(pr.ControlMap(pr.Copy(), [0], [2], keep={0: 0})) + def __init__(self, renderer, image_shape, image_paths, num_occlusions=1): + super(DomainRandomization, self).__init__() + self.add(pr.Render(renderer)) self.add(pr.ControlMap(RandomizeRender(image_paths), [0, 1], [0])) self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) - self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) + # self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) + self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) self.add(pr.SequenceWrapper({0: {'input_image': image_shape}}, {1: {'label_image': image_shape}})) diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py new file mode 100644 index 000000000..6ce85ab16 --- /dev/null +++ b/examples/pix2pose/scenes.py @@ -0,0 +1,67 @@ +import numpy as np +from paz.backend.render import (sample_uniformly, split_alpha_channel, + random_perturbation, sample_point_in_sphere, + compute_modelview_matrices) +from pyrender import 
(PerspectiveCamera, OffscreenRenderer, DirectionalLight, + RenderFlags, Mesh, Scene) +import trimesh +from coloring import color_object + + +class PixelMask(): + """Render-ready scene composed of a single object and a single moving camera. + + # Arguments + path_OBJ: String containing the path to an OBJ file. + viewport_size: List, specifying [H, W] of rendered image. + y_fov: Float indicating the vertical field of view in radians. + distance: List of floats indicating [max_distance, min_distance] + light: List of floats indicating [max_light, min_light] + top_only: Boolean. If True images are only take from the top. + roll: Float, to sample [-roll, roll] rolls of the Z OpenGL camera axis. + shift: Float, to sample [-shift, shift] to move in X, Y OpenGL axes. + """ + def __init__(self, path_OBJ, viewport_size=(128, 128), y_fov=3.14159 / 4.0, + distance=[0.3, 0.5], light=[0.5, 30], top_only=False, + roll=None, shift=None): + self.distance, self.roll, self.shift = distance, roll, shift + self.light_intensity, self.top_only = light, top_only + self._build_scene(path_OBJ, viewport_size, light, y_fov) + self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) + self.flags_RGBA = RenderFlags.RGBA + self.flags_FLAT = RenderFlags.FLAT + self.epsilon = 0.01 + + def _build_scene(self, path, size, light, y_fov): + self.scene = Scene(bg_color=[0, 0, 0, 0]) + self.light = self.scene.add( + DirectionalLight([1.0, 1.0, 1.0], np.mean(light))) + self.camera = self.scene.add( + PerspectiveCamera(y_fov, aspectRatio=np.divide(*size))) + self.pixel_mesh = self.scene.add(color_object(path)) + self.mesh = self.scene.add( + Mesh.from_trimesh(trimesh.load(path), smooth=True)) + self.world_origin = self.mesh.mesh.centroid + + def _sample_parameters(self): + distance = sample_uniformly(self.distance) + camera_origin = sample_point_in_sphere(distance, self.top_only) + camera_origin = random_perturbation(camera_origin, self.epsilon) + light_intensity = sample_uniformly(self.light_intensity) + return camera_origin, light_intensity + + def render(self): + camera_origin, intensity = self._sample_parameters() + camera_to_world, world_to_camera = compute_modelview_matrices( + camera_origin, self.world_origin, self.roll, self.shift) + self.light.light.intensity = intensity + self.scene.set_pose(self.camera, camera_to_world) + self.scene.set_pose(self.light, camera_to_world) + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + return image, alpha, RGB_mask diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index c7adce3f1..b19f9d4bc 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -1,137 +1,40 @@ import os import glob -import argparse -import numpy as np -import time - -from tensorflow.keras.callbacks import CSVLogger -from tensorflow.keras.optimizers import Adam -from tensorflow.keras.layers import Input -from tensorflow.keras.models import Model - -from paz.abstract import GeneratingSequence -from paz.abstract.sequence import GeneratingSequence - -from pipelines import GeneratingSequencePix2Pose, GeneratedImageGenerator, make_batch_discriminator -from model import Generator, Discriminator, loss_color_wrapped, loss_error - - -description = 'Training script Pix2Pose model' -root_path = 
os.path.join(os.path.expanduser('~'), '.keras/') -parser = argparse.ArgumentParser(description=description) -parser.add_argument('-cl', '--class_name', default='tless05', type=str, - help='Class name to be added to model save path') -parser.add_argument('-id', '--background_images_directory', type=str, - help='Path to directory containing background images') -parser.add_argument('-pi', '--images_directory', type=str, - help='Path to pre-generated images (npy format)') -parser.add_argument('-bs', '--batch_size', default=4, type=int, - help='Batch size for training') -parser.add_argument('-lr', '--learning_rate', default=0.001, type=float, - help='Initial learning rate for Adam') -parser.add_argument('-ld', '--image_size', default=128, type=int, - help='Size of the side of a square image e.g. 64') -parser.add_argument('-e', '--max_num_epochs', default=10000, type=int, - help='Maximum number of epochs before finishing') -parser.add_argument('-st', '--steps_per_epoch', default=5, type=int, - help='Steps per epoch') -parser.add_argument('-oc', '--num_occlusions', default=2, type=int, - help='Number of occlusions') -parser.add_argument('-sa', '--save_path', - default=os.path.join( - os.path.expanduser('~'), '.keras/paz/models'), - type=str, help='Path for writing model weights and logs') -parser.add_argument('-rm', '--rotation_matrices', - type=str, help='Path to npy file with a list of rotation matrices', required=True) -parser.add_argument('-de', '--description', - type=str, help='Description of the model') -args = parser.parse_args() - -# Building the whole GAN model -dcgan_input = Input(shape=(128, 128, 3)) -discriminator = Discriminator() -generator = Generator() -color_output, error_output = generator(dcgan_input) -discriminator.trainable = False -discriminator_output = discriminator(color_output) -dcgan = Model(inputs=[dcgan_input], outputs={"color_output": color_output, "error_output": error_output, "discriminator_output": discriminator_output}) - -# For the loss function pix2pose needs to know all the rotations under which the pose looks the same -rotation_matrices = np.load(args.rotation_matrices) -loss_color = loss_color_wrapped(rotation_matrices) - -# Set the loss -optimizer = Adam(args.learning_rate, amsgrad=True) -losses = {"color_output": loss_color, - "error_output": loss_error, - "discriminator_output": "binary_crossentropy"} -lossWeights = {"color_output": 100.0, "error_output": 50.0, "discriminator_output": 1.0} -dcgan.compile(optimizer=optimizer, loss=losses, loss_weights=lossWeights, run_eagerly=True) - -discriminator.trainable = True -discriminator.compile(loss=['binary_crossentropy'], optimizer=optimizer) - -# Creating sequencer -background_image_paths = glob.glob(os.path.join(args.background_images_directory, '*.jpg')) -processor_train = GeneratedImageGenerator(os.path.join(args.images_directory, "train"), args.image_size, background_image_paths, num_occlusions=0) -processor_test = GeneratedImageGenerator(os.path.join(args.images_directory, "test"), args.image_size, background_image_paths, num_occlusions=0) -sequence_train = GeneratingSequencePix2Pose(processor_train, dcgan, args.batch_size, args.steps_per_epoch, rotation_matrices=rotation_matrices) -sequence_test = GeneratingSequencePix2Pose(processor_test, dcgan, args.batch_size, args.steps_per_epoch, rotation_matrices=rotation_matrices) - -# Making directory for saving model weights and logs -model_name = '_'.join([dcgan.name, args.class_name]) -save_path = os.path.join(args.save_path, model_name) -if not 
os.path.exists(save_path): - os.makedirs(save_path) - -# Setting callbacks -log = CSVLogger(os.path.join(save_path, '%s.log' % model_name)) -log.model = dcgan - -callbacks=[log] - -for callback in callbacks: - callback.on_train_begin() - -for num_epoch in range(args.max_num_epochs): - sequence_iterator_train = sequence_train.__iter__() - sequence_iterator_test = sequence_test.__iter__() - - for callback in callbacks: - callback.on_epoch_begin(num_epoch) - - for num_batch in range(args.steps_per_epoch): - # Train the discriminator - discriminator.trainable = True - batch = next(sequence_iterator_train) - - X_discriminator_real, y_discriminator_real = make_batch_discriminator(generator, batch[0]['input_image'], batch[1]['color_output'], 1) - loss_discriminator_real = discriminator.train_on_batch(X_discriminator_real, y_discriminator_real) - - X_discriminator_fake, y_discriminator_fake = make_batch_discriminator(generator, batch[0]['input_image'], batch[1]['color_output'], 0) - loss_discriminator_fake = discriminator.train_on_batch(X_discriminator_fake, y_discriminator_fake) - - loss_discriminator = (loss_discriminator_real + loss_discriminator_fake)/2. - - # Train the generator - discriminator.trainable = False - loss_dcgan, loss_color_output, loss_dcgan_discriminator, loss_error_output = dcgan.train_on_batch(batch[0]['input_image'], {"color_output": batch[1]['color_output'], "error_output": batch[1]['error_output'], "discriminator_output": np.ones((args.batch_size, 1))}) - - # Test the network - batch_test = next(sequence_iterator_test) - loss_dcgan_test, loss_color_output_test, loss_dcgan_discriminator_test, loss_error_output_test = dcgan.test_on_batch(batch_test[0]['input_image'], {"color_output": batch_test[1]['color_output'], "error_output": batch_test[1]['error_output'], "discriminator_output": np.ones((args.batch_size, 1))}) - - print("Loss DCGAN: {}".format(loss_dcgan)) - for callback in callbacks: - callback.on_epoch_end(num_epoch, logs={'loss_discriminator': loss_discriminator, - 'loss_dcgan': loss_dcgan, 'loss_color_output': loss_color_output, - 'loss_dcgan_discriminator': loss_dcgan_discriminator, - 'loss_error_output': loss_error_output, - 'loss_dcgan_test': loss_dcgan_test, 'loss_color_output_test': loss_color_output_test, - 'loss_dcgan_discriminator_test': loss_dcgan_discriminator_test, - 'loss_error_output_test': loss_error_output_test - }) - - -for callback in callbacks: - callback.on_train_end() \ No newline at end of file +from scenes import PixelMask +from pipelines import DomainRandomization +from paz.backend.image import show_image + + +image_shape = [128, 128, 3] +root_path = os.path.expanduser('~') +background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' +background_wildcard = os.path.join(root_path, background_wildcard) +image_paths = glob.glob(background_wildcard) +path_OBJ = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' +path_OBJ = os.path.join(root_path, path_OBJ) +num_occlusions = 1 +viewport_size = image_shape[:2] +y_fov = 3.14159 / 4.0 +distance = [0.3, 0.5] +light = [1.0, 30] +top_only = False +roll = 3.14159 +shift = 0.05 + + +renderer = PixelMask(path_OBJ, viewport_size, y_fov, distance, + light, top_only, roll, shift) + +# for _ in range(100): +image, alpha, RGB_mask = renderer.render() +show_image(image) +show_image(RGB_mask) + +processor = DomainRandomization(renderer, image_shape, + image_paths, num_occlusions) + +for _ in range(100): + sample = processor() + inputs, labels = sample['inputs'], sample['labels'] + 
show_image((inputs['input_image'] * 255).astype('uint8')) + show_image((labels['label_image'] * 255).astype('uint8')) From 5c1c726da2673af899426a1cce4960cc8d948f5b Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 18 Oct 2021 15:22:04 +0200 Subject: [PATCH 009/101] Start refactoring loss --- examples/pix2pose/loss.py | 68 +++++++++++++++++++++ examples/pix2pose/pix2pose/pix2pose.py | 81 ++++++++++++++++++++++++++ examples/pix2pose/scenes.py | 2 +- examples/pix2pose/train.py | 2 + examples/pix2pose/utils.py | 19 +++--- 5 files changed, 161 insertions(+), 11 deletions(-) create mode 100644 examples/pix2pose/loss.py create mode 100644 examples/pix2pose/pix2pose/pix2pose.py diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py new file mode 100644 index 000000000..8b856689c --- /dev/null +++ b/examples/pix2pose/loss.py @@ -0,0 +1,68 @@ +from tensorflow.keras.losses import Loss +import tensorflow as tf + + +class WeightedRGBMask(Loss): + def __init__(self, beta=3.0, epsilon=1e-4): + super(WeightedRGBMask, self).__init__() + self.beta, self.epsilon = beta, epsilon + + def _extract_masks(RGBA_mask): + # TODO this should be an additional input or extracted from alpha mask + # mask_object = tf.math.ceil(RGB_mask) + # mask_object = tf.math.reduce_max(mask_object, axis=-1, keepdims=True) + # mask_object = tf.repeat(mask_object, repeats=3, axis=-1) + # mask_background = tf.ones(tf.shape(mask_object)) - mask_object + # return mask_object, mask_background + return None + + def _extract_alpha_mask(self, RGBA_mask): + alpha_mask = RGBA_mask[:, :, :, 3:4] + color_mask = RGBA_mask[:, :, :, 0:3] + return color_mask, alpha_mask + + def _compute_masks(self, alpha_mask): + alpha_mask, 1.0 - alpha_mask + + def _unitball_to_normalized(x): + # [-1, 1] -> [0, 1] + return (x + 1) * 0.5 + + def _normalized_to_unitball(x): + # [0, 1] -> [-1, 1] + return (2.0 * x) - 1.0 + + def call(self, RGBA_mask_true, RGB_mask_pred): + # Loss that penalizes more object color mismatch + # Loss that penalizes less background color not being "0" + # RGB_mask_true = self._unitball_to_normalized(RGB_mask_true) + # mask_object, mask_background = self._extract_masks(RGB_mask_true) + # RGB_mask_true = self._normalized_to_unitball(RGB_mask_true) + # RGB_mask_true = RGB_mask_true + self.epsilon + + # Set the background to be all -1 + RGB_mask_true, alpha_mask = self._extract_alpha_mask(RGBA_mask_true) + # object_mask, background_mask = self._compute_masks(alpha_mask) + + foreground_true = RGB_mask_true * alpha_mask + foreground_pred = RGB_mask_pred * alpha_mask + background_true = RGB_mask_true * (1.0 - alpha_mask) + background_pred = RGB_mask_true * (1.0 - alpha_mask) + foreground_loss = tf.abs(foreground_true - foreground_pred) + background_loss = tf.abs(background_true - background_pred) + loss = (self.beta * foreground_loss) + background_loss + loss = tf.reduce_mean(loss, axis[1, 2, 3]) + # RGB_mask_true = RGB_mask_true * mask_object + # RGB_mask_true = RGB_mask_true + (mask_background * tf.constant(-1.)) + + # Calculate the difference between the real and predicted images including the mask + # object_error = tf.abs(RGB_mask_pred * mask_object - RGB_mask_true * mask_object) + # background_error = tf.abs(RGB_mask_pred * mask_background - RGB_mask_true * mask_background) + + object_error = tf.reduce_sum(object_error, axis=-1) + background_error = tf.reduce_sum(background_error, axis=-1) + + loss = (self.beta * object_error) + background_error + loss = tf.reduce_mean(loss, axis=[1, 2, 3]) + loss = tf.math.minimum(loss, 
tf.float32.max) + return loss diff --git a/examples/pix2pose/pix2pose/pix2pose.py b/examples/pix2pose/pix2pose/pix2pose.py new file mode 100644 index 000000000..bc69b2516 --- /dev/null +++ b/examples/pix2pose/pix2pose/pix2pose.py @@ -0,0 +1,81 @@ +import tensorflow as tf +from tensorflow.keras.models import Model +from tensorflow.keras.metrics import Mean + + +class Pix2PoseGAN(Model): + def __init__(self, image_shape, discriminator, generator, latent_dim): + super(Pix2PoseGAN, self).__init__() + self.image_shape = image_shape + self.discriminator = discriminator + self.generator = generator + self.latent_dim = latent_dim + self.generator_loss_tracker = Mean(name='generator_loss') + self.discriminator_loss_tracker = Mean(name='discriminator_loss') + + @property + def metrics(self): + return [self.generator_loss_tracker, self.discriminator_loss_tracker] + + def compile(self, d_optimizer, g_optimizer, loss_fn): + super(Pix2PoseGAN, self).compile() + self.d_optimizer = d_optimizer + self.g_optimizer = g_optimizer + self.loss_fn = loss_fn + + def train_step(self, data): + real_images, one_hot_labels = data + + # Add dummy dimensions to the labels so that they can be concatenated with + # the images. This is for the discriminator. + image_one_hot_labels = one_hot_labels[:, :, None, None] + image_one_hot_labels = tf.repeat(image_one_hot_labels, repeats=[image_size * image_size]) + image_one_hot_labels = tf.reshape(image_one_hot_labels, (-1, image_size, image_size, num_classes)) + + # Sample random points in the latent space and concatenate the labels. + # This is for the generator. + batch_size = tf.shape(real_images)[0] + random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) + random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) + + # Decode the noise (guided by labels) to fake images. + generated_images = self.generator(random_vector_labels) + + # Combine them with real images. Note that we are concatenating the labels + # with these images here. + fake_image_and_labels = tf.concat([generated_images, image_one_hot_labels], -1) + real_image_and_labels = tf.concat([real_images, image_one_hot_labels], -1) + combined_images = tf.concat([fake_image_and_labels, real_image_and_labels], axis=0) + + # Assemble labels discriminating real from fake images. + labels = tf.concat([tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0) + + # Train the discriminator. + with tf.GradientTape() as tape: + predictions = self.discriminator(combined_images) + d_loss = self.loss_fn(labels, predictions) + grads = tape.gradient(d_loss, self.discriminator.trainable_weights) + self.d_optimizer.apply_gradients(zip(grads, self.discriminator.trainable_weights)) + + # Sample random points in the latent space. + random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) + random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) + + # Assemble labels that say "all real images". + misleading_labels = tf.zeros((batch_size, 1)) + + # Train the generator (note that we should *not* update the weights + # of the discriminator)! 
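+        # Gradients below are taken only w.r.t. self.generator.trainable_weights,
+        # so even though this forward pass runs through the discriminator, the
+        # discriminator weights are not updated in this step.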
+ with tf.GradientTape() as tape: + fake_images = self.generator(random_vector_labels) + fake_image_and_labels = tf.concat([fake_images, image_one_hot_labels], -1) + predictions = self.discriminator(fake_image_and_labels) + g_loss = self.loss_fn(misleading_labels, predictions) + grads = tape.gradient(g_loss, self.generator.trainable_weights) + self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights)) + + # Monitor loss. + self.generator_loss_tracker.update_state(g_loss) + self.discriminator_loss_tracker.update_state(d_loss) + return {'generator_loss': self.generator_loss_tracker.result(), + 'discrminator_loss': self.discriminator_loss_tracker.result()} diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index 6ce85ab16..f6f99ea08 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -29,7 +29,7 @@ def __init__(self, path_OBJ, viewport_size=(128, 128), y_fov=3.14159 / 4.0, self._build_scene(path_OBJ, viewport_size, light, y_fov) self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) self.flags_RGBA = RenderFlags.RGBA - self.flags_FLAT = RenderFlags.FLAT + self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT self.epsilon = 0.01 def _build_scene(self, path, size, light, y_fov): diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index b19f9d4bc..494e2d53a 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -33,8 +33,10 @@ processor = DomainRandomization(renderer, image_shape, image_paths, num_occlusions) +""" for _ in range(100): sample = processor() inputs, labels = sample['inputs'], sample['labels'] show_image((inputs['input_image'] * 255).astype('uint8')) show_image((labels['label_image'] * 255).astype('uint8')) +""" diff --git a/examples/pix2pose/utils.py b/examples/pix2pose/utils.py index 67f73e312..7aaadf344 100644 --- a/examples/pix2pose/utils.py +++ b/examples/pix2pose/utils.py @@ -2,9 +2,9 @@ from tensorflow.keras.losses import Loss -class Pix2PoseLoss(Loss): +class LossError(Loss): def __init__(self): - super(Pix2PoseLoss, self).__init__() + super(LossError, self).__init__() def call(self, y_true, y_pred): y_true = tf.clip_by_value(tf.math.abs(y_true), tf.float32.min, 1.0) @@ -14,23 +14,23 @@ def call(self, y_true, y_pred): return squared_error -class Pix2PoseColor(Loss): +class LossColor(Loss): def __init__(self, rotation_matrices): - super(Pix2PoseColor, self).__init__() + super(LossColor, self).__init__() self.rotation_matrices = rotation_matrices + def call(self, color_image, predicted_color_image): min_loss = tf.float32.max - # Bring the image in the range between 0 and 1 + # [-1, 1] -> [0, 1] color_image = (color_image + 1) * 0.5 # Calculate masks for the object and the background (they are independent of the rotation) - mask_object = tf.repeat(tf.expand_dims(tf.math.reduce_max(tf.math.ceil(color_image), axis=-1), axis=-1), - repeats=3, axis=-1) + mask_object = tf.repeat(tf.expand_dims(tf.math.reduce_max(tf.math.ceil(color_image), axis=-1), axis=-1), repeats=3, axis=-1) mask_background = tf.ones(tf.shape(mask_object)) - mask_object - # Bring the image again in the range between -1 and 1 + # [0, 1] -> [-1, 1] color_image = (color_image * 2) - 1 # Iterate over all possible rotations @@ -43,11 +43,10 @@ def call(self, color_image, predicted_color_image): # Rotate the object real_color_image = tf.einsum('ij,mklj->mkli', tf.convert_to_tensor(np.array(rotation_matrix), dtype=tf.float32), real_color_image) - #real_color_image = 
tf.where(tf.math.less(real_color_image, 0), tf.ones_like(real_color_image) + real_color_image, real_color_image) # Set the background to be all -1 real_color_image *= mask_object - real_color_image += (mask_background*tf.constant(-1.)) + real_color_image += (mask_background * tf.constant(-1.)) # Get the number of pixels num_pixels = tf.math.reduce_prod(tf.shape(real_color_image)[1:3]) From e49f96d4aae0d7b81ca367013eafeae1cdf97fa1 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 18 Oct 2021 15:24:54 +0200 Subject: [PATCH 010/101] Refactor weighted foreground loss --- examples/pix2pose/loss.py | 46 ++------------------------------------- 1 file changed, 2 insertions(+), 44 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index 8b856689c..0a5457b25 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -2,48 +2,18 @@ import tensorflow as tf -class WeightedRGBMask(Loss): +class WeightedForeground(Loss): def __init__(self, beta=3.0, epsilon=1e-4): - super(WeightedRGBMask, self).__init__() + super(WeightedForeground, self).__init__() self.beta, self.epsilon = beta, epsilon - def _extract_masks(RGBA_mask): - # TODO this should be an additional input or extracted from alpha mask - # mask_object = tf.math.ceil(RGB_mask) - # mask_object = tf.math.reduce_max(mask_object, axis=-1, keepdims=True) - # mask_object = tf.repeat(mask_object, repeats=3, axis=-1) - # mask_background = tf.ones(tf.shape(mask_object)) - mask_object - # return mask_object, mask_background - return None - def _extract_alpha_mask(self, RGBA_mask): alpha_mask = RGBA_mask[:, :, :, 3:4] color_mask = RGBA_mask[:, :, :, 0:3] return color_mask, alpha_mask - def _compute_masks(self, alpha_mask): - alpha_mask, 1.0 - alpha_mask - - def _unitball_to_normalized(x): - # [-1, 1] -> [0, 1] - return (x + 1) * 0.5 - - def _normalized_to_unitball(x): - # [0, 1] -> [-1, 1] - return (2.0 * x) - 1.0 - def call(self, RGBA_mask_true, RGB_mask_pred): - # Loss that penalizes more object color mismatch - # Loss that penalizes less background color not being "0" - # RGB_mask_true = self._unitball_to_normalized(RGB_mask_true) - # mask_object, mask_background = self._extract_masks(RGB_mask_true) - # RGB_mask_true = self._normalized_to_unitball(RGB_mask_true) - # RGB_mask_true = RGB_mask_true + self.epsilon - - # Set the background to be all -1 RGB_mask_true, alpha_mask = self._extract_alpha_mask(RGBA_mask_true) - # object_mask, background_mask = self._compute_masks(alpha_mask) - foreground_true = RGB_mask_true * alpha_mask foreground_pred = RGB_mask_pred * alpha_mask background_true = RGB_mask_true * (1.0 - alpha_mask) @@ -51,18 +21,6 @@ def call(self, RGBA_mask_true, RGB_mask_pred): foreground_loss = tf.abs(foreground_true - foreground_pred) background_loss = tf.abs(background_true - background_pred) loss = (self.beta * foreground_loss) + background_loss - loss = tf.reduce_mean(loss, axis[1, 2, 3]) - # RGB_mask_true = RGB_mask_true * mask_object - # RGB_mask_true = RGB_mask_true + (mask_background * tf.constant(-1.)) - - # Calculate the difference between the real and predicted images including the mask - # object_error = tf.abs(RGB_mask_pred * mask_object - RGB_mask_true * mask_object) - # background_error = tf.abs(RGB_mask_pred * mask_background - RGB_mask_true * mask_background) - - object_error = tf.reduce_sum(object_error, axis=-1) - background_error = tf.reduce_sum(background_error, axis=-1) - - loss = (self.beta * object_error) + background_error loss = tf.reduce_mean(loss, axis=[1, 2, 3]) 
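+        # Clamp to the largest finite float32 so an occasional infinite
+        # per-sample loss cannot blow up the parameter update.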
loss = tf.math.minimum(loss, tf.float32.max) return loss From b71bf3b82305180571d64671b18cd922536eaa4d Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 18 Oct 2021 15:46:02 +0200 Subject: [PATCH 011/101] Change directory name to hold generic models --- examples/pix2pose/{pix2pose => models}/discriminator.py | 0 examples/pix2pose/{pix2pose => models}/generator.py | 0 examples/pix2pose/{pix2pose => models}/pix2pose.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename examples/pix2pose/{pix2pose => models}/discriminator.py (100%) rename examples/pix2pose/{pix2pose => models}/generator.py (100%) rename examples/pix2pose/{pix2pose => models}/pix2pose.py (100%) diff --git a/examples/pix2pose/pix2pose/discriminator.py b/examples/pix2pose/models/discriminator.py similarity index 100% rename from examples/pix2pose/pix2pose/discriminator.py rename to examples/pix2pose/models/discriminator.py diff --git a/examples/pix2pose/pix2pose/generator.py b/examples/pix2pose/models/generator.py similarity index 100% rename from examples/pix2pose/pix2pose/generator.py rename to examples/pix2pose/models/generator.py diff --git a/examples/pix2pose/pix2pose/pix2pose.py b/examples/pix2pose/models/pix2pose.py similarity index 100% rename from examples/pix2pose/pix2pose/pix2pose.py rename to examples/pix2pose/models/pix2pose.py From 3e1186fb78567abcfa91b845e9ad34b4571c891b Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 18 Oct 2021 15:46:32 +0200 Subject: [PATCH 012/101] Fix bug with pipeline incorrect output shape --- examples/pix2pose/loss.py | 4 ++-- examples/pix2pose/pipelines.py | 5 +++-- examples/pix2pose/scenes.py | 2 +- examples/pix2pose/train.py | 29 ++++++++++++++++++++--------- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index 0a5457b25..c98161445 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -3,9 +3,9 @@ class WeightedForeground(Loss): - def __init__(self, beta=3.0, epsilon=1e-4): + def __init__(self, beta=3.0): super(WeightedForeground, self).__init__() - self.beta, self.epsilon = beta, epsilon + self.beta = beta def _extract_alpha_mask(self, RGBA_mask): alpha_mask = RGBA_mask[:, :, :, 3:4] diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index f345a9014..8cacc034f 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -9,10 +9,11 @@ class DomainRandomization(SequentialProcessor): """ def __init__(self, renderer, image_shape, image_paths, num_occlusions=1): super(DomainRandomization, self).__init__() + H, W = image_shape[:2] self.add(pr.Render(renderer)) self.add(pr.ControlMap(RandomizeRender(image_paths), [0, 1], [0])) self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) # self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) - self.add(pr.SequenceWrapper({0: {'input_image': image_shape}}, - {1: {'label_image': image_shape}})) + self.add(pr.SequenceWrapper({0: {'input_image': [H, W, 3]}}, + {1: {'label_image': [H, W, 4]}})) diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index f6f99ea08..ea03b04ae 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -8,7 +8,7 @@ from coloring import color_object -class PixelMask(): +class PixelMaskRenderer(): """Render-ready scene composed of a single object and a single moving camera. 
# Arguments diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 494e2d53a..8522bb566 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -1,9 +1,11 @@ import os import glob -from scenes import PixelMask -from pipelines import DomainRandomization +from paz.abstract import GeneratingSequence from paz.backend.image import show_image +from scenes import PixelMaskRenderer +from pipelines import DomainRandomization +from loss import WeightedForeground image_shape = [128, 128, 3] root_path = os.path.expanduser('~') @@ -20,19 +22,28 @@ top_only = False roll = 3.14159 shift = 0.05 +num_steps = 1000 +batch_size = 32 +beta = 3.0 -renderer = PixelMask(path_OBJ, viewport_size, y_fov, distance, - light, top_only, roll, shift) - -# for _ in range(100): -image, alpha, RGB_mask = renderer.render() -show_image(image) -show_image(RGB_mask) +renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, + light, top_only, roll, shift) processor = DomainRandomization(renderer, image_shape, image_paths, num_occlusions) +sequence = GeneratingSequence(processor, batch_size, num_steps) + +weighted_foreground = WeightedForeground(beta) + +# batch = sequence.__getitem__(0) +# for _ in range(100): +# image, alpha, RGB_mask = renderer.render() +# show_image(image) +# show_image(RGB_mask) + + """ for _ in range(100): sample = processor() From 50645dbce87e9c817d055e382307b1428e198681 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 18 Oct 2021 16:01:42 +0200 Subject: [PATCH 013/101] Add a fully convolutional neural network based on KeypointNet2D --- .../models/fully_convolutional_net.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 examples/pix2pose/models/fully_convolutional_net.py diff --git a/examples/pix2pose/models/fully_convolutional_net.py b/examples/pix2pose/models/fully_convolutional_net.py new file mode 100644 index 000000000..57d10e102 --- /dev/null +++ b/examples/pix2pose/models/fully_convolutional_net.py @@ -0,0 +1,34 @@ +from tensorflow.keras.models import Model +from tensorflow.keras.layers import Input, Conv2D, Activation, LeakyReLU + + +def block(x, filters, dilation_rate, alpha): + x = Conv2D(filters, (3, 3), dilation_rate=dilation_rate, padding='same')(x) + # x = BatchNormalization()(x) + x = LeakyReLU(alpha)(x) + return x + + +def FullyConvolutionalNet(num_classes, input_shape, filters=64, alpha=0.1): + """Fully convolutional network for segmentation. + + # Arguments + num_classes: Int. Number of output channels. + input_shape: List of integers indicating ``[H, W, num_channels]``. + filters: Int. Number of filters used in convolutional layers. + alpha: Float. Alpha parameter of leaky relu. 
+ + # Returns + Keras/tensorflow model + + # References + - [Discovery of Latent 3D Keypoints via End-to-end + Geometric Reasoning](https://arxiv.org/abs/1807.03146) + """ + base = inputs = Input(input_shape, name='image') + for base_arg, rate in enumerate([1, 1, 2, 4, 8, 16, 1, 2, 4, 8, 16, 1]): + base = block(base, filters, (rate, rate), alpha) + x = Conv2D(num_classes, (3, 3), padding='same')(base) + outputs = Activation('softmax', name='masks')(x) + model = Model(inputs, outputs, name='FULLY_CONVOLUTIONAL_NET') + return model From 524ecfc5e8da28d8c2ee0f532756e6fa9735c406 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 21 Oct 2021 08:55:54 +0200 Subject: [PATCH 014/101] Fix bug with predict weighted foreground loss --- examples/pix2pose/loss.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index c98161445..3b57dcc7d 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -14,13 +14,23 @@ def _extract_alpha_mask(self, RGBA_mask): def call(self, RGBA_mask_true, RGB_mask_pred): RGB_mask_true, alpha_mask = self._extract_alpha_mask(RGBA_mask_true) + foreground_true = RGB_mask_true * alpha_mask foreground_pred = RGB_mask_pred * alpha_mask - background_true = RGB_mask_true * (1.0 - alpha_mask) - background_pred = RGB_mask_true * (1.0 - alpha_mask) foreground_loss = tf.abs(foreground_true - foreground_pred) + + background_true = RGB_mask_true * (1.0 - alpha_mask) + background_pred = RGB_mask_pred * (1.0 - alpha_mask) background_loss = tf.abs(background_true - background_pred) + loss = (self.beta * foreground_loss) + background_loss + loss = tf.reduce_mean(loss, axis=[1, 2, 3]) - loss = tf.math.minimum(loss, tf.float32.max) + # loss = tf.math.minimum(loss, tf.float32.max) + # loss = tf.losses.mean_squared_error(RGB_mask_true, RGB_mask_pred) return loss + + +def MSE_with_alpha_channel(y_true, y_pred): + squared_difference = tf.square(y_true[:, :, :, 0:3] - y_pred) + return tf.reduce_mean(squared_difference, axis=-1) # Note the `axis=-1` From f4a8659796f046d6905aec1ad97c94fb14eb02d3 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:27:08 +0200 Subject: [PATCH 015/101] Add backend functions for prediction --- examples/pix2pose/backend.py | 148 +++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 3c2e35454..1831d9f3b 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -1,4 +1,7 @@ import numpy as np +from paz.backend.image.draw import GREEN +from paz.backend.image import draw_line, draw_dot +import cv2 def homogenous_quaternion_to_rotation_matrix(quaternion): @@ -74,3 +77,148 @@ def multiply_quaternions(quaternion_0, quaternion_1): # a = homogenous_quaternion_to_rotation_matrix(quaternion) # quaternion = (1 / np.sqrt(30)) * np.array([2, 3, 4, 1]) # b = inhomogenous_quaternion_to_rotation_matrix(quaternion) + +def build_cube_points3D(width, height, depth): + """ Build the 3D points of a cube in the openCV coordinate system: + 4--------1 + /| /| + / | / | + 3--------2 | + | 8_____|__5 + | / | / + |/ |/ + 7--------6 + + Z (depth) + / + /_____X (width) + | + | + Y (height) + + # Arguments + height: float, height of the 3D box. + width: float, width of the 3D box. + depth: float, width of the 3D box. 
+ + # Returns + Numpy array of shape ``(8, 3)'' corresponding to 3D keypoints of a cube + """ + half_height, half_width, half_depth = height / 2., width / 2., depth / 2. + point_1 = [+half_width, -half_height, +half_depth] + point_2 = [+half_width, -half_height, -half_depth] + point_3 = [-half_width, -half_height, -half_depth] + point_4 = [-half_width, -half_height, +half_depth] + point_5 = [+half_width, +half_height, +half_depth] + point_6 = [+half_width, +half_height, -half_depth] + point_7 = [-half_width, +half_height, -half_depth] + point_8 = [-half_width, +half_height, +half_depth] + return np.array([point_1, point_2, point_3, point_4, + point_5, point_6, point_7, point_8]) + + +def _preprocess_image_points2D(image_points2D): + num_points = len(image_points2D) + image_points2D = image_points2D.reshape(num_points, 1, 2) + image_points2D = image_points2D.astype(np.float64) + image_points2D = np.ascontiguousarray(image_points2D) + return image_points2D + + +def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, + inlier_threshold=5, num_iterations=100): + image_points2D = _preprocess_image_points2D(image_points2D) + success, rotation_vector, translation, inliers = cv2.solvePnPRansac( + object_points3D, image_points2D, camera_intrinsics, None, + flags=cv2.SOLVEPNP_EPNP, reprojectionError=inlier_threshold, + iterationsCount=num_iterations) + if success is False: + rotation_vector, translation = None, None + return rotation_vector, translation + + +def project_to_image(rotation, translation, points3D, camera_intrinsics): + """Project points3D to image plane using a perspective transformation + """ + if rotation.shape != (3, 3): + raise ValueError('Rotation matrix is not of shape (3, 3)') + if len(translation) != 3: + raise ValueError('Translation vector is not of length 3') + if len(points3D.shape) != 2: + raise ValueError('points3D should have a shape (N, 3)') + if points3D.shape[1] != 3: + raise ValueError('points3D should have a shape (N, 3)') + # TODO missing checks for camera intrinsics conditions + points3D = np.matmul(rotation, points3D.T).T + translation + x, y, z = np.split(points3D, 3, axis=1) + x_focal_length = camera_intrinsics[0, 0] + y_focal_length = camera_intrinsics[1, 1] + x_image_center = camera_intrinsics[0, 2] + y_image_center = camera_intrinsics[1, 2] + x_points = (x_focal_length * (x / z)) + x_image_center + y_points = (y_focal_length * (y / z)) + y_image_center + projected_points2D = np.concatenate([x_points, y_points], axis=1) + return projected_points2D + + +def draw_cube(image, points, color=GREEN, thickness=2, radius=5): + """ Draws a cube in image. + + # Arguments + image: Numpy array of shape ``[H, W, 3]``. + points: List of length 8 having each element a list + of length two indicating ``(y, x)`` openCV coordinates. + color: List of length three indicating RGB color of point. + thickness: Integer indicating the thickness of the line to be drawn. + radius: Integer indicating the radius of corner points to be drawn. + + # Returns + Numpy array with shape ``[H, W, 3]``. Image with cube. 
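+
+    # Example
+        A minimal sketch (the sizes, pose and intrinsics below are purely
+        illustrative) chaining ``build_cube_points3D``, ``project_to_image``
+        and ``draw_cube``:
+
+            points3D = build_cube_points3D(0.2, 0.2, 0.1)
+            intrinsics = np.array([[128.0, 0.0, 64.0],
+                                   [0.0, 128.0, 64.0],
+                                   [0.0, 0.0, 1.0]])
+            points2D = project_to_image(
+                np.eye(3), np.array([0.0, 0.0, 1.0]), points3D, intrinsics)
+            image = np.zeros((128, 128, 3))
+            image = draw_cube(image, points2D.astype(np.int32))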
+ """ + if points.shape != (8, 2): + raise ValueError('Cube points 2D must be of shape (8, 2)') + + # draw bottom + draw_line(image, points[0], points[1], color, thickness) + draw_line(image, points[1], points[2], color, thickness) + draw_line(image, points[3], points[2], color, thickness) + draw_line(image, points[3], points[0], color, thickness) + + # draw top + draw_line(image, points[4], points[5], color, thickness) + draw_line(image, points[6], points[5], color, thickness) + draw_line(image, points[6], points[7], color, thickness) + draw_line(image, points[4], points[7], color, thickness) + + # draw sides + draw_line(image, points[0], points[4], color, thickness) + draw_line(image, points[7], points[3], color, thickness) + draw_line(image, points[5], points[1], color, thickness) + draw_line(image, points[2], points[6], color, thickness) + + # draw X mark on top + draw_line(image, points[4], points[6], color, thickness) + draw_line(image, points[5], points[7], color, thickness) + + # draw dots + [draw_dot(image, np.squeeze(point), color, radius) for point in points] + return image + + +def replace_lower_than_threshold(source, threshold=1e-3, replacement=0.0): + lower_than_epsilon = source < threshold + source[lower_than_epsilon] = replacement + return source + + +def arguments_to_image_points2D(row_args, col_args): + row_args = row_args.reshape(-1, 1) + col_args = col_args.reshape(-1, 1) + image_points2D = np.concatenate([col_args, row_args], axis=1) + return image_points2D + + +def rotation_vector_to_rotation_matrix(rotation_vector): + rotation_matrix = np.eye(3) + cv2.Rodrigues(rotation_vector, rotation_matrix) + return rotation_matrix From cab9eb1fcf64267b71dbe01e016b86505186ec26 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:27:24 +0200 Subject: [PATCH 016/101] Add small comment on how to get object 3D shape --- examples/pix2pose/coloring.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/pix2pose/coloring.py b/examples/pix2pose/coloring.py index 29259800d..f21e02990 100644 --- a/examples/pix2pose/coloring.py +++ b/examples/pix2pose/coloring.py @@ -47,3 +47,4 @@ def color_object(path): mesh = color_object(path) scene.add(mesh) Viewer(scene, use_raymond_lighting=True, flags=RenderFlags.FLAT) + # mesh_extents = np.array([0.184, 0.187, 0.052]) From f8c0dcca08d46f84b5c25edc5dc0efca5a38d974 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:27:40 +0200 Subject: [PATCH 017/101] Add partially tested pipeline for full inference --- examples/pix2pose/pipelines.py | 72 ++++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 8cacc034f..a4e9b67fe 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -1,7 +1,14 @@ +import numpy as np from paz.abstract import SequentialProcessor from paz.pipelines import RandomizeRenderedImage as RandomizeRender from paz import processors as pr -# from processors import ImageToClosedOneBall +from processors import ( + GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, + ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC, + RotationVectorToRotationMatrix, ReplaceLowerThanThreshold) +from backend import build_cube_points3D, project_to_image, draw_cube +from processors import CropImage +from paz.backend.image import show_image class DomainRandomization(SequentialProcessor): @@ -15,5 +22,64 @@ def __init__(self, renderer, image_shape, image_paths, 
num_occlusions=1): self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) # self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) - self.add(pr.SequenceWrapper({0: {'input_image': [H, W, 3]}}, - {1: {'label_image': [H, W, 4]}})) + self.add(pr.SequenceWrapper({0: {'input_1': [H, W, 3]}}, + {1: {'masks': [H, W, 4]}})) + + +class PredictRGBMask(SequentialProcessor): + def __init__(self, model, epsilon=0.15): + super(PredictRGBMask, self).__init__() + self.add(CropImage()) + self.add(pr.ResizeImage((128, 128))) + self.add(pr.NormalizeImage()) + self.add(pr.ExpandDims(0)) + self.add(pr.Predict(model)) + self.add(pr.Squeeze(0)) + self.add(ReplaceLowerThanThreshold(epsilon)) + self.add(pr.DenormalizeImage()) + self.add(pr.CastImage('uint8')) + + +class RGBMaskToObjectPoints3D(SequentialProcessor): + def __init__(self, object_sizes): + super(RGBMaskToObjectPoints3D, self).__init__() + self.add(GetNonZeroValues()) + self.add(ImageToClosedOneBall()) + self.add(Scale(object_sizes / 2.0)) + + +class RGBMaskToImagePoints2D(SequentialProcessor): + def __init__(self): + super(RGBMaskToImagePoints2D, self).__init__() + self.add(GetNonZeroArguments()) + self.add(ArgumentsToImagePoints2D()) + + +class Pix2Pose(pr.Processor): + def __init__(self, model, object_sizes, camera, epsilon=0.15): + self.camera = camera + self.object_sizes = object_sizes + self.predict_RGBMask = PredictRGBMask(model, epsilon) + self.RGBMask_to_object_points3D = RGBMaskToObjectPoints3D( + self.object_sizes) + self.RGBMask_to_image_points2D = RGBMaskToImagePoints2D() + self.predict_pose = SolveChangingObjectPnPRANSAC(camera.intrinsics) + self.vector_to_matrix = RotationVectorToRotationMatrix() + + def call(self, image): + show_image(image, wait=False) + RGBMask = self.predict_RGBMask(image) + print(RGBMask.shape) + return {'image': RGBMask} + points3D = self.RGBMask_to_object_points3D(RGBMask) + points2D = self.RGBMask_to_image_points2D(RGBMask) + rotation_vector, translation = self.predict_pose(points3D, points2D) + rotation_matrix = self.vector_to_matrix(rotation_vector) + translation = np.squeeze(translation, 1) + points3D = build_cube_points3D(*self.object_sizes) + points2D = project_to_image( + rotation_matrix, translation, points3D, self.camera.intrinsics) + points2D = points2D.astype(np.int32) + image = draw_cube(image.astype(float), points2D) + image = image.astype('uint8') + return {'image', image} From 5251fe99b3f5c570e8a74aa9894100482afe1596 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:28:01 +0200 Subject: [PATCH 018/101] Add simple processors for pix2pose inference --- examples/pix2pose/processors.py | 121 ++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py index 550229cf1..148d601b4 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -1,4 +1,13 @@ +import numpy as np from paz.abstract import Processor +from paz.backend.keypoints import project_points3D +from paz.backend.image import draw_cube + +from backend import build_cube_points3D +from backend import replace_lower_than_threshold +from backend import arguments_to_image_points2D +from backend import solve_PnP_RANSAC +from backend import rotation_vector_to_rotation_matrix class ImageToClosedOneBall(Processor): @@ -19,3 +28,115 @@ def __init__(self): def call(self, image): return (image + 1.0) * 127.5 + + +class DrawBoxes3D(Processor): + def __init__(self, 
camera, class_to_dimensions, thickness=1): + """Draw boxes 3D of multiple objects + + # Arguments + camera: Instance of ``paz.backend.camera.Camera''. + class_to_dimensions: Dictionary that has as keys the + class names and as value a list [model_height, model_width] + thickness: Int. Thickness of 3D box + """ + super(DrawBoxes3D, self).__init__() + self.camera = camera + self.class_to_dimensions = class_to_dimensions + self.class_to_points = self._build_points(self.class_to_dimensions) + self.thickness = thickness + + def _build_points(self, class_to_dimensions): + class_to_cube3D = {} + print(class_to_dimensions) + for class_name, dimensions in class_to_dimensions.items(): + width, height, depth = dimensions + cube_points3D = build_cube_points3D(width, height, depth) + class_to_cube3D[class_name] = cube_points3D + return class_to_cube3D + + def call(self, image, pose6D): + points3D = self.class_to_points[pose6D.class_name] + points2D = project_points3D(points3D, pose6D, self.camera) + points2D = points2D.astype(np.int32) + # points2D = np.squeeze(points2D) + # return points2D + draw_cube(image, points2D, thickness=self.thickness) + return image + + +class ReplaceLowerThanThreshold(Processor): + def __init__(self, threshold=1e-8, replacement=0.0): + super(ReplaceLowerThanThreshold, self).__init__() + self.threshold = threshold + self.replacement = replacement + + def call(self, image): + return replace_lower_than_threshold( + image, self.threshold, self.replacement) + + +class GetNonZeroValues(Processor): + def __init__(self): + super(GetNonZeroValues, self).__init__() + + def call(self, array): + non_zero_arguments = np.nonzero(array) + return array[non_zero_arguments] + + +class GetNonZeroArguments(Processor): + def __init__(self): + super(GetNonZeroArguments, self).__init__() + + def call(self, array): + non_zero_rows, non_zero_columns = np.nonzero(array) + return non_zero_rows, non_zero_columns + + +class ArgumentsToImagePoints2D(Processor): + def __init__(self): + super(ArgumentsToImagePoints2D, self).__init__() + + def call(self, row_args, col_args): + image_points2D = arguments_to_image_points2D(row_args, col_args) + return image_points2D + + +class Scale(Processor): + def __init__(self, object_sizes): + super(Scale, self).__init__() + self.object_sizes = object_sizes + + def call(self, values): + return self.object_sizes * values + + +class SolveChangingObjectPnPRANSAC(Processor): + def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): + super(SolveChangingObjectPnPRANSAC, self).__init__() + self.camera_intrinsics = camera_intrinsics + self.inlier_thresh = inlier_thresh + self.num_iterations = num_iterations + + def call(self, object_points3D, image_points2D): + rotation_vector, translation = solve_PnP_RANSAC( + object_points3D, image_points2D, self.camera_intrinsics, + self.inlier_thresh, self.num_iterations) + return rotation_vector, translation + + +class RotationVectorToRotationMatrix(Processor): + def __init__(self): + super(RotationVectorToRotationMatrix, self).__init__() + + def call(self, rotation_vector): + return rotation_vector_to_rotation_matrix(rotation_vector) + + +class CropImage(Processor): + def __init__(self): + super(CropImage, self).__init__() + + def call(self, image): + return image[:128, :128, :] From cb610c02dced36f1b639d22ad6bf9b499f615b9b Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:28:23 +0200 Subject: [PATCH 019/101] Changed train script to use UNET-VGG --- examples/pix2pose/train.py | 50 
++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 8522bb566..1c4b13e30 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -1,11 +1,15 @@ import os import glob +from tensorflow.keras.optimizers import Adam from paz.abstract import GeneratingSequence -from paz.backend.image import show_image +from paz.models.segmentation import UNET_VGG16 +from paz.backend.image import show_image, resize_image +import numpy as np from scenes import PixelMaskRenderer from pipelines import DomainRandomization -from loss import WeightedForeground +from loss import WeightedForeground, MSE_with_alpha_channel +from models.fully_convolutional_net import FullyConvolutionalNet image_shape = [128, 128, 3] root_path = os.path.expanduser('~') @@ -25,6 +29,13 @@ num_steps = 1000 batch_size = 32 beta = 3.0 +alpha = 0.1 +filters = 16 +num_classes = 3 +learning_rate = 0.001 +# steps_per_epoch +max_num_epochs = 10 +steps_per_epoch = num_steps renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, @@ -35,8 +46,22 @@ sequence = GeneratingSequence(processor, batch_size, num_steps) +beta = 3.0 weighted_foreground = WeightedForeground(beta) +# model = FullyConvolutionalNet(num_classes, image_shape, filters, alpha) +model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +# model. +optimizer = Adam(learning_rate) +# model.load_weights('UNET_weights_MSE.hdf5') +model.compile(optimizer, weighted_foreground, metrics=MSE_with_alpha_channel) +model.fit( + sequence, + # steps_per_epoch=args.steps_per_epoch, + epochs=max_num_epochs, + # callbacks=[stop, log, save, plateau, draw], + verbose=1, + workers=0) # batch = sequence.__getitem__(0) # for _ in range(100): # image, alpha, RGB_mask = renderer.render() @@ -44,6 +69,27 @@ # show_image(RGB_mask) +def normalize(image): + return (image * 255.0).astype('uint8') + + +def show_results(): + # image, alpha, pixel_mask_true = renderer.render() + sample = processor() + image = sample['inputs']['input_1'] + pixel_mask_true = sample['labels']['masks'] + image = np.expand_dims(image, 0) + pixel_mask_pred = model.predict(image) + pixel_mask_pred = normalize(np.squeeze(pixel_mask_pred, axis=0)) + image = normalize(np.squeeze(image, axis=0)) + results = np.concatenate( + [image, normalize(pixel_mask_true[..., 0:3]), pixel_mask_pred], axis=1) + H, W = results.shape[:2] + scale = 6 + results = resize_image(results, (scale * W, scale * H)) + show_image(results) + + """ for _ in range(100): sample = processor() From 8876c11d9d388cd5a4ff63e92c483fd917c83b24 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:28:53 +0200 Subject: [PATCH 020/101] Add structure with video player --- examples/pix2pose/demo.py | 98 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 examples/pix2pose/demo.py diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py new file mode 100644 index 000000000..fd4a2cd3f --- /dev/null +++ b/examples/pix2pose/demo.py @@ -0,0 +1,98 @@ +import os +import cv2 +import numpy as np +from paz.models import UNET_VGG16 +from paz.backend.image import show_image +from paz import processors as pr +from paz.backend.camera import Camera +from scenes import PixelMaskRenderer +from processors import DrawBoxes3D +from backend import inhomogenous_quaternion_to_rotation_matrix as quaternion_to_rotation_matrix +# from backend import homogenous_quaternion_to_rotation_matrix 
+from backend import solve_PnP_RANSAC +from backend import project_to_image +from backend import build_cube_points3D +from backend import draw_cube +from pipelines import Pix2Pose +from paz.backend.camera import VideoPlayer + + +root_path = os.path.expanduser('~') +path_OBJ = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' +path_OBJ = os.path.join(root_path, path_OBJ) +image_shape = (128, 128, 3) +viewport_size = image_shape[:2] +num_classes = 3 +y_fov = 3.14159 / 4.0 +distance = [0.3, 0.5] +light = [1.0, 30] +top_only = False +roll = 3.14159 +shift = 0.05 + + +model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +model.load_weights('UNET_weights_epochs-10_beta-3.hdf5') +renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, + light, top_only, roll, shift) + + +camera = Camera(device_id=4) +focal_length = 179 # 128 +image_center = (128 / 2.0, 128 / 2.0) +# building camera parameters +camera.distortion = np.zeros((4)) +camera.intrinsics = np.array([[focal_length, 0, image_center[0]], + [0, focal_length, image_center[1]], + [0, 0, 1]]) + +object_sizes = np.array([0.184, 0.187, 0.052]) +# object_size = np.array([0.20, 0.20, 0.08]) +# camera.intrinsics = renderer.camera.camera.get_projection_matrix()[:3, :3] +epsilon = 0.15 +# object_keypoints3D = renderer.mesh.mesh.primitives[0].positions +# solve_PNP = pr.SolvePNP(object_keypoints3D, camera) +pipeline = Pix2Pose(model, object_sizes, camera, epsilon) +# image_size = (640, 480) +image_size = (128, 128) +player = VideoPlayer(image_size, pipeline, camera) +player.run() +""" +def show_results(): + image, alpha, RGB_mask_true = renderer.render() + normalized_image = np.expand_dims(image / 255.0, 0) + RGB_mask_pred = model.predict(normalized_image) + RGB_mask_pred = np.squeeze(RGB_mask_pred, 0) + RGB_mask_pred[RGB_mask_pred < epsilon] = 0.0 + show_image((RGB_mask_pred * 255.0).astype('uint8')) + + mask_pred = np.sum(RGB_mask_pred, axis=2) + non_zero_arguments = np.nonzero(mask_pred) + RGB_mask_pred = RGB_mask_pred[non_zero_arguments] + RGB_mask_pred = (2.0 * RGB_mask_pred) - 1.0 + # this RGB mask scaling is good since you are scaling in RGB space + object_points3D = (object_size / 2.0) * RGB_mask_pred + num_points = len(object_points3D) + + row_args, col_args = non_zero_arguments + row_args = row_args.reshape(-1, 1) + col_args = col_args.reshape(-1, 1) + image_points2D = np.concatenate([col_args, row_args], axis=1) + image_points2D = image_points2D.reshape(num_points, 1, 2) + image_points2D = image_points2D.astype(np.float64) + image_points2D = np.ascontiguousarray(image_points2D) + + rotation_vector, translation = solve_PnP_RANSAC( + object_points3D, image_points2D, camera.intrinsics) + rotation_matrix = np.eye(3) + cv2.Rodrigues(rotation_vector, rotation_matrix) + translation = np.squeeze(translation, 1) + points3D = build_cube_points3D(0.184, 0.187, 0.052) + points2D = project_to_image( + rotation_matrix, translation, points3D, camera.intrinsics) + points2D = points2D.astype(np.int32) + image = draw_cube(image.astype(float), points2D) + image = image.astype('uint8') + show_image(image) +""" + From 66e8ac507712932d53f5792888c76769b60fefe2 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 25 Oct 2021 17:29:45 +0200 Subject: [PATCH 021/101] Add simple ICP computation --- examples/pix2pose/icp.py | 102 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 examples/pix2pose/icp.py diff --git a/examples/pix2pose/icp.py b/examples/pix2pose/icp.py new file mode 
100644 index 000000000..61ca4352c --- /dev/null +++ b/examples/pix2pose/icp.py @@ -0,0 +1,102 @@ +import numpy as np +from sklearn.neighbors import NearestNeighbors + + +def calculate_affine_matrix(pointcloud_A, pointcloud_B): + '''Calculates affine transform with the best least-squares fit transforming + keypoints A to keypoints B. + + # Argument: + pointcloud_A: Array of shape (num_keypoints, 3). + pointcloud_B: Array of shape (num_keypoints, 3). + + # Returns: + T: (m+1)x(m+1) homogeneous transformation matrix that maps A on to B + R: mxm rotation matrix + t: mx1 translation vector + ''' + assert pointcloud_A.shape == pointcloud_B.shape + # translate points to their centroids + centroid3D_A = np.mean(pointcloud_A, axis=0) + centroid3D_B = np.mean(pointcloud_B, axis=0) + centered_keypoints3D_A = pointcloud_A - centroid3D_A + centered_keypoints3D_B = pointcloud_B - centroid3D_B + + covariance = np.dot(centered_keypoints3D_A.T, centered_keypoints3D_B) + U, S, Vt = np.linalg.svd(covariance) + # compute rotation matrix + rotation_matrix = np.dot(Vt.T, U.T) + + # resolve special reflection case + if np.linalg.det(rotation_matrix) < 0: + Vt[3 - 1, :] *= -1 + rotation_matrix = np.dot(Vt.T, U.T) + + # compute translation + translation3D = centroid3D_B.T - np.dot(rotation_matrix, centroid3D_A.T) + + affine_matrix = to_affine_matrix(rotation_matrix, translation3D) + return affine_matrix + + +def to_affine_matrix(rotation_matrix, translation_vector): + translation_vector = translation_vector.reshape(3, 1) + affine = np.concatenate([rotation_matrix, translation_vector], axis=0) + affine = np.concatenate([affine, np.array([[0.0, 0.0, 0.0, 1.0]])], axis=1) + return affine + + +def nearest_neighbor(pointcloud_A, pointcloud_B): + '''Find the nearest (Euclidean) neighbor in dst for each point in src + # Arguments: + src: Nxm array of points + dst: Nxm array of points + # Returns: + distances: Euclidean distances of the nearest neighbor + indices: dst indices of the nearest neighbor + ''' + assert pointcloud_A.shape == pointcloud_B.shape + model = NearestNeighbors(n_neighbors=1) + model.fit(pointcloud_B) + distances, indices = model.kneighbors(pointcloud_A, return_distance=True) + return distances.ravel(), indices.ravel() + + +def add_homogenous_coordinate(keypoints3D): + num_keypoints = len(keypoints3D) + ones = np.ones_like(num_keypoints).reshape(-1, 1) + homogenous_keypoints3D = np.concatenate([keypoints3D, ones], axis=1) + return homogenous_keypoints3D + + +def iterative_closes_point(pointcloud_A, pointcloud_B, initial_pose=None, + max_iterations=20, tolerance=1e-3): + '''Find best least square fit that transforms pointcloud A to pointcloud B. 
+ Input: + A: Nxm numpy array of source mD points + B: Nxm numpy array of destination mD point + initial_pose: (m+1)x(m+1) homogeneous transformation + max_iterations: exit algorithm after max_iterations + tolerance: convergence criteria + Output: + T: final homogeneous transformation that maps A on to B + distances: Euclidean distances (errors) of the nearest neighbor + i: number of iterations to converge + ''' + assert pointcloud_A.shape == pointcloud_B.shape + pointcloud_A = add_homogenous_coordinate(pointcloud_A) + pointcloud_B = add_homogenous_coordinate(pointcloud_B) + pointcloud_A_0 = np.copy(pointcloud_A) + if initial_pose is not None: + pointcloud_A = np.dot(initial_pose, pointcloud_A.T).T + previous_error = 0 + for iteration_arg in range(max_iterations): + distances, indices = nearest_neighbor(pointcloud_A, pointcloud_B) + affine_matrix = calculate_affine_matrix(pointcloud_A, pointcloud_B) + pointcloud_A = np.dot(affine_matrix, pointcloud_A.T).T + mean_error = np.mean(distances) + if np.abs(previous_error - mean_error) < tolerance: + break + previous_error = mean_error + affine_transform = calculate_affine_matrix(pointcloud_A_0, pointcloud_A) + return affine_transform, distances, iteration_arg From bf385b40c72c29f3bdb7e317f814afcb1ff80e68 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 27 Oct 2021 16:53:10 +0200 Subject: [PATCH 022/101] Add working demo --- examples/pix2pose/backend.py | 71 ++++++++++++++- examples/pix2pose/calibrate_camera.py | 60 +++++++++++++ examples/pix2pose/demo.py | 52 +++++------ examples/pix2pose/pipelines.py | 122 ++++++++++++++++++++++---- examples/pix2pose/processors.py | 42 ++++++++- 5 files changed, 298 insertions(+), 49 deletions(-) create mode 100644 examples/pix2pose/calibrate_camera.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 1831d9f3b..a172b7461 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -1,6 +1,6 @@ import numpy as np from paz.backend.image.draw import GREEN -from paz.backend.image import draw_line, draw_dot +from paz.backend.image import draw_line, draw_dot, draw_circle import cv2 @@ -55,6 +55,14 @@ def inhomogenous_quaternion_to_rotation_matrix(q): # return np.squeeze(rotation_matrix) +def quaternion_to_rotation_matrix(quaternion, homogenous=True): + if homogenous: + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) + else: + matrix = inhomogenous_quaternion_to_rotation_matrix(quaternion) + return matrix + + def multiply_quaternions(quaternion_0, quaternion_1): """Multiplies two quaternions. 
@@ -137,6 +145,33 @@ def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, return rotation_vector, translation +def apply_affine_transform(affine_matrix, vectors): + return np.matmul(affine_matrix, vectors.T).T + + +def project_to_image2(affine_matrix, points3D, camera_intrinsics): + """Project points3D to image plane using a perspective transformation + """ + if affine_matrix.shape != (4, 4): + raise ValueError('Affine matrix is not of shape (4, 4)') + if len(points3D.shape) != 2: + raise ValueError('points3D should have a shape (N, 3)') + if points3D.shape[1] != 3: + raise ValueError('points3D should have a shape (N, 3)') + # TODO missing checks for camera intrinsics conditions + points3D = apply_affine_transform(affine_matrix, points3D) + # points3D = np.matmul(rotation, points3D.T).T + translation + x, y, z = np.split(points3D, 3, axis=1) + x_focal_length = camera_intrinsics[0, 0] + y_focal_length = camera_intrinsics[1, 1] + x_image_center = camera_intrinsics[0, 2] + y_image_center = camera_intrinsics[1, 2] + x_points = (x_focal_length * (x / z)) + x_image_center + y_points = (y_focal_length * (y / z)) + y_image_center + projected_points2D = np.concatenate([x_points, y_points], axis=1) + return projected_points2D + + def project_to_image(rotation, translation, points3D, camera_intrinsics): """Project points3D to image plane using a perspective transformation """ @@ -222,3 +257,37 @@ def rotation_vector_to_rotation_matrix(rotation_vector): rotation_matrix = np.eye(3) cv2.Rodrigues(rotation_vector, rotation_matrix) return rotation_matrix + + +def draw_keypoints(image, keypoints, colors, radius): + for keypoint, color in zip(keypoints, colors): + R, G, B = color + color = (int(R), int(G), int(B)) + draw_circle(image, keypoint.astype('int'), color, radius) + return image + + +def draw_mask(image, keypoints, colors, radius): + for keypoint, color in zip(keypoints, colors): + R, G, B = color + color = (int(R), int(G), int(B)) + draw_circle(image, keypoint.astype('int'), color, radius) + return image + + +def rotation_matrix_to_quaternion(rotation_matrix): + qw = np.sqrt(1 + np.trace(rotation_matrix)) / 2.0 + + m21 = rotation_matrix[2, 1] + m12 = rotation_matrix[1, 2] + + m02 = rotation_matrix[0, 2] + m20 = rotation_matrix[2, 0] + + m10 = rotation_matrix[1, 0] + m01 = rotation_matrix[0, 1] + + qx = (m21 - m12) / (4.0 * qw) + qy = (m02 - m20) / (4.0 * qw) + qz = (m10 - m01) / (4.0 * qw) + return qx, qy, qz, qw diff --git a/examples/pix2pose/calibrate_camera.py b/examples/pix2pose/calibrate_camera.py new file mode 100644 index 000000000..bfc7a3e40 --- /dev/null +++ b/examples/pix2pose/calibrate_camera.py @@ -0,0 +1,60 @@ +from paz.backend.image import show_image +import numpy as np +import cv2 + + +# def calibrate_camera(square_size, pattern_shape=(5, 5)): + +pattern_size = (5, 7) +square_size_mm = 35 +window_size, zero_zone = (11, 11), (-1, -1) + +# constructing default 3D points +point3D = np.zeros((np.prod(pattern_size), 3), np.float32) +xy_coordinates = np.mgrid[0:pattern_size[0], 0:pattern_size[1]].T +point3D[:, :2] = xy_coordinates.reshape(-1, 2) * square_size_mm + +camera = cv2.VideoCapture(0) +cv2.namedWindow('camera_window') +# 2D points in image plane, 3D points in real world space, images, counter +image_points, points3D, images, image_counter = [], [], [], 0 +criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001) +print('Press `Escape` to quit') +while True: + + frame = camera.read()[1] + image_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + 
show_image(image_gray, wait=False) + chessboard_found, corners = cv2.findChessboardCorners( + image_gray, pattern_size, None) + print(chessboard_found) + if chessboard_found: + points3D.append(point3D) + refined_corners = cv2.cornerSubPix( + image_gray, corners, window_size, zero_zone, criteria) + image_points.append(refined_corners) + frame = cv2.drawChessboardCorners( + frame, pattern_size, refined_corners, chessboard_found) + show_image(frame) + image_counter = image_counter + 1 + + cv2.imshow('camera_window', frame) + keystroke = cv2.waitKey(1) + + if keystroke % 256 == 27: + print('`Escape` key hit, closing...') + break + +camera.release() +cv2.destroyAllWindows() + +ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera( + points3D, image_points, image_gray.shape[::-1], None, None) +print(ret, mtx, dist, rvecs, tvecs) +print(mtx) +# fx = 659.10 +# fy = 668.76 +# cx = 276.76 +# cy = 252.35 +# ret = 0.6814 +# dist = [9.86e-3, 1.41, 1.08e-2, 2.431e-3, -7.05] diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index fd4a2cd3f..48928abc1 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -2,61 +2,58 @@ import cv2 import numpy as np from paz.models import UNET_VGG16 -from paz.backend.image import show_image +from paz.backend.image import show_image, load_image from paz import processors as pr from paz.backend.camera import Camera from scenes import PixelMaskRenderer from processors import DrawBoxes3D -from backend import inhomogenous_quaternion_to_rotation_matrix as quaternion_to_rotation_matrix # from backend import homogenous_quaternion_to_rotation_matrix from backend import solve_PnP_RANSAC from backend import project_to_image from backend import build_cube_points3D from backend import draw_cube from pipelines import Pix2Pose +from pipelines import EstimatePoseMasks from paz.backend.camera import VideoPlayer +from paz.applications import SSD300FAT -root_path = os.path.expanduser('~') -path_OBJ = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' -path_OBJ = os.path.join(root_path, path_OBJ) image_shape = (128, 128, 3) -viewport_size = image_shape[:2] num_classes = 3 -y_fov = 3.14159 / 4.0 -distance = [0.3, 0.5] -light = [1.0, 30] -top_only = False -roll = 3.14159 -shift = 0.05 - model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) model.load_weights('UNET_weights_epochs-10_beta-3.hdf5') -renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, - light, top_only, roll, shift) +# approximating intrinsic camera parameters +camera = Camera(device_id=0) +# camera.start() +# image_size = camera.read().shape[0:2] +# camera.stop() -camera = Camera(device_id=4) -focal_length = 179 # 128 -image_center = (128 / 2.0, 128 / 2.0) -# building camera parameters +image = load_image('test_image.jpg') +image_size = image.shape[0:2] +focal_length = image_size[1] +image_center = (image_size[1] / 2.0, image_size[0] / 2.0) camera.distortion = np.zeros((4)) camera.intrinsics = np.array([[focal_length, 0, image_center[0]], [0, focal_length, image_center[1]], [0, 0, 1]]) + object_sizes = np.array([0.184, 0.187, 0.052]) -# object_size = np.array([0.20, 0.20, 0.08]) -# camera.intrinsics = renderer.camera.camera.get_projection_matrix()[:3, :3] epsilon = 0.15 -# object_keypoints3D = renderer.mesh.mesh.primitives[0].positions -# solve_PNP = pr.SolvePNP(object_keypoints3D, camera) -pipeline = Pix2Pose(model, object_sizes, camera, epsilon) +detect = SSD300FAT(draw=False) +offsets = [0.1, 0.1] +estimate_keypoints = Pix2Pose(model, object_sizes) 
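+# EstimatePoseMasks (see pipelines.py) chains detection, box cropping,
+# Pix2Pose color regression and PnP-RANSAC into a single pose-estimation
+# pipeline that maps an input image to drawn cubes and 6D poses.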
+pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets, None) + +results = pipeline(image) +predicted_image = results['image'] +show_image(predicted_image) + # image_size = (640, 480) -image_size = (128, 128) -player = VideoPlayer(image_size, pipeline, camera) -player.run() +# player = VideoPlayer(image_size, pipeline, camera) +# player.run() """ def show_results(): image, alpha, RGB_mask_true = renderer.render() @@ -95,4 +92,3 @@ def show_results(): image = image.astype('uint8') show_image(image) """ - diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index a4e9b67fe..050ff0496 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -1,14 +1,17 @@ import numpy as np -from paz.abstract import SequentialProcessor +from paz.abstract import SequentialProcessor, Processor from paz.pipelines import RandomizeRenderedImage as RandomizeRender +from paz.abstract.messages import Pose6D from paz import processors as pr from processors import ( GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC, RotationVectorToRotationMatrix, ReplaceLowerThanThreshold) -from backend import build_cube_points3D, project_to_image, draw_cube -from processors import CropImage +from backend import build_cube_points3D, project_to_image, draw_cube, draw_keypoints, project_to_image2 +from processors import CropImage, UnwrapDictionary, ToAffineMatrix, RotationVectorToQuaternion from paz.backend.image import show_image +from backend import solve_PnP_RANSAC, rotation_matrix_to_quaternion +from backend import rotation_vector_to_rotation_matrix class DomainRandomization(SequentialProcessor): @@ -29,8 +32,7 @@ def __init__(self, renderer, image_shape, image_paths, num_occlusions=1): class PredictRGBMask(SequentialProcessor): def __init__(self, model, epsilon=0.15): super(PredictRGBMask, self).__init__() - self.add(CropImage()) - self.add(pr.ResizeImage((128, 128))) + # self.add(pr.ResizeImage((128, 128))) self.add(pr.NormalizeImage()) self.add(pr.ExpandDims(0)) self.add(pr.Predict(model)) @@ -55,24 +57,38 @@ def __init__(self): self.add(ArgumentsToImagePoints2D()) +class SolveChangingObjectPnP(SequentialProcessor): + def __init__(self, camera_intrinsics): + super(SolveChangingObjectPnP, self).__init__() + self.add(SolveChangingObjectPnPRANSAC(camera_intrinsics)) + self.add(pr.ControlMap(RotationVectorToRotationMatrix())) + # self.add(pr.ControlMap(RotationVectorToQuaternion())) + self.add(pr.ControlMap(pr.Squeeze(1), [1], [1])) + # self.add(ToAffineMatrix()) + + class Pix2Pose(pr.Processor): - def __init__(self, model, object_sizes, camera, epsilon=0.15): - self.camera = camera + def __init__(self, model, object_sizes, epsilon=0.15): self.object_sizes = object_sizes + H, W = model.input_shape[1:3] + self.resize = pr.ResizeImage((W, H)) self.predict_RGBMask = PredictRGBMask(model, epsilon) - self.RGBMask_to_object_points3D = RGBMaskToObjectPoints3D( - self.object_sizes) - self.RGBMask_to_image_points2D = RGBMaskToImagePoints2D() - self.predict_pose = SolveChangingObjectPnPRANSAC(camera.intrinsics) - self.vector_to_matrix = RotationVectorToRotationMatrix() + self.RGBMask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) + self.RGBMask_to_points2D = RGBMaskToImagePoints2D() + self.wrap = pr.WrapOutput(['points3D', 'points2D', 'RGB_mask']) def call(self, image): - show_image(image, wait=False) - RGBMask = self.predict_RGBMask(image) - print(RGBMask.shape) - return {'image': RGBMask} - 
points3D = self.RGBMask_to_object_points3D(RGBMask) - points2D = self.RGBMask_to_image_points2D(RGBMask) + # show_image(image, wait=False) + print(image.shape) + image = self.resize(image) + print(image.shape) + RGB_mask = self.predict_RGBMask(image) + print(RGB_mask.shape) + points3D = self.RGBMask_to_points3D(RGB_mask) + # points3D = points3D * 100 + points2D = self.RGBMask_to_points2D(RGB_mask) + return self.wrap(points3D, points2D, RGB_mask) + """ rotation_vector, translation = self.predict_pose(points3D, points2D) rotation_matrix = self.vector_to_matrix(rotation_vector) translation = np.squeeze(translation, 1) @@ -83,3 +99,73 @@ def call(self, image): image = draw_cube(image.astype(float), points2D) image = image.astype('uint8') return {'image', image} + """ + + +class EstimatePoseMasks(Processor): + def __init__(self, detect, estimate_keypoints, camera, offsets, + class_to_dimensions, radius=3, thickness=1): + """Pose estimation pipeline using keypoints. + """ + super(EstimatePoseMasks, self).__init__() + self.detect = detect + self.camera = camera + self.estimate_keypoints = estimate_keypoints + self.square = SequentialProcessor( + [pr.SquareBoxes2D(), pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.unwrap = UnwrapDictionary(['points3D', 'points2D', 'RGB_mask']) + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'RGB_mask', 'poses6D']) + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.denormalize_keypoints = pr.DenormalizeKeypoints() + self.cube_points3D = build_cube_points3D(0.2, 0.2, 0.07) + + def call(self, image): + boxes2D = self.detect(image)['boxes2D'] + boxes2D = self.square(boxes2D) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, RGB_masks, cubes_points2D = [], [], [] + for cropped_image, box2D in zip(cropped_images, boxes2D): + if box2D.class_name != '035_power_drill': + continue + keypoints = self.estimate_keypoints(cropped_image) + points3D, points2D, RGB_mask = self.unwrap(keypoints) + # Change keypoints coordinates + points2D = (2 * points2D / 128.0) - 1.0 + x, y = np.split(points2D, 2, axis=1) + points2D = np.concatenate([x, -y], axis=1) + points2D = self.denormalize_keypoints(points2D, cropped_image) + points2D = self.change_coordinates(points2D, box2D) + # ---------------------------- + + rotation, translation = self.predict_pose(points3D, points2D) + # quaternion = rotation_matrix_to_quaternion(rotation) + # pose6D = Pose6D(quaternion, translation, box2D.class_name) + cube_points2D = project_to_image( + rotation, translation, self.cube_points3D, + self.camera.intrinsics) + cube_points2D = cube_points2D.astype(np.int32) + + # draw mask on image + object_sizes = np.array([0.184, 0.187, 0.052]) + colors = points3D / (object_sizes / 2.0) + colors = (colors + 1.0) * 127.5 + colors = colors.astype('int') + print(colors.min(), colors.max()) + draw_keypoints(image, points2D, colors, radius=3) + # ----------------------------------- + poses6D.append(None), RGB_masks.append(RGB_mask) + cubes_points2D.append(cube_points2D) + + image = self.draw_boxes2D(image, boxes2D) + # draw cube + image = image.astype(float) + for cube_points2D in cubes_points2D: + image = draw_cube(image, cube_points2D) + image = image.astype('uint8') + + return self.wrap(image, boxes2D, RGB_masks, poses6D) diff --git a/examples/pix2pose/processors.py 
b/examples/pix2pose/processors.py index 148d601b4..d0dff4b85 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -2,6 +2,7 @@ from paz.abstract import Processor from paz.backend.keypoints import project_points3D from paz.backend.image import draw_cube +from paz.backend.quaternion import rotation_vector_to_quaternion from backend import build_cube_points3D from backend import replace_lower_than_threshold @@ -81,7 +82,8 @@ def __init__(self): super(GetNonZeroValues, self).__init__() def call(self, array): - non_zero_arguments = np.nonzero(array) + channel_wise_sum = np.sum(array, axis=2) + non_zero_arguments = np.nonzero(channel_wise_sum) return array[non_zero_arguments] @@ -90,7 +92,8 @@ def __init__(self): super(GetNonZeroArguments, self).__init__() def call(self, array): - non_zero_rows, non_zero_columns = np.nonzero(array) + channel_wise_sum = np.sum(array, axis=2) + non_zero_rows, non_zero_columns = np.nonzero(channel_wise_sum) return non_zero_rows, non_zero_columns @@ -140,3 +143,38 @@ def __init__(self): def call(self, image): return image[:128, :128, :] + + +class UnwrapDictionary(Processor): + def __init__(self, keys): + super(UnwrapDictionary, self).__init__() + self.keys = keys + + def call(self, dictionary): + return [dictionary[key] for key in self.keys] + + +class ToAffineMatrix(Processor): + def __init__(self): + super(ToAffineMatrix, self).__init__() + + def call(self, rotation_matrix, translation): + if len(translation) != 3: + raise ValueError('Translation should be of lenght 3') + if rotation_matrix.shape != (3, 3): + raise ValueError('Rotation matrix should be of shape (3, 3)') + translation = translation.reshape(3, 1) + affine_matrix = np.concatenate([rotation_matrix, translation], axis=1) + affine_row = np.array([[0.0, 0.0, 0.0, 1.0]]) + affine_matrix = np.concatenate([affine_matrix, affine_row], axis=0) + print(affine_matrix.shape) + return affine_matrix + + +class RotationVectorToQuaternion(Processor): + def __init__(self): + super(RotationVectorToQuaternion, self).__init__() + + def call(self, rotation_vector): + quaternion = rotation_vector_to_quaternion(rotation_vector) + return quaternion From 1906c4a403ea4583936d0a8e6d452ea40002a87b Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 12:16:02 +0200 Subject: [PATCH 023/101] Refactor main pipeline --- examples/pix2pose/backend.py | 111 +++++++++++++++++++++++++++++++- examples/pix2pose/demo.py | 1 + examples/pix2pose/pipelines.py | 90 +++++++++++++------------- examples/pix2pose/processors.py | 22 ++++++- 4 files changed, 177 insertions(+), 47 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index a172b7461..a90c06597 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -1,6 +1,8 @@ +from collections import Iterable import numpy as np from paz.backend.image.draw import GREEN from paz.backend.image import draw_line, draw_dot, draw_circle +from paz.abstract import Pose6D import cv2 @@ -267,11 +269,14 @@ def draw_keypoints(image, keypoints, colors, radius): return image -def draw_mask(image, keypoints, colors, radius): +def draw_maski(image, keypoints, colors, radius=5): for keypoint, color in zip(keypoints, colors): R, G, B = color color = (int(R), int(G), int(B)) - draw_circle(image, keypoint.astype('int'), color, radius) + x, y = keypoint + x = int(x) + y = int(y) + draw_dot(image, (x, y), color, radius) return image @@ -291,3 +296,105 @@ def rotation_matrix_to_quaternion(rotation_matrix): qy = (m02 - 
m20) / (4.0 * qw) qz = (m10 - m01) / (4.0 * qw) return qx, qy, qz, qw + + +def to_pose6D(quaternion, translation, class_name=None): + return Pose6D(quaternion, translation, class_name) + + +class MultiList(Iterable): + def __init__(self, num_lists): + self.num_lists = num_lists + self.lists = [[] for list_arg in range(self.num_lists)] + + def append(self, *args): + if len(args) != self.num_lists: + raise ValueError('Arguments should have equal lenght as num_lists') + for arg, arg_list in zip(args, self.lists): + arg_list.append(arg) + + def __iter__(self): + return iter(self.lists) + + +def draw_mask2(image, points3D, object_sizes): + if len(object_sizes) != 3: + raise ValueError('Object sizes must contain 3 values') + colors = points3D / (object_sizes / 2.0) + colors = (colors + 1.0) * 127.5 + colors = colors.astype('int') + # draw_keypoints(image, points2D, colors, radius=3) + + +def normalize_points2D(points2D, height, width): + """Transform points2D in image coordinates to normalized coordinates. + + # Arguments + points2D: Numpy array of shape ``(num_keypoints, 2)``. + height: Int. Height of the image + width: Int. Width of the image + + # Returns + Numpy array of shape ``(num_keypoints, 2)``. + """ + image_shape = np.array([width, height]) + points2D = points2D / image_shape # [0, W], [0, H] -> [0, 1], [0, 1] + points2D = 2.0 * points2D # [0, 1], [0, 1] -> [0, 2], [0, 2] + points2D = points2D - 1.0 # [0, 2], [0, 2] -> [-1, 1], [-1, 1] + return points2D + + +def denormalize_points2D(points2D, height, width): + image_shape = np.array([width, height]) + points2D = points2D + 1.0 # [-1, 1], [-1, 1] -> [0, 2], [0, 2] + points2D = points2D / 2.0 # [0 , 2], [0 , 2] -> [0, 1], [0, 1] + points2D = points2D * image_shape # [0 , 1], [0 , 1] -> [0, W], [0, H] + return points2D + + +def flip_y_axis(points2D): + x, y = np.split(points2D, 2, axis=1) + points2D = np.concatenate([x, -y], axis=1) + return points2D + + +def denormalize_keypoints2(keypoints, height, width): + # [-1, 1] -> [-127.5, 127.5] -> [0, 255] + half_sizes = np.array([width, height]) / 2.0 + return (half_sizes * keypoints) + half_sizes + + +def translate_points2D(points2D, translation): + if len(points2D.shape) != 2: + raise ValueError('Invalid points2D shape') + if len(translation) != 2: + raise ValueError('Invalid translation lenght') + num_keypoints = len(points2D) + height, width = translation + x_translation = np.full((num_keypoints, 1), width) + y_translation = np.full((num_keypoints, 1), height) + translation = np.concatenate([x_translation, y_translation], axis=1) + translated_points2D = translation - points2D + return translated_points2D + + +def denormalize_keypoints(keypoints, height, width): + """Transform normalized keypoint coordinates into image coordinates + + # Arguments + keypoints: Numpy array of shape ``(num_keypoints, 2)``. + height: Int. Height of the image + width: Int. Width of the image + + # Returns + Numpy array of shape ``(num_keypoints, 2)``. 
+ """ + for keypoint_arg, keypoint in enumerate(keypoints): + x, y = keypoint[:2] + # transform key-point coordinates to image coordinates + x = (min(max(x, -1), 1) * width / 2 + width / 2) - 0.5 + # flip since the image coordinates for y are flipped + y = height - 0.5 - (min(max(y, -1), 1) * height / 2 + height / 2) + x, y = int(round(x)), int(round(y)) + keypoints[keypoint_arg][:2] = [x, y] + return keypoints diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 48928abc1..1b66d8941 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -41,6 +41,7 @@ object_sizes = np.array([0.184, 0.187, 0.052]) +# epsilon = 0.005 epsilon = 0.15 detect = SSD300FAT(draw=False) offsets = [0.1, 0.1] diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 050ff0496..088bcefe8 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -6,12 +6,14 @@ from processors import ( GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC, - RotationVectorToRotationMatrix, ReplaceLowerThanThreshold) -from backend import build_cube_points3D, project_to_image, draw_cube, draw_keypoints, project_to_image2 -from processors import CropImage, UnwrapDictionary, ToAffineMatrix, RotationVectorToQuaternion -from paz.backend.image import show_image -from backend import solve_PnP_RANSAC, rotation_matrix_to_quaternion -from backend import rotation_vector_to_rotation_matrix + ReplaceLowerThanThreshold) +from backend import (build_cube_points3D, project_to_image, draw_cube, + draw_keypoints) +from processors import UnwrapDictionary, RotationVectorToQuaternion +# from paz.backend.image import show_image +from backend import quaternion_to_rotation_matrix, draw_maski +from backend import normalize_points2D, flip_y_axis +from backend import denormalize_points2D class DomainRandomization(SequentialProcessor): @@ -61,9 +63,10 @@ class SolveChangingObjectPnP(SequentialProcessor): def __init__(self, camera_intrinsics): super(SolveChangingObjectPnP, self).__init__() self.add(SolveChangingObjectPnPRANSAC(camera_intrinsics)) - self.add(pr.ControlMap(RotationVectorToRotationMatrix())) - # self.add(pr.ControlMap(RotationVectorToQuaternion())) + # self.add(pr.ControlMap(RotationVectorToRotationMatrix())) + self.add(pr.ControlMap(RotationVectorToQuaternion())) self.add(pr.ControlMap(pr.Squeeze(1), [1], [1])) + # self.add(ToPose6D()) # self.add(ToAffineMatrix()) @@ -78,14 +81,9 @@ def __init__(self, model, object_sizes, epsilon=0.15): self.wrap = pr.WrapOutput(['points3D', 'points2D', 'RGB_mask']) def call(self, image): - # show_image(image, wait=False) - print(image.shape) image = self.resize(image) - print(image.shape) RGB_mask = self.predict_RGBMask(image) - print(RGB_mask.shape) points3D = self.RGBMask_to_points3D(RGB_mask) - # points3D = points3D * 100 points2D = self.RGBMask_to_points2D(RGB_mask) return self.wrap(points3D, points2D, RGB_mask) """ @@ -117,10 +115,9 @@ def __init__(self, detect, estimate_keypoints, camera, offsets, self.crop = pr.CropBoxes2D() self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.unwrap = UnwrapDictionary(['points3D', 'points2D', 'RGB_mask']) - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'RGB_mask', 'poses6D']) + self.unwrap = UnwrapDictionary(['points2D', 'points3D']) + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) self.draw_boxes2D = 
pr.DrawBoxes2D(detect.class_names) - self.denormalize_keypoints = pr.DenormalizeKeypoints() self.cube_points3D = build_cube_points3D(0.2, 0.2, 0.07) def call(self, image): @@ -128,44 +125,49 @@ def call(self, image): boxes2D = self.square(boxes2D) boxes2D = self.clip(image, boxes2D) cropped_images = self.crop(image, boxes2D) - poses6D, RGB_masks, cubes_points2D = [], [], [] - for cropped_image, box2D in zip(cropped_images, boxes2D): + poses6D, points = [], [] + for crop, box2D in zip(cropped_images, boxes2D): if box2D.class_name != '035_power_drill': continue - keypoints = self.estimate_keypoints(cropped_image) - points3D, points2D, RGB_mask = self.unwrap(keypoints) - # Change keypoints coordinates - points2D = (2 * points2D / 128.0) - 1.0 - x, y = np.split(points2D, 2, axis=1) - points2D = np.concatenate([x, -y], axis=1) - points2D = self.denormalize_keypoints(points2D, cropped_image) + points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) + + points2D = normalize_points2D(points2D, 128.0, 128.0) + crop_H, crop_W = crop.shape[:2] + points2D = denormalize_points2D(points2D, crop_H, crop_W) points2D = self.change_coordinates(points2D, box2D) - # ---------------------------- - rotation, translation = self.predict_pose(points3D, points2D) - # quaternion = rotation_matrix_to_quaternion(rotation) - # pose6D = Pose6D(quaternion, translation, box2D.class_name) - cube_points2D = project_to_image( - rotation, translation, self.cube_points3D, - self.camera.intrinsics) - cube_points2D = cube_points2D.astype(np.int32) + quaternion, translation = self.predict_pose(points3D, points2D) + pose6D = Pose6D(quaternion, translation, box2D.class_name) + + poses6D.append(pose6D), points.append([points2D, points3D]) - # draw mask on image + # draw boxes + new_boxes2D = [] + for box2D in boxes2D: + if box2D.class_name == '035_power_drill': + new_boxes2D.append(box2D) + image = self.draw_boxes2D(image, new_boxes2D) + + # draw masks + for points2D, points3D in points: object_sizes = np.array([0.184, 0.187, 0.052]) colors = points3D / (object_sizes / 2.0) colors = (colors + 1.0) * 127.5 colors = colors.astype('int') - print(colors.min(), colors.max()) - draw_keypoints(image, points2D, colors, radius=3) - # ----------------------------------- - poses6D.append(None), RGB_masks.append(RGB_mask) - cubes_points2D.append(cube_points2D) - - image = self.draw_boxes2D(image, boxes2D) - # draw cube + draw_maski(image, points2D, colors) + + # draw cubes image = image.astype(float) - for cube_points2D in cubes_points2D: + for pose6D in poses6D: + rotation = quaternion_to_rotation_matrix(pose6D.quaternion) + rotation = np.squeeze(rotation, axis=2) + cube_points2D = project_to_image( + rotation, + pose6D.translation, + self.cube_points3D, + self.camera.intrinsics) + cube_points2D = cube_points2D.astype(np.int32) image = draw_cube(image, cube_points2D) image = image.astype('uint8') - return self.wrap(image, boxes2D, RGB_masks, poses6D) + return self.wrap(image, boxes2D, poses6D) diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py index d0dff4b85..e31f3bddc 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -9,6 +9,7 @@ from backend import arguments_to_image_points2D from backend import solve_PnP_RANSAC from backend import rotation_vector_to_rotation_matrix +from backend import translate_points2D class ImageToClosedOneBall(Processor): @@ -18,7 +19,7 @@ def __init__(self): super(ImageToClosedOneBall, self).__init__() def call(self, image): - return (image / 
127.5) - 1 + return (image / 127.5) - 1.0 class ClosedOneBallToImage(Processor): @@ -178,3 +179,22 @@ def __init__(self): def call(self, rotation_vector): quaternion = rotation_vector_to_quaternion(rotation_vector) return quaternion + + +class TranslatePoints2D(Processor): + def __init__(self): + super(TranslatePoints2D, self).__init__() + + def call(points2D, image): + height, width = image.shape[:2] + translated_points2D = translate_points2D(points2D, (height, width)) + return translated_points2D + + +class FlipYAxisPoints2D(Processor): + def __init__(self): + super(FlipYAxisPoints2D, self).__init__() + + def call(self, points2D, image): + height = image.shape[0] + translate_points2D(points2D, (0, height)) From a512a091a1eb8586f53631bbced8b07b33fdcf15 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 15:06:45 +0200 Subject: [PATCH 024/101] Refactor pipelines --- examples/pix2pose/pipelines.py | 81 +++++++++------------------------- 1 file changed, 20 insertions(+), 61 deletions(-) diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 088bcefe8..b60c4cbd8 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -7,13 +7,12 @@ GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC, ReplaceLowerThanThreshold) -from backend import (build_cube_points3D, project_to_image, draw_cube, - draw_keypoints) +from backend import build_cube_points3D, project_to_image, draw_cube from processors import UnwrapDictionary, RotationVectorToQuaternion -# from paz.backend.image import show_image +from processors import NormalizePoints2D from backend import quaternion_to_rotation_matrix, draw_maski -from backend import normalize_points2D, flip_y_axis from backend import denormalize_points2D +from backend import draw_poses6D class DomainRandomization(SequentialProcessor): @@ -34,7 +33,7 @@ def __init__(self, renderer, image_shape, image_paths, num_occlusions=1): class PredictRGBMask(SequentialProcessor): def __init__(self, model, epsilon=0.15): super(PredictRGBMask, self).__init__() - # self.add(pr.ResizeImage((128, 128))) + self.add(pr.ResizeImage(model.input_shape[1:3])) self.add(pr.NormalizeImage()) self.add(pr.ExpandDims(0)) self.add(pr.Predict(model)) @@ -53,51 +52,33 @@ def __init__(self, object_sizes): class RGBMaskToImagePoints2D(SequentialProcessor): - def __init__(self): + def __init__(self, output_shape): super(RGBMaskToImagePoints2D, self).__init__() self.add(GetNonZeroArguments()) self.add(ArgumentsToImagePoints2D()) + self.add(NormalizePoints2D(output_shape)) class SolveChangingObjectPnP(SequentialProcessor): def __init__(self, camera_intrinsics): super(SolveChangingObjectPnP, self).__init__() self.add(SolveChangingObjectPnPRANSAC(camera_intrinsics)) - # self.add(pr.ControlMap(RotationVectorToRotationMatrix())) self.add(pr.ControlMap(RotationVectorToQuaternion())) - self.add(pr.ControlMap(pr.Squeeze(1), [1], [1])) - # self.add(ToPose6D()) - # self.add(ToAffineMatrix()) class Pix2Pose(pr.Processor): def __init__(self, model, object_sizes, epsilon=0.15): self.object_sizes = object_sizes - H, W = model.input_shape[1:3] - self.resize = pr.ResizeImage((W, H)) self.predict_RGBMask = PredictRGBMask(model, epsilon) - self.RGBMask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) - self.RGBMask_to_points2D = RGBMaskToImagePoints2D() + self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) + self.mask_to_points2D = 
RGBMaskToImagePoints2D(model.output_shape[1:3]) self.wrap = pr.WrapOutput(['points3D', 'points2D', 'RGB_mask']) def call(self, image): - image = self.resize(image) RGB_mask = self.predict_RGBMask(image) - points3D = self.RGBMask_to_points3D(RGB_mask) - points2D = self.RGBMask_to_points2D(RGB_mask) + points3D = self.mask_to_points3D(RGB_mask) + points2D = self.mask_to_points2D(RGB_mask) return self.wrap(points3D, points2D, RGB_mask) - """ - rotation_vector, translation = self.predict_pose(points3D, points2D) - rotation_matrix = self.vector_to_matrix(rotation_vector) - translation = np.squeeze(translation, 1) - points3D = build_cube_points3D(*self.object_sizes) - points2D = project_to_image( - rotation_matrix, translation, points3D, self.camera.intrinsics) - points2D = points2D.astype(np.int32) - image = draw_cube(image.astype(float), points2D) - image = image.astype('uint8') - return {'image', image} - """ class EstimatePoseMasks(Processor): @@ -109,8 +90,11 @@ def __init__(self, detect, estimate_keypoints, camera, offsets, self.detect = detect self.camera = camera self.estimate_keypoints = estimate_keypoints - self.square = SequentialProcessor( - [pr.SquareBoxes2D(), pr.OffsetBoxes2D(offsets)]) + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(['035_power_drill']), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) self.clip = pr.ClipBoxes2D() self.crop = pr.CropBoxes2D() self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() @@ -121,32 +105,18 @@ def __init__(self, detect, estimate_keypoints, camera, offsets, self.cube_points3D = build_cube_points3D(0.2, 0.2, 0.07) def call(self, image): - boxes2D = self.detect(image)['boxes2D'] - boxes2D = self.square(boxes2D) + boxes2D = self.postprocess_boxes(self.detect(image)) boxes2D = self.clip(image, boxes2D) cropped_images = self.crop(image, boxes2D) poses6D, points = [], [] for crop, box2D in zip(cropped_images, boxes2D): - if box2D.class_name != '035_power_drill': - continue points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) - - points2D = normalize_points2D(points2D, 128.0, 128.0) - crop_H, crop_W = crop.shape[:2] - points2D = denormalize_points2D(points2D, crop_H, crop_W) + points2D = denormalize_points2D(points2D, *crop.shape[0:2]) points2D = self.change_coordinates(points2D, box2D) - quaternion, translation = self.predict_pose(points3D, points2D) pose6D = Pose6D(quaternion, translation, box2D.class_name) - poses6D.append(pose6D), points.append([points2D, points3D]) - - # draw boxes - new_boxes2D = [] - for box2D in boxes2D: - if box2D.class_name == '035_power_drill': - new_boxes2D.append(box2D) - image = self.draw_boxes2D(image, new_boxes2D) + image = self.draw_boxes2D(image, boxes2D) # draw masks for points2D, points3D in points: @@ -157,17 +127,6 @@ def call(self, image): draw_maski(image, points2D, colors) # draw cubes - image = image.astype(float) - for pose6D in poses6D: - rotation = quaternion_to_rotation_matrix(pose6D.quaternion) - rotation = np.squeeze(rotation, axis=2) - cube_points2D = project_to_image( - rotation, - pose6D.translation, - self.cube_points3D, - self.camera.intrinsics) - cube_points2D = cube_points2D.astype(np.int32) - image = draw_cube(image, cube_points2D) - image = image.astype('uint8') - + image = draw_poses6D( + image, poses6D, self.cube_points3D, self.camera.intrinsics) return self.wrap(image, boxes2D, poses6D) From bfd1ebda129dbf3bddd7d2a4ad7abe70404ab39e Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 
15:07:01 +0200 Subject: [PATCH 025/101] Refactor backend --- examples/pix2pose/backend.py | 16 ++++++++++++++++ examples/pix2pose/demo.py | 1 - 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index a90c06597..25419ea89 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -142,6 +142,7 @@ def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, object_points3D, image_points2D, camera_intrinsics, None, flags=cv2.SOLVEPNP_EPNP, reprojectionError=inlier_threshold, iterationsCount=num_iterations) + translation = np.squeeze(translation, 1) if success is False: rotation_vector, translation = None, None return rotation_vector, translation @@ -352,6 +353,7 @@ def denormalize_points2D(points2D, height, width): return points2D + def flip_y_axis(points2D): x, y = np.split(points2D, 2, axis=1) points2D = np.concatenate([x, -y], axis=1) @@ -398,3 +400,17 @@ def denormalize_keypoints(keypoints, height, width): x, y = int(round(x)), int(round(y)) keypoints[keypoint_arg][:2] = [x, y] return keypoints + + +def draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics): + image = image.astype(float) + for pose6D in poses6D: + rotation = quaternion_to_rotation_matrix(pose6D.quaternion) + rotation = np.squeeze(rotation, axis=2) + cube_points2D = project_to_image( + rotation, pose6D.translation, + cube_points3D, camera_intrinsics) + cube_points2D = cube_points2D.astype(np.int32) + image = draw_cube(image, cube_points2D) + image = image.astype('uint8') + return image diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 1b66d8941..48928abc1 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -41,7 +41,6 @@ object_sizes = np.array([0.184, 0.187, 0.052]) -# epsilon = 0.005 epsilon = 0.15 detect = SSD300FAT(draw=False) offsets = [0.1, 0.1] From 858cf268df4dc742bf008fa27f135ec7ac3d86e4 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 15:07:15 +0200 Subject: [PATCH 026/101] Add basic processor --- examples/pix2pose/processors.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py index e31f3bddc..a3452323f 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -10,6 +10,7 @@ from backend import solve_PnP_RANSAC from backend import rotation_vector_to_rotation_matrix from backend import translate_points2D +from backend import normalize_points2D class ImageToClosedOneBall(Processor): @@ -198,3 +199,12 @@ def __init__(self): def call(self, points2D, image): height = image.shape[0] translate_points2D(points2D, (0, height)) + + +class NormalizePoints2D(Processor): + def __init__(self, image_shape): + self.height, self.width = image_shape[:2] + + def call(self, points2D): + points2D = normalize_points2D(points2D, self.height, self.width) + return points2D From a40233135a80eef6d94aea0c38d02dfd9a55bb7c Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 15:07:44 +0200 Subject: [PATCH 027/101] Start ObjectHypothesis example --- examples/pix2pose/messages.py | 50 +++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 examples/pix2pose/messages.py diff --git a/examples/pix2pose/messages.py b/examples/pix2pose/messages.py new file mode 100644 index 000000000..1c50c176d --- /dev/null +++ b/examples/pix2pose/messages.py @@ -0,0 +1,50 @@ +from paz.abstract.messages import Box2D, Pose6D + + +class 
ObjectHypothesis(object): + # TODO: Check if class_name, score is the same + def __init__(self, score=None, class_name=None, box2D=None, pose6D=None): + self.score = score + self.class_name = class_name + self.box2D = box2D + self.pose6D = pose6D + + @property + def box2D(self): + return self._box2D + + @box2D.setter + def box2D(self, value): + if not isinstance(value, Box2D): + raise ValueError('Value must be a Box2D class') + + if self.score is None: + if value.score is not None: + self.score = value.score + else: + if self.score != value.score: + raise ValueError('Mismatch score between Hypothesis and Box2D') + + + if self.score is None and (value.score is not None): + self.score = value.score + elif (self.score is not None) and (value.score is not None): + if self.score != value.score: + raise ValueError('Mismatch score between Hypothesis and Box2D') + if self.class_name is None and (value.class_name is not None): + self.class_name = value.class_name + self._box2D = value + + @property + def pose6D(self): + return self._pose6D + + @pose6D.setter + def pose6D(self, value): + if not isinstance(value, Pose6D): + raise ValueError('Value must be a Pose6D class') + if (self.score is None) and (value.score is not None): + self.score = value.score + if self.class_name is None and (value.class_name is not None): + self.class_name = value.class_name + self._pose6D = value From 9810748ad021f72f98260a7e8318bed1d5a4c2a6 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 15:09:30 +0200 Subject: [PATCH 028/101] Remove comments --- examples/pix2pose/demo.py | 51 +-------------------------------------- 1 file changed, 1 insertion(+), 50 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 48928abc1..2cb953dfd 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -1,20 +1,10 @@ -import os -import cv2 import numpy as np from paz.models import UNET_VGG16 from paz.backend.image import show_image, load_image -from paz import processors as pr from paz.backend.camera import Camera -from scenes import PixelMaskRenderer -from processors import DrawBoxes3D -# from backend import homogenous_quaternion_to_rotation_matrix -from backend import solve_PnP_RANSAC -from backend import project_to_image -from backend import build_cube_points3D -from backend import draw_cube from pipelines import Pix2Pose from pipelines import EstimatePoseMasks -from paz.backend.camera import VideoPlayer +# from paz.backend.camera import VideoPlayer from paz.applications import SSD300FAT @@ -39,7 +29,6 @@ [0, focal_length, image_center[1]], [0, 0, 1]]) - object_sizes = np.array([0.184, 0.187, 0.052]) epsilon = 0.15 detect = SSD300FAT(draw=False) @@ -54,41 +43,3 @@ # image_size = (640, 480) # player = VideoPlayer(image_size, pipeline, camera) # player.run() -""" -def show_results(): - image, alpha, RGB_mask_true = renderer.render() - normalized_image = np.expand_dims(image / 255.0, 0) - RGB_mask_pred = model.predict(normalized_image) - RGB_mask_pred = np.squeeze(RGB_mask_pred, 0) - RGB_mask_pred[RGB_mask_pred < epsilon] = 0.0 - show_image((RGB_mask_pred * 255.0).astype('uint8')) - - mask_pred = np.sum(RGB_mask_pred, axis=2) - non_zero_arguments = np.nonzero(mask_pred) - RGB_mask_pred = RGB_mask_pred[non_zero_arguments] - RGB_mask_pred = (2.0 * RGB_mask_pred) - 1.0 - # this RGB mask scaling is good since you are scaling in RGB space - object_points3D = (object_size / 2.0) * RGB_mask_pred - num_points = len(object_points3D) - - row_args, col_args = non_zero_arguments - 
row_args = row_args.reshape(-1, 1) - col_args = col_args.reshape(-1, 1) - image_points2D = np.concatenate([col_args, row_args], axis=1) - image_points2D = image_points2D.reshape(num_points, 1, 2) - image_points2D = image_points2D.astype(np.float64) - image_points2D = np.ascontiguousarray(image_points2D) - - rotation_vector, translation = solve_PnP_RANSAC( - object_points3D, image_points2D, camera.intrinsics) - rotation_matrix = np.eye(3) - cv2.Rodrigues(rotation_vector, rotation_matrix) - translation = np.squeeze(translation, 1) - points3D = build_cube_points3D(0.184, 0.187, 0.052) - points2D = project_to_image( - rotation_matrix, translation, points3D, camera.intrinsics) - points2D = points2D.astype(np.int32) - image = draw_cube(image.astype(float), points2D) - image = image.astype('uint8') - show_image(image) -""" From 501f43456f618754abf43efc8b1dfbbd23239189 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 28 Oct 2021 15:40:47 +0200 Subject: [PATCH 029/101] Refactor code --- examples/pix2pose/backend.py | 12 +++++++++++- examples/pix2pose/demo.py | 25 +++++++++++++------------ examples/pix2pose/pipelines.py | 28 +++++++++++----------------- 3 files changed, 35 insertions(+), 30 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 25419ea89..fccf619d3 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -270,7 +270,17 @@ def draw_keypoints(image, keypoints, colors, radius): return image -def draw_maski(image, keypoints, colors, radius=5): +def draw_masks(image, points): + for points2D, points3D in points: + object_sizes = np.array([0.184, 0.187, 0.052]) + colors = points3D / (object_sizes / 2.0) + colors = (colors + 1.0) * 127.5 + colors = colors.astype('int') + image = draw_maski(image, points2D, colors) + return image + + +def draw_maski(image, keypoints, colors, radius=1): for keypoint, color in zip(keypoints, colors): R, G, B = color color = (int(R), int(G), int(B)) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 2cb953dfd..8dd50ab67 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -4,7 +4,7 @@ from paz.backend.camera import Camera from pipelines import Pix2Pose from pipelines import EstimatePoseMasks -# from paz.backend.camera import VideoPlayer +from paz.backend.camera import VideoPlayer from paz.applications import SSD300FAT @@ -15,20 +15,21 @@ model.load_weights('UNET_weights_epochs-10_beta-3.hdf5') # approximating intrinsic camera parameters -camera = Camera(device_id=0) -# camera.start() -# image_size = camera.read().shape[0:2] -# camera.stop() +camera = Camera(device_id=4) +camera.start() +image_size = camera.read().shape[0:2] +camera.stop() +""" image = load_image('test_image.jpg') image_size = image.shape[0:2] +""" focal_length = image_size[1] image_center = (image_size[1] / 2.0, image_size[0] / 2.0) camera.distortion = np.zeros((4)) camera.intrinsics = np.array([[focal_length, 0, image_center[0]], [0, focal_length, image_center[1]], [0, 0, 1]]) - object_sizes = np.array([0.184, 0.187, 0.052]) epsilon = 0.15 detect = SSD300FAT(draw=False) @@ -36,10 +37,10 @@ estimate_keypoints = Pix2Pose(model, object_sizes) pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets, None) -results = pipeline(image) -predicted_image = results['image'] -show_image(predicted_image) +# results = pipeline(image) +# predicted_image = results['image'] +# show_image(predicted_image) -# image_size = (640, 480) -# player = VideoPlayer(image_size, pipeline, camera) 
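# A minimal numeric sketch of the pinhole approximation used in demo.py above,
# assuming a 640x480 frame (the demo itself takes the size from the loaded
# image): the focal length is set to the image width and the principal point
# to the image center.
import numpy as np

W, H = 640, 480
camera_intrinsics = np.array([[W, 0.0, W / 2.0],
                              [0.0, W, H / 2.0],
                              [0.0, 0.0, 1.0]])
# focal length = 640 pixels, principal point = (320, 240)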
-# player.run() +image_size = (640, 480) +player = VideoPlayer(image_size, pipeline, camera) +player.run() diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index b60c4cbd8..30ff49d97 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -7,12 +7,13 @@ GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC, ReplaceLowerThanThreshold) -from backend import build_cube_points3D, project_to_image, draw_cube +from backend import build_cube_points3D from processors import UnwrapDictionary, RotationVectorToQuaternion from processors import NormalizePoints2D -from backend import quaternion_to_rotation_matrix, draw_maski +from backend import draw_maski from backend import denormalize_points2D from backend import draw_poses6D +from backend import draw_masks class DomainRandomization(SequentialProcessor): @@ -83,13 +84,14 @@ def call(self, image): class EstimatePoseMasks(Processor): def __init__(self, detect, estimate_keypoints, camera, offsets, - class_to_dimensions, radius=3, thickness=1): + class_to_dimensions, radius=3, thickness=1, draw=True): """Pose estimation pipeline using keypoints. """ super(EstimatePoseMasks, self).__init__() self.detect = detect - self.camera = camera self.estimate_keypoints = estimate_keypoints + self.camera = camera + self.draw = draw self.postprocess_boxes = SequentialProcessor( [pr.UnpackDictionary(['boxes2D']), pr.FilterClassBoxes2D(['035_power_drill']), @@ -116,17 +118,9 @@ def call(self, image): quaternion, translation = self.predict_pose(points3D, points2D) pose6D = Pose6D(quaternion, translation, box2D.class_name) poses6D.append(pose6D), points.append([points2D, points3D]) - image = self.draw_boxes2D(image, boxes2D) - - # draw masks - for points2D, points3D in points: - object_sizes = np.array([0.184, 0.187, 0.052]) - colors = points3D / (object_sizes / 2.0) - colors = (colors + 1.0) * 127.5 - colors = colors.astype('int') - draw_maski(image, points2D, colors) - - # draw cubes - image = draw_poses6D( - image, poses6D, self.cube_points3D, self.camera.intrinsics) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + image = draw_masks(image, points) + image = draw_poses6D( + image, poses6D, self.cube_points3D, self.camera.intrinsics) return self.wrap(image, boxes2D, poses6D) From 46c4e2d8fbb975289efed8cfa9d2392ed536f010 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 1 Nov 2021 13:35:01 +0100 Subject: [PATCH 030/101] Refactor code to train GAN --- examples/pix2pose/demo.py | 16 ++-- examples/pix2pose/loss.py | 88 ++++++++++++++----- examples/pix2pose/metrics.py | 15 ++++ examples/pix2pose/models/generator.py | 18 ++-- examples/pix2pose/pipelines.py | 7 +- examples/pix2pose/train.py | 7 +- examples/pix2pose/train_gan.py | 117 ++++++++++++++++++++++++++ 7 files changed, 227 insertions(+), 41 deletions(-) create mode 100644 examples/pix2pose/metrics.py create mode 100644 examples/pix2pose/train_gan.py diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 8dd50ab67..def06b71d 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -16,14 +16,12 @@ # approximating intrinsic camera parameters camera = Camera(device_id=4) -camera.start() -image_size = camera.read().shape[0:2] -camera.stop() +# camera.start() +# image_size = camera.read().shape[0:2] +# camera.stop() -""" image = load_image('test_image.jpg') image_size = image.shape[0:2] -""" focal_length = image_size[1] image_center = 
(image_size[1] / 2.0, image_size[0] / 2.0) camera.distortion = np.zeros((4)) @@ -37,10 +35,12 @@ estimate_keypoints = Pix2Pose(model, object_sizes) pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets, None) -# results = pipeline(image) -# predicted_image = results['image'] -# show_image(predicted_image) +results = pipeline(image) +predicted_image = results['image'] +show_image(predicted_image) +""" image_size = (640, 480) player = VideoPlayer(image_size, pipeline, camera) player.run() +""" diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index 3b57dcc7d..741f64dc3 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -1,36 +1,82 @@ from tensorflow.keras.losses import Loss +from tensorflow.keras.losses import mean_squared_error import tensorflow as tf -class WeightedForeground(Loss): - def __init__(self, beta=3.0): - super(WeightedForeground, self).__init__() - self.beta = beta +def extract_alpha_mask(RGBA_mask): + color_mask = RGBA_mask[:, :, :, 0:3] + alpha_mask = RGBA_mask[:, :, :, 3:4] + return color_mask, alpha_mask + + +def extract_error_mask(RGBE_mask): + color_mask = RGBE_mask[:, :, :, 0:3] + error_mask = RGBE_mask[:, :, :, 3:4] + return color_mask, error_mask + + +def compute_foreground_loss(RGB_true, RGB_pred, alpha_mask): + foreground_true = RGB_true * alpha_mask + foreground_pred = RGB_pred * alpha_mask + foreground_loss = tf.abs(foreground_true - foreground_pred) + return foreground_loss + + +def compute_background_loss(RGB_true, RGB_pred, alpha_mask): + background_true = RGB_true * (1.0 - alpha_mask) + background_pred = RGB_pred * (1.0 - alpha_mask) + background_loss = tf.abs(background_true - background_pred) + return background_loss + + +def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0): + RGB_true, alpha_mask = extract_alpha_mask(RGBA_true) + foreground_loss = compute_foreground_loss(RGB_true, RGB_pred, alpha_mask) + background_loss = compute_background_loss(RGB_true, RGB_pred, alpha_mask) + reconstruction_loss = (beta * foreground_loss) + background_loss + return tf.reduce_mean(reconstruction_loss, axis=-1, keepdims=True) - def _extract_alpha_mask(self, RGBA_mask): - alpha_mask = RGBA_mask[:, :, :, 3:4] - color_mask = RGBA_mask[:, :, :, 0:3] - return color_mask, alpha_mask - def call(self, RGBA_mask_true, RGB_mask_pred): - RGB_mask_true, alpha_mask = self._extract_alpha_mask(RGBA_mask_true) +def compute_weighted_reconstruction_loss_with_error(RGBA_true, RGBE_pred, + beta=3.0): + RGB_pred, error_mask = extract_error_mask(RGBE_pred) + loss = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) + return loss - foreground_true = RGB_mask_true * alpha_mask - foreground_pred = RGB_mask_pred * alpha_mask - foreground_loss = tf.abs(foreground_true - foreground_pred) - background_true = RGB_mask_true * (1.0 - alpha_mask) - background_pred = RGB_mask_pred * (1.0 - alpha_mask) - background_loss = tf.abs(background_true - background_pred) +def compute_error_prediction_loss(RGBA_true, RGBE_pred): + RGB_pred, error_pred = extract_error_mask(RGBE_pred) + error_true = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, 1.0) + error_true = tf.minimum(error_true, 1.0) + error_loss = mean_squared_error(error_true, error_pred) + error_loss = tf.expand_dims(error_loss, axis=-1) + return error_loss - loss = (self.beta * foreground_loss) + background_loss - loss = tf.reduce_mean(loss, axis=[1, 2, 3]) - # loss = tf.math.minimum(loss, tf.float32.max) - # loss = 
tf.losses.mean_squared_error(RGB_mask_true, RGB_mask_pred) +class WeightedReconstructionWithError(Loss): + def __init__(self, beta=3.0): + super(WeightedReconstructionWithError, self).__init__() + self.beta = beta + + def call(self, RGBA_true, RGBE_pred): + reconstruction = compute_weighted_reconstruction_loss_with_error( + RGBA_true, RGBE_pred, self.beta) + error_prediction = compute_error_prediction_loss(RGBA_true, RGBE_pred) + loss = reconstruction + error_prediction + return loss + + +class WeightedReconstruction(Loss): + def __init__(self, beta=3.0): + super(WeightedReconstruction, self).__init__() + self.beta = beta + + def call(self, RGBA_true, RGB_pred): + loss = compute_weighted_reconstruction_loss( + RGBA_true, RGB_pred, self.beta) return loss -def MSE_with_alpha_channel(y_true, y_pred): +def MSE_without_last_channel(y_true, y_pred): squared_difference = tf.square(y_true[:, :, :, 0:3] - y_pred) return tf.reduce_mean(squared_difference, axis=-1) # Note the `axis=-1` diff --git a/examples/pix2pose/metrics.py b/examples/pix2pose/metrics.py new file mode 100644 index 000000000..27e79bba2 --- /dev/null +++ b/examples/pix2pose/metrics.py @@ -0,0 +1,15 @@ +from loss import compute_weighted_reconstruction_loss_with_error +from loss import compute_error_prediction_loss +from loss import compute_weighted_reconstruction_loss + + +def weighted_reconstruction(RGBA_true, RGBE_pred, beta=3.0, with_error=False): + if with_error: + loss_function = compute_weighted_reconstruction_loss_with_error(RGBA_true, RGBE_pred, beta) + else: + loss_function = compute_weighted_reconstruction_loss(RGBA_true, RGBE_pred, beta) + return loss_function + + +def error_prediction(RGBA_true, RGBE_pred, beta=3.0): + return compute_error_prediction_loss(RGBA_true, RGBE_pred) diff --git a/examples/pix2pose/models/generator.py b/examples/pix2pose/models/generator.py index 720586a33..357d3b646 100644 --- a/examples/pix2pose/models/generator.py +++ b/examples/pix2pose/models/generator.py @@ -53,22 +53,24 @@ def decoder(x, skip_connections): def Generator(input_shape=(128, 128, 3), latent_dimension=256, name='PIX2POSE_GENERATOR'): - input_image = Input(input_shape, name='input_image') - x, skip_connections = encoder(input_image) + RGB_input = Input(input_shape, name='RGB_input') + x, skip_connections = encoder(RGB_input) x = Flatten()(x) x = Dense(latent_dimension)(x) x = Dense(8 * 8 * latent_dimension)(x) x = Reshape((8, 8, latent_dimension))(x) x = decoder(x, skip_connections) - label_image = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(x) - label_image = Activation('tanh', name='label_image')(label_image) - error_image = Conv2DTranspose(1, (5, 5), (2, 2), padding='same')(x) - error_image = Activation('sigmoid', name='error_image')(error_image) - model = Model([input_image], [label_image, error_image], name=name) + RGB = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(x) + RGB = Activation('tanh', name='RGB')(RGB) + error = Conv2DTranspose(1, (5, 5), (2, 2), padding='same')(x) + error = Activation('sigmoid', name='error')(error) + RGB_with_error = Concatenate(axis=-1, name='RGB_with_error')([RGB, error]) + model = Model(RGB_input, RGB_with_error, name=name) return model model = Generator() assert model.count_params() == 25740356 -assert model.output_shape == [(None, 128, 128, 3), (None, 128, 128, 1)] +# assert model.output_shape == [(None, 128, 128, 3), (None, 128, 128, 1)] +assert model.output_shape == (None, 128, 128, 4) assert model.input_shape == (None, 128, 128, 3) diff --git 
a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 30ff49d97..bf48f7082 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -19,7 +19,8 @@ class DomainRandomization(SequentialProcessor): """Performs domain randomization on a rendered image """ - def __init__(self, renderer, image_shape, image_paths, num_occlusions=1): + def __init__(self, renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions=1): super(DomainRandomization, self).__init__() H, W = image_shape[:2] self.add(pr.Render(renderer)) @@ -27,8 +28,12 @@ def __init__(self, renderer, image_shape, image_paths, num_occlusions=1): self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) # self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) + """ self.add(pr.SequenceWrapper({0: {'input_1': [H, W, 3]}}, {1: {'masks': [H, W, 4]}})) + """ + self.add(pr.SequenceWrapper({0: inputs_to_shape}, + {1: labels_to_shape})) class PredictRGBMask(SequentialProcessor): diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 1c4b13e30..42195bafc 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -8,7 +8,7 @@ from scenes import PixelMaskRenderer from pipelines import DomainRandomization -from loss import WeightedForeground, MSE_with_alpha_channel +from loss import WeightedReconstruction, MSE_with_alpha_channel from models.fully_convolutional_net import FullyConvolutionalNet image_shape = [128, 128, 3] @@ -47,14 +47,15 @@ sequence = GeneratingSequence(processor, batch_size, num_steps) beta = 3.0 -weighted_foreground = WeightedForeground(beta) +weighted_reconstruction = WeightedReconstruction(beta) # model = FullyConvolutionalNet(num_classes, image_shape, filters, alpha) model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) # model. 
optimizer = Adam(learning_rate) # model.load_weights('UNET_weights_MSE.hdf5') -model.compile(optimizer, weighted_foreground, metrics=MSE_with_alpha_channel) +model.compile( + optimizer, weighted_reconstruction, metrics=MSE_with_alpha_channel) model.fit( sequence, # steps_per_epoch=args.steps_per_epoch, diff --git a/examples/pix2pose/train_gan.py b/examples/pix2pose/train_gan.py new file mode 100644 index 000000000..65dfeca28 --- /dev/null +++ b/examples/pix2pose/train_gan.py @@ -0,0 +1,117 @@ +import os +import glob +from tensorflow.keras.optimizers import Adam +from paz.abstract import GeneratingSequence +from paz.models.segmentation import UNET_VGG16 +from models.generator import Generator +from paz.backend.image import show_image, resize_image +import numpy as np + +from scenes import PixelMaskRenderer +from pipelines import DomainRandomization +from loss import WeightedReconstruction +from loss import WeightedReconstructionWithError +from metrics import error_prediction, weighted_reconstruction +# from models.fully_convolutional_net import FullyConvolutionalNet + +H, W, num_channels = image_shape = [128, 128, 3] +root_path = os.path.expanduser('~') +background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' +background_wildcard = os.path.join(root_path, background_wildcard) +image_paths = glob.glob(background_wildcard) +path_OBJ = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' +path_OBJ = os.path.join(root_path, path_OBJ) +num_occlusions = 1 +viewport_size = image_shape[:2] +y_fov = 3.14159 / 4.0 +distance = [0.3, 0.5] +light = [1.0, 30] +top_only = False +roll = 3.14159 +shift = 0.05 +num_steps = 1000 +batch_size = 32 +beta = 3.0 +alpha = 0.1 +filters = 16 +num_classes = 3 +learning_rate = 0.001 +# steps_per_epoch +model_names = ['PIX2POSE', 'UNET_VGG16'] +model_name = 'UNET_VGG16' +# model_name = 'PIX2POSE' +max_num_epochs = 1 +latent_dimension = 128 +beta = 3.0 + + +renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, + light, top_only, roll, shift) + + +# model = FullyConvolutionalNet(num_classes, image_shape, filters, alpha) +# name_to_model = dict(zip(model_names, [Generator, UNET_VGG16]) +# model = name_to_model[model_name] + +if model_name == 'UNET_VGG16': + model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) + loss = WeightedReconstruction(beta) + inputs_to_shape = {'input_1': [H, W, num_channels]} + labels_to_shape = {'masks': [H, W, 4]} + metrics = weighted_reconstruction +if model_name == 'PIX2POSE': + model = Generator(image_shape, latent_dimension) + reconstruction_loss = WeightedReconstructionWithError(beta) + # error_prediction_loss = ErrorPrediction() + # loss = {'RGB_with_error': [reconstruction_loss, error_prediction_loss]} + loss = WeightedReconstructionWithError() + H, W, num_channels = image_shape + inputs_to_shape = {'RGB_input': [H, W, num_channels]} + labels_to_shape = {'RGB_with_error': [H, W, 4]} + metrics = {'RGB_with_error': [weighted_reconstruction, error_prediction]} + + +processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) + +sequence = GeneratingSequence(processor, batch_size, num_steps) + +optimizer = Adam(learning_rate) + +# inputs, labels = sequence.__getitem__(0) +# preds = model(inputs) +# error_prediction = ErrorPrediction() +# losses = error_prediction(preds, labels['RGB_with_error']) + +# model.compile(optimizer, loss, metrics=mean_squared_error) +model.compile(optimizer, loss, metrics) + +model.fit( + sequence, + 
epochs=max_num_epochs, + # callbacks=[stop, log, save, plateau, draw], + verbose=1, + workers=0) + +""" +def normalize(image): + return (image * 255.0).astype('uint8') + + +def show_results(): + # image, alpha, pixel_mask_true = renderer.render() + sample = processor() + image = sample['inputs']['input_1'] + pixel_mask_true = sample['labels']['masks'] + image = np.expand_dims(image, 0) + pixel_mask_pred = model.predict(image) + pixel_mask_pred = normalize(np.squeeze(pixel_mask_pred, axis=0)) + image = normalize(np.squeeze(image, axis=0)) + results = np.concatenate( + [image, normalize(pixel_mask_true[..., 0:3]), pixel_mask_pred], axis=1) + H, W = results.shape[:2] + scale = 6 + results = resize_image(results, (scale * W, scale * H)) + show_image(results) +""" From 5810d8ef6311fd6f31bb2b47253253a71866ef50 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 1 Nov 2021 14:11:19 +0100 Subject: [PATCH 031/101] Add training for UNET and GAN --- examples/pix2pose/metrics.py | 37 ++++++++++++++++++++++++++++------ examples/pix2pose/train_gan.py | 24 ++++++++++------------ 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/examples/pix2pose/metrics.py b/examples/pix2pose/metrics.py index 27e79bba2..4f4b451af 100644 --- a/examples/pix2pose/metrics.py +++ b/examples/pix2pose/metrics.py @@ -1,15 +1,40 @@ from loss import compute_weighted_reconstruction_loss_with_error from loss import compute_error_prediction_loss from loss import compute_weighted_reconstruction_loss +import tensorflow as tf -def weighted_reconstruction(RGBA_true, RGBE_pred, beta=3.0, with_error=False): - if with_error: - loss_function = compute_weighted_reconstruction_loss_with_error(RGBA_true, RGBE_pred, beta) - else: - loss_function = compute_weighted_reconstruction_loss(RGBA_true, RGBE_pred, beta) - return loss_function +def weighted_reconstruction_with_error(RGBA_true, RGBE_pred, beta=3.0): + return compute_weighted_reconstruction_loss_with_error( + RGBA_true, RGBE_pred, beta) + + +def weighted_reconstruction(RGBA_true, RGB_pred, beta=3.0): + return compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) def error_prediction(RGBA_true, RGBE_pred, beta=3.0): return compute_error_prediction_loss(RGBA_true, RGBE_pred) + + +def mean_squared_error(y_true, y_pred): + squared_difference = tf.square(y_true[:, :, :, 0:3] - y_pred[:, :, :, 0:3]) + return tf.reduce_mean(squared_difference, axis=-1) + + +def weighted_reconstruction2(y_true, y_pred, beta=3.0, with_error=False): + if with_error: + return compute_weighted_reconstruction_loss_with_error(y_true, y_pred, beta) + else: + return compute_error_prediction_loss(y_true, y_pred, beta) + + +def weighted_reconstruction_wrapper(beta=3.0, with_error=False): + if with_error: + def weighted_reconstruction(y_true, y_pred): + return compute_weighted_reconstruction_loss_with_error( + y_true, y_pred, beta) + else: + def weighted_reconstruction(y_true, y_pred): + return compute_weighted_reconstruction_loss(y_true, y_pred, beta) + return weighted_reconstruction diff --git a/examples/pix2pose/train_gan.py b/examples/pix2pose/train_gan.py index 65dfeca28..2f64d4f3b 100644 --- a/examples/pix2pose/train_gan.py +++ b/examples/pix2pose/train_gan.py @@ -4,14 +4,17 @@ from paz.abstract import GeneratingSequence from paz.models.segmentation import UNET_VGG16 from models.generator import Generator -from paz.backend.image import show_image, resize_image -import numpy as np +# from paz.backend.image import show_image, resize_image +# import numpy as np from scenes import 
PixelMaskRenderer from pipelines import DomainRandomization from loss import WeightedReconstruction from loss import WeightedReconstructionWithError -from metrics import error_prediction, weighted_reconstruction +# from metrics import error_prediction, weighted_reconstruction +# from metrics import weighted_reconstruction_with_error +from metrics import mean_squared_error, error_prediction +from metrics import weighted_reconstruction_wrapper # from models.fully_convolutional_net import FullyConvolutionalNet H, W, num_channels = image_shape = [128, 128, 3] @@ -58,17 +61,18 @@ loss = WeightedReconstruction(beta) inputs_to_shape = {'input_1': [H, W, num_channels]} labels_to_shape = {'masks': [H, W, 4]} - metrics = weighted_reconstruction + weighted_reconstruction = weighted_reconstruction_wrapper(beta, False) + metrics = {'masks': [weighted_reconstruction, mean_squared_error]} if model_name == 'PIX2POSE': model = Generator(image_shape, latent_dimension) reconstruction_loss = WeightedReconstructionWithError(beta) - # error_prediction_loss = ErrorPrediction() - # loss = {'RGB_with_error': [reconstruction_loss, error_prediction_loss]} loss = WeightedReconstructionWithError() H, W, num_channels = image_shape inputs_to_shape = {'RGB_input': [H, W, num_channels]} labels_to_shape = {'RGB_with_error': [H, W, 4]} - metrics = {'RGB_with_error': [weighted_reconstruction, error_prediction]} + weighted_reconstruction = weighted_reconstruction_wrapper(beta, True) + metrics = {'RGB_with_error': + [weighted_reconstruction, error_prediction, mean_squared_error]} processor = DomainRandomization( @@ -79,12 +83,6 @@ optimizer = Adam(learning_rate) -# inputs, labels = sequence.__getitem__(0) -# preds = model(inputs) -# error_prediction = ErrorPrediction() -# losses = error_prediction(preds, labels['RGB_with_error']) - -# model.compile(optimizer, loss, metrics=mean_squared_error) model.compile(optimizer, loss, metrics) model.fit( From d2b014f9842b2ed7f9b3bb0d3c498ddfae6c1bc3 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 1 Nov 2021 14:13:09 +0100 Subject: [PATCH 032/101] Remove unecessary metrics --- examples/pix2pose/metrics.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/examples/pix2pose/metrics.py b/examples/pix2pose/metrics.py index 4f4b451af..7304ab7d8 100644 --- a/examples/pix2pose/metrics.py +++ b/examples/pix2pose/metrics.py @@ -4,15 +4,6 @@ import tensorflow as tf -def weighted_reconstruction_with_error(RGBA_true, RGBE_pred, beta=3.0): - return compute_weighted_reconstruction_loss_with_error( - RGBA_true, RGBE_pred, beta) - - -def weighted_reconstruction(RGBA_true, RGB_pred, beta=3.0): - return compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) - - def error_prediction(RGBA_true, RGBE_pred, beta=3.0): return compute_error_prediction_loss(RGBA_true, RGBE_pred) @@ -22,13 +13,6 @@ def mean_squared_error(y_true, y_pred): return tf.reduce_mean(squared_difference, axis=-1) -def weighted_reconstruction2(y_true, y_pred, beta=3.0, with_error=False): - if with_error: - return compute_weighted_reconstruction_loss_with_error(y_true, y_pred, beta) - else: - return compute_error_prediction_loss(y_true, y_pred, beta) - - def weighted_reconstruction_wrapper(beta=3.0, with_error=False): if with_error: def weighted_reconstruction(y_true, y_pred): From 3b85afaad4219d9757be4677f2d76f91b12f419d Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 2 Nov 2021 10:35:06 +0100 Subject: [PATCH 033/101] Add available GAN training --- examples/pix2pose/models/gan_example.py | 81 
+++++++++++++++ examples/pix2pose/models/keras_example.py | 67 ++++++++++++ examples/pix2pose/models/pix2pose.py | 118 +++++++++++----------- examples/pix2pose/old_train.py | 7 +- examples/pix2pose/train_gan.py | 36 +++++-- 5 files changed, 237 insertions(+), 72 deletions(-) create mode 100644 examples/pix2pose/models/gan_example.py create mode 100644 examples/pix2pose/models/keras_example.py diff --git a/examples/pix2pose/models/gan_example.py b/examples/pix2pose/models/gan_example.py new file mode 100644 index 000000000..8472a9462 --- /dev/null +++ b/examples/pix2pose/models/gan_example.py @@ -0,0 +1,81 @@ +import tensorflow as tf +from tensorflow.keras.models import Model +from tensorflow.keras.metrics import Mean + + +class Pix2PoseGAN(Model): + def __init__(self, image_shape, discriminator, generator, latent_dim): + super(Pix2PoseGAN, self).__init__() + self.image_shape = image_shape + self.discriminator = discriminator + self.generator = generator + self.latent_dim = latent_dim + self.generator_loss_tracker = Mean(name='generator_loss') + self.discriminator_loss_tracker = Mean(name='discriminator_loss') + + @property + def metrics(self): + return [self.generator_loss_tracker, self.discriminator_loss_tracker] + + def compile(self, d_optimizer, g_optimizer, loss_fn): + super(Pix2PoseGAN, self).compile() + self.d_optimizer = d_optimizer + self.g_optimizer = g_optimizer + self.loss_fn = loss_fn + + def train_step(self, data): + RGB_inputs, RGB_labels = data + RGB_generated = self.generator(RGB_inputs) + RGB_combined = tf.concat([RGB_generated, RGB_labels], axis=0) + """ + # Add dummy dimensions to the labels so that they can be concatenated with + # the images. This is for the discriminator. + image_one_hot_labels = one_hot_labels[:, :, None, None] + image_one_hot_labels = tf.repeat(image_one_hot_labels, repeats=[image_size * image_size]) + image_one_hot_labels = tf.reshape(image_one_hot_labels, (-1, image_size, image_size, num_classes)) + + # Sample random points in the latent space and concatenate the labels. + # This is for the generator. + batch_size = tf.shape(real_images)[0] + random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) + random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) + + # Decode the noise (guided by labels) to fake images. + generated_images = self.generator(random_vector_labels) + """ + + # Combine them with real images. Note that we are concatenating the labels + # with these images here. + + # Assemble labels discriminating real from fake images. + labels = tf.concat([tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0) + + # Train the discriminator. + with tf.GradientTape() as tape: + predictions = self.discriminator(combined_images) + d_loss = self.loss_fn(labels, predictions) + grads = tape.gradient(d_loss, self.discriminator.trainable_weights) + self.d_optimizer.apply_gradients(zip(grads, self.discriminator.trainable_weights)) + + # Sample random points in the latent space. + random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) + random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) + + # Assemble labels that say "all real images". + misleading_labels = tf.zeros((batch_size, 1)) + + # Train the generator (note that we should *not* update the weights + # of the discriminator)! 
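# A tiny worked example of the targets built above, assuming batch_size=2:
# RGB_combined stacks the generated masks first and the real ones last, so
#
#     labels            = [[1.], [1.], [0.], [0.]]   # 1 marks generated, 0 marks real
#     misleading_labels = [[0.], [0.]]
#
# The discriminator step above learns to separate the two halves, while the
# generator step below is rewarded when the discriminator assigns the "real"
# label (0) to the generated samples.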
+ with tf.GradientTape() as tape: + fake_images = self.generator(random_vector_labels) + fake_image_and_labels = tf.concat([fake_images, image_one_hot_labels], -1) + predictions = self.discriminator(fake_image_and_labels) + g_loss = self.loss_fn(misleading_labels, predictions) + grads = tape.gradient(g_loss, self.generator.trainable_weights) + self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights)) + + # Monitor loss. + self.generator_loss_tracker.update_state(g_loss) + self.discriminator_loss_tracker.update_state(d_loss) + return {'generator_loss': self.generator_loss_tracker.result(), + 'discrminator_loss': self.discriminator_loss_tracker.result()} diff --git a/examples/pix2pose/models/keras_example.py b/examples/pix2pose/models/keras_example.py new file mode 100644 index 000000000..c3f016cfa --- /dev/null +++ b/examples/pix2pose/models/keras_example.py @@ -0,0 +1,67 @@ +class GAN(keras.Model): + def __init__(self, discriminator, generator, latent_dim): + super(GAN, self).__init__() + self.discriminator = discriminator + self.generator = generator + self.latent_dim = latent_dim + + def compile(self, d_optimizer, g_optimizer, loss_fn): + super(GAN, self).compile() + self.d_optimizer = d_optimizer + self.g_optimizer = g_optimizer + self.loss_fn = loss_fn + self.d_loss_metric = keras.metrics.Mean(name="d_loss") + self.g_loss_metric = keras.metrics.Mean(name="g_loss") + + @property + def metrics(self): + return [self.d_loss_metric, self.g_loss_metric] + + def train_step(self, real_images): + # Sample random points in the latent space + batch_size = tf.shape(real_images)[0] + random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) + + # Decode them to fake images + generated_images = self.generator(random_latent_vectors) + + # Combine them with real images + combined_images = tf.concat([generated_images, real_images], axis=0) + + # Assemble labels discriminating real from fake images + labels = tf.concat( + [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0 + ) + # Add random noise to the labels - important trick! + labels += 0.05 * tf.random.uniform(tf.shape(labels)) + + # Train the discriminator + with tf.GradientTape() as tape: + predictions = self.discriminator(combined_images) + d_loss = self.loss_fn(labels, predictions) + grads = tape.gradient(d_loss, self.discriminator.trainable_weights) + self.d_optimizer.apply_gradients( + zip(grads, self.discriminator.trainable_weights) + ) + + # Sample random points in the latent space + random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) + + # Assemble labels that say "all real images" + misleading_labels = tf.zeros((batch_size, 1)) + + # Train the generator (note that we should *not* update the weights + # of the discriminator)! 
+ with tf.GradientTape() as tape: + predictions = self.discriminator(self.generator(random_latent_vectors)) + g_loss = self.loss_fn(misleading_labels, predictions) + grads = tape.gradient(g_loss, self.generator.trainable_weights) + self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights)) + + # Update metrics + self.d_loss_metric.update_state(d_loss) + self.g_loss_metric.update_state(g_loss) + return { + "d_loss": self.d_loss_metric.result(), + "g_loss": self.g_loss_metric.result(), + } diff --git a/examples/pix2pose/models/pix2pose.py b/examples/pix2pose/models/pix2pose.py index bc69b2516..37cb9ce39 100644 --- a/examples/pix2pose/models/pix2pose.py +++ b/examples/pix2pose/models/pix2pose.py @@ -1,81 +1,77 @@ -import tensorflow as tf from tensorflow.keras.models import Model from tensorflow.keras.metrics import Mean +import tensorflow as tf -class Pix2PoseGAN(Model): +class Pix2Pose(Model): def __init__(self, image_shape, discriminator, generator, latent_dim): - super(Pix2PoseGAN, self).__init__() + super(Pix2Pose, self).__init__() self.image_shape = image_shape - self.discriminator = discriminator - self.generator = generator + self.D = discriminator + self.G = generator self.latent_dim = latent_dim - self.generator_loss_tracker = Mean(name='generator_loss') - self.discriminator_loss_tracker = Mean(name='discriminator_loss') @property def metrics(self): - return [self.generator_loss_tracker, self.discriminator_loss_tracker] - - def compile(self, d_optimizer, g_optimizer, loss_fn): - super(Pix2PoseGAN, self).compile() - self.d_optimizer = d_optimizer - self.g_optimizer = g_optimizer - self.loss_fn = loss_fn - - def train_step(self, data): - real_images, one_hot_labels = data + return [self.G_loss_metric, self.D_loss_metric] - # Add dummy dimensions to the labels so that they can be concatenated with - # the images. This is for the discriminator. - image_one_hot_labels = one_hot_labels[:, :, None, None] - image_one_hot_labels = tf.repeat(image_one_hot_labels, repeats=[image_size * image_size]) - image_one_hot_labels = tf.reshape(image_one_hot_labels, (-1, image_size, image_size, num_classes)) + def compile(self, optimizer_D, optimizer_G, loss): + super(Pix2Pose, self).compile() + self.optimizer_G = optimizer_G + self.optimizer_D = optimizer_D + self.loss = loss + self.G_loss_metric = Mean(name='generator_loss') + self.D_loss_metric = Mean(name='discriminator_loss') - # Sample random points in the latent space and concatenate the labels. - # This is for the generator. - batch_size = tf.shape(real_images)[0] - random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) - random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) + def _build_discriminator_labels(self, batch_size): + return tf.concat([tf.ones(batch_size, 1), tf.zeros(batch_size, 1)], 0) - # Decode the noise (guided by labels) to fake images. - generated_images = self.generator(random_vector_labels) + def _add_noise_to_labels(self, labels): + noise = tf.random.uniform(tf.shape(labels)) + labels = labels + 0.05 * noise + return labels - # Combine them with real images. Note that we are concatenating the labels - # with these images here. - fake_image_and_labels = tf.concat([generated_images, image_one_hot_labels], -1) - real_image_and_labels = tf.concat([real_images, image_one_hot_labels], -1) - combined_images = tf.concat([fake_image_and_labels, real_image_and_labels], axis=0) - - # Assemble labels discriminating real from fake images. 
- labels = tf.concat([tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0) + def _train_D(self, y_true, x_combined): + with tf.GradientTape() as tape: + y_pred = self.D(x_combined) + D_loss = self.loss(y_true, y_pred) + grads = tape.gradient(D_loss, self.D.trainable_weights) + self.optimizer_D.apply_gradients(zip(grads, self.D.trainable_weights)) + return D_loss - # Train the discriminator. + def _train_G(self, RGB_inputs): + batch_size = tf.shape(RGB_inputs)[0] + y_misleading = tf.zeros((batch_size, 1)) with tf.GradientTape() as tape: - predictions = self.discriminator(combined_images) - d_loss = self.loss_fn(labels, predictions) - grads = tape.gradient(d_loss, self.discriminator.trainable_weights) - self.d_optimizer.apply_gradients(zip(grads, self.discriminator.trainable_weights)) + y_pred = self.D(self.G(RGB_inputs)[:, :, :, 0:3]) + G_loss = self.loss(y_misleading, y_pred) + grads = tape.gradient(G_loss, self.G.trainable_weights) + self.optimizer_G.apply_gradients(zip(grads, self.G.trainable_weights)) + return G_loss - # Sample random points in the latent space. - random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) - random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) + def _update_metrics(self, D_loss, G_loss): + self.D_loss_metric.update_state(D_loss) + self.G_loss_metric.update_state(G_loss) - # Assemble labels that say "all real images". - misleading_labels = tf.zeros((batch_size, 1)) + def train_step(self, data): + RGB_inputs, RGB_labels = data + RGB_inputs = RGB_inputs['RGB_input'][:, :, :, 0:3] + RGB_labels = RGB_labels['RGB_with_error'][:, :, :, 0:3] + RGB_generated = self.G(RGB_inputs)[:, :, :, 0:3] - # Train the generator (note that we should *not* update the weights - # of the discriminator)! - with tf.GradientTape() as tape: - fake_images = self.generator(random_vector_labels) - fake_image_and_labels = tf.concat([fake_images, image_one_hot_labels], -1) - predictions = self.discriminator(fake_image_and_labels) - g_loss = self.loss_fn(misleading_labels, predictions) - grads = tape.gradient(g_loss, self.generator.trainable_weights) - self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights)) + combined_images = tf.concat([RGB_generated, RGB_labels], axis=0) + batch_size = tf.shape(RGB_inputs)[0] + y_true = self._build_discriminator_labels(batch_size) + y_true = self._add_noise_to_labels(y_true) - # Monitor loss. 
- self.generator_loss_tracker.update_state(g_loss) - self.discriminator_loss_tracker.update_state(d_loss) - return {'generator_loss': self.generator_loss_tracker.result(), - 'discrminator_loss': self.discriminator_loss_tracker.result()} + D_loss = self._train_D(y_true, combined_images) + G_loss = self._train_G(RGB_inputs) + self._update_metrics(D_loss, G_loss) + return {"discriminator_loss": self.D_loss_metric.result(), + "generator_loss": self.G_loss_metric.result()} + """ + def call(self, data): + generated = self.G(data) + predictions = self.D(generated) + return generated , predictions + """ diff --git a/examples/pix2pose/old_train.py b/examples/pix2pose/old_train.py index c7adce3f1..481457ad3 100644 --- a/examples/pix2pose/old_train.py +++ b/examples/pix2pose/old_train.py @@ -115,7 +115,10 @@ # Train the generator discriminator.trainable = False - loss_dcgan, loss_color_output, loss_dcgan_discriminator, loss_error_output = dcgan.train_on_batch(batch[0]['input_image'], {"color_output": batch[1]['color_output'], "error_output": batch[1]['error_output'], "discriminator_output": np.ones((args.batch_size, 1))}) + loss_dcgan, loss_color_output, loss_dcgan_discriminator, loss_error_output = dcgan.train_on_batch(batch[0]['input_image'], + {"color_output": batch[1]['color_output'], + "error_output": batch[1]['error_output'], + "discriminator_output": np.ones((args.batch_size, 1))}) # Test the network batch_test = next(sequence_iterator_test) @@ -134,4 +137,4 @@ for callback in callbacks: - callback.on_train_end() \ No newline at end of file + callback.on_train_end() diff --git a/examples/pix2pose/train_gan.py b/examples/pix2pose/train_gan.py index 2f64d4f3b..de9ebb7df 100644 --- a/examples/pix2pose/train_gan.py +++ b/examples/pix2pose/train_gan.py @@ -4,6 +4,9 @@ from paz.abstract import GeneratingSequence from paz.models.segmentation import UNET_VGG16 from models.generator import Generator +from models.discriminator import Discriminator +from models.pix2pose import Pix2Pose +from tensorflow.keras.losses import BinaryCrossentropy # from paz.backend.image import show_image, resize_image # import numpy as np @@ -40,9 +43,10 @@ num_classes = 3 learning_rate = 0.001 # steps_per_epoch -model_names = ['PIX2POSE', 'UNET_VGG16'] -model_name = 'UNET_VGG16' -# model_name = 'PIX2POSE' +model_names = ['PIX2POSE', 'PIX2POSE_GENERATOR', 'UNET_VGG16'] +# model_name = 'UNET_VGG16' +# model_name = 'PIX2POSE_GENERATOR' +model_name = 'PIX2POSE' max_num_epochs = 1 latent_dimension = 128 beta = 3.0 @@ -63,7 +67,9 @@ labels_to_shape = {'masks': [H, W, 4]} weighted_reconstruction = weighted_reconstruction_wrapper(beta, False) metrics = {'masks': [weighted_reconstruction, mean_squared_error]} -if model_name == 'PIX2POSE': + optimizer = Adam(learning_rate) + model.compile(optimizer, loss, metrics) +if model_name == 'PIX2POSE_GENERATOR': model = Generator(image_shape, latent_dimension) reconstruction_loss = WeightedReconstructionWithError(beta) loss = WeightedReconstructionWithError() @@ -73,7 +79,23 @@ weighted_reconstruction = weighted_reconstruction_wrapper(beta, True) metrics = {'RGB_with_error': [weighted_reconstruction, error_prediction, mean_squared_error]} - + optimizer = Adam(learning_rate) + model.compile(optimizer, loss, metrics) +if model_name == 'PIX2POSE': + discriminator = Discriminator(image_shape) + generator = Generator(image_shape, latent_dimension) + model = Pix2Pose(image_shape, discriminator, generator, latent_dimension) + # reconstruction_loss = WeightedReconstructionWithError(beta) + # loss = 
WeightedReconstructionWithError() + H, W, num_channels = image_shape + inputs_to_shape = {'RGB_input': [H, W, num_channels]} + labels_to_shape = {'RGB_with_error': [H, W, 4]} + # weighted_reconstruction = weighted_reconstruction_wrapper(beta, True) + # metrics = {'RGB_with_error': + # [weighted_reconstruction,error_prediction, mean_squared_error]} + optimizer_D = Adam(learning_rate) + optimizer_G = Adam(learning_rate) + model.compile(optimizer_D, optimizer_G, BinaryCrossentropy()) processor = DomainRandomization( renderer, image_shape, image_paths, inputs_to_shape, @@ -81,10 +103,6 @@ sequence = GeneratingSequence(processor, batch_size, num_steps) -optimizer = Adam(learning_rate) - -model.compile(optimizer, loss, metrics) - model.fit( sequence, epochs=max_num_epochs, From b9901b8e096604afd4bac1bce82c1b380a84fccc Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 8 Nov 2021 13:42:24 +0100 Subject: [PATCH 034/101] Add additional losses to pix2pose --- examples/pix2pose/models/pix2pose.py | 102 +++++++++++++++++++-------- examples/pix2pose/train_gan.py | 1 + 2 files changed, 72 insertions(+), 31 deletions(-) diff --git a/examples/pix2pose/models/pix2pose.py b/examples/pix2pose/models/pix2pose.py index 37cb9ce39..efe3ae195 100644 --- a/examples/pix2pose/models/pix2pose.py +++ b/examples/pix2pose/models/pix2pose.py @@ -1,27 +1,33 @@ from tensorflow.keras.models import Model from tensorflow.keras.metrics import Mean import tensorflow as tf +from loss import compute_weighted_reconstruction_loss_with_error +from loss import compute_error_prediction_loss class Pix2Pose(Model): def __init__(self, image_shape, discriminator, generator, latent_dim): super(Pix2Pose, self).__init__() self.image_shape = image_shape - self.D = discriminator - self.G = generator + self.discriminator = discriminator + self.generator = generator self.latent_dim = latent_dim @property def metrics(self): - return [self.G_loss_metric, self.D_loss_metric] + return [self.generator_loss, self.discriminator_loss] - def compile(self, optimizer_D, optimizer_G, loss): + def compile(self, optimizer_D, optimizer_G, gan_loss): super(Pix2Pose, self).compile() self.optimizer_G = optimizer_G self.optimizer_D = optimizer_D - self.loss = loss - self.G_loss_metric = Mean(name='generator_loss') - self.D_loss_metric = Mean(name='discriminator_loss') + self.gan_loss = gan_loss + # self.reconstruction = reconstruction + # self.error_prediction = error_prediction + self.generator_loss = Mean(name='generator_loss') + self.discriminator_loss = Mean(name='discriminator_loss') + self.reconstruction_loss = Mean(name='weighted_reconstruction') + self.error_prediction_loss = Mean(name='error_prediction') def _build_discriminator_labels(self, batch_size): return tf.concat([tf.ones(batch_size, 1), tf.zeros(batch_size, 1)], 0) @@ -33,45 +39,79 @@ def _add_noise_to_labels(self, labels): def _train_D(self, y_true, x_combined): with tf.GradientTape() as tape: - y_pred = self.D(x_combined) - D_loss = self.loss(y_true, y_pred) - grads = tape.gradient(D_loss, self.D.trainable_weights) - self.optimizer_D.apply_gradients(zip(grads, self.D.trainable_weights)) - return D_loss + y_pred = self.discriminator(x_combined) + discriminator_loss = self.gan_loss(y_true, y_pred) + grads = tape.gradient( + discriminator_loss, self.discriminator.trainable_weights) + self.optimizer_D.apply_gradients( + zip(grads, self.discriminator.trainable_weights)) + return discriminator_loss def _train_G(self, RGB_inputs): batch_size = tf.shape(RGB_inputs)[0] y_misleading = 
tf.zeros((batch_size, 1)) with tf.GradientTape() as tape: - y_pred = self.D(self.G(RGB_inputs)[:, :, :, 0:3]) - G_loss = self.loss(y_misleading, y_pred) - grads = tape.gradient(G_loss, self.G.trainable_weights) - self.optimizer_G.apply_gradients(zip(grads, self.G.trainable_weights)) - return G_loss + y_pred = self.discriminator( + self.generator(RGB_inputs)[:, :, :, 0:3]) + generator_loss = self.gan_loss(y_misleading, y_pred) + grads = tape.gradient(generator_loss, self.generator.trainable_weights) + self.optimizer_G.apply_gradients( + zip(grads, self.generator.trainable_weights)) + return generator_loss - def _update_metrics(self, D_loss, G_loss): - self.D_loss_metric.update_state(D_loss) - self.G_loss_metric.update_state(G_loss) + def _train_G_reconstruction(self, RGB_inputs, RGBA_true): + with tf.GradientTape() as tape: + RGBE_pred = self.generator(RGB_inputs) + loss = compute_weighted_reconstruction_loss_with_error( + RGBA_true, RGBE_pred, beta=3.0) + grads = tape.gradient(loss, self.generator.trainable_weights) + self.optimizer_G.apply_gradients( + zip(grads, self.generator.trainable_weights)) + return loss + + def _train_G_error_prediction(self, RGB_inputs, RGBA_true): + with tf.GradientTape() as tape: + RGBE_pred = self.generator(RGB_inputs) + loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) + grads = tape.gradient(loss, self.generator.trainable_weights) + self.optimizer_G.apply_gradients( + zip(grads, self.generator.trainable_weights)) + return loss + + def _update_metrics(self, discriminator_loss, generator_loss): + self.discriminator_loss.update_state(discriminator_loss) + self.generator_loss.update_state(generator_loss) def train_step(self, data): - RGB_inputs, RGB_labels = data - RGB_inputs = RGB_inputs['RGB_input'][:, :, :, 0:3] - RGB_labels = RGB_labels['RGB_with_error'][:, :, :, 0:3] - RGB_generated = self.G(RGB_inputs)[:, :, :, 0:3] + inputs, labels = data + RGB_inputs, RGBA_true = inputs['RGB_input'], labels['RGB_with_error'] + + reconstruction_loss = self._train_G_reconstruction(RGB_inputs, RGBA_true) + self.reconstruction_loss.update_state(reconstruction_loss) + + error_prediction_loss = self._train_G_error_prediction(RGB_inputs, RGBA_true) + self.error_prediction_loss.update_state(error_prediction_loss) + # reconstruction_loss = self.error_prediction(RGBA_true, RGBE_pred, beta) + + RGB_labels = RGBA_true[:, :, :, 0:3] + RGB_generated = self.generator(RGB_inputs)[:, :, :, 0:3] combined_images = tf.concat([RGB_generated, RGB_labels], axis=0) batch_size = tf.shape(RGB_inputs)[0] y_true = self._build_discriminator_labels(batch_size) y_true = self._add_noise_to_labels(y_true) - D_loss = self._train_D(y_true, combined_images) - G_loss = self._train_G(RGB_inputs) - self._update_metrics(D_loss, G_loss) - return {"discriminator_loss": self.D_loss_metric.result(), - "generator_loss": self.G_loss_metric.result()} + discriminator_loss = self._train_D(y_true, combined_images) + generator_loss = self._train_G(RGB_inputs) + self._update_metrics(discriminator_loss, generator_loss) + return {'discriminator_loss': self.discriminator_loss.result(), + 'generator_loss': self.generator_loss.result(), + 'reconstruction_loss': self.reconstruction_loss.result(), + 'error_prediction_loss': self.error_prediction_loss.result()} + """ def call(self, data): - generated = self.G(data) - predictions = self.D(generated) + generated = self.generator(data) + predictions = self.discriminator(generated) return generated , predictions """ diff --git a/examples/pix2pose/train_gan.py 
b/examples/pix2pose/train_gan.py index de9ebb7df..1872c181d 100644 --- a/examples/pix2pose/train_gan.py +++ b/examples/pix2pose/train_gan.py @@ -110,6 +110,7 @@ verbose=1, workers=0) +model.save_weights('PIX2POSE_GAN.hdf5') """ def normalize(image): return (image * 255.0).astype('uint8') From 7d72dde665bcbd08fd44335618945261b36b48a6 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 8 Nov 2021 15:53:03 +0100 Subject: [PATCH 035/101] Add basic training with full GAN model --- examples/pix2pose/loss.py | 19 ++-- examples/pix2pose/models/generator.py | 6 +- examples/pix2pose/models/pix2pose.py | 130 ++++++++++++++------------ examples/pix2pose/train_gan.py | 24 +++-- 4 files changed, 99 insertions(+), 80 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index 741f64dc3..bfe7e90ea 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -37,8 +37,8 @@ def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0): return tf.reduce_mean(reconstruction_loss, axis=-1, keepdims=True) -def compute_weighted_reconstruction_loss_with_error(RGBA_true, RGBE_pred, - beta=3.0): +def compute_weighted_reconstruction_loss_with_error( + RGBA_true, RGBE_pred, beta=3.0): RGB_pred, error_mask = extract_error_mask(RGBE_pred) loss = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) return loss @@ -53,17 +53,24 @@ def compute_error_prediction_loss(RGBA_true, RGBE_pred): return error_loss +class ErrorPrediction(Loss): + def __init__(self): + super(ErrorPrediction, self).__init__() + + def call(self, RGBA_true, RGBE_pred): + error_loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) + return error_loss + + class WeightedReconstructionWithError(Loss): def __init__(self, beta=3.0): super(WeightedReconstructionWithError, self).__init__() self.beta = beta def call(self, RGBA_true, RGBE_pred): - reconstruction = compute_weighted_reconstruction_loss_with_error( + reconstruction_loss = compute_weighted_reconstruction_loss_with_error( RGBA_true, RGBE_pred, self.beta) - error_prediction = compute_error_prediction_loss(RGBA_true, RGBE_pred) - loss = reconstruction + error_prediction - return loss + return reconstruction_loss class WeightedReconstruction(Loss): diff --git a/examples/pix2pose/models/generator.py b/examples/pix2pose/models/generator.py index 357d3b646..2d7766e58 100644 --- a/examples/pix2pose/models/generator.py +++ b/examples/pix2pose/models/generator.py @@ -52,7 +52,7 @@ def decoder(x, skip_connections): def Generator(input_shape=(128, 128, 3), latent_dimension=256, - name='PIX2POSE_GENERATOR'): + activation='sigmoid', name='PIX2POSE_GENERATOR'): RGB_input = Input(input_shape, name='RGB_input') x, skip_connections = encoder(RGB_input) x = Flatten()(x) @@ -61,9 +61,9 @@ def Generator(input_shape=(128, 128, 3), latent_dimension=256, x = Reshape((8, 8, latent_dimension))(x) x = decoder(x, skip_connections) RGB = Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same')(x) - RGB = Activation('tanh', name='RGB')(RGB) + RGB = Activation(activation, name='RGB')(RGB) error = Conv2DTranspose(1, (5, 5), (2, 2), padding='same')(x) - error = Activation('sigmoid', name='error')(error) + error = Activation(activation, name='error')(error) RGB_with_error = Concatenate(axis=-1, name='RGB_with_error')([RGB, error]) model = Model(RGB_input, RGB_with_error, name=name) return model diff --git a/examples/pix2pose/models/pix2pose.py b/examples/pix2pose/models/pix2pose.py index efe3ae195..b120a4fa4 100644 --- a/examples/pix2pose/models/pix2pose.py +++ 
b/examples/pix2pose/models/pix2pose.py @@ -1,8 +1,8 @@ from tensorflow.keras.models import Model from tensorflow.keras.metrics import Mean import tensorflow as tf -from loss import compute_weighted_reconstruction_loss_with_error -from loss import compute_error_prediction_loss +# from loss import compute_weighted_reconstruction_loss_with_error +# from loss import compute_error_prediction_loss class Pix2Pose(Model): @@ -17,17 +17,20 @@ def __init__(self, image_shape, discriminator, generator, latent_dim): def metrics(self): return [self.generator_loss, self.discriminator_loss] - def compile(self, optimizer_D, optimizer_G, gan_loss): + def compile(self, optimizers, losses, loss_weights): super(Pix2Pose, self).compile() - self.optimizer_G = optimizer_G - self.optimizer_D = optimizer_D - self.gan_loss = gan_loss - # self.reconstruction = reconstruction - # self.error_prediction = error_prediction + self.optimizer_generator = optimizers['generator'] + self.optimizer_discriminator = optimizers['discriminator'] + self.compute_reconstruction_loss = losses['weighted_reconstruction'] + self.compute_error_prediction_loss = losses['error_prediction'] + self.compute_discriminator_loss = losses['discriminator'] + self.generator_loss = Mean(name='generator_loss') self.discriminator_loss = Mean(name='discriminator_loss') self.reconstruction_loss = Mean(name='weighted_reconstruction') self.error_prediction_loss = Mean(name='error_prediction') + self.reconstruction_weight = loss_weights['weighted_reconstruction'] + self.error_prediction_weight = loss_weights['error_prediction'] def _build_discriminator_labels(self, batch_size): return tf.concat([tf.ones(batch_size, 1), tf.zeros(batch_size, 1)], 0) @@ -37,81 +40,84 @@ def _add_noise_to_labels(self, labels): labels = labels + 0.05 * noise return labels - def _train_D(self, y_true, x_combined): + def _get_batch_size(self, values): + return tf.shape(values)[0] + + def _train_discriminator(self, RGB_inputs, RGBA_true): + RGB_true = RGBA_true[:, :, :, 0:3] + RGB_fake = self.generator(RGB_inputs)[:, :, :, 0:3] + RGB_fake_true = tf.concat([RGB_fake, RGB_true], axis=0) + + batch_size = self._get_batch_size(RGB_inputs) + y_true = self._build_discriminator_labels(batch_size) + y_true = self._add_noise_to_labels(y_true) + with tf.GradientTape() as tape: - y_pred = self.discriminator(x_combined) - discriminator_loss = self.gan_loss(y_true, y_pred) - grads = tape.gradient( - discriminator_loss, self.discriminator.trainable_weights) - self.optimizer_D.apply_gradients( - zip(grads, self.discriminator.trainable_weights)) + y_pred = self.discriminator(RGB_fake_true) + discriminator_loss = self.compute_discriminator_loss( + y_true, y_pred) + gradients = tape.gradient(discriminator_loss, + self.discriminator.trainable_weights) + self.optimizer_discriminator.apply_gradients( + zip(gradients, self.discriminator.trainable_weights)) return discriminator_loss - def _train_G(self, RGB_inputs): + def _train_generator(self, RGB_inputs): batch_size = tf.shape(RGB_inputs)[0] y_misleading = tf.zeros((batch_size, 1)) with tf.GradientTape() as tape: - y_pred = self.discriminator( - self.generator(RGB_inputs)[:, :, :, 0:3]) - generator_loss = self.gan_loss(y_misleading, y_pred) - grads = tape.gradient(generator_loss, self.generator.trainable_weights) - self.optimizer_G.apply_gradients( - zip(grads, self.generator.trainable_weights)) + RGBE_preds = self.generator(RGB_inputs) + y_pred = self.discriminator(RGBE_preds[..., 0:3]) + generator_loss = self.compute_discriminator_loss( + y_misleading, 
y_pred) + gradients = tape.gradient(generator_loss, + self.generator.trainable_weights) + self.optimizer_generator.apply_gradients( + zip(gradients, self.generator.trainable_weights)) return generator_loss - def _train_G_reconstruction(self, RGB_inputs, RGBA_true): + def _train_reconstruction(self, RGB_inputs, RGBA_true): with tf.GradientTape() as tape: RGBE_pred = self.generator(RGB_inputs) - loss = compute_weighted_reconstruction_loss_with_error( - RGBA_true, RGBE_pred, beta=3.0) - grads = tape.gradient(loss, self.generator.trainable_weights) - self.optimizer_G.apply_gradients( - zip(grads, self.generator.trainable_weights)) - return loss - - def _train_G_error_prediction(self, RGB_inputs, RGBA_true): + reconstruction_loss = self.compute_reconstruction_loss( + RGBA_true, RGBE_pred) + reconstruction_loss = ( + self.reconstruction_weight * reconstruction_loss) + gradients = tape.gradient(reconstruction_loss, + self.generator.trainable_weights) + self.optimizer_generator.apply_gradients( + zip(gradients, self.generator.trainable_weights)) + return reconstruction_loss + + def _train_error_prediction(self, RGB_inputs, RGBA_true): with tf.GradientTape() as tape: RGBE_pred = self.generator(RGB_inputs) - loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) - grads = tape.gradient(loss, self.generator.trainable_weights) - self.optimizer_G.apply_gradients( - zip(grads, self.generator.trainable_weights)) - return loss - - def _update_metrics(self, discriminator_loss, generator_loss): - self.discriminator_loss.update_state(discriminator_loss) - self.generator_loss.update_state(generator_loss) + error_prediction_loss = self.compute_error_prediction_loss( + RGBA_true, RGBE_pred) + error_prediction_loss = ( + self.error_prediction_weight * error_prediction_loss) + gradients = tape.gradient( + error_prediction_loss, self.generator.trainable_weights) + self.optimizer_generator.apply_gradients( + zip(gradients, self.generator.trainable_weights)) + return error_prediction_loss def train_step(self, data): - inputs, labels = data - RGB_inputs, RGBA_true = inputs['RGB_input'], labels['RGB_with_error'] + RGB_inputs, RGBA_true = data[0]['RGB_input'], data[1]['RGB_with_error'] - reconstruction_loss = self._train_G_reconstruction(RGB_inputs, RGBA_true) + reconstruction_loss = self._train_reconstruction(RGB_inputs, RGBA_true) self.reconstruction_loss.update_state(reconstruction_loss) - error_prediction_loss = self._train_G_error_prediction(RGB_inputs, RGBA_true) - self.error_prediction_loss.update_state(error_prediction_loss) - # reconstruction_loss = self.error_prediction(RGBA_true, RGBE_pred, beta) + error_loss = self._train_error_prediction(RGB_inputs, RGBA_true) + self.error_prediction_loss.update_state(error_loss) - RGB_labels = RGBA_true[:, :, :, 0:3] - RGB_generated = self.generator(RGB_inputs)[:, :, :, 0:3] + discriminator_loss = self._train_discriminator(RGB_inputs, RGBA_true) + self.discriminator_loss.update_state(discriminator_loss) - combined_images = tf.concat([RGB_generated, RGB_labels], axis=0) - batch_size = tf.shape(RGB_inputs)[0] - y_true = self._build_discriminator_labels(batch_size) - y_true = self._add_noise_to_labels(y_true) + generator_loss = self._train_generator(RGB_inputs) + self.generator_loss.update_state(generator_loss) - discriminator_loss = self._train_D(y_true, combined_images) - generator_loss = self._train_G(RGB_inputs) - self._update_metrics(discriminator_loss, generator_loss) return {'discriminator_loss': self.discriminator_loss.result(), 'generator_loss': 
self.generator_loss.result(), 'reconstruction_loss': self.reconstruction_loss.result(), 'error_prediction_loss': self.error_prediction_loss.result()} - - """ - def call(self, data): - generated = self.generator(data) - predictions = self.discriminator(generated) - return generated , predictions - """ diff --git a/examples/pix2pose/train_gan.py b/examples/pix2pose/train_gan.py index 1872c181d..2d0e32fa5 100644 --- a/examples/pix2pose/train_gan.py +++ b/examples/pix2pose/train_gan.py @@ -14,6 +14,7 @@ from pipelines import DomainRandomization from loss import WeightedReconstruction from loss import WeightedReconstructionWithError +from loss import ErrorPrediction # from metrics import error_prediction, weighted_reconstruction # from metrics import weighted_reconstruction_with_error from metrics import mean_squared_error, error_prediction @@ -69,6 +70,9 @@ metrics = {'masks': [weighted_reconstruction, mean_squared_error]} optimizer = Adam(learning_rate) model.compile(optimizer, loss, metrics) + +# TODO this is not working at the moment because the loss does not include +# the error prediction loss. if model_name == 'PIX2POSE_GENERATOR': model = Generator(image_shape, latent_dimension) reconstruction_loss = WeightedReconstructionWithError(beta) @@ -81,28 +85,29 @@ [weighted_reconstruction, error_prediction, mean_squared_error]} optimizer = Adam(learning_rate) model.compile(optimizer, loss, metrics) + if model_name == 'PIX2POSE': discriminator = Discriminator(image_shape) generator = Generator(image_shape, latent_dimension) model = Pix2Pose(image_shape, discriminator, generator, latent_dimension) - # reconstruction_loss = WeightedReconstructionWithError(beta) - # loss = WeightedReconstructionWithError() H, W, num_channels = image_shape inputs_to_shape = {'RGB_input': [H, W, num_channels]} labels_to_shape = {'RGB_with_error': [H, W, 4]} - # weighted_reconstruction = weighted_reconstruction_wrapper(beta, True) - # metrics = {'RGB_with_error': - # [weighted_reconstruction,error_prediction, mean_squared_error]} - optimizer_D = Adam(learning_rate) - optimizer_G = Adam(learning_rate) - model.compile(optimizer_D, optimizer_G, BinaryCrossentropy()) + optimizers = {'discriminator': Adam(learning_rate), + 'generator': Adam(learning_rate)} + losses = {'discriminator': BinaryCrossentropy(), + 'weighted_reconstruction': WeightedReconstructionWithError(), + 'error_prediction': ErrorPrediction()} + loss_weights = {'weighted_reconstruction': 100, 'error_prediction': 50} + model.compile(optimizers, losses, loss_weights) processor = DomainRandomization( renderer, image_shape, image_paths, inputs_to_shape, labels_to_shape, num_occlusions) sequence = GeneratingSequence(processor, batch_size, num_steps) - +model.load_weights('PIX2POSE_GAN.hdf5') +""" model.fit( sequence, epochs=max_num_epochs, @@ -112,6 +117,7 @@ model.save_weights('PIX2POSE_GAN.hdf5') """ +""" def normalize(image): return (image * 255.0).astype('uint8') From 7472cea9b6de09508dae684457f3a7800f17731a Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 17 Nov 2021 11:22:44 +0100 Subject: [PATCH 036/101] Add fix to PnP having to solve for less than 4 mask points --- examples/pix2pose/backend.py | 2 ++ examples/pix2pose/pipelines.py | 2 ++ examples/pix2pose/processors.py | 2 ++ 3 files changed, 6 insertions(+) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index fccf619d3..847117868 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -137,6 +137,8 @@ def 
_preprocess_image_points2D(image_points2D): def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, inlier_threshold=5, num_iterations=100): + if ((len(object_points3D) < 4) or (len(image_points2D) < 4)): + return None, None image_points2D = _preprocess_image_points2D(image_points2D) success, rotation_vector, translation, inliers = cv2.solvePnPRansac( object_points3D, image_points2D, camera_intrinsics, None, diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index bf48f7082..688ea9d03 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -121,6 +121,8 @@ def call(self, image): points2D = denormalize_points2D(points2D, *crop.shape[0:2]) points2D = self.change_coordinates(points2D, box2D) quaternion, translation = self.predict_pose(points3D, points2D) + if (quaternion is None) or (translation is None): + continue pose6D = Pose6D(quaternion, translation, box2D.class_name) poses6D.append(pose6D), points.append([points2D, points3D]) if self.draw: diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py index a3452323f..e51137201 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -178,6 +178,8 @@ def __init__(self): super(RotationVectorToQuaternion, self).__init__() def call(self, rotation_vector): + if rotation_vector is None: + return None quaternion = rotation_vector_to_quaternion(rotation_vector) return quaternion From 74c4888970b5dfb362ef664c2c29d237f3acc168 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 17 Nov 2021 11:22:58 +0100 Subject: [PATCH 037/101] Change demo for image processing --- examples/pix2pose/demo.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index def06b71d..44167d034 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -15,10 +15,10 @@ model.load_weights('UNET_weights_epochs-10_beta-3.hdf5') # approximating intrinsic camera parameters -camera = Camera(device_id=4) -# camera.start() -# image_size = camera.read().shape[0:2] -# camera.stop() +camera = Camera(device_id=0) +camera.start() +image_size = camera.read().shape[0:2] +camera.stop() image = load_image('test_image.jpg') image_size = image.shape[0:2] @@ -30,8 +30,9 @@ [0, 0, 1]]) object_sizes = np.array([0.184, 0.187, 0.052]) epsilon = 0.15 -detect = SSD300FAT(draw=False) -offsets = [0.1, 0.1] +score_thresh = 0.50 +detect = SSD300FAT(score_thresh, draw=False) +offsets = [0.2, 0.2] estimate_keypoints = Pix2Pose(model, object_sizes) pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets, None) @@ -39,8 +40,6 @@ predicted_image = results['image'] show_image(predicted_image) -""" -image_size = (640, 480) -player = VideoPlayer(image_size, pipeline, camera) -player.run() -""" +# image_size = (640, 480) +# player = VideoPlayer(image_size, pipeline, camera) +# player.run() From b6aeb0bb9b96c80bf16a278215f10cd3d0db695e Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 17 Nov 2021 12:11:54 +0100 Subject: [PATCH 038/101] Add accessible state of failure and success of internal PnP solution --- examples/pix2pose/backend.py | 6 ++---- examples/pix2pose/pipelines.py | 16 ++++++++++------ examples/pix2pose/processors.py | 6 ++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 847117868..513a3bb88 100644 --- a/examples/pix2pose/backend.py +++ 
b/examples/pix2pose/backend.py @@ -138,16 +138,14 @@ def _preprocess_image_points2D(image_points2D): def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, inlier_threshold=5, num_iterations=100): if ((len(object_points3D) < 4) or (len(image_points2D) < 4)): - return None, None + raise ValueError('Solve PnP requires at least 4 3D and 2D points') image_points2D = _preprocess_image_points2D(image_points2D) success, rotation_vector, translation, inliers = cv2.solvePnPRansac( object_points3D, image_points2D, camera_intrinsics, None, flags=cv2.SOLVEPNP_EPNP, reprojectionError=inlier_threshold, iterationsCount=num_iterations) translation = np.squeeze(translation, 1) - if success is False: - rotation_vector, translation = None, None - return rotation_vector, translation + return success, rotation_vector, translation def apply_affine_transform(affine_matrix, vectors): diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 688ea9d03..547f2ec0b 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -1,4 +1,3 @@ -import numpy as np from paz.abstract import SequentialProcessor, Processor from paz.pipelines import RandomizeRenderedImage as RandomizeRender from paz.abstract.messages import Pose6D @@ -8,12 +7,12 @@ ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC, ReplaceLowerThanThreshold) from backend import build_cube_points3D -from processors import UnwrapDictionary, RotationVectorToQuaternion +from processors import UnwrapDictionary from processors import NormalizePoints2D -from backend import draw_maski from backend import denormalize_points2D from backend import draw_poses6D from backend import draw_masks +from paz.backend.quaternion import rotation_vector_to_quaternion class DomainRandomization(SequentialProcessor): @@ -68,8 +67,9 @@ def __init__(self, output_shape): class SolveChangingObjectPnP(SequentialProcessor): def __init__(self, camera_intrinsics): super(SolveChangingObjectPnP, self).__init__() + self.MINIMUM_REQUIRED_POINTS = 4 self.add(SolveChangingObjectPnPRANSAC(camera_intrinsics)) - self.add(pr.ControlMap(RotationVectorToQuaternion())) + # self.add(pr.ControlMap(RotationVectorToQuaternion())) class Pix2Pose(pr.Processor): @@ -120,9 +120,13 @@ def call(self, image): points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) points2D = denormalize_points2D(points2D, *crop.shape[0:2]) points2D = self.change_coordinates(points2D, box2D) - quaternion, translation = self.predict_pose(points3D, points2D) - if (quaternion is None) or (translation is None): + if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: continue + success, rotation, translation = self.predict_pose( + points3D, points2D) + if success is False: + continue + quaternion = rotation_vector_to_quaternion(rotation) pose6D = Pose6D(quaternion, translation, box2D.class_name) poses6D.append(pose6D), points.append([points2D, points3D]) if self.draw: diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py index e51137201..03164771c 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -125,10 +125,10 @@ def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): self.num_iterations = num_iterations def call(self, object_points3D, image_points2D): - rotation_vector, translation = solve_PnP_RANSAC( + success, rotation_vector, translation = solve_PnP_RANSAC( object_points3D, image_points2D, self.camera_intrinsics, self.inlier_thresh, self.num_iterations) - return 
rotation_vector, translation + return success, rotation_vector, translation class RotationVectorToRotationMatrix(Processor): @@ -178,8 +178,6 @@ def __init__(self): super(RotationVectorToQuaternion, self).__init__() def call(self, rotation_vector): - if rotation_vector is None: - return None quaternion = rotation_vector_to_quaternion(rotation_vector) return quaternion From ca3b26c7950b29bee1efde29f0e28070edec0ea5 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 17 Nov 2021 13:03:25 +0100 Subject: [PATCH 039/101] Remove unsued functions and processors --- examples/pix2pose/backend.py | 295 ++++++++------------------------ examples/pix2pose/pipelines.py | 1 - examples/pix2pose/processors.py | 77 +++------ 3 files changed, 94 insertions(+), 279 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 513a3bb88..be696aaa1 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -1,93 +1,11 @@ -from collections import Iterable +# from collections import Iterable import numpy as np from paz.backend.image.draw import GREEN -from paz.backend.image import draw_line, draw_dot, draw_circle -from paz.abstract import Pose6D +from paz.backend.image import draw_line, draw_dot +# from paz.abstract import Pose6D import cv2 -def homogenous_quaternion_to_rotation_matrix(quaternion): - # w0, q1, q2, q3 = quaternion - q1, q2, q3, w0 = quaternion - - r11 = w0**2 + q1**2 - q2**2 - q3**2 - r12 = 2 * ((q1 * q2) - (w0 * q3)) - r13 = 2 * ((w0 * q2) + (q1 * q3)) - - r21 = 2 * ((w0 * q3) + (q1 * q2)) - r22 = w0**2 - q1**2 + q2**2 - q3**2 - r23 = 2 * ((q2 * q3) - (w0 * q1)) - - r31 = 2 * ((q1 * q3) - (w0 * q2)) - r32 = 2 * ((w0 * q1) + (q2 * q3)) - r33 = w0**2 - q1**2 - q2**2 + q3**2 - - rotation_matrix = np.array([[r11, r12, r13], - [r21, r22, r23], - [r31, r32, r33]]) - return rotation_matrix - - -def inhomogenous_quaternion_to_rotation_matrix(q): - """Transforms quaternion into a rotation matrix - # Arguments - q: quarternion, Numpy array of shape ``[4]`` - # Returns - Numpy array representing a rotation vector having a shape ``[3]``. - """ - # quaternion - # q = q[::-1] - r11 = 1 - (2 * (q[1]**2 + q[2]**2)) - r12 = 2 * (q[0] * q[1] - q[3] * q[2]) - r13 = 2 * (q[3] * q[1] + q[0] * q[2]) - - r21 = 2 * (q[0] * q[1] + q[3] * q[2]) - r22 = 1 - (2 * (q[0]**2 + q[2]**2)) - r23 = 2 * (q[1] * q[2] - q[3] * q[0]) - - r31 = 2 * (q[0] * q[2] - q[3] * q[1]) - r32 = 2 * (q[3] * q[0] + q[1] * q[2]) - r33 = 1 - (2 * (q[0]**2 + q[1]**2)) - - rotation_matrix = np.array([[r11, r12, r13], - [r21, r22, r23], - [r31, r32, r33]]) - - return rotation_matrix - # return np.squeeze(rotation_matrix) - - -def quaternion_to_rotation_matrix(quaternion, homogenous=True): - if homogenous: - matrix = homogenous_quaternion_to_rotation_matrix(quaternion) - else: - matrix = inhomogenous_quaternion_to_rotation_matrix(quaternion) - return matrix - - -def multiply_quaternions(quaternion_0, quaternion_1): - """Multiplies two quaternions. 
- - # Reference: - Code extracted from [here](https://stackoverflow.com/questions/ - 39000758/how-to-multiply-two-quaternions-by-python-or-numpy) - """ - x0, y0, z0, w0 = quaternion_0 - x1, y1, z1, w1 = quaternion_1 - x2 = +(x1 * w0) + (y1 * z0) - (z1 * y0) + (w1 * x0) - y2 = -(x1 * z0) + (y1 * w0) + (z1 * x0) + (w1 * y0) - z2 = +(x1 * y0) - (y1 * x0) + (z1 * w0) + (w1 * z0) - w2 = -(x1 * x0) - (y1 * y0) - (z1 * z0) + (w1 * w0) - return np.array([x2, y2, z2, w2]) - - -# quaternion = (1 / np.sqrt(30)) * np.array([1, 2, 3, 4]) -# theta = np.deg2rad(0) -# quaternion = np.array([1, 0, 0, 0]) -# a = homogenous_quaternion_to_rotation_matrix(quaternion) -# quaternion = (1 / np.sqrt(30)) * np.array([2, 3, 4, 1]) -# b = inhomogenous_quaternion_to_rotation_matrix(quaternion) - def build_cube_points3D(width, height, depth): """ Build the 3D points of a cube in the openCV coordinate system: 4--------1 @@ -148,33 +66,6 @@ def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, return success, rotation_vector, translation -def apply_affine_transform(affine_matrix, vectors): - return np.matmul(affine_matrix, vectors.T).T - - -def project_to_image2(affine_matrix, points3D, camera_intrinsics): - """Project points3D to image plane using a perspective transformation - """ - if affine_matrix.shape != (4, 4): - raise ValueError('Affine matrix is not of shape (4, 4)') - if len(points3D.shape) != 2: - raise ValueError('points3D should have a shape (N, 3)') - if points3D.shape[1] != 3: - raise ValueError('points3D should have a shape (N, 3)') - # TODO missing checks for camera intrinsics conditions - points3D = apply_affine_transform(affine_matrix, points3D) - # points3D = np.matmul(rotation, points3D.T).T + translation - x, y, z = np.split(points3D, 3, axis=1) - x_focal_length = camera_intrinsics[0, 0] - y_focal_length = camera_intrinsics[1, 1] - x_image_center = camera_intrinsics[0, 2] - y_image_center = camera_intrinsics[1, 2] - x_points = (x_focal_length * (x / z)) + x_image_center - y_points = (y_focal_length * (y / z)) + y_image_center - projected_points2D = np.concatenate([x_points, y_points], axis=1) - return projected_points2D - - def project_to_image(rotation, translation, points3D, camera_intrinsics): """Project points3D to image plane using a perspective transformation """ @@ -256,20 +147,6 @@ def arguments_to_image_points2D(row_args, col_args): return image_points2D -def rotation_vector_to_rotation_matrix(rotation_vector): - rotation_matrix = np.eye(3) - cv2.Rodrigues(rotation_vector, rotation_matrix) - return rotation_matrix - - -def draw_keypoints(image, keypoints, colors, radius): - for keypoint, color in zip(keypoints, colors): - R, G, B = color - color = (int(R), int(G), int(B)) - draw_circle(image, keypoint.astype('int'), color, radius) - return image - - def draw_masks(image, points): for points2D, points3D in points: object_sizes = np.array([0.184, 0.187, 0.052]) @@ -291,52 +168,6 @@ def draw_maski(image, keypoints, colors, radius=1): return image -def rotation_matrix_to_quaternion(rotation_matrix): - qw = np.sqrt(1 + np.trace(rotation_matrix)) / 2.0 - - m21 = rotation_matrix[2, 1] - m12 = rotation_matrix[1, 2] - - m02 = rotation_matrix[0, 2] - m20 = rotation_matrix[2, 0] - - m10 = rotation_matrix[1, 0] - m01 = rotation_matrix[0, 1] - - qx = (m21 - m12) / (4.0 * qw) - qy = (m02 - m20) / (4.0 * qw) - qz = (m10 - m01) / (4.0 * qw) - return qx, qy, qz, qw - - -def to_pose6D(quaternion, translation, class_name=None): - return Pose6D(quaternion, translation, class_name) - - -class 
MultiList(Iterable): - def __init__(self, num_lists): - self.num_lists = num_lists - self.lists = [[] for list_arg in range(self.num_lists)] - - def append(self, *args): - if len(args) != self.num_lists: - raise ValueError('Arguments should have equal lenght as num_lists') - for arg, arg_list in zip(args, self.lists): - arg_list.append(arg) - - def __iter__(self): - return iter(self.lists) - - -def draw_mask2(image, points3D, object_sizes): - if len(object_sizes) != 3: - raise ValueError('Object sizes must contain 3 values') - colors = points3D / (object_sizes / 2.0) - colors = (colors + 1.0) * 127.5 - colors = colors.astype('int') - # draw_keypoints(image, points2D, colors, radius=3) - - def normalize_points2D(points2D, height, width): """Transform points2D in image coordinates to normalized coordinates. @@ -363,55 +194,6 @@ def denormalize_points2D(points2D, height, width): return points2D - -def flip_y_axis(points2D): - x, y = np.split(points2D, 2, axis=1) - points2D = np.concatenate([x, -y], axis=1) - return points2D - - -def denormalize_keypoints2(keypoints, height, width): - # [-1, 1] -> [-127.5, 127.5] -> [0, 255] - half_sizes = np.array([width, height]) / 2.0 - return (half_sizes * keypoints) + half_sizes - - -def translate_points2D(points2D, translation): - if len(points2D.shape) != 2: - raise ValueError('Invalid points2D shape') - if len(translation) != 2: - raise ValueError('Invalid translation lenght') - num_keypoints = len(points2D) - height, width = translation - x_translation = np.full((num_keypoints, 1), width) - y_translation = np.full((num_keypoints, 1), height) - translation = np.concatenate([x_translation, y_translation], axis=1) - translated_points2D = translation - points2D - return translated_points2D - - -def denormalize_keypoints(keypoints, height, width): - """Transform normalized keypoint coordinates into image coordinates - - # Arguments - keypoints: Numpy array of shape ``(num_keypoints, 2)``. - height: Int. Height of the image - width: Int. Width of the image - - # Returns - Numpy array of shape ``(num_keypoints, 2)``. 
- """ - for keypoint_arg, keypoint in enumerate(keypoints): - x, y = keypoint[:2] - # transform key-point coordinates to image coordinates - x = (min(max(x, -1), 1) * width / 2 + width / 2) - 0.5 - # flip since the image coordinates for y are flipped - y = height - 0.5 - (min(max(y, -1), 1) * height / 2 + height / 2) - x, y = int(round(x)), int(round(y)) - keypoints[keypoint_arg][:2] = [x, y] - return keypoints - - def draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics): image = image.astype(float) for pose6D in poses6D: @@ -424,3 +206,74 @@ def draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics): image = draw_cube(image, cube_points2D) image = image.astype('uint8') return image + + +# NOT USED +def homogenous_quaternion_to_rotation_matrix(quaternion): + # w0, q1, q2, q3 = quaternion + q1, q2, q3, w0 = quaternion + + r11 = w0**2 + q1**2 - q2**2 - q3**2 + r12 = 2 * ((q1 * q2) - (w0 * q3)) + r13 = 2 * ((w0 * q2) + (q1 * q3)) + + r21 = 2 * ((w0 * q3) + (q1 * q2)) + r22 = w0**2 - q1**2 + q2**2 - q3**2 + r23 = 2 * ((q2 * q3) - (w0 * q1)) + + r31 = 2 * ((q1 * q3) - (w0 * q2)) + r32 = 2 * ((w0 * q1) + (q2 * q3)) + r33 = w0**2 - q1**2 - q2**2 + q3**2 + + rotation_matrix = np.array([[r11, r12, r13], + [r21, r22, r23], + [r31, r32, r33]]) + return rotation_matrix + + +def inhomogenous_quaternion_to_rotation_matrix(q): + # quaternion + # q = q[::-1] + r11 = 1 - (2 * (q[1]**2 + q[2]**2)) + r12 = 2 * (q[0] * q[1] - q[3] * q[2]) + r13 = 2 * (q[3] * q[1] + q[0] * q[2]) + + r21 = 2 * (q[0] * q[1] + q[3] * q[2]) + r22 = 1 - (2 * (q[0]**2 + q[2]**2)) + r23 = 2 * (q[1] * q[2] - q[3] * q[0]) + + r31 = 2 * (q[0] * q[2] - q[3] * q[1]) + r32 = 2 * (q[3] * q[0] + q[1] * q[2]) + r33 = 1 - (2 * (q[0]**2 + q[1]**2)) + + rotation_matrix = np.array([[r11, r12, r13], + [r21, r22, r23], + [r31, r32, r33]]) + + return rotation_matrix + + +def quaternion_to_rotation_matrix(quaternion, homogenous=True): + if homogenous: + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) + else: + matrix = inhomogenous_quaternion_to_rotation_matrix(quaternion) + return matrix + + +def rotation_vector_to_rotation_matrix(rotation_vector): + rotation_matrix = np.eye(3) + cv2.Rodrigues(rotation_vector, rotation_matrix) + return rotation_matrix + + +def to_affine_matrix(rotation_matrix, translation): + if len(translation) != 3: + raise ValueError('Translation should be of lenght 3') + if rotation_matrix.shape != (3, 3): + raise ValueError('Rotation matrix should be of shape (3, 3)') + translation = translation.reshape(3, 1) + affine_top = np.concatenate([rotation_matrix, translation], axis=1) + affine_row = np.array([[0.0, 0.0, 0.0, 1.0]]) + affine_matrix = np.concatenate([affine_top, affine_row], axis=0) + return affine_matrix diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 547f2ec0b..16d35ad62 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -69,7 +69,6 @@ def __init__(self, camera_intrinsics): super(SolveChangingObjectPnP, self).__init__() self.MINIMUM_REQUIRED_POINTS = 4 self.add(SolveChangingObjectPnPRANSAC(camera_intrinsics)) - # self.add(pr.ControlMap(RotationVectorToQuaternion())) class Pix2Pose(pr.Processor): diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py index 03164771c..c47acd2e9 100644 --- a/examples/pix2pose/processors.py +++ b/examples/pix2pose/processors.py @@ -8,9 +8,9 @@ from backend import replace_lower_than_threshold from backend import arguments_to_image_points2D from backend import 
solve_PnP_RANSAC -from backend import rotation_vector_to_rotation_matrix -from backend import translate_points2D from backend import normalize_points2D +from backend import rotation_vector_to_rotation_matrix +from backend import to_affine_matrix class ImageToClosedOneBall(Processor): @@ -62,8 +62,6 @@ def call(self, image, pose6D): points3D = self.class_to_points[pose6D.class_name] points2D = project_points3D(points3D, pose6D, self.camera) points2D = points2D.astype(np.int32) - # points2D = np.squeeze(points2D) - # return points2D draw_cube(image, points2D, thickness=self.thickness) return image @@ -74,9 +72,9 @@ def __init__(self, threshold=1e-8, replacement=0.0): self.threshold = threshold self.replacement = replacement - def call(self, image): + def call(self, values): return replace_lower_than_threshold( - image, self.threshold, self.replacement) + values, self.threshold, self.replacement) class GetNonZeroValues(Processor): @@ -131,22 +129,6 @@ def call(self, object_points3D, image_points2D): return success, rotation_vector, translation -class RotationVectorToRotationMatrix(Processor): - def __init__(self): - super(RotationVectorToRotationMatrix, self).__init__() - - def call(self, rotation_vector): - return rotation_vector_to_rotation_matrix(rotation_vector) - - -class CropImage(Processor): - def __init__(self): - super(CropImage, self).__init__() - - def call(self, image): - return image[:128, :128, :] - - class UnwrapDictionary(Processor): def __init__(self, keys): super(UnwrapDictionary, self).__init__() @@ -156,23 +138,6 @@ def call(self, dictionary): return [dictionary[key] for key in self.keys] -class ToAffineMatrix(Processor): - def __init__(self): - super(ToAffineMatrix, self).__init__() - - def call(self, rotation_matrix, translation): - if len(translation) != 3: - raise ValueError('Translation should be of lenght 3') - if rotation_matrix.shape != (3, 3): - raise ValueError('Rotation matrix should be of shape (3, 3)') - translation = translation.reshape(3, 1) - affine_matrix = np.concatenate([rotation_matrix, translation], axis=1) - affine_row = np.array([[0.0, 0.0, 0.0, 1.0]]) - affine_matrix = np.concatenate([affine_matrix, affine_row], axis=0) - print(affine_matrix.shape) - return affine_matrix - - class RotationVectorToQuaternion(Processor): def __init__(self): super(RotationVectorToQuaternion, self).__init__() @@ -182,29 +147,27 @@ def call(self, rotation_vector): return quaternion -class TranslatePoints2D(Processor): - def __init__(self): - super(TranslatePoints2D, self).__init__() +class NormalizePoints2D(Processor): + def __init__(self, image_shape): + self.height, self.width = image_shape[:2] - def call(points2D, image): - height, width = image.shape[:2] - translated_points2D = translate_points2D(points2D, (height, width)) - return translated_points2D + def call(self, points2D): + points2D = normalize_points2D(points2D, self.height, self.width) + return points2D -class FlipYAxisPoints2D(Processor): +class RotationVectorToRotationMatrix(Processor): def __init__(self): - super(FlipYAxisPoints2D, self).__init__() + super(RotationVectorToRotationMatrix, self).__init__() - def call(self, points2D, image): - height = image.shape[0] - translate_points2D(points2D, (0, height)) + def call(self, rotation_vector): + return rotation_vector_to_rotation_matrix(rotation_vector) -class NormalizePoints2D(Processor): - def __init__(self, image_shape): - self.height, self.width = image_shape[:2] +class ToAffineMatrix(Processor): + def __init__(self): + super(ToAffineMatrix, 
self).__init__() - def call(self, points2D): - points2D = normalize_points2D(points2D, self.height, self.width) - return points2D + def call(self, rotation_matrix, translation): + affine_matrix = to_affine_matrix(rotation_matrix, translation) + return affine_matrix From 6266d3af545aa1374af6f3dc88c81ed94c93c8be Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 17 Nov 2021 14:04:32 +0100 Subject: [PATCH 040/101] Remove unnecessary files --- examples/pix2pose/old_pipelines.py | 150 ----------------------------- examples/pix2pose/old_train.py | 140 --------------------------- examples/pix2pose/pix2pose.sh | 1 - examples/pix2pose/utils.py | 62 ------------ 4 files changed, 353 deletions(-) delete mode 100644 examples/pix2pose/old_pipelines.py delete mode 100644 examples/pix2pose/old_train.py delete mode 100644 examples/pix2pose/pix2pose.sh delete mode 100644 examples/pix2pose/utils.py diff --git a/examples/pix2pose/old_pipelines.py b/examples/pix2pose/old_pipelines.py deleted file mode 100644 index f484c6a44..000000000 --- a/examples/pix2pose/old_pipelines.py +++ /dev/null @@ -1,150 +0,0 @@ -import numpy as np -import os -import glob -import random -from tensorflow.keras.utils import Sequence - -from paz.abstract import SequentialProcessor, Processor -from paz.abstract.sequence import SequenceExtra -from paz.pipelines import RandomizeRenderedImage -from paz import processors as pr - - -class GeneratedImageProcessor(Processor): - """Loads pre-generated images - """ - def __init__(self, path_images, background_images_paths, num_occlusions=1, split=pr.TRAIN, no_ambiguities=False): - super(GeneratedImageProcessor, self).__init__() - self.copy = pr.Copy() - self.augment = RandomizeRenderedImage(background_images_paths, num_occlusions) - preprocessors_input = [pr.NormalizeImage()] - preprocessors_output = [NormalizeImageTanh()] - self.preprocess_input = SequentialProcessor(preprocessors_input) - self.preprocess_output = SequentialProcessor(preprocessors_output) - self.split = split - - # Total number of images - self.num_images = len(glob.glob(os.path.join(path_images, "image_original/*"))) - - # Load all images into memory to save time - self.images_original = [np.load(os.path.join(path_images, "image_original/image_original_{}.npy".format(str(i).zfill(7)))) for i in range(self.num_images)] - - if no_ambiguities: - self.images_colors = [np.load(os.path.join(path_images, "image_colors_no_ambiguities/image_colors_no_ambiguities_{}.npy".format(str(i).zfill(7)))) for i in range(self.num_images)] - else: - self.images_colors = [np.load(os.path.join(path_images, "image_colors/image_colors_{}.npy".format(str(i).zfill(7)))) for i in range(self.num_images)] - - self.alpha_original = [np.load(os.path.join(path_images, "alpha_original/alpha_original_{}.npy".format(str(i).zfill(7)))) for i in range(self.num_images)] - - - def call(self, input_image, label_image): - # index = random.randint(0, self.num_images-1) - # image_original = self.images_original[index] - # image_colors = self.images_colors[index] - # alpha_original = self.alpha_original[index] - - if self.split == pr.TRAIN: - image_original = self.augment(image_original, alpha_original) - - image_original = self.preprocess_input(image_original) - image_colors = self.preprocess_output(image_colors) - return image_original, image_colors - - -class GeneratedImageGenerator(SequentialProcessor): - def __init__(self, path_images, size, background_images_paths, num_occlusions=1, split=pr.TRAIN): - super(GeneratedImageGenerator, self).__init__() - 
self.add(GeneratedImageProcessor( - path_images, background_images_paths, num_occlusions, split)) - self.add(pr.SequenceWrapper( - {0: {'input_image': [size, size, 3]}}, - {1: {'color_output': [size, size, 3]}, 0: {'error_output': [size, size, 1]}})) - -""" -Creates a batch of train data for the discriminator. For real images the label is 1, -for fake images the label is 0 -""" -def make_batch_discriminator(generator, input_images, color_output_images, label): - if label == 1: - return color_output_images, np.ones(len(color_output_images)) - elif label == 0: - predictions = generator.predict(input_images) - return predictions[0], np.zeros(len(predictions[0])) - - -class GeneratingSequencePix2Pose(SequenceExtra): - """Sequence generator used for generating samples. - Unfortunately the GeneratingSequence class from paz.abstract cannot be used here. Reason: not all of - the training data is available right at the start. The error images depend on the predicted color images, - so that they have to be generated on-the-fly during training. This is done here. - - # Arguments - processor: Function used for generating and processing ``samples``. - model: Keras model - batch_size: Int. - num_steps: Int. Number of steps for each epoch. - as_list: Bool, if True ``inputs`` and ``labels`` are dispatched as - lists. If false ``inputs`` and ``labels`` are dispatched as - dictionaries. - """ - def __init__(self, processor, model, batch_size, num_steps, as_list=False, rotation_matrices=None): - self.num_steps = num_steps - self.model = model - self.rotation_matrices = rotation_matrices - super(GeneratingSequencePix2Pose, self).__init__( - processor, batch_size, as_list) - - def __len__(self): - return self.num_steps - - def rotate_image(self, image, rotation_matrix): - mask_image = np.ma.masked_not_equal(np.sum(image, axis=-1), -1.*3).mask.astype(float) - mask_image = np.repeat(mask_image[..., np.newaxis], 3, axis=-1) - mask_background = np.ones_like(mask_image) - mask_image - - # Rotate the object - image_rotated = np.einsum('ij,klj->kli', rotation_matrix, image) - image_rotated *= mask_image - image_rotated += (mask_background * -1.) - - return image_rotated - - def process_batch(self, inputs, labels, batch_index): - input_images, samples = list(), list() - for sample_arg in range(self.batch_size): - sample = self.pipeline() - samples.append(sample) - input_image = sample['inputs'][self.ordered_input_names[0]] - input_images.append(input_image) - - input_images = np.asarray(input_images) - # This line is very important. If model.predict(...) is used instead the results are wrong. - # Reason: BatchNormalization behaves differently, depending on whether it is in train or - # inference mode. model.predict(...) 
is the inference mode, so the predictions here will - # be different from the predictions the model is trained on --> Result: the error images - # generated here are also wrong - predictions = self.model(input_images, training=True) - - # Calculate the errors between the target output and the predicted output - for sample_arg in range(self.batch_size): - sample = samples[sample_arg] - - # List of tuples of the form (error, error_image) - stored_errors = [] - - # Iterate over all rotation matrices to find the object position - # with the smallest error - for rotation_matrix in self.rotation_matrices: - color_image_rotated = self.rotate_image(sample['labels']['color_output'], rotation_matrix) - error_image = np.sum(predictions['color_output'][sample_arg] - color_image_rotated, axis=-1, keepdims=True) - - error_value = np.sum(np.abs(error_image)) - stored_errors.append((error_value, error_image)) - - # Select the error image with the smallest error - minimal_error_pair = min(stored_errors, key=lambda t: t[0]) - sample['labels'][self.ordered_label_names[0]] = minimal_error_pair[1] - self._place_sample(sample['inputs'], sample_arg, inputs) - self._place_sample(sample['labels'], sample_arg, labels) - - return inputs, labels diff --git a/examples/pix2pose/old_train.py b/examples/pix2pose/old_train.py deleted file mode 100644 index 481457ad3..000000000 --- a/examples/pix2pose/old_train.py +++ /dev/null @@ -1,140 +0,0 @@ -import os -import glob -import argparse -import numpy as np -import time - -from tensorflow.keras.callbacks import CSVLogger -from tensorflow.keras.optimizers import Adam -from tensorflow.keras.layers import Input -from tensorflow.keras.models import Model - -from paz.abstract import GeneratingSequence -from paz.abstract.sequence import GeneratingSequence - -from pipelines import GeneratingSequencePix2Pose, GeneratedImageGenerator, make_batch_discriminator -from model import Generator, Discriminator, loss_color_wrapped, loss_error - - -description = 'Training script Pix2Pose model' -root_path = os.path.join(os.path.expanduser('~'), '.keras/') -parser = argparse.ArgumentParser(description=description) -parser.add_argument('-cl', '--class_name', default='tless05', type=str, - help='Class name to be added to model save path') -parser.add_argument('-id', '--background_images_directory', type=str, - help='Path to directory containing background images') -parser.add_argument('-pi', '--images_directory', type=str, - help='Path to pre-generated images (npy format)') -parser.add_argument('-bs', '--batch_size', default=4, type=int, - help='Batch size for training') -parser.add_argument('-lr', '--learning_rate', default=0.001, type=float, - help='Initial learning rate for Adam') -parser.add_argument('-ld', '--image_size', default=128, type=int, - help='Size of the side of a square image e.g. 
64') -parser.add_argument('-e', '--max_num_epochs', default=10000, type=int, - help='Maximum number of epochs before finishing') -parser.add_argument('-st', '--steps_per_epoch', default=5, type=int, - help='Steps per epoch') -parser.add_argument('-oc', '--num_occlusions', default=2, type=int, - help='Number of occlusions') -parser.add_argument('-sa', '--save_path', - default=os.path.join( - os.path.expanduser('~'), '.keras/paz/models'), - type=str, help='Path for writing model weights and logs') -parser.add_argument('-rm', '--rotation_matrices', - type=str, help='Path to npy file with a list of rotation matrices', required=True) -parser.add_argument('-de', '--description', - type=str, help='Description of the model') -args = parser.parse_args() - -# Building the whole GAN model -dcgan_input = Input(shape=(128, 128, 3)) -discriminator = Discriminator() -generator = Generator() -color_output, error_output = generator(dcgan_input) -discriminator.trainable = False -discriminator_output = discriminator(color_output) -dcgan = Model(inputs=[dcgan_input], outputs={"color_output": color_output, "error_output": error_output, "discriminator_output": discriminator_output}) - -# For the loss function pix2pose needs to know all the rotations under which the pose looks the same -rotation_matrices = np.load(args.rotation_matrices) -loss_color = loss_color_wrapped(rotation_matrices) - -# Set the loss -optimizer = Adam(args.learning_rate, amsgrad=True) -losses = {"color_output": loss_color, - "error_output": loss_error, - "discriminator_output": "binary_crossentropy"} -lossWeights = {"color_output": 100.0, "error_output": 50.0, "discriminator_output": 1.0} -dcgan.compile(optimizer=optimizer, loss=losses, loss_weights=lossWeights, run_eagerly=True) - -discriminator.trainable = True -discriminator.compile(loss=['binary_crossentropy'], optimizer=optimizer) - -# Creating sequencer -background_image_paths = glob.glob(os.path.join(args.background_images_directory, '*.jpg')) -processor_train = GeneratedImageGenerator(os.path.join(args.images_directory, "train"), args.image_size, background_image_paths, num_occlusions=0) -processor_test = GeneratedImageGenerator(os.path.join(args.images_directory, "test"), args.image_size, background_image_paths, num_occlusions=0) -sequence_train = GeneratingSequencePix2Pose(processor_train, dcgan, args.batch_size, args.steps_per_epoch, rotation_matrices=rotation_matrices) -sequence_test = GeneratingSequencePix2Pose(processor_test, dcgan, args.batch_size, args.steps_per_epoch, rotation_matrices=rotation_matrices) - -# Making directory for saving model weights and logs -model_name = '_'.join([dcgan.name, args.class_name]) -save_path = os.path.join(args.save_path, model_name) -if not os.path.exists(save_path): - os.makedirs(save_path) - -# Setting callbacks -log = CSVLogger(os.path.join(save_path, '%s.log' % model_name)) -log.model = dcgan - -callbacks=[log] - -for callback in callbacks: - callback.on_train_begin() - -for num_epoch in range(args.max_num_epochs): - sequence_iterator_train = sequence_train.__iter__() - sequence_iterator_test = sequence_test.__iter__() - - for callback in callbacks: - callback.on_epoch_begin(num_epoch) - - for num_batch in range(args.steps_per_epoch): - # Train the discriminator - discriminator.trainable = True - batch = next(sequence_iterator_train) - - X_discriminator_real, y_discriminator_real = make_batch_discriminator(generator, batch[0]['input_image'], batch[1]['color_output'], 1) - loss_discriminator_real = 
discriminator.train_on_batch(X_discriminator_real, y_discriminator_real) - - X_discriminator_fake, y_discriminator_fake = make_batch_discriminator(generator, batch[0]['input_image'], batch[1]['color_output'], 0) - loss_discriminator_fake = discriminator.train_on_batch(X_discriminator_fake, y_discriminator_fake) - - loss_discriminator = (loss_discriminator_real + loss_discriminator_fake)/2. - - # Train the generator - discriminator.trainable = False - loss_dcgan, loss_color_output, loss_dcgan_discriminator, loss_error_output = dcgan.train_on_batch(batch[0]['input_image'], - {"color_output": batch[1]['color_output'], - "error_output": batch[1]['error_output'], - "discriminator_output": np.ones((args.batch_size, 1))}) - - # Test the network - batch_test = next(sequence_iterator_test) - loss_dcgan_test, loss_color_output_test, loss_dcgan_discriminator_test, loss_error_output_test = dcgan.test_on_batch(batch_test[0]['input_image'], {"color_output": batch_test[1]['color_output'], "error_output": batch_test[1]['error_output'], "discriminator_output": np.ones((args.batch_size, 1))}) - - print("Loss DCGAN: {}".format(loss_dcgan)) - for callback in callbacks: - callback.on_epoch_end(num_epoch, logs={'loss_discriminator': loss_discriminator, - 'loss_dcgan': loss_dcgan, 'loss_color_output': loss_color_output, - 'loss_dcgan_discriminator': loss_dcgan_discriminator, - 'loss_error_output': loss_error_output, - 'loss_dcgan_test': loss_dcgan_test, 'loss_color_output_test': loss_color_output_test, - 'loss_dcgan_discriminator_test': loss_dcgan_discriminator_test, - 'loss_error_output_test': loss_error_output_test - }) - - -for callback in callbacks: - callback.on_train_end() diff --git a/examples/pix2pose/pix2pose.sh b/examples/pix2pose/pix2pose.sh deleted file mode 100644 index fb315cb9f..000000000 --- a/examples/pix2pose/pix2pose.sh +++ /dev/null @@ -1 +0,0 @@ -python3 train.py --images_directory /home/fabian/.keras/tless_obj05/pix2pose/normal_coloring --background_images_directory /home/fabian/.keras/backgrounds --batch_size 4 --steps_per_epoch 5 --image_size 128 --rotation_matrices /home/fabian/Uni/masterarbeit/src/paz/examples/pix2pose/rotation_matrices/2_fold_symmetry_rotation_matrices.npy \ No newline at end of file diff --git a/examples/pix2pose/utils.py b/examples/pix2pose/utils.py deleted file mode 100644 index 7aaadf344..000000000 --- a/examples/pix2pose/utils.py +++ /dev/null @@ -1,62 +0,0 @@ -import tensorflow as tf -from tensorflow.keras.losses import Loss - - -class LossError(Loss): - def __init__(self): - super(LossError, self).__init__() - - def call(self, y_true, y_pred): - y_true = tf.clip_by_value(tf.math.abs(y_true), tf.float32.min, 1.0) - squared_error = tf.square(y_pred - y_true) - squared_error = tf.reduce_sum(squared_error, axis=3) - squared_error = tf.reduce_mean(squared_error, axis=[1, 2]) - return squared_error - - -class LossColor(Loss): - def __init__(self, rotation_matrices): - super(LossColor, self).__init__() - self.rotation_matrices = rotation_matrices - - - def call(self, color_image, predicted_color_image): - min_loss = tf.float32.max - - # [-1, 1] -> [0, 1] - color_image = (color_image + 1) * 0.5 - - # Calculate masks for the object and the background (they are independent of the rotation) - mask_object = tf.repeat(tf.expand_dims(tf.math.reduce_max(tf.math.ceil(color_image), axis=-1), axis=-1), repeats=3, axis=-1) - mask_background = tf.ones(tf.shape(mask_object)) - mask_object - - # [0, 1] -> [-1, 1] - color_image = (color_image * 2) - 1 - - # Iterate over all possible 
rotations - for rotation_matrix in self.rotation_matrices: - - real_color_image = tf.identity(color_image) - - # Add a small epsilon value to avoid the discontinuity problem - real_color_image = real_color_image + tf.ones_like(real_color_image) * 0.0001 - - # Rotate the object - real_color_image = tf.einsum('ij,mklj->mkli', tf.convert_to_tensor(np.array(rotation_matrix), dtype=tf.float32), real_color_image) - - # Set the background to be all -1 - real_color_image *= mask_object - real_color_image += (mask_background * tf.constant(-1.)) - - # Get the number of pixels - num_pixels = tf.math.reduce_prod(tf.shape(real_color_image)[1:3]) - beta = 3 - - # Calculate the difference between the real and predicted images including the mask - diff_object = tf.math.abs(predicted_color_image*mask_object - real_color_image*mask_object) - diff_background = tf.math.abs(predicted_color_image*mask_background - real_color_image*mask_background) - - # Calculate the total loss - loss_colors = tf.cast((1/num_pixels), dtype=tf.float32)*(beta*tf.math.reduce_sum(diff_object, axis=[1, 2, 3]) + tf.math.reduce_sum(diff_background, axis=[1, 2, 3])) - min_loss = tf.math.minimum(loss_colors, min_loss) - return min_loss From d9fe2c075a61b83c225160a67832ad05dca672ac Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 18 Nov 2021 13:42:56 +0100 Subject: [PATCH 041/101] Add comments to functions --- examples/pix2pose/backend.py | 161 +++++++++++++++++++++++++++++++---- 1 file changed, 144 insertions(+), 17 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index be696aaa1..7b0c4b109 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -7,7 +7,7 @@ def build_cube_points3D(width, height, depth): - """ Build the 3D points of a cube in the openCV coordinate system: + """Build the 3D points of a cube in the openCV coordinate system: 4--------1 /| /| / | / | @@ -46,6 +46,14 @@ def build_cube_points3D(width, height, depth): def _preprocess_image_points2D(image_points2D): + """Preprocessing image points for PnPRANSAC + + # Arguments + image_points2D: Array of shape (num_points, 2) + + # Returns + Contiguous float64 array of shape (num_points, 1, 2) + """ num_points = len(image_points2D) image_points2D = image_points2D.reshape(num_points, 1, 2) image_points2D = image_points2D.astype(np.float64) @@ -55,6 +63,39 @@ def _preprocess_image_points2D(image_points2D): def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, inlier_threshold=5, num_iterations=100): + """Returns rotation (Roc) and translation (Toc) vectors that transform + 3D points in object frame to camera frame. + + O------------O + /| /| + / | / | + O------------O | + | | z | | + | O____|____|__O + | / |___y| / object + | / / | / coordinates + |/ x |/ + O------------O + ___ + Z | + / | Rco, Tco + /_____X <------| + | + | camera + Y coordinates + + # Arguments + object_points3D: Array (num_points, 3). Points 3D in object reference + frame. Represented as (0) in image above. + image_points2D: Array (num_points, 2). Points in 2D in camera UV space. + camera_intrinsics: Array of shape (3, 3). Diagonal elements represent + focal lenghts and last column the image center translation. + inlier_threshold: Number of inliers for RANSAC method. + num_iterations: Maximum number of iterations. + + # Returns + Rotation vector in axis-angle form (3) and translation vector (3). 
+ """ if ((len(object_points3D) < 4) or (len(image_points2D) < 4)): raise ValueError('Solve PnP requires at least 4 3D and 2D points') image_points2D = _preprocess_image_points2D(image_points2D) @@ -67,16 +108,36 @@ def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, def project_to_image(rotation, translation, points3D, camera_intrinsics): - """Project points3D to image plane using a perspective transformation + """Project points3D to image plane using a perspective transformation. + + Image plane + + (0,0)--------> (U) + | + | + | + v + + (V) + + # Arguments + rotation: Array (3, 3). Rotation matrix (Rco). + translation: Array (3). Translation (Tco). + points3D: Array (num_points, 3). Points 3D in object frame. + camera_intrinsics: Array of shape (3, 3). Diagonal elements represent + focal lengths and last column the image center translation. + + # Returns + Array (num_points, 2) in UV image space. """ if rotation.shape != (3, 3): raise ValueError('Rotation matrix is not of shape (3, 3)') if len(translation) != 3: raise ValueError('Translation vector is not of length 3') if len(points3D.shape) != 2: - raise ValueError('points3D should have a shape (N, 3)') + raise ValueError('Points3D should have a shape (num_points, 3)') if points3D.shape[1] != 3: - raise ValueError('points3D should have a shape (N, 3)') + raise ValueError('Points3D should have a shape (num_points, 3)') # TODO missing checks for camera intrinsics conditions points3D = np.matmul(rotation, points3D.T).T + translation x, y, z = np.split(points3D, 3, axis=1) @@ -91,18 +152,18 @@ def project_to_image(rotation, translation, points3D, camera_intrinsics): def draw_cube(image, points, color=GREEN, thickness=2, radius=5): - """ Draws a cube in image. + """Draws a cube in image. # Arguments - image: Numpy array of shape ``[H, W, 3]``. + image: Numpy array of shape (H, W, 3). points: List of length 8 having each element a list - of length two indicating ``(y, x)`` openCV coordinates. + of length two indicating (U, V) openCV coordinates. color: List of length three indicating RGB color of point. thickness: Integer indicating the thickness of the line to be drawn. radius: Integer indicating the radius of corner points to be drawn. # Returns - Numpy array with shape ``[H, W, 3]``. Image with cube. + Numpy array with shape (H, W, 3). Image with cube. """ if points.shape != (8, 2): raise ValueError('Cube points 2D must be of shape (8, 2)') @@ -135,12 +196,46 @@ def draw_cube(image, points, color=GREEN, thickness=2, radius=5): def replace_lower_than_threshold(source, threshold=1e-3, replacement=0.0): + """Replace values from source that are lower than the given threshold. + + # Arguments + source: Array. + threshold: Float. Values lower than this value will be replaced. + replacement: Float. Value taken by elements lower than threshold. + + # Returns + Array of same shape as source. + """ lower_than_epsilon = source < threshold source[lower_than_epsilon] = replacement return source def arguments_to_image_points2D(row_args, col_args): + """Convert array arguments into UV coordinates. + + Image plane + + (0,0)--------> (U) + | + | + | + v + + (V) + + # Arguments + row_args: Array (num_rows). + col_args: Array (num_cols). + + # Returns + Array (num_points, 2) representing points2D in UV space. + + # Notes + Arguments are row args (V) and col args (U). Image points are in UV + coordinates; thus, we concatenate them in that order + i.e.
[col_args, row_args] + """ row_args = row_args.reshape(-1, 1) col_args = col_args.reshape(-1, 1) image_points2D = np.concatenate([col_args, row_args], axis=1) return image_points2D @@ -169,28 +264,60 @@ def draw_maski(image, keypoints, colors, radius=1): def normalize_points2D(points2D, height, width): - """Transform points2D in image coordinates to normalized coordinates. + """Transform points2D in image coordinates to normalized coordinates i.e. + [U, V] -> [-1, 1]. UV have maximum values of [W, H] respectively. + + Image plane + + (0,0)--------> (U) + | + | + | + v + + (V) # Arguments - points2D: Numpy array of shape ``(num_keypoints, 2)``. + points2D: Numpy array of shape (num_keypoints, 2). height: Int. Height of the image width: Int. Width of the image # Returns - Numpy array of shape ``(num_keypoints, 2)``. + Numpy array of shape (num_keypoints, 2). """ image_shape = np.array([width, height]) - points2D = points2D / image_shape # [0, W], [0, H] -> [0, 1], [0, 1] - points2D = 2.0 * points2D # [0, 1], [0, 1] -> [0, 2], [0, 2] - points2D = points2D - 1.0 # [0, 2], [0, 2] -> [-1, 1], [-1, 1] + points2D = points2D / image_shape # [W, 0], [0, H] -> [1, 0], [0, 1] + points2D = 2.0 * points2D # [1, 0], [0, 1] -> [2, 0], [0, 2] + points2D = points2D - 1.0 # [2, 0], [0, 2] -> [-1, 1], [-1, 1] return points2D def denormalize_points2D(points2D, height, width): + """Transform normalized points2D to image UV coordinates i.e. + [-1, 1] -> [U, V]. UV have maximum values of [W, H] respectively. + + Image plane + + (0,0)--------> (U) + | + | + | + v + + (V) + + # Arguments + points2D: Numpy array of shape (num_keypoints, 2). + height: Int. Height of the image + width: Int. Width of the image + + # Returns + Numpy array of shape (num_keypoints, 2). + """ image_shape = np.array([width, height]) - points2D = points2D + 1.0 # [-1, 1], [-1, 1] -> [2, 0], [0, 2] - points2D = points2D / 2.0 # [0 , 2], [0 , 2] -> [0, 1], [0, 1] - points2D = points2D * image_shape # [0 , 1], [0 , 1] -> [0, W], [0, H] + points2D = points2D + 1.0 # [-1, 1], [-1, 1] -> [2, 0], [0, 2] + points2D = points2D / 2.0 # [2 , 0], [0 , 2] -> [1, 0], [0, 1] + points2D = points2D * image_shape # [1 , 0], [0 , 1] -> [W, 0], [0, H] return points2D From acde9da8ebe2f29c7875bf7afb78d6829cd9b7bc Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 18 Nov 2021 15:58:21 +0100 Subject: [PATCH 042/101] Add backend function comments --- examples/pix2pose/backend.py | 58 ++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 7b0c4b109..f12d3276e 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -1,8 +1,6 @@ -# from collections import Iterable import numpy as np from paz.backend.image.draw import GREEN from paz.backend.image import draw_line, draw_dot -# from paz.abstract import Pose6D import cv2 @@ -214,7 +212,7 @@ def replace_lower_than_threshold(source, threshold=1e-3, replacement=0.0): def arguments_to_image_points2D(row_args, col_args): """Convert array arguments into UV coordinates. - Image plane + Image plane (0,0)--------> (U) | @@ -242,24 +240,52 @@ def arguments_to_image_points2D(row_args, col_args): return image_points2D -def draw_masks(image, points): +def points3D_to_RGB(points3D, object_sizes): + """Transforms points3D in object frame to RGB color space. + # Arguments + points3D: Array (num_points, 3). Points3D in object frame. + object_sizes: List (3) indicating the + (width, height, depth) of object.
+ + # Returns + Array of ints (num_points, 3) in RGB space. + """ + colors = points3D / (0.5 * object_sizes) + colors = colors + 1.0 + colors = colors * 127.5 + colors = colors.astype(np.uint8) + return colors + + +def draw_masks(image, points, object_sizes): for points2D, points3D in points: - object_sizes = np.array([0.184, 0.187, 0.052]) - colors = points3D / (object_sizes / 2.0) - colors = (colors + 1.0) * 127.5 - colors = colors.astype('int') - image = draw_maski(image, points2D, colors) + colors = points3D_to_RGB(points3D, object_sizes) + image = draw_points2D(image, points2D, colors) + return image + + +def draw_points2D(image, points2D, colors): + """Draws mask using points2D in UV space using only numpy. + + # Arguments + image: Array (H, W). + points2D: Array (num_points, 2). Points in image UV space. + colors: Array (num_points, 3). Colors in RGB space. + + # Returns + Array with drawn points. + """ + keypoints = points2D.astype(int) + U = keypoints[:, 0] + V = keypoints[:, 1] + image[V, U, :] = colors return image -def draw_maski(image, keypoints, colors, radius=1): - for keypoint, color in zip(keypoints, colors): - R, G, B = color +def draw_points2D_(image, keypoints, colors, radius=1): + for (u, v), (R, G, B) in zip(keypoints, colors): color = (int(R), int(G), int(B)) - x, y = keypoint - x = int(x) - y = int(y) - draw_dot(image, (x, y), color, radius) + draw_dot(image, (u, v), color, radius) return image From 2535574828a3d603e250459c44f48a451a005f85 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 18 Nov 2021 15:59:00 +0100 Subject: [PATCH 043/101] Remove unnecessary values from pipelines --- examples/pix2pose/demo.py | 2 +- examples/pix2pose/pipelines.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 44167d034..18a18d2cf 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -34,7 +34,7 @@ detect = SSD300FAT(score_thresh, draw=False) offsets = [0.2, 0.2] estimate_keypoints = Pix2Pose(model, object_sizes) -pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets, None) +pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) results = pipeline(image) predicted_image = results['image'] diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 16d35ad62..9ef05220b 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -87,8 +87,7 @@ def call(self, image): class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_keypoints, camera, offsets, - class_to_dimensions, radius=3, thickness=1, draw=True): + def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): """Pose estimation pipeline using keypoints.
""" super(EstimatePoseMasks, self).__init__() @@ -108,7 +107,9 @@ def __init__(self, detect, estimate_keypoints, camera, offsets, self.unwrap = UnwrapDictionary(['points2D', 'points3D']) self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.cube_points3D = build_cube_points3D(0.2, 0.2, 0.07) + self.object_sizes = self.estimate_keypoints.object_sizes + # self.cube_points3D = build_cube_points3D(0.2, 0.2, 0.07) + self.cube_points3D = build_cube_points3D(*self.object_sizes) def call(self, image): boxes2D = self.postprocess_boxes(self.detect(image)) @@ -130,7 +131,7 @@ def call(self, image): poses6D.append(pose6D), points.append([points2D, points3D]) if self.draw: image = self.draw_boxes2D(image, boxes2D) - image = draw_masks(image, points) + image = draw_masks(image, points, self.object_sizes) image = draw_poses6D( image, poses6D, self.cube_points3D, self.camera.intrinsics) return self.wrap(image, boxes2D, poses6D) From 2f37602a3627135f2001795bcf88ecdc46a3f439 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 18 Nov 2021 16:06:53 +0100 Subject: [PATCH 044/101] Remove unecessary files --- examples/pix2pose/calibrate_camera.py | 60 --------------- examples/pix2pose/icp.py | 102 -------------------------- examples/pix2pose/messages.py | 50 ------------- 3 files changed, 212 deletions(-) delete mode 100644 examples/pix2pose/calibrate_camera.py delete mode 100644 examples/pix2pose/icp.py delete mode 100644 examples/pix2pose/messages.py diff --git a/examples/pix2pose/calibrate_camera.py b/examples/pix2pose/calibrate_camera.py deleted file mode 100644 index bfc7a3e40..000000000 --- a/examples/pix2pose/calibrate_camera.py +++ /dev/null @@ -1,60 +0,0 @@ -from paz.backend.image import show_image -import numpy as np -import cv2 - - -# def calibrate_camera(square_size, pattern_shape=(5, 5)): - -pattern_size = (5, 7) -square_size_mm = 35 -window_size, zero_zone = (11, 11), (-1, -1) - -# constructing default 3D points -point3D = np.zeros((np.prod(pattern_size), 3), np.float32) -xy_coordinates = np.mgrid[0:pattern_size[0], 0:pattern_size[1]].T -point3D[:, :2] = xy_coordinates.reshape(-1, 2) * square_size_mm - -camera = cv2.VideoCapture(0) -cv2.namedWindow('camera_window') -# 2D points in image plane, 3D points in real world space, images, counter -image_points, points3D, images, image_counter = [], [], [], 0 -criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001) -print('Press `Escape` to quit') -while True: - - frame = camera.read()[1] - image_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - show_image(image_gray, wait=False) - chessboard_found, corners = cv2.findChessboardCorners( - image_gray, pattern_size, None) - print(chessboard_found) - if chessboard_found: - points3D.append(point3D) - refined_corners = cv2.cornerSubPix( - image_gray, corners, window_size, zero_zone, criteria) - image_points.append(refined_corners) - frame = cv2.drawChessboardCorners( - frame, pattern_size, refined_corners, chessboard_found) - show_image(frame) - image_counter = image_counter + 1 - - cv2.imshow('camera_window', frame) - keystroke = cv2.waitKey(1) - - if keystroke % 256 == 27: - print('`Escape` key hit, closing...') - break - -camera.release() -cv2.destroyAllWindows() - -ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera( - points3D, image_points, image_gray.shape[::-1], None, None) -print(ret, mtx, dist, rvecs, tvecs) -print(mtx) -# fx = 659.10 -# fy = 668.76 -# cx = 276.76 -# cy = 252.35 -# ret = 0.6814 -# dist = [9.86e-3, 
1.41, 1.08e-2, 2.431e-3, -7.05] diff --git a/examples/pix2pose/icp.py b/examples/pix2pose/icp.py deleted file mode 100644 index 61ca4352c..000000000 --- a/examples/pix2pose/icp.py +++ /dev/null @@ -1,102 +0,0 @@ -import numpy as np -from sklearn.neighbors import NearestNeighbors - - -def calculate_affine_matrix(pointcloud_A, pointcloud_B): - '''Calculates affine transform with the best least-squares fit transforming - keypoints A to keypoints B. - - # Argument: - pointcloud_A: Array of shape (num_keypoints, 3). - pointcloud_B: Array of shape (num_keypoints, 3). - - # Returns: - T: (m+1)x(m+1) homogeneous transformation matrix that maps A on to B - R: mxm rotation matrix - t: mx1 translation vector - ''' - assert pointcloud_A.shape == pointcloud_B.shape - # translate points to their centroids - centroid3D_A = np.mean(pointcloud_A, axis=0) - centroid3D_B = np.mean(pointcloud_B, axis=0) - centered_keypoints3D_A = pointcloud_A - centroid3D_A - centered_keypoints3D_B = pointcloud_B - centroid3D_B - - covariance = np.dot(centered_keypoints3D_A.T, centered_keypoints3D_B) - U, S, Vt = np.linalg.svd(covariance) - # compute rotation matrix - rotation_matrix = np.dot(Vt.T, U.T) - - # resolve special reflection case - if np.linalg.det(rotation_matrix) < 0: - Vt[3 - 1, :] *= -1 - rotation_matrix = np.dot(Vt.T, U.T) - - # compute translation - translation3D = centroid3D_B.T - np.dot(rotation_matrix, centroid3D_A.T) - - affine_matrix = to_affine_matrix(rotation_matrix, translation3D) - return affine_matrix - - -def to_affine_matrix(rotation_matrix, translation_vector): - translation_vector = translation_vector.reshape(3, 1) - affine = np.concatenate([rotation_matrix, translation_vector], axis=0) - affine = np.concatenate([affine, np.array([[0.0, 0.0, 0.0, 1.0]])], axis=1) - return affine - - -def nearest_neighbor(pointcloud_A, pointcloud_B): - '''Find the nearest (Euclidean) neighbor in dst for each point in src - # Arguments: - src: Nxm array of points - dst: Nxm array of points - # Returns: - distances: Euclidean distances of the nearest neighbor - indices: dst indices of the nearest neighbor - ''' - assert pointcloud_A.shape == pointcloud_B.shape - model = NearestNeighbors(n_neighbors=1) - model.fit(pointcloud_B) - distances, indices = model.kneighbors(pointcloud_A, return_distance=True) - return distances.ravel(), indices.ravel() - - -def add_homogenous_coordinate(keypoints3D): - num_keypoints = len(keypoints3D) - ones = np.ones_like(num_keypoints).reshape(-1, 1) - homogenous_keypoints3D = np.concatenate([keypoints3D, ones], axis=1) - return homogenous_keypoints3D - - -def iterative_closes_point(pointcloud_A, pointcloud_B, initial_pose=None, - max_iterations=20, tolerance=1e-3): - '''Find best least square fit that transforms pointcloud A to pointcloud B. 
- Input: - A: Nxm numpy array of source mD points - B: Nxm numpy array of destination mD point - initial_pose: (m+1)x(m+1) homogeneous transformation - max_iterations: exit algorithm after max_iterations - tolerance: convergence criteria - Output: - T: final homogeneous transformation that maps A on to B - distances: Euclidean distances (errors) of the nearest neighbor - i: number of iterations to converge - ''' - assert pointcloud_A.shape == pointcloud_B.shape - pointcloud_A = add_homogenous_coordinate(pointcloud_A) - pointcloud_B = add_homogenous_coordinate(pointcloud_B) - pointcloud_A_0 = np.copy(pointcloud_A) - if initial_pose is not None: - pointcloud_A = np.dot(initial_pose, pointcloud_A.T).T - previous_error = 0 - for iteration_arg in range(max_iterations): - distances, indices = nearest_neighbor(pointcloud_A, pointcloud_B) - affine_matrix = calculate_affine_matrix(pointcloud_A, pointcloud_B) - pointcloud_A = np.dot(affine_matrix, pointcloud_A.T).T - mean_error = np.mean(distances) - if np.abs(previous_error - mean_error) < tolerance: - break - previous_error = mean_error - affine_transform = calculate_affine_matrix(pointcloud_A_0, pointcloud_A) - return affine_transform, distances, iteration_arg diff --git a/examples/pix2pose/messages.py b/examples/pix2pose/messages.py deleted file mode 100644 index 1c50c176d..000000000 --- a/examples/pix2pose/messages.py +++ /dev/null @@ -1,50 +0,0 @@ -from paz.abstract.messages import Box2D, Pose6D - - -class ObjectHypothesis(object): - # TODO: Check if class_name, score is the same - def __init__(self, score=None, class_name=None, box2D=None, pose6D=None): - self.score = score - self.class_name = class_name - self.box2D = box2D - self.pose6D = pose6D - - @property - def box2D(self): - return self._box2D - - @box2D.setter - def box2D(self, value): - if not isinstance(value, Box2D): - raise ValueError('Value must be a Box2D class') - - if self.score is None: - if value.score is not None: - self.score = value.score - else: - if self.score != value.score: - raise ValueError('Mismatch score between Hypothesis and Box2D') - - - if self.score is None and (value.score is not None): - self.score = value.score - elif (self.score is not None) and (value.score is not None): - if self.score != value.score: - raise ValueError('Mismatch score between Hypothesis and Box2D') - if self.class_name is None and (value.class_name is not None): - self.class_name = value.class_name - self._box2D = value - - @property - def pose6D(self): - return self._pose6D - - @pose6D.setter - def pose6D(self, value): - if not isinstance(value, Pose6D): - raise ValueError('Value must be a Pose6D class') - if (self.score is None) and (value.score is not None): - self.score = value.score - if self.class_name is None and (value.class_name is not None): - self.class_name = value.class_name - self._pose6D = value From 22ea0a1ba138669a764247015da28fb2158b17fd Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 18 Nov 2021 17:35:19 +0100 Subject: [PATCH 045/101] Add resize option for augmenting keypoints based on interpolation --- examples/pix2pose/backend.py | 2 ++ examples/pix2pose/pipelines.py | 22 +++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index f12d3276e..003db374d 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -237,6 +237,7 @@ def arguments_to_image_points2D(row_args, col_args): row_args = row_args.reshape(-1, 1) col_args = col_args.reshape(-1, 
1) image_points2D = np.concatenate([col_args, row_args], axis=1) + # image_points2D = np.concatenate([row_args, col_args], axis=1) return image_points2D @@ -275,6 +276,7 @@ def draw_points2D(image, points2D, colors): # Returns Array with drawn points. """ + # print(np.max(points2D, axis=0)) keypoints = points2D.astype(int) U = keypoints[:, 0] V = keypoints[:, 1] diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 9ef05220b..5e1e1c348 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -13,6 +13,7 @@ from backend import draw_poses6D from backend import draw_masks from paz.backend.quaternion import rotation_vector_to_quaternion +from paz.backend.image import resize_image, show_image class DomainRandomization(SequentialProcessor): @@ -61,7 +62,7 @@ def __init__(self, output_shape): super(RGBMaskToImagePoints2D, self).__init__() self.add(GetNonZeroArguments()) self.add(ArgumentsToImagePoints2D()) - self.add(NormalizePoints2D(output_shape)) + # self.add(NormalizePoints2D(output_shape)) class SolveChangingObjectPnP(SequentialProcessor): @@ -72,17 +73,25 @@ def __init__(self, camera_intrinsics): class Pix2Pose(pr.Processor): - def __init__(self, model, object_sizes, epsilon=0.15): + def __init__(self, model, object_sizes, epsilon=0.15, with_resize=True): self.object_sizes = object_sizes self.predict_RGBMask = PredictRGBMask(model, epsilon) self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) self.wrap = pr.WrapOutput(['points3D', 'points2D', 'RGB_mask']) + self.with_resize = with_resize def call(self, image): RGB_mask = self.predict_RGBMask(image) + if self.with_resize: + print(image.shape, RGB_mask.shape) + RGB_mask = resize_image(RGB_mask, image.shape[:2][::-1]) + print(RGB_mask.shape) + show_image(RGB_mask) points3D = self.mask_to_points3D(RGB_mask) points2D = self.mask_to_points2D(RGB_mask) + from backend import normalize_points2D + points2D = normalize_points2D(points2D, *image.shape[:2][::-1]) return self.wrap(points3D, points2D, RGB_mask) @@ -108,7 +117,6 @@ def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) self.object_sizes = self.estimate_keypoints.object_sizes - # self.cube_points3D = build_cube_points3D(0.2, 0.2, 0.07) self.cube_points3D = build_cube_points3D(*self.object_sizes) def call(self, image): @@ -119,7 +127,11 @@ def call(self, image): for crop, box2D in zip(cropped_images, boxes2D): points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) points2D = denormalize_points2D(points2D, *crop.shape[0:2]) + print(box2D.coordinates) points2D = self.change_coordinates(points2D, box2D) + import numpy as np + print(np.max(points2D, axis=0)) + print(points2D.shape) if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: continue success, rotation, translation = self.predict_pose( @@ -132,6 +144,6 @@ def call(self, image): if self.draw: image = self.draw_boxes2D(image, boxes2D) image = draw_masks(image, points, self.object_sizes) - image = draw_poses6D( - image, poses6D, self.cube_points3D, self.camera.intrinsics) + image = draw_poses6D(image, poses6D, self.cube_points3D, + self.camera.intrinsics) return self.wrap(image, boxes2D, poses6D) From bc30c791f2f50923769d3771b29f358d0bcc7475 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 19 Nov 2021 11:06:16 +0100 Subject: [PATCH 046/101] 
Add comments to missing functions --- examples/pix2pose/backend.py | 124 ++++++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 39 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 003db374d..f8c73fac3 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -237,7 +237,6 @@ def arguments_to_image_points2D(row_args, col_args): row_args = row_args.reshape(-1, 1) col_args = col_args.reshape(-1, 1) image_points2D = np.concatenate([col_args, row_args], axis=1) - # image_points2D = np.concatenate([row_args, col_args], axis=1) return image_points2D @@ -251,6 +250,7 @@ def points3D_to_RGB(points3D, object_sizes): # Returns Array of ints (num_points, 3) in RGB space. """ + # TODO add domain and codomain transform as comments colors = points3D / (0.5 * object_sizes) colors = colors + 1.0 colors = colors * 127.5 @@ -258,6 +258,7 @@ def points3D_to_RGB(points3D, object_sizes): return colors +# TODO change to processor def draw_masks(image, points, object_sizes): for points2D, points3D in points: colors = points3D_to_RGB(points3D, object_sizes) @@ -266,7 +267,7 @@ def draw_masks(image, points, object_sizes): def draw_points2D(image, points2D, colors): - """Draws mask using points2D in UV space using only numpy. + """Draws a pixel for all points2D in UV space using only numpy. # Arguments image: Array (H, W). @@ -276,7 +277,6 @@ def draw_points2D(image, points2D, colors): # Returns Array with drawn points. """ - # print(np.max(points2D, axis=0)) keypoints = points2D.astype(int) U = keypoints[:, 0] V = keypoints[:, 1] @@ -349,23 +349,61 @@ def denormalize_points2D(points2D, height, width): return points2D +def draw_pose6D(image, pose6D, cube_points3D, camera_intrinsics): + """Draws pose6D by projecting cube3D to image space with camera intrinsics. + + # Arguments + image: Array (H, W, 3) + pose6D: paz message Pose6D with quaternion and translation values. + cube_points3D: Array (8, 3). Cube 3D points in object frame. + camera_intrinsics: Array of shape (3, 3). Diagonal elements represent + focal lengths and last column the image center translation. + + # Returns + Original image array (H, W, 3) with drawn cube points. + """ + quaternion, translation = pose6D.quaternion, pose6D.translation + rotation = quaternion_to_rotation_matrix(quaternion) + rotation = np.squeeze(rotation, axis=2) + cube_points2D = project_to_image( + rotation, translation, cube_points3D, camera_intrinsics) + cube_points2D = cube_points2D.astype(np.int32) + image = draw_cube(image, cube_points2D) + return image + + def draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics): - image = image.astype(float) + """Draws pose6D by projecting cube3D to image space with camera intrinsics. + + # Arguments + image: Array (H, W, 3) + poses6D: List of paz messages Pose6D with quaternions and translations. + cube_points3D: Array (8, 3). Cube 3D points in object frame. + camera_intrinsics: Array of shape (3, 3). Diagonal elements represent + focal lengths and last column the image center translation. + + # Returns + Original image array (H, W, 3) with drawn cube points for all poses6D.
+ """ for pose6D in poses6D: - rotation = quaternion_to_rotation_matrix(pose6D.quaternion) - rotation = np.squeeze(rotation, axis=2) - cube_points2D = project_to_image( - rotation, pose6D.translation, - cube_points3D, camera_intrinsics) - cube_points2D = cube_points2D.astype(np.int32) - image = draw_cube(image, cube_points2D) - image = image.astype('uint8') + image = draw_pose6D(image, pose6D, cube_points3D, camera_intrinsics) return image -# NOT USED def homogenous_quaternion_to_rotation_matrix(quaternion): - # w0, q1, q2, q3 = quaternion + """Transforms quaternion to rotation matrix. + + # Arguments + quaternion: Array containing quaternion value [q1, q2, q3, w0]. + + # Returns + Rotation matrix [3, 3]. + + # Note + If quaternion is not a unit quaternion the output is not a valid + rotation matrix but a scalar multiple of one i.e. the rotation matrix + scaled by the squared norm of the quaternion. + """ q1, q2, q3, w0 = quaternion r11 = w0**2 + q1**2 - q2**2 - q3**2 @@ -386,43 +424,51 @@ def homogenous_quaternion_to_rotation_matrix(quaternion): return rotation_matrix -def inhomogenous_quaternion_to_rotation_matrix(q): - # quaternion - # q = q[::-1] - r11 = 1 - (2 * (q[1]**2 + q[2]**2)) - r12 = 2 * (q[0] * q[1] - q[3] * q[2]) - r13 = 2 * (q[3] * q[1] + q[0] * q[2]) - - r21 = 2 * (q[0] * q[1] + q[3] * q[2]) - r22 = 1 - (2 * (q[0]**2 + q[2]**2)) - r23 = 2 * (q[1] * q[2] - q[3] * q[0]) - - r31 = 2 * (q[0] * q[2] - q[3] * q[1]) - r32 = 2 * (q[3] * q[0] + q[1] * q[2]) - r33 = 1 - (2 * (q[0]**2 + q[1]**2)) - - rotation_matrix = np.array([[r11, r12, r13], - [r21, r22, r23], - [r31, r32, r33]]) - - return rotation_matrix +def quaternion_to_rotation_matrix(quaternion): + """Transforms quaternion to rotation matrix. + # Arguments + quaternion: Array containing quaternion value [q1, q2, q3, w0]. -def quaternion_to_rotation_matrix(quaternion, homogenous=True): - if homogenous: - matrix = homogenous_quaternion_to_rotation_matrix(quaternion) - else: - matrix = inhomogenous_quaternion_to_rotation_matrix(quaternion) + # Returns + Rotation matrix [3, 3]. + + # Note + "If the quaternion is not a unit quaternion then the homogeneous form + is still a scalar multiple of a rotation matrix, while the + inhomogeneous form is in general no longer an orthogonal matrix. + This is why in numerical work the homogeneous form is to be preferred + if distortion is to be avoided." [wikipedia](https://en.wikipedia.org/ + wiki/Conversion_between_quaternions_and_Euler_angles) + """ + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) return matrix def rotation_vector_to_rotation_matrix(rotation_vector): + """Transforms rotation vector (axis-angle) form to rotation matrix. + + # Arguments + rotation_vector: Array (3). Rotation vector in axis-angle form. + + # Returns + Array (3, 3) rotation matrix. + """ rotation_matrix = np.eye(3) cv2.Rodrigues(rotation_vector, rotation_matrix) return rotation_matrix def to_affine_matrix(rotation_matrix, translation): + """Builds affine matrix from rotation matrix and translation vector. + + # Arguments + rotation_matrix: Array (3, 3). Representing a rotation matrix. + translation: Array (3). Translation vector. + + # Returns + Array (4, 4) representing an affine matrix.
+ """ if len(translation) != 3: raise ValueError('Translation should be of lenght 3') if rotation_matrix.shape != (3, 3): From a8066a492d0976d91922663112d04b3853bb3bb6 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 19 Nov 2021 11:07:01 +0100 Subject: [PATCH 047/101] Remove unnecessary flags for estimate keypoints pipeline --- examples/pix2pose/demo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 18a18d2cf..00aec2636 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -20,6 +20,7 @@ image_size = camera.read().shape[0:2] camera.stop() +# image = load_image('test_image2.jpg') image = load_image('test_image.jpg') image_size = image.shape[0:2] focal_length = image_size[1] @@ -29,11 +30,11 @@ [0, focal_length, image_center[1]], [0, 0, 1]]) object_sizes = np.array([0.184, 0.187, 0.052]) -epsilon = 0.15 +epsilon = 0.001 score_thresh = 0.50 detect = SSD300FAT(score_thresh, draw=False) offsets = [0.2, 0.2] -estimate_keypoints = Pix2Pose(model, object_sizes) +estimate_keypoints = Pix2Pose(model, object_sizes, epsilon, True) pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) results = pipeline(image) From 646d733b0f98039320870cb534792804653176a9 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 19 Nov 2021 11:07:36 +0100 Subject: [PATCH 048/101] Add reminder to debug incorrect shape management --- examples/pix2pose/pipelines.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 5e1e1c348..affa419af 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -12,6 +12,7 @@ from backend import denormalize_points2D from backend import draw_poses6D from backend import draw_masks +from backend import normalize_points2D from paz.backend.quaternion import rotation_vector_to_quaternion from paz.backend.image import resize_image, show_image @@ -66,10 +67,11 @@ def __init__(self, output_shape): class SolveChangingObjectPnP(SequentialProcessor): - def __init__(self, camera_intrinsics): + def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): super(SolveChangingObjectPnP, self).__init__() self.MINIMUM_REQUIRED_POINTS = 4 - self.add(SolveChangingObjectPnPRANSAC(camera_intrinsics)) + self.add(SolveChangingObjectPnPRANSAC( + camera_intrinsics, inlier_thresh, num_iterations)) class Pix2Pose(pr.Processor): @@ -84,13 +86,9 @@ def __init__(self, model, object_sizes, epsilon=0.15, with_resize=True): def call(self, image): RGB_mask = self.predict_RGBMask(image) if self.with_resize: - print(image.shape, RGB_mask.shape) RGB_mask = resize_image(RGB_mask, image.shape[:2][::-1]) - print(RGB_mask.shape) - show_image(RGB_mask) points3D = self.mask_to_points3D(RGB_mask) points2D = self.mask_to_points2D(RGB_mask) - from backend import normalize_points2D points2D = normalize_points2D(points2D, *image.shape[:2][::-1]) return self.wrap(points3D, points2D, RGB_mask) @@ -127,18 +125,16 @@ def call(self, image): for crop, box2D in zip(cropped_images, boxes2D): points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) points2D = denormalize_points2D(points2D, *crop.shape[0:2]) - print(box2D.coordinates) points2D = self.change_coordinates(points2D, box2D) - import numpy as np - print(np.max(points2D, axis=0)) - print(points2D.shape) if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: continue success, rotation, translation = 
self.predict_pose( points3D, points2D) if success is False: continue + print('ROTATION', rotation.shape) quaternion = rotation_vector_to_quaternion(rotation) + print('QUATERNION', quaternion.shape) pose6D = Pose6D(quaternion, translation, box2D.class_name) poses6D.append(pose6D), points.append([points2D, points3D]) From cf8a1d7157c1fbfbfc9122e0e829e65d16f98108 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 19 Nov 2021 11:42:54 +0100 Subject: [PATCH 049/101] Add comments to loss functions --- examples/pix2pose/loss.py | 139 +++++++++++++++++++++++++++++++++----- examples/pix2pose/test.py | 6 ++ 2 files changed, 129 insertions(+), 16 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index bfe7e90ea..b27118b7f 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -3,19 +3,47 @@ import tensorflow as tf -def extract_alpha_mask(RGBA_mask): +def split_alpha_mask(RGBA_mask): + """Splits alpha mask and RGB image. + + # Arguments + RGBA_mask: Tensor [batch, H, W, 4] + + # Returns + Color tensor [batch, H, W, 3] and alpha tensor [batch, H, W, 1] + """ color_mask = RGBA_mask[:, :, :, 0:3] alpha_mask = RGBA_mask[:, :, :, 3:4] return color_mask, alpha_mask -def extract_error_mask(RGBE_mask): +def split_error_mask(RGBE_mask): + """Splits error mask and RGB image. + + # Arguments + RGBE_mask: Tensor [batch, H, W, 4] + + # Returns + Color tensor [batch, H, W, 3] and error tensor [batch, H, W, 1] + + """ color_mask = RGBE_mask[:, :, :, 0:3] error_mask = RGBE_mask[:, :, :, 3:4] return color_mask, error_mask def compute_foreground_loss(RGB_true, RGB_pred, alpha_mask): + """Computes foreground reconstruction L1 loss by using only positive alpha + mask values. + + # Arguments + RGB_true: Tensor [batch, H, W, 3]. True RGB label values. + RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. + alpha_mask: Tensor [batch, H, W, 1]. True normalized alpha mask values. + + # Returns + Tensor [batch, H, W, 3] with foreground loss values. + """ foreground_true = RGB_true * alpha_mask foreground_pred = RGB_pred * alpha_mask foreground_loss = tf.abs(foreground_true - foreground_pred) @@ -23,6 +51,17 @@ def compute_foreground_loss(RGB_true, RGB_pred, alpha_mask): def compute_background_loss(RGB_true, RGB_pred, alpha_mask): + """Computes background reconstruction L1 loss by using the inverted alpha + mask values. + + # Arguments + RGB_true: Tensor [batch, H, W, 3]. True RGB label values. + RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. + alpha_mask: Tensor [batch, H, W, 1]. True normalized alpha mask values. + + # Returns + Tensor [batch, H, W, 3] with background loss values. + """ background_true = RGB_true * (1.0 - alpha_mask) background_pred = RGB_pred * (1.0 - alpha_mask) background_loss = tf.abs(background_true - background_pred) @@ -30,7 +69,19 @@ def compute_background_loss(RGB_true, RGB_pred, alpha_mask): def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0): - RGB_true, alpha_mask = extract_alpha_mask(RGBA_true) + """Computes L1 reconstruction loss by multiplying positive alpha mask + by beta. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. + beta: Float. Value used to multiply positive alpha mask values. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values.
+ + """ + RGB_true, alpha_mask = split_alpha_mask(RGBA_true) foreground_loss = compute_foreground_loss(RGB_true, RGB_pred, alpha_mask) background_loss = compute_background_loss(RGB_true, RGB_pred, alpha_mask) reconstruction_loss = (beta * foreground_loss) + background_loss @@ -39,13 +90,35 @@ def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0): def compute_weighted_reconstruction_loss_with_error( RGBA_true, RGBE_pred, beta=3.0): - RGB_pred, error_mask = extract_error_mask(RGBE_pred) + """Computes L1 reconstruction loss by multiplying positive alpha mask + by beta. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGBE_pred: Tensor [batch, H, W, 4]. Predicted RGB and error mask. + beta: Float. Value used to multiply positive alpha mask values. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values. + + """ + RGB_pred, error_mask = split_error_mask(RGBE_pred) loss = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) return loss def compute_error_prediction_loss(RGBA_true, RGBE_pred): - RGB_pred, error_pred = extract_error_mask(RGBE_pred) + """Computes L2 reconstruction loss of predicted error mask. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGBE_pred: Tensor [batch, H, W, 4]. Predicted RGB and error mask. + + # Returns + Tensor [batch, H, W] with error prediction loss values. + + """ + RGB_pred, error_pred = split_error_mask(RGBE_pred) error_true = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, 1.0) error_true = tf.minimum(error_true, 1.0) error_loss = mean_squared_error(error_true, error_pred) @@ -53,7 +126,40 @@ def compute_error_prediction_loss(RGBA_true, RGBE_pred): +class WeightedReconstruction(Loss): + """Computes L1 reconstruction loss by multiplying positive alpha mask + by beta. + + # Arguments + beta: Float. Value used to multiply positive alpha mask values. + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values. + + """ + def __init__(self, beta=3.0): + super(WeightedReconstruction, self).__init__() + self.beta = beta + + def call(self, RGBA_true, RGB_pred): + loss = compute_weighted_reconstruction_loss( + RGBA_true, RGB_pred, self.beta) + return loss + + class ErrorPrediction(Loss): + """Computes L2 reconstruction loss of predicted error mask. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGBE_pred: Tensor [batch, H, W, 4]. Predicted RGB and error mask. + + # Returns + Tensor [batch, H, W] with error prediction loss values. + + """ def __init__(self): super(ErrorPrediction, self).__init__() @@ -63,6 +169,18 @@ def call(self, RGBA_true, RGBE_pred): class WeightedReconstructionWithError(Loss): + """Computes L1 reconstruction loss by multiplying positive alpha mask + by beta. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGBE_pred: Tensor [batch, H, W, 4]. Predicted RGB and error mask. + beta: Float. Value used to multiply positive alpha mask values. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values.
+ + """ def __init__(self, beta=3.0): super(WeightedReconstructionWithError, self).__init__() self.beta = beta @@ -73,17 +191,6 @@ def call(self, RGBA_true, RGBE_pred): return reconstruction_loss -class WeightedReconstruction(Loss): - def __init__(self, beta=3.0): - super(WeightedReconstruction, self).__init__() - self.beta = beta - - def call(self, RGBA_true, RGB_pred): - loss = compute_weighted_reconstruction_loss( - RGBA_true, RGB_pred, self.beta) - return loss - - def MSE_without_last_channel(y_true, y_pred): squared_difference = tf.square(y_true[:, :, :, 0:3] - y_pred) return tf.reduce_mean(squared_difference, axis=-1) # Note the `axis=-1` diff --git a/examples/pix2pose/test.py b/examples/pix2pose/test.py index d23077c90..d9effe5b6 100644 --- a/examples/pix2pose/test.py +++ b/examples/pix2pose/test.py @@ -1,6 +1,8 @@ from paz.abstract import SequentialProcessor, Processor from paz import processors as pr import numpy as np +from backend import build_cube_points3D +# import pytest class PipelineWithTwoChannels(SequentialProcessor): @@ -61,3 +63,7 @@ def test_copy_with_controlmap_using_3_channels_plus(): assert len(values) == 2 assert np.allclose(values[0], A_random_values + B_random_values) assert np.allclose(values[1], A_random_values) + + +def test_build_cube_points3D(width, height, depth): + cube_points3D = build_cube_points3D(width, height, depth) From f3005843be7edd49dc1dcdf35cfd74c6ff88c7e1 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 19 Nov 2021 11:47:51 +0100 Subject: [PATCH 050/101] Removed keras GAN examples --- .../models/fully_convolutional_net.py | 5 +- examples/pix2pose/models/gan_example.py | 81 ------------------- examples/pix2pose/models/keras_example.py | 67 --------------- 3 files changed, 3 insertions(+), 150 deletions(-) delete mode 100644 examples/pix2pose/models/gan_example.py delete mode 100644 examples/pix2pose/models/keras_example.py diff --git a/examples/pix2pose/models/fully_convolutional_net.py b/examples/pix2pose/models/fully_convolutional_net.py index 57d10e102..8a9da517e 100644 --- a/examples/pix2pose/models/fully_convolutional_net.py +++ b/examples/pix2pose/models/fully_convolutional_net.py @@ -1,10 +1,11 @@ from tensorflow.keras.models import Model -from tensorflow.keras.layers import Input, Conv2D, Activation, LeakyReLU +from tensorflow.keras.layers import ( + Input, Conv2D, Activation, LeakyReLU, BatchNormalization) def block(x, filters, dilation_rate, alpha): x = Conv2D(filters, (3, 3), dilation_rate=dilation_rate, padding='same')(x) - # x = BatchNormalization()(x) + x = BatchNormalization()(x) x = LeakyReLU(alpha)(x) return x diff --git a/examples/pix2pose/models/gan_example.py b/examples/pix2pose/models/gan_example.py deleted file mode 100644 index 8472a9462..000000000 --- a/examples/pix2pose/models/gan_example.py +++ /dev/null @@ -1,81 +0,0 @@ -import tensorflow as tf -from tensorflow.keras.models import Model -from tensorflow.keras.metrics import Mean - - -class Pix2PoseGAN(Model): - def __init__(self, image_shape, discriminator, generator, latent_dim): - super(Pix2PoseGAN, self).__init__() - self.image_shape = image_shape - self.discriminator = discriminator - self.generator = generator - self.latent_dim = latent_dim - self.generator_loss_tracker = Mean(name='generator_loss') - self.discriminator_loss_tracker = Mean(name='discriminator_loss') - - @property - def metrics(self): - return [self.generator_loss_tracker, self.discriminator_loss_tracker] - - def compile(self, d_optimizer, g_optimizer, loss_fn): - super(Pix2PoseGAN, 
self).compile() - self.d_optimizer = d_optimizer - self.g_optimizer = g_optimizer - self.loss_fn = loss_fn - - def train_step(self, data): - RGB_inputs, RGB_labels = data - RGB_generated = self.generator(RGB_inputs) - RGB_combined = tf.concat([RGB_generated, RGB_labels], axis=0) - """ - # Add dummy dimensions to the labels so that they can be concatenated with - # the images. This is for the discriminator. - image_one_hot_labels = one_hot_labels[:, :, None, None] - image_one_hot_labels = tf.repeat(image_one_hot_labels, repeats=[image_size * image_size]) - image_one_hot_labels = tf.reshape(image_one_hot_labels, (-1, image_size, image_size, num_classes)) - - # Sample random points in the latent space and concatenate the labels. - # This is for the generator. - batch_size = tf.shape(real_images)[0] - random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) - random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) - - # Decode the noise (guided by labels) to fake images. - generated_images = self.generator(random_vector_labels) - """ - - # Combine them with real images. Note that we are concatenating the labels - # with these images here. - - # Assemble labels discriminating real from fake images. - labels = tf.concat([tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0) - - # Train the discriminator. - with tf.GradientTape() as tape: - predictions = self.discriminator(combined_images) - d_loss = self.loss_fn(labels, predictions) - grads = tape.gradient(d_loss, self.discriminator.trainable_weights) - self.d_optimizer.apply_gradients(zip(grads, self.discriminator.trainable_weights)) - - # Sample random points in the latent space. - random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) - random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1) - - # Assemble labels that say "all real images". - misleading_labels = tf.zeros((batch_size, 1)) - - # Train the generator (note that we should *not* update the weights - # of the discriminator)! - with tf.GradientTape() as tape: - fake_images = self.generator(random_vector_labels) - fake_image_and_labels = tf.concat([fake_images, image_one_hot_labels], -1) - predictions = self.discriminator(fake_image_and_labels) - g_loss = self.loss_fn(misleading_labels, predictions) - grads = tape.gradient(g_loss, self.generator.trainable_weights) - self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights)) - - # Monitor loss. 
- self.generator_loss_tracker.update_state(g_loss) - self.discriminator_loss_tracker.update_state(d_loss) - return {'generator_loss': self.generator_loss_tracker.result(), - 'discrminator_loss': self.discriminator_loss_tracker.result()} diff --git a/examples/pix2pose/models/keras_example.py b/examples/pix2pose/models/keras_example.py deleted file mode 100644 index c3f016cfa..000000000 --- a/examples/pix2pose/models/keras_example.py +++ /dev/null @@ -1,67 +0,0 @@ -class GAN(keras.Model): - def __init__(self, discriminator, generator, latent_dim): - super(GAN, self).__init__() - self.discriminator = discriminator - self.generator = generator - self.latent_dim = latent_dim - - def compile(self, d_optimizer, g_optimizer, loss_fn): - super(GAN, self).compile() - self.d_optimizer = d_optimizer - self.g_optimizer = g_optimizer - self.loss_fn = loss_fn - self.d_loss_metric = keras.metrics.Mean(name="d_loss") - self.g_loss_metric = keras.metrics.Mean(name="g_loss") - - @property - def metrics(self): - return [self.d_loss_metric, self.g_loss_metric] - - def train_step(self, real_images): - # Sample random points in the latent space - batch_size = tf.shape(real_images)[0] - random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) - - # Decode them to fake images - generated_images = self.generator(random_latent_vectors) - - # Combine them with real images - combined_images = tf.concat([generated_images, real_images], axis=0) - - # Assemble labels discriminating real from fake images - labels = tf.concat( - [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0 - ) - # Add random noise to the labels - important trick! - labels += 0.05 * tf.random.uniform(tf.shape(labels)) - - # Train the discriminator - with tf.GradientTape() as tape: - predictions = self.discriminator(combined_images) - d_loss = self.loss_fn(labels, predictions) - grads = tape.gradient(d_loss, self.discriminator.trainable_weights) - self.d_optimizer.apply_gradients( - zip(grads, self.discriminator.trainable_weights) - ) - - # Sample random points in the latent space - random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) - - # Assemble labels that say "all real images" - misleading_labels = tf.zeros((batch_size, 1)) - - # Train the generator (note that we should *not* update the weights - # of the discriminator)! 
-        with tf.GradientTape() as tape:
-            predictions = self.discriminator(self.generator(random_latent_vectors))
-            g_loss = self.loss_fn(misleading_labels, predictions)
-        grads = tape.gradient(g_loss, self.generator.trainable_weights)
-        self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights))
-
-        # Update metrics
-        self.d_loss_metric.update_state(d_loss)
-        self.g_loss_metric.update_state(g_loss)
-        return {
-            "d_loss": self.d_loss_metric.result(),
-            "g_loss": self.g_loss_metric.result(),
-        }
From 317793049a5279c513c15122b70cd591c28edb2e Mon Sep 17 00:00:00 2001
From: Octavio Arriaga
Date: Fri, 19 Nov 2021 11:49:28 +0100
Subject: [PATCH 051/101] Remove unnecessary metric function

---
 examples/pix2pose/loss.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py
index b27118b7f..8e28f8a6c 100644
--- a/examples/pix2pose/loss.py
+++ b/examples/pix2pose/loss.py
@@ -189,8 +189,3 @@ def call(self, RGBA_true, RGBE_pred):
         reconstruction_loss = compute_weighted_reconstruction_loss_with_error(
             RGBA_true, RGBE_pred, self.beta)
         return reconstruction_loss
-
-
-def MSE_without_last_channel(y_true, y_pred):
-    squared_difference = tf.square(y_true[:, :, :, 0:3] - y_pred)
-    return tf.reduce_mean(squared_difference, axis=-1)  # Note the `axis=-1`
From 0338f70b1e21c3d5fd68da4b333b9eda2e13c028 Mon Sep 17 00:00:00 2001
From: Octavio Arriaga
Date: Fri, 19 Nov 2021 23:33:36 +0100
Subject: [PATCH 052/101] Add untested symmetric weighted loss

---
 examples/pix2pose/loss.py | 60 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 56 insertions(+), 4 deletions(-)

diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py
index 8e28f8a6c..50332258c 100644
--- a/examples/pix2pose/loss.py
+++ b/examples/pix2pose/loss.py
@@ -51,8 +51,8 @@ def compute_foreground_loss(RGB_true, RGB_pred, alpha_mask):


 def compute_background_loss(RGB_true, RGB_pred, alpha_mask):
-    """Computes background reconstruction L1 loss by using the inverted alpha
-    mask values.
+    """Computes the L1 reconstruction loss of the background by masking the
+    true and predicted RGB values with the inverted alpha mask.

     # Arguments
         RGB_true: Tensor [batch, H, W, 3]. True RGB label values.
@@ -69,8 +69,8 @@ def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0):
-    """Computes L1 reconstruction loss by multiplying positive alpha mask
-    by beta.
+    """Computes the L1 reconstruction loss, weighting the positive alpha
+    mask values in the predicted RGB image by beta.

     # Arguments
         RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values.
@@ -88,6 +88,44 @@ def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0):
     return tf.reduce_mean(reconstruction_loss, axis=-1, keepdims=True)


+def to_normalized_device_coordinates(image):
+    """Map image value from [0, 1] -> [-1, 1].
+    """
+    return (image * 2) - 1.0
+
+
+def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0):
+    """Computes the minimum of all rotated L1 reconstruction losses weighting
+    the positive alpha mask values in the predicted RGB image by beta.
+
+    # Arguments
+        RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values.
+        RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values.
+        rotations: Array (num_symmetries, 3, 3). Rotation matrices
+            that when applied lead to the same object view.
+
+    # Returns
+        Tensor [batch, H, W] with weighted reconstruction loss values.
+
+    """
+    # alpha mask is invariant to rotations that leave the shape symmetric.
+    RGB_true, alpha = split_alpha_mask(RGBA_true)
+    RGB_original_shape = tf.shape(RGBA_true)
+    RGB_true = tf.reshape(RGB_true, [-1, 3])
+    RGB_true = to_normalized_device_coordinates(RGB_true)
+    symmetric_losses = []
+    for rotation in rotations:
+        RGB_true_symmetric = tf.matmul(rotation, RGB_true.T).T
+        RGB_true_symmetric = tf.reshape(RGB_true_symmetric, RGB_original_shape)
+        RGBA_true_symmetric = tf.concat([RGB_true_symmetric, alpha], axis=3)
+        symmetric_loss = compute_weighted_reconstruction_loss(
+            RGBA_true_symmetric, RGB_pred, beta)
+        symmetric_loss = tf.expand_dims(symmetric_loss, -1)
+        symmetric_losses.append(symmetric_loss)
+    symmetric_losses = tf.concat(symmetric_losses, axis=-1)
+    minimum_symmetric_loss = tf.reduce_min(symmetric_losses, axis=-1)
+    return minimum_symmetric_loss
+
+
 def compute_weighted_reconstruction_loss_with_error(
         RGBA_true, RGBE_pred, beta=3.0):
     """Computes L1 reconstruction loss by multiplying positive alpha mask
@@ -149,6 +187,20 @@ def call(self, RGBA_true, RGB_pred):
         return loss


+class WeightedSymmetricReconstruction(Loss):
+    """Computes the minimum of all rotated L1 reconstruction losses weighting
+    the positive alpha mask values in the predicted RGB image by beta.
+    """
+    def __init__(self, rotations, beta=3.0):
+        self.rotations = rotations
+        self.beta = beta
+
+    def call(self, RGBA_true, RGB_pred):
+        loss = compute_weighted_symmetric_loss(
+            RGBA_true, RGB_pred, self.rotations, self.beta)
+        return loss
+
+
 class ErrorPrediction(Loss):
     """Computes L2 reconstruction loss of predicted error mask.

     # Arguments
         RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values.
         RGBE_pred: Tensor [batch, H, W, 4]. Predicted RGB and error mask.

From db21312357e38cf80154d4ed26706009b473aa5d Mon Sep 17 00:00:00 2001
From: Octavio Arriaga
Date: Fri, 19 Nov 2021 23:34:27 +0100
Subject: [PATCH 053/101] Change space name to computer graphics convention
 i.e. NDC

---
 examples/pix2pose/pipelines.py  |  4 ++--
 examples/pix2pose/processors.py | 14 ++++++++------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py
index affa419af..e8f1e4a9d 100644
--- a/examples/pix2pose/pipelines.py
+++ b/examples/pix2pose/pipelines.py
@@ -4,7 +4,7 @@
 from paz import processors as pr
 from processors import (
     GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D,
-    ImageToClosedOneBall, Scale, SolveChangingObjectPnPRANSAC,
+    ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC,
     ReplaceLowerThanThreshold)
 from backend import build_cube_points3D
 from processors import UnwrapDictionary
@@ -54,7 +54,7 @@ class RGBMaskToObjectPoints3D(SequentialProcessor):
     def __init__(self, object_sizes):
         super(RGBMaskToObjectPoints3D, self).__init__()
         self.add(GetNonZeroValues())
-        self.add(ImageToClosedOneBall())
+        self.add(ImageToNormalizedDeviceCoordinates())
         self.add(Scale(object_sizes / 2.0))


diff --git a/examples/pix2pose/processors.py b/examples/pix2pose/processors.py
index c47acd2e9..1a6255626 100644
--- a/examples/pix2pose/processors.py
+++ b/examples/pix2pose/processors.py
@@ -11,26 +11,28 @@
 from backend import normalize_points2D
 from backend import rotation_vector_to_rotation_matrix
 from backend import to_affine_matrix
+from backend import image_to_normalized_device_coordinates
+from backend import normalized_device_coordinates_to_image


-class ImageToClosedOneBall(Processor):
+class ImageToNormalizedDeviceCoordinates(Processor):
     """Map image value from [0, 255] -> [-1, 1].
""" def __init__(self): - super(ImageToClosedOneBall, self).__init__() + super(ImageToNormalizedDeviceCoordinates, self).__init__() def call(self, image): - return (image / 127.5) - 1.0 + return image_to_normalized_device_coordinates(image) -class ClosedOneBallToImage(Processor): +class NormalizedDeviceCoordinatesToImage(Processor): """Map normalized value from [-1, 1] -> [0, 255]. """ def __init__(self): - super(ClosedOneBallToImage, self).__init__() + super(NormalizedDeviceCoordinatesToImage, self).__init__() def call(self, image): - return (image + 1.0) * 127.5 + return normalized_device_coordinates_to_image(image) class DrawBoxes3D(Processor): From 34f1565b86437ae55516816bee3f50302ef179c2 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 19 Nov 2021 23:35:01 +0100 Subject: [PATCH 054/101] Add NDC transforms and rotation matrix builds --- examples/pix2pose/backend.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index f8c73fac3..4ef476610 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -478,3 +478,30 @@ def to_affine_matrix(rotation_matrix, translation): affine_row = np.array([[0.0, 0.0, 0.0, 1.0]]) affine_matrix = np.concatenate([affine_top, affine_row], axis=0) return affine_matrix + + +def image_to_normalized_device_coordinates(image): + """Map image value from [0, 255] -> [-1, 1]. + """ + return (image / 127.5) - 1.0 + + +def normalized_device_coordinates_to_image(image): + """Map normalized value from [-1, 1] -> [0, 255]. + """ + return (image + 1.0) * 127.5 + + +def build_rotation_matrix_z(angle): + """Builds rotation matrix in Z axis. + # Arguments + angle: Float. Angle in radians. + # Return + Array (3, 3) rotation matrix in Z axis. + """ + cos_angle = np.cos(angle) + sin_angle = np.cos(angle) + rotation_matrix_z = np.array([[+cos_angle, -sin_angle, 0.0], + [+sin_angle, +cos_angle, 0.0], + [0.0, 0.0, 1.0]]) + return rotation_matrix_z From a5fd6a0214313c7fe5a626abe30db9f4dc6b323d Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Sat, 20 Nov 2021 00:14:09 +0100 Subject: [PATCH 055/101] Add untested symmetric loss training script --- examples/pix2pose/loss.py | 12 +++-- examples/pix2pose/train_symmetric.py | 72 ++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 examples/pix2pose/train_symmetric.py diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index 50332258c..88d4e0596 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -109,13 +109,16 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): """ # alpha mask is invariant to rotations that leave the shape symmetric. 
RGB_true, alpha = split_alpha_mask(RGBA_true) - RGB_original_shape = tf.shape(RGBA_true) - RGB_true = tf.reshape(RGB_true, [-1, 3]) + # RGB_original_shape = tf.shape(RGBA_true) + batch_size, H, W, num_channels = RGB_true.shape + batch_size, H, W, num_channels = 32, 128, 128, 3 + RGB_true = tf.reshape(RGB_true, [batch_size, -1, 3]) RGB_true = to_normalized_device_coordinates(RGB_true) symmetric_losses = [] for rotation in rotations: - RGB_true_symmetric = tf.matmul(rotation, RGB_true.T).T - RGB_true_symmetric = tf.reshape(RGB_true_symmetric, RGB_original_shape) + # RGB_true_symmetric = tf.matmul(rotation, RGB_true.T).T + RGB_true_symmetric = tf.einsum('ij,bpj->bpi', rotation, RGB_true) + RGB_true_symmetric = tf.reshape(RGB_true_symmetric, (batch_size, H, W, num_channels)) RGBA_true_symmetric = tf.concat([RGB_true_symmetric, alpha], axis=3) symmetric_loss = compute_weighted_reconstruction_loss( RGBA_true_symmetric, RGB_pred, beta) @@ -192,6 +195,7 @@ class WeightedSymmetricReconstruction(Loss): the positive alpha mask values in the predicted RGB image by beta. """ def __init__(self, rotations, beta=3.0): + super(WeightedSymmetricReconstruction, self).__init__() self.rotations = rotations self.beta = beta diff --git a/examples/pix2pose/train_symmetric.py b/examples/pix2pose/train_symmetric.py new file mode 100644 index 000000000..ae9f078ea --- /dev/null +++ b/examples/pix2pose/train_symmetric.py @@ -0,0 +1,72 @@ +import os +import glob +import numpy as np +from tensorflow.keras.optimizers import Adam +from paz.abstract import GeneratingSequence +from paz.models.segmentation import UNET_VGG16 +from backend import build_rotation_matrix_z + +from scenes import PixelMaskRenderer +from pipelines import DomainRandomization +from loss import WeightedSymmetricReconstruction +from metrics import mean_squared_error + +image_shape = [128, 128, 3] +root_path = os.path.expanduser('~') +background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' +background_wildcard = os.path.join(root_path, background_wildcard) +image_paths = glob.glob(background_wildcard) +# path_OBJ = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' +path_OBJ = 'single_solar_panel_02.obj' +path_OBJ = os.path.join(root_path, path_OBJ) +num_occlusions = 1 +viewport_size = image_shape[:2] +y_fov = 3.14159 / 4.0 +distance = [0.3, 0.5] +light = [1.0, 30] +top_only = False +roll = 3.14159 +shift = 0.05 +batch_size = 32 +beta = 3.0 +alpha = 0.1 +filters = 16 +num_classes = 3 +learning_rate = 0.001 +max_num_epochs = 10 +beta = 3.0 +steps_per_epoch = 1000 +H, W, num_channels = image_shape = [128, 128, 3] + + +renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, + light, top_only, roll, shift) + +inputs_to_shape = {'input_1': [H, W, num_channels]} +labels_to_shape = {'masks': [H, W, 4]} +processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) + + +sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) + +angles = np.linspace(0, 2 * np.pi, 6) +rotations = [] +for angle in angles: + rotations.append(build_rotation_matrix_z(angle)) +rotations = np.array(rotations) + + +loss = WeightedSymmetricReconstruction(rotations, beta) + +model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +optimizer = Adam(learning_rate) + +model.compile(optimizer, loss, mean_squared_error) + +model.fit( + sequence, + epochs=max_num_epochs, + verbose=1, + workers=0) From a9f2a27a5cfa5ab5a5a1336cc76b5cd3c7880322 Mon Sep 17 00:00:00 2001 From: 
Octavio Arriaga Date: Tue, 23 Nov 2021 09:31:58 +0100 Subject: [PATCH 056/101] Add predictions transformation --- examples/pix2pose/loss.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index 88d4e0596..c408ab882 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -114,6 +114,7 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): batch_size, H, W, num_channels = 32, 128, 128, 3 RGB_true = tf.reshape(RGB_true, [batch_size, -1, 3]) RGB_true = to_normalized_device_coordinates(RGB_true) + RGB_pred = to_normalized_device_coordinates(RGB_pred) symmetric_losses = [] for rotation in rotations: # RGB_true_symmetric = tf.matmul(rotation, RGB_true.T).T From b2aca585624897cc1e88fade891f4b92bb2eca42 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 23 Nov 2021 11:27:09 +0100 Subject: [PATCH 057/101] Fix bug with rotation matrix creation --- examples/pix2pose/backend.py | 25 ++++++++- examples/pix2pose/test_rotated_image.py | 71 +++++++++++++++++++++++++ examples/pix2pose/train_symmetric.py | 4 +- 3 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 examples/pix2pose/test_rotated_image.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 4ef476610..3617a7eec 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -500,8 +500,31 @@ def build_rotation_matrix_z(angle): Array (3, 3) rotation matrix in Z axis. """ cos_angle = np.cos(angle) - sin_angle = np.cos(angle) + sin_angle = np.sin(angle) rotation_matrix_z = np.array([[+cos_angle, -sin_angle, 0.0], [+sin_angle, +cos_angle, 0.0], [0.0, 0.0, 1.0]]) return rotation_matrix_z + + +def rotate_image(image, rotation_matrix, epsilon=1e-4): + """Rotates an image with a symmetry. + # Arguments + image: Array (H, W, 3) with domain [0, 255]. + rotation_matrix: Array (3, 3). 
+ """ + mask_image = np.sum(image, axis=-1, keepdims=True) + mask_image = mask_image != 0 + # mask_image = np.repeat(mask_image, 3, axis=-1) + + image = image_to_normalized_device_coordinates(image) + # image_colors = (image * 2) - 1 + + # rotated_image = image + epsilon + rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) + image = normalized_device_coordinates_to_image(rotated_image) + # rotated_image = (rotated_image + 1) / 2 + + # rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) + # rotated_image = rotated_image * mask_image + return rotated_image diff --git a/examples/pix2pose/test_rotated_image.py b/examples/pix2pose/test_rotated_image.py new file mode 100644 index 000000000..c71343a79 --- /dev/null +++ b/examples/pix2pose/test_rotated_image.py @@ -0,0 +1,71 @@ +import numpy as np +import os +import glob +from paz.backend.image import show_image + +from backend import build_rotation_matrix_z +from backend import normalized_device_coordinates_to_image +from backend import image_to_normalized_device_coordinates +from scenes import PixelMaskRenderer + +scale = 4 +image_shape = [128 * scale, 128 * scale, 3] +root_path = os.path.expanduser('~') +background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' +background_wildcard = os.path.join(root_path, background_wildcard) +image_paths = glob.glob(background_wildcard) + +path_OBJ = 'single_solar_panel_02.obj' +path_OBJ = os.path.join(root_path, path_OBJ) +num_occlusions = 1 +viewport_size = image_shape[:2] +y_fov = 3.14159 / 4.0 +distance = [1.0, 1.0] +light = [1.0, 30] +top_only = False +roll = 3.14159 +shift = 0.05 + +renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, + light, top_only, roll, shift) + + +def rotate_image(image, rotation_matrix, epsilon=1e-4): + mask_image = np.sum(image, axis=-1, keepdims=True) + mask_image = mask_image != 0 + + image = image_to_normalized_device_coordinates(image) + # image = image / 255.0 + print(image.min(), image.max()) + # image = (image * 2) - 1 + + # rotated_image = image + epsilon + rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) + rotated_image = normalized_device_coordinates_to_image(rotated_image) + # rotated_image = (rotated_image + 1) / 2 + # print(rotated_image.min(), rotated_image.max()) + + # rotated_image = np.clip(rotated_image, a_min=0.0, a_max=1.0) + rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) + # rotated_image = rotated_image * 255.0 + rotated_image = rotated_image * mask_image + return rotated_image + + +image, alpha, RGB_mask = renderer.render() +RGB_mask = RGB_mask[..., 0:3] +show_image(image) +show_image(RGB_mask) +angles = np.linspace(0, 2 * np.pi, 7) +images = [] +for angle in angles: + print('-' * 40) + print('angle', angle) + rotation_matrix = build_rotation_matrix_z(angle) + print(rotation_matrix) + rotated_image = rotate_image(RGB_mask, rotation_matrix) + rotated_image = rotated_image.astype('uint8') + images.append(rotated_image) + # show_image(rotated_image) +images = np.concatenate(images, axis=1) +show_image(images) diff --git a/examples/pix2pose/train_symmetric.py b/examples/pix2pose/train_symmetric.py index ae9f078ea..4475ffc83 100644 --- a/examples/pix2pose/train_symmetric.py +++ b/examples/pix2pose/train_symmetric.py @@ -33,7 +33,7 @@ filters = 16 num_classes = 3 learning_rate = 0.001 -max_num_epochs = 10 +max_num_epochs = 5 beta = 3.0 steps_per_epoch = 1000 H, W, num_channels = image_shape = [128, 128, 3] @@ -70,3 +70,5 @@ epochs=max_num_epochs, verbose=1, workers=0) + 
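+# Persist the trained weights of the symmetric-loss UNET so a separate
+# evaluation or inference script can reload them later without retraining.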
+model.save_weights('UNET_VGG_symmetric_weights.hdf5') From e9ac4bb54273c710db625d29c777aef1b53cf462 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 23 Nov 2021 14:59:08 +0100 Subject: [PATCH 058/101] Add python rotate image function --- examples/pix2pose/backend.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 3617a7eec..7a2190219 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -507,24 +507,19 @@ def build_rotation_matrix_z(angle): return rotation_matrix_z -def rotate_image(image, rotation_matrix, epsilon=1e-4): +def rotate_image(image, rotation_matrix): """Rotates an image with a symmetry. # Arguments image: Array (H, W, 3) with domain [0, 255]. rotation_matrix: Array (3, 3). - """ - mask_image = np.sum(image, axis=-1, keepdims=True) - mask_image = mask_image != 0 - # mask_image = np.repeat(mask_image, 3, axis=-1) + # Returns + Array (H, W, 3) with domain [0, 255] + """ + mask_image = np.sum(image, axis=-1, keepdims=True) != 0 image = image_to_normalized_device_coordinates(image) - # image_colors = (image * 2) - 1 - - # rotated_image = image + epsilon rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) - image = normalized_device_coordinates_to_image(rotated_image) - # rotated_image = (rotated_image + 1) / 2 - - # rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) - # rotated_image = rotated_image * mask_image + rotated_image = normalized_device_coordinates_to_image(rotated_image) + rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) + rotated_image = rotated_image * mask_image return rotated_image From 31b063b73ace10509987b75da082585f7d6cc5e8 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 23 Nov 2021 14:59:44 +0100 Subject: [PATCH 059/101] Refactor symmetric loss with based on rotate_image backend function --- examples/pix2pose/loss.py | 40 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index c408ab882..d22325c1c 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -88,10 +88,16 @@ def compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta=3.0): return tf.reduce_mean(reconstruction_loss, axis=-1, keepdims=True) -def to_normalized_device_coordinates(image): +def normalized_image_to_normalized_device_coordinates(image): """Map image value from [0, 1] -> [-1, 1]. """ - return (image * 2) - 1.0 + return (image * 2.0) - 1.0 + + +def normalized_device_coordinates_to_normalized_image(image): + """Map image value from [0, 1] -> [-1, 1]. + """ + return (image + 1.0) / 2.0 def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): @@ -104,6 +110,34 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): rotations: Array (num_symmetries, 3, 3). Rotation matrices that when applied lead to the same object view. + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values. 
+ """ + RGB_true, alpha = split_alpha_mask(RGBA_true) + RGB_true = normalized_image_to_normalized_device_coordinates(RGB_true) + symmetric_losses = [] + for rotation in rotations: + RGB_true = tf.einsum('ij,bklj->bkli', rotation, RGB_true) + RGB_true = normalized_device_coordinates_to_normalized_image(RGB_true) + RGB_true = tf.concat([RGB_true, alpha], axis=3) + loss = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) + loss = tf.expand_dims(loss, -1) + symmetric_losses.append(loss) + symmetric_losses = tf.concat(symmetric_losses, axis=-1) + minimum_symmetric_loss = tf.reduce_min(symmetric_losses, axis=-1) + return minimum_symmetric_loss + + +def compute_weighted_symmetric_loss2(RGBA_true, RGB_pred, rotations, beta=3.0): + """Computes the mininum of all rotated L1 reconstruction losses weighting + the positive alpha mask values in the predicted RGB image by beta. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. + rotations: Array (num_symmetries, 3, 3). Rotation matrices + that when applied lead to the same object view. + # Returns Tensor [batch, H, W] with weighted reconstruction loss values. """ @@ -118,7 +152,7 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): symmetric_losses = [] for rotation in rotations: # RGB_true_symmetric = tf.matmul(rotation, RGB_true.T).T - RGB_true_symmetric = tf.einsum('ij,bpj->bpi', rotation, RGB_true) + RGB_true_symmetric = tf.einsum('ij,klj->kli', rotation, RGB_true) RGB_true_symmetric = tf.reshape(RGB_true_symmetric, (batch_size, H, W, num_channels)) RGBA_true_symmetric = tf.concat([RGB_true_symmetric, alpha], axis=3) symmetric_loss = compute_weighted_reconstruction_loss( From 0ec6aa09d87d398bb8e780f34fd217c995dfa697 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 23 Nov 2021 15:00:21 +0100 Subject: [PATCH 060/101] Update training scripts --- examples/pix2pose/test_rotated_image.py | 59 +++++++------------------ examples/pix2pose/train_symmetric.py | 13 +++--- 2 files changed, 23 insertions(+), 49 deletions(-) diff --git a/examples/pix2pose/test_rotated_image.py b/examples/pix2pose/test_rotated_image.py index c71343a79..8383744c2 100644 --- a/examples/pix2pose/test_rotated_image.py +++ b/examples/pix2pose/test_rotated_image.py @@ -3,7 +3,7 @@ import glob from paz.backend.image import show_image -from backend import build_rotation_matrix_z +from backend import build_rotation_matrix_z, rotate_image from backend import normalized_device_coordinates_to_image from backend import image_to_normalized_device_coordinates from scenes import PixelMaskRenderer @@ -28,44 +28,19 @@ renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, light, top_only, roll, shift) - - -def rotate_image(image, rotation_matrix, epsilon=1e-4): - mask_image = np.sum(image, axis=-1, keepdims=True) - mask_image = mask_image != 0 - - image = image_to_normalized_device_coordinates(image) - # image = image / 255.0 - print(image.min(), image.max()) - # image = (image * 2) - 1 - - # rotated_image = image + epsilon - rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) - rotated_image = normalized_device_coordinates_to_image(rotated_image) - # rotated_image = (rotated_image + 1) / 2 - # print(rotated_image.min(), rotated_image.max()) - - # rotated_image = np.clip(rotated_image, a_min=0.0, a_max=1.0) - rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) - # rotated_image = rotated_image * 255.0 - 
rotated_image = rotated_image * mask_image - return rotated_image - - -image, alpha, RGB_mask = renderer.render() -RGB_mask = RGB_mask[..., 0:3] -show_image(image) -show_image(RGB_mask) -angles = np.linspace(0, 2 * np.pi, 7) -images = [] -for angle in angles: - print('-' * 40) - print('angle', angle) - rotation_matrix = build_rotation_matrix_z(angle) - print(rotation_matrix) - rotated_image = rotate_image(RGB_mask, rotation_matrix) - rotated_image = rotated_image.astype('uint8') - images.append(rotated_image) - # show_image(rotated_image) -images = np.concatenate(images, axis=1) -show_image(images) +renderer.scene.ambient_light = [1.0, 1.0, 1.0] + +for _ in range(3): + image, alpha, RGB_mask = renderer.render() + RGB_mask = RGB_mask[..., 0:3] + show_image(image) + show_image(RGB_mask) + angles = np.linspace(0, 2 * np.pi, 7)[0:6] + images = [] + for angle in angles: + rotation_matrix = build_rotation_matrix_z(angle) + rotated_image = rotate_image(RGB_mask, rotation_matrix) + rotated_image = rotated_image.astype('uint8') + images.append(rotated_image) + images = np.concatenate(images, axis=1) + show_image(images) diff --git a/examples/pix2pose/train_symmetric.py b/examples/pix2pose/train_symmetric.py index 4475ffc83..cd6f85376 100644 --- a/examples/pix2pose/train_symmetric.py +++ b/examples/pix2pose/train_symmetric.py @@ -41,22 +41,21 @@ renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, light, top_only, roll, shift) +# check why this is needed in this object +renderer.scene.ambient_light = [1.0, 1.0, 1.0] inputs_to_shape = {'input_1': [H, W, num_channels]} labels_to_shape = {'masks': [H, W, 4]} + processor = DomainRandomization( renderer, image_shape, image_paths, inputs_to_shape, labels_to_shape, num_occlusions) - sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) -angles = np.linspace(0, 2 * np.pi, 6) -rotations = [] -for angle in angles: - rotations.append(build_rotation_matrix_z(angle)) -rotations = np.array(rotations) - +# build all symmetric rotations for solar pannel +angles = np.linspace(0, 2 * np.pi, 7)[:6] +rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) loss = WeightedSymmetricReconstruction(rotations, beta) From a36763b2d1b952a661047b89b9a469aabe17fd79 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 25 Nov 2021 16:43:50 +0100 Subject: [PATCH 061/101] Add canonical coloring scheme scene --- examples/pix2pose/canonical_coloring.py | 254 ++++++++++++++++++++++++ 1 file changed, 254 insertions(+) create mode 100644 examples/pix2pose/canonical_coloring.py diff --git a/examples/pix2pose/canonical_coloring.py b/examples/pix2pose/canonical_coloring.py new file mode 100644 index 000000000..bc2f23202 --- /dev/null +++ b/examples/pix2pose/canonical_coloring.py @@ -0,0 +1,254 @@ +import numpy as np +from backend import build_rotation_matrix_y +from paz.backend.render import sample_uniformly, split_alpha_channel +from pyrender import (PerspectiveCamera, OffscreenRenderer, DirectionalLight, + RenderFlags, Mesh, Scene) +import trimesh +from coloring import color_object +from backend import quaternion_to_rotation_matrix +from backend import to_affine_matrix + + +def sample_uniform(min_value, max_value): + """Samples values inside segment [min_value, max_value) + + # Arguments + segment_limits: List (2) containing min and max segment values. 
+ + # Returns + Float inside segment [min_value, max_value] + """ + if min_value > max_value: + raise ValueError('First value must be lower than second value') + value = np.random.uniform(min_value, max_value) + return value + + +def sample_inside_box3D(min_W, min_H, min_D, max_W, max_H, max_D): + """ Samples points inside a 3D box defined by the + width, height and depth limits. + ________ + / /| + / / | + / / | + /_______/ / + | | | / / + height | | / depth + | |_______|/ / + + --widht-- + + # Arguments + width_limits: List (2) with [min_value_width, max_value_width]. + height_limits: List (2) with [min_value_height, max_value_height]. + depth_limits: List (2) with [min_value_depth, max_value_depth]. + + # Returns + Array (3) of point inside the 3D box. + """ + W = sample_uniform(min_W, max_W) + H = sample_uniform(min_H, max_H) + D = sample_uniform(min_D, max_D) + box_point3D = np.array([W, H, D]) + return box_point3D + + +def sample_random_rotation_matrix2(): + """Samples SO3 in rotation matrix form. + + # Return + Array (3, 3). + + # References + [Lost in my terminal](http://blog.lostinmyterminal.com/python/2015/05/ + 12/random-rotation-matrix.html) + [real-time rendering](from http://www.realtimerendering.com/resources/ + GraphicsGems/gemsiii/rand_rotation.c) + """ + theta = 2.0 * np.pi * np.random.uniform() + phi = 2.0 * np.pi * np.random.uniform() + z = 2.0 * np.random.uniform() + # Compute a vector V used for distributing points over the sphere via the + # reflection I - V Transpose(V). + # This formulation of V will guarantee that if x[1] and x[2] are uniformly + # distributed, the reflected points will be uniform on the sphere. + # random_vector has length sqrt(2) to eliminate 2 in the Householder matrix + r = np.sqrt(z) + random_vector = np.array([np.sin(phi) * r, + np.cos(phi) * r, + np.sqrt(2.0 - z)]) + sin_theta = np.sin(theta) + cos_theta = np.cos(theta) + R = np.array([[+cos_theta, +sin_theta, 0.0], + [-sin_theta, +cos_theta, 0.0], + [0.0, 0.0, 1.0]]) + random_rotation_matrix = ( + np.outer(random_vector, random_vector) - np.eye(3)).dot(R) + return random_rotation_matrix + + +def sample_random_rotation_matrix(): + quaternion = np.random.rand(4) + quaternion = quaternion / np.linalg.norm(quaternion) + rotation_matrix = quaternion_to_rotation_matrix(quaternion) + return rotation_matrix + + +def sample_random_rotation_matrix3(): + epsilon = 0.1 + x_angle = np.random.uniform((-np.pi / 2.0) + epsilon, (np.pi / 2.0) - epsilon) + y_angle = np.random.uniform((-np.pi / 2.0) + epsilon, (np.pi / 2.0) - epsilon) + z_angle = np.random.uniform(np.pi, -np.pi) + + x_matrix = build_rotation_matrix_x(x_angle) + y_matrix = build_rotation_matrix_y(y_angle) + z_matrix = build_rotation_matrix_z(z_angle) + + rotation_matrix = np.dot(z_matrix, np.dot(y_matrix, x_matrix)) + return rotation_matrix + + +def sample_affine_transform(min_corner, max_corner): + min_W, min_H, min_D = min_corner + max_W, max_H, max_D = max_corner + translation = sample_inside_box3D(min_W, min_H, min_D, max_W, max_H, max_D) + rotation_matrix = sample_random_rotation_matrix3() + affine_matrix = to_affine_matrix(rotation_matrix, translation) + return affine_matrix + + +class CanonicalScene(): + def __init__(self, path_OBJ, camera_pose, min_corner, max_corner, + symmetric_transforms, + viewport_size=(128, 128), y_fov=3.14159 / 4.0, + light_intensity=[0.5, 30]): + self.light_intensity = light_intensity + self.symmetric_transforms = symmetric_transforms + self.min_corner, self.max_corner = min_corner, max_corner + self.scene = 
Scene(bg_color=[0, 0, 0, 0]) + self.light = self._build_light(light_intensity, camera_pose) + self.camera = self._build_camera(y_fov, viewport_size, camera_pose) + self.pixel_mesh = self.scene.add(color_object(path_OBJ)) + self.mesh = self.scene.add( + Mesh.from_trimesh(trimesh.load(path_OBJ), smooth=True)) + + self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) + + self.flags_RGBA = RenderFlags.RGBA + self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT + + def _build_light(self, light, pose): + directional_light = DirectionalLight([1.0, 1.0, 1.0], np.mean(light)) + directional_light = self.scene.add(directional_light, pose=pose) + return directional_light + + def _build_camera(self, y_fov, viewport_size, pose): + aspect_ratio = np.divide(*viewport_size) + camera = PerspectiveCamera(y_fov, aspectRatio=aspect_ratio) + camera = self.scene.add(camera, pose=pose) + return camera + + def _sample_parameters(self, min_corner, max_corner): + mesh_transform = sample_affine_transform(min_corner, max_corner) + light_intensity = sample_uniformly(self.light_intensity) + return mesh_transform, light_intensity + + def render(self): + mesh_transform, light_intensity = self._sample_parameters( + self.min_corner, self.max_corner) + mesh_rotation = mesh_transform[0:3, 0:3] + canonical_rotation = calculate_canonical_rotation( + mesh_rotation, self.symmetric_transforms) + # mesh_rotation[0:3, 0:3] = canonical_rotation + canonical_rotation = np.dot(mesh_rotation, canonical_rotation) + mesh_rotation[0:3, 0:3] = canonical_rotation + self.scene.set_pose(self.mesh, mesh_transform) + self.scene.set_pose(self.pixel_mesh, mesh_transform) + self.light.light.intensity = light_intensity + + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + return image, alpha, RGB_mask + + def render_symmetries(self): + images, alphas, RGB_masks = [], [], [] + for rotation in self.symmetric_transforms: + symmetric_transform = to_affine_matrix(rotation, np.zeros(3)) + self.scene.set_pose(self.mesh, symmetric_transform) + self.scene.set_pose(self.pixel_mesh, symmetric_transform) + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + images.append(image) + alphas.append(alpha) + RGB_masks.append(RGB_mask[..., 0:3]) + images = np.concatenate(images, axis=1) + RGB_masks = np.concatenate(RGB_masks, axis=1) + print(images.shape) + print(RGB_masks.shape) + images = np.concatenate([images, RGB_masks], axis=0) + return images + + +def compute_norm_SO3(rotation_mesh, rotation): + difference = np.dot(np.linalg.inv(rotation), rotation_mesh) - np.eye(3) + distance = np.linalg.norm(difference, ord='fro') + return distance + + +def calculate_canonical_rotation(rotation_mesh, rotations): + norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] + closest_rotation_arg = np.argmin(norms) + print(closest_rotation_arg) + closest_rotation = rotations[closest_rotation_arg] + canonical_rotation = np.linalg.inv(closest_rotation) + return canonical_rotation + + +if __name__ == 
"__main__": + import os + from paz.backend.image import show_image + from backend import build_rotation_matrix_z + from backend import build_rotation_matrix_x + from backend import build_rotation_matrix_y + path_OBJ = 'single_solar_panel_02.obj' + root_path = os.path.expanduser('~') + path_OBJ = os.path.join(root_path, path_OBJ) + num_occlusions = 1 + image_shape = (128, 128, 3) + viewport_size = image_shape[:2] + y_fov = 3.14159 / 4.0 + distance = [1.0, 1.0] + light = [1.0, 30] + + # min_corner = [-0.1, -0.1, -0.0] + # max_corner = [+0.1, +0.1, +0.4] + angles = np.linspace(0, 2 * np.pi, 7)[:6] + symmetric_rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) + min_corner = [0.0, 0.0, -0.4] + max_corner = [0.0, 0.0, +0.0] + # translation = np.array([0.0, 0.0, 1.0]) + translation = np.array([0.0, 0.0, 1.0]) + camera_rotation = np.eye(3) + camera_rotation = build_rotation_matrix_x(np.pi) + translation = np.array([0.0, 0.0, -1.0]) + camera_pose = to_affine_matrix(camera_rotation, translation) + scene = CanonicalScene(path_OBJ, camera_pose, min_corner, max_corner, symmetric_rotations) + from pyrender import Viewer + Viewer(scene.scene) + scene.scene.ambient_light = [1.0, 1.0, 1.0] + image = scene.render_symmetries() + show_image(image) + for _ in range(100): + image, alpha, RGB_mask = scene.render() + show_image(image) + show_image(RGB_mask[:, :, 0:3]) From 2a86373db94ce4c7889db7cda9aa7164c6f7906e Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 25 Nov 2021 16:44:11 +0100 Subject: [PATCH 062/101] Add rotation build matrices --- examples/pix2pose/backend.py | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 7a2190219..350947db9 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -494,8 +494,10 @@ def normalized_device_coordinates_to_image(image): def build_rotation_matrix_z(angle): """Builds rotation matrix in Z axis. + # Arguments angle: Float. Angle in radians. + # Return Array (3, 3) rotation matrix in Z axis. """ @@ -507,8 +509,43 @@ def build_rotation_matrix_z(angle): return rotation_matrix_z +def build_rotation_matrix_x(angle): + """Builds rotation matrix in X axis. + + # Arguments + angle: Float. Angle in radians. + + # Return + Array (3, 3) rotation matrix in Z axis. + """ + cos_angle = np.cos(angle) + sin_angle = np.sin(angle) + rotation_matrix_x = np.array([[1.0, 0.0, 0.0], + [0.0, +cos_angle, -sin_angle], + [0.0, +sin_angle, +cos_angle]]) + return rotation_matrix_x + + +def build_rotation_matrix_y(angle): + """Builds rotation matrix in Y axis. + + # Arguments + angle: Float. Angle in radians. + + # Return + Array (3, 3) rotation matrix in Z axis. + """ + cos_angle = np.cos(angle) + sin_angle = np.sin(angle) + rotation_matrix_y = np.array([[+cos_angle, 0.0, +sin_angle], + [0.0, 1.0, 0.0], + [-sin_angle, 0.0, +cos_angle]]) + return rotation_matrix_y + + def rotate_image(image, rotation_matrix): """Rotates an image with a symmetry. + # Arguments image: Array (H, W, 3) with domain [0, 255]. rotation_matrix: Array (3, 3). 
From 86bec271787a6af0a229435a2e78a42386d50be1 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 25 Nov 2021 19:58:45 +0100 Subject: [PATCH 063/101] Add training script for canonical pose estimation --- examples/pix2pose/canonical_coloring.py | 75 +++++++++++++++++++++---- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/examples/pix2pose/canonical_coloring.py b/examples/pix2pose/canonical_coloring.py index bc2f23202..0a9f74f7e 100644 --- a/examples/pix2pose/canonical_coloring.py +++ b/examples/pix2pose/canonical_coloring.py @@ -193,8 +193,8 @@ def render_symmetries(self): RGB_masks.append(RGB_mask[..., 0:3]) images = np.concatenate(images, axis=1) RGB_masks = np.concatenate(RGB_masks, axis=1) - print(images.shape) - print(RGB_masks.shape) + # print(images.shape) + # print(RGB_masks.shape) images = np.concatenate([images, RGB_masks], axis=0) return images @@ -208,7 +208,7 @@ def compute_norm_SO3(rotation_mesh, rotation): def calculate_canonical_rotation(rotation_mesh, rotations): norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] closest_rotation_arg = np.argmin(norms) - print(closest_rotation_arg) + # print(closest_rotation_arg) closest_rotation = rotations[closest_rotation_arg] canonical_rotation = np.linalg.inv(closest_rotation) return canonical_rotation @@ -219,7 +219,7 @@ def calculate_canonical_rotation(rotation_mesh, rotations): from paz.backend.image import show_image from backend import build_rotation_matrix_z from backend import build_rotation_matrix_x - from backend import build_rotation_matrix_y + # from backend import build_rotation_matrix_y path_OBJ = 'single_solar_panel_02.obj' root_path = os.path.expanduser('~') path_OBJ = os.path.join(root_path, path_OBJ) @@ -233,7 +233,8 @@ def calculate_canonical_rotation(rotation_mesh, rotations): # min_corner = [-0.1, -0.1, -0.0] # max_corner = [+0.1, +0.1, +0.4] angles = np.linspace(0, 2 * np.pi, 7)[:6] - symmetric_rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) + symmetric_rotations = np.array( + [build_rotation_matrix_z(angle) for angle in angles]) min_corner = [0.0, 0.0, -0.4] max_corner = [0.0, 0.0, +0.0] # translation = np.array([0.0, 0.0, 1.0]) @@ -242,13 +243,63 @@ def calculate_canonical_rotation(rotation_mesh, rotations): camera_rotation = build_rotation_matrix_x(np.pi) translation = np.array([0.0, 0.0, -1.0]) camera_pose = to_affine_matrix(camera_rotation, translation) - scene = CanonicalScene(path_OBJ, camera_pose, min_corner, max_corner, symmetric_rotations) - from pyrender import Viewer - Viewer(scene.scene) - scene.scene.ambient_light = [1.0, 1.0, 1.0] - image = scene.render_symmetries() + renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, + max_corner, symmetric_rotations) + # from pyrender import Viewer + # Viewer(scene.scene) + renderer.scene.ambient_light = [1.0, 1.0, 1.0] + image = renderer.render_symmetries() show_image(image) - for _ in range(100): - image, alpha, RGB_mask = scene.render() + for _ in range(0): + image, alpha, RGB_mask = renderer.render() show_image(image) show_image(RGB_mask[:, :, 0:3]) + + from pipelines import DomainRandomization + from paz.abstract.sequence import GeneratingSequence + from loss import WeightedReconstruction + from paz.models import UNET_VGG16 + from tensorflow.keras.optimizers import Adam + from metrics import mean_squared_error + import glob + + background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' + background_wildcard = os.path.join(root_path, background_wildcard) + image_paths = 
glob.glob(background_wildcard) + + H, W, num_channels = image_shape + batch_size = 32 + steps_per_epoch = 1000 + beta = 3.0 + num_classes = 3 + learning_rate = 0.001 + max_num_epochs = 5 + + inputs_to_shape = {'input_1': [H, W, num_channels]} + labels_to_shape = {'masks': [H, W, 4]} + + processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) + + sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) + + # build all symmetric rotations for solar pannel + angles = np.linspace(0, 2 * np.pi, 7)[:6] + rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) + + # loss = WeightedSymmetricReconstruction(rotations, beta) + loss = WeightedReconstruction(beta) + + model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) + optimizer = Adam(learning_rate) + + model.compile(optimizer, loss, mean_squared_error) + + model.fit( + sequence, + epochs=max_num_epochs, + verbose=1, + workers=0) + model.save_weights('UNET-VGG_solar_panel_canonical.hdf5') + From 0aa168bfcaa94ff466196ff4045cd472ab400515 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 26 Nov 2021 19:43:38 +0100 Subject: [PATCH 064/101] Move canonical functions to backend --- examples/pix2pose/backend.py | 111 +++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 350947db9..4fd5b4bc8 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -560,3 +560,114 @@ def rotate_image(image, rotation_matrix): rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) rotated_image = rotated_image * mask_image return rotated_image + + +def sample_uniform(min_value, max_value): + """Samples values inside segment [min_value, max_value) + + # Arguments + segment_limits: List (2) containing min and max segment values. + + # Returns + Float inside segment [min_value, max_value] + """ + if min_value > max_value: + raise ValueError('First value must be lower than second value') + value = np.random.uniform(min_value, max_value) + return value + + +def sample_inside_box3D(min_W, min_H, min_D, max_W, max_H, max_D): + """ Samples points inside a 3D box defined by the + width, height and depth limits. + ________ + / /| + / / | + / / | + /_______/ / + | | | / / + height | | / depth + | |_______|/ / + + --widht-- + + # Arguments + width_limits: List (2) with [min_value_width, max_value_width]. + height_limits: List (2) with [min_value_height, max_value_height]. + depth_limits: List (2) with [min_value_depth, max_value_depth]. + + # Returns + Array (3) of point inside the 3D box. 
+ """ + W = sample_uniform(min_W, max_W) + H = sample_uniform(min_H, max_H) + D = sample_uniform(min_D, max_D) + box_point3D = np.array([W, H, D]) + return box_point3D + + +def sample_front_rotation_matrix(epsilon=0.1): + x_angle = np.random.uniform((-np.pi / 2.0) + epsilon, + (np.pi / 2.0) - epsilon) + y_angle = np.random.uniform((-np.pi / 2.0) + epsilon, + (np.pi / 2.0) - epsilon) + z_angle = np.random.uniform(np.pi, -np.pi) + + x_matrix = build_rotation_matrix_x(x_angle) + y_matrix = build_rotation_matrix_y(y_angle) + z_matrix = build_rotation_matrix_z(z_angle) + + rotation_matrix = np.dot(z_matrix, np.dot(y_matrix, x_matrix)) + return rotation_matrix + + +def sample_affine_transform(min_corner, max_corner): + min_W, min_H, min_D = min_corner + max_W, max_H, max_D = max_corner + translation = sample_inside_box3D(min_W, min_H, min_D, max_W, max_H, max_D) + rotation_matrix = sample_front_rotation_matrix() + affine_matrix = to_affine_matrix(rotation_matrix, translation) + return affine_matrix + + +def sample_random_rotation_matrix(): + """Samples SO3 in rotation matrix form. + + # Return + Array (3, 3). + + # References + [Lost in my terminal](http://blog.lostinmyterminal.com/python/2015/05/ + 12/random-rotation-matrix.html) + [real-time rendering](from http://www.realtimerendering.com/resources/ + GraphicsGems/gemsiii/rand_rotation.c) + """ + theta = 2.0 * np.pi * np.random.uniform() + phi = 2.0 * np.pi * np.random.uniform() + z = 2.0 * np.random.uniform() + # random_vector has length sqrt(2) to eliminate 2 in the Householder matrix + r = np.sqrt(z) + random_vector = np.array( + [np.sin(phi) * r, np.cos(phi) * r, np.sqrt(2.0 - z)]) + sin_theta = np.sin(theta) + cos_theta = np.cos(theta) + R = np.array([[+cos_theta, +sin_theta, 0.0], + [-sin_theta, +cos_theta, 0.0], + [0.0, 0.0, 1.0]]) + random_rotation_matrix = ( + np.outer(random_vector, random_vector) - np.eye(3)).dot(R) + return random_rotation_matrix + + +def compute_norm_SO3(rotation_mesh, rotation): + difference = np.dot(np.linalg.inv(rotation), rotation_mesh) - np.eye(3) + distance = np.linalg.norm(difference, ord='fro') + return distance + + +def calculate_canonical_rotation(rotation_mesh, rotations): + norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] + closest_rotation_arg = np.argmin(norms) + closest_rotation = rotations[closest_rotation_arg] + canonical_rotation = np.linalg.inv(closest_rotation) + return canonical_rotation From 049f3dfa99eb8d9e455b6994a993c50682bdf895 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 26 Nov 2021 19:44:21 +0100 Subject: [PATCH 065/101] Fix bug with tensor name being overwritten --- examples/pix2pose/loss.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index d22325c1c..d171df124 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -117,10 +117,12 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): RGB_true = normalized_image_to_normalized_device_coordinates(RGB_true) symmetric_losses = [] for rotation in rotations: - RGB_true = tf.einsum('ij,bklj->bkli', rotation, RGB_true) - RGB_true = normalized_device_coordinates_to_normalized_image(RGB_true) - RGB_true = tf.concat([RGB_true, alpha], axis=3) - loss = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, beta) + RGB_true_rotated = tf.einsum('ij,bklj->bkli', rotation, RGB_true) + RGB_true_rotated = normalized_device_coordinates_to_normalized_image(RGB_true_rotated) + RGB_true_rotated 
= tf.clip_by_value(RGB_true_rotated, 0.0, 1.0) + RGB_true_rotated = RGB_true_rotated * alpha + RGBA_true_rotated = tf.concat([RGB_true_rotated, alpha], axis=3) + loss = compute_weighted_reconstruction_loss(RGBA_true_rotated, RGB_pred, beta) loss = tf.expand_dims(loss, -1) symmetric_losses.append(loss) symmetric_losses = tf.concat(symmetric_losses, axis=-1) From 6dac02b5ca26a515c26d9aaecc3bd200cca04ffa Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 26 Nov 2021 19:44:49 +0100 Subject: [PATCH 066/101] Add scene for canonical discrete transformations --- examples/pix2pose/scenes.py | 82 +++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index ea03b04ae..3b94a655c 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -6,6 +6,9 @@ RenderFlags, Mesh, Scene) import trimesh from coloring import color_object +from backend import to_affine_matrix +from backend import sample_affine_transform +from backend import calculate_canonical_rotation class PixelMaskRenderer(): @@ -65,3 +68,82 @@ def render(self): RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) self.mesh.mesh.is_visible = True return image, alpha, RGB_mask + + +class CanonicalScene(): + def __init__(self, path_OBJ, camera_pose, min_corner, max_corner, + symmetric_transforms, viewport_size=(128, 128), + y_fov=3.14159 / 4.0, light_intensity=[0.5, 30]): + self.light_intensity = light_intensity + self.symmetric_transforms = symmetric_transforms + self.min_corner, self.max_corner = min_corner, max_corner + self.scene = Scene(bg_color=[0, 0, 0, 0]) + self.light = self._build_light(light_intensity, camera_pose) + self.camera = self._build_camera(y_fov, viewport_size, camera_pose) + self.pixel_mesh = self.scene.add(color_object(path_OBJ)) + self.mesh = self.scene.add( + Mesh.from_trimesh(trimesh.load(path_OBJ), smooth=True)) + + self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) + + self.flags_RGBA = RenderFlags.RGBA + self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT + + def _build_light(self, light, pose): + directional_light = DirectionalLight([1.0, 1.0, 1.0], np.mean(light)) + directional_light = self.scene.add(directional_light, pose=pose) + return directional_light + + def _build_camera(self, y_fov, viewport_size, pose): + aspect_ratio = np.divide(*viewport_size) + camera = PerspectiveCamera(y_fov, aspectRatio=aspect_ratio) + camera = self.scene.add(camera, pose=pose) + return camera + + def _sample_parameters(self, min_corner, max_corner): + mesh_transform = sample_affine_transform(min_corner, max_corner) + light_intensity = sample_uniformly(self.light_intensity) + return mesh_transform, light_intensity + + def render(self): + mesh_transform, light_intensity = self._sample_parameters( + self.min_corner, self.max_corner) + mesh_rotation = mesh_transform[0:3, 0:3] + canonical_rotation = calculate_canonical_rotation( + mesh_rotation, self.symmetric_transforms) + # mesh_rotation[0:3, 0:3] = canonical_rotation + canonical_rotation = np.dot(mesh_rotation, canonical_rotation) + mesh_rotation[0:3, 0:3] = canonical_rotation + self.scene.set_pose(self.mesh, mesh_transform) + self.scene.set_pose(self.pixel_mesh, mesh_transform) + self.light.light.intensity = light_intensity + + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = 
False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + return image, alpha, RGB_mask + + def render_symmetries(self): + images, alphas, RGB_masks = [], [], [] + for rotation in self.symmetric_transforms: + symmetric_transform = to_affine_matrix(rotation, np.zeros(3)) + self.scene.set_pose(self.mesh, symmetric_transform) + self.scene.set_pose(self.pixel_mesh, symmetric_transform) + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + images.append(image) + alphas.append(alpha) + RGB_masks.append(RGB_mask[..., 0:3]) + images = np.concatenate(images, axis=1) + RGB_masks = np.concatenate(RGB_masks, axis=1) + images = np.concatenate([images, RGB_masks], axis=0) + return images From 5a8083df88f8dffe2a41d82f828870d2609178aa Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Fri, 26 Nov 2021 19:47:53 +0100 Subject: [PATCH 067/101] Add training script for canonical transformation --- .../pix2pose/train_canonical_transform.py | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 examples/pix2pose/train_canonical_transform.py diff --git a/examples/pix2pose/train_canonical_transform.py b/examples/pix2pose/train_canonical_transform.py new file mode 100644 index 000000000..c9387e77c --- /dev/null +++ b/examples/pix2pose/train_canonical_transform.py @@ -0,0 +1,87 @@ +import os +import glob + +import numpy as np +from tensorflow.keras.optimizers import Adam +from paz.backend.image import show_image +from paz.models import UNET_VGG16 +from paz.abstract.sequence import GeneratingSequence + +from scenes import CanonicalScene +from backend import build_rotation_matrix_z +from backend import build_rotation_matrix_x +from backend import to_affine_matrix +from pipelines import DomainRandomization +from loss import WeightedReconstruction +from metrics import mean_squared_error + + +path_OBJ = 'single_solar_panel_02.obj' +root_path = os.path.expanduser('~') +path_OBJ = os.path.join(root_path, path_OBJ) +num_occlusions = 1 +image_shape = (128, 128, 3) +viewport_size = image_shape[:2] +y_fov = 3.14159 / 4.0 +distance = [1.0, 1.0] +light = [1.0, 30] + +angles = np.linspace(0, 2 * np.pi, 7)[:6] +symmetric_rotations = np.array( + [build_rotation_matrix_z(angle) for angle in angles]) +min_corner = [0.0, 0.0, -0.4] +max_corner = [0.0, 0.0, +0.0] +camera_rotation = build_rotation_matrix_x(np.pi) +translation = np.array([0.0, 0.0, -1.0]) +camera_pose = to_affine_matrix(camera_rotation, translation) +renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, + max_corner, symmetric_rotations) +# from pyrender import Viewer +# Viewer(scene.scene) +renderer.scene.ambient_light = [1.0, 1.0, 1.0] +image = renderer.render_symmetries() +show_image(image) +for _ in range(100): + image, alpha, RGB_mask = renderer.render() + show_image(image) + show_image(RGB_mask[:, :, 0:3]) + +background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' +background_wildcard = os.path.join(root_path, background_wildcard) +image_paths = glob.glob(background_wildcard) + +H, W, num_channels = image_shape +batch_size = 32 +steps_per_epoch = 1000 +beta = 3.0 +num_classes = 3 +learning_rate = 0.001 +max_num_epochs = 5 + +inputs_to_shape = {'input_1': [H, W, num_channels]} +labels_to_shape = 
{'masks': [H, W, 4]} + +processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) + +sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) + +# build all symmetric rotations for solar pannel +angles = np.linspace(0, 2 * np.pi, 7)[:6] +rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) + +loss = WeightedReconstruction(beta) + +model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +optimizer = Adam(learning_rate) + +model.compile(optimizer, loss, mean_squared_error) +""" +model.fit( + sequence, + epochs=max_num_epochs, + verbose=1, + workers=0) +model.save_weights('UNET-VGG_solar_panel_canonical.hdf5') +""" From 9134957258e04f96f79eb5a45197a003ecd1e985 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Mon, 29 Nov 2021 09:59:17 +0100 Subject: [PATCH 068/101] Update training canonical scripts --- examples/pix2pose/backend.py | 1 + examples/pix2pose/canonical_coloring.py | 305 ------------------ examples/pix2pose/scenes.py | 86 +++++ examples/pix2pose/test_rotated_image.py | 110 +++++-- .../pix2pose/train_canonical_transform.py | 53 +-- examples/pix2pose/train_symmetric.py | 38 ++- 6 files changed, 244 insertions(+), 349 deletions(-) delete mode 100644 examples/pix2pose/canonical_coloring.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 4fd5b4bc8..5555d33fc 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -668,6 +668,7 @@ def compute_norm_SO3(rotation_mesh, rotation): def calculate_canonical_rotation(rotation_mesh, rotations): norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] closest_rotation_arg = np.argmin(norms) + # print(closest_rotation_arg) closest_rotation = rotations[closest_rotation_arg] canonical_rotation = np.linalg.inv(closest_rotation) return canonical_rotation diff --git a/examples/pix2pose/canonical_coloring.py b/examples/pix2pose/canonical_coloring.py deleted file mode 100644 index 0a9f74f7e..000000000 --- a/examples/pix2pose/canonical_coloring.py +++ /dev/null @@ -1,305 +0,0 @@ -import numpy as np -from backend import build_rotation_matrix_y -from paz.backend.render import sample_uniformly, split_alpha_channel -from pyrender import (PerspectiveCamera, OffscreenRenderer, DirectionalLight, - RenderFlags, Mesh, Scene) -import trimesh -from coloring import color_object -from backend import quaternion_to_rotation_matrix -from backend import to_affine_matrix - - -def sample_uniform(min_value, max_value): - """Samples values inside segment [min_value, max_value) - - # Arguments - segment_limits: List (2) containing min and max segment values. - - # Returns - Float inside segment [min_value, max_value] - """ - if min_value > max_value: - raise ValueError('First value must be lower than second value') - value = np.random.uniform(min_value, max_value) - return value - - -def sample_inside_box3D(min_W, min_H, min_D, max_W, max_H, max_D): - """ Samples points inside a 3D box defined by the - width, height and depth limits. - ________ - / /| - / / | - / / | - /_______/ / - | | | / / - height | | / depth - | |_______|/ / - - --widht-- - - # Arguments - width_limits: List (2) with [min_value_width, max_value_width]. - height_limits: List (2) with [min_value_height, max_value_height]. - depth_limits: List (2) with [min_value_depth, max_value_depth]. - - # Returns - Array (3) of point inside the 3D box. 
- """ - W = sample_uniform(min_W, max_W) - H = sample_uniform(min_H, max_H) - D = sample_uniform(min_D, max_D) - box_point3D = np.array([W, H, D]) - return box_point3D - - -def sample_random_rotation_matrix2(): - """Samples SO3 in rotation matrix form. - - # Return - Array (3, 3). - - # References - [Lost in my terminal](http://blog.lostinmyterminal.com/python/2015/05/ - 12/random-rotation-matrix.html) - [real-time rendering](from http://www.realtimerendering.com/resources/ - GraphicsGems/gemsiii/rand_rotation.c) - """ - theta = 2.0 * np.pi * np.random.uniform() - phi = 2.0 * np.pi * np.random.uniform() - z = 2.0 * np.random.uniform() - # Compute a vector V used for distributing points over the sphere via the - # reflection I - V Transpose(V). - # This formulation of V will guarantee that if x[1] and x[2] are uniformly - # distributed, the reflected points will be uniform on the sphere. - # random_vector has length sqrt(2) to eliminate 2 in the Householder matrix - r = np.sqrt(z) - random_vector = np.array([np.sin(phi) * r, - np.cos(phi) * r, - np.sqrt(2.0 - z)]) - sin_theta = np.sin(theta) - cos_theta = np.cos(theta) - R = np.array([[+cos_theta, +sin_theta, 0.0], - [-sin_theta, +cos_theta, 0.0], - [0.0, 0.0, 1.0]]) - random_rotation_matrix = ( - np.outer(random_vector, random_vector) - np.eye(3)).dot(R) - return random_rotation_matrix - - -def sample_random_rotation_matrix(): - quaternion = np.random.rand(4) - quaternion = quaternion / np.linalg.norm(quaternion) - rotation_matrix = quaternion_to_rotation_matrix(quaternion) - return rotation_matrix - - -def sample_random_rotation_matrix3(): - epsilon = 0.1 - x_angle = np.random.uniform((-np.pi / 2.0) + epsilon, (np.pi / 2.0) - epsilon) - y_angle = np.random.uniform((-np.pi / 2.0) + epsilon, (np.pi / 2.0) - epsilon) - z_angle = np.random.uniform(np.pi, -np.pi) - - x_matrix = build_rotation_matrix_x(x_angle) - y_matrix = build_rotation_matrix_y(y_angle) - z_matrix = build_rotation_matrix_z(z_angle) - - rotation_matrix = np.dot(z_matrix, np.dot(y_matrix, x_matrix)) - return rotation_matrix - - -def sample_affine_transform(min_corner, max_corner): - min_W, min_H, min_D = min_corner - max_W, max_H, max_D = max_corner - translation = sample_inside_box3D(min_W, min_H, min_D, max_W, max_H, max_D) - rotation_matrix = sample_random_rotation_matrix3() - affine_matrix = to_affine_matrix(rotation_matrix, translation) - return affine_matrix - - -class CanonicalScene(): - def __init__(self, path_OBJ, camera_pose, min_corner, max_corner, - symmetric_transforms, - viewport_size=(128, 128), y_fov=3.14159 / 4.0, - light_intensity=[0.5, 30]): - self.light_intensity = light_intensity - self.symmetric_transforms = symmetric_transforms - self.min_corner, self.max_corner = min_corner, max_corner - self.scene = Scene(bg_color=[0, 0, 0, 0]) - self.light = self._build_light(light_intensity, camera_pose) - self.camera = self._build_camera(y_fov, viewport_size, camera_pose) - self.pixel_mesh = self.scene.add(color_object(path_OBJ)) - self.mesh = self.scene.add( - Mesh.from_trimesh(trimesh.load(path_OBJ), smooth=True)) - - self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) - - self.flags_RGBA = RenderFlags.RGBA - self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT - - def _build_light(self, light, pose): - directional_light = DirectionalLight([1.0, 1.0, 1.0], np.mean(light)) - directional_light = self.scene.add(directional_light, pose=pose) - return directional_light - - def _build_camera(self, y_fov, viewport_size, pose): - aspect_ratio = 
np.divide(*viewport_size) - camera = PerspectiveCamera(y_fov, aspectRatio=aspect_ratio) - camera = self.scene.add(camera, pose=pose) - return camera - - def _sample_parameters(self, min_corner, max_corner): - mesh_transform = sample_affine_transform(min_corner, max_corner) - light_intensity = sample_uniformly(self.light_intensity) - return mesh_transform, light_intensity - - def render(self): - mesh_transform, light_intensity = self._sample_parameters( - self.min_corner, self.max_corner) - mesh_rotation = mesh_transform[0:3, 0:3] - canonical_rotation = calculate_canonical_rotation( - mesh_rotation, self.symmetric_transforms) - # mesh_rotation[0:3, 0:3] = canonical_rotation - canonical_rotation = np.dot(mesh_rotation, canonical_rotation) - mesh_rotation[0:3, 0:3] = canonical_rotation - self.scene.set_pose(self.mesh, mesh_transform) - self.scene.set_pose(self.pixel_mesh, mesh_transform) - self.light.light.intensity = light_intensity - - self.pixel_mesh.mesh.is_visible = False - image, depth = self.renderer.render(self.scene, self.flags_RGBA) - self.pixel_mesh.mesh.is_visible = True - image, alpha = split_alpha_channel(image) - self.mesh.mesh.is_visible = False - RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) - self.mesh.mesh.is_visible = True - return image, alpha, RGB_mask - - def render_symmetries(self): - images, alphas, RGB_masks = [], [], [] - for rotation in self.symmetric_transforms: - symmetric_transform = to_affine_matrix(rotation, np.zeros(3)) - self.scene.set_pose(self.mesh, symmetric_transform) - self.scene.set_pose(self.pixel_mesh, symmetric_transform) - self.pixel_mesh.mesh.is_visible = False - image, depth = self.renderer.render(self.scene, self.flags_RGBA) - self.pixel_mesh.mesh.is_visible = True - image, alpha = split_alpha_channel(image) - self.mesh.mesh.is_visible = False - RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) - self.mesh.mesh.is_visible = True - images.append(image) - alphas.append(alpha) - RGB_masks.append(RGB_mask[..., 0:3]) - images = np.concatenate(images, axis=1) - RGB_masks = np.concatenate(RGB_masks, axis=1) - # print(images.shape) - # print(RGB_masks.shape) - images = np.concatenate([images, RGB_masks], axis=0) - return images - - -def compute_norm_SO3(rotation_mesh, rotation): - difference = np.dot(np.linalg.inv(rotation), rotation_mesh) - np.eye(3) - distance = np.linalg.norm(difference, ord='fro') - return distance - - -def calculate_canonical_rotation(rotation_mesh, rotations): - norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] - closest_rotation_arg = np.argmin(norms) - # print(closest_rotation_arg) - closest_rotation = rotations[closest_rotation_arg] - canonical_rotation = np.linalg.inv(closest_rotation) - return canonical_rotation - - -if __name__ == "__main__": - import os - from paz.backend.image import show_image - from backend import build_rotation_matrix_z - from backend import build_rotation_matrix_x - # from backend import build_rotation_matrix_y - path_OBJ = 'single_solar_panel_02.obj' - root_path = os.path.expanduser('~') - path_OBJ = os.path.join(root_path, path_OBJ) - num_occlusions = 1 - image_shape = (128, 128, 3) - viewport_size = image_shape[:2] - y_fov = 3.14159 / 4.0 - distance = [1.0, 1.0] - light = [1.0, 30] - - # min_corner = [-0.1, -0.1, -0.0] - # max_corner = [+0.1, +0.1, +0.4] - angles = np.linspace(0, 2 * np.pi, 7)[:6] - symmetric_rotations = np.array( - [build_rotation_matrix_z(angle) for angle in angles]) - min_corner = [0.0, 0.0, -0.4] - max_corner = [0.0, 0.0, +0.0] - # 
translation = np.array([0.0, 0.0, 1.0]) - translation = np.array([0.0, 0.0, 1.0]) - camera_rotation = np.eye(3) - camera_rotation = build_rotation_matrix_x(np.pi) - translation = np.array([0.0, 0.0, -1.0]) - camera_pose = to_affine_matrix(camera_rotation, translation) - renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, - max_corner, symmetric_rotations) - # from pyrender import Viewer - # Viewer(scene.scene) - renderer.scene.ambient_light = [1.0, 1.0, 1.0] - image = renderer.render_symmetries() - show_image(image) - for _ in range(0): - image, alpha, RGB_mask = renderer.render() - show_image(image) - show_image(RGB_mask[:, :, 0:3]) - - from pipelines import DomainRandomization - from paz.abstract.sequence import GeneratingSequence - from loss import WeightedReconstruction - from paz.models import UNET_VGG16 - from tensorflow.keras.optimizers import Adam - from metrics import mean_squared_error - import glob - - background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' - background_wildcard = os.path.join(root_path, background_wildcard) - image_paths = glob.glob(background_wildcard) - - H, W, num_channels = image_shape - batch_size = 32 - steps_per_epoch = 1000 - beta = 3.0 - num_classes = 3 - learning_rate = 0.001 - max_num_epochs = 5 - - inputs_to_shape = {'input_1': [H, W, num_channels]} - labels_to_shape = {'masks': [H, W, 4]} - - processor = DomainRandomization( - renderer, image_shape, image_paths, inputs_to_shape, - labels_to_shape, num_occlusions) - - sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) - - # build all symmetric rotations for solar pannel - angles = np.linspace(0, 2 * np.pi, 7)[:6] - rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) - - # loss = WeightedSymmetricReconstruction(rotations, beta) - loss = WeightedReconstruction(beta) - - model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) - optimizer = Adam(learning_rate) - - model.compile(optimizer, loss, mean_squared_error) - - model.fit( - sequence, - epochs=max_num_epochs, - verbose=1, - workers=0) - model.save_weights('UNET-VGG_solar_panel_canonical.hdf5') - diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index 3b94a655c..1cbb9b9a7 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -9,6 +9,7 @@ from backend import to_affine_matrix from backend import sample_affine_transform from backend import calculate_canonical_rotation +from paz.models import UNET_VGG16 class PixelMaskRenderer(): @@ -147,3 +148,88 @@ def render_symmetries(self): RGB_masks = np.concatenate(RGB_masks, axis=1) images = np.concatenate([images, RGB_masks], axis=0) return images + + +if __name__ == "__main__": + import os + from paz.backend.image import show_image + from backend import build_rotation_matrix_x + from backend import build_rotation_matrix_z + from backend import build_rotation_matrix_y + from paz.backend.render import compute_modelview_matrices + from pipelines import DomainRandomization + import glob + + # generic parameters + root_path = os.path.expanduser('~') + num_occlusions = 1 + image_shape = (128, 128, 3) + viewport_size = image_shape[:2] + y_fov = 3.14159 / 4.0 + light = [1.0, 30] + + # solar panel parameters + """ + OBJ_name = 'single_solar_panel_02.obj' + path_OBJ = os.path.join(root_path, OBJ_name) + angles = np.linspace(0, 2 * np.pi, 7)[:6] + symmetries = np.array([build_rotation_matrix_z(angle) for angle in angles]) + camera_rotation = build_rotation_matrix_x(np.pi) + translation = np.array([0.0, 
0.0, -1.0]) + camera_pose = to_affine_matrix(camera_rotation, translation) + min_corner = [0.0, 0.0, -0.4] + max_corner = [0.0, 0.0, +0.0] + """ + + # large clamp parameters + # REMEMBER TO CHANGE THE Ns coefficient to values between [0, 1] in + # textured.mtl. For example change 96.07 to .967 + OBJ_name = '.keras/paz/datasets/ycb_models/051_large_clamp/textured.obj' + path_OBJ = os.path.join(root_path, OBJ_name) + translation = np.array([0.0, 0.0, 0.25]) + camera_pose, y = compute_modelview_matrices(translation, np.zeros((3))) + align_z = build_rotation_matrix_z(np.pi / 20) + camera_pose[:3, :3] = np.matmul(align_z, camera_pose[:3, :3]) + min_corner = [-0.05, -0.02, -0.05] + max_corner = [+0.05, +0.02, +0.01] + + angles = [0.0, np.pi] + symmetries = np.array([build_rotation_matrix_y(angle) for angle in angles]) + renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, + max_corner, symmetries) + renderer.scene.ambient_light = [1.0, 1.0, 1.0] + image = renderer.render_symmetries() + show_image(image) + for arg in range(0): + image, alpha, RGB_mask = renderer.render() + show_image(RGB_mask[:, :, 0:3]) + + model = UNET_VGG16(3, image_shape, freeze_backbone=True) + model.load_weights('UNET-VGG_large_clamp_canonical_10.hdf5') + + background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' + background_wildcard = os.path.join(root_path, background_wildcard) + image_paths = glob.glob(background_wildcard) + + H, W, num_channels = image_shape = (128, 128, 3) + inputs_to_shape = {'input_1': [H, W, num_channels]} + labels_to_shape = {'masks': [H, W, 4]} + processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) + + for arg in range(100): + sample = processor() + image = sample['inputs']['input_1'] + image = (image * 255.0).astype('uint8') + RGB_mask = sample['labels']['masks'] + # image, alpha, RGB_mask = renderer.render() + RGB_mask_true = (RGB_mask[:, :, 0:3] * 255.0).astype('uint8') + RGB_mask_pred = model.predict(np.expand_dims(image / 255.0, 0)) + RGB_mask_pred = np.squeeze(RGB_mask_pred * 255.0, 0) + # error = np.square(RGB_mask_true - RGB_mask_pred) + # error = RGB_mask_pred - RGB_mask + RGB_mask_pred = RGB_mask_pred.astype('uint8') + print(image.dtype, RGB_mask_pred.dtype, RGB_mask_true.dtype) + images = np.concatenate([image, RGB_mask_pred, RGB_mask_true], axis=1) + show_image(images) diff --git a/examples/pix2pose/test_rotated_image.py b/examples/pix2pose/test_rotated_image.py index 8383744c2..9c383edea 100644 --- a/examples/pix2pose/test_rotated_image.py +++ b/examples/pix2pose/test_rotated_image.py @@ -1,15 +1,29 @@ import numpy as np import os import glob -from paz.backend.image import show_image +from paz.backend.image import show_image, resize_image +from paz.models import UNET_VGG16 +from paz.abstract import GeneratingSequence +from paz.backend.camera import Camera +from pipelines import Pix2Pose +from pipelines import EstimatePoseMasks +from pipelines import DomainRandomization from backend import build_rotation_matrix_z, rotate_image -from backend import normalized_device_coordinates_to_image -from backend import image_to_normalized_device_coordinates from scenes import PixelMaskRenderer +from backend import build_rotation_matrix_x, build_rotation_matrix_y +from backend import denormalize_points2D +from processors import SolveChangingObjectPnPRANSAC +from paz.backend.quaternion import rotation_vector_to_quaternion +from paz.abstract.messages import Pose6D +from backend import build_cube_points3D +from 
backend import draw_poses6D +from paz.backend.image import load_image +from backend import draw_masks + scale = 4 -image_shape = [128 * scale, 128 * scale, 3] +H, W, num_channels = image_shape = [128, 128, 3] root_path = os.path.expanduser('~') background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' background_wildcard = os.path.join(root_path, background_wildcard) @@ -24,23 +38,81 @@ light = [1.0, 30] top_only = False roll = 3.14159 -shift = 0.05 +shift = 0 # %0.05 +batch_size = 32 +steps_per_epoch = 1000 + +image_size = [128, 128] +focal_length = image_size[1] +image_center = (image_size[1] / 2.0, image_size[0] / 2.0) +camera_intrinsics = np.array([[focal_length, 0, image_center[0]], + [0, focal_length, image_center[1]], + [0, 0, 1]]) + + +image_shape = (128, 128, 3) +num_classes = 3 +model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +model.load_weights('weights/UNET-VGG_solar_panel_canonical_13.hdf5') +object_sizes_list = [15000, 15000, 2000] +object_sizes = np.array(object_sizes_list) +cube_points = object_sizes +cube_points3D = build_cube_points3D(*object_sizes) +epsilon = 0.15 +estimate_keypoints = Pix2Pose(model, object_sizes, epsilon, True) +print(object_sizes) +predict_pose = SolveChangingObjectPnPRANSAC(camera_intrinsics, 5, 100) + + +def quick_pose(image): + image = resize_image(image, (128, 128)) + keypoints = estimate_keypoints(image) + points2D = keypoints['points2D'] + points3D = keypoints['points3D'] + # points3D[:, 2:3] = 0.0 + points2D = denormalize_points2D(points2D, 128, 128) + success, rotation, translation = predict_pose(points3D, points2D) + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, 'solar_panel') + poses6D = [pose6D] + # show_image(image) + points = [[points2D, points3D]] + image = draw_masks(image, points, object_sizes) + image = image.astype('float') + image = draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics) + image = image.astype('uint8') + image = resize_image(image, (256, 256)) + show_image(image) + + +image = load_image('zed_left_1011.png') +image = image[250:800, 250:850, :] +quick_pose(image) + +image = load_image('MicrosoftTeams-image.png') +quick_pose(image) + +image = load_image('zed_left_705.png') +image = image[250:1080, 250:1400, :] +quick_pose(image) renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, light, top_only, roll, shift) renderer.scene.ambient_light = [1.0, 1.0, 1.0] -for _ in range(3): - image, alpha, RGB_mask = renderer.render() - RGB_mask = RGB_mask[..., 0:3] - show_image(image) - show_image(RGB_mask) - angles = np.linspace(0, 2 * np.pi, 7)[0:6] - images = [] - for angle in angles: - rotation_matrix = build_rotation_matrix_z(angle) - rotated_image = rotate_image(RGB_mask, rotation_matrix) - rotated_image = rotated_image.astype('uint8') - images.append(rotated_image) - images = np.concatenate(images, axis=1) - show_image(images) +inputs_to_shape = {'input_1': [H, W, num_channels]} +labels_to_shape = {'masks': [H, W, 4]} + +processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) + +for _ in range(100): + sample = processor() + image = sample['inputs']['input_1'] + masks = sample['labels']['masks'] + image = (image * 255).astype('uint8') + # image, alpha, RGB_mask = renderer.render() + # show_image((image * 255).astype('uint8')) + quick_pose(image) + # show_image(images) diff --git a/examples/pix2pose/train_canonical_transform.py 
b/examples/pix2pose/train_canonical_transform.py index c9387e77c..49de71cef 100644 --- a/examples/pix2pose/train_canonical_transform.py +++ b/examples/pix2pose/train_canonical_transform.py @@ -6,26 +6,36 @@ from paz.backend.image import show_image from paz.models import UNET_VGG16 from paz.abstract.sequence import GeneratingSequence +from paz.backend.render import compute_modelview_matrices from scenes import CanonicalScene from backend import build_rotation_matrix_z from backend import build_rotation_matrix_x +from backend import build_rotation_matrix_y from backend import to_affine_matrix from pipelines import DomainRandomization from loss import WeightedReconstruction from metrics import mean_squared_error -path_OBJ = 'single_solar_panel_02.obj' root_path = os.path.expanduser('~') -path_OBJ = os.path.join(root_path, path_OBJ) num_occlusions = 1 image_shape = (128, 128, 3) viewport_size = image_shape[:2] y_fov = 3.14159 / 4.0 -distance = [1.0, 1.0] light = [1.0, 30] +# training parameters +H, W, num_channels = image_shape +batch_size = 32 +steps_per_epoch = 1000 +beta = 3.0 +num_classes = 3 +learning_rate = 0.001 +max_num_epochs = 5 + +""" +path_OBJ = 'single_solar_panel_02.obj' angles = np.linspace(0, 2 * np.pi, 7)[:6] symmetric_rotations = np.array( [build_rotation_matrix_z(angle) for angle in angles]) @@ -34,30 +44,35 @@ camera_rotation = build_rotation_matrix_x(np.pi) translation = np.array([0.0, 0.0, -1.0]) camera_pose = to_affine_matrix(camera_rotation, translation) +""" + +# large clamp parameters +# REMEMBER TO CHANGE THE Ns coefficient to values between [0, 1] in +# textured.mtl. For example change 96.07 to .967 +OBJ_name = '.keras/paz/datasets/ycb_models/051_large_clamp/textured.obj' +translation = np.array([0.0, 0.0, 0.25]) +camera_pose, y = compute_modelview_matrices(translation, np.zeros((3))) +align_z = build_rotation_matrix_z(np.pi / 20) +camera_pose[:3, :3] = np.matmul(align_z, camera_pose[:3, :3]) +min_corner = [-0.05, -0.02, -0.05] +max_corner = [+0.05, +0.02, +0.01] + +angles = [0.0, np.pi] +symmetric_rotations = np.array( + [build_rotation_matrix_y(angle) for angle in angles]) + + +path_OBJ = os.path.join(root_path, OBJ_name) renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, max_corner, symmetric_rotations) -# from pyrender import Viewer -# Viewer(scene.scene) renderer.scene.ambient_light = [1.0, 1.0, 1.0] image = renderer.render_symmetries() show_image(image) -for _ in range(100): - image, alpha, RGB_mask = renderer.render() - show_image(image) - show_image(RGB_mask[:, :, 0:3]) background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' background_wildcard = os.path.join(root_path, background_wildcard) image_paths = glob.glob(background_wildcard) -H, W, num_channels = image_shape -batch_size = 32 -steps_per_epoch = 1000 -beta = 3.0 -num_classes = 3 -learning_rate = 0.001 -max_num_epochs = 5 - inputs_to_shape = {'input_1': [H, W, num_channels]} labels_to_shape = {'masks': [H, W, 4]} @@ -77,11 +92,9 @@ optimizer = Adam(learning_rate) model.compile(optimizer, loss, mean_squared_error) -""" model.fit( sequence, epochs=max_num_epochs, verbose=1, workers=0) -model.save_weights('UNET-VGG_solar_panel_canonical.hdf5') -""" +model.save_weights('UNET-VGG_large_clamp_canonical.hdf5') diff --git a/examples/pix2pose/train_symmetric.py b/examples/pix2pose/train_symmetric.py index cd6f85376..313658ff8 100644 --- a/examples/pix2pose/train_symmetric.py +++ b/examples/pix2pose/train_symmetric.py @@ -9,6 +9,7 @@ from scenes import PixelMaskRenderer from pipelines import 
DomainRandomization from loss import WeightedSymmetricReconstruction +from loss import WeightedReconstruction from metrics import mean_squared_error image_shape = [128, 128, 3] @@ -22,7 +23,8 @@ num_occlusions = 1 viewport_size = image_shape[:2] y_fov = 3.14159 / 4.0 -distance = [0.3, 0.5] +distance = [1.0, 1.0] +# distance = [0.3, 0.5] light = [1.0, 30] top_only = False roll = 3.14159 @@ -32,9 +34,9 @@ alpha = 0.1 filters = 16 num_classes = 3 -learning_rate = 0.001 +learning_rate = 0.0001 max_num_epochs = 5 -beta = 3.0 +beta = 10.0 steps_per_epoch = 1000 H, W, num_channels = image_shape = [128, 128, 3] @@ -43,6 +45,29 @@ light, top_only, roll, shift) # check why this is needed in this object renderer.scene.ambient_light = [1.0, 1.0, 1.0] +# pose = np.eye(4) +# pose[1, 1] = +np.cos(np.deg2rad(90)) +# pose[1, 2] = -np.sin(np.deg2rad(90)) +# pose[2, 2] = +np.cos(np.deg2rad(90)) +# pose[2, 1] = +np.sin(np.deg2rad(90)) +# renderer.scene.set_pose(renderer.mesh, pose) +# renderer.scene.set_pose(renderer.pixel_mesh, pose) +from paz.backend.image import show_image +from backend import rotate_image +for _ in range(0): + image, alpha, RGB_mask = renderer.render() + RGB_mask = RGB_mask[..., 0:3] + show_image(RGB_mask) + angles = np.linspace(0, 2 * np.pi, 7)[0:6] + images = [] + for angle in angles: + rotation_matrix = build_rotation_matrix_z(angle) + rotated_image = rotate_image(RGB_mask, rotation_matrix) + rotated_image = rotated_image.astype('uint8') + images.append(rotated_image) + images = np.concatenate(images, axis=1) + show_image(images) + inputs_to_shape = {'input_1': [H, W, num_channels]} labels_to_shape = {'masks': [H, W, 4]} @@ -58,16 +83,19 @@ rotations = np.array([build_rotation_matrix_z(angle) for angle in angles]) loss = WeightedSymmetricReconstruction(rotations, beta) +# loss = WeightedReconstruction(beta) -model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +model = UNET_VGG16(num_classes, image_shape, freeze_backbone=False) optimizer = Adam(learning_rate) model.compile(optimizer, loss, mean_squared_error) +model.load_weights('UNET_solar_panel_weights_notsym_04.hdf5') model.fit( sequence, epochs=max_num_epochs, verbose=1, workers=0) -model.save_weights('UNET_VGG_symmetric_weights.hdf5') +model.save_weights('UNET-VGG_solar_panel_weights_not_and_symmetric.hdf5') + From 8111c1194a66aaac4841d4ddce0431eb86f182a2 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 30 Nov 2021 13:15:11 +0100 Subject: [PATCH 069/101] Add single drawing mask function --- examples/pix2pose/backend.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 5555d33fc..bbc3fcdb2 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -258,6 +258,12 @@ def points3D_to_RGB(points3D, object_sizes): return colors +def draw_mask(image, points2D, points3D, object_sizes): + colors = points3D_to_RGB(points3D, object_sizes) + image = draw_points2D(image, points2D, colors) + return image + + # TODO change to processor def draw_masks(image, points, object_sizes): for points2D, points3D in points: From 042630f69e885d44fc9c992f1886eeeb5fb0f398 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 30 Nov 2021 13:15:40 +0100 Subject: [PATCH 070/101] Add additional pipelines for single inference visualization --- examples/pix2pose/pipelines2.py | 294 ++++++++++++++++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 examples/pix2pose/pipelines2.py diff --git a/examples/pix2pose/pipelines2.py 
b/examples/pix2pose/pipelines2.py new file mode 100644 index 000000000..06357bc17 --- /dev/null +++ b/examples/pix2pose/pipelines2.py @@ -0,0 +1,294 @@ +from paz.abstract import SequentialProcessor, Processor +from paz.pipelines import RandomizeRenderedImage as RandomizeRender +from paz.abstract.messages import Pose6D +from paz import processors as pr +from processors import ( + GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, + ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC, + ReplaceLowerThanThreshold) +from backend import build_cube_points3D +from processors import UnwrapDictionary +from processors import NormalizePoints2D +from backend import denormalize_points2D +from backend import draw_poses6D +from backend import draw_masks +from backend import draw_mask +from backend import draw_pose6D +from backend import normalize_points2D +from paz.backend.quaternion import rotation_vector_to_quaternion +from paz.backend.image import resize_image, show_image + + +class DomainRandomization(SequentialProcessor): + """Performs domain randomization on a rendered image + """ + def __init__(self, renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions=1): + super(DomainRandomization, self).__init__() + H, W = image_shape[:2] + self.add(pr.Render(renderer)) + self.add(pr.ControlMap(RandomizeRender(image_paths), [0, 1], [0])) + self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) + self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) + self.add(pr.SequenceWrapper({0: inputs_to_shape}, + {1: labels_to_shape})) + + +class PredictRGBMask(SequentialProcessor): + def __init__(self, model, epsilon=0.15): + super(PredictRGBMask, self).__init__() + self.add(pr.ResizeImage(model.input_shape[1:3])) + self.add(pr.NormalizeImage()) + self.add(pr.ExpandDims(0)) + self.add(pr.Predict(model)) + self.add(pr.Squeeze(0)) + self.add(ReplaceLowerThanThreshold(epsilon)) + self.add(pr.DenormalizeImage()) + self.add(pr.CastImage('uint8')) + + +class RGBMaskToObjectPoints3D(SequentialProcessor): + def __init__(self, object_sizes): + super(RGBMaskToObjectPoints3D, self).__init__() + self.add(GetNonZeroValues()) + self.add(ImageToNormalizedDeviceCoordinates()) + self.add(Scale(object_sizes / 2.0)) + + +class RGBMaskToImagePoints2D(SequentialProcessor): + def __init__(self, output_shape): + super(RGBMaskToImagePoints2D, self).__init__() + self.add(GetNonZeroArguments()) + self.add(ArgumentsToImagePoints2D()) + # self.add(NormalizePoints2D(output_shape)) + + +class SolveChangingObjectPnP(SequentialProcessor): + def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): + super(SolveChangingObjectPnP, self).__init__() + self.MINIMUM_REQUIRED_POINTS = 4 + self.add(SolveChangingObjectPnPRANSAC( + camera_intrinsics, inlier_thresh, num_iterations)) + + +class Pix2Points(pr.Processor): + def __init__(self, model, object_sizes, epsilon=0.15, + resize=True, draw=True): + self.object_sizes = object_sizes + self.predict_RGBMask = PredictRGBMask(model, epsilon) + self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) + self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) + self.resize, self.draw = resize, draw + self.wrap = pr.WrapOutput( + ['image', 'points2D', 'points3D', 'RGB_mask']) + + def call(self, image): + RGB_mask = self.predict_RGBMask(image) + H, W, num_channels = image.shape + if self.resize: + RGB_mask = resize_image(RGB_mask, (W, H)) + points3D = self.mask_to_points3D(RGB_mask) + points2D = 
self.mask_to_points2D(RGB_mask) + points2D = normalize_points2D(points2D, W, H) + if self.draw: + image = draw_mask(image, points2D, points3D, self.object_sizes) + return self.wrap(image, points2D, points3D, RGB_mask) + + +class Pix2Pose(pr.Processor): + def __init__(self, model, object_sizes, camera, epsilon=0.15, + class_name=None, resize=True, draw=True): + self.pix2points = Pix2Points( + model, object_sizes, epsilon, resize, False) + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.MIN_REQUIRED_POINTS = self.predict_pose.MINIMUM_REQUIRED_POINTS + self.class_name = str(class_name) if class_name is None else class_name + self.object_sizes = object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.camera = camera + self.draw = draw + + def call(self, image, box2D=None): + results = self.pix2points(image) + points2D, points3D = results['points2D'], results['points3D'] + H, W, num_channels = image.shape + points2D = denormalize_points2D(points2D, H, W) + if box2D is not None: + points2D = self.change_coordinates(points2D, box2D) + + valid_num_points = len(points3D) > self.MIN_REQUIRED_POINTS + if valid_num_points: + success, rotation, translation = self.predict_pose(points3D, + points2D) + if success and valid_num_points: + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, self.class_name) + else: + pose6D = None + + if self.draw: + image = draw_mask(image, points2D, points3D, self.object_sizes) + image = draw_pose6D(image, pose6D, self.cube_points3D, + self.camera.intrinsics) + results['pose6D'], results['image'] = pose6D, image + return results + + +class EstimatePoseMasks(Processor): + def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): + """Pose estimation pipeline using keypoints. 
+ """ + super(EstimatePoseMasks, self).__init__() + self.detect = detect + self.estimate_keypoints = estimate_keypoints + self.camera = camera + self.draw = draw + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(['035_power_drill']), + # pr.FilterClassBoxes2D(['solar_panel']), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.unwrap = UnwrapDictionary(['points2D', 'points3D']) + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + # self.draw_boxes2D = pr.DrawBoxes2D(['solar_panel']) + self.object_sizes = self.estimate_keypoints.object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + # affine_matrix = build_rotation_matrix_z(3.14156 / 6) + # self.cube_points3D = np.matmul(affine_matrix, self.cube_points3D.T).T + # 25000, + # self.cube_points3D = self.cube_points3D + np.array([5000, 5000, 0]) + + def call(self, image): + from paz.abstract.messages import Box2D + detections = self.detect(image) + # detections = {'boxes2D': [Box2D([320, 280, 1300, 1060], 1.0, 'solar_panel')]} + boxes2D = self.postprocess_boxes(detections) + # boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points = [], [] + for crop, box2D in zip(cropped_images, boxes2D): + points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) + points2D = denormalize_points2D(points2D, *crop.shape[0:2]) + points2D = self.change_coordinates(points2D, box2D) + if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: + continue + success, rotation, translation = self.predict_pose( + points3D, points2D) + if success is False: + continue + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, box2D.class_name) + poses6D.append(pose6D), points.append([points2D, points3D]) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + image = draw_masks(image, points, self.object_sizes) + image = draw_poses6D(image, poses6D, self.cube_points3D, + self.camera.intrinsics) + return self.wrap(image, boxes2D, poses6D) + + + +class Pix2Pose2(pr.Processor): + def __init__(self, model, object_sizes, epsilon=0.15, + class_name=None, with_resize=True, draw=True): + self.object_sizes = object_sizes + self.predict_RGBMask = PredictRGBMask(model, epsilon) + self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) + self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.wrap = pr.WrapOutput(['image', 'points3D', 'points2D', 'RGB_mask']) + self.with_resize = with_resize + self.class_name = str(class_name) if class_name is None else class_name + self.draw = draw + + def call(self, image): + RGB_mask = self.predict_RGBMask(image) + H, W, num_channels = image.shape + if self.with_resize: + RGB_mask = resize_image(RGB_mask, (W, H)) + points3D = self.mask_to_points3D(RGB_mask) + points2D = self.mask_to_points2D(RGB_mask) + points2D = normalize_points2D(points2D, (W, H)) + if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: + pose6D = None + success, rotation, translation = self.predict_pose(points3D, points2D) + if success is False: + pose6D = None + quaternion = 
rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, self.class_name) + if self.draw: + image = draw_mask(image, points2D, points3D, self.object_sizes) + image = draw_pose6D(image, pose6D, self.cube_points3D, self.camera.intrinsics) + return self.wrap(image, points3D, points2D, RGB_mask) + + +class EstimatePoseMasks(Processor): + def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): + """Pose estimation pipeline using keypoints. + """ + super(EstimatePoseMasks, self).__init__() + self.detect = detect + self.estimate_keypoints = estimate_keypoints + self.camera = camera + self.draw = draw + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + # pr.FilterClassBoxes2D(['035_power_drill']), + pr.FilterClassBoxes2D(['solar_panel']), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.unwrap = UnwrapDictionary(['points2D', 'points3D']) + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + # self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.draw_boxes2D = pr.DrawBoxes2D(['solar_panel']) + self.object_sizes = self.estimate_keypoints.object_sizes + from backend import build_rotation_matrix_z + import numpy as np + self.cube_points3D = build_cube_points3D(*self.object_sizes) + affine_matrix = build_rotation_matrix_z(3.14156 / 6) + self.cube_points3D = np.matmul(affine_matrix, self.cube_points3D.T).T + # 25000, + # self.cube_points3D = self.cube_points3D + np.array([5000, 5000, 0]) + + def call(self, image): + from paz.abstract.messages import Box2D + detections = self.detect(image) + detections = {'boxes2D': [Box2D([320, 280, 1300, 1060], 1.0, 'solar_panel')]} + boxes2D = self.postprocess_boxes(detections) + # boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points = [], [] + for crop, box2D in zip(cropped_images, boxes2D): + points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) + points2D = denormalize_points2D(points2D, *crop.shape[0:2]) + points2D = self.change_coordinates(points2D, box2D) + if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: + continue + success, rotation, translation = self.predict_pose( + points3D, points2D) + if success is False: + continue + print('ROTATION', rotation.shape) + quaternion = rotation_vector_to_quaternion(rotation) + print('QUATERNION', quaternion.shape) + pose6D = Pose6D(quaternion, translation, box2D.class_name) + poses6D.append(pose6D), points.append([points2D, points3D]) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + image = draw_masks(image, points, self.object_sizes) + image = draw_poses6D(image, poses6D, self.cube_points3D, + self.camera.intrinsics) + return self.wrap(image, boxes2D, poses6D) From 3cbba0d927ff77b5735c71889df0429d38dbae67 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 30 Nov 2021 16:38:01 +0100 Subject: [PATCH 071/101] Refactor pipelines to work independent from detector --- examples/pix2pose/pipelines3.py | 120 ++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 examples/pix2pose/pipelines3.py diff --git a/examples/pix2pose/pipelines3.py b/examples/pix2pose/pipelines3.py new file mode 100644 index 000000000..0fe383eb7 --- /dev/null +++ 
b/examples/pix2pose/pipelines3.py @@ -0,0 +1,120 @@ +from paz.abstract import SequentialProcessor, Processor +from paz.pipelines import RandomizeRenderedImage as RandomizeRender +from paz.abstract.messages import Pose6D +from paz import processors as pr +from processors import ( + GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, + ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC, + ReplaceLowerThanThreshold) +from backend import build_cube_points3D +from processors import UnwrapDictionary +from processors import NormalizePoints2D +from backend import denormalize_points2D +from backend import draw_poses6D +from backend import draw_masks +from backend import draw_mask +from backend import normalize_points2D +from backend import draw_pose6D +from paz.backend.quaternion import rotation_vector_to_quaternion +from paz.backend.image import resize_image, show_image +from pipelines import SolveChangingObjectPnP +from pipelines import RGBMaskToImagePoints2D, RGBMaskToObjectPoints3D, PredictRGBMask + + +class Pix2Points(pr.Processor): + def __init__(self, model, object_sizes, epsilon=0.15, resize=True): + self.object_sizes = object_sizes + self.predict_RGBMask = PredictRGBMask(model, epsilon) + self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) + self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) + self.resize = resize + self.wrap = pr.WrapOutput(['points2D', 'points3D', 'RGB_mask']) + + def call(self, image): + RGB_mask = self.predict_RGBMask(image) + H, W, num_channels = image.shape + if self.resize: + RGB_mask = resize_image(RGB_mask, (W, H)) + points3D = self.mask_to_points3D(RGB_mask) + points2D = self.mask_to_points2D(RGB_mask) + points2D = normalize_points2D(points2D, H, W) + return self.wrap(points2D, points3D, RGB_mask) + + +class Pix2Pose(pr.Processor): + def __init__(self, model, object_sizes, camera, + epsilon=0.15, class_name=None, draw=True): + + self.pix2points = Pix2Points(model, object_sizes, epsilon, True) + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.class_name = str(class_name) if class_name is None else class_name + self.object_sizes = object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.camera = camera + self.draw = draw + + def call(self, image, box2D=None): + results = self.pix2points(image) + points2D, points3D = results['points2D'], results['points3D'] + H, W, num_channels = image.shape + points2D = denormalize_points2D(points2D, H, W) + if box2D is not None: + points2D = self.change_coordinates(points2D, box2D) + self.class_name = box2D.class_name + + min_num_points = len(points3D) > self.predict_pose.MIN_REQUIRED_POINTS + if min_num_points: + pose_results = self.predict_pose(points3D, points2D) + success, rotation, translation = pose_results + if success and min_num_points: + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, self.class_name) + else: + pose6D = None + if self.draw: + topic = 'image_crop' if box2D is not None else 'image' + image = draw_mask(image, points2D, points3D, self.object_sizes) + image = draw_pose6D(image, pose6D, self.cube_points3D, + self.camera.intrinsics) + results[topic] = image + results['points2D'], results['pose6D'] = points2D, pose6D + return results + + +class EstimatePoseMasks(Processor): + def __init__(self, detect, estimate_pose, offsets, draw=True): + """Pose estimation pipeline using keypoints. 
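        Given a full image, `detect` proposes boxes2D; these are filtered to
        the configured class ('035_power_drill' here), squared, offset,
        clipped and cropped, and `estimate_pose` is run on every crop to
        recover points2D, points3D and a pose6D. With `draw` enabled the
        boxes, colored masks and projected cube poses are drawn back onto
        the full image before wrapping 'image', 'boxes2D' and 'poses6D'.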
+ """ + super(EstimatePoseMasks, self).__init__() + self.detect = detect + self.estimate_pose = estimate_pose + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(['035_power_drill']), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.object_sizes = self.estimate_pose.object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.draw = draw + + def call(self, image): + boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points = [], [] + for crop, box2D in zip(cropped_images, boxes2D): + results = self.estimate_pose(crop, box2D) + pose6D, points2D, points3D = self.unwrap(results) + poses6D.append(pose6D), points.append([points2D, points3D]) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + image = draw_masks(image, points, self.object_sizes) + image = draw_poses6D(image, poses6D, self.cube_points3D, + self.estimate_pose.camera.intrinsics) + return self.wrap(image, boxes2D, poses6D) From 0a8b1f3bbe7ec6e36ef0b69ea1a5861d36e2a5f9 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 30 Nov 2021 17:21:26 +0100 Subject: [PATCH 072/101] Refactor demo to work with new pipelines --- examples/pix2pose/backend.py | 6 +++--- examples/pix2pose/demo.py | 37 ++++++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index bbc3fcdb2..e19f9e948 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -283,9 +283,9 @@ def draw_points2D(image, points2D, colors): # Returns Array with drawn points. 
""" - keypoints = points2D.astype(int) - U = keypoints[:, 0] - V = keypoints[:, 1] + points2D = points2D.astype(int) + U = points2D[:, 0] + V = points2D[:, 1] image[V, U, :] = colors return image diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 00aec2636..a76dc1af8 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -2,26 +2,29 @@ from paz.models import UNET_VGG16 from paz.backend.image import show_image, load_image from paz.backend.camera import Camera -from pipelines import Pix2Pose -from pipelines import EstimatePoseMasks from paz.backend.camera import VideoPlayer from paz.applications import SSD300FAT +# from pipelines import Pix2Pose +# from pipelines import EstimatePoseMasks +from pipelines3 import Pix2Pose +from pipelines3 import EstimatePoseMasks + image_shape = (128, 128, 3) num_classes = 3 model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) -model.load_weights('UNET_weights_epochs-10_beta-3.hdf5') +model.load_weights('weights/UNET_weights_epochs-10_beta-3.hdf5') # approximating intrinsic camera parameters camera = Camera(device_id=0) -camera.start() -image_size = camera.read().shape[0:2] -camera.stop() +# camera.start() +# image_size = camera.read().shape[0:2] +# camera.stop() # image = load_image('test_image2.jpg') -image = load_image('test_image.jpg') +image = load_image('images/test_image.jpg') image_size = image.shape[0:2] focal_length = image_size[1] image_center = (image_size[1] / 2.0, image_size[0] / 2.0) @@ -29,17 +32,31 @@ camera.intrinsics = np.array([[focal_length, 0, image_center[0]], [0, focal_length, image_center[1]], [0, 0, 1]]) -object_sizes = np.array([0.184, 0.187, 0.052]) +# object_sizes = np.array([0.184, 0.187, 0.052]) epsilon = 0.001 score_thresh = 0.50 detect = SSD300FAT(score_thresh, draw=False) offsets = [0.2, 0.2] -estimate_keypoints = Pix2Pose(model, object_sizes, epsilon, True) -pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) +# estimate_keypoints = Pix2Pose(model, object_sizes, epsilon, True) +# pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) + +object_sizes = np.array([1840, 1870, 520]) +# object_sizes = np.array([0.184, 0.187, 0.052]) +estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=False) +# image = image[50:320, 60:320] +# show_image(estimate_pose(image)['image']) +pipeline = EstimatePoseMasks(detect, estimate_pose, offsets, True) +results = pipeline(image) +predicted_image = results['image'] +show_image(predicted_image) + +""" +estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=False) results = pipeline(image) predicted_image = results['image'] show_image(predicted_image) +""" # image_size = (640, 480) # player = VideoPlayer(image_size, pipeline, camera) From 63bc57cf881cf128bc112b98608947c426cd55d8 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 1 Dec 2021 08:19:38 +0100 Subject: [PATCH 073/101] Found bug with mask drawing when box2D is given --- examples/pix2pose/pipelines3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/pix2pose/pipelines3.py b/examples/pix2pose/pipelines3.py index 0fe383eb7..8c70fca96 100644 --- a/examples/pix2pose/pipelines3.py +++ b/examples/pix2pose/pipelines3.py @@ -72,7 +72,8 @@ def call(self, image, box2D=None): pose6D = Pose6D(quaternion, translation, self.class_name) else: pose6D = None - if self.draw: + # change_coordinates puts points2D outside image. 
+ if (self.draw and (box2D is None)): topic = 'image_crop' if box2D is not None else 'image' image = draw_mask(image, points2D, points3D, self.object_sizes) image = draw_pose6D(image, pose6D, self.cube_points3D, From a54c3c19c4fef53fbc3f6de8c13b257d57239224 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 1 Dec 2021 08:21:13 +0100 Subject: [PATCH 074/101] Refactor pipelines for better modularity --- examples/pix2pose/demo.py | 15 +- examples/pix2pose/pipelines.py | 101 ++++++----- examples/pix2pose/pipelines2.py | 294 -------------------------------- examples/pix2pose/pipelines3.py | 121 ------------- 4 files changed, 65 insertions(+), 466 deletions(-) delete mode 100644 examples/pix2pose/pipelines2.py delete mode 100644 examples/pix2pose/pipelines3.py diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index a76dc1af8..5ab9e6416 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -7,8 +7,8 @@ # from pipelines import Pix2Pose # from pipelines import EstimatePoseMasks -from pipelines3 import Pix2Pose -from pipelines3 import EstimatePoseMasks +from pipelines import Pix2Pose +from pipelines import EstimatePoseMasks image_shape = (128, 128, 3) @@ -41,8 +41,7 @@ # pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) object_sizes = np.array([1840, 1870, 520]) -# object_sizes = np.array([0.184, 0.187, 0.052]) -estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=False) +estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=True) # image = image[50:320, 60:320] # show_image(estimate_pose(image)['image']) pipeline = EstimatePoseMasks(detect, estimate_pose, offsets, True) @@ -50,14 +49,6 @@ predicted_image = results['image'] show_image(predicted_image) -""" -estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=False) - -results = pipeline(image) -predicted_image = results['image'] -show_image(predicted_image) -""" - # image_size = (640, 480) # player = VideoPlayer(image_size, pipeline, camera) # player.run() diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index e8f1e4a9d..7383e3571 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -8,13 +8,14 @@ ReplaceLowerThanThreshold) from backend import build_cube_points3D from processors import UnwrapDictionary -from processors import NormalizePoints2D from backend import denormalize_points2D from backend import draw_poses6D +from backend import draw_pose6D from backend import draw_masks +from backend import draw_mask from backend import normalize_points2D from paz.backend.quaternion import rotation_vector_to_quaternion -from paz.backend.image import resize_image, show_image +from paz.backend.image import resize_image class DomainRandomization(SequentialProcessor): @@ -27,12 +28,7 @@ def __init__(self, renderer, image_shape, image_paths, inputs_to_shape, self.add(pr.Render(renderer)) self.add(pr.ControlMap(RandomizeRender(image_paths), [0, 1], [0])) self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) - # self.add(pr.ControlMap(ImageToClosedOneBall(), [1], [1])) self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) - """ - self.add(pr.SequenceWrapper({0: {'input_1': [H, W, 3]}}, - {1: {'masks': [H, W, 4]}})) - """ self.add(pr.SequenceWrapper({0: inputs_to_shape}, {1: labels_to_shape})) @@ -63,45 +59,84 @@ def __init__(self, output_shape): super(RGBMaskToImagePoints2D, self).__init__() self.add(GetNonZeroArguments()) self.add(ArgumentsToImagePoints2D()) - # 
self.add(NormalizePoints2D(output_shape)) class SolveChangingObjectPnP(SequentialProcessor): def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): super(SolveChangingObjectPnP, self).__init__() - self.MINIMUM_REQUIRED_POINTS = 4 + self.MIN_REQUIRED_POINTS = 4 self.add(SolveChangingObjectPnPRANSAC( camera_intrinsics, inlier_thresh, num_iterations)) -class Pix2Pose(pr.Processor): - def __init__(self, model, object_sizes, epsilon=0.15, with_resize=True): +class Pix2Points(pr.Processor): + def __init__(self, model, object_sizes, epsilon=0.15, resize=True): self.object_sizes = object_sizes self.predict_RGBMask = PredictRGBMask(model, epsilon) self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) - self.wrap = pr.WrapOutput(['points3D', 'points2D', 'RGB_mask']) - self.with_resize = with_resize + self.resize = resize + self.wrap = pr.WrapOutput(['points2D', 'points3D', 'RGB_mask']) def call(self, image): RGB_mask = self.predict_RGBMask(image) - if self.with_resize: - RGB_mask = resize_image(RGB_mask, image.shape[:2][::-1]) + H, W, num_channels = image.shape + if self.resize: + RGB_mask = resize_image(RGB_mask, (W, H)) points3D = self.mask_to_points3D(RGB_mask) points2D = self.mask_to_points2D(RGB_mask) - points2D = normalize_points2D(points2D, *image.shape[:2][::-1]) - return self.wrap(points3D, points2D, RGB_mask) + points2D = normalize_points2D(points2D, H, W) + return self.wrap(points2D, points3D, RGB_mask) + + +class Pix2Pose(pr.Processor): + def __init__(self, model, object_sizes, camera, + epsilon=0.15, class_name=None, draw=True): + self.pix2points = Pix2Points(model, object_sizes, epsilon, True) + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.class_name = str(class_name) if class_name is None else class_name + self.object_sizes = object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.camera = camera + self.draw = draw + + def call(self, image, box2D=None): + results = self.pix2points(image) + points2D, points3D = results['points2D'], results['points3D'] + H, W, num_channels = image.shape + points2D = denormalize_points2D(points2D, H, W) + if box2D is not None: + points2D = self.change_coordinates(points2D, box2D) + self.class_name = box2D.class_name + + min_num_points = len(points3D) > self.predict_pose.MIN_REQUIRED_POINTS + if min_num_points: + pose_results = self.predict_pose(points3D, points2D) + success, rotation, translation = pose_results + if success and min_num_points: + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, self.class_name) + else: + pose6D = None + # change_coordinates puts points2D outside image. + if (self.draw and (box2D is None)): + topic = 'image_crop' if box2D is not None else 'image' + image = draw_mask(image, points2D, points3D, self.object_sizes) + image = draw_pose6D(image, pose6D, self.cube_points3D, + self.camera.intrinsics) + results[topic] = image + results['points2D'], results['pose6D'] = points2D, pose6D + return results class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): + def __init__(self, detect, estimate_pose, offsets, draw=True): """Pose estimation pipeline using keypoints. 
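        `detect` is a detection pipeline (SSD300FAT in demo.py),
        `estimate_pose` a Pix2Pose instance that already carries the model,
        object_sizes and camera, `offsets` the box2D offset factors, and
        `draw` toggles drawing of boxes, masks and cube poses.

        Intended wiring, mirroring demo.py from this same patch (model
        weights, object_sizes, camera intrinsics and epsilon are set up
        there; the values shown are illustrative, not prescriptive):

            model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True)
            model.load_weights('weights/UNET_weights_epochs-10_beta-3.hdf5')
            detect = SSD300FAT(score_thresh, draw=False)
            estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon)
            pipeline = EstimatePoseMasks(detect, estimate_pose, offsets=[0.2, 0.2])
            results = pipeline(image)  # keys: 'image', 'boxes2D', 'poses6D'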
""" super(EstimatePoseMasks, self).__init__() self.detect = detect - self.estimate_keypoints = estimate_keypoints - self.camera = camera - self.draw = draw + self.estimate_pose = estimate_pose self.postprocess_boxes = SequentialProcessor( [pr.UnpackDictionary(['boxes2D']), pr.FilterClassBoxes2D(['035_power_drill']), @@ -109,13 +144,12 @@ def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): pr.OffsetBoxes2D(offsets)]) self.clip = pr.ClipBoxes2D() self.crop = pr.CropBoxes2D() - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.unwrap = UnwrapDictionary(['points2D', 'points3D']) self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.object_sizes = self.estimate_keypoints.object_sizes + self.object_sizes = self.estimate_pose.object_sizes self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.draw = draw def call(self, image): boxes2D = self.postprocess_boxes(self.detect(image)) @@ -123,23 +157,12 @@ def call(self, image): cropped_images = self.crop(image, boxes2D) poses6D, points = [], [] for crop, box2D in zip(cropped_images, boxes2D): - points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) - points2D = denormalize_points2D(points2D, *crop.shape[0:2]) - points2D = self.change_coordinates(points2D, box2D) - if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: - continue - success, rotation, translation = self.predict_pose( - points3D, points2D) - if success is False: - continue - print('ROTATION', rotation.shape) - quaternion = rotation_vector_to_quaternion(rotation) - print('QUATERNION', quaternion.shape) - pose6D = Pose6D(quaternion, translation, box2D.class_name) + results = self.estimate_pose(crop, box2D) + pose6D, points2D, points3D = self.unwrap(results) poses6D.append(pose6D), points.append([points2D, points3D]) if self.draw: image = self.draw_boxes2D(image, boxes2D) image = draw_masks(image, points, self.object_sizes) image = draw_poses6D(image, poses6D, self.cube_points3D, - self.camera.intrinsics) + self.estimate_pose.camera.intrinsics) return self.wrap(image, boxes2D, poses6D) diff --git a/examples/pix2pose/pipelines2.py b/examples/pix2pose/pipelines2.py deleted file mode 100644 index 06357bc17..000000000 --- a/examples/pix2pose/pipelines2.py +++ /dev/null @@ -1,294 +0,0 @@ -from paz.abstract import SequentialProcessor, Processor -from paz.pipelines import RandomizeRenderedImage as RandomizeRender -from paz.abstract.messages import Pose6D -from paz import processors as pr -from processors import ( - GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, - ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC, - ReplaceLowerThanThreshold) -from backend import build_cube_points3D -from processors import UnwrapDictionary -from processors import NormalizePoints2D -from backend import denormalize_points2D -from backend import draw_poses6D -from backend import draw_masks -from backend import draw_mask -from backend import draw_pose6D -from backend import normalize_points2D -from paz.backend.quaternion import rotation_vector_to_quaternion -from paz.backend.image import resize_image, show_image - - -class DomainRandomization(SequentialProcessor): - """Performs domain randomization on a rendered image - """ - def __init__(self, renderer, image_shape, image_paths, inputs_to_shape, - 
labels_to_shape, num_occlusions=1): - super(DomainRandomization, self).__init__() - H, W = image_shape[:2] - self.add(pr.Render(renderer)) - self.add(pr.ControlMap(RandomizeRender(image_paths), [0, 1], [0])) - self.add(pr.ControlMap(pr.NormalizeImage(), [0], [0])) - self.add(pr.ControlMap(pr.NormalizeImage(), [1], [1])) - self.add(pr.SequenceWrapper({0: inputs_to_shape}, - {1: labels_to_shape})) - - -class PredictRGBMask(SequentialProcessor): - def __init__(self, model, epsilon=0.15): - super(PredictRGBMask, self).__init__() - self.add(pr.ResizeImage(model.input_shape[1:3])) - self.add(pr.NormalizeImage()) - self.add(pr.ExpandDims(0)) - self.add(pr.Predict(model)) - self.add(pr.Squeeze(0)) - self.add(ReplaceLowerThanThreshold(epsilon)) - self.add(pr.DenormalizeImage()) - self.add(pr.CastImage('uint8')) - - -class RGBMaskToObjectPoints3D(SequentialProcessor): - def __init__(self, object_sizes): - super(RGBMaskToObjectPoints3D, self).__init__() - self.add(GetNonZeroValues()) - self.add(ImageToNormalizedDeviceCoordinates()) - self.add(Scale(object_sizes / 2.0)) - - -class RGBMaskToImagePoints2D(SequentialProcessor): - def __init__(self, output_shape): - super(RGBMaskToImagePoints2D, self).__init__() - self.add(GetNonZeroArguments()) - self.add(ArgumentsToImagePoints2D()) - # self.add(NormalizePoints2D(output_shape)) - - -class SolveChangingObjectPnP(SequentialProcessor): - def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): - super(SolveChangingObjectPnP, self).__init__() - self.MINIMUM_REQUIRED_POINTS = 4 - self.add(SolveChangingObjectPnPRANSAC( - camera_intrinsics, inlier_thresh, num_iterations)) - - -class Pix2Points(pr.Processor): - def __init__(self, model, object_sizes, epsilon=0.15, - resize=True, draw=True): - self.object_sizes = object_sizes - self.predict_RGBMask = PredictRGBMask(model, epsilon) - self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) - self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) - self.resize, self.draw = resize, draw - self.wrap = pr.WrapOutput( - ['image', 'points2D', 'points3D', 'RGB_mask']) - - def call(self, image): - RGB_mask = self.predict_RGBMask(image) - H, W, num_channels = image.shape - if self.resize: - RGB_mask = resize_image(RGB_mask, (W, H)) - points3D = self.mask_to_points3D(RGB_mask) - points2D = self.mask_to_points2D(RGB_mask) - points2D = normalize_points2D(points2D, W, H) - if self.draw: - image = draw_mask(image, points2D, points3D, self.object_sizes) - return self.wrap(image, points2D, points3D, RGB_mask) - - -class Pix2Pose(pr.Processor): - def __init__(self, model, object_sizes, camera, epsilon=0.15, - class_name=None, resize=True, draw=True): - self.pix2points = Pix2Points( - model, object_sizes, epsilon, resize, False) - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.MIN_REQUIRED_POINTS = self.predict_pose.MINIMUM_REQUIRED_POINTS - self.class_name = str(class_name) if class_name is None else class_name - self.object_sizes = object_sizes - self.cube_points3D = build_cube_points3D(*self.object_sizes) - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.camera = camera - self.draw = draw - - def call(self, image, box2D=None): - results = self.pix2points(image) - points2D, points3D = results['points2D'], results['points3D'] - H, W, num_channels = image.shape - points2D = denormalize_points2D(points2D, H, W) - if box2D is not None: - points2D = self.change_coordinates(points2D, box2D) - - valid_num_points = len(points3D) > 
self.MIN_REQUIRED_POINTS - if valid_num_points: - success, rotation, translation = self.predict_pose(points3D, - points2D) - if success and valid_num_points: - quaternion = rotation_vector_to_quaternion(rotation) - pose6D = Pose6D(quaternion, translation, self.class_name) - else: - pose6D = None - - if self.draw: - image = draw_mask(image, points2D, points3D, self.object_sizes) - image = draw_pose6D(image, pose6D, self.cube_points3D, - self.camera.intrinsics) - results['pose6D'], results['image'] = pose6D, image - return results - - -class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): - """Pose estimation pipeline using keypoints. - """ - super(EstimatePoseMasks, self).__init__() - self.detect = detect - self.estimate_keypoints = estimate_keypoints - self.camera = camera - self.draw = draw - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(['035_power_drill']), - # pr.FilterClassBoxes2D(['solar_panel']), - pr.SquareBoxes2D(), - pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.unwrap = UnwrapDictionary(['points2D', 'points3D']) - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - # self.draw_boxes2D = pr.DrawBoxes2D(['solar_panel']) - self.object_sizes = self.estimate_keypoints.object_sizes - self.cube_points3D = build_cube_points3D(*self.object_sizes) - # affine_matrix = build_rotation_matrix_z(3.14156 / 6) - # self.cube_points3D = np.matmul(affine_matrix, self.cube_points3D.T).T - # 25000, - # self.cube_points3D = self.cube_points3D + np.array([5000, 5000, 0]) - - def call(self, image): - from paz.abstract.messages import Box2D - detections = self.detect(image) - # detections = {'boxes2D': [Box2D([320, 280, 1300, 1060], 1.0, 'solar_panel')]} - boxes2D = self.postprocess_boxes(detections) - # boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points = [], [] - for crop, box2D in zip(cropped_images, boxes2D): - points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) - points2D = denormalize_points2D(points2D, *crop.shape[0:2]) - points2D = self.change_coordinates(points2D, box2D) - if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: - continue - success, rotation, translation = self.predict_pose( - points3D, points2D) - if success is False: - continue - quaternion = rotation_vector_to_quaternion(rotation) - pose6D = Pose6D(quaternion, translation, box2D.class_name) - poses6D.append(pose6D), points.append([points2D, points3D]) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - image = draw_masks(image, points, self.object_sizes) - image = draw_poses6D(image, poses6D, self.cube_points3D, - self.camera.intrinsics) - return self.wrap(image, boxes2D, poses6D) - - - -class Pix2Pose2(pr.Processor): - def __init__(self, model, object_sizes, epsilon=0.15, - class_name=None, with_resize=True, draw=True): - self.object_sizes = object_sizes - self.predict_RGBMask = PredictRGBMask(model, epsilon) - self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) - self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.wrap = 
pr.WrapOutput(['image', 'points3D', 'points2D', 'RGB_mask']) - self.with_resize = with_resize - self.class_name = str(class_name) if class_name is None else class_name - self.draw = draw - - def call(self, image): - RGB_mask = self.predict_RGBMask(image) - H, W, num_channels = image.shape - if self.with_resize: - RGB_mask = resize_image(RGB_mask, (W, H)) - points3D = self.mask_to_points3D(RGB_mask) - points2D = self.mask_to_points2D(RGB_mask) - points2D = normalize_points2D(points2D, (W, H)) - if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: - pose6D = None - success, rotation, translation = self.predict_pose(points3D, points2D) - if success is False: - pose6D = None - quaternion = rotation_vector_to_quaternion(rotation) - pose6D = Pose6D(quaternion, translation, self.class_name) - if self.draw: - image = draw_mask(image, points2D, points3D, self.object_sizes) - image = draw_pose6D(image, pose6D, self.cube_points3D, self.camera.intrinsics) - return self.wrap(image, points3D, points2D, RGB_mask) - - -class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_keypoints, camera, offsets, draw=True): - """Pose estimation pipeline using keypoints. - """ - super(EstimatePoseMasks, self).__init__() - self.detect = detect - self.estimate_keypoints = estimate_keypoints - self.camera = camera - self.draw = draw - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - # pr.FilterClassBoxes2D(['035_power_drill']), - pr.FilterClassBoxes2D(['solar_panel']), - pr.SquareBoxes2D(), - pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.unwrap = UnwrapDictionary(['points2D', 'points3D']) - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - # self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.draw_boxes2D = pr.DrawBoxes2D(['solar_panel']) - self.object_sizes = self.estimate_keypoints.object_sizes - from backend import build_rotation_matrix_z - import numpy as np - self.cube_points3D = build_cube_points3D(*self.object_sizes) - affine_matrix = build_rotation_matrix_z(3.14156 / 6) - self.cube_points3D = np.matmul(affine_matrix, self.cube_points3D.T).T - # 25000, - # self.cube_points3D = self.cube_points3D + np.array([5000, 5000, 0]) - - def call(self, image): - from paz.abstract.messages import Box2D - detections = self.detect(image) - detections = {'boxes2D': [Box2D([320, 280, 1300, 1060], 1.0, 'solar_panel')]} - boxes2D = self.postprocess_boxes(detections) - # boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points = [], [] - for crop, box2D in zip(cropped_images, boxes2D): - points2D, points3D = self.unwrap(self.estimate_keypoints(crop)) - points2D = denormalize_points2D(points2D, *crop.shape[0:2]) - points2D = self.change_coordinates(points2D, box2D) - if len(points3D) < self.predict_pose.MINIMUM_REQUIRED_POINTS: - continue - success, rotation, translation = self.predict_pose( - points3D, points2D) - if success is False: - continue - print('ROTATION', rotation.shape) - quaternion = rotation_vector_to_quaternion(rotation) - print('QUATERNION', quaternion.shape) - pose6D = Pose6D(quaternion, translation, box2D.class_name) - poses6D.append(pose6D), points.append([points2D, points3D]) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - image = 
draw_masks(image, points, self.object_sizes) - image = draw_poses6D(image, poses6D, self.cube_points3D, - self.camera.intrinsics) - return self.wrap(image, boxes2D, poses6D) diff --git a/examples/pix2pose/pipelines3.py b/examples/pix2pose/pipelines3.py deleted file mode 100644 index 8c70fca96..000000000 --- a/examples/pix2pose/pipelines3.py +++ /dev/null @@ -1,121 +0,0 @@ -from paz.abstract import SequentialProcessor, Processor -from paz.pipelines import RandomizeRenderedImage as RandomizeRender -from paz.abstract.messages import Pose6D -from paz import processors as pr -from processors import ( - GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, - ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC, - ReplaceLowerThanThreshold) -from backend import build_cube_points3D -from processors import UnwrapDictionary -from processors import NormalizePoints2D -from backend import denormalize_points2D -from backend import draw_poses6D -from backend import draw_masks -from backend import draw_mask -from backend import normalize_points2D -from backend import draw_pose6D -from paz.backend.quaternion import rotation_vector_to_quaternion -from paz.backend.image import resize_image, show_image -from pipelines import SolveChangingObjectPnP -from pipelines import RGBMaskToImagePoints2D, RGBMaskToObjectPoints3D, PredictRGBMask - - -class Pix2Points(pr.Processor): - def __init__(self, model, object_sizes, epsilon=0.15, resize=True): - self.object_sizes = object_sizes - self.predict_RGBMask = PredictRGBMask(model, epsilon) - self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) - self.mask_to_points2D = RGBMaskToImagePoints2D(model.output_shape[1:3]) - self.resize = resize - self.wrap = pr.WrapOutput(['points2D', 'points3D', 'RGB_mask']) - - def call(self, image): - RGB_mask = self.predict_RGBMask(image) - H, W, num_channels = image.shape - if self.resize: - RGB_mask = resize_image(RGB_mask, (W, H)) - points3D = self.mask_to_points3D(RGB_mask) - points2D = self.mask_to_points2D(RGB_mask) - points2D = normalize_points2D(points2D, H, W) - return self.wrap(points2D, points3D, RGB_mask) - - -class Pix2Pose(pr.Processor): - def __init__(self, model, object_sizes, camera, - epsilon=0.15, class_name=None, draw=True): - - self.pix2points = Pix2Points(model, object_sizes, epsilon, True) - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.class_name = str(class_name) if class_name is None else class_name - self.object_sizes = object_sizes - self.cube_points3D = build_cube_points3D(*self.object_sizes) - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.camera = camera - self.draw = draw - - def call(self, image, box2D=None): - results = self.pix2points(image) - points2D, points3D = results['points2D'], results['points3D'] - H, W, num_channels = image.shape - points2D = denormalize_points2D(points2D, H, W) - if box2D is not None: - points2D = self.change_coordinates(points2D, box2D) - self.class_name = box2D.class_name - - min_num_points = len(points3D) > self.predict_pose.MIN_REQUIRED_POINTS - if min_num_points: - pose_results = self.predict_pose(points3D, points2D) - success, rotation, translation = pose_results - if success and min_num_points: - quaternion = rotation_vector_to_quaternion(rotation) - pose6D = Pose6D(quaternion, translation, self.class_name) - else: - pose6D = None - # change_coordinates puts points2D outside image. 
- if (self.draw and (box2D is None)): - topic = 'image_crop' if box2D is not None else 'image' - image = draw_mask(image, points2D, points3D, self.object_sizes) - image = draw_pose6D(image, pose6D, self.cube_points3D, - self.camera.intrinsics) - results[topic] = image - results['points2D'], results['pose6D'] = points2D, pose6D - return results - - -class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_pose, offsets, draw=True): - """Pose estimation pipeline using keypoints. - """ - super(EstimatePoseMasks, self).__init__() - self.detect = detect - self.estimate_pose = estimate_pose - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(['035_power_drill']), - pr.SquareBoxes2D(), - pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) - self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.object_sizes = self.estimate_pose.object_sizes - self.cube_points3D = build_cube_points3D(*self.object_sizes) - self.draw = draw - - def call(self, image): - boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points = [], [] - for crop, box2D in zip(cropped_images, boxes2D): - results = self.estimate_pose(crop, box2D) - pose6D, points2D, points3D = self.unwrap(results) - poses6D.append(pose6D), points.append([points2D, points3D]) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - image = draw_masks(image, points, self.object_sizes) - image = draw_poses6D(image, poses6D, self.cube_points3D, - self.estimate_pose.camera.intrinsics) - return self.wrap(image, boxes2D, poses6D) From 0555b9436b35e4eee5602e83d32690c6629bdd53 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 2 Dec 2021 18:10:25 +0100 Subject: [PATCH 075/101] Add MultiPoseEstimation pipeline --- examples/pix2pose/demo_image.py | 59 ++++++++++++++++++ examples/pix2pose/pipelines.py | 104 +++++++++++++++++++++++++++++++- 2 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 examples/pix2pose/demo_image.py diff --git a/examples/pix2pose/demo_image.py b/examples/pix2pose/demo_image.py new file mode 100644 index 000000000..b6ec18aed --- /dev/null +++ b/examples/pix2pose/demo_image.py @@ -0,0 +1,59 @@ +import numpy as np +from paz.models import UNET_VGG16 +from paz.backend.image import show_image, load_image +from paz.backend.camera import Camera +from paz.pipelines import DetectSingleShot +from paz.models import SSD300 + +from pipelines import MultiPix2Pose + + +image_path = 'images/lab_condition.png' +epsilon = 0.001 +score_thresh = 0.50 +offsets = [0.2, 0.2] +nms_thresh = 0.45 + +image_shape = (128, 128, 3) +num_classes = 3 +camera = Camera(device_id=0) +image = load_image(image_path) +image_size = image.shape[0:2] +focal_length = image_size[1] +image_center = (image_size[1] / 2.0, image_size[0] / 2.0) +camera.distortion = np.zeros((4)) +camera.intrinsics = np.array([[focal_length, 0, image_center[0]], + [0, focal_length, image_center[1]], + [0, 0, 1]]) + +class_names = ['background', 'Large_clamp', 'flat_screwdriver', + 'hammer', 'Solar_panel', 'power_drill'] +detection = SSD300(len(class_names), head_weights=None) +detection.load_weights('weights/SSD300_weights_.53-1.40.hdf5') +detect = DetectSingleShot(detection, class_names, score_thresh, + nms_thresh, draw=False) + 
+name_to_sizes = { + 'power_drill': np.array([1840, 1870, 520]), + 'Solar_panel': np.array([15000, 15000, 2000]), + 'Large_clamp': np.array([12000, 17100, 3900]), + 'hammer': np.array([18210, 33272, 3280])} + + +name_to_weights = { + 'power_drill': 'weights/UNET_weights_epochs-10_beta-3.hdf5', + 'Solar_panel': 'weights/UNET-VGG_solar_panel_canonical_13.hdf5', + 'Large_clamp': 'weights/UNET-VGG_large_clamp_canonical_10.hdf5', + 'hammer': 'weights/UNET-VGG16_weights_hammer_10.hdf5'} + + +segment = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +valid_class_names = ['power_drill', 'Solar_panel', 'Large_clamp', 'hammer'] + +pipeline = MultiPix2Pose(detect, segment, camera, name_to_weights, + name_to_sizes, valid_class_names, offsets, + epsilon, draw=True) + +results = pipeline(image) +predicted_image = results['image'] +show_image(predicted_image) diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 7383e3571..7ebe0d7a3 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -14,8 +14,11 @@ from backend import draw_masks from backend import draw_mask from backend import normalize_points2D +from backend import points3D_to_RGB +from backend import draw_points2D from paz.backend.quaternion import rotation_vector_to_quaternion from paz.backend.image import resize_image +import numpy as np class DomainRandomization(SequentialProcessor): @@ -71,6 +74,7 @@ def __init__(self, camera_intrinsics, inlier_thresh=5, num_iterations=100): class Pix2Points(pr.Processor): def __init__(self, model, object_sizes, epsilon=0.15, resize=True): + self.model = model self.object_sizes = object_sizes self.predict_RGBMask = PredictRGBMask(model, epsilon) self.mask_to_points3D = RGBMaskToObjectPoints3D(self.object_sizes) @@ -92,6 +96,7 @@ def call(self, image): class Pix2Pose(pr.Processor): def __init__(self, model, object_sizes, camera, epsilon=0.15, class_name=None, draw=True): + self.model = model self.pix2points = Pix2Points(model, object_sizes, epsilon, True) self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) self.class_name = str(class_name) if class_name is None else class_name @@ -111,6 +116,7 @@ def call(self, image, box2D=None): self.class_name = box2D.class_name min_num_points = len(points3D) > self.predict_pose.MIN_REQUIRED_POINTS + success = False if min_num_points: pose_results = self.predict_pose(points3D, points2D) success, rotation, translation = pose_results @@ -130,8 +136,102 @@ def call(self, image, box2D=None): return results +class MultiPix2Pose(Processor): + def __init__(self, detect, segment, camera, name_to_weights, name_to_sizes, + valid_class_names, offsets=[0.2, 0.2], epsilon=0.15, draw=True): + self.detect = detect + self.name_to_weights = name_to_weights + self.name_to_sizes = name_to_sizes + self.valid_class_names = valid_class_names + self.pix2points = Pix2Points(segment, np.zeros((3)), epsilon) + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.camera = camera + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(valid_class_names), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.draw = draw + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.name_to_cube_points3D = {} + self.mask_to_points2D = RGBMaskToImagePoints2D( + 
segment.output_shape[1:3]) + for name in self.name_to_sizes: + W, H, D = self.name_to_sizes[name] + cube_points3D = build_cube_points3D(W, H, D) + self.name_to_cube_points3D[name] = cube_points3D + + self.predict_RGBMask = PredictRGBMask(segment, epsilon) + + def call(self, image): + boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points2D, points3D = [], [], [] + for crop, box2D in zip(cropped_images, boxes2D): + class_name = box2D.class_name + name_to_weights = self.name_to_weights[class_name] + self.pix2points.model.load_weights(name_to_weights) + object_sizes = self.name_to_sizes[class_name] + # self.pix2points.object_sizes = object_sizes + # points = self.pix2points(crop) + + RGB_mask = self.predict_RGBMask(crop) + H, W, num_channels = crop.shape + RGB_mask = resize_image(RGB_mask, (W, H)) + + self.mask_to_points3D = RGBMaskToObjectPoints3D(object_sizes) + class_points3D = self.mask_to_points3D(RGB_mask) + class_points2D = self.mask_to_points2D(RGB_mask) + class_points2D = normalize_points2D(class_points2D, H, W) + + # from paz.backend.image import show_image + # show_image((points['RGB_mask'] * 255).astype('uint8')) + # class_points2D = points['points2D'] + # class_points3D = points['points3D'] + H, W, num_channels = crop.shape + class_points2D = denormalize_points2D(class_points2D, H, W) + class_points2D = self.change_coordinates(class_points2D, box2D) + print(len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS) + print(len(class_points3D), len(class_points2D)) + if len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS: + pose_results = self.predict_pose(class_points3D, class_points2D) + success, rotation, translation = pose_results + print('solver success', success) + # success = True + else: + success = False + if success: + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, class_name) + else: + pose6D = None + print(success) + points2D.append(class_points2D) + points3D.append(class_points3D) + poses6D.append(pose6D) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + for class_points2D, class_points3D, pose6D in zip(points2D, points3D, poses6D): + class_name = pose6D.class_name + object_sizes = self.name_to_sizes[class_name] + colors = points3D_to_RGB(class_points3D, object_sizes) + image = draw_points2D(image, class_points2D, colors) + + for pose6D in poses6D: + class_name = pose6D.class_name + cube_points3D = self.name_to_cube_points3D[class_name] + image = draw_pose6D(image, pose6D, cube_points3D, + self.camera.intrinsics) + return {'image': image, 'boxes2D': boxes2D, 'poses6D': poses6D} + + class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_pose, offsets, draw=True): + def __init__(self, detect, estimate_pose, offsets, draw=True, valid_class_names=['035_power_drill']): """Pose estimation pipeline using keypoints. 
""" super(EstimatePoseMasks, self).__init__() @@ -139,7 +239,7 @@ def __init__(self, detect, estimate_pose, offsets, draw=True): self.estimate_pose = estimate_pose self.postprocess_boxes = SequentialProcessor( [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(['035_power_drill']), + pr.FilterClassBoxes2D(valid_class_names), pr.SquareBoxes2D(), pr.OffsetBoxes2D(offsets)]) self.clip = pr.ClipBoxes2D() From 424430b132b8b546212c830ee32c3f80aa355de1 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 2 Dec 2021 18:10:40 +0100 Subject: [PATCH 076/101] Add multi samples in demo --- examples/pix2pose/demo.py | 60 +++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index 5ab9e6416..c972f7165 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -15,7 +15,9 @@ num_classes = 3 model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) -model.load_weights('weights/UNET_weights_epochs-10_beta-3.hdf5') +# model.load_weights('weights/UNET_weights_epochs-10_beta-3.hdf5') +# model.load_weights('weights/UNET-VGG_solar_panel_canonical_13.hdf5') +# model.load_weights('weights/UNET-VGG_large_clamp_canonical_10.hdf5') # approximating intrinsic camera parameters camera = Camera(device_id=0) @@ -24,7 +26,7 @@ # camera.stop() # image = load_image('test_image2.jpg') -image = load_image('images/test_image.jpg') +image = load_image('images/lab_condition.png') image_size = image.shape[0:2] focal_length = image_size[1] image_center = (image_size[1] / 2.0, image_size[0] / 2.0) @@ -40,14 +42,54 @@ # estimate_keypoints = Pix2Pose(model, object_sizes, epsilon, True) # pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) -object_sizes = np.array([1840, 1870, 520]) + +object_sizes = np.array([1840, 1870, 520]) # power drill +object_sizes = np.array([15000, 15000, 2000]) # solar panel +object_sizes = np.array([15000, 15000, 2000]) # solar panel estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=True) -# image = image[50:320, 60:320] -# show_image(estimate_pose(image)['image']) -pipeline = EstimatePoseMasks(detect, estimate_pose, offsets, True) -results = pipeline(image) -predicted_image = results['image'] -show_image(predicted_image) +# image = image[768:1324, 622:784] +# image = image[622:784, 768:1324] + + +# image_hammer = image[460:1030, 740:1340] +# model.load_weights('weights/UNET-VGG16_weights_hammer_10.hdf5') +# show_image(estimate_pose(image_hammer)['image']) + +# show_image(image) +image_clamp = image[670:1000, 1000:1400] +# image_hammer = image[460:1030, 740:1340] +model.load_weights('weights/UNET-VGG_large_clamp_canonical_10.hdf5') +show_image(estimate_pose(image_clamp)['image']) + +""" +image = load_image('images/zed_left_1011.png') +image = image[250:800, 250:850, :] +H, W, num_channels = image.shape +show_image(estimate_pose(image)['image']) + +image = load_image('images/MicrosoftTeams-image.png') +show_image(estimate_pose(image)['image']) + +image = load_image('images/zed_left_705.png') +image = image[250:1080, 250:1400, :] +show_image(estimate_pose(image)['image']) + + +image = load_image('images/zed_left_792.png') +image = image[30:1400, 280:1060, :] +show_image(estimate_pose(image)['image']) +""" + +# image = load_image('images/large_clamp.jpeg') +# show_image(image[1]) +# results = estimate_pose(image) +# show_image(results['image']) + + +# pipeline = EstimatePoseMasks(detect, estimate_pose, offsets, True) +# results = 
pipeline(image) +# predicted_image = results['image'] +# show_image(predicted_image) # image_size = (640, 480) # player = VideoPlayer(image_size, pipeline, camera) From be139c270d83f979a755bfdd746167dc07929969 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 2 Dec 2021 18:12:05 +0100 Subject: [PATCH 077/101] Added parameters for multiple objects in the scene --- examples/pix2pose/scenes.py | 61 +++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index 1cbb9b9a7..79d8da90c 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -168,8 +168,9 @@ def render_symmetries(self): y_fov = 3.14159 / 4.0 light = [1.0, 30] + # model = UNET_VGG16(3, image_shape, freeze_backbone=True) + # solar panel parameters - """ OBJ_name = 'single_solar_panel_02.obj' path_OBJ = os.path.join(root_path, OBJ_name) angles = np.linspace(0, 2 * np.pi, 7)[:6] @@ -179,8 +180,14 @@ def render_symmetries(self): camera_pose = to_affine_matrix(camera_rotation, translation) min_corner = [0.0, 0.0, -0.4] max_corner = [0.0, 0.0, +0.0] - """ + # model.load_weights('weights/UNET-VGG_solar_panel_canonical_13.hdf5') + renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, + max_corner, symmetries) + renderer.scene.ambient_light = [1.0, 1.0, 1.0] + image = renderer.render_symmetries() + show_image(image) + """ # large clamp parameters # REMEMBER TO CHANGE THE Ns coefficient to values between [0, 1] in # textured.mtl. For example change 96.07 to .967 @@ -192,6 +199,7 @@ def render_symmetries(self): camera_pose[:3, :3] = np.matmul(align_z, camera_pose[:3, :3]) min_corner = [-0.05, -0.02, -0.05] max_corner = [+0.05, +0.02, +0.01] + # model.load_weights('weights/UNET-VGG_large_clamp_canonical_10.hdf5') angles = [0.0, np.pi] symmetries = np.array([build_rotation_matrix_y(angle) for angle in angles]) @@ -200,13 +208,52 @@ def render_symmetries(self): renderer.scene.ambient_light = [1.0, 1.0, 1.0] image = renderer.render_symmetries() show_image(image) + """ + """ + # ------------------------------------------------------------- + # Training scene for hammer + # -------------------------------------------------------------- + OBJ_name = '.keras/paz/datasets/ycb_models/048_hammer/textured.obj' + path_OBJ = os.path.join(root_path, OBJ_name) + distance = [0.5, 0.6] + top_only = False + roll = 3.14159 + shift = 0.05 + renderer = PixelMaskRenderer( + path_OBJ, viewport_size, y_fov, distance, light, top_only, roll, shift) + for arg in range(100): + image, alpha, RGBA_mask = renderer.render() + image = np.concatenate([image, RGBA_mask[..., 0:3]], axis=1) + show_image(image) + """ + """ + translation = np.array([0.0, 0.0, 0.50]) + camera_pose, y = compute_modelview_matrices(translation, np.zeros((3))) + align_z = build_rotation_matrix_z(np.pi / 8) + camera_pose[:3, :3] = np.matmul(align_z, camera_pose[:3, :3]) + min_corner = [-0.05, -0.02, -0.05] + max_corner = [+0.05, +0.02, +0.01] + + symmetries, angles = [], [0.0, np.pi] + for angle in angles: + symmetry = build_rotation_matrix_y(angle) + symmetries.append(symmetry) + symmetries = np.array(symmetries) + + renderer = CanonicalScene(path_OBJ, camera_pose, min_corner, + max_corner, symmetries) + renderer.scene.ambient_light = [1.0, 1.0, 1.0] + image = renderer.render_symmetries() + show_image(image) + """ + + + """ + show_image(image) for arg in range(0): image, alpha, RGB_mask = renderer.render() show_image(RGB_mask[:, :, 0:3]) - model = UNET_VGG16(3, 
image_shape, freeze_backbone=True) - model.load_weights('UNET-VGG_large_clamp_canonical_10.hdf5') - background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' background_wildcard = os.path.join(root_path, background_wildcard) image_paths = glob.glob(background_wildcard) @@ -231,5 +278,7 @@ def render_symmetries(self): # error = RGB_mask_pred - RGB_mask RGB_mask_pred = RGB_mask_pred.astype('uint8') print(image.dtype, RGB_mask_pred.dtype, RGB_mask_true.dtype) - images = np.concatenate([image, RGB_mask_pred, RGB_mask_true], axis=1) + # images = np.concatenate([image, RGB_mask_pred, RGB_mask_true], axis=1) + images = np.concatenate([image, RGB_mask_pred], axis=1) show_image(images) + """ From 46492c309c4957d4b2f8ced7abc7136559f2f10c Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 2 Dec 2021 18:12:37 +0100 Subject: [PATCH 078/101] Refactor training script for multiple objects --- examples/pix2pose/train.py | 97 ++++++++++++++------------------------ 1 file changed, 35 insertions(+), 62 deletions(-) diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 42195bafc..12c48a715 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -3,98 +3,71 @@ from tensorflow.keras.optimizers import Adam from paz.abstract import GeneratingSequence from paz.models.segmentation import UNET_VGG16 -from paz.backend.image import show_image, resize_image -import numpy as np from scenes import PixelMaskRenderer from pipelines import DomainRandomization -from loss import WeightedReconstruction, MSE_with_alpha_channel -from models.fully_convolutional_net import FullyConvolutionalNet +from loss import WeightedReconstruction +from metrics import mean_squared_error as MSE -image_shape = [128, 128, 3] +# global training parameters +H, W, num_channels = image_shape = [128, 128, 3] +beta = 3.0 +batch_size = 32 +num_classes = 3 +learning_rate = 0.001 +max_num_epochs = 10 +steps_per_epoch = 1000 +inputs_to_shape = {'input_1': [H, W, 3]} +labels_to_shape = {'masks': [H, W, 4]} + +# global rendering parameters root_path = os.path.expanduser('~') background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' background_wildcard = os.path.join(root_path, background_wildcard) image_paths = glob.glob(background_wildcard) -path_OBJ = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' -path_OBJ = os.path.join(root_path, path_OBJ) num_occlusions = 1 viewport_size = image_shape[:2] +light = [1.0, 30] y_fov = 3.14159 / 4.0 + +# power drill parameters +""" +OBJ_name = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' distance = [0.3, 0.5] -light = [1.0, 30] top_only = False roll = 3.14159 shift = 0.05 -num_steps = 1000 -batch_size = 32 -beta = 3.0 -alpha = 0.1 -filters = 16 -num_classes = 3 -learning_rate = 0.001 -# steps_per_epoch -max_num_epochs = 10 -steps_per_epoch = num_steps +""" +# hammer parameters +OBJ_name = '.keras/paz/datasets/ycb_models/048_hammer/textured.obj' +distance = [0.5, 0.6] +top_only = False +roll = 3.14159 +shift = 0.05 + +path_OBJ = os.path.join(root_path, OBJ_name) renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, light, top_only, roll, shift) -processor = DomainRandomization(renderer, image_shape, - image_paths, num_occlusions) +processor = DomainRandomization( + renderer, image_shape, image_paths, inputs_to_shape, + labels_to_shape, num_occlusions) -sequence = GeneratingSequence(processor, batch_size, num_steps) +sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) -beta = 3.0 
weighted_reconstruction = WeightedReconstruction(beta) -# model = FullyConvolutionalNet(num_classes, image_shape, filters, alpha) model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) -# model. optimizer = Adam(learning_rate) -# model.load_weights('UNET_weights_MSE.hdf5') -model.compile( - optimizer, weighted_reconstruction, metrics=MSE_with_alpha_channel) +model.compile(optimizer, weighted_reconstruction, metrics=MSE) + model.fit( sequence, - # steps_per_epoch=args.steps_per_epoch, epochs=max_num_epochs, # callbacks=[stop, log, save, plateau, draw], verbose=1, workers=0) -# batch = sequence.__getitem__(0) -# for _ in range(100): -# image, alpha, RGB_mask = renderer.render() -# show_image(image) -# show_image(RGB_mask) - -def normalize(image): - return (image * 255.0).astype('uint8') - - -def show_results(): - # image, alpha, pixel_mask_true = renderer.render() - sample = processor() - image = sample['inputs']['input_1'] - pixel_mask_true = sample['labels']['masks'] - image = np.expand_dims(image, 0) - pixel_mask_pred = model.predict(image) - pixel_mask_pred = normalize(np.squeeze(pixel_mask_pred, axis=0)) - image = normalize(np.squeeze(image, axis=0)) - results = np.concatenate( - [image, normalize(pixel_mask_true[..., 0:3]), pixel_mask_pred], axis=1) - H, W = results.shape[:2] - scale = 6 - results = resize_image(results, (scale * W, scale * H)) - show_image(results) - - -""" -for _ in range(100): - sample = processor() - inputs, labels = sample['inputs'], sample['labels'] - show_image((inputs['input_image'] * 255).astype('uint8')) - show_image((labels['label_image'] * 255).astype('uint8')) -""" +model.save_weights('UNET-VGG16_weights_hammer_10.hdf5') From efa75f7b783d1990fa9e3136224cdfac732bedff Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 2 Dec 2021 18:13:30 +0100 Subject: [PATCH 079/101] Remove jpegs from repository --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 607a7ce7e..beeee88dc 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ checkpoint *.p *.zip *.iml +*.jpeg !.github/manifest.xml From 0587a9e60cf437103b6eb47bf95ea60096f71569 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 2 Dec 2021 18:13:50 +0100 Subject: [PATCH 080/101] Add rotated image --- examples/pix2pose/test_rotated_image.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/examples/pix2pose/test_rotated_image.py b/examples/pix2pose/test_rotated_image.py index 9c383edea..6e48a3658 100644 --- a/examples/pix2pose/test_rotated_image.py +++ b/examples/pix2pose/test_rotated_image.py @@ -66,6 +66,7 @@ def quick_pose(image): image = resize_image(image, (128, 128)) + # show_image(resize_image(image, (256 * 3, 256 * 3))) keypoints = estimate_keypoints(image) points2D = keypoints['points2D'] points3D = keypoints['points3D'] @@ -81,21 +82,31 @@ def quick_pose(image): image = image.astype('float') image = draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics) image = image.astype('uint8') - image = resize_image(image, (256, 256)) + image = resize_image(image, (256 * 3, 256 * 3)) show_image(image) -image = load_image('zed_left_1011.png') +image = load_image('images/zed_left_1011.png') image = image[250:800, 250:850, :] +H, W, num_channels = image.shape +# image = resize_image(image, (W * 20, H * 20)) quick_pose(image) -image = load_image('MicrosoftTeams-image.png') +image = load_image('images/MicrosoftTeams-image.png') quick_pose(image) -image = load_image('zed_left_705.png') +image = 
load_image('images/zed_left_705.png') image = image[250:1080, 250:1400, :] quick_pose(image) + +image = load_image('images/zed_left_792.png') +# image = image[280:1060, 320:1060, :] +image = image[320:1300, 280:1060, :] +quick_pose(image) + + + renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, light, top_only, roll, shift) renderer.scene.ambient_light = [1.0, 1.0, 1.0] From 181802448659bfa4a6939d17176790c0b043563e Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 19 Jan 2022 12:54:32 +0100 Subject: [PATCH 081/101] Remove unused loss backend function --- examples/pix2pose/loss.py | 42 ++++----------------------------------- 1 file changed, 4 insertions(+), 38 deletions(-) diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/loss.py index d171df124..ead212f09 100644 --- a/examples/pix2pose/loss.py +++ b/examples/pix2pose/loss.py @@ -118,11 +118,13 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): symmetric_losses = [] for rotation in rotations: RGB_true_rotated = tf.einsum('ij,bklj->bkli', rotation, RGB_true) - RGB_true_rotated = normalized_device_coordinates_to_normalized_image(RGB_true_rotated) + RGB_true_rotated = normalized_device_coordinates_to_normalized_image( + RGB_true_rotated) RGB_true_rotated = tf.clip_by_value(RGB_true_rotated, 0.0, 1.0) RGB_true_rotated = RGB_true_rotated * alpha RGBA_true_rotated = tf.concat([RGB_true_rotated, alpha], axis=3) - loss = compute_weighted_reconstruction_loss(RGBA_true_rotated, RGB_pred, beta) + loss = compute_weighted_reconstruction_loss( + RGBA_true_rotated, RGB_pred, beta) loss = tf.expand_dims(loss, -1) symmetric_losses.append(loss) symmetric_losses = tf.concat(symmetric_losses, axis=-1) @@ -130,42 +132,6 @@ def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): return minimum_symmetric_loss -def compute_weighted_symmetric_loss2(RGBA_true, RGB_pred, rotations, beta=3.0): - """Computes the mininum of all rotated L1 reconstruction losses weighting - the positive alpha mask values in the predicted RGB image by beta. - - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. - rotations: Array (num_symmetries, 3, 3). Rotation matrices - that when applied lead to the same object view. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. - """ - # alpha mask is invariant to rotations that leave the shape symmetric. 
- RGB_true, alpha = split_alpha_mask(RGBA_true) - # RGB_original_shape = tf.shape(RGBA_true) - batch_size, H, W, num_channels = RGB_true.shape - batch_size, H, W, num_channels = 32, 128, 128, 3 - RGB_true = tf.reshape(RGB_true, [batch_size, -1, 3]) - RGB_true = to_normalized_device_coordinates(RGB_true) - RGB_pred = to_normalized_device_coordinates(RGB_pred) - symmetric_losses = [] - for rotation in rotations: - # RGB_true_symmetric = tf.matmul(rotation, RGB_true.T).T - RGB_true_symmetric = tf.einsum('ij,klj->kli', rotation, RGB_true) - RGB_true_symmetric = tf.reshape(RGB_true_symmetric, (batch_size, H, W, num_channels)) - RGBA_true_symmetric = tf.concat([RGB_true_symmetric, alpha], axis=3) - symmetric_loss = compute_weighted_reconstruction_loss( - RGBA_true_symmetric, RGB_pred, beta) - symmetric_loss = tf.expand_dims(symmetric_loss, -1) - symmetric_losses.append(symmetric_loss) - symmetric_losses = tf.concat(symmetric_losses, axis=-1) - minimum_symmetric_loss = tf.reduce_min(symmetric_losses, axis=-1) - return minimum_symmetric_loss - - def compute_weighted_reconstruction_loss_with_error( RGBA_true, RGBE_pred, beta=3.0): """Computes L1 reconstruction loss by multiplying positive alpha mask From 1c9d5209988f3756a86c3d0283795ca925ceb1b7 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 25 Jan 2022 16:22:17 +0100 Subject: [PATCH 082/101] Add basic tests for loss functions --- .../{loss.py => weighted_reconstruction.py} | 0 .../pix2pose/weighted_reconstruction_test.py | 116 ++++++++++++++++++ 2 files changed, 116 insertions(+) rename examples/pix2pose/{loss.py => weighted_reconstruction.py} (100%) create mode 100644 examples/pix2pose/weighted_reconstruction_test.py diff --git a/examples/pix2pose/loss.py b/examples/pix2pose/weighted_reconstruction.py similarity index 100% rename from examples/pix2pose/loss.py rename to examples/pix2pose/weighted_reconstruction.py diff --git a/examples/pix2pose/weighted_reconstruction_test.py b/examples/pix2pose/weighted_reconstruction_test.py new file mode 100644 index 000000000..73a02d7d7 --- /dev/null +++ b/examples/pix2pose/weighted_reconstruction_test.py @@ -0,0 +1,116 @@ +import pytest +import numpy as np + +from .weighted_reconstruction import split_alpha_mask +from .weighted_reconstruction import compute_foreground_loss +from .weighted_reconstruction import compute_background_loss +from .weighted_reconstruction import compute_error_prediction_loss +from .weighted_reconstruction import compute_weighted_reconstruction_loss +from .weighted_reconstruction import ( + compute_weighted_reconstruction_loss_with_error) +from .weighted_reconstruction import ( + normalized_image_to_normalized_device_coordinates, + normalized_device_coordinates_to_normalized_image) +from .weighted_reconstruction import WeightedReconstruction + + +@pytest.fixture +def RGBA_mask(): + return np.ones((32, 128, 128, 4), dtype=np.float32) + + +@pytest.fixture +def RGB_true(): + return np.ones((32, 128, 128, 3), dtype=np.float32) + + +@pytest.fixture +def RGBA_true(): + return np.ones((32, 128, 128, 4), dtype=np.float32) + + +@pytest.fixture +def RGB_pred(): + return 0.5 * np.ones((32, 128, 128, 3), dtype=np.float32) + + +@pytest.fixture +def RGBE_pred(): + return 0.5 * np.ones((32, 128, 128, 4), dtype=np.float32) + + +@pytest.fixture +def alpha_mask(): + return np.ones((32, 128, 128, 1), dtype=np.float32) + + +def test_split_alpha_mask(RGBA_mask): + batch_size, H, W, num_channels = RGBA_mask.shape + color_mask, alpha_mask = split_alpha_mask(RGBA_mask) + assert color_mask.shape == 
(batch_size, H, W, 3) + assert alpha_mask.shape == (batch_size, H, W, 1) + + +def test_split_error_mask(RGBA_mask): + batch_size, H, W, num_channels = RGBA_mask.shape + color_mask, alpha_mask = split_alpha_mask(RGBA_mask) + assert color_mask.shape == (batch_size, H, W, 3) + assert alpha_mask.shape == (batch_size, H, W, 1) + + +def test_compute_foreground_loss(RGB_true, RGB_pred, alpha_mask): + foreground_loss = compute_foreground_loss(RGB_true, RGB_pred, alpha_mask) + assert np.allclose(foreground_loss, 0.5) + + +def test_compute_background_loss(RGB_true, RGB_pred, alpha_mask): + alpha_mask = 1.0 - alpha_mask + background_loss = compute_background_loss(RGB_true, RGB_pred, alpha_mask) + assert np.allclose(background_loss, 0.5) + + +def test_compute_weighted_reconstruction_loss(RGBA_true, RGB_pred): + loss = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, 3.0) + assert np.allclose(loss, 1.5) + + +def test_normalized_image_to_normalized_device_coordinates(RGB_true): + value = normalized_image_to_normalized_device_coordinates(RGB_true) + assert np.max(value) == 1.0 + + +def test_normalized_image_to_normalized_device_coordinates_segment(): + image = np.array([0, 0.5, 1.0]) + value = normalized_image_to_normalized_device_coordinates(image) + assert ((np.min(value) == -1.0) and (np.max(value) == 1.0)) + + +def test_normalized_device_coordinates_to_normalized_image(): + image = np.array([-1.0, 0.0, 1.0]) + value = normalized_device_coordinates_to_normalized_image(image) + assert ((np.min(value) == 0.0) and (np.max(value) == 1.0)) + + +def test_weighted_reconstruction_loss(RGBA_true, RGB_pred): + compute_loss = WeightedReconstruction(beta=3.0) + loss = compute_loss(RGBA_true, RGB_pred) + assert np.allclose(loss, 1.5) + + +def test_weighted_reconstruction_loss_with_error(RGBA_true, RGBE_pred): + loss = compute_weighted_reconstruction_loss_with_error( + RGBA_true, RGBE_pred, beta=3.0) + assert np.allclose(loss, 1.5) + + +def test_error_prediction_loss(RGBA_true, RGBE_pred): + # TODO change RGBE_pred + loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) + print(loss) + assert True + +# test_WeightedReconstructionWithError +# test_ErrorPrediction + +# test_WeightedSymmetricReconstruction +# test_compute_weighted_symmetric_loss From 8a50045ec05cdfa18700aecf408d86cee07e677f Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 26 Jan 2022 13:25:29 +0100 Subject: [PATCH 083/101] Start test for backend --- examples/pix2pose/backend_test.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 examples/pix2pose/backend_test.py diff --git a/examples/pix2pose/backend_test.py b/examples/pix2pose/backend_test.py new file mode 100644 index 000000000..1c3dd1d8f --- /dev/null +++ b/examples/pix2pose/backend_test.py @@ -0,0 +1,17 @@ +import pytest +import numpy as np + +from .backend import build_cube_points3D + + +@pytest.fixture +def unit_cube(): + return np.array([[-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5] + [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5]]) + + +def test_build_cube_points3D(unit_cube): + cube_points = build_cube_points3D(1, 1, 1) + print(cube_points.shape) + print(cube_points) + assert np.allclose(unit_cube, cube_points) From 28cf4c7ec746e922572d04e51e6c4829f11885e5 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 12:30:18 +0100 Subject: [PATCH 084/101] Add tests and refactor backend --- examples/pix2pose/backend.py | 39 +-- examples/pix2pose/backend_test.py | 271 +++++++++++++++++- examples/pix2pose/legacy.py | 118 
++++++++ examples/pix2pose/pipelines.py | 3 +- examples/pix2pose/weighted_reconstruction.py | 86 ------ .../pix2pose/weighted_reconstruction_test.py | 16 +- 6 files changed, 403 insertions(+), 130 deletions(-) create mode 100644 examples/pix2pose/legacy.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index e19f9e948..1f3f3524f 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -43,8 +43,8 @@ def build_cube_points3D(width, height, depth): point_5, point_6, point_7, point_8]) -def _preprocess_image_points2D(image_points2D): - """Preprocessing image points for PnPRANSAC +def preprocess_image_points2D(image_points2D): + """Preprocessing image points for openCV's PnPRANSAC # Arguments image_points2D: Array of shape (num_points, 2) @@ -96,7 +96,7 @@ def solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, """ if ((len(object_points3D) < 4) or (len(image_points2D) < 4)): raise ValueError('Solve PnP requires at least 4 3D and 2D points') - image_points2D = _preprocess_image_points2D(image_points2D) + image_points2D = preprocess_image_points2D(image_points2D) success, rotation_vector, translation, inliers = cv2.solvePnPRansac( object_points3D, image_points2D, camera_intrinsics, None, flags=cv2.SOLVEPNP_EPNP, reprojectionError=inlier_threshold, @@ -195,6 +195,7 @@ def draw_cube(image, points, color=GREEN, thickness=2, radius=5): def replace_lower_than_threshold(source, threshold=1e-3, replacement=0.0): """Replace values from source that are lower than the given threshold. + This function doesn't create a new array but does replacement in place. # Arguments source: Array. @@ -230,7 +231,7 @@ def arguments_to_image_points2D(row_args, col_args): Array (num_cols, num_rows) representing points2D in UV space. # Notes - Arguments are row args (V) and col args (U). Iamge points are in UV + Arguments are row args (V) and col args (U). Image points are in UV coordinates; thus, we concatenate them in that order i.e. [col_args, row_args] """ @@ -290,22 +291,16 @@ def draw_points2D(image, points2D, colors): return image -def draw_points2D_(image, keypoints, colors, radius=1): - for (u, v), (R, G, B) in zip(keypoints, colors): - color = (int(R), int(G), int(B)) - draw_dot(image, (u, v), color, radius) - return image - - def normalize_points2D(points2D, height, width): """Transform points2D in image coordinates to normalized coordinates i.e. [U, V] -> [-1, 1]. UV have maximum values of [W, H] respectively. Image plane + width (0,0)--------> (U) | - | + height | | v @@ -549,25 +544,6 @@ def build_rotation_matrix_y(angle): return rotation_matrix_y -def rotate_image(image, rotation_matrix): - """Rotates an image with a symmetry. - - # Arguments - image: Array (H, W, 3) with domain [0, 255]. - rotation_matrix: Array (3, 3). 
- - # Returns - Array (H, W, 3) with domain [0, 255] - """ - mask_image = np.sum(image, axis=-1, keepdims=True) != 0 - image = image_to_normalized_device_coordinates(image) - rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) - rotated_image = normalized_device_coordinates_to_image(rotated_image) - rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) - rotated_image = rotated_image * mask_image - return rotated_image - - def sample_uniform(min_value, max_value): """Samples values inside segment [min_value, max_value) @@ -674,7 +650,6 @@ def compute_norm_SO3(rotation_mesh, rotation): def calculate_canonical_rotation(rotation_mesh, rotations): norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] closest_rotation_arg = np.argmin(norms) - # print(closest_rotation_arg) closest_rotation = rotations[closest_rotation_arg] canonical_rotation = np.linalg.inv(closest_rotation) return canonical_rotation diff --git a/examples/pix2pose/backend_test.py b/examples/pix2pose/backend_test.py index 1c3dd1d8f..c36e64f25 100644 --- a/examples/pix2pose/backend_test.py +++ b/examples/pix2pose/backend_test.py @@ -2,16 +2,279 @@ import numpy as np from .backend import build_cube_points3D +from .backend import preprocess_image_points2D +from .backend import replace_lower_than_threshold +from .backend import arguments_to_image_points2D +# from .backend import points3D_to_RGB +from .backend import normalize_points2D +from .backend import denormalize_points2D +from .backend import homogenous_quaternion_to_rotation_matrix +from .backend import quaternion_to_rotation_matrix +from .backend import rotation_vector_to_rotation_matrix +from .backend import to_affine_matrix +from .backend import image_to_normalized_device_coordinates +from .backend import normalized_device_coordinates_to_image +from .backend import build_rotation_matrix_x +from .backend import build_rotation_matrix_y +from .backend import build_rotation_matrix_z +from .backend import compute_norm_SO3 + + +@pytest.fixture +def rotation_matrix_X_HALF_PI(): + rotation_matrix = np.array([[1.0, 0.0, 0.0], + [0.0, 0.0, -1.0], + [0.0, 1.0, 0.0]]) + return rotation_matrix + + +@pytest.fixture +def rotation_matrix_Y_HALF_PI(): + rotation_matrix = np.array([[0.0, 0.0, 1.0], + [0.0, 1.0, 0.0], + [-1.0, 0.0, 0.0]]) + return rotation_matrix + +@pytest.fixture +def rotation_matrix_Z_HALF_PI(): + rotation_matrix = np.array([[0.0, -1.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 0.0, 1.0]]) + return rotation_matrix @pytest.fixture def unit_cube(): - return np.array([[-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5] - [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5]]) + return np.array([[0.5, -0.5, 0.5], + [0.5, -0.5, -0.5], + [-0.5, -0.5, -0.5], + [-0.5, -0.5, 0.5], + [0.5, 0.5, 0.5], + [0.5, 0.5, -0.5], + [-0.5, 0.5, -0.5], + [-0.5, 0.5, 0.5]]) + + +@pytest.fixture +def points2D(): + return np.array([[10, 301], + [145, 253], + [203, 5], + [214, 244], + [23, 67], + [178, 48], + [267, 310]]) def test_build_cube_points3D(unit_cube): cube_points = build_cube_points3D(1, 1, 1) - print(cube_points.shape) - print(cube_points) assert np.allclose(unit_cube, cube_points) + + +def test_preprocess_image_point2D(points2D): + image_points2D = preprocess_image_points2D(points2D) + num_points = len(points2D) + assert image_points2D.shape == (num_points, 1, 2) + assert image_points2D.data.contiguous + assert np.allclose(np.squeeze(image_points2D, 1), points2D) + + +# def test_solve_PnP_RANSAC(object_points3D, image_points2D, camera_intrinsics, +# def 
test_project_to_image(rotation, translation, points3D, camera_intrisincs) +# def draw_cube + +def test_replace_lower_than_threshold(): + source = np.ones((128, 128, 3)) + target = replace_lower_than_threshold(source, 2.0, 5.0) + assert np.allclose(target, 5.0) + + source = np.ones((128, 128, 3)) + target = replace_lower_than_threshold(source, 0.0, -1.0) + assert np.allclose(target, 1.0) + + +def test_arguments_to_image_points2D(): + col_args = np.array([3, 44, 6]) + row_args = np.array([66, 0, 5]) + image_points2D = arguments_to_image_points2D(row_args, col_args) + assert np.allclose(image_points2D, np.array([[3, 66], [44, 0], [6, 5]])) + + +# def test_points3D_to_RGB(points3D): +# def draw_mask +# def draw_masks +# def draw_points2D + +def test_normalize_points2D(): + height, width = 480, 640 + points2D = np.array([[0, 0], [320, 240], [640, 480]]) + normalized_points = normalize_points2D(points2D, height, width) + assert np.allclose(normalized_points, np.array([[-1, -1], [0, 0], [1, 1]])) + + +def test_denormalize_points2D(): + height, width = 480, 640 + normalized_points = np.array([[-1, -1], [0, 0], [1, 1]]) + points2D = denormalize_points2D(normalized_points, height, width) + assert np.allclose(points2D, np.array([[0, 0], [320, 240], [640, 480]])) + +# def draw_pose6D +# def draw_poses6D + + +def test_homogenous_quaternion_to_rotation_matrix_identity(): + quaternion = np.array([0.0, 0.0, 0.0, 1.0]) + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) + assert np.allclose(np.eye(3), matrix) + + +def test_homogenous_quaternion_to_rotation_matrix_Z(rotation_matrix_Z_HALF_PI): + quaternion = np.array([0, 0, 0.7071068, 0.7071068]) + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) + assert np.allclose(rotation_matrix_Z_HALF_PI, matrix) + + +def test_homogenous_quaternion_to_rotation_matrix_Y(rotation_matrix_Y_HALF_PI): + quaternion = np.array([0, 0.7071068, 0.0, 0.7071068]) + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) + assert np.allclose(rotation_matrix_Y_HALF_PI, matrix) + + +def test_homogenous_quaternion_to_rotation_matrix_X(rotation_matrix_X_HALF_PI): + quaternion = np.array([0.7071068, 0.0, 0.0, 0.7071068]) + matrix = homogenous_quaternion_to_rotation_matrix(quaternion) + assert np.allclose(rotation_matrix_X_HALF_PI, matrix) + + +def test_quaternion_to_rotation_matrix_identity(): + quaternion = np.array([0.0, 0.0, 0.0, 1.0]) + matrix = quaternion_to_rotation_matrix(quaternion) + assert np.allclose(np.eye(3), matrix) + + +def test_quaternion_to_rotation_matrix_Z(rotation_matrix_Z_HALF_PI): + quaternion = np.array([0, 0, 0.7071068, 0.7071068]) + matrix = quaternion_to_rotation_matrix(quaternion) + assert np.allclose(rotation_matrix_Z_HALF_PI, matrix) + + +def test_quaternion_to_rotation_matrix_Y(rotation_matrix_Y_HALF_PI): + quaternion = np.array([0, 0.7071068, 0.0, 0.7071068]) + matrix = quaternion_to_rotation_matrix(quaternion) + assert np.allclose(rotation_matrix_Y_HALF_PI, matrix) + + +def test_quaternion_to_rotation_matrix_X(rotation_matrix_X_HALF_PI): + quaternion = np.array([0.7071068, 0.0, 0.0, 0.7071068]) + matrix = quaternion_to_rotation_matrix(quaternion) + assert np.allclose(rotation_matrix_X_HALF_PI, matrix) + + +def test_rotation_vector_to_rotation_matrix_identity(): + rotation_vector = np.array([0.0, 0.0, 0.0]) + matrix = rotation_vector_to_rotation_matrix(rotation_vector) + assert np.allclose(np.eye(3), matrix) + + +def test_rotation_vector_to_rotation_matrix_Z(rotation_matrix_Z_HALF_PI): + rotation_vector = np.array([0.0, 0.0, 
np.pi / 2.0]) + matrix = rotation_vector_to_rotation_matrix(rotation_vector) + assert np.allclose(rotation_matrix_Z_HALF_PI, matrix) + + +def test_rotation_vector_to_rotation_matrix_Y(rotation_matrix_Y_HALF_PI): + rotation_vector = np.array([0.0, np.pi / 2.0, 0.0]) + matrix = rotation_vector_to_rotation_matrix(rotation_vector) + assert np.allclose(rotation_matrix_Y_HALF_PI, matrix) + + +def test_rotation_vector_to_rotation_matrix_X(rotation_matrix_X_HALF_PI): + rotation_vector = np.array([np.pi / 2.0, 0.0, 0.0]) + matrix = rotation_vector_to_rotation_matrix(rotation_vector) + assert np.allclose(rotation_matrix_X_HALF_PI, matrix) + + +def test_to_affine_matrix_identity(): + rotation_matrix = np.eye(3) + translation = np.zeros(3) + matrix = to_affine_matrix(rotation_matrix, translation) + assert np.allclose(matrix, np.eye(4)) + + +def test_to_affine_matrix(): + rotation_matrix = np.array([[1.0, 0.0, 0.0], + [0.0, 0.0, -1.0], + [0.0, 1.0, 0.0]]) + translation = np.array([3.0, 1.2, 3.0]) + matrix = to_affine_matrix(rotation_matrix, translation) + affine_matrix = np.array([[1.0, 0.0, 0.0, 3.0], + [0.0, 0.0, -1.0, 1.2], + [0.0, 1.0, 0.0, 3.0], + [0.0, 0.0, 0.0, 1.0]]) + assert np.allclose(affine_matrix, matrix) + + +def test_image_to_normalized_device_coordinates(): + image = np.array([[0, 127.5, 255]]) + values = image_to_normalized_device_coordinates(image) + assert np.allclose(values, np.array([[-1.0, 0.0, 1.0]])) + + +def test_normalized_device_coordinates_to_image(): + coordinates = np.array([[-1.0, 0.0, 1.0]]) + values = normalized_device_coordinates_to_image(coordinates) + assert np.allclose(values, np.array([[0.0, 127.5, 255.0]])) + + +def test_build_rotation_matrix_x(rotation_matrix_X_HALF_PI): + angle = np.pi / 2.0 + matrix = build_rotation_matrix_x(angle) + assert np.allclose(matrix, rotation_matrix_X_HALF_PI) + + +def test_build_rotation_matrix_y(rotation_matrix_Y_HALF_PI): + angle = np.pi / 2.0 + matrix = build_rotation_matrix_y(angle) + assert np.allclose(matrix, rotation_matrix_Y_HALF_PI) + + +def test_build_rotation_matrix_z(rotation_matrix_Z_HALF_PI): + angle = np.pi / 2.0 + matrix = build_rotation_matrix_z(angle) + assert np.allclose(matrix, rotation_matrix_Z_HALF_PI) + + +# test_sample_uniform +# test_sample_inside_box3D +# test_sample_front_rotation_matrix +# test_sample_afine_transform +# test_sample_random_rotation_matrix + +def test_compute_norm_SO3_X(rotation_matrix_X_HALF_PI): + norm = compute_norm_SO3(np.eye(3), rotation_matrix_X_HALF_PI) + assert np.allclose(norm, 2.0) + + +def test_compute_norm_SO3_Y(rotation_matrix_Y_HALF_PI): + norm = compute_norm_SO3(np.eye(3), rotation_matrix_Y_HALF_PI) + assert np.allclose(norm, 2.0) + + +def test_compute_norm_SO3_Z(rotation_matrix_Z_HALF_PI): + norm = compute_norm_SO3(np.eye(3), rotation_matrix_Z_HALF_PI) + assert np.allclose(norm, 2.0) + + +def test_compute_norm_SO3_identity(): + norm = compute_norm_SO3(np.eye(3), np.eye(3)) + assert np.allclose(norm, 0.0) + + +def test_compute_norm_SO3_X_to_Z(rotation_matrix_X_HALF_PI, + rotation_matrix_Z_HALF_PI): + norm = compute_norm_SO3(rotation_matrix_X_HALF_PI, + rotation_matrix_Z_HALF_PI) + assert np.allclose(norm, 2.449489742783178) + + +# calculate_canonical_rotation diff --git a/examples/pix2pose/legacy.py b/examples/pix2pose/legacy.py new file mode 100644 index 000000000..6069ada13 --- /dev/null +++ b/examples/pix2pose/legacy.py @@ -0,0 +1,118 @@ +from tensorflow.keras.losses import Loss +from tensorflow.keras.losses import mean_squared_error +import tensorflow as tf + + +def 
compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): + """Computes the mininum of all rotated L1 reconstruction losses weighting + the positive alpha mask values in the predicted RGB image by beta. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. + rotations: Array (num_symmetries, 3, 3). Rotation matrices + that when applied lead to the same object view. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values. + """ + RGB_true, alpha = split_alpha_mask(RGBA_true) + RGB_true = normalized_image_to_normalized_device_coordinates(RGB_true) + symmetric_losses = [] + for rotation in rotations: + RGB_true_rotated = tf.einsum('ij,bklj->bkli', rotation, RGB_true) + RGB_true_rotated = normalized_device_coordinates_to_normalized_image( + RGB_true_rotated) + RGB_true_rotated = tf.clip_by_value(RGB_true_rotated, 0.0, 1.0) + RGB_true_rotated = RGB_true_rotated * alpha + RGBA_true_rotated = tf.concat([RGB_true_rotated, alpha], axis=3) + loss = compute_weighted_reconstruction_loss( + RGBA_true_rotated, RGB_pred, beta) + loss = tf.expand_dims(loss, -1) + symmetric_losses.append(loss) + symmetric_losses = tf.concat(symmetric_losses, axis=-1) + minimum_symmetric_loss = tf.reduce_min(symmetric_losses, axis=-1) + return minimum_symmetric_loss + + +class WeightedSymmetricReconstruction(Loss): + """Computes the mininum of all rotated L1 reconstruction losses weighting + the positive alpha mask values in the predicted RGB image by beta. + """ + def __init__(self, rotations, beta=3.0): + super(WeightedSymmetricReconstruction, self).__init__() + self.rotations = rotations + self.beta = beta + + def call(self, RGBA_true, RGB_pred): + loss = compute_weighted_symmetric_loss( + RGBA_true, RGB_pred, self.rotations, self.beta) + return loss + + +def compute_error_prediction_loss(RGBA_true, RGBE_pred): + """Computes L2 reconstruction loss of predicted error mask. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGBE_pred: Tensor [batch, H, W, 3]. Predicted RGB and error mask. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values. + + """ + RGB_pred, error_pred = split_error_mask(RGBE_pred) + error_true = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, 1.0) + # TODO check we need to set minimum to 1.0? + error_true = tf.minimum(error_true, 1.0) + error_loss = mean_squared_error(error_true, error_pred) + error_loss = tf.expand_dims(error_loss, axis=-1) + return error_loss + + +class ErrorPrediction(Loss): + """Computes L2 reconstruction loss of predicted error mask. + + # Arguments + RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. + RGBE_pred: Tensor [batch, H, W, 3]. Predicted RGB and error mask. + + # Returns + Tensor [batch, H, W] with weighted reconstruction loss values. + + """ + def __init__(self): + super(ErrorPrediction, self).__init__() + + def call(self, RGBA_true, RGBE_pred): + error_loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) + return error_loss + + +from paz.backend.image import draw_dot + + +def draw_points2D_(image, keypoints, colors, radius=1): + for (u, v), (R, G, B) in zip(keypoints, colors): + color = (int(R), int(G), int(B)) + draw_dot(image, (u, v), color, radius) + return image + + +def rotate_image(image, rotation_matrix): + """Rotates an image with a symmetry. + + # Arguments + image: Array (H, W, 3) with domain [0, 255]. 
+ rotation_matrix: Array (3, 3). + + # Returns + Array (H, W, 3) with domain [0, 255] + """ + mask_image = np.sum(image, axis=-1, keepdims=True) != 0 + image = image_to_normalized_device_coordinates(image) + rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) + rotated_image = normalized_device_coordinates_to_image(rotated_image) + rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) + rotated_image = rotated_image * mask_image + return rotated_image diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 7ebe0d7a3..7d6abf446 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -231,7 +231,8 @@ def call(self, image): class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_pose, offsets, draw=True, valid_class_names=['035_power_drill']): + def __init__(self, detect, estimate_pose, offsets, draw=True, + valid_class_names=['035_power_drill']): """Pose estimation pipeline using keypoints. """ super(EstimatePoseMasks, self).__init__() diff --git a/examples/pix2pose/weighted_reconstruction.py b/examples/pix2pose/weighted_reconstruction.py index ead212f09..099257b17 100644 --- a/examples/pix2pose/weighted_reconstruction.py +++ b/examples/pix2pose/weighted_reconstruction.py @@ -1,5 +1,4 @@ from tensorflow.keras.losses import Loss -from tensorflow.keras.losses import mean_squared_error import tensorflow as tf @@ -100,38 +99,6 @@ def normalized_device_coordinates_to_normalized_image(image): return (image + 1.0) / 2.0 -def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): - """Computes the mininum of all rotated L1 reconstruction losses weighting - the positive alpha mask values in the predicted RGB image by beta. - - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. - rotations: Array (num_symmetries, 3, 3). Rotation matrices - that when applied lead to the same object view. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. - """ - RGB_true, alpha = split_alpha_mask(RGBA_true) - RGB_true = normalized_image_to_normalized_device_coordinates(RGB_true) - symmetric_losses = [] - for rotation in rotations: - RGB_true_rotated = tf.einsum('ij,bklj->bkli', rotation, RGB_true) - RGB_true_rotated = normalized_device_coordinates_to_normalized_image( - RGB_true_rotated) - RGB_true_rotated = tf.clip_by_value(RGB_true_rotated, 0.0, 1.0) - RGB_true_rotated = RGB_true_rotated * alpha - RGBA_true_rotated = tf.concat([RGB_true_rotated, alpha], axis=3) - loss = compute_weighted_reconstruction_loss( - RGBA_true_rotated, RGB_pred, beta) - loss = tf.expand_dims(loss, -1) - symmetric_losses.append(loss) - symmetric_losses = tf.concat(symmetric_losses, axis=-1) - minimum_symmetric_loss = tf.reduce_min(symmetric_losses, axis=-1) - return minimum_symmetric_loss - - def compute_weighted_reconstruction_loss_with_error( RGBA_true, RGBE_pred, beta=3.0): """Computes L1 reconstruction loss by multiplying positive alpha mask @@ -151,25 +118,6 @@ def compute_weighted_reconstruction_loss_with_error( return loss -def compute_error_prediction_loss(RGBA_true, RGBE_pred): - """Computes L2 reconstruction loss of predicted error mask. - - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGBE_pred: Tensor [batch, H, W, 3]. Predicted RGB and error mask. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. 
- - """ - RGB_pred, error_pred = split_error_mask(RGBE_pred) - error_true = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, 1.0) - error_true = tf.minimum(error_true, 1.0) - error_loss = mean_squared_error(error_true, error_pred) - error_loss = tf.expand_dims(error_loss, axis=-1) - return error_loss - - class WeightedReconstruction(Loss): """Computes L1 reconstruction loss by multiplying positive alpha mask by beta. @@ -193,40 +141,6 @@ def call(self, RGBA_true, RGB_pred): return loss -class WeightedSymmetricReconstruction(Loss): - """Computes the mininum of all rotated L1 reconstruction losses weighting - the positive alpha mask values in the predicted RGB image by beta. - """ - def __init__(self, rotations, beta=3.0): - super(WeightedSymmetricReconstruction, self).__init__() - self.rotations = rotations - self.beta = beta - - def call(self, RGBA_true, RGB_pred): - loss = compute_weighted_symmetric_loss( - RGBA_true, RGB_pred, self.rotations, self.beta) - return loss - - -class ErrorPrediction(Loss): - """Computes L2 reconstruction loss of predicted error mask. - - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGBE_pred: Tensor [batch, H, W, 3]. Predicted RGB and error mask. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. - - """ - def __init__(self): - super(ErrorPrediction, self).__init__() - - def call(self, RGBA_true, RGBE_pred): - error_loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) - return error_loss - - class WeightedReconstructionWithError(Loss): """Computes L1 reconstruction loss by multiplying positive alpha mask by beta. diff --git a/examples/pix2pose/weighted_reconstruction_test.py b/examples/pix2pose/weighted_reconstruction_test.py index 73a02d7d7..bbfd8c09f 100644 --- a/examples/pix2pose/weighted_reconstruction_test.py +++ b/examples/pix2pose/weighted_reconstruction_test.py @@ -4,7 +4,6 @@ from .weighted_reconstruction import split_alpha_mask from .weighted_reconstruction import compute_foreground_loss from .weighted_reconstruction import compute_background_loss -from .weighted_reconstruction import compute_error_prediction_loss from .weighted_reconstruction import compute_weighted_reconstruction_loss from .weighted_reconstruction import ( compute_weighted_reconstruction_loss_with_error) @@ -12,6 +11,7 @@ normalized_image_to_normalized_device_coordinates, normalized_device_coordinates_to_normalized_image) from .weighted_reconstruction import WeightedReconstruction +from .weighted_reconstruction import WeightedReconstructionWithError @pytest.fixture @@ -103,13 +103,15 @@ def test_weighted_reconstruction_loss_with_error(RGBA_true, RGBE_pred): assert np.allclose(loss, 1.5) -def test_error_prediction_loss(RGBA_true, RGBE_pred): - # TODO change RGBE_pred - loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) - print(loss) - assert True +def test_WeightedReconstructionWithError(RGBA_true, RGBE_pred): + compute_loss = WeightedReconstructionWithError(beta=3.0) + loss = compute_loss(RGBA_true, RGBE_pred) + assert np.allclose(loss, 1.5) + + -# test_WeightedReconstructionWithError +# def test_error_prediction_loss(RGBA_true, RGBE_pred): +# def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0) # test_ErrorPrediction # test_WeightedSymmetricReconstruction From 0c4700e6a7f28716d7f41987289fc6e95c21d548 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 12:44:40 +0100 Subject: [PATCH 085/101] Removed mulitple pix2pose pipeline --- 
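[Editorial note, not part of the original patch] This commit moves the EstimatePoseMasks and MultiPix2Pose pipelines out of pipelines.py and into legacy.py essentially verbatim. Both pipelines share the same 2D-box post-processing chain; the snippet below is a minimal, self-contained sketch of that chain in isolation, assuming only that paz is installed. The box coordinates, score, class name and offsets are made-up illustration values.

import numpy as np
from paz.abstract import SequentialProcessor
from paz.abstract.messages import Box2D
from paz import processors as pr

# Same processor chain used by EstimatePoseMasks and MultiPix2Pose: keep only boxes of
# the valid classes, square them, and apply the given offsets.
postprocess_boxes = SequentialProcessor(
    [pr.UnpackDictionary(['boxes2D']),
     pr.FilterClassBoxes2D(['035_power_drill']),
     pr.SquareBoxes2D(),
     pr.OffsetBoxes2D([0.2, 0.2])])

# Hypothetical detection output with a single box given as (x_min, y_min, x_max, y_max).
detections = {'boxes2D': [Box2D(np.array([30, 40, 120, 200]), 1.0, '035_power_drill')]}
print(postprocess_boxes(detections))
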
examples/pix2pose/legacy.py | 133 ++++++++++++++++++++++++++++ examples/pix2pose/pipelines.py | 154 +++------------------------------ 2 files changed, 145 insertions(+), 142 deletions(-) diff --git a/examples/pix2pose/legacy.py b/examples/pix2pose/legacy.py index 6069ada13..76b2b6973 100644 --- a/examples/pix2pose/legacy.py +++ b/examples/pix2pose/legacy.py @@ -116,3 +116,136 @@ def rotate_image(image, rotation_matrix): rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) rotated_image = rotated_image * mask_image return rotated_image + + +class EstimatePoseMasks(Processor): + def __init__(self, detect, estimate_pose, offsets, draw=True, + valid_class_names=['035_power_drill']): + """Pose estimation pipeline using keypoints. + """ + super(EstimatePoseMasks, self).__init__() + self.detect = detect + self.estimate_pose = estimate_pose + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(valid_class_names), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.object_sizes = self.estimate_pose.object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.draw = draw + + def call(self, image): + boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points = [], [] + for crop, box2D in zip(cropped_images, boxes2D): + results = self.estimate_pose(crop, box2D) + pose6D, points2D, points3D = self.unwrap(results) + poses6D.append(pose6D), points.append([points2D, points3D]) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + image = draw_masks(image, points, self.object_sizes) + image = draw_poses6D(image, poses6D, self.cube_points3D, + self.estimate_pose.camera.intrinsics) + return self.wrap(image, boxes2D, poses6D) + + +class MultiPix2Pose(Processor): + def __init__(self, detect, segment, camera, name_to_weights, name_to_sizes, + valid_class_names, offsets=[0.2, 0.2], epsilon=0.15, draw=True): + self.detect = detect + self.name_to_weights = name_to_weights + self.name_to_sizes = name_to_sizes + self.valid_class_names = valid_class_names + self.pix2points = Pix2Points(segment, np.zeros((3)), epsilon) + self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) + self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() + self.camera = camera + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(valid_class_names), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.draw = draw + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.name_to_cube_points3D = {} + self.mask_to_points2D = RGBMaskToImagePoints2D( + segment.output_shape[1:3]) + for name in self.name_to_sizes: + W, H, D = self.name_to_sizes[name] + cube_points3D = build_cube_points3D(W, H, D) + self.name_to_cube_points3D[name] = cube_points3D + + self.predict_RGBMask = PredictRGBMask(segment, epsilon) + + def call(self, image): + boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points2D, points3D = [], [], [] 
+ for crop, box2D in zip(cropped_images, boxes2D): + class_name = box2D.class_name + name_to_weights = self.name_to_weights[class_name] + self.pix2points.model.load_weights(name_to_weights) + object_sizes = self.name_to_sizes[class_name] + # self.pix2points.object_sizes = object_sizes + # points = self.pix2points(crop) + + RGB_mask = self.predict_RGBMask(crop) + H, W, num_channels = crop.shape + RGB_mask = resize_image(RGB_mask, (W, H)) + + self.mask_to_points3D = RGBMaskToObjectPoints3D(object_sizes) + class_points3D = self.mask_to_points3D(RGB_mask) + class_points2D = self.mask_to_points2D(RGB_mask) + class_points2D = normalize_points2D(class_points2D, H, W) + + # from paz.backend.image import show_image + # show_image((points['RGB_mask'] * 255).astype('uint8')) + # class_points2D = points['points2D'] + # class_points3D = points['points3D'] + H, W, num_channels = crop.shape + class_points2D = denormalize_points2D(class_points2D, H, W) + class_points2D = self.change_coordinates(class_points2D, box2D) + print(len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS) + print(len(class_points3D), len(class_points2D)) + if len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS: + pose_results = self.predict_pose(class_points3D, class_points2D) + success, rotation, translation = pose_results + print('solver success', success) + # success = True + else: + success = False + if success: + quaternion = rotation_vector_to_quaternion(rotation) + pose6D = Pose6D(quaternion, translation, class_name) + else: + pose6D = None + print(success) + points2D.append(class_points2D) + points3D.append(class_points3D) + poses6D.append(pose6D) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + for class_points2D, class_points3D, pose6D in zip(points2D, points3D, poses6D): + class_name = pose6D.class_name + object_sizes = self.name_to_sizes[class_name] + colors = points3D_to_RGB(class_points3D, object_sizes) + image = draw_points2D(image, class_points2D, colors) + + for pose6D in poses6D: + class_name = pose6D.class_name + cube_points3D = self.name_to_cube_points3D[class_name] + image = draw_pose6D(image, pose6D, cube_points3D, + self.camera.intrinsics) + return {'image': image, 'boxes2D': boxes2D, 'poses6D': poses6D} diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 7d6abf446..2e6fb2a97 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -1,24 +1,27 @@ -from paz.abstract import SequentialProcessor, Processor +from paz.abstract import SequentialProcessor from paz.pipelines import RandomizeRenderedImage as RandomizeRender from paz.abstract.messages import Pose6D +from paz.backend.quaternion import rotation_vector_to_quaternion +from paz.backend.image import resize_image from paz import processors as pr + from processors import ( GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC, ReplaceLowerThanThreshold) + from backend import build_cube_points3D -from processors import UnwrapDictionary from backend import denormalize_points2D -from backend import draw_poses6D from backend import draw_pose6D -from backend import draw_masks from backend import draw_mask from backend import normalize_points2D -from backend import points3D_to_RGB -from backend import draw_points2D -from paz.backend.quaternion import rotation_vector_to_quaternion -from paz.backend.image import resize_image -import numpy as np + +# from processors import UnwrapDictionary +# from backend 
import draw_poses6D +# from backend import draw_masks +# from backend import points3D_to_RGB +# from backend import draw_points2D +# import numpy as np class DomainRandomization(SequentialProcessor): @@ -134,136 +137,3 @@ def call(self, image, box2D=None): results[topic] = image results['points2D'], results['pose6D'] = points2D, pose6D return results - - -class MultiPix2Pose(Processor): - def __init__(self, detect, segment, camera, name_to_weights, name_to_sizes, - valid_class_names, offsets=[0.2, 0.2], epsilon=0.15, draw=True): - self.detect = detect - self.name_to_weights = name_to_weights - self.name_to_sizes = name_to_sizes - self.valid_class_names = valid_class_names - self.pix2points = Pix2Points(segment, np.zeros((3)), epsilon) - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.camera = camera - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(valid_class_names), - pr.SquareBoxes2D(), - pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.draw = draw - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - self.name_to_cube_points3D = {} - self.mask_to_points2D = RGBMaskToImagePoints2D( - segment.output_shape[1:3]) - for name in self.name_to_sizes: - W, H, D = self.name_to_sizes[name] - cube_points3D = build_cube_points3D(W, H, D) - self.name_to_cube_points3D[name] = cube_points3D - - self.predict_RGBMask = PredictRGBMask(segment, epsilon) - - def call(self, image): - boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points2D, points3D = [], [], [] - for crop, box2D in zip(cropped_images, boxes2D): - class_name = box2D.class_name - name_to_weights = self.name_to_weights[class_name] - self.pix2points.model.load_weights(name_to_weights) - object_sizes = self.name_to_sizes[class_name] - # self.pix2points.object_sizes = object_sizes - # points = self.pix2points(crop) - - RGB_mask = self.predict_RGBMask(crop) - H, W, num_channels = crop.shape - RGB_mask = resize_image(RGB_mask, (W, H)) - - self.mask_to_points3D = RGBMaskToObjectPoints3D(object_sizes) - class_points3D = self.mask_to_points3D(RGB_mask) - class_points2D = self.mask_to_points2D(RGB_mask) - class_points2D = normalize_points2D(class_points2D, H, W) - - # from paz.backend.image import show_image - # show_image((points['RGB_mask'] * 255).astype('uint8')) - # class_points2D = points['points2D'] - # class_points3D = points['points3D'] - H, W, num_channels = crop.shape - class_points2D = denormalize_points2D(class_points2D, H, W) - class_points2D = self.change_coordinates(class_points2D, box2D) - print(len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS) - print(len(class_points3D), len(class_points2D)) - if len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS: - pose_results = self.predict_pose(class_points3D, class_points2D) - success, rotation, translation = pose_results - print('solver success', success) - # success = True - else: - success = False - if success: - quaternion = rotation_vector_to_quaternion(rotation) - pose6D = Pose6D(quaternion, translation, class_name) - else: - pose6D = None - print(success) - points2D.append(class_points2D) - points3D.append(class_points3D) - poses6D.append(pose6D) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - for 
class_points2D, class_points3D, pose6D in zip(points2D, points3D, poses6D): - class_name = pose6D.class_name - object_sizes = self.name_to_sizes[class_name] - colors = points3D_to_RGB(class_points3D, object_sizes) - image = draw_points2D(image, class_points2D, colors) - - for pose6D in poses6D: - class_name = pose6D.class_name - cube_points3D = self.name_to_cube_points3D[class_name] - image = draw_pose6D(image, pose6D, cube_points3D, - self.camera.intrinsics) - return {'image': image, 'boxes2D': boxes2D, 'poses6D': poses6D} - - -class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_pose, offsets, draw=True, - valid_class_names=['035_power_drill']): - """Pose estimation pipeline using keypoints. - """ - super(EstimatePoseMasks, self).__init__() - self.detect = detect - self.estimate_pose = estimate_pose - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(valid_class_names), - pr.SquareBoxes2D(), - pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) - self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.object_sizes = self.estimate_pose.object_sizes - self.cube_points3D = build_cube_points3D(*self.object_sizes) - self.draw = draw - - def call(self, image): - boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points = [], [] - for crop, box2D in zip(cropped_images, boxes2D): - results = self.estimate_pose(crop, box2D) - pose6D, points2D, points3D = self.unwrap(results) - poses6D.append(pose6D), points.append([points2D, points3D]) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - image = draw_masks(image, points, self.object_sizes) - image = draw_poses6D(image, poses6D, self.cube_points3D, - self.estimate_pose.camera.intrinsics) - return self.wrap(image, boxes2D, poses6D) From 24db9d04ade3a6cd0b68b97b129e5cc9bb88d8e2 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 12:52:04 +0100 Subject: [PATCH 086/101] Move scene to legacy --- examples/pix2pose/legacy.py | 59 +++++++++++++++++++++++++++++++ examples/pix2pose/scenes.py | 69 ++----------------------------------- 2 files changed, 61 insertions(+), 67 deletions(-) diff --git a/examples/pix2pose/legacy.py b/examples/pix2pose/legacy.py index 76b2b6973..e8140fb48 100644 --- a/examples/pix2pose/legacy.py +++ b/examples/pix2pose/legacy.py @@ -249,3 +249,62 @@ def call(self, image): image = draw_pose6D(image, pose6D, cube_points3D, self.camera.intrinsics) return {'image': image, 'boxes2D': boxes2D, 'poses6D': poses6D} + + +class PixelMaskRenderer(): + """Render-ready scene composed of a single object and a single moving camera. + + # Arguments + path_OBJ: String containing the path to an OBJ file. + viewport_size: List, specifying [H, W] of rendered image. + y_fov: Float indicating the vertical field of view in radians. + distance: List of floats indicating [max_distance, min_distance] + light: List of floats indicating [max_light, min_light] + top_only: Boolean. If True images are only take from the top. + roll: Float, to sample [-roll, roll] rolls of the Z OpenGL camera axis. + shift: Float, to sample [-shift, shift] to move in X, Y OpenGL axes. 
+ """ + def __init__(self, path_OBJ, viewport_size=(128, 128), y_fov=3.14159 / 4.0, + distance=[0.3, 0.5], light=[0.5, 30], top_only=False, + roll=None, shift=None): + self.distance, self.roll, self.shift = distance, roll, shift + self.light_intensity, self.top_only = light, top_only + self._build_scene(path_OBJ, viewport_size, light, y_fov) + self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) + self.flags_RGBA = RenderFlags.RGBA + self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT + self.epsilon = 0.01 + + def _build_scene(self, path, size, light, y_fov): + self.scene = Scene(bg_color=[0, 0, 0, 0]) + self.light = self.scene.add( + DirectionalLight([1.0, 1.0, 1.0], np.mean(light))) + self.camera = self.scene.add( + PerspectiveCamera(y_fov, aspectRatio=np.divide(*size))) + self.pixel_mesh = self.scene.add(color_object(path)) + self.mesh = self.scene.add( + Mesh.from_trimesh(trimesh.load(path), smooth=True)) + self.world_origin = self.mesh.mesh.centroid + + def _sample_parameters(self): + distance = sample_uniformly(self.distance) + camera_origin = sample_point_in_sphere(distance, self.top_only) + camera_origin = random_perturbation(camera_origin, self.epsilon) + light_intensity = sample_uniformly(self.light_intensity) + return camera_origin, light_intensity + + def render(self): + camera_origin, intensity = self._sample_parameters() + camera_to_world, world_to_camera = compute_modelview_matrices( + camera_origin, self.world_origin, self.roll, self.shift) + self.light.light.intensity = intensity + self.scene.set_pose(self.camera, camera_to_world) + self.scene.set_pose(self.light, camera_to_world) + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + return image, alpha, RGB_mask diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index 79d8da90c..53730deca 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -1,74 +1,13 @@ import numpy as np -from paz.backend.render import (sample_uniformly, split_alpha_channel, - random_perturbation, sample_point_in_sphere, - compute_modelview_matrices) +from paz.backend.render import sample_uniformly, split_alpha_channel from pyrender import (PerspectiveCamera, OffscreenRenderer, DirectionalLight, RenderFlags, Mesh, Scene) import trimesh + from coloring import color_object from backend import to_affine_matrix from backend import sample_affine_transform from backend import calculate_canonical_rotation -from paz.models import UNET_VGG16 - - -class PixelMaskRenderer(): - """Render-ready scene composed of a single object and a single moving camera. - - # Arguments - path_OBJ: String containing the path to an OBJ file. - viewport_size: List, specifying [H, W] of rendered image. - y_fov: Float indicating the vertical field of view in radians. - distance: List of floats indicating [max_distance, min_distance] - light: List of floats indicating [max_light, min_light] - top_only: Boolean. If True images are only take from the top. - roll: Float, to sample [-roll, roll] rolls of the Z OpenGL camera axis. - shift: Float, to sample [-shift, shift] to move in X, Y OpenGL axes. 
- """ - def __init__(self, path_OBJ, viewport_size=(128, 128), y_fov=3.14159 / 4.0, - distance=[0.3, 0.5], light=[0.5, 30], top_only=False, - roll=None, shift=None): - self.distance, self.roll, self.shift = distance, roll, shift - self.light_intensity, self.top_only = light, top_only - self._build_scene(path_OBJ, viewport_size, light, y_fov) - self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) - self.flags_RGBA = RenderFlags.RGBA - self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT - self.epsilon = 0.01 - - def _build_scene(self, path, size, light, y_fov): - self.scene = Scene(bg_color=[0, 0, 0, 0]) - self.light = self.scene.add( - DirectionalLight([1.0, 1.0, 1.0], np.mean(light))) - self.camera = self.scene.add( - PerspectiveCamera(y_fov, aspectRatio=np.divide(*size))) - self.pixel_mesh = self.scene.add(color_object(path)) - self.mesh = self.scene.add( - Mesh.from_trimesh(trimesh.load(path), smooth=True)) - self.world_origin = self.mesh.mesh.centroid - - def _sample_parameters(self): - distance = sample_uniformly(self.distance) - camera_origin = sample_point_in_sphere(distance, self.top_only) - camera_origin = random_perturbation(camera_origin, self.epsilon) - light_intensity = sample_uniformly(self.light_intensity) - return camera_origin, light_intensity - - def render(self): - camera_origin, intensity = self._sample_parameters() - camera_to_world, world_to_camera = compute_modelview_matrices( - camera_origin, self.world_origin, self.roll, self.shift) - self.light.light.intensity = intensity - self.scene.set_pose(self.camera, camera_to_world) - self.scene.set_pose(self.light, camera_to_world) - self.pixel_mesh.mesh.is_visible = False - image, depth = self.renderer.render(self.scene, self.flags_RGBA) - self.pixel_mesh.mesh.is_visible = True - image, alpha = split_alpha_channel(image) - self.mesh.mesh.is_visible = False - RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) - self.mesh.mesh.is_visible = True - return image, alpha, RGB_mask class CanonicalScene(): @@ -155,10 +94,6 @@ def render_symmetries(self): from paz.backend.image import show_image from backend import build_rotation_matrix_x from backend import build_rotation_matrix_z - from backend import build_rotation_matrix_y - from paz.backend.render import compute_modelview_matrices - from pipelines import DomainRandomization - import glob # generic parameters root_path = os.path.expanduser('~') From 4472df90ec72a0848d2b0fa11c91842c4c99e970 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 12:53:18 +0100 Subject: [PATCH 087/101] Remove test --- examples/pix2pose/test.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/examples/pix2pose/test.py b/examples/pix2pose/test.py index d9effe5b6..4304c95fd 100644 --- a/examples/pix2pose/test.py +++ b/examples/pix2pose/test.py @@ -1,7 +1,6 @@ from paz.abstract import SequentialProcessor, Processor from paz import processors as pr import numpy as np -from backend import build_cube_points3D # import pytest @@ -63,7 +62,3 @@ def test_copy_with_controlmap_using_3_channels_plus(): assert len(values) == 2 assert np.allclose(values[0], A_random_values + B_random_values) assert np.allclose(values[1], A_random_values) - - -def test_build_cube_points3D(width, height, depth): - cube_points3D = build_cube_points3D(width, height, depth) From 8aba37db75f1ac9733f9afaab760d45556793a6a Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 12:58:46 +0100 Subject: [PATCH 088/101] Rearranged structure to include tested functionality --- 
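[Editorial note, not part of the original patch] This commit only relocates files: the legacy-only scripts move under examples/pix2pose/legacy/ and test.py becomes abstract_test.py, all with unchanged content (100% similarity renames). The unfinished test_build_cube_points3D stub dropped in the previous commit was redundant, since the helper is already exercised in backend_test.py; a minimal sketch of that usage follows, with illustrative extents taken from the power-drill mesh_extents comment in this example and assumed to be run from examples/pix2pose/.

import numpy as np
from backend import build_cube_points3D

# Corner points of an axis-aligned box with the given (width, height, depth) extents.
cube_points3D = build_cube_points3D(0.184, 0.187, 0.052)
assert cube_points3D.shape == (8, 3)
# Expected to be centred at the origin, mirroring the unit-cube fixture in backend_test.py.
assert np.allclose(cube_points3D.mean(axis=0), 0.0)
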
examples/pix2pose/{test.py => abstract_test.py} | 0 examples/pix2pose/{ => legacy}/demo_image.py | 0 examples/pix2pose/{ => legacy}/legacy.py | 0 examples/pix2pose/{ => legacy}/test_rotated_image.py | 0 examples/pix2pose/{ => legacy}/train_gan.py | 0 examples/pix2pose/{ => legacy}/train_symmetric.py | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename examples/pix2pose/{test.py => abstract_test.py} (100%) rename examples/pix2pose/{ => legacy}/demo_image.py (100%) rename examples/pix2pose/{ => legacy}/legacy.py (100%) rename examples/pix2pose/{ => legacy}/test_rotated_image.py (100%) rename examples/pix2pose/{ => legacy}/train_gan.py (100%) rename examples/pix2pose/{ => legacy}/train_symmetric.py (100%) diff --git a/examples/pix2pose/test.py b/examples/pix2pose/abstract_test.py similarity index 100% rename from examples/pix2pose/test.py rename to examples/pix2pose/abstract_test.py diff --git a/examples/pix2pose/demo_image.py b/examples/pix2pose/legacy/demo_image.py similarity index 100% rename from examples/pix2pose/demo_image.py rename to examples/pix2pose/legacy/demo_image.py diff --git a/examples/pix2pose/legacy.py b/examples/pix2pose/legacy/legacy.py similarity index 100% rename from examples/pix2pose/legacy.py rename to examples/pix2pose/legacy/legacy.py diff --git a/examples/pix2pose/test_rotated_image.py b/examples/pix2pose/legacy/test_rotated_image.py similarity index 100% rename from examples/pix2pose/test_rotated_image.py rename to examples/pix2pose/legacy/test_rotated_image.py diff --git a/examples/pix2pose/train_gan.py b/examples/pix2pose/legacy/train_gan.py similarity index 100% rename from examples/pix2pose/train_gan.py rename to examples/pix2pose/legacy/train_gan.py diff --git a/examples/pix2pose/train_symmetric.py b/examples/pix2pose/legacy/train_symmetric.py similarity index 100% rename from examples/pix2pose/train_symmetric.py rename to examples/pix2pose/legacy/train_symmetric.py From c5939cb228540f7a63731a84705bbd1ed27b30de Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 13:59:09 +0100 Subject: [PATCH 089/101] Refactor coloring to be in backend and add pixel mask rendering for simple segmentation training --- examples/pix2pose/backend.py | 38 ++++++++++++ examples/pix2pose/backend_test.py | 39 +++++++++++++ examples/pix2pose/coloring.py | 50 ---------------- examples/pix2pose/scenes.py | 97 +++++++++++++++++++++++++++++-- 4 files changed, 170 insertions(+), 54 deletions(-) delete mode 100644 examples/pix2pose/coloring.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 1f3f3524f..775b17ce2 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -642,6 +642,8 @@ def sample_random_rotation_matrix(): def compute_norm_SO3(rotation_mesh, rotation): + """Computes norm between SO3 elements. 
+ """ difference = np.dot(np.linalg.inv(rotation), rotation_mesh) - np.eye(3) distance = np.linalg.norm(difference, ord='fro') return distance @@ -653,3 +655,39 @@ def calculate_canonical_rotation(rotation_mesh, rotations): closest_rotation = rotations[closest_rotation_arg] canonical_rotation = np.linalg.inv(closest_rotation) return canonical_rotation + + +def normalize_min_max(x, x_min, x_max): + """Normalized data using it's maximum and minimum values + + # Arguments + x: array + x_min: minimum value of x + x_max: maximum value of x + + # Returns + min-max normalized data + """ + return (x - x_min) / (x_max - x_min) + + +def extract_bounding_box_corners(points3D): + """Extracts the (x_min, y_min, z_min) and the (x_max, y_max, z_max) + coordinates from an array of points3D + # Arguments + points3D: Array (num_points, 3) + + # Returns + Left-down-bottom corner (x_min, y_min, z_min) and right-up-top + (x_max, y_max, z_max) corner. + """ + XYZ_min = np.min(points3D, axis=0) + XYZ_max = np.max(points3D, axis=0) + return XYZ_min, XYZ_max + + +def compute_vertices_colors(vertices): + corner3D_min, corner3D_max = extract_bounding_box_corners(vertices) + normalized_colors = normalize_min_max(vertices, corner3D_min, corner3D_max) + colors = (255 * normalized_colors).astype('uint8') + return colors diff --git a/examples/pix2pose/backend_test.py b/examples/pix2pose/backend_test.py index c36e64f25..28a944a2f 100644 --- a/examples/pix2pose/backend_test.py +++ b/examples/pix2pose/backend_test.py @@ -18,6 +18,9 @@ from .backend import build_rotation_matrix_y from .backend import build_rotation_matrix_z from .backend import compute_norm_SO3 +from .backend import normalize_min_max +from .backend import extract_bounding_box_corners +from .backend import compute_vertices_colors @pytest.fixture @@ -35,6 +38,7 @@ def rotation_matrix_Y_HALF_PI(): [-1.0, 0.0, 0.0]]) return rotation_matrix + @pytest.fixture def rotation_matrix_Z_HALF_PI(): rotation_matrix = np.array([[0.0, -1.0, 0.0], @@ -66,6 +70,17 @@ def points2D(): [267, 310]]) +@pytest.fixture +def points3D(): + return np.array([[10, 301, 30], + [145, 253, 12], + [203, 5, 299], + [214, 244, 98], + [23, 67, 16], + [178, 48, 234], + [267, 310, 2]]) + + def test_build_cube_points3D(unit_cube): cube_points = build_cube_points3D(1, 1, 1) assert np.allclose(unit_cube, cube_points) @@ -278,3 +293,27 @@ def test_compute_norm_SO3_X_to_Z(rotation_matrix_X_HALF_PI, # calculate_canonical_rotation + + +def test_normalize_min_max(): + x = np.array([-1.0, 0.0, 1.0]) + values = normalize_min_max(x, np.min(x), np.max(x)) + assert np.allclose(values, np.array([0.0, 0.5, 1.0])) + + +def test_extract_corners3D(points3D): + bottom_left, top_right = extract_bounding_box_corners(points3D) + assert np.allclose(bottom_left, np.array([10, 5, 2])) + assert np.allclose(top_right, np.array([267, 310, 299])) + + +def test_compute_vertices_colors(points3D): + values = compute_vertices_colors(points3D) + colors = np.array([[0, 247, 24], + [133, 207, 8], + [191, 0, 255], + [202, 199, 82], + [12, 51, 12], + [166, 35, 199], + [255, 255, 0]]) + assert np.allclose(values, colors) diff --git a/examples/pix2pose/coloring.py b/examples/pix2pose/coloring.py deleted file mode 100644 index f21e02990..000000000 --- a/examples/pix2pose/coloring.py +++ /dev/null @@ -1,50 +0,0 @@ -import os -import numpy as np -import trimesh -from pyrender import Mesh, Scene, Viewer -from pyrender.constants import RenderFlags - - -def normalize_min_max(x, x_min, x_max): - return (x - x_min) / (x_max - x_min) - - -def 
load_obj(path): - mesh = trimesh.load(path) - return mesh - - -def extract_corners3D(vertices): - point3D_min = np.min(vertices, axis=0) - point3D_max = np.max(vertices, axis=0) - return point3D_min, point3D_max - - -def compute_vertices_colors(vertices): - corner3D_min, corner3D_max = extract_corners3D(vertices) - normalized_colors = normalize_min_max(vertices, corner3D_min, corner3D_max) - colors = (255 * normalized_colors).astype('uint8') - return colors - - -def color_object(path): - mesh = load_obj(path) - colors = compute_vertices_colors(mesh.vertices) - mesh.visual = mesh.visual.to_color() - mesh.visual.vertex_colors = colors - mesh = Mesh.from_trimesh(mesh, smooth=False) - mesh.primitives[0].material.metallicFactor = 0.0 - mesh.primitives[0].material.roughnessFactor = 1.0 - mesh.primitives[0].material.alphaMode = 'OPAQUE' - return mesh - - -if __name__ == "__main__": - scene = Scene(bg_color=[0, 0, 0]) - root = os.path.expanduser('~') - mesh_path = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' - path = os.path.join(root, mesh_path) - mesh = color_object(path) - scene.add(mesh) - Viewer(scene, use_raymond_lighting=True, flags=RenderFlags.FLAT) - # mesh_extents = np.array([0.184, 0.187, 0.052]) diff --git a/examples/pix2pose/scenes.py b/examples/pix2pose/scenes.py index 53730deca..07e79e2e9 100644 --- a/examples/pix2pose/scenes.py +++ b/examples/pix2pose/scenes.py @@ -1,13 +1,102 @@ import numpy as np from paz.backend.render import sample_uniformly, split_alpha_channel +from paz.backend.render import ( + sample_point_in_sphere, random_perturbation, compute_modelview_matrices) from pyrender import (PerspectiveCamera, OffscreenRenderer, DirectionalLight, - RenderFlags, Mesh, Scene) + RenderFlags, Mesh, Scene, Viewer) import trimesh -from coloring import color_object from backend import to_affine_matrix from backend import sample_affine_transform from backend import calculate_canonical_rotation +from backend import compute_vertices_colors + + +def load_obj(path): + mesh = trimesh.load(path) + return mesh + + +def color_object(path): + mesh = load_obj(path) + colors = compute_vertices_colors(mesh.vertices) + mesh.visual = mesh.visual.to_color() + mesh.visual.vertex_colors = colors + mesh = Mesh.from_trimesh(mesh, smooth=False) + mesh.primitives[0].material.metallicFactor = 0.0 + mesh.primitives[0].material.roughnessFactor = 1.0 + mesh.primitives[0].material.alphaMode = 'OPAQUE' + return mesh + + +def quick_color_visualize(): + scene = Scene(bg_color=[0, 0, 0]) + root = os.path.expanduser('~') + mesh_path = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' + path = os.path.join(root, mesh_path) + mesh = color_object(path) + scene.add(mesh) + Viewer(scene, use_raymond_lighting=True, flags=RenderFlags.FLAT) + # mesh_extents = np.array([0.184, 0.187, 0.052]) + + +class PixelMaskRenderer(): + """Render-ready scene composed of a single object and a single moving camera. + + # Arguments + path_OBJ: String containing the path to an OBJ file. + viewport_size: List, specifying [H, W] of rendered image. + y_fov: Float indicating the vertical field of view in radians. + distance: List of floats indicating [max_distance, min_distance] + light: List of floats indicating [max_light, min_light] + top_only: Boolean. If True images are only take from the top. + roll: Float, to sample [-roll, roll] rolls of the Z OpenGL camera axis. + shift: Float, to sample [-shift, shift] to move in X, Y OpenGL axes. 
+ """ + def __init__(self, path_OBJ, viewport_size=(128, 128), y_fov=3.14159 / 4.0, + distance=[0.3, 0.5], light=[0.5, 30], top_only=False, + roll=None, shift=None): + self.distance, self.roll, self.shift = distance, roll, shift + self.light_intensity, self.top_only = light, top_only + self._build_scene(path_OBJ, viewport_size, light, y_fov) + self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) + self.flags_RGBA = RenderFlags.RGBA + self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT + self.epsilon = 0.01 + + def _build_scene(self, path, size, light, y_fov): + self.scene = Scene(bg_color=[0, 0, 0, 0]) + self.light = self.scene.add( + DirectionalLight([1.0, 1.0, 1.0], np.mean(light))) + self.camera = self.scene.add( + PerspectiveCamera(y_fov, aspectRatio=np.divide(*size))) + self.pixel_mesh = self.scene.add(color_object(path)) + self.mesh = self.scene.add( + Mesh.from_trimesh(trimesh.load(path), smooth=True)) + self.world_origin = self.mesh.mesh.centroid + + def _sample_parameters(self): + distance = sample_uniformly(self.distance) + camera_origin = sample_point_in_sphere(distance, self.top_only) + camera_origin = random_perturbation(camera_origin, self.epsilon) + light_intensity = sample_uniformly(self.light_intensity) + return camera_origin, light_intensity + + def render(self): + camera_origin, intensity = self._sample_parameters() + camera_to_world, world_to_camera = compute_modelview_matrices( + camera_origin, self.world_origin, self.roll, self.shift) + self.light.light.intensity = intensity + self.scene.set_pose(self.camera, camera_to_world) + self.scene.set_pose(self.light, camera_to_world) + self.pixel_mesh.mesh.is_visible = False + image, depth = self.renderer.render(self.scene, self.flags_RGBA) + self.pixel_mesh.mesh.is_visible = True + image, alpha = split_alpha_channel(image) + self.mesh.mesh.is_visible = False + RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) + self.mesh.mesh.is_visible = True + return image, alpha, RGB_mask class CanonicalScene(): @@ -182,7 +271,6 @@ def render_symmetries(self): show_image(image) """ - """ show_image(image) for arg in range(0): @@ -213,7 +301,8 @@ def render_symmetries(self): # error = RGB_mask_pred - RGB_mask RGB_mask_pred = RGB_mask_pred.astype('uint8') print(image.dtype, RGB_mask_pred.dtype, RGB_mask_true.dtype) - # images = np.concatenate([image, RGB_mask_pred, RGB_mask_true], axis=1) + # images = np.concatenate( + [image, RGB_mask_pred, RGB_mask_true], axis=1) images = np.concatenate([image, RGB_mask_pred], axis=1) show_image(images) """ From 1959b5fe31f7ee6780c53bb7b116ed1f6ddca2a4 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 1 Feb 2022 19:37:38 +0100 Subject: [PATCH 090/101] Fix bug with demo importing legacy pipeline --- examples/pix2pose/demo.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index c972f7165..e1a9a046b 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -5,10 +5,7 @@ from paz.backend.camera import VideoPlayer from paz.applications import SSD300FAT -# from pipelines import Pix2Pose -# from pipelines import EstimatePoseMasks from pipelines import Pix2Pose -from pipelines import EstimatePoseMasks image_shape = (128, 128, 3) From 3b0cfb696b9a4e67e9b6f54cf4d92c60fadb0d43 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 2 Feb 2022 23:58:59 +0100 Subject: [PATCH 091/101] Refactor basic training script for pix2pose RGB mask --- examples/pix2pose/train.py | 157 
+++++++++++++++++++++++++------------ 1 file changed, 107 insertions(+), 50 deletions(-) diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 12c48a715..6eccd230a 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -1,6 +1,14 @@ import os import glob +import json +import argparse +from datetime import datetime + +from tensorflow.keras.utils import get_file from tensorflow.keras.optimizers import Adam +from tensorflow.keras.callbacks import ( + EarlyStopping, CSVLogger, ModelCheckpoint, ReduceLROnPlateau) + from paz.abstract import GeneratingSequence from paz.models.segmentation import UNET_VGG16 @@ -9,65 +17,114 @@ from loss import WeightedReconstruction from metrics import mean_squared_error as MSE -# global training parameters -H, W, num_channels = image_shape = [128, 128, 3] -beta = 3.0 -batch_size = 32 -num_classes = 3 -learning_rate = 0.001 -max_num_epochs = 10 -steps_per_epoch = 1000 -inputs_to_shape = {'input_1': [H, W, 3]} -labels_to_shape = {'masks': [H, W, 4]} - -# global rendering parameters +MTL_FILE = 'textured.mtl' +OBJ_FILE = 'textured.obj' +PNG_FILE = 'texture_map.png' +cache_subdir = 'paz/datasets/ycb_video/035_power_drill' +URL = 'https://github.com/oarriaga/altamira-data/releases/download/v0.12/' + +MTL_FILEPATH = get_file(MTL_FILE, URL + MTL_FILE, cache_subdir=cache_subdir) +OBJ_FILEPATH = get_file(OBJ_FILE, URL + OBJ_FILE, cache_subdir=cache_subdir) +PNG_FILEPATH = get_file(PNG_FILE, URL + PNG_FILE, cache_subdir=cache_subdir) + root_path = os.path.expanduser('~') -background_wildcard = '.keras/paz/datasets/voc-backgrounds/*.png' -background_wildcard = os.path.join(root_path, background_wildcard) -image_paths = glob.glob(background_wildcard) -num_occlusions = 1 -viewport_size = image_shape[:2] -light = [1.0, 30] -y_fov = 3.14159 / 4.0 - -# power drill parameters -""" -OBJ_name = '.keras/paz/datasets/ycb_models/035_power_drill/textured.obj' -distance = [0.3, 0.5] -top_only = False -roll = 3.14159 -shift = 0.05 -""" - -# hammer parameters -OBJ_name = '.keras/paz/datasets/ycb_models/048_hammer/textured.obj' -distance = [0.5, 0.6] -top_only = False -roll = 3.14159 -shift = 0.05 - -path_OBJ = os.path.join(root_path, OBJ_name) - -renderer = PixelMaskRenderer(path_OBJ, viewport_size, y_fov, distance, - light, top_only, roll, shift) +description = 'Training script for pix2pose model' +parser = argparse.ArgumentParser(description=description) +parser.add_argument('--obj_path', default=OBJ_FILEPATH, type=str, + help='Path to OBJ model') +parser.add_argument('--save_path', default='experiments', type=str, + help='Path for saving evaluations') +parser.add_argument('--model', default='UNET_VGG16', type=str, + choices=['UNET_VGG16']) +parser.add_argument('--batch_size', default=32, type=int, + help='Batch size used during optimization') +parser.add_argument('--learning_rate', default=0.001, type=float, + help='Initial learning rate for Adam') +parser.add_argument('--beta', default=3.0, type=float, + help='Loss Weight for pixels in object') +parser.add_argument('--max_num_epochs', default=100, type=int, + help='Number of epochs before finishing') +parser.add_argument('--steps_per_epoch', default=1000, type=int, + help='Steps per epoch') +parser.add_argument('--stop_patience', default=5, type=int, + help='Early stop patience') +parser.add_argument('--reduce_patience', default=2, type=int, + help='Reduce learning rate patience') +parser.add_argument('--run_label', default='RUN_00', type=str, + help='Label used to distinguish between 
different runs') +parser.add_argument('--time', type=str, + default=datetime.now().strftime("%d/%m/%Y %H:%M:%S")) +parser.add_argument('--light', nargs='+', type=float, default=[1.0, 30]) +parser.add_argument('--y_fov', default=3.14159 / 4.0, type=float, + help='Field of view angle in radians') +parser.add_argument('--distance', nargs='+', type=float, default=[0.3, 0.5], + help='Distance from camera to origin in meters') +parser.add_argument('--top_only', default=0, choices=[0, 1], type=int, + help='Flag for full sphere or top half for rendering') +parser.add_argument('--roll', default=3.14159, type=float, + help='Threshold for camera roll in radians') +parser.add_argument('--shift', default=0.05, type=float, + help='Threshold of random shift of camera') +parser.add_argument('--num_occlusions', default=1, type=int, + help='Number of occlusions added to image') +parser.add_argument('--image_size', default=128, type=int, + help='Size of the side of a square image e.g. 64') +parser.add_argument('--background_wildcard', type=str, + help='Wildcard for backgroun images', default=os.path.join( + root_path, + '.keras/paz/datasets/voc-backgrounds/*.png')) +args = parser.parse_args() + + +# loading background image paths +image_paths = glob.glob(args.background_wildcard) +if len(image_paths) == 0: + raise ValueError('Background images not found. Provide path to png images') + +# setting rendering function +H, W, num_channels = image_shape = [args.image_size, args.image_size, 3] +renderer = PixelMaskRenderer( + args.obj_path, [H, W], args.y_fov, args.distance, args.light, + args.top_only, args.roll, args.shift) + +# building full processor +inputs_to_shape = {'input_1': [H, W, num_channels]} # inputs RGB +labels_to_shape = {'masks': [H, W, num_channels + 1]} # labels RGBMask + alpha processor = DomainRandomization( renderer, image_shape, image_paths, inputs_to_shape, - labels_to_shape, num_occlusions) + labels_to_shape, args.num_occlusions) + +# building python generator +sequence = GeneratingSequence(processor, args.batch_size, args.steps_per_epoch) -sequence = GeneratingSequence(processor, batch_size, steps_per_epoch) +# instantiating the model and loss +model = UNET_VGG16(num_channels, image_shape, freeze_backbone=True) +optimizer = Adam(args.learning_rate) +loss = WeightedReconstruction(args.beta) +model.compile(optimizer, loss, metrics=MSE) -weighted_reconstruction = WeightedReconstruction(beta) +# building experiment path +experiment_label = '_'.join([model.name, args.run_label]) +experiment_path = os.path.join(args.save_path, experiment_label) -model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) -optimizer = Adam(learning_rate) -model.compile(optimizer, weighted_reconstruction, metrics=MSE) +# setting additional callbacks +log = CSVLogger(os.path.join(experiment_path, 'optimization.log')) +stop = EarlyStopping('loss', patience=args.stop_patience, verbose=1) +plateau = ReduceLROnPlateau('loss', patience=args.reduce_patience, verbose=1) +save_filename = os.path.join(experiment_path, 'model_weights.hdf5') +save = ModelCheckpoint(save_filename, 'loss', verbose=1, save_best_only=True, + save_weights_only=True) +callbacks = [log, stop, save, plateau] + +# saving hyper-parameters and model summary +with open(os.path.join(experiment_path, 'hyperparameters.json'), 'w') as filer: + json.dump(args.__dict__, filer, indent=4) +with open(os.path.join(experiment_path, 'model_summary.txt'), 'w') as filer: + model.summary(print_fn=lambda x: filer.write(x + '\n')) model.fit( sequence, - 
epochs=max_num_epochs, - # callbacks=[stop, log, save, plateau, draw], + epochs=args.max_num_epochs, verbose=1, workers=0) - -model.save_weights('UNET-VGG16_weights_hammer_10.hdf5') From 3be885f36dddb082f52f558441cba429dfdfdc7a Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Thu, 3 Feb 2022 11:04:38 +0100 Subject: [PATCH 092/101] Move metrics to legacy dir --- examples/pix2pose/{ => legacy}/metrics.py | 0 examples/pix2pose/train.py | 13 ++++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) rename examples/pix2pose/{ => legacy}/metrics.py (100%) diff --git a/examples/pix2pose/metrics.py b/examples/pix2pose/legacy/metrics.py similarity index 100% rename from examples/pix2pose/metrics.py rename to examples/pix2pose/legacy/metrics.py diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 6eccd230a..35a873481 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -4,6 +4,7 @@ import argparse from datetime import datetime +import tensorflow as tf from tensorflow.keras.utils import get_file from tensorflow.keras.optimizers import Adam from tensorflow.keras.callbacks import ( @@ -14,8 +15,7 @@ from scenes import PixelMaskRenderer from pipelines import DomainRandomization -from loss import WeightedReconstruction -from metrics import mean_squared_error as MSE +from weighted_reconstruction import WeightedReconstruction MTL_FILE = 'textured.mtl' OBJ_FILE = 'textured.obj' @@ -98,11 +98,18 @@ # building python generator sequence = GeneratingSequence(processor, args.batch_size, args.steps_per_epoch) + +# metric for labels with alpha mask +def mean_squared_error(y_true, y_pred): + squared_difference = tf.square(y_true[:, :, :, 0:3] - y_pred[:, :, :, 0:3]) + return tf.reduce_mean(squared_difference, axis=-1) + + # instantiating the model and loss model = UNET_VGG16(num_channels, image_shape, freeze_backbone=True) optimizer = Adam(args.learning_rate) loss = WeightedReconstruction(args.beta) -model.compile(optimizer, loss, metrics=MSE) +model.compile(optimizer, loss, mean_squared_error) # building experiment path experiment_label = '_'.join([model.name, args.run_label]) From 788de8808e0168c0af096e1bd2aa2018b6cdd0b3 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Sat, 5 Feb 2022 15:42:39 +0100 Subject: [PATCH 093/101] Refactor tests --- examples/pix2pose/backend_test.py | 18 ++++- examples/pix2pose/legacy/legacy.py | 59 --------------- examples/pix2pose/pipelines.py | 61 +++++++++++---- examples/pix2pose/processors_test.py | 108 +++++++++++++++++++++++++++ 4 files changed, 172 insertions(+), 74 deletions(-) create mode 100644 examples/pix2pose/processors_test.py diff --git a/examples/pix2pose/backend_test.py b/examples/pix2pose/backend_test.py index 28a944a2f..2e7f8b128 100644 --- a/examples/pix2pose/backend_test.py +++ b/examples/pix2pose/backend_test.py @@ -5,7 +5,6 @@ from .backend import preprocess_image_points2D from .backend import replace_lower_than_threshold from .backend import arguments_to_image_points2D -# from .backend import points3D_to_RGB from .backend import normalize_points2D from .backend import denormalize_points2D from .backend import homogenous_quaternion_to_rotation_matrix @@ -21,6 +20,7 @@ from .backend import normalize_min_max from .backend import extract_bounding_box_corners from .backend import compute_vertices_colors +from .backend import project_to_image @pytest.fixture @@ -317,3 +317,19 @@ def test_compute_vertices_colors(points3D): [166, 35, 199], [255, 255, 0]]) assert np.allclose(values, colors) + + +def 
test_project_to_image(): + points3D = np.array([[1.0, 1.0, 1.0]]) + translation = np.array([0.0, 0.0, -3.0]) + rotation = np.array([[0.0, 0.0, -1.0], + [0.0, 1.0, 0.0], + [1.0, 0.0, 0.0]]) + fx = 1.0 + fy = 1.0 + tx = 0.0 + ty = 0.0 + camera_intrinsics = np.array([[fx, 0.0, tx], [0.0, fy, ty]]) + points2D = project_to_image(rotation, translation, + points3D, camera_intrinsics) + assert np.allclose(points2D, np.array([0.5, -0.5])) diff --git a/examples/pix2pose/legacy/legacy.py b/examples/pix2pose/legacy/legacy.py index e8140fb48..76b2b6973 100644 --- a/examples/pix2pose/legacy/legacy.py +++ b/examples/pix2pose/legacy/legacy.py @@ -249,62 +249,3 @@ def call(self, image): image = draw_pose6D(image, pose6D, cube_points3D, self.camera.intrinsics) return {'image': image, 'boxes2D': boxes2D, 'poses6D': poses6D} - - -class PixelMaskRenderer(): - """Render-ready scene composed of a single object and a single moving camera. - - # Arguments - path_OBJ: String containing the path to an OBJ file. - viewport_size: List, specifying [H, W] of rendered image. - y_fov: Float indicating the vertical field of view in radians. - distance: List of floats indicating [max_distance, min_distance] - light: List of floats indicating [max_light, min_light] - top_only: Boolean. If True images are only take from the top. - roll: Float, to sample [-roll, roll] rolls of the Z OpenGL camera axis. - shift: Float, to sample [-shift, shift] to move in X, Y OpenGL axes. - """ - def __init__(self, path_OBJ, viewport_size=(128, 128), y_fov=3.14159 / 4.0, - distance=[0.3, 0.5], light=[0.5, 30], top_only=False, - roll=None, shift=None): - self.distance, self.roll, self.shift = distance, roll, shift - self.light_intensity, self.top_only = light, top_only - self._build_scene(path_OBJ, viewport_size, light, y_fov) - self.renderer = OffscreenRenderer(viewport_size[0], viewport_size[1]) - self.flags_RGBA = RenderFlags.RGBA - self.flags_FLAT = RenderFlags.RGBA | RenderFlags.FLAT - self.epsilon = 0.01 - - def _build_scene(self, path, size, light, y_fov): - self.scene = Scene(bg_color=[0, 0, 0, 0]) - self.light = self.scene.add( - DirectionalLight([1.0, 1.0, 1.0], np.mean(light))) - self.camera = self.scene.add( - PerspectiveCamera(y_fov, aspectRatio=np.divide(*size))) - self.pixel_mesh = self.scene.add(color_object(path)) - self.mesh = self.scene.add( - Mesh.from_trimesh(trimesh.load(path), smooth=True)) - self.world_origin = self.mesh.mesh.centroid - - def _sample_parameters(self): - distance = sample_uniformly(self.distance) - camera_origin = sample_point_in_sphere(distance, self.top_only) - camera_origin = random_perturbation(camera_origin, self.epsilon) - light_intensity = sample_uniformly(self.light_intensity) - return camera_origin, light_intensity - - def render(self): - camera_origin, intensity = self._sample_parameters() - camera_to_world, world_to_camera = compute_modelview_matrices( - camera_origin, self.world_origin, self.roll, self.shift) - self.light.light.intensity = intensity - self.scene.set_pose(self.camera, camera_to_world) - self.scene.set_pose(self.light, camera_to_world) - self.pixel_mesh.mesh.is_visible = False - image, depth = self.renderer.render(self.scene, self.flags_RGBA) - self.pixel_mesh.mesh.is_visible = True - image, alpha = split_alpha_channel(image) - self.mesh.mesh.is_visible = False - RGB_mask, _ = self.renderer.render(self.scene, self.flags_FLAT) - self.mesh.mesh.is_visible = True - return image, alpha, RGB_mask diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 
2e6fb2a97..067c2d658 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -1,6 +1,5 @@ -from paz.abstract import SequentialProcessor +from paz.abstract import SequentialProcessor, Processor, Pose6D from paz.pipelines import RandomizeRenderedImage as RandomizeRender -from paz.abstract.messages import Pose6D from paz.backend.quaternion import rotation_vector_to_quaternion from paz.backend.image import resize_image from paz import processors as pr @@ -8,20 +7,15 @@ from processors import ( GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, ImageToNormalizedDeviceCoordinates, Scale, SolveChangingObjectPnPRANSAC, - ReplaceLowerThanThreshold) + ReplaceLowerThanThreshold, UnwrapDictionary) -from backend import build_cube_points3D -from backend import denormalize_points2D -from backend import draw_pose6D -from backend import draw_mask -from backend import normalize_points2D +# TODO replace draw_pose6D with draw_poses6D +# TODO replace draw_mask with draw_masks +from backend import draw_pose6D, draw_mask -# from processors import UnwrapDictionary -# from backend import draw_poses6D -# from backend import draw_masks -# from backend import points3D_to_RGB -# from backend import draw_points2D -# import numpy as np +from backend import ( + build_cube_points3D, denormalize_points2D, normalize_points2D, + draw_masks, draw_poses6D) class DomainRandomization(SequentialProcessor): @@ -137,3 +131,42 @@ def call(self, image, box2D=None): results[topic] = image results['points2D'], results['pose6D'] = points2D, pose6D return results + + +class EstimatePoseMasks(Processor): + def __init__(self, detect, estimate_pose, offsets, draw=True, + valid_class_names=['035_power_drill']): + """Pose estimation pipeline using keypoints. + """ + super(EstimatePoseMasks, self).__init__() + self.detect = detect + self.estimate_pose = estimate_pose + self.postprocess_boxes = SequentialProcessor( + [pr.UnpackDictionary(['boxes2D']), + pr.FilterClassBoxes2D(valid_class_names), + pr.SquareBoxes2D(), + pr.OffsetBoxes2D(offsets)]) + self.clip = pr.ClipBoxes2D() + self.crop = pr.CropBoxes2D() + self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) + self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) + self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) + self.object_sizes = self.estimate_pose.object_sizes + self.cube_points3D = build_cube_points3D(*self.object_sizes) + self.draw = draw + + def call(self, image): + boxes2D = self.postprocess_boxes(self.detect(image)) + boxes2D = self.clip(image, boxes2D) + cropped_images = self.crop(image, boxes2D) + poses6D, points = [], [] + for crop, box2D in zip(cropped_images, boxes2D): + results = self.estimate_pose(crop, box2D) + pose6D, points2D, points3D = self.unwrap(results) + poses6D.append(pose6D), points.append([points2D, points3D]) + if self.draw: + image = self.draw_boxes2D(image, boxes2D) + image = draw_masks(image, points, self.object_sizes) + image = draw_poses6D(image, poses6D, self.cube_points3D, + self.estimate_pose.camera.intrinsics) + return self.wrap(image, boxes2D, poses6D) diff --git a/examples/pix2pose/processors_test.py b/examples/pix2pose/processors_test.py new file mode 100644 index 000000000..31da496fe --- /dev/null +++ b/examples/pix2pose/processors_test.py @@ -0,0 +1,108 @@ +import pytest +import numpy as np + +from .processors import ImageToNormalizedDeviceCoordinates +from .processors import NormalizedDeviceCoordinatesToImage +from .processors import ReplaceLowerThanThreshold +from .processors 
import NormalizePoints2D +from .processors import ToAffineMatrix +from .processors import ArgumentsToImagePoints2D +from .processors import UnwrapDictionary +# from .processors import GetNonZeroArguments +# from .processors import GetNonZeroValues +# from .processors import Scale +# from .processors import SolveChangingObjectPnPRANSAC + + +@pytest.fixture +def rotation_matrix_X_HALF_PI(): + rotation_matrix = np.array([[1.0, 0.0, 0.0], + [0.0, 0.0, -1.0], + [0.0, 1.0, 0.0]]) + return rotation_matrix + + +@pytest.fixture +def rotation_matrix_Y_HALF_PI(): + rotation_matrix = np.array([[0.0, 0.0, 1.0], + [0.0, 1.0, 0.0], + [-1.0, 0.0, 0.0]]) + return rotation_matrix + + +@pytest.fixture +def rotation_matrix_Z_HALF_PI(): + rotation_matrix = np.array([[0.0, -1.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 0.0, 1.0]]) + return rotation_matrix + + +def test_ImageToNormalizedDeviceCoordinates(): + image = np.array([[0, 127.5, 255]]) + image_to_NDC = ImageToNormalizedDeviceCoordinates() + values = image_to_NDC(image) + assert np.allclose(values, np.array([[-1.0, 0.0, 1.0]])) + + +def test_NormalizedDeviceCoordinatesToImage(): + coordinates = np.array([[-1.0, 0.0, 1.0]]) + NDC_to_image = NormalizedDeviceCoordinatesToImage() + values = NDC_to_image(coordinates) + assert np.allclose(values, np.array([[0.0, 127.5, 255.0]])) + + +def test_ReplaceLowerThanThreshold(): + source = np.ones((128, 128, 3)) + replace_lower_than_threshold = ReplaceLowerThanThreshold(2.0, 5.0) + target = replace_lower_than_threshold(source) + assert np.allclose(target, 5.0) + + source = np.ones((128, 128, 3)) + replace_lower_than_threshold = ReplaceLowerThanThreshold(0.0, -1.0) + target = replace_lower_than_threshold(source) + assert np.allclose(target, 1.0) + + +def test_NormalizePoints2D(): + height, width = 480, 640 + points2D = np.array([[0, 0], [320, 240], [640, 480]]) + normalize_points2D = NormalizePoints2D((height, width)) + normalized_points = normalize_points2D(points2D, height, width) + assert np.allclose(normalized_points, np.array([[-1, -1], [0, 0], [1, 1]])) + + +def test_ToAffineMarixIdentity(): + rotation_matrix = np.eye(3) + translation = np.zeros(3) + to_affine_matrix = ToAffineMatrix() + matrix = to_affine_matrix(rotation_matrix, translation) + assert np.allclose(matrix, np.eye(4)) + + +def test_ToAffineMatrix(): + rotation_matrix = np.array([[1.0, 0.0, 0.0], + [0.0, 0.0, -1.0], + [0.0, 1.0, 0.0]]) + translation = np.array([3.0, 1.2, 3.0]) + to_affine_matrix = ToAffineMatrix() + matrix = to_affine_matrix(rotation_matrix, translation) + affine_matrix = np.array([[1.0, 0.0, 0.0, 3.0], + [0.0, 0.0, -1.0, 1.2], + [0.0, 1.0, 0.0, 3.0], + [0.0, 0.0, 0.0, 1.0]]) + assert np.allclose(affine_matrix, matrix) + + +def test_ArgumentsToImagePoints2D(): + col_args = np.array([3, 44, 6]) + row_args = np.array([66, 0, 5]) + arguments_to_image_points2D = ArgumentsToImagePoints2D() + image_points2D = arguments_to_image_points2D(row_args, col_args) + assert np.allclose(image_points2D, np.array([[3, 66], [44, 0], [6, 5]])) + + +def test_UnwrapDictionary(): + dictionary = {'a': 1, 'b': 2, 'c': 3} + unwrap = UnwrapDictionary(['b', 'a', 'c']) + assert unwrap(dictionary) == [2, 1, 3] From 99c19543925c2cbcfe7da314ece9002a7e1a2375 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 8 Feb 2022 10:58:54 +0100 Subject: [PATCH 094/101] Add backend and test processors --- examples/pix2pose/backend.py | 135 +++++++++++---------------- examples/pix2pose/backend_test.py | 32 +++++++ examples/pix2pose/legacy/icp.py | 102 ++++++++++++++++++++ 
examples/pix2pose/legacy/legacy.py | 30 ++++++ examples/pix2pose/processors_test.py | 16 +++- 5 files changed, 231 insertions(+), 84 deletions(-) create mode 100644 examples/pix2pose/legacy/icp.py diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 775b17ce2..4f9ae19ce 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -245,7 +245,7 @@ def points3D_to_RGB(points3D, object_sizes): """Transforms points3D in object frame to RGB color space. # Arguments points3D: Array (num_points, 3). Points3D a - object_sizes: List (3) indicating the + object_sizes: Array (3) indicating the (width, height, depth) of object. # Returns @@ -493,6 +493,58 @@ def normalized_device_coordinates_to_image(image): return (image + 1.0) * 127.5 +def compute_norm_SO3(rotation_mesh, rotation): + """Computes norm between SO3 elements. + """ + difference = np.dot(np.linalg.inv(rotation), rotation_mesh) - np.eye(3) + distance = np.linalg.norm(difference, ord='fro') + return distance + + +def calculate_canonical_rotation(rotation_mesh, rotations): + norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] + closest_rotation_arg = np.argmin(norms) + closest_rotation = rotations[closest_rotation_arg] + canonical_rotation = np.linalg.inv(closest_rotation) + return canonical_rotation + + +def normalize_min_max(x, x_min, x_max): + """Normalized data using it's maximum and minimum values + + # Arguments + x: array + x_min: minimum value of x + x_max: maximum value of x + + # Returns + min-max normalized data + """ + return (x - x_min) / (x_max - x_min) + + +def extract_bounding_box_corners(points3D): + """Extracts the (x_min, y_min, z_min) and the (x_max, y_max, z_max) + coordinates from an array of points3D + # Arguments + points3D: Array (num_points, 3) + + # Returns + Left-down-bottom corner (x_min, y_min, z_min) and right-up-top + (x_max, y_max, z_max) corner. + """ + XYZ_min = np.min(points3D, axis=0) + XYZ_max = np.max(points3D, axis=0) + return XYZ_min, XYZ_max + + +def compute_vertices_colors(vertices): + corner3D_min, corner3D_max = extract_bounding_box_corners(vertices) + normalized_colors = normalize_min_max(vertices, corner3D_min, corner3D_max) + colors = (255 * normalized_colors).astype('uint8') + return colors + + def build_rotation_matrix_z(angle): """Builds rotation matrix in Z axis. @@ -610,84 +662,3 @@ def sample_affine_transform(min_corner, max_corner): rotation_matrix = sample_front_rotation_matrix() affine_matrix = to_affine_matrix(rotation_matrix, translation) return affine_matrix - - -def sample_random_rotation_matrix(): - """Samples SO3 in rotation matrix form. - - # Return - Array (3, 3). 
- - # References - [Lost in my terminal](http://blog.lostinmyterminal.com/python/2015/05/ - 12/random-rotation-matrix.html) - [real-time rendering](from http://www.realtimerendering.com/resources/ - GraphicsGems/gemsiii/rand_rotation.c) - """ - theta = 2.0 * np.pi * np.random.uniform() - phi = 2.0 * np.pi * np.random.uniform() - z = 2.0 * np.random.uniform() - # random_vector has length sqrt(2) to eliminate 2 in the Householder matrix - r = np.sqrt(z) - random_vector = np.array( - [np.sin(phi) * r, np.cos(phi) * r, np.sqrt(2.0 - z)]) - sin_theta = np.sin(theta) - cos_theta = np.cos(theta) - R = np.array([[+cos_theta, +sin_theta, 0.0], - [-sin_theta, +cos_theta, 0.0], - [0.0, 0.0, 1.0]]) - random_rotation_matrix = ( - np.outer(random_vector, random_vector) - np.eye(3)).dot(R) - return random_rotation_matrix - - -def compute_norm_SO3(rotation_mesh, rotation): - """Computes norm between SO3 elements. - """ - difference = np.dot(np.linalg.inv(rotation), rotation_mesh) - np.eye(3) - distance = np.linalg.norm(difference, ord='fro') - return distance - - -def calculate_canonical_rotation(rotation_mesh, rotations): - norms = [compute_norm_SO3(rotation_mesh, R) for R in rotations] - closest_rotation_arg = np.argmin(norms) - closest_rotation = rotations[closest_rotation_arg] - canonical_rotation = np.linalg.inv(closest_rotation) - return canonical_rotation - - -def normalize_min_max(x, x_min, x_max): - """Normalized data using it's maximum and minimum values - - # Arguments - x: array - x_min: minimum value of x - x_max: maximum value of x - - # Returns - min-max normalized data - """ - return (x - x_min) / (x_max - x_min) - - -def extract_bounding_box_corners(points3D): - """Extracts the (x_min, y_min, z_min) and the (x_max, y_max, z_max) - coordinates from an array of points3D - # Arguments - points3D: Array (num_points, 3) - - # Returns - Left-down-bottom corner (x_min, y_min, z_min) and right-up-top - (x_max, y_max, z_max) corner. 
- """ - XYZ_min = np.min(points3D, axis=0) - XYZ_max = np.max(points3D, axis=0) - return XYZ_min, XYZ_max - - -def compute_vertices_colors(vertices): - corner3D_min, corner3D_max = extract_bounding_box_corners(vertices) - normalized_colors = normalize_min_max(vertices, corner3D_min, corner3D_max) - colors = (255 * normalized_colors).astype('uint8') - return colors diff --git a/examples/pix2pose/backend_test.py b/examples/pix2pose/backend_test.py index 2e7f8b128..2cb53f52f 100644 --- a/examples/pix2pose/backend_test.py +++ b/examples/pix2pose/backend_test.py @@ -17,10 +17,12 @@ from .backend import build_rotation_matrix_y from .backend import build_rotation_matrix_z from .backend import compute_norm_SO3 +from .backend import calculate_canonical_rotation from .backend import normalize_min_max from .backend import extract_bounding_box_corners from .backend import compute_vertices_colors from .backend import project_to_image +from .backend import points3D_to_RGB @pytest.fixture @@ -81,6 +83,23 @@ def points3D(): [267, 310, 2]]) +@pytest.fixture +def object_colors(): + return np.array([[136, 166, 159], + [3, 119, 140], + [56, 132, 189], + [66, 110, 231], + [148, 193, 144], + [33, 174, 120], + [114, 175, 129]]) + + +@pytest.fixture +def object_sizes(): + object_sizes = np.array([280, 260, 240]) + return object_sizes + + def test_build_cube_points3D(unit_cube): cube_points = build_cube_points3D(1, 1, 1) assert np.allclose(unit_cube, cube_points) @@ -333,3 +352,16 @@ def test_project_to_image(): points2D = project_to_image(rotation, translation, points3D, camera_intrinsics) assert np.allclose(points2D, np.array([0.5, -0.5])) + + +def test_calculate_canonical_rotation(rotation_matrix_X_HALF_PI): + X_PI = np.matmul(rotation_matrix_X_HALF_PI, rotation_matrix_X_HALF_PI) + rotations = [X_PI, rotation_matrix_X_HALF_PI] + canonical_rotation = calculate_canonical_rotation(np.eye(3), rotations) + assert np.allclose( + canonical_rotation, np.linalg.inv(rotation_matrix_X_HALF_PI)) + + +def test_points3D_to_RGB(points3D, object_sizes, object_colors): + values = points3D_to_RGB(points3D, object_sizes) + assert np.allclose(values, object_colors) diff --git a/examples/pix2pose/legacy/icp.py b/examples/pix2pose/legacy/icp.py new file mode 100644 index 000000000..61ca4352c --- /dev/null +++ b/examples/pix2pose/legacy/icp.py @@ -0,0 +1,102 @@ +import numpy as np +from sklearn.neighbors import NearestNeighbors + + +def calculate_affine_matrix(pointcloud_A, pointcloud_B): + '''Calculates affine transform with the best least-squares fit transforming + keypoints A to keypoints B. + + # Argument: + pointcloud_A: Array of shape (num_keypoints, 3). + pointcloud_B: Array of shape (num_keypoints, 3). 
+ + # Returns: + T: (m+1)x(m+1) homogeneous transformation matrix that maps A on to B + R: mxm rotation matrix + t: mx1 translation vector + ''' + assert pointcloud_A.shape == pointcloud_B.shape + # translate points to their centroids + centroid3D_A = np.mean(pointcloud_A, axis=0) + centroid3D_B = np.mean(pointcloud_B, axis=0) + centered_keypoints3D_A = pointcloud_A - centroid3D_A + centered_keypoints3D_B = pointcloud_B - centroid3D_B + + covariance = np.dot(centered_keypoints3D_A.T, centered_keypoints3D_B) + U, S, Vt = np.linalg.svd(covariance) + # compute rotation matrix + rotation_matrix = np.dot(Vt.T, U.T) + + # resolve special reflection case + if np.linalg.det(rotation_matrix) < 0: + Vt[3 - 1, :] *= -1 + rotation_matrix = np.dot(Vt.T, U.T) + + # compute translation + translation3D = centroid3D_B.T - np.dot(rotation_matrix, centroid3D_A.T) + + affine_matrix = to_affine_matrix(rotation_matrix, translation3D) + return affine_matrix + + +def to_affine_matrix(rotation_matrix, translation_vector): + translation_vector = translation_vector.reshape(3, 1) + affine = np.concatenate([rotation_matrix, translation_vector], axis=0) + affine = np.concatenate([affine, np.array([[0.0, 0.0, 0.0, 1.0]])], axis=1) + return affine + + +def nearest_neighbor(pointcloud_A, pointcloud_B): + '''Find the nearest (Euclidean) neighbor in dst for each point in src + # Arguments: + src: Nxm array of points + dst: Nxm array of points + # Returns: + distances: Euclidean distances of the nearest neighbor + indices: dst indices of the nearest neighbor + ''' + assert pointcloud_A.shape == pointcloud_B.shape + model = NearestNeighbors(n_neighbors=1) + model.fit(pointcloud_B) + distances, indices = model.kneighbors(pointcloud_A, return_distance=True) + return distances.ravel(), indices.ravel() + + +def add_homogenous_coordinate(keypoints3D): + num_keypoints = len(keypoints3D) + ones = np.ones_like(num_keypoints).reshape(-1, 1) + homogenous_keypoints3D = np.concatenate([keypoints3D, ones], axis=1) + return homogenous_keypoints3D + + +def iterative_closes_point(pointcloud_A, pointcloud_B, initial_pose=None, + max_iterations=20, tolerance=1e-3): + '''Find best least square fit that transforms pointcloud A to pointcloud B. 
+ Input: + A: Nxm numpy array of source mD points + B: Nxm numpy array of destination mD point + initial_pose: (m+1)x(m+1) homogeneous transformation + max_iterations: exit algorithm after max_iterations + tolerance: convergence criteria + Output: + T: final homogeneous transformation that maps A on to B + distances: Euclidean distances (errors) of the nearest neighbor + i: number of iterations to converge + ''' + assert pointcloud_A.shape == pointcloud_B.shape + pointcloud_A = add_homogenous_coordinate(pointcloud_A) + pointcloud_B = add_homogenous_coordinate(pointcloud_B) + pointcloud_A_0 = np.copy(pointcloud_A) + if initial_pose is not None: + pointcloud_A = np.dot(initial_pose, pointcloud_A.T).T + previous_error = 0 + for iteration_arg in range(max_iterations): + distances, indices = nearest_neighbor(pointcloud_A, pointcloud_B) + affine_matrix = calculate_affine_matrix(pointcloud_A, pointcloud_B) + pointcloud_A = np.dot(affine_matrix, pointcloud_A.T).T + mean_error = np.mean(distances) + if np.abs(previous_error - mean_error) < tolerance: + break + previous_error = mean_error + affine_transform = calculate_affine_matrix(pointcloud_A_0, pointcloud_A) + return affine_transform, distances, iteration_arg diff --git a/examples/pix2pose/legacy/legacy.py b/examples/pix2pose/legacy/legacy.py index 76b2b6973..4335d2e8d 100644 --- a/examples/pix2pose/legacy/legacy.py +++ b/examples/pix2pose/legacy/legacy.py @@ -1,6 +1,36 @@ from tensorflow.keras.losses import Loss from tensorflow.keras.losses import mean_squared_error import tensorflow as tf +import numpy as np + + +def sample_random_rotation_matrix(): + """Samples SO3 in rotation matrix form. + + # Return + Array (3, 3). + + # References + [Lost in my terminal](http://blog.lostinmyterminal.com/python/2015/05/ + 12/random-rotation-matrix.html) + [real-time rendering](from http://www.realtimerendering.com/resources/ + GraphicsGems/gemsiii/rand_rotation.c) + """ + theta = 2.0 * np.pi * np.random.uniform() + phi = 2.0 * np.pi * np.random.uniform() + z = 2.0 * np.random.uniform() + # random_vector has length sqrt(2) to eliminate 2 in the Householder matrix + r = np.sqrt(z) + random_vector = np.array( + [np.sin(phi) * r, np.cos(phi) * r, np.sqrt(2.0 - z)]) + sin_theta = np.sin(theta) + cos_theta = np.cos(theta) + R = np.array([[+cos_theta, +sin_theta, 0.0], + [-sin_theta, +cos_theta, 0.0], + [0.0, 0.0, 1.0]]) + random_rotation_matrix = ( + np.outer(random_vector, random_vector) - np.eye(3)).dot(R) + return random_rotation_matrix def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): diff --git a/examples/pix2pose/processors_test.py b/examples/pix2pose/processors_test.py index 31da496fe..a06b94664 100644 --- a/examples/pix2pose/processors_test.py +++ b/examples/pix2pose/processors_test.py @@ -10,8 +10,7 @@ from .processors import UnwrapDictionary # from .processors import GetNonZeroArguments # from .processors import GetNonZeroValues -# from .processors import Scale -# from .processors import SolveChangingObjectPnPRANSAC +from .processors import Scale @pytest.fixture @@ -38,6 +37,12 @@ def rotation_matrix_Z_HALF_PI(): return rotation_matrix +@pytest.fixture +def object_sizes(): + object_sizes = np.array([280, 260, 240]) + return object_sizes + + def test_ImageToNormalizedDeviceCoordinates(): image = np.array([[0, 127.5, 255]]) image_to_NDC = ImageToNormalizedDeviceCoordinates() @@ -106,3 +111,10 @@ def test_UnwrapDictionary(): dictionary = {'a': 1, 'b': 2, 'c': 3} unwrap = UnwrapDictionary(['b', 'a', 'c']) assert 
unwrap(dictionary) == [2, 1, 3] + + +def test_Scale(object_sizes): + scale = Scale(object_sizes) + values = np.array([1.0, 0.5, 0.25]) + scaled_values = scale(values) + assert np.allclose(scaled_values, values * object_sizes) From 7b174c43ef28a47d42c5d938f979d83f32c19227 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 8 Feb 2022 11:36:31 +0100 Subject: [PATCH 095/101] Add comment to function --- examples/pix2pose/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 4f9ae19ce..52b011654 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -237,7 +237,7 @@ def arguments_to_image_points2D(row_args, col_args): """ row_args = row_args.reshape(-1, 1) col_args = col_args.reshape(-1, 1) - image_points2D = np.concatenate([col_args, row_args], axis=1) + image_points2D = np.concatenate([col_args, row_args], axis=1) # (U, V) return image_points2D From 59a3ea5b40c37891ad68d3e935f794579a380205 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 8 Feb 2022 12:22:05 +0100 Subject: [PATCH 096/101] Add drawing callback --- examples/pix2pose/train.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 35a873481..0b57e4ff3 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -4,6 +4,7 @@ import argparse from datetime import datetime +import numpy as np import tensorflow as tf from tensorflow.keras.utils import get_file from tensorflow.keras.optimizers import Adam @@ -12,9 +13,11 @@ from paz.abstract import GeneratingSequence from paz.models.segmentation import UNET_VGG16 +from paz.optimization.callbacks import DrawInferences +from paz.backend.camera import Camera from scenes import PixelMaskRenderer -from pipelines import DomainRandomization +from pipelines import DomainRandomization, Pix2Pose from weighted_reconstruction import WeightedReconstruction MTL_FILE = 'textured.mtl' @@ -45,7 +48,7 @@ help='Loss Weight for pixels in object') parser.add_argument('--max_num_epochs', default=100, type=int, help='Number of epochs before finishing') -parser.add_argument('--steps_per_epoch', default=1000, type=int, +parser.add_argument('--steps_per_epoch', default=10, type=int, help='Steps per epoch') parser.add_argument('--stop_patience', default=5, type=int, help='Early stop patience') @@ -54,7 +57,7 @@ parser.add_argument('--run_label', default='RUN_00', type=str, help='Label used to distinguish between different runs') parser.add_argument('--time', type=str, - default=datetime.now().strftime("%d/%m/%Y %H:%M:%S")) + default=datetime.now().strftime("%d-%m-%Y_%H-%M-%S")) parser.add_argument('--light', nargs='+', type=float, default=[1.0, 30]) parser.add_argument('--y_fov', default=3.14159 / 4.0, type=float, help='Field of view angle in radians') @@ -68,6 +71,8 @@ help='Threshold of random shift of camera') parser.add_argument('--num_occlusions', default=1, type=int, help='Number of occlusions added to image') +parser.add_argument('--num_test_images', default=100, type=int, + help='Number of test images') parser.add_argument('--image_size', default=128, type=int, help='Size of the side of a square image e.g. 
64') parser.add_argument('--background_wildcard', type=str, @@ -95,6 +100,7 @@ renderer, image_shape, image_paths, inputs_to_shape, labels_to_shape, args.num_occlusions) + # building python generator sequence = GeneratingSequence(processor, args.batch_size, args.steps_per_epoch) @@ -112,7 +118,7 @@ def mean_squared_error(y_true, y_pred): model.compile(optimizer, loss, mean_squared_error) # building experiment path -experiment_label = '_'.join([model.name, args.run_label]) +experiment_label = '_'.join([model.name, args.run_label, args.time]) experiment_path = os.path.join(args.save_path, experiment_label) # setting additional callbacks @@ -122,7 +128,15 @@ def mean_squared_error(y_true, y_pred): save_filename = os.path.join(experiment_path, 'model_weights.hdf5') save = ModelCheckpoint(save_filename, 'loss', verbose=1, save_best_only=True, save_weights_only=True) -callbacks = [log, stop, save, plateau] +images = [np.copy(renderer.render()[0]) for _ in range(args.num_test_images)] +# setting drawing callback +camera = Camera() +camera.distortion = np.zeros((4)) +object_sizes = renderer.mesh.mesh.extents * 100 # from meters to milimiters +camera.intrinsics = renderer.camera.camera.get_projection_matrix()[:3, :3] +draw_pipeline = Pix2Pose(model, object_sizes, camera, draw=True) +draw = DrawInferences(experiment_path, images, draw_pipeline) +callbacks = [log, stop, save, plateau, draw] # saving hyper-parameters and model summary with open(os.path.join(experiment_path, 'hyperparameters.json'), 'w') as filer: @@ -133,5 +147,6 @@ def mean_squared_error(y_true, y_pred): model.fit( sequence, epochs=args.max_num_epochs, + callbacks=callbacks, verbose=1, workers=0) From 305861b079e8a14b3c42b2204a592ad9af849606 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Tue, 8 Feb 2022 13:57:42 +0100 Subject: [PATCH 097/101] Remove drawing of pose6D in Pix2Pose pipeline and save original images of training --- examples/pix2pose/pipelines.py | 8 ++++---- examples/pix2pose/train.py | 23 ++++++++++++++++++++++- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 067c2d658..8cfcac23b 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -124,11 +124,11 @@ def call(self, image, box2D=None): pose6D = None # change_coordinates puts points2D outside image. 
if (self.draw and (box2D is None)): - topic = 'image_crop' if box2D is not None else 'image' image = draw_mask(image, points2D, points3D, self.object_sizes) - image = draw_pose6D(image, pose6D, self.cube_points3D, - self.camera.intrinsics) - results[topic] = image + # TODO: commented it out for DrawInfferences callback + # image = draw_pose6D(image, pose6D, self.cube_points3D, + # self.camera.intrinsics) + results['image'] = image results['points2D'], results['pose6D'] = points2D, pose6D return results diff --git a/examples/pix2pose/train.py b/examples/pix2pose/train.py index 0b57e4ff3..5f3aab1cd 100644 --- a/examples/pix2pose/train.py +++ b/examples/pix2pose/train.py @@ -15,6 +15,7 @@ from paz.models.segmentation import UNET_VGG16 from paz.optimization.callbacks import DrawInferences from paz.backend.camera import Camera +from paz.backend.image import write_image from scenes import PixelMaskRenderer from pipelines import DomainRandomization, Pix2Pose @@ -48,7 +49,7 @@ help='Loss Weight for pixels in object') parser.add_argument('--max_num_epochs', default=100, type=int, help='Number of epochs before finishing') -parser.add_argument('--steps_per_epoch', default=10, type=int, +parser.add_argument('--steps_per_epoch', default=250, type=int, help='Steps per epoch') parser.add_argument('--stop_patience', default=5, type=int, help='Early stop patience') @@ -129,6 +130,26 @@ def mean_squared_error(y_true, y_pred): save = ModelCheckpoint(save_filename, 'loss', verbose=1, save_best_only=True, save_weights_only=True) images = [np.copy(renderer.render()[0]) for _ in range(args.num_test_images)] +images = [] + + +image_directory = os.path.join(experiment_path, 'original_images') +if not os.path.exists(image_directory): + os.makedirs(image_directory) + +for image_arg in range(args.num_test_images): + image, alpha, masks = renderer.render() + image = np.copy(image) # TODO: renderer outputs unwritable numpy arrays + masks = np.copy(masks) # TODO: renderer outputs unwritable numpy arrays + image_filename = 'image_%03d.png' % image_arg + masks_filename = 'masks_%03d.png' % image_arg + image_directory = os.path.join(experiment_path, 'original_images') + image_filename = os.path.join(image_directory, image_filename) + masks_filename = os.path.join(image_directory, masks_filename) + write_image(image_filename, image) + write_image(masks_filename, masks) + images.append(image) + # setting drawing callback camera = Camera() camera.distortion = np.zeros((4)) From d3f2f81ac08def5befab3c004eeac5f795163367 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 9 Feb 2022 09:22:00 +0100 Subject: [PATCH 098/101] Add thickness option for drawing poses --- examples/pix2pose/backend.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/pix2pose/backend.py b/examples/pix2pose/backend.py index 52b011654..98a26a963 100644 --- a/examples/pix2pose/backend.py +++ b/examples/pix2pose/backend.py @@ -350,7 +350,7 @@ def denormalize_points2D(points2D, height, width): return points2D -def draw_pose6D(image, pose6D, cube_points3D, camera_intrinsics): +def draw_pose6D(image, pose6D, cube_points3D, camera_intrinsics, thickness=2): """Draws pose6D by projecting cube3D to image space with camera intrinsics. 
# Arguments @@ -369,11 +369,12 @@ def draw_pose6D(image, pose6D, cube_points3D, camera_intrinsics): cube_points2D = project_to_image( rotation, translation, cube_points3D, camera_intrinsics) cube_points2D = cube_points2D.astype(np.int32) - image = draw_cube(image, cube_points2D) + image = draw_cube(image, cube_points2D, thickness=thickness) return image -def draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics): +def draw_poses6D(image, poses6D, cube_points3D, + camera_intrinsics, thickness=2): """Draws pose6D by projecting cube3D to image space with camera intrinsics. # Arguments @@ -387,7 +388,8 @@ def draw_poses6D(image, poses6D, cube_points3D, camera_intrinsics): Original image array (H, W, 3) with drawn cube points for all poses6D. """ for pose6D in poses6D: - image = draw_pose6D(image, pose6D, cube_points3D, camera_intrinsics) + image = draw_pose6D(image, pose6D, cube_points3D, + camera_intrinsics, thickness) return image From 41edbf04ac1f9f6626e6c34b2a3558b484db4940 Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 9 Feb 2022 09:22:39 +0100 Subject: [PATCH 099/101] Change resize interpolation --- examples/pix2pose/pipelines.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/pix2pose/pipelines.py b/examples/pix2pose/pipelines.py index 8cfcac23b..40e9ec23c 100644 --- a/examples/pix2pose/pipelines.py +++ b/examples/pix2pose/pipelines.py @@ -3,6 +3,7 @@ from paz.backend.quaternion import rotation_vector_to_quaternion from paz.backend.image import resize_image from paz import processors as pr +import cv2 from processors import ( GetNonZeroArguments, GetNonZeroValues, ArgumentsToImagePoints2D, @@ -83,7 +84,7 @@ def call(self, image): RGB_mask = self.predict_RGBMask(image) H, W, num_channels = image.shape if self.resize: - RGB_mask = resize_image(RGB_mask, (W, H)) + RGB_mask = cv2.resize(RGB_mask, (W, H), cv2.INTER_CUBIC) points3D = self.mask_to_points3D(RGB_mask) points2D = self.mask_to_points2D(RGB_mask) points2D = normalize_points2D(points2D, H, W) @@ -168,5 +169,6 @@ def call(self, image): image = self.draw_boxes2D(image, boxes2D) image = draw_masks(image, points, self.object_sizes) image = draw_poses6D(image, poses6D, self.cube_points3D, - self.estimate_pose.camera.intrinsics) + self.estimate_pose.camera.intrinsics, + thickness=2) return self.wrap(image, boxes2D, poses6D) From c081d6affdcdb4d91be76ee185052140f5488a3f Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 9 Feb 2022 09:22:57 +0100 Subject: [PATCH 100/101] Revert demo for single image --- examples/pix2pose/demo.py | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/examples/pix2pose/demo.py b/examples/pix2pose/demo.py index e1a9a046b..67acd545a 100644 --- a/examples/pix2pose/demo.py +++ b/examples/pix2pose/demo.py @@ -5,13 +5,14 @@ from paz.backend.camera import VideoPlayer from paz.applications import SSD300FAT -from pipelines import Pix2Pose +from pipelines import Pix2Pose, EstimatePoseMasks image_shape = (128, 128, 3) num_classes = 3 model = UNET_VGG16(num_classes, image_shape, freeze_backbone=True) +model.load_weights('experiments/UNET-VGG16_RUN_00_08-02-2022_14-39-55/weights.hdf5') # model.load_weights('weights/UNET_weights_epochs-10_beta-3.hdf5') # model.load_weights('weights/UNET-VGG_solar_panel_canonical_13.hdf5') # model.load_weights('weights/UNET-VGG_large_clamp_canonical_10.hdf5') @@ -22,8 +23,8 @@ # image_size = camera.read().shape[0:2] # camera.stop() -# image = load_image('test_image2.jpg') -image = 
load_image('images/lab_condition.png') +image = load_image('images/test_image2.jpg') +# image = load_image('images/lab_condition.png') image_size = image.shape[0:2] focal_length = image_size[1] image_center = (image_size[1] / 2.0, image_size[0] / 2.0) @@ -32,18 +33,23 @@ [0, focal_length, image_center[1]], [0, 0, 1]]) # object_sizes = np.array([0.184, 0.187, 0.052]) -epsilon = 0.001 +# object_sizes = np.array([184, 187, 52]) +object_sizes = np.array([1840, 1870, 520]) # power drill +epsilon = 0.015 score_thresh = 0.50 detect = SSD300FAT(score_thresh, draw=False) -offsets = [0.2, 0.2] -# estimate_keypoints = Pix2Pose(model, object_sizes, epsilon, True) -# pipeline = EstimatePoseMasks(detect, estimate_keypoints, camera, offsets) - - -object_sizes = np.array([1840, 1870, 520]) # power drill -object_sizes = np.array([15000, 15000, 2000]) # solar panel -object_sizes = np.array([15000, 15000, 2000]) # solar panel -estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=True) +offsets = [0.5, 0.5] +estimate_keypoints = Pix2Pose(model, object_sizes, camera, epsilon, draw=False) +pipeline = EstimatePoseMasks(detect, estimate_keypoints, offsets) +predicted_image = pipeline(image)['image'] +show_image(predicted_image) +from paz.backend.image import write_image +write_image('images/predicted_power_drill.png', predicted_image) + +# object_sizes = np.array([1840, 1870, 520]) # power drill +# object_sizes = np.array([15000, 15000, 2000]) # solar panel +# object_sizes = np.array([15000, 15000, 2000]) # solar panel +# estimate_pose = Pix2Pose(model, object_sizes, camera, epsilon, draw=True) # image = image[768:1324, 622:784] # image = image[622:784, 768:1324] @@ -53,10 +59,10 @@ # show_image(estimate_pose(image_hammer)['image']) # show_image(image) -image_clamp = image[670:1000, 1000:1400] +# image_clamp = image[670:1000, 1000:1400] # image_hammer = image[460:1030, 740:1340] -model.load_weights('weights/UNET-VGG_large_clamp_canonical_10.hdf5') -show_image(estimate_pose(image_clamp)['image']) +# model.load_weights('weights/UNET-VGG_large_clamp_canonical_10.hdf5') +# show_image(estimate_pose(image_clamp)['image']) """ image = load_image('images/zed_left_1011.png') From 72d14000486ce1fc929419b1555759ba9934f40c Mon Sep 17 00:00:00 2001 From: Octavio Arriaga Date: Wed, 9 Feb 2022 09:30:57 +0100 Subject: [PATCH 101/101] Delete legacy file to keep pep8 master --- examples/pix2pose/legacy/legacy.py | 281 ----------------------------- 1 file changed, 281 deletions(-) delete mode 100644 examples/pix2pose/legacy/legacy.py diff --git a/examples/pix2pose/legacy/legacy.py b/examples/pix2pose/legacy/legacy.py deleted file mode 100644 index 4335d2e8d..000000000 --- a/examples/pix2pose/legacy/legacy.py +++ /dev/null @@ -1,281 +0,0 @@ -from tensorflow.keras.losses import Loss -from tensorflow.keras.losses import mean_squared_error -import tensorflow as tf -import numpy as np - - -def sample_random_rotation_matrix(): - """Samples SO3 in rotation matrix form. - - # Return - Array (3, 3). 
- - # References - [Lost in my terminal](http://blog.lostinmyterminal.com/python/2015/05/ - 12/random-rotation-matrix.html) - [real-time rendering](from http://www.realtimerendering.com/resources/ - GraphicsGems/gemsiii/rand_rotation.c) - """ - theta = 2.0 * np.pi * np.random.uniform() - phi = 2.0 * np.pi * np.random.uniform() - z = 2.0 * np.random.uniform() - # random_vector has length sqrt(2) to eliminate 2 in the Householder matrix - r = np.sqrt(z) - random_vector = np.array( - [np.sin(phi) * r, np.cos(phi) * r, np.sqrt(2.0 - z)]) - sin_theta = np.sin(theta) - cos_theta = np.cos(theta) - R = np.array([[+cos_theta, +sin_theta, 0.0], - [-sin_theta, +cos_theta, 0.0], - [0.0, 0.0, 1.0]]) - random_rotation_matrix = ( - np.outer(random_vector, random_vector) - np.eye(3)).dot(R) - return random_rotation_matrix - - -def compute_weighted_symmetric_loss(RGBA_true, RGB_pred, rotations, beta=3.0): - """Computes the mininum of all rotated L1 reconstruction losses weighting - the positive alpha mask values in the predicted RGB image by beta. - - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGB_pred: Tensor [batch, H, W, 3]. Predicted RGB values. - rotations: Array (num_symmetries, 3, 3). Rotation matrices - that when applied lead to the same object view. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. - """ - RGB_true, alpha = split_alpha_mask(RGBA_true) - RGB_true = normalized_image_to_normalized_device_coordinates(RGB_true) - symmetric_losses = [] - for rotation in rotations: - RGB_true_rotated = tf.einsum('ij,bklj->bkli', rotation, RGB_true) - RGB_true_rotated = normalized_device_coordinates_to_normalized_image( - RGB_true_rotated) - RGB_true_rotated = tf.clip_by_value(RGB_true_rotated, 0.0, 1.0) - RGB_true_rotated = RGB_true_rotated * alpha - RGBA_true_rotated = tf.concat([RGB_true_rotated, alpha], axis=3) - loss = compute_weighted_reconstruction_loss( - RGBA_true_rotated, RGB_pred, beta) - loss = tf.expand_dims(loss, -1) - symmetric_losses.append(loss) - symmetric_losses = tf.concat(symmetric_losses, axis=-1) - minimum_symmetric_loss = tf.reduce_min(symmetric_losses, axis=-1) - return minimum_symmetric_loss - - -class WeightedSymmetricReconstruction(Loss): - """Computes the mininum of all rotated L1 reconstruction losses weighting - the positive alpha mask values in the predicted RGB image by beta. - """ - def __init__(self, rotations, beta=3.0): - super(WeightedSymmetricReconstruction, self).__init__() - self.rotations = rotations - self.beta = beta - - def call(self, RGBA_true, RGB_pred): - loss = compute_weighted_symmetric_loss( - RGBA_true, RGB_pred, self.rotations, self.beta) - return loss - - -def compute_error_prediction_loss(RGBA_true, RGBE_pred): - """Computes L2 reconstruction loss of predicted error mask. - - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGBE_pred: Tensor [batch, H, W, 3]. Predicted RGB and error mask. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. - - """ - RGB_pred, error_pred = split_error_mask(RGBE_pred) - error_true = compute_weighted_reconstruction_loss(RGBA_true, RGB_pred, 1.0) - # TODO check we need to set minimum to 1.0? - error_true = tf.minimum(error_true, 1.0) - error_loss = mean_squared_error(error_true, error_pred) - error_loss = tf.expand_dims(error_loss, axis=-1) - return error_loss - - -class ErrorPrediction(Loss): - """Computes L2 reconstruction loss of predicted error mask. 
- - # Arguments - RGBA_true: Tensor [batch, H, W, 4]. Color with alpha mask label values. - RGBE_pred: Tensor [batch, H, W, 3]. Predicted RGB and error mask. - - # Returns - Tensor [batch, H, W] with weighted reconstruction loss values. - - """ - def __init__(self): - super(ErrorPrediction, self).__init__() - - def call(self, RGBA_true, RGBE_pred): - error_loss = compute_error_prediction_loss(RGBA_true, RGBE_pred) - return error_loss - - -from paz.backend.image import draw_dot - - -def draw_points2D_(image, keypoints, colors, radius=1): - for (u, v), (R, G, B) in zip(keypoints, colors): - color = (int(R), int(G), int(B)) - draw_dot(image, (u, v), color, radius) - return image - - -def rotate_image(image, rotation_matrix): - """Rotates an image with a symmetry. - - # Arguments - image: Array (H, W, 3) with domain [0, 255]. - rotation_matrix: Array (3, 3). - - # Returns - Array (H, W, 3) with domain [0, 255] - """ - mask_image = np.sum(image, axis=-1, keepdims=True) != 0 - image = image_to_normalized_device_coordinates(image) - rotated_image = np.einsum('ij,klj->kli', rotation_matrix, image) - rotated_image = normalized_device_coordinates_to_image(rotated_image) - rotated_image = np.clip(rotated_image, a_min=0.0, a_max=255.0) - rotated_image = rotated_image * mask_image - return rotated_image - - -class EstimatePoseMasks(Processor): - def __init__(self, detect, estimate_pose, offsets, draw=True, - valid_class_names=['035_power_drill']): - """Pose estimation pipeline using keypoints. - """ - super(EstimatePoseMasks, self).__init__() - self.detect = detect - self.estimate_pose = estimate_pose - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(valid_class_names), - pr.SquareBoxes2D(), - pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - self.unwrap = UnwrapDictionary(['pose6D', 'points2D', 'points3D']) - self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.object_sizes = self.estimate_pose.object_sizes - self.cube_points3D = build_cube_points3D(*self.object_sizes) - self.draw = draw - - def call(self, image): - boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points = [], [] - for crop, box2D in zip(cropped_images, boxes2D): - results = self.estimate_pose(crop, box2D) - pose6D, points2D, points3D = self.unwrap(results) - poses6D.append(pose6D), points.append([points2D, points3D]) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - image = draw_masks(image, points, self.object_sizes) - image = draw_poses6D(image, poses6D, self.cube_points3D, - self.estimate_pose.camera.intrinsics) - return self.wrap(image, boxes2D, poses6D) - - -class MultiPix2Pose(Processor): - def __init__(self, detect, segment, camera, name_to_weights, name_to_sizes, - valid_class_names, offsets=[0.2, 0.2], epsilon=0.15, draw=True): - self.detect = detect - self.name_to_weights = name_to_weights - self.name_to_sizes = name_to_sizes - self.valid_class_names = valid_class_names - self.pix2points = Pix2Points(segment, np.zeros((3)), epsilon) - self.predict_pose = SolveChangingObjectPnP(camera.intrinsics) - self.change_coordinates = pr.ChangeKeypointsCoordinateSystem() - self.camera = camera - self.postprocess_boxes = SequentialProcessor( - [pr.UnpackDictionary(['boxes2D']), - pr.FilterClassBoxes2D(valid_class_names), - pr.SquareBoxes2D(), - 
pr.OffsetBoxes2D(offsets)]) - self.clip = pr.ClipBoxes2D() - self.crop = pr.CropBoxes2D() - self.draw_boxes2D = pr.DrawBoxes2D(detect.class_names) - self.draw = draw - self.wrap = pr.WrapOutput(['image', 'boxes2D', 'poses6D']) - self.name_to_cube_points3D = {} - self.mask_to_points2D = RGBMaskToImagePoints2D( - segment.output_shape[1:3]) - for name in self.name_to_sizes: - W, H, D = self.name_to_sizes[name] - cube_points3D = build_cube_points3D(W, H, D) - self.name_to_cube_points3D[name] = cube_points3D - - self.predict_RGBMask = PredictRGBMask(segment, epsilon) - - def call(self, image): - boxes2D = self.postprocess_boxes(self.detect(image)) - boxes2D = self.clip(image, boxes2D) - cropped_images = self.crop(image, boxes2D) - poses6D, points2D, points3D = [], [], [] - for crop, box2D in zip(cropped_images, boxes2D): - class_name = box2D.class_name - name_to_weights = self.name_to_weights[class_name] - self.pix2points.model.load_weights(name_to_weights) - object_sizes = self.name_to_sizes[class_name] - # self.pix2points.object_sizes = object_sizes - # points = self.pix2points(crop) - - RGB_mask = self.predict_RGBMask(crop) - H, W, num_channels = crop.shape - RGB_mask = resize_image(RGB_mask, (W, H)) - - self.mask_to_points3D = RGBMaskToObjectPoints3D(object_sizes) - class_points3D = self.mask_to_points3D(RGB_mask) - class_points2D = self.mask_to_points2D(RGB_mask) - class_points2D = normalize_points2D(class_points2D, H, W) - - # from paz.backend.image import show_image - # show_image((points['RGB_mask'] * 255).astype('uint8')) - # class_points2D = points['points2D'] - # class_points3D = points['points3D'] - H, W, num_channels = crop.shape - class_points2D = denormalize_points2D(class_points2D, H, W) - class_points2D = self.change_coordinates(class_points2D, box2D) - print(len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS) - print(len(class_points3D), len(class_points2D)) - if len(class_points3D) > self.predict_pose.MIN_REQUIRED_POINTS: - pose_results = self.predict_pose(class_points3D, class_points2D) - success, rotation, translation = pose_results - print('solver success', success) - # success = True - else: - success = False - if success: - quaternion = rotation_vector_to_quaternion(rotation) - pose6D = Pose6D(quaternion, translation, class_name) - else: - pose6D = None - print(success) - points2D.append(class_points2D) - points3D.append(class_points3D) - poses6D.append(pose6D) - if self.draw: - image = self.draw_boxes2D(image, boxes2D) - for class_points2D, class_points3D, pose6D in zip(points2D, points3D, poses6D): - class_name = pose6D.class_name - object_sizes = self.name_to_sizes[class_name] - colors = points3D_to_RGB(class_points3D, object_sizes) - image = draw_points2D(image, class_points2D, colors) - - for pose6D in poses6D: - class_name = pose6D.class_name - cube_points3D = self.name_to_cube_points3D[class_name] - image = draw_pose6D(image, pose6D, cube_points3D, - self.camera.intrinsics) - return {'image': image, 'boxes2D': boxes2D, 'poses6D': poses6D}
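
The pinhole projection convention exercised by test_project_to_image in patch 094 can be summarized with a short NumPy sketch: points are rotated and translated into the camera frame, perspective-divided by depth, and then scaled and shifted by the focal lengths and principal point read from the 2x3 intrinsics array. This is a minimal sketch written only to reproduce the values asserted in that test; it is an assumed re-implementation, not the backend module's actual project_to_image code.

    import numpy as np


    def project_to_image(rotation, translation, points3D, camera_intrinsics):
        """Sketch of pinhole projection consistent with test_project_to_image.
        Assumes camera_intrinsics has the form [[fx, 0, tx], [0, fy, ty]].
        Hypothetical re-implementation for illustration only.
        """
        # transform points from object frame to camera frame
        camera_points = np.matmul(rotation, points3D.T).T + translation
        # perspective divide by depth
        x = camera_points[:, 0] / camera_points[:, 2]
        y = camera_points[:, 1] / camera_points[:, 2]
        # apply focal lengths and principal point
        fx, tx = camera_intrinsics[0, 0], camera_intrinsics[0, 2]
        fy, ty = camera_intrinsics[1, 1], camera_intrinsics[1, 2]
        u = (fx * x) + tx
        v = (fy * y) + ty
        return np.concatenate([u[:, None], v[:, None]], axis=1)


    # reproduces the values asserted in the test: [[0.5, -0.5]]
    points3D = np.array([[1.0, 1.0, 1.0]])
    translation = np.array([0.0, 0.0, -3.0])
    rotation = np.array([[0.0, 0.0, -1.0],
                         [0.0, 1.0, 0.0],
                         [1.0, 0.0, 0.0]])
    camera_intrinsics = np.array([[1.0, 0.0, 0.0],
                                  [0.0, 1.0, 0.0]])
    print(project_to_image(rotation, translation, points3D, camera_intrinsics))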