diff --git a/README.md b/README.md
index bd4e4dcc6..5d4a5fff4 100644
--- a/README.md
+++ b/README.md
@@ -16,11 +16,11 @@ PAZ is used in the following examples (links to **real-time demos** and training
|---------------------------|--------------------------| -----------------------|
|| | |
-|[Semantic segmentation](https://github.com/oarriaga/paz/tree/master/examples/semantic_segmentation) | [Hand pose estimation](https://github.com/oarriaga/paz/tree/master/examples/minimal_hand) | [Human pose estimation](https://github.com/oarriaga/paz/tree/master/examples/human_pose_estimation_2D) |
+|[Semantic segmentation](https://github.com/oarriaga/paz/tree/master/examples/semantic_segmentation) | [Hand pose estimation](https://github.com/oarriaga/paz/tree/master/examples/hand_pose_estimation) | [Human pose estimation](https://github.com/oarriaga/paz/tree/master/examples/human_pose_estimation_2D) |
|---------------------------|-----------------------|-----------------|
| | | |
-| [3D keypoint discovery](https://github.com/oarriaga/paz/tree/master/examples/discovery_of_latent_keypoints) | [Hand closure detection](https://github.com/oarriaga/paz/tree/master/examples/minimal_hand) | [6D pose estimation](https://github.com/oarriaga/paz/tree/master/examples/pix2pose) |
+| [3D keypoint discovery](https://github.com/oarriaga/paz/tree/master/examples/discovery_of_latent_keypoints) | [Hand closure detection](https://github.com/oarriaga/paz/tree/master/examples/hand_pose_estimation) | [6D pose estimation](https://github.com/oarriaga/paz/tree/master/examples/pix2pose) |
|---------------------------|-----------------------| --------------------------|
| | | |
@@ -202,7 +202,9 @@ The following models are implemented in PAZ and they can be trained with your ow
|[Attention](https://github.com/oarriaga/paz/blob/master/examples/spatial_transfomer_networks/STN.py) |[Spatial Transformers](https://arxiv.org/abs/1506.02025) |
|[Object detection](https://github.com/oarriaga/paz/blob/master/paz/models/detection/haar_cascade.py) |[HaarCascades](https://link.springer.com/article/10.1023/B:VISI.0000013087.49260.fb) |
|[Human pose estimation](https://github.com/oarriaga/paz/blob/master/paz/models/pose_estimation/higher_hrnet.py) |[HigherHRNet](https://arxiv.org/abs/1908.10357) |
-|[Hand pose estimation](https://github.com/oarriaga/paz/blob/refactor_readme/paz/models/keypoint/detnet.py) |[DetNet](https://vcai.mpi-inf.mpg.de/projects/2020-cvpr-hands/) |
+|[Hand pose estimation](https://github.com/oarriaga/paz/blob/master/paz/models/keypoint/detnet.py) |[DetNet](https://vcai.mpi-inf.mpg.de/projects/2020-cvpr-hands/) |
+|[Hand closure classification](https://github.com/oarriaga/paz/blob/master/paz/models/keypoint/iknet.py) |[IKNet](https://vcai.mpi-inf.mpg.de/projects/2020-cvpr-hands/) |
+|[Hand detection](https://github.com/oarriaga/paz/blob/master/paz/models/detection/ssd512.py) |[SSD512](https://arxiv.org/abs/1512.02325)|
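+
+A minimal sketch (mirroring `examples/hand_detection/pose_demo.py` from this PR) of combining the hand models above through the high-level `SSD512MinimalHandPose` pipeline; the camera ID and window size are illustrative values:
+
+```py
+from paz.applications import SSD512MinimalHandPose
+from paz.backend.camera import VideoPlayer, Camera
+
+# Detect hands with SSD512, then estimate keypoints and closure with DetNet/IKNet.
+pipeline = SSD512MinimalHandPose(right_hand=False, offsets=[0.5, 0.5])
+player = VideoPlayer((640, 480), pipeline, Camera(0))
+player.run()
+```
+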
## Motivation
diff --git a/docs/structure.py b/docs/structure.py
index 91caa8b96..30b3a2957 100644
--- a/docs/structure.py
+++ b/docs/structure.py
@@ -115,7 +115,9 @@
'classes': [
(camera.Camera, [camera.Camera.is_open,
camera.Camera.start,
- camera.Camera.stop]),
+ camera.Camera.stop,
+ camera.Camera.intrinsics_from_HFOV,
+ camera.Camera.take_photo]),
(camera.VideoPlayer, [camera.VideoPlayer.step,
camera.VideoPlayer.run,
camera.VideoPlayer.record,
@@ -237,7 +239,8 @@
standard.calculate_norm,
standard.tensor_to_numpy,
standard.pad_matrix,
- standard.max_pooling_2d
+ standard.max_pooling_2d,
+ standard.predict
],
},
@@ -340,9 +343,9 @@
losses.WeightedReconstructionWithError
],
},
-
-
- {
+
+
+ {
'page': 'processors/angles.md',
'classes': [
processors.ChangeLinkOrder,
@@ -386,7 +389,8 @@
processors.NormalizedDeviceCoordinatesToImage,
processors.ReplaceLowerThanThreshold,
processors.GetNonZeroValues,
- processors.GetNonZeroArguments
+ processors.GetNonZeroArguments,
+ processors.FlipLeftRightImage
]
},
@@ -500,7 +504,8 @@
'page': 'processors/pose.md',
'classes': [
processors.SolvePNP,
- processors.SolveChangingObjectPnPRANSAC
+ processors.SolveChangingObjectPnPRANSAC,
+ processors.Translation3DFromBoxWidth
]
},
@@ -547,7 +552,8 @@
processors.UnwrapDictionary,
processors.Scale,
processors.AppendValues,
- processors.BooleanToTextMessage
+ processors.BooleanToTextMessage,
+ processors.PrintTopics
]
},
@@ -561,13 +567,10 @@
{
- 'page': 'pipelines/image.md',
+ 'page': 'pipelines/classification.md',
'classes': [
- pipelines.AugmentImage,
- pipelines.PreprocessImage,
- pipelines.DecoderPredictor,
- pipelines.EncoderPredictor,
- pipelines.PreprocessImageHigherHRNet
+ pipelines.MiniXceptionFER,
+ pipelines.ClassifyHandClosure
]
},
@@ -581,7 +584,8 @@
pipelines.PostprocessBoxes2D,
pipelines.DetectSingleShot,
pipelines.DetectHaarCascade,
- pipelines.SSD512HandDetection
+ pipelines.SSD512HandDetection,
+ pipelines.SSD512MinimalHandPose
]
},
@@ -594,6 +598,18 @@
},
+ {
+ 'page': 'pipelines/image.md',
+ 'classes': [
+ pipelines.AugmentImage,
+ pipelines.PreprocessImage,
+ pipelines.DecoderPredictor,
+ pipelines.EncoderPredictor,
+ pipelines.PreprocessImageHigherHRNet
+ ]
+ },
+
+
{
'page': 'pipelines/keypoints.md',
'classes': [
@@ -662,7 +678,9 @@
pipelines.PIX2YCBTools6D,
pipelines.DetNetHandKeypoints,
pipelines.MinimalHandPoseEstimation,
- pipelines.DetectMinimalHand
+ pipelines.DetectMinimalHand,
+ pipelines.ClassifyHandClosure,
+ pipelines.SSD512MinimalHandPose
]
},
diff --git a/examples/hand_detection/pose_demo.py b/examples/hand_detection/pose_demo.py
index 1fc725627..30dea0631 100644
--- a/examples/hand_detection/pose_demo.py
+++ b/examples/hand_detection/pose_demo.py
@@ -1,17 +1,15 @@
import argparse
-from paz.applications import DetectMinimalHand
-from paz.applications import MinimalHandPoseEstimation
-from paz.pipelines.detection import SSD512HandDetection
+from paz.applications import SSD512MinimalHandPose
from paz.backend.camera import VideoPlayer, Camera
-parser = argparse.ArgumentParser(description='Minimal hand detection')
+parser = argparse.ArgumentParser(description='''Minimal hand detection and
+    keypoint estimation''')
parser.add_argument('-c', '--camera_id', type=int, default=0,
help='Camera device ID')
args = parser.parse_args()
-pipeline = DetectMinimalHand(
- SSD512HandDetection(), MinimalHandPoseEstimation(right_hand=False))
+pipeline = SSD512MinimalHandPose(right_hand=False, offsets=[0.5, 0.5])
camera = Camera(args.camera_id)
player = VideoPlayer((640, 480), pipeline, camera)
player.run()
diff --git a/examples/hand_pose_estimation/HandPoseEstimation.py b/examples/hand_pose_estimation/HandPoseEstimation.py
deleted file mode 100755
index ed7a74552..000000000
--- a/examples/hand_pose_estimation/HandPoseEstimation.py
+++ /dev/null
@@ -1,312 +0,0 @@
-from tensorflow.keras.layers import Concatenate, Dense, Dropout, Reshape, Input
-from tensorflow.keras.layers import Conv2D, MaxPooling2D, LeakyReLU
-from tensorflow.keras import Model
-from tensorflow.keras.utils import get_file
-
-BASE_WEIGHT_PATH = (
- 'https://github.com/oarriaga/altamira-data/releases/download/v0.11/')
-
-
-def HandSegmentationNet(input_shape=(320, 320, 3), weights='RHDv2'):
- image = Input(shape=input_shape, name='image')
-
- X = Conv2D(64, kernel_size=3, padding='same', name='conv1_1')(image)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(64, 3, padding='same', name='conv1_2')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X)
-
- X = Conv2D(128, 3, padding='same', name='conv1_3')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, 3, padding='same', name='conv1_4')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X)
-
- X = Conv2D(256, 3, padding='same', name='conv1_5')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(256, 3, padding='same', name='conv1_6')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(256, 3, padding='same', name='conv1_7')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(256, 3, padding='same', name='conv1_8')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X)
-
- X = Conv2D(512, 3, padding='same', name='conv1_9')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(512, 3, padding='same', name='conv1_10')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(512, 3, padding='same', name='conv1_11')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(512, 3, padding='same', name='conv1_12')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(512, 3, padding='same', name='conv1_13')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, 3, padding='same', name='conv1_14')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(512, 1, padding='same', name='conv1_15')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- raw_segmented_image = Conv2D(2, 1, padding='same', activation=None,
- name='conv1_16')(X)
-
- segmentation_net = Model(inputs={'image': image},
- outputs={'image': image,
- 'raw_segmentation_map':
- raw_segmented_image},
- name='HandSegNet')
-
- if weights is not None:
- model_filename = [segmentation_net.name, str(weights)]
- model_filename = '_'.join(['-'.join(model_filename), 'weights.hdf5'])
- weights_path = get_file(model_filename,
- BASE_WEIGHT_PATH + model_filename,
- cache_subdir='paz/models')
- print('Loading %s model weights' % weights_path)
- segmentation_net.load_weights(weights_path)
-
- return segmentation_net
-
-
-def PoseNet(input_shape=(256, 256, 3), weights='RHDv2'):
- cropped_image = Input(shape=input_shape, name='cropped_image')
-
- X = Conv2D(64, kernel_size=3, padding='same', name='conv2_1')(
- cropped_image)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(64, kernel_size=3, padding='same', name='conv2_2')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X)
-
- X = Conv2D(128, kernel_size=3, padding='same', name='conv2_3')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, kernel_size=3, padding='same', name='conv2_4')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X)
-
- X = Conv2D(256, kernel_size=3, padding='same', name='conv2_5')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(256, kernel_size=3, padding='same', name='conv2_6')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(256, kernel_size=3, padding='same', name='conv2_7')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(256, kernel_size=3, padding='same', name='conv2_8')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X)
-
- X = Conv2D(512, kernel_size=3, padding='same', name='conv2_9')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(512, kernel_size=3, padding='same', name='conv2_10')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(256, kernel_size=3, padding='same', name='conv2_11')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(256, kernel_size=3, padding='same', name='conv2_12')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(256, kernel_size=3, padding='same', name='conv2_13')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(256, kernel_size=3, padding='same', name='conv2_14')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, kernel_size=3, padding='same', name='conv2_15')(X)
- X = LeakyReLU(alpha=0.01)(X)
- skip_connection = X
-
- X = Conv2D(512, kernel_size=1, name='conv2_16')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(21, kernel_size=1, name='conv2_17')(X)
-
- X = Concatenate(axis=3)([X, skip_connection])
-
- X = Conv2D(128, kernel_size=7, padding='same', name='conv2_18')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, kernel_size=7, padding='same', name='conv2_19')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, kernel_size=7, padding='same', name='conv2_20')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, kernel_size=7, padding='same', name='conv2_21')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, kernel_size=7, padding='same', name='conv2_22')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, kernel_size=1, name='conv2_23')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(21, kernel_size=1, padding='same', name='conv2_24')(X)
-
- X = Concatenate(axis=3)([X, skip_connection])
-
- X = Conv2D(128, kernel_size=7, padding='same', name='conv2_25')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, kernel_size=7, padding='same', name='conv2_26')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, kernel_size=7, padding='same', name='conv2_27')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, kernel_size=7, padding='same', name='conv2_28')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, kernel_size=7, padding='same', name='conv2_29')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, kernel_size=1, name='conv2_30')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- score_maps = Conv2D(21, kernel_size=1, name='conv2_31')(X)
-
- PoseNet = Model(inputs={'cropped_image': cropped_image},
- outputs={'score_maps': score_maps}, name='PoseNet')
-
- if weights is not None:
- model_filename = [PoseNet.name, str(weights)]
- model_filename = '_'.join(['-'.join(model_filename), 'weights.hdf5'])
- weights_path = get_file(model_filename,
- BASE_WEIGHT_PATH + model_filename,
- cache_subdir='paz/models')
- print('Loading %s model weights' % weights_path)
- PoseNet.load_weights(weights_path)
-
- return PoseNet
-
-
-def PosePriorNet(keypoint_heatmaps_shape=(32, 32, 21), hand_side_shape=(2,),
- num_keypoints=21, weights='RHDv2'):
- score_maps = Input(shape=keypoint_heatmaps_shape)
- hand_side = Input(shape=hand_side_shape)
-
- X = Conv2D(32, 3, padding='same', name='conv3_1')(score_maps)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(32, 3, padding='same', strides=2, name='conv3_2')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(64, 3, padding='same', name='conv3_3')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(64, 3, padding='same', strides=2, name='conv3_4')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, 3, padding='same', name='conv3_5')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, 3, padding='same', strides=2, name='conv3_6')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Reshape([-1])(X)
- X = Concatenate(axis=1)([X, hand_side])
-
- X = Dense(512, name='dense3_1')(X)
- X = LeakyReLU(alpha=0.01)(X)
- X = Dropout(rate=0.2)(X)
-
- X = Dense(512, name='dense3_2')(X)
- X = LeakyReLU(alpha=0.01)(X)
- X = Dropout(rate=0.2)(X)
-
- X = Dense(num_keypoints * 3, name='dense3_3')(X)
-
- hand_keypoints = Reshape((21, 3), name='reshape3_1')(X)
- PosePriorNet = Model(inputs={'score_maps': score_maps,
- 'hand_side': hand_side},
- outputs={'canonical_coordinates': hand_keypoints},
- name='PosePriorNet')
-
- if weights is not None:
- model_filename = [PosePriorNet.name, str(weights)]
- model_filename = '_'.join(['-'.join(model_filename), 'weights.hdf5'])
- weights_path = get_file(model_filename,
- BASE_WEIGHT_PATH + model_filename,
- cache_subdir='paz/models')
- print('Loading %s model weights' % weights_path)
- PosePriorNet.load_weights(weights_path)
-
- return PosePriorNet
-
-
-def ViewPointNet(keypoint_heat_maps_shape=(32, 32, 21), hand_side_shape=(2,),
- weights='RHDv2'):
- score_maps = Input(shape=keypoint_heat_maps_shape,
- name='score_maps')
- hand_side = Input(shape=hand_side_shape, name='hand_side')
-
- X = Conv2D(64, 3, padding='same')(score_maps)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(64, 3, strides=2, padding='same')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, 3, padding='same')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(128, 3, strides=2, padding='same')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(256, 3, padding='same')(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Conv2D(256, 3, strides=2, padding='same')(X)
- X = LeakyReLU(alpha=0.01)(X)
- X = Reshape([-1])(X)
- X = Concatenate(axis=1)([X, hand_side])
-
- X = Dense(256)(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- X = Dense(128)(X)
- X = LeakyReLU(alpha=0.01)(X)
-
- ux = Dense(1)(X)
- uy = Dense(1)(X)
- uz = Dense(1)(X)
-
- axis_angles = Concatenate(axis=1)([ux, uy, uz])
-
- ViewPointNet = Model(inputs={'score_maps': score_maps,
- 'hand_side': hand_side},
- outputs={'rotation_parameters': axis_angles[0],
- 'hand_side': hand_side},
- name='ViewPointNet')
-
- if weights is not None:
- model_filename = [ViewPointNet.name, str(weights)]
- model_filename = '_'.join(['-'.join(model_filename), 'weights.hdf5'])
- weights_path = get_file(model_filename,
- BASE_WEIGHT_PATH + model_filename,
- cache_subdir='paz/models')
- print('Loading %s model weights' % weights_path)
- ViewPointNet.load_weights(weights_path)
-
- return ViewPointNet
diff --git a/examples/hand_pose_estimation/README.md b/examples/hand_pose_estimation/README.md
new file mode 100644
index 000000000..452c895df
--- /dev/null
+++ b/examples/hand_pose_estimation/README.md
@@ -0,0 +1,25 @@
+### This example estimates hand pose from an image or a live camera stream.
+
+To test live hand pose estimation from the camera, run:
+```bash
+python demo.py
+```
+
+To test hand pose estimation on a single image, run:
+```bash
+python demo_image.py
+```
+
+To test live hand closure detection together with pose estimation from the camera, run:
+```bash
+python is_open_demo.py
+```
+
+To test live hand pose estimation from the camera and visualize the keypoints in 3D, run (this demo additionally requires matplotlib):
+```bash
+python demo3D.py
+```
+
+### Additional notes
+For a more robust hand pose estimation and open/close classification, try `paz/examples/hand_detection/pose_demo.py`.
+
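+The camera demos above wrap PAZ pipelines that can also be called directly on a single image. A minimal sketch, assuming `MinimalHandPoseEstimation` returns a dictionary whose `'image'` entry holds the drawn visualization (as the other PAZ pipelines do) and using a hypothetical input file `hand.jpg`:
+
+```py
+from paz.applications import MinimalHandPoseEstimation
+from paz.backend.image import load_image, show_image
+
+# Build the keypoint estimation pipeline for a left hand.
+estimate = MinimalHandPoseEstimation(right_hand=False)
+image = load_image('hand.jpg')  # hypothetical input image
+results = estimate(image)
+show_image(results['image'])
+```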
diff --git a/examples/hand_pose_estimation/RHDv2.py b/examples/hand_pose_estimation/RHDv2.py
deleted file mode 100644
index 3e5bcc3b2..000000000
--- a/examples/hand_pose_estimation/RHDv2.py
+++ /dev/null
@@ -1,18 +0,0 @@
-KINEMATIC_CHAIN_DICT = {0: 'root',
- 4: 'root', 3: 4, 2: 3, 1: 2,
- 8: 'root', 7: 8, 6: 7, 5: 6,
- 12: 'root', 11: 12, 10: 11, 9: 10,
- 16: 'root', 15: 16, 14: 15, 13: 14,
- 20: 'root', 19: 20, 18: 19, 17: 18}
-KINEMATIC_CHAIN_LIST = list(KINEMATIC_CHAIN_DICT.keys())
-
-LEFT_WRIST = 0
-LEFT_MIDDLE_METACARPAL = 12
-LEFT_PINKY_TIP = 20
-
-RIGHT_WRIST = 21
-RIGHT_MIDDLE_METACARPAL = 33
-RIGHT_PINKY_TIP = 41
-
-LEFT_HAND = 0
-RIGHT_HAND = 1
diff --git a/examples/hand_pose_estimation/backend_SE3.py b/examples/hand_pose_estimation/backend_SE3.py
deleted file mode 100644
index 42e60ea56..000000000
--- a/examples/hand_pose_estimation/backend_SE3.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import numpy as np
-
-
-def to_homogeneous_coordinates(vector):
- """ Homogenize the vector : Appending 1 to the vector.
-
- # Arguments
- keypoints: Numpy array with any shape.
-
- # Returns
- vector: Numpy array.
- """
- vector = np.append(vector, 1)
- return vector
-
-
-def build_translation_matrix_SE3(translation_vector):
- """ Build a translation matrix from translation vector.
-
- # Arguments
- translation_vector: list of length 1 or 3.
-
- # Returns
- transformation_matrix: Numpy array of size (1, 4, 4).
- """
- if len(translation_vector) == 1:
- translation_vector = [0, 0, translation_vector]
- transformation_matrix = np.array([[1, 0, 0, translation_vector[0]],
- [0, 1, 0, translation_vector[1]],
- [0, 0, 1, translation_vector[2]],
- [0, 0, 0, 1]])
- return transformation_matrix
-
-
-def build_affine_matrix(matrix, translation_vector=None):
- """ Build a (4, 4) affine matrix provided a matrix of size (3, 3).
-
- # Arguments
- matrix: numpy array of shape (3, 3).
-
- # Returns
- affine_matrix: Numpy array of size (4, 4).
- """
- if translation_vector is None:
- translation_vector = np.array([[0], [0], [0]])
-
- if len(translation_vector) == 1:
- translation_vector = [0, 0, translation_vector]
-
- affine_matrix = np.hstack([matrix, translation_vector])
- affine_matrix = np.vstack((affine_matrix, [0, 0, 0, 1]))
- return affine_matrix
-
-
-def build_rotation_matrix_x(angle):
- """Build a (3, 3) rotation matrix along x-axis.
-
- # Arguments
- angle: float value of range [0, 360].
-
- # Returns
- rotation_matrix_x: Numpy array of size (3, 3).
- """
- cosine_value = np.cos(angle)
- sine_value = np.sin(angle)
- rotation_matrix_x = np.array([[1.0, 0.0, 0.0],
- [0.0, cosine_value, sine_value],
- [0.0, -sine_value, cosine_value]])
- return rotation_matrix_x
-
-
-def build_rotation_matrix_y(angle):
- """Build a (3, 3) rotation matrix along y-axis.
-
- # Arguments
- angle: float value of range [0, 360].
-
- # Returns
- rotation_matrix_y: Numpy array of size (3, 3).
- """
- cosine_value = np.cos(angle)
- sine_value = np.sin(angle)
- rotation_matrix_y = np.array([[cosine_value, 0.0, -sine_value],
- [0.0, 1.0, 0.0],
- [sine_value, 0.0, cosine_value]])
- return rotation_matrix_y
-
-
-def build_rotation_matrix_z(angle):
- """ Build a (3, 3) rotation matrix along z-axis.
-
- # Arguments
- angle: float value of range [0, 360].
-
- # Returns
- rotation_matrix_z: Numpy array of size (3, 3).
- """
- cosine_value = np.cos(angle)
- sine_value = np.sin(angle)
- rotation_matrix_z = np.array([[cosine_value, sine_value, 0.0],
- [-sine_value, cosine_value, 0.0],
- [0.0, 0.0, 1.0]])
- return rotation_matrix_z
-
-
-def normalize_axis_coordinates(axis_angles, theta):
- normalization_factor = 1.0 / theta
- axis_coordinates_x = axis_angles[0] * normalization_factor
- axis_coordinates_y = axis_angles[1] * normalization_factor
- axis_coordinates_z = axis_angles[2] * normalization_factor
- axis_angles = (axis_coordinates_x, axis_coordinates_y, axis_coordinates_z)
- return axis_angles
-
-
-def get_rotation_matrix(axis_coordinates, theta):
- """ Calculate Rotation matrix.
-
- # Arguments
- axis_coordinates: List of length (3).
- theta: Float value.
-
- # Returns:
- matrix: Numpy array of size (3, 3).
- """
- x = axis_coordinates[0]
- y = axis_coordinates[1]
- z = axis_coordinates[2]
-
- sine_theta = np.sin(theta)
- cosine_theta = np.cos(theta)
-
- r11 = cosine_theta + ((x ** 2) * (1.0 - cosine_theta))
- r22 = cosine_theta + ((y ** 2) * (1.0 - cosine_theta))
- r33 = cosine_theta + ((z ** 2) * (1.0 - cosine_theta))
-
- r12 = (x * y * (1.0 - cosine_theta)) - (z * sine_theta)
- r13 = (x * z * (1.0 - cosine_theta)) + (y * sine_theta)
- r21 = (y * x * (1.0 - cosine_theta)) + (z * sine_theta)
- r23 = (y * z * (1.0 - cosine_theta)) - (x * sine_theta)
- r31 = (z * x * (1.0 - cosine_theta)) - (y * sine_theta)
- r32 = (z * y * (1.0 - cosine_theta)) + (x * sine_theta)
-
- rotation_matrix = np.array([[r11, r12, r13],
- [r21, r22, r23],
- [r31, r32, r33]])
-
- return rotation_matrix
-
-
-def rotation_from_axis_angles(axis_angles, is_normalized=False):
- """ Get Rotation matrix from axis angles.
-
- # Arguments
- axis_angles: list of length (3).
- is_normalized: boolean value.
-
- # Returns
- rotation-matrix: numpy array of size (3, 3).
- """
- theta = np.linalg.norm(axis_angles)
- if not is_normalized:
- axis_angles = normalize_axis_coordinates(axis_angles, theta)
- rotation_matrix = get_rotation_matrix(axis_angles, theta)
- return rotation_matrix
diff --git a/examples/hand_pose_estimation/backend_keypoints.py b/examples/hand_pose_estimation/backend_keypoints.py
deleted file mode 100644
index 2c755a124..000000000
--- a/examples/hand_pose_estimation/backend_keypoints.py
+++ /dev/null
@@ -1,1123 +0,0 @@
-import numpy as np
-
-from backend_SE3 import to_homogeneous_coordinates
-from backend_SE3 import build_translation_matrix_SE3
-from backend_SE3 import build_rotation_matrix_x, build_rotation_matrix_y
-from backend_SE3 import build_rotation_matrix_z, build_affine_matrix
-
-from RHDv2 import LEFT_MIDDLE_METACARPAL, LEFT_WRIST
-from RHDv2 import LEFT_PINKY_TIP, LEFT_HAND
-from RHDv2 import RIGHT_MIDDLE_METACARPAL, RIGHT_WRIST
-from RHDv2 import RIGHT_PINKY_TIP, RIGHT_HAND
-from RHDv2 import KINEMATIC_CHAIN_DICT, KINEMATIC_CHAIN_LIST
-
-from paz.backend.image.opencv_image import resize_image, show_image
-
-
-def extract_hand_segment(segmentation_label, hand_arg=1):
- """ Data Pre-processing step: Extract only hand mask from the
- segmentation map provided in RHD dataset.
-
- # Arguments
- segmentation_label: Numpy array.
-
- # Returns
- Numpy array.
- """
- hand_mask = np.greater(segmentation_label, hand_arg)
- background_mask = np.logical_not(hand_mask)
- return np.stack([background_mask, hand_mask], axis=2)
-
-
-def normalize_keypoints(keypoints3D):
- """ Normalize 3D-keypoints.
-
- # Arguments
- keypoints: Numpy array with shape `(num_keypoints, 3)`
-
- # Returns
- keypoint_scale: Numpy array with shape `(1, )`.
- keypoint_normalized: Numpy array with shape `(num_keypoints, 3)`.
- """
- keypoint3D_root = keypoints3D[0, :]
- relative_keypoint3D = keypoints3D - keypoint3D_root
- metacarpal_bone_length = np.linalg.norm(
- relative_keypoint3D[LEFT_MIDDLE_METACARPAL, :] -
- relative_keypoint3D[(LEFT_MIDDLE_METACARPAL - 1), :])
- keypoint_normalized = relative_keypoint3D / metacarpal_bone_length
- return metacarpal_bone_length, keypoint_normalized
-
-
-def extract_hand_mask(segmenation_mask, hand_arg=1):
- """ Normalize 3D-keypoints.
-
- # Arguments
- segmenation_mask: Numpy array
- hand_arg: Int value.
-
- # Returns
- hand_mask: Numpy array.
- """
- hand_mask = np.greater(segmenation_mask, hand_arg)
- return hand_mask
-
-
-def extract_hand_masks(segmentation_mask, right_hand_mask_limit=18):
- """ Extract Hand masks of left and right hand.
- ones_mask * right_hand_mask_limit convert to a variable
-
- # Arguments
- segmentation_mask: Numpy array.
- right_hand_mask_limit: Int value.
-
- # Returns
- mask_left: Numpy array.
- mask_right: Numpy array.
- """
- ones_mask = np.ones_like(segmentation_mask)
- hand_mask = extract_hand_mask(segmentation_mask, hand_arg=1)
- right_hand_mask = ones_mask * right_hand_mask_limit
- right_hand_map = np.less(segmentation_mask, right_hand_mask)
- mask_left = np.logical_and(hand_mask, right_hand_map)
- mask_right = np.greater(segmentation_mask, right_hand_mask)
- return mask_left, mask_right
-
-
-def extract_hand_side_keypoints(keypoints3D, dominant_hand):
- """ Extract keypoints related to Left or Right hand.
-
- # Arguments
- keypoints3D: numpy array of shape (num_keypoints, 3)
- Is_Left: numpy array of shape (1).
-
- # Returns
- keypoints3D: Numpy array of size (num_keypoints, 3).
- """
- if dominant_hand == LEFT_HAND:
- keypoints3D = keypoints3D[LEFT_WRIST:LEFT_PINKY_TIP, :]
- else:
- keypoints3D = keypoints3D[RIGHT_WRIST:RIGHT_PINKY_TIP, :]
- return keypoints3D
-
-
-def get_hand_side_and_keypooints(hand_parts_mask, keypoints3D):
- """Extract hand masks, hand side and keypoints of dominant hand.
-
- # Arguments
- keypoints3D: numpy array of shape (num_keypoints, 3).
- hand_parts_mask: numpy array of shape (image_size, image_size).
-
- # Returns
- hand_side: Numpy array of size (2).
- hand_side_keypoints3D: Numpy array of size (num_keypoints, 3).
- dominant_hand: numpy array of shape (1).
- """
- hand_map_left, hand_map_right = extract_hand_masks(hand_parts_mask)
- num_pixels_hand_left = np.sum(hand_map_left)
- num_pixels_hand_right = np.sum(hand_map_right)
- is_left_dominant = num_pixels_hand_left > num_pixels_hand_right
- dominant_hand = LEFT_HAND if is_left_dominant else RIGHT_HAND
- keypoints3D = extract_hand_side_keypoints(keypoints3D, dominant_hand)
- hand_side = np.where(is_left_dominant, 0, 1)
- return hand_side, keypoints3D, dominant_hand
-
-
-def extract_coordinate_limits(keypoints2D, keypoints2D_visibility,
- image_size):
- """ Extract minimum and maximum coordinates.
- # Try to convert to a function , check numpy.permute , rollaxis, flip
- # Arguments
- keypoints2D: Numpy array of shape (num_keypoints, 2).
- keypoints2D_visibility: Numpy array of shape (num_keypoints, 2).
- image_size: List of shape (3).
-
- # Returns
- min_coordinates: Tuple of size (2).
- max_coordinates: Tuple of size (2).
- """
- visible_keypoints = keypoints2D[keypoints2D_visibility]
- keypoint_u = visible_keypoints[:, 1]
- keypoint_v = visible_keypoints[:, 0]
- keypoints2D_coordinates = np.stack([keypoint_u, keypoint_v], 1)
- max_keypoint2D = np.maximum(keypoints2D_coordinates, 0)
- min_keypoint2D = np.minimum(keypoints2D_coordinates, 0)
- min_coordinates = np.maximum(min_keypoint2D, 0.0)
- max_coordinates = np.minimum(max_keypoint2D, image_size[0:2])
- return min_coordinates, max_coordinates
-
-
-def tranform_keypoints_to_camera_coordinates(keypoints2D, crop_center, scale,
- crop_size):
- """ Extract keypoints in cropped image frame.
-
- # Arguments
- keypoints2D: Numpy array of shape (num_keypoints, 1).
- crop_center: Typle of size (2).
- Scale: Integer.
- image_size: List of size (3).
-
- # Returns
- keypoint_uv21: Numpy array of shape (num_keypoints, 1).
- """
- crop_size_halved = crop_size // 2
- u_residual = keypoints2D[:, 0] - crop_center[1]
- v_residual = keypoints2D[:, 1] - crop_center[0]
- keypoint_u = (u_residual * scale) + crop_size_halved
- keypoint_v = (v_residual * scale) + crop_size_halved
- keypoint_uv = np.stack([keypoint_u, keypoint_v], 1)
- return keypoint_uv
-
-
-def get_best_crop_size(max_coordinates, min_coordinates, crop_center,
- min_crop_size=50.0, max_crop_size=500.0):
- """ calculate crop size.
- # Arguments
- max_coordinates: (x_max, y_max) Numpy array of shape (1,2).
- min_coordinates: (x_min, y_min) Numpy array of shape (1,2).
- crop_center: (x_center, y_center) Numpy array of shape (1,2).
-
- # Returns
- crop_size_best: Int value.
- """
- crop_size_best = 2 * np.maximum(max_coordinates - crop_center,
- crop_center - min_coordinates)
- crop_size_best = np.maximum(crop_size_best)
- crop_size_best = np.minimum(np.maximum(crop_size_best, min_crop_size),
- max_crop_size)
- return crop_size_best
-
-
-def get_crop_scale_and_center(keypoints2D, keypoints2D_visibility, image_size,
- crop_size):
- """ Extract scale to which image should be cropped.
-
- # Arguments
- keypoints2D: Numpy array of shape (num_keypoints, 1).
- keypoints2D_visibility: Numpy array of shape (num_keypoints, 1).
- image_size: List of size (3).
- crop_size: List of size (2).
-
- # Returns
- scale: Integer value.
- crop_center: Tuple of length 3.
- """
- crop_center = keypoints2D[LEFT_MIDDLE_METACARPAL, ::-1]
- min_coordinates, max_coordinates = extract_coordinate_limits(
- keypoints2D, keypoints2D_visibility, image_size)
- crop_size_best = get_best_crop_size(max_coordinates, min_coordinates,
- crop_center)
- scale = crop_size / crop_size_best
- return scale, crop_center
-
-
-def crop_image_from_mask(keypoints2D, keypoints2D_visibility, image,
- image_size, crop_size, camera_matrix):
- """ Crop image from mask.
-
- # Arguments
- keypoints2D: Numpy array of shape (num_keypoints, 1).
- keypoints2D_vis: Numpy array of shape (num_keypoints, 1).
- image: Numpy array of shape (image_size, image_size, 3).
- image_size: List of size (2).
- crop_size: List of size (2).
- camera_matrix: Numpy array of shape (3, 3).
-
- # Returns
- scale: Integer value.
- img_crop: Numpy array of size (crop_size, crop-size, 3).
- keypoint_uv21: Numpy array of shape (num_keypoints, 1).
- camera_matrix_cropped: Numpy array of shape (3, 3).
- """
- scale, crop_center = get_crop_scale_and_center(
- keypoints2D, keypoints2D_visibility, image_size, crop_size)
- scale, scale_matrix = get_scale_matrix(scale)
- cropped_image = crop_image_from_coordinates(
- image, crop_center, crop_size, scale)
- keypoint_uv21 = tranform_keypoints_to_camera_coordinates(
- keypoints2D, crop_center, scale, crop_size)
- scale_translation_matrix = get_scale_translation_matrix(
- crop_center, crop_size, scale)
- scale_matrix_uv = np.matmul(scale_matrix, camera_matrix)
- camera_matrix_cropped = np.matmul(scale_translation_matrix, scale_matrix_uv)
- return scale, np.squeeze(
- cropped_image), keypoint_uv21, camera_matrix_cropped
-
-
-def flip_right_to_left_hand(keypoints3D, flip_right):
- """ Flip right hend coordinates to left hand coordinates.
- # Arguments
- canonical_keypoints3D: Numpy array of shape (num_keypoints, 3).
- flip_right: boolean value.
-
- # Returns
- canonical_keypoints3D_left: Numpy array of shape (num_keypoints, 3).
- """
- keypoints3D_mirrored = np.stack([keypoints3D[:, 0], keypoints3D[:, 1],
- -keypoints3D[:, 2]], -1)
- keypoints3D_left = np.where(flip_right, keypoints3D_mirrored, keypoints3D)
- return keypoints3D_left
-
-
-def extract_dominant_hand_visibility(keypoint_visibility, dominant_hand):
- """ Extract Visibility mask for dominant hand.
- # Look Later with Octavio
- # Arguments
- keypoint_visibility: Numpy array of shape (num_keypoints, 1).
- dominant_hand: List of size (2).
-
- # Returns
- keypoint_visibility_21: Numpy array of shape (num_keypoints, 1).
- """
- keypoint_visibility_left = keypoint_visibility[:LEFT_PINKY_TIP]
- keypoint_visibility_right = keypoint_visibility[RIGHT_WRIST:RIGHT_PINKY_TIP]
- keypoint_visibility_21 = np.where(dominant_hand[:, 0],
- keypoint_visibility_left,
- keypoint_visibility_right)
- return keypoint_visibility_21
-
-
-def extract_dominant_keypoints2D(keypoint_2D, dominant_hand):
- """ Extract keypoint 2D.
- # Look Later with Octavio
- # Arguments
- keypoint_2D: Numpy array of shape (num_keypoints, 1).
- dominant_hand: List of size (2) with booleans.
-
- # Returns
- keypoint_visibility_2D_21: Numpy array of shape (num_keypoints, 1).
- """
- keypoint_visibility_left = keypoint_2D[:LEFT_PINKY_TIP, :]
- keypoint_visibility_right = keypoint_2D[RIGHT_WRIST:RIGHT_PINKY_TIP, :]
- keypoint_visibility_2D_21 = np.where(
- dominant_hand[:, :2], keypoint_visibility_left,
- keypoint_visibility_right)
- return keypoint_visibility_2D_21
-
-
-def extract_keypoint2D_limits(uv_coordinates, scoremap_size):
- """ Limit keypoint coordinates to scoremap size ,
- # Arguments
- uv_coordinates: Numpy array of shape (num_keypoints, 1).
- scoremap_size: List of size (2).
-
- # Returns
- keypoint_limits: Numpy array of shape (num_keypoints, 1).
- """
- scoremap_height, scoremap_width = scoremap_size
- x_lower_limits = np.less(uv_coordinates[:, 0], scoremap_height - 1)
- x_upper_limits = np.greater(uv_coordinates[:, 0], 0)
- x_limits = np.logical_and(x_lower_limits, x_upper_limits)
-
- y_lower_limits = np.less(uv_coordinates[:, 1], scoremap_width - 1)
- y_upper_limits = np.greater(uv_coordinates[:, 1], 0)
- y_limits = np.logical_and(y_lower_limits, y_upper_limits)
-
- keypoint_limits_mask = np.logical_and(x_limits, y_limits)
- return keypoint_limits_mask
-
-
-def get_keypoints_mask(validity_mask, uv_coordinates, scoremap_size,
- validity_score=0.5):
- """ Extract Visibility mask for dominant hand.
- # Add in dataset README the difference between seg and vis
- # Arguments
- validity_mask: Int value.
- uv_coordinates: Numpy array of shape (num_keypoints, 1).
- scoremap_size: List of size (2).
-
- # Returns
- keypoint_limits: Numpy array of shape (num_keypoints, 1).
- """
- validity_mask = np.squeeze(validity_mask)
- keypoint_validity = np.greater(validity_mask, validity_score)
- keypoint_limits = extract_keypoint2D_limits(uv_coordinates, scoremap_size)
- keypooints_mask = np.logical_and(keypoint_validity, keypoint_limits)
- return keypooints_mask
-
-
-def get_keypoint_limits(uv_coordinates, scoremap_size):
- """ Extract X and Y limits.
- # Arguments
- uv_coordinates: Numpy array of shape (num_keypoints, 2).
- scoremap_size: List of size (2).
-
- # Returns
- X_limits: Numpy array of shape (num_keypoints, 1).
- Y_limits: Numpy array of shape (num_keypoints, 1).
- """
- shape = uv_coordinates.shape
- scoremap_height, scoremap_width = scoremap_size
-
- x_range = np.expand_dims(np.arange(scoremap_height), 1)
- x_coordinates = np.tile(x_range, [1, scoremap_width])
- x_coordinates.reshape((scoremap_height, scoremap_width))
- x_coordinates = np.expand_dims(x_coordinates, -1)
- x_coordinates = np.tile(x_coordinates, [1, 1, shape[0]])
- x_limits = x_coordinates - uv_coordinates[:, 0].astype('float64')
-
- y_range = np.expand_dims(np.arange(scoremap_width), 0)
- y_coordinates = np.tile(y_range, [scoremap_height, 1])
- y_coordinates.reshape((scoremap_height, scoremap_width))
- y_coordinates = np.expand_dims(y_coordinates, -1)
- y_coordinates = np.tile(y_coordinates, [1, 1, shape[0]])
- y_limits = y_coordinates - uv_coordinates[:, 1].astype('float64')
-
- return x_limits, y_limits
-
-
-def create_gaussian_map(uv_coordinates, scoremap_size, sigma, validity_mask):
- """ Generate Gaussian maps based on keypoints in Image coordinates.
- # Arguments
- uv_coordinates: Numpy array of shape (num_keypoints, 1).
- scoremap_size: List of size (2).
- sigma: Integer value.
- validity_mask: Integer value.
-
- # Returns
- scoremap: Numpy array of shape (crop_size, crop-size).
- """
- keypoints_mask = get_keypoints_mask(validity_mask, uv_coordinates,
- scoremap_size)
- x_limits, y_limits = get_keypoint_limits(uv_coordinates, scoremap_size)
- squared_distance = np.square(x_limits) + np.square(y_limits)
- scoremap = np.exp(-squared_distance / np.square(sigma)) * keypoints_mask
- return scoremap
-
-
-def extract_keypoints_uv_coordinates(shape):
- """ Generate X and Y mesh.
- # Rename to best name
- # Arguments
- shape: tuple of size (3).
-
- # Returns
- X: Numpy array of shape (1, crop_size).
- Y: Numpy array of shape (crop_size, 1).
- """
- crop_size_height, crop_size_width = shape[0], shape[1]
- x_range = np.expand_dims(np.arange(crop_size_height), 1)
- y_range = np.expand_dims(np.arange(crop_size_width), 0)
- x_coordinates = np.tile(x_range, [1, crop_size_width])
- y_coordinates = np.tile(y_range, [crop_size_height, 1])
- return x_coordinates, y_coordinates
-
-
-def get_bounding_box(X_masked, Y_masked):
- """ Get Bounding Box.
-
- # Arguments
- X_masked: tuple of size (crop_size, 1).
- Y_masked: tuple of size (crop_size, 1).
-
- # Returns
- bounding_box: List of length (4).
- """
- x_min, x_max = np.min(X_masked), np.max(X_masked)
- y_min, y_max = np.min(Y_masked), np.max(Y_masked)
- bounding_box = np.array([x_min, y_min, x_max, y_max])
- return bounding_box
-
-
-def get_crop_center(box_coordinates):
- """ Extract Center.
- # Arguments
- box_coordinates: List of length 4.
- center_list: List of length batch_size.
-
- # Returns
- center_list: List of length batch_size.
- """
- x_min, x_max = box_coordinates[0], box_coordinates[2]
- y_min, y_max = box_coordinates[1], box_coordinates[3]
- center_x = 0.5 * (x_min + x_max)
- center_y = 0.5 * (y_min + y_max)
- center = np.stack([center_x, center_y], 0)
- return center
-
-
-def get_crop_size(box_coordinates):
- """ Extract Crop.
-
- # Arguments
- xy_limit: List of length 4.
- crop_size_list: List of length batch_size.
-
- # Returns
- crop_size_list: List of length batch_size.
- """
- x_max, x_min = box_coordinates[2], box_coordinates[0]
- y_max, y_min = box_coordinates[3], box_coordinates[1]
- crop_size_x = x_max - x_min
- crop_size_y = y_max - y_min
- crop_maximum_value = np.maximum(crop_size_x, crop_size_y)
- crop_size = np.expand_dims(crop_maximum_value, 0)
- return crop_size
-
-
-# RESTART_LINE
-def get_bounding_box_features(X, Y, binary_class_mask):
- """ Extract Crop.
-
- # Arguments
- X: Numpy array of size (num_keypoints, 1).
- Y: Numpy array of size (num_keypoints, 1).
- binary_class_mask: Numpy array of size (image_size, image_size).
- shape: Tuple of lenth (3).
-
- # Returns
- bounding_box_list: List of length batch_size.
- center_list: List of length batch_size.
- crop_size_list: List of length batch_size.
- """
- X_masked = X[binary_class_mask]
- Y_masked = Y[binary_class_mask]
- bounding_box = get_bounding_box(X_masked, Y_masked)
- center = get_crop_center(bounding_box)
- crop_size = get_crop_size(bounding_box)
- bounding_box = [bounding_box[1],bounding_box[0],bounding_box[3],
- bounding_box[2]]
- return bounding_box, center, crop_size
-
-
-def extract_bounding_box(binary_class_mask):
- """ Extract Bounding Box from Segmentation mask.
-
- # Arguments
- binary_class_mask: Numpy array of size (image_size, image_size).
-
- # Returns
- bounding_box: Numpy array of shape (batch_size, 4).
- center: Numpy array of shape (batch_size, 2).
- crop_size: Numpy array of shape (batch_size, 1).
- """
- binary_class_mask = binary_class_mask.astype('int')
- binary_class_mask = np.equal(binary_class_mask, 1)
- binary_class_mask = np.squeeze(binary_class_mask, axis=-1)
- shape = binary_class_mask.shape
- coordinates_x, coordinates_y = extract_keypoints_uv_coordinates(shape)
- bounding_box, center, crop_size = get_bounding_box_features(
- coordinates_x, coordinates_y, binary_class_mask)
- return center, bounding_box, crop_size
-
-
-def get_box_coordinates(center, size, shape):
- """ Extract Bounding Box from center and size of cropped image.
-
- # Arguments
- location: Tuple of length (2).
- size: Tuple of length (2).
- shape: Typle of length (3).
-
- # Returns
- boxes: Numpy array of shape (batch_size, 4).
- """
- height, width = shape[0], shape[1]
- x_min = center[0] - size // 2
- y_min = center[1] - size // 2
- x_max, y_max = x_min + size, y_min + size
- x_min, x_max = x_min / height, x_max / height
- y_min, y_max = y_min / width, y_max / width
- boxes = [x_min, y_min, x_max, y_max]
- return boxes
-
-
-def crop_image_from_coordinates(image, crop_center, crop_size, scale=1.0):
- """ Crop Image from Center and crop size.
-
- # Arguments
- Image: Numpy array of shape (image_size, image_size, 3).
- crop_center: Tuple of length (2).
- crop_size: Float.
- Scale: Float.
-
- # Returns
- Image_cropped: Numpy array of shape (crop_size, crop-size).
- """
- image = np.squeeze(image, 0)
- height, width, channels = image.shape
- scale = np.reshape(scale, [-1])
- crop_location = crop_center.astype(np.float)
- crop_size_scaled = crop_size / scale
- boxes = get_box_coordinates(crop_location, crop_size_scaled,
- image.shape)
- x_min, y_min, x_max, y_max = boxes
- box = [int(x_min * width),
- int(y_min * height),
- int(x_max * width),
- int(y_max * height)]
- image_cropped = crop_image(image, box)
- image_cropped = resize_image(image_cropped, (crop_size, crop_size))
- return image_cropped
-
-
-def crop_image(image, crop_box):
- """Crop image.
-
- # Arguments
- image: Numpy array.
- crop_box: List of four ints.
-
- # Returns
- Numpy array.
- """
- cropped_image = image[crop_box[0]:crop_box[2], crop_box[1]:crop_box[3], :]
- return cropped_image
-
-
-def extract_keypoint_index(scoremap):
- """ Extract Scoremap.
-
- # Arguments
- scoremap: Numpy aray of shape (crop_size, crop-size).
-
- # Returns
- max_index_vec: List of Max Indices.
- """
- keypoint_index = np.argmax(scoremap)
- return keypoint_index
-
-
-def extract_keypoints_XY(x_vector, y_vector, maximum_indices):
- """ Extract Keypoint X,Y coordinates.
- # Arguments
- x_vector: Numpy array of shape (batch_size, 1).
- y_vector: Numpy array of shape (batch_size, 1).
- maximum_indices: Numpy array of shape (batch_size, 1).
- batch_size: Integer Value.
-
- # Returns
- keypoints2D: Numpy array of shape (num_keypoints, 1).
- """
- keypoints2D = list()
- x_location = np.reshape(x_vector[maximum_indices], [1])
- y_location = np.reshape(y_vector[maximum_indices], [1])
- keypoints2D.append(np.concatenate([x_location, y_location], 0))
- keypoints2D = np.stack(keypoints2D, 0)
- return keypoints2D
-
-
-def create_2D_grids(shape):
- """ Create 2D Grids.
-
- # Arguments
- shape: Tuple of length 2.
-
- # Returns
- x_vec: Numpy array.
- y_vec: Numpy array.
- """
- height, width = shape
- x_range = np.expand_dims(np.arange(height), 1)
- y_range = np.expand_dims(np.arange(width), 0)
- X = np.tile(x_range, [1, width])
- Y = np.tile(y_range, [height, 1])
- X = np.reshape(X, [-1])
- Y = np.reshape(Y, [-1])
- return X, Y
-
-
-def find_max_location(scoremap):
- """ Returns the coordinates of the given scoremap with maximum value.
-
- # Arguments
- scoremap: Numpy array of shape (crop_size, crop-size).
-
- # Returns
- keypoints2D: numpy array of shape (num_keypoints, 1).
- """
- shape = scoremap.shape
- x_grid, y_grid = create_2D_grids(shape)
- keypoint_index = extract_keypoint_index(scoremap)
- keypoints2D = extract_keypoints_XY(x_grid, y_grid, keypoint_index)
- return keypoints2D
-
-
-def create_score_maps(keypoint_2D, keypoint_visibility, image_size,
- crop_size, variance, crop_image=True):
- """ Create gaussian maps for keypoint representation.
- # Arguments
- keypoint_2D: Numpy array of shape (num_keypoints, 2).
- keypoint_visibility: Numpy array of shape (num_keypoints, 1).
- image_size: Tuple of length (3).
- crop_size: Typle of length (2).
- variance: Float value.
- crop_image: Boolean value.
-
- # Returns
- scoremap: numpy array of size (num_keypoints, crop_size, crop-size).
- """
- keypoint_uv = np.stack([keypoint_2D[:, 1], keypoint_2D[:, 0]], -1)
- scoremap_size = image_size[0:2]
- if crop_image:
- scoremap_size = (crop_size, crop_size)
- scoremap = create_gaussian_map(keypoint_uv, scoremap_size, variance,
- keypoint_visibility) # Check if visibility
- # can be removed
- return scoremap
-
-
-def extract_2D_keypoints(visibility_mask):
- """ Extract 2D keypoints.
-
- # Arguments
- visibility_mask: Numpy array of size (num_keypoints, 3).
-
- # Returns
- keypoints2D: numpy array of size (num_keypoints, 1).
- keypoints_visibility_mask: numpy array of size (num_keypoints, 1).
- """
- keypoints2D = visibility_mask[:, :2]
- keypoints_visibility_mask = visibility_mask[:, 2] == 1
- return keypoints2D, keypoints_visibility_mask
-
-
-def extract_keypoints(scoremaps):
- """ Performs detection per scoremap for the hands keypoints.
-
- # Arguments
- scoremaps: Numpy array of size (crop_size, crop-size, num_keypoints).
-
- # Returns
- keypoint_coords: numpy array of size (num_keypoints, 1).
- """
- height, width, num_keypoints = scoremaps.shape
- keypoint2D = np.zeros((num_keypoints, 2))
- for keypoint_arg in range(num_keypoints):
- keypoint_scoremap = np.argmax(scoremaps[:, :, keypoint_arg])
- coordinates = np.unravel_index(keypoint_scoremap, (height, width))
- v, u = coordinates
- keypoint2D[keypoint_arg, 0] = u
- keypoint2D[keypoint_arg, 1] = v
- return keypoint2D
-
-
-def transform_visibility_mask(visibility_mask):
- """ Data Pre-processing step: Transform Visibility mask to palm coordinates
- from wrist coordinates.
-
- # Arguments
- visibility_mask: Numpy array with shape `(42, 1)`.
-
- # Returns
- visibility_mask: Numpy array with shape `(42, 1)`.
- """
- visibility_left_root = visibility_mask[LEFT_WRIST]
- visibility_left_aligned = visibility_mask[LEFT_MIDDLE_METACARPAL]
- visibility_right_root = visibility_mask[RIGHT_WRIST]
- visibility_right_aligned = visibility_mask[RIGHT_MIDDLE_METACARPAL]
-
- palm_visibility_left = np.logical_or(
- visibility_left_root, visibility_left_aligned)
- palm_visibility_right = np.logical_or(
- visibility_right_root, visibility_right_aligned)
-
- palm_visibility_left = np.expand_dims(palm_visibility_left, 0)
- palm_visibility_right = np.expand_dims(palm_visibility_right, 0)
-
- visibility_mask = np.concatenate(
- [palm_visibility_left, visibility_mask[LEFT_WRIST: LEFT_PINKY_TIP],
- palm_visibility_right, visibility_mask[RIGHT_WRIST: RIGHT_PINKY_TIP]],
- 0)
- return visibility_mask
-
-
-def keypoints_to_palm_coordinates(keypoints):
- """ Data Pre-processing step: Transform keypoints to palm coordinates
- from wrist coordinates.
- # Arguments
- keypoints: Numpy array with shape `(42, 3)` for 3D keypoints.
- Numpy array with shape `(42, 2)` for 2D keypoints.
-
- # Returns
- keypoints: Numpy array with shape `(42, 3)` for 3D keypoints.
- Numpy array with shape `(42, 2)` for 2D keypoints.
- """
- palm_coordinates_left = 0.5 * (keypoints[LEFT_WRIST, :] +
- keypoints[LEFT_MIDDLE_METACARPAL, :])
- palm_coordinates_right = 0.5 * (keypoints[RIGHT_WRIST, :] +
- keypoints[RIGHT_MIDDLE_METACARPAL, :])
-
- palm_coordinates_left = np.expand_dims(palm_coordinates_left, 0)
- palm_coordinates_right = np.expand_dims(palm_coordinates_right, 0)
-
- keypoints = np.concatenate(
- [palm_coordinates_left, keypoints[LEFT_WRIST:LEFT_PINKY_TIP, :],
- palm_coordinates_right, keypoints[RIGHT_WRIST:RIGHT_PINKY_TIP, :]], 0)
-
- return keypoints
-
-
-def get_transform_to_bone_frame(keypoints3D, bone_index):
- """ Transform the keypoints in camera image frame to index keypoint frame.
-
- # Arguments
- keypoints3D: numpy array of shape (num_keypoints, 3).
- bone_index: int value of range [0, num_keypoints].
-
- # Returns
- transformation_parameters: multiple values representing all the
- euclidean parameters to calculate transformation matrix.
- """
- index_keypoint = np.expand_dims(keypoints3D[bone_index, :], 1)
- translated_keypoint3D = to_homogeneous_coordinates(index_keypoint)
- translation_matrix = build_translation_matrix_SE3(np.zeros(3))
- translation_matrix = np.expand_dims(translation_matrix, 0)
- transformation_parameters = get_transformation_parameters(
- translated_keypoint3D, translation_matrix)
- return transformation_parameters
-
-
-def transform_to_keypoint_coordinates(transformation_matrix, keypoint3D):
- """ Transform to keypoint (root/child) frame.
-
- # Arguments
- transformation_matrix: numpy array of shape (4, 4).
- keypoint3D: numpy array of shape (3, ).
-
- # Returns
- keypoint_coordinates: Numpy array of size (3, ).
- """
- keypoint3D = np.expand_dims(keypoint3D, 1)
- keypoint3D = to_homogeneous_coordinates(keypoint3D)
- keypoint_coordinates = np.matmul(transformation_matrix, keypoint3D)
- return keypoint_coordinates
-
-
-def apply_root_transformations(keypoints3D, bone_index):
- """ Transform all keypoints to root keypoint frame.
-
- # Arguments
- keypoints3D: numpy array of shape (num_keypoints, 3).
- bone_index: int value of range [0, num_keypoints].
-
- # Returns
- relative_coordinates: numpy array of shape (num_keypoints, 3, 1).
- transformations: placeholder for transformation
- (num_keypoints, 4, 4, 1).
- """
- transformation_parameters = get_transform_to_bone_frame(keypoints3D,
- bone_index)
-
- length_from_origin = transformation_parameters[0]
- rotation_angle_x = transformation_parameters[1]
- rotation_angle_y = transformation_parameters[2]
- rotated_keypoints = transformation_parameters[3]
-
- relative_coordinate = np.stack([length_from_origin, rotation_angle_x,
- rotation_angle_y], 0)
- return rotated_keypoints, relative_coordinate
-
-
-def get_articulation_angles(child_keypoint_coordinates,
- parent_keypoint_coordinates, transformation_matrix):
- """ Calculate Articulation Angles.
-
- # Arguments
- local_child_coordinates: Child keypoint coordinates (1, 3).
- local_child_coordinates: Parent keypoint coordinates (1, 3).
- transformation_matrix: Numpy array of shape (4, 4).
-
- # Returns
- transformation_parameters: parameters for transformation to
- local frame.
- """
- delta_vector = child_keypoint_coordinates - parent_keypoint_coordinates
- delta_vector = to_homogeneous_coordinates(
- np.expand_dims(delta_vector[:, :3], 1))
- transformation_angles = get_transform_to_bone_frame(
- delta_vector, transformation_matrix)
- return transformation_angles
-
-
-def apply_child_transformations(keypoints3D, bone_index, parent_index,
- transformations):
- """ Calculate Child coordinate to Parent coordinate.
-
- # Arguments
- keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3).
- bone_index: Index of current bone keypoint, Numpy array of shape (1).
- parent_index: Index of root keypoint, Numpy array of shape (1).
- relative_coordinates: place holder for relative_coordinates.
- transformations: placeholder for transformations.
-
- # Returns
- rotated_keypoints: place holder for relative_coordinates.
- transformation_parameters: placeholder for transformations.
- """
- transformation_matrix = transformations[parent_index]
- parent_keypoint_coordinates = transform_to_keypoint_coordinates(
- transformation_matrix, keypoints3D[parent_index, :])
- child_keypoint_coordinates = transform_to_keypoint_coordinates(
- transformation_matrix, keypoints3D[bone_index, :])
- transformation_parameters = get_articulation_angles(
- parent_keypoint_coordinates, child_keypoint_coordinates,
- transformation_matrix)
- length_from_origin = transformation_parameters[0]
- rotation_angle_x, rotation_angle_y = transformation_parameters[1:3]
- rotated_keypoints = transformation_parameters[3]
- transformation_parameters = np.stack([length_from_origin, rotation_angle_x,
- rotation_angle_y])
- return rotated_keypoints, transformation_parameters
-
-
-def keypoints_to_root_frame(keypoints3D):
- """ Convert keypoints to root keypoint coordinates.
-
- # Arguments
- keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3).
-
- # Returns
- relative_coordinates: keypoints in root keypoint coordinate frame.
- """
- transformations = [None] * len(KINEMATIC_CHAIN_LIST)
- relative_coordinates = np.zeros(len(KINEMATIC_CHAIN_LIST))
- for bone_index in KINEMATIC_CHAIN_LIST:
- parent_index = KINEMATIC_CHAIN_DICT[bone_index]
- if parent_index == 'root':
- transformation, relative_coordinate = apply_root_transformations(
- keypoints3D, bone_index)
- else:
- transformation, relative_coordinate = apply_child_transformations(
- keypoints3D, bone_index, parent_index, transformations)
- transformations[bone_index] = transformation
- relative_coordinates[bone_index] = relative_coordinate
- return relative_coordinates
-
-
-def keypoint_to_root_frame(keypoints3D, num_keypoints=21):
- """ Convert keypoints to root keypoint coordinates.
- # Arguments
- keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3).
-
- # Returns
- key_point_relative_frame: keypoints in root keypoint coordinate frame.
- """
- keypoints3D = keypoints3D.reshape([num_keypoints, 3])
- relative_coordinates = keypoints_to_root_frame(keypoints3D)
- key_point_relative_frame = np.stack(relative_coordinates, 1)
- key_point_relative_frame = np.squeeze(key_point_relative_frame)
- return key_point_relative_frame
-
-
-def get_keypoints_z_rotation(keypoints3D, keypoint):
- """ Rotate Keypoints along z-axis.
-
- # Arguments
- keypoint: Keypoint to whose frame transformation is to
- be done, Numpy array of shape (1, 3).
- keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3).
-
- # Returns
- reference_keypoint_z_rotation: Reference keypoint after rotation.
- resultant_keypoints3D: keypoints after rotation.
- rotation_matrix_z: Rotation matrix.
- """
- alpha = np.arctan2(keypoint[0], keypoint[1])
- rotation_matrix = build_rotation_matrix_z(alpha)
- keypoints3D = np.matmul(keypoints3D.T, rotation_matrix)
- keypoint = keypoints3D[LEFT_MIDDLE_METACARPAL, :]
- return keypoint, rotation_matrix, keypoints3D
-
-
-def get_keypoints_x_rotation(keypoints3D, keypoint):
- """ Rotate Keypoints along x-axis.
-
- # Arguments
- keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3).
- keypoint: Numpy array of shape (1, 3).
-
- # Returns
- keypoint: Resultant reference keypoint after rotation, Numpy array of
- shape (1, 3).
- resultant_keypoints3D: keypoints after rotation.
- rotation_matrix_x: Rotation matrix along x-axis.
- """
- beta = -np.arctan2(keypoint[2], keypoint[1])
- rotation_matrix = build_rotation_matrix_x(beta + np.pi)
- keypoints3D = np.matmul(keypoints3D, rotation_matrix)
- keypoint = keypoints3D[LEFT_PINKY_TIP, :]
- return keypoint, rotation_matrix, keypoints3D
-
-
-def get_keypoints_y_rotation(keypoints3D, keypoint):
- """ Rotate Keypoints along y-axis.
-
- # Arguments
- keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3).
- reference_keypoint: keypoint, Numpy array of shape (1, 3).
-
- # Returns
- resultant_keypoint: Resultant reference keypoint after rotation.
- resultant_keypoints3D: keypoints after rotation along Y-axis.
- rotation_matrix_y: Rotation matrix along x-axis.
- """
- gamma = np.arctan2(keypoint[2], keypoint[0])
- rotation_matrix = build_rotation_matrix_y(gamma)
- keypoints3D = np.matmul(keypoints3D, rotation_matrix)
- keypoint = keypoints3D[LEFT_PINKY_TIP, :]
- return keypoint, rotation_matrix, keypoints3D
-
-
-def canonical_transformations_on_keypoints(keypoints3D): # rename properly
- # RE_CHECK
- """ Transform Keypoints to canonical coordinates.
-
- # Arguments
- keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3).
-
- # Returns
- transformed_keypoints3D: Resultant keypoint after transformation.
- final_rotation_matrix: Final transformation matrix.
- """
- reference_keypoint = np.expand_dims(keypoints3D[:, LEFT_WRIST], 1)
- keypoints3D = keypoints3D - reference_keypoint
- keypoint = keypoints3D[:, LEFT_MIDDLE_METACARPAL]
- final_rotation_matrix = np.ones((3, 3))
- apply_rotations = [get_keypoints_z_rotation, get_keypoints_x_rotation,
- get_keypoints_y_rotation]
- for function in apply_rotations:
- keypoint, rotation_matrix, keypoints3D = function(keypoints3D, keypoint)
- final_rotation_matrix = np.matmul(final_rotation_matrix,
- rotation_matrix)
- return np.squeeze(keypoints3D), np.squeeze(final_rotation_matrix)
-
-
-def get_scale_matrix(scale, min_scale=1.0, max_scale=10.0):
- """ calculate scale matrix.
-
- # Arguments
- scale: Int value.
-
- # Returns
- scale_original: Int value
- scale_matrix: Numpy array of shape (3, 3)
- """
- scale_original = np.minimum(np.maximum(scale, min_scale), max_scale)
- scale_matrix = np.diag([scale_original, scale_original, 1])
- return scale_original, scale_matrix
-
-
-def get_scale_translation_matrix(crop_center, crop_size, scale):
- """ calculate scale translation matrix.
-
- # Arguments
- crop_center: Numpy array of shape (2).
- crop_size: Int value.
- scale: Int value.
-
- # Returns
- translation_matrix: Numpy array of shape (3, 3).
- """
- crop_size_halved = crop_size // 2
- translated_center_x = (crop_center[0] * scale) - crop_size_halved
- translated_center_y = (crop_center[1] * scale) - crop_size_halved
- translation_matrix = np.diag(
- [-translated_center_x, -translated_center_y, 1])
- return translation_matrix
-
-
-def get_y_axis_rotated_keypoints(keypoint3D):
- """ Rotate keypoints along y-axis
- # Arguments
- keypoint3D: Numpy array of shape (num_keypoints, 3).
-
- # Returns
- keypoint3D: Numpy array of shape (num_keypoints, 3).
- affine_rotation_matrix_y: Numpy array of shape (3, 3).
- gamma: Numpy array of shape (1, ).
- """
- gamma = np.arctan2(keypoint3D[0], keypoint3D[2])
- rotation_matrix_y = build_rotation_matrix_y(gamma)
- affine_rotation_matrix_y = build_affine_matrix(rotation_matrix_y)
- keypoint3D = np.matmul(affine_rotation_matrix_y, keypoint3D)
- return keypoint3D, affine_rotation_matrix_y, gamma
-
-
-def get_x_axis_rotated_keypoints(keypoint3D, length_from_origin,
- rotation_matrix):
- """ Rotate keypoints along x-axis
-
- # Arguments
- keypoint3D: Numpy array of shape (num_keypoints, 3).
- length_from_origin: Numpy array of shape (1, ).
- rotation_matrix: Numpy array of shape (3, 3).
-
- # Returns
- keypoint3D: Numpy array of shape (num_keypoints, 3).
- alpha: Float value. Rotation angle along X-axis.
- """
- alpha = np.arctan2(-keypoint3D[1], keypoint3D[2])
- rotation_matrix_x = build_rotation_matrix_x(alpha)
- affine_rotation_matrix_x = build_affine_matrix(rotation_matrix_x)
- translation_matrix_to_origin = build_translation_matrix_SE3(
- -length_from_origin)
- translation_matrix_to_origin = np.expand_dims(translation_matrix_to_origin,
- 0)
- rotation_matrix_xy = np.matmul(affine_rotation_matrix_x, rotation_matrix)
- keypoint3D = np.matmul(translation_matrix_to_origin, rotation_matrix_xy)
- return keypoint3D, alpha
-
-
-def get_transformation_parameters(keypoint3D, transformation_matrix):
- """ Calculate transformation parameters.
-
- # Arguments
- keypoint3D: Numpy array of shape (num_keypoints, 3).
- transformation_matrix: Numpy array of shape (4, 4).
-
- # Returns
- length_from_origin: Float value. Distance of the keypoint from the origin.
- rotation_angle_x: Float value. Rotation angle along X-axis.
- rotation_angle_y: Float value. Rotation angle along Y-axis.
- rotated_keypoints: Numpy array. Keypoints after applying the transformation.
- """
- length_from_origin = np.linalg.norm(keypoint3D)
-
- keypoint_parameters = get_y_axis_rotated_keypoints(keypoint3D)
- keypoint3D_rotated_y, affine_matrix, rotation_angle_y = keypoint_parameters
-
- keypoint3D_rotated_x, rotation_angle_x = get_x_axis_rotated_keypoints(
- keypoint3D_rotated_y, length_from_origin, affine_matrix)
-
- rotated_keypoints = np.matmul(keypoint3D_rotated_x, transformation_matrix)
- transformation_parameters = (length_from_origin, rotation_angle_x,
- rotation_angle_y, rotated_keypoints)
-
- return transformation_parameters
-
-
-def transform_cropped_keypoints(cropped_keypoints, centers, scale, crop_size):
- """ Transforms the cropped coordinates to the original image space.
-
- # Arguments
- cropped_keypoints: Tensor (batch x num_keypoints x 3): Estimated hand
- coordinates in the cropped space.
- centers: Tensor (batch x 1): Repeated coordinates of the
- center of the hand in global image space.
- scale: Tensor (batch x 1): Scaling factor between the original image
- and the cropped image.
- crop_size: int: Size of the crop.
-
- # Returns
- keypoints: Tensor (batch x num_keypoints x 3): Transformed coordinates.
- """
- cropped_keypoints[:, [0, 1]] = cropped_keypoints[:, [1, 0]]
- keypoints = np.copy(cropped_keypoints)
- keypoints = keypoints - (crop_size // 2)
- keypoints = keypoints / scale
- keypoints = keypoints + centers
- keypoints[:, [0, 1]] = keypoints[:, [1, 0]]
- return keypoints
-
-
-def canonical_to_relative_coordinates(num_keypoints, canonical_coordinates,
- rotation_matrix, hand_side):
- """ Transform keypoints from canonical coordinates back to relative
- coordinates, flipping right-hand keypoints to the left-hand frame when
- indicated by the hand side.
-
- # Arguments
- num_keypoints: Int value.
- canonical_coordinates: Numpy array of shape (num_keypoints, 3).
- rotation_matrix: Numpy array of shape (3, 3).
- hand_side: One-hot encoded hand side, Numpy array of shape (1, 2).
-
- # Returns
- relative_keypoints: Numpy array of shape (num_keypoints, 3).
- """
- hand_arg = np.argmax(hand_side, 1)
- hand_side_mask = np.equal(hand_arg, 1)
- hand_side_mask = np.reshape(hand_side_mask, [-1, 1])
- hand_side_mask_3D = np.tile(hand_side_mask, [num_keypoints, 3])
- keypoint_flipped = flip_right_to_left_hand(canonical_coordinates,
- hand_side_mask_3D)
- relative_keypoints = np.matmul(keypoint_flipped, rotation_matrix)
- return relative_keypoints
diff --git a/examples/hand_pose_estimation/backend_standard.py b/examples/hand_pose_estimation/backend_standard.py
deleted file mode 100644
index 0a7932afa..000000000
--- a/examples/hand_pose_estimation/backend_standard.py
+++ /dev/null
@@ -1,65 +0,0 @@
-import cv2
-import numpy as np
-
-
-def wrap_as_dictionary(keys, values):
- """ Wrap values with respective keys into a dictionary.
-
- # Arguments
- keys: List of strings.
- Values: List.
-
- # Returns
- output: Dictionary.
- """
- output = dict(zip(keys, values))
- return output
-
-
-def merge_dictionaries(dicts):
- """ Merge multiple dictionaries.
-
- # Arguments
- dicts: List of dictionaries.
-
- # Returns
- result: Dictionary.
- """
- result = {}
- for dict in dicts:
- result.update(dict)
- return result
-
-
-def resize_image_with_linear_interpolation(image, size):
- """Resize image using nearest neighbors interpolation.
-
- # Arguments
- image: Numpy array.
- size: List of two ints.
-
- # Returns
- Numpy array.
- """
- if(type(image) != np.ndarray):
- raise ValueError(
- 'Received image is not of type numpy array', type(image))
- else:
- return cv2.resize(image, size, interpolation=cv2.INTER_LINEAR)
-
-
-def transpose_array(array):
- """Resize image using nearest neighbors interpolation.
-
- # Arguments
- image: Numpy array.
- size: List of two ints.
-
- # Returns
- Numpy array.
- """
- if(type(array) != np.ndarray):
- raise ValueError(
- 'Received input is not of type numpy array', type(array))
- else:
- return array.T
diff --git a/examples/hand_pose_estimation/demo.py b/examples/hand_pose_estimation/demo.py
old mode 100755
new mode 100644
index 0897e0eff..00b9ed88e
--- a/examples/hand_pose_estimation/demo.py
+++ b/examples/hand_pose_estimation/demo.py
@@ -1,23 +1,15 @@
import argparse
+from paz.applications import MinimalHandPoseEstimation
+from paz.backend.camera import VideoPlayer
+from paz.backend.camera import Camera
-from HandPoseEstimation import HandSegmentationNet, PosePriorNet, PoseNet
-from HandPoseEstimation import ViewPointNet
-from pipelines import DetectHandKeypoints
-from paz.backend.camera import Camera, VideoPlayer
-parser = argparse.ArgumentParser()
+parser = argparse.ArgumentParser(description='Minimal hand keypoint detection')
parser.add_argument('-c', '--camera_id', type=int, default=0,
help='Camera device ID')
args = parser.parse_args()
-use_pretrained = True
-HandSegNet = HandSegmentationNet()
-HandPoseNet = PoseNet()
-HandPosePriorNet = PosePriorNet()
-HandViewPointNet = ViewPointNet()
-
-pipeline = DetectHandKeypoints(HandSegNet, HandPoseNet, HandPosePriorNet,
- HandViewPointNet)
+pipeline = MinimalHandPoseEstimation(right_hand=False)
camera = Camera(args.camera_id)
player = VideoPlayer((640, 480), pipeline, camera)
player.run()
diff --git a/examples/minimal_hand/demo3D.py b/examples/hand_pose_estimation/demo3D.py
similarity index 89%
rename from examples/minimal_hand/demo3D.py
rename to examples/hand_pose_estimation/demo3D.py
index 94243b238..7d0556a65 100644
--- a/examples/minimal_hand/demo3D.py
+++ b/examples/hand_pose_estimation/demo3D.py
@@ -3,7 +3,7 @@
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from paz.backend.camera import Camera, VideoPlayer
-from paz.applications import MinimalHandPoseEstimation
+from paz.applications import SSD512MinimalHandPose
from paz.backend.image import resize_image, show_image
from paz.datasets import MINIMAL_HAND_CONFIG
@@ -13,7 +13,7 @@
help='Camera device ID')
args = parser.parse_args()
-pipeline = MinimalHandPoseEstimation(right_hand=False)
+pipeline = SSD512MinimalHandPose(right_hand=False, offsets=[0.5, 0.5])
camera = Camera(args.camera_id)
player = VideoPlayer((640, 480), pipeline, camera)
@@ -62,13 +62,16 @@ def wrapped_animate(i):
show_image(image, 'inference', wait=False)
keypoints3D = output['keypoints3D']
+ if len(keypoints3D) == 0:
+ return
+ keypoints3D = keypoints3D[0] # TAKE ONLY THE FIRST PREDICTION
xs, ys, zs = np.split(keypoints3D, 3, axis=1)
plt.cla()
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
- ax.scatter3D(xs, ys, zs, c = joint_colors)
+ ax.scatter3D(xs, ys, zs, c=joint_colors)
plot_3D_keypoints_link(ax, keypoints3D, link_args, link_orders,
link_colors)
return wrapped_animate
diff --git a/examples/minimal_hand/demo_image.py b/examples/hand_pose_estimation/demo_image.py
similarity index 100%
rename from examples/minimal_hand/demo_image.py
rename to examples/hand_pose_estimation/demo_image.py
diff --git a/examples/hand_pose_estimation/hand_keypoints_loader.py b/examples/hand_pose_estimation/hand_keypoints_loader.py
deleted file mode 100644
index e66a3ae62..000000000
--- a/examples/hand_pose_estimation/hand_keypoints_loader.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import glob
-import pickle
-
-from paz.abstract import Loader
-
-
-class RenderedHandLoader(Loader):
- def __init__(self, path, split='train'):
- super().__init__(path, split, None, 'HandPoseLoader')
- self.path = path
- split_to_folder = {'train': 'training', 'val': 'evaluation',
- 'test': 'testing'}
- self.folder = split_to_folder[split]
-
- def _load_annotation(self, label_path):
- with open(label_path, 'rb') as file:
- annotations_all = pickle.load(file)
- return annotations_all
-
- def to_list_of_dictionaries(self, hands, segmentation_labels=None,
- annotations=None):
- dataset = []
- for hand_arg in range(len(hands)):
- sample = dict()
- sample['image_path'] = hands[hand_arg]
- sample['segmentation_label'] = segmentation_labels[hand_arg]
- sample['annotations'] = annotations[hand_arg]
- dataset.append(sample)
- return dataset
-
- def load_data(self):
- images = sorted(glob.glob(self.path + self.folder + '/color/*.png'))
-
- if self.split == 'test':
- dataset = self.to_list_of_dictionaries(images, None, None)
- else:
- segmentation_labels = sorted(glob.glob(self.path + self.folder +
- '/mask/*.png'))
- annotations = self._load_annotation(self.path + self.folder +
- '/anno_{}.pickle'.format(
- self.folder))
- dataset = self.to_list_of_dictionaries(images, segmentation_labels,
- annotations)
-
- return dataset
\ No newline at end of file
diff --git a/examples/hand_pose_estimation/hand_tracking.py b/examples/hand_pose_estimation/hand_tracking.py
new file mode 100644
index 000000000..a8f501301
--- /dev/null
+++ b/examples/hand_pose_estimation/hand_tracking.py
@@ -0,0 +1,33 @@
+import argparse
+from paz.abstract import SequentialProcessor
+from paz.backend.camera import VideoPlayer, Camera
+from paz.applications import SSD512MinimalHandPose
+from paz import processors as pr
+
+
+parser = argparse.ArgumentParser(description='Minimal hand keypoint detection')
+parser.add_argument('-c', '--camera_id', type=int, default=0,
+ help='Camera device ID')
+parser.add_argument('-HFOV', '--horizontal_field_of_view', type=float,
+ default=75, help='Horizontal field of view in degrees')
+args = parser.parse_args()
+
+camera = Camera(args.camera_id)
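+# Build approximate camera intrinsics from the given horizontal field of view.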
+camera.intrinsics_from_HFOV(args.horizontal_field_of_view)
+
+
+class HandStateEstimation(SequentialProcessor):
+ def __init__(self, camera):
+ super(HandStateEstimation, self).__init__()
+ intro_topics = ['image', 'boxes2D', 'keypoints2D', 'keypoints3D']
+ self.add(SSD512MinimalHandPose())
+ self.add(pr.UnpackDictionary(intro_topics))
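+ # Estimate a 3D translation from each detected box width using the camera
+ # intrinsics; ControlMap reads input index 1 (boxes2D), writes the result
+ # to output index 4 (translation3D) and keeps the boxes (assumed semantics).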
+ self.add(pr.ControlMap(
+ pr.Translation3DFromBoxWidth(camera), [1], [4], {1: 1}))
+ outro_topics = intro_topics + ['translation3D']
+ self.add(pr.WrapOutput(outro_topics))
+
+
+pipeline = HandStateEstimation(camera)
+player = VideoPlayer((640, 480), pipeline, camera)
+player.run()
diff --git a/examples/minimal_hand/demo.py b/examples/hand_pose_estimation/is_open_demo.py
similarity index 78%
rename from examples/minimal_hand/demo.py
rename to examples/hand_pose_estimation/is_open_demo.py
index 00b9ed88e..4c751f411 100644
--- a/examples/minimal_hand/demo.py
+++ b/examples/hand_pose_estimation/is_open_demo.py
@@ -1,15 +1,14 @@
import argparse
-from paz.applications import MinimalHandPoseEstimation
from paz.backend.camera import VideoPlayer
from paz.backend.camera import Camera
-
+from paz.applications import ClassifyHandClosure
parser = argparse.ArgumentParser(description='Minimal hand keypoint detection')
parser.add_argument('-c', '--camera_id', type=int, default=0,
help='Camera device ID')
args = parser.parse_args()
-pipeline = MinimalHandPoseEstimation(right_hand=False)
+pipeline = ClassifyHandClosure(draw=True, right_hand=False)
camera = Camera(args.camera_id)
player = VideoPlayer((640, 480), pipeline, camera)
player.run()
diff --git a/examples/hand_pose_estimation/layer.py b/examples/hand_pose_estimation/layer.py
deleted file mode 100644
index 86fbdb8db..000000000
--- a/examples/hand_pose_estimation/layer.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import tensorflow as tf
-from tensorflow.keras.layers import Layer
-from backend_keypoints import find_max_location
-
-
-class SegmentationDilation(Layer):
- def __init__(self, filter_size=21):
- super(SegmentationDilation, self).__init__()
- self.filter_size = filter_size
- filters = tf.ones((filter_size, filter_size, 1))
- self.kernel = filters / float(self.filter_size ** 2)
-
- def call(self, inputs):
- segmentation_map_height, segmentation_map_width, channels = inputs.shape
- scoremap_softmax = tf.nn.softmax(inputs)
- scoremap_foreground = tf.reduce_max(scoremap_softmax[:, :, 1:], -1)
- segmentationmap_foreground = tf.round(scoremap_foreground)
- max_loc = find_max_location(scoremap_foreground)
-
- sparse_indices = tf.reshape(max_loc, [1, 2])
-
- sparse_input = tf.SparseTensor(
- dense_shape=[segmentation_map_height, segmentation_map_width],
- values=[1.0], indices=sparse_indices)
-
- objectmap = tf.sparse.to_dense(sparse_input)
- num_passes = max(segmentation_map_height, segmentation_map_width) // (
- self.filter_size // 2)
-
- for pass_count in range(num_passes):
- objectmap = tf.reshape(objectmap, [1, segmentation_map_height,
- segmentation_map_width, 1])
-
- objectmap_dilated = tf.nn.dilation2d(
- input=objectmap, filters=self.kernel, strides=[1, 1, 1, 1],
- dilations=[1, 1, 1, 1], padding='SAME', data_format='NHWC')
-
- objectmap_dilated = tf.reshape(objectmap_dilated,
- [segmentation_map_height,
- segmentation_map_width])
-
- objectmap = tf.round(tf.multiply(segmentationmap_foreground,
- objectmap_dilated))
-
- objectmap = tf.reshape(objectmap, [segmentation_map_height,
- segmentation_map_width, 1])
- return objectmap.numpy()
diff --git a/examples/hand_pose_estimation/pipelines.py b/examples/hand_pose_estimation/pipelines.py
deleted file mode 100755
index e4c5b4718..000000000
--- a/examples/hand_pose_estimation/pipelines.py
+++ /dev/null
@@ -1,303 +0,0 @@
-import numpy as np
-
-from layer import SegmentationDilation
-from paz import processors as pr
-from paz.abstract import SequentialProcessor, Processor, Box2D
-from processors_SE3 import CalculatePseudoInverse, RotationMatrixfromAxisAngles
-from processors_SE3 import CanonicaltoRelativeFrame, KeypointstoPalmFrame
-from processors_SE3 import GetCanonicalTransformation, TransformKeypoints
-from processors_SE3 import TransformVisibilityMask, TransformtoRelativeFrame
-from processors_keypoints import AdjustCropSize, CropImage
-from processors_keypoints import CreateScoremaps, ExtractBoundingbox
-from processors_keypoints import Extract2DKeypoints, ExtractHandsideandKeypoints
-from processors_keypoints import ExtractDominantHandVisibility
-from processors_keypoints import ExtractDominantKeypoints2D, CropImageFromMask
-from processors_keypoints import ExtractHandmask, ExtractKeypoints
-from processors_keypoints import FlipRightHandToLeftHand
-from processors_keypoints import NormalizeKeypoints
-from processors_standard import MergeDictionaries, ToOneHot, WrapToDictionary
-from processors_standard import ResizeImageWithLinearInterpolation
-from processors_standard import TransposeOfArray, ListToArray
-
-
-class ExtractHandSegmentation(SequentialProcessor):
- def __init__(self, size=320):
- super(ExtractHandSegmentation, self).__init__()
- self.add(pr.UnpackDictionary(
- ['image', 'segmentation_label', 'annotations']))
-
- preprocess_image = pr.SequentialProcessor(
- [pr.LoadImage(), pr.ResizeImage((size, size))])
-
- preprocess_segmentation_map = pr.SequentialProcessor(
- [pr.LoadImage(), pr.ResizeImage((size, size)), ExtractHandmask()])
-
- self.add(pr.ControlMap(preprocess_image, [0], [0]))
- self.add(pr.ControlMap(preprocess_segmentation_map, [1], [1]))
- self.add(pr.SequenceWrapper({0: {'image': [size, size, 3]}},
- {1: {'hand_mask': [size, size]}}))
-
-
-class ExtractHandPose2D(Processor):
- def __init__(self, size, image_size, crop_size, variance):
- super(ExtractHandPose2D, self).__init__()
- self.unwrap_inputs = pr.UnpackDictionary(
- ['image', 'segmentation_label', 'annotations'])
- self.preprocess_image = pr.SequentialProcessor(
- [pr.LoadImage(), pr.ResizeImage((size, size))])
-
- self.preprocess_segmentation_map = pr.SequentialProcessor(
- [pr.LoadImage(), pr.ResizeImage((size, size)), ExtractHandmask()])
- self.extract_annotations = pr.UnpackDictionary(['xyz', 'uv_vis', 'K'])
- self.extract_2D_keypoints = Extract2DKeypoints()
- self.keypoints_to_palm = KeypointstoPalmFrame()
- self.visibility_to_palm = TransformVisibilityMask()
- self.extract_hand_side = ExtractHandsideandKeypoints()
-
- self.extract_visibility_dominant_hand = ExtractDominantHandVisibility()
- self.create_scoremaps = CreateScoremaps(
- image_size, crop_size, variance)
- self.crop_image_from_mask = CropImageFromMask()
- self.wrap = pr.WrapOutput(
- ['cropped_image', 'score_maps', 'keypoints_vis21'])
-
- def call(self, inputs, use_palm_coordinates, crop_image):
- image, segmentation_label, annotations = self.unwrap_inputs(inputs)
-
- image = self.preprocess_image(image)
- segmentation_label = self.preprocess_segmentation_map(
- segmentation_label)
- keypoints3D, keypoints2D, camera_matrix = self.extract_annotations(
- annotations)
- keypoints2D, keypoints_visibility_mask = self.extract_2D_keypoints(
- keypoints2D)
-
- if use_palm_coordinates:
- keypoints2D = self.keypoints_to_palm(keypoints2D)
- keypoints_visibility_mask = self.visibility_to_palm(
- keypoints_visibility_mask)
-
- hand_side, keypoints3D, dominant_hand = self.extract_hand_side(
- segmentation_label, keypoints3D)
-
- keypoints21 = self.extract_visibility_dominant_hand(
- keypoints_visibility_mask, dominant_hand)
-
- scoremaps = self.create_scoremaps(keypoints2D, keypoints21)
-
- if crop_image:
- image = self.crop_image_from_mask(
- keypoints2D, keypoints21, image, camera_matrix)
-
- return self.wrap(image, scoremaps, keypoints21)
-
-
-class ExtractHandPose(Processor):
- def __init__(self, size, image_size, crop_size, variance):
- super(ExtractHandPose, self).__init__()
- self.unwrap_inputs = pr.UnpackDictionary(
- ['image', 'segmentation_label', 'annotations'])
- self.preprocess_image = pr.SequentialProcessor(
- [pr.LoadImage(),
- pr.ResizeImage((size, size))])
-
- self.preprocess_segmentation_map = pr.SequentialProcessor(
- [pr.LoadImage(),
- pr.ResizeImage((size, size)),
- ExtractHandmask()])
-
- self.extract_annotations = pr.UnpackDictionary(['xyz', 'uv_vis', 'K'])
- self.extract_2D_keypoints = Extract2DKeypoints()
- self.keypoints_to_palm = KeypointstoPalmFrame()
- self.visibility_to_palm = TransformVisibilityMask()
- self.extract_hand_side = ExtractHandsideandKeypoints()
- self.to_one_hot = ToOneHot(num_classes=2)
- self.normaliza_keypoints = NormalizeKeypoints()
- self.to_relative_frame = TransformtoRelativeFrame()
- self.canonical_transformations = GetCanonicalTransformation()
- self.flip_right_hand = FlipRightHandToLeftHand()
- self.get_matrix_inverse = CalculatePseudoInverse()
-
- self.extract_hand_visibility = ExtractDominantHandVisibility()
- self.extract_dominant_keypoints = ExtractDominantKeypoints2D()
-
- self.crop_image_from_mask = CropImageFromMask()
- self.create_scoremaps = CreateScoremaps(
- image_size=image_size, crop_size=crop_size, variance=variance)
-
- self.wrap = pr.WrapOutput(
- ['score_maps', 'hand_side', 'keypoints3D', 'rotation_matrix'])
-
- def call(self, inputs, use_palm_coordinates, crop_image,
- flip_right_hand=False):
- image, segmentation_label, annotations = self.unwrap_inputs(inputs)
-
- image = self.preprocess_image(image)
- segmentation_label = self.preprocess_segmentation_map(
- segmentation_label)
- keypoints3D, keypoints2D, camera_matrix = self.extract_annotations(
- annotations)
- keypoints2D, keypoints_visibility_mask = self.extract_2D_keypoints(
- keypoints2D)
-
- if use_palm_coordinates:
- keypoints2D = self.keypoints_to_palm(keypoints2D)
- keypoints3D = self.keypoints_to_palm(keypoints3D)
- keypoints_visibility_mask = self.visibility_to_palm(
- keypoints_visibility_mask)
-
- hand_side, keypoints3D, dominant_hand = self.extract_hand_side(
- segmentation_label, keypoints3D)
-
- hand_side_one_hot = self.to_one_hot(hand_side)
-
- keypoint_scale, keypoints3D = self.normaliza_keypoints(keypoints3D)
- keypoints3D = self.to_relative_frame(keypoints3D)
- keypoints3D, canonical_rotation_matrix = self.canonical_transformations(
- keypoints3D)
-
- if flip_right_hand:
- keypoints3D = self.flip_right_hand(keypoints3D)
-
- canonical_rotation_matrix = self.get_matrix_inverse(
- canonical_rotation_matrix)
-
- visible_keypoints = self.extract_hand_visibility(
- keypoints_visibility_mask, dominant_hand)
- dominant_keypoints = self.extract_dominant_keypoints(
- keypoints2D, dominant_hand)
-
- if crop_image:
- scale, image, visible_keypoints, camera_matrix = \
- self.crop_image_from_mask(
- visible_keypoints, dominant_keypoints, image, camera_matrix)
- scoremaps = self.create_scoremaps(
- canonical_rotation_matrix, visible_keypoints)
-
- return self.wrap(scoremaps, hand_side_one_hot, keypoints3D,
- canonical_rotation_matrix)
-
-
-class Process2DKeypoints(SequentialProcessor):
- def __init__(self, PoseNet):
- super(Process2DKeypoints, self).__init__()
- self.add(pr.ExpandDims(0))
- self.add(pr.Predict(PoseNet))
-
-
-class PostProcessKeypoints(SequentialProcessor):
- def __init__(self, number_of_keypoints=21):
- super(PostProcessKeypoints, self).__init__()
- self.add(pr.UnpackDictionary(['canonical_coordinates',
- 'rotation_parameters', 'hand_side']))
- self.add(pr.ControlMap(RotationMatrixfromAxisAngles(), [1], [1]))
- self.add(pr.ControlMap(CanonicaltoRelativeFrame(number_of_keypoints),
- [0, 1, 2], [0]))
-
-
-class PostProcessSegmentation(Processor):
- def __init__(self, image_size=320, crop_shape=(256, 256)):
- super(PostProcessSegmentation, self).__init__()
- self.unpack_inputs = pr.UnpackDictionary(['image',
- 'raw_segmentation_map'])
- self.resize_segmentation_map = ResizeImageWithLinearInterpolation(
- shape=(image_size, image_size))
- self.dilate_map = SegmentationDilation()
- self.extract_box = ExtractBoundingbox()
- self.adjust_crop_size = AdjustCropSize()
- self.crop_image = CropImage(crop_shape[0])
- self.expand_dims = pr.ExpandDims(axis=0)
- self.squeeze_input = pr.Squeeze(axis=0)
-
- def call(self, inputs):
- image, raw_segmentation_map = self.unpack_inputs(inputs)
- raw_segmentation_map = self.squeeze_input(raw_segmentation_map)
- raw_segmentation_map = self.resize_segmentation_map(
- raw_segmentation_map)
- segmentation_map = self.dilate_map(raw_segmentation_map)
- if not np.count_nonzero(segmentation_map):
- return None
- center, bounding_box, crop_size = self.extract_box(segmentation_map)
- crop_size = self.adjust_crop_size(crop_size)
- cropped_image = self.crop_image(image, center, crop_size)
- return cropped_image, segmentation_map, center, bounding_box, crop_size
-
-
-class ResizeScoreMaps(Processor): # Change to Sequential processor
- def __init__(self, crop_shape=(256, 256)):
- super(ResizeScoreMaps, self).__init__()
- self.unpack_inputs = pr.UnpackDictionary(['score_maps'])
- self.crop_shape = crop_shape
- self.squeeze = pr.Squeeze(axis=0)
- self.transpose = TransposeOfArray()
- self.resize_scoremap = pr.ResizeImages(crop_shape)
- self.list_to_array = ListToArray()
- self.expand_dims = pr.ExpandDims(axis=0)
-
- def call(self, input):
- scoremaps = self.unpack_inputs(input)
- scoremaps = self.squeeze(scoremaps)
- scoremaps_transposed = self.transpose(scoremaps)
- scoremaps_resized = self.resize_scoremap(scoremaps_transposed)
- scoremaps_resized = self.list_to_array(scoremaps_resized)
- scoremaps_transposed = self.transpose(scoremaps_resized)
- return scoremaps_transposed
-
-
-class DetectHandKeypoints(Processor):
- def __init__(self, handsegnet, posenet, posepriornet, viewpointnet,
- image_size=320, crop_shape=(256, 256), num_keypoints=21):
- super(DetectHandKeypoints, self).__init__()
-
- self.preprocess_image = SequentialProcessor(
- [pr.NormalizeImage(), pr.ResizeImage((image_size, image_size)),
- pr.ExpandDims(0)])
- postprocess_segmentation = PostProcessSegmentation(image_size,
- crop_shape)
- self.localize_hand = pr.Predict(handsegnet,
- postprocess=postprocess_segmentation)
-
- self.resize_scoremaps = ResizeScoreMaps(crop_shape)
- self.merge_dictionaries = MergeDictionaries()
- self.wrap_input = WrapToDictionary(['hand_side'])
-
- self.predict_keypoints2D = pr.Predict(posenet)
- self.predict_keypoints3D = pr.Predict(posepriornet)
- self.predict_keypoints_angles = pr.Predict(viewpointnet)
- self.postprocess_keypoints = PostProcessKeypoints()
- self.resize = pr.ResizeImage(shape=crop_shape)
- self.extract_2D_keypoints = ExtractKeypoints()
- self.transform_keypoints = TransformKeypoints()
- self.draw_keypoint = pr.DrawKeypoints2D(num_keypoints, normalized=True,
- radius=4)
- self.denormalize = pr.DenormalizeImage()
- self.wrap = pr.WrapOutput(['image', 'keypoints2D', 'keypoints3D'])
- self.expand_dims = pr.ExpandDims(axis=0)
- self.draw_boxes = pr.DrawBoxes2D(['hand'], [[0, 1, 0]])
-
- def call(self, input_image, hand_side=np.array([[1.0, 0.0]])):
- image = self.preprocess_image(input_image)
- hand_features = self.localize_hand(image)
- if hand_features is None:
- output = self.wrap(input_image.astype('uint8'), None, None)
- return output
- hand_crop, segmentation_map, center, box, crop_size_best = hand_features
- box = Box2D(box, score=1.0, class_name='hand')
- image = self.draw_boxes(np.squeeze(image), [box])
- hand_crop = self.expand_dims(hand_crop)
- score_maps = self.predict_keypoints2D(hand_crop)
- score_maps_resized = self.resize_scoremaps(score_maps)
- hand_side = {'hand_side': hand_side}
- score_maps = self.merge_dictionaries([score_maps, hand_side])
- keypoints_2D = self.extract_2D_keypoints(score_maps_resized)
- rotation_parameters = self.predict_keypoints3D(score_maps)
- viewpoints = self.predict_keypoints_angles(score_maps)
- canonical_keypoints = self.merge_dictionaries([rotation_parameters,
- viewpoints])
- keypoints3D = self.postprocess_keypoints(canonical_keypoints)
- keypoints2D = self.transform_keypoints(keypoints_2D, center,
- crop_size_best, 256)
- image = self.draw_keypoint(np.squeeze(image), keypoints2D)
- image = self.denormalize(image)
- output = self.wrap(image.astype('uint8'), keypoints2D, keypoints3D)
- return output
diff --git a/examples/hand_pose_estimation/processors_SE3.py b/examples/hand_pose_estimation/processors_SE3.py
deleted file mode 100644
index 937c58c60..000000000
--- a/examples/hand_pose_estimation/processors_SE3.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import numpy as np
-
-from backend_SE3 import rotation_from_axis_angles
-from backend_keypoints import canonical_to_relative_coordinates
-from backend_keypoints import canonical_transformations_on_keypoints
-from backend_keypoints import keypoint_to_root_frame
-from backend_keypoints import keypoints_to_palm_coordinates
-from backend_keypoints import transform_cropped_keypoints
-from backend_keypoints import transform_visibility_mask
-from paz.abstract import Processor
-
-
-class TransformKeypoints(Processor):
- """ Transform the keypoint from cropped image frame to original image
- frame"""
-
- def __init__(self):
- super(TransformKeypoints, self).__init__()
-
- def call(self, cropped_keypoints, centers, scale, crop_size):
- keypoints_2D = transform_cropped_keypoints(cropped_keypoints, centers,
- scale, crop_size)
- return keypoints_2D
-
-
-class KeypointstoPalmFrame(Processor):
- """Translate to Wrist Coordinates.
- """
-
- def __init__(self):
- super(KeypointstoPalmFrame, self).__init__()
-
- def call(self, keypoints):
- return keypoints_to_palm_coordinates(keypoints=keypoints)
-
-
-class TransformVisibilityMask(Processor):
- """Tranform Visibility Mask to palm coordinates.
- """
-
- def __init__(self):
- super(TransformVisibilityMask, self).__init__()
-
- def call(self, visibility_mask):
- return transform_visibility_mask(visibility_mask)
-
-
-class TransformtoRelativeFrame(Processor):
- """Transform to Relative Frame."""
-
- def __init__(self):
- super(TransformtoRelativeFrame, self).__init__()
-
- def call(self, keypoints3D):
- return keypoint_to_root_frame(keypoints3D)
-
-
-class GetCanonicalTransformation(Processor):
- """Extract Canonical Transformation matrix. To transform keypoints to
- palm frame inorder to make them rotationally invariant
- """
-
- def __init__(self):
- super(GetCanonicalTransformation, self).__init__()
-
- def call(self, keypoints3D):
- return canonical_transformations_on_keypoints(keypoints3D)
-
-
-class CalculatePseudoInverse(Processor):
- """ Perform Pseudo Inverse of the matrix"""
-
- def __init__(self):
- super(CalculatePseudoInverse, self).__init__()
-
- def call(self, matrix):
- return np.linalg.pinv(matrix)
-
-
-class RotationMatrixfromAxisAngles(Processor):
- """ Get Rotation matrix from the axis angles"""
-
- def __init__(self):
- super(RotationMatrixfromAxisAngles, self).__init__()
-
- def call(self, rotation_angles):
- return rotation_from_axis_angles(rotation_angles)
-
-
-class CanonicaltoRelativeFrame(Processor):
- """ Transform the keypoints from Canonical coordinates to chosen relative (
- wrist or palm) coordinates. To make keypoints rotationally invariant """
-
- def __init__(self, num_keypoints=21):
- super(CanonicaltoRelativeFrame, self).__init__()
- self.num_keypoints = num_keypoints
-
- def call(self, canonical_coordinates, rotation_matrix, hand_side):
- canonical_coordinates = canonical_coordinates.reshape((21, 3))
- keypoints = canonical_to_relative_coordinates(
- self.num_keypoints, canonical_coordinates, rotation_matrix,
- hand_side)
- return keypoints
diff --git a/examples/hand_pose_estimation/processors_keypoints.py b/examples/hand_pose_estimation/processors_keypoints.py
deleted file mode 100644
index adb376c47..000000000
--- a/examples/hand_pose_estimation/processors_keypoints.py
+++ /dev/null
@@ -1,189 +0,0 @@
-import numpy as np
-
-from backend_keypoints import create_score_maps, extract_2D_keypoints
-from backend_keypoints import crop_image_from_coordinates, extract_keypoints
-from backend_keypoints import crop_image_from_mask, extract_hand_segment
-from backend_keypoints import extract_bounding_box, find_max_location
-from backend_keypoints import extract_dominant_hand_visibility
-from backend_keypoints import extract_dominant_keypoints2D
-from backend_keypoints import flip_right_to_left_hand
-from backend_keypoints import get_hand_side_and_keypooints
-from backend_keypoints import normalize_keypoints
-
-from paz.abstract import Processor
-
-
-class ExtractHandmask(Processor):
- """Extract Hand mask from the segmentation label provided. The pixels
- with value greater than 1 belong to hands.
- """
-
- def __init__(self):
- super(ExtractHandmask, self).__init__()
-
- def call(self, segmentation_label):
- return extract_hand_segment(segmentation_label=segmentation_label)
-
-
-class ExtractHandsideandKeypoints(Processor):
- """Extract Hand Side by counting the number of pixels belonging to each
- hand.
- """
-
- def __init__(self):
- super(ExtractHandsideandKeypoints, self).__init__()
-
- def call(self, hand_parts_mask, keypoints3D):
- return get_hand_side_and_keypooints(hand_parts_mask, keypoints3D)
-
-
-class NormalizeKeypoints(Processor):
- """Normalize KeyPoints.
- """
-
- def __init__(self):
- super(NormalizeKeypoints, self).__init__()
-
- def call(self, keypoints3D):
- return normalize_keypoints(keypoints3D)
-
-
-class FlipRightHandToLeftHand(Processor):
- """Flip Right hand keypoints to Left hand keypoints.
- """
-
- def __init__(self, flip_to_left=True):
- super(FlipRightHandToLeftHand, self).__init__()
- self.flip_to_left = flip_to_left
-
- def call(self, keypoints3D):
- return flip_right_to_left_hand(keypoints3D, self.flip_to_left)
-
-
-class ExtractDominantHandVisibility(Processor):
- """Extract hand Visibility of Left or Right hand based on the
- dominant_hand flag.
- """
-
- def __init__(self):
- super(ExtractDominantHandVisibility, self).__init__()
-
- def call(self, keypoint_visibility, dominant_hand):
- return extract_dominant_hand_visibility(keypoint_visibility,
- dominant_hand)
-
-
-class ExtractDominantKeypoints2D(Processor):
- """Extract hand keypoints of Left or Right hand based on the
- dominant_hand flag.
- """
-
- def __init__(self):
- super(ExtractDominantKeypoints2D, self).__init__()
-
- def call(self, keypoint_visibility, dominant_hand):
- return extract_dominant_keypoints2D(keypoint_visibility,
- dominant_hand)
-
-
-class CropImageFromMask(Processor):
- """Crop Image from Mask.
- """
-
- def __init__(self, image_size=(320, 320, 3), crop_size=256):
- super(CropImageFromMask, self).__init__()
- self.image_size = image_size
- self.crop_size = crop_size
-
- def call(self, keypoints, keypoint_visibility, image, camera_matrix):
- return crop_image_from_mask(keypoints, keypoint_visibility, image,
- self.image_size, self.crop_size,
- camera_matrix)
-
-
-class CreateScoremaps(Processor):
- """Create Gaussian Score maps representing 2D Keypoints.
- image_size: Size of the input image
- crop_size: Cropped Image size
- variance: variance of the gaussian scoremap to be generated
- """
-
- def __init__(self, image_size, crop_size, variance):
- super(CreateScoremaps, self).__init__()
- self.image_size = image_size
- self.crop_size = crop_size
- self.variance = variance
-
- def call(self, keypoints2D, keypoints_visibility):
- return create_score_maps(keypoints2D, keypoints_visibility,
- self.image_size, self.crop_size, self.variance)
-
-
-class Extract2DKeypoints(Processor):
- """ Extract the keyppoints based on the visibility of the hand"""
-
- def __init__(self):
- super(Extract2DKeypoints, self).__init__()
-
- def call(self, keypoint_visibility):
- return extract_2D_keypoints(keypoint_visibility)
-
-
-class ExtractBoundingbox(Processor):
- """ Extract bounding box from a binary mask"""
-
- def __init__(self):
- super(ExtractBoundingbox, self).__init__()
-
- def call(self, binary_hand_mask):
- return extract_bounding_box(binary_hand_mask)
-
-
-class AdjustCropSize(Processor):
- """ Adjust the crop size with a buffer of scale 0.25 added"""
-
- def __init__(self, crop_size=256):
- super(AdjustCropSize, self).__init__()
- self.crop_size = crop_size
-
- def call(self, crop_size_best):
- crop_size_best = crop_size_best.astype(dtype=np.float64)
- crop_size_best = crop_size_best * 1.25
- scaled_crop = np.maximum(self.crop_size / crop_size_best, 0.25)
- scaled_crop = np.minimum(scaled_crop, 5.0)
- return scaled_crop
-
-
-class CropImage(Processor):
- """ Crop the input image provided the location, output image size and the
- scaling of the output image"""
-
- def __init__(self, crop_size=256):
- super(CropImage, self).__init__()
- self.crop_size = crop_size
-
- def call(self, image, crop_location, scale):
- return crop_image_from_coordinates(image, crop_location, self.crop_size,
- scale)
-
-
-class ExtractKeypoints(Processor):
- """ Extract keypoints when provided with a predicted scoremap"""
-
- def __init__(self):
- super(ExtractKeypoints, self).__init__()
-
- def call(self, keypoint_scoremaps):
- return extract_keypoints(keypoint_scoremaps)
-
-
-class FindMaxLocation(Processor):
- """ Find the brightest point in the score map, which is represented as a
- keypoint"""
-
- def __init__(self):
- super(FindMaxLocation, self).__init__()
-
- def call(self, scoremaps):
- keypoints_2D = find_max_location(scoremaps)
- return keypoints_2D
diff --git a/examples/hand_pose_estimation/processors_standard.py b/examples/hand_pose_estimation/processors_standard.py
deleted file mode 100644
index 9061217ce..000000000
--- a/examples/hand_pose_estimation/processors_standard.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import numpy as np
-from paz.abstract import Processor
-from backend_standard import wrap_as_dictionary, merge_dictionaries
-from backend_standard import resize_image_with_linear_interpolation
-from paz.backend.boxes import to_one_hot
-
-
-class WrapToDictionary(Processor):
- """ Wrap the input values to a dictionary with already provided key
- values """
-
- def __init__(self, keys):
- super(WrapToDictionary, self).__init__()
- if not isinstance(keys, list):
- keys = list(keys)
- self.keys = keys
-
- def call(self, values):
- if not isinstance(values, list):
- values = list(values)
- return wrap_as_dictionary(self.keys, values)
-
-
-class MergeDictionaries(Processor):
- """ Merge two dictionaries into one"""
-
- def __init__(self):
- super(MergeDictionaries, self).__init__()
-
- def call(self, dicts):
- return merge_dictionaries(dicts)
-
-
-class ToOneHot(Processor):
- """Extract Hand mask."""
-
- def __init__(self, num_classes=2):
- super(ToOneHot, self).__init__()
- self.num_classes = num_classes
-
- def call(self, class_indices):
- return to_one_hot(class_indices, self.num_classes)
-
-
-class ResizeImageWithLinearInterpolation(Processor):
- def __init__(self, shape):
- self.shape = shape
- super(ResizeImageWithLinearInterpolation, self).__init__()
-
- def call(self, image):
- return resize_image_with_linear_interpolation(image, self.shape)
-
-
-class TransposeOfArray(Processor):
- def __init__(self):
- super(TransposeOfArray, self).__init__()
-
- def call(self, array):
- return array.T
-
-
-class ListToArray(Processor):
- def __init__(self):
- super(ListToArray, self).__init__()
-
- def call(self, input):
- return np.array(input)
\ No newline at end of file
diff --git a/examples/hand_pose_estimation/test_data_loaders.py b/examples/hand_pose_estimation/test_data_loaders.py
deleted file mode 100644
index a82d6c7e6..000000000
--- a/examples/hand_pose_estimation/test_data_loaders.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from data_loaders import HandPoseLoader
-from backend import to_homogeneous_coordinates, normalize_keypoints
-
-data_loader = HandPoseLoader(
- '/home/dfki.uni-bremen.de/jbandlamudi/DFKI_Work/RHD_published_v2/')
-
-
-def test_image_loading(image_path):
- image = data_loader.load_images(image_path)
- assert image.shape == data_loader.image_size
-
-
-def test_segmentation_map_loading(segmentation_path):
- segmentation_mask = data_loader.load_images(segmentation_path)
- assert segmentation_mask.shape == data_loader.image_size
-
-
-def test_conversion_to_homogeneous_coordinates(vector):
- homogeneous_vector = to_homogeneous_coordinates(vector)
- assert len(homogeneous_vector) == 4
-
-
-def test_keypoint_normalization(keypoints):
- keypoint_scale, norm_keypoints = normalize_keypoints(keypoints)
diff --git a/examples/hand_pose_estimation/test_pipeline.py b/examples/hand_pose_estimation/test_pipeline.py
deleted file mode 100644
index 097139f7d..000000000
--- a/examples/hand_pose_estimation/test_pipeline.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from HandPoseEstimation import HandSegmentationNet, PosePriorNet, PoseNet
-from HandPoseEstimation import ViewPointNet
-from paz.backend.image.opencv_image import load_image, show_image, write_image
-from pipelines import DetectHandKeypoints
-
-use_pretrained = True
-HandSegNet = HandSegmentationNet()
-HandPoseNet = PoseNet()
-HandPosePriorNet = PosePriorNet()
-HandViewPointNet = ViewPointNet()
-
-pipeline = DetectHandKeypoints(HandSegNet, HandPoseNet, HandPosePriorNet,
- HandViewPointNet)
-
-image = load_image('./sample.jpg')
-detection = pipeline(image)
-
-show_image(detection['image'].astype('uint8'))
-write_image('./detection.jpg', detection['image'].astype('uint8'))
diff --git a/examples/hand_pose_estimation/train_handsegnet.py b/examples/hand_pose_estimation/train_handsegnet.py
deleted file mode 100644
index 14cb84bf5..000000000
--- a/examples/hand_pose_estimation/train_handsegnet.py
+++ /dev/null
@@ -1,122 +0,0 @@
-import os
-import json
-import argparse
-from datetime import datetime
-
-import tensorflow as tf
-
-gpus = tf.config.experimental.list_physical_devices('GPU')
-tf.config.experimental.set_memory_growth(gpus[0], True)
-
-from tensorflow.keras.optimizers import Adam
-from tensorflow.keras.callbacks import CSVLogger, EarlyStopping
-from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
-from tensorflow.keras.losses import CategoricalCrossentropy
-
-from paz.abstract import ProcessingSequence
-from pipelines import AugmentHandSegmentation
-from HandPoseEstimation import Hand_Segmentation_Net
-from hand_keypoints_loader import RenderedHandLoader
-from utils import load_pretrained_weights
-
-description = 'Training script for semantic segmentation'
-parser = argparse.ArgumentParser(description=description)
-parser.add_argument('--dataset_path', type=str, help='Path to dataset')
-parser.add_argument('-p', '--save_path', default='experiments', type=str,
- help='Path for saving evaluations')
-parser.add_argument('-d', '--dataset', default='RHD', type=str,
- choices=['RHD'])
-parser.add_argument('-b', '--batch_size', default=5, type=int,
- help='Batch size used during optimization')
-parser.add_argument('-e', '--epochs', default=100, type=int,
- help='Number of epochs before finishing')
-parser.add_argument('-o', '--stop_patience', default=5, type=int,
- help='Early stop patience')
-parser.add_argument('-u', '--reduce_patience', default=2, type=int,
- help='Reduce learning rate patience')
-parser.add_argument('-l', '--run_label', default='RUN_00', type=str,
- help='Label used to distinguish between different runs')
-parser.add_argument('-s', '--evaluation_splits', nargs='+', type=str,
- default=['test'], help='Splits used for evaluation')
-parser.add_argument('-v', '--validation_split', default='val', type=str,
- help='Split used for validation')
-parser.add_argument('-t', '--time', type=str,
- default=datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
-parser.add_argument('-a', '--activation', type=str, default='softmax',
- help='Final activation function')
-parser.add_argument('-z', '--image_size', default=320, type=int,
- help='Image size. Value is applied to height and width')
-parser.add_argument('-w', '--load_pretrained_weights', default=True, type=bool,
- help='If True, load pre-trained weights')
-parser.add_argument('-wp', '--pretrained_weights_path',
- default='./person_net.ckpt.meta', type=str,
- help='Path to pre-trained weights')
-
-args = parser.parse_args()
-
-model = Hand_Segmentation_Net()
-loss = CategoricalCrossentropy(from_logits=True)
-
-splits = ['train'] + args.validation_split
-
-name_to_manager = {'RHD': RenderedHandLoader}
-
-data_managers, datasets = {}, {}
-for split in splits:
- args_manager = [args.dataset_path]
- data_manager = name_to_manager[args.dataset](*args_manager)
- data_managers[split] = data_manager
- datasets[split] = data_manager.load_data()
-
-# instantiating sequencers
-sequencers = {}
-for split in splits:
- data_manager = data_managers[split]
- image_shape = (args.image_size, args.image_size)
- processor = AugmentHandSegmentation(image_shape)
- sequencers[split] = ProcessingSequence(
- processor, args.batch_size, datasets[split])
-
-model = Hand_Segmentation_Net()
-loss = CategoricalCrossentropy(from_logits=True)
-
-model.compile(loss=loss, optimizer=Adam(), metrics=['mean_squared_error'])
-
-if args.load_pretrained_weights:
- model = load_pretrained_weights(args.pretrained_weights_path, model=model,
- num_layers=16)
-
-# creating directory for experiment
-callbacks = []
-experiment_label = '_'.join([args.dataset, model.name, args.run_label])
-experiment_path = os.path.join(args.save_path, experiment_label)
-if not os.path.exists(experiment_path):
- os.makedirs(experiment_path)
-
-# setting additional callbacks
-log = CSVLogger(os.path.join(experiment_path, 'optimization.log'))
-stop = EarlyStopping(patience=args.stop_patience)
-plateau = ReduceLROnPlateau(patience=args.reduce_patience)
-save_filename = os.path.join(experiment_path, 'model.hdf5')
-save = ModelCheckpoint(save_filename, save_best_only=True)
-callbacks.extend([log, stop, save, plateau])
-
-# saving hyper-parameters and model summary
-with open(os.path.join(experiment_path, 'hyperparameters.json'), 'w') as filer:
- json.dump(args.__dict__, filer, indent=4)
-with open(os.path.join(experiment_path, 'model_summary.txt'), 'w') as filer:
- model.summary(print_fn=lambda x: filer.write(x + '\n'))
-
-# starting optimization
-model.fit(
- sequencers['train'],
- epochs=args.epochs,
- validation_data=sequencers[args.validation_split],
- callbacks=callbacks,
- verbose=1,
- workers=1,
- use_multiprocessing=False)
-
-# saving using model tf
-save_filename = os.path.join(experiment_path, 'model.tf')
-model.save_weights(save_filename, save_format='tf')
diff --git a/examples/hand_pose_estimation/unit_tests.py b/examples/hand_pose_estimation/unit_tests.py
deleted file mode 100644
index 578db26b4..000000000
--- a/examples/hand_pose_estimation/unit_tests.py
+++ /dev/null
@@ -1,277 +0,0 @@
-from backend_SE3 import build_rotation_matrix_x, build_rotation_matrix_y
-from backend_SE3 import build_rotation_matrix_z, build_affine_matrix
-from backend_SE3 import rotation_from_axis_angles
-from backend_SE3 import to_homogeneous_coordinates, build_translation_matrix_SE3
-
-from backend_keypoints import canonical_transformations_on_keypoints
-from backend_keypoints import get_hand_side_and_keypooints
-from backend_keypoints import keypoints_to_palm_coordinates
-from backend_keypoints import normalize_keypoints, extract_hand_side_keypoints
-from RHDv2 import LEFT_WRIST
-from RHDv2 import RIGHT_WRIST
-from hand_keypoints_loader import RenderedHandLoader
-from paz.backend.boxes import to_one_hot
-from processors_standard import TransposeOfArray, ListToArray
-
-import paz.processors as pr
-from paz.processors import SequentialProcessor
-
-data_loader = RenderedHandLoader(
- '/media/jarvis/CommonFiles/5th_Semester/DFKI_Work/RHD_published_v2/')
-
-from HandPoseEstimation import HandSegmentationNet, PosePriorNet, PoseNet
-from HandPoseEstimation import ViewPointNet
-import numpy as np
-from pipelines import PostProcessSegmentation, \
- Process2DKeypoints
-from paz.backend.image.opencv_image import load_image
-from backend_keypoints import create_multiple_gaussian_map
-from processors_keypoints import ExtractKeypoints
-
-np.random.seed(0)
-
-use_pretrained = True
-HandSegNet = HandSegmentationNet()
-HandPoseNet = PoseNet()
-HandPosePriorNet = PosePriorNet()
-HandViewPointNet = ViewPointNet()
-
-
-def test_keypoints_to_palm_coordinates():
- keypoints = np.arange(0, 123).reshape((41, 3))
- keypoint_palm = keypoints_to_palm_coordinates(keypoints)
- assert keypoint_palm[LEFT_WRIST, :].all() == np.array([
- [18., 19., 20.]]).all()
- assert keypoint_palm[RIGHT_WRIST, :].all() == np.array([
- [81., 82., 83.]]).all()
-
-
-def test_one_hot_encode():
- one_hot_vector = to_one_hot([1], 2)
- assert type(one_hot_vector).__module__ == np.__name__
- assert one_hot_vector.all() == np.array([0, 1]).all()
- assert to_one_hot([0], 2).all() == np.array([1, 0]).all()
-
-
-def test_normalize_keypoints():
- test_array = np.array([[0., 0., 0.], [1., 1., 1.], [1., 1., 1.],
- [2., 2., 2.], [2., 2., 2.], [3., 3., 3.],
- [3., 3., 3.], [4., 4., 4.], [5., 5., 5.],
- [5., 5., 5.], [6., 6., 6.], [6., 6., 6.],
- [7., 7., 7.], [8., 8., 8.], [8., 8., 8.],
- [9., 9., 9.], [9., 9., 9.], [10., 10., 10.],
- [10., 10., 10.], [11., 11., 11.], [12., 12., 12.]])
- keypoints3D = np.random.rand(21, 3)
- keypoint_scale, keypoint_normalized = normalize_keypoints(keypoints3D)
- assert round(keypoint_scale, 2) == 0.68
- assert keypoints3D.shape == keypoint_normalized.shape
- assert keypoint_normalized.round().all() == test_array.all()
-
-
-def test_extracting_handside():
- keypoints3D = np.random.rand(42, 3)
- left_keypoints = extract_hand_side_keypoints(keypoints3D, 0)
- right_keypoints = extract_hand_side_keypoints(keypoints3D, 1)
- assert left_keypoints.shape == (21, 3)
- assert right_keypoints.shape == (21, 3)
-
-
-def test_to_homogeneous():
- vector_shape = (1, 3)
- keypoint = np.zeros(vector_shape)
- homogeneous_keypoint = to_homogeneous_coordinates(keypoint)
- assert homogeneous_keypoint[-1] == 1
- assert homogeneous_keypoint.shape == (vector_shape[1] + 1,)
-
-
-def test_to_translation_1D():
- translation_matrix = build_translation_matrix_SE3([1])
-
- assert translation_matrix.shape == (1, 4, 4)
- assert translation_matrix[-1].all() == np.array([0, 0, 0, 1]).all()
-
-
-def test_to_translation_3D():
- translation_matrix = build_translation_matrix_SE3([1, 2, 3])
-
- assert translation_matrix[:, :, -1].all() == np.array([[1, 2, 3, 1]]).all()
- assert translation_matrix.shape == (1, 4, 4)
- assert translation_matrix[-1].all() == np.array([0, 0, 0, 1]).all()
-
-
-def test_to_affine_matrix():
- matrix = np.arange(0, 9).reshape((3, 3))
- affine_matrix = build_affine_matrix(matrix)
-
- assert matrix.shape == (3, 3)
- assert affine_matrix.shape == (4, 4)
-
-
-def test_rotation_matrix_x():
- rotation_matrix_test = np.array([[1.0000000, 0.0000000, 0.0000000],
- [0.0000000, 0.8668, 0.5],
- [0.0000000, -0.5, 0.8668]])
- rotation_matrix = build_rotation_matrix_x(np.deg2rad(30))
- assert rotation_matrix.shape == rotation_matrix_test.shape
- assert np.round(np.linalg.det(rotation_matrix)) == 1.0
- assert np.round(np.linalg.inv(rotation_matrix)).all() == \
- np.round(np.transpose(rotation_matrix)).all()
- assert rotation_matrix_test.round().all() == \
- rotation_matrix.round().all()
-
-
-def test_rotation_matrix_y():
- rotation_matrix_test = np.array([[0.8660254, 0.0000000, 0.5000000],
- [0.0000000, 1.0000000, 0.0000000],
- [-0.5000000, 0.0000000, 0.8660254]])
- rotation_matrix = build_rotation_matrix_y(np.deg2rad(30))
- assert rotation_matrix.shape == rotation_matrix_test.shape
- assert np.round(np.linalg.det(rotation_matrix)) == 1.0
- assert np.round(np.linalg.inv(rotation_matrix)).all() == \
- np.round(np.transpose(rotation_matrix)).all()
- assert rotation_matrix_test.round().all() == \
- rotation_matrix.round().all()
-
-
-def test_rotation_matrix_z():
- rotation_matrix_test = np.array([[0.8660254, -0.5000000, 0.0000000],
- [0.5000000, 0.8660254, 0.0000000],
- [0.0000000, 0.0000000, 1.0000000]])
- rotation_matrix = build_rotation_matrix_z(np.deg2rad(30))
- assert rotation_matrix.shape == rotation_matrix_test.shape
- assert np.round(np.linalg.det(rotation_matrix)) == 1.0
- assert np.round(np.linalg.inv(rotation_matrix)).all() == \
- np.round(np.transpose(rotation_matrix)).all()
- assert rotation_matrix_test.round().all() == \
- rotation_matrix.round().all()
-
-
-def test_rotation_matrix_axis_angles():
- rotation_matrix_test = np.array([[0.739, -0.406, 0.536],
- [0.536, 0.837, -0.1],
- [-0.4, 0.36, 0.837]])
- rotation_matrix = rotation_from_axis_angles(np.deg2rad([15, 30, 30]))
- print(rotation_matrix)
- assert rotation_matrix.shape == rotation_matrix_test.shape
- assert np.round(np.linalg.det(rotation_matrix)) == 1.0
- assert np.round(np.linalg.inv(rotation_matrix)).all() == \
- np.round(np.transpose(rotation_matrix)).all()
- assert rotation_matrix_test.round().all() == \
- rotation_matrix.round().all()
-
-
-def test_get_affine_matrix():
- rotation_matrix = build_rotation_matrix_x(np.deg2rad(30))
- affine_rotation_matrix = build_affine_matrix(rotation_matrix)
- assert affine_rotation_matrix.shape == (4, 4)
- assert affine_rotation_matrix[-1].all() == np.array([0, 0, 0, 1]).all()
-
-
-def test_hand_side_extraction(segmentation_path, label_path):
- segmentation_mask = data_loader.load_images(segmentation_path)
- annotations_all = data_loader._load_annotation(label_path)
- keypoints3D = data_loader.process_keypoints_3D(annotations_all[11]['xyz'])
- hand_side, hand_side_keypoints, dominant_hand_keypoints = \
- get_hand_side_and_keypooints(segmentation_mask, keypoints3D)
-
- assert type(hand_side).__module__ == np.__name__
- assert hand_side == np.array([0])
- assert hand_side_keypoints.shape == (21, 3)
- assert dominant_hand_keypoints.shape == (21, 3)
-
-
-def test_canonical_transformations(label_path):
- annotations_all = data_loader._load_annotation(label_path)
- keypoints3D = data_loader.process_keypoints_3D(annotations_all[11]['xyz'])
- transformed_keypoints, rotation_matrix = canonical_transformations_on_keypoints(
- keypoints3D.T)
-
- assert transformed_keypoints.shape == (42, 3)
- assert rotation_matrix.shape == (3, 3)
-
-
-def test_preprocess_image():
- preprocess_pipeline = SequentialProcessor(
- [pr.NormalizeImage(), pr.ResizeImage((320, 320)), pr.ExpandDims(0)])
- image = load_image('./sample.jpg')
- processed_image = preprocess_pipeline(image)
-
- assert len(processed_image.shape) == 4
- assert processed_image.shape == (1, 320, 320, 3)
-
-
-def test_image_cropping():
- handsegnet = HandSegmentationNet()
- preprocess_image = SequentialProcessor(
- [pr.NormalizeImage(), pr.ResizeImage((320, 320)),
- pr.ExpandDims(0)])
-
- postprocess_segmentation = PostProcessSegmentation(
- 320, 320)
-
- localize_hand = pr.Predict(handsegnet, preprocess_image,
- postprocess_segmentation)
- image = load_image('./sample.jpg')
- hand_crop, segmentation_map, center, boxes, crop_sizes = localize_hand(
- image)
- box = boxes[0]
- xmin, ymin, xmax, ymax = box
- crop_size = crop_sizes[0]
-
- assert len(hand_crop.shape) == 4
- assert hand_crop.shape == (1, 256, 256, 3)
- assert len(segmentation_map.shape) == 4
- assert segmentation_map.shape == (1, 320, 320, 1)
- assert center == [[191.5, 194.5]]
- assert len(box) == 4
- assert box == [114, 153, 269, 236]
- assert xmax > xmin and ymin > ymax
- assert round(crop_size[0], 2) == 1.32
-
-
-def test_segmentation_postprocess():
- preprocess_pipeline = SequentialProcessor(
- [pr.NormalizeImage(), pr.ResizeImage((320, 320)), pr.ExpandDims(0)])
- image = load_image('./sample.jpg')
- processed_image = preprocess_pipeline(image)
-
- localization_pipeline = PostProcessSegmentation(HandSegNet)
- localization_output = localization_pipeline(processed_image)
-
- assert len(localization_output) == 5
- assert localization_output[0].shape == (1, 256, 256, 3)
- assert localization_output[1].shape == (1, 320, 320, 1)
- assert localization_output[2].shape == (1, 2)
- assert localization_output[3].shape == (1, 2, 2)
- assert localization_output[4].shape == (1, 1)
-
-
-def test_keypoints2D_process():
- preprocess_pipeline = SequentialProcessor(
- [pr.NormalizeImage(), pr.ResizeImage((320, 320)), pr.ExpandDims(0)])
- image = load_image('./sample.jpg')
- processed_image = preprocess_pipeline(image)
-
- localization_pipeline = PostProcessSegmentation(HandSegNet)
- localization_output = localization_pipeline(processed_image)
-
- keypoints_pipeline = Process2DKeypoints(HandPoseNet)
- score_maps_dict = keypoints_pipeline(np.squeeze(localization_output[0],
- axis=0))
- score_maps = score_maps_dict['score_maps']
-
- assert score_maps.shape == (1, 32, 32, 21)
- assert len(score_maps) == 1
-
-
-def test_extract_keypoints2D():
- uv_coordinates = np.array([[0, 0], [1, 1]])
- uv_coordinates = np.expand_dims(uv_coordinates, axis=0)
-
- gaussian_maps = create_multiple_gaussian_map(uv_coordinates, (256, 256),
- sigma=0.1, validity_mask=None)
- gaussian_maps = np.expand_dims(gaussian_maps, axis=0)
- keypoints_extraction_pipeline = ExtractKeypoints()
- keypoints2D = keypoints_extraction_pipeline(gaussian_maps)
-
- assert keypoints2D[0] == [0, 0]
diff --git a/examples/hand_pose_estimation/utils.py b/examples/hand_pose_estimation/utils.py
deleted file mode 100644
index b63b843b8..000000000
--- a/examples/hand_pose_estimation/utils.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import tensorflow as tf
-import numpy as np
-import matplotlib.pyplot as plt
-import cv2
-from PIL import Image
-from skimage import transform
-
-
-def load_pretrained_weights(weights_path, model, num_layers):
- with tf.compat.v1.Session() as sess:
-
- # import graph
- saver = tf.compat.v1.train.import_meta_graph(weights_path)
- sess.run(tf.compat.v1.global_variables_initializer())
- # load weights for graph
- saver.restore(sess, weights_path[:-5])
-
- # get all global variables (including model variables)
- global_variables = tf.compat.v1.global_variables()
-
- # get their name and value and put them into dictionary
- sess.as_default()
-
- model_variables = {}
- for variable in global_variables:
- try:
- model_variables[variable.name] = variable.eval()
- except:
- print("For var={}, an exception occurred".format(variable.name))
-
- layer_count = 1 # skip Input layer
- for key_count, weights in enumerate(model_variables.items()):
- if layer_count > num_layers:
- break
-
- while not model.layers[layer_count].trainable_weights:
- layer_count = layer_count + 1
-
- if key_count % 2 == 0:
- kernel = weights[1]
- print(kernel.shape)
- else:
- bias = weights[1]
- print(bias.shape)
- model.layers[layer_count].set_weights([kernel, bias])
- layer_count = layer_count + 1
-
- return model
-
-
-def visualize_heatmaps(heatmaps):
- """Visualize all 21 heatmaps in a 7x3 grid"""
-
- fig, axes = plt.subplots(7, 3, figsize=(16, 16))
- print(heatmaps.shape)
- # heatmaps = np.expand_dims(heatmaps, axis=0)
-
- for i in range(heatmaps.shape[3]):
- img_row = int(i / 3)
- img_col = i % 3
-
- heatmap = heatmaps[:, :, :, i]
-
- heatmap = (heatmap - tf.reduce_min(heatmap)) / (
- tf.reduce_max(heatmap) - tf.reduce_min(heatmap))
-
- axes[img_row, img_col].imshow(np.squeeze(heatmap), cmap='jet')
- plt.show()
-
-
-def show_mask(image, name='image', wait=True):
- """Shows RGB image in an external window.
-
- # Arguments
- image: Numpy array
- name: String indicating the window name.
- wait: Boolean. If ''True'' window stays open until user presses a key.
- If ''False'' windows closes immediately.
- """
- if image.dtype != np.uint8:
- raise ValueError('``image`` must be of type ``uint8``')
- cv2.imshow(name, image)
- if wait:
- while True:
- if cv2.waitKey(0) & 0xFF == ord('q'):
- break
- cv2.destroyAllWindows()
-
-
-def load(filename):
- np_image = Image.open(filename)
- np_image = np.array(np_image).astype('float32')/255
- return np_image
diff --git a/examples/minimal_hand/README.md b/examples/minimal_hand/README.md
deleted file mode 100644
index da911e790..000000000
--- a/examples/minimal_hand/README.md
+++ /dev/null
@@ -1,21 +0,0 @@
-### This example detects hand pose from an image.
-
-To test the live hand pose detection from camera, run:
-```py
-python demo.py
-```
-
-To test the hand pose detection on image, run:
-```py
-python demo_image.py
-```
-
-To test the live hand closure status with pose detection from camera, run:
-```py
-python is_open_demo.py
-```
-
-To test the live hand pose detection from camera and visualize keypoints in 3D, run(This module has an extra dependency of matplotlib):
-```py
-python demo3D.py
-```
\ No newline at end of file
diff --git a/examples/minimal_hand/is_open_demo.py b/examples/minimal_hand/is_open_demo.py
deleted file mode 100644
index c7cfe16a5..000000000
--- a/examples/minimal_hand/is_open_demo.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import argparse
-from paz.applications import MinimalHandPoseEstimation
-from paz.backend.camera import VideoPlayer
-from paz.backend.camera import Camera
-from paz.abstract import SequentialProcessor
-from paz import processors as pr
-
-parser = argparse.ArgumentParser(description='Minimal hand keypoint detection')
-parser.add_argument('-c', '--camera_id', type=int, default=0,
- help='Camera device ID')
-args = parser.parse_args()
-
-
-pipeline = SequentialProcessor()
-pipeline.add(MinimalHandPoseEstimation(right_hand=False))
-pipeline.add(pr.UnpackDictionary(['image', 'relative_angles']))
-pipeline.add(pr.ControlMap(pr.IsHandOpen(), [1], [1]))
-pipeline.add(pr.ControlMap(pr.BooleanToTextMessage('OPEN', 'CLOSE'), [1], [1]))
-pipeline.add(pr.ControlMap(pr.DrawText(), [0, 1], [1]))
-pipeline.add(pr.WrapOutput(['image', 'status']))
-
-camera = Camera(args.camera_id)
-player = VideoPlayer((640, 480), pipeline, camera)
-player.run()
diff --git a/paz/__init__.py b/paz/__init__.py
index 1c98a23a8..850505a32 100644
--- a/paz/__init__.py
+++ b/paz/__init__.py
@@ -1 +1 @@
-__version__ = '0.1.9'
+__version__ = '0.1.10'
diff --git a/paz/applications.py b/paz/applications.py
index 5e75dbce2..0128a8f21 100644
--- a/paz/applications.py
+++ b/paz/applications.py
@@ -17,3 +17,5 @@
from .pipelines import DetNetHandKeypoints
from .pipelines import MinimalHandPoseEstimation
from .pipelines import DetectMinimalHand
+from .pipelines import ClassifyHandClosure
+from .pipelines import SSD512MinimalHandPose
diff --git a/paz/backend/boxes.py b/paz/backend/boxes.py
index 5401eb663..93ba29068 100644
--- a/paz/backend/boxes.py
+++ b/paz/backend/boxes.py
@@ -38,8 +38,8 @@ def to_corner_form(boxes):
def encode(matched, priors, variances=[0.1, 0.1, 0.2, 0.2]):
- """Encode the variances from the priorbox layers into the ground truth boxes
- we have matched (based on jaccard overlap) with the prior boxes.
+ """Encode the variances from the priorbox layers into the ground truth
+ boxes we have matched (based on jaccard overlap) with the prior boxes.
# Arguments
matched: Numpy array of shape `(num_priors, 4)` with boxes in
@@ -367,7 +367,8 @@ def to_one_hot(class_indices, num_classes):
def make_box_square(box):
- """Makes box coordinates square with sides equal to the longest original side.
+ """Makes box coordinates square with sides equal to the longest
+ original side.
# Arguments
box: Numpy array with shape `(4)` with point corner coordinates.
@@ -442,7 +443,7 @@ def clip(coordinates, image_shape):
def denormalize_box(box, image_shape):
- """Scales corner box coordinates from normalized values to image dimensions.
+ """Scales corner box coordinates from normalized values to image dimensions
# Arguments
box: Numpy array containing corner box coordinates.
diff --git a/paz/backend/camera.py b/paz/backend/camera.py
index 37433730e..6dd94bdfa 100644
--- a/paz/backend/camera.py
+++ b/paz/backend/camera.py
@@ -131,6 +131,16 @@ def intrinsics_from_HFOV(self, HFOV=70, image_shape=None):
[0, 0, 1.0]])
self.intrinsics = intrinsics
+ def take_photo(self):
+        """Starts the camera, reads one RGB image and stops the camera.
+ """
+ self.start()
+ image = self.read()
+ # all pipelines start with RGB
+ image = convert_color_space(image, BGR2RGB)
+ self.stop()
+ return image
+
class VideoPlayer(object):
"""Performs visualization inferences in a real-time video.
@@ -231,7 +241,7 @@ def record_from_file(self, video_file_path, name='video.avi',
if (video.isOpened() is False):
print("Error opening video file")
- while(video.isOpened()):
+ while video.isOpened():
is_frame_received, frame = video.read()
if not is_frame_received:
print("Frame not received. Exiting ...")
diff --git a/paz/backend/standard.py b/paz/backend/standard.py
index 83561ca2b..a32c28c92 100644
--- a/paz/backend/standard.py
+++ b/paz/backend/standard.py
@@ -1,4 +1,5 @@
import numpy as np
+import tensorflow as tf
def append_values(dictionary, lists, keys):
@@ -249,3 +250,24 @@ def max_pooling_2d(image, pool_size=3, strides=1, padding='same'):
for x in range(0, W - pool_size + 1, strides):
max_image[y][x] = np.max(image[y:y + pool_size, x:x + pool_size])
return max_image
+
+
+def predict(x, model, preprocess=None, postprocess=None):
+ """Preprocess, predict and postprocess input.
+ # Arguments
+        x: Input to the model.
+        model: Callable, e.g. a Keras model.
+        preprocess: Callable, used for preprocessing the input ``x``.
+        postprocess: Callable, used for postprocessing the model's output.
+
+ # Note
+        If the model outputs a tf.Tensor, it is converted to a numpy array.
+ """
+ if preprocess is not None:
+ x = preprocess(x)
+ y = model(x)
+ if isinstance(y, tf.Tensor):
+ y = y.numpy()
+ if postprocess is not None:
+ y = postprocess(y)
+ return y
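
A self-contained usage sketch of the new `paz.backend.standard.predict` helper; the toy Keras model is an assumption used only to keep the example runnable:

```python
import numpy as np
import tensorflow as tf
from paz.backend.standard import predict

# toy model standing in for any callable that maps a batch to a batch
model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(4,))])
y = predict(np.ones((4,), dtype='float32'), model,
            preprocess=lambda x: np.expand_dims(x, 0),   # add batch axis
            postprocess=lambda x: np.squeeze(x, 0))      # drop batch axis
print(y.shape)   # (2,); numpy array even though the model yields a tf.Tensor
```
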
diff --git a/paz/models/detection/haar_cascade.py b/paz/models/detection/haar_cascade.py
index 4ffbac671..708b1c234 100644
--- a/paz/models/detection/haar_cascade.py
+++ b/paz/models/detection/haar_cascade.py
@@ -32,7 +32,7 @@ def __init__(self, weights='frontalface_default', class_arg=None,
self.scale = scale
self.neighbors = neighbors
- def predict(self, gray_image):
+ def __call__(self, gray_image):
""" Detects faces from gray images.
# Arguments
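
Since `HaarCascadeDetector` is now invoked through `__call__` instead of `predict`, call sites change roughly as sketched below; the image path and the gray-scale conversion step are assumptions:

```python
from paz.models import HaarCascadeDetector
from paz.backend.image import load_image
from paz import processors as pr

detector = HaarCascadeDetector('frontalface_default')
gray_image = pr.ConvertColorSpace(pr.RGB2GRAY)(load_image('face.jpg'))
boxes = detector(gray_image)   # previously: detector.predict(gray_image)
```
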
diff --git a/paz/pipelines/__init__.py b/paz/pipelines/__init__.py
index 9f00bcf5e..49b687147 100644
--- a/paz/pipelines/__init__.py
+++ b/paz/pipelines/__init__.py
@@ -20,6 +20,7 @@
from .detection import DetectKeypoints2D
from .detection import DetectFaceKeypointNet2D32
from .detection import SSD512HandDetection
+from .detection import SSD512MinimalHandPose
from .keypoints import KeypointNetSharedAugmentation
from .keypoints import KeypointNetInference
@@ -36,6 +37,7 @@
from .renderer import RandomizeRenderedImage
from .classification import MiniXceptionFER
+from .classification import ClassifyHandClosure
from .pose import EstimatePoseKeypoints
from .pose import HeadPoseKeypointNet2D32
diff --git a/paz/pipelines/classification.py b/paz/pipelines/classification.py
index c9b53050e..a2351e1ea 100644
--- a/paz/pipelines/classification.py
+++ b/paz/pipelines/classification.py
@@ -3,6 +3,7 @@
from . import PreprocessImage
from ..models.classification import MiniXception
from ..datasets import get_class_names
+from .keypoints import MinimalHandPoseEstimation
# neutral, happiness, surprise, sadness, anger, disgust, fear, contempt
@@ -45,3 +46,32 @@ def __init__(self):
self.add(pr.CopyDomain([0], [1]))
self.add(pr.ControlMap(pr.ToClassName(self.class_names), [0], [0]))
self.add(pr.WrapOutput(['class_name', 'scores']))
+
+
+class ClassifyHandClosure(SequentialProcessor):
+ """Pipeline to classify minimal hand closure status.
+
+ # Example
+ ``` python
+ from paz.pipelines import ClassifyHandClosure
+
+ classify = ClassifyHandClosure()
+
+ # apply directly to an image (numpy-array)
+ inference = classify(image)
+ ```
+
+ # Returns
+        A function that takes an RGB image and outputs a dictionary with
+        ``keys``: ``image`` and ``status`` (hand open or closed).
+ """
+ def __init__(self, draw=True, right_hand=False):
+ super(ClassifyHandClosure, self).__init__()
+ self.add(MinimalHandPoseEstimation(draw, right_hand))
+ self.add(pr.UnpackDictionary(['image', 'relative_angles']))
+ self.add(pr.ControlMap(pr.IsHandOpen(), [1], [1]))
+ self.add(pr.ControlMap(pr.BooleanToTextMessage('OPEN', 'CLOSE'),
+ [1], [1]))
+ if draw:
+ self.add(pr.ControlMap(pr.DrawText(), [0, 1], [0], {1: 1}))
+ self.add(pr.WrapOutput(['image', 'status']))
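
This pipeline covers what the deleted `examples/minimal_hand/is_open_demo.py` did; a live-demo sketch in the spirit of the old script (camera id 0 and the 640x480 window size are assumptions) would be:

```python
from paz.applications import ClassifyHandClosure
from paz.backend.camera import Camera, VideoPlayer

pipeline = ClassifyHandClosure(draw=True, right_hand=False)
camera = Camera(0)
player = VideoPlayer((640, 480), pipeline, camera)
player.run()   # shows the camera stream with the OPEN / CLOSE status drawn
```
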
diff --git a/paz/pipelines/detection.py b/paz/pipelines/detection.py
index c6c20900b..d173a902a 100644
--- a/paz/pipelines/detection.py
+++ b/paz/pipelines/detection.py
@@ -1,7 +1,5 @@
import numpy as np
-from paz.models.detection.haar_cascade import WEIGHT_PATH
-
from .. import processors as pr
from ..abstract import SequentialProcessor, Processor
from ..models import SSD512, SSD300, HaarCascadeDetector
@@ -9,7 +7,8 @@
from .image import AugmentImage, PreprocessImage
from .classification import MiniXceptionFER
-from .keypoints import FaceKeypointNet2D32
+from .keypoints import FaceKeypointNet2D32, DetectMinimalHand
+from .keypoints import MinimalHandPoseEstimation
class AugmentBoxes(SequentialProcessor):
@@ -208,8 +207,6 @@ class SSD512YCBVideo(DetectSingleShot):
as a dictionary with ``keys``: ``image`` and ``boxes2D``.
The corresponding values of these keys contain the image with the drawn
inferences and a list of ``paz.abstract.messages.Boxes2D``.
-
-
"""
def __init__(self, score_thresh=0.60, nms_thresh=0.45, draw=True):
names = get_class_names('YCBVideo')
@@ -516,3 +513,34 @@ def __init__(self, score_thresh=0.40, nms_thresh=0.45, draw=True):
head_weights='OIV6Hand')
super(SSD512HandDetection, self).__init__(
model, class_names, score_thresh, nms_thresh, draw=draw)
+
+
+class SSD512MinimalHandPose(DetectMinimalHand):
+ """Hand detection and minimal hand pose estimation pipeline.
+
+ # Arguments
+ right_hand: Boolean. True for right hand inference.
+ offsets: List of two elements. Each element must be between [0, 1].
+
+ # Example
+ ``` python
+ from paz.pipelines import SSD512MinimalHandPose
+
+ detect = SSD512MinimalHandPose()
+
+ # apply directly to an image (numpy-array)
+ inferences = detect(image)
+ ```
+
+ # Returns
+ A function that takes an RGB image and outputs the predictions
+ as a dictionary with ``keys``: ``image``, ``boxes2D``,
+ ``Keypoints2D``, ``Keypoints3D``.
+        The corresponding values of these keys contain the image with the
+        drawn inferences, the detected boxes and the 2D and 3D keypoints.
+ """
+ def __init__(self, right_hand=False, offsets=[0.25, 0.25]):
+ detector = SSD512HandDetection()
+ keypoint_estimator = MinimalHandPoseEstimation(right_hand)
+ super(SSD512MinimalHandPose, self).__init__(
+ detector, keypoint_estimator, offsets)
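
A live-demo sketch for the new pipeline, analogous to the other detection demos; the camera id and window size are assumptions:

```python
from paz.applications import SSD512MinimalHandPose
from paz.backend.camera import Camera, VideoPlayer

pipeline = SSD512MinimalHandPose(right_hand=False, offsets=[0.25, 0.25])
camera = Camera(0)
player = VideoPlayer((640, 480), pipeline, camera)
player.run()   # hand boxes plus 2D/3D keypoints drawn on the stream
```
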
diff --git a/paz/pipelines/keypoints.py b/paz/pipelines/keypoints.py
index 13f17d2ba..951228cb9 100644
--- a/paz/pipelines/keypoints.py
+++ b/paz/pipelines/keypoints.py
@@ -10,7 +10,7 @@
from .angles import IKNetHandJointAngles
-from ..backend.image import get_affine_transform, flip_left_right, lincolor
+from ..backend.image import get_affine_transform, lincolor
from ..backend.keypoints import flip_keypoints_left_right, uv_to_vu
from ..datasets import JOINT_CONFIG, FLIP_CONFIG
@@ -276,24 +276,26 @@ def __init__(self, shape=(128, 128), draw=True, right_hand=False):
super(DetNetHandKeypoints).__init__()
self.draw = draw
self.right_hand = right_hand
- self.preprocess = pr.SequentialProcessor(
- [pr.ResizeImage(shape), pr.ExpandDims(axis=0)])
- self.hand_estimator = DetNet()
+ self.preprocess = pr.SequentialProcessor()
+ self.preprocess.add(pr.ResizeImage(shape))
+ self.preprocess.add(pr.ExpandDims(axis=0))
+ if self.right_hand:
+ self.preprocess.add(pr.FlipLeftRightImage())
+ self.predict = pr.Predict(model=DetNet(), preprocess=self.preprocess)
self.scale_keypoints = pr.ScaleKeypoints(scale=4, shape=shape)
self.draw_skeleton = pr.DrawHandSkeleton()
self.wrap = pr.WrapOutput(['image', 'keypoints3D', 'keypoints2D'])
- def call(self, input_image):
- image = self.preprocess(input_image)
- if self.right_hand:
- image = flip_left_right(image)
- keypoints3D, keypoints2D = self.hand_estimator.predict(image)
+ def call(self, image):
+ keypoints3D, keypoints2D = self.predict(image)
+ keypoints3D = keypoints3D.numpy()
+ keypoints2D = keypoints2D.numpy()
if self.right_hand:
keypoints2D = flip_keypoints_left_right(keypoints2D)
keypoints2D = uv_to_vu(keypoints2D)
- keypoints2D = self.scale_keypoints(keypoints2D, input_image)
+ keypoints2D = self.scale_keypoints(keypoints2D, image)
if self.draw:
- image = self.draw_skeleton(input_image, keypoints2D)
+ image = self.draw_skeleton(image, keypoints2D)
return self.wrap(image, keypoints3D, keypoints2D)
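
The refactored `DetNetHandKeypoints` is still called on a full RGB image; a single-image sketch (the image path is a placeholder) looks like:

```python
from paz.applications import DetNetHandKeypoints
from paz.backend.image import load_image, show_image

estimate = DetNetHandKeypoints(shape=(128, 128), draw=True, right_hand=False)
inferences = estimate(load_image('hand.jpg'))   # 'hand.jpg' is a placeholder path
show_image(inferences['image'])                 # keypoints drawn on the input image
```
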
diff --git a/paz/processors/__init__.py b/paz/processors/__init__.py
index 7d2150f1f..8a6186383 100644
--- a/paz/processors/__init__.py
+++ b/paz/processors/__init__.py
@@ -57,6 +57,7 @@
from .image import ReplaceLowerThanThreshold
from .image import GetNonZeroArguments
from .image import GetNonZeroValues
+from .image import FlipLeftRightImage
from .image import ImagenetPreprocessInput
@@ -118,9 +119,11 @@
from .standard import Scale
from .standard import AppendValues
from .standard import BooleanToTextMessage
+from .standard import PrintTopics
from .pose import SolvePNP
from .pose import SolveChangingObjectPnPRANSAC
+from .pose import Translation3DFromBoxWidth
from .groups import ToAffineMatrix
from .groups import RotationVectorToQuaternion
diff --git a/paz/processors/image.py b/paz/processors/image.py
index 93b7579da..2d464ecb7 100644
--- a/paz/processors/image.py
+++ b/paz/processors/image.py
@@ -22,6 +22,7 @@
from ..backend.image import normalized_device_coordinates_to_image
from ..backend.image import image_to_normalized_device_coordinates
from ..backend.image import replace_lower_than_threshold
+from ..backend.image import flip_left_right
from ..backend.image import BILINEAR, CUBIC
from ..backend.image.tensorflow_image import imagenet_preprocess_input
@@ -497,3 +498,16 @@ def __init__(self):
def call(self, image):
return imagenet_preprocess_input(image)
+
+
+class FlipLeftRightImage(Processor):
+ """Flips an image left and right.
+
+ # Arguments
+ image: Numpy array.
+ """
+ def __init__(self):
+ super(FlipLeftRightImage, self).__init__()
+
+ def call(self, image):
+ return flip_left_right(image)
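
A quick sanity-check sketch of the new processor; the toy image is an assumption:

```python
import numpy as np
from paz import processors as pr

flip = pr.FlipLeftRightImage()
image = np.zeros((4, 6, 3), dtype='uint8')
image[:, 0] = 255                       # mark the left-most column
flipped = flip(image)
assert np.all(flipped[:, -1] == 255)    # the mark is now on the right-most column
```
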
diff --git a/paz/processors/pose.py b/paz/processors/pose.py
index 6475e1e50..0580ba8a7 100644
--- a/paz/processors/pose.py
+++ b/paz/processors/pose.py
@@ -91,3 +91,39 @@ def call(self, object_points3D, image_points2D):
self.inlier_thresh, self.num_iterations)
rotation_vector = np.squeeze(rotation_vector)
return success, rotation_vector, translation
+
+
+class Translation3DFromBoxWidth(Processor):
+    """Computes each box's 3D translation from its pixel and real widths.
+
+ # Arguments
+ camera: Instance of ''paz.backend.Camera'' containing as properties
+ the ``camera_intrinsics`` a Numpy array of shape ``[3, 3]``
+ usually calculated from the openCV ``calibrateCamera`` function,
+ and the ``distortion`` a Numpy array of shape ``[5]`` in which the
+ elements are usually obtained from the openCV
+ ``calibrateCamera`` function.
+ real_width: Real width of the predicted box2D.
+
+ # Returns
+ Array (num_boxes, 3) containing all 3D translations.
+ """
+ def __init__(self, camera, real_width=0.3):
+ super(Translation3DFromBoxWidth, self).__init__()
+ self.camera = camera
+ self.real_width = real_width
+ self.focal_length = self.camera.intrinsics[0, 0]
+ self.u_camera_center = self.camera.intrinsics[0, 2]
+ self.v_camera_center = self.camera.intrinsics[1, 2]
+
+ def call(self, boxes2D):
+ hands_center = []
+ for box in boxes2D:
+ u_box_center, v_box_center = box.center
+ z_center = (self.real_width * self.focal_length) / box.width
+ u = u_box_center - self.u_camera_center
+ v = v_box_center - self.v_camera_center
+ x_center = (z_center * u) / self.focal_length
+ y_center = (z_center * v) / self.focal_length
+ hands_center.append([x_center, y_center, z_center])
+ return np.array(hands_center)
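
A rough sketch of how the new processor could be wired up; the field of view, image shape, box coordinates and the 0.3 m real hand width are assumptions made for illustration:

```python
from paz.abstract import Box2D
from paz.backend.camera import Camera
from paz.processors import Translation3DFromBoxWidth

camera = Camera()
camera.intrinsics_from_HFOV(HFOV=70, image_shape=(480, 640))  # assumed (height, width)
translate = Translation3DFromBoxWidth(camera, real_width=0.3)
boxes2D = [Box2D([200, 120, 360, 300], score=1.0, class_name='hand')]
translations = translate(boxes2D)   # array of shape (num_boxes, 3) in camera coordinates
```
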
diff --git a/paz/processors/standard.py b/paz/processors/standard.py
index fa9e0626e..07c10ce28 100644
--- a/paz/processors/standard.py
+++ b/paz/processors/standard.py
@@ -2,7 +2,7 @@
from ..abstract import Processor
from ..backend.boxes import to_one_hot
-from ..backend.standard import append_values
+from ..backend.standard import append_values, predict
class ControlMap(Processor):
@@ -244,12 +244,7 @@ def __init__(self, model, preprocess=None, postprocess=None):
self.postprocess = postprocess
def call(self, x):
- if self.preprocess is not None:
- x = self.preprocess(x)
- y = self.model.predict(x)
- if self.postprocess is not None:
- y = self.postprocess(y)
- return y
+ return predict(x, self.model, self.preprocess, self.postprocess)
class ToClassName(Processor):
@@ -470,3 +465,20 @@ def call(self, flag):
else:
message = self.false_message
return message
+
+
+class PrintTopics(Processor):
+    """Prints the values of the given topics from the input dictionary.
+    # Arguments
+        topics: List of keys into the input dictionary.
+
+    # Returns
+        The same dictionary, after printing the values of the given topics.
+ """
+ def __init__(self, topics):
+ super(PrintTopics, self).__init__()
+ self.topics = topics
+
+ def call(self, dictionary):
+ [print(dictionary[topic]) for topic in self.topics]
+ return dictionary
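
A debugging sketch for `PrintTopics`, appended to one of the new pipelines; the chosen pipeline, topic name and image path follow the docstrings above but are otherwise assumptions:

```python
from paz.abstract import SequentialProcessor
from paz.applications import SSD512MinimalHandPose
from paz import processors as pr
from paz.backend.image import load_image

pipeline = SequentialProcessor()
pipeline.add(SSD512MinimalHandPose())
pipeline.add(pr.PrintTopics(['boxes2D']))       # prints the detected boxes, passes the dict on
inferences = pipeline(load_image('hand.jpg'))   # 'hand.jpg' is a placeholder path
```
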