diff --git a/README.md b/README.md index bd4e4dcc6..5d4a5fff4 100644 --- a/README.md +++ b/README.md @@ -16,11 +16,11 @@ PAZ is used in the following examples (links to **real-time demos** and training |---------------------------|--------------------------| -----------------------| || | | -|[Semantic segmentation](https://github.com/oarriaga/paz/tree/master/examples/semantic_segmentation) | [Hand pose estimation](https://github.com/oarriaga/paz/tree/master/examples/minimal_hand) | [Human pose estimation](https://github.com/oarriaga/paz/tree/master/examples/human_pose_estimation_2D) | +|[Semantic segmentation](https://github.com/oarriaga/paz/tree/master/examples/semantic_segmentation) | [Hand pose estimation](https://github.com/oarriaga/paz/tree/master/examples/hand_pose_estimation) | [Human pose estimation](https://github.com/oarriaga/paz/tree/master/examples/human_pose_estimation_2D) | |---------------------------|-----------------------|-----------------| | | | | -| [3D keypoint discovery](https://github.com/oarriaga/paz/tree/master/examples/discovery_of_latent_keypoints) | [Hand closure detection](https://github.com/oarriaga/paz/tree/master/examples/minimal_hand) | [6D pose estimation](https://github.com/oarriaga/paz/tree/master/examples/pix2pose) | +| [3D keypoint discovery](https://github.com/oarriaga/paz/tree/master/examples/discovery_of_latent_keypoints) | [Hand closure detection](https://github.com/oarriaga/paz/tree/master/examples/hand_pose_estimation) | [6D pose estimation](https://github.com/oarriaga/paz/tree/master/examples/pix2pose) | |---------------------------|-----------------------| --------------------------| | | | | @@ -202,7 +202,9 @@ The following models are implemented in PAZ and they can be trained with your ow |[Attention](https://github.com/oarriaga/paz/blob/master/examples/spatial_transfomer_networks/STN.py) |[Spatial Transformers](https://arxiv.org/abs/1506.02025) | |[Object detection](https://github.com/oarriaga/paz/blob/master/paz/models/detection/haar_cascade.py) |[HaarCascades](https://link.springer.com/article/10.1023/B:VISI.0000013087.49260.fb) | |[Human pose estimation](https://github.com/oarriaga/paz/blob/master/paz/models/pose_estimation/higher_hrnet.py) |[HigherHRNet](https://arxiv.org/abs/1908.10357) | -|[Hand pose estimation](https://github.com/oarriaga/paz/blob/refactor_readme/paz/models/keypoint/detnet.py) |[DetNet](https://vcai.mpi-inf.mpg.de/projects/2020-cvpr-hands/) | +|[Hand pose estimation](https://github.com/oarriaga/paz/blob/master/paz/models/keypoint/detnet.py) |[DetNet](https://vcai.mpi-inf.mpg.de/projects/2020-cvpr-hands/) | +|[Hand closure classification](https://github.com/oarriaga/paz/blob/master/paz/models/keypoint/iknet.py) |[IKNet](https://vcai.mpi-inf.mpg.de/projects/2020-cvpr-hands/) | +|[Hand detection](https://github.com/oarriaga/paz/blob/master/paz/models/detection/ssd512.py) |[SSD512](https://arxiv.org/abs/1512.02325)| ## Motivation diff --git a/docs/structure.py b/docs/structure.py index 91caa8b96..30b3a2957 100644 --- a/docs/structure.py +++ b/docs/structure.py @@ -115,7 +115,9 @@ 'classes': [ (camera.Camera, [camera.Camera.is_open, camera.Camera.start, - camera.Camera.stop]), + camera.Camera.stop, + camera.Camera.intrinsics_from_HFOV, + camera.Camera.take_photo]), (camera.VideoPlayer, [camera.VideoPlayer.step, camera.VideoPlayer.run, camera.VideoPlayer.record, @@ -237,7 +239,8 @@ standard.calculate_norm, standard.tensor_to_numpy, standard.pad_matrix, - standard.max_pooling_2d + standard.max_pooling_2d, + standard.predict 
], }, @@ -340,9 +343,9 @@ losses.WeightedReconstructionWithError ], }, - - - { + + + { 'page': 'processors/angles.md', 'classes': [ processors.ChangeLinkOrder, @@ -386,7 +389,8 @@ processors.NormalizedDeviceCoordinatesToImage, processors.ReplaceLowerThanThreshold, processors.GetNonZeroValues, - processors.GetNonZeroArguments + processors.GetNonZeroArguments, + processors.FlipLeftRightImage ] }, @@ -500,7 +504,8 @@ 'page': 'processors/pose.md', 'classes': [ processors.SolvePNP, - processors.SolveChangingObjectPnPRANSAC + processors.SolveChangingObjectPnPRANSAC, + processors.Translation3DFromBoxWidth ] }, @@ -547,7 +552,8 @@ processors.UnwrapDictionary, processors.Scale, processors.AppendValues, - processors.BooleanToTextMessage + processors.BooleanToTextMessage, + processors.PrintTopics ] }, @@ -561,13 +567,10 @@ { - 'page': 'pipelines/image.md', + 'page': 'pipelines/classification.md', 'classes': [ - pipelines.AugmentImage, - pipelines.PreprocessImage, - pipelines.DecoderPredictor, - pipelines.EncoderPredictor, - pipelines.PreprocessImageHigherHRNet + pipelines.MiniXceptionFER, + pipelines.ClassifyHandClosure ] }, @@ -581,7 +584,8 @@ pipelines.PostprocessBoxes2D, pipelines.DetectSingleShot, pipelines.DetectHaarCascade, - pipelines.SSD512HandDetection + pipelines.SSD512HandDetection, + pipelines.SSD512MinimalHandPose ] }, @@ -594,6 +598,18 @@ }, + { + 'page': 'pipelines/image.md', + 'classes': [ + pipelines.AugmentImage, + pipelines.PreprocessImage, + pipelines.DecoderPredictor, + pipelines.EncoderPredictor, + pipelines.PreprocessImageHigherHRNet + ] + }, + + { 'page': 'pipelines/keypoints.md', 'classes': [ @@ -662,7 +678,9 @@ pipelines.PIX2YCBTools6D, pipelines.DetNetHandKeypoints, pipelines.MinimalHandPoseEstimation, - pipelines.DetectMinimalHand + pipelines.DetectMinimalHand, + pipelines.ClassifyHandClosure, + pipelines.SSD512MinimalHandPose ] }, diff --git a/examples/hand_detection/pose_demo.py b/examples/hand_detection/pose_demo.py index 1fc725627..30dea0631 100644 --- a/examples/hand_detection/pose_demo.py +++ b/examples/hand_detection/pose_demo.py @@ -1,17 +1,15 @@ import argparse -from paz.applications import DetectMinimalHand -from paz.applications import MinimalHandPoseEstimation -from paz.pipelines.detection import SSD512HandDetection +from paz.applications import SSD512MinimalHandPose from paz.backend.camera import VideoPlayer, Camera -parser = argparse.ArgumentParser(description='Minimal hand detection') +parser = argparse.ArgumentParser(description='''Minimal hand detection and + keypoints estimation''') parser.add_argument('-c', '--camera_id', type=int, default=0, help='Camera device ID') args = parser.parse_args() -pipeline = DetectMinimalHand( - SSD512HandDetection(), MinimalHandPoseEstimation(right_hand=False)) +pipeline = SSD512MinimalHandPose(right_hand=False, offsets=[0.5, 0.5]) camera = Camera(args.camera_id) player = VideoPlayer((640, 480), pipeline, camera) player.run() diff --git a/examples/hand_pose_estimation/HandPoseEstimation.py b/examples/hand_pose_estimation/HandPoseEstimation.py deleted file mode 100755 index ed7a74552..000000000 --- a/examples/hand_pose_estimation/HandPoseEstimation.py +++ /dev/null @@ -1,312 +0,0 @@ -from tensorflow.keras.layers import Concatenate, Dense, Dropout, Reshape, Input -from tensorflow.keras.layers import Conv2D, MaxPooling2D, LeakyReLU -from tensorflow.keras import Model -from tensorflow.keras.utils import get_file - -BASE_WEIGHT_PATH = ( - 'https://github.com/oarriaga/altamira-data/releases/download/v0.11/') - - -def 
HandSegmentationNet(input_shape=(320, 320, 3), weights='RHDv2'): - image = Input(shape=input_shape, name='image') - - X = Conv2D(64, kernel_size=3, padding='same', name='conv1_1')(image) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(64, 3, padding='same', name='conv1_2')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X) - - X = Conv2D(128, 3, padding='same', name='conv1_3')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, 3, padding='same', name='conv1_4')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X) - - X = Conv2D(256, 3, padding='same', name='conv1_5')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(256, 3, padding='same', name='conv1_6')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(256, 3, padding='same', name='conv1_7')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(256, 3, padding='same', name='conv1_8')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X) - - X = Conv2D(512, 3, padding='same', name='conv1_9')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(512, 3, padding='same', name='conv1_10')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(512, 3, padding='same', name='conv1_11')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(512, 3, padding='same', name='conv1_12')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(512, 3, padding='same', name='conv1_13')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, 3, padding='same', name='conv1_14')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(512, 1, padding='same', name='conv1_15')(X) - X = LeakyReLU(alpha=0.01)(X) - - raw_segmented_image = Conv2D(2, 1, padding='same', activation=None, - name='conv1_16')(X) - - segmentation_net = Model(inputs={'image': image}, - outputs={'image': image, - 'raw_segmentation_map': - raw_segmented_image}, - name='HandSegNet') - - if weights is not None: - model_filename = [segmentation_net.name, str(weights)] - model_filename = '_'.join(['-'.join(model_filename), 'weights.hdf5']) - weights_path = get_file(model_filename, - BASE_WEIGHT_PATH + model_filename, - cache_subdir='paz/models') - print('Loading %s model weights' % weights_path) - segmentation_net.load_weights(weights_path) - - return segmentation_net - - -def PoseNet(input_shape=(256, 256, 3), weights='RHDv2'): - cropped_image = Input(shape=input_shape, name='cropped_image') - - X = Conv2D(64, kernel_size=3, padding='same', name='conv2_1')( - cropped_image) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(64, kernel_size=3, padding='same', name='conv2_2')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X) - - X = Conv2D(128, kernel_size=3, padding='same', name='conv2_3')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, kernel_size=3, padding='same', name='conv2_4')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X) - - X = Conv2D(256, kernel_size=3, padding='same', name='conv2_5')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(256, kernel_size=3, padding='same', name='conv2_6')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(256, kernel_size=3, padding='same', name='conv2_7')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(256, kernel_size=3, padding='same', name='conv2_8')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X) - - X = Conv2D(512, kernel_size=3, padding='same', name='conv2_9')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(512, kernel_size=3, padding='same', 
name='conv2_10')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(256, kernel_size=3, padding='same', name='conv2_11')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(256, kernel_size=3, padding='same', name='conv2_12')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(256, kernel_size=3, padding='same', name='conv2_13')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(256, kernel_size=3, padding='same', name='conv2_14')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, kernel_size=3, padding='same', name='conv2_15')(X) - X = LeakyReLU(alpha=0.01)(X) - skip_connection = X - - X = Conv2D(512, kernel_size=1, name='conv2_16')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(21, kernel_size=1, name='conv2_17')(X) - - X = Concatenate(axis=3)([X, skip_connection]) - - X = Conv2D(128, kernel_size=7, padding='same', name='conv2_18')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, kernel_size=7, padding='same', name='conv2_19')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, kernel_size=7, padding='same', name='conv2_20')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, kernel_size=7, padding='same', name='conv2_21')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, kernel_size=7, padding='same', name='conv2_22')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, kernel_size=1, name='conv2_23')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(21, kernel_size=1, padding='same', name='conv2_24')(X) - - X = Concatenate(axis=3)([X, skip_connection]) - - X = Conv2D(128, kernel_size=7, padding='same', name='conv2_25')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, kernel_size=7, padding='same', name='conv2_26')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, kernel_size=7, padding='same', name='conv2_27')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, kernel_size=7, padding='same', name='conv2_28')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, kernel_size=7, padding='same', name='conv2_29')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, kernel_size=1, name='conv2_30')(X) - X = LeakyReLU(alpha=0.01)(X) - - score_maps = Conv2D(21, kernel_size=1, name='conv2_31')(X) - - PoseNet = Model(inputs={'cropped_image': cropped_image}, - outputs={'score_maps': score_maps}, name='PoseNet') - - if weights is not None: - model_filename = [PoseNet.name, str(weights)] - model_filename = '_'.join(['-'.join(model_filename), 'weights.hdf5']) - weights_path = get_file(model_filename, - BASE_WEIGHT_PATH + model_filename, - cache_subdir='paz/models') - print('Loading %s model weights' % weights_path) - PoseNet.load_weights(weights_path) - - return PoseNet - - -def PosePriorNet(keypoint_heatmaps_shape=(32, 32, 21), hand_side_shape=(2,), - num_keypoints=21, weights='RHDv2'): - score_maps = Input(shape=keypoint_heatmaps_shape) - hand_side = Input(shape=hand_side_shape) - - X = Conv2D(32, 3, padding='same', name='conv3_1')(score_maps) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(32, 3, padding='same', strides=2, name='conv3_2')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(64, 3, padding='same', name='conv3_3')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(64, 3, padding='same', strides=2, name='conv3_4')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, 3, padding='same', name='conv3_5')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, 3, padding='same', strides=2, name='conv3_6')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Reshape([-1])(X) - X = Concatenate(axis=1)([X, hand_side]) - - X = Dense(512, name='dense3_1')(X) - X = LeakyReLU(alpha=0.01)(X) - X 
= Dropout(rate=0.2)(X) - - X = Dense(512, name='dense3_2')(X) - X = LeakyReLU(alpha=0.01)(X) - X = Dropout(rate=0.2)(X) - - X = Dense(num_keypoints * 3, name='dense3_3')(X) - - hand_keypoints = Reshape((21, 3), name='reshape3_1')(X) - PosePriorNet = Model(inputs={'score_maps': score_maps, - 'hand_side': hand_side}, - outputs={'canonical_coordinates': hand_keypoints}, - name='PosePriorNet') - - if weights is not None: - model_filename = [PosePriorNet.name, str(weights)] - model_filename = '_'.join(['-'.join(model_filename), 'weights.hdf5']) - weights_path = get_file(model_filename, - BASE_WEIGHT_PATH + model_filename, - cache_subdir='paz/models') - print('Loading %s model weights' % weights_path) - PosePriorNet.load_weights(weights_path) - - return PosePriorNet - - -def ViewPointNet(keypoint_heat_maps_shape=(32, 32, 21), hand_side_shape=(2,), - weights='RHDv2'): - score_maps = Input(shape=keypoint_heat_maps_shape, - name='score_maps') - hand_side = Input(shape=hand_side_shape, name='hand_side') - - X = Conv2D(64, 3, padding='same')(score_maps) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(64, 3, strides=2, padding='same')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, 3, padding='same')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(128, 3, strides=2, padding='same')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(256, 3, padding='same')(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Conv2D(256, 3, strides=2, padding='same')(X) - X = LeakyReLU(alpha=0.01)(X) - X = Reshape([-1])(X) - X = Concatenate(axis=1)([X, hand_side]) - - X = Dense(256)(X) - X = LeakyReLU(alpha=0.01)(X) - - X = Dense(128)(X) - X = LeakyReLU(alpha=0.01)(X) - - ux = Dense(1)(X) - uy = Dense(1)(X) - uz = Dense(1)(X) - - axis_angles = Concatenate(axis=1)([ux, uy, uz]) - - ViewPointNet = Model(inputs={'score_maps': score_maps, - 'hand_side': hand_side}, - outputs={'rotation_parameters': axis_angles[0], - 'hand_side': hand_side}, - name='ViewPointNet') - - if weights is not None: - model_filename = [ViewPointNet.name, str(weights)] - model_filename = '_'.join(['-'.join(model_filename), 'weights.hdf5']) - weights_path = get_file(model_filename, - BASE_WEIGHT_PATH + model_filename, - cache_subdir='paz/models') - print('Loading %s model weights' % weights_path) - ViewPointNet.load_weights(weights_path) - - return ViewPointNet diff --git a/examples/hand_pose_estimation/README.md b/examples/hand_pose_estimation/README.md new file mode 100644 index 000000000..452c895df --- /dev/null +++ b/examples/hand_pose_estimation/README.md @@ -0,0 +1,25 @@ +### This example detects hand pose from an image. 
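The demo scripts in this folder are thin wrappers around the `MinimalHandPoseEstimation` pipeline from `paz.applications`. The snippet below is only an illustrative sketch of single-image use, following the import style of other PAZ examples: `hand.png` is a placeholder path, and the assumption that the pipeline returns a dictionary with an annotated `image` entry mirrors how the camera demos consume their pipelines.

```py
from paz.applications import MinimalHandPoseEstimation
from paz.backend.image import load_image, show_image

# Build the keypoint estimation pipeline; right_hand=False matches the demos.
estimate = MinimalHandPoseEstimation(right_hand=False)
image = load_image('hand.png')   # placeholder path to an image of a hand
inferences = estimate(image)     # dictionary of pipeline outputs
show_image(inferences['image'])  # image annotated with the estimated keypoints
```

See `demo_image.py` for the complete, maintained version of this flow.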
+
+To test live hand pose estimation from the camera, run:
+```py
+python demo.py
+```
+
+To test hand pose estimation on a single image, run:
+```py
+python demo_image.py
+```
+
+To test live hand closure detection together with pose estimation from the camera, run:
+```py
+python is_open_demo.py
+```
+
+To test live hand pose estimation from the camera and visualize the keypoints in 3D, run (this script additionally requires matplotlib):
+```py
+python demo3D.py
+```
+
+### Additional notes
+For more robust hand pose estimation and open/close classification, try out `paz/examples/hand_detection/pose_demo.py`.
+
diff --git a/examples/hand_pose_estimation/RHDv2.py b/examples/hand_pose_estimation/RHDv2.py
deleted file mode 100644
index 3e5bcc3b2..000000000
--- a/examples/hand_pose_estimation/RHDv2.py
+++ /dev/null
@@ -1,18 +0,0 @@
-KINEMATIC_CHAIN_DICT = {0: 'root',
-                        4: 'root', 3: 4, 2: 3, 1: 2,
-                        8: 'root', 7: 8, 6: 7, 5: 6,
-                        12: 'root', 11: 12, 10: 11, 9: 10,
-                        16: 'root', 15: 16, 14: 15, 13: 14,
-                        20: 'root', 19: 20, 18: 19, 17: 18}
-KINEMATIC_CHAIN_LIST = list(KINEMATIC_CHAIN_DICT.keys())
-
-LEFT_WRIST = 0
-LEFT_MIDDLE_METACARPAL = 12
-LEFT_PINKY_TIP = 20
-
-RIGHT_WRIST = 21
-RIGHT_MIDDLE_METACARPAL = 33
-RIGHT_PINKY_TIP = 41
-
-LEFT_HAND = 0
-RIGHT_HAND = 1
diff --git a/examples/hand_pose_estimation/backend_SE3.py b/examples/hand_pose_estimation/backend_SE3.py
deleted file mode 100644
index 42e60ea56..000000000
--- a/examples/hand_pose_estimation/backend_SE3.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import numpy as np
-
-
-def to_homogeneous_coordinates(vector):
-    """ Homogenize the vector : Appending 1 to the vector.
-
-    # Arguments
-        keypoints: Numpy array with any shape.
-
-    # Returns
-        vector: Numpy array.
-    """
-    vector = np.append(vector, 1)
-    return vector
-
-
-def build_translation_matrix_SE3(translation_vector):
-    """ Build a translation matrix from translation vector.
-
-    # Arguments
-        translation_vector: list of length 1 or 3.
-
-    # Returns
-        transformation_matrix: Numpy array of size (1, 4, 4).
-    """
-    if len(translation_vector) == 1:
-        translation_vector = [0, 0, translation_vector]
-    transformation_matrix = np.array([[1, 0, 0, translation_vector[0]],
-                                      [0, 1, 0, translation_vector[1]],
-                                      [0, 0, 1, translation_vector[2]],
-                                      [0, 0, 0, 1]])
-    return transformation_matrix
-
-
-def build_affine_matrix(matrix, translation_vector=None):
-    """ Build a (4, 4) affine matrix provided a matrix of size (3, 3).
-
-    # Arguments
-        matrix: numpy array of shape (3, 3).
-
-    # Returns
-        affine_matrix: Numpy array of size (4, 4).
-    """
-    if translation_vector is None:
-        translation_vector = np.array([[0], [0], [0]])
-
-    if len(translation_vector) == 1:
-        translation_vector = [0, 0, translation_vector]
-
-    affine_matrix = np.hstack([matrix, translation_vector])
-    affine_matrix = np.vstack((affine_matrix, [0, 0, 0, 1]))
-    return affine_matrix
-
-
-def build_rotation_matrix_x(angle):
-    """Build a (3, 3) rotation matrix along x-axis.
-
-    # Arguments
-        angle: float value of range [0, 360].
-
-    # Returns
-        rotation_matrix_x: Numpy array of size (3, 3).
-    """
-    cosine_value = np.cos(angle)
-    sine_value = np.sin(angle)
-    rotation_matrix_x = np.array([[1.0, 0.0, 0.0],
-                                  [0.0, cosine_value, sine_value],
-                                  [0.0, -sine_value, cosine_value]])
-    return rotation_matrix_x
-
-
-def build_rotation_matrix_y(angle):
-    """Build a (3, 3) rotation matrix along y-axis.
-
-    # Arguments
-        angle: float value of range [0, 360].
-
-    # Returns
-        rotation_matrix_y: Numpy array of size (3, 3).
- """ - cosine_value = np.cos(angle) - sine_value = np.sin(angle) - rotation_matrix_y = np.array([[cosine_value, 0.0, -sine_value], - [0.0, 1.0, 0.0], - [sine_value, 0.0, cosine_value]]) - return rotation_matrix_y - - -def build_rotation_matrix_z(angle): - """ Build a (3, 3) rotation matrix along z-axis. - - # Arguments - angle: float value of range [0, 360]. - - # Returns - rotation_matrix_z: Numpy array of size (3, 3). - """ - cosine_value = np.cos(angle) - sine_value = np.sin(angle) - rotation_matrix_z = np.array([[cosine_value, sine_value, 0.0], - [-sine_value, cosine_value, 0.0], - [0.0, 0.0, 1.0]]) - return rotation_matrix_z - - -def normalize_axis_coordinates(axis_angles, theta): - normalization_factor = 1.0 / theta - axis_coordinates_x = axis_angles[0] * normalization_factor - axis_coordinates_y = axis_angles[1] * normalization_factor - axis_coordinates_z = axis_angles[2] * normalization_factor - axis_angles = (axis_coordinates_x, axis_coordinates_y, axis_coordinates_z) - return axis_angles - - -def get_rotation_matrix(axis_coordinates, theta): - """ Calculate Rotation matrix. - - # Arguments - axis_coordinates: List of length (3). - theta: Float value. - - # Returns: - matrix: Numpy array of size (3, 3). - """ - x = axis_coordinates[0] - y = axis_coordinates[1] - z = axis_coordinates[2] - - sine_theta = np.sin(theta) - cosine_theta = np.cos(theta) - - r11 = cosine_theta + ((x ** 2) * (1.0 - cosine_theta)) - r22 = cosine_theta + ((y ** 2) * (1.0 - cosine_theta)) - r33 = cosine_theta + ((z ** 2) * (1.0 - cosine_theta)) - - r12 = (x * y * (1.0 - cosine_theta)) - (z * sine_theta) - r13 = (x * z * (1.0 - cosine_theta)) + (y * sine_theta) - r21 = (y * x * (1.0 - cosine_theta)) + (z * sine_theta) - r23 = (y * z * (1.0 - cosine_theta)) - (x * sine_theta) - r31 = (z * x * (1.0 - cosine_theta)) - (y * sine_theta) - r32 = (z * y * (1.0 - cosine_theta)) + (x * sine_theta) - - rotation_matrix = np.array([[r11, r12, r13], - [r21, r22, r23], - [r31, r32, r33]]) - - return rotation_matrix - - -def rotation_from_axis_angles(axis_angles, is_normalized=False): - """ Get Rotation matrix from axis angles. - - # Arguments - axis_angles: list of length (3). - is_normalized: boolean value. - - # Returns - rotation-matrix: numpy array of size (3, 3). - """ - theta = np.linalg.norm(axis_angles) - if not is_normalized: - axis_angles = normalize_axis_coordinates(axis_angles, theta) - rotation_matrix = get_rotation_matrix(axis_angles, theta) - return rotation_matrix diff --git a/examples/hand_pose_estimation/backend_keypoints.py b/examples/hand_pose_estimation/backend_keypoints.py deleted file mode 100644 index 2c755a124..000000000 --- a/examples/hand_pose_estimation/backend_keypoints.py +++ /dev/null @@ -1,1123 +0,0 @@ -import numpy as np - -from backend_SE3 import to_homogeneous_coordinates -from backend_SE3 import build_translation_matrix_SE3 -from backend_SE3 import build_rotation_matrix_x, build_rotation_matrix_y -from backend_SE3 import build_rotation_matrix_z, build_affine_matrix - -from RHDv2 import LEFT_MIDDLE_METACARPAL, LEFT_WRIST -from RHDv2 import LEFT_PINKY_TIP, LEFT_HAND -from RHDv2 import RIGHT_MIDDLE_METACARPAL, RIGHT_WRIST -from RHDv2 import RIGHT_PINKY_TIP, RIGHT_HAND -from RHDv2 import KINEMATIC_CHAIN_DICT, KINEMATIC_CHAIN_LIST - -from paz.backend.image.opencv_image import resize_image, show_image - - -def extract_hand_segment(segmentation_label, hand_arg=1): - """ Data Pre-processing step: Extract only hand mask from the - segmentation map provided in RHD dataset. 
- - # Arguments - segmentation_label: Numpy array. - - # Returns - Numpy array. - """ - hand_mask = np.greater(segmentation_label, hand_arg) - background_mask = np.logical_not(hand_mask) - return np.stack([background_mask, hand_mask], axis=2) - - -def normalize_keypoints(keypoints3D): - """ Normalize 3D-keypoints. - - # Arguments - keypoints: Numpy array with shape `(num_keypoints, 3)` - - # Returns - keypoint_scale: Numpy array with shape `(1, )`. - keypoint_normalized: Numpy array with shape `(num_keypoints, 3)`. - """ - keypoint3D_root = keypoints3D[0, :] - relative_keypoint3D = keypoints3D - keypoint3D_root - metacarpal_bone_length = np.linalg.norm( - relative_keypoint3D[LEFT_MIDDLE_METACARPAL, :] - - relative_keypoint3D[(LEFT_MIDDLE_METACARPAL - 1), :]) - keypoint_normalized = relative_keypoint3D / metacarpal_bone_length - return metacarpal_bone_length, keypoint_normalized - - -def extract_hand_mask(segmenation_mask, hand_arg=1): - """ Normalize 3D-keypoints. - - # Arguments - segmenation_mask: Numpy array - hand_arg: Int value. - - # Returns - hand_mask: Numpy array. - """ - hand_mask = np.greater(segmenation_mask, hand_arg) - return hand_mask - - -def extract_hand_masks(segmentation_mask, right_hand_mask_limit=18): - """ Extract Hand masks of left and right hand. - ones_mask * right_hand_mask_limit convert to a variable - - # Arguments - segmentation_mask: Numpy array. - right_hand_mask_limit: Int value. - - # Returns - mask_left: Numpy array. - mask_right: Numpy array. - """ - ones_mask = np.ones_like(segmentation_mask) - hand_mask = extract_hand_mask(segmentation_mask, hand_arg=1) - right_hand_mask = ones_mask * right_hand_mask_limit - right_hand_map = np.less(segmentation_mask, right_hand_mask) - mask_left = np.logical_and(hand_mask, right_hand_map) - mask_right = np.greater(segmentation_mask, right_hand_mask) - return mask_left, mask_right - - -def extract_hand_side_keypoints(keypoints3D, dominant_hand): - """ Extract keypoints related to Left or Right hand. - - # Arguments - keypoints3D: numpy array of shape (num_keypoints, 3) - Is_Left: numpy array of shape (1). - - # Returns - keypoints3D: Numpy array of size (num_keypoints, 3). - """ - if dominant_hand == LEFT_HAND: - keypoints3D = keypoints3D[LEFT_WRIST:LEFT_PINKY_TIP, :] - else: - keypoints3D = keypoints3D[RIGHT_WRIST:RIGHT_PINKY_TIP, :] - return keypoints3D - - -def get_hand_side_and_keypooints(hand_parts_mask, keypoints3D): - """Extract hand masks, hand side and keypoints of dominant hand. - - # Arguments - keypoints3D: numpy array of shape (num_keypoints, 3). - hand_parts_mask: numpy array of shape (image_size, image_size). - - # Returns - hand_side: Numpy array of size (2). - hand_side_keypoints3D: Numpy array of size (num_keypoints, 3). - dominant_hand: numpy array of shape (1). - """ - hand_map_left, hand_map_right = extract_hand_masks(hand_parts_mask) - num_pixels_hand_left = np.sum(hand_map_left) - num_pixels_hand_right = np.sum(hand_map_right) - is_left_dominant = num_pixels_hand_left > num_pixels_hand_right - dominant_hand = LEFT_HAND if is_left_dominant else RIGHT_HAND - keypoints3D = extract_hand_side_keypoints(keypoints3D, dominant_hand) - hand_side = np.where(is_left_dominant, 0, 1) - return hand_side, keypoints3D, dominant_hand - - -def extract_coordinate_limits(keypoints2D, keypoints2D_visibility, - image_size): - """ Extract minimum and maximum coordinates. - # Try to convert to a function , check numpy.permute , rollaxis, flip - # Arguments - keypoints2D: Numpy array of shape (num_keypoints, 2). 
- keypoints2D_visibility: Numpy array of shape (num_keypoints, 2). - image_size: List of shape (3). - - # Returns - min_coordinates: Tuple of size (2). - max_coordinates: Tuple of size (2). - """ - visible_keypoints = keypoints2D[keypoints2D_visibility] - keypoint_u = visible_keypoints[:, 1] - keypoint_v = visible_keypoints[:, 0] - keypoints2D_coordinates = np.stack([keypoint_u, keypoint_v], 1) - max_keypoint2D = np.maximum(keypoints2D_coordinates, 0) - min_keypoint2D = np.minimum(keypoints2D_coordinates, 0) - min_coordinates = np.maximum(min_keypoint2D, 0.0) - max_coordinates = np.minimum(max_keypoint2D, image_size[0:2]) - return min_coordinates, max_coordinates - - -def tranform_keypoints_to_camera_coordinates(keypoints2D, crop_center, scale, - crop_size): - """ Extract keypoints in cropped image frame. - - # Arguments - keypoints2D: Numpy array of shape (num_keypoints, 1). - crop_center: Typle of size (2). - Scale: Integer. - image_size: List of size (3). - - # Returns - keypoint_uv21: Numpy array of shape (num_keypoints, 1). - """ - crop_size_halved = crop_size // 2 - u_residual = keypoints2D[:, 0] - crop_center[1] - v_residual = keypoints2D[:, 1] - crop_center[0] - keypoint_u = (u_residual * scale) + crop_size_halved - keypoint_v = (v_residual * scale) + crop_size_halved - keypoint_uv = np.stack([keypoint_u, keypoint_v], 1) - return keypoint_uv - - -def get_best_crop_size(max_coordinates, min_coordinates, crop_center, - min_crop_size=50.0, max_crop_size=500.0): - """ calculate crop size. - # Arguments - max_coordinates: (x_max, y_max) Numpy array of shape (1,2). - min_coordinates: (x_min, y_min) Numpy array of shape (1,2). - crop_center: (x_center, y_center) Numpy array of shape (1,2). - - # Returns - crop_size_best: Int value. - """ - crop_size_best = 2 * np.maximum(max_coordinates - crop_center, - crop_center - min_coordinates) - crop_size_best = np.maximum(crop_size_best) - crop_size_best = np.minimum(np.maximum(crop_size_best, min_crop_size), - max_crop_size) - return crop_size_best - - -def get_crop_scale_and_center(keypoints2D, keypoints2D_visibility, image_size, - crop_size): - """ Extract scale to which image should be cropped. - - # Arguments - keypoints2D: Numpy array of shape (num_keypoints, 1). - keypoints2D_visibility: Numpy array of shape (num_keypoints, 1). - image_size: List of size (3). - crop_size: List of size (2). - - # Returns - scale: Integer value. - crop_center: Tuple of length 3. - """ - crop_center = keypoints2D[LEFT_MIDDLE_METACARPAL, ::-1] - min_coordinates, max_coordinates = extract_coordinate_limits( - keypoints2D, keypoints2D_visibility, image_size) - crop_size_best = get_best_crop_size(max_coordinates, min_coordinates, - crop_center) - scale = crop_size / crop_size_best - return scale, crop_center - - -def crop_image_from_mask(keypoints2D, keypoints2D_visibility, image, - image_size, crop_size, camera_matrix): - """ Crop image from mask. - - # Arguments - keypoints2D: Numpy array of shape (num_keypoints, 1). - keypoints2D_vis: Numpy array of shape (num_keypoints, 1). - image: Numpy array of shape (image_size, image_size, 3). - image_size: List of size (2). - crop_size: List of size (2). - camera_matrix: Numpy array of shape (3, 3). - - # Returns - scale: Integer value. - img_crop: Numpy array of size (crop_size, crop-size, 3). - keypoint_uv21: Numpy array of shape (num_keypoints, 1). - camera_matrix_cropped: Numpy array of shape (3, 3). 
- """ - scale, crop_center = get_crop_scale_and_center( - keypoints2D, keypoints2D_visibility, image_size, crop_size) - scale, scale_matrix = get_scale_matrix(scale) - cropped_image = crop_image_from_coordinates( - image, crop_center, crop_size, scale) - keypoint_uv21 = tranform_keypoints_to_camera_coordinates( - keypoints2D, crop_center, scale, crop_size) - scale_translation_matrix = get_scale_translation_matrix( - crop_center, crop_size, scale) - scale_matrix_uv = np.matmul(scale_matrix, camera_matrix) - camera_matrix_cropped = np.matmul(scale_translation_matrix, scale_matrix_uv) - return scale, np.squeeze( - cropped_image), keypoint_uv21, camera_matrix_cropped - - -def flip_right_to_left_hand(keypoints3D, flip_right): - """ Flip right hend coordinates to left hand coordinates. - # Arguments - canonical_keypoints3D: Numpy array of shape (num_keypoints, 3). - flip_right: boolean value. - - # Returns - canonical_keypoints3D_left: Numpy array of shape (num_keypoints, 3). - """ - keypoints3D_mirrored = np.stack([keypoints3D[:, 0], keypoints3D[:, 1], - -keypoints3D[:, 2]], -1) - keypoints3D_left = np.where(flip_right, keypoints3D_mirrored, keypoints3D) - return keypoints3D_left - - -def extract_dominant_hand_visibility(keypoint_visibility, dominant_hand): - """ Extract Visibility mask for dominant hand. - # Look Later with Octavio - # Arguments - keypoint_visibility: Numpy array of shape (num_keypoints, 1). - dominant_hand: List of size (2). - - # Returns - keypoint_visibility_21: Numpy array of shape (num_keypoints, 1). - """ - keypoint_visibility_left = keypoint_visibility[:LEFT_PINKY_TIP] - keypoint_visibility_right = keypoint_visibility[RIGHT_WRIST:RIGHT_PINKY_TIP] - keypoint_visibility_21 = np.where(dominant_hand[:, 0], - keypoint_visibility_left, - keypoint_visibility_right) - return keypoint_visibility_21 - - -def extract_dominant_keypoints2D(keypoint_2D, dominant_hand): - """ Extract keypoint 2D. - # Look Later with Octavio - # Arguments - keypoint_2D: Numpy array of shape (num_keypoints, 1). - dominant_hand: List of size (2) with booleans. - - # Returns - keypoint_visibility_2D_21: Numpy array of shape (num_keypoints, 1). - """ - keypoint_visibility_left = keypoint_2D[:LEFT_PINKY_TIP, :] - keypoint_visibility_right = keypoint_2D[RIGHT_WRIST:RIGHT_PINKY_TIP, :] - keypoint_visibility_2D_21 = np.where( - dominant_hand[:, :2], keypoint_visibility_left, - keypoint_visibility_right) - return keypoint_visibility_2D_21 - - -def extract_keypoint2D_limits(uv_coordinates, scoremap_size): - """ Limit keypoint coordinates to scoremap size , - # Arguments - uv_coordinates: Numpy array of shape (num_keypoints, 1). - scoremap_size: List of size (2). - - # Returns - keypoint_limits: Numpy array of shape (num_keypoints, 1). - """ - scoremap_height, scoremap_width = scoremap_size - x_lower_limits = np.less(uv_coordinates[:, 0], scoremap_height - 1) - x_upper_limits = np.greater(uv_coordinates[:, 0], 0) - x_limits = np.logical_and(x_lower_limits, x_upper_limits) - - y_lower_limits = np.less(uv_coordinates[:, 1], scoremap_width - 1) - y_upper_limits = np.greater(uv_coordinates[:, 1], 0) - y_limits = np.logical_and(y_lower_limits, y_upper_limits) - - keypoint_limits_mask = np.logical_and(x_limits, y_limits) - return keypoint_limits_mask - - -def get_keypoints_mask(validity_mask, uv_coordinates, scoremap_size, - validity_score=0.5): - """ Extract Visibility mask for dominant hand. - # Add in dataset README the difference between seg and vis - # Arguments - validity_mask: Int value. 
- uv_coordinates: Numpy array of shape (num_keypoints, 1). - scoremap_size: List of size (2). - - # Returns - keypoint_limits: Numpy array of shape (num_keypoints, 1). - """ - validity_mask = np.squeeze(validity_mask) - keypoint_validity = np.greater(validity_mask, validity_score) - keypoint_limits = extract_keypoint2D_limits(uv_coordinates, scoremap_size) - keypooints_mask = np.logical_and(keypoint_validity, keypoint_limits) - return keypooints_mask - - -def get_keypoint_limits(uv_coordinates, scoremap_size): - """ Extract X and Y limits. - # Arguments - uv_coordinates: Numpy array of shape (num_keypoints, 2). - scoremap_size: List of size (2). - - # Returns - X_limits: Numpy array of shape (num_keypoints, 1). - Y_limits: Numpy array of shape (num_keypoints, 1). - """ - shape = uv_coordinates.shape - scoremap_height, scoremap_width = scoremap_size - - x_range = np.expand_dims(np.arange(scoremap_height), 1) - x_coordinates = np.tile(x_range, [1, scoremap_width]) - x_coordinates.reshape((scoremap_height, scoremap_width)) - x_coordinates = np.expand_dims(x_coordinates, -1) - x_coordinates = np.tile(x_coordinates, [1, 1, shape[0]]) - x_limits = x_coordinates - uv_coordinates[:, 0].astype('float64') - - y_range = np.expand_dims(np.arange(scoremap_width), 0) - y_coordinates = np.tile(y_range, [scoremap_height, 1]) - y_coordinates.reshape((scoremap_height, scoremap_width)) - y_coordinates = np.expand_dims(y_coordinates, -1) - y_coordinates = np.tile(y_coordinates, [1, 1, shape[0]]) - y_limits = y_coordinates - uv_coordinates[:, 1].astype('float64') - - return x_limits, y_limits - - -def create_gaussian_map(uv_coordinates, scoremap_size, sigma, validity_mask): - """ Generate Gaussian maps based on keypoints in Image coordinates. - # Arguments - uv_coordinates: Numpy array of shape (num_keypoints, 1). - scoremap_size: List of size (2). - sigma: Integer value. - validity_mask: Integer value. - - # Returns - scoremap: Numpy array of shape (crop_size, crop-size). - """ - keypoints_mask = get_keypoints_mask(validity_mask, uv_coordinates, - scoremap_size) - x_limits, y_limits = get_keypoint_limits(uv_coordinates, scoremap_size) - squared_distance = np.square(x_limits) + np.square(y_limits) - scoremap = np.exp(-squared_distance / np.square(sigma)) * keypoints_mask - return scoremap - - -def extract_keypoints_uv_coordinates(shape): - """ Generate X and Y mesh. - # Rename to best name - # Arguments - shape: tuple of size (3). - - # Returns - X: Numpy array of shape (1, crop_size). - Y: Numpy array of shape (crop_size, 1). - """ - crop_size_height, crop_size_width = shape[0], shape[1] - x_range = np.expand_dims(np.arange(crop_size_height), 1) - y_range = np.expand_dims(np.arange(crop_size_width), 0) - x_coordinates = np.tile(x_range, [1, crop_size_width]) - y_coordinates = np.tile(y_range, [crop_size_height, 1]) - return x_coordinates, y_coordinates - - -def get_bounding_box(X_masked, Y_masked): - """ Get Bounding Box. - - # Arguments - X_masked: tuple of size (crop_size, 1). - Y_masked: tuple of size (crop_size, 1). - - # Returns - bounding_box: List of length (4). - """ - x_min, x_max = np.min(X_masked), np.max(X_masked) - y_min, y_max = np.min(Y_masked), np.max(Y_masked) - bounding_box = np.array([x_min, y_min, x_max, y_max]) - return bounding_box - - -def get_crop_center(box_coordinates): - """ Extract Center. - # Arguments - box_coordinates: List of length 4. - center_list: List of length batch_size. - - # Returns - center_list: List of length batch_size. 
- """ - x_min, x_max = box_coordinates[0], box_coordinates[2] - y_min, y_max = box_coordinates[1], box_coordinates[3] - center_x = 0.5 * (x_min + x_max) - center_y = 0.5 * (y_min + y_max) - center = np.stack([center_x, center_y], 0) - return center - - -def get_crop_size(box_coordinates): - """ Extract Crop. - - # Arguments - xy_limit: List of length 4. - crop_size_list: List of length batch_size. - - # Returns - crop_size_list: List of length batch_size. - """ - x_max, x_min = box_coordinates[2], box_coordinates[0] - y_max, y_min = box_coordinates[3], box_coordinates[1] - crop_size_x = x_max - x_min - crop_size_y = y_max - y_min - crop_maximum_value = np.maximum(crop_size_x, crop_size_y) - crop_size = np.expand_dims(crop_maximum_value, 0) - return crop_size - - -# RESTART_LINE -def get_bounding_box_features(X, Y, binary_class_mask): - """ Extract Crop. - - # Arguments - X: Numpy array of size (num_keypoints, 1). - Y: Numpy array of size (num_keypoints, 1). - binary_class_mask: Numpy array of size (image_size, image_size). - shape: Tuple of lenth (3). - - # Returns - bounding_box_list: List of length batch_size. - center_list: List of length batch_size. - crop_size_list: List of length batch_size. - """ - X_masked = X[binary_class_mask] - Y_masked = Y[binary_class_mask] - bounding_box = get_bounding_box(X_masked, Y_masked) - center = get_crop_center(bounding_box) - crop_size = get_crop_size(bounding_box) - bounding_box = [bounding_box[1],bounding_box[0],bounding_box[3], - bounding_box[2]] - return bounding_box, center, crop_size - - -def extract_bounding_box(binary_class_mask): - """ Extract Bounding Box from Segmentation mask. - - # Arguments - binary_class_mask: Numpy array of size (image_size, image_size). - - # Returns - bounding_box: Numpy array of shape (batch_size, 4). - center: Numpy array of shape (batch_size, 2). - crop_size: Numpy array of shape (batch_size, 1). - """ - binary_class_mask = binary_class_mask.astype('int') - binary_class_mask = np.equal(binary_class_mask, 1) - binary_class_mask = np.squeeze(binary_class_mask, axis=-1) - shape = binary_class_mask.shape - coordinates_x, coordinates_y = extract_keypoints_uv_coordinates(shape) - bounding_box, center, crop_size = get_bounding_box_features( - coordinates_x, coordinates_y, binary_class_mask) - return center, bounding_box, crop_size - - -def get_box_coordinates(center, size, shape): - """ Extract Bounding Box from center and size of cropped image. - - # Arguments - location: Tuple of length (2). - size: Tuple of length (2). - shape: Typle of length (3). - - # Returns - boxes: Numpy array of shape (batch_size, 4). - """ - height, width = shape[0], shape[1] - x_min = center[0] - size // 2 - y_min = center[1] - size // 2 - x_max, y_max = x_min + size, y_min + size - x_min, x_max = x_min / height, x_max / height - y_min, y_max = y_min / width, y_max / width - boxes = [x_min, y_min, x_max, y_max] - return boxes - - -def crop_image_from_coordinates(image, crop_center, crop_size, scale=1.0): - """ Crop Image from Center and crop size. - - # Arguments - Image: Numpy array of shape (image_size, image_size, 3). - crop_center: Tuple of length (2). - crop_size: Float. - Scale: Float. - - # Returns - Image_cropped: Numpy array of shape (crop_size, crop-size). 
- """ - image = np.squeeze(image, 0) - height, width, channels = image.shape - scale = np.reshape(scale, [-1]) - crop_location = crop_center.astype(np.float) - crop_size_scaled = crop_size / scale - boxes = get_box_coordinates(crop_location, crop_size_scaled, - image.shape) - x_min, y_min, x_max, y_max = boxes - box = [int(x_min * width), - int(y_min * height), - int(x_max * width), - int(y_max * height)] - image_cropped = crop_image(image, box) - image_cropped = resize_image(image_cropped, (crop_size, crop_size)) - return image_cropped - - -def crop_image(image, crop_box): - """Crop image. - - # Arguments - image: Numpy array. - crop_box: List of four ints. - - # Returns - Numpy array. - """ - cropped_image = image[crop_box[0]:crop_box[2], crop_box[1]:crop_box[3], :] - return cropped_image - - -def extract_keypoint_index(scoremap): - """ Extract Scoremap. - - # Arguments - scoremap: Numpy aray of shape (crop_size, crop-size). - - # Returns - max_index_vec: List of Max Indices. - """ - keypoint_index = np.argmax(scoremap) - return keypoint_index - - -def extract_keypoints_XY(x_vector, y_vector, maximum_indices): - """ Extract Keypoint X,Y coordinates. - # Arguments - x_vector: Numpy array of shape (batch_size, 1). - y_vector: Numpy array of shape (batch_size, 1). - maximum_indices: Numpy array of shape (batch_size, 1). - batch_size: Integer Value. - - # Returns - keypoints2D: Numpy array of shape (num_keypoints, 1). - """ - keypoints2D = list() - x_location = np.reshape(x_vector[maximum_indices], [1]) - y_location = np.reshape(y_vector[maximum_indices], [1]) - keypoints2D.append(np.concatenate([x_location, y_location], 0)) - keypoints2D = np.stack(keypoints2D, 0) - return keypoints2D - - -def create_2D_grids(shape): - """ Create 2D Grids. - - # Arguments - shape: Tuple of length 2. - - # Returns - x_vec: Numpy array. - y_vec: Numpy array. - """ - height, width = shape - x_range = np.expand_dims(np.arange(height), 1) - y_range = np.expand_dims(np.arange(width), 0) - X = np.tile(x_range, [1, width]) - Y = np.tile(y_range, [height, 1]) - X = np.reshape(X, [-1]) - Y = np.reshape(Y, [-1]) - return X, Y - - -def find_max_location(scoremap): - """ Returns the coordinates of the given scoremap with maximum value. - - # Arguments - scoremap: Numpy array of shape (crop_size, crop-size). - - # Returns - keypoints2D: numpy array of shape (num_keypoints, 1). - """ - shape = scoremap.shape - x_grid, y_grid = create_2D_grids(shape) - keypoint_index = extract_keypoint_index(scoremap) - keypoints2D = extract_keypoints_XY(x_grid, y_grid, keypoint_index) - return keypoints2D - - -def create_score_maps(keypoint_2D, keypoint_visibility, image_size, - crop_size, variance, crop_image=True): - """ Create gaussian maps for keypoint representation. - # Arguments - keypoint_2D: Numpy array of shape (num_keypoints, 2). - keypoint_visibility: Numpy array of shape (num_keypoints, 1). - image_size: Tuple of length (3). - crop_size: Typle of length (2). - variance: Float value. - crop_image: Boolean value. - - # Returns - scoremap: numpy array of size (num_keypoints, crop_size, crop-size). - """ - keypoint_uv = np.stack([keypoint_2D[:, 1], keypoint_2D[:, 0]], -1) - scoremap_size = image_size[0:2] - if crop_image: - scoremap_size = (crop_size, crop_size) - scoremap = create_gaussian_map(keypoint_uv, scoremap_size, variance, - keypoint_visibility) # Check if visibility - # can be removed - return scoremap - - -def extract_2D_keypoints(visibility_mask): - """ Extract 2D keypoints. 
- - # Arguments - visibility_mask: Numpy array of size (num_keypoints, 3). - - # Returns - keypoints2D: numpy array of size (num_keypoints, 1). - keypoints_visibility_mask: numpy array of size (num_keypoints, 1). - """ - keypoints2D = visibility_mask[:, :2] - keypoints_visibility_mask = visibility_mask[:, 2] == 1 - return keypoints2D, keypoints_visibility_mask - - -def extract_keypoints(scoremaps): - """ Performs detection per scoremap for the hands keypoints. - - # Arguments - scoremaps: Numpy array of size (crop_size, crop-size, num_keypoints). - - # Returns - keypoint_coords: numpy array of size (num_keypoints, 1). - """ - height, width, num_keypoints = scoremaps.shape - keypoint2D = np.zeros((num_keypoints, 2)) - for keypoint_arg in range(num_keypoints): - keypoint_scoremap = np.argmax(scoremaps[:, :, keypoint_arg]) - coordinates = np.unravel_index(keypoint_scoremap, (height, width)) - v, u = coordinates - keypoint2D[keypoint_arg, 0] = u - keypoint2D[keypoint_arg, 1] = v - return keypoint2D - - -def transform_visibility_mask(visibility_mask): - """ Data Pre-processing step: Transform Visibility mask to palm coordinates - from wrist coordinates. - - # Arguments - visibility_mask: Numpy array with shape `(42, 1)`. - - # Returns - visibility_mask: Numpy array with shape `(42, 1)`. - """ - visibility_left_root = visibility_mask[LEFT_WRIST] - visibility_left_aligned = visibility_mask[LEFT_MIDDLE_METACARPAL] - visibility_right_root = visibility_mask[RIGHT_WRIST] - visibility_right_aligned = visibility_mask[RIGHT_MIDDLE_METACARPAL] - - palm_visibility_left = np.logical_or( - visibility_left_root, visibility_left_aligned) - palm_visibility_right = np.logical_or( - visibility_right_root, visibility_right_aligned) - - palm_visibility_left = np.expand_dims(palm_visibility_left, 0) - palm_visibility_right = np.expand_dims(palm_visibility_right, 0) - - visibility_mask = np.concatenate( - [palm_visibility_left, visibility_mask[LEFT_WRIST: LEFT_PINKY_TIP], - palm_visibility_right, visibility_mask[RIGHT_WRIST: RIGHT_PINKY_TIP]], - 0) - return visibility_mask - - -def keypoints_to_palm_coordinates(keypoints): - """ Data Pre-processing step: Transform keypoints to palm coordinates - from wrist coordinates. - # Arguments - keypoints: Numpy array with shape `(42, 3)` for 3D keypoints. - Numpy array with shape `(42, 2)` for 2D keypoints. - - # Returns - keypoints: Numpy array with shape `(42, 3)` for 3D keypoints. - Numpy array with shape `(42, 2)` for 2D keypoints. - """ - palm_coordinates_left = 0.5 * (keypoints[LEFT_WRIST, :] + - keypoints[LEFT_MIDDLE_METACARPAL, :]) - palm_coordinates_right = 0.5 * (keypoints[RIGHT_WRIST, :] + - keypoints[RIGHT_MIDDLE_METACARPAL, :]) - - palm_coordinates_left = np.expand_dims(palm_coordinates_left, 0) - palm_coordinates_right = np.expand_dims(palm_coordinates_right, 0) - - keypoints = np.concatenate( - [palm_coordinates_left, keypoints[LEFT_WRIST:LEFT_PINKY_TIP, :], - palm_coordinates_right, keypoints[RIGHT_WRIST:RIGHT_PINKY_TIP, :]], 0) - - return keypoints - - -def get_transform_to_bone_frame(keypoints3D, bone_index): - """ Transform the keypoints in camera image frame to index keypoint frame. - - # Arguments - keypoints3D: numpy array of shape (num_keypoints, 3). - bone_index: int value of range [0, num_keypoints]. - - # Returns - transformation_parameters: multiple values representing all the - euclidean parameters to calculate transformation matrix. 
- """ - index_keypoint = np.expand_dims(keypoints3D[bone_index, :], 1) - translated_keypoint3D = to_homogeneous_coordinates(index_keypoint) - translation_matrix = build_translation_matrix_SE3(np.zeros(3)) - translation_matrix = np.expand_dims(translation_matrix, 0) - transformation_parameters = get_transformation_parameters( - translated_keypoint3D, translation_matrix) - return transformation_parameters - - -def transform_to_keypoint_coordinates(transformation_matrix, keypoint3D): - """ Transform to keypoint (root/child) frame. - - # Arguments - transformation_matrix: numpy array of shape (4, 4). - keypoint3D: numpy array of shape (3, ). - - # Returns - keypoint_coordinates: Numpy array of size (3, ). - """ - keypoint3D = np.expand_dims(keypoint3D, 1) - keypoint3D = to_homogeneous_coordinates(keypoint3D) - keypoint_coordinates = np.matmul(transformation_matrix, keypoint3D) - return keypoint_coordinates - - -def apply_root_transformations(keypoints3D, bone_index): - """ Transform all keypoints to root keypoint frame. - - # Arguments - keypoints3D: numpy array of shape (num_keypoints, 3). - bone_index: int value of range [0, num_keypoints]. - - # Returns - relative_coordinates: numpy array of shape (num_keypoints, 3, 1). - transformations: placeholder for transformation - (num_keypoints, 4, 4, 1). - """ - transformation_parameters = get_transform_to_bone_frame(keypoints3D, - bone_index) - - length_from_origin = transformation_parameters[0] - rotation_angle_x = transformation_parameters[1] - rotation_angle_y = transformation_parameters[2] - rotated_keypoints = transformation_parameters[3] - - relative_coordinate = np.stack([length_from_origin, rotation_angle_x, - rotation_angle_y], 0) - return rotated_keypoints, relative_coordinate - - -def get_articulation_angles(child_keypoint_coordinates, - parent_keypoint_coordinates, transformation_matrix): - """ Calculate Articulation Angles. - - # Arguments - local_child_coordinates: Child keypoint coordinates (1, 3). - local_child_coordinates: Parent keypoint coordinates (1, 3). - transformation_matrix: Numpy array of shape (4, 4). - - # Returns - transformation_parameters: parameters for transformation to - local frame. - """ - delta_vector = child_keypoint_coordinates - parent_keypoint_coordinates - delta_vector = to_homogeneous_coordinates( - np.expand_dims(delta_vector[:, :3], 1)) - transformation_angles = get_transform_to_bone_frame( - delta_vector, transformation_matrix) - return transformation_angles - - -def apply_child_transformations(keypoints3D, bone_index, parent_index, - transformations): - """ Calculate Child coordinate to Parent coordinate. - - # Arguments - keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3). - bone_index: Index of current bone keypoint, Numpy array of shape (1). - parent_index: Index of root keypoint, Numpy array of shape (1). - relative_coordinates: place holder for relative_coordinates. - transformations: placeholder for transformations. - - # Returns - rotated_keypoints: place holder for relative_coordinates. - transformation_parameters: placeholder for transformations. 
- """ - transformation_matrix = transformations[parent_index] - parent_keypoint_coordinates = transform_to_keypoint_coordinates( - transformation_matrix, keypoints3D[parent_index, :]) - child_keypoint_coordinates = transform_to_keypoint_coordinates( - transformation_matrix, keypoints3D[bone_index, :]) - transformation_parameters = get_articulation_angles( - parent_keypoint_coordinates, child_keypoint_coordinates, - transformation_matrix) - length_from_origin = transformation_parameters[0] - rotation_angle_x, rotation_angle_y = transformation_parameters[1:3] - rotated_keypoints = transformation_parameters[3] - transformation_parameters = np.stack([length_from_origin, rotation_angle_x, - rotation_angle_y]) - return rotated_keypoints, transformation_parameters - - -def keypoints_to_root_frame(keypoints3D): - """ Convert keypoints to root keypoint coordinates. - - # Arguments - keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3). - - # Returns - relative_coordinates: keypoints in root keypoint coordinate frame. - """ - transformations = [None] * len(KINEMATIC_CHAIN_LIST) - relative_coordinates = np.zeros(len(KINEMATIC_CHAIN_LIST)) - for bone_index in KINEMATIC_CHAIN_LIST: - parent_index = KINEMATIC_CHAIN_DICT[bone_index] - if parent_index == 'root': - transformation, relative_coordinate = apply_root_transformations( - keypoints3D, bone_index) - else: - transformation, relative_coordinate = apply_child_transformations( - keypoints3D, bone_index, parent_index, transformations) - transformations[bone_index] = transformation - relative_coordinates[bone_index] = relative_coordinate - return relative_coordinates - - -def keypoint_to_root_frame(keypoints3D, num_keypoints=21): - """ Convert keypoints to root keypoint coordinates. - # Arguments - keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3). - - # Returns - key_point_relative_frame: keypoints in root keypoint coordinate frame. - """ - keypoints3D = keypoints3D.reshape([num_keypoints, 3]) - relative_coordinates = keypoints_to_root_frame(keypoints3D) - key_point_relative_frame = np.stack(relative_coordinates, 1) - key_point_relative_frame = np.squeeze(key_point_relative_frame) - return key_point_relative_frame - - -def get_keypoints_z_rotation(keypoints3D, keypoint): - """ Rotate Keypoints along z-axis. - - # Arguments - keypoint: Keypoint to whose frame transformation is to - be done, Numpy array of shape (1, 3). - keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3). - - # Returns - reference_keypoint_z_rotation: Reference keypoint after rotation. - resultant_keypoints3D: keypoints after rotation. - rotation_matrix_z: Rotation matrix. - """ - alpha = np.arctan2(keypoint[0], keypoint[1]) - rotation_matrix = build_rotation_matrix_z(alpha) - keypoints3D = np.matmul(keypoints3D.T, rotation_matrix) - keypoint = keypoints3D[LEFT_MIDDLE_METACARPAL, :] - return keypoint, rotation_matrix, keypoints3D - - -def get_keypoints_x_rotation(keypoints3D, keypoint): - """ Rotate Keypoints along x-axis. - - # Arguments - keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3). - keypoint: Numpy array of shape (1, 3). - - # Returns - keypoint: Resultant reference keypoint after rotation, Numpy array of - shape (1, 3). - resultant_keypoints3D: keypoints after rotation. - rotation_matrix_x: Rotation matrix along x-axis. 
- """ - beta = -np.arctan2(keypoint[2], keypoint[1]) - rotation_matrix = build_rotation_matrix_x(beta + np.pi) - keypoints3D = np.matmul(keypoints3D, rotation_matrix) - keypoint = keypoints3D[LEFT_PINKY_TIP, :] - return keypoint, rotation_matrix, keypoints3D - - -def get_keypoints_y_rotation(keypoints3D, keypoint): - """ Rotate Keypoints along y-axis. - - # Arguments - keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3). - reference_keypoint: keypoint, Numpy array of shape (1, 3). - - # Returns - resultant_keypoint: Resultant reference keypoint after rotation. - resultant_keypoints3D: keypoints after rotation along Y-axis. - rotation_matrix_y: Rotation matrix along x-axis. - """ - gamma = np.arctan2(keypoint[2], keypoint[0]) - rotation_matrix = build_rotation_matrix_y(gamma) - keypoints3D = np.matmul(keypoints3D, rotation_matrix) - keypoint = keypoints3D[LEFT_PINKY_TIP, :] - return keypoint, rotation_matrix, keypoints3D - - -def canonical_transformations_on_keypoints(keypoints3D): # rename properly - # RE_CHECK - """ Transform Keypoints to canonical coordinates. - - # Arguments - keypoints3D: Keypoints, Numpy array of shape (1, num_keypoints, 3). - - # Returns - transformed_keypoints3D: Resultant keypoint after transformation. - final_rotation_matrix: Final transformation matrix. - """ - reference_keypoint = np.expand_dims(keypoints3D[:, LEFT_WRIST], 1) - keypoints3D = keypoints3D - reference_keypoint - keypoint = keypoints3D[:, LEFT_MIDDLE_METACARPAL] - final_rotation_matrix = np.ones((3, 3)) - apply_rotations = [get_keypoints_z_rotation, get_keypoints_x_rotation, - get_keypoints_y_rotation] - for function in apply_rotations: - keypoint, rotation_matrix, keypoints3D = function(keypoints3D, keypoint) - final_rotation_matrix = np.matmul(final_rotation_matrix, - rotation_matrix) - return np.squeeze(keypoints3D), np.squeeze(final_rotation_matrix) - - -def get_scale_matrix(scale, min_scale=1.0, max_scale=10.0): - """ calculate scale matrix. - - # Arguments - scale: Int value. - - # Returns - scale_original: Int value - scale_matrix: Numpy array of shape (3, 3) - """ - scale_original = np.minimum(np.maximum(scale, min_scale), max_scale) - scale_matrix = np.diag([scale_original, scale_original, 1]) - return scale_original, scale_matrix - - -def get_scale_translation_matrix(crop_center, crop_size, scale): - """ calculate scale translation matrix. - - # Arguments - crop_center: Numpy array of shape (2). - crop_size: Int value. - scale: Int value. - - # Returns - translation_matrix: Numpy array of shape (3, 3). - """ - crop_size_halved = crop_size // 2 - translated_center_x = (crop_center[0] * scale) - crop_size_halved - translated_center_y = (crop_center[1] * scale) - crop_size_halved - translation_matrix = np.diag( - [-translated_center_x, -translated_center_y, 1]) - return translation_matrix - - -def get_y_axis_rotated_keypoints(keypoint3D): - """ Rotate keypoints along y-axis - # Arguments - keypoint3D: Numpy array of shape (num_keypoints, 3). - - # Returns - keypoint3D: Numpy array of shape (num_keypoints, 3). - affine_rotation_matrix_y: Numpy array of shape (3, 3). - gamma: Numpy array of shape (1, ). 
- """ - gamma = np.arctan2(keypoint3D[0], keypoint3D[2]) - rotation_matrix_y = build_rotation_matrix_y(gamma) - affine_rotation_matrix_y = build_affine_matrix(rotation_matrix_y) - keypoint3D = np.matmul(affine_rotation_matrix_y, keypoint3D) - return keypoint3D, affine_rotation_matrix_y, gamma - - -def get_x_axis_rotated_keypoints(keypoint3D, length_from_origin, - rotation_matrix): - """ Rotate keypoints along x-axis - - # Arguments - keypoint3D: Numpy array of shape (num_keypoints, 3). - length_from_origin: Numpy array of shape (1, ). - rotation_matrix: Numpy array of shape (3, 3). - - # Returns - keypoint3D: Numpy array of shape (num_keypoints, 3). - affine_rotation_matrix_y: Numpy array of shape (3, 3). - gamma: Numpy array of shape (1, ). - """ - alpha = np.arctan2(-keypoint3D[1], keypoint3D[2]) - rotation_matrix_x = build_rotation_matrix_x(alpha) - affine_rotation_matrix_x = build_affine_matrix(rotation_matrix_x) - translation_matrix_to_origin = build_translation_matrix_SE3( - -length_from_origin) - translation_matrix_to_origin = np.expand_dims(translation_matrix_to_origin, - 0) - rotation_matrix_xy = np.matmul(affine_rotation_matrix_x, rotation_matrix) - keypoint3D = np.matmul(translation_matrix_to_origin, rotation_matrix_xy) - return keypoint3D, alpha - - -def get_transformation_parameters(keypoint3D, transformation_matrix): - """ Calculate transformation parameters. - - # Arguments - keypoint3D: Numpy array of shape (num_keypoints, 3). - transformation_matrix: Numpy array of shape (4, 4). - - # Returns - length_from_origin: float value. - alpha: float value. Rotation angle along X-axis. - gamma: float value. Rotation angle along X-axis. - final_transformation_matrix: Numpy array of shape (4, 4). - """ - length_from_origin = np.linalg.norm(keypoint3D) - - keypoint_parameters = get_y_axis_rotated_keypoints(keypoint3D) - keypoint3D_rotated_y, affine_matrix, rotation_angle_y = keypoint_parameters - - keypoint3D_rotated_x, rotation_angle_x = get_x_axis_rotated_keypoints( - keypoint3D_rotated_y, length_from_origin, affine_matrix) - - rotated_keypoints = np.matmul(keypoint3D_rotated_x, transformation_matrix) - transformation_parameters = (length_from_origin, rotation_angle_x, - rotation_angle_y, rotated_keypoints) - - return transformation_parameters - - -def transform_cropped_keypoints(cropped_keypoints, centers, scale, crop_size): - """ Transforms the cropped coordinates to the original image space. - - # Arguments - cropped_coords: Tensor (batch x num_keypoints x 3): Estimated hand - coordinates in the cropped space. - centers: Tensor (batch x 1): Repeated coordinates of the - center of the hand in global image space. - scale: Tensor (batch x 1): Scaling factor between the original image - and the cropped image. - crop_size: int: Size of the crop. - - # Returns - keypoints: Tensor (batch x num_keypoints x 3): Transformed coordinates. 
- """ - cropped_keypoints[:, [0, 1]] = cropped_keypoints[:, [1, 0]] - keypoints = np.copy(cropped_keypoints) - keypoints = keypoints - (crop_size // 2) - keypoints = keypoints / scale - keypoints = keypoints + centers - keypoints[:, [0, 1]] = keypoints[:, [1, 0]] - return keypoints - - -def canonical_to_relative_coordinates(num_keypoints, canonical_coordinates, - rotation_matrix, hand_side): - hand_arg = np.argmax(hand_side, 1) - hand_side_mask = np.equal(hand_arg, 1) - hand_side_mask = np.reshape(hand_side_mask, [-1, 1]) - hand_side_mask_3D = np.tile(hand_side_mask, [num_keypoints, 3]) - keypoint_flipped = flip_right_to_left_hand(canonical_coordinates, - hand_side_mask_3D) - relative_keypoints = np.matmul(keypoint_flipped, rotation_matrix) - return relative_keypoints diff --git a/examples/hand_pose_estimation/backend_standard.py b/examples/hand_pose_estimation/backend_standard.py deleted file mode 100644 index 0a7932afa..000000000 --- a/examples/hand_pose_estimation/backend_standard.py +++ /dev/null @@ -1,65 +0,0 @@ -import cv2 -import numpy as np - - -def wrap_as_dictionary(keys, values): - """ Wrap values with respective keys into a dictionary. - - # Arguments - keys: List of strings. - Values: List. - - # Returns - output: Dictionary. - """ - output = dict(zip(keys, values)) - return output - - -def merge_dictionaries(dicts): - """ Merge multiple dictionaries. - - # Arguments - dicts: List of dictionaries. - - # Returns - result: Dictionary. - """ - result = {} - for dict in dicts: - result.update(dict) - return result - - -def resize_image_with_linear_interpolation(image, size): - """Resize image using nearest neighbors interpolation. - - # Arguments - image: Numpy array. - size: List of two ints. - - # Returns - Numpy array. - """ - if(type(image) != np.ndarray): - raise ValueError( - 'Recieved Image is not of type numpy array', type(image)) - else: - return cv2.resize(image, size, interpolation=cv2.INTER_LINEAR) - - -def transpose_array(array): - """Resize image using nearest neighbors interpolation. - - # Arguments - image: Numpy array. - size: List of two ints. - - # Returns - Numpy array. 
- """ - if(type(array) != np.ndarray): - raise ValueError( - 'Recieved Input is not of type numpy array', type(array)) - else: - return array.T diff --git a/examples/hand_pose_estimation/demo.py b/examples/hand_pose_estimation/demo.py old mode 100755 new mode 100644 index 0897e0eff..00b9ed88e --- a/examples/hand_pose_estimation/demo.py +++ b/examples/hand_pose_estimation/demo.py @@ -1,23 +1,15 @@ import argparse +from paz.applications import MinimalHandPoseEstimation +from paz.backend.camera import VideoPlayer +from paz.backend.camera import Camera -from HandPoseEstimation import HandSegmentationNet, PosePriorNet, PoseNet -from HandPoseEstimation import ViewPointNet -from pipelines import DetectHandKeypoints -from paz.backend.camera import Camera, VideoPlayer -parser = argparse.ArgumentParser() +parser = argparse.ArgumentParser(description='Minimal hand keypoint detection') parser.add_argument('-c', '--camera_id', type=int, default=0, help='Camera device ID') args = parser.parse_args() -use_pretrained = True -HandSegNet = HandSegmentationNet() -HandPoseNet = PoseNet() -HandPosePriorNet = PosePriorNet() -HandViewPointNet = ViewPointNet() - -pipeline = DetectHandKeypoints(HandSegNet, HandPoseNet, HandPosePriorNet, - HandViewPointNet) +pipeline = MinimalHandPoseEstimation(right_hand=False) camera = Camera(args.camera_id) player = VideoPlayer((640, 480), pipeline, camera) player.run() diff --git a/examples/minimal_hand/demo3D.py b/examples/hand_pose_estimation/demo3D.py similarity index 89% rename from examples/minimal_hand/demo3D.py rename to examples/hand_pose_estimation/demo3D.py index 94243b238..7d0556a65 100644 --- a/examples/minimal_hand/demo3D.py +++ b/examples/hand_pose_estimation/demo3D.py @@ -3,7 +3,7 @@ import matplotlib.pyplot as plt from matplotlib.animation import FuncAnimation from paz.backend.camera import Camera, VideoPlayer -from paz.applications import MinimalHandPoseEstimation +from paz.applications import SSD512MinimalHandPose from paz.backend.image import resize_image, show_image from paz.datasets import MINIMAL_HAND_CONFIG @@ -13,7 +13,7 @@ help='Camera device ID') args = parser.parse_args() -pipeline = MinimalHandPoseEstimation(right_hand=False) +pipeline = SSD512MinimalHandPose(right_hand=False, offsets=[0.5, 0.5]) camera = Camera(args.camera_id) player = VideoPlayer((640, 480), pipeline, camera) @@ -62,13 +62,16 @@ def wrapped_animate(i): show_image(image, 'inference', wait=False) keypoints3D = output['keypoints3D'] + if len(keypoints3D) == 0: + return + keypoints3D = keypoints3D[0] # TAKE ONLY THE FIRST PREDICTION xs, ys, zs = np.split(keypoints3D, 3, axis=1) plt.cla() ax.set_xlabel('X') ax.set_ylabel('Y') ax.set_zlabel('Z') - ax.scatter3D(xs, ys, zs, c = joint_colors) + ax.scatter3D(xs, ys, zs, c=joint_colors) plot_3D_keypoints_link(ax, keypoints3D, link_args, link_orders, link_colors) return wrapped_animate diff --git a/examples/minimal_hand/demo_image.py b/examples/hand_pose_estimation/demo_image.py similarity index 100% rename from examples/minimal_hand/demo_image.py rename to examples/hand_pose_estimation/demo_image.py diff --git a/examples/hand_pose_estimation/hand_keypoints_loader.py b/examples/hand_pose_estimation/hand_keypoints_loader.py deleted file mode 100644 index e66a3ae62..000000000 --- a/examples/hand_pose_estimation/hand_keypoints_loader.py +++ /dev/null @@ -1,45 +0,0 @@ -import glob -import pickle - -from paz.abstract import Loader - - -class RenderedHandLoader(Loader): - def __init__(self, path, split='train'): - super().__init__(path, split, None, 
'HandPoseLoader') - self.path = path - split_to_folder = {'train': 'training', 'val': 'evaluation', - 'test': 'testing'} - self.folder = split_to_folder[split] - - def _load_annotation(self, label_path): - with open(label_path, 'rb') as file: - annotations_all = pickle.load(file) - return annotations_all - - def to_list_of_dictionaries(self, hands, segmentation_labels=None, - annotations=None): - dataset = [] - for hand_arg in range(len(hands)): - sample = dict() - sample['image_path'] = hands[hand_arg] - sample['segmentation_label'] = segmentation_labels[hand_arg] - sample['annotations'] = annotations[hand_arg] - dataset.append(sample) - return dataset - - def load_data(self): - images = sorted(glob.glob(self.path + self.folder + '/color/*.png')) - - if self.split == 'test': - dataset = self.to_list_of_dictionaries(images, None, None) - else: - segmentation_labels = sorted(glob.glob(self.path + self.folder + - '/mask/*.png')) - annotations = self._load_annotation(self.path + self.folder + - '/anno_{}.pickle'.format( - self.folder)) - dataset = self.to_list_of_dictionaries(images, segmentation_labels, - annotations) - - return dataset \ No newline at end of file diff --git a/examples/hand_pose_estimation/hand_tracking.py b/examples/hand_pose_estimation/hand_tracking.py new file mode 100644 index 000000000..a8f501301 --- /dev/null +++ b/examples/hand_pose_estimation/hand_tracking.py @@ -0,0 +1,33 @@ +import argparse +from paz.abstract import SequentialProcessor +from paz.backend.camera import VideoPlayer, Camera +from paz.applications import SSD512MinimalHandPose +from paz import processors as pr + + +parser = argparse.ArgumentParser(description='Minimal hand keypoint detection') +parser.add_argument('-c', '--camera_id', type=int, default=0, + help='Camera device ID') +parser.add_argument('-HFOV', '--horizontal_field_of_view', type=float, + default=75, help='Horizontal field of view in degrees') +args = parser.parse_args() + +camera = Camera(args.camera_id) +camera.intrinsics_from_HFOV(args.horizontal_field_of_view) + + +class HandStateEstimation(SequentialProcessor): + def __init__(self, camera): + super(HandStateEstimation, self).__init__() + intro_topics = ['image', 'boxes2D', 'keypoints2D', 'keypoints3D'] + self.add(SSD512MinimalHandPose()) + self.add(pr.UnpackDictionary(intro_topics)) + self.add(pr.ControlMap( + pr.Translation3DFromBoxWidth(camera), [1], [4], {1: 1})) + outro_topics = intro_topics + ['translation3D'] + self.add(pr.WrapOutput(outro_topics)) + + +pipeline = HandStateEstimation(camera) +player = VideoPlayer((640, 480), pipeline, camera) +player.run() diff --git a/examples/minimal_hand/demo.py b/examples/hand_pose_estimation/is_open_demo.py similarity index 78% rename from examples/minimal_hand/demo.py rename to examples/hand_pose_estimation/is_open_demo.py index 00b9ed88e..4c751f411 100644 --- a/examples/minimal_hand/demo.py +++ b/examples/hand_pose_estimation/is_open_demo.py @@ -1,15 +1,14 @@ import argparse -from paz.applications import MinimalHandPoseEstimation from paz.backend.camera import VideoPlayer from paz.backend.camera import Camera - +from paz.applications import ClassifyHandClosure parser = argparse.ArgumentParser(description='Minimal hand keypoint detection') parser.add_argument('-c', '--camera_id', type=int, default=0, help='Camera device ID') args = parser.parse_args() -pipeline = MinimalHandPoseEstimation(right_hand=False) +pipeline = ClassifyHandClosure(draw=True, right_hand=False) camera = Camera(args.camera_id) player = VideoPlayer((640, 480), 
pipeline, camera) player.run() diff --git a/examples/hand_pose_estimation/layer.py b/examples/hand_pose_estimation/layer.py deleted file mode 100644 index 86fbdb8db..000000000 --- a/examples/hand_pose_estimation/layer.py +++ /dev/null @@ -1,47 +0,0 @@ -import tensorflow as tf -from tensorflow.keras.layers import Layer -from backend_keypoints import find_max_location - - -class SegmentationDilation(Layer): - def __init__(self, filter_size=21): - super(SegmentationDilation, self).__init__() - self.filter_size = filter_size - filters = tf.ones((filter_size, filter_size, 1)) - self.kernel = filters / float(self.filter_size ** 2) - - def call(self, inputs): - segmentation_map_height, segmentation_map_width, channels = inputs.shape - scoremap_softmax = tf.nn.softmax(inputs) - scoremap_foreground = tf.reduce_max(scoremap_softmax[:, :, 1:], -1) - segmentationmap_foreground = tf.round(scoremap_foreground) - max_loc = find_max_location(scoremap_foreground) - - sparse_indices = tf.reshape(max_loc, [1, 2]) - - sparse_input = tf.SparseTensor( - dense_shape=[segmentation_map_height, segmentation_map_width], - values=[1.0], indices=sparse_indices) - - objectmap = tf.sparse.to_dense(sparse_input) - num_passes = max(segmentation_map_height, segmentation_map_width) // ( - self.filter_size // 2) - - for pass_count in range(num_passes): - objectmap = tf.reshape(objectmap, [1, segmentation_map_height, - segmentation_map_width, 1]) - - objectmap_dilated = tf.nn.dilation2d( - input=objectmap, filters=self.kernel, strides=[1, 1, 1, 1], - dilations=[1, 1, 1, 1], padding='SAME', data_format='NHWC') - - objectmap_dilated = tf.reshape(objectmap_dilated, - [segmentation_map_height, - segmentation_map_width]) - - objectmap = tf.round(tf.multiply(segmentationmap_foreground, - objectmap_dilated)) - - objectmap = tf.reshape(objectmap, [segmentation_map_height, - segmentation_map_width, 1]) - return objectmap.numpy() diff --git a/examples/hand_pose_estimation/pipelines.py b/examples/hand_pose_estimation/pipelines.py deleted file mode 100755 index e4c5b4718..000000000 --- a/examples/hand_pose_estimation/pipelines.py +++ /dev/null @@ -1,303 +0,0 @@ -import numpy as np - -from layer import SegmentationDilation -from paz import processors as pr -from paz.abstract import SequentialProcessor, Processor, Box2D -from processors_SE3 import CalculatePseudoInverse, RotationMatrixfromAxisAngles -from processors_SE3 import CanonicaltoRelativeFrame, KeypointstoPalmFrame -from processors_SE3 import GetCanonicalTransformation, TransformKeypoints -from processors_SE3 import TransformVisibilityMask, TransformtoRelativeFrame -from processors_keypoints import AdjustCropSize, CropImage -from processors_keypoints import CreateScoremaps, ExtractBoundingbox -from processors_keypoints import Extract2DKeypoints, ExtractHandsideandKeypoints -from processors_keypoints import ExtractDominantHandVisibility -from processors_keypoints import ExtractDominantKeypoints2D, CropImageFromMask -from processors_keypoints import ExtractHandmask, ExtractKeypoints -from processors_keypoints import FlipRightHandToLeftHand -from processors_keypoints import NormalizeKeypoints -from processors_standard import MergeDictionaries, ToOneHot, WrapToDictionary -from processors_standard import ResizeImageWithLinearInterpolation -from processors_standard import TransposeOfArray, ListToArray - - -class ExtractHandSegmentation(SequentialProcessor): - def __init__(self, size=320): - super(ExtractHandSegmentation, self).__init__() - self.add(pr.UnpackDictionary( - ['image', 
'segmentation_label', 'annotations'])) - - preprocess_image = pr.SequentialProcessor( - [pr.LoadImage(), pr.ResizeImage((size, size))]) - - preprocess_segmentation_map = pr.SequentialProcessor( - [pr.LoadImage(), pr.ResizeImage((size, size)), ExtractHandmask()]) - - self.add(pr.ControlMap(preprocess_image, [0], [0])) - self.add(pr.ControlMap(preprocess_segmentation_map, [1], [1])) - self.add(pr.SequenceWrapper({0: {'image': [size, size, 3]}}, - {1: {'hand_mask': [size, size]}})) - - -class ExtractHandPose2D(Processor): - def __init__(self, size, image_size, crop_size, variance): - super(ExtractHandPose2D, self).__init__() - self.unwrap_inputs = pr.UnpackDictionary( - ['image', 'segmentation_label', 'annotations']) - self.preprocess_image = pr.SequentialProcessor( - [pr.LoadImage(), pr.ResizeImage((size, size))]) - - self.preprocess_segmentation_map = pr.SequentialProcessor( - [pr.LoadImage(), pr.ResizeImage((size, size)), ExtractHandmask()]) - self.extract_annotations = pr.UnpackDictionary(['xyz', 'uv_vis', 'K']) - self.extract_2D_keypoints = Extract2DKeypoints() - self.keypoints_to_palm = KeypointstoPalmFrame() - self.visibility_to_palm = TransformVisibilityMask() - self.extract_hand_side = ExtractHandsideandKeypoints() - - self.extract_visibility_dominant_hand = ExtractDominantHandVisibility() - self.create_scoremaps = CreateScoremaps( - image_size, crop_size, variance) - self.crop_image_from_mask = CropImageFromMask() - self.wrap = pr.WrapOutput( - ['cropped_image', 'score_maps', 'keypoints_vis21']) - - def call(self, inputs, use_palm_coordinates, crop_image): - image, segmentation_label, annotations = self.unwrap_inputs(inputs) - - image = self.preprocess_image(image) - segmentation_label = self.preprocess_segmentation_map( - segmentation_label) - keypoints3D, keypoints2D, camera_matrix = self.extract_annotations( - annotations) - keypoints2D, keypoints_visibility_mask = self.extract_2D_keypoints( - keypoints2D) - - if use_palm_coordinates: - keypoints2D = self.keypoints_to_palm(keypoints2D) - keypoints_visibility_mask = self.visibility_to_palm( - keypoints_visibility_mask) - - hand_side, keypoints3D, dominant_hand = self.extract_hand_side( - segmentation_label, keypoints3D) - - keypoints21 = self.extract_visibility_dominant_hand( - keypoints_visibility_mask, dominant_hand) - - scoremaps = self.create_scoremaps(keypoints2D, keypoints21) - - if crop_image: - image = self.crop_image_from_mask( - keypoints2D, keypoints21, image, camera_matrix) - - return self.wrap(image, scoremaps, keypoints21) - - -class ExtractHandPose(Processor): - def __init__(self, size, image_size, crop_size, variance): - super(ExtractHandPose, self).__init__() - self.unwrap_inputs = pr.UnpackDictionary( - ['image', 'segmentation_label', 'annotations']) - self.preprocess_image = pr.SequentialProcessor( - [pr.LoadImage(), - pr.ResizeImage((size, size))]) - - self.preprocess_segmentation_map = pr.SequentialProcessor( - [pr.LoadImage(), - pr.ResizeImage((size, size)), - ExtractHandmask()]) - - self.extract_annotations = pr.UnpackDictionary(['xyz', 'uv_vis', 'K']) - self.extract_2D_keypoints = Extract2DKeypoints() - self.keypoints_to_palm = KeypointstoPalmFrame() - self.visibility_to_palm = TransformVisibilityMask() - self.extract_hand_side = ExtractHandsideandKeypoints() - self.to_one_hot = ToOneHot(num_classes=2) - self.normaliza_keypoints = NormalizeKeypoints() - self.to_relative_frame = TransformtoRelativeFrame() - self.canonical_transformations = GetCanonicalTransformation() - self.flip_right_hand = 
FlipRightHandToLeftHand() - self.get_matrix_inverse = CalculatePseudoInverse() - - self.extract_hand_visibility = ExtractDominantHandVisibility() - self.extract_dominant_keypoints = ExtractDominantKeypoints2D() - - self.crop_image_from_mask = CropImageFromMask() - self.create_scoremaps = CreateScoremaps( - image_size=image_size, crop_size=crop_size, variance=variance) - - self.wrap = pr.WrapOutput( - ['score_maps', 'hand_side', 'keypoints3D', 'rotation_matrix']) - - def call(self, inputs, use_palm_coordinates, crop_image, - flip_right_hand=False): - image, segmentation_label, annotations = self.unwrap_inputs(inputs) - - image = self.preprocess_image(image) - segmentation_label = self.preprocess_segmentation_map( - segmentation_label) - keypoints3D, keypoints2D, camera_matrix = self.extract_annotations( - annotations) - keypoints2D, keypoints_visibility_mask = self.extract_2D_keypoints( - keypoints2D) - - if use_palm_coordinates: - keypoints2D = self.keypoints_to_palm(keypoints2D) - keypoints3D = self.keypoints_to_palm(keypoints3D) - keypoints_visibility_mask = self.visibility_to_palm( - keypoints_visibility_mask) - - hand_side, keypoints3D, dominant_hand = self.extract_hand_side( - segmentation_label, keypoints3D) - - hand_side_one_hot = self.to_one_hot(hand_side) - - keypoint_scale, keypoints3D = self.normaliza_keypoints(keypoints3D) - keypoints3D = self.to_relative_frame(keypoints3D) - keypoints3D, canonical_rotation_matrix = self.canonical_transformations( - keypoints3D) - - if flip_right_hand: - keypoints3D = self.flip_right_hand(keypoints3D) - - canonical_rotation_matrix = self.get_matrix_inverse( - canonical_rotation_matrix) - - visible_keypoints = self.extract_hand_visibility( - keypoints_visibility_mask, dominant_hand) - dominant_keypoints = self.extract_dominant_keypoints( - keypoints2D, dominant_hand) - - if crop_image: - scale, image, visible_keypoints, camera_matrix = \ - self.crop_image_from_mask( - visible_keypoints, dominant_keypoints, image, camera_matrix) - scoremaps = self.create_scoremaps( - canonical_rotation_matrix, visible_keypoints) - - return self.wrap(scoremaps, hand_side_one_hot, keypoints3D, - canonical_rotation_matrix) - - -class Process2DKeypoints(SequentialProcessor): - def __init__(self, PoseNet): - super(Process2DKeypoints, self).__init__() - self.add(pr.ExpandDims(0)) - self.add(pr.Predict(PoseNet)) - - -class PostProcessKeypoints(SequentialProcessor): - def __init__(self, number_of_keypoints=21): - super(PostProcessKeypoints, self).__init__() - self.add(pr.UnpackDictionary(['canonical_coordinates', - 'rotation_parameters', 'hand_side'])) - self.add(pr.ControlMap(RotationMatrixfromAxisAngles(), [1], [1])) - self.add(pr.ControlMap(CanonicaltoRelativeFrame(number_of_keypoints), - [0, 1, 2], [0])) - - -class PostProcessSegmentation(Processor): - def __init__(self, image_size=320, crop_shape=(256, 256)): - super(PostProcessSegmentation, self).__init__() - self.unpack_inputs = pr.UnpackDictionary(['image', - 'raw_segmentation_map']) - self.resize_segmentation_map = ResizeImageWithLinearInterpolation( - shape=(image_size, image_size)) - self.dilate_map = SegmentationDilation() - self.extract_box = ExtractBoundingbox() - self.adjust_crop_size = AdjustCropSize() - self.crop_image = CropImage(crop_shape[0]) - self.expand_dims = pr.ExpandDims(axis=0) - self.squeeze_input = pr.Squeeze(axis=0) - - def call(self, inputs): - image, raw_segmentation_map = self.unpack_inputs(inputs) - raw_segmentation_map = self.squeeze_input(raw_segmentation_map) - raw_segmentation_map = 
self.resize_segmentation_map( - raw_segmentation_map) - segmentation_map = self.dilate_map(raw_segmentation_map) - if not np.count_nonzero(segmentation_map): - return None - center, bounding_box, crop_size = self.extract_box(segmentation_map) - crop_size = self.adjust_crop_size(crop_size) - cropped_image = self.crop_image(image, center, crop_size) - return cropped_image, segmentation_map, center, bounding_box, crop_size - - -class ResizeScoreMaps(Processor): # Change to Sequential processor - def __init__(self, crop_shape=(256, 256)): - super(ResizeScoreMaps, self).__init__() - self.unpack_inputs = pr.UnpackDictionary(['score_maps']) - self.crop_shape = crop_shape - self.squeeze = pr.Squeeze(axis=0) - self.transpose = TransposeOfArray() - self.resize_scoremap = pr.ResizeImages(crop_shape) - self.list_to_array = ListToArray() - self.expand_dims = pr.ExpandDims(axis=0) - - def call(self, input): - scoremaps = self.unpack_inputs(input) - scoremaps = self.squeeze(scoremaps) - scoremaps_transposed = self.transpose(scoremaps) - scoremaps_resized = self.resize_scoremap(scoremaps_transposed) - scoremaps_resized = self.list_to_array(scoremaps_resized) - scoremaps_transposed = self.transpose(scoremaps_resized) - return scoremaps_transposed - - -class DetectHandKeypoints(Processor): - def __init__(self, handsegnet, posenet, posepriornet, viewpointnet, - image_size=320, crop_shape=(256, 256), num_keypoints=21): - super(DetectHandKeypoints, self).__init__() - - self.preprocess_image = SequentialProcessor( - [pr.NormalizeImage(), pr.ResizeImage((image_size, image_size)), - pr.ExpandDims(0)]) - postprocess_segmentation = PostProcessSegmentation(image_size, - crop_shape) - self.localize_hand = pr.Predict(handsegnet, - postprocess=postprocess_segmentation) - - self.resize_scoremaps = ResizeScoreMaps(crop_shape) - self.merge_dictionaries = MergeDictionaries() - self.wrap_input = WrapToDictionary(['hand_side']) - - self.predict_keypoints2D = pr.Predict(posenet) - self.predict_keypoints3D = pr.Predict(posepriornet) - self.predict_keypoints_angles = pr.Predict(viewpointnet) - self.postprocess_keypoints = PostProcessKeypoints() - self.resize = pr.ResizeImage(shape=crop_shape) - self.extract_2D_keypoints = ExtractKeypoints() - self.transform_keypoints = TransformKeypoints() - self.draw_keypoint = pr.DrawKeypoints2D(num_keypoints, normalized=True, - radius=4) - self.denormalize = pr.DenormalizeImage() - self.wrap = pr.WrapOutput(['image', 'keypoints2D', 'keypoints3D']) - self.expand_dims = pr.ExpandDims(axis=0) - self.draw_boxes = pr.DrawBoxes2D(['hand'], [[0, 1, 0]]) - - def call(self, input_image, hand_side=np.array([[1.0, 0.0]])): - image = self.preprocess_image(input_image) - hand_features = self.localize_hand(image) - if hand_features is None: - output = self.wrap(input_image.astype('uint8'), None, None) - return output - hand_crop, segmentation_map, center, box, crop_size_best = hand_features - box = Box2D(box, score=1.0, class_name='hand') - image = self.draw_boxes(np.squeeze(image), [box]) - hand_crop = self.expand_dims(hand_crop) - score_maps = self.predict_keypoints2D(hand_crop) - score_maps_resized = self.resize_scoremaps(score_maps) - hand_side = {'hand_side': hand_side} - score_maps = self.merge_dictionaries([score_maps, hand_side]) - keypoints_2D = self.extract_2D_keypoints(score_maps_resized) - rotation_parameters = self.predict_keypoints3D(score_maps) - viewpoints = self.predict_keypoints_angles(score_maps) - canonical_keypoints = self.merge_dictionaries([rotation_parameters, - viewpoints]) - 
keypoints3D = self.postprocess_keypoints(canonical_keypoints) - keypoints2D = self.transform_keypoints(keypoints_2D, center, - crop_size_best, 256) - image = self.draw_keypoint(np.squeeze(image), keypoints2D) - image = self.denormalize(image) - output = self.wrap(image.astype('uint8'), keypoints2D, keypoints3D) - return output diff --git a/examples/hand_pose_estimation/processors_SE3.py b/examples/hand_pose_estimation/processors_SE3.py deleted file mode 100644 index 937c58c60..000000000 --- a/examples/hand_pose_estimation/processors_SE3.py +++ /dev/null @@ -1,103 +0,0 @@ -import numpy as np - -from backend_SE3 import rotation_from_axis_angles -from backend_keypoints import canonical_to_relative_coordinates -from backend_keypoints import canonical_transformations_on_keypoints -from backend_keypoints import keypoint_to_root_frame -from backend_keypoints import keypoints_to_palm_coordinates -from backend_keypoints import transform_cropped_keypoints -from backend_keypoints import transform_visibility_mask -from paz.abstract import Processor - - -class TransformKeypoints(Processor): - """ Transform the keypoint from cropped image frame to original image - frame""" - - def __init__(self): - super(TransformKeypoints, self).__init__() - - def call(self, cropped_keypoints, centers, scale, crop_size): - keypoints_2D = transform_cropped_keypoints(cropped_keypoints, centers, - scale, crop_size) - return keypoints_2D - - -class KeypointstoPalmFrame(Processor): - """Translate to Wrist Coordinates. - """ - - def __init__(self): - super(KeypointstoPalmFrame, self).__init__() - - def call(self, keypoints): - return keypoints_to_palm_coordinates(keypoints=keypoints) - - -class TransformVisibilityMask(Processor): - """Tranform Visibility Mask to palm coordinates. - """ - - def __init__(self): - super(TransformVisibilityMask, self).__init__() - - def call(self, visibility_mask): - return transform_visibility_mask(visibility_mask) - - -class TransformtoRelativeFrame(Processor): - """Transform to Relative Frame.""" - - def __init__(self): - super(TransformtoRelativeFrame, self).__init__() - - def call(self, keypoints3D): - return keypoint_to_root_frame(keypoints3D) - - -class GetCanonicalTransformation(Processor): - """Extract Canonical Transformation matrix. To transform keypoints to - palm frame inorder to make them rotationally invariant - """ - - def __init__(self): - super(GetCanonicalTransformation, self).__init__() - - def call(self, keypoints3D): - return canonical_transformations_on_keypoints(keypoints3D) - - -class CalculatePseudoInverse(Processor): - """ Perform Pseudo Inverse of the matrix""" - - def __init__(self): - super(CalculatePseudoInverse, self).__init__() - - def call(self, matrix): - return np.linalg.pinv(matrix) - - -class RotationMatrixfromAxisAngles(Processor): - """ Get Rotation matrix from the axis angles""" - - def __init__(self): - super(RotationMatrixfromAxisAngles, self).__init__() - - def call(self, rotation_angles): - return rotation_from_axis_angles(rotation_angles) - - -class CanonicaltoRelativeFrame(Processor): - """ Transform the keypoints from Canonical coordinates to chosen relative ( - wrist or palm) coordinates. 
To make keypoints rotationally invariant """ - - def __init__(self, num_keypoints=21): - super(CanonicaltoRelativeFrame, self).__init__() - self.num_keypoints = num_keypoints - - def call(self, canonical_coordinates, rotation_matrix, hand_side): - canonical_coordinates = canonical_coordinates.reshape((21, 3)) - keypoints = canonical_to_relative_coordinates( - self.num_keypoints, canonical_coordinates, rotation_matrix, - hand_side) - return keypoints diff --git a/examples/hand_pose_estimation/processors_keypoints.py b/examples/hand_pose_estimation/processors_keypoints.py deleted file mode 100644 index adb376c47..000000000 --- a/examples/hand_pose_estimation/processors_keypoints.py +++ /dev/null @@ -1,189 +0,0 @@ -import numpy as np - -from backend_keypoints import create_score_maps, extract_2D_keypoints -from backend_keypoints import crop_image_from_coordinates, extract_keypoints -from backend_keypoints import crop_image_from_mask, extract_hand_segment -from backend_keypoints import extract_bounding_box, find_max_location -from backend_keypoints import extract_dominant_hand_visibility -from backend_keypoints import extract_dominant_keypoints2D -from backend_keypoints import flip_right_to_left_hand -from backend_keypoints import get_hand_side_and_keypooints -from backend_keypoints import normalize_keypoints - -from paz.abstract import Processor - - -class ExtractHandmask(Processor): - """Extract Hand mask from the segmentation label provided. The pixels - with value greater than 1 belongs to hands - """ - - def __init__(self): - super(ExtractHandmask, self).__init__() - - def call(self, segmentation_label): - return extract_hand_segment(segmentation_label=segmentation_label) - - -class ExtractHandsideandKeypoints(Processor): - """Extract Hand Side by counting the number of pixels belonging to each - hand. - """ - - def __init__(self): - super(ExtractHandsideandKeypoints, self).__init__() - - def call(self, hand_parts_mask, keypoints3D): - return get_hand_side_and_keypooints(hand_parts_mask, keypoints3D) - - -class NormalizeKeypoints(Processor): - """Normalize KeyPoints. - """ - - def __init__(self): - super(NormalizeKeypoints, self).__init__() - - def call(self, keypoints3D): - return normalize_keypoints(keypoints3D) - - -class FlipRightHandToLeftHand(Processor): - """Flip Right hand keypoints to Left hand keypoints. - """ - - def __init__(self, flip_to_left=True): - super(FlipRightHandToLeftHand, self).__init__() - self.flip_to_left = flip_to_left - - def call(self, keypoints3D): - return flip_right_to_left_hand(keypoints3D, self.flip_to_left) - - -class ExtractDominantHandVisibility(Processor): - """Extract hand Visibility of Left or Right hand based on the - dominant_hand flag. - """ - - def __init__(self): - super(ExtractDominantHandVisibility, self).__init__() - - def call(self, keypoint_visibility, dominant_hand): - return extract_dominant_hand_visibility(keypoint_visibility, - dominant_hand) - - -class ExtractDominantKeypoints2D(Processor): - """Extract hand keypoints of Left or Right hand based on the - dominant_hand flag. - """ - - def __init__(self): - super(ExtractDominantKeypoints2D, self).__init__() - - def call(self, keypoint_visibility, dominant_hand): - return extract_dominant_keypoints2D(keypoint_visibility, - dominant_hand) - - -class CropImageFromMask(Processor): - """Crop Image from Mask. 
- """ - - def __init__(self, image_size=(320, 320, 3), crop_size=256): - super(CropImageFromMask, self).__init__() - self.image_size = image_size - self.crop_size = crop_size - - def call(self, keypoints, keypoint_visibility, image, camera_matrix): - return crop_image_from_mask(keypoints, keypoint_visibility, image, - self.image_size, self.crop_size, - camera_matrix) - - -class CreateScoremaps(Processor): - """Create Gaussian Score maps representing 2D Keypoints. - image_size: Size of the input image - crop_size: Cropped Image size - variance: variance of the gaussian scoremap to be generated - """ - - def __init__(self, image_size, crop_size, variance): - super(CreateScoremaps, self).__init__() - self.image_size = image_size - self.crop_size = crop_size - self.variance = variance - - def call(self, keypoints2D, keypoints_visibility): - return create_score_maps(keypoints2D, keypoints_visibility, - self.image_size, self.crop_size, self.variance) - - -class Extract2DKeypoints(Processor): - """ Extract the keyppoints based on the visibility of the hand""" - - def __init__(self): - super(Extract2DKeypoints, self).__init__() - - def call(self, keypoint_visibility): - return extract_2D_keypoints(keypoint_visibility) - - -class ExtractBoundingbox(Processor): - """ Extract bounding box from a binary mask""" - - def __init__(self): - super(ExtractBoundingbox, self).__init__() - - def call(self, binary_hand_mask): - return extract_bounding_box(binary_hand_mask) - - -class AdjustCropSize(Processor): - """ Adjust the crop size with a buffer of scale 0.25 added""" - - def __init__(self, crop_size=256): - super(AdjustCropSize, self).__init__() - self.crop_size = crop_size - - def call(self, crop_size_best): - crop_size_best = crop_size_best.astype(dtype=np.float64) - crop_size_best = crop_size_best * 1.25 - scaled_crop = np.maximum(self.crop_size / crop_size_best, 0.25) - scaled_crop = np.minimum(scaled_crop, 5.0) - return scaled_crop - - -class CropImage(Processor): - """ Crop the input image provided the location, output image size and the - scaling of the output image""" - - def __init__(self, crop_size=256): - super(CropImage, self).__init__() - self.crop_size = crop_size - - def call(self, image, crop_location, scale): - return crop_image_from_coordinates(image, crop_location, self.crop_size, - scale) - - -class ExtractKeypoints(Processor): - """ Extract keypoints when provided with a predicted scoremap""" - - def __init__(self): - super(ExtractKeypoints, self).__init__() - - def call(self, keypoint_scoremaps): - return extract_keypoints(keypoint_scoremaps) - - -class FindMaxLocation(Processor): - """ Find the brightest point in the score map, which is represented as a - keypoint""" - - def __init__(self): - super(FindMaxLocation, self).__init__() - - def call(self, scoremaps): - keypoints_2D = find_max_location(scoremaps) - return keypoints_2D diff --git a/examples/hand_pose_estimation/processors_standard.py b/examples/hand_pose_estimation/processors_standard.py deleted file mode 100644 index 9061217ce..000000000 --- a/examples/hand_pose_estimation/processors_standard.py +++ /dev/null @@ -1,67 +0,0 @@ -import numpy as np -from paz.abstract import Processor -from backend_standard import wrap_as_dictionary, merge_dictionaries -from backend_standard import resize_image_with_linear_interpolation -from paz.backend.boxes import to_one_hot - - -class WrapToDictionary(Processor): - """ Wrap the input values to a dictionary with already provided key - values """ - - def __init__(self, keys): - 
super(WrapToDictionary, self).__init__() - if not isinstance(keys, list): - keys = list(keys) - self.keys = keys - - def call(self, values): - if not isinstance(values, list): - values = list(values) - return wrap_as_dictionary(self.keys, values) - - -class MergeDictionaries(Processor): - """ Merge two dictionaries into one""" - - def __init__(self): - super(MergeDictionaries, self).__init__() - - def call(self, dicts): - return merge_dictionaries(dicts) - - -class ToOneHot(Processor): - """Extract Hand mask.""" - - def __init__(self, num_classes=2): - super(ToOneHot, self).__init__() - self.num_classes = num_classes - - def call(self, class_indices): - return to_one_hot(class_indices, self.num_classes) - - -class ResizeImageWithLinearInterpolation(Processor): - def __init__(self, shape): - self.shape = shape - super(ResizeImageWithLinearInterpolation, self).__init__() - - def call(self, image): - return resize_image_with_linear_interpolation(image, self.shape) - - -class TransposeOfArray(Processor): - def __init__(self): - super(TransposeOfArray, self).__init__() - - def call(self, array): - return array.T - - -class ListToArray(Processor): - def __init__(self): - super(ListToArray, self).__init__() - - def call(self, input): - return np.array(input) \ No newline at end of file diff --git a/examples/hand_pose_estimation/test_data_loaders.py b/examples/hand_pose_estimation/test_data_loaders.py deleted file mode 100644 index a82d6c7e6..000000000 --- a/examples/hand_pose_estimation/test_data_loaders.py +++ /dev/null @@ -1,24 +0,0 @@ -from data_loaders import HandPoseLoader -from backend import to_homogeneous_coordinates, normalize_keypoints - -data_loader = HandPoseLoader( - '/home/dfki.uni-bremen.de/jbandlamudi/DFKI_Work/RHD_published_v2/') - - -def test_image_loading(image_path): - image = data_loader.load_images(image_path) - assert image.shape == data_loader.image_size - - -def test_segmentation_map_loading(segmentation_path): - segmentation_mask = data_loader.load_images(segmentation_path) - assert segmentation_mask.shape == data_loader.image_size - - -def test_conversion_to_homogeneous_coordinates(vector): - homogeneous_vector = to_homogeneous_coordinates(vector) - assert len(homogeneous_vector) == 4 - - -def test_keypoint_normalization(keypoints): - keypoint_scale, norm_keypoints = normalize_keypoints(keypoints) diff --git a/examples/hand_pose_estimation/test_pipeline.py b/examples/hand_pose_estimation/test_pipeline.py deleted file mode 100644 index 097139f7d..000000000 --- a/examples/hand_pose_estimation/test_pipeline.py +++ /dev/null @@ -1,19 +0,0 @@ -from HandPoseEstimation import HandSegmentationNet, PosePriorNet, PoseNet -from HandPoseEstimation import ViewPointNet -from paz.backend.image.opencv_image import load_image, show_image, write_image -from pipelines import DetectHandKeypoints - -use_pretrained = True -HandSegNet = HandSegmentationNet() -HandPoseNet = PoseNet() -HandPosePriorNet = PosePriorNet() -HandViewPointNet = ViewPointNet() - -pipeline = DetectHandKeypoints(HandSegNet, HandPoseNet, HandPosePriorNet, - HandViewPointNet) - -image = load_image('./sample.jpg') -detection = pipeline(image) - -show_image(detection['image'].astype('uint8')) -write_image('./detection.jpg', detection['image'].astype('uint8')) diff --git a/examples/hand_pose_estimation/train_handsegnet.py b/examples/hand_pose_estimation/train_handsegnet.py deleted file mode 100644 index 14cb84bf5..000000000 --- a/examples/hand_pose_estimation/train_handsegnet.py +++ /dev/null @@ -1,122 +0,0 @@ -import os 
-import json -import argparse -from datetime import datetime - -import tensorflow as tf - -gpus = tf.config.experimental.list_physical_devices('GPU') -tf.config.experimental.set_memory_growth(gpus[0], True) - -from tensorflow.keras.optimizers import Adam -from tensorflow.keras.callbacks import CSVLogger, EarlyStopping -from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau -from tensorflow.keras.losses import CategoricalCrossentropy - -from paz.abstract import ProcessingSequence -from pipelines import AugmentHandSegmentation -from HandPoseEstimation import Hand_Segmentation_Net -from hand_keypoints_loader import RenderedHandLoader -from utils import load_pretrained_weights - -description = 'Training script for semantic segmentation' -parser = argparse.ArgumentParser(description=description) -parser.add_argument('--dataset_path', type=str, help='Path to dataset') -parser.add_argument('-p', '--save_path', default='experiments', type=str, - help='Path for saving evaluations') -parser.add_argument('-d', '--dataset', default='RHD', type=str, - choices=['RHD']) -parser.add_argument('-b', '--batch_size', default=5, type=int, - help='Batch size used during optimization') -parser.add_argument('-e', '--epochs', default=100, type=int, - help='Number of epochs before finishing') -parser.add_argument('-o', '--stop_patience', default=5, type=int, - help='Early stop patience') -parser.add_argument('-u', '--reduce_patience', default=2, type=int, - help='Reduce learning rate patience') -parser.add_argument('-l', '--run_label', default='RUN_00', type=str, - help='Label used to distinguish between different runs') -parser.add_argument('-s', '--evaluation_splits', nargs='+', type=str, - default=['test'], help='Splits used for evaluation') -parser.add_argument('-v', '--validation_split', default='val', type=str, - help='Split used for validation') -parser.add_argument('-t', '--time', type=str, - default=datetime.now().strftime("%d/%m/%Y %H:%M:%S")) -parser.add_argument('-a', '--activation', type=str, default='softmax', - help='Final activation function') -parser.add_argument('-z', '--image_size', default=320, type=int, - help='Image size. 
Value is applied to height and width') -parser.add_argument('-w', '--load_pretrained_weights', default=True, type=bool, - help='If True, load pre-trained weights') -parser.add_argument('-wp', '--pretrained_weights_path', - default='./person_net.ckpt.meta', type=str, - help='Path to pre-trained weights') - -args = parser.parse_args() - -model = Hand_Segmentation_Net() -loss = CategoricalCrossentropy(from_logits=True) - -splits = ['train'] + args.validation_split - -name_to_manager = {'RHD': RenderedHandLoader} - -data_managers, datasets = {}, {} -for split in splits: - args_manager = [args.dataset_path] - data_manager = name_to_manager[args.dataset](*args_manager) - data_managers[split] = data_manager - datasets[split] = data_manager.load_data() - -# instantiating sequencers -sequencers = {} -for split in splits: - data_manager = data_managers[split] - image_shape = (args.image_size, args.image_size) - processor = AugmentHandSegmentation(image_shape) - sequencers[split] = ProcessingSequence( - processor, args.batch_size, datasets[split]) - -model = Hand_Segmentation_Net() -loss = CategoricalCrossentropy(from_logits=True) - -model.compile(loss=loss, optimizer=Adam(), metrics=['mean_squared_error']) - -if args.load_pretrained_weights: - model = load_pretrained_weights(args.pretrained_weights_path, model=model, - num_layers=16) - -# creating directory for experiment -callbacks = [] -experiment_label = '_'.join([args.dataset, model.name, args.run_label]) -experiment_path = os.path.join(args.save_path, experiment_label) -if not os.path.exists(experiment_path): - os.makedirs(experiment_path) - -# setting additional callbacks -log = CSVLogger(os.path.join(experiment_path, 'optimization.log')) -stop = EarlyStopping(patience=args.stop_patience) -plateau = ReduceLROnPlateau(patience=args.reduce_patience) -save_filename = os.path.join(experiment_path, 'model.hdf5') -save = ModelCheckpoint(save_filename, save_best_only=True) -callbacks.extend([log, stop, save, plateau]) - -# saving hyper-parameters and model summary -with open(os.path.join(experiment_path, 'hyperparameters.json'), 'w') as filer: - json.dump(args.__dict__, filer, indent=4) -with open(os.path.join(experiment_path, 'model_summary.txt'), 'w') as filer: - model.summary(print_fn=lambda x: filer.write(x + '\n')) - -# starting optimization -model.fit( - sequencers['train'], - epochs=args.epochs, - validation_data=sequencers[args.validation_split], - callbacks=callbacks, - verbose=1, - workers=1, - use_multiprocessing=False) - -# saving using model tf -save_filename = os.path.join(experiment_path, 'model.tf') -model.save_weights(save_filename, save_format='tf') diff --git a/examples/hand_pose_estimation/unit_tests.py b/examples/hand_pose_estimation/unit_tests.py deleted file mode 100644 index 578db26b4..000000000 --- a/examples/hand_pose_estimation/unit_tests.py +++ /dev/null @@ -1,277 +0,0 @@ -from backend_SE3 import build_rotation_matrix_x, build_rotation_matrix_y -from backend_SE3 import build_rotation_matrix_z, build_affine_matrix -from backend_SE3 import rotation_from_axis_angles -from backend_SE3 import to_homogeneous_coordinates, build_translation_matrix_SE3 - -from backend_keypoints import canonical_transformations_on_keypoints -from backend_keypoints import get_hand_side_and_keypooints -from backend_keypoints import keypoints_to_palm_coordinates -from backend_keypoints import normalize_keypoints, extract_hand_side_keypoints -from RHDv2 import LEFT_WRIST -from RHDv2 import RIGHT_WRIST -from hand_keypoints_loader import 
RenderedHandLoader -from paz.backend.boxes import to_one_hot -from processors_standard import TransposeOfArray, ListToArray - -import paz.processors as pr -from paz.processors import SequentialProcessor - -data_loader = RenderedHandLoader( - '/media/jarvis/CommonFiles/5th_Semester/DFKI_Work/RHD_published_v2/') - -from HandPoseEstimation import HandSegmentationNet, PosePriorNet, PoseNet -from HandPoseEstimation import ViewPointNet -import numpy as np -from pipelines import PostProcessSegmentation, \ - Process2DKeypoints -from paz.backend.image.opencv_image import load_image -from backend_keypoints import create_multiple_gaussian_map -from processors_keypoints import ExtractKeypoints - -np.random.seed(0) - -use_pretrained = True -HandSegNet = HandSegmentationNet() -HandPoseNet = PoseNet() -HandPosePriorNet = PosePriorNet() -HandViewPointNet = ViewPointNet() - - -def test_keypoints_to_palm_coordinates(): - keypoints = np.arange(0, 123).reshape((41, 3)) - keypoint_palm = keypoints_to_palm_coordinates(keypoints) - assert keypoint_palm[LEFT_WRIST, :].all() == np.array([ - [18., 19., 20.]]).all() - assert keypoint_palm[RIGHT_WRIST, :].all() == np.array([ - [81., 82., 83.]]).all() - - -def test_one_hot_encode(): - one_hot_vector = to_one_hot([1], 2) - assert type(one_hot_vector).__module__ == np.__name__ - assert one_hot_vector.all() == np.array([0, 1]).all() - assert to_one_hot([0], 2).all() == np.array([1, 0]).all() - - -def test_normalize_keypoints(): - test_array = np.array([[0., 0., 0.], [1., 1., 1.], [1., 1., 1.], - [2., 2., 2.], [2., 2., 2.], [3., 3., 3.], - [3., 3., 3.], [4., 4., 4.], [5., 5., 5.], - [5., 5., 5.], [6., 6., 6.], [6., 6., 6.], - [7., 7., 7.], [8., 8., 8.], [8., 8., 8.], - [9., 9., 9.], [9., 9., 9.], [10., 10., 10.], - [10., 10., 10.], [11., 11., 11.], [12., 12., 12.]]) - keypoints3D = np.random.rand(21, 3) - keypoint_scale, keypoint_normalized = normalize_keypoints(keypoints3D) - assert round(keypoint_scale, 2) == 0.68 - assert keypoints3D.shape == keypoint_normalized.shape - assert keypoint_normalized.round().all() == test_array.all() - - -def test_extracting_handside(): - keypoints3D = np.random.rand(42, 3) - left_keypoints = extract_hand_side_keypoints(keypoints3D, 0) - right_keypoints = extract_hand_side_keypoints(keypoints3D, 1) - assert left_keypoints.shape == (21, 3) - assert right_keypoints.shape == (21, 3) - - -def test_to_homogeneous(): - vector_shape = (1, 3) - keypoint = np.zeros(vector_shape) - homogeneous_keypoint = to_homogeneous_coordinates(keypoint) - assert homogeneous_keypoint[-1] == 1 - assert homogeneous_keypoint.shape == (vector_shape[1] + 1,) - - -def test_to_translation_1D(): - translation_matrix = build_translation_matrix_SE3([1]) - - assert translation_matrix.shape == (1, 4, 4) - assert translation_matrix[-1].all() == np.array([0, 0, 0, 1]).all() - - -def test_to_translation_3D(): - translation_matrix = build_translation_matrix_SE3([1, 2, 3]) - - assert translation_matrix[:, :, -1].all() == np.array([[1, 2, 3, 1]]).all() - assert translation_matrix.shape == (1, 4, 4) - assert translation_matrix[-1].all() == np.array([0, 0, 0, 1]).all() - - -def test_to_affine_matrix(): - matrix = np.arange(0, 9).reshape((3, 3)) - affine_matrix = build_affine_matrix(matrix) - - assert matrix.shape == (3, 3) - assert affine_matrix.shape == (4, 4) - - -def test_rotation_matrix_x(): - rotation_matrix_test = np.array([[1.0000000, 0.0000000, 0.0000000], - [0.0000000, 0.8668, 0.5], - [0.0000000, -0.5, 0.8668]]) - rotation_matrix = build_rotation_matrix_x(np.deg2rad(30)) - 
assert rotation_matrix.shape == rotation_matrix_test.shape - assert np.round(np.linalg.det(rotation_matrix)) == 1.0 - assert np.round(np.linalg.inv(rotation_matrix)).all() == \ - np.round(np.transpose(rotation_matrix)).all() - assert rotation_matrix_test.round().all() == \ - rotation_matrix.round().all() - - -def test_rotation_matrix_y(): - rotation_matrix_test = np.array([[0.8660254, 0.0000000, 0.5000000], - [0.0000000, 1.0000000, 0.0000000], - [-0.5000000, 0.0000000, 0.8660254]]) - rotation_matrix = build_rotation_matrix_y(np.deg2rad(30)) - assert rotation_matrix.shape == rotation_matrix_test.shape - assert np.round(np.linalg.det(rotation_matrix)) == 1.0 - assert np.round(np.linalg.inv(rotation_matrix)).all() == \ - np.round(np.transpose(rotation_matrix)).all() - assert rotation_matrix_test.round().all() == \ - rotation_matrix.round().all() - - -def test_rotation_matrix_z(): - rotation_matrix_test = np.array([[0.8660254, -0.5000000, 0.0000000], - [0.5000000, 0.8660254, 0.0000000], - [0.0000000, 0.0000000, 1.0000000]]) - rotation_matrix = build_rotation_matrix_z(np.deg2rad(30)) - assert rotation_matrix.shape == rotation_matrix_test.shape - assert np.round(np.linalg.det(rotation_matrix)) == 1.0 - assert np.round(np.linalg.inv(rotation_matrix)).all() == \ - np.round(np.transpose(rotation_matrix)).all() - assert rotation_matrix_test.round().all() == \ - rotation_matrix.round().all() - - -def test_rotation_matrix_axis_angles(): - rotation_matrix_test = np.array([[0.739, -0.406, 0.536], - [0.536, 0.837, -0.1], - [-0.4, 0.36, 0.837]]) - rotation_matrix = rotation_from_axis_angles(np.deg2rad([15, 30, 30])) - print(rotation_matrix) - assert rotation_matrix.shape == rotation_matrix_test.shape - assert np.round(np.linalg.det(rotation_matrix)) == 1.0 - assert np.round(np.linalg.inv(rotation_matrix)).all() == \ - np.round(np.transpose(rotation_matrix)).all() - assert rotation_matrix_test.round().all() == \ - rotation_matrix.round().all() - - -def test_get_affine_matrix(): - rotation_matrix = build_rotation_matrix_x(np.deg2rad(30)) - affine_rotation_matrix = build_affine_matrix(rotation_matrix) - assert affine_rotation_matrix.shape == (4, 4) - assert affine_rotation_matrix[-1].all() == np.array([0, 0, 0, 1]).all() - - -def test_hand_side_extraction(segmentation_path, label_path): - segmentation_mask = data_loader.load_images(segmentation_path) - annotations_all = data_loader._load_annotation(label_path) - keypoints3D = data_loader.process_keypoints_3D(annotations_all[11]['xyz']) - hand_side, hand_side_keypoints, dominant_hand_keypoints = \ - get_hand_side_and_keypooints(segmentation_mask, keypoints3D) - - assert type(hand_side).__module__ == np.__name__ - assert hand_side == np.array([0]) - assert hand_side_keypoints.shape == (21, 3) - assert dominant_hand_keypoints.shape == (21, 3) - - -def test_canonical_transformations(label_path): - annotations_all = data_loader._load_annotation(label_path) - keypoints3D = data_loader.process_keypoints_3D(annotations_all[11]['xyz']) - transformed_keypoints, rotation_matrix = canonical_transformations_on_keypoints( - keypoints3D.T) - - assert transformed_keypoints.shape == (42, 3) - assert rotation_matrix.shape == (3, 3) - - -def test_preprocess_image(): - preprocess_pipeline = SequentialProcessor( - [pr.NormalizeImage(), pr.ResizeImage((320, 320)), pr.ExpandDims(0)]) - image = load_image('./sample.jpg') - processed_image = preprocess_pipeline(image) - - assert len(processed_image.shape) == 4 - assert processed_image.shape == (1, 320, 320, 3) - - -def 
test_image_cropping(): - handsegnet = HandSegmentationNet() - preprocess_image = SequentialProcessor( - [pr.NormalizeImage(), pr.ResizeImage((320, 320)), - pr.ExpandDims(0)]) - - postprocess_segmentation = PostProcessSegmentation( - 320, 320) - - localize_hand = pr.Predict(handsegnet, preprocess_image, - postprocess_segmentation) - image = load_image('./sample.jpg') - hand_crop, segmentation_map, center, boxes, crop_sizes = localize_hand( - image) - box = boxes[0] - xmin, ymin, xmax, ymax = box - crop_size = crop_sizes[0] - - assert len(hand_crop.shape) == 4 - assert hand_crop.shape == (1, 256, 256, 3) - assert len(segmentation_map.shape) == 4 - assert segmentation_map.shape == (1, 320, 320, 1) - assert center == [[191.5, 194.5]] - assert len(box) == 4 - assert box == [114, 153, 269, 236] - assert xmax > xmin and ymin > ymax - assert round(crop_size[0], 2) == 1.32 - - -def test_segmentation_postprocess(): - preprocess_pipeline = SequentialProcessor( - [pr.NormalizeImage(), pr.ResizeImage((320, 320)), pr.ExpandDims(0)]) - image = load_image('./sample.jpg') - processed_image = preprocess_pipeline(image) - - localization_pipeline = PostProcessSegmentation(HandSegNet) - localization_output = localization_pipeline(processed_image) - - assert len(localization_output) == 5 - assert localization_output[0].shape == (1, 256, 256, 3) - assert localization_output[1].shape == (1, 320, 320, 1) - assert localization_output[2].shape == (1, 2) - assert localization_output[3].shape == (1, 2, 2) - assert localization_output[4].shape == (1, 1) - - -def test_keypoints2D_process(): - preprocess_pipeline = SequentialProcessor( - [pr.NormalizeImage(), pr.ResizeImage((320, 320)), pr.ExpandDims(0)]) - image = load_image('./sample.jpg') - processed_image = preprocess_pipeline(image) - - localization_pipeline = PostProcessSegmentation(HandSegNet) - localization_output = localization_pipeline(processed_image) - - keypoints_pipeline = Process2DKeypoints(HandPoseNet) - score_maps_dict = keypoints_pipeline(np.squeeze(localization_output[0], - axis=0)) - score_maps = score_maps_dict['score_maps'] - - assert score_maps.shape == (1, 32, 32, 21) - assert len(score_maps) == 1 - - -def test_extract_keypoints2D(): - uv_coordinates = np.array([[0, 0], [1, 1]]) - uv_coordinates = np.expand_dims(uv_coordinates, axis=0) - - gaussian_maps = create_multiple_gaussian_map(uv_coordinates, (256, 256), - sigma=0.1, validity_mask=None) - gaussian_maps = np.expand_dims(gaussian_maps, axis=0) - keypoints_extraction_pipeline = ExtractKeypoints() - keypoints2D = keypoints_extraction_pipeline(gaussian_maps) - - assert keypoints2D[0] == [0, 0] diff --git a/examples/hand_pose_estimation/utils.py b/examples/hand_pose_estimation/utils.py deleted file mode 100644 index b63b843b8..000000000 --- a/examples/hand_pose_estimation/utils.py +++ /dev/null @@ -1,93 +0,0 @@ -import tensorflow as tf -import numpy as np -import matplotlib.pyplot as plt -import cv2 -from PIL import Image -from skimage import transform - - -def load_pretrained_weights(weights_path, model, num_layers): - with tf.compat.v1.Session() as sess: - - # import graph - saver = tf.compat.v1.train.import_meta_graph(weights_path) - sess.run(tf.compat.v1.global_variables_initializer()) - # load weights for graph - saver.restore(sess, weights_path[:-5]) - - # get all global variables (including model variables) - global_variables = tf.compat.v1.global_variables() - - # get their name and value and put them into dictionary - sess.as_default() - - model_variables = {} - for variable in 
global_variables: - try: - model_variables[variable.name] = variable.eval() - except: - print("For var={}, an exception occurred".format(variable.name)) - - layer_count = 1 # skip Input layer - for key_count, weights in enumerate(model_variables.items()): - if layer_count > num_layers: - break - - while not model.layers[layer_count].trainable_weights: - layer_count = layer_count + 1 - - if key_count % 2 == 0: - kernel = weights[1] - print(kernel.shape) - else: - bias = weights[1] - print(bias.shape) - model.layers[layer_count].set_weights([kernel, bias]) - layer_count = layer_count + 1 - - return model - - -def visualize_heatmaps(heatmaps): - """Visualize all 21 heatmaps in a 7x3 grid""" - - fig, axes = plt.subplots(7, 3, figsize=(16, 16)) - print(heatmaps.shape) - # heatmaps = np.expand_dims(heatmaps, axis=0) - - for i in range(heatmaps.shape[3]): - img_row = int(i / 3) - img_col = i % 3 - - heatmap = heatmaps[:, :, :, i] - - heatmap = (heatmap - tf.reduce_min(heatmap)) / ( - tf.reduce_max(heatmap) - tf.reduce_min(heatmap)) - - axes[img_row, img_col].imshow(np.squeeze(heatmap), cmap='jet') - plt.show() - - -def show_mask(image, name='image', wait=True): - """Shows RGB image in an external window. - - # Arguments - image: Numpy array - name: String indicating the window name. - wait: Boolean. If ''True'' window stays open until user presses a key. - If ''False'' windows closes immediately. - """ - if image.dtype != np.uint8: - raise ValueError('``image`` must be of type ``uint8``') - cv2.imshow(name, image) - if wait: - while True: - if cv2.waitKey(0) & 0xFF == ord('q'): - break - cv2.destroyAllWindows() - - -def load(filename): - np_image = Image.open(filename) - np_image = np.array(np_image).astype('float32')/255 - return np_image diff --git a/examples/minimal_hand/README.md b/examples/minimal_hand/README.md deleted file mode 100644 index da911e790..000000000 --- a/examples/minimal_hand/README.md +++ /dev/null @@ -1,21 +0,0 @@ -### This example detects hand pose from an image. 
-
-To test the live hand pose detection from camera, run:
-```py
-python demo.py
-```
-
-To test the hand pose detection on image, run:
-```py
-python demo_image.py
-```
-
-To test the live hand closure status with pose detection from camera, run:
-```py
-python is_open_demo.py
-```
-
-To test the live hand pose detection from camera and visualize keypoints in 3D, run(This module has an extra dependency of matplotlib):
-```py
-python demo3D.py
-```
\ No newline at end of file
diff --git a/examples/minimal_hand/is_open_demo.py b/examples/minimal_hand/is_open_demo.py
deleted file mode 100644
index c7cfe16a5..000000000
--- a/examples/minimal_hand/is_open_demo.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import argparse
-from paz.applications import MinimalHandPoseEstimation
-from paz.backend.camera import VideoPlayer
-from paz.backend.camera import Camera
-from paz.abstract import SequentialProcessor
-from paz import processors as pr
-
-parser = argparse.ArgumentParser(description='Minimal hand keypoint detection')
-parser.add_argument('-c', '--camera_id', type=int, default=0,
-                    help='Camera device ID')
-args = parser.parse_args()
-
-
-pipeline = SequentialProcessor()
-pipeline.add(MinimalHandPoseEstimation(right_hand=False))
-pipeline.add(pr.UnpackDictionary(['image', 'relative_angles']))
-pipeline.add(pr.ControlMap(pr.IsHandOpen(), [1], [1]))
-pipeline.add(pr.ControlMap(pr.BooleanToTextMessage('OPEN', 'CLOSE'), [1], [1]))
-pipeline.add(pr.ControlMap(pr.DrawText(), [0, 1], [1]))
-pipeline.add(pr.WrapOutput(['image', 'status']))
-
-camera = Camera(args.camera_id)
-player = VideoPlayer((640, 480), pipeline, camera)
-player.run()
diff --git a/paz/__init__.py b/paz/__init__.py
index 1c98a23a8..850505a32 100644
--- a/paz/__init__.py
+++ b/paz/__init__.py
@@ -1 +1 @@
-__version__ = '0.1.9'
+__version__ = '0.1.10'
diff --git a/paz/applications.py b/paz/applications.py
index 5e75dbce2..0128a8f21 100644
--- a/paz/applications.py
+++ b/paz/applications.py
@@ -17,3 +17,5 @@
 from .pipelines import DetNetHandKeypoints
 from .pipelines import MinimalHandPoseEstimation
 from .pipelines import DetectMinimalHand
+from .pipelines import ClassifyHandClosure
+from .pipelines import SSD512MinimalHandPose
diff --git a/paz/backend/boxes.py b/paz/backend/boxes.py
index 5401eb663..93ba29068 100644
--- a/paz/backend/boxes.py
+++ b/paz/backend/boxes.py
@@ -38,8 +38,8 @@ def to_corner_form(boxes):
 
 
 def encode(matched, priors, variances=[0.1, 0.1, 0.2, 0.2]):
-    """Encode the variances from the priorbox layers into the ground truth boxes
-    we have matched (based on jaccard overlap) with the prior boxes.
+    """Encode the variances from the priorbox layers into the ground truth
+    boxes we have matched (based on jaccard overlap) with the prior boxes.
 
     # Arguments
         matched: Numpy array of shape `(num_priors, 4)` with boxes in
@@ -367,7 +367,8 @@ def to_one_hot(class_indices, num_classes):
 
 
 def make_box_square(box):
-    """Makes box coordinates square with sides equal to the longest original side.
+    """Makes box coordinates square with sides equal to the longest
+    original side.
 
     # Arguments
         box: Numpy array with shape `(4)` with point corner coordinates.
@@ -442,7 +443,7 @@ def clip(coordinates, image_shape):
 
 
 def denormalize_box(box, image_shape):
-    """Scales corner box coordinates from normalized values to image dimensions.
+    """Scales corner box coordinates from normalized values to image dimensions
 
     # Arguments
         box: Numpy array containing corner box coordinates.
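For context: the behaviour of the deleted `examples/minimal_hand/is_open_demo.py` script is now covered by the `ClassifyHandClosure` pipeline that this patch adds to `paz.applications` (see `paz/pipelines/classification.py` below). A minimal sketch of the equivalent live demo, not part of the patch itself, assuming a default webcam at device id 0:

```python
from paz.applications import ClassifyHandClosure
from paz.backend.camera import Camera, VideoPlayer

# ClassifyHandClosure already chains MinimalHandPoseEstimation, IsHandOpen,
# BooleanToTextMessage and DrawText, so no manual SequentialProcessor is needed.
pipeline = ClassifyHandClosure(draw=True, right_hand=False)

camera = Camera(0)
player = VideoPlayer((640, 480), pipeline, camera)
player.run()
```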
diff --git a/paz/backend/camera.py b/paz/backend/camera.py
index 37433730e..6dd94bdfa 100644
--- a/paz/backend/camera.py
+++ b/paz/backend/camera.py
@@ -131,6 +131,16 @@ def intrinsics_from_HFOV(self, HFOV=70, image_shape=None):
                                [0, 0, 1.0]])
         self.intrinsics = intrinsics
 
+    def take_photo(self):
+        """Starts camera, reads buffer and returns an image.
+        """
+        self.start()
+        image = self.read()
+        # all pipelines start with RGB
+        image = convert_color_space(image, BGR2RGB)
+        self.stop()
+        return image
+
 
 class VideoPlayer(object):
     """Performs visualization inferences in a real-time video.
@@ -231,7 +241,7 @@ def record_from_file(self, video_file_path, name='video.avi',
         if (video.isOpened() is False):
             print("Error opening video file")
 
-        while(video.isOpened()):
+        while video.isOpened():
             is_frame_received, frame = video.read()
             if not is_frame_received:
                 print("Frame not received. Exiting ...")
diff --git a/paz/backend/standard.py b/paz/backend/standard.py
index 83561ca2b..a32c28c92 100644
--- a/paz/backend/standard.py
+++ b/paz/backend/standard.py
@@ -1,4 +1,5 @@
 import numpy as np
+import tensorflow as tf
 
 
 def append_values(dictionary, lists, keys):
@@ -249,3 +250,24 @@ def max_pooling_2d(image, pool_size=3, strides=1, padding='same'):
         for x in range(0, W - pool_size + 1, strides):
             max_image[y][x] = np.max(image[y:y + pool_size, x:x + pool_size])
     return max_image
+
+
+def predict(x, model, preprocess=None, postprocess=None):
+    """Preprocess, predict and postprocess input.
+    # Arguments
+        x: Input to model
+        model: Callable i.e. Keras model.
+        preprocess: Callable, used for preprocessing input x.
+        postprocess: Callable, used for postprocessing output of model.
+
+    # Note
+        If model outputs a tf.Tensor is converted automatically to numpy array.
+    """
+    if preprocess is not None:
+        x = preprocess(x)
+    y = model(x)
+    if isinstance(y, tf.Tensor):
+        y = y.numpy()
+    if postprocess is not None:
+        y = postprocess(y)
+    return y
diff --git a/paz/models/detection/haar_cascade.py b/paz/models/detection/haar_cascade.py
index 4ffbac671..708b1c234 100644
--- a/paz/models/detection/haar_cascade.py
+++ b/paz/models/detection/haar_cascade.py
@@ -32,7 +32,7 @@ def __init__(self, weights='frontalface_default', class_arg=None,
         self.scale = scale
         self.neighbors = neighbors
 
-    def predict(self, gray_image):
+    def __call__(self, gray_image):
         """ Detects faces from gray images.
 
         # Arguments
diff --git a/paz/pipelines/__init__.py b/paz/pipelines/__init__.py
index 9f00bcf5e..49b687147 100644
--- a/paz/pipelines/__init__.py
+++ b/paz/pipelines/__init__.py
@@ -20,6 +20,7 @@
 from .detection import DetectKeypoints2D
 from .detection import DetectFaceKeypointNet2D32
 from .detection import SSD512HandDetection
+from .detection import SSD512MinimalHandPose
 
 from .keypoints import KeypointNetSharedAugmentation
 from .keypoints import KeypointNetInference
@@ -36,6 +37,7 @@
 from .renderer import RandomizeRenderedImage
 
 from .classification import MiniXceptionFER
+from .classification import ClassifyHandClosure
 
 from .pose import EstimatePoseKeypoints
 from .pose import HeadPoseKeypointNet2D32
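The new `paz.backend.standard.predict` helper above is the single place where preprocessing, model invocation, tensor-to-numpy conversion and postprocessing are composed; the `pr.Predict` processor is refactored to delegate to it later in this patch. A rough usage sketch with hypothetical stand-in callables (only the `predict` signature comes from the patch):

```python
import numpy as np
from paz.backend.standard import predict


def preprocess(image):
    # hypothetical preprocessing: scale to [0, 1] and add a batch axis
    return np.expand_dims(image / 255.0, axis=0)


def model(batch):
    # hypothetical "model": per-channel mean, stands in for a Keras model
    return batch.mean(axis=(1, 2))


def postprocess(outputs):
    # hypothetical postprocessing: drop the batch axis again
    return np.squeeze(outputs, axis=0)


image = np.random.randint(0, 256, (64, 64, 3)).astype('float32')
scores = predict(image, model, preprocess, postprocess)  # shape (3,)
```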
diff --git a/paz/pipelines/classification.py b/paz/pipelines/classification.py
index c9b53050e..a2351e1ea 100644
--- a/paz/pipelines/classification.py
+++ b/paz/pipelines/classification.py
@@ -3,6 +3,7 @@
 from .image import PreprocessImage
 from ..models.classification import MiniXception
 from ..datasets import get_class_names
+from .keypoints import MinimalHandPoseEstimation
 
 
 # neutral, happiness, surprise, sadness, anger, disgust, fear, contempt
@@ -45,3 +46,32 @@ def __init__(self):
         self.add(pr.CopyDomain([0], [1]))
         self.add(pr.ControlMap(pr.ToClassName(self.class_names), [0], [0]))
         self.add(pr.WrapOutput(['class_name', 'scores']))
+
+
+class ClassifyHandClosure(SequentialProcessor):
+    """Pipeline to classify minimal hand closure status.
+
+    # Example
+    ``` python
+    from paz.pipelines import ClassifyHandClosure
+
+    classify = ClassifyHandClosure()
+
+    # apply directly to an image (numpy-array)
+    inference = classify(image)
+    ```
+
+    # Returns
+        A function that takes an RGB image and outputs an image with class
+        status drawn on it.
+    """
+    def __init__(self, draw=True, right_hand=False):
+        super(ClassifyHandClosure, self).__init__()
+        self.add(MinimalHandPoseEstimation(draw, right_hand))
+        self.add(pr.UnpackDictionary(['image', 'relative_angles']))
+        self.add(pr.ControlMap(pr.IsHandOpen(), [1], [1]))
+        self.add(pr.ControlMap(pr.BooleanToTextMessage('OPEN', 'CLOSE'),
+                               [1], [1]))
+        if draw:
+            self.add(pr.ControlMap(pr.DrawText(), [0, 1], [0], {1: 1}))
+        self.add(pr.WrapOutput(['image', 'status']))
diff --git a/paz/pipelines/detection.py b/paz/pipelines/detection.py
index c6c20900b..d173a902a 100644
--- a/paz/pipelines/detection.py
+++ b/paz/pipelines/detection.py
@@ -1,7 +1,5 @@
 import numpy as np
 
-from paz.models.detection.haar_cascade import WEIGHT_PATH
-
 from .. import processors as pr
 from ..abstract import SequentialProcessor, Processor
 from ..models import SSD512, SSD300, HaarCascadeDetector
@@ -9,7 +7,8 @@
 
 from .image import AugmentImage, PreprocessImage
 from .classification import MiniXceptionFER
-from .keypoints import FaceKeypointNet2D32
+from .keypoints import FaceKeypointNet2D32, DetectMinimalHand
+from .keypoints import MinimalHandPoseEstimation
 
 
 class AugmentBoxes(SequentialProcessor):
@@ -208,8 +207,6 @@ class SSD512YCBVideo(DetectSingleShot):
         as a dictionary with ``keys``: ``image`` and ``boxes2D``.
         The corresponding values of these keys contain the image with the drawn
         inferences and a list of ``paz.abstract.messages.Boxes2D``.
-
-
     """
     def __init__(self, score_thresh=0.60, nms_thresh=0.45, draw=True):
         names = get_class_names('YCBVideo')
@@ -516,3 +513,34 @@ def __init__(self, score_thresh=0.40, nms_thresh=0.45, draw=True):
                              head_weights='OIV6Hand')
         super(SSD512HandDetection, self).__init__(
             model, class_names, score_thresh, nms_thresh, draw=draw)
+
+
+class SSD512MinimalHandPose(DetectMinimalHand):
+    """Hand detection and minimal hand pose estimation pipeline.
+
+    # Arguments
+        right_hand: Boolean. True for right hand inference.
+        offsets: List of two elements. Each element must be between [0, 1].
+
+    # Example
+    ``` python
+    from paz.pipelines import SSD512MinimalHandPose
+
+    detect = SSD512MinimalHandPose()
+
+    # apply directly to an image (numpy-array)
+    inferences = detect(image)
+    ```
+
+    # Returns
+        A function that takes an RGB image and outputs the predictions
+        as a dictionary with ``keys``: ``image``, ``boxes2D``,
+        ``Keypoints2D``, ``Keypoints3D``.
+        The corresponding values of these keys contain the image with the drawn
+        inferences.
+    """
+    def __init__(self, right_hand=False, offsets=[0.25, 0.25]):
+        detector = SSD512HandDetection()
+        keypoint_estimator = MinimalHandPoseEstimation(right_hand)
+        super(SSD512MinimalHandPose, self).__init__(
+            detector, keypoint_estimator, offsets)
diff --git a/paz/pipelines/keypoints.py b/paz/pipelines/keypoints.py
index 13f17d2ba..951228cb9 100644
--- a/paz/pipelines/keypoints.py
+++ b/paz/pipelines/keypoints.py
@@ -10,7 +10,7 @@
 
 from .angles import IKNetHandJointAngles
 
-from ..backend.image import get_affine_transform, flip_left_right, lincolor
+from ..backend.image import get_affine_transform, lincolor
 from ..backend.keypoints import flip_keypoints_left_right, uv_to_vu
 from ..datasets import JOINT_CONFIG, FLIP_CONFIG
 
@@ -276,24 +276,26 @@ def __init__(self, shape=(128, 128), draw=True, right_hand=False):
         super(DetNetHandKeypoints).__init__()
         self.draw = draw
         self.right_hand = right_hand
-        self.preprocess = pr.SequentialProcessor(
-            [pr.ResizeImage(shape), pr.ExpandDims(axis=0)])
-        self.hand_estimator = DetNet()
+        self.preprocess = pr.SequentialProcessor()
+        self.preprocess.add(pr.ResizeImage(shape))
+        self.preprocess.add(pr.ExpandDims(axis=0))
+        if self.right_hand:
+            self.preprocess.add(pr.FlipLeftRightImage())
+        self.predict = pr.Predict(model=DetNet(), preprocess=self.preprocess)
         self.scale_keypoints = pr.ScaleKeypoints(scale=4, shape=shape)
         self.draw_skeleton = pr.DrawHandSkeleton()
         self.wrap = pr.WrapOutput(['image', 'keypoints3D', 'keypoints2D'])
 
-    def call(self, input_image):
-        image = self.preprocess(input_image)
-        if self.right_hand:
-            image = flip_left_right(image)
-        keypoints3D, keypoints2D = self.hand_estimator.predict(image)
+    def call(self, image):
+        keypoints3D, keypoints2D = self.predict(image)
+        keypoints3D = keypoints3D.numpy()
+        keypoints2D = keypoints2D.numpy()
         if self.right_hand:
             keypoints2D = flip_keypoints_left_right(keypoints2D)
         keypoints2D = uv_to_vu(keypoints2D)
-        keypoints2D = self.scale_keypoints(keypoints2D, input_image)
+        keypoints2D = self.scale_keypoints(keypoints2D, image)
         if self.draw:
-            image = self.draw_skeleton(input_image, keypoints2D)
+            image = self.draw_skeleton(image, keypoints2D)
         return self.wrap(image, keypoints3D, keypoints2D)
 
 
diff --git a/paz/processors/__init__.py b/paz/processors/__init__.py
index 7d2150f1f..8a6186383 100644
--- a/paz/processors/__init__.py
+++ b/paz/processors/__init__.py
@@ -57,6 +57,7 @@
 from .image import ReplaceLowerThanThreshold
 from .image import GetNonZeroArguments
 from .image import GetNonZeroValues
+from .image import FlipLeftRightImage
 from .image import ImagenetPreprocessInput
 
 
@@ -118,9 +119,11 @@
 from .standard import Scale
 from .standard import AppendValues
 from .standard import BooleanToTextMessage
+from .standard import PrintTopics
 
 from .pose import SolvePNP
 from .pose import SolveChangingObjectPnPRANSAC
+from .pose import Translation3DFromBoxWidth
 
 from .groups import ToAffineMatrix
 from .groups import RotationVectorToQuaternion
diff --git a/paz/processors/image.py b/paz/processors/image.py
index 93b7579da..2d464ecb7 100644
--- a/paz/processors/image.py
+++ b/paz/processors/image.py
@@ -22,6 +22,7 @@
 from ..backend.image import normalized_device_coordinates_to_image
 from ..backend.image import image_to_normalized_device_coordinates
 from ..backend.image import replace_lower_than_threshold
+from ..backend.image import flip_left_right
 from ..backend.image import BILINEAR, CUBIC
 from ..backend.image.tensorflow_image import imagenet_preprocess_input
@@ -497,3 +498,16 @@ def __init__(self):
 
     def call(self, image):
         return imagenet_preprocess_input(image)
+
+
+class FlipLeftRightImage(Processor):
+    """Flips an image left and right.
+
+    # Arguments
+        image: Numpy array.
+    """
+    def __init__(self):
+        super(FlipLeftRightImage, self).__init__()
+
+    def call(self, image):
+        return flip_left_right(image)
diff --git a/paz/processors/pose.py b/paz/processors/pose.py
index 6475e1e50..0580ba8a7 100644
--- a/paz/processors/pose.py
+++ b/paz/processors/pose.py
@@ -91,3 +91,39 @@ def call(self, object_points3D, image_points2D):
             self.inlier_thresh, self.num_iterations)
         rotation_vector = np.squeeze(rotation_vector)
         return success, rotation_vector, translation
+
+
+class Translation3DFromBoxWidth(Processor):
+    """Computes 3D translation from box width and real width ratio.
+
+    # Arguments
+        camera: Instance of ''paz.backend.Camera'' containing as properties
+            the ``camera_intrinsics`` a Numpy array of shape ``[3, 3]``
+            usually calculated from the openCV ``calibrateCamera`` function,
+            and the ``distortion`` a Numpy array of shape ``[5]`` in which the
+            elements are usually obtained from the openCV
+            ``calibrateCamera`` function.
+        real_width: Real width of the predicted box2D.
+
+    # Returns
+        Array (num_boxes, 3) containing all 3D translations.
+    """
+    def __init__(self, camera, real_width=0.3):
+        super(Translation3DFromBoxWidth, self).__init__()
+        self.camera = camera
+        self.real_width = real_width
+        self.focal_length = self.camera.intrinsics[0, 0]
+        self.u_camera_center = self.camera.intrinsics[0, 2]
+        self.v_camera_center = self.camera.intrinsics[1, 2]
+
+    def call(self, boxes2D):
+        hands_center = []
+        for box in boxes2D:
+            u_box_center, v_box_center = box.center
+            z_center = (self.real_width * self.focal_length) / box.width
+            u = u_box_center - self.u_camera_center
+            v = v_box_center - self.v_camera_center
+            x_center = (z_center * u) / self.focal_length
+            y_center = (z_center * v) / self.focal_length
+            hands_center.append([x_center, y_center, z_center])
+        return np.array(hands_center)
diff --git a/paz/processors/standard.py b/paz/processors/standard.py
index fa9e0626e..07c10ce28 100644
--- a/paz/processors/standard.py
+++ b/paz/processors/standard.py
@@ -2,7 +2,7 @@
 
 from ..abstract import Processor
 from ..backend.boxes import to_one_hot
-from ..backend.standard import append_values
+from ..backend.standard import append_values, predict
 
 
 class ControlMap(Processor):
@@ -244,12 +244,7 @@ def __init__(self, model, preprocess=None, postprocess=None):
         self.postprocess = postprocess
 
     def call(self, x):
-        if self.preprocess is not None:
-            x = self.preprocess(x)
-        y = self.model.predict(x)
-        if self.postprocess is not None:
-            y = self.postprocess(y)
-        return y
+        return predict(x, self.model, self.preprocess, self.postprocess)
 
 
 class ToClassName(Processor):
@@ -470,3 +465,20 @@ def call(self, flag):
         else:
             message = self.false_message
         return message
+
+
+class PrintTopics(Processor):
+    """Prints topics
+    # Arguments
+        topics: List of keys to the inputted dictionary
+
+    # Returns
+        Returns same dictionary but outputs to terminal topic values.
+    """
+    def __init__(self, topics):
+        super(PrintTopics, self).__init__()
+        self.topics = topics
+
+    def call(self, dictionary):
+        [print(dictionary[topic]) for topic in self.topics]
+        return dictionary
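Taken together, the additions in this patch compose into an end-to-end still-image workflow: `Camera.take_photo` grabs an RGB frame, `SSD512MinimalHandPose` detects hands and estimates keypoints, and `Translation3DFromBoxWidth` turns the detected boxes into rough 3D positions. A minimal sketch, not part of the diff, assuming a default webcam, a roughly 70 degree horizontal field of view, and that ``image_shape`` takes ``(height, width)``:

```python
from paz.backend.camera import Camera
from paz.pipelines import SSD512MinimalHandPose
from paz.processors import Translation3DFromBoxWidth

camera = Camera(0)
# assumption: ~70 degree HFOV and a 640x480 sensor for the intrinsics
camera.intrinsics_from_HFOV(HFOV=70, image_shape=(480, 640))

image = camera.take_photo()  # starts the camera, reads one RGB frame, stops it
detect = SSD512MinimalHandPose(right_hand=False)
inferences = detect(image)   # keys: image, boxes2D, Keypoints2D, Keypoints3D

# Rough 3D hand positions from box width; real_width defaults to 0.3.
translate = Translation3DFromBoxWidth(camera)
translations3D = translate(inferences['boxes2D'])
```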