"""
Evaluation-related codes are modified from
https://github.com/hughw19/NOCS_CVPR2019
"""
import logging
import os
import math
import cv2
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import _pickle as cPickle
from tqdm import tqdm
def setup_logger(logger_name, log_file, level=logging.INFO):
logger = logging.getLogger(logger_name)
formatter = logging.Formatter('%(asctime)s : %(message)s')
fileHandler = logging.FileHandler(log_file, mode='a')
fileHandler.setFormatter(formatter)
logger.setLevel(level)
logger.addHandler(fileHandler)
streamHandler = logging.StreamHandler()
streamHandler.setFormatter(formatter)
logger.addHandler(streamHandler)
return logger
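# Usage sketch (illustrative, not part of the original NOCS code): setup_logger
# attaches both a file and a stream handler. 'eval_logs.txt' is a placeholder path.
def _example_setup_logger():
    logger = setup_logger('eval', 'eval_logs.txt')
    logger.info('evaluation started')  # written to the file and to stderr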
def load_obj(path_to_file):
""" Load obj file.
Args:
path_to_file: path
Returns:
vertices: ndarray
faces: ndarray, index of triangle vertices
"""
vertices = []
faces = []
with open(path_to_file, 'r') as f:
for line in f:
            if line[:2] == 'v ':
                vertex = [float(xyz) for xyz in line[2:].split()]
                vertices.append(vertex)
            elif line[0] == 'f':
                face = line[1:].replace('//', '/').strip().split()
                face = [int(idx.split('/')[0]) - 1 for idx in face]
                faces.append(face)
else:
continue
vertices = np.asarray(vertices)
faces = np.asarray(faces)
return vertices, faces
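# Usage sketch (illustrative): load a mesh and inspect the returned arrays.
# 'model.obj' is a hypothetical path; real paths depend on the dataset layout.
def _example_load_obj():
    vertices, faces = load_obj('model.obj')
    print(vertices.shape)  # (num_vertices, 3) float positions
    print(faces.shape)     # (num_faces, 3) zero-based vertex indices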
def load_depth(img_path):
""" Load depth image from img_path. """
depth_path = img_path + '_depth.png'
depth = cv2.imread(depth_path, -1)
if len(depth.shape) == 3:
# This is encoded depth image, let's convert
# NOTE: RGB is actually BGR in opencv
depth16 = depth[:, :, 1]*256 + depth[:, :, 2]
depth16 = np.where(depth16==32001, 0, depth16)
depth16 = depth16.astype(np.uint16)
elif len(depth.shape) == 2 and depth.dtype == 'uint16':
depth16 = depth
    else:
        raise ValueError('[ Error ]: Unsupported depth type.')
return depth16
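# Usage sketch (illustrative): load_depth takes the image path *prefix* and
# appends '_depth.png' itself. 'scene_1/0000' is a hypothetical prefix.
def _example_load_depth():
    depth = load_depth('scene_1/0000')  # reads 'scene_1/0000_depth.png'
    print(depth.dtype, depth.shape)     # uint16, (H, W); values in depth units (commonly mm)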
def backproject(depth, intrinsics, instance_mask):
""" Back-projection, use opencv camera coordinate frame.
"""
cam_fx = intrinsics[0, 0]
cam_fy = intrinsics[1, 1]
cam_cx = intrinsics[0, 2]
cam_cy = intrinsics[1, 2]
non_zero_mask = (depth > 0)
final_instance_mask = np.logical_and(instance_mask, non_zero_mask)
idxs = np.where(final_instance_mask)
z = depth[idxs[0], idxs[1]]
x = (idxs[1] - cam_cx) * z / cam_fx
y = (idxs[0] - cam_cy) * z / cam_fy
pts = np.stack((x, y, z), axis=1)
return pts, idxs
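# Usage sketch (illustrative): back-project a masked region with a made-up
# pinhole intrinsic matrix; the calibration values below are not from any dataset.
def _example_backproject():
    depth = np.full((480, 640), 1000, dtype=np.uint16)  # flat synthetic scene
    intrinsics = np.array([[600.0, 0.0, 320.0],
                           [0.0, 600.0, 240.0],
                           [0.0, 0.0, 1.0]])
    mask = np.zeros((480, 640), dtype=bool)
    mask[200:280, 280:360] = True
    pts, idxs = backproject(depth, intrinsics, mask)
    print(pts.shape)  # (N, 3) points in the OpenCV camera frame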
def pc_normalize(pc):
    """ Normalize point cloud into the unit sphere.
    Args:
        pc: [N, C] point cloud
    Returns:
        pc: [N, C] normalized point cloud
        centroid: [C] mean of the input points
        scale: max distance from the centroid
    """
    centroid = np.mean(pc, axis=0)
    pc = pc - centroid
    scale = np.max(np.sqrt(np.sum(pc ** 2, axis=1)))
    pc = pc / scale
    return pc, centroid, scale
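# Usage sketch (illustrative): the cloud is centered and scaled into the unit
# sphere; the inverse transform is pc * scale + centroid.
def _example_pc_normalize():
    pc = np.random.rand(1024, 3) * 2.0
    pc_norm, centroid, scale = pc_normalize(pc)
    assert np.max(np.linalg.norm(pc_norm, axis=1)) <= 1.0 + 1e-6
    pc_restored = pc_norm * scale + centroid  # recovers the input cloud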
def save_to_obj(verts, faces, path):
    with open(path, 'w') as f:
        for v in verts:
            f.write('v %f %f %f\n' % (v[0], v[1], v[2]))
        # obj faces use 1-based vertex indices
        for face in faces + 1:
            f.write('f %d %d %d\n' % (face[0], face[1], face[2]))
def save_to_obj_pts(verts, path):
    with open(path, 'w') as f:
        for v in verts:
            f.write('v %f %f %f\n' % (v[0], v[1], v[2]))
def get_bbox(bbox):
    """ Compute square image crop window. """
    y1, x1, y2, x2 = bbox
    img_height = 480
    img_width = 640
    # round the window size up to the next multiple of 40
    window_size = (max(y2-y1, x2-x1) // 40 + 1) * 40
    window_size = min(window_size, 440)
    center = [(y1 + y2) // 2, (x1 + x2) // 2]
    rmin = center[0] - int(window_size / 2)
    rmax = center[0] + int(window_size / 2)
    cmin = center[1] - int(window_size / 2)
    cmax = center[1] + int(window_size / 2)
    # window extends above the top edge: shift it down
    if rmin < 0:
        delt = -rmin
        rmin = 0
        rmax += delt
    # window extends past the left edge: shift it right
    if cmin < 0:
        delt = -cmin
        cmin = 0
        cmax += delt
    # window extends below the bottom edge: shift it up
    if rmax > img_height:
        delt = rmax - img_height
        rmax = img_height
        rmin -= delt
    # window extends past the right edge: shift it left
    if cmax > img_width:
        delt = cmax - img_width
        cmax = img_width
        cmin -= delt
    return rmin, rmax, cmin, cmax
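# Usage sketch (illustrative): the window is square, rounded up to a multiple of
# 40 (capped at 440), and clamped inside the 640x480 image. The box is arbitrary.
def _example_get_bbox():
    rmin, rmax, cmin, cmax = get_bbox((100, 150, 220, 300))
    assert rmax - rmin == cmax - cmin == 160  # max(120, 150) rounds up to 160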
def compute_sRT_errors(sRT1, sRT2):
"""
Args:
sRT1: [4, 4]. homogeneous affine transformation
sRT2: [4, 4]. homogeneous affine transformation
    Returns:
        R_error: angle difference in degrees
        T_error: Euclidean distance between translations
        IoU: relative scale error, |s1 - s2| / s2
"""
try:
assert np.array_equal(sRT1[3, :], sRT2[3, :])
assert np.array_equal(sRT1[3, :], np.array([0, 0, 0, 1]))
except AssertionError:
print(sRT1[3, :], sRT2[3, :])
s1 = np.cbrt(np.linalg.det(sRT1[:3, :3]))
R1 = sRT1[:3, :3] / s1
T1 = sRT1[:3, 3]
s2 = np.cbrt(np.linalg.det(sRT2[:3, :3]))
R2 = sRT2[:3, :3] / s2
T2 = sRT2[:3, 3]
R12 = R1 @ R2.transpose()
R_error = np.arccos(np.clip((np.trace(R12)-1)/2, -1.0, 1.0)) * 180 / np.pi
T_error = np.linalg.norm(T1 - T2)
IoU = np.abs(s1 - s2) / s2
return R_error, T_error, IoU
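# Usage sketch (illustrative): identical poses give zero rotation error, zero
# translation error, and zero relative scale error.
def _example_compute_sRT_errors():
    sRT = np.identity(4)
    R_error, T_error, scale_error = compute_sRT_errors(sRT, sRT.copy())
    print(R_error, T_error, scale_error)  # 0.0 0.0 0.0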
############################################################
# Evaluation
############################################################
def get_3d_bbox(size, shift=0):
    """
    Args:
        size: [3] array of box side lengths
        shift: [3] or scalar offset of the box center
    Returns:
        bbox_3d: [3, 8] corner coordinates
    """
bbox_3d = np.array([[+size[0] / 2, +size[1] / 2, +size[2] / 2],
[+size[0] / 2, +size[1] / 2, -size[2] / 2],
[-size[0] / 2, +size[1] / 2, +size[2] / 2],
[-size[0] / 2, +size[1] / 2, -size[2] / 2],
[+size[0] / 2, -size[1] / 2, +size[2] / 2],
[+size[0] / 2, -size[1] / 2, -size[2] / 2],
[-size[0] / 2, -size[1] / 2, +size[2] / 2],
[-size[0] / 2, -size[1] / 2, -size[2] / 2]]) + shift
bbox_3d = bbox_3d.transpose()
return bbox_3d
def transform_coordinates_3d(coordinates, sRT):
"""
Args:
coordinates: [3, N]
sRT: [4, 4]
Returns:
new_coordinates: [3, N]
"""
assert coordinates.shape[0] == 3
coordinates = np.vstack([coordinates, np.ones((1, coordinates.shape[1]), dtype=np.float32)])
new_coordinates = sRT @ coordinates
    new_coordinates = new_coordinates[:3, :] / new_coordinates[3, :]
return new_coordinates
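# Usage sketch (illustrative): translating the corners of a canonical unit cube
# shifts every corner by the same offset.
def _example_transform_coordinates_3d():
    corners = get_3d_bbox(np.array([1.0, 1.0, 1.0]))  # (3, 8)
    sRT = np.identity(4)
    sRT[:3, 3] = [0.0, 0.0, 2.0]                      # move 2 units along +z
    moved = transform_coordinates_3d(corners, sRT)
    assert np.allclose(moved[2, :], corners[2, :] + 2.0)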
def compute_3d_IoU(sRT_1, sRT_2, size_1, size_2, class_name_1, class_name_2, handle_visibility):
""" Computes IoU overlaps between two 3D bboxes. """
def asymmetric_3d_iou(sRT_1, sRT_2, size_1, size_2):
        noc_cube_1 = get_3d_bbox(size_1, 0)                      # (3, 8)
        bbox_3d_1 = transform_coordinates_3d(noc_cube_1, sRT_1)  # (3, 8)
        noc_cube_2 = get_3d_bbox(size_2, 0)
        bbox_3d_2 = transform_coordinates_3d(noc_cube_2, sRT_2)
        # per-axis min/max over the 8 transformed corners
        bbox_1_max = np.amax(bbox_3d_1, axis=1)
        bbox_1_min = np.amin(bbox_3d_1, axis=1)
        bbox_2_max = np.amax(bbox_3d_2, axis=1)
        bbox_2_min = np.amin(bbox_3d_2, axis=1)
overlap_min = np.maximum(bbox_1_min, bbox_2_min) # N
overlap_max = np.minimum(bbox_1_max, bbox_2_max)
# intersections and union
if np.amin(overlap_max - overlap_min) < 0:
intersections = 0
else:
intersections = np.prod(overlap_max - overlap_min)
union = np.prod(bbox_1_max - bbox_1_min) + np.prod(bbox_2_max - bbox_2_min) - intersections
overlaps = intersections / union
return overlaps
if sRT_1 is None or sRT_2 is None:
return -1
if (class_name_1 in ['bottle', 'bowl', 'can'] and class_name_1 == class_name_2) or \
(class_name_1 == 'mug' and class_name_1 == class_name_2 and handle_visibility==0):
def y_rotation_matrix(theta):
return np.array([[ np.cos(theta), 0, np.sin(theta), 0],
[ 0, 1, 0, 0],
[-np.sin(theta), 0, np.cos(theta), 0],
[ 0, 0, 0, 1]])
n = 20
max_iou = 0
for i in range(n):
rotated_RT_1 = sRT_1 @ y_rotation_matrix(2 * math.pi * i / float(n))
max_iou = max(max_iou, asymmetric_3d_iou(rotated_RT_1, sRT_2, size_1, size_2))
else:
max_iou = asymmetric_3d_iou(sRT_1, sRT_2, size_1, size_2)
return max_iou
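# Usage sketch (illustrative): identical poses and sizes yield IoU 1.0. 'laptop'
# avoids the symmetric branch; handle_visibility only matters for mugs.
def _example_compute_3d_IoU():
    sRT = np.identity(4)
    size = np.array([0.1, 0.2, 0.1])
    iou = compute_3d_IoU(sRT, sRT.copy(), size, size, 'laptop', 'laptop', 1)
    print(iou)  # 1.0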
def compute_IoU_matches(gt_class_ids, gt_sRT, gt_size, gt_handle_visibility,
pred_class_ids, pred_sRT, pred_size, pred_scores,
synset_names, iou_3d_thresholds, score_threshold=0):
""" Find matches between NOCS prediction and ground truth instances.
Args:
size: 3D bounding box size
bboxes: 2D bounding boxes
Returns:
gt_matches: 2-D array. For each GT box it has the index of the matched predicted box.
pred_matches: 2-D array. For each predicted box, it has the index of the matched ground truth box.
overlaps: IoU overlaps.
indices:
"""
num_pred = len(pred_class_ids)
num_gt = len(gt_class_ids)
indices = np.zeros(0)
if num_pred:
# Sort predictions by score from high to low
indices = np.argsort(pred_scores)[::-1]
pred_class_ids = pred_class_ids[indices].copy()
pred_size = pred_size[indices].copy()
pred_sRT = pred_sRT[indices].copy()
    # compute IoU overlaps, shape [num_pred, num_gt]
overlaps = np.zeros((num_pred, num_gt), dtype=np.float32)
for i in range(num_pred):
for j in range(num_gt):
overlaps[i, j] = compute_3d_IoU(pred_sRT[i], gt_sRT[j], pred_size[i, :], gt_size[j],
synset_names[pred_class_ids[i]], synset_names[gt_class_ids[j]], gt_handle_visibility[j])
# loop through predictions and find matching ground truth boxes
num_iou_3d_thres = len(iou_3d_thresholds)
pred_matches = -1 * np.ones([num_iou_3d_thres, num_pred])
gt_matches = -1 * np.ones([num_iou_3d_thres, num_gt])
for s, iou_thres in enumerate(iou_3d_thresholds):
for i in range(indices.shape[0]):
# Find best matching ground truth box
# 1. Sort matches by score
sorted_ixs = np.argsort(overlaps[i])[::-1]
# 2. Remove low scores
low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0]
if low_score_idx.size > 0:
sorted_ixs = sorted_ixs[:low_score_idx[0]]
# 3. Find the match
for j in sorted_ixs:
# If ground truth box is already matched, go to next one
if gt_matches[s, j] > -1:
continue
# If we reach IoU smaller than the threshold, end the loop
iou = overlaps[i, j]
if iou < iou_thres:
break
# Do we have a match?
if not pred_class_ids[i] == gt_class_ids[j]:
continue
if iou > iou_thres:
gt_matches[s, j] = i
pred_matches[s, i] = j
break
return gt_matches, pred_matches, overlaps, indices
def compute_RT_errors(sRT_1, sRT_2, class_id, handle_visibility, synset_names):
"""
Args:
sRT_1: [4, 4]. homogeneous affine transformation
sRT_2: [4, 4]. homogeneous affine transformation
Returns:
        theta: angle difference of R in degrees
        shift: L2 difference of T in centimeters
"""
# make sure the last row is [0, 0, 0, 1]
if sRT_1 is None or sRT_2 is None:
return -1
try:
assert np.array_equal(sRT_1[3, :], sRT_2[3, :])
assert np.array_equal(sRT_1[3, :], np.array([0, 0, 0, 1]))
except AssertionError:
print(sRT_1[3, :], sRT_2[3, :])
exit()
R1 = sRT_1[:3, :3] / np.cbrt(np.linalg.det(sRT_1[:3, :3]))
T1 = sRT_1[:3, 3]
R2 = sRT_2[:3, :3] / np.cbrt(np.linalg.det(sRT_2[:3, :3]))
T2 = sRT_2[:3, 3]
# symmetric when rotating around y-axis
if synset_names[class_id] in ['bottle', 'can', 'bowl'] or \
(synset_names[class_id] == 'mug' and handle_visibility == 0):
y = np.array([0, 1, 0])
y1 = R1 @ y
y2 = R2 @ y
cos_theta = y1.dot(y2) / (np.linalg.norm(y1) * np.linalg.norm(y2))
else:
R = R1 @ R2.transpose()
cos_theta = (np.trace(R) - 1) / 2
theta = np.arccos(np.clip(cos_theta, -1.0, 1.0)) * 180 / np.pi
shift = np.linalg.norm(T1 - T2) * 100
result = np.array([theta, shift])
return result
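# Usage sketch (illustrative): a pure 90-degree rotation about x between two
# poses of a non-symmetric class ('laptop') gives theta = 90 and shift = 0.
def _example_compute_RT_errors():
    synset_names = ['BG', 'bottle', 'bowl', 'camera', 'can', 'laptop', 'mug']
    sRT_1 = np.identity(4)
    sRT_2 = np.identity(4)
    sRT_2[:3, :3] = np.array([[1.0, 0.0, 0.0],
                              [0.0, 0.0, -1.0],
                              [0.0, 1.0, 0.0]])
    theta, shift = compute_RT_errors(sRT_1, sRT_2, 5, 1, synset_names)
    print(theta, shift)  # ~90.0, 0.0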
def compute_RT_overlaps(gt_class_ids, gt_sRT, gt_handle_visibility, pred_class_ids, pred_sRT, synset_names):
    """ Compute pose errors between all prediction and ground truth pairs.
    Returns:
        overlaps: [num_pred, num_gt, 2] array of (rotation error in degrees,
            translation error in centimeters)
    """
num_pred = len(pred_class_ids)
num_gt = len(gt_class_ids)
overlaps = np.zeros((num_pred, num_gt, 2))
for i in range(num_pred):
for j in range(num_gt):
overlaps[i, j, :] = compute_RT_errors(pred_sRT[i], gt_sRT[j], gt_class_ids[j],
gt_handle_visibility[j], synset_names)
return overlaps
def compute_RT_matches(overlaps, pred_class_ids, gt_class_ids, degree_thres_list, shift_thres_list):
num_degree_thres = len(degree_thres_list)
num_shift_thres = len(shift_thres_list)
num_pred = len(pred_class_ids)
num_gt = len(gt_class_ids)
pred_matches = -1 * np.ones((num_degree_thres, num_shift_thres, num_pred))
gt_matches = -1 * np.ones((num_degree_thres, num_shift_thres, num_gt))
if num_pred == 0 or num_gt == 0:
return gt_matches, pred_matches
assert num_pred == overlaps.shape[0]
assert num_gt == overlaps.shape[1]
assert overlaps.shape[2] == 2
for d, degree_thres in enumerate(degree_thres_list):
for s, shift_thres in enumerate(shift_thres_list):
for i in range(num_pred):
# Find best matching ground truth box
# 1. Sort matches by scores from low to high
sum_degree_shift = np.sum(overlaps[i, :, :], axis=-1)
sorted_ixs = np.argsort(sum_degree_shift)
# 2. Find the match
for j in sorted_ixs:
# If ground truth box is already matched, go to next one
if gt_matches[d, s, j] > -1 or pred_class_ids[i] != gt_class_ids[j]:
continue
                    # skip if either the rotation or translation error exceeds its threshold
                    if overlaps[i, j, 0] > degree_thres or overlaps[i, j, 1] > shift_thres:
                        continue
gt_matches[d, s, j] = i
pred_matches[d, s, i] = j
break
return gt_matches, pred_matches
def compute_ap_and_acc(pred_matches, pred_scores, gt_matches):
# sort the scores from high to low
assert pred_matches.shape[0] == pred_scores.shape[0]
    score_indices = np.argsort(pred_scores)[::-1]
    pred_matches = pred_matches[score_indices]
precisions = np.cumsum(pred_matches > -1) / (np.arange(len(pred_matches)) + 1)
recalls = np.cumsum(pred_matches > -1).astype(np.float32) / len(gt_matches)
# Pad with start and end values to simplify the math
precisions = np.concatenate([[0], precisions, [0]])
recalls = np.concatenate([[0], recalls, [1]])
# Ensure precision values decrease but don't increase. This way, the
# precision value at each recall threshold is the maximum it can be
# for all following recall thresholds, as specified by the VOC paper.
for i in range(len(precisions) - 2, -1, -1):
precisions[i] = np.maximum(precisions[i], precisions[i + 1])
# compute mean AP over recall range
indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
ap = np.sum((recalls[indices] - recalls[indices - 1]) * precisions[indices])
# accuracy
acc = np.sum(pred_matches > -1) / len(pred_matches)
return ap, acc
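# Usage sketch (illustrative) of the VOC-style AP on toy data: three predictions,
# two matched to GT instances (a match index > -1 means matched).
def _example_compute_ap_and_acc():
    pred_matches = np.array([0, -1, 1])      # per-prediction matched GT index
    pred_scores = np.array([0.9, 0.8, 0.7])  # used to sort predictions
    gt_matches = np.array([0, 2])            # per-GT matched prediction index
    ap, acc = compute_ap_and_acc(pred_matches, pred_scores, gt_matches)
    print(ap, acc)  # AP from the interpolated PR curve; acc = 2/3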
def compute_mAP(pred_results, out_dir, degree_thresholds=[180], shift_thresholds=[100],
iou_3d_thresholds=[0.1], iou_pose_thres=0.1, use_matches_for_pose=False):
""" Compute mean Average Precision.
Returns:
iou_aps:
pose_aps:
iou_acc:
pose_acc:
"""
synset_names = ['BG', 'bottle', 'bowl', 'camera', 'can', 'laptop', 'mug']
num_classes = len(synset_names)
degree_thres_list = list(degree_thresholds) + [360]
num_degree_thres = len(degree_thres_list)
shift_thres_list = list(shift_thresholds) + [100]
num_shift_thres = len(shift_thres_list)
iou_thres_list = list(iou_3d_thresholds)
num_iou_thres = len(iou_thres_list)
if use_matches_for_pose:
assert iou_pose_thres in iou_thres_list
    # pre-allocate more than enough memory
    # IoU
iou_aps = np.zeros((num_classes + 1, num_iou_thres))
iou_acc = np.zeros((num_classes + 1, num_iou_thres))
iou_pred_matches_all = [np.zeros((num_iou_thres, 30000)) for _ in range(num_classes)]
iou_pred_scores_all = [np.zeros((num_iou_thres, 30000)) for _ in range(num_classes)]
iou_gt_matches_all = [np.zeros((num_iou_thres, 30000)) for _ in range(num_classes)]
iou_pred_count = [0 for _ in range(num_classes)]
iou_gt_count = [0 for _ in range(num_classes)]
# pose
pose_aps = np.zeros((num_classes + 1, num_degree_thres, num_shift_thres))
pose_acc = np.zeros((num_classes + 1, num_degree_thres, num_shift_thres))
pose_pred_matches_all = [np.zeros((num_degree_thres, num_shift_thres, 30000)) for _ in range(num_classes)]
pose_pred_scores_all = [np.zeros((num_degree_thres, num_shift_thres, 30000)) for _ in range(num_classes)]
pose_gt_matches_all = [np.zeros((num_degree_thres, num_shift_thres, 30000)) for _ in range(num_classes)]
pose_pred_count = [0 for _ in range(num_classes)]
pose_gt_count = [0 for _ in range(num_classes)]
# loop over results to gather pred matches and gt matches for iou and pose metrics
    for result in tqdm(pred_results):
gt_class_ids = result['gt_class_ids'].astype(np.int32)
gt_sRT = np.array(result['gt_RTs'])
gt_size = np.array(result['gt_scales'])
gt_handle_visibility = result['gt_handle_visibility']
pred_class_ids = result['pred_class_ids']
pred_sRT = np.array(result['pred_RTs'])
pred_size = result['pred_scales']
pred_scores = result['pred_scores']
if len(gt_class_ids) == 0 and len(pred_class_ids) == 0:
continue
for cls_id in range(1, num_classes):
            # get gt and predictions in this class
            # match within each class first; a class may contain several instances,
            # which are disambiguated by the matching below
cls_gt_class_ids = gt_class_ids[gt_class_ids==cls_id] if len(gt_class_ids) else np.zeros(0)
cls_gt_sRT = gt_sRT[gt_class_ids==cls_id] if len(gt_class_ids) else np.zeros((0, 4, 4))
cls_gt_size = gt_size[gt_class_ids==cls_id] if len(gt_class_ids) else np.zeros((0, 3))
            # non-mug classes default to handle_visibility = 1
if synset_names[cls_id] != 'mug':
cls_gt_handle_visibility = np.ones_like(cls_gt_class_ids)
            # for mugs, keep handle_visibility consistent with the stored gt_handle_visibility
else:
cls_gt_handle_visibility = gt_handle_visibility[gt_class_ids==cls_id] if len(gt_class_ids) else np.ones(0)
cls_pred_class_ids = pred_class_ids[pred_class_ids==cls_id] if len(pred_class_ids) else np.zeros(0)
cls_pred_sRT = pred_sRT[pred_class_ids==cls_id] if len(pred_class_ids) else np.zeros((0, 4, 4))
cls_pred_size = pred_size[pred_class_ids==cls_id] if len(pred_class_ids) else np.zeros((0, 3))
cls_pred_scores = pred_scores[pred_class_ids==cls_id] if len(pred_class_ids) else np.zeros(0)
# calculate the overlap between each gt instance and pred instance
iou_cls_gt_match, iou_cls_pred_match, _, iou_pred_indices = \
compute_IoU_matches(cls_gt_class_ids, cls_gt_sRT, cls_gt_size, cls_gt_handle_visibility,
cls_pred_class_ids, cls_pred_sRT, cls_pred_size, cls_pred_scores,
synset_names, iou_thres_list)
if len(iou_pred_indices):
cls_pred_class_ids = cls_pred_class_ids[iou_pred_indices]
cls_pred_sRT = cls_pred_sRT[iou_pred_indices]
cls_pred_scores = cls_pred_scores[iou_pred_indices]
num_pred = iou_cls_pred_match.shape[1]
pred_start = iou_pred_count[cls_id]
pred_end = pred_start + num_pred
iou_pred_count[cls_id] = pred_end
iou_pred_matches_all[cls_id][:, pred_start:pred_end] = iou_cls_pred_match
cls_pred_scores_tile = np.tile(cls_pred_scores, (num_iou_thres, 1))
assert cls_pred_scores_tile.shape[1] == num_pred
iou_pred_scores_all[cls_id][:, pred_start:pred_end] = cls_pred_scores_tile
num_gt = iou_cls_gt_match.shape[1]
gt_start = iou_gt_count[cls_id]
gt_end = gt_start + num_gt
iou_gt_count[cls_id] = gt_end
iou_gt_matches_all[cls_id][:, gt_start:gt_end] = iou_cls_gt_match
if use_matches_for_pose:
thres_ind = list(iou_thres_list).index(iou_pose_thres)
iou_thres_pred_match = iou_cls_pred_match[thres_ind, :]
cls_pred_class_ids = cls_pred_class_ids[iou_thres_pred_match > -1] if len(iou_thres_pred_match) > 0 else np.zeros(0)
cls_pred_sRT = cls_pred_sRT[iou_thres_pred_match > -1] if len(iou_thres_pred_match) > 0 else np.zeros((0, 4, 4))
cls_pred_scores = cls_pred_scores[iou_thres_pred_match > -1] if len(iou_thres_pred_match) > 0 else np.zeros(0)
iou_thres_gt_match = iou_cls_gt_match[thres_ind, :]
cls_gt_class_ids = cls_gt_class_ids[iou_thres_gt_match > -1] if len(iou_thres_gt_match) > 0 else np.zeros(0)
cls_gt_sRT = cls_gt_sRT[iou_thres_gt_match > -1] if len(iou_thres_gt_match) > 0 else np.zeros((0, 4, 4))
cls_gt_handle_visibility = cls_gt_handle_visibility[iou_thres_gt_match > -1] if len(iou_thres_gt_match) > 0 else np.zeros(0)
RT_overlaps = compute_RT_overlaps(cls_gt_class_ids, cls_gt_sRT, cls_gt_handle_visibility,
cls_pred_class_ids, cls_pred_sRT, synset_names)
pose_cls_gt_match, pose_cls_pred_match = compute_RT_matches(RT_overlaps, cls_pred_class_ids, cls_gt_class_ids,
degree_thres_list, shift_thres_list)
num_pred = pose_cls_pred_match.shape[2]
pred_start = pose_pred_count[cls_id]
pred_end = pred_start + num_pred
pose_pred_count[cls_id] = pred_end
pose_pred_matches_all[cls_id][:, :, pred_start:pred_end] = pose_cls_pred_match
cls_pred_scores_tile = np.tile(cls_pred_scores, (num_degree_thres, num_shift_thres, 1))
assert cls_pred_scores_tile.shape[2] == num_pred
pose_pred_scores_all[cls_id][:, :, pred_start:pred_end] = cls_pred_scores_tile
num_gt = pose_cls_gt_match.shape[2]
gt_start = pose_gt_count[cls_id]
gt_end = gt_start + num_gt
pose_gt_count[cls_id] = gt_end
pose_gt_matches_all[cls_id][:, :, gt_start:gt_end] = pose_cls_gt_match
# trim zeros
for cls_id in range(num_classes):
# IoU
iou_pred_matches_all[cls_id] = iou_pred_matches_all[cls_id][:, :iou_pred_count[cls_id]]
iou_pred_scores_all[cls_id] = iou_pred_scores_all[cls_id][:, :iou_pred_count[cls_id]]
iou_gt_matches_all[cls_id] = iou_gt_matches_all[cls_id][:, :iou_gt_count[cls_id]]
# pose
pose_pred_matches_all[cls_id] = pose_pred_matches_all[cls_id][:, :, :pose_pred_count[cls_id]]
pose_pred_scores_all[cls_id] = pose_pred_scores_all[cls_id][:, :, :pose_pred_count[cls_id]]
pose_gt_matches_all[cls_id] = pose_gt_matches_all[cls_id][:, :, :pose_gt_count[cls_id]]
# compute 3D IoU mAP
for cls_id in range(1, num_classes):
for s, iou_thres in enumerate(iou_thres_list):
iou_aps[cls_id, s], iou_acc[cls_id, s] = compute_ap_and_acc(iou_pred_matches_all[cls_id][s, :],
iou_pred_scores_all[cls_id][s, :],
iou_gt_matches_all[cls_id][s, :])
iou_aps[-1, :] = np.mean(iou_aps[1:-1, :], axis=0)
iou_acc[-1, :] = np.mean(iou_acc[1:-1, :], axis=0)
# compute pose mAP
for i, degree_thres in enumerate(degree_thres_list):
for j, shift_thres in enumerate(shift_thres_list):
for cls_id in range(1, num_classes):
cls_pose_pred_matches_all = pose_pred_matches_all[cls_id][i, j, :]
cls_pose_gt_matches_all = pose_gt_matches_all[cls_id][i, j, :]
cls_pose_pred_scores_all = pose_pred_scores_all[cls_id][i, j, :]
pose_aps[cls_id, i, j], pose_acc[cls_id, i, j] = compute_ap_and_acc(cls_pose_pred_matches_all,
cls_pose_pred_scores_all,
cls_pose_gt_matches_all)
pose_aps[-1, i, j] = np.mean(pose_aps[1:-1, i, j])
pose_acc[-1, i, j] = np.mean(pose_acc[1:-1, i, j])
# save results to pkl
result_dict = {}
result_dict['iou_thres_list'] = iou_thres_list
result_dict['degree_thres_list'] = degree_thres_list
result_dict['shift_thres_list'] = shift_thres_list
result_dict['iou_aps'] = iou_aps
result_dict['pose_aps'] = pose_aps
result_dict['iou_acc'] = iou_acc
result_dict['pose_acc'] = pose_acc
pkl_path = os.path.join(out_dir, 'mAP_Acc.pkl')
with open(pkl_path, 'wb') as f:
cPickle.dump(result_dict, f)
return iou_aps, pose_aps, iou_acc, pose_acc
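# Usage sketch (illustrative): the per-image dict layout consumed by compute_mAP,
# mirroring the keys read in the loop above. All values here are made up.
def _example_compute_mAP():
    result = {
        'gt_class_ids': np.array([1]),              # 1 = 'bottle'
        'gt_RTs': np.identity(4)[None, ...],        # [num_gt, 4, 4]
        'gt_scales': np.array([[0.1, 0.3, 0.1]]),   # [num_gt, 3]
        'gt_handle_visibility': np.array([1]),
        'pred_class_ids': np.array([1]),
        'pred_RTs': np.identity(4)[None, ...],
        'pred_scales': np.array([[0.1, 0.3, 0.1]]),
        'pred_scores': np.array([0.95]),
    }
    return compute_mAP([result], out_dir='.', iou_3d_thresholds=[0.1, 0.25, 0.5])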
def plot_mAP(iou_aps, pose_aps, out_dir, iou_thres_list, degree_thres_list, shift_thres_list):
""" Draw iou 3d AP vs. iou thresholds.
"""
# mpl.style.use('ggplot')
labels = ['bottle', 'bowl', 'camera', 'can', 'laptop', 'mug', 'mean', 'nocs']
colors = ['tab:blue', 'tab:orange', 'tab:green', 'tab:pink', 'tab:olive', 'tab:purple', 'tab:red', 'tab:gray']
styles = ['-', '-', '-', '-', '-', '-', '--', ':']
fig, (ax_iou, ax_degree, ax_shift) = plt.subplots(1, 3, figsize=(8, 3.5))
# IoU subplot
ax_iou.set_title('3D IoU', fontsize=16)
ax_iou.set_ylabel('Average Precision', fontsize=14)
ax_iou.set_ylim(0, 100)
ax_iou.set_xlabel('Percent', fontsize=14)
ax_iou.set_xlim(0, 100)
ax_iou.xaxis.set_ticks([0, 25, 50, 75, 100])
ax_iou.grid()
for i in range(1, iou_aps.shape[0]):
ax_iou.plot(100*np.array(iou_thres_list), 100*iou_aps[i, :],
color=colors[i-1], linestyle=styles[i-1], label=labels[i-1])
# rotation subplot
ax_degree.set_title('Rotation', fontsize=16)
ax_degree.set_ylim(0, 100)
ax_degree.yaxis.set_ticklabels([])
ax_degree.set_xlabel('Degree', fontsize=14)
ax_degree.set_xlim(0, 60)
ax_degree.xaxis.set_ticks([0, 20, 40, 60])
ax_degree.grid()
for i in range(1, pose_aps.shape[0]):
ax_degree.plot(np.array(degree_thres_list), 100*pose_aps[i, :len(degree_thres_list), -1],
color=colors[i-1], linestyle=styles[i-1], label=labels[i-1])
# translation subplot
ax_shift.set_title('Translation', fontsize=16)
ax_shift.set_ylim(0, 100)
ax_shift.yaxis.set_ticklabels([])
ax_shift.set_xlabel('Centimeter', fontsize=14)
ax_shift.set_xlim(0, 10)
ax_shift.xaxis.set_ticks([0, 5, 10])
ax_shift.grid()
for i in range(1, pose_aps.shape[0]):
ax_shift.plot(np.array(shift_thres_list), 100*pose_aps[i, -1, :len(shift_thres_list)],
color=colors[i-1], linestyle=styles[i-1], label=labels[i-1])
ax_shift.legend(loc='lower right', fontsize='small')
plt.tight_layout()
# plt.show()
plt.savefig(os.path.join(out_dir, 'mAP.jpg'), dpi=600)
plt.close(fig)
return
def calculate_2d_projections(coordinates_3d, intrinsics):
"""
Args:
coordinates_3d: [3, N]
intrinsics: [3, 3]
Returns:
projected_coordinates: [N, 2]
"""
projected_coordinates = intrinsics @ coordinates_3d
projected_coordinates = projected_coordinates[:2, :] / projected_coordinates[2, :]
projected_coordinates = projected_coordinates.transpose()
projected_coordinates = np.array(projected_coordinates, dtype=np.int32)
return projected_coordinates
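# Usage sketch (illustrative): with a made-up intrinsic matrix, a point on the
# optical axis projects to the principal point. Points must have z > 0.
def _example_calculate_2d_projections():
    intrinsics = np.array([[600.0, 0.0, 320.0],
                           [0.0, 600.0, 240.0],
                           [0.0, 0.0, 1.0]])
    pts = np.array([[0.0], [0.0], [1.0]])  # one point, 1 unit along the optical axis
    uv = calculate_2d_projections(pts, intrinsics)
    print(uv)  # [[320 240]]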
def align_rotation(sRT):
""" Align rotations for symmetric objects.
Args:
sRT: 4 x 4
"""
s = np.cbrt(np.linalg.det(sRT[:3, :3]))
R = sRT[:3, :3] / s
T = sRT[:3, 3]
theta_x = R[0, 0] + R[2, 2]
theta_y = R[0, 2] - R[2, 0]
r_norm = math.sqrt(theta_x**2 + theta_y**2)
s_map = np.array([[theta_x/r_norm, 0.0, -theta_y/r_norm],
[0.0, 1.0, 0.0 ],
[theta_y/r_norm, 0.0, theta_x/r_norm]])
rotation = R @ s_map
aligned_sRT = np.identity(4, dtype=np.float32)
aligned_sRT[:3, :3] = s * rotation
aligned_sRT[:3, 3] = T
return aligned_sRT
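# Usage sketch (illustrative): for a y-symmetric object, align_rotation cancels
# the rotation about y, so a pure y-rotation maps back to the identity.
def _example_align_rotation():
    theta = np.pi / 5
    sRT = np.identity(4)
    sRT[:3, :3] = np.array([[np.cos(theta), 0.0, np.sin(theta)],
                            [0.0, 1.0, 0.0],
                            [-np.sin(theta), 0.0, np.cos(theta)]])
    aligned = align_rotation(sRT)
    assert np.allclose(aligned[:3, :3], np.identity(3), atol=1e-6)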
def draw(img, imgpts, axes, color):
    imgpts = np.int32(imgpts).reshape(-1, 2)
    color = (int(color[0]), int(color[1]), int(color[2]))
    # draw the bottom face of the box (corners 4-7)
    for i, j in zip([4, 5, 6, 7], [5, 7, 4, 6]):
        img = cv2.line(img, tuple(imgpts[i]), tuple(imgpts[j]), color=color, thickness=2, lineType=cv2.LINE_AA)
    # draw the vertical pillars connecting the two faces
    for i, j in zip(range(4), range(4, 8)):
        img = cv2.line(img, tuple(imgpts[i]), tuple(imgpts[j]), color=color, thickness=2, lineType=cv2.LINE_AA)
    # draw the top face of the box (corners 0-3)
    for i, j in zip([0, 1, 2, 3], [1, 3, 0, 2]):
        img = cv2.line(img, tuple(imgpts[i]), tuple(imgpts[j]), color=color, thickness=2, lineType=cv2.LINE_AA)
    # draw the object axes (BGR colors)
    img = cv2.line(img, tuple(axes[0]), tuple(axes[1]), (0, 0, 255), 3, lineType=cv2.LINE_AA)  # x: red
    img = cv2.line(img, tuple(axes[0]), tuple(axes[3]), (255, 0, 0), 3, lineType=cv2.LINE_AA)  # z: blue
    img = cv2.line(img, tuple(axes[0]), tuple(axes[2]), (0, 255, 0), 3, lineType=cv2.LINE_AA)  # y: green, drawn last
    return img
def draw_detections(img, out_dir, data_name, img_id, intrinsics, pred_sRT, pred_size, pred_class_ids,
gt_sRT, gt_size, gt_class_ids, nocs_sRT, nocs_size, nocs_class_ids, draw_gt=True, draw_nocs=True):
""" Visualize pose predictions.
"""
out_path = os.path.join(out_dir, '{}_{}_pred.png'.format(data_name, img_id))
# draw nocs results - BLUE color
if draw_nocs:
for i in range(nocs_sRT.shape[0]):
if nocs_class_ids[i] in [1, 2, 4]:
sRT = align_rotation(nocs_sRT[i, :, :])
else:
sRT = nocs_sRT[i, :, :]
bbox_3d = get_3d_bbox(nocs_size[i, :], 0)
transformed_bbox_3d = transform_coordinates_3d(bbox_3d, sRT)
projected_bbox = calculate_2d_projections(transformed_bbox_3d, intrinsics)
xyz_axis = 0.1 * np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]]).transpose()
transformed_axes = transform_coordinates_3d(xyz_axis, sRT)
projected_axes = calculate_2d_projections(transformed_axes, intrinsics)
img = draw(img, projected_bbox, projected_axes, (255, 0, 0))
    # draw ground truth - GREEN color
    if draw_gt:
        for i in range(gt_sRT.shape[0]):
            if gt_class_ids[i] in [1, 2, 4]:
                sRT = align_rotation(gt_sRT[i, :, :])
            else:
                sRT = gt_sRT[i, :, :]
            bbox_3d = get_3d_bbox(gt_size[i, :], 0)
            transformed_bbox_3d = transform_coordinates_3d(bbox_3d, sRT)
            projected_bbox = calculate_2d_projections(transformed_bbox_3d, intrinsics)
            xyz_axis = 0.1 * np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]]).transpose()
            transformed_axes = transform_coordinates_3d(xyz_axis, sRT)
            projected_axes = calculate_2d_projections(transformed_axes, intrinsics)
            img = draw(img, projected_bbox, projected_axes, (0, 255, 0))
    # draw predictions - RED color
    for i in range(pred_sRT.shape[0]):
        if pred_class_ids[i] in [1, 2, 4]:
            sRT = align_rotation(pred_sRT[i, :, :])
        else:
            sRT = pred_sRT[i, :, :]
        bbox_3d = get_3d_bbox(pred_size[i, :], 0)
        transformed_bbox_3d = transform_coordinates_3d(bbox_3d, sRT)
        projected_bbox = calculate_2d_projections(transformed_bbox_3d, intrinsics)
        xyz_axis = 0.1 * np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]]).transpose()
        transformed_axes = transform_coordinates_3d(xyz_axis, sRT)
        projected_axes = calculate_2d_projections(transformed_axes, intrinsics)
        img = draw(img, projected_bbox, projected_axes, (0, 0, 255))
cv2.imwrite(out_path, img)
# cv2.imshow('vis', img)
# cv2.waitKey(0)