"""
Evaluation-related codes are modified from
https://github.com/hughw19/NOCS_CVPR2019
"""
import logging
import os
import math
import cv2
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import _pickle as cPickle
from tqdm import tqdm
def setup_logger(logger_name, log_file, level=logging.INFO):
logger = logging.getLogger(logger_name)
formatter = logging.Formatter('%(asctime)s : %(message)s')
fileHandler = logging.FileHandler(log_file, mode='a')
fileHandler.setFormatter(formatter)
logger.setLevel(level)
logger.addHandler(fileHandler)
streamHandler = logging.StreamHandler()
streamHandler.setFormatter(formatter)
logger.addHandler(streamHandler)
return logger
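# Usage sketch (illustrative, not part of the original NOCS code): setup_logger
# attaches both a file and a stream handler. 'eval_logs.txt' is a placeholder path.
def _example_setup_logger():
    logger = setup_logger('eval', 'eval_logs.txt')
    logger.info('evaluation started')  # written to the file and to stderr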
def load_obj(path_to_file):
""" Load obj file.
Args:
path_to_file: path
Returns:
vertices: ndarray
faces: ndarray, index of triangle vertices
"""
vertices = []
faces = []
with open(path_to_file, 'r') as f:
for line in f:
            if line[:2] == 'v ':
                vertex = [float(xyz) for xyz in line[2:].split()]
                vertices.append(vertex)
            elif line[0] == 'f':
                face = line[1:].replace('//', '/').strip().split()
                face = [int(idx.split('/')[0]) - 1 for idx in face]
                faces.append(face)
else:
continue
vertices = np.asarray(vertices)
faces = np.asarray(faces)
return vertices, faces
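# Usage sketch (illustrative): load a mesh and inspect the returned arrays.
# 'model.obj' is a hypothetical path; real paths depend on the dataset layout.
def _example_load_obj():
    vertices, faces = load_obj('model.obj')
    print(vertices.shape)  # (num_vertices, 3) float positions
    print(faces.shape)     # (num_faces, 3) zero-based vertex indices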
def load_depth(img_path):
""" Load depth image from img_path. """
depth_path = img_path + '_depth.png'
depth = cv2.imread(depth_path, -1)
if len(depth.shape) == 3:
# This is encoded depth image, let's convert
# NOTE: RGB is actually BGR in opencv
depth16 = depth[:, :, 1]*256 + depth[:, :, 2]
depth16 = np.where(depth16==32001, 0, depth16)
depth16 = depth16.astype(np.uint16)
elif len(depth.shape) == 2 and depth.dtype == 'uint16':
depth16 = depth
    else:
        raise ValueError('[ Error ]: Unsupported depth type.')
return depth16
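# Usage sketch (illustrative): load_depth takes the image path *prefix* and
# appends '_depth.png' itself. 'scene_1/0000' is a hypothetical prefix.
def _example_load_depth():
    depth = load_depth('scene_1/0000')  # reads 'scene_1/0000_depth.png'
    print(depth.dtype, depth.shape)     # uint16, (H, W); values in depth units (commonly mm)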
def backproject(depth, intrinsics, instance_mask):
""" Back-projection, use opencv camera coordinate frame.
"""
cam_fx = intrinsics[0, 0]
cam_fy = intrinsics[1, 1]
cam_cx = intrinsics[0, 2]
cam_cy = intrinsics[1, 2]
non_zero_mask = (depth > 0)
final_instance_mask = np.logical_and(instance_mask, non_zero_mask)
idxs = np.where(final_instance_mask)
z = depth[idxs[0], idxs[1]]
x = (idxs[1] - cam_cx) * z / cam_fx
y = (idxs[0] - cam_cy) * z / cam_fy
pts = np.stack((x, y, z), axis=1)
return pts, idxs
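# Usage sketch (illustrative): back-project a masked region with a made-up
# pinhole intrinsic matrix; the calibration values below are not from any dataset.
def _example_backproject():
    depth = np.full((480, 640), 1000, dtype=np.uint16)  # flat synthetic scene
    intrinsics = np.array([[600.0, 0.0, 320.0],
                           [0.0, 600.0, 240.0],
                           [0.0, 0.0, 1.0]])
    mask = np.zeros((480, 640), dtype=bool)
    mask[200:280, 280:360] = True
    pts, idxs = backproject(depth, intrinsics, mask)
    print(pts.shape)  # (N, 3) points in the OpenCV camera frame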
def pc_normalize(pc):
    """ Normalize point cloud into the unit sphere.
    Args:
        pc: [N, C] point cloud
    Returns:
        pc: [N, C] normalized point cloud
        centroid: [C] mean of the input points
        scale: max distance from the centroid
    """
    centroid = np.mean(pc, axis=0)
    pc = pc - centroid
    scale = np.max(np.sqrt(np.sum(pc ** 2, axis=1)))
    pc = pc / scale
    return pc, centroid, scale
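# Usage sketch (illustrative): the cloud is centered and scaled into the unit
# sphere; the inverse transform is pc * scale + centroid.
def _example_pc_normalize():
    pc = np.random.rand(1024, 3) * 2.0
    pc_norm, centroid, scale = pc_normalize(pc)
    assert np.max(np.linalg.norm(pc_norm, axis=1)) <= 1.0 + 1e-6
    pc_restored = pc_norm * scale + centroid  # recovers the input cloud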
def save_to_obj(verts, faces, path):
    with open(path, 'w') as f:
        for v in verts:
            f.write('v %f %f %f\n' % (v[0], v[1], v[2]))
        # obj faces use 1-based vertex indices
        for face in faces + 1:
            f.write('f %d %d %d\n' % (face[0], face[1], face[2]))
def save_to_obj_pts(verts, path):
    with open(path, 'w') as f:
        for v in verts:
            f.write('v %f %f %f\n' % (v[0], v[1], v[2]))
def get_bbox(bbox):
    """ Compute square image crop window. """
    y1, x1, y2, x2 = bbox
    img_height = 480
    img_width = 640
    # round the window size up to the next multiple of 40
    window_size = (max(y2-y1, x2-x1) // 40 + 1) * 40
    window_size = min(window_size, 440)
    center = [(y1 + y2) // 2, (x1 + x2) // 2]
    rmin = center[0] - int(window_size / 2)
    rmax = center[0] + int(window_size / 2)
    cmin = center[1] - int(window_size / 2)
    cmax = center[1] + int(window_size / 2)
    # window extends above the top edge: shift it down
    if rmin < 0:
        delt = -rmin
        rmin = 0
        rmax += delt
    # window extends past the left edge: shift it right
    if cmin < 0:
        delt = -cmin
        cmin = 0
        cmax += delt
    # window extends below the bottom edge: shift it up
    if rmax > img_height:
        delt = rmax - img_height
        rmax = img_height
        rmin -= delt
    # window extends past the right edge: shift it left
    if cmax > img_width:
        delt = cmax - img_width
        cmax = img_width
        cmin -= delt
    return rmin, rmax, cmin, cmax
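# Usage sketch (illustrative): the window is square, rounded up to a multiple of
# 40 (capped at 440), and clamped inside the 640x480 image. The box is arbitrary.
def _example_get_bbox():
    rmin, rmax, cmin, cmax = get_bbox((100, 150, 220, 300))
    assert rmax - rmin == cmax - cmin == 160  # max(120, 150) rounds up to 160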
def compute_sRT_errors(sRT1, sRT2):
"""
Args:
sRT1: [4, 4]. homogeneous affine transformation
sRT2: [4, 4]. homogeneous affine transformation
    Returns:
        R_error: angle difference in degrees
        T_error: Euclidean distance between translations
        IoU: relative scale error, |s1 - s2| / s2
"""
try:
assert np.array_equal(sRT1[3, :], sRT2[3, :])
assert np.array_equal(sRT1[3, :], np.array([0, 0, 0, 1]))
except AssertionError:
print(sRT1[3, :], sRT2[3, :])
s1 = np.cbrt(np.linalg.det(sRT1[:3, :3]))
R1 = sRT1[:3, :3] / s1
T1 = sRT1[:3, 3]
s2 = np.cbrt(np.linalg.det(sRT2[:3, :3]))
R2 = sRT2[:3, :3] / s2
T2 = sRT2[:3, 3]
R12 = R1 @ R2.transpose()
R_error = np.arccos(np.clip((np.trace(R12)-1)/2, -1.0, 1.0)) * 180 / np.pi
T_error = np.linalg.norm(T1 - T2)
IoU = np.abs(s1 - s2) / s2
return R_error, T_error, IoU
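# Usage sketch (illustrative): identical poses give zero rotation error, zero
# translation error, and zero relative scale error.
def _example_compute_sRT_errors():
    sRT = np.identity(4)
    R_error, T_error, scale_error = compute_sRT_errors(sRT, sRT.copy())
    print(R_error, T_error, scale_error)  # 0.0 0.0 0.0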
############################################################
# Evaluation
############################################################
def get_3d_bbox(size, shift=0):
    """
    Args:
        size: [3] array of box side lengths
        shift: [3] or scalar offset of the box center
    Returns:
        bbox_3d: [3, 8] corner coordinates
    """
bbox_3d = np.array([[+size[0] / 2, +size[1] / 2, +size[2] / 2],
[+size[0] / 2, +size[1] / 2, -size[2] / 2],
[-size[0] / 2, +size[1] / 2, +size[2] / 2],
[-size[0] / 2, +size[1] / 2, -size[2] / 2],
[+size[0] / 2, -size[1] / 2, +size[2] / 2],
[+size[0] / 2, -size[1] / 2, -size[2] / 2],
[-size[0] / 2, -size[1] / 2, +size[2] / 2],
[-size[0] / 2, -size[1] / 2, -size[2] / 2]]) + shift
bbox_3d = bbox_3d.transpose()
return bbox_3d
def transform_coordinates_3d(coordinates, sRT):
"""
Args:
coordinates: [3, N]
sRT: [4, 4]
Returns:
new_coordinates: [3, N]
"""
assert coordinates.shape[0] == 3
coordinates = np.vstack([coordinates, np.ones((1, coordinates.shape[1]), dtype=np.float32)])
new_coordinates = sRT @ coordinates
    new_coordinates = new_coordinates[:3, :] / new_coordinates[3, :]
return new_coordinates
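# Usage sketch (illustrative): translating the corners of a canonical unit cube
# shifts every corner by the same offset.
def _example_transform_coordinates_3d():
    corners = get_3d_bbox(np.array([1.0, 1.0, 1.0]))  # (3, 8)
    sRT = np.identity(4)
    sRT[:3, 3] = [0.0, 0.0, 2.0]                      # move 2 units along +z
    moved = transform_coordinates_3d(corners, sRT)
    assert np.allclose(moved[2, :], corners[2, :] + 2.0)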
def compute_3d_IoU(sRT_1, sRT_2, size_1, size_2, class_name_1, class_name_2, handle_visibility):
""" Computes IoU overlaps between two 3D bboxes. """
def asymmetric_3d_iou(sRT_1, sRT_2, size_1, size_2):
        noc_cube_1 = get_3d_bbox(size_1, 0)                      # (3, 8)
        bbox_3d_1 = transform_coordinates_3d(noc_cube_1, sRT_1)  # (3, 8)
        noc_cube_2 = get_3d_bbox(size_2, 0)
        bbox_3d_2 = transform_coordinates_3d(noc_cube_2, sRT_2)
        # per-axis min/max over the 8 transformed corners
        bbox_1_max = np.amax(bbox_3d_1, axis=1)
        bbox_1_min = np.amin(bbox_3d_1, axis=1)
        bbox_2_max = np.amax(bbox_3d_2, axis=1)
        bbox_2_min = np.amin(bbox_3d_2, axis=1)
overlap_min = np.maximum(bbox_1_min, bbox_2_min) # N
overlap_max = np.minimum(bbox_1_max, bbox_2_max)
# intersections and union
if np.amin(overlap_max - overlap_min) < 0:
intersections = 0
else:
intersections = np.prod(overlap_max - overlap_min)
union = np.prod(bbox_1_max - bbox_1_min) + np.prod(bbox_2_max - bbox_2_min) - intersections
overlaps = intersections / union
return overlaps
if sRT_1 is None or sRT_2 is None:
return -1
if (class_name_1 in ['bottle', 'bowl', 'can'] and class_name_1 == class_name_2) or \
(class_name_1 == 'mug' and class_name_1 == class_name_2 and handle_visibility==0):
def y_rotation_matrix(theta):
return np.array([[ np.cos(theta), 0, np.sin(theta), 0],
[ 0, 1, 0, 0],
[-np.sin(theta), 0, np.cos(theta), 0],
[ 0, 0, 0, 1]])
n = 20
max_iou = 0
for i in range(n):
rotated_RT_1 = sRT_1 @ y_rotation_matrix(2 * math.pi * i / float(n))
max_iou = max(max_iou, asymmetric_3d_iou(rotated_RT_1, sRT_2, size_1, size_2))
else:
max_iou = asymmetric_3d_iou(sRT_1, sRT_2, size_1, size_2)
return max_iou
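# Usage sketch (illustrative): identical poses and sizes yield IoU 1.0. 'laptop'
# avoids the symmetric branch; handle_visibility only matters for mugs.
def _example_compute_3d_IoU():
    sRT = np.identity(4)
    size = np.array([0.1, 0.2, 0.1])
    iou = compute_3d_IoU(sRT, sRT.copy(), size, size, 'laptop', 'laptop', 1)
    print(iou)  # 1.0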
def compute_IoU_matches(gt_class_ids, gt_sRT, gt_size, gt_handle_visibility,
pred_class_ids, pred_sRT, pred_size, pred_scores,
synset_names, iou_3d_thresholds, score_threshold=0):
""" Find matches between NOCS prediction and ground truth instances.
Args:
size: 3D bounding box size
bboxes: 2D bounding boxes
Returns:
gt_matches: 2-D array. For each GT box it has the index of the matched predicted box.
pred_matches: 2-D array. For each predicted box, it has the index of the matched ground truth box.
overlaps: IoU overlaps.
indices:
"""
num_pred = len(pred_class_ids)
num_gt = len(gt_class_ids)
indices = np.zeros(0)
if num_pred:
# Sort predictions by score from high to low
indices = np.argsort(pred_scores)[::-1]
pred_class_ids = pred_class_ids[indices].copy()
pred_size = pred_size[indices].copy()
pred_sRT = pred_sRT[indices].copy()
    # compute IoU overlaps, shape [num_pred, num_gt]
overlaps = np.zeros((num_pred, num_gt), dtype=np.float32)
for i in range(num_pred):
for j in range(num_gt):
overlaps[i, j] = compute_3d_IoU(pred_sRT[i], gt_sRT[j], pred_size[i, :], gt_size[j],
synset_names[pred_class_ids[i]], synset_names[gt_class_ids[j]], gt_handle_visibility[j])
# loop through predictions and find matching ground truth boxes
num_iou_3d_thres = len(iou_3d_thresholds)
pred_matches = -1 * np.ones([num_iou_3d_thres, num_pred])
gt_matches = -1 * np.ones([num_iou_3d_thres, num_gt])
for s, iou_thres in enumerate(iou_3d_thresholds):
for i in range(indices.shape[0]):
# Find best matching ground truth box
# 1. Sort matches by score
sorted_ixs = np.argsort(overlaps[i])[::-1]
# 2. Remove low scores
low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0]
if low_score_idx.size > 0:
sorted_ixs = sorted_ixs[:low_score_idx[0]]
# 3. Find the match
for j in sorted_ixs:
# If ground truth box is already matched, go to next one
if gt_matches[s, j] > -1:
continue
# If we reach IoU smaller than the threshold, end the loop
iou = overlaps[i, j]
if iou < iou_thres:
break
# Do we have a match?
if not pred_class_ids[i] == gt_class_ids[j]:
continue
if iou > iou_thres:
gt_matches[s, j] = i
pred_matches[s, i] = j
break
return gt_matches, pred_matches, overlaps, indices
def compute_RT_errors(sRT_1, sRT_2, class_id, handle_visibility, synset_names):
"""
Args:
sRT_1: [4, 4]. homogeneous affine transformation
sRT_2: [4, 4]. homogeneous affine transformation
Returns:
        theta: angle difference of R in degrees
        shift: L2 difference of T in centimeters
"""
# make sure the last row is [0, 0, 0, 1]
if sRT_1 is None or sRT_2 is None:
return -1
try:
assert np.array_equal(sRT_1[3, :], sRT_2[3, :])
assert np.array_equal(sRT_1[3, :], np.array([0, 0, 0, 1]))
except AssertionError:
print(sRT_1[3, :], sRT_2[3, :])
exit()
R1 = sRT_1[:3, :3] / np.cbrt(np.linalg.det(sRT_1[:3, :3]))
T1 = sRT_1[:3, 3]
R2 = sRT_2[:3, :3] / np.cbrt(np.linalg.det(sRT_2[:3, :3]))
T2 = sRT_2[:3, 3]
# symmetric when rotating around y-axis
if synset_names[class_id] in ['bottle', 'can', 'bowl'] or \
(synset_names[class_id] == 'mug' and handle_visibility == 0):
y = np.array([0, 1, 0])
y1 = R1 @ y
y2 = R2 @ y
cos_theta = y1.dot(y2) / (np.linalg.norm(y1) * np.linalg.norm(y2))
else:
R = R1 @ R2.transpose()
cos_theta = (np.trace(R) - 1) / 2
theta = np.arccos(np.clip(cos_theta, -1.0, 1.0)) * 180 / np.pi
shift = np.linalg.norm(T1 - T2) * 100
result = np.array([theta, shift])
return result
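# Usage sketch (illustrative): a pure 90-degree rotation about x between two
# poses of a non-symmetric class ('laptop') gives theta = 90 and shift = 0.
def _example_compute_RT_errors():
    synset_names = ['BG', 'bottle', 'bowl', 'camera', 'can', 'laptop', 'mug']
    sRT_1 = np.identity(4)
    sRT_2 = np.identity(4)
    sRT_2[:3, :3] = np.array([[1.0, 0.0, 0.0],
                              [0.0, 0.0, -1.0],
                              [0.0, 1.0, 0.0]])
    theta, shift = compute_RT_errors(sRT_1, sRT_2, 5, 1, synset_names)
    print(theta, shift)  # ~90.0, 0.0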
def compute_RT_overlaps(gt_class_ids, gt_sRT, gt_handle_visibility, pred_class_ids, pred_sRT, synset_names):
    """ Compute pose errors between all prediction and ground truth pairs.
    Returns:
        overlaps: [num_pred, num_gt, 2] array of (rotation error in degrees,
            translation error in centimeters)
    """
num_pred = len(pred_class_ids)
num_gt = len(gt_class_ids)
overlaps = np.zeros((num_pred, num_gt, 2))
for i in range(num_pred):
for j in range(num_gt):
overlaps[i, j, :] = compute_RT_errors(pred_sRT[i], gt_sRT[j], gt_class_ids[j],
gt_handle_visibility[j], synset_names)
return overlaps
def compute_RT_matches(overlaps, pred_class_ids, gt_class_ids, degree_thres_list, shift_thres_list):
num_degree_thres = len(degree_thres_list)
num_shift_thres = len(shift_thres_list)
num_pred = len(pred_class_ids)
num_gt = len(gt_class_ids)
pred_matches = -1 * np.ones((num_degree_thres, num_shift_thres, num_pred))
gt_matches = -1 * np.ones((num_degree_thres, num_shift_thres, num_gt))
if num_pred == 0 or num_gt == 0:
return gt_matches, pred_matches
assert num_pred == overlaps.shape[0]
assert num_gt == overlaps.shape[1]
assert overlaps.shape[2] == 2
for d, degree_thres in enumerate(degree_thres_list):
for s, shift_thres in enumerate(shift_thres_list):
for i in range(num_pred):
# Find best matching ground truth box
# 1. Sort matches by scores from low to high
sum_degree_shift = np.sum(overlaps[i, :, :], axis=-1)
sorted_ixs = np.argsort(sum_degree_shift)
# 2. Find the match
for j in sorted_ixs:
# If ground truth box is already matched, go to next one
if gt_matches[d, s, j] > -1 or pred_class_ids[i] != gt_class_ids[j]:
continue
                    # skip if either the rotation or translation error exceeds its threshold
                    if overlaps[i, j, 0] > degree_thres or overlaps[i, j, 1] > shift_thres:
                        continue
gt_matches[d, s, j] = i
pred_matches[d, s, i] = j
break
return gt_matches, pred_matches
def compute_ap_and_acc(pred_matches, pred_scores, gt_matches):
# sort the scores from high to low
assert pred_matches.shape[0] == pred_scores.shape[0]
    score_indices = np.argsort(pred_scores)[::-1]
    pred_matches = pred_matches[score_indices]
precisions = np.cumsum(pred_matches > -1) / (np.arange(len(pred_matches)) + 1)
recalls = np.cumsum(pred_matches > -1).astype(np.float32) / len(gt_matches)
# Pad with start and end values to simplify the math
precisions = np.concatenate([[0], precisions, [0]])
recalls = np.concatenate([[0], recalls, [1]])
# Ensure precision values decrease but don't increase. This way, the
# precision value at each recall threshold is the maximum it can be
# for all following recall thresholds, as specified by the VOC paper.
for i in range(len(precisions) - 2, -1, -1):
precisions[i] = np.maximum(precisions[i], precisions[i + 1])
# compute mean AP over recall range
indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
ap = np.sum((recalls[indices] - recalls[indices - 1]) * precisions[indices])
# accuracy
acc = np.sum(pred_matches > -1) / len(pred_matches)
return ap, acc
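# Usage sketch (illustrative) of the VOC-style AP on toy data: three predictions,
# two matched to GT instances (a match index > -1 means matched).
def _example_compute_ap_and_acc():
    pred_matches = np.array([0, -1, 1])      # per-prediction matched GT index
    pred_scores = np.array([0.9, 0.8, 0.7])  # used to sort predictions
    gt_matches = np.array([0, 2])            # per-GT matched prediction index
    ap, acc = compute_ap_and_acc(pred_matches, pred_scores, gt_matches)
    print(ap, acc)  # AP from the interpolated PR curve; acc = 2/3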
def compute_mAP(pred_results, out_dir, degree_thresholds=[180], shift_thresholds=[100],
iou_3d_thresholds=[0.1], iou_pose_thres=0.1, use_matches_for_pose=False):
""" Compute mean Average Precision.
Returns:
iou_aps:
pose_aps:
iou_acc:
pose_acc:
"""
synset_names = ['BG', 'bottle', 'bowl', 'camera', 'can', 'laptop', 'mug']
num_classes = len(synset_names)
degree_thres_list = list(degree_thresholds) + [360]
num_degree_thres = len(degree_thres_list)
shift_thres_list = list(shift_thresholds) + [100]
num_shift_thres = len(shift_thres_list)
iou_thres_list = list(iou_3d_thresholds)
num_iou_thres = len(iou_thres_list)
if use_matches_for_pose:
assert iou_pose_thres in iou_thres_list
    # pre-allocate more than enough memory
    # IoU
iou_aps = np.zeros((num_classes + 1, num_iou_thres))
iou_acc = np.zeros((num_classes + 1, num_iou_thres))
iou_pred_matches_all = [np.zeros((num_iou_thres, 30000)) for _ in range(num_classes)]
iou_pred_scores_all = [np.zeros((num_iou_thres, 30000)) for _ in range(num_classes)]
iou_gt_matches_all = [np.zeros((num_iou_thres, 30000)) for _ in range(num_classes)]
iou_pred_count = [0 for _ in range(num_classes)]
iou_gt_count = [0 for _ in range(num_classes)]
# pose
pose_aps = np.zeros((num_classes + 1, num_degree_thres, num_shift_thres))
pose_acc = np.zeros((num_classes + 1, num_degree_thres, num_shift_thres))
pose_pred_matches_all = [np.zeros((num_degree_thres, num_shift_thres, 30000)) for _ in range(num_classes)]
pose_pred_scores_all = [np.zeros((num_degree_thres, num_shift_thres, 30000)) for _ in range(num_classes)]
pose_gt_matches_all = [np.zeros((num_degree_thres, num_shift_thres, 30000)) for _ in range(num_classes)]
pose_pred_count = [0 for _ in range(num_classes)]
pose_gt_count = [0 for _ in range(num_classes)]
# loop over results to gather pred matches and gt matches for iou and pose metrics
    for result in tqdm(pred_results):
gt_class_ids = result['gt_class_ids'].astype(np.int32)
gt_sRT = np.array(result['gt_RTs'])
gt_size = np.array(result['gt_scales'])
gt_handle_visibility = result['gt_handle_visibility']
pred_class_ids = result['pred_class_ids']
pred_sRT = np.array(result['pred_RTs'])
pred_size = result['pred_scales']
pred_scores = result['pred_scores']
if len(gt_class_ids) == 0 and len(pred_class_ids) == 0:
continue
for cls_id in range(1, num_classes):
            # get gt and predictions in this class
            # match within each class first; a class may contain several instances,
            # which are disambiguated by the matching below
cls_gt_class_ids = gt_class_ids[gt_class_ids==cls_id] if len(gt_class_ids) else np.zeros(0)
cls_gt_sRT = gt_sRT[gt_class_ids==cls_id] if len(gt_class_ids) else np.zeros((0, 4, 4))
cls_gt_size = gt_size[gt_class_ids==cls_id] if len(gt_class_ids) else np.zeros((0, 3))
            # non-mug classes default to handle_visibility = 1
if synset_names[cls_id] != 'mug':
cls_gt_handle_visibility = np.ones_like(cls_gt_class_ids)
            # for mugs, keep handle_visibility consistent with the stored gt_handle_visibility
else:
cls_gt_handle_visibility = gt_handle_visibility[gt_class_ids==cls_id] if len(gt_class_ids) else np.ones(0)
cls_pred_class_ids = pred_class_ids[pred_class_ids==cls_id] if len(pred_class_ids) else np.zeros(0)
cls_pred_sRT = pred_sRT[pred_class_ids==cls_id] if len(pred_class_ids) else np.zeros((0, 4, 4))
cls_pred_size = pred_size[pred_class_ids==cls_id] if len(pred_class_ids) else np.zeros((0, 3))
cls_pred_scores = pred_scores[pred_class_ids==cls_id] if len(pred_class_ids) else np.zeros(0)
# calculate the overlap between each gt instance and pred instance
iou_cls_gt_match, iou_cls_pred_match, _, iou_pred_indices = \
compute_IoU_matches(cls_gt_class_ids, cls_gt_sRT, cls_gt_size, cls_gt_handle_visibility,
cls_pred_class_ids, cls_pred_sRT, cls_pred_size, cls_pred_scores,
synset_names, iou_thres_list)
if len(iou_pred_indices):
cls_pred_class_ids = cls_pred_class_ids[iou_pred_indices]
cls_pred_sRT = cls_pred_sRT[iou_pred_indices]
cls_pred_scores = cls_pred_scores[iou_pred_indices]
num_pred = iou_cls_pred_match.shape[1]
pred_start = iou_pred_count[cls_id]
pred_end = pred_start + num_pred
iou_pred_count[cls_id] = pred_end
iou_pred_matches_all[cls_id][:, pred_start:pred_end] = iou_cls_pred_match
cls_pred_scores_tile = np.tile(cls_pred_scores, (num_iou_thres, 1))
assert cls_pred_scores_tile.shape[1] == num_pred
iou_pred_scores_all[cls_id][:, pred_start:pred_end] = cls_pred_scores_tile
num_gt = iou_cls_gt_match.shape[1]
gt_start = iou_gt_count[cls_id]
gt_end = gt_start + num_gt
iou_gt_count[cls_id] = gt_end
iou_gt_matches_all[cls_id][:, gt_start:gt_end] = iou_cls_gt_match
if use_matches_for_pose:
thres_ind = list(iou_thres_list).index(iou_pose_thres)
iou_thres_pred_match = iou_cls_pred_match[thres_ind, :]
cls_pred_class_ids = cls_pred_class_ids[iou_thres_pred_match > -1] if len(iou_thres_pred_match) > 0 else np.zeros(0)
cls_pred_sRT = cls_pred_sRT[iou_thres_pred_match > -1] if len(iou_thres_pred_match) > 0 else np.zeros((0, 4, 4))
cls_pred_scores = cls_pred_scores[iou_thres_pred_match > -1] if len(iou_thres_pred_match) > 0 else np.zeros(0)
iou_thres_gt_match = iou_cls_gt_match[thres_ind, :]
cls_gt_class_ids = cls_gt_class_ids[iou_thres_gt_match > -1] if len(iou_thres_gt_match) > 0 else np.zeros(0)
cls_gt_sRT = cls_gt_sRT[iou_thres_gt_match > -1] if len(iou_thres_gt_match) > 0 else np.zeros((0, 4, 4))
cls_gt_handle_visibility = cls_gt_handle_visibility[iou_thres_gt_match > -1] if len(iou_thres_gt_match) > 0 else np.zeros(0)
RT_overlaps = compute_RT_overlaps(cls_gt_class_ids, cls_gt_sRT, cls_gt_handle_visibility,
cls_pred_class_ids, cls_pred_sRT, synset_names)
pose_cls_gt_match, pose_cls_pred_match = compute_RT_matches(RT_overlaps, cls_pred_class_ids, cls_gt_class_ids,
degree_thres_list, shift_thres_list)
num_pred = pose_cls_pred_match.shape[2]
pred_start = pose_pred_count[cls_id]
pred_end = pred_start + num_pred
pose_pred_count[cls_id] = pred_end
pose_pred_matches_all[cls_id][:, :, pred_start:pred_end] = pose_cls_pred_match
cls_pred_scores_tile = np.tile(cls_pred_scores, (num_degree_thres, num_shift_thres, 1))
assert cls_pred_scores_tile.shape[2] == num_pred
pose_pred_scores_all[cls_id][:, :, pred_start:pred_end] = cls_pred_scores_tile
num_gt = pose_cls_gt_match.shape[2]
gt_start = pose_gt_count[cls_id]
gt_end = gt_start + num_gt
pose_gt_count[cls_id] = gt_end
pose_gt_matches_all[cls_id][:, :, gt_start:gt_end] = pose_cls_gt_match
# trim zeros
for cls_id in range(num_classes):
# IoU
iou_pred_matches_all[cls_id] = iou_pred_matches_all[cls_id][:, :iou_pred_count[cls_id]]
iou_pred_scores_all[cls_id] = iou_pred_scores_all[cls_id][:, :iou_pred_count[cls_id]]
iou_gt_matches_all[cls_id] = iou_gt_matches_all[cls_id][:, :iou_gt_count[cls_id]]
# pose
pose_pred_matches_all[cls_id] = pose_pred_matches_all[cls_id][:, :, :pose_pred_count[cls_id]]
pose_pred_scores_all[cls_id] = pose_pred_scores_all[cls_id][:, :, :pose_pred_count[cls_id]]
pose_gt_matches_all[cls_id] = pose_gt_matches_all[cls_id][:, :, :pose_gt_count[cls_id]]
# compute 3D IoU mAP
for cls_id in range(1, num_classes):
for s, iou_thres in enumerate(iou_thres_list):
iou_aps[cls_id, s], iou_acc[cls_id, s] = compute_ap_and_acc(iou_pred_matches_all[cls_id][s, :],
iou_pred_scores_all[cls_id][s, :],
iou_gt_matches_all[cls_id][s, :])
iou_aps[-1, :] = np.mean(iou_aps[1:-1, :], axis=0)
iou_acc[-1, :] = np.mean(iou_acc[1:-1, :], axis=0)
# compute pose mAP
for i, degree_thres in enumerate(degree_thres_list):
for j, shift_thres in enumerate(shift_thres_list):
for cls_id in range(1, num_classes):
cls_pose_pred_matches_all = pose_pred_matches_all[cls_id][i, j, :]
cls_pose_gt_matches_all = pose_gt_matches_all[cls_id][i, j, :]
cls_pose_pred_scores_all = pose_pred_scores_all[cls_id][i, j, :]
pose_aps[cls_id, i, j], pose_acc[cls_id, i, j] = compute_ap_and_acc(cls_pose_pred_matches_all,
cls_pose_pred_scores_all,
cls_pose_gt_matches_all)
pose_aps[-1, i, j] = np.mean(pose_aps[1:-1, i, j])
pose_acc[-1, i, j] = np.mean(pose_acc[1:-1, i, j])
# save results to pkl
result_dict = {}
result_dict['iou_thres_list'] = iou_thres_list
result_dict['degree_thres_list'] = degree_thres_list
result_dict['shift_thres_list'] = shift_thres_list
result_dict['iou_aps'] = iou_aps
result_dict['pose_aps'] = pose_aps
result_dict['iou_acc'] = iou_acc
result_dict['pose_acc'] = pose_acc
pkl_path = os.path.join(out_dir, 'mAP_Acc.pkl')
with open(pkl_path, 'wb') as f:
cPickle.dump(result_dict, f)
return iou_aps, pose_aps, iou_acc, pose_acc
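# Usage sketch (illustrative): the per-image dict layout consumed by compute_mAP,
# mirroring the keys read in the loop above. All values here are made up.
def _example_compute_mAP():
    result = {
        'gt_class_ids': np.array([1]),              # 1 = 'bottle'
        'gt_RTs': np.identity(4)[None, ...],        # [num_gt, 4, 4]
        'gt_scales': np.array([[0.1, 0.3, 0.1]]),   # [num_gt, 3]
        'gt_handle_visibility': np.array([1]),
        'pred_class_ids': np.array([1]),
        'pred_RTs': np.identity(4)[None, ...],
        'pred_scales': np.array([[0.1, 0.3, 0.1]]),
        'pred_scores': np.array([0.95]),
    }
    return compute_mAP([result], out_dir='.', iou_3d_thresholds=[0.1, 0.25, 0.5])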
def plot_mAP(iou_aps, pose_aps, out_dir, iou_thres_list, degree_thres_list, shift_thres_list):
""" Draw iou 3d AP vs. iou thresholds.
"""
# mpl.style.use('ggplot')
labels = ['bottle', 'bowl', 'camera', 'can', 'laptop', 'mug', 'mean', 'nocs']
colors = ['tab:blue', 'tab:orange', 'tab:green', 'tab:pink', 'tab:olive', 'tab:purple', 'tab:red', 'tab:gray']
styles = ['-', '-', '-', '-', '-', '-', '--', ':']
fig, (ax_iou, ax_degree, ax_shift) = plt.subplots(1, 3, figsize=(8, 3.5))
# IoU subplot
ax_iou.set_title('3D IoU', fontsize=16)
ax_iou.set_ylabel('Average Precision', fontsize=14)
ax_iou.set_ylim(0, 100)
ax_iou.set_xlabel('Percent', fontsize=14)
ax_iou.set_xlim(0, 100)
ax_iou.xaxis.set_ticks([0, 25, 50, 75, 100])
ax_iou.grid()
for i in range(1, iou_aps.shape[0]):
ax_iou.plot(100*np.array(iou_thres_list), 100*iou_aps[i, :],
color=colors[i-1], linestyle=styles[i-1], label=labels[i-1])
# rotation subplot
ax_degree.set_title('Rotation', fontsize=16)
ax_degree.set_ylim(0, 100)
ax_degree.yaxis.set_ticklabels([])
ax_degree.set_xlabel('Degree', fontsize=14)
ax_degree.set_xlim(0, 60)
ax_degree.xaxis.set_ticks([0, 20, 40, 60])
ax_degree.grid()
for i in range(1, pose_aps.shape[0]):
ax_degree.plot(np.array(degree_thres_list), 100*pose_aps[i, :len(degree_thres_list), -1],
color=colors[i-1], linestyle=styles[i-1], label=labels[i-1])
# translation subplot
ax_shift.set_title('Translation', fontsize=16)
ax_shift.set_ylim(0, 100)
ax_shift.yaxis.set_ticklabels([])
ax_shift.set_xlabel('Centimeter', fontsize=14)
ax_shift.set_xlim(0, 10)
ax_shift.xaxis.set_ticks([0, 5, 10])
ax_shift.grid()
for i in range(1, pose_aps.shape[0]):
ax_shift.plot(np.array(shift_thres_list), 100*pose_aps[i, -1, :len(shift_thres_list)],
color=colors[i-1], linestyle=styles[i-1], label=labels[i-1])
ax_shift.legend(loc='lower right', fontsize='small')
plt.tight_layout()
# plt.show()
plt.savefig(os.path.join(out_dir, 'mAP.jpg'), dpi=600)
plt.close(fig)
return
def calculate_2d_projections(coordinates_3d, intrinsics):
"""
Args:
coordinates_3d: [3, N]
intrinsics: [3, 3]
Returns:
projected_coordinates: [N, 2]
"""
projected_coordinates = intrinsics @ coordinates_3d
projected_coordinates = projected_coordinates[:2, :] / projected_coordinates[2, :]
projected_coordinates = projected_coordinates.transpose()
projected_coordinates = np.array(projected_coordinates, dtype=np.int32)
return projected_coordinates
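# Usage sketch (illustrative): with a made-up intrinsic matrix, a point on the
# optical axis projects to the principal point. Points must have z > 0.
def _example_calculate_2d_projections():
    intrinsics = np.array([[600.0, 0.0, 320.0],
                           [0.0, 600.0, 240.0],
                           [0.0, 0.0, 1.0]])
    pts = np.array([[0.0], [0.0], [1.0]])  # one point, 1 unit along the optical axis
    uv = calculate_2d_projections(pts, intrinsics)
    print(uv)  # [[320 240]]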
def align_rotation(sRT):
""" Align rotations for symmetric objects.
Args:
sRT: 4 x 4
"""
s = np.cbrt(np.linalg.det(sRT[:3, :3]))
R = sRT[:3, :3] / s
T = sRT[:3, 3]
theta_x = R[0, 0] + R[2, 2]
theta_y = R[0, 2] - R[2, 0]
r_norm = math.sqrt(theta_x**2 + theta_y**2)
s_map = np.array([[theta_x/r_norm, 0.0, -theta_y/r_norm],
[0.0, 1.0, 0.0 ],
[theta_y/r_norm, 0.0, theta_x/r_norm]])
rotation = R @ s_map
aligned_sRT = np.identity(4, dtype=np.float32)
aligned_sRT[:3, :3] = s * rotation
aligned_sRT[:3, 3] = T
return aligned_sRT
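# Usage sketch (illustrative): for a y-symmetric object, align_rotation cancels
# the rotation about y, so a pure y-rotation maps back to the identity.
def _example_align_rotation():
    theta = np.pi / 5
    sRT = np.identity(4)
    sRT[:3, :3] = np.array([[np.cos(theta), 0.0, np.sin(theta)],
                            [0.0, 1.0, 0.0],
                            [-np.sin(theta), 0.0, np.cos(theta)]])
    aligned = align_rotation(sRT)
    assert np.allclose(aligned[:3, :3], np.identity(3), atol=1e-6)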
def draw(img, imgpts, axes, color):
    imgpts = np.int32(imgpts).reshape(-1, 2)
    color = (int(color[0]), int(color[1]), int(color[2]))
    # draw the bottom face of the box (corners 4-7)
    for i, j in zip([4, 5, 6, 7], [5, 7, 4, 6]):
        img = cv2.line(img, tuple(imgpts[i]), tuple(imgpts[j]), color=color, thickness=2, lineType=cv2.LINE_AA)
    # draw the vertical pillars connecting the two faces
    for i, j in zip(range(4), range(4, 8)):
        img = cv2.line(img, tuple(imgpts[i]), tuple(imgpts[j]), color=color, thickness=2, lineType=cv2.LINE_AA)
    # draw the top face of the box (corners 0-3)
    for i, j in zip([0, 1, 2, 3], [1, 3, 0, 2]):
        img = cv2.line(img, tuple(imgpts[i]), tuple(imgpts[j]), color=color, thickness=2, lineType=cv2.LINE_AA)
    # draw the object axes (BGR colors)
    img = cv2.line(img, tuple(axes[0]), tuple(axes[1]), (0, 0, 255), 3, lineType=cv2.LINE_AA)  # x: red
    img = cv2.line(img, tuple(axes[0]), tuple(axes[3]), (255, 0, 0), 3, lineType=cv2.LINE_AA)  # z: blue
    img = cv2.line(img, tuple(axes[0]), tuple(axes[2]), (0, 255, 0), 3, lineType=cv2.LINE_AA)  # y: green, drawn last
    return img
def draw_detections(img, out_dir, data_name, img_id, intrinsics, pred_sRT, pred_size, pred_class_ids,
gt_sRT, gt_size, gt_class_ids, nocs_sRT, nocs_size, nocs_class_ids, draw_gt=True, draw_nocs=True):
""" Visualize pose predictions.
"""
out_path = os.path.join(out_dir, '{}_{}_pred.png'.format(data_name, img_id))
# draw nocs results - BLUE color
if draw_nocs:
for i in range(nocs_sRT.shape[0]):
if nocs_class_ids[i] in [1, 2, 4]:
sRT = align_rotation(nocs_sRT[i, :, :])
else:
sRT = nocs_sRT[i, :, :]
bbox_3d = get_3d_bbox(nocs_size[i, :], 0)
transformed_bbox_3d = transform_coordinates_3d(bbox_3d, sRT)
projected_bbox = calculate_2d_projections(transformed_bbox_3d, intrinsics)
xyz_axis = 0.1 * np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]]).transpose()
transformed_axes = transform_coordinates_3d(xyz_axis, sRT)
projected_axes = calculate_2d_projections(transformed_axes, intrinsics)
img = draw(img, projected_bbox, projected_axes, (255, 0, 0))
    # draw ground truth - GREEN color
    if draw_gt:
        for i in range(gt_sRT.shape[0]):
            if gt_class_ids[i] in [1, 2, 4]:
                sRT = align_rotation(gt_sRT[i, :, :])
            else:
                sRT = gt_sRT[i, :, :]
            bbox_3d = get_3d_bbox(gt_size[i, :], 0)
            transformed_bbox_3d = transform_coordinates_3d(bbox_3d, sRT)
            projected_bbox = calculate_2d_projections(transformed_bbox_3d, intrinsics)
            xyz_axis = 0.1 * np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]]).transpose()
            transformed_axes = transform_coordinates_3d(xyz_axis, sRT)
            projected_axes = calculate_2d_projections(transformed_axes, intrinsics)
            img = draw(img, projected_bbox, projected_axes, (0, 255, 0))
    # draw predictions - RED color
    for i in range(pred_sRT.shape[0]):
        if pred_class_ids[i] in [1, 2, 4]:
            sRT = align_rotation(pred_sRT[i, :, :])
        else:
            sRT = pred_sRT[i, :, :]
        bbox_3d = get_3d_bbox(pred_size[i, :], 0)
        transformed_bbox_3d = transform_coordinates_3d(bbox_3d, sRT)
        projected_bbox = calculate_2d_projections(transformed_bbox_3d, intrinsics)
        xyz_axis = 0.1 * np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]]).transpose()
        transformed_axes = transform_coordinates_3d(xyz_axis, sRT)
        projected_axes = calculate_2d_projections(transformed_axes, intrinsics)
        img = draw(img, projected_bbox, projected_axes, (0, 0, 255))
cv2.imwrite(out_path, img)
# cv2.imshow('vis', img)
# cv2.waitKey(0)