# demo file.
# author: ynie
# date: July, 2020
from net_utils.utils import load_device, load_model
from net_utils.utils import CheckpointIO
from configs.config_utils import mount_external_config
from time import time
import trimesh
import numpy as np
from utils import pc_util
from models.iscnet.dataloader import collate_fn
import torch
from net_utils.ap_helper import parse_predictions
from net_utils.libs import flip_axis_to_depth, extract_pc_in_box3d, flip_axis_to_camera
from net_utils.box_util import get_3d_box
from torch import optim
from models.loss import chamfer_func
import os
import vtk
from vtk.util.numpy_support import vtk_to_numpy, numpy_to_vtk
from utils.scannet.visualization.vis_for_demo import Vis_base


def load_demo_data(cfg, device):
    point_cloud = trimesh.load(cfg.config['demo_path']).vertices
    use_color = cfg.config['data']['use_color_detection'] or cfg.config['data']['use_color_completion']
    MEAN_COLOR_RGB = np.array([121.87661, 109.73591, 95.61673])
    use_height = not cfg.config['data']['no_height']
    num_points = cfg.config['data']['num_point']

    if not use_color:
        point_cloud = point_cloud[:, 0:3]  # do not use color for now
    else:
        point_cloud = point_cloud[:, 0:6]
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB) / 256.0

    if use_height:
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate([point_cloud, np.expand_dims(height, 1)], 1)

    point_cloud, choices = pc_util.random_sampling(point_cloud, num_points, return_choices=True)
    data = collate_fn([{'point_clouds': point_cloud.astype(np.float32)}])

    for key in data:
        if key not in ['object_voxels', 'shapenet_catids', 'shapenet_ids']:
            data[key] = data[key].to(device)
    return data

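
# Illustrative sketch (a hypothetical helper, not called anywhere in this demo):
# the height feature built above follows the VoteNet convention, where the
# "floor" is taken as a low percentile of the z coordinates and each point's
# height above it is appended as an extra channel.
def _height_feature_sketch(points_xyz):
    '''points_xyz: (N, 3) array; returns an (N, 4) array with a height channel.'''
    floor_height = np.percentile(points_xyz[:, 2], 0.99)
    height = points_xyz[:, 2] - floor_height
    return np.concatenate([points_xyz, np.expand_dims(height, 1)], 1)
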
def get_proposal_id(cfg, end_points, data, mode='random', batch_sample_ids=None, DUMP_CONF_THRESH=-1.):
    '''
    Get the proposal ids for completion training (kept small to fit in limited GPU RAM).
    :param end_points: estimated data from VoteNet.
    :param data: data source which contains gt contents.
    :return: proposal ids per batch sample.
    '''
    batch_size = 1
    device = end_points['center'].device
    NUM_PROPOSALS = end_points['center'].size(1)
    proposal_id_list = []

    if mode == 'objectness' or batch_sample_ids is not None:
        objectness_probs = torch.softmax(end_points['objectness_scores'], dim=2)[..., 1]

    for batch_id in range(batch_size):
        proposal_to_gt_box_w_cls = torch.arange(0, NUM_PROPOSALS).unsqueeze(-1).to(device).long()
        sample_ids = (objectness_probs[batch_id] > DUMP_CONF_THRESH).cpu().numpy() * batch_sample_ids[batch_id]
        sample_ids = sample_ids.astype(bool)  # np.bool was removed in NumPy 1.24; the builtin works everywhere.
        proposal_to_gt_box_w_cls = proposal_to_gt_box_w_cls[sample_ids].long()
        proposal_id_list.append(proposal_to_gt_box_w_cls.unsqueeze(0))

    return torch.cat(proposal_id_list, dim=0)


def chamfer_dist(obj_points, obj_points_masks, pc_in_box, pc_in_box_masks, centroid_params, orientation_params):
    b_s = obj_points.size(0)
    # Batched yaw rotation about the z (up) axis.
    axis_rectified = torch.zeros(size=(b_s, 3, 3)).to(obj_points.device)
    axis_rectified[:, 2, 2] = 1
    axis_rectified[:, 0, 0] = torch.cos(orientation_params)
    axis_rectified[:, 0, 1] = torch.sin(orientation_params)
    axis_rectified[:, 1, 0] = -torch.sin(orientation_params)
    axis_rectified[:, 1, 1] = torch.cos(orientation_params)
    obj_points_after = torch.bmm(obj_points, axis_rectified) + centroid_params.unsqueeze(-2)
    dist1, dist2 = chamfer_func(obj_points_after, pc_in_box)
    return torch.mean(dist2 * pc_in_box_masks) * 1e3

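
# Illustrative sketch (hypothetical helper, not used by the pipeline):
# chamfer_dist above builds a batched yaw rotation about the z (up) axis and
# applies it by right multiplication, points @ R(theta), before translating to
# the predicted centroid. A single-sample numpy equivalent:
def _yaw_rotate_sketch(points, theta, centroid):
    '''points: (N, 3); theta: scalar yaw angle; centroid: (3,).'''
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation = np.array([[cos_t, sin_t, 0.],
                         [-sin_t, cos_t, 0.],
                         [0., 0., 1.]])
    return points.dot(rotation) + centroid
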
def fit_mesh_to_scan(cfg, pred_mesh_dict, parsed_predictions, eval_dict, input_scan, dump_threshold):
    '''Fit meshes to the input scan.'''
    pred_corners_3d_upright_camera = parsed_predictions['pred_corners_3d_upright_camera']
    pred_sem_cls = parsed_predictions['pred_sem_cls']
    bsize, N_proposals = pred_sem_cls.shape
    pred_mask = eval_dict['pred_mask']
    obj_prob = parsed_predictions['obj_prob']
    device = input_scan.device
    input_scan = input_scan.cpu().numpy()
    transform_shapenet = np.array([[0, 0, -1], [-1, 0, 0], [0, 1, 0]])

    index_list = []
    box_params_list = []
    max_obj_points = 10000
    max_pc_in_box = 50000
    obj_points_list = []
    obj_points_mask_list = []
    pc_in_box_list = []
    pc_in_box_mask_list = []

    for i in range(bsize):
        for j in range(N_proposals):
            if not (pred_mask[i, j] == 1 and obj_prob[i, j] > dump_threshold):
                continue
            # get mesh points
            mesh_data = pred_mesh_dict['meshes'][list(pred_mesh_dict['proposal_ids'][i, :, 0]).index(j)]
            obj_points = mesh_data.vertices
            obj_points = obj_points - (obj_points.max(0) + obj_points.min(0)) / 2.
            obj_points = obj_points.dot(transform_shapenet.T)
            obj_points = obj_points / (obj_points.max(0) - obj_points.min(0))

            obj_points_matrix = np.zeros((max_obj_points, 3))
            obj_points_mask = np.zeros((max_obj_points,), dtype=np.uint8)
            obj_points_matrix[:obj_points.shape[0], :] = obj_points
            obj_points_mask[:obj_points.shape[0]] = 1

            # box corners
            box_corners_cam = pred_corners_3d_upright_camera[i, j]
            box_corners_depth = flip_axis_to_depth(box_corners_cam)
            # box vector form
            centroid = (np.max(box_corners_depth, axis=0) + np.min(box_corners_depth, axis=0)) / 2.
            forward_vector = box_corners_depth[1] - box_corners_depth[2]
            left_vector = box_corners_depth[0] - box_corners_depth[1]
            up_vector = box_corners_depth[6] - box_corners_depth[2]
            orientation = np.arctan2(forward_vector[1], forward_vector[0])
            sizes = np.linalg.norm([forward_vector, left_vector, up_vector], axis=1)
            box_params = np.array([*centroid, *sizes, orientation])

            # collect points in a slightly larger box (dropping points near the ground)
            larger_box = flip_axis_to_depth(get_3d_box(1.2 * sizes, -orientation, flip_axis_to_camera(centroid)))
            height = np.percentile(input_scan[i, :, 2], 5)
            scene_scan = input_scan[i, input_scan[i, :, 2] >= height, :3]
            pc_in_box, inds = extract_pc_in_box3d(scene_scan, larger_box)
            if len(pc_in_box) < 5:
                continue

            pc_in_box_matrix = np.zeros((max_pc_in_box, 3))
            pc_in_box_mask = np.zeros((max_pc_in_box,), dtype=np.uint8)
            pc_in_box_matrix[:pc_in_box.shape[0], :] = pc_in_box
            pc_in_box_mask[:pc_in_box.shape[0]] = 1

            index_list.append((i, j))
            obj_points_list.append(obj_points_matrix)
            obj_points_mask_list.append(obj_points_mask)
            box_params_list.append(box_params)
            pc_in_box_list.append(pc_in_box_matrix)
            pc_in_box_mask_list.append(pc_in_box_mask)

    obj_points_list = np.array(obj_points_list)
    pc_in_box_list = np.array(pc_in_box_list)
    obj_points_mask_list = np.array(obj_points_mask_list)
    pc_in_box_mask_list = np.array(pc_in_box_mask_list)
    box_params_list = np.array(box_params_list)

    # scale to predicted sizes
    obj_points_list = obj_points_list * box_params_list[:, np.newaxis, 3:6]

    obj_points_list = torch.from_numpy(obj_points_list).to(device).float()
    pc_in_box_list = torch.from_numpy(pc_in_box_list).to(device).float()
    pc_in_box_mask_list = torch.from_numpy(pc_in_box_mask_list).to(device).float()

    '''optimize box center and orientation'''
    centroid_params = box_params_list[:, :3]
    orientation_params = box_params_list[:, 6]
    centroid_params = torch.from_numpy(centroid_params).to(device).float()
    orientation_params = torch.from_numpy(orientation_params).to(device).float()
    centroid_params.requires_grad = True
    orientation_params.requires_grad = True

    lr = 0.01
    iterations = 100
    optimizer = optim.Adam([centroid_params, orientation_params], lr=lr)

    centroid_params_cpu, orientation_params_cpu, best_loss = None, None, 1e6
    for it in range(iterations):  # renamed from `iter` to avoid shadowing the builtin
        optimizer.zero_grad()
        loss = chamfer_dist(obj_points_list, obj_points_mask_list, pc_in_box_list, pc_in_box_mask_list,
                            centroid_params, orientation_params)
        if loss < best_loss:
            centroid_params_cpu = centroid_params.data.cpu().numpy()
            orientation_params_cpu = orientation_params.data.cpu().numpy()
            best_loss = loss.item()  # store a Python float, not a tensor tied to the graph
        loss.backward()
        optimizer.step()

    for idx in range(box_params_list.shape[0]):
        i, j = index_list[idx]
        best_box_corners_cam = get_3d_box(box_params_list[idx, 3:6], -orientation_params_cpu[idx],
                                          flip_axis_to_camera(centroid_params_cpu[idx]))
        pred_corners_3d_upright_camera[i, j] = best_box_corners_cam

    parsed_predictions['pred_corners_3d_upright_camera'] = pred_corners_3d_upright_camera
    return parsed_predictions

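
# Illustrative sketch (hypothetical helper): fit_mesh_to_scan above reads a
# 7-D box vector [cx, cy, cz, sx, sy, sz, yaw] off the 8 predicted corners by
# differencing specific corner pairs; the corner indexing follows get_3d_box.
def _box_corners_to_params_sketch(corners):
    '''corners: (8, 3) box corners in depth coordinates; returns (7,) params.'''
    centroid = (corners.max(0) + corners.min(0)) / 2.
    forward_vector = corners[1] - corners[2]
    left_vector = corners[0] - corners[1]
    up_vector = corners[6] - corners[2]
    orientation = np.arctan2(forward_vector[1], forward_vector[0])
    sizes = np.linalg.norm([forward_vector, left_vector, up_vector], axis=1)
    return np.array([*centroid, *sizes, orientation])
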
def generate(cfg, net, data, post_processing):
    with torch.no_grad():
        '''For Detection'''
        mode = cfg.config['mode']
        inputs = {'point_clouds': data['point_clouds']}
        end_points = {}
        end_points = net.backbone(inputs['point_clouds'], end_points)

        # --------- HOUGH VOTING ---------
        xyz = end_points['fp2_xyz']
        features = end_points['fp2_features']
        end_points['seed_inds'] = end_points['fp2_inds']
        end_points['seed_xyz'] = xyz
        end_points['seed_features'] = features

        xyz, features = net.voting(xyz, features)
        features_norm = torch.norm(features, p=2, dim=1)
        features = features.div(features_norm.unsqueeze(1))
        end_points['vote_xyz'] = xyz
        end_points['vote_features'] = features

        # --------- DETECTION ---------
        if_proposal_feature = cfg.config[mode]['phase'] == 'completion'
        end_points, proposal_features = net.detection(xyz, features, end_points, if_proposal_feature)

        eval_dict, parsed_predictions = parse_predictions(end_points, data, cfg.eval_config)

        '''For Completion'''
        # use 3D NMS to generate sample ids.
        batch_sample_ids = eval_dict['pred_mask']
        dump_threshold = cfg.config['generation']['dump_threshold']
        BATCH_PROPOSAL_IDs = get_proposal_id(cfg, end_points, data, mode='random',
                                             batch_sample_ids=batch_sample_ids, DUMP_CONF_THRESH=dump_threshold)

        # Gather features for the selected proposals, optionally skip-propagating point clouds to box centers.
        device = end_points['center'].device
        if not cfg.config['data']['skip_propagate']:
            gather_ids = BATCH_PROPOSAL_IDs[..., 0].unsqueeze(1).repeat(1, 128, 1).long().to(device)
            object_input_features = torch.gather(proposal_features, 2, gather_ids)
        else:
            # gather proposal features
            gather_ids = BATCH_PROPOSAL_IDs[..., 0].unsqueeze(1).repeat(1, 128, 1).long().to(device)
            proposal_features = torch.gather(proposal_features, 2, gather_ids)

            # gather proposal centers
            gather_ids = BATCH_PROPOSAL_IDs[..., 0].unsqueeze(-1).repeat(1, 1, 3).long().to(device)
            pred_centers = torch.gather(end_points['center'], 1, gather_ids)

            # gather proposal orientations
            pred_heading_class = torch.argmax(end_points['heading_scores'], -1)  # B, num_proposal
            heading_residuals = end_points['heading_residuals_normalized'] * (
                    np.pi / cfg.eval_config['dataset_config'].num_heading_bin)  # B, num_proposal, num_heading_bin
            pred_heading_residual = torch.gather(heading_residuals, 2, pred_heading_class.unsqueeze(-1))  # B, num_proposal, 1
            pred_heading_residual.squeeze_(2)
            heading_angles = cfg.eval_config['dataset_config'].class2angle_cuda(pred_heading_class, pred_heading_residual)
            heading_angles = torch.gather(heading_angles, 1, BATCH_PROPOSAL_IDs[..., 0])

            object_input_features = net.skip_propagation.generate(pred_centers, heading_angles, proposal_features,
                                                                  inputs['point_clouds'])

        batch_size, feat_dim, N_proposals = object_input_features.size()
        object_input_features = object_input_features.transpose(1, 2).contiguous().view(batch_size * N_proposals, feat_dim)

        gather_ids = BATCH_PROPOSAL_IDs[..., 0].unsqueeze(-1).repeat(1, 1, end_points['sem_cls_scores'].size(2))
        cls_codes_for_completion = torch.gather(end_points['sem_cls_scores'], 1, gather_ids)
        cls_codes_for_completion = (
                cls_codes_for_completion >= torch.max(cls_codes_for_completion, dim=2, keepdim=True)[0]).float()
        cls_codes_for_completion = cls_codes_for_completion.view(batch_size * N_proposals, -1)

        meshes = net.completion.generator.generate_mesh(object_input_features, cls_codes_for_completion)

    if post_processing:
        pred_mesh_dict = {'meshes': meshes, 'proposal_ids': BATCH_PROPOSAL_IDs}
        parsed_predictions = fit_mesh_to_scan(cfg, pred_mesh_dict, parsed_predictions, eval_dict,
                                              inputs['point_clouds'], dump_threshold)

    return end_points, BATCH_PROPOSAL_IDs, eval_dict, meshes, parsed_predictions

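
# Illustrative sketch (hypothetical helper): generate() above converts
# per-proposal semantic scores into one-hot class codes for the completion
# decoder by comparing each score against its row-wise maximum.
def _one_hot_from_scores_sketch(scores):
    '''scores: (num_proposals, num_classes) tensor; returns float one-hot codes.'''
    return (scores >= scores.max(dim=1, keepdim=True)[0]).float()
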
def save_visualization(cfg, input_data, our_data, output_dir):
    DUMP_CONF_THRESH = cfg.config['generation']['dump_threshold']  # dump boxes with obj prob larger than this.

    '''Dump meshes'''
    meshes = our_data[3]
    BATCH_PROPOSAL_IDs = our_data[1][0].cpu().numpy()
    for mesh_data, map_data in zip(meshes, BATCH_PROPOSAL_IDs):
        object_mesh = os.path.join(output_dir, 'proposal_%d_mesh.ply' % tuple(map_data))
        mesh_data.export(object_mesh)

    '''Dump boxes'''
    batch_id = 0
    pred_corners_3d_upright_camera = our_data[4]['pred_corners_3d_upright_camera']
    objectness_prob = our_data[4]['obj_prob'][batch_id]

    # INPUT
    point_clouds = input_data['point_clouds'].cpu().numpy()

    # Box params
    box_corners_cam = pred_corners_3d_upright_camera[batch_id]
    box_corners_depth = flip_axis_to_depth(box_corners_cam)
    centroid = (np.max(box_corners_depth, axis=1) + np.min(box_corners_depth, axis=1)) / 2.
    forward_vector = box_corners_depth[:, 1] - box_corners_depth[:, 2]
    left_vector = box_corners_depth[:, 0] - box_corners_depth[:, 1]
    up_vector = box_corners_depth[:, 6] - box_corners_depth[:, 2]
    orientation = np.arctan2(forward_vector[:, 1], forward_vector[:, 0])
    forward_size = np.linalg.norm(forward_vector, axis=1)
    left_size = np.linalg.norm(left_vector, axis=1)
    up_size = np.linalg.norm(up_vector, axis=1)
    sizes = np.vstack([forward_size, left_size, up_size]).T
    box_params = np.hstack([centroid, sizes, orientation[:, np.newaxis]])

    # OTHERS
    eval_dict = our_data[2]
    pred_mask = eval_dict['pred_mask']  # B, num_proposal
    pc = point_clouds[batch_id, :, :]

    '''Dump point cloud'''
    pc_util.write_ply(pc, os.path.join(output_dir, '%06d_pc.ply' % (batch_id)))

    '''Dump boxes'''
    if np.sum(objectness_prob > DUMP_CONF_THRESH) > 0:
        if len(box_params) > 0:
            save_path = os.path.join(output_dir, '%06d_pred_confident_nms_bbox.npz' % (batch_id))
            np.savez(save_path,
                     obbs=box_params[np.logical_and(objectness_prob > DUMP_CONF_THRESH, pred_mask[batch_id, :] == 1), :],
                     proposal_map=BATCH_PROPOSAL_IDs)

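
# Illustrative sketch (hypothetical helper): visualize() below rescales each
# mesh into its predicted box by centering it, normalizing every axis to unit
# extent, applying the box sizes, then rotating by the yaw angle and
# translating to the box center (the ShapeNet axis swap is omitted here).
def _fit_points_to_box_sketch(points, sizes, yaw, center):
    '''points: (N, 3); sizes: (3,); yaw: scalar; center: (3,).'''
    points = points - (points.max(0) + points.min(0)) / 2.
    points = points / (points.max(0) - points.min(0)) * sizes
    cos_t, sin_t = np.cos(yaw), np.sin(yaw)
    axis_rectified = np.array([[cos_t, sin_t, 0.],
                               [-sin_t, cos_t, 0.],
                               [0., 0., 1.]])
    return points.dot(axis_rectified) + center
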
def visualize(output_dir, offline):
    predicted_boxes = np.load(os.path.join(output_dir, '000000_pred_confident_nms_bbox.npz'))
    input_point_cloud = pc_util.read_ply(os.path.join(output_dir, '000000_pc.ply'))
    bbox_params = predicted_boxes['obbs']
    proposal_map = predicted_boxes['proposal_map']

    transform_m = np.array([[0, 0, -1], [-1, 0, 0], [0, 1, 0]])

    instance_models = []
    center_list = []
    vector_list = []

    for map_data, bbox_param in zip(proposal_map, bbox_params):
        mesh_file = os.path.join(output_dir, 'proposal_%d_mesh.ply' % tuple(map_data))
        ply_reader = vtk.vtkPLYReader()
        ply_reader.SetFileName(mesh_file)
        ply_reader.Update()
        # get points from object
        polydata = ply_reader.GetOutput()
        # read points using vtk_to_numpy
        obj_points = vtk_to_numpy(polydata.GetPoints().GetData()).astype(np.float64)  # np.float was removed in NumPy 1.24

        '''Fit obj points to bbox'''
        center = bbox_param[:3]
        orientation = bbox_param[6]
        sizes = bbox_param[3:6]

        obj_points = obj_points - (obj_points.max(0) + obj_points.min(0)) / 2.
        obj_points = obj_points.dot(transform_m.T)
        obj_points = obj_points.dot(np.diag(1 / (obj_points.max(0) - obj_points.min(0)))).dot(np.diag(sizes))

        axis_rectified = np.array([[np.cos(orientation), np.sin(orientation), 0],
                                   [-np.sin(orientation), np.cos(orientation), 0],
                                   [0, 0, 1]])
        obj_points = obj_points.dot(axis_rectified) + center

        points_array = numpy_to_vtk(obj_points[..., :3], deep=True)
        polydata.GetPoints().SetData(points_array)
        ply_reader.Update()

        '''draw bboxes'''
        vectors = np.diag(sizes / 2.).dot(axis_rectified)
        instance_models.append(ply_reader)
        center_list.append(center)
        vector_list.append(vectors)

    scene = Vis_base(scene_points=input_point_cloud, instance_models=instance_models, center_list=center_list,
                     vector_list=vector_list)
    camera_center = np.array([0, -3, 3])
    scene.visualize(centroid=camera_center, offline=offline, save_path=os.path.join(output_dir, 'pred.png'))


def run(cfg):
    '''Begin to run network.'''
    checkpoint = CheckpointIO(cfg)

    '''Mount external config data'''
    cfg = mount_external_config(cfg)

    '''Load save path'''
    cfg.log_string('Data save path: %s' % (cfg.save_path))

    '''Load device'''
    cfg.log_string('Loading device settings.')
    device = load_device(cfg)

    '''Load net'''
    cfg.log_string('Loading model.')
    net = load_model(cfg, device=device)
    checkpoint.register_modules(net=net)
    cfg.log_string(net)

    '''Load existing checkpoint'''
    checkpoint.parse_checkpoint()

    '''Load data'''
    cfg.log_string('Loading data.')
    input_data = load_demo_data(cfg, device)

    '''Run demo'''
    net.train(cfg.config['mode'] == 'train')
    start = time()
    our_data = generate(cfg, net.module, input_data, post_processing=False)
    end = time()
    print('Time elapsed: %s.' % (end - start))

    '''Save visualization'''
    scene_name = os.path.splitext(os.path.basename(cfg.config['demo_path']))[0]
    output_dir = os.path.join('demo/outputs', scene_name)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    save_visualization(cfg, input_data, our_data, output_dir)
    visualize(output_dir, offline=False)
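
# Usage note (an assumption about the surrounding project, not part of this
# file's logic): run() expects a mounted config object exposing cfg.config
# (with 'demo_path', 'mode', 'data' and 'generation' entries), cfg.eval_config,
# cfg.save_path and cfg.log_string, and is typically invoked by the project's
# entry script with the mode set to demo.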