Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP][Bug fix] Use pad_shape instead of img_shape for BEVFormer, and restore the original code path for the first frame, where prev_bev is None #225

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Use pad_shape instead of img_shape for BEVFormer; restore the prev_bev=None code path
  • Loading branch information
luoqianhui committed Jan 30, 2023
commit ac80fcff6be6997bf1ab2079840816968b452377
23 changes: 2 additions & 21 deletions paddle3d/models/detection/bevformer/bevformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,6 @@ def obtain_history_bev(self, imgs_queue, img_metas_list):
img_metas = [each[i] for each in img_metas_list]
if not img_metas[0]['prev_bev_exists']:
prev_bev = None
if prev_bev is None:
prev_bev = paddle.zeros([
self.pts_bbox_head.bev_w * self.pts_bbox_head.bev_w, bs,
self.pts_bbox_head.transformer.embed_dims
],
dtype='float32')
img_feats = [each_scale[:, i] for each_scale in img_feats_list]
prev_bev = self.pts_bbox_head(
img_feats, img_metas, prev_bev, only_bev=True)
Expand Down Expand Up @@ -177,12 +171,6 @@ def forward_train(
img_metas = [each[len_queue - 1] for each in img_metas]
if not img_metas[0]['prev_bev_exists']:
prev_bev = None
if prev_bev is None:
prev_bev = paddle.zeros([
self.pts_bbox_head.bev_w * self.pts_bbox_head.bev_w, bs,
self.pts_bbox_head.transformer.embed_dims
],
dtype='float32')

img_feats = self.extract_feat(img=img, img_metas=img_metas)

Expand Down Expand Up @@ -221,13 +209,6 @@ def forward_test(self, samples, **kwargs):
img_metas[0]['can_bus'][-1] = 0
img_metas[0]['can_bus'][:3] = 0

if self.prev_frame_info['prev_bev'] is None:
self.prev_frame_info['prev_bev'] = paddle.zeros([
self.pts_bbox_head.bev_w * self.pts_bbox_head.bev_w,
img.shape[0], self.pts_bbox_head.transformer.embed_dims
],
dtype='float32')

new_prev_bev, bbox_results = self.simple_test(
img_metas, img, prev_bev=self.prev_frame_info['prev_bev'], **kwargs)
self.prev_frame_info['prev_pos'] = tmp_pos
Expand Down Expand Up @@ -318,9 +299,9 @@ def export(self, save_dir: str, **kwargs):
"lidar2img":
paddle.static.InputSpec(
shape=[-1, -1, 4, 4], dtype="float32", name='lidar2img'),
"img_shape":
"pad_shape":
paddle.static.InputSpec(
shape=[6, 3], dtype="int32", name='img_shape'),
shape=[6, 3], dtype="int32", name='pad_shape'),
}

input_spec = [image_spec, pre_bev_spec, img_metas_spec]
Expand Down
1 change: 0 additions & 1 deletion paddle3d/models/detection/bevformer/bevformer_head.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,6 @@ def forward(self, mlvl_feats, img_metas, prev_bev=None, only_bev=False):
img_metas=img_metas,
prev_bev=prev_bev,
)
bev_embed = bev_embed.transpose([1, 0, 2])
return bev_embed
else:
outputs = self.transformer(
Expand Down
36 changes: 6 additions & 30 deletions paddle3d/models/transformers/encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ def point_sampling(self, reference_points, point_cloud_range, img_metas):
reference_points_cam[..., 2:3],
paddle.ones_like(reference_points_cam[..., 2:3]) * eps)

reference_points_cam[..., 0] /= img_metas[0]['img_shape'][0][1]
reference_points_cam[..., 1] /= img_metas[0]['img_shape'][0][0]
reference_points_cam[..., 0] /= img_metas[0]['pad_shape'][0][1]
reference_points_cam[..., 1] /= img_metas[0]['pad_shape'][0][0]

bev_mask = (bev_mask & (reference_points_cam[..., 1:2] > 0.0)
& (reference_points_cam[..., 1:2] < 1.0)
Expand Down Expand Up @@ -225,7 +225,6 @@ def forward(self,
dim='2d',
bs=bev_query.shape[1],
dtype=bev_query.dtype)
# np.save("e_ref_2d.npy", ref_2d.numpy())

reference_points_cam, bev_mask = self.point_sampling(
ref_3d, self.point_cloud_range, kwargs['img_metas'])
Expand All @@ -235,44 +234,22 @@ def forward(self,
#shift_ref_2d = ref_2d
#shift_ref_2d += shift[:, None, None, :]
ref_2d += shift[:, None, None, :]
# np.save("e_shift_ref_2d.npy", ref_2d.numpy())
# np.save("e_ref_2dref_2d.npy", ref_2d.numpy())

# (num_query, bs, embed_dims) -> (bs, num_query, embed_dims)
bev_query = bev_query.transpose([1, 0, 2])
bev_pos = bev_pos.transpose([1, 0, 2])
bs, len_bev, num_bev_level, _ = ref_2d.shape
'''
if prev_bev is not None:
prev_bev = prev_bev.transpose([1, 0, 2])
prev_bev = paddle.stack(
[prev_bev, bev_query], 1).reshape([bs*2, len_bev, -1])
prev_bev = paddle.stack([prev_bev, bev_query],
1).reshape([bs * 2, len_bev, -1])
# TODO(qianhui): fix this clone bugs: paddle equal means clone but torch not
#hybird_ref_2d = paddle.stack([shift_ref_2d, ref_2d], 1).reshape(
hybird_ref_2d = paddle.stack([ref_2d, ref_2d], 1).reshape(
[bs*2, len_bev, num_bev_level, 2])
[bs * 2, len_bev, num_bev_level, 2])
else:
hybird_ref_2d = paddle.stack([ref_2d, ref_2d], 1).reshape(
[bs*2, len_bev, num_bev_level, 2])
'''
prev_bev = prev_bev.transpose([1, 0, 2])
valid_prev_bev = prev_bev.cast('bool').any().cast('int32')
prev_bev = prev_bev * valid_prev_bev + bev_query * (1 - valid_prev_bev)
prev_bev = paddle.stack([prev_bev, bev_query],
1).reshape([bs * 2, len_bev, -1])
hybird_ref_2d = paddle.stack([ref_2d, ref_2d], 1).reshape(
[bs * 2, len_bev, num_bev_level, 2])

# np.save("e_bev_query.npy", bev_query.numpy())
# np.save("e_key.npy", key.numpy())
# np.save("e_value.npy", value.numpy())
# np.save("e_bev_posbev_pos.npy", bev_pos.numpy())
# np.save("e_hybird_ref_2d.npy", hybird_ref_2d.numpy())
# np.save("e_ref_3d.npy", ref_3d.numpy())
# np.save("e_spatial_shapes.npy", spatial_shapes.numpy())
# np.save("e_reference_points_cam.npy", reference_points_cam.numpy())
# np.save("e_bev_mask.npy", bev_mask.numpy())
# np.save("e_prev_bev.npy", prev_bev.numpy())
[bs * 2, len_bev, num_bev_level, 2])
for lid, layer in enumerate(self.layers):
output = layer(
bev_query,
Expand All @@ -290,7 +267,6 @@ def forward(self,
bev_mask=bev_mask,
prev_bev=prev_bev,
**kwargs)
# np.save("e_output_{}.npy".format(lid), output.numpy())

bev_query = output
if self.return_intermediate:
Expand Down
39 changes: 1 addition & 38 deletions paddle3d/models/transformers/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,25 +166,17 @@ def get_bev_features(self,
[each['can_bus'][0] for each in kwargs['img_metas']])
delta_y = paddle.concat(
[each['can_bus'][1] for each in kwargs['img_metas']])
#np.save('delta_x.npy', delta_x.numpy())
#np.save('delta_y.npy', delta_y.numpy())
ego_angle = paddle.concat(
[each['can_bus'][-2] / pi_tensor for each in kwargs['img_metas']])
#np.save('ego_angle.npy', ego_angle)
grid_length_y = grid_length[0]
grid_length_x = grid_length[1]
translation_length = paddle.sqrt(delta_x**2 + delta_y**2)
#np.save('translation_length.npy', translation_length.numpy())
translation_angle = paddle.atan2(delta_y, delta_x) / pi_tensor
# translation_angle = paddle.angle(delta_y, delta_x) / pi_tensor
#np.save('translation_angle.npy', translation_angle.numpy())
bev_angle = ego_angle - translation_angle
shift_y = translation_length * \
paddle.cos(bev_angle * pi_tensor) / grid_length_y / bev_h
shift_x = translation_length * \
paddle.sin(bev_angle * pi_tensor) / grid_length_x / bev_w
#np.save('shift_x.npy', shift_x.numpy())
#np.save('shift_y.npy', shift_y.numpy())

shift_y = shift_y * self.use_shift
shift_x = shift_x * self.use_shift
Expand All @@ -193,39 +185,19 @@ def get_bev_features(self,
shift_y]).transpose([1, 0]) # xy, bs -> bs, xy

shift = shift.cast(bev_queries.dtype)
'''
if prev_bev is not None:
if prev_bev.shape[1] == bev_h * bev_w:
prev_bev = prev_bev.transpose([1, 0, 2])
if self.rotate_prev_bev:
for i in range(bs):
rotation_angle = kwargs['img_metas'][i]['can_bus'][-1]
tmp_prev_bev = prev_bev[:, i].reshape([
bev_h, bev_w, -1]).transpose([2, 0, 1])
tmp_prev_bev = rotate(tmp_prev_bev, rotation_angle,
center=self.rotate_center)
# #np.save('tmp_prev_bev.npy', tmp_prev_bev.numpy())
tmp_prev_bev = tmp_prev_bev.transpose([1, 2, 0]).reshape([
bev_h * bev_w, 1, -1])
prev_bev[:, i] = tmp_prev_bev[:, 0]
'''
if prev_bev is not None:
if self.rotate_prev_bev:
for i in range(bs):
valid_prev_bev = prev_bev[:, i].cast('bool').any().cast(
'int32')
rotation_angle = kwargs['img_metas'][i]['can_bus'][-1]
tmp_prev_bev = prev_bev[:, i].reshape(
[bev_h, bev_w, -1]).transpose([2, 0, 1])
tmp_prev_bev = rotate(
tmp_prev_bev, rotation_angle, center=self.rotate_center)
# #np.save('tmp_prev_bev.npy', tmp_prev_bev.numpy())
tmp_prev_bev = tmp_prev_bev.transpose([1, 2, 0]).reshape(
[bev_h * bev_w, 1, -1])
prev_bev[:,
i] = tmp_prev_bev[:,
0] * valid_prev_bev + prev_bev[:, i] * (
1 - valid_prev_bev)
prev_bev[:, i] = tmp_prev_bev[:, 0]

# add can bus signals
# can_bus = paddle.to_tensor(
Expand Down Expand Up @@ -258,13 +230,6 @@ def get_bev_features(self,
feat_flatten = feat_flatten.transpose(
[0, 2, 1, 3]) # (num_cam, H*W, bs, embed_dims)

# #np.save('bev_queries.npy', bev_queries.numpy())
# #np.save('feat_flatten.npy', feat_flatten.numpy())
# #np.save('bev_pos.npy', bev_pos.numpy())
# #np.save('prev_bev.npy', prev_bev.numpy())
# #np.save('spatial_shapes.npy', spatial_shapes.numpy())
# #np.save('level_start_index.npy', level_start_index.numpy())
# #np.save('shift.npy', shift.numpy())
bev_embed = self.encoder(
bev_queries,
feat_flatten,
Expand All @@ -277,8 +242,6 @@ def get_bev_features(self,
prev_bev=prev_bev,
shift=shift,
**kwargs)
# #np.save('bev_embed.npy', bev_embed.numpy())
# exit()
return bev_embed

def forward(self,
Expand Down