Description
I have a foreground/background semantic segmentation task with only two categories, "fg" and "bg". During training the loss decreases normally, but at inference time the entire image is classified as background. My mask labels are 0 and 1. The VSS dataset registration code is as follows:
import json
import os
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.utils.file_io import PathManager
def gen_video_vspw_lists(image_root, split_txt):
    """Build the per-video list of frame and mask paths for one split.

    Args:
        image_root (str): directory holding one sub-directory per video.
            Each video directory must contain an ``origin`` folder and may
            contain a ``mask`` folder.
        split_txt (str): text file listing one video name per line.

    Returns:
        list[dict]: one dict per listed video, with keys
            ``video_id`` (the name from the split file),
            ``file_names`` (name-sorted paths of the files in ``origin``) and
            ``sem_mask_names`` (name-sorted paths of the files in ``mask``,
            or a list of ``None`` as long as ``file_names`` when the video
            has no ``mask`` folder).

    Raises:
        OSError: if the split file or a video's ``origin`` folder is missing.
        AssertionError: if the split file lists no videos.
    """
    with open(split_txt, 'r') as f:
        # strip() instead of line[:-1]: the last line of the file may have no
        # trailing newline, in which case slicing would cut off the final
        # character of the video name. Blank lines are skipped because an
        # empty name would resolve to image_root itself.
        video_names = [
            name for name in (line.strip() for line in f) if name
        ]

    ret = []
    for video_name in video_names:
        path_video = os.path.join(image_root, video_name)

        origin_dir = os.path.join(path_video, 'origin')
        img_files = [
            os.path.join(origin_dir, item)
            for item in sorted(os.listdir(origin_dir))
        ]

        mask_dir = os.path.join(path_video, 'mask')
        if os.path.exists(mask_dir):
            # NOTE(review): presumably consumers pair frames and masks by
            # index, which relies on both folders sorting into the same
            # order and having the same number of files - confirm.
            mask_files = [
                os.path.join(mask_dir, item)
                for item in sorted(os.listdir(mask_dir))
            ]
        else:
            # No annotations for this video: keep the list aligned with the
            # frames by using one None placeholder per frame.
            mask_files = [None] * len(img_files)

        ret.append({'video_id': video_name,
                    'file_names': img_files,
                    'sem_mask_names': mask_files})

    assert len(ret), f"No videos found in {image_root}!"
    return ret
def register_video_vspw_vss(
    name, metadata, image_root, split_txt,
):
    """
    Register a video semantic segmentation dataset under the given name.

    A loader that calls ``gen_video_vspw_lists(image_root, split_txt)`` is
    registered in ``DatasetCatalog``, and the dataset's ``MetadataCatalog``
    entry is filled with ``image_root``, ``evaluator_type=None``,
    ``ignore_label=255`` and every key of ``metadata``.

    Args:
        name (str): the name that identifies the dataset in the catalogs.
        metadata (dict): extra metadata associated with this dataset; its
            keys are passed as keyword arguments, so they must not repeat
            ``image_root``, ``evaluator_type`` or ``ignore_label``.
        image_root (str): directory which contains all the videos.
        split_txt (str): file containing the list of video names.
    """
    def _load_video_dicts():
        # Zero-argument loader handed to the catalog; it closes over the
        # paths given at registration time.
        return gen_video_vspw_lists(image_root, split_txt)

    DatasetCatalog.register(name, _load_video_dicts)

    dataset_meta = MetadataCatalog.get(name)
    dataset_meta.set(
        image_root=image_root,
        evaluator_type=None,
        ignore_label=255,
        **metadata,
    )
def get_metadata():
categories = [{"id": 0, "name": "bg", "isthing":