Skip to content

Commit 49c9163

Browse files
committed
update: datapreprocessor convert bbox format to xyxy
1 parent 5ba255a commit 49c9163

File tree

11 files changed

+466
-114
lines changed

11 files changed

+466
-114
lines changed

MISC/folder2video.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ def folder2video(image_folder, output_video_path, fps=30):
2929

3030

3131

32-
def run():
33-
input = '/SSDb/jemo_maeng/src/Project/Drone24/detection/drone-mmdetection-jm/work_dirs/lecun_ATTNet_swinL_fpn_2x_datav2_flir_adas_rgbt_lr001/20250206_163521/inference'
34-
output = '/SSDb/jemo_maeng/src/Project/Drone24/detection/drone-mmdetection-jm/work_dirs/lecun_ATTNet_swinL_fpn_2x_datav2_flir_adas_rgbt_lr001/20250206_163521/inference.mp4'
35-
folder2video(input, output, fps=30)
32+
# def run():
33+
# input = '/SSDb/jemo_maeng/src/Project/Drone24/detection/drone-mmdetection-jm/work_dirs/lecun_ATTNet_swinL_fpn_2x_datav2_flir_adas_rgbt_lr001/20250206_163521/inference'
34+
# output = '/SSDb/jemo_maeng/src/Project/Drone24/detection/drone-mmdetection-jm/work_dirs/lecun_ATTNet_swinL_fpn_2x_datav2_flir_adas_rgbt_lr001/20250206_163521/inference.mp4'
35+
# folder2video(input, output, fps=30)
3636

37-
run()
37+
# run()

custom_configs/DELIVER/deliver_cmnext_rcnn2.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@
2828
in_channels=256,
2929
feat_channels=256,
3030
anchor_generator=dict(
31-
type='AnchorGenerator',
32-
scales=[8],
31+
type='AnchorGenerator', # 32 16 8 4
32+
# scales=[8],
33+
scales=[2, 4, 8, 16],
3334
ratios=[0.5, 1.0, 2.0],
3435
strides=[4, 8, 16, 32, 64]
3536
),
@@ -74,9 +75,9 @@
7475
rpn=dict(
7576
assigner=dict(
7677
type='MaxIoUAssigner',
77-
pos_iou_thr=0.7,
78-
neg_iou_thr=0.3,
79-
min_pos_iou=0.3,
78+
pos_iou_thr=0.4,
79+
neg_iou_thr=0.1,
80+
min_pos_iou=0.1,
8081
match_low_quality=True,
8182
ignore_iof_thr=-1
8283
),
@@ -100,10 +101,10 @@
100101
rcnn=dict(
101102
assigner=dict(
102103
type='MaxIoUAssigner',
103-
pos_iou_thr=0.5,
104-
neg_iou_thr=0.5,
105-
min_pos_iou=0.5,
106-
match_low_quality=False,
104+
pos_iou_thr=0.3, # 🔥 0.5 → 0.3
105+
neg_iou_thr=0.1, # 🔥 0.5 → 0.1
106+
min_pos_iou=0.1, # 🔥 0.5 → 0.1
107+
match_low_quality=True, # 🔥 False → True
107108
ignore_iof_thr=-1
108109
),
109110
sampler=dict(
@@ -133,6 +134,19 @@
133134
)
134135
)
135136

137+
138+
optim_wrapper = dict(
139+
type='OptimWrapper',
140+
optimizer=dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001),
141+
clip_grad=dict(max_norm=5, norm_type=2),
142+
accumulative_counts=4
143+
)
144+
145+
146+
# custom_hooks = [
147+
# dict(type='BboxLossDebugHook', log_interval=50)
148+
# ]
149+
136150
# Experiment name for logging
137151
experiment_name = 'deliver_cmnext_b2_faster_rcnn_2x'
138152

custom_configs/DELIVER/deliver_dataset.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,6 @@
3030
pad_size_divisor=32
3131
)
3232

33-
# data_preprocessor = dict(
34-
# type='ImgDataPreprocessor',
35-
# mean=[0.485, 0.456, 0.406], # RGB (ImageNet - same as DELIVER)
36-
37-
38-
# std=[0.229, 0.224, 0.225] , # RGB (ImageNet - same as DELIVER)
39-
40-
# pad_size_divisor=32
41-
# )
42-
43-
# Pipeline settings
4433
train_pipeline = [
4534
dict(type='LoadDELIVERImages'),
4635
dict(type='LoadAnnotations', with_bbox=True),
@@ -71,7 +60,7 @@
7160

7261
# Dataset configs
7362
train_dataloader = dict(
74-
batch_size=3,
63+
batch_size=8,
7564
num_workers=2,
7665
persistent_workers=True,
7766
sampler=dict(type='DefaultSampler', shuffle=True),
@@ -125,7 +114,7 @@
125114
optim_wrapper = dict(
126115
type='OptimWrapper',
127116
optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001),
128-
clip_grad=dict(max_norm=35, norm_type=2)
117+
clip_grad=dict(max_norm=5, norm_type=2)
129118
)
130119

131120
# Learning rate schedule

mcdet/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from .models import *
2424
from .datasets import *
2525
from .evaluation import *
26+
from .hooks import *
2627

2728

2829

mcdet/datasets/custom_deliver_detection_dataset.py

Lines changed: 35 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
from mmdet.datasets.api_wrappers import COCO
2020
from typing import List, Union, Any
21+
import torch
22+
2123
METAINFO = {
2224
'classes': ('Vehicle', 'Human'), # DELIVER 클래스들
2325
'palette': [(220, 20, 60), (119, 11, 32)] # 클래스별 색상
@@ -76,15 +78,8 @@ def full_init(self) -> None:
7678

7779

7880
def parse_data_info(self, raw_data_info: dict):
79-
"""Parse raw annotation to target format.
80-
81-
Args:
82-
raw_data_info (dict): Raw data information load from ``ann_file``
83-
84-
Returns:
85-
Union[dict, List[dict]]: Parsed annotation.
86-
"""
87-
81+
"""Parse raw annotation to target format."""
82+
8883
img_info = raw_data_info['raw_img_info']
8984
ann_info = raw_data_info['raw_ann_info']
9085

@@ -102,17 +97,17 @@ def parse_data_info(self, raw_data_info: dict):
10297
data_info['event_img_path'] = osp.join(self.data_prefix['img'], img_info['event_path'])
10398
data_info['lidar_img_path'] = osp.join(self.data_prefix['img'], img_info['lidar_path'])
10499

105-
106100
modality_paths = {
107-
'rgb': img_path,
108-
'depth': data_info['depth_img_path'],
109-
'event': data_info['event_img_path'],
110-
'lidar': data_info['lidar_img_path']
101+
'rgb': img_path,
102+
'depth': data_info['depth_img_path'],
103+
'event': data_info['event_img_path'],
104+
'lidar': data_info['lidar_img_path']
111105
}
112106

113107
for modality, path in modality_paths.items():
114108
if not osp.exists(path):
115109
print(f"Warning: {modality} image not found: {path}")
110+
116111
data_info['modality_paths'] = modality_paths
117112
data_info['img_id'] = img_info['img_id']
118113
data_info['seg_map_path'] = seg_map_path
@@ -125,36 +120,52 @@ def parse_data_info(self, raw_data_info: dict):
125120
data_info['custom_entities'] = True
126121

127122
instances = []
123+
valid_instances = 0
124+
128125
for i, ann in enumerate(ann_info):
129126
instance = {}
130127
if ann.get('ignore', False):
131128
continue
132-
# x1, y1, x2, y2 = ann['bbox']
133-
# w = x2 - x1
134-
# h = y2 - y1
129+
135130
x1, y1, w, h = ann['bbox']
136-
inter_w = max(0, min(x1 + w, img_info['width']) - max(x1, 0))
137-
inter_h = max(0, min(y1 + h, img_info['height']) - max(y1, 0))
138-
if inter_w * inter_h == 0:
131+
132+
# Clamp the box to the image boundaries
133+
x1 = max(0, x1)
134+
y1 = max(0, y1)
135+
w = min(w, img_info['width'] - x1)
136+
h = min(h, img_info['height'] - y1)
137+
138+
# Perform only basic validity checks (deliberately not too strict)
139+
if w <= 1 or h <= 1:
139140
continue
140-
if ann['area'] <= 0 or w < 1 or h < 1:
141+
142+
if ann['area'] <= 0:
141143
continue
144+
142145
if ann['category_id'] not in self.cat_ids:
143146
continue
144-
# bbox = [x1, y1, x1 + w, y1 + h]
147+
145148
bbox = [x1, y1, w, h]
146149

147150
if ann.get('iscrowd', False):
148151
instance['ignore_flag'] = 1
149152
else:
150153
instance['ignore_flag'] = 0
154+
151155
instance['bbox'] = bbox
152156
instance['bbox_label'] = self.cat2label[ann['category_id']]
153157

154158
if ann.get('segmentation', None):
155159
instance['mask'] = ann['segmentation']
156160

157161
instances.append(instance)
162+
valid_instances += 1
163+
164+
# Handle samples that have no valid instances
165+
if valid_instances == 0:
166+
print(f"Warning: No valid instances for {img_info['file_name']}, creating dummy instance")
167+
168+
158169
data_info['instances'] = instances
159170
return data_info
160171

@@ -189,4 +200,5 @@ def __getitem__(self, idx: int) -> dict:
189200
if data is None:
190201
idx = self._rand_another()
191202
continue
192-
return data
203+
return data
204+

mcdet/hooks/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from .debug_hook import BboxLossDebugHook
2+
3+
__all__ = [
4+
'BboxLossDebugHook'
5+
]

mcdet/hooks/debug_hook.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Custom hook added for debugging
2+
import torch
3+
from mmengine.hooks import Hook
4+
from mmdet.registry import HOOKS
5+
6+
@HOOKS.register_module()
class BboxLossDebugHook(Hook):
    """Debug hook that periodically logs ground-truth box sizes and losses.

    Every ``log_interval`` training iterations the hook writes two records to
    ``runner.logger``: size statistics of the ground-truth boxes found in
    ``data_batch['data_samples']`` and the RPN / ROI classification and
    regression losses found in ``outputs``.

    Args:
        log_interval (int): Number of training iterations between two log
            records. Defaults to 50.
        bbox_format (str): How the first four values of a ground-truth box
            are interpreted when computing its size. ``'xywh'`` (default,
            the historical behaviour) reads index 2 / 3 as width / height;
            ``'xyxy'`` computes ``x2 - x1`` / ``y2 - y1``.

    Raises:
        ValueError: If ``bbox_format`` is not ``'xywh'`` or ``'xyxy'``.
    """

    _BBOX_FORMATS = ('xywh', 'xyxy')

    def __init__(self, log_interval=50, bbox_format='xywh'):
        if bbox_format not in self._BBOX_FORMATS:
            raise ValueError(
                f"bbox_format must be one of {self._BBOX_FORMATS}, "
                f"got {bbox_format!r}"
            )
        self.log_interval = log_interval
        # NOTE(review): the default keeps the original width/height reading.
        # If the boxes reaching this hook are already in xyxy layout, the
        # hook must be configured with bbox_format='xyxy' - confirm against
        # the data pipeline in use.
        self.bbox_format = bbox_format

    def after_train_iter(self, runner, batch_idx, data_batch=None, outputs=None):
        """Log ground-truth and loss statistics every ``log_interval`` iters.

        Args:
            runner: Object exposing a ``logger`` with an ``info`` method.
            batch_idx (int): Zero-based index of the current iteration.
            data_batch (dict, optional): Batch that may hold a
                ``'data_samples'`` entry. Defaults to None.
            outputs (dict, optional): Mapping of loss names to values.
                Defaults to None.
        """
        if (batch_idx + 1) % self.log_interval != 0:
            return

        self._log_gt_stats(runner, data_batch)
        self._log_losses(runner, outputs)

    def _box_size(self, bbox):
        """Return ``(width, height)`` of one box according to ``bbox_format``."""
        third, fourth = bbox[2].item(), bbox[3].item()
        if self.bbox_format == 'xyxy':
            return third - bbox[0].item(), fourth - bbox[1].item()
        return third, fourth

    def _log_gt_stats(self, runner, data_batch):
        """Log count and size statistics of the ground-truth boxes."""
        if not data_batch or 'data_samples' not in data_batch:
            return

        total_gt_boxes = 0
        bbox_sizes = []
        for sample in data_batch['data_samples']:
            gt_instances = getattr(sample, 'gt_instances', None)
            if gt_instances is None or not hasattr(gt_instances, 'bboxes'):
                continue
            bboxes = gt_instances.bboxes
            total_gt_boxes += len(bboxes)
            bbox_sizes.extend(self._box_size(bbox) for bbox in bboxes)

        # Nothing to average when the batch holds no boxes.
        if not bbox_sizes:
            return

        widths = [w for w, _ in bbox_sizes]
        heights = [h for _, h in bbox_sizes]
        areas = [w * h for w, h in bbox_sizes]

        runner.logger.info(
            f"GT Analysis - Total boxes: {total_gt_boxes}, "
            f"Avg width: {sum(widths)/len(widths):.2f}, "
            f"Avg height: {sum(heights)/len(heights):.2f}, "
            f"Avg area: {sum(areas)/len(areas):.2f}, "
            f"Min area: {min(areas):.2f}, Max area: {max(areas):.2f}"
        )

    @staticmethod
    def _first_present(mapping, keys, default=0):
        """Return the value of the first key of ``keys`` found in ``mapping``."""
        for key in keys:
            if key in mapping:
                return mapping[key]
        return default

    def _log_losses(self, runner, outputs):
        """Log the RPN / ROI losses; missing entries are reported as 0."""
        # ``outputs`` is optional in the hook signature; without it there is
        # nothing to report, so skip instead of failing on ``None.get``.
        if outputs is None:
            return

        rpn_cls_loss = outputs.get('loss_rpn_cls', 0)
        # The original lookup used only 'rpn_loss_bbox', which does not match
        # the 'loss_rpn_*' naming of its sibling key. Try the consistent name
        # first and keep the old spelling as a fallback.
        rpn_bbox_loss = self._first_present(
            outputs, ('loss_rpn_bbox', 'rpn_loss_bbox'))
        roi_cls_loss = outputs.get('loss_cls', 0)
        roi_bbox_loss = outputs.get('loss_bbox', 0)

        runner.logger.info(
            f"Loss Analysis - RPN cls: {rpn_cls_loss:.6f}, "
            f"RPN bbox: {rpn_bbox_loss:.6f}, "
            f"ROI cls: {roi_cls_loss:.6f}, "
            f"ROI bbox: {roi_bbox_loss:.6f}"
        )
59+

0 commit comments

Comments
 (0)