gist-ailab
diff --git a/‎MISC/folder2video.py
Lines changed: 5 additions & 5 deletions b/‎MISC/folder2video.py
Lines changed: 5 additions & 5 deletions
diff --git a/‎custom_configs/DELIVER/deliver_cmnext_rcnn2.py
Lines changed: 23 additions & 9 deletions b/‎custom_configs/DELIVER/deliver_cmnext_rcnn2.py
Lines changed: 23 additions & 9 deletions
diff --git a/‎custom_configs/DELIVER/deliver_dataset.py
Lines changed: 2 additions & 13 deletions b/‎custom_configs/DELIVER/deliver_dataset.py
Lines changed: 2 additions & 13 deletions
diff --git a/‎mcdet/__init__.py
Lines changed: 1 addition & 0 deletions b/‎mcdet/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎mcdet/datasets/custom_deliver_detection_dataset.py
Lines changed: 35 additions & 23 deletions b/‎mcdet/datasets/custom_deliver_detection_dataset.py
Lines changed: 35 additions & 23 deletions
diff --git a/‎mcdet/hooks/__init__.py
Lines changed: 5 additions & 0 deletions b/‎mcdet/hooks/__init__.py
Lines changed: 5 additions & 0 deletions
diff --git a/‎mcdet/hooks/debug_hook.py
Lines changed: 59 additions & 0 deletions b/‎mcdet/hooks/debug_hook.py
Lines changed: 59 additions & 0 deletions
@@ -29,9 +29,9 @@ def folder2video(image_folder, output_video_path, fps=30):
 
 
 
-def run():
-    input = '/SSDb/jemo_maeng/src/Project/Drone24/detection/drone-mmdetection-jm/work_dirs/lecun_ATTNet_swinL_fpn_2x_datav2_flir_adas_rgbt_lr001/20250206_163521/inference'
-    output = '/SSDb/jemo_maeng/src/Project/Drone24/detection/drone-mmdetection-jm/work_dirs/lecun_ATTNet_swinL_fpn_2x_datav2_flir_adas_rgbt_lr001/20250206_163521/inference.mp4'
-    folder2video(input, output, fps=30)
+# def run():
+#     input = '/SSDb/jemo_maeng/src/Project/Drone24/detection/drone-mmdetection-jm/work_dirs/lecun_ATTNet_swinL_fpn_2x_datav2_flir_adas_rgbt_lr001/20250206_163521/inference'
+#     output = '/SSDb/jemo_maeng/src/Project/Drone24/detection/drone-mmdetection-jm/work_dirs/lecun_ATTNet_swinL_fpn_2x_datav2_flir_adas_rgbt_lr001/20250206_163521/inference.mp4'
+#     folder2video(input, output, fps=30)
 
-run()
+# run()
@@ -28,8 +28,9 @@
         in_channels=256,
         feat_channels=256,
         anchor_generator=dict(
-            type='AnchorGenerator',
-            scales=[8],
+            type='AnchorGenerator',  # 32 16 8 4 
+            # scales=[8],
+            scales=[2, 4, 8, 16],
             ratios=[0.5, 1.0, 2.0],
             strides=[4, 8, 16, 32, 64]
         ),
@@ -74,9 +75,9 @@
         rpn=dict(
             assigner=dict(
                 type='MaxIoUAssigner',
-                pos_iou_thr=0.7,
-                neg_iou_thr=0.3,
-                min_pos_iou=0.3,
+                pos_iou_thr=0.4,
+                neg_iou_thr=0.1,
+                min_pos_iou=0.1,
                 match_low_quality=True,
                 ignore_iof_thr=-1
             ),
@@ -100,10 +101,10 @@
         rcnn=dict(
             assigner=dict(
                 type='MaxIoUAssigner',
-                pos_iou_thr=0.5,
-                neg_iou_thr=0.5,
-                min_pos_iou=0.5,
-                match_low_quality=False,
+                pos_iou_thr=0.3,        # 🔥 0.5 → 0.3
+                neg_iou_thr=0.1,        # 🔥 0.5 → 0.1  
+                min_pos_iou=0.1,        # 🔥 0.5 → 0.1
+                match_low_quality=True, # 🔥 False → True
                 ignore_iof_thr=-1
             ),
             sampler=dict(
@@ -133,6 +134,19 @@
     )
 )
 
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001),
+    clip_grad=dict(max_norm=5, norm_type=2),
+    accumulative_counts=4
+)
+
+
+# custom_hooks = [
+#     dict(type='BboxLossDebugHook', log_interval=50)
+# ]
+
 # Experiment name for logging
 experiment_name = 'deliver_cmnext_b2_faster_rcnn_2x'
 
 
@@ -30,17 +30,6 @@
     pad_size_divisor=32
 )
 
-# data_preprocessor = dict(
-#     type='ImgDataPreprocessor',
-#     mean=[0.485, 0.456, 0.406],       # RGB (ImageNet - same as DELIVER)
-
-    
-#     std=[0.229, 0.224, 0.225] ,      # RGB (ImageNet - same as DELIVER)
-            
-#     pad_size_divisor=32
-# )
-
-# Pipeline settings
 train_pipeline = [
     dict(type='LoadDELIVERImages'),
     dict(type='LoadAnnotations', with_bbox=True),
@@ -71,7 +60,7 @@
 
 # Dataset configs
 train_dataloader = dict(
-    batch_size=3,
+    batch_size=8,
     num_workers=2,
     persistent_workers=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
@@ -125,7 +114,7 @@
 optim_wrapper = dict(
     type='OptimWrapper',
     optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001),
-    clip_grad=dict(max_norm=35, norm_type=2)
+    clip_grad=dict(max_norm=5, norm_type=2)
 )
 
 # Learning rate schedule
 
@@ -23,6 +23,7 @@
 from .models import *
 from .datasets import *
 from .evaluation import *
+from .hooks import *
 
 
 
 
@@ -18,6 +18,8 @@
 
 from mmdet.datasets.api_wrappers import COCO
 from typing import List, Union, Any
+import torch
+
 METAINFO = {
     'classes': ('Vehicle', 'Human'),  # DELIVER 클래스들
     'palette': [(220, 20, 60), (119, 11, 32)]  # 클래스별 색상
@@ -76,15 +78,8 @@ def full_init(self) -> None:
 
 
     def parse_data_info(self, raw_data_info: dict):
-        """Parse raw annotation to target format.
-
-        Args:
-            raw_data_info (dict): Raw data information load from ``ann_file``
-
-        Returns:
-            Union[dict, List[dict]]: Parsed annotation.
-        """
-
+        """Parse raw annotation to target format."""
+        
         img_info = raw_data_info['raw_img_info']
         ann_info = raw_data_info['raw_ann_info']
 
@@ -102,17 +97,17 @@ def parse_data_info(self, raw_data_info: dict):
         data_info['event_img_path'] = osp.join(self.data_prefix['img'], img_info['event_path'])
         data_info['lidar_img_path'] = osp.join(self.data_prefix['img'], img_info['lidar_path'])
 
-
         modality_paths = {
-        'rgb': img_path,
-        'depth': data_info['depth_img_path'],
-        'event': data_info['event_img_path'], 
-        'lidar': data_info['lidar_img_path']
+            'rgb': img_path,
+            'depth': data_info['depth_img_path'],
+            'event': data_info['event_img_path'], 
+            'lidar': data_info['lidar_img_path']
         }    
 
         for modality, path in modality_paths.items():
             if not osp.exists(path):
                 print(f"Warning: {modality} image not found: {path}")
+        
         data_info['modality_paths'] = modality_paths
         data_info['img_id'] = img_info['img_id']
         data_info['seg_map_path'] = seg_map_path
@@ -125,36 +120,52 @@ def parse_data_info(self, raw_data_info: dict):
             data_info['custom_entities'] = True
 
         instances = []
+        valid_instances = 0
+
         for i, ann in enumerate(ann_info):
             instance = {}
             if ann.get('ignore', False):
                 continue
-            # x1, y1, x2, y2 = ann['bbox']
-            # w = x2 - x1
-            # h = y2 - y1
+
             x1, y1, w, h = ann['bbox']
-            inter_w = max(0, min(x1 + w, img_info['width']) - max(x1, 0))
-            inter_h = max(0, min(y1 + h, img_info['height']) - max(y1, 0))
-            if inter_w * inter_h == 0:
+
+            # Clip the box to the image. Both corners are clamped before the
+            # size is recomputed, so a negative origin shrinks the box.
+            x2 = min(x1 + w, img_info['width'])
+            y2 = min(y1 + h, img_info['height'])
+            x1, y1 = max(0, x1), max(0, y1)
+            w, h = x2 - x1, y2 - y1
+            # Skip boxes whose clipped width or height is <= 1.
+            if w <= 1 or h <= 1:
                 continue
-            if ann['area'] <= 0 or w < 1 or h < 1:
+
+            if ann['area'] <= 0:
                 continue
+
             if ann['category_id'] not in self.cat_ids:
                 continue
-            # bbox = [x1, y1, x1 + w, y1 + h] 
+            # NOTE(review): stored as (x, y, w, h); confirm the pipeline expects this.
             bbox = [x1, y1, w, h]
 
             if ann.get('iscrowd', False):
                 instance['ignore_flag'] = 1
             else:
                 instance['ignore_flag'] = 0
+
             instance['bbox'] = bbox
             instance['bbox_label'] = self.cat2label[ann['category_id']]
 
             if ann.get('segmentation', None):
                 instance['mask'] = ann['segmentation']
 
             instances.append(instance)
+            valid_instances += 1
+
+        # Report images whose annotations were all filtered out. No placeholder
+        # box is added: ``instances`` is stored empty in that case.
+        if valid_instances == 0:
+            print(f"Warning: No valid instances for {img_info['file_name']}")
+
         data_info['instances'] = instances
         return data_info
 
@@ -189,4 +200,5 @@ def __getitem__(self, idx: int) -> dict:
             if data is None:
                 idx = self._rand_another()
                 continue
-            return data
+            return data
+
@@ -0,0 +1,5 @@
+from .debug_hook import BboxLossDebugHook
+
+__all__ = [
+    'BboxLossDebugHook'
+]
@@ -0,0 +1,59 @@
+# Custom hook added for debugging.
+import torch
+from mmengine.hooks import Hook
+from mmdet.registry import HOOKS
+
+@HOOKS.register_module()
+class BboxLossDebugHook(Hook):
+    """Log ground-truth box statistics and loss terms at a fixed interval.
+
+    Args:
+        log_interval (int): Emit the logs once every ``log_interval``
+            training iterations. Defaults to 50.
+    """
+
+    def __init__(self, log_interval=50):
+        self.log_interval = log_interval
+
+    def after_train_iter(self, runner, batch_idx, data_batch=None, outputs=None):
+        """Write GT-box and loss summaries to ``runner.logger``.
+
+        Args:
+            runner: Object whose ``logger.info`` receives the messages.
+            batch_idx (int): Batch index; logging fires on ``batch_idx + 1``.
+            data_batch (dict, optional): Batch that may hold 'data_samples'.
+            outputs (dict, optional): Mapping of loss names to values.
+        """
+        if (batch_idx + 1) % self.log_interval != 0:
+            return
+
+        # NOTE(review): indices 2 and 3 are read as width/height; mmdet boxes
+        # are usually (x1, y1, x2, y2) -- confirm the format produced upstream.
+        bbox_sizes = []
+        if data_batch and 'data_samples' in data_batch:
+            for sample in data_batch['data_samples']:
+                if hasattr(sample, 'gt_instances') and hasattr(sample.gt_instances, 'bboxes'):
+                    for bbox in sample.gt_instances.bboxes:
+                        bbox_sizes.append((bbox[2].item(), bbox[3].item()))
+
+        if bbox_sizes:
+            widths = [w for w, _ in bbox_sizes]
+            heights = [h for _, h in bbox_sizes]
+            areas = [w * h for w, h in bbox_sizes]
+            runner.logger.info(
+                f"GT Analysis - Total boxes: {len(bbox_sizes)}, "
+                f"Avg width: {sum(widths)/len(widths):.2f}, "
+                f"Avg height: {sum(heights)/len(heights):.2f}, "
+                f"Avg area: {sum(areas)/len(areas):.2f}, "
+                f"Min area: {min(areas):.2f}, Max area: {max(areas):.2f}"
+            )
+
+        # ``outputs`` may be None (its default); an empty dict logs every term as 0.
+        losses = outputs or {}
+        runner.logger.info(
+            f"Loss Analysis - RPN cls: {losses.get('loss_rpn_cls', 0):.6f}, "
+            f"RPN bbox: {losses.get('loss_rpn_bbox', 0):.6f}, "  # was 'rpn_loss_bbox'
+            f"ROI cls: {losses.get('loss_cls', 0):.6f}, "
+            f"ROI bbox: {losses.get('loss_bbox', 0):.6f}"
+        )
+