gist-ailab
diff --git a/‎configs/_base_/schedules/schedule_2x.py
Lines changed: 2 additions & 2 deletions b/‎configs/_base_/schedules/schedule_2x.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎custom_configs/DELIVER/deliver_dataset.py
Lines changed: 12 additions & 4 deletions b/‎custom_configs/DELIVER/deliver_dataset.py
Lines changed: 12 additions & 4 deletions
diff --git a/‎custom_configs/DELIVER/hinton-deliver_geminifusion_rcnn_lr0.01.py
Lines changed: 4 additions & 4 deletions b/‎custom_configs/DELIVER/hinton-deliver_geminifusion_rcnn_lr0.01.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎custom_configs/DELIVER/hinton-deliver_geminifusion_rcnn_lr0.01_RGB.py
Lines changed: 195 additions & 0 deletions b/‎custom_configs/DELIVER/hinton-deliver_geminifusion_rcnn_lr0.01_RGB.py
Lines changed: 195 additions & 0 deletions
@@ -1,5 +1,5 @@
 # training schedule for 2x
-train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=1)
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=100, val_interval=1)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 
@@ -10,7 +10,7 @@
     dict(
         type='MultiStepLR',
         begin=0,
-        end=24,
+        end=100,
         by_epoch=True,
         milestones=[16, 22],
         gamma=0.1)
 
@@ -8,7 +8,7 @@
 
 # Dataset basic info
 dataset_type = 'DELIVERDetectionDataset'
-data_root = '/mnt/nvme/workspace/AnySeg/data/DELIVER/'  # Added trailing slash
+data_root = '/SSDb/jemo_maeng/dset/DELIVER/'  # Added trailing slash
 backend_args = None
 classes = ('Vehicle', 'Human')
 
@@ -44,7 +44,11 @@
         prob=0.5,
         bbox_format='xywh'  # 🔥 xywh format 명시
     ),
-    dict(type='PackDELIVERDetInputs')
+    # dict(type='PackDELIVERDetInputs')
+    dict(
+    type='PackDELIVERDetInputs',
+    meta_keys=('img_path', 'img_id', 'ori_shape', 'img_shape', 'scale_factor', 'flip', 'flip_direction')
+)
 ]
 
 test_pipeline = [
@@ -55,7 +59,11 @@
         keep_ratio=True,
         bbox_format='xywh'  # 🔥 xywh format 명시
     ),
-    dict(type='PackDELIVERDetInputs')
+    # dict(type='PackDELIVERDetInputs')
+    dict(
+    type='PackDELIVERDetInputs',
+    meta_keys=('img_path', 'img_id', 'ori_shape', 'img_shape', 'scale_factor', 'flip', 'flip_direction')
+)
 ]
 
 # Dataset configs
@@ -133,7 +141,7 @@
 ]
 
 # Training settings
-train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=1)  # Updated for 2x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=100, val_interval=1)  # Updated for 2x
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 
 
@@ -7,14 +7,14 @@
     './deliver_dataset.py'  # Inherit dataset config
 ]
 
-data_root= '/mnt/nvme/workspace/AnySeg/data/DELIVER'
+data_root= '/SSDb/jemo_maeng/dset/DELIVER'
 
 # Model settings
 model = dict(
     type='FasterRCNN',
     data_preprocessor=_base_.data_preprocessor,  # This comes from _base_
     backbone=dict(
-        type='GeminiFusion',
+        type='GeminiFusion_second',
         # variant='B2',
         backbone='GeminiFusion-B2',
         modals=['rgb', 'depth', 'event', 'lidar'],
@@ -147,7 +147,7 @@
 )
 
 train_dataloader = dict(
-    batch_size=8,
+    batch_size=4,
     num_workers=2,
     persistent_workers=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
@@ -188,7 +188,7 @@
 
 
 # Experiment name for logging
-experiment_name = 'deliver_cmnext_b2_faster_rcnn_2x_lr0.01'
+experiment_name = 'deliver_cmnext_b2_faster_rcnn_2x_lr0.01_2'
 
 # Override work_dir if needed
 work_dir = f'./work_dirs/{experiment_name}'
@@ -0,0 +1,195 @@
+# geminifusion_rcnn.py
+
+# CMNeXt Detection with RCNN detector
+# custom_configs/DELIVER/deliver_cmnext_rcnn.py
+import os
+_base_ = [
+    './deliver_dataset.py'  # Inherit dataset config
+]
+
+data_root= '/SSDb/jemo_maeng/dset/DELIVER'
+
+# Model settings
+model = dict(
+    type='FasterRCNN',
+    data_preprocessor=_base_.data_preprocessor,  # This comes from _base_
+    backbone=dict(
+        type='GeminiFusion',
+        # variant='B2',
+        backbone='GeminiFusion-B2',
+        # modals=['rgb', 'depth', 'event', 'lidar'],
+        modals=['rgb', 'depth'],
+        out_indices=(0, 1, 2, 3),
+        # frozen_stages=-1,
+        # pretrained='/mnt/nvme/workspace/drone-mmdetection-jm/ckpts/mit_b2.pth'
+        # num_modal=4,
+        # out_indices=(0, 1, 2, 3),
+        frozen_stages=-1,
+        # adapter_type='every_one',    
+
+        # pretrained='/mnt/nvme/workspace/drone-mmdetection-jm/ckpts/mit_b2.pth'
+    ),
+    neck=dict(
+        type='FPN',  # MMDetection 표준 FPN 사용
+        in_channels=[64, 128, 320, 512],
+        # in_channels=[64, 128],
+        out_channels=256,
+        num_outs=5
+    ),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        anchor_generator=dict(
+            type='AnchorGenerator',  # 32 16 8 4 
+            # scales=[8],
+            scales=[2, 4, 8, 16],
+            ratios=[0.5, 1.0, 2.0],
+            strides=[4, 8, 16, 32, 64]
+        ),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[.0, .0, .0, .0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]
+        ),
+        loss_cls=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0
+        ),
+        loss_bbox=dict(type='L1Loss', loss_weight=1.0)
+    ),
+    roi_head=dict(
+        type='StandardRoIHead',
+        bbox_roi_extractor=dict(
+            type='SingleRoIExtractor',
+            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+            out_channels=256,
+            featmap_strides=[4, 8, 16, 32]
+        ),
+        bbox_head=dict(
+            type='Shared2FCBBoxHead',
+            in_channels=256,
+            fc_out_channels=1024,
+            roi_feat_size=7,
+            num_classes=2,  # Vehicle, Human
+            bbox_coder=dict(
+                type='DeltaXYWHBBoxCoder',
+                target_means=[0., 0., 0., 0.],
+                target_stds=[0.1, 0.1, 0.2, 0.2]
+            ),
+            reg_class_agnostic=False,
+            loss_cls=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0
+            ),
+            loss_bbox=dict(type='L1Loss', loss_weight=1.0)
+        )
+    ),
+    # Training config
+    train_cfg=dict(
+        rpn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.3,
+                min_pos_iou=0.3,
+                match_low_quality=True,
+                ignore_iof_thr=-1
+            ),
+            sampler=dict(
+                type='RandomSampler',
+                num=256,
+                pos_fraction=0.5,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=False
+            ),
+            allowed_border=-1,
+            pos_weight=-1,
+            debug=False
+        ),
+        rpn_proposal=dict(
+            nms_pre=2000,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0
+        ),
+        rcnn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.5,        # 🔥 0.5 → 0.3
+                neg_iou_thr=0.5,        # 🔥 0.5 → 0.1  
+                min_pos_iou=0.5,        # 🔥 0.5 → 0.1
+                match_low_quality=False, # 🔥 False → True
+                ignore_iof_thr=-1
+            ),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True
+            ),
+            pos_weight=-1,
+            debug=False
+        )
+    ),
+    # Testing config
+    test_cfg=dict(
+        rpn=dict(
+            nms_pre=1000,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0
+        ),
+        rcnn=dict(
+            score_thr=0.05,
+            nms=dict(type='nms', iou_threshold=0.5),
+            max_per_img=100
+        )
+    )
+)
+
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        data_root=data_root,
+        ann_file='coco_train_xywh.json',
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        data_root=data_root,
+        ann_file='coco_val_xywh.json',
+    ),
+)
+
+test_dataloader = val_dataloader
+
+# Evaluation settings  
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=os.path.join(data_root, 'coco_val_xywh.json'),  # Fixed: consistent with dataset
+    metric='bbox',
+    format_only=False
+)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
+    clip_grad=dict(max_norm=5, norm_type=2),
+    accumulative_counts=4
+)
+
+
+# Experiment name for logging
+experiment_name = 'deliver_cmnext_b2_faster_rcnn_2x_lr0.01_feature_fusion'
+
+# Override work_dir if needed
+work_dir = f'./work_dirs/{experiment_name}'