# Faster R-CNN detection config for the DELIVER dataset with a
# GeminiFusion-B2 multi-modal backbone (rgb / depth / event / lidar).
# NOTE(review): earlier header comments named this file both
# `geminifusion_rcnn.py` and `custom_configs/DELIVER/deliver_cmnext_rcnn.py`
# and described the model as CMNeXt -- confirm the intended file name.
import os

_base_ = [
    './deliver_dataset.py',  # inherit the dataset config
]

# Dataset root. Set the DELIVER_ROOT environment variable to point at a
# different location; the default is the previously hard-coded path.
data_root = os.environ.get('DELIVER_ROOT', '/SSDb/jemo_maeng/dset/DELIVER')

# Model settings: Faster R-CNN (RPN + standard RoI head) on top of an FPN fed
# by the multi-modal GeminiFusion backbone.
model = dict(
    type='FasterRCNN',
    # Reuse the preprocessor declared in the inherited base config.
    data_preprocessor=_base_.data_preprocessor,
    backbone=dict(
        type='GeminiFusion_second',
        backbone='GeminiFusion-B2',
        modals=['rgb', 'depth', 'event', 'lidar'],
        out_indices=(0, 1, 2, 3),
        # presumably -1 leaves every stage trainable -- confirm against the
        # backbone implementation.
        frozen_stages=-1,
        # NOTE(review): no `pretrained` checkpoint is passed here; an earlier
        # commented-out option pointed at
        # /mnt/nvme/workspace/drone-mmdetection-jm/ckpts/mit_b2.pth.
        # Confirm whether weights are loaded elsewhere.
    ),
    neck=dict(
        type='FPN',  # use the standard MMDetection FPN
        # One entry per backbone output selected by out_indices.
        in_channels=[64, 128, 320, 512],
        out_channels=256,
        num_outs=5,
    ),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            # NOTE(review): an earlier commented-out variant used scales=[8];
            # four scales x three ratios are configured here.
            scales=[2, 4, 8, 16],
            ratios=[0.5, 1.0, 2.0],
            # One stride per FPN output level (num_outs=5).
            strides=[4, 8, 16, 32, 64],
        ),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
        ),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0,
        ),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0),
    ),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            # Four strides are listed although the FPN emits five levels.
            featmap_strides=[4, 8, 16, 32],
        ),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,  # matches RoIAlign output_size above
            num_classes=2,  # Vehicle, Human
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2],
            ),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0,
            ),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0),
        ),
    ),
    # Training config
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False,
            ),
            allowed_border=-1,
            pos_weight=-1,
            debug=False,
        ),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0,
        ),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                # NOTE(review): earlier inline notes described a change to
                # pos_iou_thr=0.3, neg_iou_thr=0.1, min_pos_iou=0.1 and
                # match_low_quality=True, but the values in effect are the
                # ones below -- confirm which set is intended.
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True,
            ),
            pos_weight=-1,
            debug=False,
        ),
    ),
    # Testing config
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0,
        ),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
        ),
    ),
)

# Dataloaders. Only the keys listed here are set in this file; the remaining
# dataset settings presumably come from the inherited base config.
train_dataloader = dict(
    batch_size=4,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        data_root=data_root,
        # presumably resolved relative to data_root -- confirm against the
        # dataset class.
        ann_file='coco_train_xywh.json',
    ),
)

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        data_root=data_root,
        ann_file='coco_val_xywh.json',
    ),
)

# Testing reuses the validation loader settings.
test_dataloader = val_dataloader

# Evaluation settings
val_evaluator = dict(
    type='CocoMetric',
    # Same annotation file the validation dataset loads, as a full path.
    ann_file=os.path.join(data_root, 'coco_val_xywh.json'),
    metric='bbox',
    format_only=False,
)
# Testing runs on the validation loader (test_dataloader = val_dataloader), so
# score it with the same evaluator; otherwise a test_evaluator inherited from
# the base config would keep whatever ann_file the base config gave it.
test_evaluator = val_evaluator

# Optimizer: SGD with gradient-norm clipping.
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
    clip_grad=dict(max_norm=5, norm_type=2),
    # Accumulate gradients over 4 iterations before each optimizer step.
    accumulative_counts=4,
)

# Experiment name used for logging.
# NOTE(review): the name says "cmnext" while the configured backbone is
# GeminiFusion-B2; it is left unchanged so existing work dirs keep matching.
experiment_name = 'deliver_cmnext_b2_faster_rcnn_2x_lr0.01_2'

# Output directory for this experiment (no trailing whitespace in the path).
work_dir = f'./work_dirs/{experiment_name}'