1. Version note: this post is based on mmdet v1.
2. Register the backbone
Add a mobilenetv2.py file under the mmdet/models/backbones/ directory.
File download link: https://download.csdn.net/download/Guo_Python/12523669
Modify mmdet/models/backbones/__init__.py to register the new backbone; after the change it reads:
from .hrnet import HRNet
from .resnet import ResNet, make_res_layer
from .resnext import ResNeXt
from .ssd_vgg import SSDVGG
from .mobilenetv2 import SSDMobilenetV2
__all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG', 'HRNet', 'SSDMobilenetV2']
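For orientation, this is the registration pattern mmdet v1 expects inside mobilenetv2.py. The skeleton below is illustrative only (method bodies are placeholders; the downloaded file contains the full implementation), but the constructor arguments match the config used later in this post:

import torch.nn as nn

from ..registry import BACKBONES


@BACKBONES.register_module
class SSDMobilenetV2(nn.Module):
    """MobileNetV2 backbone producing multi-scale features for SSD."""

    def __init__(self, input_size=320, activation_type='relu6'):
        super(SSDMobilenetV2, self).__init__()
        # Build the MobileNetV2 stages plus the extra SSD feature layers here.

    def init_weights(self, pretrained=None):
        # Called by SingleStageDetector with the 'pretrained' path from the config.
        pass

    def forward(self, x):
        # Must return a tuple of feature maps, one per entry in anchor_strides.
        raise NotImplementedError

The registered class name must match the type='SSDMobilenetV2' string in the config, which is how mmdet v1's registry resolves it.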
3. Register the detection head
Add an ssdlite_head.py file under the mmdet/models/anchor_heads/ directory.
File download link: https://download.csdn.net/download/Guo_Python/12523682
Modify mmdet/models/anchor_heads/__init__.py to register the new head; after the change it reads:
from .anchor_head import AnchorHead
from .atss_head import ATSSHead
from .fcos_head import FCOSHead
from .fovea_head import FoveaHead
from .free_anchor_retina_head import FreeAnchorRetinaHead
from .ga_retina_head import GARetinaHead
from .ga_rpn_head import GARPNHead
from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead
from .reppoints_head import RepPointsHead
from .retina_head import RetinaHead
from .retina_sepbn_head import RetinaSepBNHead
from .rpn_head import RPNHead
from .ssd_head import SSDHead
from .solo_head import SOLOHead
from .decoupled_solo_head import DecoupledSOLOHead
from .solov2_head import SOLOV2Head
from .ssdlite_head import SSDLiteHead
__all__ = [
'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead',
'GARPNHead', 'RetinaHead', 'RetinaSepBNHead', 'GARetinaHead', 'SSDHead',
'FCOSHead', 'RepPointsHead', 'FoveaHead', 'FreeAnchorRetinaHead',
'ATSSHead', 'SOLOHead', 'DecoupledSOLOHead', 'SOLOV2Head', 'SSDLiteHead'
]
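The "lite" in SSDLiteHead comes from the MobileNetV2 paper: the per-level classification and regression convs are depthwise-separable instead of dense 3x3 convs. A hypothetical helper sketching the idea (illustrative only, not the downloaded file's exact code):

import torch.nn as nn


def ssdlite_pred_branch(in_channels, out_channels):
    # SSDLite prediction branch: a 3x3 depthwise conv (groups=in_channels)
    # followed by a 1x1 pointwise conv. Same receptive field as a dense
    # 3x3 conv at a fraction of the parameters and multiply-adds.
    return nn.Sequential(
        nn.Conv2d(in_channels, in_channels, 3, padding=1, groups=in_channels),
        nn.BatchNorm2d(in_channels),
        nn.ReLU6(inplace=True),
        nn.Conv2d(in_channels, out_channels, 1))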
4. Model training
Once the preparation above is done, you can train the model with a matching config file. The config I used is as follows:
"""
if you use SSDLiteHead-300x300:the map is 68.4 in voc.
if you use SSDHead-300x300:the map is 69.0 in voc.
if you use SSDLiteHead-320x320:the map is 70.3 in voc
"""
# model settings
input_size = 320
model = dict(
    type='SingleStageDetector',
    pretrained='http://jeff95.me/models/mobilenet_v2-6a65762b.pth',
    backbone=dict(
        type='SSDMobilenetV2',
        input_size=input_size,
        activation_type='relu6'),
    neck=None,
    bbox_head=dict(
        # type='SSDHead',
        type='SSDLiteHead',
        input_size=input_size,
        in_channels=(576, 1280, 512, 256, 256, 128),
        num_classes=21,
        anchor_strides=(16, 30, 60, 100, 150, 300),
        basesize_ratio_range=(0.2, 0.95),
        anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
        target_means=(.0, .0, .0, .0),
        target_stds=(0.1, 0.1, 0.2, 0.2)))
# model training and testing settings
cudnn_benchmark = True
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.,
        ignore_iof_thr=-1,
        gt_max_assign_all=False),
    smoothl1_beta=1.,
    allowed_border=-1,
    pos_weight=-1,
    neg_pos_ratio=3,
    debug=False)
test_cfg = dict(
    nms=dict(type='nms', iou_thr=0.45),
    min_bbox_size=0,
    score_thr=0.02,
    max_per_img=200)
# dataset settings
dataset_type = 'VOCDataset'
data_root = '/home/gp/work/project/learning/VOC/VOCdevkit/'
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    dict(
        type='Expand',
        mean=img_norm_cfg['mean'],
        to_rgb=img_norm_cfg['to_rgb'],
        ratio_range=(1, 4)),
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
        min_crop_size=0.3),
    dict(type='Resize', img_scale=(320, 320), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(320, 320),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=False),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    imgs_per_gpu=32,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=10,
        dataset=dict(
            type=dataset_type,
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=5e-4)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 10,
    step=[16, 22])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 25
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/ssd320_mobilenet_v2'
load_from = None
resume_from = None
workflow = [('train', 1)]
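Before kicking off a full run, it is worth a quick sanity check that the config resolves the newly registered modules. A minimal sketch using mmdet v1's standard build API, assuming the config is saved as configs/pascal_voc/ssd320_mobilenetv2_voc.py:

from mmcv import Config
from mmdet.models import build_detector

cfg = Config.fromfile('configs/pascal_voc/ssd320_mobilenetv2_voc.py')
# Building the detector fails loudly if SSDMobilenetV2 or SSDLiteHead
# were not registered in their respective __init__.py files.
model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
print(type(model.backbone).__name__, type(model.bbox_head).__name__)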
Run the training command:
python tools/train.py configs/pascal_voc/ssd320_mobilenetv2_voc.py
Barring surprises, your model will start training.
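If you have multiple GPUs, mmdet v1 also ships a distributed launcher; for example, on 2 GPUs:
./tools/dist_train.sh configs/pascal_voc/ssd320_mobilenetv2_voc.py 2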
5. Model testing
Run the test script to evaluate the model:
python tools/test.py configs/pascal_voc/ssd320_mobilenetv2_voc.py work_dirs/ssd320_mobilenet_v2/latest.pth --out result_ssd.pkl --eval bbox
You will hit the following error:
AttributeError: 'VOCDataset' object has no attribute 'img_ids'
This happens because the --eval bbox path converts results to COCO format and looks up dataset.img_ids, which VOCDataset does not have. The detections are dumped to result_ssd.pkl before evaluation runs, so ignore the error and score them with the VOC evaluation script instead:
CUDA_VISIBLE_DEVICES=0 python tools/voc_eval.py result_ssd.pkl configs/pascal_voc/ssd320_mobilenetv2_voc.py
This prints the per-class AP table and the overall mAP.
[results screenshot omitted]
6. Visualization results:
[detection result images omitted]
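To render detections like these yourself, mmdet v1's test script accepts a --show flag that displays predictions on each test image (drop --out/--eval for a quick look):
python tools/test.py configs/pascal_voc/ssd320_mobilenetv2_voc.py work_dirs/ssd320_mobilenet_v2/latest.pth --show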
End!