Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
inovisao
compara_detectores_mmdetection
Commits
e3e191e6
Commit
e3e191e6
authored
Mar 20, 2021
by
Fábio Prestes
Browse files
final version, with final config files
parent
eb914060
Changes
71
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
567 additions
and
102 deletions
+567
-102
install.sh
install.sh
+7
-6
mmdetection/configs/_base_/datasets/coco_detection.py
mmdetection/configs/_base_/datasets/coco_detection.py
+7
-8
mmdetection/configs/_base_/datasets/voc0712.py
mmdetection/configs/_base_/datasets/voc0712.py
+4
-0
mmdetection/configs/pascal_voc/faster_rcnn_r50_fpn_1x_coco_pupa.py
...on/configs/pascal_voc/faster_rcnn_r50_fpn_1x_coco_pupa.py
+25
-0
mmdetection/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
...tion/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
+11
-2
mmdetection/configs/pascal_voc/mask_rcnn_r50_fpn_1x_coco_pupa.py
...tion/configs/pascal_voc/mask_rcnn_r50_fpn_1x_coco_pupa.py
+23
-0
mmdetection/configs/pascal_voc/mask_rcnn_r50_fpn_1x_voc0712.py
...ection/configs/pascal_voc/mask_rcnn_r50_fpn_1x_voc0712.py
+23
-0
mmdetection/configs/pascal_voc/retinanet_r50_fpn_1x_coco_pupa.py
...tion/configs/pascal_voc/retinanet_r50_fpn_1x_coco_pupa.py
+32
-0
mmdetection/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py
...ection/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py
+11
-1
mmdetection/configs/pascal_voc/ssd300_coco_pupa.py
mmdetection/configs/pascal_voc/ssd300_coco_pupa.py
+84
-0
mmdetection/configs/pascal_voc/ssd300_voc0712.py
mmdetection/configs/pascal_voc/ssd300_voc0712.py
+71
-69
mmdetection/configs/pascal_voc/yolov3_d53_mstrain-608_273e_coco_pupa.py
...nfigs/pascal_voc/yolov3_d53_mstrain-608_273e_coco_pupa.py
+125
-0
mmdetection/configs/pascal_voc/yolov3_d53_mstrain-608_273e_voc0712.py
...configs/pascal_voc/yolov3_d53_mstrain-608_273e_voc0712.py
+130
-0
mmdetection/mmdet/core/evaluation/class_names.py
mmdetection/mmdet/core/evaluation/class_names.py
+5
-1
mmdetection/mmdet/datasets/coco.py
mmdetection/mmdet/datasets/coco.py
+2
-14
mmdetection/mmdet/datasets/voc.py
mmdetection/mmdet/datasets/voc.py
+7
-1
results/JPEGImages/f_16.jpg
results/JPEGImages/f_16.jpg
+0
-0
results/JPEGImages/f_2.jpg
results/JPEGImages/f_2.jpg
+0
-0
results/JPEGImages/f_20.jpg
results/JPEGImages/f_20.jpg
+0
-0
results/JPEGImages/f_26.jpg
results/JPEGImages/f_26.jpg
+0
-0
No files found.
install.sh
View file @
e3e191e6
...
...
@@ -4,19 +4,20 @@
# Criando um ambiente conda específico para rodar o mmdetection
# estou usando esta versão do python pois não testei em outras
# (mas pode ser que funcione em outras)
conda create
--name
od
python
=
3.7
.7
-y
conda create
--name
od
_porto
python
=
3.7
-y
# Ativando o ambiente recém criado (confira se ativou corretamente
# olhando para a linha de comando: deve começar a aparecer
# (od_porto) no lugar de (base) no prompt)
conda activate
od
conda activate od
_porto
# Algumas coisas instalam com o conda mas outras precisam do pip
conda
install
pytorch torchvision
-c
pytorch
conda
install
pytorch torchvision
torchaudio
cudatoolkit
=
11.0
-c
pytorch
# Se der erro na linha abaixo pode ter relação com a instalação do CUDA
# e da placa gráfica na máquina
pip
install
mmcv-full
pip
install
mmcv-full
-f
https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html
cd
mmdetection
pip
install
-r
requirements/build.txt
pip
install
-v
-e
.
# or "python setup.py develop"
pip
install
seaborn
pip
install
-r
requirements/optional.txt
pip
install
-r
requirements/runtime.txt
pip
install
-r
requirements/tests.txt
mmdetection/configs/_base_/datasets/coco_detection.py
View file @
e3e191e6
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
img_norm_cfg
=
dict
(
mean
=
[
154.04
,
159.83
,
181.41
],
std
=
[
13.59
,
17.70
,
25.08
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
...
...
@@ -32,17 +31,17 @@ data = dict(
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations
/instances_train2017
.json'
,
img_prefix
=
data_root
+
'train
2017
/'
,
ann_file
=
data_root
+
'
train/_
annotations
.coco
.json'
,
img_prefix
=
data_root
+
'train/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations
/instances_val2017
.json'
,
img_prefix
=
data_root
+
'val
2017
/'
,
ann_file
=
data_root
+
'
valid/_
annotations
.coco
.json'
,
img_prefix
=
data_root
+
'val
id
/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations
/instances_val2017
.json'
,
img_prefix
=
data_root
+
'
val2017
/'
,
ann_file
=
data_root
+
'
test/_
annotations
.coco
.json'
,
img_prefix
=
data_root
+
'
test
/'
,
pipeline
=
test_pipeline
))
evaluation
=
dict
(
interval
=
1
,
metric
=
'bbox'
)
mmdetection/configs/_base_/datasets/voc0712.py
View file @
e3e191e6
# Eu precisei mexer neste arquivo pois estou usando apenas o formato VOC2007
# e não o VOC2012. Estava dando um erro antes, por isso eu copio este
# arquivo para a estrutura do mmdetection antes de rodar
# dataset settings
dataset_type
=
'VOCDataset'
data_root
=
'data/VOCdevkit/'
...
...
mmdetection/configs/pascal_voc/faster_rcnn_r50_fpn_1x_coco_pupa.py
0 → 100644
View file @
e3e191e6
# Neste arquivo são definidos os vários hiperparâmetros do modelo.
# Atenção com o número de classes que pode mudar de um conjunto de
# dados para outro. Geralmente usa-se uma classe a mais por conta
# do fundo (background)
_base_
=
[
'../_base_/models/faster_rcnn_r50_fpn.py'
,
'../_base_/datasets/coco_detection.py'
,
'../_base_/default_runtime.py'
]
img_norm_cfg
=
dict
(
mean
=
[
154.04
,
159.83
,
181.41
],
std
=
[
13.59
,
17.70
,
25.08
],
to_rgb
=
True
)
# TROQUE AQUI PELA QUANTIDADE DE CLASSES DO SEU PROBLEMA (UMA A MAIS PARA O FUNDO SE NECESSÁRIO)
model
=
dict
(
roi_head
=
dict
(
bbox_head
=
dict
(
num_classes
=
2
)))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.001
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
#optimizer = dict(type='Adam', lr=0.001, weight_decay=0.0001)
optimizer_config
=
dict
(
grad_clip
=
None
)
# learning policy
# decaimento do lr na época 3 (coco_detection.py não usa RepeatDataset, então não há multiplicador de épocas)
lr_config
=
dict
(
policy
=
'step'
,
step
=
[
3
])
# runtime settings
total_epochs
=
1000
# 1000 épocas reais (coco_detection.py não usa RepeatDataset)
# checkpoint saving configuration
checkpoint_config
=
dict
(
interval
=
250
)
mmdetection/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
View file @
e3e191e6
# Neste arquivo são definidos os vários hiperparâmetros do modelo.
# Atenção com o número de classes que pode mudar de um conjunto de
# dados para outro. Geralmente usa-se uma classe a mais por conta
# do fundo (background)
_base_
=
[
'../_base_/models/faster_rcnn_r50_fpn.py'
,
'../_base_/datasets/voc0712.py'
,
'../_base_/default_runtime.py'
]
img_norm_cfg
=
dict
(
mean
=
[
154.04
,
159.83
,
181.41
],
std
=
[
13.59
,
17.70
,
25.08
],
to_rgb
=
True
)
# TROQUE AQUI PELA QUANTIDADE DE CLASSES DO SEU PROBLEMA (UMA A MAIS PARA O FUNDO SE NECESSÁRIO)
model
=
dict
(
roi_head
=
dict
(
bbox_head
=
dict
(
num_classes
=
2
)))
# optimizer
#
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
optimizer
=
dict
(
type
=
'Adam'
,
lr
=
0.001
,
weight_decay
=
0.0001
)
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.001
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
#
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.0001)
optimizer_config
=
dict
(
grad_clip
=
None
)
# learning policy
# actual epoch = 3 * 3 = 9
lr_config
=
dict
(
policy
=
'step'
,
step
=
[
3
])
# runtime settings
total_epochs
=
1000
# actual epoch = 4 * 3 = 12
# checkpoint saving configuration
checkpoint_config
=
dict
(
interval
=
250
)
mmdetection/configs/pascal_voc/mask_rcnn_r50_fpn_1x_coco_pupa.py
0 → 100644
View file @
e3e191e6
_base_
=
[
'../_base_/models/mask_rcnn_r50_fpn.py'
,
'../_base_/datasets/coco_instance.py'
,
'../_base_/default_runtime.py'
]
train_pipeline
=
[
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
,
with_mask
=
True
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
,
'gt_masks'
]),
]
img_norm_cfg
=
dict
(
mean
=
[
154.04
,
159.83
,
181.41
],
std
=
[
13.59
,
17.70
,
25.08
],
to_rgb
=
True
)
#optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
optimizer
=
dict
(
type
=
'Adam'
,
lr
=
0.001
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
None
)
# learning policy
# actual epoch = 3 * 3 = 9
lr_config
=
dict
(
policy
=
'step'
,
step
=
[
3
])
# runtime settings
total_epochs
=
1000
# actual epoch = 4 * 3 = 12
# checkpoint saving configuration
checkpoint_config
=
dict
(
interval
=
250
)
mmdetection/configs/pascal_voc/mask_rcnn_r50_fpn_1x_voc0712.py
0 → 100644
View file @
e3e191e6
_base_
=
[
'../_base_/models/mask_rcnn_r50_fpn.py'
,
'../_base_/datasets/voc0712.py'
,
'../_base_/default_runtime.py'
]
train_pipeline
=
[
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
,
with_mask
=
True
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
,
'gt_masks'
]),
]
img_norm_cfg
=
dict
(
mean
=
[
154.04
,
159.83
,
181.41
],
std
=
[
13.59
,
17.70
,
25.08
],
to_rgb
=
True
)
#optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
optimizer
=
dict
(
type
=
'Adam'
,
lr
=
0.001
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
None
)
# learning policy
# actual epoch = 3 * 3 = 9
lr_config
=
dict
(
policy
=
'step'
,
step
=
[
3
])
# runtime settings
total_epochs
=
1000
# actual epoch = 4 * 3 = 12
# checkpoint saving configuration
checkpoint_config
=
dict
(
interval
=
250
)
mmdetection/configs/pascal_voc/retinanet_r50_fpn_1x_coco_pupa.py
0 → 100644
View file @
e3e191e6
# Neste arquivo são definidos os vários hiperparâmetros do modelo.
# Atenção com o número de classes que pode mudar de um conjunto de
# dados para outro. Geralmente usa-se uma classe a mais por conta
# do fundo (background)
_base_
=
[
'../_base_/models/retinanet_r50_fpn.py'
,
'../_base_/datasets/coco_detection.py'
,
'../_base_/default_runtime.py'
]
img_norm_cfg
=
dict
(
mean
=
[
154.04
,
159.83
,
181.41
],
std
=
[
13.59
,
17.70
,
25.08
],
to_rgb
=
True
)
# TROQUE AQUI PELA QUANTIDADE DE CLASSES DO SEU PROBLEMA (UMA A MAIS PARA O FUNDO SE NECESSÁRIO)
model
=
dict
(
bbox_head
=
dict
(
num_classes
=
2
))
data
=
dict
(
samples_per_gpu
=
2
,
workers_per_gpu
=
2
)
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.001
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
None
)
# learning policy
# actual epoch = 3 * 3 = 9
lr_config
=
dict
(
policy
=
'step'
,
step
=
[
3
])
# runtime settings
total_epochs
=
1000
# checkpoint saving configuration
checkpoint_config
=
dict
(
interval
=
250
)
# TODO: investigar por que está ocorrendo este erro de não salvar o resultado:
# 2021-03-12 16:31:10,797 - mmdet - ERROR - The testing results of the whole dataset is empty.
# (acontece depois da primeira época)
mmdetection/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py
View file @
e3e191e6
# Neste arquivo são definidos os vários hiperparâmetros do modelo.
# Atenção com o número de classes que pode mudar de um conjunto de
# dados para outro. Geralmente usa-se uma classe a mais por conta
# do fundo (background)
_base_
=
[
'../_base_/models/retinanet_r50_fpn.py'
,
'../_base_/datasets/voc0712.py'
,
'../_base_/default_runtime.py'
]
img_norm_cfg
=
dict
(
mean
=
[
154.04
,
159.83
,
181.41
],
std
=
[
13.59
,
17.70
,
25.08
],
to_rgb
=
True
)
# TROQUE AQUI PELA QUANTIDADE DE CLASSES DO SEU PROBLEMA (UMA A MAIS PARA O FUNDO SE NECESSÁRIO)
model
=
dict
(
bbox_head
=
dict
(
num_classes
=
2
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.001
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
...
...
@@ -10,4 +18,6 @@ optimizer_config = dict(grad_clip=None)
# actual epoch = 3 * 3 = 9
lr_config
=
dict
(
policy
=
'step'
,
step
=
[
3
])
# runtime settings
total_epochs
=
40
# actual epoch = 4 * 3 = 12
total_epochs
=
1000
# checkpoint saving configuration
checkpoint_config
=
dict
(
interval
=
250
)
mmdetection/configs/pascal_voc/ssd300_coco_pupa.py
0 → 100644
View file @
e3e191e6
_base_
=
[
'../_base_/models/ssd300.py'
,
'../_base_/datasets/coco_detection.py'
,
'../_base_/default_runtime.py'
]
model
=
dict
(
bbox_head
=
dict
(
num_classes
=
2
,
anchor_generator
=
dict
(
basesize_ratio_range
=
(
0.2
,
0.9
))))
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
154.04
,
159.83
,
181.41
],
std
=
[
13.59
,
17.70
,
25.08
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
,
to_float32
=
True
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'PhotoMetricDistortion'
,
brightness_delta
=
32
,
contrast_range
=
(
0.5
,
1.5
),
saturation_range
=
(
0.5
,
1.5
),
hue_delta
=
18
),
dict
(
type
=
'Expand'
,
mean
=
img_norm_cfg
[
'mean'
],
to_rgb
=
img_norm_cfg
[
'to_rgb'
],
ratio_range
=
(
1
,
4
)),
dict
(
type
=
'MinIoURandomCrop'
,
min_ious
=
(
0.1
,
0.3
,
0.5
,
0.7
,
0.9
),
min_crop_size
=
0.3
),
dict
(
type
=
'Resize'
,
img_scale
=
(
300
,
300
),
keep_ratio
=
False
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
300
,
300
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
False
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
samples_per_gpu
=
8
,
workers_per_gpu
=
3
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'train/_annotations.coco.json'
,
img_prefix
=
data_root
+
'train/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'valid/_annotations.coco.json'
,
img_prefix
=
data_root
+
'valid/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'test/_annotations.coco.json'
,
img_prefix
=
data_root
+
'test/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
1e-3
,
momentum
=
0.9
,
weight_decay
=
5e-4
)
optimizer_config
=
dict
()
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
0.001
,
step
=
[
16
,
20
])
# runtime settings
total_epochs
=
1000
# checkpoint saving configuration
checkpoint_config
=
dict
(
interval
=
250
)
mmdetection/configs/pascal_voc/ssd300_voc0712.py
View file @
e3e191e6
_base_
=
[
'../_base_/models/ssd300.py'
,
'../_base_/datasets/voc0712.py'
,
'../_base_/default_runtime.py'
]
model
=
dict
(
bbox_head
=
dict
(
num_classes
=
20
,
anchor_generator
=
dict
(
basesize_ratio_range
=
(
0.2
,
0.9
))))
# dataset settings
dataset_type
=
'VOCDataset'
data_root
=
'data/VOCdevkit/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
1
,
1
,
1
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
,
to_float32
=
True
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'PhotoMetricDistortion'
,
brightness_delta
=
32
,
contrast_range
=
(
0.5
,
1.5
),
saturation_range
=
(
0.5
,
1.5
),
hue_delta
=
18
),
dict
(
type
=
'Expand'
,
mean
=
img_norm_cfg
[
'mean'
],
to_rgb
=
img_norm_cfg
[
'to_rgb'
],
ratio_range
=
(
1
,
4
)),
dict
(
type
=
'MinIoURandomCrop'
,
min_ious
=
(
0.1
,
0.3
,
0.5
,
0.7
,
0.9
),
min_crop_size
=
0.3
),
dict
(
type
=
'Resize'
,
img_scale
=
(
300
,
300
),
keep_ratio
=
False
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
300
,
300
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
False
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
samples_per_gpu
=
8
,
workers_per_gpu
=
3
,
train
=
dict
(
type
=
'RepeatDataset'
,
times
=
10
,
dataset
=
dict
(
pipeline
=
train_pipeline
)),
val
=
dict
(
pipeline
=
test_pipeline
),
test
=
dict
(
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
1e-3
,
momentum
=
0.9
,
weight_decay
=
5e-4
)
optimizer_config
=
dict
()
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
0.001
,
step
=
[
16
,
20
])
checkpoint_config
=
dict
(
interval
=
1
)
# runtime settings
total_epochs
=
24
_base_
=
[
'../_base_/models/ssd300.py'
,
'../_base_/datasets/voc0712.py'
,
'../_base_/default_runtime.py'
]
model
=
dict
(
bbox_head
=
dict
(
num_classes
=
2
,
anchor_generator
=
dict
(
basesize_ratio_range
=
(
0.2
,
0.9
))))
# dataset settings
dataset_type
=
'VOCDataset'
data_root
=
'data/VOCdevkit/'
img_norm_cfg
=
dict
(
mean
=
[
154.04
,
159.83
,
181.41
],
std
=
[
13.59
,
17.70
,
25.08
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
,
to_float32
=
True
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'PhotoMetricDistortion'
,
brightness_delta
=
32
,
contrast_range
=
(
0.5
,
1.5
),
saturation_range
=
(
0.5
,
1.5
),
hue_delta
=
18
),
dict
(
type
=
'Expand'
,
mean
=
img_norm_cfg
[
'mean'
],
to_rgb
=
img_norm_cfg
[
'to_rgb'
],
ratio_range
=
(
1
,
4
)),
dict
(
type
=
'MinIoURandomCrop'
,
min_ious
=
(
0.1
,
0.3
,
0.5
,
0.7
,
0.9
),
min_crop_size
=
0.3
),
dict
(
type
=
'Resize'
,
img_scale
=
(
300
,
300
),
keep_ratio
=
False
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
300
,
300
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
False
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
train
=
dict
(
dataset
=
dict
(
pipeline
=
train_pipeline
)),
val
=
dict
(
pipeline
=
test_pipeline
),
test
=
dict
(
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.001
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
()
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
0.001
,
step
=
[
16
,
20
])
# runtime settings
total_epochs
=
1000
# checkpoint saving configuration
checkpoint_config
=
dict
(
interval
=
250
)
mmdetection/configs/pascal_voc/yolov3_d53_mstrain-608_273e_coco_pupa.py
0 → 100644
View file @
e3e191e6
_base_
=
'../_base_/default_runtime.py'
# model settings
model
=
dict
(
type
=
'YOLOV3'
,
pretrained
=
'open-mmlab://darknet53'
,
backbone
=
dict
(
type
=
'Darknet'
,
depth
=
53
,
out_indices
=
(
3
,
4
,
5
)),
neck
=
dict
(
type
=
'YOLOV3Neck'
,
num_scales
=
3
,
in_channels
=
[
1024
,
512
,
256
],
out_channels
=
[
512
,
256
,
128
]),
bbox_head
=
dict
(
type
=
'YOLOV3Head'
,
num_classes
=
2
,
in_channels
=
[
512
,
256
,
128
],
out_channels
=
[
1024
,
512
,
256
],
anchor_generator
=
dict
(
type
=
'YOLOAnchorGenerator'
,
base_sizes
=
[[(
116
,
90
),
(
156
,
198
),
(
373
,
326
)],
[(
30
,
61
),
(
62
,
45
),
(
59
,
119
)],
[(
10
,
13
),
(
16
,
30
),
(
33
,
23
)]],
strides
=
[
32
,
16
,
8
]),
bbox_coder
=
dict
(
type
=
'YOLOBBoxCoder'
),
featmap_strides
=
[
32
,
16
,
8
],
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
,
reduction
=
'sum'
),
loss_conf
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
,
reduction
=
'sum'
),
loss_xy
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
2.0
,
reduction
=
'sum'
),
loss_wh
=
dict
(
type
=
'MSELoss'
,
loss_weight
=
2.0
,
reduction
=
'sum'
)))
# training and testing settings
train_cfg
=
dict
(
assigner
=
dict
(
type
=
'GridAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.5
,
min_pos_iou
=
0
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
conf_thr
=
0.005
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.45
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
154.04
,
159.83
,
181.41
],
std
=
[
13.59
,
17.70
,
25.08
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
,
to_float32
=
True
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'PhotoMetricDistortion'
),
dict
(
type
=
'Expand'
,
mean
=
img_norm_cfg
[
'mean'
],
to_rgb
=
img_norm_cfg
[
'to_rgb'
],
ratio_range
=
(
1
,
2
)),
dict
(
type
=
'MinIoURandomCrop'
,
min_ious
=
(
0.4
,
0.5
,
0.6
,
0.7
,
0.8
,
0.9
),
min_crop_size
=
0.3
),
dict
(
type
=
'Resize'
,
img_scale
=
[(
320
,
320
),
(
608
,
608
)],
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
])
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
608
,
608
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
])
])
]
data
=
dict
(
samples_per_gpu
=
8
,
workers_per_gpu
=
3
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'train/_annotations.coco.json'
,
img_prefix
=
data_root
+
'train/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'valid/_annotations.coco.json'
,
img_prefix
=
data_root
+
'valid/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'test/_annotations.coco.json'
,
img_prefix
=
data_root
+
'test/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.001
,
momentum
=
0.9
,
weight_decay
=
0.0005
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
2000
,
# same as burn-in in darknet