Commit e3e191e6 authored by Fábio Prestes's avatar Fábio Prestes
Browse files

final version, with final config files

parent eb914060
......@@ -4,19 +4,20 @@
# Create a dedicated conda environment for running mmdetection.
# Python 3.7 is the only version this setup was tested with
# (other versions may work, but are unverified).
conda create --name od_porto python=3.7 -y
# Activate the freshly created environment (check that it activated
# correctly by looking at the command line: the prompt should now show
# (od_porto) instead of (base)).
conda activate od_porto
# Some packages install via conda, others need pip.
# The CUDA 11.0 toolkit is pinned to match the mmcv-full wheel below.
conda install pytorch torchvision torchaudio cudatoolkit=11.0 -c pytorch
# If the line below fails, it may be related to the CUDA / graphics-card
# installation on this machine.
# NOTE: the find-links index is pinned to CUDA 11.0 + torch 1.7.0 and
# must match the versions installed by the conda line above.
pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html
cd mmdetection
pip install -r requirements/build.txt
pip install -v -e . # or "python setup.py develop"
pip install seaborn
pip install -r requirements/optional.txt
pip install -r requirements/runtime.txt
pip install -r requirements/tests.txt
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# Dataset-specific per-channel normalization statistics.
# (A dead duplicate assignment with the stale ImageNet defaults,
# immediately overwritten by this line, was removed.)
img_norm_cfg = dict(mean=[154.04, 159.83, 181.41], std=[13.59, 17.70, 25.08], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
......@@ -32,17 +31,17 @@ data = dict(
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
ann_file=data_root + 'train/_annotations.coco.json',
img_prefix=data_root + 'train/',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
ann_file=data_root + 'valid/_annotations.coco.json',
img_prefix=data_root + 'valid/',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
ann_file=data_root + 'test/_annotations.coco.json',
img_prefix=data_root + 'test/',
pipeline=test_pipeline))
# Evaluate bbox mAP on the validation set after every epoch.
evaluation = dict(metric='bbox', interval=1)
# I had to modify this file because I am using only the VOC2007 format
# and not VOC2012. It was raising an error before, which is why I copy
# this file into the mmdetection tree before running.
# dataset settings
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
......
# Faster R-CNN (R50-FPN) on the custom COCO-format dataset.
# This file defines the various hyperparameters of the model.
# Pay attention to the number of classes, which can change from one
# dataset to another; usually one extra class is used for the background.
_base_ = [
    '../_base_/models/faster_rcnn_r50_fpn.py', '../_base_/datasets/coco_detection.py',
    '../_base_/default_runtime.py'
]
# NOTE(review): img_norm_cfg is not referenced anywhere below; defining
# it here does not by itself rewrite the Normalize step already baked
# into the base dataset pipelines — confirm this override takes effect.
img_norm_cfg = dict(mean=[154.04, 159.83, 181.41], std=[13.59, 17.70, 25.08], to_rgb=True)
# REPLACE THIS WITH THE NUMBER OF CLASSES OF YOUR PROBLEM (ONE MORE FOR
# THE BACKGROUND IF NECESSARY)
model = dict(roi_head=dict(bbox_head=dict(num_classes=2)))
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
#optimizer = dict(type='Adam', lr=0.001, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: single LR drop after epoch 3
lr_config = dict(policy='step', step=[3])
# runtime settings
total_epochs = 1000
# checkpoint saving configuration: save a checkpoint every 250 epochs
checkpoint_config = dict(interval=250)
# Faster R-CNN (R50-FPN) on the VOC-format dataset.
# This file defines the various hyperparameters of the model.
# Pay attention to the number of classes, which can change from one
# dataset to another; usually one extra class is used for the background.
_base_ = [
    '../_base_/models/faster_rcnn_r50_fpn.py', '../_base_/datasets/voc0712.py',
    '../_base_/default_runtime.py'
]
# NOTE(review): img_norm_cfg is not referenced anywhere below; defining
# it here does not by itself rewrite the Normalize step already baked
# into the base dataset pipelines — confirm this override takes effect.
img_norm_cfg = dict(mean=[154.04, 159.83, 181.41], std=[13.59, 17.70, 25.08], to_rgb=True)
# REPLACE THIS WITH THE NUMBER OF CLASSES OF YOUR PROBLEM (ONE MORE FOR
# THE BACKGROUND IF NECESSARY)
model = dict(roi_head=dict(bbox_head=dict(num_classes=2)))
# optimizer
# (A duplicated Adam/SGD pair left over from a bad merge was removed;
# SGD was the value that actually took effect, since the last
# assignment wins.)
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
#optimizer = dict(type='Adam', lr=0.001, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: single LR drop after epoch 3
lr_config = dict(policy='step', step=[3])
# runtime settings
total_epochs = 1000
# checkpoint saving configuration: save a checkpoint every 250 epochs
checkpoint_config = dict(interval=250)
# Mask R-CNN (R50-FPN) on the COCO-format instance-segmentation dataset.
_base_ = [
    '../_base_/models/mask_rcnn_r50_fpn.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/default_runtime.py'
]
# NOTE(review): this pipeline is missing the usual LoadImageFromFile /
# Resize / Normalize / DefaultFormatBundle steps, and nothing in this
# file wires it into the `data` config — it looks like dead code; verify
# before relying on it.
train_pipeline = [
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
# NOTE(review): img_norm_cfg is not referenced below — confirm the base
# pipelines actually use these statistics.
img_norm_cfg = dict(mean=[154.04, 159.83, 181.41], std=[13.59, 17.70, 25.08], to_rgb=True)
#optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: single LR drop after epoch 3
lr_config = dict(policy='step', step=[3])
# runtime settings
total_epochs = 1000
# checkpoint saving configuration: save a checkpoint every 250 epochs
checkpoint_config = dict(interval=250)
# Mask R-CNN (R50-FPN) on the VOC-format dataset.
_base_ = [
    '../_base_/models/mask_rcnn_r50_fpn.py',
    '../_base_/datasets/voc0712.py',
    '../_base_/default_runtime.py'
]
# NOTE(review): this pipeline lacks LoadImageFromFile / Resize /
# Normalize / DefaultFormatBundle and is never wired into a `data`
# config in this file — it looks like dead code. Also confirm the VOC
# annotations actually provide instance masks (with_mask=True).
train_pipeline = [
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
# NOTE(review): img_norm_cfg is not referenced below — confirm the base
# pipelines actually use these statistics.
img_norm_cfg = dict(mean=[154.04, 159.83, 181.41], std=[13.59, 17.70, 25.08], to_rgb=True)
#optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: single LR drop after epoch 3
lr_config = dict(policy='step', step=[3])
# runtime settings
total_epochs = 1000
# checkpoint saving configuration: save a checkpoint every 250 epochs
checkpoint_config = dict(interval=250)
# RetinaNet (R50-FPN) on the custom COCO-format dataset.
# This file defines the various hyperparameters of the model.
# Pay attention to the number of classes, which can change from one
# dataset to another; usually one extra class is used for the background.
_base_ = [
    '../_base_/models/retinanet_r50_fpn.py', '../_base_/datasets/coco_detection.py',
    '../_base_/default_runtime.py'
]
# NOTE(review): img_norm_cfg is not referenced below — confirm the base
# pipelines actually use these statistics.
img_norm_cfg = dict(mean=[154.04, 159.83, 181.41], std=[13.59, 17.70, 25.08], to_rgb=True)
# REPLACE THIS WITH THE NUMBER OF CLASSES OF YOUR PROBLEM (ONE MORE FOR
# THE BACKGROUND IF NECESSARY)
model = dict(bbox_head=dict(num_classes=2))
# Batch size / data-loader workers per GPU (overrides the base config).
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2)
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: single LR drop after epoch 3
lr_config = dict(policy='step', step=[3])
# runtime settings
total_epochs = 1000
# checkpoint saving configuration: save a checkpoint every 250 epochs
checkpoint_config = dict(interval=250)
# TODO: investigate later why results are not being saved — after the
# first epoch the log shows:
# 2021-03-12 16:31:10,797 - mmdet - ERROR - The testing results of the whole dataset is empty.
# RetinaNet (R50-FPN) on the VOC-format dataset.
# This file defines the various hyperparameters of the model.
# Pay attention to the number of classes, which can change from one
# dataset to another; usually one extra class is used for the background.
_base_ = [
    '../_base_/models/retinanet_r50_fpn.py', '../_base_/datasets/voc0712.py',
    '../_base_/default_runtime.py'
]
# NOTE(review): img_norm_cfg is not referenced below — confirm the base
# pipelines actually use these statistics.
img_norm_cfg = dict(mean=[154.04, 159.83, 181.41], std=[13.59, 17.70, 25.08], to_rgb=True)
# REPLACE THIS WITH THE NUMBER OF CLASSES OF YOUR PROBLEM (ONE MORE FOR
# THE BACKGROUND IF NECESSARY)
model = dict(bbox_head=dict(num_classes=2))
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
......@@ -10,4 +18,6 @@ optimizer_config = dict(grad_clip=None)
# learning policy: single LR drop after epoch 3
lr_config = dict(policy='step', step=[3])
# runtime settings
# (A dead `total_epochs = 40` left over from the merge was removed; the
# later assignment to 1000 was the one taking effect.)
total_epochs = 1000
# checkpoint saving configuration: save a checkpoint every 250 epochs
checkpoint_config = dict(interval=250)
# SSD300 on the custom COCO-format dataset.
_base_ = [
    '../_base_/models/ssd300.py', '../_base_/datasets/coco_detection.py',
    '../_base_/default_runtime.py'
]
# Two classes; basesize_ratio_range tunes anchor scales relative to the
# 300x300 input.
model = dict(
    bbox_head=dict(
        num_classes=2, anchor_generator=dict(basesize_ratio_range=(0.2,
                                                                   0.9))))
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# Dataset-specific per-channel normalization statistics.
img_norm_cfg = dict(mean=[154.04, 159.83, 181.41], std=[13.59, 17.70, 25.08], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(type='LoadAnnotations', with_bbox=True),
    # Photometric jitter: brightness / contrast / saturation / hue.
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    # Place the image on a larger mean-filled canvas (zoom-out aug).
    dict(
        type='Expand',
        mean=img_norm_cfg['mean'],
        to_rgb=img_norm_cfg['to_rgb'],
        ratio_range=(1, 4)),
    # Random crop constrained by a minimum IoU with the ground truth.
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
        min_crop_size=0.3),
    # SSD300 uses a fixed 300x300 input, aspect ratio not preserved.
    dict(type='Resize', img_scale=(300, 300), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(300, 300),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=False),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# One folder per split, each with a single _annotations.coco.json file.
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=3,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train/_annotations.coco.json',
        img_prefix=data_root + 'train/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'valid/_annotations.coco.json',
        img_prefix=data_root + 'valid/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'test/_annotations.coco.json',
        img_prefix=data_root + 'test/',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)
optimizer_config = dict()
# learning policy
# NOTE(review): LR steps at epochs 16/20 while total_epochs is 1000, so
# the LR never changes again after epoch 20 — confirm this is intended.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[16, 20])
# runtime settings
total_epochs = 1000
# checkpoint saving configuration: save a checkpoint every 250 epochs
checkpoint_config = dict(interval=250)
# SSD300 on Pascal VOC (20 classes). This appears to be the unmodified
# stock mmdetection config — note total_epochs = 24 and per-epoch
# checkpointing, unlike the customized configs in this commit.
_base_ = [
    '../_base_/models/ssd300.py', '../_base_/datasets/voc0712.py',
    '../_base_/default_runtime.py'
]
model = dict(
    bbox_head=dict(
        num_classes=20, anchor_generator=dict(basesize_ratio_range=(0.2,
                                                                    0.9))))
# dataset settings
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
# ImageNet channel means with unit std (kept from the original config).
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(type='LoadAnnotations', with_bbox=True),
    # Photometric jitter: brightness / contrast / saturation / hue.
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    # Place the image on a larger mean-filled canvas (zoom-out aug).
    dict(
        type='Expand',
        mean=img_norm_cfg['mean'],
        to_rgb=img_norm_cfg['to_rgb'],
        ratio_range=(1, 4)),
    # Random crop constrained by a minimum IoU with the ground truth.
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
        min_crop_size=0.3),
    dict(type='Resize', img_scale=(300, 300), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(300, 300),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=False),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=3,
    # Repeat the training set 10x within each epoch.
    train=dict(
        type='RepeatDataset', times=10, dataset=dict(pipeline=train_pipeline)),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)
optimizer_config = dict()
# learning policy: warm up linearly, then drop LR at epochs 16 and 20
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[16, 20])
# save a checkpoint every epoch
checkpoint_config = dict(interval=1)
# runtime settings
total_epochs = 24
# SSD300 on the custom VOC-format dataset (2 classes).
_base_ = [
    '../_base_/models/ssd300.py', '../_base_/datasets/voc0712.py',
    '../_base_/default_runtime.py'
]
# Two classes; basesize_ratio_range tunes anchor scales relative to the
# 300x300 input.
model = dict(
    bbox_head=dict(
        num_classes=2, anchor_generator=dict(basesize_ratio_range=(0.2,
                                                                   0.9))))
# dataset settings
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
# Dataset-specific per-channel normalization statistics.
img_norm_cfg = dict(mean=[154.04, 159.83, 181.41], std=[13.59, 17.70, 25.08], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(type='LoadAnnotations', with_bbox=True),
    # Photometric jitter: brightness / contrast / saturation / hue.
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    # Place the image on a larger mean-filled canvas (zoom-out aug).
    dict(
        type='Expand',
        mean=img_norm_cfg['mean'],
        to_rgb=img_norm_cfg['to_rgb'],
        ratio_range=(1, 4)),
    # Random crop constrained by a minimum IoU with the ground truth.
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
        min_crop_size=0.3),
    dict(type='Resize', img_scale=(300, 300), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(300, 300),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=False),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Only the pipelines are overridden here; ann_file/img_prefix come from
# the base voc0712 config.
data = dict(
    train=dict(
        dataset=dict(pipeline=train_pipeline)),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict()
# learning policy
# NOTE(review): LR steps at epochs 16/20 while total_epochs is 1000, so
# the LR never changes again after epoch 20 — confirm this is intended.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[16, 20])
# runtime settings
total_epochs = 1000
# checkpoint saving configuration: save a checkpoint every 250 epochs
checkpoint_config = dict(interval=250)
# YOLOv3 (Darknet-53 backbone) on the custom COCO-format dataset.
_base_ = '../_base_/default_runtime.py'
# model settings
model = dict(
    type='YOLOV3',
    pretrained='open-mmlab://darknet53',
    # Use the last three backbone stages as multi-scale features.
    backbone=dict(type='Darknet', depth=53, out_indices=(3, 4, 5)),
    neck=dict(
        type='YOLOV3Neck',
        num_scales=3,
        in_channels=[1024, 512, 256],
        out_channels=[512, 256, 128]),
    bbox_head=dict(
        type='YOLOV3Head',
        num_classes=2,
        in_channels=[512, 256, 128],
        out_channels=[1024, 512, 256],
        # One anchor set per feature-map stride (coarse to fine).
        anchor_generator=dict(
            type='YOLOAnchorGenerator',
            base_sizes=[[(116, 90), (156, 198), (373, 326)],
                        [(30, 61), (62, 45), (59, 119)],
                        [(10, 13), (16, 30), (33, 23)]],
            strides=[32, 16, 8]),
        bbox_coder=dict(type='YOLOBBoxCoder'),
        featmap_strides=[32, 16, 8],
        # Sigmoid cross-entropy for class and objectness scores; the
        # xy-offset and wh losses are weighted 2x.
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
            reduction='sum'),
        loss_conf=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
            reduction='sum'),
        loss_xy=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=2.0,
            reduction='sum'),
        loss_wh=dict(type='MSELoss', loss_weight=2.0, reduction='sum')))
# training and testing settings
train_cfg = dict(
    assigner=dict(
        type='GridAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0))
test_cfg = dict(
    nms_pre=1000,
    min_bbox_size=0,
    score_thr=0.05,
    conf_thr=0.005,
    nms=dict(type='nms', iou_threshold=0.45),
    max_per_img=100)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# Dataset-specific per-channel normalization statistics.
img_norm_cfg = dict(mean=[154.04, 159.83, 181.41], std=[13.59, 17.70, 25.08], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='PhotoMetricDistortion'),
    # Place the image on a larger mean-filled canvas (zoom-out aug).
    dict(
        type='Expand',
        mean=img_norm_cfg['mean'],
        to_rgb=img_norm_cfg['to_rgb'],
        ratio_range=(1, 2)),
    # Random crop constrained by a minimum IoU with the ground truth.
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
        min_crop_size=0.3),
    # Multi-scale training between 320x320 and 608x608.
    dict(type='Resize', img_scale=[(320, 320), (608, 608)], keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(608, 608),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
# One folder per split, each with a single _annotations.coco.json file.
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=3,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train/_annotations.coco.json',
        img_prefix=data_root + 'train/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'valid/_annotations.coco.json',
        img_prefix=data_root + 'valid/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'test/_annotations.coco.json',
        img_prefix=data_root + 'test/',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005)
# Clip gradient norm to stabilize training.
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=2000, # same as burn-in in darknet