diff --git a/configs/ote/person-detection/person-detection-0200/compression_config.json b/configs/ote/person-detection/person-detection-0200/compression_config.json new file mode 100644 index 00000000000..fa7b87948a2 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0200/compression_config.json @@ -0,0 +1,46 @@ +{ + "base": { + "find_unused_parameters": true, + "nncf_config": { + "input_info": { + "sample_size": [ + 1, + 3, + 256, + 256 + ] + }, + "compression": [], + "log_dir": "." + } + }, + "nncf_quantization": { + "optimizer": { + "type": "SGD", + "lr": 0.00005, + "momentum": 0.9, + "weight_decay": 0.0005 + }, + "runner": { + "max_epochs": 2 + }, + "nncf_config": { + "compression": [ + { + "algorithm": "quantization", + "initializer": { + "range": { + "num_init_samples": 10 + }, + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 30 + } + } + } + ] + } + }, + "order_of_parts": [ + "nncf_quantization" + ] +} \ No newline at end of file diff --git a/configs/ote/person-detection/person-detection-0200/model.py b/configs/ote/person-detection/person-detection-0200/model.py new file mode 100644 index 00000000000..d403c5b51d5 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0200/model.py @@ -0,0 +1,162 @@ +# model settings +input_size = 256 +image_width, image_height = input_size, input_size +width_mult = 1.0 +model = dict( + type='SingleStageDetector', + backbone=dict( + type='mobilenetv2_w1', + out_indices=(4, 5), + frozen_stages=-1, + norm_eval=False, + pretrained=True + ), + neck=None, + bbox_head=dict( + type='SSDHead', + num_classes=1, + in_channels=(int(width_mult * 96), int(width_mult * 320)), + anchor_generator=dict( + type='SSDAnchorGeneratorClustered', + strides=(16, 32), + widths=[ + [image_width * x for x in + [0.015411783166343854, 0.033018232306549156, 0.04467156688464953, + 0.0610697815328886]], + [image_width * x for x in + [0.0789599954420517, 0.10113984043326349, 0.12805187473050397, 0.16198319380154758, + 
0.21636496806213493]], + + ], + heights=[ + [image_height * x for x in + [0.05032631418898226, 0.10070800135152037, 0.15806180366055939, + 0.22343401646383804]], + [image_height * x for x in + [0.300881401352503, 0.393181898580379, 0.4998807213337051, 0.6386035764261081, + 0.8363451552091518]], + + ], + ), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=(.0, .0, .0, .0), + target_stds=(0.1, 0.1, 0.2, 0.2), ), + depthwise_heads=True, + depthwise_heads_activations='relu', + loss_balancing=True), + # model training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.4, + neg_iou_thr=0.4, + min_pos_iou=0., + ignore_iof_thr=-1, + gt_max_assign_all=False), + smoothl1_beta=1., + use_giou=False, + use_focal=False, + allowed_border=-1, + pos_weight=-1, + neg_pos_ratio=3, + debug=False), + test_cfg=dict( + nms=dict(type='nms', iou_threshold=0.45), + min_bbox_size=0, + score_thr=0.02, + max_per_img=200)) +cudnn_benchmark = True +# dataset settings +dataset_type = 'CocoDataset' +data_root = '../../data/airport/' +img_norm_cfg = dict(mean=[0, 0, 0], std=[255, 255, 255], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.1), + dict(type='Resize', img_scale=(input_size, input_size), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(input_size, input_size), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', 
**img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=122, + workers_per_gpu=3, + train=dict( + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_train.json', + min_size=20, + img_prefix=data_root + 'train', + pipeline=train_pipeline + ) + ), + val=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline), + test=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1200, + warmup_ratio=1.0 / 3, + step=[8, 15, 18]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=1, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=20) +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = 'output' +load_from = 'https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0200-1.pth' +resume_from = None +evaluation = dict(interval=1, metric='mAP', save_best='mAP') +workflow = [('train', 1)] diff --git a/configs/ote/person-detection/person-detection-0200/template_experimental.yaml b/configs/ote/person-detection/person-detection-0200/template_experimental.yaml new file mode 100644 index 00000000000..3cfd0b91e95 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0200/template_experimental.yaml @@ -0,0 +1,56 @@ +# Description. 
+model_template_id: Person_Detection_0200 +name: person-detection-0200 +task_type: DETECTION +task_family: VISION +instantiation: "CLASS" +summary: Fastest Person Detection model for large and simple objects (MobileNetV2-SSD). +application: + ~ + +# Algo backend. +framework: OTEDetection v2.9.1 + +# Task implementations. +entrypoints: + base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask + openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask + nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask + +# Capabilities. +capabilities: + - compute_representations + +# Hyperparameters. +hyper_parameters: + base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml + parameter_overrides: + learning_parameters: + batch_size: + default_value: 122 + learning_rate: + default_value: 0.05 + learning_rate_warmup_iters: + default_value: 1200 + num_iters: + default_value: 20 + nncf_optimization: + enable_quantization: + default_value: true + enable_pruning: + default_value: false + maximal_accuracy_degradation: + default_value: 0.01 + +# Training resources. +max_nodes: 1 +training_targets: + - GPU + - CPU + +# Stats. +gigaflops: 0.82 #to be checked with model-analyzer +size: 1.83 + +# Obsolete +# gpu_nums: 2 diff --git a/configs/ote/person-detection/person-detection-0201/compression_config.json b/configs/ote/person-detection/person-detection-0201/compression_config.json new file mode 100644 index 00000000000..35a22a6b267 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0201/compression_config.json @@ -0,0 +1,46 @@ +{ + "base": { + "find_unused_parameters": true, + "nncf_config": { + "input_info": { + "sample_size": [ + 1, + 3, + 384, + 384 + ] + }, + "compression": [], + "log_dir": "." 
+ } + }, + "nncf_quantization": { + "optimizer": { + "type": "SGD", + "lr": 0.00005, + "momentum": 0.9, + "weight_decay": 0.0005 + }, + "runner": { + "max_epochs": 2 + }, + "nncf_config": { + "compression": [ + { + "algorithm": "quantization", + "initializer": { + "range": { + "num_init_samples": 10 + }, + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 30 + } + } + } + ] + } + }, + "order_of_parts": [ + "nncf_quantization" + ] +} \ No newline at end of file diff --git a/configs/ote/person-detection/person-detection-0201/model.py b/configs/ote/person-detection/person-detection-0201/model.py new file mode 100644 index 00000000000..89a151f748d --- /dev/null +++ b/configs/ote/person-detection/person-detection-0201/model.py @@ -0,0 +1,162 @@ +# model settings +input_size = 384 +image_width, image_height = input_size, input_size +width_mult = 1.0 +model = dict( + type='SingleStageDetector', + backbone=dict( + type='mobilenetv2_w1', + out_indices=(4, 5), + frozen_stages=-1, + norm_eval=False, + pretrained=True + ), + neck=None, + bbox_head=dict( + type='SSDHead', + num_classes=1, + in_channels=(int(width_mult * 96), int(width_mult * 320)), + anchor_generator=dict( + type='SSDAnchorGeneratorClustered', + strides=(16, 32), + widths=[ + [image_width * x for x in + [0.015411783166343854, 0.033018232306549156, 0.04467156688464953, + 0.0610697815328886]], + [image_width * x for x in + [0.0789599954420517, 0.10113984043326349, 0.12805187473050397, 0.16198319380154758, + 0.21636496806213493]], + + ], + heights=[ + [image_height * x for x in + [0.05032631418898226, 0.10070800135152037, 0.15806180366055939, + 0.22343401646383804]], + [image_height * x for x in + [0.300881401352503, 0.393181898580379, 0.4998807213337051, 0.6386035764261081, + 0.8363451552091518]], + + ], + ), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=(.0, .0, .0, .0), + target_stds=(0.1, 0.1, 0.2, 0.2), ), + depthwise_heads=True, + depthwise_heads_activations='relu', + 
loss_balancing=True), + # model training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.4, + neg_iou_thr=0.4, + min_pos_iou=0., + ignore_iof_thr=-1, + gt_max_assign_all=False), + smoothl1_beta=1., + use_giou=False, + use_focal=False, + allowed_border=-1, + pos_weight=-1, + neg_pos_ratio=3, + debug=False), + test_cfg=dict( + nms=dict(type='nms', iou_threshold=0.45), + min_bbox_size=0, + score_thr=0.02, + max_per_img=200)) +cudnn_benchmark = True +# dataset settings +dataset_type = 'CocoDataset' +data_root = '../../data/airport/' +img_norm_cfg = dict(mean=[0, 0, 0], std=[255, 255, 255], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.1), + dict(type='Resize', img_scale=(input_size, input_size), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(input_size, input_size), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=84, + workers_per_gpu=3, + train=dict( + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_train.json', + min_size=20, + img_prefix=data_root + 'train', + pipeline=train_pipeline + ) + ), + val=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 
'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline), + test=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1200, + warmup_ratio=1.0 / 3, + step=[8, 15, 18]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=1, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=20) +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = 'output' +load_from = 'https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0201-1.pth' +resume_from = None +evaluation = dict(interval=1, metric='mAP', save_best='mAP') +workflow = [('train', 1)] \ No newline at end of file diff --git a/configs/ote/person-detection/person-detection-0201/template_experimental.yaml b/configs/ote/person-detection/person-detection-0201/template_experimental.yaml new file mode 100644 index 00000000000..1283a1b7a13 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0201/template_experimental.yaml @@ -0,0 +1,56 @@ +# Description. +model_template_id: Person_Detection_0201 +name: person-detection-0201 +task_type: DETECTION +task_family: VISION +instantiation: "CLASS" +summary: Fast Person Detection models (MobileNetV2-SSD). +application: + ~ + +# Algo backend. +framework: OTEDetection v2.9.1 + +# Task implementations. 
+entrypoints: + base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask + openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask + nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask + +# Capabilities. +capabilities: + - compute_representations + +# Hyperparameters. +hyper_parameters: + base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml + parameter_overrides: + learning_parameters: + batch_size: + default_value: 84 + learning_rate: + default_value: 0.05 + learning_rate_warmup_iters: + default_value: 1200 + num_iters: + default_value: 20 + nncf_optimization: + enable_quantization: + default_value: true + enable_pruning: + default_value: false + maximal_accuracy_degradation: + default_value: 0.01 + +# Training resources. +max_nodes: 1 +training_targets: + - GPU + - CPU + +# Stats. +gigaflops: 1.84 #to be checked with model-analyzer +size: 1.83 + +# Obsolete +# gpu_nums: 4 diff --git a/configs/ote/person-detection/person-detection-0202/compression_config.json b/configs/ote/person-detection/person-detection-0202/compression_config.json new file mode 100644 index 00000000000..390711bdeec --- /dev/null +++ b/configs/ote/person-detection/person-detection-0202/compression_config.json @@ -0,0 +1,46 @@ +{ + "base": { + "find_unused_parameters": true, + "nncf_config": { + "input_info": { + "sample_size": [ + 1, + 3, + 512, + 512 + ] + }, + "compression": [], + "log_dir": "." 
+ } + }, + "nncf_quantization": { + "optimizer": { + "type": "SGD", + "lr": 0.00005, + "momentum": 0.9, + "weight_decay": 0.0005 + }, + "runner": { + "max_epochs": 2 + }, + "nncf_config": { + "compression": [ + { + "algorithm": "quantization", + "initializer": { + "range": { + "num_init_samples": 10 + }, + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 30 + } + } + } + ] + } + }, + "order_of_parts": [ + "nncf_quantization" + ] +} \ No newline at end of file diff --git a/configs/ote/person-detection/person-detection-0202/model.py b/configs/ote/person-detection/person-detection-0202/model.py new file mode 100644 index 00000000000..c171b700c24 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0202/model.py @@ -0,0 +1,162 @@ +# model settings +input_size = 512 +image_width, image_height = input_size, input_size +width_mult = 1.0 +model = dict( + type='SingleStageDetector', + backbone=dict( + type='mobilenetv2_w1', + out_indices=(4, 5), + frozen_stages=-1, + norm_eval=False, + pretrained=True + ), + neck=None, + bbox_head=dict( + type='SSDHead', + num_classes=1, + in_channels=(int(width_mult * 96), int(width_mult * 320)), + anchor_generator=dict( + type='SSDAnchorGeneratorClustered', + strides=(16, 32), + widths=[ + [image_width * x for x in + [0.015411783166343854, 0.033018232306549156, 0.04467156688464953, + 0.0610697815328886]], + [image_width * x for x in + [0.0789599954420517, 0.10113984043326349, 0.12805187473050397, 0.16198319380154758, + 0.21636496806213493]], + + ], + heights=[ + [image_height * x for x in + [0.05032631418898226, 0.10070800135152037, 0.15806180366055939, + 0.22343401646383804]], + [image_height * x for x in + [0.300881401352503, 0.393181898580379, 0.4998807213337051, 0.6386035764261081, + 0.8363451552091518]], + + ], + ), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=(.0, .0, .0, .0), + target_stds=(0.1, 0.1, 0.2, 0.2), ), + depthwise_heads=True, + depthwise_heads_activations='relu', + 
loss_balancing=True), + # model training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.4, + neg_iou_thr=0.4, + min_pos_iou=0., + ignore_iof_thr=-1, + gt_max_assign_all=False), + smoothl1_beta=1., + use_giou=False, + use_focal=False, + allowed_border=-1, + pos_weight=-1, + neg_pos_ratio=3, + debug=False), + test_cfg=dict( + nms=dict(type='nms', iou_threshold=0.45), + min_bbox_size=0, + score_thr=0.02, + max_per_img=200)) +cudnn_benchmark = True +# dataset settings +dataset_type = 'CocoDataset' +data_root = '../../data/airport/' +img_norm_cfg = dict(mean=[0, 0, 0], std=[255, 255, 255], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.1), + dict(type='Resize', img_scale=(input_size, input_size), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(input_size, input_size), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=30, + workers_per_gpu=3, + train=dict( + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_train.json', + min_size=20, + img_prefix=data_root + 'train', + pipeline=train_pipeline + ) + ), + val=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 
'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline), + test=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1200, + warmup_ratio=1.0 / 3, + step=[8, 15, 18]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=1, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=20) +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = 'output' +load_from = 'https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0202-1.pth' +resume_from = None +evaluation = dict(interval=1, metric='mAP', save_best='mAP') +workflow = [('train', 1)] \ No newline at end of file diff --git a/configs/ote/person-detection/person-detection-0202/template_experimental.yaml b/configs/ote/person-detection/person-detection-0202/template_experimental.yaml new file mode 100644 index 00000000000..bd149350235 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0202/template_experimental.yaml @@ -0,0 +1,56 @@ +# Description. +model_template_id: Person_Detection_0202 +name: person-detection-0202 +task_type: DETECTION +task_family: VISION +instantiation: "CLASS" +summary: Medium Person Detection model (MobileNetV2-SSD). +application: + ~ + +# Algo backend. +framework: OTEDetection v2.9.1 + +# Task implementations. 
+entrypoints: + base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask + openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask + nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask + +# Capabilities. +capabilities: + - compute_representations + +# Hyperparameters. +hyper_parameters: + base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml + parameter_overrides: + learning_parameters: + batch_size: + default_value: 30 + learning_rate: + default_value: 0.05 + learning_rate_warmup_iters: + default_value: 1200 + num_iters: + default_value: 20 + nncf_optimization: + enable_quantization: + default_value: true + enable_pruning: + default_value: false + maximal_accuracy_degradation: + default_value: 0.01 + +# Training resources. +max_nodes: 1 +training_targets: + - GPU + - CPU + +# Stats. +gigaflops: 2.52 #to be checked with model-analyzer +size: 1.83 + +# Obsolete +# gpu_nums: 2 diff --git a/configs/ote/person-detection/person-detection-0203/compression_config.json b/configs/ote/person-detection/person-detection-0203/compression_config.json new file mode 100755 index 00000000000..671a8e47ac3 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0203/compression_config.json @@ -0,0 +1,47 @@ +{ + "base": { + "find_unused_parameters": true, + "nncf_config": { + "input_info": { + "sample_size": [ + 1, + 3, + 480, + 864 + ] + }, + "compression": [], + "log_dir": "." 
+ } + }, + "nncf_quantization": { + "optimizer": { + "type": "SGD", + "lr": 0.00005, + "momentum": 0.9, + "weight_decay": 0.0005 + }, + "runner": { + "max_epochs": 1 + }, + "nncf_config": { + "compression": [ + { + "algorithm": "quantization", + "initializer": { + "range": { + "num_init_samples": 10 + }, + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 30 + } + } + } + ] + } + }, + "order_of_parts": [ + "nncf_quantization" + ] +} + diff --git a/configs/ote/person-detection/person-detection-0203/model.py b/configs/ote/person-detection/person-detection-0203/model.py new file mode 100644 index 00000000000..53acb329637 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0203/model.py @@ -0,0 +1,158 @@ +# model settings +model = dict( + type='ATSS', + backbone=dict( + type='mobilenetv2_w1', + out_indices=(2, 3, 4, 5), + frozen_stages=-1, + norm_eval=False, + pretrained=True + ), + neck=dict( + type='FPN', + in_channels=[24, 32, 96, 320], + out_channels=32, + start_level=1, + add_extra_convs=True, + extra_convs_on_inputs=False, + num_outs=5), + bbox_head=dict( + type='ATSSHead', + num_classes=1, + in_channels=32, + stacked_convs=4, + feat_channels=32, anchor_generator=dict( + type='AnchorGenerator', + ratios=[0.5, 1.0, 2.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=2.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) +# dataset 
settings +dataset_type = 'CocoDataset' +data_root = '../../data/airport/' +img_norm_cfg = dict( + mean=[0, 0, 0], std=[255, 255, 255], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict(type='Expand', ratio_range=(1, 3)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict( + type='Resize', + img_scale=[(864, 480), (864, 640)], + keep_ratio=False), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(864, 480), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=14, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=1, + dataset=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_train.json', + img_prefix=data_root + 'train', + pipeline=train_pipeline + ) + ), + val=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline), + test=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', + lr=0.025, + momentum=0.9, + weight_decay=0.0001) 
+optimizer_config = dict() +# learning policy +lr_config = dict( + policy='step', + warmup='constant', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[10, 15, 18]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=10, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=20) +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = 'output' +load_from = 'https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0203.pth' +resume_from = None +evaluation = dict(interval=1, metric='mAP', save_best='mAP') +workflow = [('train', 1)] \ No newline at end of file diff --git a/configs/ote/person-detection/person-detection-0203/template_experimental.yaml b/configs/ote/person-detection/person-detection-0203/template_experimental.yaml new file mode 100644 index 00000000000..158f89f0877 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0203/template_experimental.yaml @@ -0,0 +1,56 @@ +# Description. +model_template_id: Person_Detection_0203 +name: person-detection-0203 +task_type: DETECTION +task_family: VISION +instantiation: "CLASS" +summary: Medium Person Detection model for small and hard objects (MobileNetV2-ATSS). +application: + ~ + +# Algo backend. +framework: OTEDetection v2.9.1 + +# Task implementations. +entrypoints: + base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask + openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask + nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask + +# Capabilities. +capabilities: + - compute_representations + +# Hyperparameters. 
+hyper_parameters: + base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml + parameter_overrides: + learning_parameters: + batch_size: + default_value: 14 + learning_rate: + default_value: 0.025 + learning_rate_warmup_iters: + default_value: 500 + num_iters: + default_value: 20 + nncf_optimization: + enable_quantization: + default_value: true + enable_pruning: + default_value: false + maximal_accuracy_degradation: + default_value: 0.01 + +# Training resources. +max_nodes: 1 +training_targets: + - GPU + - CPU + +# Stats. +gigaflops: 6.74 #to be checked with model-analyzer +size: 1.83 + +# Obsolete +# gpu_nums: 2 diff --git a/configs/ote/person-detection/person-detection-0301/accuracy-check.yml b/configs/ote/person-detection/person-detection-0301/accuracy-check.yml new file mode 100644 index 00000000000..fdcfe010d0c --- /dev/null +++ b/configs/ote/person-detection/person-detection-0301/accuracy-check.yml @@ -0,0 +1,24 @@ +models: + - name: person-detection-0301 + + launchers: + - framework: dlsdk + adapter: + + type: class_agnostic_detection + scale: [0.0007440476, 0.00125] + + datasets: + - name: crossroad_extra_untagged_person_hb + preprocessing: + - type: resize + dst_width: 1344 + dst_height: 800 + + postprocessing: + - type: resize_prediction_boxes + - type: cast_to_int + - type: nms + overlap: 0.6 + - type: clip_boxes + apply_to: prediction diff --git a/configs/ote/person-detection/person-detection-0301/compression_config.json b/configs/ote/person-detection/person-detection-0301/compression_config.json new file mode 100644 index 00000000000..496a9c68a03 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0301/compression_config.json @@ -0,0 +1,46 @@ +{ + "base": { + "find_unused_parameters": true, + "nncf_config": { + "input_info": { + "sample_size": [ + 1, + 3, + 800, + 1344 + ] + }, + "compression": [], + "log_dir": "." 
+ } + }, + "nncf_quantization": { + "optimizer": { + "type": "SGD", + "lr": 0.001, + "momentum": 0.9, + "weight_decay": 0.0001 + }, + "runner": { + "max_epochs": 2 + }, + "nncf_config": { + "compression": [ + { + "algorithm": "quantization", + "initializer": { + "range": { + "num_init_samples": 10 + }, + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 30 + } + } + } + ] + } + }, + "order_of_parts": [ + "nncf_quantization" + ] +} \ No newline at end of file diff --git a/configs/ote/person-detection/person-detection-0301/model.py b/configs/ote/person-detection/person-detection-0301/model.py new file mode 100644 index 00000000000..1f210c975d4 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0301/model.py @@ -0,0 +1,157 @@ +model = dict( + type='VFNet', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5, + relu_before_extra_convs=True), + bbox_head=dict( + type='VFNetHead', + num_classes=1, + in_channels=256, + stacked_convs=3, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + center_sampling=False, + dcn_on_last_conv=False, + use_atss=True, + use_vfl=True, + loss_cls=dict( + type='VarifocalLoss', + use_sigmoid=True, + alpha=0.75, + gamma=2.0, + iou_weighted=True, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=1.5), + loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0)), + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.01, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) +cudnn_benchmark = True + +dataset_type = 'CocoDataset' +data_root = '../../data/airport/' +test_pipeline = [ + 
dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1344, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict( + type='Normalize', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.1), + dict( + type='Resize', + img_scale=[(1344, 480), (1344, 960)], + multiscale_mode='range', + keep_ratio=False), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='Normalize', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_train.json', + min_size=20, + img_prefix=data_root + 'train', + pipeline=train_pipeline + ) + ), + val=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline), + test=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline)) + + +optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +lr_config = dict( + policy='step', + 
warmup='constant', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[10, 15, 18]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook')]) +# yapf:enable +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=20) +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = 'output' +load_from = 'https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/object_detection/v2/person_detection_0301.pth' +resume_from = None +evaluation = dict(interval=1, metric='mAP', save_best='mAP') +workflow = [('train', 1)] diff --git a/configs/ote/person-detection/person-detection-0301/template_experimental.yaml b/configs/ote/person-detection/person-detection-0301/template_experimental.yaml new file mode 100644 index 00000000000..f5ebf8848a0 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0301/template_experimental.yaml @@ -0,0 +1,54 @@ +# Description. +model_template_id: Person_Detection_0301 +name: person-detection-0301 +task_type: DETECTION +task_family: VISION +instantiation: "CLASS" +summary: Accurate person detection model (Resnet50-VFnet). +application: + ~ + +# Algo backend. +framework: OTEDetection v2.9.1 + +# Task implementations. +entrypoints: + base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask + openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask + nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask + +# Capabilities. +capabilities: + - compute_representations + +# Hyperparameters. 
+hyper_parameters: + base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml + parameter_overrides: + learning_parameters: + batch_size: + default_value: 4 + learning_rate: + default_value: 0.001 + learning_rate_warmup_iters: + default_value: 500 + num_iters: + default_value: 20 + nncf_optimization: + enable_quantization: + default_value: true + enable_pruning: + default_value: false + maximal_accuracy_degradation: + default_value: 0.01 + +# Training resources. +max_nodes: 1 +training_targets: + - GPU + - CPU + +# Stats. +gigaflops: 198 +size: 32.48 + diff --git a/configs/ote/person-detection/person-detection-0302/accuracy-check.yml b/configs/ote/person-detection/person-detection-0302/accuracy-check.yml new file mode 100644 index 00000000000..4578634a011 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0302/accuracy-check.yml @@ -0,0 +1,24 @@ +models: + - name: person-detection-0302 + + launchers: + - framework: dlsdk + adapter: + + type: class_agnostic_detection + scale: [0.00078125, 0.0013888888] + + datasets: + - name: crossroad_extra_untagged_person_hb + preprocessing: + - type: resize + dst_width: 1280 + dst_height: 720 + + postprocessing: + - type: resize_prediction_boxes + - type: cast_to_int + - type: nms + overlap: 0.6 + - type: clip_boxes + apply_to: prediction diff --git a/configs/ote/person-detection/person-detection-0302/compression_config.json b/configs/ote/person-detection/person-detection-0302/compression_config.json new file mode 100644 index 00000000000..990eca03574 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0302/compression_config.json @@ -0,0 +1,46 @@ +{ + "base": { + "find_unused_parameters": true, + "nncf_config": { + "input_info": { + "sample_size": [ + 1, + 3, + 720, + 1280 + ] + }, + "compression": [], + "log_dir": "." 
+ } + }, + "nncf_quantization": { + "optimizer": { + "type": "SGD", + "lr": 0.001, + "momentum": 0.9, + "weight_decay": 0.0001 + }, + "runner": { + "max_epochs": 2 + }, + "nncf_config": { + "compression": [ + { + "algorithm": "quantization", + "initializer": { + "range": { + "num_init_samples": 10 + }, + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 30 + } + } + } + ] + } + }, + "order_of_parts": [ + "nncf_quantization" + ] +} \ No newline at end of file diff --git a/configs/ote/person-detection/person-detection-0302/model.py b/configs/ote/person-detection/person-detection-0302/model.py new file mode 100644 index 00000000000..d5b555da72e --- /dev/null +++ b/configs/ote/person-detection/person-detection-0302/model.py @@ -0,0 +1,158 @@ +model = dict( + type='ATSS', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5, + relu_before_extra_convs=True), + bbox_head=dict( + type='ATSSHead', + num_classes=1, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=2.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + # 
score_thr=0.05, + score_thr=0.2, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) + +dataset_type = 'CocoDataset' +data_root = '../../data/airport/' +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1280, 720), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict( + type='Normalize', + mean=[0, 0, 0], + std=[255, 255, 255], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict( + type='Resize', + img_scale=[(1280, 720), (896, 720), (1088, 720), + (1280, 672), (1280, 800)], + multiscale_mode='value', + keep_ratio=False), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='Normalize', + mean=[0, 0, 0], + std=[255, 255, 255], + to_rgb=True), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_train.json', + min_size=20, + img_prefix=data_root + 'train', + pipeline=train_pipeline + ) + ), + val=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline), + test=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline)) + +optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict() +lr_config = dict( + policy='ReduceLROnPlateau', + metric='bbox_mAP', + 
patience=5, + iteration_patience=600, + interval=1, + min_lr=9e-06, + warmup='linear', + warmup_iters=200, + warmup_ratio=0.3333333333333333) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=100, + hooks=[dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook')]) +# yapf:enable +runner = dict(type='EpochRunnerWithCancel', max_epochs=20) +evaluation = dict(interval=1, metric='mAP', save_best='mAP') +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = 'https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/object_detection/v2/person_detection_0302.pth' +resume_from = None +workflow = [('train', 1)] diff --git a/configs/ote/person-detection/person-detection-0302/template_experimental.yaml b/configs/ote/person-detection/person-detection-0302/template_experimental.yaml new file mode 100644 index 00000000000..1a83e5c5367 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0302/template_experimental.yaml @@ -0,0 +1,56 @@ +# Description. +model_template_id: Person_Detection_0302 +name: person-detection-0302 +task_type: DETECTION +task_family: VISION +instantiation: "CLASS" +summary: Moderate Person Detection model (Resnet50-ATSS). +application: + ~ + +# Algo backend. +framework: OTEDetection v2.9.1 + +# Task implementations. +entrypoints: + base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask + openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask + nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask + +# Capabilities. +capabilities: + - compute_representations + +# Hyperparameters. 
+hyper_parameters: + base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml + parameter_overrides: + learning_parameters: + batch_size: + default_value: 4 + learning_rate: + default_value: 0.001 + learning_rate_warmup_iters: + default_value: 200 + num_iters: + default_value: 20 + nncf_optimization: + enable_quantization: + default_value: true + enable_pruning: + default_value: false + maximal_accuracy_degradation: + default_value: 0.01 + +# Training resources. +max_nodes: 1 +training_targets: + - GPU + - CPU + +# Stats. +gigaflops: 181.28 +size: 31.89 + +# Obsolete +# gpu_nums: 2 diff --git a/configs/ote/person-detection/person-detection-0303/accuracy-check.yml b/configs/ote/person-detection/person-detection-0303/accuracy-check.yml new file mode 100644 index 00000000000..aa1022a606f --- /dev/null +++ b/configs/ote/person-detection/person-detection-0303/accuracy-check.yml @@ -0,0 +1,24 @@ +models: + - name: person-detection-0303 + + launchers: + - framework: dlsdk + adapter: + + type: class_agnostic_detection + scale: [0.00078125, 0.0013888888] + + datasets: + - name: crossroad_extra_untagged_person_hb + preprocessing: + - type: resize + dst_width: 1280 + dst_height: 720 + + postprocessing: + - type: resize_prediction_boxes + - type: cast_to_int + - type: nms + overlap: 0.6 + - type: clip_boxes + apply_to: prediction diff --git a/configs/ote/person-detection/person-detection-0303/compression_config.json b/configs/ote/person-detection/person-detection-0303/compression_config.json new file mode 100755 index 00000000000..5e6d697de2b --- /dev/null +++ b/configs/ote/person-detection/person-detection-0303/compression_config.json @@ -0,0 +1,47 @@ +{ + "base": { + "find_unused_parameters": true, + "nncf_config": { + "input_info": { + "sample_size": [ + 1, + 3, + 720, + 1280 + ] + }, + "compression": [], + "log_dir": "." 
+ } + }, + "nncf_quantization": { + "optimizer": { + "type": "SGD", + "lr": 0.003, + "momentum": 0.9, + "weight_decay": 0.0001 + }, + "runner": { + "max_epochs": 2 + }, + "nncf_config": { + "compression": [ + { + "algorithm": "quantization", + "initializer": { + "range": { + "num_init_samples": 10 + }, + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 30 + } + } + } + ] + } + }, + "order_of_parts": [ + "nncf_quantization" + ] +} + diff --git a/configs/ote/person-detection/person-detection-0303/model.py b/configs/ote/person-detection/person-detection-0303/model.py new file mode 100644 index 00000000000..5ee37bc4e7c --- /dev/null +++ b/configs/ote/person-detection/person-detection-0303/model.py @@ -0,0 +1,156 @@ +model = dict( + type='ATSS', + backbone=dict( + type='mobilenetv2_w1', + out_indices=(2, 3, 4, 5), + frozen_stages=-1, + norm_eval=False, + pretrained=False, + ), + neck=dict( + type='FPN', + in_channels=[24, 32, 96, 320], + out_channels=64, + start_level=1, + add_extra_convs=True, + extra_convs_on_inputs=False, + num_outs=5, + relu_before_extra_convs=True), + bbox_head=dict( + type='ATSSHead', + num_classes=1, + in_channels=64, + stacked_convs=4, + feat_channels=64, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=2.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) + +dataset_type = 'CocoDataset' 
+data_root = '../../data/airport/' +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1280, 720), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict( + type='Normalize', + mean=[0, 0, 0], + std=[255, 255, 255], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict( + type='Resize', + img_scale=[(1280, 720), (896, 720), (1088, 720), + (1280, 672), (1280, 800)], + multiscale_mode='value', + keep_ratio=False), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='Normalize', + mean=[0, 0, 0], + std=[255, 255, 255], + to_rgb=True), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] + +data = dict( + samples_per_gpu=9, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_train.json', + min_size=20, + img_prefix=data_root + 'train', + pipeline=train_pipeline + ) + ), + val=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline), + test=dict( + type=dataset_type, + labels=('person',), + ann_file=data_root + 'annotation_person_val.json', + img_prefix=data_root + 'val', + test_mode=True, + pipeline=test_pipeline)) + +optimizer = dict(type='SGD', lr=0.003, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict() +lr_config = dict( + policy='ReduceLROnPlateau', + metric='bbox_mAP', + patience=5, + iteration_patience=600, + interval=1, + min_lr=9e-06, + warmup='linear', + warmup_iters=200, + 
warmup_ratio=0.3333333333333333) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=100, + hooks=[dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook')]) +# yapf:enable +runner = dict(type='EpochRunnerWithCancel', max_epochs=10) +evaluation = dict(interval=1, metric='mAP', save_best='mAP') +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = 'output' +load_from = 'https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/object_detection/v2/person_detection_0303.pth' +resume_from = None +workflow = [('train', 1)] diff --git a/configs/ote/person-detection/person-detection-0303/template_experimental.yaml b/configs/ote/person-detection/person-detection-0303/template_experimental.yaml new file mode 100644 index 00000000000..7eea3088b80 --- /dev/null +++ b/configs/ote/person-detection/person-detection-0303/template_experimental.yaml @@ -0,0 +1,56 @@ +# Description. +model_template_id: Person_Detection_0303 +name: person-detection-0303 +task_type: DETECTION +task_family: VISION +instantiation: "CLASS" +summary: Fast Person Detection model (MobileNetV2-ATSS). +application: + ~ + +# Algo backend. +framework: OTEDetection v2.9.1 + +# Task implementations. +entrypoints: + base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask + openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask + nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask + +# Capabilities. +capabilities: + - compute_representations + +# Hyperparameters. 
+hyper_parameters: + base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml + parameter_overrides: + learning_parameters: + batch_size: + default_value: 9 + learning_rate: + default_value: 0.003 + learning_rate_warmup_iters: + default_value: 600 + num_iters: + default_value: 10 + nncf_optimization: + enable_quantization: + default_value: true + enable_pruning: + default_value: false + maximal_accuracy_degradation: + default_value: 0.01 + +# Training resources. +max_nodes: 1 +training_targets: + - GPU + - CPU + +# Stats. +gigaflops: 12.31 #to be checked with model-analyzer +size: 2.33 + +# Obsolete +# gpu_nums: 4 diff --git a/configs/ote/person-detection/readme.md b/configs/ote/person-detection/readme.md new file mode 100644 index 00000000000..30a9e32974a --- /dev/null +++ b/configs/ote/person-detection/readme.md @@ -0,0 +1,11 @@ +# Person Detection + +| Model Name | Complexity (GFLOPs) | Size (Mp) | AP @ [IoU=0.50:0.95] (%) | Links | GPU_NUM | +| --- | --- | --- | --- | --- | --- | +| person-detection-0200 | 0.82 | 1.83 | 24.4 | [snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0200-1.pth), [config](./person-detection-0200/template.yaml) | 2 | +| person-detection-0201 | 1.84 | 1.83 | 29.9 | [snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0201-1.pth), [config](./person-detection-0201/template.yaml) | 4 | +| person-detection-0202 | 3.28 | 1.83 | 32.8 | [snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0202-1.pth), [config](./person-detection-0202/template.yaml) | 2 | +| person-detection-0203 | 6.74 | 1.95 | 40.8 | [snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0203.pth), [config](./person-detection-0203/template.yaml) | 2 | +| person-detection-0301 | 198 | 32.48 | | 
[snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0301-1.pth), [config](./person-detection-0301/template_experimental.yaml) | 4 | +| person-detection-0302 | 181.28 | 31.89 | | [snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0302-1.pth), [config](./person-detection-0302/template_experimental.yaml) | 2 | +| person-detection-0303 | 12.31 | 2.33 | | [snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0303.pth), [config](./person-detection-0303/template_experimental.yaml) | 2 | diff --git a/tests/test_ote_api.py b/tests/test_ote_api.py new file mode 100644 index 00000000000..42e4b5cc51e --- /dev/null +++ b/tests/test_ote_api.py @@ -0,0 +1,590 @@ +# Copyright (C) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. 
+ +import glob +import io +import os.path as osp +import random +import time +import unittest +import warnings +from concurrent.futures import ThreadPoolExecutor +from subprocess import run # nosec +from typing import Optional + +import numpy as np +import torch +from bson import ObjectId +from ote_sdk.test_suite.e2e_test_system import e2e_pytest_api +from ote_sdk.configuration.helper import convert, create +from ote_sdk.entities.annotation import AnnotationSceneEntity, AnnotationSceneKind +from ote_sdk.entities.dataset_item import DatasetItemEntity +from ote_sdk.entities.datasets import DatasetEntity +from ote_sdk.entities.image import Image +from ote_sdk.entities.inference_parameters import InferenceParameters +from ote_sdk.entities.model_template import TaskType, task_type_to_label_domain +from ote_sdk.entities.metrics import Performance +from ote_sdk.entities.model import ModelEntity, ModelFormat, ModelOptimizationType +from ote_sdk.entities.model_template import parse_model_template +from ote_sdk.entities.optimization_parameters import OptimizationParameters +from ote_sdk.entities.resultset import ResultSetEntity +from ote_sdk.entities.subset import Subset +from ote_sdk.entities.task_environment import TaskEnvironment +from ote_sdk.entities.train_parameters import TrainParameters +from ote_sdk.tests.test_helpers import generate_random_annotated_image +from ote_sdk.usecases.tasks.interfaces.export_interface import ExportType, IExportTask +from ote_sdk.usecases.tasks.interfaces.optimization_interface import OptimizationType +from ote_sdk.utils.shape_factory import ShapeFactory + +from mmdet.apis.ote.apis.detection import (OpenVINODetectionTask, OTEDetectionConfig, + OTEDetectionInferenceTask, + OTEDetectionNNCFTask, OTEDetectionTrainingTask) +from mmdet.apis.ote.apis.detection.ote_utils import generate_label_schema +from mmdet.integration.nncf.utils import is_nncf_enabled + +DEFAULT_TEMPLATE_DIR = osp.join('configs', 'ote', 'custom-object-detection', 
'gen3_mobilenetV2_ATSS') + +class ModelTemplate(unittest.TestCase): + def check_capabilities(self, template): + self.assertTrue(template.computes_representations()) + self.assertFalse(template.computes_uncertainty_score()) + self.assertEqual(len(template.capabilities), 1) + + @e2e_pytest_api + def test_reading_gen3_ssd(self): + template = parse_model_template(osp.join('configs', 'ote', 'custom-object-detection', 'gen3_mobilenetV2_SSD', 'template.yaml')) + self.check_capabilities(template) + + @e2e_pytest_api + def test_reading_gen3_atss(self): + template = parse_model_template(osp.join('configs', 'ote', 'custom-object-detection', 'gen3_mobilenetV2_ATSS', 'template.yaml')) + self.check_capabilities(template) + + @e2e_pytest_api + def test_reading_gen3_vfnet(self): + template = parse_model_template(osp.join('configs', 'ote', 'custom-object-detection', 'gen3_resnet50_VFNet', 'template_experimental.yaml')) + self.check_capabilities(template) + + @e2e_pytest_api + def test_reading_yolox(self): + template = parse_model_template( + osp.join('configs', 'ote', 'custom-object-detection', 'cspdarknet_YOLOX', + 'template.yaml')) + self.check_capabilities(template) + + +@e2e_pytest_api +def test_configuration_yaml(): + configuration = OTEDetectionConfig() + configuration_yaml_str = convert(configuration, str) + configuration_yaml_converted = create(configuration_yaml_str) + configuration_yaml_loaded = create(osp.join('mmdet', 'apis', 'ote', 'apis', 'detection', 'configuration.yaml')) + assert configuration_yaml_converted == configuration_yaml_loaded + + +class Sample(unittest.TestCase): + template = osp.join(DEFAULT_TEMPLATE_DIR, 'template.yaml') + + @e2e_pytest_api + def test_sample_on_cpu(self): + output = run('export CUDA_VISIBLE_DEVICES=;' + 'python mmdet/apis/ote/sample/sample.py ' + f'--export {self.template}', + shell=True, check=True) + assert output.returncode == 0 + + @e2e_pytest_api + def test_sample_on_gpu(self): + output = run('python 
mmdet/apis/ote/sample/sample.py ' + f'--export {self.template}', + shell=True, check=True) + assert output.returncode == 0 + + +class API(unittest.TestCase): + """ + Collection of tests for OTE API and OTE Model Templates + """ + + def init_environment( + self, + params, + model_template, + number_of_images=500, + task_type=TaskType.DETECTION): + + labels_names = ('rectangle', 'ellipse', 'triangle') + labels_schema = generate_label_schema(labels_names, task_type_to_label_domain(task_type)) + labels_list = labels_schema.get_labels(False) + environment = TaskEnvironment(model=None, hyper_parameters=params, label_schema=labels_schema, + model_template=model_template) + + warnings.filterwarnings('ignore', message='.* coordinates .* are out of bounds.*') + items = [] + for i in range(0, number_of_images): + image_numpy, annos = generate_random_annotated_image( + image_width=640, + image_height=480, + labels=labels_list, + max_shapes=20, + min_size=50, + max_size=100, + random_seed=None) + # Convert shapes according to task + for anno in annos: + if task_type == TaskType.INSTANCE_SEGMENTATION: + anno.shape = ShapeFactory.shape_as_polygon(anno.shape) + else: + anno.shape = ShapeFactory.shape_as_rectangle(anno.shape) + + image = Image(data=image_numpy) + annotation_scene = AnnotationSceneEntity( + kind=AnnotationSceneKind.ANNOTATION, + annotations=annos) + items.append(DatasetItemEntity(media=image, annotation_scene=annotation_scene)) + warnings.resetwarnings() + + rng = random.Random() + rng.shuffle(items) + for i, _ in enumerate(items): + subset_region = i / number_of_images + if subset_region >= 0.8: + subset = Subset.TESTING + elif subset_region >= 0.6: + subset = Subset.VALIDATION + else: + subset = Subset.TRAINING + items[i].subset = subset + + dataset = DatasetEntity(items) + return environment, dataset + + def setup_configurable_parameters(self, template_dir, num_iters=10): + glb = glob.glob(f'{template_dir}/template*.yaml') + template_path = glb[0] if glb else 
None + if not template_path: + raise RuntimeError(f"Template YAML not found: {template_dir}") + + model_template = parse_model_template(template_path) + hyper_parameters = create(model_template.hyper_parameters.data) + hyper_parameters.learning_parameters.num_iters = num_iters + hyper_parameters.postprocessing.result_based_confidence_threshold = False + hyper_parameters.postprocessing.confidence_threshold = 0.1 + return hyper_parameters, model_template + + @e2e_pytest_api + def test_cancel_training_detection(self): + """ + Tests starting and cancelling training. + + Flow of the test: + - Creates a randomly annotated project with a small dataset containing 3 classes: + ['rectangle', 'ellipse', 'triangle']. + - Start training and give cancel training signal once training is running (polled every 10 seconds). + Assert that the whole run finishes within 100 seconds. + - Start training and give cancel signal immediately. Assert that training stops within 25 seconds. + + This test should be finished in under one minute on a workstation. 
+ """ + hyper_parameters, model_template = self.setup_configurable_parameters(DEFAULT_TEMPLATE_DIR, num_iters=500) + detection_environment, dataset = self.init_environment(hyper_parameters, model_template, 64) + + detection_task = OTEDetectionTrainingTask(task_environment=detection_environment) + + executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix='train_thread') + + output_model = ModelEntity( + dataset, + detection_environment.get_model_configuration(), + ) + + training_progress_curve = [] + def progress_callback(progress: float, score: Optional[float] = None): + training_progress_curve.append(progress) + + train_parameters = TrainParameters() + train_parameters.update_progress = progress_callback + + # Test stopping after some time + start_time = time.time() + train_future = executor.submit(detection_task.train, dataset, output_model, train_parameters) + # give train_thread some time to initialize the model + while not detection_task._is_training: + time.sleep(10) + detection_task.cancel_training() + + # the whole run (start-up plus stopping) has to finish in less than 100 seconds + train_future.result() + self.assertEqual(training_progress_curve[-1], 100) + self.assertLess(time.time() - start_time, 100, 'Expected to stop within 100 seconds.') + + # Test stopping immediately (as soon as training is started). 
+ start_time = time.time() + train_future = executor.submit(detection_task.train, dataset, output_model) + while not detection_task._is_training: + time.sleep(0.1) + detection_task.cancel_training() + + train_future.result() + self.assertLess(time.time() - start_time, 25) # stopping process has to happen in less than 25 seconds + + @e2e_pytest_api + def test_training_progress_tracking(self): + hyper_parameters, model_template = self.setup_configurable_parameters(DEFAULT_TEMPLATE_DIR, num_iters=5) + detection_environment, dataset = self.init_environment(hyper_parameters, model_template, 50) + + task = OTEDetectionTrainingTask(task_environment=detection_environment) + self.addCleanup(task._delete_scratch_space) + + print('Task initialized, model training starts.') + training_progress_curve = [] + + def progress_callback(progress: float, score: Optional[float] = None): + training_progress_curve.append(progress) + + train_parameters = TrainParameters() + train_parameters.update_progress = progress_callback + output_model = ModelEntity( + dataset, + detection_environment.get_model_configuration(), + ) + task.train(dataset, output_model, train_parameters) + + self.assertGreater(len(training_progress_curve), 0) + training_progress_curve = np.asarray(training_progress_curve) + self.assertTrue(np.all(training_progress_curve[1:] >= training_progress_curve[:-1])) + + @e2e_pytest_api + def test_nncf_optimize_progress_tracking(self): + if not is_nncf_enabled(): + self.skipTest("Required NNCF module.") + + # Prepare pretrained weights + hyper_parameters, model_template = self.setup_configurable_parameters(DEFAULT_TEMPLATE_DIR, num_iters=2) + detection_environment, dataset = self.init_environment(hyper_parameters, model_template, 50) + + task = OTEDetectionTrainingTask(task_environment=detection_environment) + self.addCleanup(task._delete_scratch_space) + + original_model = ModelEntity( + dataset, + detection_environment.get_model_configuration(), + ) + task.train(dataset, 
original_model, TrainParameters()) + + # Create NNCFTask + detection_environment.model = original_model + nncf_task = OTEDetectionNNCFTask(task_environment=detection_environment) + self.addCleanup(nncf_task._delete_scratch_space) + + # Rewrite some parameters to spend less time + nncf_task._config["runner"]["max_epochs"] = 10 + nncf_init_cfg = nncf_task._config["nncf_config"]["compression"][0]["initializer"] + nncf_init_cfg["range"]["num_init_samples"] = 1 + nncf_init_cfg["batchnorm_adaptation"]["num_bn_adaptation_samples"] = 1 + + print('Task initialized, model optimization starts.') + training_progress_curve = [] + + def progress_callback(progress: int): + assert isinstance(progress, int) + training_progress_curve.append(progress) + + optimization_parameters = OptimizationParameters() + optimization_parameters.update_progress = progress_callback + nncf_model = ModelEntity( + dataset, + detection_environment.get_model_configuration(), + ) + + nncf_task.optimize(OptimizationType.NNCF, dataset, nncf_model, optimization_parameters) + + self.assertGreater(len(training_progress_curve), 0) + training_progress_curve = np.asarray(training_progress_curve) + self.assertTrue(np.all(training_progress_curve[1:] >= training_progress_curve[:-1])) + + @e2e_pytest_api + def test_inference_progress_tracking(self): + hyper_parameters, model_template = self.setup_configurable_parameters(DEFAULT_TEMPLATE_DIR, num_iters=10) + detection_environment, dataset = self.init_environment(hyper_parameters, model_template, 50) + + task = OTEDetectionTrainingTask(task_environment=detection_environment) + self.addCleanup(task._delete_scratch_space) + + print('Task initialized, model inference starts.') + inference_progress_curve = [] + + def progress_callback(progress: int): + assert isinstance(progress, int) + inference_progress_curve.append(progress) + + inference_parameters = InferenceParameters() + inference_parameters.update_progress = progress_callback + + 
task.infer(dataset.with_empty_annotations(), inference_parameters) + + self.assertGreater(len(inference_progress_curve), 0) + inference_progress_curve = np.asarray(inference_progress_curve) + self.assertTrue(np.all(inference_progress_curve[1:] >= inference_progress_curve[:-1])) + + @e2e_pytest_api + def test_inference_task(self): + # Prepare pretrained weights + hyper_parameters, model_template = self.setup_configurable_parameters(DEFAULT_TEMPLATE_DIR, num_iters=2) + detection_environment, dataset = self.init_environment(hyper_parameters, model_template, 50) + val_dataset = dataset.get_subset(Subset.VALIDATION) + + train_task = OTEDetectionTrainingTask(task_environment=detection_environment) + self.addCleanup(train_task._delete_scratch_space) + + trained_model = ModelEntity( + dataset, + detection_environment.get_model_configuration(), + ) + train_task.train(dataset, trained_model, TrainParameters) + performance_after_train = self.eval(train_task, trained_model, val_dataset) + + # Create InferenceTask + detection_environment.model = trained_model + inference_task = OTEDetectionInferenceTask(task_environment=detection_environment) + self.addCleanup(inference_task._delete_scratch_space) + + performance_after_load = self.eval(inference_task, trained_model, val_dataset) + + assert performance_after_train == performance_after_load + + # Export + exported_model = ModelEntity( + dataset, + detection_environment.get_model_configuration(), + _id=ObjectId()) + inference_task.export(ExportType.OPENVINO, exported_model) + + @staticmethod + def eval(task: OTEDetectionTrainingTask, model: ModelEntity, dataset: DatasetEntity) -> Performance: + start_time = time.time() + result_dataset = task.infer(dataset.with_empty_annotations()) + end_time = time.time() + print(f'{len(dataset)} analysed in {end_time - start_time} seconds') + result_set = ResultSetEntity( + model=model, + ground_truth_dataset=dataset, + prediction_dataset=result_dataset + ) + task.evaluate(result_set) + assert 
result_set.performance is not None + return result_set.performance + + def check_threshold(self, reference, value, delta_tolerance, message=''): + delta = value.score.value - reference.score.value + self.assertLessEqual( + np.abs(delta), + delta_tolerance, + msg=message + + f' (reference metric: {reference.score.value}, ' + f'actual value: {value.score.value}, ' + f'delta tolerance threshold: {delta_tolerance})' + ) + + def end_to_end( + self, + template_dir, + num_iters=5, + quality_score_threshold=0.5, + reload_perf_delta_tolerance=0.0, + export_perf_delta_tolerance=0.0005, + pot_perf_delta_tolerance=0.1, + nncf_perf_delta_tolerance=0.1, + task_type=TaskType.DETECTION): + + hyper_parameters, model_template = self.setup_configurable_parameters( + template_dir, num_iters=num_iters) + detection_environment, dataset = self.init_environment( + hyper_parameters, model_template, 250, task_type=task_type) + + val_dataset = dataset.get_subset(Subset.VALIDATION) + task = OTEDetectionTrainingTask(task_environment=detection_environment) + self.addCleanup(task._delete_scratch_space) + + print('Task initialized, model training starts.') + # Train the task. + # train_task checks that the task returns an Model and that + # validation f-measure is higher than the threshold, which is a pretty low bar + # considering that the dataset is so easy + output_model = ModelEntity( + dataset, + detection_environment.get_model_configuration(), + _id=ObjectId()) + task.train(dataset, output_model) + + # Test that output model is valid. + modelinfo = torch.load(io.BytesIO(output_model.get_data("weights.pth"))) + modelinfo.pop('anchors', None) + self.assertEqual(list(modelinfo.keys()), ['model', 'config', 'confidence_threshold', 'VERSION']) + + # Run inference. 
+ validation_performance = self.eval(task, output_model, val_dataset) + print(f'Performance: {validation_performance.score.value:.4f}') + self.assertGreater(validation_performance.score.value, quality_score_threshold, + f'Expected F-measure to be higher than {quality_score_threshold}') + + # Run another training round. + first_model = output_model + new_model = ModelEntity( + dataset, + detection_environment.get_model_configuration(), + _id=ObjectId()) + task._hyperparams.learning_parameters.num_iters = 1 + task.train(dataset, new_model) + self.assertNotEqual(first_model, new_model) + self.assertNotEqual(first_model.get_data("weights.pth"), new_model.get_data("weights.pth")) + + # Reload task with the first model. + detection_environment.model = first_model + task = OTEDetectionTrainingTask(detection_environment) + self.assertEqual(task._task_environment.model.id, first_model.id) + + print('Reevaluating model.') + # Performance should be the same after reloading + performance_after_reloading = self.eval(task, output_model, val_dataset) + print(f'Performance after reloading: {performance_after_reloading.score.value:.4f}') + self.check_threshold(validation_performance, performance_after_reloading, reload_perf_delta_tolerance, + 'Too big performance difference after model reload.') + + if isinstance(task, IExportTask): + # Run export. + exported_model = ModelEntity( + dataset, + detection_environment.get_model_configuration(), + _id=ObjectId()) + task.export(ExportType.OPENVINO, exported_model) + self.assertEqual(exported_model.model_format, ModelFormat.OPENVINO) + self.assertEqual(exported_model.optimization_type, ModelOptimizationType.MO) + + # Create OpenVINO Task and evaluate the model. 
+ detection_environment.model = exported_model + ov_task = OpenVINODetectionTask(detection_environment) + predicted_validation_dataset = ov_task.infer(val_dataset.with_empty_annotations()) + resultset = ResultSetEntity( + model=output_model, + ground_truth_dataset=val_dataset, + prediction_dataset=predicted_validation_dataset, + ) + ov_task.evaluate(resultset) + export_performance = resultset.performance + assert export_performance is not None + print(f'Performance of exported model: {export_performance.score.value:.4f}') + self.check_threshold(validation_performance, export_performance, export_perf_delta_tolerance, + 'Too big performance difference after OpenVINO export.') + + # Run POT optimization and evaluate the result. + print('Run POT optimization.') + optimized_model = ModelEntity( + dataset, + detection_environment.get_model_configuration(), + ) + ov_task.optimize(OptimizationType.POT, dataset, optimized_model, OptimizationParameters()) + pot_performance = self.eval(ov_task, optimized_model, val_dataset) + print(f'Performance of optimized model: {pot_performance.score.value:.4f}') + self.check_threshold(validation_performance, pot_performance, pot_perf_delta_tolerance, + 'Too big performance difference after POT optimization.') + + if model_template.entrypoints.nncf: + if is_nncf_enabled(): + print('Run NNCF optimization.') + nncf_model = ModelEntity( + dataset, + detection_environment.get_model_configuration(), + ) + nncf_model.set_data('weights.pth', output_model.get_data("weights.pth")) + + detection_environment.model = nncf_model + + nncf_task = OTEDetectionNNCFTask(task_environment=detection_environment) + + nncf_task.optimize(OptimizationType.NNCF, dataset, nncf_model, OptimizationParameters()) + nncf_task.save_model(nncf_model) + nncf_performance = self.eval(nncf_task, nncf_model, val_dataset) + + print(f'Performance of NNCF model: {nncf_performance.score.value:.4f}') + self.check_threshold(validation_performance, nncf_performance, 
nncf_perf_delta_tolerance, + 'Too big performance difference after NNCF optimization.') + else: + print('Skipped test of OTEDetectionNNCFTask. Required NNCF module.') + + @e2e_pytest_api + def test_training_gen3_ssd(self): + self.end_to_end(osp.join('configs', 'ote', 'custom-object-detection', 'gen3_mobilenetV2_SSD')) + + @e2e_pytest_api + def test_training_gen3_atss(self): + self.end_to_end(osp.join('configs', 'ote', 'custom-object-detection', 'gen3_mobilenetV2_ATSS')) + + @e2e_pytest_api + def test_training_gen3_vfnet(self): + self.end_to_end(osp.join('configs', 'ote', 'custom-object-detection', 'gen3_resnet50_VFNet'), + export_perf_delta_tolerance=0.01) + + @e2e_pytest_api + def test_training_yolox(self): + self.end_to_end( + osp.join('configs', 'ote', 'custom-object-detection', 'cspdarknet_YOLOX')) + + @e2e_pytest_api + def test_training_pers_det_0200(self): + self.end_to_end( + osp.join('configs', 'ote', 'person-detection', 'person-detection-0200')) + + @e2e_pytest_api + def test_training_pers_det_0201(self): + self.end_to_end( + osp.join('configs', 'ote', 'person-detection', 'person-detection-0201')) + + @e2e_pytest_api + def test_training_pers_det_0202(self): + self.end_to_end( + osp.join('configs', 'ote', 'person-detection', 'person-detection-0202')) + + @e2e_pytest_api + def test_training_pers_det_0203(self): + self.end_to_end( + osp.join('configs', 'ote', 'person-detection', 'person-detection-0203')) + + @e2e_pytest_api + def test_training_pers_det_0301(self): + self.end_to_end( + osp.join('configs', 'ote', 'person-detection', 'person-detection-0301')) + + @e2e_pytest_api + def test_training_pers_det_0302(self): + self.end_to_end( + osp.join('configs', 'ote', 'person-detection', 'person-detection-0302')) + + @e2e_pytest_api + def test_training_pers_det_0303(self): + self.end_to_end( + osp.join('configs', 'ote', 'person-detection', 'person-detection-0303')) + + + @e2e_pytest_api + def test_training_maskrcnn_resnet50(self): + 
self.end_to_end(osp.join('configs', 'ote', + 'custom-counting-instance-seg', 'resnet50_maskrcnn'), + task_type=TaskType.INSTANCE_SEGMENTATION) + + @e2e_pytest_api + def test_training_maskrcnn_efficientnetb2b(self): + self.end_to_end(osp.join('configs', 'ote', + 'custom-counting-instance-seg', 'efficientnetb2b_maskrcnn'), + task_type=TaskType.INSTANCE_SEGMENTATION)