diff --git a/configs/ote/person-detection/person-detection-0200/compression_config.json b/configs/ote/person-detection/person-detection-0200/compression_config.json
new file mode 100644
index 00000000000..fa7b87948a2
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0200/compression_config.json
@@ -0,0 +1,46 @@
+{
+    "base": {
+        "find_unused_parameters": true,
+        "nncf_config": {
+            "input_info": {
+                "sample_size": [
+                    1,
+                    3,
+                    256,
+                    256
+                ]
+            },
+            "compression": [],
+            "log_dir": "."
+        }
+    },
+    "nncf_quantization": {
+        "optimizer": {
+            "type": "SGD",
+            "lr": 0.00005,
+            "momentum": 0.9,
+            "weight_decay": 0.0005
+        },
+        "runner": {
+            "max_epochs": 2
+        },
+        "nncf_config": {
+            "compression": [
+                {
+                    "algorithm": "quantization",
+                    "initializer": {
+                        "range": {
+                            "num_init_samples": 10
+                        },
+                        "batchnorm_adaptation": {
+                            "num_bn_adaptation_samples": 30
+                        }
+                    }
+                }
+            ]
+        }
+    },
+    "order_of_parts": [
+        "nncf_quantization"
+    ]
+}
\ No newline at end of file
diff --git a/configs/ote/person-detection/person-detection-0200/model.py b/configs/ote/person-detection/person-detection-0200/model.py
new file mode 100644
index 00000000000..d403c5b51d5
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0200/model.py
@@ -0,0 +1,162 @@
+# model settings
+input_size = 256
+image_width, image_height = input_size, input_size
+width_mult = 1.0
+model = dict(
+    type='SingleStageDetector',
+    backbone=dict(
+        type='mobilenetv2_w1',
+        out_indices=(4, 5),
+        frozen_stages=-1,
+        norm_eval=False,
+        pretrained=True
+    ),
+    neck=None,
+    bbox_head=dict(
+        type='SSDHead',
+        num_classes=1,
+        in_channels=(int(width_mult * 96), int(width_mult * 320)),
+        anchor_generator=dict(
+            type='SSDAnchorGeneratorClustered',
+            strides=(16, 32),
+            widths=[
+                [image_width * x for x in
+                 [0.015411783166343854, 0.033018232306549156, 0.04467156688464953,
+                  0.0610697815328886]],
+                [image_width * x for x in
+                 [0.0789599954420517, 0.10113984043326349, 0.12805187473050397, 0.16198319380154758,
+                  0.21636496806213493]],
+
+            ],
+            heights=[
+                [image_height * x for x in
+                 [0.05032631418898226, 0.10070800135152037, 0.15806180366055939,
+                  0.22343401646383804]],
+                [image_height * x for x in
+                 [0.300881401352503, 0.393181898580379, 0.4998807213337051, 0.6386035764261081,
+                  0.8363451552091518]],
+
+            ],
+        ),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=(.0, .0, .0, .0),
+            target_stds=(0.1, 0.1, 0.2, 0.2), ),
+        depthwise_heads=True,
+        depthwise_heads_activations='relu',
+        loss_balancing=True),
+    # model training and testing settings
+    train_cfg=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.4,
+            neg_iou_thr=0.4,
+            min_pos_iou=0.,
+            ignore_iof_thr=-1,
+            gt_max_assign_all=False),
+        smoothl1_beta=1.,
+        use_giou=False,
+        use_focal=False,
+        allowed_border=-1,
+        pos_weight=-1,
+        neg_pos_ratio=3,
+        debug=False),
+    test_cfg=dict(
+        nms=dict(type='nms', iou_threshold=0.45),
+        min_bbox_size=0,
+        score_thr=0.02,
+        max_per_img=200))
+cudnn_benchmark = True
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = '../../data/airport/'
+img_norm_cfg = dict(mean=[0, 0, 0], std=[255, 255, 255], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile', to_float32=True),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(
+        type='PhotoMetricDistortion',
+        brightness_delta=32,
+        contrast_range=(0.5, 1.5),
+        saturation_range=(0.5, 1.5),
+        hue_delta=18),
+    dict(
+        type='MinIoURandomCrop',
+        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+        min_crop_size=0.1),
+    dict(type='Resize', img_scale=(input_size, input_size), keep_ratio=False),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(input_size, input_size),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=False),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=122,
+    workers_per_gpu=3,
+    train=dict(
+        type='RepeatDataset',
+        times=5,
+        dataset=dict(
+            type=dataset_type,
+            labels=('person',),
+            ann_file=data_root + 'annotation_person_train.json',
+            min_size=20,
+            img_prefix=data_root + 'train',
+            pipeline=train_pipeline
+        )
+    ),
+    val=dict(
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=1200,
+    warmup_ratio=1.0 / 3,
+    step=[8, 15, 18])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=1,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=20)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = 'output'
+load_from = 'https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0200-1.pth'
+resume_from = None
+evaluation = dict(interval=1, metric='mAP', save_best='mAP')
+workflow = [('train', 1)]
diff --git a/configs/ote/person-detection/person-detection-0200/template_experimental.yaml b/configs/ote/person-detection/person-detection-0200/template_experimental.yaml
new file mode 100644
index 00000000000..3cfd0b91e95
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0200/template_experimental.yaml
@@ -0,0 +1,56 @@
+# Description.
+model_template_id: Person_Detection_0200
+name: person-detection-0200
+task_type: DETECTION
+task_family: VISION
+instantiation: "CLASS"
+summary: Fastest Person Detection model for large and simple objects (MobileNetV2-SSD).
+application:
+  ~
+
+# Algo backend.
+framework: OTEDetection v2.9.1
+
+# Task implementations.
+entrypoints:
+  base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask
+  openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask
+  nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask
+
+# Capabilities.
+capabilities:
+  - compute_representations
+
+# Hyperparameters.
+hyper_parameters:
+  base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml
+  parameter_overrides:
+    learning_parameters:
+      batch_size:
+        default_value: 122
+      learning_rate:
+        default_value: 0.05
+      learning_rate_warmup_iters:
+        default_value: 1200
+      num_iters:
+        default_value: 20
+    nncf_optimization:
+      enable_quantization:
+        default_value: true
+      enable_pruning:
+        default_value: false
+      maximal_accuracy_degradation:
+        default_value: 0.01
+
+# Training resources.
+max_nodes: 1
+training_targets:
+  - GPU
+  - CPU
+
+# Stats.
+gigaflops: 0.82  # TODO: verify with model-analyzer
+size: 1.83
+
+# Obsolete
+# gpu_nums: 2
diff --git a/configs/ote/person-detection/person-detection-0201/compression_config.json b/configs/ote/person-detection/person-detection-0201/compression_config.json
new file mode 100644
index 00000000000..35a22a6b267
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0201/compression_config.json
@@ -0,0 +1,46 @@
+{
+    "base": {
+        "find_unused_parameters": true,
+        "nncf_config": {
+            "input_info": {
+                "sample_size": [
+                    1,
+                    3,
+                    384,
+                    384
+                ]
+            },
+            "compression": [],
+            "log_dir": "."
+        }
+    },
+    "nncf_quantization": {
+        "optimizer": {
+            "type": "SGD",
+            "lr": 0.00005,
+            "momentum": 0.9,
+            "weight_decay": 0.0005
+        },
+        "runner": {
+            "max_epochs": 2
+        },
+        "nncf_config": {
+            "compression": [
+                {
+                    "algorithm": "quantization",
+                    "initializer": {
+                        "range": {
+                            "num_init_samples": 10
+                        },
+                        "batchnorm_adaptation": {
+                            "num_bn_adaptation_samples": 30
+                        }
+                    }
+                }
+            ]
+        }
+    },
+    "order_of_parts": [
+        "nncf_quantization"
+    ]
+}
\ No newline at end of file
diff --git a/configs/ote/person-detection/person-detection-0201/model.py b/configs/ote/person-detection/person-detection-0201/model.py
new file mode 100644
index 00000000000..89a151f748d
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0201/model.py
@@ -0,0 +1,162 @@
+# model settings
+input_size = 384
+image_width, image_height = input_size, input_size
+width_mult = 1.0
+model = dict(
+    type='SingleStageDetector',
+    backbone=dict(
+        type='mobilenetv2_w1',
+        out_indices=(4, 5),
+        frozen_stages=-1,
+        norm_eval=False,
+        pretrained=True
+    ),
+    neck=None,
+    bbox_head=dict(
+        type='SSDHead',
+        num_classes=1,
+        in_channels=(int(width_mult * 96), int(width_mult * 320)),
+        anchor_generator=dict(
+            type='SSDAnchorGeneratorClustered',
+            strides=(16, 32),
+            widths=[
+                [image_width * x for x in
+                 [0.015411783166343854, 0.033018232306549156, 0.04467156688464953,
+                  0.0610697815328886]],
+                [image_width * x for x in
+                 [0.0789599954420517, 0.10113984043326349, 0.12805187473050397, 0.16198319380154758,
+                  0.21636496806213493]],
+
+            ],
+            heights=[
+                [image_height * x for x in
+                 [0.05032631418898226, 0.10070800135152037, 0.15806180366055939,
+                  0.22343401646383804]],
+                [image_height * x for x in
+                 [0.300881401352503, 0.393181898580379, 0.4998807213337051, 0.6386035764261081,
+                  0.8363451552091518]],
+
+            ],
+        ),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=(.0, .0, .0, .0),
+            target_stds=(0.1, 0.1, 0.2, 0.2), ),
+        depthwise_heads=True,
+        depthwise_heads_activations='relu',
+        loss_balancing=True),
+    # model training and testing settings
+    train_cfg=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.4,
+            neg_iou_thr=0.4,
+            min_pos_iou=0.,
+            ignore_iof_thr=-1,
+            gt_max_assign_all=False),
+        smoothl1_beta=1.,
+        use_giou=False,
+        use_focal=False,
+        allowed_border=-1,
+        pos_weight=-1,
+        neg_pos_ratio=3,
+        debug=False),
+    test_cfg=dict(
+        nms=dict(type='nms', iou_threshold=0.45),
+        min_bbox_size=0,
+        score_thr=0.02,
+        max_per_img=200))
+cudnn_benchmark = True
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = '../../data/airport/'
+img_norm_cfg = dict(mean=[0, 0, 0], std=[255, 255, 255], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile', to_float32=True),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(
+        type='PhotoMetricDistortion',
+        brightness_delta=32,
+        contrast_range=(0.5, 1.5),
+        saturation_range=(0.5, 1.5),
+        hue_delta=18),
+    dict(
+        type='MinIoURandomCrop',
+        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+        min_crop_size=0.1),
+    dict(type='Resize', img_scale=(input_size, input_size), keep_ratio=False),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(input_size, input_size),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=False),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=84,
+    workers_per_gpu=3,
+    train=dict(
+        type='RepeatDataset',
+        times=5,
+        dataset=dict(
+            type=dataset_type,
+            labels=('person',),
+            ann_file=data_root + 'annotation_person_train.json',
+            min_size=20,
+            img_prefix=data_root + 'train',
+            pipeline=train_pipeline
+        )
+    ),
+    val=dict(
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=1200,
+    warmup_ratio=1.0 / 3,
+    step=[8, 15, 18])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=1,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=20)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = 'output'
+load_from = 'https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0201-1.pth'
+resume_from = None
+evaluation = dict(interval=1, metric='mAP', save_best='mAP')
+workflow = [('train', 1)]
\ No newline at end of file
diff --git a/configs/ote/person-detection/person-detection-0201/template_experimental.yaml b/configs/ote/person-detection/person-detection-0201/template_experimental.yaml
new file mode 100644
index 00000000000..1283a1b7a13
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0201/template_experimental.yaml
@@ -0,0 +1,56 @@
+# Description.
+model_template_id: Person_Detection_0201
+name: person-detection-0201
+task_type: DETECTION
+task_family: VISION
+instantiation: "CLASS"
+summary: Fast Person Detection model (MobileNetV2-SSD).
+application:
+  ~
+
+# Algo backend.
+framework: OTEDetection v2.9.1
+
+# Task implementations.
+entrypoints:
+  base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask
+  openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask
+  nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask
+
+# Capabilities.
+capabilities:
+  - compute_representations
+
+# Hyperparameters.
+hyper_parameters:
+  base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml
+  parameter_overrides:
+    learning_parameters:
+      batch_size:
+        default_value: 84
+      learning_rate:
+        default_value: 0.05
+      learning_rate_warmup_iters:
+        default_value: 1200
+      num_iters:
+        default_value: 20
+    nncf_optimization:
+      enable_quantization:
+        default_value: true
+      enable_pruning:
+        default_value: false
+      maximal_accuracy_degradation:
+        default_value: 0.01
+
+# Training resources.
+max_nodes: 1
+training_targets:
+  - GPU
+  - CPU
+
+# Stats.
+gigaflops: 1.84  # TODO: verify with model-analyzer
+size: 1.83
+
+# Obsolete
+# gpu_nums: 4
diff --git a/configs/ote/person-detection/person-detection-0202/compression_config.json b/configs/ote/person-detection/person-detection-0202/compression_config.json
new file mode 100644
index 00000000000..390711bdeec
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0202/compression_config.json
@@ -0,0 +1,46 @@
+{
+    "base": {
+        "find_unused_parameters": true,
+        "nncf_config": {
+            "input_info": {
+                "sample_size": [
+                    1,
+                    3,
+                    512,
+                    512
+                ]
+            },
+            "compression": [],
+            "log_dir": "."
+        }
+    },
+    "nncf_quantization": {
+        "optimizer": {
+            "type": "SGD",
+            "lr": 0.00005,
+            "momentum": 0.9,
+            "weight_decay": 0.0005
+        },
+        "runner": {
+            "max_epochs": 2
+        },
+        "nncf_config": {
+            "compression": [
+                {
+                    "algorithm": "quantization",
+                    "initializer": {
+                        "range": {
+                            "num_init_samples": 10
+                        },
+                        "batchnorm_adaptation": {
+                            "num_bn_adaptation_samples": 30
+                        }
+                    }
+                }
+            ]
+        }
+    },
+    "order_of_parts": [
+        "nncf_quantization"
+    ]
+}
\ No newline at end of file
diff --git a/configs/ote/person-detection/person-detection-0202/model.py b/configs/ote/person-detection/person-detection-0202/model.py
new file mode 100644
index 00000000000..c171b700c24
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0202/model.py
@@ -0,0 +1,162 @@
+# model settings
+input_size = 512
+image_width, image_height = input_size, input_size
+width_mult = 1.0
+model = dict(
+    type='SingleStageDetector',
+    backbone=dict(
+        type='mobilenetv2_w1',
+        out_indices=(4, 5),
+        frozen_stages=-1,
+        norm_eval=False,
+        pretrained=True
+    ),
+    neck=None,
+    bbox_head=dict(
+        type='SSDHead',
+        num_classes=1,
+        in_channels=(int(width_mult * 96), int(width_mult * 320)),
+        anchor_generator=dict(
+            type='SSDAnchorGeneratorClustered',
+            strides=(16, 32),
+            widths=[
+                [image_width * x for x in
+                 [0.015411783166343854, 0.033018232306549156, 0.04467156688464953,
+                  0.0610697815328886]],
+                [image_width * x for x in
+                 [0.0789599954420517, 0.10113984043326349, 0.12805187473050397, 0.16198319380154758,
+                  0.21636496806213493]],
+
+            ],
+            heights=[
+                [image_height * x for x in
+                 [0.05032631418898226, 0.10070800135152037, 0.15806180366055939,
+                  0.22343401646383804]],
+                [image_height * x for x in
+                 [0.300881401352503, 0.393181898580379, 0.4998807213337051, 0.6386035764261081,
+                  0.8363451552091518]],
+
+            ],
+        ),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=(.0, .0, .0, .0),
+            target_stds=(0.1, 0.1, 0.2, 0.2), ),
+        depthwise_heads=True,
+        depthwise_heads_activations='relu',
+        loss_balancing=True),
+    # model training and testing settings
+    train_cfg=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.4,
+            neg_iou_thr=0.4,
+            min_pos_iou=0.,
+            ignore_iof_thr=-1,
+            gt_max_assign_all=False),
+        smoothl1_beta=1.,
+        use_giou=False,
+        use_focal=False,
+        allowed_border=-1,
+        pos_weight=-1,
+        neg_pos_ratio=3,
+        debug=False),
+    test_cfg=dict(
+        nms=dict(type='nms', iou_threshold=0.45),
+        min_bbox_size=0,
+        score_thr=0.02,
+        max_per_img=200))
+cudnn_benchmark = True
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = '../../data/airport/'
+img_norm_cfg = dict(mean=[0, 0, 0], std=[255, 255, 255], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile', to_float32=True),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(
+        type='PhotoMetricDistortion',
+        brightness_delta=32,
+        contrast_range=(0.5, 1.5),
+        saturation_range=(0.5, 1.5),
+        hue_delta=18),
+    dict(
+        type='MinIoURandomCrop',
+        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+        min_crop_size=0.1),
+    dict(type='Resize', img_scale=(input_size, input_size), keep_ratio=False),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(input_size, input_size),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=False),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=30,
+    workers_per_gpu=3,
+    train=dict(
+        type='RepeatDataset',
+        times=5,
+        dataset=dict(
+            type=dataset_type,
+            labels=('person',),
+            ann_file=data_root + 'annotation_person_train.json',
+            min_size=20,
+            img_prefix=data_root + 'train',
+            pipeline=train_pipeline
+        )
+    ),
+    val=dict(
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=1200,
+    warmup_ratio=1.0 / 3,
+    step=[8, 15, 18])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=1,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=20)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = 'output'
+load_from = 'https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0202-1.pth'
+resume_from = None
+evaluation = dict(interval=1, metric='mAP', save_best='mAP')
+workflow = [('train', 1)]
\ No newline at end of file
diff --git a/configs/ote/person-detection/person-detection-0202/template_experimental.yaml b/configs/ote/person-detection/person-detection-0202/template_experimental.yaml
new file mode 100644
index 00000000000..bd149350235
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0202/template_experimental.yaml
@@ -0,0 +1,56 @@
+# Description.
+model_template_id: Person_Detection_0202
+name: person-detection-0202
+task_type: DETECTION
+task_family: VISION
+instantiation: "CLASS"
+summary: Medium Person Detection model (MobileNetV2-SSD).
+application:
+  ~
+
+# Algo backend.
+framework: OTEDetection v2.9.1
+
+# Task implementations.
+entrypoints:
+  base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask
+  openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask
+  nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask
+
+# Capabilities.
+capabilities:
+  - compute_representations
+
+# Hyperparameters.
+hyper_parameters:
+  base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml
+  parameter_overrides:
+    learning_parameters:
+      batch_size:
+        default_value: 30
+      learning_rate:
+        default_value: 0.05
+      learning_rate_warmup_iters:
+        default_value: 1200
+      num_iters:
+        default_value: 20
+    nncf_optimization:
+      enable_quantization:
+        default_value: true
+      enable_pruning:
+        default_value: false
+      maximal_accuracy_degradation:
+        default_value: 0.01
+
+# Training resources.
+max_nodes: 1
+training_targets:
+  - GPU
+  - CPU
+
+# Stats.
+gigaflops: 2.52  # TODO: verify with model-analyzer
+size: 1.83
+
+# Obsolete
+# gpu_nums: 2
diff --git a/configs/ote/person-detection/person-detection-0203/compression_config.json b/configs/ote/person-detection/person-detection-0203/compression_config.json
new file mode 100755
index 00000000000..671a8e47ac3
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0203/compression_config.json
@@ -0,0 +1,47 @@
+{
+    "base": {
+        "find_unused_parameters": true,
+        "nncf_config": {
+            "input_info": {
+                "sample_size": [
+                    1,
+                    3,
+                    480,
+                    864
+                ]
+            },
+            "compression": [],
+            "log_dir": "."
+        }
+    },
+    "nncf_quantization": {
+        "optimizer": {
+            "type": "SGD",
+            "lr": 0.00005,
+            "momentum": 0.9,
+            "weight_decay": 0.0005
+        },
+        "runner": {
+            "max_epochs": 1
+        },
+        "nncf_config": {
+            "compression": [
+                {
+                    "algorithm": "quantization",
+                    "initializer": {
+                        "range": {
+                            "num_init_samples": 10
+                        },
+                        "batchnorm_adaptation": {
+                            "num_bn_adaptation_samples": 30
+                        }
+                    }
+                }
+            ]
+        }
+    },
+    "order_of_parts": [
+        "nncf_quantization"
+    ]
+}
+
diff --git a/configs/ote/person-detection/person-detection-0203/model.py b/configs/ote/person-detection/person-detection-0203/model.py
new file mode 100644
index 00000000000..53acb329637
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0203/model.py
@@ -0,0 +1,158 @@
+# model settings
+model = dict(
+    type='ATSS',
+    backbone=dict(
+        type='mobilenetv2_w1',
+        out_indices=(2, 3, 4, 5),
+        frozen_stages=-1,
+        norm_eval=False,
+        pretrained=True
+    ),
+    neck=dict(
+        type='FPN',
+        in_channels=[24, 32, 96, 320],
+        out_channels=32,
+        start_level=1,
+        add_extra_convs=True,
+        extra_convs_on_inputs=False,
+        num_outs=5),
+    bbox_head=dict(
+        type='ATSSHead',
+        num_classes=1,
+        in_channels=32,
+        stacked_convs=4,
+        feat_channels=32, anchor_generator=dict(
+            type='AnchorGenerator',
+            ratios=[0.5, 1.0, 2.0],
+            octave_base_scale=8,
+            scales_per_octave=1,
+            strides=[8, 16, 32, 64, 128]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[.0, .0, .0, .0],
+            target_stds=[0.1, 0.1, 0.2, 0.2]),
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.0),
+        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
+        loss_centerness=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+    # training and testing settings
+    train_cfg=dict(
+        assigner=dict(type='ATSSAssigner', topk=9),
+        allowed_border=-1,
+        pos_weight=-1,
+        debug=False),
+    test_cfg=dict(
+        nms_pre=1000,
+        min_bbox_size=0,
+        score_thr=0.05,
+        nms=dict(type='nms', iou_threshold=0.6),
+        max_per_img=100))
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = '../../data/airport/'
+img_norm_cfg = dict(
+    mean=[0, 0, 0], std=[255, 255, 255], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile', to_float32=True),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(
+        type='PhotoMetricDistortion',
+        brightness_delta=32,
+        contrast_range=(0.5, 1.5),
+        saturation_range=(0.5, 1.5),
+        hue_delta=18),
+    dict(type='Expand', ratio_range=(1, 3)),
+    dict(
+        type='MinIoURandomCrop',
+        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+        min_crop_size=0.3),
+    dict(
+        type='Resize',
+        img_scale=[(864, 480), (864, 640)],
+        keep_ratio=False),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(864, 480),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=False),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=14,
+    workers_per_gpu=4,
+    train=dict(
+        type='RepeatDataset',
+        times=1,
+        dataset=dict(
+            type=dataset_type,
+            labels=('person',),
+            ann_file=data_root + 'annotation_person_train.json',
+            img_prefix=data_root + 'train',
+            pipeline=train_pipeline
+        )
+    ),
+    val=dict(
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline))
+# optimizer
+optimizer = dict(
+    type='SGD',
+    lr=0.025,
+    momentum=0.9,
+    weight_decay=0.0001)
+optimizer_config = dict()
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='constant',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[10, 15, 18])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=10,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=20)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = 'output'
+load_from = 'https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0203.pth'
+resume_from = None
+evaluation = dict(interval=1, metric='mAP', save_best='mAP')
+workflow = [('train', 1)]
\ No newline at end of file
diff --git a/configs/ote/person-detection/person-detection-0203/template_experimental.yaml b/configs/ote/person-detection/person-detection-0203/template_experimental.yaml
new file mode 100644
index 00000000000..158f89f0877
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0203/template_experimental.yaml
@@ -0,0 +1,56 @@
+# Description.
+model_template_id: Person_Detection_0203
+name: person-detection-0203
+task_type: DETECTION
+task_family: VISION
+instantiation: "CLASS"
+summary: Medium Person Detection model for small and hard objects (MobileNetV2-ATSS).
+application:
+  ~
+
+# Algo backend.
+framework: OTEDetection v2.9.1
+
+# Task implementations.
+entrypoints:
+  base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask
+  openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask
+  nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask
+
+# Capabilities.
+capabilities:
+  - compute_representations
+
+# Hyperparameters.
+hyper_parameters:
+  base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml
+  parameter_overrides:
+    learning_parameters:
+      batch_size:
+        default_value: 14
+      learning_rate:
+        default_value: 0.025
+      learning_rate_warmup_iters:
+        default_value: 500
+      num_iters:
+        default_value: 20
+    nncf_optimization:
+      enable_quantization:
+        default_value: true
+      enable_pruning:
+        default_value: false
+      maximal_accuracy_degradation:
+        default_value: 0.01
+
+# Training resources.
+max_nodes: 1
+training_targets:
+  - GPU
+  - CPU
+
+# Stats.
+gigaflops: 6.74  # TODO: verify with model-analyzer
+size: 1.83
+
+# Obsolete
+# gpu_nums: 2
diff --git a/configs/ote/person-detection/person-detection-0301/accuracy-check.yml b/configs/ote/person-detection/person-detection-0301/accuracy-check.yml
new file mode 100644
index 00000000000..fdcfe010d0c
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0301/accuracy-check.yml
@@ -0,0 +1,24 @@
+models:
+   - name: person-detection-0301
+
+     launchers:
+       - framework: dlsdk
+         adapter:
+
+           type: class_agnostic_detection
+           scale: [0.0007440476, 0.00125]
+
+     datasets:
+       - name: crossroad_extra_untagged_person_hb
+         preprocessing:
+           - type: resize
+             dst_width: 1344
+             dst_height: 800
+
+         postprocessing:
+           - type: resize_prediction_boxes
+           - type: cast_to_int
+           - type: nms
+             overlap: 0.6
+           - type: clip_boxes
+             apply_to: prediction
diff --git a/configs/ote/person-detection/person-detection-0301/compression_config.json b/configs/ote/person-detection/person-detection-0301/compression_config.json
new file mode 100644
index 00000000000..496a9c68a03
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0301/compression_config.json
@@ -0,0 +1,46 @@
+{
+    "base": {
+        "find_unused_parameters": true,
+        "nncf_config": {
+            "input_info": {
+                "sample_size": [
+                    1,
+                    3,
+                    800,
+                    1344
+                ]
+            },
+            "compression": [],
+            "log_dir": "."
+        }
+    },
+    "nncf_quantization": {
+        "optimizer": {
+            "type": "SGD",
+            "lr": 0.001,
+            "momentum": 0.9,
+            "weight_decay": 0.0001
+        },
+        "runner": {
+            "max_epochs": 2
+        },
+        "nncf_config": {
+            "compression": [
+                {
+                    "algorithm": "quantization",
+                    "initializer": {
+                        "range": {
+                            "num_init_samples": 10
+                        },
+                        "batchnorm_adaptation": {
+                            "num_bn_adaptation_samples": 30
+                        }
+                    }
+                }
+            ]
+        }
+    },
+    "order_of_parts": [
+        "nncf_quantization"
+    ]
+}
\ No newline at end of file
diff --git a/configs/ote/person-detection/person-detection-0301/model.py b/configs/ote/person-detection/person-detection-0301/model.py
new file mode 100644
index 00000000000..1f210c975d4
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0301/model.py
@@ -0,0 +1,157 @@
+model = dict(  # VFNet detector: ResNet backbone -> FPN neck -> VFNetHead
+    type='VFNet',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=True,
+        style='pytorch'),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],  # four entries, same count as backbone.out_indices
+        out_channels=256,  # equals bbox_head.in_channels below
+        start_level=1,
+        add_extra_convs='on_output',
+        num_outs=5,  # same count as bbox_head.strides below
+        relu_before_extra_convs=True),
+    bbox_head=dict(
+        type='VFNetHead',
+        num_classes=1,  # single class; the datasets in this file list labels=('person',)
+        in_channels=256,
+        stacked_convs=3,
+        feat_channels=256,
+        strides=[8, 16, 32, 64, 128],
+        center_sampling=False,
+        dcn_on_last_conv=False,
+        use_atss=True,
+        use_vfl=True,
+        loss_cls=dict(
+            type='VarifocalLoss',
+            use_sigmoid=True,
+            alpha=0.75,
+            gamma=2.0,
+            iou_weighted=True,
+            loss_weight=1.0),
+        loss_bbox=dict(type='GIoULoss', loss_weight=1.5),
+        loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0)),
+    train_cfg=dict(
+        assigner=dict(type='ATSSAssigner', topk=9),
+        allowed_border=-1,
+        pos_weight=-1,
+        debug=False),
+    test_cfg=dict(
+        nms_pre=1000,
+        min_bbox_size=0,
+        score_thr=0.01,
+        nms=dict(type='nms', iou_threshold=0.6),  # accuracy-check.yml for this model also uses nms overlap 0.6
+        max_per_img=100))
+cudnn_benchmark = True
+
+dataset_type = 'CocoDataset'
+data_root = '../../data/airport/'  # relative path; every ann_file/img_prefix below is built from it
+test_pipeline = [  # referenced by data.val and data.test
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1344, 800),  # same 1344x800 as the resize in accuracy-check.yml and sample_size in compression_config.json
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=False),
+            dict(
+                type='Normalize',
+                mean=[123.675, 116.28, 103.53],
+                std=[58.395, 57.12, 57.375],
+                to_rgb=True),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img'])
+        ])
+]
+
+train_pipeline = [  # referenced by data.train
+    dict(type='LoadImageFromFile', to_float32=True),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(
+        type='PhotoMetricDistortion',
+        brightness_delta=32,
+        contrast_range=(0.5, 1.5),
+        saturation_range=(0.5, 1.5),
+        hue_delta=18),
+    dict(
+        type='MinIoURandomCrop',
+        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+        min_crop_size=0.1),
+    dict(
+        type='Resize',
+        img_scale=[(1344, 480), (1344, 960)],
+        multiscale_mode='range',
+        keep_ratio=False),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(
+        type='Normalize',
+        mean=[123.675, 116.28, 103.53],  # same mean/std as test_pipeline above
+        std=[58.395, 57.12, 57.375],
+        to_rgb=True),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+]
+
+data = dict(
+    samples_per_gpu=4,  # same value as batch_size.default_value in template_experimental.yaml
+    workers_per_gpu=4,
+    train=dict(
+        type='RepeatDataset',
+        times=5,
+        dataset=dict(
+            type=dataset_type,
+            labels=('person',),
+            ann_file=data_root + 'annotation_person_train.json',
+            min_size=20,
+            img_prefix=data_root + 'train',
+            pipeline=train_pipeline
+        )
+    ),
+    val=dict(
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline),
+    test=dict(  # same annotation file and image folder as `val`
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline))
+
+
+optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)  # lr matches template_experimental.yaml and compression_config.json
+optimizer_config = dict(grad_clip=None)
+lr_config = dict(
+    policy='step',
+    warmup='constant',
+    warmup_iters=500,  # same value as learning_rate_warmup_iters.default_value in template_experimental.yaml
+    warmup_ratio=1.0 / 3,
+    step=[10, 15, 18])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[dict(type='TextLoggerHook'),
+           dict(type='TensorboardLoggerHook')])
+# yapf:enable
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=20)  # NOTE(review): 0302/0303 configs use 'EpochRunnerWithCancel' - confirm this difference is intended
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = 'output'
+load_from = 'https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/object_detection/v2/person_detection_0301.pth'  # NOTE(review): readme.md links a different snapshot URL for this model - confirm which is current
+resume_from = None
+evaluation = dict(interval=1, metric='mAP', save_best='mAP')
+workflow = [('train', 1)]
diff --git a/configs/ote/person-detection/person-detection-0301/template_experimental.yaml b/configs/ote/person-detection/person-detection-0301/template_experimental.yaml
new file mode 100644
index 00000000000..f5ebf8848a0
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0301/template_experimental.yaml
@@ -0,0 +1,54 @@
+# Description.
+model_template_id: Person_Detection_0301
+name: person-detection-0301
+task_type: DETECTION
+task_family: VISION
+instantiation: "CLASS"
+summary: Accurate person detection model (Resnet50-VFnet).
+application:
+  ~
+
+# Algo backend.
+framework: OTEDetection v2.9.1
+
+# Task implementations.
+entrypoints:
+  base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask
+  openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask
+  nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask
+
+# Capabilities.
+capabilities:
+  - compute_representations
+
+# Hyperparameters.
+hyper_parameters:
+  base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml
+  parameter_overrides:
+    learning_parameters:
+      batch_size:
+        default_value: 4
+      learning_rate:
+        default_value: 0.001
+      learning_rate_warmup_iters:
+        default_value: 500
+      num_iters:
+        default_value: 20
+    nncf_optimization:
+      enable_quantization:
+        default_value: true
+      enable_pruning:
+        default_value: false
+      maximal_accuracy_degradation:
+        default_value: 0.01
+
+# Training resources.
+max_nodes: 1
+training_targets:
+  - GPU
+  - CPU
+
+# Stats.
+gigaflops: 198
+size: 32.48
+
diff --git a/configs/ote/person-detection/person-detection-0302/accuracy-check.yml b/configs/ote/person-detection/person-detection-0302/accuracy-check.yml
new file mode 100644
index 00000000000..4578634a011
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0302/accuracy-check.yml
@@ -0,0 +1,24 @@
+models:
+   - name: person-detection-0302
+
+     launchers:
+       - framework: dlsdk
+         adapter:
+
+           type: class_agnostic_detection
+           scale: [0.00078125, 0.0013888888]
+
+     datasets:
+       - name: crossroad_extra_untagged_person_hb
+         preprocessing:
+           - type: resize
+             dst_width: 1280
+             dst_height: 720
+
+         postprocessing:
+           - type: resize_prediction_boxes
+           - type: cast_to_int
+           - type: nms
+             overlap: 0.6
+           - type: clip_boxes
+             apply_to: prediction
diff --git a/configs/ote/person-detection/person-detection-0302/compression_config.json b/configs/ote/person-detection/person-detection-0302/compression_config.json
new file mode 100644
index 00000000000..990eca03574
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0302/compression_config.json
@@ -0,0 +1,46 @@
+{
+    "base": {
+        "find_unused_parameters": true,
+        "nncf_config": {
+            "input_info": {
+                "sample_size": [
+                    1,
+                    3,
+                    720,
+                    1280
+                ]
+            },
+            "compression": [],
+            "log_dir": "."
+        }
+    },
+    "nncf_quantization": {
+        "optimizer": {
+            "type": "SGD",
+            "lr": 0.001,
+            "momentum": 0.9,
+            "weight_decay": 0.0001
+        },
+        "runner": {
+            "max_epochs": 2
+        },
+        "nncf_config": {
+            "compression": [
+                {
+                    "algorithm": "quantization",
+                    "initializer": {
+                        "range": {
+                            "num_init_samples": 10
+                        },
+                        "batchnorm_adaptation": {
+                            "num_bn_adaptation_samples": 30
+                        }
+                    }
+                }
+            ]
+        }
+    },
+    "order_of_parts": [
+        "nncf_quantization"
+    ]
+}
\ No newline at end of file
diff --git a/configs/ote/person-detection/person-detection-0302/model.py b/configs/ote/person-detection/person-detection-0302/model.py
new file mode 100644
index 00000000000..d5b555da72e
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0302/model.py
@@ -0,0 +1,158 @@
+model = dict(  # ATSS detector: ResNet backbone -> FPN neck -> ATSSHead
+    type='ATSS',
+    pretrained='torchvision://resnet50',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=True,
+        style='pytorch'),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],  # four entries, same count as backbone.out_indices
+        out_channels=256,  # equals bbox_head.in_channels below
+        start_level=1,
+        add_extra_convs='on_output',
+        num_outs=5,  # same count as anchor_generator.strides below
+        relu_before_extra_convs=True),
+    bbox_head=dict(
+        type='ATSSHead',
+        num_classes=1,  # single class; the datasets in this file list labels=('person',)
+        in_channels=256,
+        stacked_convs=4,
+        feat_channels=256,
+        anchor_generator=dict(
+            type='AnchorGenerator',
+            ratios=[1.0],
+            octave_base_scale=8,
+            scales_per_octave=1,
+            strides=[8, 16, 32, 64, 128]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[0.0, 0.0, 0.0, 0.0],
+            target_stds=[0.1, 0.1, 0.2, 0.2]),
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.0),
+        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
+        loss_centerness=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+    train_cfg=dict(
+        assigner=dict(type='ATSSAssigner', topk=9),
+        allowed_border=-1,
+        pos_weight=-1,
+        debug=False),
+    test_cfg=dict(
+        nms_pre=1000,
+        min_bbox_size=0,
+        # score_thr=0.05,
+        score_thr=0.2,  # higher than the 0.05 used in person-detection-0303/model.py
+        nms=dict(type='nms', iou_threshold=0.6),  # accuracy-check.yml for this model also uses nms overlap 0.6
+        max_per_img=100))
+
+dataset_type = 'CocoDataset'
+data_root = '../../data/airport/'  # relative path; every ann_file/img_prefix below is built from it
+test_pipeline = [  # referenced by data.val and data.test
+        dict(type='LoadImageFromFile'),
+        dict(
+            type='MultiScaleFlipAug',
+            img_scale=(1280, 720),  # same 1280x720 as the resize in accuracy-check.yml and sample_size in compression_config.json
+            flip=False,
+            transforms=[
+                dict(type='Resize', keep_ratio=False),
+                dict(
+                    type='Normalize',
+                    mean=[0, 0, 0],
+                    std=[255, 255, 255],
+                    to_rgb=True),
+                dict(type='ImageToTensor', keys=['img']),  # NOTE(review): no Pad step here, unlike train_pipeline (size_divisor=32) - confirm intended
+                dict(type='Collect', keys=['img'])
+            ])
+]
+
+train_pipeline = [  # referenced by data.train
+        dict(type='LoadImageFromFile'),
+        dict(type='LoadAnnotations', with_bbox=True),
+        dict(
+            type='MinIoURandomCrop',
+            min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+            min_crop_size=0.3),
+        dict(
+            type='Resize',
+            img_scale=[(1280, 720), (896, 720), (1088, 720),
+                        (1280, 672), (1280, 800)],
+            multiscale_mode='value',
+            keep_ratio=False),
+        dict(type='RandomFlip', flip_ratio=0.5),
+        dict(
+            type='Normalize',
+            mean=[0, 0, 0],  # same mean/std as test_pipeline above
+            std=[255, 255, 255],
+            to_rgb=True),
+        dict(type='Pad', size_divisor=32),
+        dict(type='DefaultFormatBundle'),
+        dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+]
+
+data = dict(
+    samples_per_gpu=4,  # same value as batch_size.default_value in template_experimental.yaml
+    workers_per_gpu=4,
+    train=dict(
+        type='RepeatDataset',
+        times=5,
+        dataset=dict(
+            type=dataset_type,
+            labels=('person',),
+            ann_file=data_root + 'annotation_person_train.json',
+            min_size=20,
+            img_prefix=data_root + 'train',
+            pipeline=train_pipeline
+        )
+    ),
+    val=dict(
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline),
+    test=dict(  # same annotation file and image folder as `val`
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline))
+
+optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)  # lr matches template_experimental.yaml and compression_config.json
+optimizer_config = dict()
+lr_config = dict(
+    policy='ReduceLROnPlateau',
+    metric='bbox_mAP',
+    patience=5,
+    iteration_patience=600,
+    interval=1,
+    min_lr=9e-06,
+    warmup='linear',
+    warmup_iters=200,  # same value as learning_rate_warmup_iters.default_value in template_experimental.yaml
+    warmup_ratio=0.3333333333333333)
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=100,
+    hooks=[dict(type='TextLoggerHook'),
+           dict(type='TensorboardLoggerHook')])
+# yapf:enable
+runner = dict(type='EpochRunnerWithCancel', max_epochs=20)  # 20 also appears as num_iters.default_value in template_experimental.yaml
+evaluation = dict(interval=1, metric='mAP', save_best='mAP')
+dist_params = dict(backend='nccl')
+log_level = 'INFO'  # NOTE(review): no work_dir is set in this file, unlike the 0301/0303 configs - confirm intended
+load_from = 'https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/object_detection/v2/person_detection_0302.pth'  # NOTE(review): readme.md links a different snapshot URL for this model - confirm which is current
+resume_from = None
+workflow = [('train', 1)]
diff --git a/configs/ote/person-detection/person-detection-0302/template_experimental.yaml b/configs/ote/person-detection/person-detection-0302/template_experimental.yaml
new file mode 100644
index 00000000000..1a83e5c5367
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0302/template_experimental.yaml
@@ -0,0 +1,56 @@
+# Description.
+model_template_id: Person_Detection_0302
+name: person-detection-0302
+task_type: DETECTION
+task_family: VISION
+instantiation: "CLASS"
+summary: Moderate Person Detection model (Resnet50-ATSS).
+application:
+  ~
+
+# Algo backend.
+framework: OTEDetection v2.9.1
+
+# Task implementations.
+entrypoints:
+  base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask
+  openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask
+  nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask
+
+# Capabilities.
+capabilities:
+  - compute_representations
+
+# Hyperparameters.
+hyper_parameters:
+  base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml
+  parameter_overrides:
+    learning_parameters:
+      batch_size:
+        default_value: 4
+      learning_rate:
+        default_value: 0.001
+      learning_rate_warmup_iters:
+        default_value: 200
+      num_iters:
+        default_value: 20
+    nncf_optimization:
+      enable_quantization:
+        default_value: true
+      enable_pruning:
+        default_value: false
+      maximal_accuracy_degradation:
+        default_value: 0.01
+
+# Training resources.
+max_nodes: 1
+training_targets:
+  - GPU
+  - CPU
+
+# Stats.
+gigaflops: 181.28
+size: 31.89
+
+# Obsolete
+# gpu_nums: 2
diff --git a/configs/ote/person-detection/person-detection-0303/accuracy-check.yml b/configs/ote/person-detection/person-detection-0303/accuracy-check.yml
new file mode 100644
index 00000000000..aa1022a606f
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0303/accuracy-check.yml
@@ -0,0 +1,24 @@
+models:
+   - name: person-detection-0303
+
+     launchers:
+       - framework: dlsdk
+         adapter:
+
+           type: class_agnostic_detection
+           scale: [0.00078125, 0.0013888888]
+
+     datasets:
+       - name: crossroad_extra_untagged_person_hb
+         preprocessing:
+           - type: resize
+             dst_width: 1280
+             dst_height: 720
+
+         postprocessing:
+           - type: resize_prediction_boxes
+           - type: cast_to_int
+           - type: nms
+             overlap: 0.6
+           - type: clip_boxes
+             apply_to: prediction
diff --git a/configs/ote/person-detection/person-detection-0303/compression_config.json b/configs/ote/person-detection/person-detection-0303/compression_config.json
new file mode 100755
index 00000000000..5e6d697de2b
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0303/compression_config.json
@@ -0,0 +1,47 @@
+{
+    "base": {
+        "find_unused_parameters": true,
+        "nncf_config": {
+            "input_info": {
+                "sample_size": [
+                    1,
+                    3,
+                    720,
+                    1280
+                ]
+            },
+            "compression": [],
+            "log_dir": "."
+        }
+    },
+    "nncf_quantization": {
+        "optimizer": {
+            "type": "SGD",
+            "lr": 0.003,
+            "momentum": 0.9,
+            "weight_decay": 0.0001
+        },
+        "runner": {
+            "max_epochs": 2
+        },
+        "nncf_config": {
+            "compression": [
+                {
+                    "algorithm": "quantization",
+                    "initializer": {
+                        "range": {
+                            "num_init_samples": 10
+                        },
+                        "batchnorm_adaptation": {
+                            "num_bn_adaptation_samples": 30
+                        }
+                    }
+                }
+            ]
+        }
+    },
+    "order_of_parts": [
+        "nncf_quantization"
+    ]
+}
+
diff --git a/configs/ote/person-detection/person-detection-0303/model.py b/configs/ote/person-detection/person-detection-0303/model.py
new file mode 100644
index 00000000000..5ee37bc4e7c
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0303/model.py
@@ -0,0 +1,156 @@
+model = dict(  # ATSS detector: mobilenetv2_w1 backbone -> FPN neck -> ATSSHead
+    type='ATSS',
+    backbone=dict(
+        type='mobilenetv2_w1',
+        out_indices=(2, 3, 4, 5),
+        frozen_stages=-1,
+        norm_eval=False,
+        pretrained=False,
+        ),
+    neck=dict(
+        type='FPN',
+        in_channels=[24, 32, 96, 320],  # four entries, same count as backbone.out_indices
+        out_channels=64,  # equals bbox_head.in_channels below
+        start_level=1,
+        add_extra_convs=True,  # NOTE(review): 0301/0302 configs spell this as add_extra_convs='on_output' - presumably equivalent; confirm
+        extra_convs_on_inputs=False,
+        num_outs=5,  # same count as anchor_generator.strides below
+        relu_before_extra_convs=True),
+    bbox_head=dict(
+        type='ATSSHead',
+        num_classes=1,  # single class; the datasets in this file list labels=('person',)
+        in_channels=64,
+        stacked_convs=4,
+        feat_channels=64,
+        anchor_generator=dict(
+            type='AnchorGenerator',
+            ratios=[1.0],
+            octave_base_scale=8,
+            scales_per_octave=1,
+            strides=[8, 16, 32, 64, 128]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[0.0, 0.0, 0.0, 0.0],
+            target_stds=[0.1, 0.1, 0.2, 0.2]),
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.0),
+        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
+        loss_centerness=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+    train_cfg=dict(
+        assigner=dict(type='ATSSAssigner', topk=9),
+        allowed_border=-1,
+        pos_weight=-1,
+        debug=False),
+    test_cfg=dict(
+        nms_pre=1000,
+        min_bbox_size=0,
+        score_thr=0.05,
+        nms=dict(type='nms', iou_threshold=0.6),  # accuracy-check.yml for this model also uses nms overlap 0.6
+        max_per_img=100))
+
+dataset_type = 'CocoDataset'
+data_root = '../../data/airport/'  # relative path; every ann_file/img_prefix below is built from it
+test_pipeline = [  # referenced by data.val and data.test
+        dict(type='LoadImageFromFile'),
+        dict(
+            type='MultiScaleFlipAug',
+            img_scale=(1280, 720),  # same 1280x720 as the resize in accuracy-check.yml and sample_size in compression_config.json
+            flip=False,
+            transforms=[
+                dict(type='Resize', keep_ratio=False),
+                dict(
+                    type='Normalize',
+                    mean=[0, 0, 0],
+                    std=[255, 255, 255],
+                    to_rgb=True),
+                dict(type='ImageToTensor', keys=['img']),  # NOTE(review): no Pad step here, unlike train_pipeline (size_divisor=32) - confirm intended
+                dict(type='Collect', keys=['img'])
+            ])
+]
+
+train_pipeline = [  # referenced by data.train
+        dict(type='LoadImageFromFile'),
+        dict(type='LoadAnnotations', with_bbox=True),
+        dict(
+            type='MinIoURandomCrop',
+            min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+            min_crop_size=0.3),
+        dict(
+            type='Resize',
+            img_scale=[(1280, 720), (896, 720), (1088, 720),
+                        (1280, 672), (1280, 800)],
+            multiscale_mode='value',
+            keep_ratio=False),
+        dict(type='RandomFlip', flip_ratio=0.5),
+        dict(
+            type='Normalize',
+            mean=[0, 0, 0],  # same mean/std as test_pipeline above
+            std=[255, 255, 255],
+            to_rgb=True),
+        dict(type='Pad', size_divisor=32),
+        dict(type='DefaultFormatBundle'),
+        dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+]
+
+data = dict(
+    samples_per_gpu=9,  # same value as batch_size.default_value in template_experimental.yaml
+    workers_per_gpu=4,
+    train=dict(
+        type='RepeatDataset',
+        times=5,
+        dataset=dict(
+            type=dataset_type,
+            labels=('person',),
+            ann_file=data_root + 'annotation_person_train.json',
+            min_size=20,
+            img_prefix=data_root + 'train',
+            pipeline=train_pipeline
+        )
+    ),
+    val=dict(
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline),
+    test=dict(  # same annotation file and image folder as `val`
+        type=dataset_type,
+        labels=('person',),
+        ann_file=data_root + 'annotation_person_val.json',
+        img_prefix=data_root + 'val',
+        test_mode=True,
+        pipeline=test_pipeline))
+
+optimizer = dict(type='SGD', lr=0.003, momentum=0.9, weight_decay=0.0001)  # lr matches template_experimental.yaml and compression_config.json
+optimizer_config = dict()
+lr_config = dict(
+    policy='ReduceLROnPlateau',
+    metric='bbox_mAP',
+    patience=5,
+    iteration_patience=600,
+    interval=1,
+    min_lr=9e-06,
+    warmup='linear',
+    warmup_iters=200,  # NOTE(review): template_experimental.yaml sets learning_rate_warmup_iters default to 600 - confirm which is intended
+    warmup_ratio=0.3333333333333333)
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=100,
+    hooks=[dict(type='TextLoggerHook'),
+           dict(type='TensorboardLoggerHook')])
+# yapf:enable
+runner = dict(type='EpochRunnerWithCancel', max_epochs=10)  # 10 also appears as num_iters.default_value in template_experimental.yaml
+evaluation = dict(interval=1, metric='mAP', save_best='mAP')
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = 'output'
+load_from = 'https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/object_detection/v2/person_detection_0303.pth'  # NOTE(review): readme.md links a different snapshot URL for this model - confirm which is current
+resume_from = None
+workflow = [('train', 1)]
diff --git a/configs/ote/person-detection/person-detection-0303/template_experimental.yaml b/configs/ote/person-detection/person-detection-0303/template_experimental.yaml
new file mode 100644
index 00000000000..7eea3088b80
--- /dev/null
+++ b/configs/ote/person-detection/person-detection-0303/template_experimental.yaml
@@ -0,0 +1,56 @@
+# Description.
+model_template_id: Person_Detection_0303
+name: person-detection-0303
+task_type: DETECTION
+task_family: VISION
+instantiation: "CLASS"
+summary: Fast Person Detection model (MobileNetV2-ATSS).
+application:
+  ~
+
+# Algo backend.
+framework: OTEDetection v2.9.1
+
+# Task implementations.
+entrypoints:
+  base: mmdet.apis.ote.apis.detection.OTEDetectionTrainingTask
+  openvino: mmdet.apis.ote.apis.detection.OpenVINODetectionTask
+  nncf: mmdet.apis.ote.apis.detection.OTEDetectionNNCFTask
+
+# Capabilities.
+capabilities:
+  - compute_representations
+
+# Hyperparameters.
+hyper_parameters:
+  base_path: ../../../../mmdet/apis/ote/apis/detection/configuration.yaml
+  parameter_overrides:
+    learning_parameters:
+      batch_size:
+        default_value: 9
+      learning_rate:
+        default_value: 0.003
+      learning_rate_warmup_iters:
+        default_value: 600
+      num_iters:
+        default_value: 10
+    nncf_optimization:
+      enable_quantization:
+        default_value: true
+      enable_pruning:
+        default_value: false
+      maximal_accuracy_degradation:
+        default_value: 0.01
+
+# Training resources.
+max_nodes: 1
+training_targets:
+  - GPU
+  - CPU
+
+# Stats.
+gigaflops: 12.31  # TODO: verify with model-analyzer
+size: 2.33
+
+# Obsolete
+# gpu_nums: 4
diff --git a/configs/ote/person-detection/readme.md b/configs/ote/person-detection/readme.md
new file mode 100644
index 00000000000..30a9e32974a
--- /dev/null
+++ b/configs/ote/person-detection/readme.md
@@ -0,0 +1,11 @@
+# Person Detection
+
+| Model Name | Complexity (GFLOPs) | Size (Mp) | AP @ [IoU=0.50:0.95] (%) | Links | GPU_NUM |
+| --- | --- | --- | --- | --- | --- |
+| person-detection-0200 | 0.82 | 1.83 | 24.4 | [snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0200-1.pth), [config](./person-detection-0200/template.yaml) | 2 |
+| person-detection-0201 | 1.84 | 1.83 | 29.9 | [snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0201-1.pth), [config](./person-detection-0201/template.yaml) | 4 |
+| person-detection-0202 | 3.28 | 1.83 | 32.8 | [snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0202-1.pth), [config](./person-detection-0202/template.yaml) | 2 |
+| person-detection-0203 | 6.74 | 1.95 | 40.8 | [snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0203.pth), [config](./person-detection-0203/template_experimental.yaml) | 2 |
+| person-detection-0301 | 198 | 32.48 |  | [snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0301-1.pth), [config](./person-detection-0301/template_experimental.yaml) | 4 |
+| person-detection-0302 | 181.28 | 31.89 |  | [snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0302-1.pth), [config](./person-detection-0302/template_experimental.yaml) | 2 |
+| person-detection-0303 | 12.31 | 2.33 |  | [snapshot](https://download.01.org/opencv/openvino_training_extensions/models/object_detection/v2/person-detection-0303.pth), [config](./person-detection-0303/template_experimental.yaml) | 2 |
diff --git a/tests/test_ote_api.py b/tests/test_ote_api.py
new file mode 100644
index 00000000000..42e4b5cc51e
--- /dev/null
+++ b/tests/test_ote_api.py
@@ -0,0 +1,590 @@
+# Copyright (C) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+
+import glob
+import io
+import os.path as osp
+import random
+import time
+import unittest
+import warnings
+from concurrent.futures import ThreadPoolExecutor
+from subprocess import run  # nosec
+from typing import Optional
+
+import numpy as np
+import torch
+from bson import ObjectId
+from ote_sdk.test_suite.e2e_test_system import e2e_pytest_api
+from ote_sdk.configuration.helper import convert, create
+from ote_sdk.entities.annotation import AnnotationSceneEntity, AnnotationSceneKind
+from ote_sdk.entities.dataset_item import DatasetItemEntity
+from ote_sdk.entities.datasets import DatasetEntity
+from ote_sdk.entities.image import Image
+from ote_sdk.entities.inference_parameters import InferenceParameters
+from ote_sdk.entities.model_template import TaskType, task_type_to_label_domain
+from ote_sdk.entities.metrics import Performance
+from ote_sdk.entities.model import ModelEntity, ModelFormat, ModelOptimizationType
+from ote_sdk.entities.model_template import parse_model_template
+from ote_sdk.entities.optimization_parameters import OptimizationParameters
+from ote_sdk.entities.resultset import ResultSetEntity
+from ote_sdk.entities.subset import Subset
+from ote_sdk.entities.task_environment import TaskEnvironment
+from ote_sdk.entities.train_parameters import TrainParameters
+from ote_sdk.tests.test_helpers import generate_random_annotated_image
+from ote_sdk.usecases.tasks.interfaces.export_interface import ExportType, IExportTask
+from ote_sdk.usecases.tasks.interfaces.optimization_interface import OptimizationType
+from ote_sdk.utils.shape_factory import ShapeFactory
+
+from mmdet.apis.ote.apis.detection import (OpenVINODetectionTask, OTEDetectionConfig,
+                                           OTEDetectionInferenceTask,
+                                           OTEDetectionNNCFTask, OTEDetectionTrainingTask)
+from mmdet.apis.ote.apis.detection.ote_utils import generate_label_schema
+from mmdet.integration.nncf.utils import is_nncf_enabled
+
+DEFAULT_TEMPLATE_DIR = osp.join('configs', 'ote', 'custom-object-detection', 'gen3_mobilenetV2_ATSS')
+
+class ModelTemplate(unittest.TestCase):
+    """Checks that the custom object detection templates parse with the expected capabilities."""
+    def check_capabilities(self, template):
+        """Assert the template's capability flags and that exactly one capability is listed."""
+        self.assertTrue(template.computes_representations())
+        self.assertFalse(template.computes_uncertainty_score())
+        self.assertEqual(len(template.capabilities), 1)
+
+    @e2e_pytest_api
+    def test_reading_gen3_ssd(self):
+        yaml_path = osp.join('configs', 'ote', 'custom-object-detection', 'gen3_mobilenetV2_SSD', 'template.yaml')
+        self.check_capabilities(parse_model_template(yaml_path))
+
+    @e2e_pytest_api
+    def test_reading_gen3_atss(self):
+        yaml_path = osp.join('configs', 'ote', 'custom-object-detection', 'gen3_mobilenetV2_ATSS', 'template.yaml')
+        self.check_capabilities(parse_model_template(yaml_path))
+
+    @e2e_pytest_api
+    def test_reading_gen3_vfnet(self):
+        yaml_path = osp.join('configs', 'ote', 'custom-object-detection', 'gen3_resnet50_VFNet', 'template_experimental.yaml')
+        self.check_capabilities(parse_model_template(yaml_path))
+
+    @e2e_pytest_api
+    def test_reading_yolox(self):
+        yaml_path = osp.join('configs', 'ote', 'custom-object-detection', 'cspdarknet_YOLOX', 'template.yaml')
+        self.check_capabilities(parse_model_template(yaml_path))
+
+
+@e2e_pytest_api
+def test_configuration_yaml():
+    """OTEDetectionConfig converted to str and re-created must equal the one created from configuration.yaml."""
+    yaml_text = convert(OTEDetectionConfig(), str)
+    from_object = create(yaml_text)
+    from_file = create(osp.join('mmdet', 'apis', 'ote', 'apis', 'detection', 'configuration.yaml'))
+    assert from_object == from_file
+
+
+class Sample(unittest.TestCase):
+    """Smoke tests that run the OTE sample script in a subprocess."""
+    template = osp.join(DEFAULT_TEMPLATE_DIR, 'template.yaml')
+
+    @e2e_pytest_api
+    def test_sample_on_cpu(self):
+        # An empty CUDA_VISIBLE_DEVICES is exported before the script is launched.
+        command = ('export CUDA_VISIBLE_DEVICES=;'
+                   'python mmdet/apis/ote/sample/sample.py '
+                   f'--export {self.template}')
+        assert run(command, shell=True, check=True).returncode == 0
+
+    @e2e_pytest_api
+    def test_sample_on_gpu(self):
+        command = 'python mmdet/apis/ote/sample/sample.py ' f'--export {self.template}'
+        completed = run(command, shell=True, check=True)
+        assert completed.returncode == 0
+
+
+class API(unittest.TestCase):
+    """
+    Collection of tests for OTE API and OTE Model Templates
+    """
+
+    def init_environment(
+            self,
+            params,
+            model_template,
+            number_of_images=500,
+            task_type=TaskType.DETECTION):
+        """
+        Build a TaskEnvironment and a randomly generated dataset of annotated 640x480 images.
+
+        :param params: hyper parameters stored in the environment
+        :param model_template: model template stored in the environment
+        :param number_of_images: number of images to generate
+        :param task_type: task type; INSTANCE_SEGMENTATION gets polygons, anything else rectangles
+        :return: tuple (environment, dataset); items are shuffled, then split by position into
+            60% training, 20% validation and 20% testing
+        """
+        labels_names = ('rectangle', 'ellipse', 'triangle')
+        labels_schema = generate_label_schema(labels_names, task_type_to_label_domain(task_type))
+        labels_list = labels_schema.get_labels(False)
+        environment = TaskEnvironment(model=None, hyper_parameters=params, label_schema=labels_schema,
+                                      model_template=model_template)
+
+        convert_shape = (ShapeFactory.shape_as_polygon if task_type == TaskType.INSTANCE_SEGMENTATION
+                         else ShapeFactory.shape_as_rectangle)
+        items = []
+        # catch_warnings restores the previous filters on exit; warnings.resetwarnings() would
+        # instead wipe every filter, including the ones installed by the test runner.
+        with warnings.catch_warnings():
+            warnings.filterwarnings('ignore', message='.* coordinates .* are out of bounds.*')
+            for _ in range(number_of_images):
+                image_numpy, annos = generate_random_annotated_image(
+                    image_width=640, image_height=480, labels=labels_list,
+                    max_shapes=20, min_size=50, max_size=100, random_seed=None)
+                # Convert shapes according to task
+                for anno in annos:
+                    anno.shape = convert_shape(anno.shape)
+                annotation_scene = AnnotationSceneEntity(
+                    kind=AnnotationSceneKind.ANNOTATION, annotations=annos)
+                items.append(DatasetItemEntity(media=Image(data=image_numpy),
+                                               annotation_scene=annotation_scene))
+
+        random.Random().shuffle(items)
+        for i, item in enumerate(items):
+            subset_region = i / number_of_images
+            if subset_region >= 0.8:
+                item.subset = Subset.TESTING
+            elif subset_region >= 0.6:
+                item.subset = Subset.VALIDATION
+            else:
+                item.subset = Subset.TRAINING
+
+        return environment, DatasetEntity(items)
+
+    def setup_configurable_parameters(self, template_dir, num_iters=10):
+        """Load the first (sorted) template*.yaml of template_dir; return (hyper_parameters, model_template)."""
+        # Sorted because glob returns matches in arbitrary order; keeps the choice deterministic.
+        template_paths = sorted(glob.glob(f'{template_dir}/template*.yaml'))
+        if not template_paths:
+            raise RuntimeError(f"Template YAML not found: {template_dir}")
+        model_template = parse_model_template(template_paths[0])
+        hyper_parameters = create(model_template.hyper_parameters.data)
+        hyper_parameters.learning_parameters.num_iters = num_iters
+        hyper_parameters.postprocessing.result_based_confidence_threshold = False
+        hyper_parameters.postprocessing.confidence_threshold = 0.1
+        return hyper_parameters, model_template
+
+    @e2e_pytest_api
+    def test_cancel_training_detection(self):
+        """
+        Tests starting and cancelling training.
+
+        Flow of the test:
+        - Creates a randomly annotated project with a small dataset containing 3 classes:
+            ['rectangle', 'ellipse', 'triangle'].
+        - Start training, poll every 10 seconds until the task reports that it is training, then give
+            the cancel signal. Assert that the last reported progress is 100 and that the whole run
+            took less than 100 seconds.
+        - Start training and give cancel signal as soon as the task reports that it is training.
+            Assert that the whole run took less than 25 seconds.
+        """
+        hyper_parameters, model_template = self.setup_configurable_parameters(DEFAULT_TEMPLATE_DIR, num_iters=500)
+        detection_environment, dataset = self.init_environment(hyper_parameters, model_template, 64)
+
+        detection_task = OTEDetectionTrainingTask(task_environment=detection_environment)
+        self.addCleanup(detection_task._delete_scratch_space)
+
+        executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix='train_thread')
+        self.addCleanup(executor.shutdown)
+
+        output_model = ModelEntity(dataset, detection_environment.get_model_configuration())
+
+        training_progress_curve = []
+        def progress_callback(progress: float, score: Optional[float] = None):
+            training_progress_curve.append(progress)
+
+        # Use an instance: assigning to the TrainParameters class would leak the callback into other tests.
+        train_parameters = TrainParameters()
+        train_parameters.update_progress = progress_callback
+
+        # Test stopping after some time
+        start_time = time.time()
+        train_future = executor.submit(detection_task.train, dataset, output_model, train_parameters)
+        # give train_thread some time to initialize the model; stop waiting if it already finished or failed
+        while not detection_task._is_training and not train_future.done():
+            time.sleep(10)
+        detection_task.cancel_training()
+
+        # result() re-raises any training error; the run has to finish in less than 100 seconds
+        train_future.result()
+        self.assertEqual(training_progress_curve[-1], 100)
+        self.assertLess(time.time() - start_time, 100, 'Expected to stop within 100 seconds.')
+
+        # Test stopping immediately (as soon as training is started).
+        start_time = time.time()
+        train_future = executor.submit(detection_task.train, dataset, output_model)
+        while not detection_task._is_training and not train_future.done():
+            time.sleep(0.1)
+        detection_task.cancel_training()
+
+        train_future.result()
+        self.assertLess(time.time() - start_time, 25)  # stopping process has to happen in less than 25 seconds
+
+    @e2e_pytest_api
+    def test_training_progress_tracking(self):
+        """Check that training reports progress and that the reported values never decrease."""
+        hyper_parameters, model_template = self.setup_configurable_parameters(DEFAULT_TEMPLATE_DIR, num_iters=5)
+        detection_environment, dataset = self.init_environment(hyper_parameters, model_template, 50)
+
+        task = OTEDetectionTrainingTask(task_environment=detection_environment)
+        self.addCleanup(task._delete_scratch_space)
+
+        print('Task initialized, model training starts.')
+        training_progress_curve = []
+
+        def progress_callback(progress: float, score: Optional[float] = None):
+            training_progress_curve.append(progress)
+
+        # Instantiate the parameters: setting update_progress on the class would affect other tests.
+        train_parameters = TrainParameters()
+        train_parameters.update_progress = progress_callback
+        output_model = ModelEntity(dataset, detection_environment.get_model_configuration())
+
+        task.train(dataset, output_model, train_parameters)
+
+        self.assertGreater(len(training_progress_curve), 0)
+        training_progress_curve = np.asarray(training_progress_curve)
+        self.assertTrue(np.all(training_progress_curve[1:] >= training_progress_curve[:-1]))
+
+    @e2e_pytest_api
+    def test_nncf_optimize_progress_tracking(self):
+        """Check that NNCF optimization reports integer progress values that never decrease."""
+        if not is_nncf_enabled():
+            self.skipTest("Required NNCF module.")
+
+        # Prepare pretrained weights
+        hyper_parameters, model_template = self.setup_configurable_parameters(DEFAULT_TEMPLATE_DIR, num_iters=2)
+        detection_environment, dataset = self.init_environment(hyper_parameters, model_template, 50)
+
+        task = OTEDetectionTrainingTask(task_environment=detection_environment)
+        self.addCleanup(task._delete_scratch_space)
+
+        original_model = ModelEntity(
+            dataset,
+            detection_environment.get_model_configuration(),
+        )
+        task.train(dataset, original_model, TrainParameters())
+
+        # Create NNCFTask
+        detection_environment.model = original_model
+        nncf_task = OTEDetectionNNCFTask(task_environment=detection_environment)
+        self.addCleanup(nncf_task._delete_scratch_space)
+
+        # Rewrite some parameters to spend less time
+        nncf_task._config["runner"]["max_epochs"] = 10
+        nncf_init_cfg = nncf_task._config["nncf_config"]["compression"][0]["initializer"]
+        nncf_init_cfg["range"]["num_init_samples"] = 1
+        nncf_init_cfg["batchnorm_adaptation"]["num_bn_adaptation_samples"] = 1
+
+        print('Task initialized, model optimization starts.')
+        training_progress_curve = []
+
+        def progress_callback(progress: int):
+            assert isinstance(progress, int)
+            training_progress_curve.append(progress)
+
+        # Parameter objects are instantiated so that the callback is not stored on the shared
+        # OptimizationParameters class, where it would leak into other tests.
+        optimization_parameters = OptimizationParameters()
+        optimization_parameters.update_progress = progress_callback
+        nncf_model = ModelEntity(dataset, detection_environment.get_model_configuration())
+
+        nncf_task.optimize(OptimizationType.NNCF, dataset, nncf_model, optimization_parameters)
+
+        self.assertGreater(len(training_progress_curve), 0)
+        training_progress_curve = np.asarray(training_progress_curve)
+        self.assertTrue(np.all(training_progress_curve[1:] >= training_progress_curve[:-1]))
+
+    @e2e_pytest_api
+    def test_inference_progress_tracking(self):
+        """Check that inference reports integer progress values that never decrease."""
+        hyper_parameters, model_template = self.setup_configurable_parameters(DEFAULT_TEMPLATE_DIR, num_iters=10)
+        detection_environment, dataset = self.init_environment(hyper_parameters, model_template, 50)
+
+        task = OTEDetectionTrainingTask(task_environment=detection_environment)
+        self.addCleanup(task._delete_scratch_space)
+        print('Task initialized, model inference starts.')
+        inference_progress_curve = []
+
+        def progress_callback(progress: int):
+            assert isinstance(progress, int)
+            inference_progress_curve.append(progress)
+
+        # An instance is used so the callback is not stored on the shared InferenceParameters class.
+        inference_parameters = InferenceParameters()
+        inference_parameters.update_progress = progress_callback
+        task.infer(dataset.with_empty_annotations(), inference_parameters)
+
+        self.assertGreater(len(inference_progress_curve), 0)
+        inference_progress_curve = np.asarray(inference_progress_curve)
+        self.assertTrue(np.all(inference_progress_curve[1:] >= inference_progress_curve[:-1]))
+
+    @e2e_pytest_api
+    def test_inference_task(self):
+        """Check that an inference task created from a trained model reproduces the training task's score."""
+        # Prepare pretrained weights
+        hyper_parameters, model_template = self.setup_configurable_parameters(DEFAULT_TEMPLATE_DIR, num_iters=2)
+        detection_environment, dataset = self.init_environment(hyper_parameters, model_template, 50)
+        val_dataset = dataset.get_subset(Subset.VALIDATION)
+
+        train_task = OTEDetectionTrainingTask(task_environment=detection_environment)
+        self.addCleanup(train_task._delete_scratch_space)
+
+        trained_model = ModelEntity(
+            dataset, detection_environment.get_model_configuration())
+        # Pass a TrainParameters instance rather than the class itself.
+        train_task.train(dataset, trained_model, TrainParameters())
+        performance_after_train = self.eval(train_task, trained_model, val_dataset)
+
+        # Create InferenceTask
+        detection_environment.model = trained_model
+        inference_task = OTEDetectionInferenceTask(task_environment=detection_environment)
+        self.addCleanup(inference_task._delete_scratch_space)
+
+        performance_after_load = self.eval(inference_task, trained_model, val_dataset)
+
+        assert performance_after_train == performance_after_load
+
+        # Export (only checks that the export call completes)
+        exported_model = ModelEntity(
+            dataset,
+            detection_environment.get_model_configuration(),
+            _id=ObjectId())
+        inference_task.export(ExportType.OPENVINO, exported_model)
+
+    @staticmethod
+    def eval(task: OTEDetectionTrainingTask, model: ModelEntity, dataset: DatasetEntity) -> Performance:
+        """Run timed inference on dataset without its annotations, evaluate it and return the performance."""
+        start_time = time.time()
+        predictions = task.infer(dataset.with_empty_annotations())
+        end_time = time.time()
+        print(f'{len(dataset)} analysed in {end_time - start_time} seconds')
+        result_set = ResultSetEntity(model=model, ground_truth_dataset=dataset,
+                                     prediction_dataset=predictions)
+        # evaluate() is expected to fill in result_set.performance.
+        task.evaluate(result_set)
+        performance = result_set.performance
+        assert performance is not None
+        return performance
+
+    def check_threshold(self, reference, value, delta_tolerance, message=''):
+        """Assert that the scores of value and reference differ by at most delta_tolerance."""
+        # message is prefixed to a description of both scores and the allowed tolerance.
+        details = (
+            f' (reference metric: {reference.score.value}, '
+            f'actual value: {value.score.value}, '
+            f'delta tolerance threshold: {delta_tolerance})'
+        )
+        difference = np.abs(value.score.value - reference.score.value)
+        self.assertLessEqual(difference, delta_tolerance, msg=message + details)
+
+    def end_to_end(
+            self,
+            template_dir,
+            num_iters=5,
+            quality_score_threshold=0.5,
+            reload_perf_delta_tolerance=0.0,
+            export_perf_delta_tolerance=0.0005,
+            pot_perf_delta_tolerance=0.1,
+            nncf_perf_delta_tolerance=0.1,
+            task_type=TaskType.DETECTION):
+        """
+        Train a template on a generated dataset, then check model reload, OpenVINO export, POT and NNCF.
+
+        :param template_dir: directory that contains the model template YAML
+        :param num_iters: number of iterations of the first training round
+        :param quality_score_threshold: validation score that the trained model has to exceed
+        :param reload_perf_delta_tolerance: allowed score difference after reloading the model
+        :param export_perf_delta_tolerance: allowed score difference after OpenVINO export
+        :param pot_perf_delta_tolerance: allowed score difference after POT optimization
+        :param nncf_perf_delta_tolerance: allowed score difference after NNCF optimization
+        :param task_type: task type passed to init_environment when the dataset is generated
+        """
+        hyper_parameters, model_template = self.setup_configurable_parameters(
+            template_dir, num_iters=num_iters)
+        detection_environment, dataset = self.init_environment(
+            hyper_parameters, model_template, 250, task_type=task_type)
+
+        val_dataset = dataset.get_subset(Subset.VALIDATION)
+        task = OTEDetectionTrainingTask(task_environment=detection_environment)
+        self.addCleanup(task._delete_scratch_space)
+
+        print('Task initialized, model training starts.')
+        # Train the task.
+        # The result is checked below: the saved weights must have the expected layout and the
+        # validation score must be higher than the threshold, which is a pretty low bar
+        # considering that the dataset is so easy
+        output_model = ModelEntity(
+            dataset, detection_environment.get_model_configuration(), _id=ObjectId())
+        task.train(dataset, output_model)
+
+        # Test that output model is valid.
+        modelinfo = torch.load(io.BytesIO(output_model.get_data("weights.pth")))
+        modelinfo.pop('anchors', None)
+        self.assertEqual(list(modelinfo.keys()), ['model', 'config', 'confidence_threshold', 'VERSION'])
+
+        # Run inference.
+        validation_performance = self.eval(task, output_model, val_dataset)
+        print(f'Performance: {validation_performance.score.value:.4f}')
+        self.assertGreater(validation_performance.score.value, quality_score_threshold,
+            f'Expected F-measure to be higher than {quality_score_threshold}')
+
+        # Run another training round.
+        first_model = output_model
+        new_model = ModelEntity(
+            dataset, detection_environment.get_model_configuration(), _id=ObjectId())
+        task._hyperparams.learning_parameters.num_iters = 1
+        task.train(dataset, new_model)
+        self.assertNotEqual(first_model, new_model)
+        self.assertNotEqual(first_model.get_data("weights.pth"), new_model.get_data("weights.pth"))
+
+        # Reload task with the first model.
+        detection_environment.model = first_model
+        task = OTEDetectionTrainingTask(detection_environment)
+        # The reloaded task is a new object, so it needs its own cleanup.
+        self.addCleanup(task._delete_scratch_space)
+        self.assertEqual(task._task_environment.model.id, first_model.id)
+
+        print('Reevaluating model.')
+        # Performance should be the same after reloading
+        performance_after_reloading = self.eval(task, output_model, val_dataset)
+        print(f'Performance after reloading: {performance_after_reloading.score.value:.4f}')
+        self.check_threshold(validation_performance, performance_after_reloading, reload_perf_delta_tolerance,
+            'Too big performance difference after model reload.')
+
+        if isinstance(task, IExportTask):
+            # Run export.
+            exported_model = ModelEntity(
+                dataset, detection_environment.get_model_configuration(), _id=ObjectId())
+            task.export(ExportType.OPENVINO, exported_model)
+            self.assertEqual(exported_model.model_format, ModelFormat.OPENVINO)
+            self.assertEqual(exported_model.optimization_type, ModelOptimizationType.MO)
+
+            # Create OpenVINO Task and evaluate the model.
+            detection_environment.model = exported_model
+            ov_task = OpenVINODetectionTask(detection_environment)
+            predicted_validation_dataset = ov_task.infer(val_dataset.with_empty_annotations())
+            resultset = ResultSetEntity(
+                model=output_model, ground_truth_dataset=val_dataset,
+                prediction_dataset=predicted_validation_dataset)
+            ov_task.evaluate(resultset)
+            export_performance = resultset.performance
+            assert export_performance is not None
+            print(f'Performance of exported model: {export_performance.score.value:.4f}')
+            self.check_threshold(validation_performance, export_performance, export_perf_delta_tolerance,
+                'Too big performance difference after OpenVINO export.')
+
+            # Run POT optimization and evaluate the result.
+            print('Run POT optimization.')
+            optimized_model = ModelEntity(dataset, detection_environment.get_model_configuration())
+            ov_task.optimize(OptimizationType.POT, dataset, optimized_model, OptimizationParameters())
+            pot_performance = self.eval(ov_task, optimized_model, val_dataset)
+            print(f'Performance of optimized model: {pot_performance.score.value:.4f}')
+            self.check_threshold(validation_performance, pot_performance, pot_perf_delta_tolerance,
+                'Too big performance difference after POT optimization.')
+
+        if model_template.entrypoints.nncf:
+            if is_nncf_enabled():
+                print('Run NNCF optimization.')
+                nncf_model = ModelEntity(dataset, detection_environment.get_model_configuration())
+                nncf_model.set_data('weights.pth', output_model.get_data("weights.pth"))
+
+                detection_environment.model = nncf_model
+
+                nncf_task = OTEDetectionNNCFTask(task_environment=detection_environment)
+                self.addCleanup(nncf_task._delete_scratch_space)
+
+                nncf_task.optimize(OptimizationType.NNCF, dataset, nncf_model, OptimizationParameters())
+                nncf_task.save_model(nncf_model)
+                nncf_performance = self.eval(nncf_task, nncf_model, val_dataset)
+
+                print(f'Performance of NNCF model: {nncf_performance.score.value:.4f}')
+                self.check_threshold(validation_performance, nncf_performance, nncf_perf_delta_tolerance,
+                    'Too big performance difference after NNCF optimization.')
+            else:
+                print('Skipped test of OTEDetectionNNCFTask. Required NNCF module.')
+
+    @e2e_pytest_api
+    def test_training_gen3_ssd(self):
+        self.end_to_end(osp.join('configs', 'ote', 'custom-object-detection', 'gen3_mobilenetV2_SSD'))
+
+    @e2e_pytest_api
+    def test_training_gen3_atss(self):
+        self.end_to_end(osp.join('configs', 'ote', 'custom-object-detection', 'gen3_mobilenetV2_ATSS'))
+
+    @e2e_pytest_api
+    def test_training_gen3_vfnet(self):
+        template_dir = osp.join('configs', 'ote', 'custom-object-detection', 'gen3_resnet50_VFNet')
+        self.end_to_end(template_dir, export_perf_delta_tolerance=0.01)
+
+    @e2e_pytest_api
+    def test_training_yolox(self):
+        """Run the end-to-end flow for the cspdarknet_YOLOX template."""
+        self.end_to_end(osp.join('configs', 'ote', 'custom-object-detection', 'cspdarknet_YOLOX'))
+
+    @e2e_pytest_api
+    def test_training_pers_det_0200(self):
+        """Run the end-to-end flow for the person-detection-0200 template."""
+        self.end_to_end(osp.join('configs', 'ote', 'person-detection', 'person-detection-0200'))
+
+    @e2e_pytest_api
+    def test_training_pers_det_0201(self):
+        """Run the end-to-end flow for the person-detection-0201 template."""
+        self.end_to_end(osp.join('configs', 'ote', 'person-detection', 'person-detection-0201'))
+
+    @e2e_pytest_api
+    def test_training_pers_det_0202(self):
+        """Run the end-to-end flow for the person-detection-0202 template."""
+        self.end_to_end(osp.join('configs', 'ote', 'person-detection', 'person-detection-0202'))
+
+    @e2e_pytest_api
+    def test_training_pers_det_0203(self):
+        """Run the end-to-end flow for the person-detection-0203 template."""
+        self.end_to_end(osp.join('configs', 'ote', 'person-detection', 'person-detection-0203'))
+
+    @e2e_pytest_api
+    def test_training_pers_det_0301(self):
+        """Run the end-to-end flow for the person-detection-0301 template."""
+        self.end_to_end(osp.join('configs', 'ote', 'person-detection', 'person-detection-0301'))
+
+    @e2e_pytest_api
+    def test_training_pers_det_0302(self):
+        """Run the end-to-end flow for the person-detection-0302 template."""
+        self.end_to_end(osp.join('configs', 'ote', 'person-detection', 'person-detection-0302'))
+
+    @e2e_pytest_api
+    def test_training_pers_det_0303(self):
+        """Run the end-to-end flow for the person-detection-0303 template."""
+        self.end_to_end(osp.join('configs', 'ote', 'person-detection', 'person-detection-0303'))
+
+
+    @e2e_pytest_api
+    def test_training_maskrcnn_resnet50(self):
+        """Run the end-to-end flow for the resnet50_maskrcnn template as instance segmentation."""
+        template_dir = osp.join('configs', 'ote', 'custom-counting-instance-seg', 'resnet50_maskrcnn')
+        self.end_to_end(template_dir, task_type=TaskType.INSTANCE_SEGMENTATION)
+
+    @e2e_pytest_api
+    def test_training_maskrcnn_efficientnetb2b(self):
+        """Run the end-to-end flow for the efficientnetb2b_maskrcnn template as instance segmentation."""
+        template_dir = osp.join('configs', 'ote', 'custom-counting-instance-seg', 'efficientnetb2b_maskrcnn')
+        self.end_to_end(template_dir, task_type=TaskType.INSTANCE_SEGMENTATION)