From 7ecdcf182b1bfae2d29e0f256e025743dd8d198e Mon Sep 17 00:00:00 2001
From: Ilya Kochankov <33292483+KochankovID@users.noreply.github.com>
Date: Mon, 20 Jul 2020 11:52:47 +0300
Subject: [PATCH] [Datumaro] Reducing nesting of tests (#1875)

* Add `Dataset.from_iterable` constructor
* Simplify creation of `Dataset` objects in common simple cases
* Refactor tests
---
 datumaro/datumaro/components/extractor.py |  52 ++
 datumaro/datumaro/components/project.py   |  32 +-
 datumaro/tests/test_coco_format.py        | 837 ++++++++++------------
 datumaro/tests/test_cvat_format.py        | 435 ++++++-----
 datumaro/tests/test_datumaro_format.py    | 125 ++--
 datumaro/tests/test_labelme_format.py     | 333 ++++-----
 datumaro/tests/test_mot_format.py         | 202 +++---
 datumaro/tests/test_tfrecord_format.py    | 226 +++---
 datumaro/tests/test_transforms.py         | 526 +++++++-------
 datumaro/tests/test_yolo_format.py        | 173 ++---
 10 files changed, 1363 insertions(+), 1578 deletions(-)

diff --git a/datumaro/datumaro/components/extractor.py b/datumaro/datumaro/components/extractor.py
index fe4d897b6cc..3180665f5ab 100644
--- a/datumaro/datumaro/components/extractor.py
+++ b/datumaro/datumaro/components/extractor.py
@@ -72,6 +72,33 @@ def __eq__(self, other):
 class LabelCategories(Categories):
     Category = namedtuple('Category', ['name', 'parent', 'attributes'])
 
+    @classmethod
+    def from_iterable(cls, iterable):
+        """Generates LabelCategories from an iterable object
+
+        Args:
+            iterable: This iterable object can be:
+                1) a single str - generates one Category with the str as its name
+                2) a list of str - interpreted as a list of Category names
+                3) a list of positional arguments - generates Categories
+                with these arguments
+
+
+        Returns:
+            LabelCategories: a LabelCategories object
+        """
+        temp_categories = cls()
+
+        if isinstance(iterable, str):
+            iterable = [[iterable]]
+
+        for category in iterable:
+            if isinstance(category, str):
+                category = [category]
+            temp_categories.add(*category)
+
+        return temp_categories
+
     def __init__(self, items=None, attributes=None):
         super().__init__(attributes=attributes)
 
@@ -482,6 +509,31 @@ def iou(self, other):
 class PointsCategories(Categories):
     Category = namedtuple('Category', ['labels', 'joints'])
 
+    @classmethod
+    def from_iterable(cls, iterable):
+        """Generates PointsCategories from an iterable object
+
+        Args:
+            iterable: This iterable object can be:
+                1) a single int - generates one Category with the int as its label
+                2) a list of int - interpreted as a list of Category labels
+                3) a list of positional arguments - generates Categories
+                with these arguments
+
+        Returns:
+            PointsCategories: a PointsCategories object
+        """
+        temp_categories = cls()
+
+        if isinstance(iterable, int):
+            iterable = [[iterable]]
+
+        for category in iterable:
+            if isinstance(category, int):
+                category = [category]
+            temp_categories.add(*category)
+        return temp_categories
+
     def __init__(self, items=None, attributes=None):
         super().__init__(attributes=attributes)
 
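Taken together, the two constructors above accept several input shapes. A minimal usage sketch (illustrative, not part of the patch; the positional-argument forms are unpacked into `LabelCategories.add(name, parent, attributes)` and `PointsCategories.add(label_id, labels, joints)`, mirroring the calls made in the refactored tests later in this patch):

    from datumaro.components.extractor import LabelCategories, PointsCategories

    # A plain list of names: one Category per name
    simple = LabelCategories.from_iterable(['label1', 'label2'])

    # Positional-argument form: each inner list is unpacked into add()
    with_attrs = LabelCategories.from_iterable([
        ['label1', '', {'a1', 'a2'}],   # name, parent, attributes
        ['label2'],
    ])

    # The same idea for keypoint metadata: (label id, part labels, joints)
    points = PointsCategories.from_iterable(
        (i, None, [[0, 1], [1, 2]]) for i in range(10))
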
diff --git a/datumaro/datumaro/components/project.py b/datumaro/datumaro/components/project.py
index a1cd4919ff5..9ee38839737 100644
--- a/datumaro/datumaro/components/project.py
+++ b/datumaro/datumaro/components/project.py
@@ -18,7 +18,8 @@ from datumaro.components.config import Config, DEFAULT_FORMAT
 from datumaro.components.config_model import (Model, Source,
     PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA)
-from datumaro.components.extractor import Extractor
+from datumaro.components.extractor import Extractor, LabelCategories,\
+    AnnotationType
 from datumaro.components.launcher import ModelTransform
 from datumaro.components.dataset_filter import \
     XPathDatasetFilter, XPathAnnotationsFilter
@@ -319,6 +320,35 @@ def categories(self):
         return self._parent.categories()
 
 class Dataset(Extractor):
+    @classmethod
+    def from_iterable(cls, iterable, categories=None):
+        """Generates a Dataset from an iterable object
+
+        Args:
+            iterable: an iterable object containing DatasetItems
+            categories (dict, optional): a dict of categories, or a list of
+                label names, which will be interpreted as the names of
+                LabelCategories entries. Defaults to {}.
+
+        Returns:
+            Dataset: a Dataset object
+        """
+
+        if isinstance(categories, list):
+            categories = {AnnotationType.label: LabelCategories.from_iterable(categories)}
+
+        if not categories:
+            categories = {}
+
+        class tmpExtractor(Extractor):
+            def __iter__(self):
+                return iter(iterable)
+
+            def categories(self):
+                return categories
+
+        return cls.from_extractors(tmpExtractor())
+
     @classmethod
     def from_extractors(cls, *sources):
         # merge categories
diff --git a/datumaro/tests/test_coco_format.py b/datumaro/tests/test_coco_format.py
index 129d64e7882..3c50996e051 100644
--- a/datumaro/tests/test_coco_format.py
+++ b/datumaro/tests/test_coco_format.py
@@ -4,7 +4,7 @@
 
 from unittest import TestCase
 
-from datumaro.components.project import Project
+from datumaro.components.project import (Project, Dataset)
 from datumaro.components.extractor import (Extractor, DatasetItem,
     AnnotationType, Label, Mask, Points, Polygon, Bbox, Caption,
     LabelCategories, PointsCategories
@@ -26,32 +26,26 @@ class CocoImporterTest(TestCase):
     def test_can_import(self):
-        class DstExtractor(Extractor):
-            def __iter__(self):
-                return iter([
-                    DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
-                        subset='val', attributes={'id': 1},
-                        annotations=[
-                            Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
-                                id=1, group=1, attributes={'is_crowd': False}),
-                            Mask(np.array(
-                                [[1, 0, 0, 1, 0]] * 5 +
-                                [[1, 1, 1, 1, 0]] * 5
-                                ), label=0,
-                                id=2, group=2, attributes={'is_crowd': True}),
-                        ]
-                    ),
-                ])
-            def categories(self):
-                label_cat = LabelCategories()
-                label_cat.add('TEST')
-                return { AnnotationType.label: label_cat }
+        expected_dataset = Dataset.from_iterable([
+            DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
+                subset='val', attributes={'id': 1},
+                annotations=[
+                    Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
+                        id=1, group=1, attributes={'is_crowd': False}),
+                    Mask(np.array(
+                        [[1, 0, 0, 1, 0]] * 5 +
+                        [[1, 1, 1, 1, 0]] * 5
+                        ), label=0,
+                        id=2, group=2, attributes={'is_crowd': True}),
+                ]
+            ),
+        ], categories=['TEST',])
 
         dataset = Project.import_from(DUMMY_DATASET_DIR, 'coco') \
             .make_dataset()
 
-        compare_datasets(self, DstExtractor(), dataset)
+        compare_datasets(self, expected_dataset, dataset)
 
     def test_can_detect(self):
         self.assertTrue(CocoImporter.detect(DUMMY_DATASET_DIR))
@@ -71,526 +65,417 @@ def _test_save_and_load(self, source_dataset, converter, test_dir,
         compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
 
     def test_can_save_and_load_captions(self):
-        class TestExtractor(Extractor):
-            def __iter__(self):
-                return iter([
-                    DatasetItem(id=1, subset='train',
-                        annotations=[
-                            Caption('hello', id=1, group=1),
-                            Caption('world', id=2, group=2),
-                        ], attributes={'id': 1}),
-                    DatasetItem(id=2, subset='train',
-                        annotations=[
-                            Caption('test', id=3, group=3),
-                        ], attributes={'id': 2}),
-
-                    DatasetItem(id=3, subset='val',
-                        annotations=[
-                            Caption('word', id=1, group=1),
-                        ], attributes={'id': 1}
-                    ),
-                ])
+        expected_dataset = Dataset.from_iterable([
+
DatasetItem(id=1, subset='train', + annotations=[ + Caption('hello', id=1, group=1), + Caption('world', id=2, group=2), + ], attributes={'id': 1}), + DatasetItem(id=2, subset='train', + annotations=[ + Caption('test', id=3, group=3), + ], attributes={'id': 2}), + + DatasetItem(id=3, subset='val', + annotations=[ + Caption('word', id=1, group=1), + ], attributes={'id': 1}), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(expected_dataset, CocoCaptionsConverter.convert, test_dir) def test_can_save_and_load_instances(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - categories = { AnnotationType.label: label_categories } - - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), - annotations=[ - # Bbox + single polygon - Bbox(0, 1, 2, 2, - label=2, group=1, id=1, - attributes={ 'is_crowd': False }), - Polygon([0, 1, 2, 1, 2, 3, 0, 3], - attributes={ 'is_crowd': False }, - label=2, group=1, id=1), - ], attributes={'id': 1}), - DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), - annotations=[ - # Mask + bbox - Mask(np.array([ - [0, 1, 0, 0], - [0, 1, 0, 0], - [0, 1, 1, 1], - [0, 0, 0, 0]], - ), - attributes={ 'is_crowd': True }, - label=4, group=3, id=3), - Bbox(1, 0, 2, 2, label=4, group=3, id=3, - attributes={ 'is_crowd': True }), - ], attributes={'id': 2}), - - DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)), - annotations=[ - # Bbox + mask - Bbox(0, 1, 2, 2, label=4, group=3, id=3, - attributes={ 'is_crowd': True }), - Mask(np.array([ - [0, 0, 0, 0], - [1, 1, 1, 0], - [1, 1, 0, 0], - [0, 0, 0, 0]], - ), - attributes={ 'is_crowd': True }, - label=4, group=3, id=3), - ], attributes={'id': 1}), - ]) - - def categories(self): - return categories - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), - annotations=[ - Polygon([0, 1, 2, 1, 2, 3, 0, 3], - attributes={ 'is_crowd': False }, - label=2, group=1, id=1), - ], attributes={'id': 1}), - DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), - annotations=[ - Mask(np.array([ - [0, 1, 0, 0], - [0, 1, 0, 0], - [0, 1, 1, 1], - [0, 0, 0, 0]], - ), - attributes={ 'is_crowd': True }, - label=4, group=3, id=3), - ], attributes={'id': 2}), - - DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 0, 0], - [1, 1, 1, 0], - [1, 1, 0, 0], - [0, 0, 0, 0]], - ), - attributes={ 'is_crowd': True }, - label=4, group=3, id=3), - ], attributes={'id': 1}), - ]) - - def categories(self): - return categories + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + # Bbox + single polygon + Bbox(0, 1, 2, 2, + label=2, group=1, id=1, + attributes={ 'is_crowd': False }), + Polygon([0, 1, 2, 1, 2, 3, 0, 3], + attributes={ 'is_crowd': False }, + label=2, group=1, id=1), + ], attributes={'id': 1}), + DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + # Mask + bbox + Mask(np.array([ + [0, 1, 0, 0], + [0, 1, 0, 0], + [0, 1, 1, 1], + [0, 0, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), + Bbox(1, 0, 2, 2, label=4, group=3, id=3, + attributes={ 'is_crowd': True }), + ], attributes={'id': 2}), + + DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)), + annotations=[ + # Bbox + mask + Bbox(0, 1, 2, 2, label=4, group=3, 
id=3, + attributes={ 'is_crowd': True }), + Mask(np.array([ + [0, 0, 0, 0], + [1, 1, 1, 0], + [1, 1, 0, 0], + [0, 0, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), + ], attributes={'id': 1}), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + Polygon([0, 1, 2, 1, 2, 3, 0, 3], + attributes={ 'is_crowd': False }, + label=2, group=1, id=1), + ], attributes={'id': 1}), + DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + Mask(np.array([ + [0, 1, 0, 0], + [0, 1, 0, 0], + [0, 1, 1, 1], + [0, 0, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), + ], attributes={'id': 2}), + + DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 0, 0], + [1, 1, 1, 0], + [1, 1, 0, 0], + [0, 0, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), + ], attributes={'id': 1}) + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(source_dataset, CocoInstancesConverter.convert, test_dir, - target_dataset=DstExtractor()) + target_dataset=target_dataset) def test_can_merge_polygons_on_loading(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - categories = { AnnotationType.label: label_categories } - - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((6, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4], - label=3, id=4, group=4), - Polygon([5, 0, 9, 0, 5, 5], - label=3, id=4, group=4), - ] - ), - ]) - - def categories(self): - return categories - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((6, 10, 3)), - annotations=[ - Mask(np.array([ - [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], - [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], - [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], - # only internal fragment (without the border), - # but not everywhere... - ), - label=3, id=4, group=4, - attributes={ 'is_crowd': False }), - ], attributes={'id': 1} + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((6, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], + label=3, id=4, group=4), + Polygon([5, 0, 9, 0, 5, 5], + label=3, id=4, group=4), + ] + ), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((6, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], + # only internal fragment (without the border), + # but not everywhere... 
), - ]) - - def categories(self): - return categories + label=3, id=4, group=4, + attributes={ 'is_crowd': False }), + ], attributes={'id': 1} + ), + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), + self._test_save_and_load(source_dataset, CocoInstancesConverter.convert, test_dir, importer_args={'merge_instance_polygons': True}, - target_dataset=DstExtractor()) + target_dataset=target_dataset) def test_can_crop_covered_segments(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - - class SrcTestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 0, 1, 1, 1], - [1, 1, 0, 1, 1], - [1, 1, 1, 0, 0], - [1, 1, 1, 0, 0]], - ), - label=2, id=1, z_order=0), - Polygon([1, 1, 4, 1, 4, 4, 1, 4], - label=1, id=2, z_order=1), - ] - ), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } - - class DstTestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 0], - [1, 1, 1, 0, 0]], - ), - attributes={ 'is_crowd': True }, - label=2, id=1, group=1), - - Polygon([1, 1, 4, 1, 4, 4, 1, 4], - label=1, id=2, group=2, - attributes={ 'is_crowd': False }), - ], attributes={'id': 1} - ), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 1, 1, 1], + [1, 1, 0, 1, 1], + [1, 1, 1, 0, 0], + [1, 1, 1, 0, 0]], + ), + label=2, id=1, z_order=0), + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + label=1, id=2, z_order=1), + ] + ), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + [1, 1, 1, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=2, id=1, group=1), + + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + label=1, id=2, group=2, + attributes={ 'is_crowd': False }), + ], attributes={'id': 1} + ), + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(SrcTestExtractor(), - partial(CocoInstancesConverter.convert, crop_covered=True), - test_dir, target_dataset=DstTestExtractor()) + self._test_save_and_load(source_dataset, + partial(CocoInstancesConverter.convert, crop_covered=True), + test_dir, target_dataset=target_dataset) def test_can_convert_polygons_to_mask(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - - class SrcTestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((6, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4], - label=3, id=4, group=4), - Polygon([5, 0, 9, 0, 5, 5], - label=3, id=4, group=4), - ] - ), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } - - class DstTestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((6, 10, 3)), - annotations=[ - Mask(np.array([ - [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], - [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], - [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 
0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], - # only internal fragment (without the border), - # but not everywhere... - ), - attributes={ 'is_crowd': True }, - label=3, id=4, group=4), - ], attributes={'id': 1} - ), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((6, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], + label=3, id=4, group=4), + Polygon([5, 0, 9, 0, 5, 5], + label=3, id=4, group=4), + ] + ), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((6, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], + # only internal fragment (without the border), + # but not everywhere... + ), + attributes={ 'is_crowd': True }, + label=3, id=4, group=4), + ], attributes={'id': 1} + ), + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(SrcTestExtractor(), + self._test_save_and_load(source_dataset, partial(CocoInstancesConverter.convert, segmentation_mode='mask'), - test_dir, target_dataset=DstTestExtractor()) + test_dir, target_dataset=target_dataset) def test_can_convert_masks_to_polygons(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 10, 3)), - annotations=[ - Mask(np.array([ - [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], - [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], - [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ]), - label=3, id=4, group=4), - ] - ), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 10, 3)), - annotations=[ - Polygon( - [3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5], - label=3, id=4, group=4, - attributes={ 'is_crowd': False }), - Polygon( - [5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5], - label=3, id=4, group=4, - attributes={ 'is_crowd': False }), - ], attributes={'id': 1} - ), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ]), + label=3, id=4, group=4), + ] + ), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Polygon( + [3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5], + label=3, id=4, group=4, + attributes={ 'is_crowd': False }), + Polygon( + [5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5], + label=3, id=4, group=4, + attributes={ 'is_crowd': False }), + ], attributes={'id': 1} + ), + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), + self._test_save_and_load(source_dataset, partial(CocoInstancesConverter.convert, segmentation_mode='polygons'), - test_dir, target_dataset=DstExtractor()) + test_dir, + target_dataset=target_dataset) 
def test_can_save_and_load_images(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', attributes={'id': 1}), - DatasetItem(id=2, subset='train', attributes={'id': 2}), + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', attributes={'id': 1}), + DatasetItem(id=2, subset='train', attributes={'id': 2}), - DatasetItem(id=2, subset='val', attributes={'id': 2}), - DatasetItem(id=3, subset='val', attributes={'id': 3}), - DatasetItem(id=4, subset='val', attributes={'id': 4}), + DatasetItem(id=2, subset='val', attributes={'id': 2}), + DatasetItem(id=3, subset='val', attributes={'id': 3}), + DatasetItem(id=4, subset='val', attributes={'id': 4}), - DatasetItem(id=5, subset='test', attributes={'id': 1}), - ]) + DatasetItem(id=5, subset='test', attributes={'id': 1}), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(expected_dataset, CocoImageInfoConverter.convert, test_dir) def test_can_save_and_load_labels(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - annotations=[ - Label(4, id=1, group=1), - Label(9, id=2, group=2), - ], attributes={'id': 1} - ), - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - return { - AnnotationType.label: label_categories, - } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + annotations=[ + Label(4, id=1, group=1), + Label(9, id=2, group=2), + ], attributes={'id': 1}), + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(expected_dataset, CocoLabelsConverter.convert, test_dir) def test_can_save_and_load_keypoints(self): - label_categories = LabelCategories() - points_categories = PointsCategories() - for i in range(10): - label_categories.add(str(i)) - points_categories.add(i, joints=[[0, 1], [1, 2]]) - categories = { - AnnotationType.label: label_categories, - AnnotationType.points: points_categories, - } - - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)), - annotations=[ - # Full instance annotations: polygon + keypoints - Points([0, 0, 0, 2, 4, 1], [0, 1, 2], - label=3, group=1, id=1), - Polygon([0, 0, 4, 0, 4, 4], - label=3, group=1, id=1), - - # Full instance annotations: bbox + keypoints - Points([1, 2, 3, 4, 2, 3], group=2, id=2), - Bbox(1, 2, 2, 2, group=2, id=2), - - # Solitary keypoints - Points([1, 2, 0, 2, 4, 1], label=5, id=3), - - # Some other solitary annotations (bug #1387) - Polygon([0, 0, 4, 0, 4, 4], label=3, id=4), - - # Solitary keypoints with no label - Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5), - ]) - ]) - - def categories(self): - return categories - - class DstTestExtractor(TestExtractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)), - annotations=[ - Points([0, 0, 0, 2, 4, 1], [0, 1, 2], - label=3, group=1, id=1, - attributes={'is_crowd': False}), - Polygon([0, 0, 4, 0, 4, 4], - label=3, group=1, id=1, - attributes={'is_crowd': False}), - - Points([1, 2, 3, 4, 2, 3], - group=2, id=2, - attributes={'is_crowd': False}), - Polygon([1, 2, 3, 2, 3, 4, 1, 4], - group=2, id=2, - attributes={'is_crowd': False}), - - Points([1, 2, 0, 2, 4, 1], - label=5, group=3, id=3, - attributes={'is_crowd': False}), - 
Polygon([0, 1, 4, 1, 4, 2, 0, 2], - label=5, group=3, id=3, - attributes={'is_crowd': False}), - - Points([0, 0, 1, 2, 3, 4], [0, 1, 2], - group=5, id=5, - attributes={'is_crowd': False}), - Polygon([1, 2, 3, 2, 3, 4, 1, 4], - group=5, id=5, - attributes={'is_crowd': False}), - ], attributes={'id': 1}), - ]) + + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)), + annotations=[ + # Full instance annotations: polygon + keypoints + Points([0, 0, 0, 2, 4, 1], [0, 1, 2], + label=3, group=1, id=1), + Polygon([0, 0, 4, 0, 4, 4], + label=3, group=1, id=1), + + # Full instance annotations: bbox + keypoints + Points([1, 2, 3, 4, 2, 3], group=2, id=2), + Bbox(1, 2, 2, 2, group=2, id=2), + + # Solitary keypoints + Points([1, 2, 0, 2, 4, 1], label=5, id=3), + + # Some other solitary annotations (bug #1387) + Polygon([0, 0, 4, 0, 4, 4], label=3, id=4), + + # Solitary keypoints with no label + Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + str(i) for i in range(10)), + AnnotationType.points: PointsCategories.from_iterable( + (i, None, [[0, 1], [1, 2]]) for i in range(10) + ), + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)), + annotations=[ + Points([0, 0, 0, 2, 4, 1], [0, 1, 2], + label=3, group=1, id=1, + attributes={'is_crowd': False}), + Polygon([0, 0, 4, 0, 4, 4], + label=3, group=1, id=1, + attributes={'is_crowd': False}), + + Points([1, 2, 3, 4, 2, 3], + group=2, id=2, + attributes={'is_crowd': False}), + Polygon([1, 2, 3, 2, 3, 4, 1, 4], + group=2, id=2, + attributes={'is_crowd': False}), + + Points([1, 2, 0, 2, 4, 1], + label=5, group=3, id=3, + attributes={'is_crowd': False}), + Polygon([0, 1, 4, 1, 4, 2, 0, 2], + label=5, group=3, id=3, + attributes={'is_crowd': False}), + + Points([0, 0, 1, 2, 3, 4], [0, 1, 2], + group=5, id=5, + attributes={'is_crowd': False}), + Polygon([1, 2, 3, 2, 3, 4, 1, 4], + group=5, id=5, + attributes={'is_crowd': False}), + ], attributes={'id': 1}), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + str(i) for i in range(10)), + AnnotationType.points: PointsCategories.from_iterable( + (i, None, [[0, 1], [1, 2]]) for i in range(10) + ), + }) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(source_dataset, CocoPersonKeypointsConverter.convert, test_dir, - target_dataset=DstTestExtractor()) + target_dataset=target_dataset) def test_can_save_dataset_with_no_subsets(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, attributes={'id': 1}), - DatasetItem(id=2, attributes={'id': 2}), - ]) + test_dataset = Dataset.from_iterable([ + DatasetItem(id=1, attributes={'id': 1}), + DatasetItem(id=2, attributes={'id': 2}), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(test_dataset, CocoConverter.convert, test_dir) def test_can_save_dataset_with_image_info(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)), - attributes={'id': 1}), - ]) + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)), + attributes={'id': 1}), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(expected_dataset, CocoImageInfoConverter.convert, test_dir) 
def test_relative_paths(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='1', image=np.ones((4, 2, 3)), - attributes={'id': 1}), - DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)), - attributes={'id': 2}), - DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)), - attributes={'id': 3}), - ]) + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((4, 2, 3)), + attributes={'id': 1}), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)), + attributes={'id': 2}), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)), + attributes={'id': 3}), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), - partial(CocoImageInfoConverter.convert, save_images=True), - test_dir) + self._test_save_and_load(expected_dataset, + partial(CocoImageInfoConverter.convert, save_images=True), test_dir) def test_preserve_coco_ids(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), - attributes={'id': 40}), - ]) + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), + attributes={'id': 40}), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), - partial(CocoImageInfoConverter.convert, save_images=True), - test_dir) + self._test_save_and_load(expected_dataset, + partial(CocoImageInfoConverter.convert, save_images=True), test_dir) def test_annotation_attributes(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[ - Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1, - attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}), - ], attributes={'id': 1}) - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - return { AnnotationType.label: label_categories, } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[ + Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1, + attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}), + ], attributes={'id': 1}) + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(expected_dataset, CocoConverter.convert, test_dir) diff --git a/datumaro/tests/test_cvat_format.py b/datumaro/tests/test_cvat_format.py index 9f2622034eb..5c246ff4843 100644 --- a/datumaro/tests/test_cvat_format.py +++ b/datumaro/tests/test_cvat_format.py @@ -3,7 +3,7 @@ import os.path as osp from unittest import TestCase - +from datumaro.components.project import Dataset from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Points, Polygon, PolyLine, Bbox, Label, LabelCategories, @@ -28,121 +28,115 @@ def test_can_detect_video(self): self.assertTrue(CvatImporter.detect(DUMMY_VIDEO_DATASET_DIR)) def test_can_load_image(self): - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='img0', subset='train', - image=np.ones((8, 8, 3)), - annotations=[ - Bbox(0, 2, 4, 2, label=0, z_order=1, - attributes={ - 'occluded': True, - 'a1': True, 'a2': 'v3' - }), - PolyLine([1, 2, 3, 4, 5, 6, 7, 8], - attributes={'occluded': False}), - ], attributes={'frame': 0}), - DatasetItem(id='img1', subset='train', - image=np.ones((10, 10, 3)), - annotations=[ - Polygon([1, 2, 3, 4, 6, 5], z_order=1, - attributes={'occluded': False}), - Points([1, 2, 3, 
4, 5, 6], label=1, z_order=2, - attributes={'occluded': False}), - ], attributes={'frame': 1}), - ]) - - def categories(self): - label_categories = LabelCategories() - label_categories.add('label1', attributes={'a1', 'a2'}) - label_categories.add('label2') - return { AnnotationType.label: label_categories } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='img0', subset='train', + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(0, 2, 4, 2, label=0, z_order=1, + attributes={ + 'occluded': True, + 'a1': True, 'a2': 'v3' + }), + PolyLine([1, 2, 3, 4, 5, 6, 7, 8], + attributes={'occluded': False}), + ], attributes={'frame': 0}), + DatasetItem(id='img1', subset='train', + image=np.ones((10, 10, 3)), + annotations=[ + Polygon([1, 2, 3, 4, 6, 5], z_order=1, + attributes={'occluded': False}), + Points([1, 2, 3, 4, 5, 6], label=1, z_order=2, + attributes={'occluded': False}), + ], attributes={'frame': 1}), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable([ + ['label1', '', {'a1', 'a2'}], + ['label2'], + ]) + }) parsed_dataset = CvatImporter()(DUMMY_IMAGE_DATASET_DIR).make_dataset() - compare_datasets(self, DstExtractor(), parsed_dataset) + compare_datasets(self, expected_dataset, parsed_dataset) def test_can_load_video(self): - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='frame_000010', subset='annotations', - image=np.ones((20, 25, 3)), - annotations=[ - Bbox(3, 4, 7, 1, label=2, - id=0, - attributes={ - 'occluded': True, - 'outside': False, 'keyframe': True, - 'track_id': 0 - }), - Points([21.95, 8.00, 2.55, 15.09, 2.23, 3.16], - label=0, - id=1, - attributes={ - 'occluded': False, - 'outside': False, 'keyframe': True, - 'track_id': 1, 'hgl': 'hgkf', - }), - ], attributes={'frame': 10}), - DatasetItem(id='frame_000013', subset='annotations', - image=np.ones((20, 25, 3)), - annotations=[ - Bbox(7, 6, 7, 2, label=2, - id=0, - attributes={ - 'occluded': False, - 'outside': True, 'keyframe': True, - 'track_id': 0 - }), - Points([21.95, 8.00, 9.55, 15.09, 5.23, 1.16], - label=0, - id=1, - attributes={ - 'occluded': False, - 'outside': True, 'keyframe': True, - 'track_id': 1, 'hgl': 'jk', - }), - PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], - label=2, - id=2, - attributes={ - 'occluded': False, - 'outside': False, 'keyframe': True, - 'track_id': 2, - }), - ], attributes={'frame': 13}), - DatasetItem(id='frame_000016', subset='annotations', - image=Image(path='frame_0000016.png', size=(20, 25)), - annotations=[ - Bbox(8, 7, 6, 10, label=2, - id=0, - attributes={ - 'occluded': False, - 'outside': True, 'keyframe': True, - 'track_id': 0 - }), - PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], - label=2, - id=2, - attributes={ - 'occluded': False, - 'outside': True, 'keyframe': True, - 'track_id': 2, - }), - ], attributes={'frame': 16}), - ]) - - def categories(self): - label_categories = LabelCategories() - label_categories.add('klhg', attributes={'hgl'}) - label_categories.add('z U k') - label_categories.add('II') - return { AnnotationType.label: label_categories } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='frame_000010', subset='annotations', + image=np.ones((20, 25, 3)), + annotations=[ + Bbox(3, 4, 7, 1, label=2, + id=0, + attributes={ + 'occluded': True, + 'outside': False, 'keyframe': True, + 'track_id': 0 + }), + Points([21.95, 8.00, 2.55, 15.09, 2.23, 3.16], + label=0, + id=1, + attributes={ + 'occluded': False, + 'outside': False, 'keyframe': True, + 'track_id': 1, 
'hgl': 'hgkf', + }), + ], attributes={'frame': 10}), + DatasetItem(id='frame_000013', subset='annotations', + image=np.ones((20, 25, 3)), + annotations=[ + Bbox(7, 6, 7, 2, label=2, + id=0, + attributes={ + 'occluded': False, + 'outside': True, 'keyframe': True, + 'track_id': 0 + }), + Points([21.95, 8.00, 9.55, 15.09, 5.23, 1.16], + label=0, + id=1, + attributes={ + 'occluded': False, + 'outside': True, 'keyframe': True, + 'track_id': 1, 'hgl': 'jk', + }), + PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], + label=2, + id=2, + attributes={ + 'occluded': False, + 'outside': False, 'keyframe': True, + 'track_id': 2, + }), + ], attributes={'frame': 13}), + DatasetItem(id='frame_000016', subset='annotations', + image=Image(path='frame_0000016.png', size=(20, 25)), + annotations=[ + Bbox(8, 7, 6, 10, label=2, + id=0, + attributes={ + 'occluded': False, + 'outside': True, 'keyframe': True, + 'track_id': 0 + }), + PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], + label=2, + id=2, + attributes={ + 'occluded': False, + 'outside': True, 'keyframe': True, + 'track_id': 2, + }), + ], attributes={'frame': 16}), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable([ + ['klhg', '', {'hgl'}], + ['z U k'], + ['II'] + ]), + }) parsed_dataset = CvatImporter()(DUMMY_VIDEO_DATASET_DIR).make_dataset() - compare_datasets(self, DstExtractor(), parsed_dataset) + compare_datasets(self, expected_dataset, parsed_dataset) class CvatConverterTest(TestCase): def _test_save_and_load(self, source_dataset, converter, test_dir, @@ -165,137 +159,120 @@ def test_can_save_and_load(self): label_categories.items[2].attributes.update(['a1', 'a2']) label_categories.attributes.update(['occluded']) - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4], - label=1, group=4, - attributes={ 'occluded': True }), - Points([1, 1, 3, 2, 2, 3], - label=2, - attributes={ 'a1': 'x', 'a2': 42, - 'unknown': 'bar' }), - Label(1), - Label(2, attributes={ 'a1': 'y', 'a2': 44 }), - ] - ), - DatasetItem(id=1, subset='s1', - annotations=[ - PolyLine([0, 0, 4, 0, 4, 4], - label=3, id=4, group=4), - Bbox(5, 0, 1, 9, - label=3, id=4, group=4), - ] - ), - - DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4], z_order=1, - label=3, group=4, - attributes={ 'occluded': False }), - PolyLine([5, 0, 9, 0, 5, 5]), # will be skipped as no label - ] - ), - - DatasetItem(id=3, subset='s3', image=Image( - path='3.jpg', size=(2, 4))), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4], - label=1, group=4, - attributes={ 'occluded': True }), - Points([1, 1, 3, 2, 2, 3], - label=2, - attributes={ 'occluded': False, - 'a1': 'x', 'a2': 42 }), - Label(1), - Label(2, attributes={ 'a1': 'y', 'a2': 44 }), - ], attributes={'frame': 0} - ), - DatasetItem(id=1, subset='s1', - annotations=[ - PolyLine([0, 0, 4, 0, 4, 4], - label=3, group=4, - attributes={ 'occluded': False }), - Bbox(5, 0, 1, 9, - label=3, group=4, - attributes={ 'occluded': False }), - ], attributes={'frame': 1} - ), - - DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4], z_order=1, - label=3, group=4, - attributes={ 'occluded': False 
}), - ], attributes={'frame': 0} - ), - - DatasetItem(id=3, subset='s3', image=Image( - path='3.jpg', size=(2, 4)), - attributes={'frame': 0}), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], + label=1, group=4, + attributes={ 'occluded': True }), + Points([1, 1, 3, 2, 2, 3], + label=2, + attributes={ 'a1': 'x', 'a2': 42, + 'unknown': 'bar' }), + Label(1), + Label(2, attributes={ 'a1': 'y', 'a2': 44 }), + ] + ), + DatasetItem(id=1, subset='s1', + annotations=[ + PolyLine([0, 0, 4, 0, 4, 4], + label=3, id=4, group=4), + Bbox(5, 0, 1, 9, + label=3, id=4, group=4), + ] + ), + + DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], z_order=1, + label=3, group=4, + attributes={ 'occluded': False }), + PolyLine([5, 0, 9, 0, 5, 5]), # will be skipped as no label + ] + ), + + DatasetItem(id=3, subset='s3', image=Image( + path='3.jpg', size=(2, 4))), + ], categories={ + AnnotationType.label: label_categories, + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], + label=1, group=4, + attributes={ 'occluded': True }), + Points([1, 1, 3, 2, 2, 3], + label=2, + attributes={ 'occluded': False, + 'a1': 'x', 'a2': 42 }), + Label(1), + Label(2, attributes={ 'a1': 'y', 'a2': 44 }), + ], attributes={'frame': 0} + ), + DatasetItem(id=1, subset='s1', + annotations=[ + PolyLine([0, 0, 4, 0, 4, 4], + label=3, group=4, + attributes={ 'occluded': False }), + Bbox(5, 0, 1, 9, + label=3, group=4, + attributes={ 'occluded': False }), + ], attributes={'frame': 1} + ), + + DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], z_order=1, + label=3, group=4, + attributes={ 'occluded': False }), + ], attributes={'frame': 0} + ), + + DatasetItem(id=3, subset='s3', image=Image( + path='3.jpg', size=(2, 4)), + attributes={'frame': 0}), + ], categories={ + AnnotationType.label: label_categories, + }) with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), + self._test_save_and_load(source_dataset, partial(CvatConverter.convert, save_images=True), test_dir, - target_dataset=DstExtractor()) + target_dataset=target_dataset) def test_relative_paths(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='1', image=np.ones((4, 2, 3))), - DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), - DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))), - ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='1', image=np.ones((4, 2, 3)), - attributes={'frame': 0}), - DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)), - attributes={'frame': 1}), - DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)), - attributes={'frame': 2}), - ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } + source_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((4, 2, 3))), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))), + ], categories={ AnnotationType.label: LabelCategories() }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((4, 2, 3)), + 
attributes={'frame': 0}), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)), + attributes={'frame': 1}), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)), + attributes={'frame': 2}), + ], categories={ + AnnotationType.label: LabelCategories() + }) with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), + self._test_save_and_load(source_dataset, partial(CvatConverter.convert, save_images=True), test_dir, - target_dataset=DstExtractor()) + target_dataset=target_dataset) def test_preserve_frame_ids(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), - attributes={'frame': 40}), - ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), + attributes={'frame': 40}), + ], categories={ + AnnotationType.label: LabelCategories() + }) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(expected_dataset, CvatConverter.convert, test_dir) diff --git a/datumaro/tests/test_datumaro_format.py b/datumaro/tests/test_datumaro_format.py index 26e6fc88ddc..e67a1b90c12 100644 --- a/datumaro/tests/test_datumaro_format.py +++ b/datumaro/tests/test_datumaro_format.py @@ -2,7 +2,7 @@ import numpy as np from unittest import TestCase - +from datumaro.components.project import Dataset from datumaro.components.project import Project from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Label, Mask, Points, Polygon, @@ -32,82 +32,75 @@ def _test_save_and_load(self, source_dataset, converter, test_dir, compare_datasets_strict(self, expected=target_dataset, actual=parsed_dataset) - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)), - annotations=[ - Caption('hello', id=1), - Caption('world', id=2, group=5), - Label(2, id=3, attributes={ - 'x': 1, - 'y': '2', - }), - Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={ - 'score': 1.0, - }), - Bbox(5, 6, 7, 8, id=5, group=5), - Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4), - Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))), - ]), - DatasetItem(id=21, subset='train', - annotations=[ - Caption('test'), - Label(2), - Bbox(1, 2, 3, 4, 5, id=42, group=42) - ]), - - DatasetItem(id=2, subset='val', - annotations=[ - PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1), - Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4), - ]), - - DatasetItem(id=42, subset='test', - attributes={'a1': 5, 'a2': '42'}), - - DatasetItem(id=42), - DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))), - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(5): - label_categories.add('cat' + str(i)) - - mask_categories = MaskCategories( - generate_colormap(len(label_categories.items))) - - points_categories = PointsCategories() - for index, _ in enumerate(label_categories.items): - points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]]) - - return { - AnnotationType.label: label_categories, - AnnotationType.mask: mask_categories, - AnnotationType.points: points_categories, - } + label_categories = LabelCategories() + for i in range(5): + label_categories.add('cat' + str(i)) + + mask_categories = MaskCategories( + generate_colormap(len(label_categories.items))) + + points_categories = PointsCategories() + for index, _ in 
enumerate(label_categories.items): + points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]]) + + test_dataset = Dataset.from_iterable([ + DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)), + annotations=[ + Caption('hello', id=1), + Caption('world', id=2, group=5), + Label(2, id=3, attributes={ + 'x': 1, + 'y': '2', + }), + Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={ + 'score': 1.0, + }), + Bbox(5, 6, 7, 8, id=5, group=5), + Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4), + Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))), + ]), + DatasetItem(id=21, subset='train', + annotations=[ + Caption('test'), + Label(2), + Bbox(1, 2, 3, 4, 5, id=42, group=42) + ]), + + DatasetItem(id=2, subset='val', + annotations=[ + PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1), + Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4), + ]), + + DatasetItem(id=42, subset='test', + attributes={'a1': 5, 'a2': '42'}), + + DatasetItem(id=42), + DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))), + ], categories={ + AnnotationType.label: label_categories, + AnnotationType.mask: mask_categories, + AnnotationType.points: points_categories, + }) def test_can_save_and_load(self): with TestDir() as test_dir: - self._test_save_and_load(self.TestExtractor(), + self._test_save_and_load(self.test_dataset, partial(DatumaroConverter.convert, save_images=True), test_dir) def test_can_detect(self): with TestDir() as test_dir: - DatumaroConverter.convert(self.TestExtractor(), save_dir=test_dir) + DatumaroConverter.convert(self.test_dataset, save_dir=test_dir) self.assertTrue(DatumaroImporter.detect(test_dir)) def test_relative_paths(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='1', image=np.ones((4, 2, 3))), - DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), - DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))), - ]) + test_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((4, 2, 3))), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(test_dataset, partial(DatumaroConverter.convert, save_images=True), test_dir) diff --git a/datumaro/tests/test_labelme_format.py b/datumaro/tests/test_labelme_format.py index b3abd823d91..d40938bd8a7 100644 --- a/datumaro/tests/test_labelme_format.py +++ b/datumaro/tests/test_labelme_format.py @@ -3,7 +3,7 @@ import os.path as osp from unittest import TestCase - +from datumaro.components.project import Dataset from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Bbox, Mask, Polygon, LabelCategories ) @@ -29,101 +29,84 @@ def _test_save_and_load(self, source_dataset, converter, test_dir, compare_datasets(self, expected=target_dataset, actual=parsed_dataset) def test_can_save_and_load(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=2, group=2), - Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={ - 'occluded': True, - 'a1': 'qwe', - 'a2': True, - 'a3': 123, - }), - Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, - attributes={ 'username': 'test' }), - Bbox(1, 2, 3, 4, group=3), - Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3, - attributes={ 'occluded': True } - ), - ] + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, 
subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, group=2), + Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={ + 'occluded': True, + 'a1': 'qwe', + 'a2': True, + 'a3': 123, + }), + Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, + attributes={ 'username': 'test' }), + Bbox(1, 2, 3, 4, group=3), + Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3, + attributes={ 'occluded': True } ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=0, group=2, id=0, - attributes={ - 'occluded': False, 'username': '', - } - ), - Polygon([0, 4, 4, 4, 5, 6], label=1, id=1, - attributes={ - 'occluded': True, 'username': '', - 'a1': 'qwe', - 'a2': True, - 'a3': 123, - } - ), - Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, - id=2, attributes={ - 'occluded': False, 'username': 'test' - } - ), - Bbox(1, 2, 3, 4, group=1, id=3, attributes={ - 'occluded': False, 'username': '', - }), - Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1, - id=4, attributes={ - 'occluded': True, 'username': '' - } - ), - ] + ] + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=0, group=2, id=0, + attributes={ + 'occluded': False, 'username': '', + } ), - ]) - - def categories(self): - label_cat = LabelCategories() - label_cat.add('label_2') - label_cat.add('label_3') - return { - AnnotationType.label: label_cat, - } + Polygon([0, 4, 4, 4, 5, 6], label=1, id=1, + attributes={ + 'occluded': True, 'username': '', + 'a1': 'qwe', + 'a2': True, + 'a3': 123, + } + ), + Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, + id=2, attributes={ + 'occluded': False, 'username': 'test' + } + ), + Bbox(1, 2, 3, 4, group=1, id=3, attributes={ + 'occluded': False, 'username': '', + }), + Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1, + id=4, attributes={ + 'occluded': True, 'username': '' + } + ), + ] + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable([ + 'label_2', 'label_3']), + }) with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), + self._test_save_and_load( + source_dataset, partial(LabelMeConverter.convert, save_images=True), - test_dir, target_dataset=DstExtractor()) + test_dir, target_dataset=target_dataset) def test_cant_save_dataset_with_relative_paths(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='dir/1', image=np.ones((2, 6, 3))), - ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='dir/1', image=np.ones((2, 6, 3))), + ], categories={ + AnnotationType.label: LabelCategories(), + }) with self.assertRaisesRegex(Exception, r'only supports flat'): with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), - partial(LabelMeConverter.convert, save_images=True), - test_dir) + self._test_save_and_load(expected_dataset, + LabelMeConverter.convert, test_dir) DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset') @@ -133,101 +116,91 @@ def 
test_can_detect(self): self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR)) def test_can_import(self): - class DstExtractor(Extractor): - def __iter__(self): - img1 = np.ones((77, 102, 3)) * 255 - img1[6:32, 7:41] = 0 - - mask1 = np.zeros((77, 102), dtype=int) - mask1[67:69, 58:63] = 1 - - mask2 = np.zeros((77, 102), dtype=int) - mask2[13:25, 54:71] = [ - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ] - - return iter([ - DatasetItem(id='img1', image=img1, - annotations=[ - Polygon([43, 34, 45, 34, 45, 37, 43, 37], - label=0, id=0, - attributes={ - 'occluded': False, - 'username': 'admin' - } - ), - Mask(mask1, label=1, id=1, - attributes={ - 'occluded': False, - 'username': 'brussell' - } - ), - Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12], - label=2, group=2, id=2, - attributes={ - 'a1': True, - 'occluded': True, - 'username': 'anonymous' - } - ), - Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25], - label=3, group=2, id=3, - attributes={ - 'kj': True, - 'occluded': False, - 'username': 'anonymous' - } - ), - Bbox(13, 19, 10, 11, label=4, group=2, id=4, - attributes={ - 'hg': True, - 'occluded': True, - 'username': 'anonymous' - } - ), - Mask(mask2, label=5, group=1, id=5, - attributes={ - 'd': True, - 'occluded': False, - 'username': 'anonymous' - } - ), - Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22], - label=6, group=1, id=6, - attributes={ - 'gfd lkj lkj hi': True, - 'occluded': False, - 'username': 'anonymous' - } - ), - ] + img1 = np.ones((77, 102, 3)) * 255 + img1[6:32, 7:41] = 0 + + mask1 = np.zeros((77, 102), dtype=int) + mask1[67:69, 58:63] = 1 + + mask2 = np.zeros((77, 102), dtype=int) + mask2[13:25, 54:71] = [ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ] + + target_dataset = Dataset.from_iterable([ + DatasetItem(id='img1', image=img1, + annotations=[ + Polygon([43, 34, 45, 34, 45, 37, 43, 37], + label=0, id=0, + attributes={ + 'occluded': False, + 'username': 'admin' + } + ), + Mask(mask1, label=1, id=1, + attributes={ + 'occluded': False, + 'username': 'brussell' + } + ), + Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12], + label=2, group=2, id=2, + attributes={ + 'a1': True, + 'occluded': True, + 'username': 'anonymous' + } + ), + Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25], + label=3, 
group=2, id=3, + attributes={ + 'kj': True, + 'occluded': False, + 'username': 'anonymous' + } ), - ]) - - def categories(self): - label_cat = LabelCategories() - label_cat.add('window') - label_cat.add('license plate') - label_cat.add('o1') - label_cat.add('q1') - label_cat.add('b1') - label_cat.add('m1') - label_cat.add('hg') - return { - AnnotationType.label: label_cat, - } + Bbox(13, 19, 10, 11, label=4, group=2, id=4, + attributes={ + 'hg': True, + 'occluded': True, + 'username': 'anonymous' + } + ), + Mask(mask2, label=5, group=1, id=5, + attributes={ + 'd': True, + 'occluded': False, + 'username': 'anonymous' + } + ), + Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22], + label=6, group=1, id=6, + attributes={ + 'gfd lkj lkj hi': True, + 'occluded': False, + 'username': 'anonymous' + } + ), + ] + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable([ + 'window', 'license plate', 'o1', + 'q1', 'b1', 'm1', 'hg', + ]), + }) parsed = Project.import_from(DUMMY_DATASET_DIR, 'label_me') \ .make_dataset() - compare_datasets(self, expected=DstExtractor(), actual=parsed) \ No newline at end of file + compare_datasets(self, expected=target_dataset, actual=parsed) \ No newline at end of file diff --git a/datumaro/tests/test_mot_format.py b/datumaro/tests/test_mot_format.py index 9f212116b7e..4cc2a98b3a4 100644 --- a/datumaro/tests/test_mot_format.py +++ b/datumaro/tests/test_mot_format.py @@ -3,7 +3,7 @@ import os.path as osp from unittest import TestCase - +from datumaro.components.project import Dataset from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Bbox, LabelCategories ) @@ -28,96 +28,83 @@ def _test_save_and_load(self, source_dataset, converter, test_dir, compare_datasets(self, expected=target_dataset, actual=parsed_dataset) def test_can_save_bboxes(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=2, attributes={ - 'occluded': True, - }), - Bbox(0, 4, 4, 4, label=3, attributes={ - 'visibility': 0.4, - }), - Bbox(2, 4, 4, 4, attributes={ - 'ignored': True - }), - ] - ), - - DatasetItem(id=2, subset='val', - image=np.ones((8, 8, 3)), - annotations=[ - Bbox(1, 2, 4, 2, label=3), - ] - ), - - DatasetItem(id=3, subset='test', - image=np.ones((5, 4, 3)) * 3, - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=2, attributes={ - 'occluded': True, - 'visibility': 0.0, - 'ignored': False, - }), - Bbox(0, 4, 4, 4, label=3, attributes={ - 'occluded': False, - 'visibility': 0.4, - 'ignored': False, - }), - Bbox(2, 4, 4, 4, attributes={ - 'occluded': False, - 'visibility': 1.0, - 'ignored': True, - }), - ] - ), - - DatasetItem(id=2, - image=np.ones((8, 8, 3)), - annotations=[ - Bbox(1, 2, 4, 2, label=3, attributes={ - 'occluded': False, - 'visibility': 1.0, - 'ignored': False, - }), - ] - ), - - DatasetItem(id=3, - image=np.ones((5, 4, 3)) * 3, - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + 
image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, attributes={ + 'occluded': True, + }), + Bbox(0, 4, 4, 4, label=3, attributes={ + 'visibility': 0.4, + }), + Bbox(2, 4, 4, 4, attributes={ + 'ignored': True + }), + ] + ), + + DatasetItem(id=2, subset='val', + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(1, 2, 4, 2, label=3), + ] + ), + + DatasetItem(id=3, subset='test', + image=np.ones((5, 4, 3)) * 3, + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, attributes={ + 'occluded': True, + 'visibility': 0.0, + 'ignored': False, + }), + Bbox(0, 4, 4, 4, label=3, attributes={ + 'occluded': False, + 'visibility': 0.4, + 'ignored': False, + }), + Bbox(2, 4, 4, 4, attributes={ + 'occluded': False, + 'visibility': 1.0, + 'ignored': True, + }), + ] + ), + + DatasetItem(id=2, + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(1, 2, 4, 2, label=3, attributes={ + 'occluded': False, + 'visibility': 1.0, + 'ignored': False, + }), + ] + ), + + DatasetItem(id=3, + image=np.ones((5, 4, 3)) * 3, + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), + self._test_save_and_load( + source_dataset, partial(MotSeqGtConverter.convert, save_images=True), - test_dir, target_dataset=DstExtractor()) + test_dir, target_dataset=target_dataset) DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'mot_dataset') @@ -127,30 +114,23 @@ def test_can_detect(self): self.assertTrue(MotSeqImporter.detect(DUMMY_DATASET_DIR)) def test_can_import(self): - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=2, attributes={ - 'occluded': False, - 'visibility': 1.0, - 'ignored': False, - }), - ] - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, attributes={ + 'occluded': False, + 'visibility': 1.0, + 'ignored': False, + }), + ] + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \ .make_dataset() - compare_datasets(self, DstExtractor(), dataset) \ No newline at end of file + compare_datasets(self, expected_dataset, dataset) \ No newline at end of file diff --git a/datumaro/tests/test_tfrecord_format.py b/datumaro/tests/test_tfrecord_format.py index 5071ad25915..f2dbd160fde 100644 --- a/datumaro/tests/test_tfrecord_format.py +++ b/datumaro/tests/test_tfrecord_format.py @@ -3,7 +3,7 @@ import os.path as osp from unittest import TestCase, skipIf - +from datumaro.components.project import Dataset from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Bbox, Mask, LabelCategories ) @@ -48,117 +48,96 @@ def _test_save_and_load(self, source_dataset, converter, test_dir, compare_datasets(self, expected=target_dataset, actual=parsed_dataset) def test_can_save_bboxes(self): - class TestExtractor(Extractor): - def 
__iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=2), - Bbox(0, 4, 4, 4, label=3), - Bbox(2, 4, 4, 4), - ], attributes={'source_id': ''} - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } + test_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2), + Bbox(0, 4, 4, 4, label=3), + Bbox(2, 4, 4, 4), + ], attributes={'source_id': ''} + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load( + test_dataset, partial(TfDetectionApiConverter.convert, save_images=True), test_dir) def test_can_save_masks(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)), - annotations=[ - Mask(image=np.array([ - [1, 0, 0, 1], - [0, 1, 1, 0], - [0, 1, 1, 0], - [1, 0, 0, 1], - ]), label=1), - ], - attributes={'source_id': ''} - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } + test_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)), + annotations=[ + Mask(image=np.array([ + [1, 0, 0, 1], + [0, 1, 1, 0], + [0, 1, 1, 0], + [1, 0, 0, 1], + ]), label=1), + ], + attributes={'source_id': ''} + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load( + test_dataset, partial(TfDetectionApiConverter.convert, save_masks=True), test_dir) def test_can_save_dataset_with_no_subsets(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(2, 1, 4, 4, label=2), - Bbox(4, 2, 8, 4, label=3), - ], - attributes={'source_id': ''} - ), - - DatasetItem(id=2, - image=np.ones((8, 8, 3)) * 2, - annotations=[ - Bbox(4, 4, 4, 4, label=3), - ], - attributes={'source_id': ''} - ), - - DatasetItem(id=3, - image=np.ones((8, 4, 3)) * 3, - attributes={'source_id': ''} - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } + test_dataset = Dataset.from_iterable([ + DatasetItem(id=1, + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(2, 1, 4, 4, label=2), + Bbox(4, 2, 8, 4, label=3), + ], + attributes={'source_id': ''} + ), + + DatasetItem(id=2, + image=np.ones((8, 8, 3)) * 2, + annotations=[ + Bbox(4, 4, 4, 4, label=3), + ], + attributes={'source_id': ''} + ), + + DatasetItem(id=3, + image=np.ones((8, 4, 3)) * 3, + attributes={'source_id': ''} + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load( + test_dataset, partial(TfDetectionApiConverter.convert, save_images=True), test_dir) def test_can_save_dataset_with_image_info(self): - class 
TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='1/q.e', - image=Image(path='1/q.e', size=(10, 15)), - attributes={'source_id': ''} - ) - ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } + test_dataset = Dataset.from_iterable([ + DatasetItem(id='1/q.e', + image=Image(path='1/q.e', size=(10, 15)), + attributes={'source_id': ''} + ) + ], categories={ + AnnotationType.label: LabelCategories(), + }) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(test_dataset, TfDetectionApiConverter.convert, test_dir) def test_labelmap_parsing(self): @@ -197,42 +176,35 @@ def test_can_detect(self): self.assertTrue(TfDetectionApiImporter.detect(DUMMY_DATASET_DIR)) def test_can_import(self): - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=2), - Bbox(0, 4, 4, 4, label=3), - Bbox(2, 4, 4, 4), - ], - attributes={'source_id': '1'} - ), - - DatasetItem(id=2, subset='val', - image=np.ones((8, 8, 3)), - annotations=[ - Bbox(1, 2, 4, 2, label=3), - ], - attributes={'source_id': '2'} - ), - - DatasetItem(id=3, subset='test', - image=np.ones((5, 4, 3)) * 3, - attributes={'source_id': '3'} - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2), + Bbox(0, 4, 4, 4, label=3), + Bbox(2, 4, 4, 4), + ], + attributes={'source_id': '1'} + ), + + DatasetItem(id=2, subset='val', + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(1, 2, 4, 2, label=3), + ], + attributes={'source_id': '2'} + ), + + DatasetItem(id=3, subset='test', + image=np.ones((5, 4, 3)) * 3, + attributes={'source_id': '3'} + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \ .make_dataset() - compare_datasets(self, DstExtractor(), dataset) + compare_datasets(self, target_dataset, dataset) diff --git a/datumaro/tests/test_transforms.py b/datumaro/tests/test_transforms.py index 522cf2b520f..a55a446ea58 100644 --- a/datumaro/tests/test_transforms.py +++ b/datumaro/tests/test_transforms.py @@ -2,7 +2,7 @@ import numpy as np from unittest import TestCase - +from datumaro.components.project import Dataset from datumaro.components.extractor import (Extractor, DatasetItem, Mask, Polygon, PolyLine, Points, Bbox, Label, LabelCategories, MaskCategories, AnnotationType @@ -67,304 +67,269 @@ def __iter__(self): compare_datasets(self, DstExtractor(), actual) def test_mask_to_polygons_small_polygons_message(self): - class SrcExtractor(Extractor): - def __iter__(self): - items = [ - DatasetItem(id=1, image=np.zeros((5, 10, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 0], - [0, 1, 0], - [0, 0, 0], - ]), - ), - ] + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 0], + [0, 1, 0], + [0, 0, 0], + ]), ), ] - return iter(items) + ), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ DatasetItem(id=1, image=np.zeros((5, 10, 3))), ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 
10, 3))), ]) with self.assertLogs(level=log.DEBUG) as logs: - actual = transforms.MasksToPolygons(SrcExtractor()) + actual = transforms.MasksToPolygons(source_dataset) - compare_datasets(self, DstExtractor(), actual) + compare_datasets(self, target_dataset, actual) self.assertRegex('\n'.join(logs.output), 'too small polygons') def test_polygons_to_masks(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4]), - Polygon([5, 0, 9, 0, 5, 5]), - ] - ), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4]), + Polygon([5, 0, 9, 0, 5, 5]), + ] + ), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 10, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 0, 0, 0, 1, 1, 1, 1, 0], - [0, 0, 0, 0, 0, 1, 1, 1, 0, 0], - [0, 0, 0, 0, 0, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ]), - ), - Mask(np.array([ - [0, 1, 1, 1, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 1, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ]), - ), - ] + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 0, 0, 0, 1, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ]), ), - ]) + Mask(np.array([ + [0, 1, 1, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ]), + ), + ] + ), + ]) - actual = transforms.PolygonsToMasks(SrcExtractor()) - compare_datasets(self, DstExtractor(), actual) + actual = transforms.PolygonsToMasks(source_dataset) + compare_datasets(self, target_dataset, actual) def test_crop_covered_segments(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - # The mask is partially covered by the polygon - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 0, 1, 1, 1], - [1, 1, 1, 1, 1], - [1, 1, 1, 0, 0], - [1, 1, 1, 0, 0]], - ), - z_order=0), - Polygon([1, 1, 4, 1, 4, 4, 1, 4], - z_order=1), - ] - ), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + # The mask is partially covered by the polygon + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 1, 1, 1], + [1, 1, 1, 1, 1], + [1, 1, 1, 0, 0], + [1, 1, 1, 0, 0]], + ), + z_order=0), + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + z_order=1), + ] + ), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 0], - [1, 1, 1, 0, 0]], - ), - z_order=0), - Polygon([1, 1, 4, 1, 4, 4, 1, 4], - z_order=1), - ] - ), - ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + [1, 1, 1, 0, 0]], + ), + z_order=0), + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + z_order=1), + ] + ), + ]) - actual = transforms.CropCoveredSegments(SrcExtractor()) - compare_datasets(self, DstExtractor(), actual) + actual = 
transforms.CropCoveredSegments(source_dataset) + compare_datasets(self, target_dataset, actual) def test_merge_instance_segments(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 0], - [1, 1, 1, 0, 0]], - ), - z_order=0, group=1), - Polygon([1, 1, 4, 1, 4, 4, 1, 4], - z_order=1, group=1), - Polygon([0, 0, 0, 2, 2, 2, 2, 0], - z_order=1), - ] - ), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + [1, 1, 1, 0, 0]], + ), + z_order=0, group=1), + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + z_order=1, group=1), + Polygon([0, 0, 0, 2, 2, 2, 2, 0], + z_order=1), + ] + ), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 1, 1, 1, 1], - [1, 1, 1, 1, 1], - [1, 1, 1, 1, 0], - [1, 1, 1, 0, 0]], - ), - z_order=0, group=1), - Mask(np.array([ - [1, 1, 0, 0, 0], - [1, 1, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]], - ), - z_order=1), - ] - ), - ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 1, 1, 1, 1], + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 0], + [1, 1, 1, 0, 0]], + ), + z_order=0, group=1), + Mask(np.array([ + [1, 1, 0, 0, 0], + [1, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + ), + z_order=1), + ] + ), + ]) - actual = transforms.MergeInstanceSegments(SrcExtractor(), + actual = transforms.MergeInstanceSegments(source_dataset, include_polygons=True) - compare_datasets(self, DstExtractor(), actual) + compare_datasets(self, target_dataset, actual) def test_map_subsets(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='a'), - DatasetItem(id=2, subset='b'), - DatasetItem(id=3, subset='c'), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='a'), + DatasetItem(id=2, subset='b'), + DatasetItem(id=3, subset='c'), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset=''), - DatasetItem(id=2, subset='a'), - DatasetItem(id=3, subset='c'), - ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset=''), + DatasetItem(id=2, subset='a'), + DatasetItem(id=3, subset='c'), + ]) - actual = transforms.MapSubsets(SrcExtractor(), + actual = transforms.MapSubsets(source_dataset, { 'a': '', 'b': 'a' }) - compare_datasets(self, DstExtractor(), actual) + compare_datasets(self, target_dataset, actual) def test_shapes_to_boxes(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 0], - [1, 1, 1, 0, 0]], - ), id=1), - Polygon([1, 1, 4, 1, 4, 4, 1, 4], id=2), - PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3), - Points([2, 2, 4, 2, 4, 4, 2, 4], id=4), - ] - ), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + [1, 1, 1, 0, 0]], + ), id=1), + Polygon([1, 1, 4, 1, 4, 4, 1, 
4], id=2), + PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3), + Points([2, 2, 4, 2, 4, 4, 2, 4], id=4), + ] + ), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Bbox(0, 0, 4, 4, id=1), - Bbox(1, 1, 3, 3, id=2), - Bbox(1, 1, 1, 1, id=3), - Bbox(2, 2, 2, 2, id=4), - ] - ), - ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Bbox(0, 0, 4, 4, id=1), + Bbox(1, 1, 3, 3, id=2), + Bbox(1, 1, 1, 1, id=3), + Bbox(2, 2, 2, 2, id=4), + ] + ), + ]) - actual = transforms.ShapesToBoxes(SrcExtractor()) - compare_datasets(self, DstExtractor(), actual) + actual = transforms.ShapesToBoxes(source_dataset) + compare_datasets(self, target_dataset, actual) def test_id_from_image(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image='path.jpg'), - DatasetItem(id=2), - ]) - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='path', image='path.jpg'), - DatasetItem(id=2), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image='path.jpg'), + DatasetItem(id=2), + ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id='path', image='path.jpg'), + DatasetItem(id=2), + ]) - actual = transforms.IdFromImageName(SrcExtractor()) - compare_datasets(self, DstExtractor(), actual) + actual = transforms.IdFromImageName(source_dataset) + compare_datasets(self, target_dataset, actual) def test_boxes_to_masks(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Bbox(0, 0, 3, 3, z_order=1), - Bbox(0, 0, 3, 1, z_order=2), - Bbox(0, 2, 3, 1, z_order=3), - ] - ), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Bbox(0, 0, 3, 3, z_order=1), + Bbox(0, 0, 3, 1, z_order=2), + Bbox(0, 2, 3, 1, z_order=3), + ] + ), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [1, 1, 1, 0, 0], - [1, 1, 1, 0, 0], - [1, 1, 1, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]], - ), - z_order=1), - Mask(np.array([ - [1, 1, 1, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]], - ), - z_order=2), - Mask(np.array([ - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [1, 1, 1, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]], - ), - z_order=3), - ] - ), - ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [1, 1, 1, 0, 0], + [1, 1, 1, 0, 0], + [1, 1, 1, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + ), + z_order=1), + Mask(np.array([ + [1, 1, 1, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + ), + z_order=2), + Mask(np.array([ + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [1, 1, 1, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + ), + z_order=3), + ] + ), + ]) - actual = transforms.BoxesToMasks(SrcExtractor()) - compare_datasets(self, DstExtractor(), actual) + actual = transforms.BoxesToMasks(source_dataset) + compare_datasets(self, target_dataset, actual) def test_random_split(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset="a"), - DatasetItem(id=2, subset="a"), - DatasetItem(id=3, subset="b"), - DatasetItem(id=4, subset="b"), - DatasetItem(id=5, subset="b"), - 
DatasetItem(id=6, subset=""), - DatasetItem(id=7, subset=""), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset="a"), + DatasetItem(id=2, subset="a"), + DatasetItem(id=3, subset="b"), + DatasetItem(id=4, subset="b"), + DatasetItem(id=5, subset="b"), + DatasetItem(id=6, subset=""), + DatasetItem(id=7, subset=""), + ]) - actual = transforms.RandomSplit(SrcExtractor(), splits=[ + actual = transforms.RandomSplit(source_dataset, splits=[ ('train', 4.0 / 7.0), ('test', 3.0 / 7.0), ]) @@ -373,21 +338,19 @@ def __iter__(self): self.assertEqual(3, len(actual.get_subset('test'))) def test_random_split_gives_error_on_wrong_ratios(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([DatasetItem(id=1)]) + source_dataset = Dataset.from_iterable([DatasetItem(id=1)]) with self.assertRaises(Exception): - transforms.RandomSplit(SrcExtractor(), splits=[ + transforms.RandomSplit(source_dataset, splits=[ ('train', 0.5), ('test', 0.7), ]) with self.assertRaises(Exception): - transforms.RandomSplit(SrcExtractor(), splits=[]) + transforms.RandomSplit(source_dataset, splits=[]) with self.assertRaises(Exception): - transforms.RandomSplit(SrcExtractor(), splits=[ + transforms.RandomSplit(source_dataset, splits=[ ('train', -0.5), ('test', 1.5), ]) @@ -462,24 +425,19 @@ def categories(self): compare_datasets(self, DstExtractor(), actual) def test_remap_labels_delete_unspecified(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ DatasetItem(id=1, annotations=[ Label(0) ]) ]) - - def categories(self): - label_cat = LabelCategories() - label_cat.add('label0') - - return { AnnotationType.label: label_cat } - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ DatasetItem(id=1, annotations=[]) ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } - - actual = transforms.RemapLabels(SrcExtractor(), + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[ Label(0) ]) + ], categories={ + AnnotationType.label: LabelCategories.from_iterable('label0'), + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[]), + ], categories={ + AnnotationType.label: LabelCategories(), + }) + + actual = transforms.RemapLabels(source_dataset, mapping={}, default='delete') - compare_datasets(self, DstExtractor(), actual) + compare_datasets(self, target_dataset, actual) diff --git a/datumaro/tests/test_yolo_format.py b/datumaro/tests/test_yolo_format.py index bf6d71aeb78..1f6425d1bc9 100644 --- a/datumaro/tests/test_yolo_format.py +++ b/datumaro/tests/test_yolo_format.py @@ -6,7 +6,7 @@ from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Bbox, LabelCategories, ) -from datumaro.components.project import Project +from datumaro.components.project import Project, Dataset from datumaro.plugins.yolo_format.importer import YoloImporter from datumaro.plugins.yolo_format.converter import YoloConverter from datumaro.util.image import Image, save_image @@ -15,40 +15,32 @@ class YoloFormatTest(TestCase): def test_can_save_and_load(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)), - annotations=[ - Bbox(0, 2, 4, 2, label=2), - Bbox(0, 1, 2, 3, label=4), - ]), - DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)), - annotations=[ - Bbox(0, 2, 4, 2, label=2), - Bbox(3, 3, 2, 3, label=4), - Bbox(2, 1, 2, 3, label=4), - ]), - - DatasetItem(id=3, subset='valid', 
image=np.ones((8, 8, 3)), - annotations=[ - Bbox(0, 1, 5, 2, label=2), - Bbox(0, 2, 3, 2, label=5), - Bbox(0, 2, 4, 2, label=6), - Bbox(0, 7, 3, 2, label=7), - ]), - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add('label_' + str(i)) - return { - AnnotationType.label: label_categories, - } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(0, 1, 2, 3, label=4), + ]), + DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(3, 3, 2, 3, label=4), + Bbox(2, 1, 2, 3, label=4), + ]), + + DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)), + annotations=[ + Bbox(0, 1, 5, 2, label=2), + Bbox(0, 2, 3, 2, label=5), + Bbox(0, 2, 4, 2, label=6), + Bbox(0, 7, 3, 2, label=7), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(i) for i in range(10)), + }) with TestDir() as test_dir: - source_dataset = TestExtractor() YoloConverter.convert(source_dataset, test_dir, save_images=True) parsed_dataset = YoloImporter()(test_dir).make_dataset() @@ -56,27 +48,19 @@ def categories(self): compare_datasets(self, source_dataset, parsed_dataset) def test_can_save_dataset_with_image_info(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=Image(path='1.jpg', size=(10, 15)), - annotations=[ - Bbox(0, 2, 4, 2, label=2), - Bbox(3, 3, 2, 3, label=4), - ]), - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add('label_' + str(i)) - return { - AnnotationType.label: label_categories, - } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=Image(path='1.jpg', size=(10, 15)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(3, 3, 2, 3, label=4), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(i) for i in range(10)), + }) with TestDir() as test_dir: - source_dataset = TestExtractor() YoloConverter.convert(source_dataset, test_dir) @@ -87,27 +71,19 @@ def categories(self): compare_datasets(self, source_dataset, parsed_dataset) def test_can_load_dataset_with_exact_image_info(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=Image(path='1.jpg', size=(10, 15)), - annotations=[ - Bbox(0, 2, 4, 2, label=2), - Bbox(3, 3, 2, 3, label=4), - ]), - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add('label_' + str(i)) - return { - AnnotationType.label: label_categories, - } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=Image(path='1.jpg', size=(10, 15)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(3, 3, 2, 3, label=4), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(i) for i in range(10)), + }) with TestDir() as test_dir: - source_dataset = TestExtractor() YoloConverter.convert(source_dataset, test_dir) @@ -117,24 +93,20 @@ def categories(self): compare_datasets(self, source_dataset, parsed_dataset) def test_relative_paths(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='1', subset='train', - image=np.ones((4, 2, 3))), - DatasetItem(id='subdir1/1', subset='train', - 
image=np.ones((2, 6, 3))), - DatasetItem(id='subdir2/1', subset='train', - image=np.ones((5, 4, 3))), - ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } + source_dataset = Dataset.from_iterable([ + DatasetItem(id='1', subset='train', + image=np.ones((4, 2, 3))), + DatasetItem(id='subdir1/1', subset='train', + image=np.ones((2, 6, 3))), + DatasetItem(id='subdir2/1', subset='train', + image=np.ones((5, 4, 3))), + ], categories={ + AnnotationType.label: LabelCategories(), + }) for save_images in {True, False}: with self.subTest(save_images=save_images): with TestDir() as test_dir: - source_dataset = TestExtractor() YoloConverter.convert(source_dataset, test_dir, save_images=save_images) @@ -150,26 +122,19 @@ def test_can_detect(self): self.assertTrue(YoloImporter.detect(DUMMY_DATASET_DIR)) def test_can_import(self): - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((10, 15, 3)), - annotations=[ - Bbox(0, 2, 4, 2, label=2), - Bbox(3, 3, 2, 3, label=4), - ]), - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add('label_' + str(i)) - return { - AnnotationType.label: label_categories, - } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((10, 15, 3)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(3, 3, 2, 3, label=4), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(i) for i in range(10)), + }) dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \ .make_dataset() - compare_datasets(self, DstExtractor(), dataset) + compare_datasets(self, expected_dataset, dataset)
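
A minimal sketch of the pattern this patch applies across the test files: each
hand-written Extractor subclass, with its __iter__() and categories()
overrides, collapses into a single Dataset.from_iterable() call. The item
contents below are illustrative, not copied from any one test.

import numpy as np

from datumaro.components.project import Dataset
from datumaro.components.extractor import (DatasetItem, AnnotationType,
    Bbox, LabelCategories)

# One expression now declares both what __iter__() used to yield (the item
# list) and what categories() used to return (the `categories` dict).
dataset = Dataset.from_iterable([
    DatasetItem(id=1, subset='train', image=np.ones((16, 16, 3)),
        annotations=[
            Bbox(0, 4, 4, 8, label=2),
        ]
    ),
], categories={
    AnnotationType.label: LabelCategories.from_iterable(
        'label_' + str(label) for label in range(10)),
})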
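
The categories arguments in the refactored tests exercise three input forms of
LabelCategories.from_iterable(); a short sketch of each, grounded in how the
tests above call it:

from datumaro.components.extractor import LabelCategories

# A plain string yields a single category, as in
# test_remap_labels_delete_unspecified.
single = LabelCategories.from_iterable('label0')

# A list of names yields one category per name, as in the LabelMe test.
listed = LabelCategories.from_iterable(['window', 'license plate', 'o1'])

# Any other iterable of names works too, e.g. the generator form the MOT,
# TFRecord, and YOLO tests use.
generated = LabelCategories.from_iterable(
    'label_' + str(label) for label in range(10))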
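
For completeness, a sketch of how such a dataset feeds the round-trip checks
these tests perform, shown for the YOLO format as in test_can_save_and_load;
the TestDir and compare_datasets helpers are assumed to come from
datumaro.util.test_utils (their import sits outside the hunks shown here).

import numpy as np
from unittest import TestCase

from datumaro.components.project import Dataset
from datumaro.components.extractor import (DatasetItem, AnnotationType,
    Bbox, LabelCategories)
from datumaro.plugins.yolo_format.importer import YoloImporter
from datumaro.plugins.yolo_format.converter import YoloConverter
from datumaro.util.test_utils import TestDir, compare_datasets  # assumed path

class YoloRoundTripSketch(TestCase):
    def test_save_and_load(self):
        source_dataset = Dataset.from_iterable([
            DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)),
                annotations=[
                    Bbox(0, 2, 4, 2, label=2),
                ]),
        ], categories={
            AnnotationType.label: LabelCategories.from_iterable(
                'label_' + str(i) for i in range(10)),
        })

        # Write the dataset in YOLO format, read it back, and require that
        # nothing was lost on the way.
        with TestDir() as test_dir:
            YoloConverter.convert(source_dataset, test_dir,
                save_images=True)
            parsed_dataset = YoloImporter()(test_dir).make_dataset()

            compare_datasets(self, source_dataset, parsed_dataset)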