From 7ecdcf182b1bfae2d29e0f256e025743dd8d198e Mon Sep 17 00:00:00 2001
From: Ilya Kochankov <33292483+KochankovID@users.noreply.github.com>
Date: Mon, 20 Jul 2020 11:52:47 +0300
Subject: [PATCH] [Datumaro] Reducing nesting of tests (#1875)

* Add `Dataset.from_iterable` constructor
* Simplify creation of `Dataset` objects in common simple cases
* Refactor tests
---
 datumaro/datumaro/components/extractor.py |  52 ++
 datumaro/datumaro/components/project.py   |  32 +-
 datumaro/tests/test_coco_format.py        | 837 ++++++++++------------
 datumaro/tests/test_cvat_format.py        | 435 ++++++-----
 datumaro/tests/test_datumaro_format.py    | 125 ++--
 datumaro/tests/test_labelme_format.py     | 333 ++++-----
 datumaro/tests/test_mot_format.py         | 202 +++---
 datumaro/tests/test_tfrecord_format.py    | 226 +++---
 datumaro/tests/test_transforms.py         | 526 +++++++-------
 datumaro/tests/test_yolo_format.py        | 173 ++---
 10 files changed, 1363 insertions(+), 1578 deletions(-)

diff --git a/datumaro/datumaro/components/extractor.py b/datumaro/datumaro/components/extractor.py
index fe4d897b6cc..3180665f5ab 100644
--- a/datumaro/datumaro/components/extractor.py
+++ b/datumaro/datumaro/components/extractor.py
@@ -72,6 +72,33 @@ def __eq__(self, other):
 class LabelCategories(Categories):
     Category = namedtuple('Category', ['name', 'parent', 'attributes'])
 
+    @classmethod
+    def from_iterable(cls, iterable):
+        """Generates LabelCategories from an iterable object
+
+        Args:
+            iterable: This iterable object can be:
+                1) a single str - generates one Category with the str as its name
+                2) a list of str - interpreted as a list of Category names
+                3) a list of positional arguments - generates Categories
+                with these arguments
+
+
+        Returns:
+            LabelCategories: a LabelCategories object
+        """
+        temp_categories = cls()
+
+        if isinstance(iterable, str):
+            iterable = [[iterable]]
+
+        for category in iterable:
+            if isinstance(category, str):
+                category = [category]
+            temp_categories.add(*category)
+
+        return temp_categories
+
     def __init__(self, items=None, attributes=None):
         super().__init__(attributes=attributes)
 
@@ -482,6 +509,31 @@ def iou(self, other):
 class PointsCategories(Categories):
     Category = namedtuple('Category', ['labels', 'joints'])
 
+    @classmethod
+    def from_iterable(cls, iterable):
+        """Generates PointsCategories from an iterable object
+
+        Args:
+            iterable: This iterable object can be:
+                1) a single int - generates one Category with the int as its label
+                2) a list of int - interpreted as a list of Category labels
+                3) a list of positional arguments - generates Categories
+                with these arguments
+
+        Returns:
+            PointsCategories: a PointsCategories object
+        """
+        temp_categories = cls()
+
+        if isinstance(iterable, int):
+            iterable = [[iterable]]
+
+        for category in iterable:
+            if isinstance(category, int):
+                category = [category]
+            temp_categories.add(*category)
+        return temp_categories
+
     def __init__(self, items=None, attributes=None):
         super().__init__(attributes=attributes)
 
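Taken together, the two constructors above accept several input shapes. A minimal usage sketch (illustrative, not part of the patch; the positional-argument forms are unpacked into `LabelCategories.add(name, parent, attributes)` and `PointsCategories.add(label_id, labels, joints)`, mirroring the calls made in the refactored tests later in this patch):

    from datumaro.components.extractor import LabelCategories, PointsCategories

    # A plain list of names: one Category per name
    simple = LabelCategories.from_iterable(['label1', 'label2'])

    # Positional-argument form: each inner list is unpacked into add()
    with_attrs = LabelCategories.from_iterable([
        ['label1', '', {'a1', 'a2'}],   # name, parent, attributes
        ['label2'],
    ])

    # The same idea for keypoint metadata: (label id, part labels, joints)
    points = PointsCategories.from_iterable(
        (i, None, [[0, 1], [1, 2]]) for i in range(10))
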
diff --git a/datumaro/datumaro/components/project.py b/datumaro/datumaro/components/project.py
index a1cd4919ff5..9ee38839737 100644
--- a/datumaro/datumaro/components/project.py
+++ b/datumaro/datumaro/components/project.py
@@ -18,7 +18,8 @@ from datumaro.components.config import Config, DEFAULT_FORMAT
 from datumaro.components.config_model import (Model, Source,
     PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA)
-from datumaro.components.extractor import Extractor
+from datumaro.components.extractor import Extractor, LabelCategories,\
+    AnnotationType
 from datumaro.components.launcher import ModelTransform
 from datumaro.components.dataset_filter import \
     XPathDatasetFilter, XPathAnnotationsFilter
@@ -319,6 +320,35 @@ def categories(self):
         return self._parent.categories()
 
 class Dataset(Extractor):
+    @classmethod
+    def from_iterable(cls, iterable, categories=None):
+        """Generates a Dataset from an iterable object
+
+        Args:
+            iterable: an iterable object containing DatasetItems
+            categories (dict, optional): a dict of categories, or a list of
+                label names, which will be interpreted as the names of
+                LabelCategories entries. Defaults to {}.
+
+        Returns:
+            Dataset: a Dataset object
+        """
+
+        if isinstance(categories, list):
+            categories = {AnnotationType.label: LabelCategories.from_iterable(categories)}
+
+        if not categories:
+            categories = {}
+
+        class tmpExtractor(Extractor):
+            def __iter__(self):
+                return iter(iterable)
+
+            def categories(self):
+                return categories
+
+        return cls.from_extractors(tmpExtractor())
+
     @classmethod
     def from_extractors(cls, *sources):
         # merge categories
diff --git a/datumaro/tests/test_coco_format.py b/datumaro/tests/test_coco_format.py
index 129d64e7882..3c50996e051 100644
--- a/datumaro/tests/test_coco_format.py
+++ b/datumaro/tests/test_coco_format.py
@@ -4,7 +4,7 @@
 
 from unittest import TestCase
 
-from datumaro.components.project import Project
+from datumaro.components.project import (Project, Dataset)
 from datumaro.components.extractor import (Extractor, DatasetItem,
     AnnotationType, Label, Mask, Points, Polygon, Bbox, Caption,
     LabelCategories, PointsCategories
@@ -26,32 +26,26 @@ class CocoImporterTest(TestCase):
     def test_can_import(self):
-        class DstExtractor(Extractor):
-            def __iter__(self):
-                return iter([
-                    DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
-                        subset='val', attributes={'id': 1},
-                        annotations=[
-                            Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
-                                id=1, group=1, attributes={'is_crowd': False}),
-                            Mask(np.array(
-                                [[1, 0, 0, 1, 0]] * 5 +
-                                [[1, 1, 1, 1, 0]] * 5
-                                ), label=0,
-                                id=2, group=2, attributes={'is_crowd': True}),
-                        ]
-                    ),
-                ])
-            def categories(self):
-                label_cat = LabelCategories()
-                label_cat.add('TEST')
-                return { AnnotationType.label: label_cat }
+        expected_dataset = Dataset.from_iterable([
+            DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
+                subset='val', attributes={'id': 1},
+                annotations=[
+                    Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
+                        id=1, group=1, attributes={'is_crowd': False}),
+                    Mask(np.array(
+                        [[1, 0, 0, 1, 0]] * 5 +
+                        [[1, 1, 1, 1, 0]] * 5
+                        ), label=0,
+                        id=2, group=2, attributes={'is_crowd': True}),
+                ]
+            ),
+        ], categories=['TEST',])
 
         dataset = Project.import_from(DUMMY_DATASET_DIR, 'coco') \
             .make_dataset()
 
-        compare_datasets(self, DstExtractor(), dataset)
+        compare_datasets(self, expected_dataset, dataset)
 
     def test_can_detect(self):
         self.assertTrue(CocoImporter.detect(DUMMY_DATASET_DIR))
@@ -71,526 +65,417 @@ def _test_save_and_load(self, source_dataset, converter, test_dir,
         compare_datasets(self, expected=target_dataset, actual=parsed_dataset)
 
     def test_can_save_and_load_captions(self):
-        class TestExtractor(Extractor):
-            def __iter__(self):
-                return iter([
-                    DatasetItem(id=1, subset='train',
-                        annotations=[
-                            Caption('hello', id=1, group=1),
-                            Caption('world', id=2, group=2),
-                        ], attributes={'id': 1}),
-                    DatasetItem(id=2, subset='train',
-                        annotations=[
-                            Caption('test', id=3, group=3),
-                        ], attributes={'id': 2}),
-
-                    DatasetItem(id=3, subset='val',
-                        annotations=[
-                            Caption('word', id=1, group=1),
-                        ], attributes={'id': 1}
-                    ),
-                ])
+        expected_dataset = Dataset.from_iterable([
+
DatasetItem(id=1, subset='train', + annotations=[ + Caption('hello', id=1, group=1), + Caption('world', id=2, group=2), + ], attributes={'id': 1}), + DatasetItem(id=2, subset='train', + annotations=[ + Caption('test', id=3, group=3), + ], attributes={'id': 2}), + + DatasetItem(id=3, subset='val', + annotations=[ + Caption('word', id=1, group=1), + ], attributes={'id': 1}), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(expected_dataset, CocoCaptionsConverter.convert, test_dir) def test_can_save_and_load_instances(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - categories = { AnnotationType.label: label_categories } - - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), - annotations=[ - # Bbox + single polygon - Bbox(0, 1, 2, 2, - label=2, group=1, id=1, - attributes={ 'is_crowd': False }), - Polygon([0, 1, 2, 1, 2, 3, 0, 3], - attributes={ 'is_crowd': False }, - label=2, group=1, id=1), - ], attributes={'id': 1}), - DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), - annotations=[ - # Mask + bbox - Mask(np.array([ - [0, 1, 0, 0], - [0, 1, 0, 0], - [0, 1, 1, 1], - [0, 0, 0, 0]], - ), - attributes={ 'is_crowd': True }, - label=4, group=3, id=3), - Bbox(1, 0, 2, 2, label=4, group=3, id=3, - attributes={ 'is_crowd': True }), - ], attributes={'id': 2}), - - DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)), - annotations=[ - # Bbox + mask - Bbox(0, 1, 2, 2, label=4, group=3, id=3, - attributes={ 'is_crowd': True }), - Mask(np.array([ - [0, 0, 0, 0], - [1, 1, 1, 0], - [1, 1, 0, 0], - [0, 0, 0, 0]], - ), - attributes={ 'is_crowd': True }, - label=4, group=3, id=3), - ], attributes={'id': 1}), - ]) - - def categories(self): - return categories - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), - annotations=[ - Polygon([0, 1, 2, 1, 2, 3, 0, 3], - attributes={ 'is_crowd': False }, - label=2, group=1, id=1), - ], attributes={'id': 1}), - DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), - annotations=[ - Mask(np.array([ - [0, 1, 0, 0], - [0, 1, 0, 0], - [0, 1, 1, 1], - [0, 0, 0, 0]], - ), - attributes={ 'is_crowd': True }, - label=4, group=3, id=3), - ], attributes={'id': 2}), - - DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 0, 0], - [1, 1, 1, 0], - [1, 1, 0, 0], - [0, 0, 0, 0]], - ), - attributes={ 'is_crowd': True }, - label=4, group=3, id=3), - ], attributes={'id': 1}), - ]) - - def categories(self): - return categories + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + # Bbox + single polygon + Bbox(0, 1, 2, 2, + label=2, group=1, id=1, + attributes={ 'is_crowd': False }), + Polygon([0, 1, 2, 1, 2, 3, 0, 3], + attributes={ 'is_crowd': False }, + label=2, group=1, id=1), + ], attributes={'id': 1}), + DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + # Mask + bbox + Mask(np.array([ + [0, 1, 0, 0], + [0, 1, 0, 0], + [0, 1, 1, 1], + [0, 0, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), + Bbox(1, 0, 2, 2, label=4, group=3, id=3, + attributes={ 'is_crowd': True }), + ], attributes={'id': 2}), + + DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)), + annotations=[ + # Bbox + mask + Bbox(0, 1, 2, 2, label=4, group=3, 
id=3, + attributes={ 'is_crowd': True }), + Mask(np.array([ + [0, 0, 0, 0], + [1, 1, 1, 0], + [1, 1, 0, 0], + [0, 0, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), + ], attributes={'id': 1}), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + Polygon([0, 1, 2, 1, 2, 3, 0, 3], + attributes={ 'is_crowd': False }, + label=2, group=1, id=1), + ], attributes={'id': 1}), + DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + Mask(np.array([ + [0, 1, 0, 0], + [0, 1, 0, 0], + [0, 1, 1, 1], + [0, 0, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), + ], attributes={'id': 2}), + + DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 0, 0], + [1, 1, 1, 0], + [1, 1, 0, 0], + [0, 0, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), + ], attributes={'id': 1}) + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(source_dataset, CocoInstancesConverter.convert, test_dir, - target_dataset=DstExtractor()) + target_dataset=target_dataset) def test_can_merge_polygons_on_loading(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - categories = { AnnotationType.label: label_categories } - - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((6, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4], - label=3, id=4, group=4), - Polygon([5, 0, 9, 0, 5, 5], - label=3, id=4, group=4), - ] - ), - ]) - - def categories(self): - return categories - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((6, 10, 3)), - annotations=[ - Mask(np.array([ - [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], - [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], - [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], - # only internal fragment (without the border), - # but not everywhere... - ), - label=3, id=4, group=4, - attributes={ 'is_crowd': False }), - ], attributes={'id': 1} + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((6, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], + label=3, id=4, group=4), + Polygon([5, 0, 9, 0, 5, 5], + label=3, id=4, group=4), + ] + ), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((6, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], + # only internal fragment (without the border), + # but not everywhere... 
), - ]) - - def categories(self): - return categories + label=3, id=4, group=4, + attributes={ 'is_crowd': False }), + ], attributes={'id': 1} + ), + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), + self._test_save_and_load(source_dataset, CocoInstancesConverter.convert, test_dir, importer_args={'merge_instance_polygons': True}, - target_dataset=DstExtractor()) + target_dataset=target_dataset) def test_can_crop_covered_segments(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - - class SrcTestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 0, 1, 1, 1], - [1, 1, 0, 1, 1], - [1, 1, 1, 0, 0], - [1, 1, 1, 0, 0]], - ), - label=2, id=1, z_order=0), - Polygon([1, 1, 4, 1, 4, 4, 1, 4], - label=1, id=2, z_order=1), - ] - ), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } - - class DstTestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 0], - [1, 1, 1, 0, 0]], - ), - attributes={ 'is_crowd': True }, - label=2, id=1, group=1), - - Polygon([1, 1, 4, 1, 4, 4, 1, 4], - label=1, id=2, group=2, - attributes={ 'is_crowd': False }), - ], attributes={'id': 1} - ), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 1, 1, 1], + [1, 1, 0, 1, 1], + [1, 1, 1, 0, 0], + [1, 1, 1, 0, 0]], + ), + label=2, id=1, z_order=0), + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + label=1, id=2, z_order=1), + ] + ), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + [1, 1, 1, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=2, id=1, group=1), + + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + label=1, id=2, group=2, + attributes={ 'is_crowd': False }), + ], attributes={'id': 1} + ), + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(SrcTestExtractor(), - partial(CocoInstancesConverter.convert, crop_covered=True), - test_dir, target_dataset=DstTestExtractor()) + self._test_save_and_load(source_dataset, + partial(CocoInstancesConverter.convert, crop_covered=True), + test_dir, target_dataset=target_dataset) def test_can_convert_polygons_to_mask(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - - class SrcTestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((6, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4], - label=3, id=4, group=4), - Polygon([5, 0, 9, 0, 5, 5], - label=3, id=4, group=4), - ] - ), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } - - class DstTestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((6, 10, 3)), - annotations=[ - Mask(np.array([ - [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], - [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], - [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 
0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], - # only internal fragment (without the border), - # but not everywhere... - ), - attributes={ 'is_crowd': True }, - label=3, id=4, group=4), - ], attributes={'id': 1} - ), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((6, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], + label=3, id=4, group=4), + Polygon([5, 0, 9, 0, 5, 5], + label=3, id=4, group=4), + ] + ), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((6, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], + # only internal fragment (without the border), + # but not everywhere... + ), + attributes={ 'is_crowd': True }, + label=3, id=4, group=4), + ], attributes={'id': 1} + ), + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(SrcTestExtractor(), + self._test_save_and_load(source_dataset, partial(CocoInstancesConverter.convert, segmentation_mode='mask'), - test_dir, target_dataset=DstTestExtractor()) + test_dir, target_dataset=target_dataset) def test_can_convert_masks_to_polygons(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 10, 3)), - annotations=[ - Mask(np.array([ - [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], - [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], - [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ]), - label=3, id=4, group=4), - ] - ), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 10, 3)), - annotations=[ - Polygon( - [3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5], - label=3, id=4, group=4, - attributes={ 'is_crowd': False }), - Polygon( - [5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5], - label=3, id=4, group=4, - attributes={ 'is_crowd': False }), - ], attributes={'id': 1} - ), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ]), + label=3, id=4, group=4), + ] + ), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Polygon( + [3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5], + label=3, id=4, group=4, + attributes={ 'is_crowd': False }), + Polygon( + [5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5], + label=3, id=4, group=4, + attributes={ 'is_crowd': False }), + ], attributes={'id': 1} + ), + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), + self._test_save_and_load(source_dataset, partial(CocoInstancesConverter.convert, segmentation_mode='polygons'), - test_dir, target_dataset=DstExtractor()) + test_dir, + target_dataset=target_dataset) 
def test_can_save_and_load_images(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', attributes={'id': 1}), - DatasetItem(id=2, subset='train', attributes={'id': 2}), + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', attributes={'id': 1}), + DatasetItem(id=2, subset='train', attributes={'id': 2}), - DatasetItem(id=2, subset='val', attributes={'id': 2}), - DatasetItem(id=3, subset='val', attributes={'id': 3}), - DatasetItem(id=4, subset='val', attributes={'id': 4}), + DatasetItem(id=2, subset='val', attributes={'id': 2}), + DatasetItem(id=3, subset='val', attributes={'id': 3}), + DatasetItem(id=4, subset='val', attributes={'id': 4}), - DatasetItem(id=5, subset='test', attributes={'id': 1}), - ]) + DatasetItem(id=5, subset='test', attributes={'id': 1}), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(expected_dataset, CocoImageInfoConverter.convert, test_dir) def test_can_save_and_load_labels(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - annotations=[ - Label(4, id=1, group=1), - Label(9, id=2, group=2), - ], attributes={'id': 1} - ), - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - return { - AnnotationType.label: label_categories, - } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + annotations=[ + Label(4, id=1, group=1), + Label(9, id=2, group=2), + ], attributes={'id': 1}), + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(expected_dataset, CocoLabelsConverter.convert, test_dir) def test_can_save_and_load_keypoints(self): - label_categories = LabelCategories() - points_categories = PointsCategories() - for i in range(10): - label_categories.add(str(i)) - points_categories.add(i, joints=[[0, 1], [1, 2]]) - categories = { - AnnotationType.label: label_categories, - AnnotationType.points: points_categories, - } - - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)), - annotations=[ - # Full instance annotations: polygon + keypoints - Points([0, 0, 0, 2, 4, 1], [0, 1, 2], - label=3, group=1, id=1), - Polygon([0, 0, 4, 0, 4, 4], - label=3, group=1, id=1), - - # Full instance annotations: bbox + keypoints - Points([1, 2, 3, 4, 2, 3], group=2, id=2), - Bbox(1, 2, 2, 2, group=2, id=2), - - # Solitary keypoints - Points([1, 2, 0, 2, 4, 1], label=5, id=3), - - # Some other solitary annotations (bug #1387) - Polygon([0, 0, 4, 0, 4, 4], label=3, id=4), - - # Solitary keypoints with no label - Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5), - ]) - ]) - - def categories(self): - return categories - - class DstTestExtractor(TestExtractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)), - annotations=[ - Points([0, 0, 0, 2, 4, 1], [0, 1, 2], - label=3, group=1, id=1, - attributes={'is_crowd': False}), - Polygon([0, 0, 4, 0, 4, 4], - label=3, group=1, id=1, - attributes={'is_crowd': False}), - - Points([1, 2, 3, 4, 2, 3], - group=2, id=2, - attributes={'is_crowd': False}), - Polygon([1, 2, 3, 2, 3, 4, 1, 4], - group=2, id=2, - attributes={'is_crowd': False}), - - Points([1, 2, 0, 2, 4, 1], - label=5, group=3, id=3, - attributes={'is_crowd': False}), - 
Polygon([0, 1, 4, 1, 4, 2, 0, 2], - label=5, group=3, id=3, - attributes={'is_crowd': False}), - - Points([0, 0, 1, 2, 3, 4], [0, 1, 2], - group=5, id=5, - attributes={'is_crowd': False}), - Polygon([1, 2, 3, 2, 3, 4, 1, 4], - group=5, id=5, - attributes={'is_crowd': False}), - ], attributes={'id': 1}), - ]) + + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)), + annotations=[ + # Full instance annotations: polygon + keypoints + Points([0, 0, 0, 2, 4, 1], [0, 1, 2], + label=3, group=1, id=1), + Polygon([0, 0, 4, 0, 4, 4], + label=3, group=1, id=1), + + # Full instance annotations: bbox + keypoints + Points([1, 2, 3, 4, 2, 3], group=2, id=2), + Bbox(1, 2, 2, 2, group=2, id=2), + + # Solitary keypoints + Points([1, 2, 0, 2, 4, 1], label=5, id=3), + + # Some other solitary annotations (bug #1387) + Polygon([0, 0, 4, 0, 4, 4], label=3, id=4), + + # Solitary keypoints with no label + Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + str(i) for i in range(10)), + AnnotationType.points: PointsCategories.from_iterable( + (i, None, [[0, 1], [1, 2]]) for i in range(10) + ), + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)), + annotations=[ + Points([0, 0, 0, 2, 4, 1], [0, 1, 2], + label=3, group=1, id=1, + attributes={'is_crowd': False}), + Polygon([0, 0, 4, 0, 4, 4], + label=3, group=1, id=1, + attributes={'is_crowd': False}), + + Points([1, 2, 3, 4, 2, 3], + group=2, id=2, + attributes={'is_crowd': False}), + Polygon([1, 2, 3, 2, 3, 4, 1, 4], + group=2, id=2, + attributes={'is_crowd': False}), + + Points([1, 2, 0, 2, 4, 1], + label=5, group=3, id=3, + attributes={'is_crowd': False}), + Polygon([0, 1, 4, 1, 4, 2, 0, 2], + label=5, group=3, id=3, + attributes={'is_crowd': False}), + + Points([0, 0, 1, 2, 3, 4], [0, 1, 2], + group=5, id=5, + attributes={'is_crowd': False}), + Polygon([1, 2, 3, 2, 3, 4, 1, 4], + group=5, id=5, + attributes={'is_crowd': False}), + ], attributes={'id': 1}), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + str(i) for i in range(10)), + AnnotationType.points: PointsCategories.from_iterable( + (i, None, [[0, 1], [1, 2]]) for i in range(10) + ), + }) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(source_dataset, CocoPersonKeypointsConverter.convert, test_dir, - target_dataset=DstTestExtractor()) + target_dataset=target_dataset) def test_can_save_dataset_with_no_subsets(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, attributes={'id': 1}), - DatasetItem(id=2, attributes={'id': 2}), - ]) + test_dataset = Dataset.from_iterable([ + DatasetItem(id=1, attributes={'id': 1}), + DatasetItem(id=2, attributes={'id': 2}), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(test_dataset, CocoConverter.convert, test_dir) def test_can_save_dataset_with_image_info(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)), - attributes={'id': 1}), - ]) + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)), + attributes={'id': 1}), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(expected_dataset, CocoImageInfoConverter.convert, test_dir) 
def test_relative_paths(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='1', image=np.ones((4, 2, 3)), - attributes={'id': 1}), - DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)), - attributes={'id': 2}), - DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)), - attributes={'id': 3}), - ]) + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((4, 2, 3)), + attributes={'id': 1}), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)), + attributes={'id': 2}), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)), + attributes={'id': 3}), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), - partial(CocoImageInfoConverter.convert, save_images=True), - test_dir) + self._test_save_and_load(expected_dataset, + partial(CocoImageInfoConverter.convert, save_images=True), test_dir) def test_preserve_coco_ids(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), - attributes={'id': 40}), - ]) + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), + attributes={'id': 40}), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), - partial(CocoImageInfoConverter.convert, save_images=True), - test_dir) + self._test_save_and_load(expected_dataset, + partial(CocoImageInfoConverter.convert, save_images=True), test_dir) def test_annotation_attributes(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[ - Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1, - attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}), - ], attributes={'id': 1}) - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add(str(i)) - return { AnnotationType.label: label_categories, } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[ + Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1, + attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}), + ], attributes={'id': 1}) + ], categories=[str(i) for i in range(10)]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(expected_dataset, CocoConverter.convert, test_dir) diff --git a/datumaro/tests/test_cvat_format.py b/datumaro/tests/test_cvat_format.py index 9f2622034eb..5c246ff4843 100644 --- a/datumaro/tests/test_cvat_format.py +++ b/datumaro/tests/test_cvat_format.py @@ -3,7 +3,7 @@ import os.path as osp from unittest import TestCase - +from datumaro.components.project import Dataset from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Points, Polygon, PolyLine, Bbox, Label, LabelCategories, @@ -28,121 +28,115 @@ def test_can_detect_video(self): self.assertTrue(CvatImporter.detect(DUMMY_VIDEO_DATASET_DIR)) def test_can_load_image(self): - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='img0', subset='train', - image=np.ones((8, 8, 3)), - annotations=[ - Bbox(0, 2, 4, 2, label=0, z_order=1, - attributes={ - 'occluded': True, - 'a1': True, 'a2': 'v3' - }), - PolyLine([1, 2, 3, 4, 5, 6, 7, 8], - attributes={'occluded': False}), - ], attributes={'frame': 0}), - DatasetItem(id='img1', subset='train', - image=np.ones((10, 10, 3)), - annotations=[ - Polygon([1, 2, 3, 4, 6, 5], z_order=1, - attributes={'occluded': False}), - Points([1, 2, 3, 
4, 5, 6], label=1, z_order=2, - attributes={'occluded': False}), - ], attributes={'frame': 1}), - ]) - - def categories(self): - label_categories = LabelCategories() - label_categories.add('label1', attributes={'a1', 'a2'}) - label_categories.add('label2') - return { AnnotationType.label: label_categories } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='img0', subset='train', + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(0, 2, 4, 2, label=0, z_order=1, + attributes={ + 'occluded': True, + 'a1': True, 'a2': 'v3' + }), + PolyLine([1, 2, 3, 4, 5, 6, 7, 8], + attributes={'occluded': False}), + ], attributes={'frame': 0}), + DatasetItem(id='img1', subset='train', + image=np.ones((10, 10, 3)), + annotations=[ + Polygon([1, 2, 3, 4, 6, 5], z_order=1, + attributes={'occluded': False}), + Points([1, 2, 3, 4, 5, 6], label=1, z_order=2, + attributes={'occluded': False}), + ], attributes={'frame': 1}), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable([ + ['label1', '', {'a1', 'a2'}], + ['label2'], + ]) + }) parsed_dataset = CvatImporter()(DUMMY_IMAGE_DATASET_DIR).make_dataset() - compare_datasets(self, DstExtractor(), parsed_dataset) + compare_datasets(self, expected_dataset, parsed_dataset) def test_can_load_video(self): - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='frame_000010', subset='annotations', - image=np.ones((20, 25, 3)), - annotations=[ - Bbox(3, 4, 7, 1, label=2, - id=0, - attributes={ - 'occluded': True, - 'outside': False, 'keyframe': True, - 'track_id': 0 - }), - Points([21.95, 8.00, 2.55, 15.09, 2.23, 3.16], - label=0, - id=1, - attributes={ - 'occluded': False, - 'outside': False, 'keyframe': True, - 'track_id': 1, 'hgl': 'hgkf', - }), - ], attributes={'frame': 10}), - DatasetItem(id='frame_000013', subset='annotations', - image=np.ones((20, 25, 3)), - annotations=[ - Bbox(7, 6, 7, 2, label=2, - id=0, - attributes={ - 'occluded': False, - 'outside': True, 'keyframe': True, - 'track_id': 0 - }), - Points([21.95, 8.00, 9.55, 15.09, 5.23, 1.16], - label=0, - id=1, - attributes={ - 'occluded': False, - 'outside': True, 'keyframe': True, - 'track_id': 1, 'hgl': 'jk', - }), - PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], - label=2, - id=2, - attributes={ - 'occluded': False, - 'outside': False, 'keyframe': True, - 'track_id': 2, - }), - ], attributes={'frame': 13}), - DatasetItem(id='frame_000016', subset='annotations', - image=Image(path='frame_0000016.png', size=(20, 25)), - annotations=[ - Bbox(8, 7, 6, 10, label=2, - id=0, - attributes={ - 'occluded': False, - 'outside': True, 'keyframe': True, - 'track_id': 0 - }), - PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], - label=2, - id=2, - attributes={ - 'occluded': False, - 'outside': True, 'keyframe': True, - 'track_id': 2, - }), - ], attributes={'frame': 16}), - ]) - - def categories(self): - label_categories = LabelCategories() - label_categories.add('klhg', attributes={'hgl'}) - label_categories.add('z U k') - label_categories.add('II') - return { AnnotationType.label: label_categories } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='frame_000010', subset='annotations', + image=np.ones((20, 25, 3)), + annotations=[ + Bbox(3, 4, 7, 1, label=2, + id=0, + attributes={ + 'occluded': True, + 'outside': False, 'keyframe': True, + 'track_id': 0 + }), + Points([21.95, 8.00, 2.55, 15.09, 2.23, 3.16], + label=0, + id=1, + attributes={ + 'occluded': False, + 'outside': False, 'keyframe': True, + 'track_id': 1, 
'hgl': 'hgkf', + }), + ], attributes={'frame': 10}), + DatasetItem(id='frame_000013', subset='annotations', + image=np.ones((20, 25, 3)), + annotations=[ + Bbox(7, 6, 7, 2, label=2, + id=0, + attributes={ + 'occluded': False, + 'outside': True, 'keyframe': True, + 'track_id': 0 + }), + Points([21.95, 8.00, 9.55, 15.09, 5.23, 1.16], + label=0, + id=1, + attributes={ + 'occluded': False, + 'outside': True, 'keyframe': True, + 'track_id': 1, 'hgl': 'jk', + }), + PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], + label=2, + id=2, + attributes={ + 'occluded': False, + 'outside': False, 'keyframe': True, + 'track_id': 2, + }), + ], attributes={'frame': 13}), + DatasetItem(id='frame_000016', subset='annotations', + image=Image(path='frame_0000016.png', size=(20, 25)), + annotations=[ + Bbox(8, 7, 6, 10, label=2, + id=0, + attributes={ + 'occluded': False, + 'outside': True, 'keyframe': True, + 'track_id': 0 + }), + PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], + label=2, + id=2, + attributes={ + 'occluded': False, + 'outside': True, 'keyframe': True, + 'track_id': 2, + }), + ], attributes={'frame': 16}), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable([ + ['klhg', '', {'hgl'}], + ['z U k'], + ['II'] + ]), + }) parsed_dataset = CvatImporter()(DUMMY_VIDEO_DATASET_DIR).make_dataset() - compare_datasets(self, DstExtractor(), parsed_dataset) + compare_datasets(self, expected_dataset, parsed_dataset) class CvatConverterTest(TestCase): def _test_save_and_load(self, source_dataset, converter, test_dir, @@ -165,137 +159,120 @@ def test_can_save_and_load(self): label_categories.items[2].attributes.update(['a1', 'a2']) label_categories.attributes.update(['occluded']) - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4], - label=1, group=4, - attributes={ 'occluded': True }), - Points([1, 1, 3, 2, 2, 3], - label=2, - attributes={ 'a1': 'x', 'a2': 42, - 'unknown': 'bar' }), - Label(1), - Label(2, attributes={ 'a1': 'y', 'a2': 44 }), - ] - ), - DatasetItem(id=1, subset='s1', - annotations=[ - PolyLine([0, 0, 4, 0, 4, 4], - label=3, id=4, group=4), - Bbox(5, 0, 1, 9, - label=3, id=4, group=4), - ] - ), - - DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4], z_order=1, - label=3, group=4, - attributes={ 'occluded': False }), - PolyLine([5, 0, 9, 0, 5, 5]), # will be skipped as no label - ] - ), - - DatasetItem(id=3, subset='s3', image=Image( - path='3.jpg', size=(2, 4))), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4], - label=1, group=4, - attributes={ 'occluded': True }), - Points([1, 1, 3, 2, 2, 3], - label=2, - attributes={ 'occluded': False, - 'a1': 'x', 'a2': 42 }), - Label(1), - Label(2, attributes={ 'a1': 'y', 'a2': 44 }), - ], attributes={'frame': 0} - ), - DatasetItem(id=1, subset='s1', - annotations=[ - PolyLine([0, 0, 4, 0, 4, 4], - label=3, group=4, - attributes={ 'occluded': False }), - Bbox(5, 0, 1, 9, - label=3, group=4, - attributes={ 'occluded': False }), - ], attributes={'frame': 1} - ), - - DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4], z_order=1, - label=3, group=4, - attributes={ 'occluded': False 
}), - ], attributes={'frame': 0} - ), - - DatasetItem(id=3, subset='s3', image=Image( - path='3.jpg', size=(2, 4)), - attributes={'frame': 0}), - ]) - - def categories(self): - return { AnnotationType.label: label_categories } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], + label=1, group=4, + attributes={ 'occluded': True }), + Points([1, 1, 3, 2, 2, 3], + label=2, + attributes={ 'a1': 'x', 'a2': 42, + 'unknown': 'bar' }), + Label(1), + Label(2, attributes={ 'a1': 'y', 'a2': 44 }), + ] + ), + DatasetItem(id=1, subset='s1', + annotations=[ + PolyLine([0, 0, 4, 0, 4, 4], + label=3, id=4, group=4), + Bbox(5, 0, 1, 9, + label=3, id=4, group=4), + ] + ), + + DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], z_order=1, + label=3, group=4, + attributes={ 'occluded': False }), + PolyLine([5, 0, 9, 0, 5, 5]), # will be skipped as no label + ] + ), + + DatasetItem(id=3, subset='s3', image=Image( + path='3.jpg', size=(2, 4))), + ], categories={ + AnnotationType.label: label_categories, + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], + label=1, group=4, + attributes={ 'occluded': True }), + Points([1, 1, 3, 2, 2, 3], + label=2, + attributes={ 'occluded': False, + 'a1': 'x', 'a2': 42 }), + Label(1), + Label(2, attributes={ 'a1': 'y', 'a2': 44 }), + ], attributes={'frame': 0} + ), + DatasetItem(id=1, subset='s1', + annotations=[ + PolyLine([0, 0, 4, 0, 4, 4], + label=3, group=4, + attributes={ 'occluded': False }), + Bbox(5, 0, 1, 9, + label=3, group=4, + attributes={ 'occluded': False }), + ], attributes={'frame': 1} + ), + + DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], z_order=1, + label=3, group=4, + attributes={ 'occluded': False }), + ], attributes={'frame': 0} + ), + + DatasetItem(id=3, subset='s3', image=Image( + path='3.jpg', size=(2, 4)), + attributes={'frame': 0}), + ], categories={ + AnnotationType.label: label_categories, + }) with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), + self._test_save_and_load(source_dataset, partial(CvatConverter.convert, save_images=True), test_dir, - target_dataset=DstExtractor()) + target_dataset=target_dataset) def test_relative_paths(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='1', image=np.ones((4, 2, 3))), - DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), - DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))), - ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='1', image=np.ones((4, 2, 3)), - attributes={'frame': 0}), - DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)), - attributes={'frame': 1}), - DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)), - attributes={'frame': 2}), - ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } + source_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((4, 2, 3))), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))), + ], categories={ AnnotationType.label: LabelCategories() }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((4, 2, 3)), + 
attributes={'frame': 0}), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)), + attributes={'frame': 1}), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)), + attributes={'frame': 2}), + ], categories={ + AnnotationType.label: LabelCategories() + }) with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), + self._test_save_and_load(source_dataset, partial(CvatConverter.convert, save_images=True), test_dir, - target_dataset=DstExtractor()) + target_dataset=target_dataset) def test_preserve_frame_ids(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), - attributes={'frame': 40}), - ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), + attributes={'frame': 40}), + ], categories={ + AnnotationType.label: LabelCategories() + }) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(expected_dataset, CvatConverter.convert, test_dir) diff --git a/datumaro/tests/test_datumaro_format.py b/datumaro/tests/test_datumaro_format.py index 26e6fc88ddc..e67a1b90c12 100644 --- a/datumaro/tests/test_datumaro_format.py +++ b/datumaro/tests/test_datumaro_format.py @@ -2,7 +2,7 @@ import numpy as np from unittest import TestCase - +from datumaro.components.project import Dataset from datumaro.components.project import Project from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Label, Mask, Points, Polygon, @@ -32,82 +32,75 @@ def _test_save_and_load(self, source_dataset, converter, test_dir, compare_datasets_strict(self, expected=target_dataset, actual=parsed_dataset) - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)), - annotations=[ - Caption('hello', id=1), - Caption('world', id=2, group=5), - Label(2, id=3, attributes={ - 'x': 1, - 'y': '2', - }), - Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={ - 'score': 1.0, - }), - Bbox(5, 6, 7, 8, id=5, group=5), - Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4), - Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))), - ]), - DatasetItem(id=21, subset='train', - annotations=[ - Caption('test'), - Label(2), - Bbox(1, 2, 3, 4, 5, id=42, group=42) - ]), - - DatasetItem(id=2, subset='val', - annotations=[ - PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1), - Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4), - ]), - - DatasetItem(id=42, subset='test', - attributes={'a1': 5, 'a2': '42'}), - - DatasetItem(id=42), - DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))), - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(5): - label_categories.add('cat' + str(i)) - - mask_categories = MaskCategories( - generate_colormap(len(label_categories.items))) - - points_categories = PointsCategories() - for index, _ in enumerate(label_categories.items): - points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]]) - - return { - AnnotationType.label: label_categories, - AnnotationType.mask: mask_categories, - AnnotationType.points: points_categories, - } + label_categories = LabelCategories() + for i in range(5): + label_categories.add('cat' + str(i)) + + mask_categories = MaskCategories( + generate_colormap(len(label_categories.items))) + + points_categories = PointsCategories() + for index, _ in 
enumerate(label_categories.items): + points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]]) + + test_dataset = Dataset.from_iterable([ + DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)), + annotations=[ + Caption('hello', id=1), + Caption('world', id=2, group=5), + Label(2, id=3, attributes={ + 'x': 1, + 'y': '2', + }), + Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={ + 'score': 1.0, + }), + Bbox(5, 6, 7, 8, id=5, group=5), + Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4), + Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))), + ]), + DatasetItem(id=21, subset='train', + annotations=[ + Caption('test'), + Label(2), + Bbox(1, 2, 3, 4, 5, id=42, group=42) + ]), + + DatasetItem(id=2, subset='val', + annotations=[ + PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1), + Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4), + ]), + + DatasetItem(id=42, subset='test', + attributes={'a1': 5, 'a2': '42'}), + + DatasetItem(id=42), + DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))), + ], categories={ + AnnotationType.label: label_categories, + AnnotationType.mask: mask_categories, + AnnotationType.points: points_categories, + }) def test_can_save_and_load(self): with TestDir() as test_dir: - self._test_save_and_load(self.TestExtractor(), + self._test_save_and_load(self.test_dataset, partial(DatumaroConverter.convert, save_images=True), test_dir) def test_can_detect(self): with TestDir() as test_dir: - DatumaroConverter.convert(self.TestExtractor(), save_dir=test_dir) + DatumaroConverter.convert(self.test_dataset, save_dir=test_dir) self.assertTrue(DatumaroImporter.detect(test_dir)) def test_relative_paths(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='1', image=np.ones((4, 2, 3))), - DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), - DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))), - ]) + test_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((4, 2, 3))), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))), + ]) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(test_dataset, partial(DatumaroConverter.convert, save_images=True), test_dir) diff --git a/datumaro/tests/test_labelme_format.py b/datumaro/tests/test_labelme_format.py index b3abd823d91..d40938bd8a7 100644 --- a/datumaro/tests/test_labelme_format.py +++ b/datumaro/tests/test_labelme_format.py @@ -3,7 +3,7 @@ import os.path as osp from unittest import TestCase - +from datumaro.components.project import Dataset from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Bbox, Mask, Polygon, LabelCategories ) @@ -29,101 +29,84 @@ def _test_save_and_load(self, source_dataset, converter, test_dir, compare_datasets(self, expected=target_dataset, actual=parsed_dataset) def test_can_save_and_load(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=2, group=2), - Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={ - 'occluded': True, - 'a1': 'qwe', - 'a2': True, - 'a3': 123, - }), - Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, - attributes={ 'username': 'test' }), - Bbox(1, 2, 3, 4, group=3), - Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3, - attributes={ 'occluded': True } - ), - ] + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, 
subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, group=2), + Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={ + 'occluded': True, + 'a1': 'qwe', + 'a2': True, + 'a3': 123, + }), + Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, + attributes={ 'username': 'test' }), + Bbox(1, 2, 3, 4, group=3), + Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3, + attributes={ 'occluded': True } ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=0, group=2, id=0, - attributes={ - 'occluded': False, 'username': '', - } - ), - Polygon([0, 4, 4, 4, 5, 6], label=1, id=1, - attributes={ - 'occluded': True, 'username': '', - 'a1': 'qwe', - 'a2': True, - 'a3': 123, - } - ), - Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, - id=2, attributes={ - 'occluded': False, 'username': 'test' - } - ), - Bbox(1, 2, 3, 4, group=1, id=3, attributes={ - 'occluded': False, 'username': '', - }), - Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1, - id=4, attributes={ - 'occluded': True, 'username': '' - } - ), - ] + ] + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=0, group=2, id=0, + attributes={ + 'occluded': False, 'username': '', + } ), - ]) - - def categories(self): - label_cat = LabelCategories() - label_cat.add('label_2') - label_cat.add('label_3') - return { - AnnotationType.label: label_cat, - } + Polygon([0, 4, 4, 4, 5, 6], label=1, id=1, + attributes={ + 'occluded': True, 'username': '', + 'a1': 'qwe', + 'a2': True, + 'a3': 123, + } + ), + Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, + id=2, attributes={ + 'occluded': False, 'username': 'test' + } + ), + Bbox(1, 2, 3, 4, group=1, id=3, attributes={ + 'occluded': False, 'username': '', + }), + Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1, + id=4, attributes={ + 'occluded': True, 'username': '' + } + ), + ] + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable([ + 'label_2', 'label_3']), + }) with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), + self._test_save_and_load( + source_dataset, partial(LabelMeConverter.convert, save_images=True), - test_dir, target_dataset=DstExtractor()) + test_dir, target_dataset=target_dataset) def test_cant_save_dataset_with_relative_paths(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='dir/1', image=np.ones((2, 6, 3))), - ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='dir/1', image=np.ones((2, 6, 3))), + ], categories={ + AnnotationType.label: LabelCategories(), + }) with self.assertRaisesRegex(Exception, r'only supports flat'): with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), - partial(LabelMeConverter.convert, save_images=True), - test_dir) + self._test_save_and_load(expected_dataset, + LabelMeConverter.convert, test_dir) DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset') @@ -133,101 +116,91 @@ def 
test_can_detect(self): self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR)) def test_can_import(self): - class DstExtractor(Extractor): - def __iter__(self): - img1 = np.ones((77, 102, 3)) * 255 - img1[6:32, 7:41] = 0 - - mask1 = np.zeros((77, 102), dtype=int) - mask1[67:69, 58:63] = 1 - - mask2 = np.zeros((77, 102), dtype=int) - mask2[13:25, 54:71] = [ - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ] - - return iter([ - DatasetItem(id='img1', image=img1, - annotations=[ - Polygon([43, 34, 45, 34, 45, 37, 43, 37], - label=0, id=0, - attributes={ - 'occluded': False, - 'username': 'admin' - } - ), - Mask(mask1, label=1, id=1, - attributes={ - 'occluded': False, - 'username': 'brussell' - } - ), - Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12], - label=2, group=2, id=2, - attributes={ - 'a1': True, - 'occluded': True, - 'username': 'anonymous' - } - ), - Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25], - label=3, group=2, id=3, - attributes={ - 'kj': True, - 'occluded': False, - 'username': 'anonymous' - } - ), - Bbox(13, 19, 10, 11, label=4, group=2, id=4, - attributes={ - 'hg': True, - 'occluded': True, - 'username': 'anonymous' - } - ), - Mask(mask2, label=5, group=1, id=5, - attributes={ - 'd': True, - 'occluded': False, - 'username': 'anonymous' - } - ), - Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22], - label=6, group=1, id=6, - attributes={ - 'gfd lkj lkj hi': True, - 'occluded': False, - 'username': 'anonymous' - } - ), - ] + img1 = np.ones((77, 102, 3)) * 255 + img1[6:32, 7:41] = 0 + + mask1 = np.zeros((77, 102), dtype=int) + mask1[67:69, 58:63] = 1 + + mask2 = np.zeros((77, 102), dtype=int) + mask2[13:25, 54:71] = [ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ] + + target_dataset = Dataset.from_iterable([ + DatasetItem(id='img1', image=img1, + annotations=[ + Polygon([43, 34, 45, 34, 45, 37, 43, 37], + label=0, id=0, + attributes={ + 'occluded': False, + 'username': 'admin' + } + ), + Mask(mask1, label=1, id=1, + attributes={ + 'occluded': False, + 'username': 'brussell' + } + ), + Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12], + label=2, group=2, id=2, + attributes={ + 'a1': True, + 'occluded': True, + 'username': 'anonymous' + } + ), + Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25], + label=3, 
group=2, id=3, + attributes={ + 'kj': True, + 'occluded': False, + 'username': 'anonymous' + } ), - ]) - - def categories(self): - label_cat = LabelCategories() - label_cat.add('window') - label_cat.add('license plate') - label_cat.add('o1') - label_cat.add('q1') - label_cat.add('b1') - label_cat.add('m1') - label_cat.add('hg') - return { - AnnotationType.label: label_cat, - } + Bbox(13, 19, 10, 11, label=4, group=2, id=4, + attributes={ + 'hg': True, + 'occluded': True, + 'username': 'anonymous' + } + ), + Mask(mask2, label=5, group=1, id=5, + attributes={ + 'd': True, + 'occluded': False, + 'username': 'anonymous' + } + ), + Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22], + label=6, group=1, id=6, + attributes={ + 'gfd lkj lkj hi': True, + 'occluded': False, + 'username': 'anonymous' + } + ), + ] + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable([ + 'window', 'license plate', 'o1', + 'q1', 'b1', 'm1', 'hg', + ]), + }) parsed = Project.import_from(DUMMY_DATASET_DIR, 'label_me') \ .make_dataset() - compare_datasets(self, expected=DstExtractor(), actual=parsed) \ No newline at end of file + compare_datasets(self, expected=target_dataset, actual=parsed) \ No newline at end of file diff --git a/datumaro/tests/test_mot_format.py b/datumaro/tests/test_mot_format.py index 9f212116b7e..4cc2a98b3a4 100644 --- a/datumaro/tests/test_mot_format.py +++ b/datumaro/tests/test_mot_format.py @@ -3,7 +3,7 @@ import os.path as osp from unittest import TestCase - +from datumaro.components.project import Dataset from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Bbox, LabelCategories ) @@ -28,96 +28,83 @@ def _test_save_and_load(self, source_dataset, converter, test_dir, compare_datasets(self, expected=target_dataset, actual=parsed_dataset) def test_can_save_bboxes(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=2, attributes={ - 'occluded': True, - }), - Bbox(0, 4, 4, 4, label=3, attributes={ - 'visibility': 0.4, - }), - Bbox(2, 4, 4, 4, attributes={ - 'ignored': True - }), - ] - ), - - DatasetItem(id=2, subset='val', - image=np.ones((8, 8, 3)), - annotations=[ - Bbox(1, 2, 4, 2, label=3), - ] - ), - - DatasetItem(id=3, subset='test', - image=np.ones((5, 4, 3)) * 3, - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=2, attributes={ - 'occluded': True, - 'visibility': 0.0, - 'ignored': False, - }), - Bbox(0, 4, 4, 4, label=3, attributes={ - 'occluded': False, - 'visibility': 0.4, - 'ignored': False, - }), - Bbox(2, 4, 4, 4, attributes={ - 'occluded': False, - 'visibility': 1.0, - 'ignored': True, - }), - ] - ), - - DatasetItem(id=2, - image=np.ones((8, 8, 3)), - annotations=[ - Bbox(1, 2, 4, 2, label=3, attributes={ - 'occluded': False, - 'visibility': 1.0, - 'ignored': False, - }), - ] - ), - - DatasetItem(id=3, - image=np.ones((5, 4, 3)) * 3, - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + 
image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, attributes={ + 'occluded': True, + }), + Bbox(0, 4, 4, 4, label=3, attributes={ + 'visibility': 0.4, + }), + Bbox(2, 4, 4, 4, attributes={ + 'ignored': True + }), + ] + ), + + DatasetItem(id=2, subset='val', + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(1, 2, 4, 2, label=3), + ] + ), + + DatasetItem(id=3, subset='test', + image=np.ones((5, 4, 3)) * 3, + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, attributes={ + 'occluded': True, + 'visibility': 0.0, + 'ignored': False, + }), + Bbox(0, 4, 4, 4, label=3, attributes={ + 'occluded': False, + 'visibility': 0.4, + 'ignored': False, + }), + Bbox(2, 4, 4, 4, attributes={ + 'occluded': False, + 'visibility': 1.0, + 'ignored': True, + }), + ] + ), + + DatasetItem(id=2, + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(1, 2, 4, 2, label=3, attributes={ + 'occluded': False, + 'visibility': 1.0, + 'ignored': False, + }), + ] + ), + + DatasetItem(id=3, + image=np.ones((5, 4, 3)) * 3, + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) with TestDir() as test_dir: - self._test_save_and_load(SrcExtractor(), + self._test_save_and_load( + source_dataset, partial(MotSeqGtConverter.convert, save_images=True), - test_dir, target_dataset=DstExtractor()) + test_dir, target_dataset=target_dataset) DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'mot_dataset') @@ -127,30 +114,23 @@ def test_can_detect(self): self.assertTrue(MotSeqImporter.detect(DUMMY_DATASET_DIR)) def test_can_import(self): - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=2, attributes={ - 'occluded': False, - 'visibility': 1.0, - 'ignored': False, - }), - ] - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, attributes={ + 'occluded': False, + 'visibility': 1.0, + 'ignored': False, + }), + ] + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \ .make_dataset() - compare_datasets(self, DstExtractor(), dataset) \ No newline at end of file + compare_datasets(self, expected_dataset, dataset) \ No newline at end of file diff --git a/datumaro/tests/test_tfrecord_format.py b/datumaro/tests/test_tfrecord_format.py index 5071ad25915..f2dbd160fde 100644 --- a/datumaro/tests/test_tfrecord_format.py +++ b/datumaro/tests/test_tfrecord_format.py @@ -3,7 +3,7 @@ import os.path as osp from unittest import TestCase, skipIf - +from datumaro.components.project import Dataset from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Bbox, Mask, LabelCategories ) @@ -48,117 +48,96 @@ def _test_save_and_load(self, source_dataset, converter, test_dir, compare_datasets(self, expected=target_dataset, actual=parsed_dataset) def test_can_save_bboxes(self): - class TestExtractor(Extractor): - def 
__iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=2), - Bbox(0, 4, 4, 4, label=3), - Bbox(2, 4, 4, 4), - ], attributes={'source_id': ''} - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } + test_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2), + Bbox(0, 4, 4, 4, label=3), + Bbox(2, 4, 4, 4), + ], attributes={'source_id': ''} + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load( + test_dataset, partial(TfDetectionApiConverter.convert, save_images=True), test_dir) def test_can_save_masks(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)), - annotations=[ - Mask(image=np.array([ - [1, 0, 0, 1], - [0, 1, 1, 0], - [0, 1, 1, 0], - [1, 0, 0, 1], - ]), label=1), - ], - attributes={'source_id': ''} - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } + test_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)), + annotations=[ + Mask(image=np.array([ + [1, 0, 0, 1], + [0, 1, 1, 0], + [0, 1, 1, 0], + [1, 0, 0, 1], + ]), label=1), + ], + attributes={'source_id': ''} + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load( + test_dataset, partial(TfDetectionApiConverter.convert, save_masks=True), test_dir) def test_can_save_dataset_with_no_subsets(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(2, 1, 4, 4, label=2), - Bbox(4, 2, 8, 4, label=3), - ], - attributes={'source_id': ''} - ), - - DatasetItem(id=2, - image=np.ones((8, 8, 3)) * 2, - annotations=[ - Bbox(4, 4, 4, 4, label=3), - ], - attributes={'source_id': ''} - ), - - DatasetItem(id=3, - image=np.ones((8, 4, 3)) * 3, - attributes={'source_id': ''} - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } + test_dataset = Dataset.from_iterable([ + DatasetItem(id=1, + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(2, 1, 4, 4, label=2), + Bbox(4, 2, 8, 4, label=3), + ], + attributes={'source_id': ''} + ), + + DatasetItem(id=2, + image=np.ones((8, 8, 3)) * 2, + annotations=[ + Bbox(4, 4, 4, 4, label=3), + ], + attributes={'source_id': ''} + ), + + DatasetItem(id=3, + image=np.ones((8, 4, 3)) * 3, + attributes={'source_id': ''} + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load( + test_dataset, partial(TfDetectionApiConverter.convert, save_images=True), test_dir) def test_can_save_dataset_with_image_info(self): - class 
TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='1/q.e', - image=Image(path='1/q.e', size=(10, 15)), - attributes={'source_id': ''} - ) - ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } + test_dataset = Dataset.from_iterable([ + DatasetItem(id='1/q.e', + image=Image(path='1/q.e', size=(10, 15)), + attributes={'source_id': ''} + ) + ], categories={ + AnnotationType.label: LabelCategories(), + }) with TestDir() as test_dir: - self._test_save_and_load(TestExtractor(), + self._test_save_and_load(test_dataset, TfDetectionApiConverter.convert, test_dir) def test_labelmap_parsing(self): @@ -197,42 +176,35 @@ def test_can_detect(self): self.assertTrue(TfDetectionApiImporter.detect(DUMMY_DATASET_DIR)) def test_can_import(self): - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((16, 16, 3)), - annotations=[ - Bbox(0, 4, 4, 8, label=2), - Bbox(0, 4, 4, 4, label=3), - Bbox(2, 4, 4, 4), - ], - attributes={'source_id': '1'} - ), - - DatasetItem(id=2, subset='val', - image=np.ones((8, 8, 3)), - annotations=[ - Bbox(1, 2, 4, 2, label=3), - ], - attributes={'source_id': '2'} - ), - - DatasetItem(id=3, subset='test', - image=np.ones((5, 4, 3)) * 3, - attributes={'source_id': '3'} - ), - ]) - - def categories(self): - label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) - return { - AnnotationType.label: label_cat, - } + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2), + Bbox(0, 4, 4, 4, label=3), + Bbox(2, 4, 4, 4), + ], + attributes={'source_id': '1'} + ), + + DatasetItem(id=2, subset='val', + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(1, 2, 4, 2, label=3), + ], + attributes={'source_id': '2'} + ), + + DatasetItem(id=3, subset='test', + image=np.ones((5, 4, 3)) * 3, + attributes={'source_id': '3'} + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \ .make_dataset() - compare_datasets(self, DstExtractor(), dataset) + compare_datasets(self, target_dataset, dataset) diff --git a/datumaro/tests/test_transforms.py b/datumaro/tests/test_transforms.py index 522cf2b520f..a55a446ea58 100644 --- a/datumaro/tests/test_transforms.py +++ b/datumaro/tests/test_transforms.py @@ -2,7 +2,7 @@ import numpy as np from unittest import TestCase - +from datumaro.components.project import Dataset from datumaro.components.extractor import (Extractor, DatasetItem, Mask, Polygon, PolyLine, Points, Bbox, Label, LabelCategories, MaskCategories, AnnotationType @@ -67,304 +67,269 @@ def __iter__(self): compare_datasets(self, DstExtractor(), actual) def test_mask_to_polygons_small_polygons_message(self): - class SrcExtractor(Extractor): - def __iter__(self): - items = [ - DatasetItem(id=1, image=np.zeros((5, 10, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 0], - [0, 1, 0], - [0, 0, 0], - ]), - ), - ] + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 0], + [0, 1, 0], + [0, 0, 0], + ]), ), ] - return iter(items) + ), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ DatasetItem(id=1, image=np.zeros((5, 10, 3))), ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 
10, 3))), ]) with self.assertLogs(level=log.DEBUG) as logs: - actual = transforms.MasksToPolygons(SrcExtractor()) + actual = transforms.MasksToPolygons(source_dataset) - compare_datasets(self, DstExtractor(), actual) + compare_datasets(self, target_dataset, actual) self.assertRegex('\n'.join(logs.output), 'too small polygons') def test_polygons_to_masks(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 10, 3)), - annotations=[ - Polygon([0, 0, 4, 0, 4, 4]), - Polygon([5, 0, 9, 0, 5, 5]), - ] - ), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4]), + Polygon([5, 0, 9, 0, 5, 5]), + ] + ), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 10, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 0, 0, 0, 1, 1, 1, 1, 0], - [0, 0, 0, 0, 0, 1, 1, 1, 0, 0], - [0, 0, 0, 0, 0, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ]), - ), - Mask(np.array([ - [0, 1, 1, 1, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 1, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ]), - ), - ] + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 0, 0, 0, 1, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ]), ), - ]) + Mask(np.array([ + [0, 1, 1, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ]), + ), + ] + ), + ]) - actual = transforms.PolygonsToMasks(SrcExtractor()) - compare_datasets(self, DstExtractor(), actual) + actual = transforms.PolygonsToMasks(source_dataset) + compare_datasets(self, target_dataset, actual) def test_crop_covered_segments(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - # The mask is partially covered by the polygon - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 0, 1, 1, 1], - [1, 1, 1, 1, 1], - [1, 1, 1, 0, 0], - [1, 1, 1, 0, 0]], - ), - z_order=0), - Polygon([1, 1, 4, 1, 4, 4, 1, 4], - z_order=1), - ] - ), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + # The mask is partially covered by the polygon + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 1, 1, 1], + [1, 1, 1, 1, 1], + [1, 1, 1, 0, 0], + [1, 1, 1, 0, 0]], + ), + z_order=0), + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + z_order=1), + ] + ), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 0], - [1, 1, 1, 0, 0]], - ), - z_order=0), - Polygon([1, 1, 4, 1, 4, 4, 1, 4], - z_order=1), - ] - ), - ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + [1, 1, 1, 0, 0]], + ), + z_order=0), + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + z_order=1), + ] + ), + ]) - actual = transforms.CropCoveredSegments(SrcExtractor()) - compare_datasets(self, DstExtractor(), actual) + actual = 
transforms.CropCoveredSegments(source_dataset) + compare_datasets(self, target_dataset, actual) def test_merge_instance_segments(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 0], - [1, 1, 1, 0, 0]], - ), - z_order=0, group=1), - Polygon([1, 1, 4, 1, 4, 4, 1, 4], - z_order=1, group=1), - Polygon([0, 0, 0, 2, 2, 2, 2, 0], - z_order=1), - ] - ), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + [1, 1, 1, 0, 0]], + ), + z_order=0, group=1), + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + z_order=1, group=1), + Polygon([0, 0, 0, 2, 2, 2, 2, 0], + z_order=1), + ] + ), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 1, 1, 1, 1], - [1, 1, 1, 1, 1], - [1, 1, 1, 1, 0], - [1, 1, 1, 0, 0]], - ), - z_order=0, group=1), - Mask(np.array([ - [1, 1, 0, 0, 0], - [1, 1, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]], - ), - z_order=1), - ] - ), - ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 1, 1, 1, 1], + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 0], + [1, 1, 1, 0, 0]], + ), + z_order=0, group=1), + Mask(np.array([ + [1, 1, 0, 0, 0], + [1, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + ), + z_order=1), + ] + ), + ]) - actual = transforms.MergeInstanceSegments(SrcExtractor(), + actual = transforms.MergeInstanceSegments(source_dataset, include_polygons=True) - compare_datasets(self, DstExtractor(), actual) + compare_datasets(self, target_dataset, actual) def test_map_subsets(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='a'), - DatasetItem(id=2, subset='b'), - DatasetItem(id=3, subset='c'), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='a'), + DatasetItem(id=2, subset='b'), + DatasetItem(id=3, subset='c'), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset=''), - DatasetItem(id=2, subset='a'), - DatasetItem(id=3, subset='c'), - ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset=''), + DatasetItem(id=2, subset='a'), + DatasetItem(id=3, subset='c'), + ]) - actual = transforms.MapSubsets(SrcExtractor(), + actual = transforms.MapSubsets(source_dataset, { 'a': '', 'b': 'a' }) - compare_datasets(self, DstExtractor(), actual) + compare_datasets(self, target_dataset, actual) def test_shapes_to_boxes(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [0, 0, 1, 1, 1], - [0, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 0], - [1, 1, 1, 0, 0]], - ), id=1), - Polygon([1, 1, 4, 1, 4, 4, 1, 4], id=2), - PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3), - Points([2, 2, 4, 2, 4, 4, 2, 4], id=4), - ] - ), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + [1, 1, 1, 0, 0]], + ), id=1), + Polygon([1, 1, 4, 1, 4, 4, 1, 
4], id=2), + PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3), + Points([2, 2, 4, 2, 4, 4, 2, 4], id=4), + ] + ), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Bbox(0, 0, 4, 4, id=1), - Bbox(1, 1, 3, 3, id=2), - Bbox(1, 1, 1, 1, id=3), - Bbox(2, 2, 2, 2, id=4), - ] - ), - ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Bbox(0, 0, 4, 4, id=1), + Bbox(1, 1, 3, 3, id=2), + Bbox(1, 1, 1, 1, id=3), + Bbox(2, 2, 2, 2, id=4), + ] + ), + ]) - actual = transforms.ShapesToBoxes(SrcExtractor()) - compare_datasets(self, DstExtractor(), actual) + actual = transforms.ShapesToBoxes(source_dataset) + compare_datasets(self, target_dataset, actual) def test_id_from_image(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image='path.jpg'), - DatasetItem(id=2), - ]) - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='path', image='path.jpg'), - DatasetItem(id=2), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image='path.jpg'), + DatasetItem(id=2), + ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id='path', image='path.jpg'), + DatasetItem(id=2), + ]) - actual = transforms.IdFromImageName(SrcExtractor()) - compare_datasets(self, DstExtractor(), actual) + actual = transforms.IdFromImageName(source_dataset) + compare_datasets(self, target_dataset, actual) def test_boxes_to_masks(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Bbox(0, 0, 3, 3, z_order=1), - Bbox(0, 0, 3, 1, z_order=2), - Bbox(0, 2, 3, 1, z_order=3), - ] - ), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Bbox(0, 0, 3, 3, z_order=1), + Bbox(0, 0, 3, 1, z_order=2), + Bbox(0, 2, 3, 1, z_order=3), + ] + ), + ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, image=np.zeros((5, 5, 3)), - annotations=[ - Mask(np.array([ - [1, 1, 1, 0, 0], - [1, 1, 1, 0, 0], - [1, 1, 1, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]], - ), - z_order=1), - Mask(np.array([ - [1, 1, 1, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]], - ), - z_order=2), - Mask(np.array([ - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [1, 1, 1, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]], - ), - z_order=3), - ] - ), - ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [1, 1, 1, 0, 0], + [1, 1, 1, 0, 0], + [1, 1, 1, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + ), + z_order=1), + Mask(np.array([ + [1, 1, 1, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + ), + z_order=2), + Mask(np.array([ + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [1, 1, 1, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + ), + z_order=3), + ] + ), + ]) - actual = transforms.BoxesToMasks(SrcExtractor()) - compare_datasets(self, DstExtractor(), actual) + actual = transforms.BoxesToMasks(source_dataset) + compare_datasets(self, target_dataset, actual) def test_random_split(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset="a"), - DatasetItem(id=2, subset="a"), - DatasetItem(id=3, subset="b"), - DatasetItem(id=4, subset="b"), - DatasetItem(id=5, subset="b"), - 
DatasetItem(id=6, subset=""), - DatasetItem(id=7, subset=""), - ]) + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset="a"), + DatasetItem(id=2, subset="a"), + DatasetItem(id=3, subset="b"), + DatasetItem(id=4, subset="b"), + DatasetItem(id=5, subset="b"), + DatasetItem(id=6, subset=""), + DatasetItem(id=7, subset=""), + ]) - actual = transforms.RandomSplit(SrcExtractor(), splits=[ + actual = transforms.RandomSplit(source_dataset, splits=[ ('train', 4.0 / 7.0), ('test', 3.0 / 7.0), ]) @@ -373,21 +338,19 @@ def __iter__(self): self.assertEqual(3, len(actual.get_subset('test'))) def test_random_split_gives_error_on_wrong_ratios(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([DatasetItem(id=1)]) + source_dataset = Dataset.from_iterable([DatasetItem(id=1)]) with self.assertRaises(Exception): - transforms.RandomSplit(SrcExtractor(), splits=[ + transforms.RandomSplit(source_dataset, splits=[ ('train', 0.5), ('test', 0.7), ]) with self.assertRaises(Exception): - transforms.RandomSplit(SrcExtractor(), splits=[]) + transforms.RandomSplit(source_dataset, splits=[]) with self.assertRaises(Exception): - transforms.RandomSplit(SrcExtractor(), splits=[ + transforms.RandomSplit(source_dataset, splits=[ ('train', -0.5), ('test', 1.5), ]) @@ -462,24 +425,19 @@ def categories(self): compare_datasets(self, DstExtractor(), actual) def test_remap_labels_delete_unspecified(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ DatasetItem(id=1, annotations=[ Label(0) ]) ]) - - def categories(self): - label_cat = LabelCategories() - label_cat.add('label0') - - return { AnnotationType.label: label_cat } - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ DatasetItem(id=1, annotations=[]) ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } - - actual = transforms.RemapLabels(SrcExtractor(), + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[ Label(0) ]) + ], categories={ + AnnotationType.label: LabelCategories.from_iterable('label0'), + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[]), + ], categories={ + AnnotationType.label: LabelCategories(), + }) + + actual = transforms.RemapLabels(source_dataset, mapping={}, default='delete') - compare_datasets(self, DstExtractor(), actual) + compare_datasets(self, target_dataset, actual) diff --git a/datumaro/tests/test_yolo_format.py b/datumaro/tests/test_yolo_format.py index bf6d71aeb78..1f6425d1bc9 100644 --- a/datumaro/tests/test_yolo_format.py +++ b/datumaro/tests/test_yolo_format.py @@ -6,7 +6,7 @@ from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Bbox, LabelCategories, ) -from datumaro.components.project import Project +from datumaro.components.project import Project, Dataset from datumaro.plugins.yolo_format.importer import YoloImporter from datumaro.plugins.yolo_format.converter import YoloConverter from datumaro.util.image import Image, save_image @@ -15,40 +15,32 @@ class YoloFormatTest(TestCase): def test_can_save_and_load(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)), - annotations=[ - Bbox(0, 2, 4, 2, label=2), - Bbox(0, 1, 2, 3, label=4), - ]), - DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)), - annotations=[ - Bbox(0, 2, 4, 2, label=2), - Bbox(3, 3, 2, 3, label=4), - Bbox(2, 1, 2, 3, label=4), - ]), - - DatasetItem(id=3, subset='valid', 
image=np.ones((8, 8, 3)), - annotations=[ - Bbox(0, 1, 5, 2, label=2), - Bbox(0, 2, 3, 2, label=5), - Bbox(0, 2, 4, 2, label=6), - Bbox(0, 7, 3, 2, label=7), - ]), - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add('label_' + str(i)) - return { - AnnotationType.label: label_categories, - } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(0, 1, 2, 3, label=4), + ]), + DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(3, 3, 2, 3, label=4), + Bbox(2, 1, 2, 3, label=4), + ]), + + DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)), + annotations=[ + Bbox(0, 1, 5, 2, label=2), + Bbox(0, 2, 3, 2, label=5), + Bbox(0, 2, 4, 2, label=6), + Bbox(0, 7, 3, 2, label=7), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(i) for i in range(10)), + }) with TestDir() as test_dir: - source_dataset = TestExtractor() YoloConverter.convert(source_dataset, test_dir, save_images=True) parsed_dataset = YoloImporter()(test_dir).make_dataset() @@ -56,27 +48,19 @@ def categories(self): compare_datasets(self, source_dataset, parsed_dataset) def test_can_save_dataset_with_image_info(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=Image(path='1.jpg', size=(10, 15)), - annotations=[ - Bbox(0, 2, 4, 2, label=2), - Bbox(3, 3, 2, 3, label=4), - ]), - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add('label_' + str(i)) - return { - AnnotationType.label: label_categories, - } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=Image(path='1.jpg', size=(10, 15)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(3, 3, 2, 3, label=4), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(i) for i in range(10)), + }) with TestDir() as test_dir: - source_dataset = TestExtractor() YoloConverter.convert(source_dataset, test_dir) @@ -87,27 +71,19 @@ def categories(self): compare_datasets(self, source_dataset, parsed_dataset) def test_can_load_dataset_with_exact_image_info(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=Image(path='1.jpg', size=(10, 15)), - annotations=[ - Bbox(0, 2, 4, 2, label=2), - Bbox(3, 3, 2, 3, label=4), - ]), - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add('label_' + str(i)) - return { - AnnotationType.label: label_categories, - } + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=Image(path='1.jpg', size=(10, 15)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(3, 3, 2, 3, label=4), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(i) for i in range(10)), + }) with TestDir() as test_dir: - source_dataset = TestExtractor() YoloConverter.convert(source_dataset, test_dir) @@ -117,24 +93,20 @@ def categories(self): compare_datasets(self, source_dataset, parsed_dataset) def test_relative_paths(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id='1', subset='train', - image=np.ones((4, 2, 3))), - DatasetItem(id='subdir1/1', subset='train', - 
image=np.ones((2, 6, 3))), - DatasetItem(id='subdir2/1', subset='train', - image=np.ones((5, 4, 3))), - ]) - - def categories(self): - return { AnnotationType.label: LabelCategories() } + source_dataset = Dataset.from_iterable([ + DatasetItem(id='1', subset='train', + image=np.ones((4, 2, 3))), + DatasetItem(id='subdir1/1', subset='train', + image=np.ones((2, 6, 3))), + DatasetItem(id='subdir2/1', subset='train', + image=np.ones((5, 4, 3))), + ], categories={ + AnnotationType.label: LabelCategories(), + }) for save_images in {True, False}: with self.subTest(save_images=save_images): with TestDir() as test_dir: - source_dataset = TestExtractor() YoloConverter.convert(source_dataset, test_dir, save_images=save_images) @@ -150,26 +122,19 @@ def test_can_detect(self): self.assertTrue(YoloImporter.detect(DUMMY_DATASET_DIR)) def test_can_import(self): - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((10, 15, 3)), - annotations=[ - Bbox(0, 2, 4, 2, label=2), - Bbox(3, 3, 2, 3, label=4), - ]), - ]) - - def categories(self): - label_categories = LabelCategories() - for i in range(10): - label_categories.add('label_' + str(i)) - return { - AnnotationType.label: label_categories, - } + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((10, 15, 3)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(3, 3, 2, 3, label=4), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(i) for i in range(10)), + }) dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \ .make_dataset() - compare_datasets(self, DstExtractor(), dataset) + compare_datasets(self, expected_dataset, dataset)
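
A minimal sketch of the pattern this patch applies across the test files: each
hand-written Extractor subclass, with its __iter__() and categories()
overrides, collapses into a single Dataset.from_iterable() call. The item
contents below are illustrative, not copied from any one test.

import numpy as np

from datumaro.components.project import Dataset
from datumaro.components.extractor import (DatasetItem, AnnotationType,
    Bbox, LabelCategories)

# One expression now declares both what __iter__() used to yield (the item
# list) and what categories() used to return (the `categories` dict).
dataset = Dataset.from_iterable([
    DatasetItem(id=1, subset='train', image=np.ones((16, 16, 3)),
        annotations=[
            Bbox(0, 4, 4, 8, label=2),
        ]
    ),
], categories={
    AnnotationType.label: LabelCategories.from_iterable(
        'label_' + str(label) for label in range(10)),
})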
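
The categories arguments in the refactored tests exercise three input forms of
LabelCategories.from_iterable(); a short sketch of each, grounded in how the
tests above call it:

from datumaro.components.extractor import LabelCategories

# A plain string yields a single category, as in
# test_remap_labels_delete_unspecified.
single = LabelCategories.from_iterable('label0')

# A list of names yields one category per name, as in the LabelMe test.
listed = LabelCategories.from_iterable(['window', 'license plate', 'o1'])

# Any other iterable of names works too, e.g. the generator form the MOT,
# TFRecord, and YOLO tests use.
generated = LabelCategories.from_iterable(
    'label_' + str(label) for label in range(10))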
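
For completeness, a sketch of how such a dataset feeds the round-trip checks
these tests perform, shown for the YOLO format as in test_can_save_and_load;
the TestDir and compare_datasets helpers are assumed to come from
datumaro.util.test_utils (their import sits outside the hunks shown here).

import numpy as np
from unittest import TestCase

from datumaro.components.project import Dataset
from datumaro.components.extractor import (DatasetItem, AnnotationType,
    Bbox, LabelCategories)
from datumaro.plugins.yolo_format.importer import YoloImporter
from datumaro.plugins.yolo_format.converter import YoloConverter
from datumaro.util.test_utils import TestDir, compare_datasets  # assumed path

class YoloRoundTripSketch(TestCase):
    def test_save_and_load(self):
        source_dataset = Dataset.from_iterable([
            DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)),
                annotations=[
                    Bbox(0, 2, 4, 2, label=2),
                ]),
        ], categories={
            AnnotationType.label: LabelCategories.from_iterable(
                'label_' + str(i) for i in range(10)),
        })

        # Write the dataset in YOLO format, read it back, and require that
        # nothing was lost on the way.
        with TestDir() as test_dir:
            YoloConverter.convert(source_dataset, test_dir,
                save_images=True)
            parsed_dataset = YoloImporter()(test_dir).make_dataset()

            compare_datasets(self, source_dataset, parsed_dataset)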