Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix export job dataset #5052

Merged
merged 14 commits
Oct 15, 2022
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ non-ascii paths while adding files from "Connected file share" (issue #4428)
(<https://github.com/opencv/cvat/pull/5057>)
- Double modal export/backup a task/project (<https://github.com/opencv/cvat/pull/5075>)
- Fixed bug of computing Job's unsolved/resolved issues numbers (<https://github.com/opencv/cvat/pull/5101>)
- Dataset export for job (<https://github.com/opencv/cvat/pull/5052>)

### Security
- TBD
Expand Down
446 changes: 298 additions & 148 deletions cvat/apps/dataset_manager/bindings.py

Large diffs are not rendered by default.

45 changes: 23 additions & 22 deletions cvat/apps/dataset_manager/formats/cvat.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from datumaro.util.image import Image
from defusedxml import ElementTree

from cvat.apps.dataset_manager.bindings import (ProjectData, TaskData,
from cvat.apps.dataset_manager.bindings import (ProjectData, CommonData,
get_defaulted_subset,
import_dm_annotations,
match_dm_item)
Expand Down Expand Up @@ -984,11 +984,11 @@ def dump_track(idx, track):
counter += 1

for shape in annotations.shapes:
frame_step = annotations.frame_step if isinstance(annotations, TaskData) else annotations.frame_step[shape.task_id]
if isinstance(annotations, TaskData):
stop_frame = int(annotations.meta['task']['stop_frame'])
frame_step = annotations.frame_step if not isinstance(annotations, ProjectData) else annotations.frame_step[shape.task_id]
if not isinstance(annotations, ProjectData):
stop_frame = int(annotations.meta[annotations.META_FIELD]['stop_frame'])
else:
task_meta = list(filter(lambda task: int(task[1]['id']) == shape.task_id, annotations.meta['project']['tasks']))[0][1]
task_meta = list(filter(lambda task: int(task[1]['id']) == shape.task_id, annotations.meta[annotations.META_FIELD]['tasks']))[0][1]
stop_frame = int(task_meta['stop_frame'])
track = {
'label': shape.label,
Expand Down Expand Up @@ -1102,7 +1102,7 @@ def load_anno(file_object, annotations):
attributes={'frame': el.attrib['id']},
image=el.attrib['name']
),
task_data=annotations
instance_data=annotations
))
elif el.tag in supported_shapes and (track is not None or image_is_opened):
if shape and shape['type'] == 'skeleton':
Expand Down Expand Up @@ -1258,10 +1258,10 @@ def load_anno(file_object, annotations):
tag = None
el.clear()

def dump_task_anno(dst_file, task_data, callback):
def dump_task_or_job_anno(dst_file, instance_data, callback):
dumper = create_xml_dumper(dst_file)
dumper.open_document()
callback(dumper, task_data)
callback(dumper, instance_data)
dumper.close_document()

def dump_project_anno(dst_file: BufferedWriter, project_data: ProjectData, callback: Callable):
Expand All @@ -1270,33 +1270,34 @@ def dump_project_anno(dst_file: BufferedWriter, project_data: ProjectData, callb
callback(dumper, project_data)
dumper.close_document()

def dump_media_files(task_data: TaskData, img_dir: str, project_data: ProjectData = None):
def dump_media_files(instance_data: CommonData, img_dir: str, project_data: ProjectData = None):
ext = ''
if task_data.meta['task']['mode'] == 'interpolation':
if instance_data.meta[instance_data.META_FIELD]['mode'] == 'interpolation':
ext = FrameProvider.VIDEO_FRAME_EXT

frame_provider = FrameProvider(task_data.db_task.data)
frame_provider = FrameProvider(instance_data.db_data)
frames = frame_provider.get_frames(
instance_data.start, instance_data.stop,
frame_provider.Quality.ORIGINAL,
frame_provider.Type.BUFFER)
for frame_id, (frame_data, _) in enumerate(frames):
if (project_data is not None and (task_data.db_task.id, frame_id) in project_data.deleted_frames) \
or frame_id in task_data.deleted_frames:
for frame_id, (frame_data, _) in zip(instance_data.rel_range, frames):
if (project_data is not None and (instance_data.db_instance.id, frame_id) in project_data.deleted_frames) \
or frame_id in instance_data.deleted_frames:
continue
frame_name = task_data.frame_info[frame_id]['path'] if project_data is None \
else project_data.frame_info[(task_data.db_task.id, frame_id)]['path']
frame_name = instance_data.frame_info[frame_id]['path'] if project_data is None \
else project_data.frame_info[(instance_data.db_instance.id, frame_id)]['path']
img_path = osp.join(img_dir, frame_name + ext)
os.makedirs(osp.dirname(img_path), exist_ok=True)
with open(img_path, 'wb') as f:
f.write(frame_data.getvalue())

def _export_task(dst_file, task_data, anno_callback, save_images=False):
def _export_task_or_job(dst_file, instance_data, anno_callback, save_images=False):
with TemporaryDirectory() as temp_dir:
with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f:
dump_task_anno(f, task_data, anno_callback)
dump_task_or_job_anno(f, instance_data, anno_callback)

if save_images:
dump_media_files(task_data, osp.join(temp_dir, 'images'))
dump_media_files(instance_data, osp.join(temp_dir, 'images'))

make_zip_archive(temp_dir, dst_file)

Expand All @@ -1307,7 +1308,7 @@ def _export_project(dst_file: str, project_data: ProjectData, anno_callback: Cal

if save_images:
for task_data in project_data.task_data:
subset = get_defaulted_subset(task_data.db_task.subset, project_data.subsets)
subset = get_defaulted_subset(task_data.db_instance.subset, project_data.subsets)
subset_dir = osp.join(temp_dir, 'images', subset)
os.makedirs(subset_dir, exist_ok=True)
dump_media_files(task_data, subset_dir, project_data)
Expand All @@ -1320,7 +1321,7 @@ def _export_video(dst_file, instance_data, save_images=False):
_export_project(dst_file, instance_data,
anno_callback=dump_as_cvat_interpolation, save_images=save_images)
else:
_export_task(dst_file, instance_data,
_export_task_or_job(dst_file, instance_data,
anno_callback=dump_as_cvat_interpolation, save_images=save_images)

@exporter(name='CVAT for images', ext='ZIP', version='1.1')
Expand All @@ -1329,7 +1330,7 @@ def _export_images(dst_file, instance_data, save_images=False):
_export_project(dst_file, instance_data,
anno_callback=dump_as_cvat_annotation, save_images=save_images)
else:
_export_task(dst_file, instance_data,
_export_task_or_job(dst_file, instance_data,
anno_callback=dump_as_cvat_annotation, save_images=save_images)

@importer(name='CVAT', ext='XML, ZIP', version='1.1')
Expand Down
7 changes: 3 additions & 4 deletions cvat/apps/dataset_manager/formats/kitti.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

Expand All @@ -9,8 +10,7 @@
from datumaro.plugins.kitti_format.format import KittiPath, write_label_map
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
ProjectData, import_dm_annotations)
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive

from .transformations import RotatedBoxesToPolygons
Expand Down Expand Up @@ -45,8 +45,7 @@ def _import(src_file, instance_data):
write_label_map(color_map_path, color_map)

dataset = Dataset.import_from(tmp_dir, format='kitti', env=dm_env)
labels_meta = instance_data.meta['project']['labels'] \
if isinstance(instance_data, ProjectData) else instance_data.meta['task']['labels']
labels_meta = instance_data.meta[instance_data.META_FIELD]['labels']
if 'background' not in [label['name'] for _, label in labels_meta]:
dataset.filter('/item/annotation[label != "background"]',
filter_annotations=True)
Expand Down
31 changes: 17 additions & 14 deletions cvat/apps/dataset_manager/formats/mot.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

Expand All @@ -13,13 +14,15 @@

from .registry import dm_env, exporter, importer

def _import_task(dataset, task_data):
def _import_to_task(dataset, instance_data):
tracks = {}
label_cat = dataset.categories()[datumaro.AnnotationType.label]

for item in dataset:
frame_number = int(item.id) - 1 # NOTE: MOT frames start from 1
frame_number = task_data.abs_frame_id(frame_number)
# NOTE: MOT frames start from 1
# job has an offset, for task offset is 0
frame_number = int(item.id) - 1 + instance_data.start
frame_number = instance_data.abs_frame_id(frame_number)

for ann in item.annotations:
if ann.type != datumaro.AnnotationType.bbox:
Expand All @@ -28,7 +31,7 @@ def _import_task(dataset, task_data):
track_id = ann.attributes.get('track_id')
if track_id is None:
# Extension. Import regular boxes:
task_data.add_shape(task_data.LabeledShape(
instance_data.add_shape(instance_data.LabeledShape(
type='rectangle',
label=label_cat.items[ann.label].name,
points=ann.points,
Expand All @@ -41,7 +44,7 @@ def _import_task(dataset, task_data):
))
continue

shape = task_data.TrackedShape(
shape = instance_data.TrackedShape(
type='rectangle',
points=ann.points,
occluded=ann.attributes.get('occluded') is True,
Expand All @@ -55,7 +58,7 @@ def _import_task(dataset, task_data):

# build trajectories as lists of shapes in track dict
if track_id not in tracks:
tracks[track_id] = task_data.Track(
tracks[track_id] = instance_data.Track(
label_cat.items[ann.label].name, 0, 'manual', [])
tracks[track_id].shapes.append(shape)

Expand All @@ -67,23 +70,23 @@ def _import_task(dataset, task_data):
prev_shape_idx = 0
prev_shape = track.shapes[0]
for shape in track.shapes[1:]:
has_skip = task_data.frame_step < shape.frame - prev_shape.frame
has_skip = instance_data.frame_step < shape.frame - prev_shape.frame
if has_skip and not prev_shape.outside:
prev_shape = prev_shape._replace(outside=True,
frame=prev_shape.frame + task_data.frame_step)
frame=prev_shape.frame + instance_data.frame_step)
prev_shape_idx += 1
track.shapes.insert(prev_shape_idx, prev_shape)
prev_shape = shape
prev_shape_idx += 1

# Append a shape with outside=True to finish the track
last_shape = track.shapes[-1]
if last_shape.frame + task_data.frame_step <= \
int(task_data.meta['task']['stop_frame']):
if last_shape.frame + instance_data.frame_step <= \
int(instance_data.meta[instance_data.META_FIELD]['stop_frame']):
track.shapes.append(last_shape._replace(outside=True,
frame=last_shape.frame + task_data.frame_step)
frame=last_shape.frame + instance_data.frame_step)
)
task_data.add_track(track)
instance_data.add_track(track)


@exporter(name='MOT', ext='ZIP', version='1.1')
Expand All @@ -107,7 +110,7 @@ def _import(src_file, instance_data, load_data_callback=None):
# Dirty way to determine instance type to avoid circular dependency
if hasattr(instance_data, '_db_project'):
for sub_dataset, task_data in instance_data.split_dataset(dataset):
_import_task(sub_dataset, task_data)
_import_to_task(sub_dataset, task_data)
else:
_import_task(dataset, instance_data)
_import_to_task(dataset, instance_data)

29 changes: 15 additions & 14 deletions cvat/apps/dataset_manager/formats/mots.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

Expand All @@ -22,16 +23,16 @@ def transform_item(self, item):
return item.wrap(annotations=[a for a in item.annotations
if 'track_id' in a.attributes])

def _import_task(dataset, task_data):
def _import_to_task(dataset, instance_data):
tracks = {}
label_cat = dataset.categories()[AnnotationType.label]

root_hint = find_dataset_root(dataset, task_data)
root_hint = find_dataset_root(dataset, instance_data)

shift = 0
for item in dataset:
frame_number = task_data.abs_frame_id(
match_dm_item(item, task_data, root_hint=root_hint))
frame_number = instance_data.abs_frame_id(
match_dm_item(item, instance_data, root_hint=root_hint))

track_ids = set()

Expand All @@ -49,7 +50,7 @@ def _import_task(dataset, task_data):
else:
track_ids.add(track_id)

shape = task_data.TrackedShape(
shape = instance_data.TrackedShape(
type='polygon',
points=ann.points,
occluded=ann.attributes.get('occluded') is True,
Expand All @@ -64,7 +65,7 @@ def _import_task(dataset, task_data):

# build trajectories as lists of shapes in track dict
if track_id not in tracks:
tracks[track_id] = task_data.Track(
tracks[track_id] = instance_data.Track(
label_cat.items[ann.label].name, 0, 'manual', [])
tracks[track_id].shapes.append(shape)

Expand All @@ -75,23 +76,23 @@ def _import_task(dataset, task_data):
prev_shape_idx = 0
prev_shape = track.shapes[0]
for shape in track.shapes[1:]:
has_skip = task_data.frame_step < shape.frame - prev_shape.frame
has_skip = instance_data.frame_step < shape.frame - prev_shape.frame
if has_skip and not prev_shape.outside:
prev_shape = prev_shape._replace(outside=True,
frame=prev_shape.frame + task_data.frame_step)
frame=prev_shape.frame + instance_data.frame_step)
prev_shape_idx += 1
track.shapes.insert(prev_shape_idx, prev_shape)
prev_shape = shape
prev_shape_idx += 1

# Append a shape with outside=True to finish the track
last_shape = track.shapes[-1]
if last_shape.frame + task_data.frame_step <= \
int(task_data.meta['task']['stop_frame']):
if last_shape.frame + instance_data.frame_step <= \
int(instance_data.meta[instance_data.META_FIELD]['stop_frame']):
track.shapes.append(last_shape._replace(outside=True,
frame=last_shape.frame + task_data.frame_step)
frame=last_shape.frame + instance_data.frame_step)
)
task_data.add_track(track)
instance_data.add_track(track)

@exporter(name='MOTS PNG', ext='ZIP', version='1.0')
def _export(dst_file, instance_data, save_images=False):
Expand Down Expand Up @@ -120,7 +121,7 @@ def _import(src_file, instance_data, load_data_callback=None):
# Dirty way to determine instance type to avoid circular dependency
if hasattr(instance_data, '_db_project'):
for sub_dataset, task_data in instance_data.split_dataset(dataset):
_import_task(sub_dataset, task_data)
_import_to_task(sub_dataset, task_data)
else:
_import_task(dataset, instance_data)
_import_to_task(dataset, instance_data)

7 changes: 3 additions & 4 deletions cvat/apps/dataset_manager/formats/pascal_voc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (C) 2020-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

Expand All @@ -11,8 +12,7 @@
from datumaro.components.dataset import Dataset
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
ProjectData, import_dm_annotations)
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive

from .registry import dm_env, exporter, importer
Expand All @@ -36,8 +36,7 @@ def _import(src_file, instance_data, load_data_callback=None):
# put label map from the task if not present
labelmap_file = osp.join(tmp_dir, 'labelmap.txt')
if not osp.isfile(labelmap_file):
labels_meta = instance_data.meta['project']['labels'] \
if isinstance(instance_data, ProjectData) else instance_data.meta['task']['labels']
labels_meta = instance_data.meta[instance_data.META_FIELD]['labels']
labels = (label['name'] + ':::' for _, label in labels_meta)
with open(labelmap_file, 'w') as f:
f.write('\n'.join(labels))
Expand Down
3 changes: 1 addition & 2 deletions cvat/apps/dataset_manager/formats/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ def hex2rgb(color):
return tuple(int(color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4))

def make_colormap(instance_data):
instance_name = 'project' if 'project' in instance_data.meta.keys() else 'task'
labels = [label for _, label in instance_data.meta[instance_name]['labels']]
labels = [label for _, label in instance_data.meta[instance_data.META_FIELD]['labels']]
label_names = [label['name'] for label in labels]

if 'background' not in label_names:
Expand Down
Loading