Skip to content

Commit

Permalink
refactor: Unified np_dtype and fix comments (#782)
Browse files Browse the repository at this point in the history
* start synth

* cleanup

* start synth

* add synthtext

* add docu and tests

* apply code factor suggestions

* apply changes

* clean

* unify and replace wrong comments

* fix synth and apply request
  • Loading branch information
felixdittrich92 committed Jan 5, 2022
1 parent 68e2120 commit e14e643
Show file tree
Hide file tree
Showing 14 changed files with 29 additions and 14 deletions.
6 changes: 4 additions & 2 deletions doctr/datasets/cord.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(
tmp_root = os.path.join(self.root, 'image')
self.data: List[Tuple[str, Dict[str, Any]]] = []
self.train = train
np_dtype = np.float32
for img_path in os.listdir(tmp_root):
# File existence check
if not os.path.exists(os.path.join(tmp_root, img_path)):
Expand All @@ -65,14 +66,15 @@ def __init__(
x = word["quad"]["x1"], word["quad"]["x2"], word["quad"]["x3"], word["quad"]["x4"]
y = word["quad"]["y1"], word["quad"]["y2"], word["quad"]["y3"], word["quad"]["y4"]
if use_polygons:
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
box = np.array([
[x[0], y[0]],
[x[1], y[1]],
[x[2], y[2]],
[x[3], y[3]],
], dtype=np.float32)
], dtype=np_dtype)
else:
# Reduce 8 coords to 4
# Reduce 8 coords to 4 -> xmin, ymin, xmax, ymax
box = [min(x), min(y), max(x), max(y)]
_targets.append((word['text'], box))

Expand Down
5 changes: 3 additions & 2 deletions doctr/datasets/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,13 @@ def __init__(
labels = json.load(f)

self.data: List[Tuple[str, np.ndarray]] = []
np_dtype = np.float32
for img_name, label in labels.items():
# File existence check
if not os.path.exists(os.path.join(self.root, img_name)):
raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_name)}")

polygons = np.asarray(label['polygons'], dtype=np.float32)
polygons = np.asarray(label['polygons'], dtype=np_dtype)
geoms = polygons if use_polygons else np.concatenate((polygons.min(axis=1), polygons.max(axis=1)), axis=1)

self.data.append((img_name, np.asarray(geoms, dtype=np.float32)))
self.data.append((img_name, np.asarray(geoms, dtype=np_dtype)))
2 changes: 2 additions & 0 deletions doctr/datasets/doc_artefacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,11 @@ def __init__(
if not os.path.exists(os.path.join(tmp_root, img_name)):
raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_name)}")

# xmin, ymin, xmax, ymax
boxes = np.asarray([obj['geometry'] for obj in label], dtype=np_dtype)
classes = np.asarray([self.CLASSES.index(obj['label']) for obj in label], dtype=np.int64)
if use_polygons:
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
boxes = np.stack(
[
np.stack([boxes[:, 0], boxes[:, 1]], axis=-1),
Expand Down
5 changes: 3 additions & 2 deletions doctr/datasets/funsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def __init__(
**kwargs
)
self.train = train
np_dtype = np.float32

# Use the subset
subfolder = os.path.join('dataset', 'training_data' if train else 'testing_data')
Expand All @@ -71,7 +72,7 @@ def __init__(
for word in block['words'] if len(word['text']) > 0]
text_targets, box_targets = zip(*_targets)
if use_polygons:
# box_targets: xmin, ymin, xmax, ymax -> x, y, w, h, alpha = 0
# xmin, ymin, xmax, ymax -> (x, y) coordinates of top left, top right, bottom right, bottom left corners
box_targets = [
[
[box[0], box[1]],
Expand All @@ -83,7 +84,7 @@ def __init__(

self.data.append((
img_path,
dict(boxes=np.asarray(box_targets, dtype=np.float32), labels=list(text_targets)),
dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=list(text_targets)),
))

self.root = tmp_root
Expand Down
1 change: 1 addition & 0 deletions doctr/datasets/ic03.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def __init__(
raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, name.text)}")

if use_polygons:
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
_boxes = [
[
[float(rect.attrib['x']), float(rect.attrib['y'])],
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/ic13.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def __init__(
# xmin, ymin, xmax, ymax
box_targets = np.array([list(map(int, line[:4])) for line in _lines], dtype=np_dtype)
if use_polygons:
# x_center, y_center, width, height, 0
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
box_targets = np.array(
[
[
Expand Down
4 changes: 2 additions & 2 deletions doctr/datasets/iiit5k.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def __init__(
raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, _raw_path)}")

if use_polygons:
# x_center, y_center, w, h, alpha = 0
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
box_targets = [
[
[box[0], box[1]],
Expand All @@ -80,7 +80,7 @@ def __init__(
] for box in box_targets
]
else:
# x, y, width, height -> xmin, ymin, xmax, ymax
# xmin, ymin, xmax, ymax
box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]

# labels are cast to a list where each char corresponds to the character's bounding box
Expand Down
3 changes: 2 additions & 1 deletion doctr/datasets/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ def __init__(
if len(annotations["typed_words"]) == 0:
self.data.append((img_name, dict(boxes=np.zeros((0, 4), dtype=np_dtype), labels=[])))
continue
# Unpack the straight boxes
# Unpack the straight boxes (xmin, ymin, xmax, ymax)
geoms = [list(map(float, obj['geometry'][:4])) for obj in annotations['typed_words']]
if use_polygons:
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
geoms = [
[geom[:2], [geom[2], geom[1]], geom[2:], [geom[0], geom[3]]] # type: ignore[list-item]
for geom in geoms
Expand Down
4 changes: 3 additions & 1 deletion doctr/datasets/sroie.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,13 @@ def __init__(
_rows = [row for row in list(csv.reader(f, delimiter=',')) if len(row) > 0]

labels = [",".join(row[8:]) for row in _rows]
# reorder coordinates (8 -> (4,2)) and filter empty lines
# reorder coordinates (8 -> (4,2) ->
# (x, y) coordinates of top left, top right, bottom right, bottom left corners) and filter empty lines
coords = np.stack([np.array(list(map(int, row[:8])), dtype=np_dtype).reshape((4, 2))
for row in _rows], axis=0)

if not use_polygons:
# xmin, ymin, xmax, ymax
coords = np.concatenate((coords.min(axis=1), coords.max(axis=1)), axis=1)

self.data.append((img_path, dict(boxes=coords, labels=labels)))
Expand Down
1 change: 1 addition & 0 deletions doctr/datasets/svhn.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def __init__(
label_targets = list(map(str, box_dict['label']))

if use_polygons:
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
box_targets = np.stack(
[
np.stack([coords[:, 0], coords[:, 1]], axis=-1),
Expand Down
1 change: 1 addition & 0 deletions doctr/datasets/svt.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def __init__(
raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, name.text)}")

if use_polygons:
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
_boxes = [
[
[float(rect.attrib['x']), float(rect.attrib['y'])],
Expand Down
5 changes: 4 additions & 1 deletion doctr/datasets/synthtext.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,12 @@ def __init__(
raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_path[0])}")

labels = [elt for word in txt.tolist() for elt in word.split()]
word_boxes = word_boxes.transpose(2, 1, 0) if word_boxes.ndim == 3 else np.expand_dims(word_boxes, axis=0)
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
word_boxes = word_boxes.transpose(2, 1, 0) if word_boxes.ndim == 3 else np.expand_dims(
word_boxes.transpose(1, 0), axis=0)

if not use_polygons:
# xmin, ymin, xmax, ymax
word_boxes = np.concatenate((word_boxes.min(axis=1), word_boxes.max(axis=1)), axis=1)

self.data.append((img_path[0], dict(boxes=np.asarray(word_boxes, dtype=np_dtype), labels=labels)))
Expand Down
2 changes: 1 addition & 1 deletion tests/pytorch/test_datasets_pt.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def _validate_dataset(ds, input_size, batch_size=2, class_indices=False, is_poly
assert img.shape == (3, *input_size)
assert img.dtype == torch.float32
assert isinstance(target, dict)
assert isinstance(target['boxes'], np.ndarray)
assert isinstance(target['boxes'], np.ndarray) and target['boxes'].dtype == np.float32
if is_polygons:
assert target['boxes'].ndim == 3 and target['boxes'].shape[1:] == (4, 2)
else:
Expand Down
2 changes: 1 addition & 1 deletion tests/tensorflow/test_datasets_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def _validate_dataset(ds, input_size, batch_size=2, class_indices=False, is_poly
assert img.shape == (*input_size, 3)
assert img.dtype == tf.float32
assert isinstance(target, dict)
assert isinstance(target['boxes'], np.ndarray)
assert isinstance(target['boxes'], np.ndarray) and target['boxes'].dtype == np.float32
if is_polygons:
assert target['boxes'].ndim == 3 and target['boxes'].shape[1:] == (4, 2)
else:
Expand Down

0 comments on commit e14e643

Please sign in to comment.