Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Implements erosion & dilation in PyTorch & TF #669

Merged
merged 2 commits into from
Dec 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doctr/models/detection/_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from doctr.file_utils import is_tf_available

if is_tf_available():
from .tensorflow import *
else:
from .pytorch import * # type: ignore[misc]
37 changes: 37 additions & 0 deletions doctr/models/detection/_utils/pytorch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (C) 2021, Mindee.

# This program is licensed under the Apache License version 2.
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.

from torch import Tensor
from torch.nn.functional import max_pool2d

__all__ = ['erode', 'dilate']


def erode(x: Tensor, kernel_size: int) -> Tensor:
    """Performs erosion on a given tensor

    Erosion is computed as the complement of a max-pooling applied to the complemented input.
    The spatial size of the input is preserved for both odd and even kernel sizes.

    Args:
        x: binary mask of shape (N, C, H, W), holding 0/1 values in a numeric dtype such as float32
            (`1 - x` is not defined for torch bool tensors)
        kernel_size: the size of the kernel to use for erosion
    Returns:
        the eroded tensor, with the same shape as `x`
    """
    # Padding by half the kernel keeps odd kernels size-preserving; with an even kernel it
    # produces one extra leading row/column, which is trimmed below.
    _pad = kernel_size // 2
    out = 1 - max_pool2d(1 - x, kernel_size, stride=1, padding=_pad)

    if kernel_size % 2 == 0:
        # After trimming, the window spans (k - 1) // 2 positions before and k // 2 after each pixel
        out = out[..., 1:, 1:]

    return out


def dilate(x: Tensor, kernel_size: int) -> Tensor:
    """Performs dilation on a given tensor

    Dilation is computed as a stride-1 max-pooling over a `kernel_size` x `kernel_size` window.
    The spatial size of the input is preserved for both odd and even kernel sizes.

    Args:
        x: binary mask of shape (N, C, H, W), holding 0/1 values in a numeric dtype such as float32
        kernel_size: the size of the kernel to use for dilation
    Returns:
        the dilated tensor, with the same shape as `x`
    """
    # Padding by half the kernel keeps odd kernels size-preserving; with an even kernel it
    # produces one extra leading row/column, which is trimmed below.
    _pad = kernel_size // 2
    out = max_pool2d(x, kernel_size, stride=1, padding=_pad)

    if kernel_size % 2 == 0:
        # After trimming, the window spans (k - 1) // 2 positions before and k // 2 after each pixel
        out = out[..., 1:, 1:]

    return out
34 changes: 34 additions & 0 deletions doctr/models/detection/_utils/tensorflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright (C) 2021, Mindee.

# This program is licensed under the Apache License version 2.
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.

import tensorflow as tf

__all__ = ['erode', 'dilate']


def erode(x: tf.Tensor, kernel_size: int) -> tf.Tensor:
    """Erodes a mask by max-pooling its complement

    NOTE(review): the `1 - x` arithmetic presumably needs a numeric dtype (e.g. float32)
    rather than tf.bool - confirm against callers.

    Args:
        x: binary mask tensor of shape (N, H, W, C)
        kernel_size: side length of the square erosion window
    Returns:
        the eroded tensor
    """
    # Eroding the foreground is the same as dilating the background
    background = 1 - x
    grown_background = tf.nn.max_pool2d(background, kernel_size, strides=1, padding="SAME")

    return 1 - grown_background


def dilate(x: tf.Tensor, kernel_size: int) -> tf.Tensor:
    """Dilates a mask with a stride-1 max-pooling

    Args:
        x: binary mask tensor of shape (N, H, W, C)
        kernel_size: side length of the square dilation window
    Returns:
        the dilated tensor
    """
    # "SAME" padding keeps the spatial dimensions of the input
    dilated = tf.nn.max_pool2d(x, kernel_size, strides=1, padding="SAME")

    return dilated
17 changes: 17 additions & 0 deletions tests/pytorch/test_models_detection_pt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import torch

from doctr.models import detection
from doctr.models.detection._utils import dilate, erode
from doctr.models.detection.predictor import DetectionPredictor


Expand Down Expand Up @@ -67,3 +68,19 @@ def test_detection_zoo(arch_name):
with torch.no_grad():
out = predictor(input_tensor)
assert all(isinstance(boxes, np.ndarray) and boxes.shape[1] == 5 for boxes in out)


def test_erode():
    # A lone foreground pixel is smaller than the 3x3 kernel, so erosion must wipe it out
    mask = torch.zeros((1, 1, 3, 3))
    mask[..., 1, 1] = 1
    eroded = erode(mask, 3)
    assert torch.equal(eroded, torch.zeros((1, 1, 3, 3)))


def test_dilate():
    # A lone centre pixel dilated with a 3x3 kernel must fill the whole 3x3 map
    mask = torch.zeros((1, 1, 3, 3))
    mask[..., 1, 1] = 1
    dilated = dilate(mask, 3)
    assert torch.equal(dilated, torch.ones((1, 1, 3, 3)))
19 changes: 19 additions & 0 deletions tests/tensorflow/test_models_detection_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from doctr.io import DocumentFile
from doctr.models import detection
from doctr.models.detection._utils import dilate, erode
from doctr.models.detection.predictor import DetectionPredictor
from doctr.models.preprocessor import PreProcessor

Expand Down Expand Up @@ -139,3 +140,21 @@ def test_linknet_focal_loss():
# test focal loss
out = model(input_tensor, target, return_model_output=True, return_boxes=True, training=True, focal_loss=True)
assert isinstance(out['loss'], tf.Tensor)


def test_erode():
    # A lone foreground pixel is smaller than the 3x3 kernel, so erosion must wipe it out
    mask = np.zeros((1, 3, 3, 1), dtype=np.float32)
    mask[:, 1, 1] = 1
    eroded = erode(tf.convert_to_tensor(mask), 3)
    target = tf.zeros((1, 3, 3, 1))
    assert tf.math.reduce_all(eroded == target)


def test_dilate():
    # A lone centre pixel dilated with a 3x3 kernel must fill the whole 3x3 map
    mask = np.zeros((1, 3, 3, 1), dtype=np.float32)
    mask[:, 1, 1] = 1
    dilated = dilate(tf.convert_to_tensor(mask), 3)
    target = tf.ones((1, 3, 3, 1))
    assert tf.math.reduce_all(dilated == target)