mindee · fharper · Apr 28, 2022 · Jan 11, 2022 · Jan 11, 2022 · Jan 18, 2022
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -70,6 +70,7 @@ Supported datasets
    :hidden:
 
    using_doctr/using_models
+   using_doctr/sharing_models
    using_doctr/using_model_export
 
 

diff --git a/docs/source/modules/models.rst b/docs/source/modules/models.rst
@@ -68,3 +68,13 @@ doctr.models.zoo
 ----------------
 
 .. autofunction:: doctr.models.ocr_predictor
+
+
+doctr.models.factory
+--------------------
+
+.. autofunction:: doctr.models.factory.login_to_hub
+
+.. autofunction:: doctr.models.factory.from_hub
+
+.. autofunction:: doctr.models.factory.push_to_hf_hub
diff --git a/docs/source/using_doctr/sharing_models.rst b/docs/source/using_doctr/sharing_models.rst
@@ -0,0 +1,118 @@
+Share your model with the community
+===================================
+
+docTR's focus is on open source, so if you have also fallen in love with it, we would appreciate you sharing your trained model with the community.
+To make this easy for you, we have integrated an interface to the Huggingface Hub.
+
+.. currentmodule:: doctr.models.factory
+
+
+Loading from Huggingface Hub
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This section shows you how you can easily load a pretrained model from the Huggingface Hub.
+
+.. tabs::
+
+    .. tab:: TensorFlow
+
+        .. code:: python3
+
+            from doctr.io import DocumentFile
+            from doctr.models import ocr_predictor, from_hub
+            image = DocumentFile.from_images(['data/example.jpg'])
+            # Load a custom detection model from huggingface hub
+            det_model = from_hub('Felix92/doctr-tf-db-resnet50')
+            # Load a custom recognition model from huggingface hub
+            reco_model = from_hub('Felix92/doctr-tf-crnn-vgg16-bn-french')
+            # You can easily plug these models into the OCR predictor
+            predictor = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
+            result = predictor(image)
+
+    .. tab:: PyTorch
+
+        .. code:: python3
+
+            from doctr.io import DocumentFile
+            from doctr.models import ocr_predictor, from_hub
+            image = DocumentFile.from_images(['data/example.jpg'])
+            # Load a custom detection model from huggingface hub
+            det_model = from_hub('Felix92/doctr-torch-db-mobilenet-v3-large')
+            # Load a custom recognition model from huggingface hub
+            reco_model = from_hub('Felix92/doctr-torch-crnn-mobilenet-v3-large-french')
+            # You can easily plug these models into the OCR predictor
+            predictor = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
+            result = predictor(image)
+
+
+Pushing to the Huggingface Hub
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You can also push your trained model to the Huggingface Hub.
+You only need to provide the task type (classification, detection, recognition or obj_detection), a name for your trained model (NOTE:
+existing repositories will not be overwritten) and the name of the model architecture.
+
+- Prerequisites:
+    - Huggingface account (you can easily create one at https://huggingface.co/)
+    - Git LFS installed (see the installation instructions at https://git-lfs.github.com/) in the repository
+
+.. code:: python3
+
+    from doctr.models import recognition, login_to_hub, push_to_hf_hub
+    login_to_hub()
+    my_awesome_model = recognition.crnn_mobilenet_v3_large(pretrained=True)
+    push_to_hf_hub(my_awesome_model, model_name='doctr-crnn-mobilenet-v3-large-french-v1', task='recognition', arch='crnn_mobilenet_v3_large')
+
+It is also possible to push your model directly after training.
+
+.. tabs::
+
+    .. tab:: TensorFlow
+
+        python3 ~/doctr/references/recognition/train_tensorflow.py crnn_mobilenet_v3_large --name doctr-crnn-mobilenet-v3-large --push-to-hub
+
+    .. tab:: PyTorch
+
+        python3 ~/doctr/references/recognition/train_pytorch.py crnn_mobilenet_v3_large --name doctr-crnn-mobilenet-v3-large --push-to-hub
+
+
+Pretrained community models
+---------------------------
+
+This section provides tables of pretrained community models.
+Feel free to open a pull request or issue to add your model to this list.
+
+Classification
+^^^^^^^^^^^^^^
+
++---------------------------------+-------------------------------------+-----------------------+------------------------+
+|        **Architecture**         |            **Repo_ID**              |     **Vocabulary**    |     **Framework**      |
++=================================+=====================================+=======================+========================+
+| resnet18 (dummy)                | Felix92/doctr-dummy-torch-resnet18  | french                | PyTorch                |
++---------------------------------+-------------------------------------+-----------------------+------------------------+
+| resnet18 (dummy)                | Felix92/doctr-dummy-tf-resnet18     | french                | TensorFlow             |
++---------------------------------+-------------------------------------+-----------------------+------------------------+
+
+
+Detection
+^^^^^^^^^
+
++---------------------------------+-------------------------------------------------+------------------------+
+|        **Architecture**         |            **Repo_ID**                          |     **Framework**      |
++=================================+=================================================+========================+
+| db_mobilenet_v3_large (dummy)   | Felix92/doctr-torch-db-mobilenet-v3-large       | PyTorch                |
++---------------------------------+-------------------------------------------------+------------------------+
+| db_resnet50 (dummy)             | Felix92/doctr-tf-db-resnet50                    | TensorFlow             |
++---------------------------------+-------------------------------------------------+------------------------+
+
+
+Recognition
+^^^^^^^^^^^
+
++---------------------------------+---------------------------------------------------+---------------------+------------------------+
+|        **Architecture**         |            **Repo_ID**                            |     **Language**    |     **Framework**      |
++=================================+===================================================+=====================+========================+
+| crnn_mobilenet_v3_large (dummy) | Felix92/doctr-torch-crnn-mobilenet-v3-large-french| french              | PyTorch                |
++---------------------------------+---------------------------------------------------+---------------------+------------------------+
+| crnn_vgg16_bn (dummy)           | Felix92/doctr-tf-crnn-vgg16-bn-french             | french              | TensorFlow             |
++---------------------------------+---------------------------------------------------+---------------------+------------------------+
diff --git a/doctr/models/detection/linknet/pytorch.py b/doctr/models/detection/linknet/pytorch.py
@@ -98,6 +98,7 @@ def __init__(
 
         super().__init__()
         self.cfg = cfg
+        self.assume_straight_pages = assume_straight_pages
 
         self.feat_extractor = feat_extractor
         # Identify the number of channels for the FPN initialization
@@ -124,7 +125,7 @@ def __init__(
             nn.ConvTranspose2d(head_chans, num_classes, kernel_size=2, stride=2),
         )
 
-        self.postprocessor = LinkNetPostProcessor(assume_straight_pages=assume_straight_pages)
+        self.postprocessor = LinkNetPostProcessor(assume_straight_pages=self.assume_straight_pages)
 
         for n, m in self.named_modules():
             # Don't override the initialization of the backbone

diff --git a/doctr/models/detection/zoo.py b/doctr/models/detection/zoo.py
@@ -27,22 +27,30 @@
 
 
 def _predictor(
-    arch: str,
+    arch: Any,
     pretrained: bool,
     assume_straight_pages: bool = True,
     **kwargs: Any
 ) -> DetectionPredictor:
 
-    if arch not in ARCHS:
-        raise ValueError(f"unknown architecture '{arch}'")
+    if isinstance(arch, str):
+        if arch not in ARCHS + ROT_ARCHS:
+            raise ValueError(f"unknown architecture '{arch}'")
 
-    if arch not in ROT_ARCHS and not assume_straight_pages:
-        raise AssertionError("You are trying to use a model trained on straight pages while not assuming"
-                             " your pages are straight. If you have only straight documents, don't pass"
-                             f" assume_straight_pages=False, otherwise you should use one of these archs: {ROT_ARCHS}")
+        if arch not in ROT_ARCHS and not assume_straight_pages:
+            raise AssertionError("You are trying to use a model trained on straight pages while not assuming"
+                                 " your pages are straight. If you have only straight documents, don't pass"
+                                 " assume_straight_pages=False, otherwise you should use one of these archs:"
+                                 f"{ROT_ARCHS}")
+
+        _model = detection.__dict__[arch](pretrained=pretrained, assume_straight_pages=assume_straight_pages)
+    else:
+        if not isinstance(arch, (detection.DBNet, detection.LinkNet)):
+            raise ValueError(f"unknown architecture: {type(arch)}")
+
+        _model = arch
+        _model.assume_straight_pages = assume_straight_pages
 
-    # Detection
-    _model = detection.__dict__[arch](pretrained=pretrained, assume_straight_pages=assume_straight_pages)
     kwargs['mean'] = kwargs.get('mean', _model.cfg['mean'])
     kwargs['std'] = kwargs.get('std', _model.cfg['std'])
     kwargs['batch_size'] = kwargs.get('batch_size', 1)
@@ -54,7 +62,7 @@ def _predictor(
 
 
 def detection_predictor(
-    arch: str = 'db_resnet50',
+    arch: Any = 'db_resnet50',
     pretrained: bool = False,
     assume_straight_pages: bool = True,
     **kwargs: Any
@@ -68,7 +76,7 @@ def detection_predictor(
     >>> out = model([input_page])
 
     Args:
-        arch: name of the architecture to use (e.g. 'db_resnet50')
+        arch: name of the architecture or model itself to use (e.g. 'db_resnet50')
         pretrained: If True, returns a model pre-trained on our text detection dataset
         assume_straight_pages: If True, fit straight boxes to the page
 

diff --git a/doctr/models/recognition/zoo.py b/doctr/models/recognition/zoo.py
@@ -17,12 +17,18 @@
 ARCHS: List[str] = ['crnn_vgg16_bn', 'crnn_mobilenet_v3_small', 'crnn_mobilenet_v3_large', 'sar_resnet31', 'master']
 
 
-def _predictor(arch: str, pretrained: bool, **kwargs: Any) -> RecognitionPredictor:
+def _predictor(arch: Any, pretrained: bool, **kwargs: Any) -> RecognitionPredictor:
 
-    if arch not in ARCHS:
-        raise ValueError(f"unknown architecture '{arch}'")
+    if isinstance(arch, str):
+        if arch not in ARCHS:
+            raise ValueError(f"unknown architecture '{arch}'")
+
+        _model = recognition.__dict__[arch](pretrained=pretrained)
+    else:
+        if not isinstance(arch, (recognition.CRNN, recognition.SAR, recognition.MASTER)):
+            raise ValueError(f"unknown architecture: {type(arch)}")
+        _model = arch
 
-    _model = recognition.__dict__[arch](pretrained=pretrained)
     kwargs['mean'] = kwargs.get('mean', _model.cfg['mean'])
     kwargs['std'] = kwargs.get('std', _model.cfg['std'])
     kwargs['batch_size'] = kwargs.get('batch_size', 32)
@@ -35,7 +41,11 @@ def _predictor(arch: str, pretrained: bool, **kwargs: Any) -> RecognitionPredict
     return predictor
 
 
-def recognition_predictor(arch: str = 'crnn_vgg16_bn', pretrained: bool = False, **kwargs: Any) -> RecognitionPredictor:
+def recognition_predictor(
+    arch: Any = 'crnn_vgg16_bn',
+    pretrained: bool = False,
+    **kwargs: Any
+) -> RecognitionPredictor:
     """Text recognition architecture.
 
     Example::
@@ -46,7 +56,7 @@ def recognition_predictor(arch: str = 'crnn_vgg16_bn', pretrained: bool = False,
         >>> out = model([input_page])
 
     Args:
-        arch: name of the architecture to use (e.g. 'crnn_vgg16_bn')
+        arch: name of the architecture or model itself to use (e.g. 'crnn_vgg16_bn')
         pretrained: If True, returns a model pre-trained on our text recognition dataset
 
     Returns:

diff --git a/doctr/models/zoo.py b/doctr/models/zoo.py
@@ -13,8 +13,8 @@
 
 
 def _predictor(
-    det_arch: str,
-    reco_arch: str,
+    det_arch: Any,
+    reco_arch: Any,
     pretrained: bool,
     assume_straight_pages: bool = True,
     preserve_aspect_ratio: bool = False,
@@ -48,8 +48,8 @@ def _predictor(
 
 
 def ocr_predictor(
-    det_arch: str = 'db_resnet50',
-    reco_arch: str = 'crnn_vgg16_bn',
+    det_arch: Any = 'db_resnet50',
+    reco_arch: Any = 'crnn_vgg16_bn',
     pretrained: bool = False,
     assume_straight_pages: bool = True,
     preserve_aspect_ratio: bool = False,
@@ -66,8 +66,10 @@ def ocr_predictor(
     >>> out = model([input_page])
 
     Args:
-        det_arch: name of the detection architecture to use (e.g. 'db_resnet50', 'db_mobilenet_v3_large')
-        reco_arch: name of the recognition architecture to use (e.g. 'crnn_vgg16_bn', 'sar_resnet31')
+        det_arch: name of the detection architecture or the model itself to use
+            (e.g. 'db_resnet50', 'db_mobilenet_v3_large')
+        reco_arch: name of the recognition architecture or the model itself to use
+            (e.g. 'crnn_vgg16_bn', 'sar_resnet31')
         pretrained: If True, returns a model pre-trained on our OCR dataset
         assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
             without rotated textual elements.

diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py
@@ -58,15 +58,7 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         _ = predictor([input_page])
 
 
-@pytest.mark.parametrize(
-    "det_arch, reco_arch",
-    [
-        ["db_mobilenet_v3_large", "crnn_mobilenet_v3_large"],
-    ],
-)
-def test_zoo_models(det_arch, reco_arch):
-    # Model
-    predictor = models.ocr_predictor(det_arch, reco_arch, pretrained=True)
+def _test_predictor(predictor):
     # Output checks
     assert isinstance(predictor, OCRPredictor)
 
@@ -81,3 +73,29 @@ def test_zoo_models(det_arch, reco_arch):
     with pytest.raises(ValueError):
         input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
         _ = predictor([input_page])
+
+
+@pytest.mark.parametrize(
+    "det_arch, reco_arch",
+    [
+        ["db_mobilenet_v3_large", "crnn_mobilenet_v3_large"],
+    ],
+)
+def test_zoo_models(det_arch, reco_arch):
+    # Model
+    predictor = models.ocr_predictor(det_arch, reco_arch, pretrained=True)
+    _test_predictor(predictor)
+
+    # passing model instance directly
+    det_model = detection.__dict__[det_arch](pretrained=True)
+    reco_model = recognition.__dict__[reco_arch](pretrained=True)
+    predictor = models.ocr_predictor(det_model, reco_model)
+    _test_predictor(predictor)
+
+    # passing recognition model as detection model
+    with pytest.raises(ValueError):
+        models.ocr_predictor(det_arch=reco_model, pretrained=True)
+
+    # passing detection model as recognition model
+    with pytest.raises(ValueError):
+        models.ocr_predictor(reco_arch=det_model, pretrained=True)
diff --git a/tests/tensorflow/test_models_zoo_tf.py b/tests/tensorflow/test_models_zoo_tf.py
@@ -110,15 +110,7 @@ def test_trained_ocr_predictor(mock_tilted_payslip):
     assert out.pages[0].blocks[0].lines[0].words[0].value == 'Mr.'
 
 
-@pytest.mark.parametrize(
-    "det_arch, reco_arch",
-    [
-        ["db_mobilenet_v3_large", "crnn_vgg16_bn"],
-    ],
-)
-def test_zoo_models(det_arch, reco_arch):
-    # Model
-    predictor = models.ocr_predictor(det_arch, reco_arch, pretrained=True)
+def _test_predictor(predictor):
     # Output checks
     assert isinstance(predictor, OCRPredictor)
 
@@ -133,3 +125,29 @@ def test_zoo_models(det_arch, reco_arch):
     with pytest.raises(ValueError):
         input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
         _ = predictor([input_page])
+
+
+@pytest.mark.parametrize(
+    "det_arch, reco_arch",
+    [
+        ["db_mobilenet_v3_large", "crnn_vgg16_bn"],
+    ],
+)
+def test_zoo_models(det_arch, reco_arch):
+    # Model
+    predictor = models.ocr_predictor(det_arch, reco_arch, pretrained=True)
+    _test_predictor(predictor)
+
+    # passing model instance directly
+    det_model = detection.__dict__[det_arch](pretrained=True)
+    reco_model = recognition.__dict__[reco_arch](pretrained=True)
+    predictor = models.ocr_predictor(det_model, reco_model)
+    _test_predictor(predictor)
+
+    # passing recognition model as detection model
+    with pytest.raises(ValueError):
+        models.ocr_predictor(det_arch=reco_model, pretrained=True)
+
+    # passing detection model as recognition model
+    with pytest.raises(ValueError):
+        models.ocr_predictor(reco_arch=det_model, pretrained=True)