mindee · charlesmindee · Apr 13, 2022 · Jan 11, 2022 · Jan 11, 2022 · Jan 18, 2022
diff --git a/doctr/models/__init__.py b/doctr/models/__init__.py
@@ -3,3 +3,4 @@
 from .detection import *
 from .recognition import *
 from .zoo import *
+from .factory import *
diff --git a/doctr/models/classification/resnet/tensorflow.py b/doctr/models/classification/resnet/tensorflow.py
@@ -343,8 +343,15 @@ def resnet50(pretrained: bool = False, **kwargs: Any) -> ResNet:
         A classification model
     """
 
-    kwargs['num_classes'] = kwargs.get('num_classes', len(default_cfgs['resnet50']['classes']))
-    kwargs['input_shape'] = kwargs.get('input_shape', default_cfgs['resnet50']['input_shape'])
+    kwargs['num_classes'] = kwargs.get("num_classes", len(default_cfgs['resnet50']['classes']))
+    kwargs['input_shape'] = kwargs.get("input_shape", default_cfgs['resnet50']['input_shape'])
+    kwargs['classes'] = kwargs.get('classes', default_cfgs['resnet50']['classes'])
+
+    _cfg = deepcopy(default_cfgs['resnet50'])
+    _cfg['num_classes'] = kwargs['num_classes']
+    _cfg['classes'] = kwargs['classes']
+    _cfg['input_shape'] = kwargs['input_shape']
+    kwargs.pop('classes')
 
     model = ResNet50(
         weights=None,
@@ -355,6 +362,8 @@ def resnet50(pretrained: bool = False, **kwargs: Any) -> ResNet:
         classifier_activation=None,
     )
 
+    model.cfg = _cfg
+
     # Load pretrained parameters
     if pretrained:
         load_pretrained_params(model, default_cfgs['resnet50']['url'])

diff --git a/doctr/models/detection/zoo.py b/doctr/models/detection/zoo.py
@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License version 2.
 # See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.
 
-from typing import Any
+from typing import Any, List
 
 from doctr.file_utils import is_tf_available, is_torch_available
 
@@ -13,6 +13,9 @@
 
 __all__ = ["detection_predictor"]
 
+ARCHS: List[str]
+ROT_ARCHS: List[str]
+
 
 if is_tf_available():
     ARCHS = ['db_resnet50', 'db_mobilenet_v3_large', 'linknet_resnet18', 'linknet_resnet18_rotation']

diff --git a/doctr/models/factory/__init__.py b/doctr/models/factory/__init__.py
@@ -0,0 +1 @@
+from .hub import *
diff --git a/doctr/models/factory/hub.py b/doctr/models/factory/hub.py
@@ -0,0 +1,172 @@
+# Copyright (C) 2022, Mindee.
+
+# This program is licensed under the Apache License version 2.
+# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.
+
+# Inspired by: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/hub.py
+
+import json
+import logging
+import os
+import subprocess
+import textwrap
+from pathlib import Path
+from typing import Any
+
+from huggingface_hub import HfApi, HfFolder, Repository
+
+from doctr.file_utils import is_tf_available, is_torch_available
+
+from ..detection import zoo as det_zoo
+from ..recognition import zoo as reco_zoo
+
+if is_torch_available():
+    import torch
+
+__all__ = ['login_to_hub', 'push_to_hf_hub', '_save_model_and_config_for_hf_hub']
+
+
+# Architectures that can be pushed to the hub, keyed by task name.
+# Every value is a list (possibly empty) so that `arch in AVAILABLE_ARCHS[task]` is always a valid
+# membership test: a `None` value would make that check raise a TypeError.
+# NOTE(review): there is no 'classification' entry here although push_to_hf_hub accepts that task.
+AVAILABLE_ARCHS = {
+    'detection': det_zoo.ARCHS + det_zoo.ROT_ARCHS,
+    'recognition': reco_zoo.ARCHS,
+    'obj_detection': ['fasterrcnn_mobilenet_v3_large_fpn'] if is_torch_available() else [],
+}
+
+
+def login_to_hub() -> None:
+    """Login to huggingface hub and check that git-lfs is installed
+
+    A token already stored by `HfFolder` is reused when it is valid, otherwise the interactive
+    `huggingface-cli login` command is run.
+
+    Raises:
+        OSError: if `git lfs version` cannot be run successfully (git or git-lfs is missing)
+    """
+    access_token = HfFolder.get_token()
+    if access_token is not None and HfApi()._is_valid_token(access_token):
+        logging.info("Huggingface Hub token found and valid")
+        HfApi().set_access_token(access_token)
+    else:
+        subprocess.call(['huggingface-cli', 'login'])
+        HfApi().set_access_token(HfFolder.get_token())
+    # check if git lfs is installed
+    # A missing `git` executable raises FileNotFoundError, but git without the lfs extension only
+    # reports a non-zero exit status, so both outcomes have to be checked
+    try:
+        lfs_available = subprocess.call(['git', 'lfs', 'version']) == 0
+    except FileNotFoundError:
+        lfs_available = False
+    if not lfs_available:
+        # adjacent literals instead of backslash continuations keep indentation out of the message
+        raise OSError('Looks like you do not have git-lfs installed, please install. '
+                      'You can install from https://git-lfs.github.com/. '
+                      'Then run `git lfs install` (you only have to do this once).')
+
+
+def _save_model_and_config_for_hf_hub(model: Any, save_dir: str, arch: str, task: str) -> None:
+    """Save model and config to disk for pushing to huggingface hub
+
+    The weights are written to `pytorch_model.bin` (PyTorch) or `tf_model/weights` (TensorFlow) and
+    the configuration, completed with the `arch` and `task` entries, to `config.json`.
+
+    Args:
+        model: TF or PyTorch model to be saved
+        save_dir: directory to save model and config (created if it does not exist)
+        arch: architecture name
+        task: task name
+    """
+    save_directory = Path(save_dir)
+    # make sure the target directory exists before writing into it
+    save_directory.mkdir(parents=True, exist_ok=True)
+
+    if is_torch_available():
+        weights_path = save_directory / 'pytorch_model.bin'
+        torch.save(model.state_dict(), weights_path)
+    elif is_tf_available():
+        weights_path = save_directory / 'tf_model' / 'weights'
+        model.save_weights(str(weights_path))
+
+    config_path = save_directory / 'config.json'
+
+    # add model configuration
+    # work on a copy: `model.cfg` may be shared with other objects and must not gain the extra keys
+    model_config = dict(model.cfg)
+    model_config['arch'] = arch
+    model_config['task'] = task
+
+    # non-ASCII characters are written as-is (ensure_ascii=False), so the encoding is set explicitly
+    with config_path.open('w', encoding='utf-8') as f:
+        json.dump(model_config, f, indent=2, ensure_ascii=False)
+
+
+def push_to_hf_hub(model: Any, model_name: str, task: str, **kwargs) -> None:
+    """Save model and its configuration on HF hub
+
+    >>> from doctr.models import login_to_hub, push_to_hf_hub
+    >>> from doctr.models.recognition import crnn_mobilenet_v3_small
+    >>> login_to_hub()
+    >>> model = crnn_mobilenet_v3_small(pretrained=True)
+    >>> push_to_hf_hub(model, 'my-model', 'recognition', arch='crnn_mobilenet_v3_small')
+
+    Args:
+        model: TF or PyTorch model to be saved
+        model_name: name of the model which is also the repository name
+        task: task name, one of classification, detection, recognition, obj_detection
+        **kwargs: `arch` (architecture name) and/or `run_config` (object with an `arch` attribute whose
+            attributes are dumped in the README); at least one of them is required and
+            `run_config.arch` takes precedence over `arch`
+
+    Raises:
+        ValueError: if neither `arch` nor `run_config` is given, if the task is unknown, or if the
+            architecture is not listed for a task that has an entry in AVAILABLE_ARCHS
+    """
+    run_config = kwargs.get('run_config', None)
+    arch = kwargs.get('arch', None)
+
+    if run_config is None and arch is None:
+        raise ValueError('run_config or arch must be specified')
+    if task not in ['classification', 'detection', 'recognition', 'obj_detection']:
+        raise ValueError('task must be one of classification, detection, recognition, obj_detection')
+
+    # default readme
+    readme = textwrap.dedent(f"""
+    ---
+    language: en
+    ---
+
+    <p align="center">
+    <img src="https://github.com/mindee/doctr/releases/download/v0.3.1/Logo_doctr.gif" width="60%">
+    </p>
+
+    **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
+
+    ## Task: {task}
+
+    https://github.com/mindee/doctr
+
+    ### Example usage:
+
+    ```python
+    >>> from doctr.io import DocumentFile
+    >>> from doctr.models import ocr_predictor
+    >>> from doctr.models.<task> import from_hub
+
+    >>> img = DocumentFile.from_images(['<image_path>'])
+    >>> # Load your model from the hub
+    >>> model = from_hub('mindee/my-model').eval()
+
+    >>> # Pass it to the predictor
+    >>> # If your model is a recognition model:
+    >>> predictor = ocr_predictor(det_arch='db_mobilenet_v3_large',
+    >>>                           reco_arch=model,
+    >>>                           pretrained=True)
+
+    >>> # If your model is a detection model:
+    >>> predictor = ocr_predictor(det_arch=model,
+    >>>                           reco_arch='crnn_mobilenet_v3_small',
+    >>>                           pretrained=True)
+
+    >>> # Get your predictions
+    >>> res = predictor(img)
+    ```
+    """)
+
+    # add run configuration to readme if available
+    if run_config is not None:
+        arch = run_config.arch
+        readme += textwrap.dedent(f"""### Run Configuration
+                                  \n{json.dumps(vars(run_config), indent=2, ensure_ascii=False)}""")
+
+    # Validate the architecture only for tasks that have a registry entry: 'classification' is an
+    # accepted task without one, and indexing AVAILABLE_ARCHS with it would raise a KeyError.
+    # `or []` keeps the membership test valid should an entry be None.
+    if task in AVAILABLE_ARCHS and arch not in (AVAILABLE_ARCHS[task] or []):
+        # adjacent literals instead of a backslash continuation keep indentation out of the message
+        raise ValueError(f'Architecture: {arch} for task: {task} not found.'
+                         f'\nAvailable architectures: {AVAILABLE_ARCHS}')
+
+    commit_message = f'Add {model_name} model'
+
+    local_cache_dir = os.path.join(os.path.expanduser('~'), '.cache', 'huggingface', 'hub', model_name)
+    # exist_ok=False: an already existing repository is not silently reused
+    repo_url = HfApi().create_repo(model_name, token=HfFolder.get_token(), exist_ok=False)
+    repo = Repository(local_dir=local_cache_dir, clone_from=repo_url, use_auth_token=True)
+
+    with repo.commit(commit_message):
+        _save_model_and_config_for_hf_hub(model, repo.local_dir, arch=arch, task=task)
+        readme_path = Path(repo.local_dir) / 'README.md'
+        # the run configuration is dumped with ensure_ascii=False, so the encoding is set explicitly
+        readme_path.write_text(readme, encoding='utf-8')
+
+    # NOTE(review): the `repo.commit` context manager may already push on exit - confirm whether
+    # this extra push is still needed
+    repo.git_push()
diff --git a/doctr/models/obj_detection/faster_rcnn/pytorch.py b/doctr/models/obj_detection/faster_rcnn/pytorch.py
@@ -39,6 +39,7 @@ def _fasterrcnn(arch: str, pretrained: bool, **kwargs: Any) -> FasterRCNN:
     # Build the model
     _kwargs.update(kwargs)
     model = faster_rcnn.__dict__[arch](pretrained=False, pretrained_backbone=False, **_kwargs)
+    model.cfg = default_cfgs[arch]
 
     if pretrained:
         # Load pretrained parameters

diff --git a/doctr/models/recognition/zoo.py b/doctr/models/recognition/zoo.py
@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License version 2.
 # See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.
 
-from typing import Any
+from typing import Any, List
 
 from doctr.file_utils import is_tf_available
 from doctr.models.preprocessor import PreProcessor
@@ -14,7 +14,7 @@
 __all__ = ["recognition_predictor"]
 
 
-ARCHS = ['crnn_vgg16_bn', 'crnn_mobilenet_v3_small', 'crnn_mobilenet_v3_large', 'sar_resnet31', 'master']
+ARCHS: List[str] = ['crnn_vgg16_bn', 'crnn_mobilenet_v3_small', 'crnn_mobilenet_v3_large', 'sar_resnet31', 'master']
 
 
 def _predictor(arch: str, pretrained: bool, **kwargs: Any) -> RecognitionPredictor:

diff --git a/references/classification/train_pytorch.py b/references/classification/train_pytorch.py
@@ -24,7 +24,7 @@
 
 from doctr import transforms as T
 from doctr.datasets import VOCABS, CharacterGenerator
-from doctr.models import classification
+from doctr.models import classification, login_to_hub, push_to_hf_hub
 from doctr.models.utils import export_classification_model_to_onnx
 from utils import plot_recorder, plot_samples
 
@@ -170,6 +170,9 @@ def main(args):
 
     print(args)
 
+    if args.push_to_hub:
+        login_to_hub()
+
     if not isinstance(args.workers, int):
         args.workers = min(16, mp.cpu_count())
 
@@ -335,6 +338,9 @@ def main(args):
     if args.wb:
         run.finish()
 
+    if args.push_to_hub:
+        push_to_hf_hub(model, exp_name, task='classification', run_config=args)
+
     if args.export_onnx:
         print("Exporting model to ONNX...")
         dummy_batch = next(iter(val_loader))
@@ -382,8 +388,8 @@ def parse_args():
     parser.add_argument("--test-only", dest='test_only', action='store_true', help="Run the validation loop")
     parser.add_argument('--show-samples', dest='show_samples', action='store_true',
                         help='Display unormalized training samples')
-    parser.add_argument('--wb', dest='wb', action='store_true',
-                        help='Log to Weights & Biases')
+    parser.add_argument('--wb', dest='wb', action='store_true', help='Log to Weights & Biases')
+    parser.add_argument('--push-to-hub', dest='push_to_hub', action='store_true', help='Push to Huggingface Hub')
     parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                         help='Load pretrained parameters before starting the training')
     parser.add_argument('--export-onnx', dest='export_onnx', action='store_true',

diff --git a/references/classification/train_tensorflow.py b/references/classification/train_tensorflow.py
@@ -18,6 +18,8 @@
 from fastprogress.fastprogress import master_bar, progress_bar
 from tensorflow.keras import mixed_precision
 
+from doctr.models import login_to_hub, push_to_hf_hub
+
 gpu_devices = tf.config.experimental.list_physical_devices('GPU')
 if any(gpu_devices):
     tf.config.experimental.set_memory_growth(gpu_devices[0], True)
@@ -131,6 +133,9 @@ def main(args):
 
     print(args)
 
+    if args.push_to_hub:
+        login_to_hub()
+
     if not isinstance(args.workers, int):
         args.workers = min(16, mp.cpu_count())
 
@@ -298,6 +303,9 @@ def main(args):
     if args.wb:
         run.finish()
 
+    if args.push_to_hub:
+        push_to_hf_hub(model, exp_name, task='classification', run_config=args)
+
 
 def parse_args():
     import argparse
@@ -336,8 +344,8 @@ def parse_args():
     parser.add_argument("--test-only", dest='test_only', action='store_true', help="Run the validation loop")
     parser.add_argument('--show-samples', dest='show_samples', action='store_true',
                         help='Display unormalized training samples')
-    parser.add_argument('--wb', dest='wb', action='store_true',
-                        help='Log to Weights & Biases')
+    parser.add_argument('--wb', dest='wb', action='store_true', help='Log to Weights & Biases')
+    parser.add_argument('--push-to-hub', dest='push_to_hub', action='store_true', help='Push to Huggingface Hub')
     parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                         help='Load pretrained parameters before starting the training')
     parser.add_argument("--amp", dest="amp", help="Use Automatic Mixed Precision", action="store_true")

diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
@@ -23,7 +23,7 @@
 
 from doctr import transforms as T
 from doctr.datasets import DetectionDataset
-from doctr.models import detection
+from doctr.models import detection, login_to_hub, push_to_hf_hub
 from doctr.utils.metrics import LocalizationConfusion
 from utils import plot_recorder, plot_samples
 
@@ -172,6 +172,9 @@ def main(args):
 
     print(args)
 
+    if args.push_to_hub:
+        login_to_hub()
+
     if not isinstance(args.workers, int):
         args.workers = min(16, mp.cpu_count())
 
@@ -364,6 +367,9 @@ def main(args):
     if args.wb:
         run.finish()
 
+    if args.push_to_hub:
+        push_to_hf_hub(model, exp_name, task='detection', run_config=args)
+
 
 def parse_args():
     import argparse
@@ -387,8 +393,8 @@ def parse_args():
                         help='freeze model backbone for fine-tuning')
     parser.add_argument('--show-samples', dest='show_samples', action='store_true',
                         help='Display unormalized training samples')
-    parser.add_argument('--wb', dest='wb', action='store_true',
-                        help='Log to Weights & Biases')
+    parser.add_argument('--wb', dest='wb', action='store_true', help='Log to Weights & Biases')
+    parser.add_argument('--push-to-hub', dest='push_to_hub', action='store_true', help='Push to Huggingface Hub')
     parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                         help='Load pretrained parameters before starting the training')
     parser.add_argument('--rotation', dest='rotation', action='store_true',