From 5ffe4c45fd8145c835a1ec69cc893659bb5bcafc Mon Sep 17 00:00:00 2001 From: Alan Akbik Date: Sun, 21 Jul 2024 23:40:27 +0200 Subject: [PATCH 1/3] Move error message to main load function --- flair/file_utils.py | 7 ------- flair/nn/model.py | 12 +++++++++++- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/flair/file_utils.py b/flair/file_utils.py index dfb0049b7..f7f20a20f 100644 --- a/flair/file_utils.py +++ b/flair/file_utils.py @@ -171,13 +171,6 @@ def hf_download(model_name: str) -> str: ) except HTTPError: # output information - logger.error("-" * 80) - logger.error( - f"ERROR: The key '{model_name}' was neither found on the ModelHub nor is this a valid path to a file on your system!" - ) - logger.error(" -> Please check https://huggingface.co/models?filter=flair for all available models.") - logger.error(" -> Alternatively, point to a model file on your local drive.") - logger.error("-" * 80) Path(flair.cache_root / "models" / model_folder).rmdir() # remove folder again if not valid raise diff --git a/flair/nn/model.py b/flair/nn/model.py index 96b2c2d92..88f51f443 100644 --- a/flair/nn/model.py +++ b/flair/nn/model.py @@ -151,7 +151,17 @@ def load(cls, model_path: Union[str, Path, Dict[str, Any]]) -> "Model": continue # if the model cannot be fetched, load as a file - state = model_path if isinstance(model_path, dict) else load_torch_state(str(model_path)) + try: + state = model_path if isinstance(model_path, dict) else load_torch_state(str(model_path)) + except Exception: + log.error("-" * 80) + log.error( + f"ERROR: The key '{model_path}' was neither found on the ModelHub nor is this a valid path to a file on your system!" + ) + log.error(" -> Please check https://huggingface.co/models?filter=flair for all available models.") + log.error(" -> Alternatively, point to a model file on your local drive.") + log.error("-" * 80) + raise ValueError(f"Could not find any model with name '{model_path}'") # try to get model class from state cls_name = state.pop("__cls__", None) From a4f7a80b6c376dbe8c05aee9c02bd7856f0ed171 Mon Sep 17 00:00:00 2001 From: Alan Akbik Date: Mon, 22 Jul 2024 00:11:54 +0200 Subject: [PATCH 2/3] Make mypy happy --- flair/trainers/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flair/trainers/trainer.py b/flair/trainers/trainer.py index 6d9c3ec54..fb8590841 100644 --- a/flair/trainers/trainer.py +++ b/flair/trainers/trainer.py @@ -473,7 +473,7 @@ def train_custom( if inspect.isclass(sampler): sampler = sampler() # set dataset to sample from - sampler.set_dataset(train_data) # type: ignore[union-attr] + sampler.set_dataset(train_data) shuffle = False # this field stores the names of all dynamic embeddings in the model (determined after first forward pass) From 2f3e82e5e3e0d50e24abbf2cc60c7eb2b63c06d7 Mon Sep 17 00:00:00 2001 From: Alan Akbik Date: Tue, 23 Jul 2024 07:42:24 +0200 Subject: [PATCH 3/3] Mark some tests as integration tests to address space issues --- tests/test_datasets.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 52fec1c5e..2d0391b26 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -418,6 +418,7 @@ def test_load_universal_dependencies_conllu_corpus(tasks_base_path): _assert_universal_dependencies_conllu_dataset(corpus.train) +@pytest.mark.integration() def test_hipe_2022_corpus(tasks_base_path): # This test covers the complete HIPE 2022 dataset. # https://github.com/hipe-eval/HIPE-2022-data @@ -681,6 +682,7 @@ def test_hipe_2022(dataset_version="v2.1", add_document_separator=True): test_hipe_2022(dataset_version="v2.1", add_document_separator=False) +@pytest.mark.integration() def test_icdar_europeana_corpus(tasks_base_path): # This test covers the complete ICDAR Europeana corpus: # https://github.com/stefan-it/historic-domain-adaptation-icdar @@ -698,6 +700,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str): check_number_sentences(len(corpus.test), gold_stats[language]["test"], "test") +@pytest.mark.integration() def test_masakhane_corpus(tasks_base_path): # This test covers the complete MasakhaNER dataset, including support for v1 and v2. supported_versions = ["v1", "v2"] @@ -781,6 +784,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str, languag check_number_sentences(len(corpus.test), gold_stats["test"], "test", language, version) +@pytest.mark.integration() def test_nermud_corpus(tasks_base_path): # This test covers the NERMuD dataset. Official stats can be found here: # https://github.com/dhfbk/KIND/tree/main/evalita-2023 @@ -808,6 +812,7 @@ def test_german_ler_corpus(tasks_base_path): assert len(corpus.test) == 6673, "Mismatch in number of sentences for test split" +@pytest.mark.integration() def test_masakha_pos_corpus(tasks_base_path): # This test covers the complete MasakhaPOS dataset. supported_versions = ["v1"] @@ -876,6 +881,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str, languag check_number_sentences(len(corpus.test), gold_stats["test"], "test", language, version) +@pytest.mark.integration() def test_german_mobie(tasks_base_path): corpus = flair.datasets.NER_GERMAN_MOBIE() @@ -960,6 +966,7 @@ def test_jsonl_corpus_loads_metadata(tasks_base_path): assert dataset.sentences[2].get_metadata("from") == 125 +@pytest.mark.integration() def test_ontonotes_download(): from urllib.parse import urlparse