diff --git a/medcat/config_meta_cat.py b/medcat/config_meta_cat.py index 686029052..9c1b02888 100644 --- a/medcat/config_meta_cat.py +++ b/medcat/config_meta_cat.py @@ -1,10 +1,12 @@ from typing import Dict, Any from medcat.config import MixingConfig, BaseModel, Optional, Extra +import logging class General(MixingConfig, BaseModel): """The General part of the MetaCAT config""" device: str = 'cpu' + log_level: int = logging.INFO disable_component_lock: bool = False seed: int = 13 description: str = "No description" diff --git a/medcat/meta_cat.py b/medcat/meta_cat.py index e0968304e..528bfdbc4 100644 --- a/medcat/meta_cat.py +++ b/medcat/meta_cat.py @@ -57,6 +57,8 @@ def __init__(self, config = ConfigMetaCAT() self.config = config set_all_seeds(config.general['seed']) + logging.basicConfig(level=self.config.general.log_level) + logger.setLevel(self.config.general.log_level) if tokenizer is not None: # Set it in the config diff --git a/medcat/utils/meta_cat/data_utils.py b/medcat/utils/meta_cat/data_utils.py index c4dc5f9c2..cae477606 100644 --- a/medcat/utils/meta_cat/data_utils.py +++ b/medcat/utils/meta_cat/data_utils.py @@ -210,6 +210,7 @@ def encode_category_values(data: Dict, existing_category_value2id: Optional[Dict for i in range(len(data)): if data[i][2] in category_value2id.values(): label_data_[data[i][2]] = label_data_[data[i][2]] + 1 + logger.info(f"Class distribution: {label_data_}") # Undersampling data if category_undersample is None or category_undersample == '': min_label = min(label_data_.values()) @@ -232,7 +233,7 @@ def encode_category_values(data: Dict, existing_category_value2id: Optional[Dict for i in range(len(data_undersampled)): if data_undersampled[i][2] in category_value2id.values(): label_data[data_undersampled[i][2]] = label_data[data_undersampled[i][2]] + 1 - logger.info(f"Updated label_data: {label_data}") + # logger.info(f"Updated label_data: {label_data}") return data_undersampled, data, category_value2id