Skip to content

Commit

Permalink
Merge pull request #370 from CogStack/protected-add_concept
Browse files Browse the repository at this point in the history
CU-2exy49p: Make sure the cdb.add_concept really adds a concept or somehow make it clear
  • Loading branch information
adam-sutton-1992 authored Nov 27, 2023
2 parents 6a5103c + 26b5120 commit b0ecd83
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 8 deletions.
2 changes: 1 addition & 1 deletion medcat/cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,7 @@ def add_and_train_concept(self,
names = prepare_name(name, self.pipe.spacy_nlp, {}, self.config)
# Only if not negative, otherwise do not add the new name if in fact it should not be detected
if do_add_concept and not negative:
self.cdb.add_concept(cui=cui, names=names, ontologies=ontologies, name_status=name_status, type_ids=type_ids, description=description,
self.cdb._add_concept(cui=cui, names=names, ontologies=ontologies, name_status=name_status, type_ids=type_ids, description=description,
full_build=full_build)

if spacy_entity is not None and spacy_doc is not None:
Expand Down
44 changes: 42 additions & 2 deletions medcat/cdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from medcat.utils.hasher import Hasher
from medcat.utils.matutils import unitvec
from medcat.utils.ml_utils import get_lr_linking
from medcat.utils.decorators import deprecated
from medcat.config import Config, weighted_average, workers
from medcat.utils.saving.serializer import CDBSerializer

Expand Down Expand Up @@ -222,8 +223,9 @@ def add_names(self, cui: str, names: Dict, name_status: str = 'A', full_build: b
# Name status must be one of the three
name_status = 'A'

self.add_concept(cui=cui, names=names, ontologies=set(), name_status=name_status, type_ids=set(), description='', full_build=full_build)
self._add_concept(cui=cui, names=names, ontologies=set(), name_status=name_status, type_ids=set(), description='', full_build=full_build)

@deprecated("Use `cdb._add_concept` as this will be removed in a future release.")
def add_concept(self,
cui: str,
names: Dict,
Expand All @@ -232,6 +234,43 @@ def add_concept(self,
type_ids: Set[str],
description: str,
full_build: bool = False) -> None:
"""
Deprecated: Use `cdb._add_concept` as this will be removed in a future release.
Add a concept to internal Concept Database (CDB). Depending on what you are providing
this will add a large number of properties for each concept.
Args:
cui (str):
Concept ID or unique identifier in this database, all concepts that have
the same CUI will be merged internally.
names (Dict[str, Dict]):
Names for this concept, i.e. the values that, if found in free text, can be linked to this concept.
Names is a dict like: `{name: {'tokens': tokens, 'snames': snames, 'raw_name': raw_name}, ...}`
Names should be generated by helper function 'medcat.preprocessing.cleaners.prepare_name'
ontologies (Set[str]):
ontologies in which the concept exists (e.g. SNOMEDCT, HPO)
name_status (str):
One of `P`, `N`, `A`
type_ids (Set[str]):
Semantic type identifier (have a look at TUIs in UMLS or SNOMED-CT)
description (str):
Description of this concept.
full_build (bool):
If True, the dictionary self.addl_info will also be populated. It contains a lot of extra information
about concepts, but can be very memory consuming. This is not necessary
for normal functioning of MedCAT (Default Value `False`).
"""
self._add_concept(cui, names, ontologies, name_status, type_ids, description, full_build)

def _add_concept(self,
cui: str,
names: Dict,
ontologies: set,
name_status: str,
type_ids: Set[str],
description: str,
full_build: bool = False) -> None:
"""Add a concept to internal Concept Database (CDB). Depending on what you are providing
this will add a large number of properties for each concept.
Expand All @@ -241,7 +280,8 @@ def add_concept(self,
the same CUI will be merged internally.
names (Dict[str, Dict]):
Names for this concept, i.e. the values that, if found in free text, can be linked to this concept.
Names is an dict like: `{name: {'tokens': tokens, 'snames': snames, 'raw_name': raw_name}, ...}`
Names is a dict like: `{name: {'tokens': tokens, 'snames': snames, 'raw_name': raw_name}, ...}`
Names should be generated by helper function 'medcat.preprocessing.cleaners.prepare_name'
ontologies (Set[str]):
ontologies in which the concept exists (e.g. SNOMEDCT, HPO)
name_status (str):
Expand Down
2 changes: 1 addition & 1 deletion medcat/cdb_maker.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def prepare_csvs(self,
if len(raw_name) >= self.config.cdb_maker['remove_parenthesis']:
prepare_name(raw_name, self.pipe.spacy_nlp, names, self.config)

self.cdb.add_concept(cui=cui, names=names, ontologies=ontologies, name_status=name_status, type_ids=type_ids,
self.cdb._add_concept(cui=cui, names=names, ontologies=ontologies, name_status=name_status, type_ids=type_ids,
description=description, full_build=full_build)
# DEBUG
logger.debug("\n\n**** Added\n CUI: %s\n Names: %s\n Ontologies: %s\n Name status: %s\n Type IDs: %s\n Description: %s\n Is full build: %s",
Expand Down
2 changes: 1 addition & 1 deletion tests/archive_tests/test_cdb_maker_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def test_concept_similarity(self):
for i in range(500):
cui = "C" + str(i)
type_ids = {'T-' + str(i%10)}
cdb.add_concept(cui=cui, names=prepare_name('Name: ' + str(i), self.maker.pipe.get_spacy_nlp(), {}, self.config), ontologies=set(),
cdb._add_concept(cui=cui, names=prepare_name('Name: ' + str(i), self.maker.pipe.get_spacy_nlp(), {}, self.config), ontologies=set(),
name_status='P', type_ids=type_ids, description='', full_build=True)

vectors = {}
Expand Down
6 changes: 3 additions & 3 deletions tests/utils/test_hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class CATHashingTestsWithChange(CATHashingTestsWithFakeHash):

def test_when_changes_do_calc(self):
with unittest.mock.patch.object(CDB, 'calculate_hash', return_value='abcd1234') as patch_method:
self.undertest.cdb.add_concept(**self.concept_kwargs)
self.undertest.cdb._add_concept(**self.concept_kwargs)
hash = self.undertest.get_hash()
self.assertIsInstance(hash, str)
patch_method.assert_called()
Expand All @@ -151,10 +151,10 @@ def test_default_cdb_not_dirty(self):
self.assertFalse(self.undertest.cdb.is_dirty)

def test_after_add_concept_is_dirty(self):
self.undertest.cdb.add_concept(**self.concept_kwargs)
self.undertest.cdb._add_concept(**self.concept_kwargs)
self.assertTrue(self.undertest.cdb.is_dirty)

def test_after_recalc_not_dirty(self):
self.undertest.cdb.add_concept(**self.concept_kwargs)
self.undertest.cdb._add_concept(**self.concept_kwargs)
self.undertest.get_hash()
self.assertFalse(self.undertest.cdb.is_dirty)

0 comments on commit b0ecd83

Please sign in to comment.