Merge pull request #23 from maximskorik/add_docs

Document functions with docstrings and add developer documentation
RECETOX · Dec 13, 2022 · 9e06a79 · 9e06a79
2 parents 078a081 + 94e4ae7
commit 9e06a79
Show file tree

Hide file tree

Showing 4 changed files with 109 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -15,7 +15,15 @@ Known limitation is methoximation on cycles which should be broken. This is not
 
 ## Installation
 
-1. From source by cloning the repository and installing the package with `pip` as follows:
+There are a few ways to install `gc-meox-tms`:
+
+1. Install in a new `conda` environment (recommended):
+```shell
+$ conda create -n gc-meox-tms -c bioconda gc-meox-tms
+$ conda activate gc-meox-tms
+```
+
+2. From source by cloning the repository and installing the package with `pip` as follows:
 ```shell
 $ git clone https://github.com/RECETOX/gc-meox-tms.git
 
@@ -84,3 +92,48 @@ the function enough times you will get all possible derivatizations. The number
 is individual for each compound (depends on possible conversion degrees etc.).
 
 See also the Jupyter notebook in `example/` directory for more examples.
+
+## Developer documentation
+
+---
+### Installation
+Create a virtual environment of your choice (e.g., conda or venv).
+The development version can be installed with conda or pip as follows:
+```shell
+# 1. Fork and clone the repository
+$ git clone https://github.com/<YOUR_GITHUB_USERNAME>/gc-meox-tms.git
+$ cd gc-meox-tms
+
+# 2a. To create a conda env run from the package directory:
+$ conda env create -f conda/environment-dev.yaml
+$ conda activate gc-meox-tms-dev
+
+# 2b. Alternatively, install using python venv:
+$ python3 -m venv gc-meox-tms-dev
+$ source gc-meox-tms-dev/bin/activate
+$ pip install -e .[dev]
+```
+
+### Contributing
+Before opening a PR, make sure all the tests pass by running `pytest` from within the package directory:
+```shell
+$ pytest
+```
+Some tests depend on probabilistic logic and may occasionally fail. If that occurs,
+try rerunning the tests. Usually one rerun is enough.
+
+We strongly advise you to add new tests for the functionality that you want to contribute. If you want to check whether
+your changes are covered by tests, run `$ pytest --cov` and examine the output to see what parts may need better test coverage.
+
+Run the linter to make sure everything is nicely formatted:
+```shell
+$ flake8
+
+# if you use venv, exclude venv directory from linting
+$ flake8 --exclude 'gc-meox-tms-dev'
+```
+
+Lastly, make sure the Python imports are in the proper order:
+```shell
+$ isort gc_meox_tms
+```
diff --git a/gc_meox_tms/derivatization.py b/gc_meox_tms/derivatization.py
@@ -18,6 +18,14 @@
 
 
 def is_derivatized(mol: Optional[Chem.Mol] = None, smiles: Optional[str] = None) -> bool:
+    """
+    Return whether a molecule is derivatized by searching for MeOX and TMS substructures within that molecule.
+
+    :param mol: RDKit molecule object
+    :param smiles: SMILES string
+
+    :return: True if derivatized, False otherwise
+    """
     if mol is None:
         mol = Chem.MolFromSmiles(smiles)
     mol = Chem.AddHs(mol)
@@ -27,6 +35,14 @@ def is_derivatized(mol: Optional[Chem.Mol] = None, smiles: Optional[str] = None)
 
 
 def remove_derivatization_groups(mol: Optional[Chem.Mol] = None, smiles: Optional[str] = None) -> Chem.Mol:
+    """
+    If a molecule is derivatized, remove derivatization substructures and return the original underivatized molecule.
+
+    :param mol: RDKit molecule object
+    :param smiles: SMILES string
+
+    :return: RDKit molecule object in underivatized (original) form
+    """
     if mol is None:
         em = Chem.MolFromSmiles(smiles)
     else:
@@ -75,6 +91,15 @@ def remove_derivatization_groups(mol: Optional[Chem.Mol] = None, smiles: Optiona
 
 
 def add_derivatization_groups(mol: Optional[Chem.Mol] = None, smiles: Optional[str] = None) -> Chem.Mol:
+    """
+    Add derivatization substructures to a molecule and return its derivatized form. This function is not deterministic
+    and will return a random derivatized form of the molecule. Run multiple times to get all possible derivatized forms.
+
+    :param mol: RDKit molecule object
+    :param smiles: SMILES string
+
+    :return: RDKit molecule object in a derivatized form
+    """
     if mol is None:
         mol = Chem.MolFromSmiles(smiles)
 
@@ -97,6 +122,15 @@ def add_derivatization_groups(mol: Optional[Chem.Mol] = None, smiles: Optional[s
 
 
 def process_one_mol(mol: Tuple[str, Chem.Mol], repeats: int):
+    """
+    Return derivatized and underivatized forms of one molecule. Since the underlying function is not deterministic, this
+    function may or may not return all possible derivatized forms of the molecule depending on the number of repeats.
+
+    :param mol: tuple of (molecule string as given in the input, RDKit molecule object of that molecule)
+    :param repeats: number of repeats to simulate molecule derivatization
+
+    :return: tuple containing the input molecule, its underivatized form, and a set of derivatized forms
+    """
     return (
         mol[0],
         Chem.MolToSmiles(remove_derivatization_groups(mol[1]), kekuleSmiles=True),

diff --git a/gc_meox_tms/utils.py b/gc_meox_tms/utils.py
@@ -6,18 +6,37 @@
 
 
 def read_input_txt(infiles: PathLike) -> List[Tuple[str, Mol]]:
-    """Read input from txt files with SMILES."""
+    """
+    Read input from txt files with SMILES.
+
+    :param infiles: Path to input file(s) with SMILES. One SMILES per line.
+
+    :return: List of tuples (molecule string from the input file, RDKit molecule object of that molecule)
+    """
     return [(line.rstrip(), MolFromSmiles(line)) for line in fileinput.input(files=infiles)]
 
 
 def write_tab_separated(tsv_path: PathLike, data) -> None:
+    """
+    Write output to a tab-separated file.
+
+    :param tsv_path: Path to output file.
+    :param data: Iterable of tuples (original SMILES, underivatized SMILES, set of derivatized SMILES)
+    """
     with open(tsv_path, "w") as tsv:
         tsv.write("orig\tderiv. removed\tderiv. added ...\n")
         for orig, removed, added in data:
             tsv.write("\t".join([orig, removed, *added]) + "\n")
 
 
 def write_flat(txt_path: PathLike, data, keep: bool = False) -> None:
+    """
+    Write output to a txt file with one SMILES per line.
+
+    :param txt_path: Path to output file.
+    :param data: Iterable of tuples (original SMILES, underivatized SMILES, set of derivatized SMILES)
+    :param keep: Whether to write the original and underivatized SMILES to the output.
+    """
     with open(txt_path, "w") as flat:
         if keep:
             for orig, removed, added in data:

diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -1,4 +1,5 @@
 flake8
+isort
 mock
 pytest
 pytest-cov