Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented retention index computation according to Kovats/Van den Dool #25

Merged
merged 32 commits into from
Jun 18, 2021
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
488d4a3
Reorganized import and changed RI type to float
Apr 21, 2021
7ec16e3
Implemented mock data class
Apr 21, 2021
11cf740
Started implementing Kovats RI computation
Apr 21, 2021
f085858
Merge branch 'main' into kovats
Apr 26, 2021
be9b12b
Merge remote-tracking branch 'upstream/main' into kovats
Apr 26, 2021
27ece46
Sorted test data in increasing RT
Apr 26, 2021
beb5b20
Adapted expected test results and implemented basic version of retent…
Apr 26, 2021
08d5e09
Fixed linter error
Apr 26, 2021
0200513
Added method to check for valid data
Apr 27, 2021
c5617a4
Added method to check data args
Apr 27, 2021
12fe61b
Added test case for invalid rt queries
Apr 27, 2021
42b3b35
Improved documentation and added some checks
Apr 27, 2021
931d34a
Merge branch 'main' into kovats
May 17, 2021
ae3fdeb
Implemented write method for stub
May 17, 2021
e84a17e
Added generic data fixtures
May 17, 2021
4f354a4
Implemented basic test with real data files
May 17, 2021
1a911d2
Added module init file
May 17, 2021
35e6036
Added blank line for linter
May 18, 2021
7a324e9
Merge branch 'main' of github.com:hechth/RIAssigner into kovats
hechth Jun 15, 2021
e8e0a21
Fixed bug in reading reference alkanes in seconds
hechth Jun 15, 2021
a8f5799
Added 0 entry in beginning of ref rt & ri to handle whole range of va…
hechth Jun 16, 2021
f9b72fb
Expanded tests for data and kovats computation for verification
hechth Jun 16, 2021
57756d3
Small change in detection function for rt
hechth Jun 16, 2021
57587e8
Excluded currently failing tests: will be worked on in future issue.
hechth Jun 16, 2021
9c4f796
Expanded tests with expected results.
hechth Jun 16, 2021
48be081
Added docstring
hechth Jun 16, 2021
1cf6d5a
Small refactoring and added docstring
hechth Jun 16, 2021
93e31c0
Removed unused function
hechth Jun 16, 2021
b4c34b4
Fixed linter
hechth Jun 16, 2021
62f6e02
Added docstrings
hechth Jun 16, 2021
ad7f7a3
Small refactorings and renamings, as well as documentation
hechth Jun 16, 2021
949ed87
Added documentation on compute function
hechth Jun 16, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions RIAssigner/compute/ComputationMethod.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
from abc import ABC, abstractmethod
from RIAssigner.data import Data
from typing import List
from RIAssigner.data import Data


class ComputationMethod(ABC):

@abstractmethod
def compute(self, query: Data, reference: Data) -> List[int]:
def compute(self, query: Data, reference: Data) -> List[float]:
...

def _check_data_args(self, query, reference):
assert query is not None, "Query data is 'None'."
assert reference is not None, "Reference data is 'None'."
72 changes: 72 additions & 0 deletions RIAssigner/compute/Kovats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from typing import List, Iterable
from RIAssigner.data.Data import Data
from .ComputationMethod import ComputationMethod


class Kovats(ComputationMethod):
""" Class to compute the Kovats retention index. """

def compute(self, query: Data, reference: Data) -> List[float]:
""" Compute non-isothermal Kovats retention index.
For details see https://webbook.nist.gov/chemistry/gc-ri/.

Parameters
xtrojak marked this conversation as resolved.
Show resolved Hide resolved
----------
query:
Dataset for which to compute retention indices.
"""

self._check_data_args(query, reference)

lower_index = 0
higher_index = 0
retention_indices = []

# Copy rts and ris and insert 0 in the beginning, so that interpolation always starts at 0,0 to the first reference compound.
reference_rts = list(reference.retention_times)
reference_ris = list(reference.retention_indices)

reference_rts.insert(0, 0.0)
xtrojak marked this conversation as resolved.
Show resolved Hide resolved
reference_ris.insert(0, 0.0)

for target_rt in query.retention_times:
ri = None
if Data.is_valid(target_rt):
lower_index, higher_index = _get_bound_indices(target_rt, reference_rts, lower_index, higher_index)
ri = _compute_ri(target_rt, reference_rts, reference_ris, lower_index, higher_index)
retention_indices.append(ri)

return retention_indices


def _get_bound_indices(target_rt: float, reference_rts: Iterable[Data.RetentionTimeType], lower_index: int, higher_index: int):
""" Get the indices of previosly eluting and next eluting reference compounds.
Retention times in 'Data' objects are sorted in ascending order, so this method assumes
that 'reference_rt' is sorted in ascending order.

Parameters
xtrojak marked this conversation as resolved.
Show resolved Hide resolved
----------
reference_rts
Retention times of reference compounds.

"""
if target_rt > max(reference_rts) or higher_index >= len(reference_rts):
higher_index = len(reference_rts) - 1
else:
while reference_rts[higher_index] < target_rt:
higher_index += 1
lower_index = max(lower_index, higher_index - 1)
return lower_index, higher_index


def _compute_ri(
xtrojak marked this conversation as resolved.
Show resolved Hide resolved
target_rt: float,
reference_rts: Iterable[Data.RetentionTimeType],
reference_ris: Iterable[Data.RetentionIndexType],
lower_index: int,
higher_index: int):
term_a = target_rt - reference_rts[lower_index]
term_b = reference_rts[higher_index] - reference_rts[lower_index]

ri = 100 * term_a / term_b + reference_ris[lower_index]
return ri
8 changes: 8 additions & 0 deletions RIAssigner/compute/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import logging
from .Kovats import Kovats

logging.getLogger(__name__).addHandler(logging.NullHandler())

__all__ = [
"Kovats",
]
12 changes: 9 additions & 3 deletions RIAssigner/data/Data.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
from abc import ABC, abstractmethod
from typing import Iterable, Optional
from pint import Unit, UnitRegistry
from pint import UnitRegistry
from pint.unit import build_unit_class


class Data(ABC):
""" Base class for data managers. """
RetentionTimeType = Optional[float]
RetentionIndexType = Optional[float]
URegistry = UnitRegistry()
Unit = build_unit_class(URegistry)

@staticmethod
def is_valid(rt: RetentionTimeType) -> bool:
return rt is not None and rt >= 0.0

def __init__(self, filename: str, rt_unit: str = 'seconds'):
self._filename = filename
self._rt_unit = rt_unit
self._unit = Unit(self._rt_unit)
self._ureg = UnitRegistry()
self._unit = Data.Unit(self._rt_unit)
self.read()

@abstractmethod
Expand Down
8 changes: 5 additions & 3 deletions RIAssigner/data/MatchMSData.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@ def _read_spectra(self, filename):

def _read_retention_times(self):
""" Read retention times from spectrum metadata. """
self._retention_times = self._ureg.Quantity([safe_read_key(spectrum, 'retentiontime') for spectrum in self._spectra], self._unit)
self._retention_times = Data.URegistry.Quantity([safe_read_key(spectrum, 'retentiontime') for spectrum in self._spectra], self._unit)

def _read_retention_indices(self):
""" Read retention indices from spectrum metadata. """
self._retention_indices = [safe_read_key(spectrum, 'retentionindex') for spectrum in self._spectra]

def _sort_spectra_by_rt(self):
""" Sort objects (peaks) in spectra list by their retention times. """
self._spectra.sort(key=lambda spectrum: spectrum.metadata['retentiontime'])
self._spectra.sort(key=lambda spectrum: safe_read_key(spectrum, 'retentiontime'))

@property
def retention_times(self) -> Iterable[Data.RetentionTimeType]:
Expand Down Expand Up @@ -91,7 +91,9 @@ def _spectrum_has_rt(spectrum: Spectrum) -> bool:
has_key = 'retentiontime' in spectrum.metadata.keys()
xtrojak marked this conversation as resolved.
Show resolved Hide resolved
if not has_key:
return False
return True
else:
value = safe_read_key(spectrum, 'retentiontime')
return value is not None


def _assign_ri_value(spectrum: Spectrum, value: int):
Expand Down
Binary file added tests/data/kovats/PFAS_added_rt.npy
Binary file not shown.
Binary file added tests/data/kovats/aplcms_aligned_peaks.npy
Binary file not shown.
Binary file added tests/data/kovats/xcms_variable_metadata.npy
Binary file not shown.
12 changes: 6 additions & 6 deletions tests/data/msp/PFAS_added_rt.msp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ IONMODE: Negative
FORMULA: C10H18F3NO4S2
SMILES: O=C(NC(C)(C)CS(=O)(=O)O)CCSCCC(F)(F)F
INCHIKEY: DLTHJDKHDLWLAE-UHFFFAOYSA-N
RETENTIONTIME: 188.9
RETENTIONTIME: 556.9
CCS:
ONTOLOGY: PFSA
COMMENT: FT-thioether
Expand All @@ -22,7 +22,7 @@ IONMODE: Negative
FORMULA: C11H18F5NO4S2
SMILES: O=C(NC(C)(C)CS(=O)(=O)O)CCSCCC(F)(F)C(F)(F)F
INCHIKEY: NFSFEPNLUWBEDN-UHFFFAOYSA-N
RETENTIONTIME: 1.2
RETENTIONTIME: 80.2
CCS:
ONTOLOGY: PFSA
COMMENT: FT-thioether
Expand All @@ -39,7 +39,7 @@ IONMODE: Negative
FORMULA: C12H18F7NO4S2
SMILES: O=C(NC(C)(C)CS(=O)(=O)O)CCSCCC(F)(F)C(F)(F)C(F)(F)F
INCHIKEY: TUKWMSPJOXGNKX-UHFFFAOYSA-N
RETENTIONTIME:
RETENTIONTIME: 127.8
CCS:
ONTOLOGY: PFSA
COMMENT: FT-thioether
Expand All @@ -56,7 +56,7 @@ IONMODE: Negative
FORMULA: C13H18F9NO4S2
SMILES: O=C(NC(C)(C)CS(=O)(=O)O)CCSCCC(F)(F)C(F)(F)C(F)(F)C(F)(F)F
INCHIKEY: VIHJAWYQCFERKN-UHFFFAOYSA-N
RETENTIONTIME: 17.4
RETENTIONTIME: 217.4
CCS:
ONTOLOGY: PFSA
COMMENT: FT-thioether
Expand All @@ -73,7 +73,7 @@ IONMODE: Negative
FORMULA: C14H18F11NO4S2
SMILES: O=C(NC(C)(C)CS(=O)(=O)O)CCSCCC(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F
INCHIKEY: IEEDVXQCSFOQPF-UHFFFAOYSA-N
RETENTIONTIME: 10.5
RETENTIONTIME: 310.5
CCS:
ONTOLOGY: PFSA
COMMENT: FT-thioether
Expand All @@ -90,7 +90,7 @@ IONMODE: Negative
FORMULA: C15H18F13NO4S2
SMILES: O=C(NC(C)(C)CS(=O)(=O)O)CCSCCC(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F
INCHIKEY: DKVATWZCDNHYCW-UHFFFAOYSA-N
RETENTIONTIME: 0.45
RETENTIONTIME: 440.45
CCS:
ONTOLOGY: PFSA
COMMENT: FT-thioether
Expand Down
8 changes: 8 additions & 0 deletions tests/fixtures/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from .data import reference_alkanes
from .data import queries


__all__ = [
"reference_alkanes",
"queries",
]
29 changes: 29 additions & 0 deletions tests/fixtures/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import os
import numpy
import pytest
from RIAssigner.data import PandasData
from RIAssigner.data import MatchMSData


here = os.path.abspath(os.path.dirname(__file__))
data_location = os.path.join(here, os.pardir, "data")
data_type_map = {
".msp": MatchMSData,
".csv": PandasData
}


@pytest.fixture
def reference_alkanes():
filename = os.path.join(data_location, "csv", "Alkanes_20210325.csv")
return PandasData(filename, 'min')


@pytest.fixture(params=["aplcms_aligned_peaks.csv", "xcms_variable_metadata.csv", "PFAS_added_rt.msp"])
def queries(request):
basename, extension = os.path.splitext(request.param)
filename = os.path.join(data_location, extension[1:], request.param)

results_path = os.path.join(data_location, "kovats", basename + ".npy")
expected = numpy.load(results_path)
return (data_type_map[extension](filename), expected)
31 changes: 31 additions & 0 deletions tests/mocks/DataStub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import Iterable, Optional
from RIAssigner.data.Data import Data


class DataStub(Data):
""" Mock class for data. """
def __init__(self, retention_times: Iterable[float], retention_indices: Iterable[float]):
self._retention_times = retention_times
self._retention_indices = retention_indices

def read(self, filename):
pass

def write(self, filename):
pass

@property
def filename(self):
return "mock"

@property
def retention_times(self) -> Iterable[Optional[float]]:
return self._retention_times

@property
def retention_indices(self) -> Iterable[Optional[float]]:
return self._retention_indices

@retention_indices.setter
def retention_indices(self, value: Iterable[float]):
self._retention_indices = value
76 changes: 76 additions & 0 deletions tests/test_compute_Kovats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import numpy
import pytest
from .mocks.DataStub import DataStub
from .fixtures.data import reference_alkanes, queries
from RIAssigner.compute import Kovats


@pytest.fixture
def indexed_data():
retention_times = [3.5, 4.68, 5.12, 7.31, 9.01, 9.08]
retention_indices = [700, 800, 900, 1000, 1100, 1200]
return DataStub(retention_times, retention_indices)


@pytest.fixture
def non_indexed_data():
retention_times = [3.99, 4.21, 4.32, 5.83, 6.55, 7.02, 8.65, 9.05]
return DataStub(retention_times, [])


@pytest.fixture
def invalid_rt_data():
retention_times = [-1.0, -0.1, None, 3.99]
return DataStub(retention_times, [])


def test_construct():
compute = Kovats()
assert compute is not None


def test_exception_reference_none(non_indexed_data):
method = Kovats()
with pytest.raises(AssertionError) as exception:
method.compute(non_indexed_data, None)

message = exception.value.args[0]
assert exception.typename == "AssertionError"
assert message == "Reference data is 'None'."


def test_exception_query_none(indexed_data):
method = Kovats()
with pytest.raises(AssertionError) as exception:
method.compute(None, indexed_data)

message = exception.value.args[0]
assert exception.typename == "AssertionError"
assert message == "Query data is 'None'."


def test_compute_ri_basic_case(non_indexed_data, indexed_data):
method = Kovats()

expected = [741.525424, 760.169492, 769.491525, 932.420091, 965.296804,
986.757991, 1078.823529, 1157.142857]
actual = method.compute(non_indexed_data, indexed_data)

numpy.testing.assert_array_almost_equal(actual, expected)


def test_invalid_rt_has_none_ri(invalid_rt_data, indexed_data):
method = Kovats()

expected = [None, None, None, 741.5254237288136]
actual = method.compute(invalid_rt_data, indexed_data)

numpy.testing.assert_array_equal(actual, expected)


def test_ref_queries(reference_alkanes, queries):
method = Kovats()

data, expected = queries
actual = method.compute(data, reference_alkanes)
numpy.testing.assert_array_almost_equal(actual, expected)
5 changes: 3 additions & 2 deletions tests/test_data_MatchMSData.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@

@pytest.fixture(params=[
"recetox_gc-ei_ms_20201028.msp",
"MSMS-Neg-Vaniya-Fiehn_Natural_Products_Library_20200109.msp",
"MSMS-Neg-PFAS_20200806.msp",
# Currently excluded due to having None RT values
# "MSMS-Neg-Vaniya-Fiehn_Natural_Products_Library_20200109.msp",
# "MSMS-Neg-PFAS_20200806.msp",
"PFAS_added_rt.msp"])
def filename_msp(request):
return os.path.join(testdata_dir, request.param)
Expand Down