feat: Add InputConversion & OutputConversion for nn interface (#625)

Closes #621 ### Summary of Changes Added the input and output conversion interfaces for neural networks and implemented them for `Table`. --------- Co-authored-by: Alexander Gréus <alexgreus51@gmail.com> Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Co-authored-by: Alexander <47296670+Marsmaennchen221@users.noreply.github.com> Co-authored-by: WinPlay02 <winplay02_gh@woberlaender.de> Co-authored-by: Simon <s6snbreu@uni-bonn.de> Co-authored-by: Simon Breuer <86068340+sibre28@users.noreply.github.com>
Safe-DS · Apr 18, 2024 · fd723f7 · fd723f7
1 parent e01ad89
commit fd723f7
Show file tree

Hide file tree

Showing 12 changed files with 384 additions and 100 deletions.
diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py
@@ -15,7 +15,7 @@
     from collections.abc import Callable, Mapping, Sequence
     from typing import Any
 
-    import numpy as np
+    import torch
     from torch import Tensor
     from torch.utils.data import DataLoader, Dataset
 
@@ -916,7 +916,7 @@ def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) ->
             )
 
 
-def _create_dataset(features: np.array, target: np.array) -> Dataset:
+def _create_dataset(features: Tensor, target: Tensor) -> Dataset:
     import torch
     from torch.utils.data import Dataset
 

diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py
@@ -32,12 +32,12 @@
         DatasetContainsTargetError,
         DatasetMissesDataError,
         DatasetMissesFeaturesError,
+        FeatureDataMismatchError,
         InputSizeError,
         LearningError,
         ModelNotFittedError,
         NonTimeSeriesError,
         PredictionError,
-        TestTrainDataMismatchError,
         UntaggedTableError,
     )
 
@@ -66,12 +66,12 @@
         "DatasetContainsTargetError": "._ml:DatasetContainsTargetError",
         "DatasetMissesDataError": "._ml:DatasetMissesDataError",
         "DatasetMissesFeaturesError": "._ml:DatasetMissesFeaturesError",
+        "FeatureDataMismatchError": "._ml:FeatureDataMismatchError",
         "InputSizeError": "._ml:InputSizeError",
         "LearningError": "._ml:LearningError",
         "ModelNotFittedError": "._ml:ModelNotFittedError",
         "NonTimeSeriesError": "._ml:NonTimeSeriesError",
         "PredictionError": "._ml:PredictionError",
-        "TestTrainDataMismatchError": "._ml:TestTrainDataMismatchError",
         "UntaggedTableError": "._ml:UntaggedTableError",
         # Other
         "Bound": "._generic:Bound",
@@ -103,12 +103,12 @@
     "DatasetContainsTargetError",
     "DatasetMissesDataError",
     "DatasetMissesFeaturesError",
+    "FeatureDataMismatchError",
     "InputSizeError",
     "LearningError",
     "ModelNotFittedError",
     "NonTimeSeriesError",
     "PredictionError",
-    "TestTrainDataMismatchError",
     "UntaggedTableError",
     # Other
     "Bound",

diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py
@@ -68,12 +68,12 @@ def __init__(self, reason: str):
         super().__init__(f"Error occurred while predicting: {reason}")
 
 
-class TestTrainDataMismatchError(Exception):
-    """Raised when the columns of the table passed to the predict method do not match with the feature columns of the training data."""
+class FeatureDataMismatchError(Exception):
+    """Raised when the columns of the table passed to the predict or fit method do not match with the specified features of the neural network."""
 
     def __init__(self) -> None:
         super().__init__(
-            "The column names in the test table do not match with the feature columns names of the training data.",
+            "The features in the given table do not match with the specified feature columns names of the neural network.",
         )
 
 

diff --git a/src/safeds/ml/nn/__init__.py b/src/safeds/ml/nn/__init__.py
@@ -6,19 +6,25 @@
 
 if TYPE_CHECKING:
     from ._forward_layer import ForwardLayer
+    from ._input_conversion_table import InputConversionTable
     from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor
+    from ._output_conversion_table import OutputConversionTable
 
 apipkg.initpkg(
     __name__,
     {
         "ForwardLayer": "._forward_layer:ForwardLayer",
+        "InputConversionTable": "._input_conversion_table:InputConversionTable",
+        "OutputConversionTable": "._output_conversion_table:OutputConversionTable",
         "NeuralNetworkClassifier": "._model:NeuralNetworkClassifier",
         "NeuralNetworkRegressor": "._model:NeuralNetworkRegressor",
     },
 )
 
 __all__ = [
     "ForwardLayer",
+    "InputConversionTable",
+    "OutputConversionTable",
     "NeuralNetworkClassifier",
     "NeuralNetworkRegressor",
 ]
diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py
@@ -1,28 +1,38 @@
-from torch import Tensor, nn
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from torch import Tensor, nn
 
 from safeds.exceptions import ClosedBound, OutOfBoundsError
-from safeds.ml.nn._layer import Layer
+from safeds.ml.nn._layer import _Layer
+
+
+def _create_internal_model(input_size: int, output_size: int, activation_function: str) -> nn.Module:
+    from torch import nn
 
+    class _InternalLayer(nn.Module):
+        def __init__(self, input_size: int, output_size: int, activation_function: str):
+            super().__init__()
+            self._layer = nn.Linear(input_size, output_size)
+            match activation_function:
+                case "sigmoid":
+                    self._fn = nn.Sigmoid()
+                case "relu":
+                    self._fn = nn.ReLU()
+                case "softmax":
+                    self._fn = nn.Softmax()
+                case _:
+                    raise ValueError("Unknown Activation Function: " + activation_function)
 
-class _InternalLayer(nn.Module):
-    def __init__(self, input_size: int, output_size: int, activation_function: str):
-        super().__init__()
-        self._layer = nn.Linear(input_size, output_size)
-        match activation_function:
-            case "sigmoid":
-                self._fn = nn.Sigmoid()
-            case "relu":
-                self._fn = nn.ReLU()
-            case "softmax":
-                self._fn = nn.Softmax()
-            case _:
-                raise ValueError("Unknown Activation Function: " + activation_function)
+        def forward(self, x: Tensor) -> Tensor:
+            return self._fn(self._layer(x))
 
-    def forward(self, x: Tensor) -> Tensor:
-        return self._fn(self._layer(x))
+    return _InternalLayer(input_size, output_size, activation_function)
 
 
-class ForwardLayer(Layer):
+class ForwardLayer(_Layer):
     def __init__(self, output_size: int, input_size: int | None = None):
         """
         Create a FNN Layer.
@@ -47,8 +57,8 @@ def __init__(self, output_size: int, input_size: int | None = None):
             raise OutOfBoundsError(actual=output_size, name="output_size", lower_bound=ClosedBound(1))
         self._output_size = output_size
 
-    def _get_internal_layer(self, activation_function: str) -> _InternalLayer:
-        return _InternalLayer(self._input_size, self._output_size, activation_function)
+    def _get_internal_layer(self, activation_function: str) -> nn.Module:
+        return _create_internal_model(self._input_size, self._output_size, activation_function)
 
     @property
     def input_size(self) -> int:

diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Generic, TypeVar
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+from safeds.data.tabular.containers import Table, TaggedTable, TimeSeries
+
+FT = TypeVar("FT", TaggedTable, TimeSeries)
+PT = TypeVar("PT", Table, TimeSeries)
+
+
+class _InputConversion(Generic[FT, PT], ABC):
+    """The input conversion for a neural network, defines the input parameters for the neural network."""
+
+    @property
+    @abstractmethod
+    def _data_size(self) -> int:
+        pass  # pragma: no cover
+
+    @abstractmethod
+    def _data_conversion_fit(self, input_data: FT, batch_size: int, num_of_classes: int = 1) -> DataLoader:
+        pass  # pragma: no cover
+
+    @abstractmethod
+    def _data_conversion_predict(self, input_data: PT, batch_size: int) -> DataLoader:
+        pass  # pragma: no cover
+
+    @abstractmethod
+    def _is_fit_data_valid(self, input_data: FT) -> bool:
+        pass  # pragma: no cover
+
+    @abstractmethod
+    def _is_predict_data_valid(self, input_data: PT) -> bool:
+        pass  # pragma: no cover
diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+from safeds.data.tabular.containers import Table, TaggedTable
+from safeds.ml.nn._input_conversion import _InputConversion
+
+
+class InputConversionTable(_InputConversion[TaggedTable, Table]):
+    """The input conversion for a neural network, defines the input parameters for the neural network."""
+
+    def __init__(self, feature_names: list[str], target_name: str) -> None:
+        """
+        Define the input parameters for the neural network in the input conversion.
+
+        Parameters
+        ----------
+        feature_names
+            The names of the features for the input table, used as features for the training.
+        target_name
+            The name of the target for the input table, used as target for the training.
+        """
+        self._feature_names = feature_names
+        self._target_name = target_name
+
+    @property
+    def _data_size(self) -> int:
+        return len(self._feature_names)
+
+    def _data_conversion_fit(self, input_data: TaggedTable, batch_size: int, num_of_classes: int = 1) -> DataLoader:
+        return input_data._into_dataloader_with_classes(
+            batch_size,
+            num_of_classes,
+        )
+
+    def _data_conversion_predict(self, input_data: Table, batch_size: int) -> DataLoader:
+        return input_data._into_dataloader(batch_size)
+
+    def _is_fit_data_valid(self, input_data: TaggedTable) -> bool:
+        return (sorted(input_data.features.column_names)).__eq__(sorted(self._feature_names))
+
+    def _is_predict_data_valid(self, input_data: Table) -> bool:
+        return (sorted(input_data.column_names)).__eq__(sorted(self._feature_names))
diff --git a/src/safeds/ml/nn/_layer.py b/src/safeds/ml/nn/_layer.py
@@ -1,9 +1,13 @@
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
 
-from torch import nn
+if TYPE_CHECKING:
+    from torch import nn
 
 
-class Layer(ABC):
+class _Layer(ABC):
     @abstractmethod
     def __init__(self) -> None:
         pass  # pragma: no cover