Merge pull request #477 from N720720/476-memory-optimization-using-ov…

…ito-pipeline 476 memory optimization using ovito pipeline
N720720 · Jun 11, 2024 · 6ace2f5 · 6ace2f5
2 parents ac1e36d + 1bf8359
commit 6ace2f5
Show file tree

Hide file tree

Showing 14 changed files with 527 additions and 114 deletions.
diff --git a/lindemann/index/mem_use.py b/lindemann/index/mem_use.py
@@ -23,13 +23,18 @@ def in_gb(nframes: int, natoms: int) -> str:
     num_distances = natoms * (natoms - 1) // 2
     float_size = np.float32().nbytes
     trj = nframes * natoms * 3 * float_size
-    atom_atom_array = 3 * natoms * natoms * float_size
+    atom_atom_array = 2 * natoms * natoms * float_size
     atom_array = natoms * float_size
     linde_index = nframes * natoms * float_size
     sum_bytes = trj + atom_atom_array + atom_array + linde_index
     per_trj = (
         f"\nFlag -t (per_trj) will use {np.round((trj+num_distances*2*float_size)/1024**3,4)} GB\n"
     )
+    online_per_trj = (
+        f"Flag -ot (per_trj) will use {np.round((num_distances*2*float_size)/1024**3,4)} GB\n"
+    )
     per_frames = f"Flag -f (per_frames) will use {np.round((trj+(num_distances*2*float_size)+(nframes*float_size))/1024**3,4)} GB\n"
-    per_atoms = f"Flag -a (per_atoms) will use {np.round(sum_bytes/1024**3,4)} GB"
-    return f"{per_trj}{per_frames}{per_atoms}"
+    online_per_frames = f"Flag -of (per_frames) will use {np.round(((num_distances*2*float_size)+(nframes*float_size))/1024**3,4)} GB\n"
+    per_atoms = f"Flag -a (per_atoms) will use {np.round((sum_bytes)/1024**3,4)} GB\n"
+    online_per_atoms = f"Flag -oa (per_atoms) will use {np.round((sum_bytes-trj)/1024**3,4)} GB"
+    return f"{per_trj}{online_per_trj}{per_frames}{online_per_frames}{per_atoms}{online_per_atoms}"
diff --git a/lindemann/index/online_atoms.py b/lindemann/index/online_atoms.py
@@ -0,0 +1,92 @@
+from typing import Optional
+
+import numba as nb
+import numpy as np
+import numpy.typing as npt
+from ovito.data import DataCollection
+from ovito.pipeline import Pipeline
+
+
+@nb.njit(fastmath=True, parallel=False)
+def calculate_frame(
+    positions: npt.NDArray[np.float32],
+    array_mean: npt.NDArray[np.float32],
+    array_var: npt.NDArray[np.float32],
+    frame: int,
+    natoms: int,
+) -> npt.NDArray[np.float32]:
+    """
+    Calculates the contribution of the individual atomic positions to the Lindemann Index for a specific frame.
+
+    Args:
+        positions (npt.NDArray[np.float32]): Array of atomic positions for the current frame.
+        array_mean (npt.NDArray[np.float32]): Array to store the mean distances.
+        array_var (npt.NDArray[np.float32]): Array to store the variance of distances.
+        frame (int): The current frame index.
+        natoms (int): The number of atoms.
+
+    Returns:
+        npt.NDArray[np.float32]: Array of the individual atomic contributions to the Lindemann indices for the current frame.
+    """
+    frame_count = frame + 1
+    for i in range(natoms):
+        for j in range(i + 1, natoms):
+            dist = 0.0
+            for k in range(3):
+                dist += (positions[i, k] - positions[j, k]) ** 2
+            dist = np.sqrt(dist)
+            mean = array_mean[i, j]
+            var = array_var[i, j]
+            delta = dist - mean
+            update_mean = mean + delta / frame_count
+            array_mean[i, j] = update_mean
+            array_mean[j, i] = update_mean
+            delta2 = dist - array_mean[i, j]
+            update_var = var + delta * delta2
+            array_var[i, j] = update_var
+            array_var[j, i] = update_var
+
+    np.fill_diagonal(array_mean, 1.0)
+    lindemann_indices = np.divide(np.sqrt(np.divide(array_var, frame_count)), array_mean).astype(
+        np.float32
+    )
+    lindemann_indices = np.asarray(
+        [np.nanmean(lin[lin != 0]) for lin in lindemann_indices]
+    ).astype(np.float32)
+
+    return lindemann_indices
+
+
+def calculate(
+    pipeline: Pipeline, data: DataCollection, nframes: Optional[int] = None
+) -> npt.NDArray[np.float32]:
+    """
+    Calculates the contribution of the individual atomic positions to the Lindemann Index for a series of frames from an OVITO pipeline.
+
+    Args:
+        pipeline (Pipeline): The OVITO pipeline object.
+        data (DataCollection): The data collection object from OVITO.
+        nframes (Optional[int]): The number of frames to process. If None, all frames are processed.
+
+    Returns:
+        npt.NDArray[np.float32]: Array of the individual atomic contributions to the Lindemann indices for each frame.
+
+    Raises:
+        ValueError: If the requested number of frames exceeds the available frames in the pipeline.
+    """
+    num_particle = data.particles.count
+    num_frame = pipeline.source.num_frames
+    if nframes is None:
+        nframes = num_frame
+    elif nframes > num_frame:
+        raise ValueError(f"Requested {nframes} frames, but only {num_frame} frames are available.")
+
+    array_mean = np.zeros((num_particle, num_particle), dtype=np.float32)
+    array_var = np.zeros((num_particle, num_particle), dtype=np.float32)
+    lindex_array = np.zeros((nframes, num_particle), dtype=np.float32)
+    for frame in range(nframes):
+        data = pipeline.compute(frame)
+        lindex_array[frame] = calculate_frame(
+            data.particles["Position"].array, array_mean, array_var, frame, num_particle
+        )
+    return lindex_array
diff --git a/lindemann/index/online_frames.py b/lindemann/index/online_frames.py
@@ -0,0 +1,82 @@
+from typing import Any, Optional
+
+import numba as nb
+import numpy as np
+import numpy.typing as npt
+from ovito.data import DataCollection
+from ovito.pipeline import Pipeline
+
+
+@nb.njit(fastmath=True, parallel=False)
+def calculate_frame(
+    positions: npt.NDArray[np.float32],
+    mean_distances: npt.NDArray[np.float32],
+    m2_distances: npt.NDArray[np.float32],
+    frame: int,
+    num_atoms: int,
+) -> np.floating[Any]:
+    """
+    Calculates the Lindemann Index for a specific frame.
+
+    Args:
+        positions (npt.NDArray[np.float32]): Array of atomic positions for the current frame.
+        mean_distances (npt.NDArray[np.float32]): Array to store the mean distances between pairs of atoms.
+        m2_distances (npt.NDArray[np.float32]): Array to store the squared differences of distances between pairs of atoms.
+        frame (int): The current frame index.
+        num_atoms (int): The number of atoms.
+
+    Returns:
+        float: The Lindemann index for the current frame.
+    """
+    index = 0
+    frame_count = frame + 1
+    for i in range(num_atoms):
+        for j in range(i + 1, num_atoms):
+            dist = 0.0
+            for k in range(3):
+                dist += (positions[i, k] - positions[j, k]) ** 2
+
+            dist = np.sqrt(dist)
+            delta = dist - mean_distances[index]
+            mean_distances[index] += delta / frame_count
+            delta2 = dist - mean_distances[index]
+            m2_distances[index] += delta * delta2
+
+            index += 1
+    return np.mean(np.sqrt(m2_distances / frame_count) / mean_distances)
+
+
+def calculate(
+    pipeline: Pipeline, data: DataCollection, nframes: Optional[int] = None
+) -> npt.NDArray[np.float32]:
+    """
+    Calculates the Lindemann indices for a series of frames from an OVITO pipeline.
+
+    Args:
+        pipeline (Pipeline): The OVITO pipeline object.
+        data (DataCollection): The data collection object from OVITO.
+        nframes (Optional[int]): The number of frames to process. If None, all frames are processed.
+
+    Returns:
+        npt.NDArray[np.float32]: Array of Lindemann indices for each frame.
+
+    Raises:
+        ValueError: If the requested number of frames exceeds the available frames in the pipeline.
+    """
+    num_particle = data.particles.count
+    num_frame = pipeline.source.num_frames
+    if nframes is None:
+        nframes = num_frame
+    elif nframes > num_frame:
+        raise ValueError(f"Requested {nframes} frames, but only {num_frame} frames are available.")
+
+    num_distances = num_particle * (num_particle - 1) // 2
+    mean_distances = np.zeros(num_distances, dtype=np.float32)
+    m2_distances = np.zeros(num_distances, dtype=np.float32)
+    lindemann_index_array = np.zeros(nframes, dtype=np.float32)
+    for frame in range(nframes):
+        data = pipeline.compute(frame)
+        lindemann_index_array[frame] = calculate_frame(
+            data.particles["Position"].array, mean_distances, m2_distances, frame, num_particle
+        )
+    return lindemann_index_array
diff --git a/lindemann/index/online_trj.py b/lindemann/index/online_trj.py
@@ -0,0 +1,84 @@
+from typing import Any, Optional
+
+import numba as nb
+import numpy as np
+import numpy.typing as npt
+from ovito.data import DataCollection
+from ovito.pipeline import Pipeline
+
+from lindemann.trajectory import read
+
+
+@nb.njit(fastmath=True, parallel=False)
+def calculate_frame(
+    positions: npt.NDArray[np.float32],
+    mean_distances: npt.NDArray[np.float32],
+    m2_distances: npt.NDArray[np.float32],
+    frame: int,
+    num_atoms: int,
+) -> None:
+    """
+    Updates the mean and variance of distances between pairs of atoms for a specific frame.
+
+    Args:
+        positions (npt.NDArray[np.float32]): Array of atomic positions for the current frame.
+        mean_distances (npt.NDArray[np.float32]): Array to store the mean distances between pairs of atoms.
+        m2_distances (npt.NDArray[np.float32]): Array to store the squared differences of distances between pairs of atoms.
+        frame (int): The current frame index.
+        num_atoms (int): The number of atoms.
+
+    Returns:
+        None
+    """
+    index = 0
+    frame_count = frame + 1
+    for i in range(num_atoms):
+        for j in range(i + 1, num_atoms):
+            dist = 0.0
+            for k in range(3):
+                dist += (positions[i, k] - positions[j, k]) ** 2
+
+            dist = np.sqrt(dist)
+            delta = dist - mean_distances[index]
+            mean_distances[index] += delta / frame_count
+            delta2 = dist - mean_distances[index]
+            m2_distances[index] += delta * delta2
+
+            index += 1
+
+
+def calculate(
+    pipeline: Pipeline, data: DataCollection, nframes: Optional[int] = None
+) -> np.floating[Any]:
+    """
+    Calculates the overall Lindemann index for a series of frames from an OVITO pipeline.
+
+    Args:
+        pipeline (Pipeline): The OVITO pipeline object.
+        data (DataCollection): The data collection object from OVITO.
+        nframes (Optional[int]): The number of frames to process. If None, all frames are processed.
+
+    Returns:
+        float: The overall Lindemann index.
+
+    Raises:
+        ValueError: If the requested number of frames exceeds the available frames in the pipeline.
+    """
+    num_particle = data.particles.count
+    num_frame = pipeline.source.num_frames
+    if nframes is None:
+        nframes = num_frame
+    elif nframes > num_frame:
+        raise ValueError(f"Requested {nframes} frames, but only {num_frame} frames are available.")
+
+    num_distances = num_particle * (num_particle - 1) // 2
+    mean_distances = np.zeros(num_distances, dtype=np.float32)
+    m2_distances = np.zeros(num_distances, dtype=np.float32)
+    print(nframes, num_particle)
+    for frame in range(nframes):
+        data = pipeline.compute(frame)
+        calculate_frame(
+            data.particles["Position"].array, mean_distances, m2_distances, frame, num_particle
+        )
+
+    return np.mean(np.sqrt(m2_distances / nframes) / mean_distances)
diff --git a/lindemann/index/per_atoms.py b/lindemann/index/per_atoms.py
@@ -1,19 +1,20 @@
-from typing import List
-
 import numba as nb
 import numpy as np
 import numpy.typing as npt
-from numba import float32
 
 
-@nb.njit(fastmath=True, error_model="numpy")  # type: ignore # , cache=True) #(parallel=True)
+@nb.njit(fastmath=True, parallel=False)
 def calculate(frames: npt.NDArray[np.float32]) -> npt.NDArray[np.float32]:
-    """Calculate the contribution of each atom to the lindemann index over the frames
+    """
+    Calculate the contribution of each atom to the Lindemann index over the frames.
 
     Args:
-        frames: numpy array of shape(frames,atoms)
+        frames (npt.NDArray[np.float32]): A numpy array of shape (frames, atoms, 3) containing the atomic positions
+                                          over multiple frames.
+
     Returns:
-        npt.NDArray[np.float32]: Returns 1D array with the progression of the lindeman index per frame of shape(frames, atoms)
+        npt.NDArray[np.float32]: A 2D array of shape (frames, atoms) containing the progression of the Lindemann index
+                                 per frame.
     """
     len_frames, natoms, _ = frames.shape
 

diff --git a/lindemann/index/per_frames.py b/lindemann/index/per_frames.py
@@ -1,9 +1,19 @@
 import numba as nb
 import numpy as np
+import numpy.typing as npt
 
 
 @nb.njit(fastmath=True, parallel=False)
-def calculate(positions):
+def calculate(positions: npt.NDArray[np.float32]):
+    """
+    Calculates the Lindemann index for each frame of atomic positions.
+
+    Args:
+        positions (npt.NDArray[np.float32]): Array of atomic positions with shape (num_frames, num_atoms, 3).
+
+    Returns:
+        npt.NDArray[np.float32]: Array of Lindemann indices for each frame.
+    """
     num_frames, num_atoms, _ = positions.shape
     num_distances = num_atoms * (num_atoms - 1) // 2
 

diff --git a/lindemann/index/per_trj.py b/lindemann/index/per_trj.py
@@ -1,9 +1,21 @@
+from typing import Any
+
 import numba as nb
 import numpy as np
+import numpy.typing as npt
 
 
 @nb.njit(fastmath=True)
-def calculate(positions):
+def calculate(positions: npt.NDArray[np.float32]) -> np.floating[Any]:
+    """
+    Calculates the overall Lindemann index for a series of atomic positions over multiple frames.
+
+    Args:
+        positions (npt.NDArray[np.float32]): Array of atomic positions with shape (num_frames, num_atoms, 3).
+
+    Returns:
+        np.floating[np.Any]: The overall Lindemann index.
+    """
     num_frames, num_atoms, _ = positions.shape
     num_distances = num_atoms * (num_atoms - 1) // 2