Skip to content

Commit

Permalink
Support cp2k cell parsing in npt log file
Browse files Browse the repository at this point in the history
Support cp2k cell parsing in npt log file
  • Loading branch information
robinzyb authored Oct 5, 2023
2 parents d0d41c1 + 3f266d4 commit 718e4d1
Show file tree
Hide file tree
Showing 31 changed files with 9,256 additions and 88 deletions.
10 changes: 10 additions & 0 deletions .github/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# .github/release.yml

changelog:
categories:
- title: New Features
labels:
- New-Feature
- title: Other Changes
labels:
- "*"
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
[![Python package](https://github.com/robinzyb/cp2kdata/actions/workflows/ci.yml/badge.svg)](https://github.com/robinzyb/cp2kdata/actions/workflows/ci.yml)[![Coverage Status](https://coveralls.io/repos/github/robinzyb/cp2kdata/badge.svg)](https://coveralls.io/github/robinzyb/cp2kdata)
![pythonv](https://img.shields.io/pypi/pyversions/cp2kdata)
![pypiv](https://img.shields.io/pypi/v/cp2kdata)
![PyPI - pip install](https://img.shields.io/pypi/dm/cp2kdata?logo=pypi&label=pip%20install)

Python Package to postprocess cp2k data.

Expand Down
37 changes: 37 additions & 0 deletions cp2kdata/block_parser/cells.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import regex as re
import numpy as np
from ase.geometry.cell import cellpar_to_cell

ALL_CELL_RE = re.compile(
r"""
Expand Down Expand Up @@ -40,3 +41,39 @@ def parse_all_cells(output_file):
return np.array(all_cells, dtype=float)
else:
return None


ALL_MD_CELL_RE = re.compile(
r"""
#parse cell lengths
\sMD\|\sCell\slengths\s\[ang\]\s{9}
\s{2}(?P<a>\d\.\d{8}E(\+|\-)\d{2})
\s{2}(?P<b>\d\.\d{8}E(\+|\-)\d{2})
\s{2}(?P<c>\d\.\d{8}E(\+|\-)\d{2})
\n
#skip two lines
(.{80}\n){2}
#parse angles
\sMD\|\sCell\sangles\s\[deg\]\s{10}
\s{2}(?P<alpha>\d\.\d{8}E(\+|\-)\d{2})
\s{2}(?P<beta>\d\.\d{8}E(\+|\-)\d{2})
\s{2}(?P<gamma>\d\.\d{8}E(\+|\-)\d{2})
""",
re.VERBOSE
)

def parse_all_md_cells(output_file):
# notice that the cell of step 0 is not included
all_md_cells = []
for match in ALL_MD_CELL_RE.finditer(output_file):
#print(match)
cell = [match["a"], match["b"], match["c"],
match["alpha"], match["beta"], match["gamma"]]
cell = np.array(cell, dtype=float)
cell = cellpar_to_cell(cell)
all_md_cells.append(cell)

if all_md_cells:
return np.array(all_md_cells, dtype=float)
else:
return None
2 changes: 1 addition & 1 deletion cp2kdata/block_parser/geo_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
)


def parse_geo_opt(output_file) -> float:
def parse_geo_opt_info(output_file) -> float:
geo_opt_info = []

for match in GEO_OPT_INFO_FIRST_RE.finditer(output_file):
Expand Down
31 changes: 29 additions & 2 deletions cp2kdata/block_parser/header_info.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import regex as re
from dataclasses import dataclass
# use monty.re because it can terminate on match
from monty.re import regrep


Expand All @@ -9,7 +10,7 @@ class Cp2kInfo:

CP2K_INFO_VERSION_PATTERN = \
r"""(?xm)
^\sCP2K\|\sversion\sstring:\s{20,42}
^\sCP2K\|\sversion\sstring:\s{10,42}
CP2K\sversion\s(?P<version>\d{1,4}\.\d)(?:\s\(Development\sVersion\))?$
"""

Expand Down Expand Up @@ -83,8 +84,34 @@ def parse_dft_info(filename) -> DFTInfo:
patterns={
"ks_type": DFT_INFO_KS_TYPE_PATTERN,
"multiplicity": DFT_INFO_MULTIPLICITY_PATTERN
},
terminate_on_match=True
)

return DFTInfo(ks_type=dft_info["ks_type"][0][0][0], multiplicity=dft_info["multiplicity"][0][0][0])




@dataclass
class MDInfo:
ensemble_type: str = None

# PATTERNS
MD_INFO_ENSEMBLE_TYPE_PATTERN = \
r"""(?xm)
^\s(?:MD_PAR|MD)\|\sEnsemble\s(?:t|T)ype\s{39,60}(?P<ensemble_type>\w{3,16})
"""

def parse_md_info(filename):
md_info = {}

md_info = regrep(
filename=filename,
patterns={
"ensemble_type": MD_INFO_ENSEMBLE_TYPE_PATTERN
},
terminate_on_match=True
)

return DFTInfo(ks_type=dft_info["ks_type"][0][0][0], multiplicity=dft_info["multiplicity"][0][0][0])
return MDInfo(ensemble_type=md_info["ensemble_type"][0][0][0])
10 changes: 5 additions & 5 deletions cp2kdata/block_parser/md_xyz.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
)

def parse_md_ener(ener_file):
print(f"Obtian Energies From {ener_file}")
print(f"Parsing Energies From {ener_file}")
energies_list = np.loadtxt(ener_file, usecols=4, ndmin=1, dtype=np.float64)
return energies_list

def parse_pos_xyz(posxyz_file):
print(f"Obtian Structures From {posxyz_file}")
print(f"Parsing Structures From {posxyz_file}")
fp = zopen(posxyz_file, "r")
lines = fp.readlines()
energies_list = []
Expand All @@ -36,7 +36,7 @@ def parse_pos_xyz(posxyz_file):
return pos_list, energies_list, chemical_symbols

def parse_frc_xyz(frcxyz_file):
print(f"Obtian Froces From {frcxyz_file}")
print(f"Parsing Froces From {frcxyz_file}")
fp = zopen(frcxyz_file, "r")
lines = fp.readlines()
force_list = []
Expand All @@ -57,7 +57,7 @@ def parse_frc_xyz(frcxyz_file):

#NOTE: incomplete function, do not release!
def parse_pos_xyz_from_wannier(wannier_xyz_fiel):
print(f"Obtian Structures From {wannier_xyz_fiel}")
print(f"Parsing Structures From {wannier_xyz_fiel}")
fp = zopen(wannier_xyz_fiel, "r")
lines = fp.readlines()
force_list = []
Expand All @@ -79,7 +79,7 @@ def parse_pos_xyz_from_wannier(wannier_xyz_fiel):
return force_list

def parse_md_stress(stress_file):
print(f"Obtian Stresses From {stress_file}")
print(f"Parsing Stresses From {stress_file}")
stresses_list = np.loadtxt(
stress_file,
usecols=(2, 3, 4, 5, 6, 7, 8, 9, 10),
Expand Down
14 changes: 8 additions & 6 deletions cp2kdata/dpdata_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,10 @@ def from_labeled_system(self, file_name, **kwargs):

if cells is None:
if cp2kmd.filename:
cells = cp2kmd.get_init_cell()
cells = cells[np.newaxis, :, :]
cells = np.repeat(cells, repeats=num_frames, axis=0)
# cells = cp2kmd.get_init_cell()
# cells = cells[np.newaxis, :, :]
# cells = np.repeat(cells, repeats=num_frames, axis=0)
cells = cp2kmd.get_all_cells()
else:
print("No cell information, please check if your inputs are correct.")
elif isinstance(cells, np.ndarray):
Expand Down Expand Up @@ -168,9 +169,10 @@ def from_labeled_system(self, file_name, **kwargs):

if cells is None:
if cp2kmd.filename:
cells = cp2kmd.get_init_cell()
cells = cells[np.newaxis, :, :]
cells = np.repeat(cells, repeats=num_frames, axis=0)
# cells = cp2kmd.get_init_cell()
# cells = cells[np.newaxis, :, :]
# cells = np.repeat(cells, repeats=num_frames, axis=0)
cells = cp2kmd.get_all_cells()
else:
print("No cell information, please check if your inputs are correct.")
elif isinstance(cells, np.ndarray):
Expand Down
70 changes: 53 additions & 17 deletions cp2kdata/output.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from ast import parse
import sys
import time
import numpy as np
Expand Down Expand Up @@ -66,7 +65,7 @@ def __init__(self, output_file=None, run_type: str=None, path_prefix=".", **kwar
with open(self.filename, 'r') as fp:
self.output_file = fp.read()
self.cp2k_info = parse_cp2k_info(self.filename)
self.DFTInfo = parse_dft_info(self.filename)
self.dft_info = parse_dft_info(self.filename)
else:
self.cp2k_info = Cp2kInfo(version="Unknown")

Expand Down Expand Up @@ -105,7 +104,7 @@ def __init__(self, output_file=None, run_type: str=None, path_prefix=".", **kwar
# self.output_file)

# self.mulliken_pop_list = parse_mulliken_pop_list(
# self.output_file, self.DFTInfo)
# self.output_file, self.dft_info)
# self.hirshfeld_pop_list = parse_hirshfeld_pop_list(self.output_file)
# self.dft_plus_u_occ = parse_dft_plus_u_occ(self.output_file)

Expand Down Expand Up @@ -268,7 +267,7 @@ def parse_energy_force(self):
"atomic_kinds": parse_atomic_kinds,
"cells": parse_all_cells
}
# convert kwargs to flexible attribute!
#TODO: convert kwargs to flexible attribute!
self.geo_opt_info = None
self.num_frames = 1
self.init_atomic_coordinates, self.atom_kind_list, self.chemical_symbols = parse_init_atomic_coordinates(
Expand All @@ -283,8 +282,10 @@ def parse_energy_force(self):
def parse_geo_opt(self):
self.init_atomic_coordinates, self.atom_kind_list, self.chemical_symbols = parse_init_atomic_coordinates(
self.output_file)
self.geo_opt_info = parse_geo_opt(self.output_file)
self.geo_opt_info = parse_geo_opt_info(self.output_file)
self.num_frames = len(self.geo_opt_info)
self.atomic_forces_list = parse_atomic_forces_list(self.output_file)
self.stress_tensor_list = parse_stress_tensor_list(self.output_file)

def parse_cell_opt(self):
# initial information
Expand All @@ -305,15 +306,9 @@ def parse_cell_opt(self):
self.num_frames = len(self.energies_list)

def parse_md(self):

if self.filename:
self.all_cells = parse_all_cells(self.output_file)
print(f"You are reading cell information from {self.filename}")
self.init_atomic_coordinates, self.atom_kind_list, self.chemical_symbols = parse_init_atomic_coordinates(
self.output_file)
self.atomic_kind = parse_atomic_kinds(self.output_file)
self.md_info = parse_md_info(self.filename)
self.check_md_type(md_type=self.md_info.ensemble_type)


ener_file_list = glob.glob(os.path.join(self.path_prefix, "*.ener"))
if ener_file_list:
self.energies_list = parse_md_ener(ener_file_list[0])
Expand All @@ -324,9 +319,13 @@ def parse_md(self):

if not hasattr(self, "energies_list"):
self.energies_list = energies_list_from_pos

frc_xyz_file_list = glob.glob(os.path.join(self.path_prefix,"*frc*.xyz"))
if frc_xyz_file_list:
self.atomic_forces_list = parse_frc_xyz(frc_xyz_file_list[0])
else:
print(f"Parsing Forces from the CP2K output/log file: {self.filename}")
self.atomic_forces_list = parse_atomic_forces_list(self.output_file)

stress_file_list = glob.glob(os.path.join(self.path_prefix,"*.stress"))
if stress_file_list:
Expand All @@ -336,6 +335,35 @@ def parse_md(self):

self.num_frames = len(self.energies_list)

# here parse cell information
if self.filename:

if (self.md_info.ensemble_type == "NVT") or \
(self.md_info.ensemble_type == "NVE") or \
(self.md_info.ensemble_type == "REFTRAJ"):
# parse the first cell
first_cell = parse_all_cells(self.output_file)
assert first_cell.shape == (1, 3, 3), \
"cp2kdata obtains more than one cell from the output file, please check if your output file has duplicated header information."

self.all_cells = first_cell
self.all_cells = np.repeat(self.all_cells, repeats=self.num_frames, axis=0)

elif self.md_info.ensemble_type == "NPT_F":
# only parse the first cell
first_cell = parse_all_cells(self.output_file)
assert first_cell.shape == (1, 3, 3), \
"cp2kdata obtains more than one cell from the output file, please check if your output file has duplicated header information."
# parse the rest of the cells
self.all_cells = parse_all_md_cells(self.output_file)
# prepend the first cell
self.all_cells = np.insert(self.all_cells, 0, first_cell[0], axis=0)

print(f"Parsing Cells Information from {self.filename}")
self.init_atomic_coordinates, self.atom_kind_list, self.chemical_symbols = parse_init_atomic_coordinates(
self.output_file)
self.atomic_kind = parse_atomic_kinds(self.output_file)

@staticmethod
def get_global_info(run_type=None, filename=None):
if filename:
Expand All @@ -350,11 +378,19 @@ def get_global_info(run_type=None, filename=None):
def check_run_type(run_type):
implemented_run_type_parsers = \
["ENERGY_FORCE", "ENERGY", "MD", "GEO_OPT", "CELL_OPT"]
if run_type in implemented_run_type_parsers:
pass
else:
if run_type not in implemented_run_type_parsers:
raise ValueError(
f"Parser for Run Type {run_type} Haven't Been Implemented Yet!"
f"Parser for Run Type {run_type} haven't been implemented yet!"
"Please contact the developer for more information."
)

@staticmethod
def check_md_type(md_type):
implemented_ensemble_type_parsers = \
["NVE", "NVT", "NPT_F", "REFTRAJ"]
if md_type not in implemented_ensemble_type_parsers:
raise ValueError(
f"Parser for MD Type {md_type} haven't been implemented yet!\n"
"Please contact the developer for more information."
)

26 changes: 13 additions & 13 deletions cp2kdata/paser_func.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from .block_parser.dft_plus_u import parse_dft_plus_u_occ
from .block_parser.forces import parse_atomic_forces_list
from .block_parser.geo_opt import parse_geo_opt
from .block_parser.header_info import parse_dft_info, parse_global_info, parse_cp2k_info
from .block_parser.hirshfeld import parse_hirshfeld_pop_list
from .block_parser.mulliken import parse_mulliken_pop_list
from .block_parser.energies import parse_energies_list
from .block_parser.coordinates import parse_init_atomic_coordinates
from .block_parser.atomic_kind import parse_atomic_kinds
from .block_parser.errors_handle import parse_errors
from .block_parser.stress import parse_stress_tensor_list
from .block_parser.cells import parse_all_cells
from .block_parser.md_xyz import parse_md_ener, parse_pos_xyz, parse_frc_xyz, parse_md_stress
from cp2kdata.block_parser.dft_plus_u import parse_dft_plus_u_occ
from cp2kdata.block_parser.forces import parse_atomic_forces_list
from cp2kdata.block_parser.geo_opt import parse_geo_opt_info
from cp2kdata.block_parser.header_info import parse_dft_info, parse_global_info, parse_cp2k_info, parse_md_info
from cp2kdata.block_parser.hirshfeld import parse_hirshfeld_pop_list
from cp2kdata.block_parser.mulliken import parse_mulliken_pop_list
from cp2kdata.block_parser.energies import parse_energies_list
from cp2kdata.block_parser.coordinates import parse_init_atomic_coordinates
from cp2kdata.block_parser.atomic_kind import parse_atomic_kinds
from cp2kdata.block_parser.errors_handle import parse_errors
from cp2kdata.block_parser.stress import parse_stress_tensor_list
from cp2kdata.block_parser.cells import parse_all_cells, parse_all_md_cells
from cp2kdata.block_parser.md_xyz import parse_md_ener, parse_pos_xyz, parse_frc_xyz, parse_md_stress
Empty file added cp2kdata/pdos/__init__.py
Empty file.
Loading

0 comments on commit 718e4d1

Please sign in to comment.