Skip to content

Commit

Permalink
Handle job abort due to SCF not being converged (#224)
Browse files Browse the repository at this point in the history
* Parse the corresponding error in the CP2K output.
* If the simulation updates geometry, enable IGNORE_CONVERGENCE_FAILURE [1] 
* Add a test that works for both: old and new CP2K versions

[1] https://manual.cp2k.org/trunk/CP2K_INPUT/FORCE_EVAL/DFT/SCF.html#CP2K_INPUT.FORCE_EVAL.DFT.SCF.IGNORE_CONVERGENCE_FAILURE
---------
Co-authored-by: Aliaksandr Yakutovich <[email protected]>
  • Loading branch information
cpignedoli authored Jan 28, 2025
1 parent f5116d8 commit 5d10ef7
Show file tree
Hide file tree
Showing 7 changed files with 243 additions and 13 deletions.
5 changes: 5 additions & 0 deletions aiida_cp2k/calculations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ def define(cls, spec):
"ERROR_OUT_OF_WALLTIME",
message="The calculation stopped prematurely because it ran out of walltime.",
)
spec.exit_code(
450,
"ERROR_SCF_NOT_CONVERGED",
message="SCF cycle did not converge for the given threshold.",
)
spec.exit_code(
500,
"ERROR_GEOMETRY_CONVERGENCE_NOT_REACHED",
Expand Down
27 changes: 21 additions & 6 deletions aiida_cp2k/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ class Cp2kBaseParser(parsers.Parser):
def parse(self, **kwargs):
"""Receives in input a dictionary of retrieved nodes. Does all the logic here."""

self.SEVERE_ERRORS = [
self.exit_codes.ERROR_OUTPUT_CONTAINS_ABORT,
]

try:
_ = self.retrieved
except common.NotExistent:
Expand Down Expand Up @@ -68,13 +72,15 @@ def _parse_stdout(self):

# Check the standard output for errors.
exit_code = self._check_stdout_for_errors(output_string)
if exit_code:

# Return the error code if an error was severe enough to stop the parsing.
if exit_code in self.SEVERE_ERRORS:
return exit_code

# Parse the standard output.
result_dict = utils.parse_cp2k_output(output_string)
self.out("output_parameters", orm.Dict(dict=result_dict))
return None
return exit_code

def _parse_final_structure(self):
"""CP2K trajectory parser."""
Expand All @@ -100,6 +106,11 @@ def _check_stdout_for_errors(self, output_string):
"""This function checks the CP2K output file for some basic errors."""

if "ABORT" in output_string:
if (
"SCF run NOT converged. To continue the calculation regardless"
in output_string
):
return self.exit_codes.ERROR_SCF_NOT_CONVERGED
return self.exit_codes.ERROR_OUTPUT_CONTAINS_ABORT

if "exceeded requested execution time" in output_string:
Expand Down Expand Up @@ -219,7 +230,9 @@ def _parse_stdout(self):

# Check the standard output for errors.
exit_code = self._check_stdout_for_errors(output_string)
if exit_code:

# Return the error code if an error was severe enough to stop the parsing.
if exit_code in self.SEVERE_ERRORS:
return exit_code

# Parse the standard output.
Expand Down Expand Up @@ -257,7 +270,7 @@ def _parse_stdout(self):
self.out("output_bands", bnds)

self.out("output_parameters", orm.Dict(dict=result_dict))
return None
return exit_code


class Cp2kToolsParser(Cp2kBaseParser):
Expand All @@ -275,7 +288,9 @@ def _parse_stdout(self):

# Check the standard output for errors.
exit_code = self._check_stdout_for_errors(output_string)
if exit_code:

# Return the error code if an error was severe enough to stop the parsing.
if exit_code in self.SEVERE_ERRORS:
return exit_code

# Parse the standard output.
Expand All @@ -296,4 +311,4 @@ def _parse_stdout(self):
pass

self.out("output_parameters", orm.Dict(dict=result_dict))
return None
return exit_code
4 changes: 4 additions & 0 deletions aiida_cp2k/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
Cp2kInput,
add_ext_restart_section,
add_first_snapshot_in_reftraj_section,
add_ignore_convergence_failure,
add_wfn_restart_section,
increase_geo_opt_max_iter_by_factor,
)
Expand All @@ -24,6 +25,7 @@
check_resize_unit_cell,
get_input_multiplicity,
get_kinds_section,
get_last_convergence_value,
merge_dict,
merge_Dict,
ot_has_small_bandgap,
Expand All @@ -33,11 +35,13 @@
__all__ = [
"Cp2kInput",
"add_ext_restart_section",
"add_ignore_convergence_failure",
"add_first_snapshot_in_reftraj_section",
"add_wfn_restart_section",
"check_resize_unit_cell",
"get_input_multiplicity",
"get_kinds_section",
"get_last_convergence_value",
"HARTREE2EV",
"HARTREE2KJMOL",
"increase_geo_opt_max_iter_by_factor",
Expand Down
10 changes: 10 additions & 0 deletions aiida_cp2k/utils/input_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,13 @@ def increase_geo_opt_max_iter_by_factor(input_dict, factor):
factor * params.get("MOTION", {}).get("GEO_OPT", {}).get("MAX_ITER", 100)
)
return Dict(params)


@calcfunction
def add_ignore_convergence_failure(input_dict):
    """Enable IGNORE_CONVERGENCE_FAILURE in the SCF section of a CP2K input.

    Returns a new ``Dict`` node; any missing FORCE_EVAL/DFT/SCF sections are
    created on the way down.
    """
    params = input_dict.get_dict()
    scf_section = (
        params.setdefault("FORCE_EVAL", {})
        .setdefault("DFT", {})
        .setdefault("SCF", {})
    )
    scf_section["IGNORE_CONVERGENCE_FAILURE"] = ".TRUE."
    return Dict(params)
30 changes: 30 additions & 0 deletions aiida_cp2k/utils/workchains.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
###############################################################################
"""AiiDA-CP2K utilities for workchains"""

import re

from aiida.engine import calcfunction
from aiida.orm import Dict
from aiida.plugins import DataFactory
Expand Down Expand Up @@ -100,6 +102,34 @@ def ot_has_small_bandgap(cp2k_input, cp2k_output, bandgap_thr_ev):
return using_ot and is_bandgap_small


def get_last_convergence_value(input_string):
    """Return the most recent SCF gradient found in a CP2K output string.

    "OT CG" iteration lines are preferred; if none are present, "DIIS/Diag."
    lines are used as a fallback.

    Args:
        input_string (str): the cp2k output string.
    Returns:
        float or None: the SCF gradient, or None if neither pattern matched.
    """
    # Each pattern captures the gradient field of its respective SCF line.
    gradient_patterns = (
        r"OT CG\s+\S+\s+\S+\s+\S+\s+\S+\s+([\d.E+-]+)",  # "OT CG" lines
        r"DIIS/Diag\.\s+\S+\s+\S+\s+\S+\s+([\d.E+-]+)",  # "DIIS/Diag." lines
    )
    for pattern in gradient_patterns:
        found = re.findall(pattern, input_string)
        if found:
            # The last occurrence corresponds to the most recent SCF step.
            return float(found[-1])
    return None


@calcfunction
def check_resize_unit_cell(struct, threshold):
"""Returns the multiplication factors for the cell vectors to respect, in every direction:
Expand Down
34 changes: 27 additions & 7 deletions aiida_cp2k/workchains/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,33 @@ def overwrite_input_structure(self):
@engine.process_handler(priority=401, exit_codes=[
Cp2kCalculation.exit_codes.ERROR_OUT_OF_WALLTIME,
Cp2kCalculation.exit_codes.ERROR_OUTPUT_INCOMPLETE,
Cp2kCalculation.exit_codes.ERROR_SCF_NOT_CONVERGED,
Cp2kCalculation.exit_codes.ERROR_MAXIMUM_NUMBER_OPTIMIZATION_STEPS_REACHED,
], enabled=False)
def restart_incomplete_calculation(self, calc):
"""This handler restarts incomplete calculations."""
content_string = calc.outputs.retrieved.base.repository.get_object_content(calc.base.attributes.get('output_filename'))

# CP2K was updating geometry - continue with that.
restart_geometry_transformation = re.search(r"Max. gradient\s+=", content_string) or re.search(r"OPT\| Maximum gradient\s*[-+]?\d*\.?\d+", content_string) or "MD| Step number" in content_string
end_inner_scf_loop = "Total energy: " in content_string
# The message is written in the log file when the CP2K input parameter `LOG_PRINT_KEY` is set to True.
if not (restart_geometry_transformation or end_inner_scf_loop or "Writing RESTART" in content_string):
# CP2K was updating geometry.
possible_geometry_restart = re.search(r"Max. gradient\s+=", content_string) or re.search(r"OPT\| Maximum gradient\s*[-+]?\d*\.?\d+", content_string) or "MD| Step number" in content_string

# CP2K wrote a wavefunction restart file.
possible_scf_restart = "Total energy: " in content_string

# External restart file was written.
possible_ext_restart = "Writing RESTART" in content_string

# Check if calculation aborted due to SCF convergence failure.
scf_didnt_converge_and_aborted = "SCF run NOT converged. To continue the calculation regardless" in content_string
good_scf_gradient = None
if scf_didnt_converge_and_aborted:
scf_gradient = utils.get_last_convergence_value(content_string)
scf_restart_thr = 1e-5 # if ABORT for not SCF convergence, but SCF gradient is small, continue
good_scf_gradient = (scf_gradient is not None) and (scf_gradient < scf_restart_thr)

# Condition for allowing restart.
restart_possible = any([possible_geometry_restart, possible_scf_restart, possible_ext_restart]) and good_scf_gradient is not False
if not restart_possible: # The message is written in the log file when the CP2K input parameter `LOG_PRINT_KEY` is set to True.
self.report("It seems that the restart of CP2K calculation wouldn't be able to fix the problem as the "
"previous calculation didn't produce any output to restart from. "
"Sending a signal to stop the Base work chain.")
Expand All @@ -103,7 +119,7 @@ def restart_incomplete_calculation(self, calc):

params = utils.add_wfn_restart_section(params, orm.Bool('kpoints' in self.ctx.inputs))

if restart_geometry_transformation:
if possible_geometry_restart:
# Check if we need to fix restart snapshot in REFTRAJ MD
first_snapshot = None
try:
Expand All @@ -114,9 +130,13 @@ def restart_incomplete_calculation(self, calc):
pass
params = utils.add_ext_restart_section(params)

is_geo_opt = params.get_dict().get("GLOBAL", {}).get("RUN_TYPE") in ["GEO_OPT", "CELL_OPT"]
if is_geo_opt and good_scf_gradient:
self.report("The SCF was not converged, but the SCF gradient is small and we are optimising geometry. Enabling IGNORE_CONVERGENCE_FAILURE.")
params = utils.add_ignore_convergence_failure(params)

if calc.exit_code == Cp2kCalculation.exit_codes.ERROR_MAXIMUM_NUMBER_OPTIMIZATION_STEPS_REACHED:
# If the maximum number of optimization steps is reached, we increase the number of steps by 40%.
print(type(params))
params = utils.increase_geo_opt_max_iter_by_factor(params, 1.4)

self.ctx.inputs.parameters = params # params (new or old ones) that include the necessary restart information.
Expand Down
146 changes: 146 additions & 0 deletions examples/workchains/example_base_geo_opt_ignore_converge_restart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
###############################################################################
# Copyright (c), The AiiDA-CP2K authors. #
# SPDX-License-Identifier: MIT #
# AiiDA-CP2K is hosted on GitHub at https://github.com/aiidateam/aiida-cp2k #
# For further information on the license, see the LICENSE.txt file. #
###############################################################################
"""An example testing the restart calculation handler for ENERGY run in CP2K."""

import os
import sys

import ase.io
import click
from aiida.common import NotExistent
from aiida.engine import run_get_node
from aiida.orm import Dict, SinglefileData, load_code
from aiida.plugins import DataFactory, WorkflowFactory

# Resolve the AiiDA plugin classes once at module load time.
Cp2kBaseWorkChain = WorkflowFactory("cp2k.base")
StructureData = DataFactory("core.structure")


def example_base(cp2k_code):
    """Run a CP2K GEO_OPT workchain whose SCF is deliberately under-converged.

    ``MAX_SCF`` is set too low for the SCF to converge, so CP2K aborts and the
    ``Cp2kBaseWorkChain`` restart handler (explicitly enabled below) gets a
    chance to restart the calculation.

    Args:
        cp2k_code: the AiiDA ``Code`` node pointing to a CP2K executable.
    """

    thisdir = os.path.dirname(os.path.realpath(__file__))

    # NOTE: fixed from the copy-pasted "ENERGY" message — this example runs GEO_OPT.
    print("Testing CP2K GEO_OPT on H2O (DFT) through a workchain...")

    # Basis set.
    basis_file = SinglefileData(
        file=os.path.join(thisdir, "..", "files", "BASIS_MOLOPT")
    )

    # Pseudopotentials.
    pseudo_file = SinglefileData(
        file=os.path.join(thisdir, "..", "files", "GTH_POTENTIALS")
    )

    # Structure.
    structure = StructureData(
        ase=ase.io.read(os.path.join(thisdir, "..", "files", "h2o.xyz"))
    )

    # Parameters.
    parameters = Dict(
        {
            "GLOBAL": {
                "RUN_TYPE": "GEO_OPT",
            },
            "FORCE_EVAL": {
                "METHOD": "Quickstep",
                "DFT": {
                    "BASIS_SET_FILE_NAME": "BASIS_MOLOPT",
                    "POTENTIAL_FILE_NAME": "GTH_POTENTIALS",
                    "QS": {
                        "EPS_DEFAULT": 1.0e-16,
                        "WF_INTERPOLATION": "ps",
                        "EXTRAPOLATION_ORDER": 3,
                    },
                    "MGRID": {
                        "NGRIDS": 4,
                        "CUTOFF": 450,
                        "REL_CUTOFF": 70,
                    },
                    "XC": {
                        "XC_FUNCTIONAL": {
                            "_": "LDA",
                        },
                    },
                    "POISSON": {
                        "PERIODIC": "none",
                        "PSOLVER": "MT",
                    },
                    "SCF": {
                        "MAX_SCF": 10,  # not enough to converge
                        "EPS_SCF": "1.e-6",
                        "PRINT": {"RESTART": {"_": "ON"}},
                    },
                },
                "SUBSYS": {
                    "KIND": [
                        {
                            "_": "O",
                            "BASIS_SET": "DZVP-MOLOPT-SR-GTH",
                            "POTENTIAL": "GTH-LDA-q6",
                        },
                        {
                            "_": "H",
                            "BASIS_SET": "DZVP-MOLOPT-SR-GTH",
                            "POTENTIAL": "GTH-LDA-q1",
                        },
                    ],
                },
            },
        }
    )

    # Construct process builder.
    builder = Cp2kBaseWorkChain.get_builder()

    # Switch on restart_incomplete_calculation, disabled by default.
    builder.handler_overrides = Dict(
        {"restart_incomplete_calculation": {"enabled": True}}
    )

    # Input structure.
    builder.cp2k.structure = structure
    builder.cp2k.parameters = parameters
    builder.cp2k.code = cp2k_code
    builder.cp2k.file = {
        "basis": basis_file,
        "pseudo": pseudo_file,
    }
    builder.cp2k.metadata.options = {
        "resources": {
            "num_machines": 1,
            "num_mpiprocs_per_machine": 1,
        },
        "max_wallclock_seconds": 1 * 10 * 60,
    }

    print("Submitted calculation...")
    _, process_node = run_get_node(builder)

    # Exit status 0 means the workchain (including any restarts) succeeded.
    if process_node.exit_status == 0:
        print("Work chain is finished correctly.")
    else:
        print("ERROR! Work chain failed.")
        sys.exit(1)


@click.command("cli")
@click.argument("codelabel")
def cli(codelabel):
    """Click interface."""
    # Resolve the code label to an AiiDA Code node before running the example.
    try:
        cp2k_code = load_code(codelabel)
    except NotExistent:
        print(f"The code '{codelabel}' does not exist")
        sys.exit(1)
    example_base(cp2k_code)


# Allow running this example directly as a script.
if __name__ == "__main__":
    cli()

0 comments on commit 5d10ef7

Please sign in to comment.