Skip to content

Commit

Permalink
parsing orca files - major revamp (#5)
Browse files Browse the repository at this point in the history
* mod parse_orca.py to skip first output_block if len(output_blocks) > 1

* mod orca_file.py to read charge and multiplicity from SCF settings for compatibility with compound jobs. also annotate parse_orca.py to describe conditional for returning output blocks

* mod orca_file.py to recognize run time and successful terminations in compound jobs

* tidy orca_file.py by removing lines that were commented out

* add TightOpt and OptTs to job types, comment out if len(enthalpies) > 0 to prevent errors on parsing jobs that don't converge

* update orca_file.py and parse_orca.py to look for multiple frequency calcs and return the last one

* update docs and untit tests to indicate incompatibility with MiniPrint

* start new orca recipe

* clean up OrcaJobTypes with value and expected properties accessed via tuple. If ScanTs is in header, add OPT to job_types.

* update description of properties_list function in ensemble.py

* update tutorial 1 to prevent error message about accessing molecules via file.molecule

* update hello_world.py to prevent error

* add ORCA recipe

* parsing charges

* add orca to tutorial_01

* format tutorial_01

* format tutorial_01

* format tutorial_01

* format tutorial_01

* update recipe 9

* update recipe 9

* update recipe 9

* add orca tests for frequencies and compound jobs

* format recipe09

---------

Co-authored-by: Joe Gair <[email protected]>
  • Loading branch information
joegair and Joe Gair authored Apr 11, 2023
1 parent e00e298 commit d971a8f
Show file tree
Hide file tree
Showing 28 changed files with 222,732 additions and 132 deletions.
2 changes: 1 addition & 1 deletion cctk/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def molecule_list(self):

def properties_list(self):
"""
Returns a list of the constituent molecules.
Returns a list of dictionaries. One dictionary per geometry. Each dictionary contains the property names and property values for each geometry in the ensemble.
"""
return list(self.values())

Expand Down
117 changes: 73 additions & 44 deletions cctk/orca_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,35 +17,29 @@ class OrcaJobType(Enum):
All jobs have type ``SP`` by default.
"""

SP = "sp"
SP = ("sp", ["energy", "scf_iterations"])
"""
Single point energy calculation.
"""

OPT = "opt"
OPT = ("opt", ["rms_gradient", "rms_step", "max_gradient", "max_step"])
"""
Geometry optimization.
"""

FREQ = "freq"
FREQ = ("freq", ["gibbs_free_energy", "enthalpy", "frequencies", "temperature"])
"""
Hessian calculation.
"""

NMR = "nmr"
NMR = ("nmr", ["isotropic_shielding"])
"""
NMR shielding prediction.
"""

#### This static variable tells what properties are expected from each JobType.
EXPECTED_PROPERTIES = {
"sp": ["energy", "scf_iterations",],
# "opt": ["rms_gradient", "rms_step", "max_gradient", "max_step"],
"opt": [],
"freq": ["gibbs_free_energy", "enthalpy", "frequencies", "temperature"],
"nmr": ["isotropic_shielding",],
}

def __init__(self, value, expected_properties):
self._value_ = value
self.expected_properties = expected_properties

class OrcaFile(File):
"""
Expand Down Expand Up @@ -107,19 +101,40 @@ def read_file(cls, filename):
job_types = cls._assign_job_types(header)
variables, blocks = parse.read_blocks_and_variables(input_lines)

success = 0
successful_scf_convergence = 0
successful_opt = 0
successful_freq = 0
successful_NMR_EPR = 0
is_scan_job = False
# add identifiers for successful termination of other job types

elapsed_time = 0
for line in lines:
if line.strip().startswith("****ORCA TERMINATED NORMALLY****"):
success += 1
elif line.startswith("TOTAL RUN TIME"):
for line in lines:
if line.startswith("FINAL SINGLE POINT ENERGY"): #### SCF converged at least once
successful_scf_convergence += 1
elif line.strip().startswith("*** THE OPTIMIZATION HAS CONVERGED ***"): #### geometry converged
successful_opt += 1
elif line.startswith("VIBRATIONAL FREQUENCIES"): #### a frequency job was completed
successful_freq += 1
elif line.startswith("Maximum memory used throughout the entire EPRNMR-calculation:"): #### an EPR NMR job was completed
successful_NMR_EPR += 1
elif line.strip().startswith("* Relaxed Surface Scan *"): #### this is a scan job
is_scan_job = True
elif line.startswith("Sum of individual times ..."): #### the job was completed
fields = line.split()
assert len(fields) == 13, f"unexpected number of fields on elapsed time line:\n{line}"
days = float(fields[3])
hours = float(fields[5])
minutes = float(fields[7])
seconds = float(fields[9])
elapsed_time = days * 86400 + hours * 3600 + minutes * 60 + seconds
assert len(fields) == 10, f"unexpected number of fields on elapsed time line:\n{line}"
elapsed_time = float(fields[5])

# different than G16 "successful termination"
success = 0
if successful_opt > 0:
success += 1
if successful_freq > 0:
success += 1
if successful_NMR_EPR > 0:
success += 1
if successful_scf_convergence > 0:
success += 1

energies, iters = parse.read_energies(lines)
if len(energies) == 0:
Expand All @@ -128,8 +143,9 @@ def read_file(cls, filename):
atomic_numbers, geometries = parse.read_geometries(lines, num_to_find=len(energies))
assert len(geometries) >= len(energies), "can't have an energy without a geometry (cf. pigeonhole principle)"

charge = lines.find_parameter("xyz", 6, 4)[0]
multip = lines.find_parameter("xyz", 6, 5)[0]
# this approach does not work with the option miniprint
charge = lines.find_parameter("Total Charge Charge ....", 5, 4)[0]
multip = lines.find_parameter("Multiplicity Mult ....", 4, 3)[0]

#### TODO
# detect Mayer bond orders
Expand Down Expand Up @@ -169,35 +185,44 @@ def read_file(cls, filename):
properties[idx]["max_step"] = max_step[idx]

if OrcaJobType.FREQ in job_types:
properties[-1]["frequencies"] = sorted(parse.read_freqs(lines))
properties[-1]["frequencies"] = sorted(parse.read_freqs(lines, successful_freq))

enthalpies = lines.find_parameter("Total Enthalpy", expected_length=5, which_field=3)
properties[-1]["enthalpy"] = enthalpies[-1]
try:
properties[-1]["enthalpy"] = enthalpies[-1]
except Exception as e:
pass

gibbs = lines.find_parameter("Final Gibbs free", expected_length=7, which_field=5)
properties[-1]["gibbs_free_energy"] = gibbs[-1]

temperature = lines.find_parameter("Temperature", expected_length=4, which_field=2)
if len(temperature) > 0 and len(gibbs) > 0:
properties[-1]["temperature"] = temperature[-1]
corrected_free_energy = get_corrected_free_energy(gibbs[-1], properties[-1]["frequencies"],
try:
properties[-1]["gibbs_free_energy"] = gibbs[-1]
except Exception as e:
pass

try:
temperature = lines.find_parameter("Temperature", expected_length=4, which_field=2)
if len(temperature) > 0 and len(gibbs) > 0:
properties[-1]["temperature"] = temperature[-1]
corrected_free_energy = get_corrected_free_energy(gibbs[-1], properties[-1]["frequencies"],
frequency_cutoff=100.0, temperature=temperature[-1])
properties[-1]["quasiharmonic_gibbs_free_energy"] = float(corrected_free_energy)
properties[-1]["quasiharmonic_gibbs_free_energy"] = float(corrected_free_energy)
except Exception as e:
pass

if OrcaJobType.NMR in job_types:
nmr_shifts = parse.read_nmr_shifts(lines, molecules[0].num_atoms())
if nmr_shifts is not None:
properties[-1]["isotropic_shielding"] = nmr_shifts

try:
charges = parse.read_mulliken_charges(lines)
charges = parse.read_mulliken_charges(lines, successful_opt, is_scan_job)
assert len(charges) == len(atomic_numbers)
properties[-1]["mulliken_charges"] = charges
except Exception as e:
pass

try:
charges = parse.read_loewdin_charges(lines)
charges = parse.read_loewdin_charges(lines, successful_opt, is_scan_job)
assert len(charges) == len(atomic_numbers)
properties[-1]["lowdin_charges"] = charges
except Exception as e:
Expand Down Expand Up @@ -253,7 +278,7 @@ def write_file(self, filename, molecule=None, header=None, variables=None, block
self.write_molecule_to_file(filename, molecule, header, variables, blocks)

@classmethod
def write_molecule_to_file(cls, filename, molecule, header, variables=None, blocks=None, print_symbol=False):
def write_molecule_to_file(cls, filename, molecule, header, variables={"maxcore": 2000}, blocks={"pal": ["nproc 16"]}, print_symbol=False):
"""
Write an ``.inp`` file using the given molecule.
Expand Down Expand Up @@ -336,7 +361,7 @@ def imaginaries(self):
@classmethod
def _assign_job_types(cls, header):
"""
Assigns ``OrcaJobType`` objects from route card. ``OrcaJobType.SP`` is assigned by default.
Assigns ``OrcaJobType`` objects from header. ``OrcaJobType.SP`` is assigned by default.
Args:
header (str): Orca header
Expand All @@ -345,11 +370,13 @@ def _assign_job_types(cls, header):
list of ``OrcaJobType`` objects
"""
job_types = []
for name, member in OrcaJobType.__members__.items():
if re.search(f" {member.value}", str(header), re.IGNORECASE):
job_types.append(member)
for job_type in OrcaJobType:
if re.search(f"{job_type.value}", str(header), re.IGNORECASE):
job_types.append(job_type)
if OrcaJobType.SP not in job_types:
job_types.append(OrcaJobType.SP)
job_types.append(OrcaJobType.SP) # include SP in all jobs, whether specified in header or not
if re.search("ScanTs", str(header), re.IGNORECASE):
job_types.append(OrcaJobType.OPT) #ScanTs is an optimization job that does not contain the string "opt" in it's keyword
return job_types

def check_has_properties(self):
Expand All @@ -359,8 +386,10 @@ def check_has_properties(self):
This only checks the last molecule in ``self.ensemble``, for now.
"""
if self.successful_terminations > 0:
if self.successful_terminations == 2 and ((OrcaJobType.OPT in self.job_types) and (OrcaJobType.FREQ in self.job_types)):
return # opt and freq jobs should have three terminations
for job_type in self.job_types:
for prop in EXPECTED_PROPERTIES[job_type.value]:
for prop in job_type.expected_properties:
if not self.ensemble.has_property(-1, prop):
raise ValueError(f"expected property {prop} for job type {job_type}, but it's not there!")
else:
Expand Down
108 changes: 76 additions & 32 deletions cctk/parse_orca.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import numpy as np
import re
import re, warnings

from cctk.helper_functions import get_number
from cctk import OneIndexedArray, LazyLineObject
Expand Down Expand Up @@ -61,59 +61,87 @@ def split_multiple_inputs(filename):
start_block = 0
with open(filename, "r") as lines:
for idx, line in enumerate(lines):
if re.search("COMPOUND JOB \d{1,}", line):
if re.search("COMPOUND JOB \d{1,}", line):
output_blocks.append(LazyLineObject(file=filename, start=start_block, end=idx))
start_block = idx
output_blocks.append(LazyLineObject(file=filename, start=start_block, end=idx))

return output_blocks[:]
if len(output_blocks) <= 1:
return output_blocks[:]

def read_mulliken_charges(lines):
# this conditional skips the first block of %compound jobs which only describes the input and doesn't have an associated energy
elif len(output_blocks) > 1:
return output_blocks[1:]

def read_mulliken_charges(lines, successful_opt, is_scan_job):
"""
Reads charges.
Reads charges. Returns charges on penultimate geometry for some scan jobs.
Args:
lines (list): list of lines in file
Returns:
``cctk.OneIndexedArray`` of charges
"""
charges = []
charge_block = lines.search_for_block("MULLIKEN ATOMIC CHARGES", "Sum of atomic charges", join="\n")
for line in charge_block.split("\n")[2:]:
fields = re.split(" +", line)
fields = list(filter(None, fields))

if len(fields) == 4:
charges.append(float(fields[3]))
count = successful_opt + 1
if is_scan_job:
count = 2*successful_opt
else:
count = successful_opt + 1

charge_block = lines.search_for_block("MULLIKEN ATOMIC CHARGES", "Sum of atomic charges", count=count, join="\n")
if not isinstance(charge_block, list):
charge_block = [charge_block]

for block in charge_block:
charges = []
for line in block.split("\n")[2:]:
fields = re.split(" +", line)
fields = list(filter(None, fields))
if len(fields) == 4:
charges.append(float(fields[3]))

return OneIndexedArray(charges)


def read_loewdin_charges(lines):

def read_loewdin_charges(lines, successful_opt, is_scan_job):
"""
Reads charges.
Reads charges. Returns charges on penultimate geometry for some scan jobs.
Args:
lines (list): list of lines in file
Returns:
``cctk.OneIndexedArray`` of charges
"""
charges = []
charge_block = lines.search_for_block("LOEWDIN ATOMIC CHARGES", "^$", join="\n")
for line in charge_block.split("\n")[2:]:
fields = re.split(" +", line)
fields = list(filter(None, fields))

if len(fields) == 4:
charges.append(float(fields[3]))
count = successful_opt + 1
if is_scan_job:
count = 2*successful_opt
else:
count = successful_opt + 1

charge_block = lines.search_for_block("LOEWDIN ATOMIC CHARGES", "^$", count=count, join="\n")
if not isinstance(charge_block, list):
charge_block = [charge_block]

for block in charge_block:
charges = []
for line in block.split("\n")[2:]:
fields = re.split(" +", line)
fields = list(filter(None, fields))
if len(fields) == 4:
charges.append(float(fields[3]))

return OneIndexedArray(charges)


def read_header(lines):
for line in lines:
if re.match("!", line):
if 'miniprint' in line.lower():
warnings.warn('The miniprint option may lead to parsing errors in cctk')
return line

def read_blocks_and_variables(lines):
Expand Down Expand Up @@ -149,17 +177,33 @@ def extract_input_file(lines):
input_lines.append(line)
return input_lines

def read_freqs(lines):
freq_block = lines.search_for_block("VIBRATIONAL FREQUENCIES", "NORMAL MODES", join="\n", max_len=1000)
if freq_block is None:


def read_freqs(lines, successful_freq):
freq_blocks = lines.search_for_block("VIBRATIONAL FREQUENCIES", "NORMAL MODES", count=successful_freq, join="\n", max_len=1000)
if freq_blocks is None:
return []
freqs = []
for line in freq_block.split("\n"):
fields = re.split(" +", line.strip())
if len(fields) == 3:
if fields[2] == "cm**-1" and float(fields[1]) > 0:
freqs.append(float(fields[1]))
return freqs

elif successful_freq == 1:
freqs = []
for line in freq_blocks.split("\n"):
fields = re.split(" +", line.strip())
if len(fields) == 3 or len(fields) == 5:
if fields[2] == "cm**-1" and float(fields[1]) != 0:
freqs.append(float(fields[1]))
return freqs

elif successful_freq > 1 :
freqs_lists = []
for freq_block in freq_blocks:
freqs = []
for line in freq_block.split("\n"):
fields = re.split(" +", line.strip())
if len(fields) == 3 or len(fields) == 5:
if fields[2] == "cm**-1" and float(fields[1]) != 0:
freqs.append(float(fields[1]))
freqs_lists.append(freqs)
return freqs_lists[-1]

def read_gradients(lines, num_to_find):
grad_blocks = lines.search_for_block("Geometry convergence", "Max\(Bonds", join="\n", count=num_to_find)
Expand Down
Binary file added docs/img/r09_step_vs_energy.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/img/r09_step_vs_rms_grad.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ To run a single unit test::

To run all the tests::

pythom -m unittest discover
python -m unittest discover



Expand Down
Loading

0 comments on commit d971a8f

Please sign in to comment.