Merge pull request #110 from bryanwweber/comp-namedtuple

Add composition NamedTuple
pr-omethe-us · Mar 7, 2018 · c3b33dd
2 parents 2423b5b + cec7dec
commit c3b33dd
Show file tree

Hide file tree

Showing 4 changed files with 143 additions and 84 deletions.
diff --git a/.gitignore b/.gitignore
@@ -45,6 +45,7 @@ nosetests.xml
 coverage.xml
 *,cover
 .hypothesis/
+.pytest_cache/
 
 # Translations
 *.mo

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -23,6 +23,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 - Require Habanero>=0.6.0 to support the `mailto` argument
 - Require pytest>=3.2.0 to support the `pytest.mark.filterwarnings` decorator
 - Deprecate the `volume-history` field in the ChemKED YAML file and replace with `time-histories`
+- ORCID lookups are now done by a function in the local `orcid.py` module, removing an external dependency
+- Composition in a `DataPoint` is now stored in a dictionary, keyed by species name, of `namedtuple`s (called `Composition`) rather than a list of dictionaries
 
 ### Fixed
 - Crossref lookups in the converters use the common API instance from validation

diff --git a/pyked/chemked.py b/pyked/chemked.py
@@ -8,6 +8,7 @@
 from copy import deepcopy
 import xml.etree.ElementTree as etree
 import xml.dom.minidom as minidom
+from itertools import chain
 
 import numpy as np
 
@@ -68,6 +69,14 @@
 Apparatus.institution.__doc__ = '(`str`) The institution where the experiment is located'
 Apparatus.facility.__doc__ = '(`str`) The particular experimental facility at the location'
 
+Composition = namedtuple('Composition', 'species_name InChI SMILES atomic_composition amount')
+Composition.__doc__ = 'Detail of the initial composition of the mixture for the experiment'
+Composition.species_name.__doc__ = '(`str`) The name of the species'
+Composition.InChI.__doc__ = '(`str`) The InChI identifier for the species'
+Composition.SMILES.__doc__ = '(`str`) The SMILES identifier for the species'
+Composition.atomic_composition.__doc__ = '(`dict`) The atomic composition of the species'
+Composition.amount.__doc__ = '(`~pint.Quantity`) The amount of this species'
+
 
 class ChemKED(object):
     """Main ChemKED class.
@@ -241,8 +250,7 @@ def get_dataframe(self, output_columns=None):
         valid_labels[ref_index:ref_index + 1] = ['reference:' + a for a in Reference._fields]
         app_index = valid_labels.index('apparatus')
         valid_labels[app_index:app_index + 1] = ['apparatus:' + a for a in Apparatus._fields]
-
-        species_list = list(set([s['species-name'] for d in self.datapoints for s in d.composition]))  # noqa: E501
+        species_list = list(set(chain(*[list(d.composition.keys()) for d in self.datapoints])))
 
         if output_columns is None or len(output_columns) == 0:
             col_labels = valid_labels
@@ -270,12 +278,11 @@ def get_dataframe(self, output_columns=None):
         data = []
         for d in self.datapoints:
             row = []
-            d_species = [s['species-name'] for s in d.composition]
+            d_species = list(d.composition.keys())
             for col in col_labels:
                 if col in species_list:
                     if col in d_species:
-                        s_idx = d_species.index(col)
-                        row.append(d.composition[s_idx]['amount'])
+                        row.append(d.composition[col].amount)
                     else:
                         row.append(Q_(0.0, 'dimensionless'))
                 elif 'reference' in col or 'apparatus' in col:
@@ -393,16 +400,16 @@ def convert_to_ReSpecTh(self, filename):
             prop = etree.SubElement(common_properties, 'property')
             prop.set('name', 'initial composition')
 
-            for species in composition:
+            for species_name, species in composition.items():
                 component = etree.SubElement(prop, 'component')
                 species_link = etree.SubElement(component, 'speciesLink')
-                species_link.set('preferredKey', species['species-name'])
-                if species.get('InChI') is not None:
-                    species_link.set('InChI', species['InChI'])
+                species_link.set('preferredKey', species_name)
+                if species.InChI is not None:
+                    species_link.set('InChI', species.InChI)
 
                 amount = etree.SubElement(component, 'amount')
                 amount.set('units', composition_type)
-                amount.text = str(species['amount'].magnitude)
+                amount.text = str(species.amount.magnitude)
 
         # If multiple datapoints present, then find any common properties. If only
         # one datapoint, then composition should be the only "common" property.
@@ -459,26 +466,26 @@ def convert_to_ReSpecTh(self, filename):
         # Need to handle datapoints with possibly different species in the initial composition
         if 'composition' not in common:
             for dp in self.datapoints:
-                for species in dp.composition:
+                for species in dp.composition.values():
                     # Only add new property for species not already considered
-                    has_spec = any([species['species-name'] in d.values()
+                    has_spec = any([species.species_name in d.values()
                                     for d in property_idx.values()
                                     ])
                     if not has_spec:
                         prop = etree.SubElement(datagroup, 'property')
                         prop.set('description', '')
 
                         idx = 'x{}'.format(len(property_idx) + 1)
-                        property_idx[idx] = {'name': species['species-name']}
+                        property_idx[idx] = {'name': species.species_name}
                         prop.set('id', idx)
-                        prop.set('label', '[' + species['species-name'] + ']')
+                        prop.set('label', '[' + species.species_name + ']')
                         prop.set('name', 'composition')
                         prop.set('units', self.datapoints[0].composition_type)
 
                         species_link = etree.SubElement(prop, 'speciesLink')
-                        species_link.set('preferredKey', species['species-name'])
-                        if species.get('InChI'):
-                            species_link.set('InChI', species['InChI'])
+                        species_link.set('preferredKey', species.species_name)
+                        if species.InChI is not None:
+                            species_link.set('InChI', species.InChI)
 
         for dp in self.datapoints:
             datapoint = etree.SubElement(datagroup, 'dataPoint')
@@ -490,10 +497,10 @@ def convert_to_ReSpecTh(self, filename):
                     value.text = str(quantity.magnitude)
                 else:
                     # composition
-                    for item in dp.composition:
-                        if item['species-name'] == val['name']:
+                    for item in dp.composition.values():
+                        if item.species_name == val['name']:
                             value = etree.SubElement(datapoint, idx)
-                            value.text = str(item['amount'].magnitude)
+                            value.text = str(item.amount.magnitude)
 
         # See https://stackoverflow.com/a/16097112 for the None.__ne__
         history_types = ['volume_history', 'temperature_history', 'pressure_history',
@@ -646,15 +653,20 @@ def __init__(self, properties):
             self.rcm_data = None
 
         self.composition_type = properties['composition']['kind']
-        composition = deepcopy(properties['composition']['species'])
+        composition = {}
+        for species in properties['composition']['species']:
+            species_name = species['species-name']
+            amount = self.process_quantity(species['amount'])
+            InChI = species.get('InChI')
+            SMILES = species.get('SMILES')
+            atomic_composition = species.get('atomic-composition')
+            composition[species_name] = Composition(
+                species_name=species_name, InChI=InChI, SMILES=SMILES,
+                atomic_composition=atomic_composition, amount=amount)
 
-        for idx, species in enumerate(composition):
-            quant = self.process_quantity(species['amount'])
-            composition[idx]['amount'] = quant
         setattr(self, 'composition', composition)
 
         self.equivalence_ratio = properties.get('equivalence-ratio')
-
         self.ignition_type = deepcopy(properties.get('ignition-type'))
 
         if 'time-histories' in properties and 'volume-history' in properties:
@@ -774,20 +786,20 @@ def get_cantera_composition_string(self, species_conversion=None):
             raise ValueError('Unknown composition type: {}'.format(self.composition_type))
 
         if species_conversion is None:
-            comps = ['{!s}:{:.4e}'.format(c['species-name'],
-                     c['amount'].magnitude/factor) for c in self.composition]
+            comps = ['{!s}:{:.4e}'.format(c.species_name,
+                     c.amount.magnitude/factor) for c in self.composition.values()]
         else:
             comps = []
-            for c in self.composition:
-                amount = c['amount'].magnitude/factor
-                idents = [c.get(s) for s in ['species-name', 'InChI', 'SMILES'] if c.get(s, False)]
+            for c in self.composition.values():
+                amount = c.amount.magnitude/factor
+                idents = [getattr(c, s, False) for s in ['species_name', 'InChI', 'SMILES']]
                 present = [i in species_conversion for i in idents]
                 if not any(present):
-                    comps.append('{!s}:{:.4e}'.format(c['species-name'], amount))
+                    comps.append('{!s}:{:.4e}'.format(c.species_name, amount))
                 else:
                     if len([i for i in present if i]) > 1:
                         raise ValueError('More than one conversion present for species {}'.format(
-                                         c['species-name']))
+                                         c.species_name))
 
                     ident = idents[present.index(True)]
                     species_replacement_name = species_conversion.pop(ident)