Skip to content

Commit

Permalink
WIP: refactor: delete get_data_quantities shift_reference_state funct…
Browse files Browse the repository at this point in the history
…ions

Tests pass
  • Loading branch information
bocklund committed Jan 17, 2024
1 parent 491e57b commit f84e43a
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 137 deletions.
2 changes: 2 additions & 0 deletions espei/parameter_selection/fitting_descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,5 @@ def build_phase(self, dbe):
setattr(self, prop, prop_val)

elastic_fitting_description = ModelFittingDescription([StepElasticC11, StepElasticC12, StepElasticC44], model=ElasticModel)

gibbs_energy_fitting_description = ModelFittingDescription([StepHM, StepSM, StepCPM])
137 changes: 130 additions & 7 deletions espei/parameter_selection/fitting_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@

__all__ = [
"FittingStep",
"StepGenericRKMProperty",
"AbstractRKMPropertyStep",
"StepHM",
"StepSM",
"StepCPM",
"StepElasticC11",
"StepElasticC12",
"StepElasticC44",
Expand Down Expand Up @@ -82,7 +85,7 @@ def get_data_quantities(cls, desired_property, fixed_model, fixed_portions, data
raise NotImplementedError()


class StepGenericRKMProperty(FittingStep):
class AbstractRKMPropertyStep(FittingStep):
"""
This class is a base class for generic Redlich-Kister-Muggianu (RKM) properties.
Expand Down Expand Up @@ -188,19 +191,139 @@ def get_data_quantities(cls, desired_property: str, fixed_model: Model, fixed_po
rhs = np.asarray(rhs, dtype=np.float_)
return rhs

class StepElasticC11(StepGenericRKMProperty):

# Legacy imports while we're transitioning, fix these
from espei.parameter_selection.utils import feature_transforms
# TODO: support fitting GM_MIX and GM_FORM directly from DFT?
# TODO: for HM, SM, and CPM, refactor to stop using the transforms and build the transforms into the subclasses
# Maybe this is where we introduce the data and feature transforms class methods?
class StepHM(FittingStep):
parameter_name = "GM"
data_types_read = ["HM"]
supported_reference_states = ["_MIX", "_FORM"]
features: [symengine.S.One]

# TODO: this function actually does 2 things that should be split up into separate functions:
# 1. Extract data from Dataset objects into an array of raw values
# 2. Shifts the reference state of the values
# For Gibbs energy (and derivatives), we always shift to _FORM reference state
# This is the original s_r_s method from ESPEI
@classmethod
def shift_reference_state(cls, desired_data, feature_transform, fixed_model, mole_atoms_per_mole_formula_unit):
"""
Shift _MIX or _FORM data to a common reference state in per mole-atom units.
Parameters
----------
desired_data : List[Dict[str, Any]]
ESPEI single phase dataset
feature_transform : Callable
Function to transform an AST for the GM property to the property of
interest, i.e. entropy would be ``lambda GM: -symengine.diff(GM, v.T)``
fixed_model : pycalphad.Model
Model with all lower order (in composition) terms already fit. Pure
element reference state (GHSER functions) should be set to zero.
mole_atoms_per_mole_formula_unit : float
Number of moles of atoms in every mole atom unit.
Returns
-------
np.ndarray
Data for this feature in [qty]/mole-formula in a common reference state.
Raises
------
ValueError
Notes
-----
pycalphad Model parameters are stored as per mole-formula quantites, but
the calculated properties and our data are all in [qty]/mole-atoms. We
multiply by mole-atoms/mole-formula to convert the units to
[qty]/mole-formula.
"""
total_response = []
for dataset in desired_data:
values = np.asarray(dataset['values'], dtype=np.object_)*mole_atoms_per_mole_formula_unit
unique_excluded_contributions = set(dataset.get('excluded_model_contributions', []))
for config_idx in range(len(dataset['solver']['sublattice_configurations'])):
occupancy = dataset['solver'].get('sublattice_occupancies', None)
if dataset['output'].endswith('_FORM'):
pass
elif dataset['output'].endswith('_MIX'):
if occupancy is None:
raise ValueError('Cannot have a _MIX property without sublattice occupancies.')
else:
values[..., config_idx] += feature_transform(fixed_model.models['ref'])*mole_atoms_per_mole_formula_unit
else:
raise ValueError(f'Unknown property to shift: {dataset["output"]}')
for excluded_contrib in unique_excluded_contributions:
values[..., config_idx] += feature_transform(fixed_model.models[excluded_contrib])*mole_atoms_per_mole_formula_unit
total_response.append(values.flatten())
return total_response


@classmethod
def get_data_quantities(cls, desired_property, fixed_model, fixed_portions, data, sample_condition_dicts):
mole_atoms_per_mole_formula_unit = fixed_model._site_ratio_normalization
# Define site fraction symbols that will be reused
phase_name = fixed_model.phase_name

# Construct flattened list of site fractions corresponding to the ravelled data (from shift_reference_state)
site_fractions = []
for ds in data:
for _ in ds['conditions']['T']:
sf = build_sitefractions(phase_name, ds['solver']['sublattice_configurations'], ds['solver'].get('sublattice_occupancies', np.ones((len(ds['solver']['sublattice_configurations']), len(ds['solver']['sublattice_configurations'][0])), dtype=np.float_)))
site_fractions.append(sf)
site_fractions = list(itertools.chain(*site_fractions))

feat_transform = feature_transforms[desired_property]
data_qtys = np.concatenate(cls.shift_reference_state(data, feat_transform, fixed_model, mole_atoms_per_mole_formula_unit), axis=-1)
# Remove existing partial model contributions from the data, convert to per mole-formula units
data_qtys = data_qtys - feat_transform(fixed_model.ast)*mole_atoms_per_mole_formula_unit
# Subtract out high-order (in T) parameters we've already fit, already in per mole-formula units
data_qtys = data_qtys - feat_transform(sum(fixed_portions))
# if any site fractions show up in our data_qtys that aren't in this datasets site fractions, set them to zero.
for sf, i, cond_dict in zip(site_fractions, data_qtys, sample_condition_dicts):
missing_variables = symengine.S(i).atoms(v.SiteFraction) - set(sf.keys())
sf.update({x: 0. for x in missing_variables})
# The equations we have just have the site fractions as YS
# and interaction products as Z, so take the product of all
# the site fractions that we see in our data qtys
sf.update(cond_dict)
data_qtys = [symengine.S(i).xreplace(sf).evalf() for i, sf in zip(data_qtys, site_fractions)]
data_qtys = np.asarray(data_qtys, dtype=np.float_)
return data_qtys


# TODO: does it make sense to inherit from HM? Do we need an abstract class? Or does fixing the transforms issue and having each implementation be separate be correct?
# TODO: support "" (absolute) entropy reference state?
class StepSM(StepHM):
data_types_read = ["SM"]
features: [v.T]


# TODO: support "" (absolute) heat capacity reference state?
class StepCPM(StepHM):
data_types_read = ["CPM"]
features: [v.T * symengine.log(v.T), v.T**2, v.T**-1, v.T**3]



class StepElasticC11(AbstractRKMPropertyStep):
parameter_name = "C11"
data_types_read = ["C11"]

class StepElasticC12(StepGenericRKMProperty):
class StepElasticC12(AbstractRKMPropertyStep):
parameter_name = "C12"
data_types_read = ["C12"]

class StepElasticC44(StepGenericRKMProperty):
class StepElasticC44(AbstractRKMPropertyStep):
parameter_name = "C44"
data_types_read = ["C44"]

class StepV0(StepGenericRKMProperty):
class StepV0(AbstractRKMPropertyStep):
parameter_name = "V0"
data_types_read = ["V0"]
features = [symengine.S.One]
Expand Down Expand Up @@ -288,7 +411,7 @@ def shift_reference_state(cls, desired_data, fixed_model, mole_atoms_per_mole_fo
total_response.append(values.flatten())
return total_response

# TODO: maybe we could refactor the existing StepGenericRKMProperty to use
# TODO: maybe we could refactor the existing AbstractRKMPropertyStep to use
# the cls.transform_data method, as that's really the only difference
# # between this function and the V0 function.
@classmethod
Expand Down
124 changes: 2 additions & 122 deletions espei/parameter_selection/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
Tools used across parameter selection modules
"""

# TODO: small refactor so this file can be deleted :) - down with utils files!

from typing import Any, List, Dict, Tuple, Union
import itertools
import numpy as np
Expand All @@ -22,128 +24,6 @@
"HM": lambda GM: GM - v.T*symengine.diff(GM, v.T)}


# TODO: this function actually does 2 things that should be split up into separate functions:
# 1. Extract data from Dataset objects into an array of raw values
# 2. Shifts the reference state of the values
# For Gibbs energy (and derivatives), we always shift to _FORM reference state
def shift_reference_state(desired_data, feature_transform, fixed_model, mole_atoms_per_mole_formula_unit):
"""
Shift _MIX or _FORM data to a common reference state in per mole-atom units.
Parameters
----------
desired_data : List[Dict[str, Any]]
ESPEI single phase dataset
feature_transform : Callable
Function to transform an AST for the GM property to the property of
interest, i.e. entropy would be ``lambda GM: -symengine.diff(GM, v.T)``
fixed_model : pycalphad.Model
Model with all lower order (in composition) terms already fit. Pure
element reference state (GHSER functions) should be set to zero.
mole_atoms_per_mole_formula_unit : float
Number of moles of atoms in every mole atom unit.
Returns
-------
np.ndarray
Data for this feature in [qty]/mole-formula in a common reference state.
Raises
------
ValueError
Notes
-----
pycalphad Model parameters are stored as per mole-formula quantites, but
the calculated properties and our data are all in [qty]/mole-atoms. We
multiply by mole-atoms/mole-formula to convert the units to
[qty]/mole-formula.
"""
total_response = []
for dataset in desired_data:
values = np.asarray(dataset['values'], dtype=np.object_)*mole_atoms_per_mole_formula_unit
unique_excluded_contributions = set(dataset.get('excluded_model_contributions', []))
for config_idx in range(len(dataset['solver']['sublattice_configurations'])):
occupancy = dataset['solver'].get('sublattice_occupancies', None)
if dataset['output'].endswith('_FORM'):
pass
elif dataset['output'].endswith('_MIX'):
if occupancy is None:
raise ValueError('Cannot have a _MIX property without sublattice occupancies.')
else:
values[..., config_idx] += feature_transform(fixed_model.models['ref'])*mole_atoms_per_mole_formula_unit
else:
raise ValueError(f'Unknown property to shift: {dataset["output"]}')
for excluded_contrib in unique_excluded_contributions:
values[..., config_idx] += feature_transform(fixed_model.models[excluded_contrib])*mole_atoms_per_mole_formula_unit
total_response.append(values.flatten())
return total_response


def get_data_quantities(desired_property, fixed_model, fixed_portions, data, sample_condition_dicts):
"""
Parameters
----------
desired_property : str
String property corresponding to the features that could be fit, e.g. HM, SM_FORM, CPM_MIX
fixed_model : pycalphad.Model
Model with all lower order (in composition) terms already fit. Pure
element reference state (GHSER functions) should be set to zero.
fixed_portions : List[symengine.Basic]
SymEngine expressions for model parameters and interaction productions for
higher order (in T) terms for this property, e.g. [0, 3.0*YS*v.T]. In
[qty]/mole-formula.
data : List[Dict[str, Any]]
ESPEI single phase datasets for this property.
Returns
-------
np.ndarray[:]
Ravelled data quantities in [qty]/mole-formula
Notes
-----
pycalphad Model parameters (and therefore fixed_portions) are stored as per
mole-formula quantites, but the calculated properties and our data are all
in [qty]/mole-atoms. We multiply by mole-atoms/mole-formula to convert the
units to [qty]/mole-formula.
"""
mole_atoms_per_mole_formula_unit = fixed_model._site_ratio_normalization
# Define site fraction symbols that will be reused
YS = Symbol('YS')
Z = Symbol('Z')
V_I, V_J, V_K = Symbol('V_I'), Symbol('V_J'), Symbol('V_K')
phase_name = fixed_model.phase_name

# Construct flattened list of site fractions corresponding to the ravelled data (from shift_reference_state)
site_fractions = []
for ds in data:
for _ in ds['conditions']['T']:
sf = build_sitefractions(phase_name, ds['solver']['sublattice_configurations'], ds['solver'].get('sublattice_occupancies', np.ones((len(ds['solver']['sublattice_configurations']), len(ds['solver']['sublattice_configurations'][0])), dtype=np.float_)))
site_fractions.append(sf)
site_fractions = list(itertools.chain(*site_fractions))

feat_transform = feature_transforms[desired_property]
data_qtys = np.concatenate(shift_reference_state(data, feat_transform, fixed_model, mole_atoms_per_mole_formula_unit), axis=-1)
# Remove existing partial model contributions from the data, convert to per mole-formula units
data_qtys = data_qtys - feat_transform(fixed_model.ast)*mole_atoms_per_mole_formula_unit
# Subtract out high-order (in T) parameters we've already fit, already in per mole-formula units
data_qtys = data_qtys - feat_transform(sum(fixed_portions))
# if any site fractions show up in our data_qtys that aren't in this datasets site fractions, set them to zero.
for sf, i, cond_dict in zip(site_fractions, data_qtys, sample_condition_dicts):
missing_variables = symengine.S(i).atoms(v.SiteFraction) - set(sf.keys())
sf.update({x: 0. for x in missing_variables})
# The equations we have just have the site fractions as YS
# and interaction products as Z, so take the product of all
# the site fractions that we see in our data qtys
sf.update(cond_dict)
data_qtys = [symengine.S(i).xreplace(sf).evalf() for i, sf in zip(data_qtys, site_fractions)]
data_qtys = np.asarray(data_qtys, dtype=np.float_)
return data_qtys


def _get_sample_condition_dicts(calculate_dict: Dict[Any, Any], configuration_tuple: Tuple[Union[str, Tuple[str]]], phase_name: str) -> List[Dict[Symbol, float]]:
sublattice_dof = list(map(len, configuration_tuple))
sample_condition_dicts = []
Expand Down
5 changes: 3 additions & 2 deletions espei/paramselect.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@
from espei.error_functions.non_equilibrium_thermochemical_error import get_prop_samples
from espei.parameter_selection.model_building import build_redlich_kister_candidate_models, make_successive
from espei.parameter_selection.selection import select_model
from espei.parameter_selection.utils import get_data_quantities, feature_transforms, _get_sample_condition_dicts
from espei.parameter_selection.utils import feature_transforms, _get_sample_condition_dicts
from espei.sublattice_tools import generate_symmetric_group, generate_interactions, \
tuplify, recursive_tuplify, interaction_test, endmembers_from_interaction, generate_endmembers
from espei.utils import PickleableTinyDB, sigfigs, extract_aliases
from espei.parameter_selection.fitting_steps import StepHM

_log = logging.getLogger(__name__)

Expand Down Expand Up @@ -206,7 +207,7 @@ def fit_formation_energy(dbf, comps, phase_name, configuration, symmetry, datase
# We assume all properties in the same fitting step have the same
# features (all CPM, all HM, etc., but different ref states).
# data quantities are the same for each candidate model and can be computed up front
data_qtys = get_data_quantities(feature_type, fixed_model, fixed_portions, desired_data, sample_condition_dicts)
data_qtys = StepHM.get_data_quantities(feature_type, fixed_model, fixed_portions, desired_data, sample_condition_dicts)

# build the candidate model transformation matrix and response vector (A, b in Ax=b)
feature_matricies = []
Expand Down
12 changes: 6 additions & 6 deletions tests/test_parameter_generation_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import numpy as np
from pycalphad import Database, Model, variables as v
from espei.error_functions.non_equilibrium_thermochemical_error import get_prop_samples
from espei.parameter_selection.utils import get_data_quantities, _get_sample_condition_dicts
from espei.parameter_selection.utils import _get_sample_condition_dicts
from espei.parameter_selection.fitting_steps import StepHM
from espei.sublattice_tools import interaction_test


def test_interaction_detection():
"""interaction_test should correctly calculate interactions for different candidate configurations"""

Expand Down Expand Up @@ -104,7 +104,7 @@ def test_get_data_quantities_AL_NI_VA_interaction():
config_tup = (('AL',), ('NI', 'VA'), ('VA',))
calculate_dict = get_prop_samples(data, config_tup)
sample_condition_dicts = _get_sample_condition_dicts(calculate_dict, config_tup, mod.phase_name)
qty = get_data_quantities('HM_FORM', mod, [0], data, sample_condition_dicts)
qty = StepHM.get_data_quantities('HM_FORM', mod, [0], data, sample_condition_dicts)
print(qty)
assert np.all(np.isclose([-6254.7802775, -5126.1206475, -7458.3974225, -6358.04118875], qty))

Expand All @@ -127,7 +127,7 @@ def test_get_data_quantities_mixing_entropy():
config_tup = (('AL',), ('AL', 'CR'))
calculate_dict = get_prop_samples(data, config_tup)
sample_condition_dicts = _get_sample_condition_dicts(calculate_dict, config_tup, mod.phase_name)
qty = get_data_quantities('SM_MIX', mod, [0], data, sample_condition_dicts)
qty = StepHM.get_data_quantities('SM_MIX', mod, [0], data, sample_condition_dicts)
print(qty)
assert np.all(np.isclose([7.27266667], qty))

Expand All @@ -146,7 +146,7 @@ def test_get_data_quantities_magnetic_energy():
CONSTITUENT ALCR2 :AL,CR:AL,CR: !
""")
mod_nomag = Model(dbf_nomag, ['AL', 'CR'], 'ALCR2')
qty_nomag = get_data_quantities('SM_FORM', mod_nomag, [0], data, sample_condition_dicts)
qty_nomag = StepHM.get_data_quantities('SM_FORM', mod_nomag, [0], data, sample_condition_dicts)
print(qty_nomag)
assert np.all(np.isclose([16.78896], qty_nomag))

Expand Down Expand Up @@ -174,6 +174,6 @@ def test_get_data_quantities_magnetic_energy():
PARAMETER BMAGN(ALCR2,AL,CR:CR;0) 298.15 -.92; 6000 N REF0 !
""")
mod = Model(dbf, ['AL', 'CR'], 'ALCR2')
qty = get_data_quantities('SM_FORM', mod, [0], data, sample_condition_dicts)
qty = StepHM.get_data_quantities('SM_FORM', mod, [0], data, sample_condition_dicts)
print(qty)
assert np.all(np.isclose([16.78896], qty))

0 comments on commit f84e43a

Please sign in to comment.