Skip to content

Commit

Permalink
WIP: more refactoring fit_formation_energy
Browse files Browse the repository at this point in the history
tests still passing
  • Loading branch information
bocklund committed Jan 17, 2024
1 parent f84e43a commit d315498
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 44 deletions.
9 changes: 8 additions & 1 deletion espei/parameter_selection/fitting_descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@
# one. We'd need to be able to resolve what happens if there are different model
# objects used.
class ModelFittingDescription():
"""
Attributes
----------
fitting_steps: [FittingStep]
model: Type[Model]
"""
def __init__(self, fitting_steps: [FittingStep], model: Optional[Type[Model]] = Model) -> None:
self.fitting_steps = fitting_steps
self.model = model
Expand All @@ -31,4 +38,4 @@ def build_phase(self, dbe):

elastic_fitting_description = ModelFittingDescription([StepElasticC11, StepElasticC12, StepElasticC44], model=ElasticModel)

gibbs_energy_fitting_description = ModelFittingDescription([StepHM, StepSM, StepCPM])
gibbs_energy_fitting_description = ModelFittingDescription([StepCPM, StepSM, StepHM])
26 changes: 13 additions & 13 deletions espei/parameter_selection/fitting_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@

class FittingStep():
parameter_name: str
# TODO: does it make sense for one fitting step to use multiple data types? Does this actually happen anywhere?
data_types_read: [str]
# TODO: can we think of a situation where it makes sense to read multiple data types in a single step?
data_types_read: str
features: [symengine.Expr]
# TODO: does a reference state support list here make sense?
# If we instead make shift_reference_state a part of the API, it would be
Expand Down Expand Up @@ -199,9 +199,9 @@ def get_data_quantities(cls, desired_property: str, fixed_model: Model, fixed_po
# Maybe this is where we introduce the data and feature transforms class methods?
class StepHM(FittingStep):
parameter_name = "GM"
data_types_read = ["HM"]
data_types_read = "HM"
supported_reference_states = ["_MIX", "_FORM"]
features: [symengine.S.One]
features = [symengine.S.One]

# TODO: this function actually does 2 things that should be split up into separate functions:
# 1. Extract data from Dataset objects into an array of raw values
Expand Down Expand Up @@ -300,39 +300,39 @@ def get_data_quantities(cls, desired_property, fixed_model, fixed_portions, data
# TODO: does it make sense to inherit from HM? Do we need an abstract class? Or does fixing the transforms issue and having each implementation be separate be correct?
# TODO: support "" (absolute) entropy reference state?
class StepSM(StepHM):
data_types_read = ["SM"]
features: [v.T]
data_types_read = "SM"
features = [v.T]


# TODO: support "" (absolute) heat capacity reference state?
class StepCPM(StepHM):
data_types_read = ["CPM"]
features: [v.T * symengine.log(v.T), v.T**2, v.T**-1, v.T**3]
data_types_read = "CPM"
features = [v.T * symengine.log(v.T), v.T**2, v.T**-1, v.T**3]



class StepElasticC11(AbstractRKMPropertyStep):
parameter_name = "C11"
data_types_read = ["C11"]
data_types_read = "C11"

class StepElasticC12(AbstractRKMPropertyStep):
parameter_name = "C12"
data_types_read = ["C12"]
data_types_read = "C12"

class StepElasticC44(AbstractRKMPropertyStep):
parameter_name = "C44"
data_types_read = ["C44"]
data_types_read = "C44"

class StepV0(AbstractRKMPropertyStep):
parameter_name = "V0"
data_types_read = ["V0"]
data_types_read = "V0"
features = [symengine.S.One]

class StepLogVA(FittingStep):
# V = V0*exp(VA), to linearize in terms of VA features, we want to fit
# VA = ln(V/V0)
parameter_name = "VA"
data_types_read = ["VM"]
data_types_read = "VM"
features = [v.T, v.T**2, v.T**3, v.T**(-1)]
supported_reference_states = ["", "_MIX"] # TODO: add formation support

Expand Down
41 changes: 11 additions & 30 deletions espei/paramselect.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
tuplify, recursive_tuplify, interaction_test, endmembers_from_interaction, generate_endmembers
from espei.utils import PickleableTinyDB, sigfigs, extract_aliases
from espei.parameter_selection.fitting_steps import StepHM
from espei.parameter_selection.fitting_descriptions import gibbs_energy_fitting_description

_log = logging.getLogger(__name__)

Expand Down Expand Up @@ -155,40 +156,19 @@ def fit_formation_energy(dbf, comps, phase_name, configuration, symmetry, datase
aicc_feature_factors = aicc_phase_penalty if aicc_phase_penalty is not None else {}
if interaction_test(configuration):
_log.debug('ENDMEMBERS FROM INTERACTION: %s', endmembers_from_interaction(configuration))
fitting_steps = (["CPM_FORM", "CPM_MIX"], ["SM_FORM", "SM_MIX"], ["HM_FORM", "HM_MIX"])

else:
# We are only fitting an endmember; no mixing data needed
fitting_steps = (["CPM_FORM"], ["SM_FORM"], ["HM_FORM"])

# create the candidate models and fitting steps
if features is None:
features = OrderedDict([("CPM_FORM", (v.T * symengine.log(v.T), v.T**2, v.T**-1, v.T**3)),
("SM_FORM", (v.T,)),
("HM_FORM", (symengine.S.One,)),
])
# dict of {feature, [candidate_models]}
candidate_models_features = {}
for feature_name, potential_features in features.items():
candidate_models_features[feature_name] = build_redlich_kister_candidate_models(configuration, make_successive(potential_features))

# All possible parameter values that could be taken on. This is some legacy
# code from before there were many candidate models built. For very large
# sets of candidate models, this could be quite slow.
# TODO: we might be able to remove this initialization for clarity, depends on fixed portions
parameters = {}
for candidate_models in candidate_models_features.values():
for model in candidate_models:
for coef in model:
parameters[coef] = 0

# This is our previously fit partial model from previous steps
# Subtract out all of these contributions (zero out reference state because these are formation properties)
fixed_model = None # Profiling suggests we delay instantiation
fixed_portions = [0]

for desired_props in fitting_steps:
feature_type = desired_props[0].split('_')[0] # HM_FORM -> HM
fitting_descrption = gibbs_energy_fitting_description
for fitting_step in fitting_descrption.fitting_steps:
# TODO: maybe we're losing "_FORM" vs. "_FORM"+"_MIX" data here; not sure if that matters. It used to be set in desired props.
# Is it okay if we also grab "_MIX" data for endmember fitting? It should get filtered out by some configuration filter later, I think.
desired_props = [fitting_step.data_types_read + refstate for refstate in fitting_step.supported_reference_states]
feature_type = fitting_step.data_types_read
aicc_factor = aicc_feature_factors.get(feature_type, 1.0)
solver_qry = (where('solver').test(symmetry_filter, configuration, recursive_tuplify(symmetry) if symmetry else symmetry))
desired_data = get_prop_data(comps, phase_name, desired_props, datasets, additional_query=solver_qry)
Expand All @@ -197,7 +177,7 @@ def fit_formation_energy(dbf, comps, phase_name, configuration, symmetry, datase
_log.trace('%s: datasets found: %s', desired_props, len(desired_data))
if len(desired_data) > 0:
if fixed_model is None:
fixed_model = Model(dbf, comps, phase_name, parameters={'GHSER'+(c.upper()*2)[:2]: 0 for c in comps})
fixed_model = fitting_descrption.model(dbf, comps, phase_name, parameters={'GHSER'+(c.upper()*2)[:2]: 0 for c in comps})
config_tup = tuple(map(tuplify, configuration))
calculate_dict = get_prop_samples(desired_data, config_tup)
sample_condition_dicts = _get_sample_condition_dicts(calculate_dict, config_tup, phase_name)
Expand All @@ -212,7 +192,8 @@ def fit_formation_energy(dbf, comps, phase_name, configuration, symmetry, datase
# build the candidate model transformation matrix and response vector (A, b in Ax=b)
feature_matricies = []
data_quantities = []
for candidate_coefficients in candidate_models_features[desired_props[0]]:
feature_sets = build_redlich_kister_candidate_models(configuration, fitting_step.get_feature_sets())
for candidate_coefficients in feature_sets:
# Map coefficients in G to coefficients in the feature_type (H, S, CP)
transformed_coefficients = list(map(feature_transforms[feature_type], candidate_coefficients))
if interaction_test(configuration, 3):
Expand All @@ -222,7 +203,7 @@ def fit_formation_energy(dbf, comps, phase_name, configuration, symmetry, datase
data_quantities.append(data_qtys)

# provide candidate models and get back a selected model.
selected_model = select_model(zip(candidate_models_features[desired_props[0]], feature_matricies, data_quantities), ridge_alpha, weights=weights, aicc_factor=aicc_factor)
selected_model = select_model(zip(feature_sets, feature_matricies, data_quantities), ridge_alpha, weights=weights, aicc_factor=aicc_factor)
selected_features, selected_values = selected_model
parameters.update(zip(*(selected_features, selected_values)))
# Add these parameters to be fixed for the next fitting step
Expand Down

0 comments on commit d315498

Please sign in to comment.