Skip to content

Commit

Permalink
Use a file for ESPEI inputs.
Browse files Browse the repository at this point in the history
gh#28
closes PhasesResearchLab#27 

* ESPEI deprecates command-line input arguments in favor of a YAML input file (JSON also works; other formats are possible).
* Files are validated using [cerberus](http://docs.python-cerberus.org/en/stable/index.html) with a schema.
* Support for setting chains per parameter and the standard deviation of the chains.
* Include tests for several different possible runs

Almost all of the constraints are handled by cerberus, including:
* checking for parameter conflicts
* handling enumeration options (e.g. choose either 'linear' or 'exponential' models, validated with regex)
* Checking for filetype compatibility (again, regex)
  • Loading branch information
bocklund authored Sep 18, 2017
1 parent 78b07e6 commit b1272af
Show file tree
Hide file tree
Showing 7 changed files with 386 additions and 121 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
include versioneer.py
include espei/_version.py
include espei/input-schema.yaml
20 changes: 20 additions & 0 deletions espei/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,24 @@
__version__ = get_versions()['version']
del get_versions

import os
import yaml
from cerberus import Validator

# Absolute path of the directory that contains this module; packaged data
# files (such as the input schema) are resolved relative to it.
MODULE_DIR = os.path.abspath(os.path.dirname(__file__))

# extension for iseven
class ESPEIValidator(Validator):
    """Cerberus ``Validator`` subclass that adds the custom ``iseven`` rule."""

    # NOTE: the sentence about the rule's arguments and the dict literal that
    # follows it are kept verbatim at the end of the docstring; cerberus
    # derives the schema for the rule's own argument from that snippet.
    def _validate_iseven(self, iseven, field, value):
        """ Report an error when an even number is demanded but the value is odd.

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        if not iseven:
            # Rule is switched off for this field: nothing to check.
            return
        # The lowest bit is set exactly when the integer is odd.
        if value & 1:
            self._error(field, "Must be an even number")

# Build the module-level validator from the schema file shipped next to this
# module. ``yaml.safe_load`` is used because calling ``yaml.load`` without an
# explicit Loader is deprecated in PyYAML >= 5.1 and raises a TypeError in
# PyYAML >= 6; the schema only contains plain YAML types, so the safe loader
# parses it to the same structure.
with open(os.path.join(MODULE_DIR, 'input-schema.yaml')) as f:
    schema = ESPEIValidator(yaml.safe_load(f))

from espei.paramselect import fit
91 changes: 91 additions & 0 deletions espei/input-schema.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Core run settings.
system: # phase models and input data
  type: dict
  schema:
    phase_models: # JSON file describing the CALPHAD models for the phases
      type: string
      required: true
      regex: '.*\.json$'
    datasets: # path to the datasets; required, and no default is set by this schema
      type: string
      required: true

# Output settings. The section may be omitted: it defaults to an empty mapping.
output:
  type: dict
  default: {}
  schema:
    verbosity: # integer verbosity level 0 | 1 | 2, with 2 being the most verbose
      type: integer
      min: 0
      max: 2
      default: 0
      required: true
    output_db: # name of the output database (default: out.tdb)
      type: string
      default: 'out.tdb'
    tracefile: # file that stores the mcmc chain array; must end in .npy
      type: string
      default: 'chain.npy'
      regex: '.*\.npy$'
    probfile: # file that stores the mcmc ln probability array; must end in .npy
      type: string
      default: 'lnprob.npy'
      regex: '.*\.npy$'

## When this section is present, a single-phase fitting is performed.
generate_parameters:
  type: dict
  schema:
    excess_model: # the regex currently admits only 'linear'
      type: string
      required: true
      regex: 'linear'
    ref_state: # the regex currently admits only 'SGTE91'
      type: string
      required: true
      regex: 'SGTE91'

## If present, an MCMC fitting run is performed.
## You must specify some kind of input for the parameters.
## Parameters can come from
##   1. a preceding generate_parameters step
##   2. by generating chains from a previous input_db
##   3. by using chains from a restart_chain for phases in an input_db
mcmc:
  type: dict
  # Exactly one source of starting parameters must be available (see the list above).
  oneof_dependencies:
    - 'mcmc.input_db'
    - 'generate_parameters'
  schema:
    mcmc_steps: # integer >= 1
      type: integer
      min: 1
      required: True
    mcmc_save_interval: # integer >= 1; defaults to 20
      type: integer
      default: 20
      min: 1
      required: True
    scheduler: # scheduler to use for parallelization: dask | MPIPool
      type: string
      default: dask
      # Grouped and anchored on purpose: the pattern is matched from the start
      # of the value, so a bare 'dask|MPIPool' alternation would also accept
      # values that merely begin with 'dask' (e.g. 'daskfoo').
      regex: '^(dask|MPIPool)$'
      required: True
    input_db: # TDB file used to start the mcmc run
      type: string
    restart_chain: # restart the mcmc fitting from a previous calculation
      type: string
      dependencies: input_db
      regex: '.*\.npy$'
    # NOTE(review): confirm that `required` nested under `allof` is enforced by
    # cerberus as intended for the two fields below.
    chains_per_parameter: # number of chains for each parameter; must be an even integer >= 2
      type: integer
      iseven: True
      min: 2
      allof:
        - required: True
        - excludes: restart_chain
    chain_std_deviation: # fraction of a parameter for the standard deviation in the walkers
      # Typed explicitly so that non-numeric values are rejected by validation
      # instead of slipping past the `min` comparison.
      type: number
      min: 0
      allof:
        - required: True
        - excludes: restart_chain
12 changes: 9 additions & 3 deletions espei/paramselect.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ def lnprob(params, data=None, comps=None, dbf=None, phases=None, datasets=None,

def fit(input_fname, datasets, resume=None, scheduler=None, run_mcmc=True,
tracefile=None, probfile=None, restart_chain=None, mcmc_steps=1000,
save_interval=100):
save_interval=100, chains_per_parameter=2, chain_std_deviation=0.1):
"""Fit thermodynamic and phase equilibria data to a model.
Parameters
Expand Down Expand Up @@ -652,6 +652,12 @@ def fit(input_fname, datasets, resume=None, scheduler=None, run_mcmc=True,
int (Default value = 1000)
save_interval : int
interval of steps to save the chain to the tracefile.
chains_per_parameter : int
number of chains for each parameter. Must be an even integer greater or
equal to 2. Defaults to 2.
chain_std_deviation : float
standard deviation of normal for parameter initialization as a fraction
of each parameter. Must be greater than 0. Default is 0.1, which is 10%.
Returns
-------
Expand Down Expand Up @@ -759,9 +765,9 @@ def save_sampler_state(sampler):
initial_parameters = np.array(initial_parameters)
logging.debug('Initial parameters: {}'.format(initial_parameters))
ndim = len(initial_parameters)
nwalkers = 2*ndim # walkers must be of size (2n*ndim)
nwalkers = chains_per_parameter*ndim # walkers must be of size (2n*ndim)
initial_walkers = np.tile(initial_parameters, (nwalkers, 1))
walkers = rng.normal(initial_walkers, np.abs(initial_walkers*0.10))
walkers = rng.normal(initial_walkers, np.abs(initial_walkers*chain_std_deviation))

# set up with emcee
import emcee
Expand Down
Loading

0 comments on commit b1272af

Please sign in to comment.