"""Provides model parameter optimization to fit experimental data.
"""
import random
import numpy
import scipy.optimize
from collections import OrderedDict
from .thermo import *
from .utilities import *
[docs]class ITCFit:
"""A class for optimizing model parameters to accurately fit experimental data.
Attributes
----------
model : ITCModel
The model to use for fitting the experimental data.
bounds : dict of tuples
A parameter-name keyed dict of low and high bounds to enforce during fitting (retrieved from the model itself, or explicitly provided by the user).
chisq : float
The most recently evaluated goodness-of-fit chisquare.
"""
def __init__(self, sim, method='simplex', method_args={}, verbose=False):
"""Constructor function for the ITCFit object.
Arguments
---------
sim : ITCSim
The ITCSim object that possesses the experimental data to replicate.
bounds : dict of tuples
A parameter-name keyed dict of low and high bounds to enforce during fitting.
method : string
The optimization algorithm to use.
method_args : dict
Arguments to pass to the optimization algorithm (method specific).
verbose : boolean
Print additional information to the console?
"""
self.sim = sim
self.model = self.sim.model
self.method = method
self.method_args = method_args
self.verbose = verbose
self.chisq = 0.0
# obtain model-defined boundaries to enforce during fitting
self.bounds = dict( (name,self.model.get_param_bounds(name)) for name in self.model.get_param_names() )
def _noisy_bisect(self, f, a, b, fa, fb, tolerance):
# From http://www.johndcook.com/blog/2012/06/14/root-finding-with-noisy-functions/
# Assume a < b, fa = f(a) < 0, fb = f(b) > 0.
if b-a < tolerance:
return (a, b)
mid = 0.5*(a+b)
fmid = f(mid)
if fmid < fa or fmid > fb: # Monotonicity violated. Reached resolution of noise.
return (a, b)
if fmid < 0:
a, fa = mid, fmid
else:
b, fb = mid, fmid
return self._noisy_bisect(f, a, b, fa, fb, tolerance)
[docs] def set_sim(self, sim):
"""Sets the simulator to use for fitting."""
self.sim = sim
self.model = self.sim.model
[docs] def get_sim(self):
"""Returns the simulator used for fitting."""
return self.sim
[docs] def add_bounds(self, param, low=None, high=None):
"""Add low and/or high boundaries for the specified parameter.
Arguments
---------
param : string
The name of the parameter to create the boundary for
low : float
The lower boundary for the parameter, or None for no boundary
high : float
The upper boundary for the parameter, or None for no boundary
Returns
-------
None
Notes
-----
Implementation of parameter boundaries is fit method dependent and can be unpredictable. Use at your own risk, and sparingly.
"""
assert param in self.model.get_param_names()
self.bounds[param] = (low,high)
[docs] def optimize(self, params=[], callback=None, update_fits=False, use_bounds=True ):
"""Optimize the specified parameters.
Arguments
---------
params : dict of strings
The names of the model parameters to optimize.
callback : function
A callback function to call at each optimization step, will be passed a vector of the current parameter values.
update_fits : boolean
Update the dQ_fit attributes of the experiments in the simulator object?
Returns
_______
(dict,float)
A parameter-name keyed dict of optimized values, and the corresponding goodness-of-fit.
"""
if self.verbose:
print("\nitc_fit: Optimizing %s parameters using the %s algorithm\n"%(",".join(params),self.method))
def _printer(x):
print("itc_fit: %s (%f)" %(" ".join(map(str,x)),self.sim.chisq))
else:
def _printer(x):
return
# starting parameter values to restore later
start_params = self.sim.get_model_params().copy()
# initial param guesses as list
x0 = [start_params[p] for p in params]
# the target objective function to minimize
def _target(x,sim):
for i,p in enumerate(params):
sim.set_model_param(p, x[i])
if use_bounds:
m = self._apply_bounds()
if m > 0:
return sim.run(writeback=False)**(1.0+m)
return sim.run(writeback=False)
# optimize parameters
opt = self._fitter( _target, x0, callback )
ret = OrderedDict( (p,opt[0][i]) for i,p in enumerate(params) )
self.sim.set_model_params(**ret)
if update_fits:
self.sim.run()
else: # restore initial parameter values
self.sim.set_model_params(**start_params)
# return the optimized parameters and the chisquare value
return ret,opt[1]
[docs] def estimate(self, params, method='bootstrap', *args, **kwargs ):
"""Wrapper for the two methods of estimating uncertainties in the fitted parameter values
Arguments
--------
params : list of strings
The names of the model parameters to find intervals for.
method : string
The method to use for interval estimation (either "sigma" or "bootstrap")
*args
Positional arguments to pass to either estimate_sigma or estimate_bootstrap
**kwargs
Keyword arguments to pass to either estimate_sigma or estimate_bootstrap
Returns
-------
dict of tuples
A parameter-name keyed dict of tuples consisting of the mean and standard deviation, or high and low values of the provided parameters.
"""
assert method in ['sigma','bootstrap']
if method == 'sigma':
return self.estimate_sigma( params, *args, **kwargs )
else:
return self.estimate_bootstrap( params, *args, **kwargs )
[docs] def estimate_sigma(self, params=[], params_opt=None, sigma=None, stdevs=1, estimate=0.1, rootfinder='bisect', tolerance=0.001 ):
"""Generate high and low parameter value estimates for optimized parameters according to the provided criterion
Arguments
---------
params : list of strings
The names of the model parameters to find intervals for.
params_opt : list of strings
The names of the model parameters to optimize. Setting to None implies that all available model parameters will be optimized (except the parameter that is being evaluated).
sigma : float
The critical chisq value of the overall fit to use for estimating a parameter's expected maximum and minimum (to be used in lieu of the stdevs parameter)
stdevs : int
The number of standard deviations above the best-fit chi-square value to use as the critical cutoff for estimating a parameter's expected maximum and minimum (to be used in lieu of the sigma parameter)
estimate : float
A guess of how large the change from the optimized parameter values should be (up or down) in order to generate the critical chi-square value.
rootfinder : string
The algorithm used to find the root of the target function ("bisect" or "secant")
tolerance : float
The tolerance (in standard deviations) used to find the critical parameter values. Smaller numbers mean a more accurate estimation of a parameter's estimated maximum and minimum values.
Returns
-------
(dict of tuples)
A parameter-name keyed dict of tuples consisting of the high and low estimates for each of the parameters in the "params" argument.
"""
assert rootfinder in ("bisect","secant")
model_params = self.sim.get_model_params()
# starting parameter values to restore later
start_params = self.sim.get_model_params().copy()
if sigma == None: # calculate the expected sigma based on the number of observations
# Andrae, Rene, Tim Schulze-Hartung, and Peter Melchior. "Dos and don'ts of reduced chi-squared." arXiv preprint arXiv:1012.3754 (2010).
sigma = stdevs * (numpy.sqrt( 2.0 / sum([ e.npoints - len(e.skip) for e in self.sim.experiments ]) ))
# the critical chisq is the point where the low and high parameter estimates are obtained
critical_chisq = self.sim.run() + sigma
param_values = {}
for p in params:
param_values[p] = [None,None]
if params_opt == None: # get the model parameters that are to be optimized while the param "p" is gridded
params_opt = list(self.sim.get_model_params().keys())
try: # remove the parameter to be held constant from the list of those to be optimized
params_opt.remove(p)
except ValueError:
pass
def target_function( x ): # return the discrepancy between the critical chisq and the chisq of the best fit when parameter p is fixed to argument x
self.sim.set_model_param(p,x)
return critical_chisq - self.optimize(params_opt)[1]
# find a LOW value for the model parameter "p" that exceeds the critical chi-square value (to serve as a bracketing point for starting the boundary search)
self.sim.set_model_params(**start_params)
estimate_counter,param_values[p][0] = estimate,model_params[p] * (1.0-estimate)
chisq_diff = target_function( param_values[p][0] )
while chisq_diff > 0: # if necessary, decrease the fixed parameter value until we've exceeded the critical chisq
estimate_counter = estimate_counter + estimate
if self.verbose:
print("itc_fit: Lower guess (%f) for parameter \"%s\" is insufficient (%f from critical chisq). Decreasing parameter value to %f." % (param_values[p][0],p,chisq_diff,model_params[p] * (1.0-estimate_counter)))
param_values[p][0] = model_params[p] * (1.0-estimate_counter)
chisq_diff = target_function( param_values[p][0] )
# find the low param value that provides the desired confidence interval
if rootfinder == "secant":
param_values[p][0] = scipy.optimize.brentq(target_function, model_params[p], param_values[p][0], xtol=tolerance)
else:
param_values[p][0] = sum(self._noisy_bisect(target_function, model_params[p], param_values[p][0], sigma, chisq_diff, tolerance))/2.0
# find a HIGH value for the model parameter "p" that exceeds the critical chi-square value (to serve as a bracketing point for starting the boundary search)
self.sim.set_model_params(**start_params)
estimate_counter,param_values[p][1] = estimate,model_params[p] * (1.0+estimate)
chisq_diff = target_function( param_values[p][1] )
while chisq_diff > 0: # if necessary, increase the fixed parameter value until we've exceeded the critical chisq
estimate_counter = estimate_counter + estimate
if self.verbose:
print("itc_fit: Upper guess (%f) for parameter \"%s\" is insufficient (%f from critical chisq). Increasing parameter value to %f." % (param_values[p][1],p,chisq_diff,model_params[p] * (1.0+estimate_counter)))
param_values[p][1] = model_params[p] * (1.0+estimate_counter)
chisq_diff = target_function( param_values[p][1] )
# find the high param value that provides the desired confidence interval
if rootfinder == "secant":
param_values[p][1] = scipy.optimize.brentq(target_function, model_params[p], param_values[p][1], xtol=tolerance)
else:
param_values[p][1] = sum(self._noisy_bisect(target_function, model_params[p], param_values[p][1], sigma, chisq_diff, tolerance))/2.0
# From chapter 2 tutorial data (stdevs=2, rootfinder='secant'):
#{'dG': [-10.727396269988125, -11.070049429908266], 'dCp': [-0.09939296467688882, -0.1524609718306775], 'dH': [-11.493031681092932, -12.022877990152883], 'n': [1.778168415517038, 1.8276370690304506]}
# restore initial parameter values for the next iteration
self.sim.set_model_params(**start_params)
return param_values
[docs] def estimate_bootstrap(self, params=[], bootstraps=100, randomize=0.1, callback=None, logfile=None ):
"""Generate confidence intervals for optimized parameters by bootstrapping (also known as jackknife estimation)
Arguments
---------
params : list of strings
The names of the model parameters to find intervals for.
bootstraps : int
The number of synthetic datasets to refit.
randomize : float
A fraction by which to perturb the starting parameters before optimization.
callback : function
A callback function to call at each optimization step, will be passed a vector of the current parameter values.
logfile : string
A file path to write the optimized parameter values for each bootstrap dataset
Returns
-------
(dict of tuples)
A parameter-name keyed dict of tuples consisting of the mean and standard deviation of the optimized values.
"""
if self.verbose:
print("\nitc_fit: Estimating uncertainty intervals for %s using %i bootstraps\n"%(",".join(params),bootstraps))
# initialize to make sure we have fit data to use
self.sim.run()
# starting parameter values to restore later
start_params = self.sim.get_model_params().copy()
# store the original experimental and fit data for restoration later
dQ_exp,dQ_fit = [],[]
for E in self.sim.experiments:
assert len(E.dQ_exp) == len(E.dQ_fit)
dQ_exp.append( E.dQ_exp[:] )
dQ_fit.append( E.dQ_fit[:] )
# generates a synthetic dataset from the fit and fit residuals
def _make_bootstrap(n,exp,fit):
res = [ exp[i]-fit[i] for i in range(n) ]
return [ f + random.choice(res) for f in exp ]
param_values = OrderedDict( (p,[]) for p in params )
for i in range(bootstraps):
if self.verbose:
print("itc_fit: Bootstrap %i"%(i))
# randomize starting point by user-specifiable amount
for p in params:
self.sim.set_model_param(p,self.sim.get_model_param(p) * (1+(2*random.random()-0.5)*randomize))
# replace the experimental data points with the synthetic data
for j,E in enumerate(self.sim.experiments):
E.dQ_exp = _make_bootstrap(E.npoints,dQ_exp[j],dQ_fit[j])
# re-optimize the parameters using the new, synthetic datasets
optimized,chisq = self.optimize(params)
# append the optimized parameter values to our growing array
for p in optimized:
param_values[p].append( optimized[p] )
if callback != None:
callback(optimized)
if logfile != None:
write_params_to_file(logfile,optimized,header=(i==0),post="%.3f"%(chisq))
# restore initial parameter values for the next iteration
self.sim.set_model_params(**start_params)
# restore original experimental data and fit for the experiments
for i,E in enumerate(self.sim.experiments):
E.dQ_exp = dQ_exp[i]
E.dQ_fit = dQ_fit[i]
# return the mean and standard deviation of the model parameters
return OrderedDict( (p,(numpy.mean(param_values[p]),numpy.std(param_values[p]))) for p in params )
def _apply_bounds(self):
ret = 0
for k,v in self.sim.get_model_params().items():
if self.bounds[k][0] != None and v < self.bounds[k][0]:
self.sim.set_model_param(k, self.bounds[k][0])
ret += numpy.fabs((self.bounds[k][0] - v) / self.bounds[k][0])
if self.verbose:
print("itc_fit: Boundary violation for \"%s\" (%f<%f)"%(k,v,self.bounds[k][0]))
elif self.bounds[k][1] != None and v > self.bounds[k][1]:
self.sim.set_model_param(k, self.bounds[k][1])
ret += fabs((v - self.bounds[k][1]) / self.bounds[k][1])
if self.verbose:
print("itc_fit: Boundary violation for \"%s\" (%f>%f)"%(k,v,self.bounds[k][1]))
return ret
def _fitter(self, func, x0, callback=None):
# wrapper function for a variety of optimization algorithms
# note that some of these aren't fully integrated yet
if self.method == 'simplex':
ret = scipy.optimize.fmin(
func=func,
x0=x0,
args=(self.sim,),
disp=self.verbose,
full_output=True,
callback=callback,
**self.method_args)
elif self.method == 'powell':
ret = scipy.optimize.fmin_powell(
func=func,
x0=x0,
args=(self.sim,),
disp=self.verbose,
full_output=True,
callback=callback,
**self.method_args)
elif self.method == 'tnc':
opt = scipy.optimize.fmin_tnc(
func=func,
x0=x0,
args=(self.sim,),
approx_grad=True,
disp=self.verbose,
**self.method_args)[0]
ret = opt,func(opt,self.sim)
elif self.method == 'bfgs':
ret = scipy.optimize.fmin_l_bfgs_b(
func=func,
x0=x0,
args=(self.sim,),
approx_grad=True,
disp=self.verbose,
**self.method_args)
else:
raise Exception('Unrecognized fitting algorithm')
return ret