"""Core classes for handling simulated and empirical data.
"""
import uuid
import os
import numpy
from . import MATPLOTLIB_BACKEND
from .thermo import *
from .thermo import _UNITS
from .utilities import savitzky_golay
[docs]class ITCExperimentBase:
"""Provides the essential required elements of an ITC experiment.
Attributes
----------
chisq : float
If a fit has been generated, the reduced chi-squared goodness-of-fit value.
T : float
The experimental temperature (in Kelvin).
Concentrations : list of dicts
The concentrations of the components (in the cell) at each titration point
Notes
-----
Energies are stored in Joules internally, and then converted (if necessary) to the desired unit when retrieved by the user.
"""
def __init__(self, T, V0, injections, dQ, Cell, Syringe, skip=[], dQ_err=[], Q_dil=0.0, cellRef=None, syringeRef=None, title=None, units='J'):
"""Constructor for the ITCExperiment object.
Arguments
---------
T : float
The experimental temperature (in Kelvin).
V0 : float
The volume of the calorimeter's cell, in microliters.
injections : list of floats
The volumes injected at each titration point.
dQ : list of floats
The evolved heat at each injection (in calories, unnormalized)
Cell : dict of floats
The starting concentration of the named components in the cell.
Syringe : dict of floats
The concentration of the named components in the syringe.
skip : list of ints
Titration points to exclude during fitting.
dQ_err : list of floats
List of estimated error in each titration point enthalpy.
Q_dil : float
Heat of dilution for syringe solution.
cellRef : string
The cell reference component.
syringeRef : string
The syringe reference component.
title : string
A descriptive title for the experiment.
units : string
The units of dQ heats.
"""
self.T = T
self.V0 = V0
self.injections = injections
self.npoints = len(self.injections)
self.Cell = Cell
self.Syringe = Syringe
self.skip = skip
self.Q_dil = Q_dil
self.units = units
self._USE_OLD_DILUTION_Q = False # deprecated patch to use old heat of dilution calculation
if title:
self.title = title
else: # assign a pseudounique title
self.title = uuid.uuid4()
# reference components
if cellRef == None:
self.cellRef = list(self.Cell.keys())[0]
else:
assert cellRef in Cell.keys()
self.cellRef = cellRef
if syringeRef == None:
self.syringeRef = list(self.Syringe.keys())[0]
else:
assert syringeRef in Syringe.keys()
self.syringeRef = syringeRef
# initialize the list of dicts used to track the concentrations of the components (in the cell) at each titration point
self.Concentrations = []
for i in range(self.npoints):
self.Concentrations.append({})
for s in Cell:
self.Concentrations[-1][s] = 0.0
for s in Syringe:
self.Concentrations[-1][s] = 0.0
# calculate the total ligand and macromolecule concentration in the cell at each titration point
# this uses the dilution formula described in Microcal's data processing in Origin manual
self.dDQ_conc = [0.0]*self.npoints # fractional concentration of syringe solution in cell to use for dilution calculations
for i in range(self.npoints):
dV = sum(self.injections[0:i])
self.dDQ_conc[i] = (1.0*self.injections[i]/self.V0) + ((1.0*dV/self.V0) * (1.0/(1.0+(dV/(2.0*self.V0)))))
# these are += because it's possible that a component could be in both the syringe and cell solutions
for s in self.Syringe:
self.Concentrations[i][s] += (self.Syringe[s]*self.injections[i]/self.V0) + ((self.Syringe[s]*dV/self.V0) * (1.0/(1.0+(dV/(2.0*self.V0)))))
for s in self.Cell:
self.Concentrations[i][s] += self.Cell[s] * ( (1-(dV/(2.0*self.V0))) / (1.0+(dV/(2.0*self.V0))) )
self.dQ_dil = [0.0]*self.npoints
for i in range(self.npoints):
if self._USE_OLD_DILUTION_Q: # old heat of dilution calculation, based on syringe content
if i==0:
self.dQ_dil[i] = (self.V0/1E6)*(self.Concentrations[i][self.syringeRef])*self.Q_dil
else:
self.dQ_dil[i] = (self.V0/1E6)*(self.Concentrations[i][self.syringeRef]-self.Concentrations[i-1][self.syringeRef])*self.Q_dil
else: # heat of dilution will be proportional to the difference in concentration between syringe solution and cell solutions
if i == 0:
self.dQ_dil[i] = (1.0 -self.dDQ_conc[i]) * self.Q_dil
else:
self.dQ_dil[i] = (1.0 -self.dDQ_conc[i] -self.dDQ_conc[i-1]) * self.Q_dil
assert len(dQ) == self.npoints
# convert raw data (in calories) to joules (note that this is not normalized per mol of injectant!)
self.dQ_exp = numpy.array([J_from_cal(dQ[i]) for i in range(self.npoints)],dtype='d')
self.dQ_fit = None
self.spline = None
if len(dQ_err) == 0:
self.dQ_err = None
else:
assert len(dQ_err) == self.npoints
self.dQ_err = numpy.array([J_from_cal(dQ_err[i]) for i in range(self.npoints)],dtype='d')
self.chisq = None
self.initialized = False # will be set to True by implementors of this base class
def __str__(self):
"""Stringify the experiment to be suitable for display to the user
Parameters
----------
None
Returns
-------
string
String containing the description and other experimental info.
"""
ret = "Title: %s\n"%(self.title)
if (self.chisq != None):
ret+= "Chisq: %f\n"%(self.chisq)
ret+= "Temperature: %0.2f K\n"%(self.T)
ret+= "%i Injections (%i skipped)\n"%(self.npoints,len(self.skip))
ret+= "Dilution enthalpy: %.3E\n"%(self.Q_dil)
ret+= "Cell components:\n"
for s in self.Cell:
ret+="\t%s (%.3E M)"%(s,self.Concentrations[0][s])
if s == list(self.Cell.keys())[0]:
ret+=" (reference)"
ret+="\n"
ret+= "Syringe components:\n"
for s in self.Syringe:
ret+="\t%s (%.3E M)"%(s,self.Concentrations[0][s])
if s == list(self.Syringe.keys())[0]:
ret+=" (reference)"
ret+="\n"
return ret
[docs] def change_component_name(self,old_name,new_name):
"""Update the name of a component in the experiment to the specified one.
Arguments
---------
old_name : string
The name of the component in the experiment object to update
new_name : string
The new name of the component to use
Returns
-------
None
Notes
-----
Because syringe and cell components are referred to by name, problems can arise where the component is called by one name in the model, and another in the experiment.
This method allows the user to update an experimental component by name to match the one used in the model, e.g. from the specific "Tryptophan" specified in the experiment file to the generic "Ligand" used in the model.
"""
if new_name in self.Cell:
#raise Warning("Attempted to change component name to one that already exists in the experiment.")
return
if new_name in self.Syringe:
#raise Warning("Attempted to change component name to one that already exists in the experiment.")
return
for i in range(self.npoints):
self.Concentrations[i][new_name] = self.Concentrations[i][old_name]
del self.Concentrations[i][old_name]
if old_name in self.Cell:
self.Cell[new_name] = self.Cell[old_name]
del self.Cell[old_name]
if old_name in self.Syringe:
self.Syringe[new_name] = self.Syringe[old_name]
del self.Syringe[old_name]
if self.cellRef == old_name:
self.cellRef = new_name
if self.syringeRef == old_name:
self.syringeRef = new_name
[docs] def make_plot(self, residuals=True, hardcopy=False, hardcopydir='.', hardcopyprefix='', hardcopytype='png'):
"""Generate a plot of the experimental data, and the fit if present.
Arguments
----------
residuals: boolean
Includes a plot of fit residuals if a fit is present.
hardcopy : boolean
Writes the plot to a file instead of displaying to screen.
hardcopydir : string
The directory to write the hardcopy to.
hardcopyprefix : string
A prefix to append to the experiment's title when generating the output plot filename.
hardcopytype : string
The output format for the plot (availability is dependent upon what backends matplotlib was compiled with).
Returns
-------
None
"""
if not self.initialized:
raise Exception("No data to plot. If experiment is synthetic, call sim.run() first.")
try:
matplotlib.get_backend()
except:
if MATPLOTLIB_BACKEND != None:
import matplotlib
matplotlib.use(MATPLOTLIB_BACKEND)
import matplotlib.pyplot as pyplot
if self.dQ_fit is not None and residuals:
fig, (ax1, ax2) = pyplot.subplots(2, gridspec_kw={"height_ratios": [3, 1]})
ax2.set_ylabel("Residual (%s)"%(self.units))
ax2.set_xlabel("%s / %s"%(self.syringeRef,self.cellRef))
ax2.axhline(y=0.0, c='#000000',lw=1.0, ls="--")
fig.tight_layout()
else:
fig, ax1 = pyplot.subplots()
ax1.set_title(self.title)
ax1.set_ylabel("%s/mol of %s"%(self.units,self.syringeRef))
ax1.set_xlabel("%s / %s"%(self.syringeRef,self.cellRef))
# For convention, normalize the heat evolved as per mol of injected reference ligand
tmp_rat = [ self.Concentrations[i][self.syringeRef]/self.Concentrations[i][self.cellRef] for i in range(self.npoints) if i not in self.skip ]
tmp_exp = [ convert_from_J(self.units,self.dQ_exp[i])/self.Syringe[self.syringeRef]/self.injections[i] for i in range(self.npoints) if i not in self.skip ]
if self.dQ_err is not None:
tmp_err = [ convert_from_J(self.units,self.dQ_err[i]) for i in range(self.npoints) if i not in self.skip ]
ax1.errorbar(tmp_rat,tmp_exp,yerr=tmp_err,c='#000000',fmt='s')
if self.spline is not None:
tmp_spl = [ convert_from_J(self.units,self.spline[i])/self.Syringe[self.syringeRef]/self.injections[i] for i in range(self.npoints) if i not in self.skip ]
ax1.plot(tmp_rat,tmp_spl,c='g')
if len(self.skip) > 0:
tmp_xsk = [ self.Concentrations[i][self.syringeRef]/self.Concentrations[i][self.cellRef] for i in self.skip ]
tmp_ysk = [ convert_from_J(self.units,self.dQ_exp[i])/self.Syringe[self.syringeRef]/self.injections[i] for i in self.skip ]
ax1.errorbar(tmp_xsk,tmp_ysk,yerr=0.0,c='g',fmt='s')
if self.Q_dil != 0:
tmp_xdl = [ self.Concentrations[i][self.syringeRef]/self.Concentrations[i][self.cellRef] for i in range(self.npoints) if i not in self.skip ]
tmp_ydl = [ convert_from_J(self.units,self.dQ_dil[i])/self.Syringe[self.syringeRef]/self.injections[i] for i in range(self.npoints) if i not in self.skip ]
ax1.plot(tmp_xdl,tmp_ydl,c='b')
if self.dQ_fit is not None:
tmp_fit = [ convert_from_J(self.units,self.dQ_fit[i])/self.Syringe[self.syringeRef]/self.injections[i] for i in range(self.npoints) if i not in self.skip ]
ax1.plot(tmp_rat,tmp_fit,c='r')
if residuals:
ax2.errorbar(tmp_rat,[y1-y2 for y1,y2 in zip(tmp_exp,tmp_fit)],yerr=tmp_err,c='r',fmt='s')
if residuals and len(self.skip) > 0:
tmp_fit = [ convert_from_J(self.units,self.dQ_fit[i])/self.Syringe[self.syringeRef]/self.injections[i] for i in self.skip ]
ax2.errorbar(tmp_xsk,[y1-y2 for y1,y2 in zip(tmp_ysk,tmp_fit)],yerr=0.0,c='g',fmt='s')
if hardcopy:
fig.savefig( os.path.join(hardcopydir,"%s%s.%s"%(hardcopyprefix,self.title,hardcopytype)), bbox_inches='tight')
pyplot.close(fig)
else:
pyplot.show()
[docs] def export_to_file(self,path,units='cal',full=False):
"""Export the attributes of the experiment to a file.
Arguments
---------
path : string
The filesystem path to write the output to.
units : string
The units to use in the export, must be either "J" for Joules (the default), or "cal" for calories.
Returns
-------
None
Notes
-----
Most instruments (for historical reasons) report binding enthalpies in calories. Thus, this is the default behavior for this method.
"""
from datetime import datetime
assert units in _UNITS
h = open(path,'w')
h.write("# itcsim export of %s\n#\n"%(self.title))
h.write("# Date %s\n"%(datetime.ctime(datetime.today())))
h.write("# units %s\n"%(units))
h.write("# T %.5f\n"%(self.T))
h.write("# V0 %.5f\n"%(self.V0))
for s in self.Cell:
h.write("# Cell %s %.5E\n"%(s,self.Cell[s]))
for s in self.Syringe:
h.write("# Syringe %s %.5E\n"%(s,self.Syringe[s]))
h.write("# Q_dil %.5E\n"%(self.Q_dil))
h.write("# skip %s\n"%(",".join(map(str,self.skip))))
if full:
h.write("#\n# Ivol dQ_exp dQ_err_exp dQ_fit dQ_spline skipped %s %s\n"%("\t".join(self.Cell.keys()),"\t".join(self.Syringe.keys())))
else:
h.write("#\n# Ivol dQ_exp\n")
if self.dQ_fit is None:
fit = [0.0]*self.npoints
else:
fit = self.dQ_fit
if self.spline is None:
spline = [0.0]*self.npoints
else:
spline = self.spline
if self.dQ_err is None:
dQ_err = [0.0]*self.npoints
else:
dQ_err = self.dQ_err
for i in range(self.npoints):
cell = ["%.5E"%(self.Concentrations[i][s]) for s in self.Cell]
syringe = ["%.5E"%(self.Concentrations[i][s]) for s in self.Syringe]
if full:
h.write("%.5f %.5E %.5E %.5E %.5E %i %s %s\n"%(
self.injections[i],
convert_from_J(units,self.dQ_exp[i]),
convert_from_J(units,dQ_err[i]),
convert_from_J(units,dQ_fit[i]),
convert_from_J(units,spline[i]),
(i in self.skip),
"\t".join(cell),
"\t".join(syringe)
))
else:
h.write("%.5f %.5f\n"%(self.injections[i],convert_from_J('cal',self.dQ_exp[i])) )
h.close()
[docs] def get_chisq(self, Q, writeback=False):
"""Calculate the goodness-of-fit between the provided data and the experimental data.
Arguments
---------
Q : list of floats
The predicted total heat at each injection point.
writeback : boolean
Update the experiment's simulated heat attribute (dQ_fit) with the provided Qs, as well as the chisq value?
Returns
-------
float
The goodness of the fit, as a reduced chi-square or as a sum-of-squares if a experimental error in dQ was not provided.
"""
dV = 0.0
for i in range(self.npoints):
dV += self.injections[i]
Q[i] *= self.V0 * self.Concentrations[i][self.cellRef] # normalize total heat content to macromolecule concentration in the cell volume
# obtain the change in cell heat between each titration point
dQ = [0.0]*self.npoints
for i in range(self.npoints):
if i==0:
dQ[i] = Q[i] + ( (self.injections[i]/self.V0)*(Q[i]/2.0) )
else:
dQ[i] = Q[i] + ( (self.injections[i]/self.V0)*((Q[i]+Q[i-1])/2.0) ) - Q[i-1]
dQ[i] += self.dQ_dil[i] # add heat of dilution
if self.dQ_err is None:
dQ_err = [1.0]*self.npoints
else:
dQ_err = self.dQ_err
# calculate reduced chi-square / SOS value using error estimate
chisq = sum([(self.dQ_exp[i] - dQ[i])**2 / dQ_err[i]**2 for i in range(self.npoints) if i not in self.skip])
chisq /= (self.npoints -len(self.skip)) # reduced chi-square corrected for n degrees of freedom
if writeback:
self.dQ_fit = dQ[:]
self.chisq = chisq
return chisq
[docs]class ITCExperiment(ITCExperimentBase):
"""Provides splining for empirical ITC data.
Attributes
----------
spline : list of floats
The smoothed spline (if any) to the experimental enthalpies, in Joules.
"""
def __init__(self, spline_pts=7, spline_order=1, *args, **kwargs ):
"""The constructor for the empirical ITCExperiment class
Arguments
---------
spline_pts : int
Number of points to use in the Savitsky-Golay filter used to estimate errors in experimental enthalpies.
spline_order : int
Order of the Savitsky-Golay filter.
*args
Positional arguments to pass along to ITCExperimentBase.
**kwargs
Keyword arguments to pass along to ITCExperimentBase.
"""
ITCExperimentBase.__init__(self,*args,**kwargs)
if self.dQ_err is None: # estimate errors by fitting spline
counter,tmp = 0,[0]*self.npoints
for i in range(self.npoints):
if i not in self.skip:
tmp[i] = counter
counter += 1
spl = savitzky_golay([self.dQ_exp[i] for i in range(self.npoints) if i not in self.skip], spline_pts, spline_order )
err = numpy.std([self.dQ_exp[i] - spl[tmp[i]] for i in range(self.npoints) if i not in self.skip])
self.dQ_err = [err]*self.npoints
self.spline = [spl[tmp[i]] for i in range(self.npoints)]
self.initialized = True
[docs]class ITCExperimentSynthetic(ITCExperimentBase):
"""Provides the ability to save simulated data for display or re-fitting.
Attributes
----------
noise : float
The standard deviation (in percent) of normally-distributed noise to add to the pure simulated total evolved heat.
initialized: boolean
A flag that is set once the class's dQ_fit attribute has been set (see notes).
Notes
-----
The first time the get_chisq() method of this class is called, the "experimental" dQ attribute is populated with the per-injection heats simulated by the model (+ the amount of specified noise).
Subsequent calls will return the actual reduced chi-square difference between the experiment's calculated dQ and the model predictions, as expected.
If noise is set to 0 (or None), then the get_chisq() method of this class will always return 1.0, as without realistic noise, the reduced chisquare is meaningless.
"""
def __init__(self, injections, noise=None, *args, **kwargs):
"""The constructor for the ITCExperimentSynthetic class
Arguments
---------
injections : list of floats
The injection volumes (in uL) for each point in the titration.
noise : float
The standard deviation (in percent) of normally-distributed noise to add to the pure simulated total evolved heat.
*args
Positional arguments to pass along to ITCExperimentBase
**kwargs
Keyword arguments to pass along to ITCExperimentBase
"""
super().__init__(injections=injections,dQ=[0.0]*len(injections),*args,**kwargs)
if noise == None:
self.noise = None
else:
self.noise = convert_to_J(self.units,noise)*self.Syringe[self.syringeRef]
self.initialized = False
[docs] def get_chisq(self, Q, writeback):
"""Monkeypatches the parent ITCExperimentBase's get_chisq() class method to update the class's dQ_exp attribute the first time the method is called.
Arguments
---------
Q : list of floats
The predicted total heat at each injection point.
writeback : boolean
Update the experiment's simulated heat attribute (dQ_fit) with the provided Qs?
Returns
-------
float
The goodness of the fit, as a reduced chi-square, unless the class's noise attribute is zero or none in which case 1.0 is returned (see class notes).
"""
if not self.initialized:
if self.noise:
self.dQ_err = [self.noise]*self.npoints
ret = ITCExperimentBase.get_chisq(self, Q[:], writeback=True)
self.dQ_exp = [numpy.random.normal(self.dQ_fit[i],self.noise) for i in range(self.npoints)]
elif self.noise == 0 or self.noise == None:
ITCExperimentBase.get_chisq(self, Q[:], writeback=True)
self.chisq = 1.0
self.dQ_exp = self.dQ_fit[:]
ret = 1.0
self.initialized = True
return ret
if self.noise:
return ITCExperimentBase.get_chisq(self, Q[:], writeback)
else:
ITCExperimentBase.get_chisq(self, Q[:], writeback)
return 1.0