Source code for itcsimlib.mass_spec

"""Extension of Ising classes for fitting mass spectrometry data.


"""

import os
import sys
import uuid
import numpy

from .itc_experiment import ITCExperimentBase
from .model_ising import Ising


_MATPLOTLIB_BACKEND = None #None for default
_FILE_COMMENT = ["#"]
_FILE_KEYPAIR = ["@"]

[docs]class MSExperiment(ITCExperimentBase):
	"""A class that represents a specific mass spec experiment, i.e. weights of different stoichiometry populations

	Attributes
	----------
	chisq : float
		If a fit has been generated, the reduced chi-squared goodness-of-fit value.
	T : float
		The experimental temperature (in Kelvin).
	Lattice : string
		The name of the lattice component (if it exists, read from the experiment file)
	Ligand : string
		The name of the ligand component (if it exists, read from the experiment file)
	Concentrations : list of dicts
		The concentrations of the components at each titration point
	PopIntens : ndarray
		The normalized intensities of each lattice+ligand stoichiometry at each set of component concentrations
	PopSigmas : ndarray
		The uncertainty in the measured intensity of each lattice+ligand stoichiometry at each set of component concentrations
	PopFits	: ndarray
		The fitted intensities (if calculated) of each lattice+ligand stoichiometry at each set of component concentrations

	Notes
	-----
		This class abuses several of the usual ITCExperimentBase attributes in a rather dirty fashion - it won't as a simple replacement for many itcsimlib routines, but basic fitting should be OK
	"""

	def __init__(self, path, T=298.15, sigma=0.05, title=None, lattice_name="Lattice", ligand_name="Ligand"):
		"""Constructor for the MSExperiment object.

		Arguments
		---------
		path : string
			A path to a file containing experimental populations in the format "[Lattice] [Ligand] [population state 0] [population state 1]..."
		T : float
			An uncertainty in the measured population state abundances (default is 5%).
		sigma : float
			An uncertainty in the measured population state abundances (default is 5%). If None, read uncertainties from the experimental file.
		title : string
			A descriptive title for the experiment (default is the trimmed filename).
		"""
		
		assert os.path.isfile(path)

		# relevant file parameters. TODO: replace using getattr with defaults?
		keypairs = {
			"Lattice":lattice_name,
			"Ligand":ligand_name,
			"Temperature":T,
			"Error":sigma,
			"Title":title,
		}

		# dummy vars to execute the ITCExperimentBase base class constructor.
		V0, injections, dQ = 1.0, [1.0], [1.0]
		Cell, Syringe = {keypairs['Lattice']:1.0},{keypairs['Ligand']:1.0}
		ITCExperimentBase.__init__(self, keypairs['Temperature'], V0, injections, dQ, Cell, Syringe)

		# reset relevant attributes
		self.path = path
		self.Concentrations = []
		self.npops, self.npoints = None, 0

		data = []
		with open(path) as fh:
			for i, line in enumerate(fh):

				# ignore empty and comment lines
				if line.strip() == "":
					continue
				elif line[0] in _FILE_COMMENT: # comment
					if line[1] in _FILE_KEYPAIR: # convert "#@" to "@" to play nice with some plotting scripts
						line = line[1:]
					else:
						continue

				if line[0] in _FILE_KEYPAIR: # set a variable
					tmp = line.split()
					try:
						assert len(tmp) > 2
					except AssertionError:
						raise Exception("Found malformed experiment parameter specification at line %i, should be of the form \"@param = value\""%(i+1))

					try:
						assert tmp[0][1:] in keypairs
					except AssertionError:
						raise Exception("Found unknown parameter specification \"%s\" at line %i."%(tmp[0][1:],i+1))
					
					try:
						if type(keypairs[tmp[0][1:]]) is str or keypairs[tmp[0][1:]] is None: # title
							keypairs[tmp[0][1:]] = " ".join(tmp[2:])
						else:
							keypairs[tmp[0][1:]] = float(tmp[2])
					except ValueError:
						raise Exception("Found malformed parameter value (could not convert \"%s\" to float) at line %i"%(tmp[2],i+1))
					continue

				arr = line.split()
				if len(arr) < 3:
					raise Exception("Found malformed titration point (less than three columns) at line %i"%(i+1))		
				elif self.npops == None:
					self.npops = len(arr) -2
				elif self.npops != len(arr) -2:
					raise Exception("Found malformed titration point (inconsistent number of columns) at line %i"%(i+1))
				
				try:
					tmp = list(map(float,arr))
				except ValueError:
					raise Exception("Found malformed titration point (could not convert column values to floats) at line %i"%(i+1))

				data.extend(tmp[2:]) # strip the lattice/ligand concentration columns

				self.Concentrations.append({})
				self.Concentrations[-1][keypairs['Lattice']] = tmp[0]
				self.Concentrations[-1][keypairs['Ligand']] = tmp[1]

				self.npoints += self.npops

		if keypairs['Title'] is None:
			self.title = os.path.splitext(os.path.basename(self.path))[0]
		else:
			self.title = keypairs['Title']
		
		self.lattice_name, self.ligand_name = keypairs['Lattice'], keypairs['Ligand']

		if keypairs['Error'] is None:
			try:
				assert self.npoints % 2 == 0
				assert min(data[int(self.npoints/2):]) > 0.0
			except AssertionError:
				raise Exception("If sigma == None, must provide nonzero experimental uncertainties in the experimental file.")
				
			self.npoints = int(self.npoints/2) # first half of points matrix are experimental values, second half are uncertainties
			self.Concentrations = self.Concentrations[:self.npoints] # discard the duplicated concentration columns for sigmas
			self.PopIntens = numpy.array(data[:self.npoints]).reshape((int(self.npoints/self.npops),self.npops))
			self.PopSigmas = numpy.array(data[self.npoints:]).reshape((int(self.npoints/self.npops),self.npops))
		else:
			self.sigma = keypairs['Error']
			self.PopIntens = numpy.array(data).reshape((int(self.npoints/self.npops),self.npops))
			self.PopSigmas = numpy.full(self.PopIntens.shape,self.sigma)

		# normalize intensities to 1, accordingly scale their sigmas
		totals = self.PopIntens.sum(axis=1,keepdims=True)
		self.PopIntens /= totals
		self.PopSigmas /= totals

		# precompute variance (s**2) from sigmas
		self.PopSigmas = numpy.square(self.PopSigmas)
		self.sigma = numpy.sqrt(numpy.mean(self.PopSigmas))

		self.PopFits = numpy.zeros(self.PopIntens.shape)

		self.chisq = None
		self.initialized = True

	def __str__(self):
		"""Stringify the experiment to be suitable for display to the user
		
		Parameters
		----------
		None
		
		Returns
		-------
		string
			String containing the description and other experimental info.
		"""
		
		ret = "Title: %s\n"%(self.title)
		if (self.chisq != None):
			ret+= "Current chisq: %f\n"%(self.chisq)
		ret+= "Temperature: %0.2f K\n"%(self.T)
		ret+= "Titration points: %i\n"%(self.npoints/self.npops)
		ret+= "Stoichiometries: %i\n"%(self.npops)
		ret+= "Components:\n"
		ret+= "\tLattice: \"%s\"\n"%(self.lattice_name)
		ret+= "\tLigand: \"%s\"\n"%(self.ligand_name)
		return ret
	
[docs]	def make_plot(self, hardcopy=False, hardcopydir='.', hardcopyprefix='', hardcopytype='png'):
		"""Generate a stacked plot of the experimental populations, the fitted populations, and the residuals between the two.

		Arguments
		----------
		hardcopy : boolean
			Display the fit to the screen, or write it to a file?
		hardcopydir : string
			The directory to write the hardcopy to.
		hardcopyprefix : string
			A prefix to append to the experiment's title when generating the output plot filename.
		hardcopytype : string
			The output format for the plot (availability is dependent upon what backends matplotlib was compiled with).

		Returns
		-------
		None
		"""

		if _MATPLOTLIB_BACKEND != None:
			import matplotlib
			matplotlib.use(_MATPLOTLIB_BACKEND)
		import matplotlib.pyplot as pyplot

		if hardcopy: fig = pyplot.figure()

		pyplot.clf()
		pyplot.title(self.title)
		pyplot.ylabel("Experimental - Fit Abundance")
		pyplot.xlabel("Mass Populations")
		
		ax1 = pyplot.subplot(3,1,1)
		ax2 = pyplot.subplot(3,1,2)
		ax3 = pyplot.subplot(3,1,3)

		x_points = int(self.npoints/self.npops)

		xax_positions,xax_labels = [],[]
		width,space,left = 0.25,0.5,0.0
		for i in range(self.npops):
			bars1 = ax1.bar( [left + (j*width) for j in range(x_points)], [self.PopIntens[j][i] for j in range(x_points)], width=width, edgecolor='r' )
			bars2 = ax2.bar( [left + (j*width) for j in range(x_points)], [self.PopFits[j][i] for j in range(x_points)], width=width, edgecolor='r' )
			bars3 = ax3.bar( [left + (j*width) for j in range(x_points)], [self.PopIntens[j][i] - self.PopFits[j][i] for j in range(x_points)], width=width, edgecolor='r' )
			xax_positions.append( left + (x_points*width)/2.0 )
			xax_labels.append("%i"%i)
			left += (x_points*width)+space
	
			for j in range(x_points):
				color = 1.0 - (float(j) / x_points)
				bars1[j].set_color( (color, color, 1) )	
				bars2[j].set_color( (color, 1, color) )	
				bars3[j].set_color( (1, color, color) )

				bars1[j].set_edgecolor( (0,0,0) )	
				bars2[j].set_edgecolor( (0,0,0) )	
				bars3[j].set_edgecolor( (0,0,0) )	
		
		ax1.set_ylabel("Experimental")
		ax1.set_xticks( xax_positions )
		ax1.set_xticklabels( xax_labels)
		ax2.set_ylabel("Fit")
		ax2.set_xticks( xax_positions )
		ax2.set_xticklabels( xax_labels)
		ax3.set_ylabel("Residuals")
		ax3.set_xticks( xax_positions )
		ax3.set_xticklabels( xax_labels)
			
		pyplot.draw()

		if hardcopy:
			fig.savefig( os.path.join(hardcopydir,"%s%s.%s"%(hardcopyprefix,self.title,hardcopytype)), bbox_inches='tight')
			pyplot.close(fig)
		else:
			pyplot.show()

[docs]	def make_population_plot(self, dataset='fit', hardcopy=False, hardcopydir='.', hardcopyprefix='', hardcopytype='png'):
		assert dataset in ("fit", "experimental", "residuals")

		if not self.initialized and (dataset=="fit" or dataset=="residuals"):
			raise AssertionError("Fit data has not yet been generated for this experiment")

		if _MATPLOTLIB_BACKEND != None:
			import matplotlib
			matplotlib.use(_MATPLOTLIB_BACKEND)
		
		import matplotlib.pyplot as pyplot
		from mpl_toolkits.mplot3d import axes3d, Axes3D
		
		pyplot.clf()
		fig = pyplot.figure()
		ax = Axes3D(fig)
#		ax = fig.add_subplot(1, 1, 1, projection='3d') matplotlib version issue

		xs = range(int(self.npoints / self.npops)) # number of concentrations
		for i in range(self.npops): # iterating over each configuration 

			ys_fit = numpy.array([pop[i] for pop in self.PopFits])
			ys_exp = numpy.array([pop[i] for pop in self.PopIntens])
			
			if dataset == "fit":
				ys = ys_fit
			elif dataset == "experimental":
				ys = ys_exp
			else:
				ys = ys_exp - ys_fit

			# z-order doesn't actually do anything afaik, it's just here to show I tried
			ax.bar(xs, ys, zs=i, zdir='y', zorder=(self.npops-i), color=pyplot.cm.jet(1.0 * i / self.npops), alpha=0.8)

		ax.set_title("%s:(%s)"%(self.title, dataset))
		ax.set_xlabel('Titration Point')
		ax.set_ylabel('Stoichiometry')
		ax.set_zlabel('Abundance')
		ax.set_zlim([0,1])
		pyplot.draw()

		if hardcopy:
			fig.savefig( os.path.join(hardcopydir,"%s%s.%s"%(hardcopyprefix,self.title,hardcopytype)), bbox_inches='tight')
			pyplot.close(fig)
		else:
			pyplot.show()

[docs]	def export_to_file(self, path):
		"""Export the experimental data and any fit to the specified file path.

		Arguments
		---------
		path : string
			The filesystem path to write the output to.

		Returns
		-------
		None			
		"""
		fh = open(path, 'w')
		
		fh.write("# %s"%self.title)
		fh.write("# Experimental data\n")
		for i in range(int(self.npoints/self.npops)):
			fh.write("%0.3E\t%0.3E\t%s\n" % (
				self.Concentrations[i][self.lattice_name],
				self.Concentrations[i][self.ligand_name],
				"\t".join(["%f"%f for f in self.PopIntens[i]])))

		fh.write("# Fitted data\n")
		for i in range(int(self.npoints/self.npops)):
			fh.write("%0.3E\t%0.3E\t%s\n" % (
				self.Concentrations[i][self.lattice_name],
				self.Concentrations[i][self.ligand_name],
				"\t".join(["%f"%f for f in self.PopFits[i]])))

		fh.close()
			
[docs]	def get_chisq(self, pops, writeback=False):
		"""Calculate the chi-square goodness-of-fit between the experimental population abundances and the fitted ones.

		Arguments
		---------
		pops : ndarray
			The normalized abundances of each lattice+ligand stoichiometries at each of the provided component concentrations.
		writeback : boolean
			Update the experiment's simulated heat attribute (dQ_fit) with the provided Qs?
			
		Returns
		-------
		float
			The goodness of the fit, as a reduced chi-square.

		Notes
		-----
			This method takes advantage of the fact that ITCSim doesn't inspect the data that is returned by the model, and instead lets the associated experiment handle the goodness-of-fit.
			The only caveat of course is that model must return the same number of lattice+ligand stoichiometries as are present in the experimental results.
			Variances (sigma**2) must have been precomputed as self.PopSigmas (perhaps should be self.PopVariances?)
		"""

		assert self.PopIntens.shape == pops.shape

		self.chisq = numpy.sum(numpy.square(self.PopIntens - pops) / self.PopSigmas) / self.PopIntens.size

		if writeback:
			self.PopFits = pops		

		return self.chisq

[docs]class MSExperimentSynthetic(MSExperiment):
	"""A MSExperiment-derived class that can be used to simulate a mass spec experiment"""
		
	def __init__(self, lattice_concs, ligand_concs, T=298.15, noise=0.02, title=None, lattice_name="Lattice", ligand_name="Ligand"):
		"""Constructor for the MSExperimentSynthetic object.

		Arguments
		---------
		lattice_concentrations : list of floats
			Concentrations of the lattice component (in M)
		lattice_concentrations : list of floats
			Concentrations of the ligand component (in M)
		T : float
			The temperature of the experiment to simulate.
		noise : string
			The magnitude of noise to add to the simulated data.
		title :
			The title to assign to the experiment
		"""

		assert len(lattice_concs) == len(ligand_concs)

		self.sigma = noise
		if title is not None:
			self.title = title
		else:
			self.title = uuid.uuid4()

		self.lattice_name, self.ligand_name = lattice_name, ligand_name

		# patches to play nice with the ITCExperimentBase base class constructor. Do we even need this?
		V0,injections,dQ = 1.0,[1.0],[1.0]
		Cell,Syringe = {self.lattice_name:1.0},{self.ligand_name:1.0}
		self.path = None

		ITCExperimentBase.__init__(self, T, V0, injections, dQ, Cell, Syringe, title=self.title)

		# reset key attributes now
		self.Concentrations = [{self.lattice_name:lattice_concs[i],self.ligand_name:ligand_concs[i]} for i in range(len(lattice_concs))]

		self.initialized = False
		self.chisq = None

	def __str__(self):
		if not self.initialized:
			return "Synthetic experiment has not yet been generated - attach to a simulator and run to populate"
		return MSExperiment.__str__(self)

[docs]	def get_chisq(self, pops, writeback=False):
		"""Extends the base MSExperiment class to overwrite the PopFits attribute on the first call"""

		if not self.initialized:
			self.PopIntens = numpy.absolute(numpy.random.normal(pops,self.sigma))
			self.PopSigmas = numpy.full(self.PopIntens.shape,self.sigma**2)
			self.PopFits = numpy.zeros(self.PopIntens.shape)
			self.npoints, self.npops = self.PopIntens.shape
			self.npoints = self.npoints*self.npops
			self.initialized = True

		return MSExperiment.get_chisq(self, pops, writeback)


[docs]class MSModel(Ising):
	"""
	Class used to convert an existing Ising-based binding model to one that returns lattice+ligand stoichiometry abundances suitable for fitting mass spec data.
	
	Note #1: Inherits all attributes of the provided class instance, serves as a passthru using the provided class' set_energies().
	Note #2: If the provided class instance has overwritten any of the Ising class methods (other than set_energies), you're best off not using this convertor class."""

	def __init__(self,model):
		
		def _recursor(base, base_list):
			if base.__bases__ == ():
				return []
			else:
				for b in base.__bases__:
					base_list.append( b.__name__ )
					_recursor( b, base_list)
				return base_list

		# ensure that an Ising-based model has been in fact passed
		assert "Ising" in _recursor(model.__class__, [])

		# copy all of the (initialized) parent model attributes
		self.__dict__ = model.__dict__.copy()
		self.model = model

		self.__doc__ = "A model adapted from base type \"%s.%s\" for fitting mass spectrometric population data.\n\nOriginal docstring:\n%s"%(self.model.__module__,self.model.__class__.__name__,self.model.__doc__) 

[docs]	def set_energies(self,T0,T):
		"""Update the parent model parameters with whatever we currently have, and set the parent model config energies"""
		self.units = self.model.units
		self.model.params = self.params
		self.model.set_energies(T0,T)

[docs]	def Q(self,T0,T,concentrations):
		"""Return a 2D numpy array consisting of the base model's relative stoichiometries at each of the provided component concentrations.
		
		Arguments
		---------
		T0 : float
			The reference temperature of the simulation.
		T : float
			The temperature of the experiment to simulate.
		concentrations : list of dicts
			The concentrations of each component at each titration point.
		
		Returns
		-------
		ndarray
			The normalized abundances of each lattice+ligand stoichiometries at each of the provided component concentrations.
		"""

		# set the energies of this model's configs from the base model
		self.set_energies(T0,T)
		
		ret = numpy.zeros((len(concentrations),self.model.nsites+1))
		for i,c in enumerate(concentrations):
			
			# set the probabilities (weights) for all configurations
			self.model.set_probabilities(c[self.lattice_name],c[self.ligand_name],T)
			
			# for each stoichiometry, add up the configuration weights
			for j in range(self.model.nconfigs):
				ret[i][self.model.bound[j]] += self.model.weights[j]

		return ret