Module dadi.Triallele.TriSpectrum_mod

Contains triallelic Spectrum object

Expand source code
"""
Contains triallelic Spectrum object
"""

import numpy
import numpy as np

import os

class TriSpectrum(numpy.ma.masked_array):
    """
    Represents a triallelic frequency spectrum.
    
    Similar structure to biallelic spectra as a masked array, but specific
    to the triallelic spectrum, so that infeasible entries are masked,
    and operations performed on triallelic spectra are different than 
    typical spectra
    
    The constructor has the format:
        fs = dadi.Triallele.TriSpectrum(data, mask, mask_infeasible, 
                                        data_folded_major,
                                        data_folded_ancestral, extrap_x, 
                                        extrap_t)
        
        data: The triallelic frequency spectrum data
        mask: An optional array of the same size as data, similar to dadi.Spectrum
        data_folded_major: If True, it is assumed that the input data is folded 
                           for the major and minor derived alleles
        data_folded_ancestral: If True, it is assumed that the input data is folded
                               to account for uncertainty in the ancestral state. Note
                               that if True, data_folded_major must also be True.
        check_folding_major: If True and data_folded_ancestral=True, the data and
                             mask will be checked to ensure they are consistent
        check_folding_ancestral: If True and data_folded_ancestral=True, the data and
                                 mask will be checked to ensure they are consistent
        extrap_x: Optional floating point value specifying x value to use in
                  extrapolation.
        extrap_t: Optional floating point value specifying t value to use in
                  extrapolation.
    """
    def __new__(subtype, data, mask=numpy.ma.nomask, mask_infeasible=True, 
                data_folded_major=None, check_folding_major=True,
                data_folded_ancestral=None, check_folding_ancestral=True,
                dtype=float, copy=True, fill_value=numpy.nan, keep_mask=True,
                shrink=True, extrap_x=None, extrap_t=None):
        data = numpy.asanyarray(data)
        
        if mask is numpy.ma.nomask:
            mask = numpy.ma.make_mask_none(data.shape)
        
        subarr = numpy.ma.masked_array(data, mask=mask, dtype=dtype, copy=copy,
                                       fill_value=fill_value, keep_mask=True, 
                                       shrink=True)
        subarr = subarr.view(subtype)
        if hasattr(data, 'folded_major'):
            if data_folded_major is None or data_folded_major == data.folded_major:
                subarr.folded_major = data.folded_major
            elif data_folded_major != data.folded_major:
                raise ValueError('Data does not have same major/minor folding status as '
                                 'was called for in Spectrum constructor.')
        elif data_folded_major is not None:
            subarr.folded_major = data_folded_major
        else:
            subarr.folded_major = False
        
        if hasattr(data, 'folded_ancestral'):
            if data_folded_ancestral is None or data_folded_ancestral == data.folded_ancestral:
                subarr.folded_ancestral = data.folded_ancestral
            elif data_folded_ancestral != data.folded_ancestral:
                raise ValueError('Data does not have same ancestral folding status as '
                                 'was called for in Spectrum constructor.')
        elif data_folded_ancestral is not None:
            subarr.folded_ancestral = data_folded_ancestral
        else:
            subarr.folded_ancestral = False
        
        ### XXX To do: ensure that all goes well when creating the TriSpectrum, come
        ###     back to this
        # Check that if we're declaring that the input data is folded, it actually is,
        # and the mask reflects this.        
        
        ### XXX We also need to enforce that if the data is folded ancestra, it is also
        ###     folded major
        
        if mask_infeasible:
            subarr.mask_infeasible()
        
        subarr.extrap_x = extrap_x
        subarr.extrap_t = extrap_t
        
        return subarr
        
    # See https://scipy.github.io/old-wiki/pages/Subclasses.html for information on
    # __array_finalize__ and __array_wrap__ methods.
    #
    # We need these methods to ensure extra attributes get copied along when
    # we do arithmetic on the FS.
    def __array_finalize__(self, obj):
        if obj is None: 
            return
        numpy.ma.masked_array.__array_finalize__(self, obj)
        self.folded_major = getattr(obj, 'extrap_t', 'extrap_t')
        self.folded_major = getattr(obj, 'extrap_x', 'extrap_x')
        self.folded_major = getattr(obj, 'folded_major', 'unspecified')
        self.folded_ancestral = getattr(obj, 'folded_ancestral', 'unspecified')
    def __array_wrap__(self, obj, context=None):
        result = obj.view(type(self))
        result = numpy.ma.masked_array.__array_wrap__(self, obj, 
                                                      context=context)
        result.folded_major = self.folded_major
        result.folded_ancestral = self.folded_ancestral
        result.extrap_x = self.extrap_x
        result.extrap_t = self.extrap_t
        return result
    def _update_from(self, obj):
        numpy.ma.masked_array._update_from(self, obj)
        if hasattr(obj, 'folded_major'):
            self.folded_major = obj.folded_major
        if hasattr(obj, 'folded_ancestral'):
            self.folded_ancestral = obj.folded_ancestral
        if hasattr(obj, 'extrap_x'):
            self.extrap_x = obj.extrap_x
        if hasattr(obj, 'extrap_t'):
            self.extrap_t = obj.extrap_t
    # masked_array has priority 15.
    __array_priority__ = 20

    def __repr__(self):
        return 'TriSpectrum(%s, folded_major=%s, folded_ancestral=%s)'\
                % (str(self), str(self.folded_major), str(self.folded_ancestral))

    def mask_infeasible(self):
        """
        Mask any infeasible entries.
        """
        self.mask[:,0] = True
        self.mask[0,:] = True
        for ii in range(len(self))[1:]:
            self.mask[ii,len(self)-ii-1:] = True
    
    def unfold(self):
        if not self.folded_major:
            raise ValueError('Input Spectrum is not folded.')
        data = self.data
        unfolded = TriSpectrum(data, mask_infeasible=True)
        unfolded.extrap_x = self.extrap_x
        unfolded.extrap_t = self.extrap_t
        return unfolded

    def _get_sample_size(self):
        return numpy.asarray(self.shape)[0] - 1
    sample_size = property(_get_sample_size)

    def _get_sample_sizes(self):
        return [self._get_sample_size()]
    sample_sizes = property(_get_sample_sizes)
    
    def _ensure_shape_and_dimension(self):
        """
        Ensure that fs has Npop dimensions.
        """
        pass
    
    # Make from_file a static method, so we can use it without an instance.
    @staticmethod
    def from_file(fid, mask_infeasible=True, return_comments=False):
        """
        Read frequency spectrum from file.

        fid: string with file name to read from or an open file object.
        mask_infeasible: If True, mask the infeasible entries in the triallelic spectrum.
        return_comments: If true, the return value is (fs, comments), where
                         comments is a list of strings containing the comments
                         from the file (without #'s).

        See to_file method for details on the file format.
        """
        newfile = False
        # Try to read from fid. If we can't, assume it's something that we can
        # use to open a file.
        if not hasattr(fid, 'read'):
            newfile = True
            fid = open(fid, 'r')

        line = fid.readline()
        # Strip out the comments
        comments = []
        while line.startswith('#'):
            comments.append(line[1:].strip())
            line = fid.readline()

        # Read the shape of the data
        shape,folded_major,folded_ancestral,extrap_x,extrap_t = line.split()
        shape = [int(shape)+1,int(shape)+1]

        data = numpy.fromstring(fid.readline().strip(), 
                                count=numpy.product(shape), sep=' ')
        # fromfile returns a 1-d array. Reshape it to the proper form.
        data = data.reshape(*shape)

        maskline = fid.readline().strip()
        mask = numpy.fromstring(maskline, 
                                count=numpy.product(shape), sep=' ')
        mask = mask.reshape(*shape)
        
        if folded_major == 'folded_major':
            folded_major = True
        else:
            folded_major = False
        if folded_ancestral == 'folded_ancestral':
            folded_ancestral = True
        else:
            folded_ancestral = False
        if extrap_x == 'None':
            extrap_x = None
        else:
            extrap_x = float(extrap_x)
        if extrap_t == 'None':
            extrap_t = None
        else:
            extrap_t = float(extrap_t)

        # If we opened a new file, clean it up.
        if newfile:
            fid.close()

        fs = TriSpectrum(data, mask, mask_infeasible, data_folded_ancestral=folded_ancestral,
                      data_folded_major=folded_major)
        fs.extrap_x = extrap_x
        fs.extrap_t = extrap_t
        if not return_comments:
            return fs
        else:
            return fs,comments
    
    def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True):
        """
        Write frequency spectrum to file.
    
        fid: string with file name to write to or an open file object.
        precision: precision with which to write out entries of the SFS. (They 
                   are formated via %.<p>g, where <p> is the precision.)
        comment lines: list of strings to be used as comment lines in the header
                       of the output file.
        foldmaskinfo: If False, folding and mask and population label
                      information will not be saved. This conforms to the file
                      format for dadi versions prior to 1.3.0.

        The file format is:
            # Any number of comment lines beginning with a '#'
            A single line containing N integers giving the dimensions of the fs
              array. So this line would be '5 5 3' for an SFS that was 5x5x3.
              (That would be 4x4x2 *samples*.)
            On the *same line*, the string 'folded_major' or 'unfolded_major' 
              denoting the folding status of the array
            On the *same line*, the string 'folded_ancestral' or 'unfolded_ancestral' 
              denoting the folding status of the array
            A single line giving the array elements. The order of elements is 
              e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ...
            A single line giving the elements of the mask in the same order as
              the data line. '1' indicates masked, '0' indicates unmasked.
        """
        # Open the file object.
        newfile = False
        if not hasattr(fid, 'write'):
            newfile = True
            fid = open(fid, 'w')

        # Write comments
        for line in comment_lines:
            fid.write('# ')
            fid.write(line.strip())
            fid.write(os.linesep)

        # Write out the shape of the fs
        fid.write('{0} '.format(self.sample_size))

        if foldmaskinfo:
            if not self.folded_major:
                fid.write('unfolded_major ')
            else:
                fid.write('folded_major ')
            if not self.folded_ancestral:
                fid.write('unfolded_ancestral ')
            else:
                fid.write('folded_ancestral ')
        
        if extrapinfo:
            if not self.extrap_x:
                fid.write('None ')
            else:
                fid.write('{0} '.format(self.extrap_x))
            if not self.extrap_t:
                fid.write('None')
            else:
                fid.write('{0}'.format(self.extrap_t))

        fid.write(os.linesep)

        # Write the data to the file
        self.data.tofile(fid, ' ', '%%.%ig' % precision)
        fid.write(os.linesep)

        if foldmaskinfo:
            # Write the mask to the file
            numpy.asarray(self.mask,int).tofile(fid, ' ')
            fid.write(os.linesep)

        # Close file
        if newfile:
            fid.close()

    tofile = to_file
    
    def fold_major(self):
        if self.folded_major:
            raise ValueError('Input Spectrum is already folded.')
        folded = self + np.transpose(self)
        for ii in range(len(folded)):
            folded[ii,ii] /= 2
        folded.mask[0,:] = True
        folded.mask[:,0] = True
        for ii in range(len(folded)):
            folded[ii,ii+1:] = 0
            folded.mask[ii,ii+1:] = True
            folded.mask[ii,len(self)-1-ii:] = True
        
        folded.folded_major = True
        folded.folded_ancestral = self.folded_ancestral
        folded.extrap_x = self.extrap_x
        folded.extrap_t = self.extrap_t
        return folded

    def log(self):
        """
        Return the natural logarithm of the entries of the frequency spectrum.

        Only necessary because numpy.ma.log now fails to propagate extra
        attributes after numpy 1.10.
        """
        logfs = numpy.ma.log(self)
        logfs.folded_major = self.folded_major
        logfs.folded_ancestral = self.folded_ancestral
        logfs.extrap_x = self.extrap_x
        logfs.extrap_t = self.extrap_t
        return logfs
    
    def fold_ancestral(self):
        if self.folded_ancestral:
            raise ValueError('Input Spectrum is already folded.')
        folded = 0*self
        ns = len(folded)-1
        for ii in range(ns):
            for jj in range(ns):
                kk = ns-ii-jj
                if self.mask[ii,jj] == True:
                    continue
                elif ii <= kk and jj <= kk:
                    if ii >= jj:
                        folded[ii,jj] += self[ii,jj]
                    else:
                        folded[jj,ii] += self[ii,jj]
                elif ii > kk and jj <= kk:
                    folded[kk,jj] += self[ii,jj]
                elif ii <= kk and jj > kk:
                    folded[kk,ii] += self[ii,jj]
                else: # ii > kk and jj > kk
                    if ii >= jj:
                        folded[jj,kk] += self[ii,jj]
                    else:
                        folded[ii,kk] += self[ii,jj]
        # mask if not a valid entry for ancestrally folded spectrum
        for ii in range(ns):
            for jj in range(ns):
                kk = ns-ii-jj
                if not (kk>=ii>=jj):
                    folded.mask[ii,jj] = True
        
        folded.folded_major = True
        folded.folded_ancestral = True
        folded.extrap_x = self.extrap_x
        folded.extrap_t = self.extrap_t
        return folded
    
    def S(self):
        """
        Segregating sites
        """
        return self.sum()
    
    @staticmethod
    def from_phi(phi, ns, other_stuff):
        pass

    # Ensures that when arithmetic is done with TriSpectrum objects,
    # attributes are preserved. For details, see similar code in
    # dadi.Spectrum_mod
    for method in ['__add__','__radd__','__sub__','__rsub__','__mul__',
                   '__rmul__','__div__','__rdiv__','__truediv__','__rtruediv__',
                   '__floordiv__','__rfloordiv__','__rpow__','__pow__']:
        exec("""
def %(method)s(self, other):
    self._check_other_folding(other)
    if isinstance(other, numpy.ma.masked_array):
        newdata = self.data.%(method)s (other.data)
        newmask = numpy.ma.mask_or(self.mask, other.mask)
    else:
        newdata = self.data.%(method)s (other)
        newmask = self.mask
    if hasattr(other, 'extrap_x') and self.extrap_x != other.extrap_x:
        extrap_x = None
    else:
        extrap_x = self.extrap_x
    if hasattr(other, 'extrap_t') and self.extrap_t != other.extrap_t:
        extrap_t = None
    else:
        extrap_t = self.extrap_t
    outfs = self.__class__.__new__(self.__class__, newdata, newmask, 
                                   mask_infeasible=False, 
                                   data_folded_major=self.folded_major,
                                   data_folded_ancestral=self.folded_ancestral,
                                   extrap_x=extrap_x, extrap_t=extrap_t)
    return outfs
""" % {'method':method})

    # Methods that modify the Spectrum in-place.
    for method in ['__iadd__','__isub__','__imul__','__idiv__',
                   '__itruediv__','__ifloordiv__','__ipow__']:
        exec("""
def %(method)s(self, other):
    self._check_other_folding(other)
    if isinstance(other, numpy.ma.masked_array):
        self.data.%(method)s (other.data)
        self.mask = numpy.ma.mask_or(self.mask, other.mask)
    else:
        self.data.%(method)s (other)
    if hasattr(other, 'extrap_x') and self.extrap_x != other.extrap_x:
        self.extrap_x = None
    if hasattr(other, 'extrap_t') and self.extrap_t != other.extrap_t:
        self.extrap_t = None
    return self
""" % {'method':method})

    def _check_other_folding(self, other):
        """
        Ensure other Spectrum has same .folded status
        """
        if isinstance(other, self.__class__)\
           and (other.folded_major != self.folded_major
                or other.folded_ancestral != self.folded_ancestral):
            raise ValueError('Cannot operate with a folded Spectrum and an '
                             'unfolded one.')


# Allow TriSpectrum objects to be pickled. 
# See http://effbot.org/librarybook/copy-reg.htm
try:
    import copyreg
except:
    # For Python 2.x compatibility
    import copy_reg as copyreg
def TriSpectrum_pickler(fs):
    # Collect all the info necessary to save the state of a TriSpectrum
    return TriSpectrum_unpickler, (fs.data, fs.mask, fs.folded_major,
                                   fs.folded_ancestral,
                                   fs.extrap_x, fs.extrap_t)
def TriSpectrum_unpickler(data, mask, folded_major, folded_ancestral,
                          extrap_x, extrap_t):
    # Use that info to recreate the TriSpectrum
    return TriSpectrum(data, mask, mask_infeasible=False,
                       data_folded_major=folded_major,
                       data_folded_ancestral=folded_ancestral,
                       extrap_x=extrap_x, extrap_t=extrap_t)
copyreg.pickle(TriSpectrum, TriSpectrum_pickler, TriSpectrum_unpickler)

Functions

def TriSpectrum_pickler(fs)
Expand source code
def TriSpectrum_pickler(fs):
    # Collect all the info necessary to save the state of a TriSpectrum
    return TriSpectrum_unpickler, (fs.data, fs.mask, fs.folded_major,
                                   fs.folded_ancestral,
                                   fs.extrap_x, fs.extrap_t)
def TriSpectrum_unpickler(data, mask, folded_major, folded_ancestral, extrap_x, extrap_t)
Expand source code
def TriSpectrum_unpickler(data, mask, folded_major, folded_ancestral,
                          extrap_x, extrap_t):
    # Use that info to recreate the TriSpectrum
    return TriSpectrum(data, mask, mask_infeasible=False,
                       data_folded_major=folded_major,
                       data_folded_ancestral=folded_ancestral,
                       extrap_x=extrap_x, extrap_t=extrap_t)

Classes

class TriSpectrum (data, mask=False, mask_infeasible=True, data_folded_major=None, check_folding_major=True, data_folded_ancestral=None, check_folding_ancestral=True, dtype=builtins.float, copy=True, fill_value=nan, keep_mask=True, shrink=True, extrap_x=None, extrap_t=None)

Represents a triallelic frequency spectrum.

Similar structure to biallelic spectra as a masked array, but specific to the triallelic spectrum, so that infeasible entries are masked, and operations performed on triallelic spectra are different than typical spectra

The constructor has the format: fs = dadi.Triallele.TriSpectrum(data, mask, mask_infeasible, data_folded_major, data_folded_ancestral, extrap_x, extrap_t)

data: The triallelic frequency spectrum data
mask: An optional array of the same size as data, similar to dadi.Spectrum
data_folded_major: If True, it is assumed that the input data is folded 
                   for the major and minor derived alleles
data_folded_ancestral: If True, it is assumed that the input data is folded
                       to account for uncertainty in the ancestral state. Note
                       that if True, data_folded_major must also be True.
check_folding_major: If True and data_folded_ancestral=True, the data and
                     mask will be checked to ensure they are consistent
check_folding_ancestral: If True and data_folded_ancestral=True, the data and
                         mask will be checked to ensure they are consistent
extrap_x: Optional floating point value specifying x value to use in
          extrapolation.
extrap_t: Optional floating point value specifying t value to use in
          extrapolation.
Expand source code
class TriSpectrum(numpy.ma.masked_array):
    """
    Represents a triallelic frequency spectrum.
    
    Similar structure to biallelic spectra as a masked array, but specific
    to the triallelic spectrum, so that infeasible entries are masked,
    and operations performed on triallelic spectra are different than 
    typical spectra
    
    The constructor has the format:
        fs = dadi.Triallele.TriSpectrum(data, mask, mask_infeasible, 
                                        data_folded_major,
                                        data_folded_ancestral, extrap_x, 
                                        extrap_t)
        
        data: The triallelic frequency spectrum data
        mask: An optional array of the same size as data, similar to dadi.Spectrum
        data_folded_major: If True, it is assumed that the input data is folded 
                           for the major and minor derived alleles
        data_folded_ancestral: If True, it is assumed that the input data is folded
                               to account for uncertainty in the ancestral state. Note
                               that if True, data_folded_major must also be True.
        check_folding_major: If True and data_folded_ancestral=True, the data and
                             mask will be checked to ensure they are consistent
        check_folding_ancestral: If True and data_folded_ancestral=True, the data and
                                 mask will be checked to ensure they are consistent
        extrap_x: Optional floating point value specifying x value to use in
                  extrapolation.
        extrap_t: Optional floating point value specifying t value to use in
                  extrapolation.
    """
    def __new__(subtype, data, mask=numpy.ma.nomask, mask_infeasible=True, 
                data_folded_major=None, check_folding_major=True,
                data_folded_ancestral=None, check_folding_ancestral=True,
                dtype=float, copy=True, fill_value=numpy.nan, keep_mask=True,
                shrink=True, extrap_x=None, extrap_t=None):
        data = numpy.asanyarray(data)
        
        if mask is numpy.ma.nomask:
            mask = numpy.ma.make_mask_none(data.shape)
        
        subarr = numpy.ma.masked_array(data, mask=mask, dtype=dtype, copy=copy,
                                       fill_value=fill_value, keep_mask=True, 
                                       shrink=True)
        subarr = subarr.view(subtype)
        if hasattr(data, 'folded_major'):
            if data_folded_major is None or data_folded_major == data.folded_major:
                subarr.folded_major = data.folded_major
            elif data_folded_major != data.folded_major:
                raise ValueError('Data does not have same major/minor folding status as '
                                 'was called for in Spectrum constructor.')
        elif data_folded_major is not None:
            subarr.folded_major = data_folded_major
        else:
            subarr.folded_major = False
        
        if hasattr(data, 'folded_ancestral'):
            if data_folded_ancestral is None or data_folded_ancestral == data.folded_ancestral:
                subarr.folded_ancestral = data.folded_ancestral
            elif data_folded_ancestral != data.folded_ancestral:
                raise ValueError('Data does not have same ancestral folding status as '
                                 'was called for in Spectrum constructor.')
        elif data_folded_ancestral is not None:
            subarr.folded_ancestral = data_folded_ancestral
        else:
            subarr.folded_ancestral = False
        
        ### XXX To do: ensure that all goes well when creating the TriSpectrum, come
        ###     back to this
        # Check that if we're declaring that the input data is folded, it actually is,
        # and the mask reflects this.        
        
        ### XXX We also need to enforce that if the data is folded ancestra, it is also
        ###     folded major
        
        if mask_infeasible:
            subarr.mask_infeasible()
        
        subarr.extrap_x = extrap_x
        subarr.extrap_t = extrap_t
        
        return subarr
        
    # See https://scipy.github.io/old-wiki/pages/Subclasses.html for information on
    # __array_finalize__ and __array_wrap__ methods.
    #
    # We need these methods to ensure extra attributes get copied along when
    # we do arithmetic on the FS.
    def __array_finalize__(self, obj):
        if obj is None: 
            return
        numpy.ma.masked_array.__array_finalize__(self, obj)
        self.folded_major = getattr(obj, 'extrap_t', 'extrap_t')
        self.folded_major = getattr(obj, 'extrap_x', 'extrap_x')
        self.folded_major = getattr(obj, 'folded_major', 'unspecified')
        self.folded_ancestral = getattr(obj, 'folded_ancestral', 'unspecified')
    def __array_wrap__(self, obj, context=None):
        result = obj.view(type(self))
        result = numpy.ma.masked_array.__array_wrap__(self, obj, 
                                                      context=context)
        result.folded_major = self.folded_major
        result.folded_ancestral = self.folded_ancestral
        result.extrap_x = self.extrap_x
        result.extrap_t = self.extrap_t
        return result
    def _update_from(self, obj):
        numpy.ma.masked_array._update_from(self, obj)
        if hasattr(obj, 'folded_major'):
            self.folded_major = obj.folded_major
        if hasattr(obj, 'folded_ancestral'):
            self.folded_ancestral = obj.folded_ancestral
        if hasattr(obj, 'extrap_x'):
            self.extrap_x = obj.extrap_x
        if hasattr(obj, 'extrap_t'):
            self.extrap_t = obj.extrap_t
    # masked_array has priority 15.
    __array_priority__ = 20

    def __repr__(self):
        return 'TriSpectrum(%s, folded_major=%s, folded_ancestral=%s)'\
                % (str(self), str(self.folded_major), str(self.folded_ancestral))

    def mask_infeasible(self):
        """
        Mask any infeasible entries.
        """
        self.mask[:,0] = True
        self.mask[0,:] = True
        for ii in range(len(self))[1:]:
            self.mask[ii,len(self)-ii-1:] = True
    
    def unfold(self):
        if not self.folded_major:
            raise ValueError('Input Spectrum is not folded.')
        data = self.data
        unfolded = TriSpectrum(data, mask_infeasible=True)
        unfolded.extrap_x = self.extrap_x
        unfolded.extrap_t = self.extrap_t
        return unfolded

    def _get_sample_size(self):
        return numpy.asarray(self.shape)[0] - 1
    sample_size = property(_get_sample_size)

    def _get_sample_sizes(self):
        return [self._get_sample_size()]
    sample_sizes = property(_get_sample_sizes)
    
    def _ensure_shape_and_dimension(self):
        """
        Ensure that fs has Npop dimensions.
        """
        pass
    
    # Make from_file a static method, so we can use it without an instance.
    @staticmethod
    def from_file(fid, mask_infeasible=True, return_comments=False):
        """
        Read frequency spectrum from file.

        fid: string with file name to read from or an open file object.
        mask_infeasible: If True, mask the infeasible entries in the triallelic spectrum.
        return_comments: If true, the return value is (fs, comments), where
                         comments is a list of strings containing the comments
                         from the file (without #'s).

        See to_file method for details on the file format.
        """
        newfile = False
        # Try to read from fid. If we can't, assume it's something that we can
        # use to open a file.
        if not hasattr(fid, 'read'):
            newfile = True
            fid = open(fid, 'r')

        line = fid.readline()
        # Strip out the comments
        comments = []
        while line.startswith('#'):
            comments.append(line[1:].strip())
            line = fid.readline()

        # Read the shape of the data
        shape,folded_major,folded_ancestral,extrap_x,extrap_t = line.split()
        shape = [int(shape)+1,int(shape)+1]

        data = numpy.fromstring(fid.readline().strip(), 
                                count=numpy.product(shape), sep=' ')
        # fromfile returns a 1-d array. Reshape it to the proper form.
        data = data.reshape(*shape)

        maskline = fid.readline().strip()
        mask = numpy.fromstring(maskline, 
                                count=numpy.product(shape), sep=' ')
        mask = mask.reshape(*shape)
        
        if folded_major == 'folded_major':
            folded_major = True
        else:
            folded_major = False
        if folded_ancestral == 'folded_ancestral':
            folded_ancestral = True
        else:
            folded_ancestral = False
        if extrap_x == 'None':
            extrap_x = None
        else:
            extrap_x = float(extrap_x)
        if extrap_t == 'None':
            extrap_t = None
        else:
            extrap_t = float(extrap_t)

        # If we opened a new file, clean it up.
        if newfile:
            fid.close()

        fs = TriSpectrum(data, mask, mask_infeasible, data_folded_ancestral=folded_ancestral,
                      data_folded_major=folded_major)
        fs.extrap_x = extrap_x
        fs.extrap_t = extrap_t
        if not return_comments:
            return fs
        else:
            return fs,comments
    
    def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True):
        """
        Write frequency spectrum to file.
    
        fid: string with file name to write to or an open file object.
        precision: precision with which to write out entries of the SFS. (They 
                   are formated via %.<p>g, where <p> is the precision.)
        comment lines: list of strings to be used as comment lines in the header
                       of the output file.
        foldmaskinfo: If False, folding and mask and population label
                      information will not be saved. This conforms to the file
                      format for dadi versions prior to 1.3.0.

        The file format is:
            # Any number of comment lines beginning with a '#'
            A single line containing N integers giving the dimensions of the fs
              array. So this line would be '5 5 3' for an SFS that was 5x5x3.
              (That would be 4x4x2 *samples*.)
            On the *same line*, the string 'folded_major' or 'unfolded_major' 
              denoting the folding status of the array
            On the *same line*, the string 'folded_ancestral' or 'unfolded_ancestral' 
              denoting the folding status of the array
            A single line giving the array elements. The order of elements is 
              e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ...
            A single line giving the elements of the mask in the same order as
              the data line. '1' indicates masked, '0' indicates unmasked.
        """
        # Open the file object.
        newfile = False
        if not hasattr(fid, 'write'):
            newfile = True
            fid = open(fid, 'w')

        # Write comments
        for line in comment_lines:
            fid.write('# ')
            fid.write(line.strip())
            fid.write(os.linesep)

        # Write out the shape of the fs
        fid.write('{0} '.format(self.sample_size))

        if foldmaskinfo:
            if not self.folded_major:
                fid.write('unfolded_major ')
            else:
                fid.write('folded_major ')
            if not self.folded_ancestral:
                fid.write('unfolded_ancestral ')
            else:
                fid.write('folded_ancestral ')
        
        if extrapinfo:
            if not self.extrap_x:
                fid.write('None ')
            else:
                fid.write('{0} '.format(self.extrap_x))
            if not self.extrap_t:
                fid.write('None')
            else:
                fid.write('{0}'.format(self.extrap_t))

        fid.write(os.linesep)

        # Write the data to the file
        self.data.tofile(fid, ' ', '%%.%ig' % precision)
        fid.write(os.linesep)

        if foldmaskinfo:
            # Write the mask to the file
            numpy.asarray(self.mask,int).tofile(fid, ' ')
            fid.write(os.linesep)

        # Close file
        if newfile:
            fid.close()

    tofile = to_file
    
    def fold_major(self):
        if self.folded_major:
            raise ValueError('Input Spectrum is already folded.')
        folded = self + np.transpose(self)
        for ii in range(len(folded)):
            folded[ii,ii] /= 2
        folded.mask[0,:] = True
        folded.mask[:,0] = True
        for ii in range(len(folded)):
            folded[ii,ii+1:] = 0
            folded.mask[ii,ii+1:] = True
            folded.mask[ii,len(self)-1-ii:] = True
        
        folded.folded_major = True
        folded.folded_ancestral = self.folded_ancestral
        folded.extrap_x = self.extrap_x
        folded.extrap_t = self.extrap_t
        return folded

    def log(self):
        """
        Return the natural logarithm of the entries of the frequency spectrum.

        Only necessary because numpy.ma.log now fails to propagate extra
        attributes after numpy 1.10.
        """
        logfs = numpy.ma.log(self)
        logfs.folded_major = self.folded_major
        logfs.folded_ancestral = self.folded_ancestral
        logfs.extrap_x = self.extrap_x
        logfs.extrap_t = self.extrap_t
        return logfs
    
    def fold_ancestral(self):
        if self.folded_ancestral:
            raise ValueError('Input Spectrum is already folded.')
        folded = 0*self
        ns = len(folded)-1
        for ii in range(ns):
            for jj in range(ns):
                kk = ns-ii-jj
                if self.mask[ii,jj] == True:
                    continue
                elif ii <= kk and jj <= kk:
                    if ii >= jj:
                        folded[ii,jj] += self[ii,jj]
                    else:
                        folded[jj,ii] += self[ii,jj]
                elif ii > kk and jj <= kk:
                    folded[kk,jj] += self[ii,jj]
                elif ii <= kk and jj > kk:
                    folded[kk,ii] += self[ii,jj]
                else: # ii > kk and jj > kk
                    if ii >= jj:
                        folded[jj,kk] += self[ii,jj]
                    else:
                        folded[ii,kk] += self[ii,jj]
        # mask if not a valid entry for ancestrally folded spectrum
        for ii in range(ns):
            for jj in range(ns):
                kk = ns-ii-jj
                if not (kk>=ii>=jj):
                    folded.mask[ii,jj] = True
        
        folded.folded_major = True
        folded.folded_ancestral = True
        folded.extrap_x = self.extrap_x
        folded.extrap_t = self.extrap_t
        return folded
    
    def S(self):
        """
        Segregating sites
        """
        return self.sum()
    
    @staticmethod
    def from_phi(phi, ns, other_stuff):
        pass

    # Ensures that when arithmetic is done with TriSpectrum objects,
    # attributes are preserved. For details, see similar code in
    # dadi.Spectrum_mod
    for method in ['__add__','__radd__','__sub__','__rsub__','__mul__',
                   '__rmul__','__div__','__rdiv__','__truediv__','__rtruediv__',
                   '__floordiv__','__rfloordiv__','__rpow__','__pow__']:
        exec("""
def %(method)s(self, other):
    self._check_other_folding(other)
    if isinstance(other, numpy.ma.masked_array):
        newdata = self.data.%(method)s (other.data)
        newmask = numpy.ma.mask_or(self.mask, other.mask)
    else:
        newdata = self.data.%(method)s (other)
        newmask = self.mask
    if hasattr(other, 'extrap_x') and self.extrap_x != other.extrap_x:
        extrap_x = None
    else:
        extrap_x = self.extrap_x
    if hasattr(other, 'extrap_t') and self.extrap_t != other.extrap_t:
        extrap_t = None
    else:
        extrap_t = self.extrap_t
    outfs = self.__class__.__new__(self.__class__, newdata, newmask, 
                                   mask_infeasible=False, 
                                   data_folded_major=self.folded_major,
                                   data_folded_ancestral=self.folded_ancestral,
                                   extrap_x=extrap_x, extrap_t=extrap_t)
    return outfs
""" % {'method':method})

    # Methods that modify the Spectrum in-place.
    for method in ['__iadd__','__isub__','__imul__','__idiv__',
                   '__itruediv__','__ifloordiv__','__ipow__']:
        exec("""
def %(method)s(self, other):
    self._check_other_folding(other)
    if isinstance(other, numpy.ma.masked_array):
        self.data.%(method)s (other.data)
        self.mask = numpy.ma.mask_or(self.mask, other.mask)
    else:
        self.data.%(method)s (other)
    if hasattr(other, 'extrap_x') and self.extrap_x != other.extrap_x:
        self.extrap_x = None
    if hasattr(other, 'extrap_t') and self.extrap_t != other.extrap_t:
        self.extrap_t = None
    return self
""" % {'method':method})

    def _check_other_folding(self, other):
        """
        Ensure other Spectrum has same .folded status
        """
        if isinstance(other, self.__class__)\
           and (other.folded_major != self.folded_major
                or other.folded_ancestral != self.folded_ancestral):
            raise ValueError('Cannot operate with a folded Spectrum and an '
                             'unfolded one.')

Ancestors

  • numpy.ma.core.MaskedArray
  • numpy.ndarray

Class variables

var method

Static methods

def from_file(fid, mask_infeasible=True, return_comments=False)

Read frequency spectrum from file.

fid: string with file name to read from or an open file object. mask_infeasible: If True, mask the infeasible entries in the triallelic spectrum. return_comments: If true, the return value is (fs, comments), where comments is a list of strings containing the comments from the file (without #'s).

See to_file method for details on the file format.

Expand source code
@staticmethod
def from_file(fid, mask_infeasible=True, return_comments=False):
    """
    Read frequency spectrum from file.

    fid: string with file name to read from or an open file object.
    mask_infeasible: If True, mask the infeasible entries in the triallelic spectrum.
    return_comments: If true, the return value is (fs, comments), where
                     comments is a list of strings containing the comments
                     from the file (without #'s).

    See to_file method for details on the file format.
    """
    newfile = False
    # Try to read from fid. If we can't, assume it's something that we can
    # use to open a file.
    if not hasattr(fid, 'read'):
        newfile = True
        fid = open(fid, 'r')

    line = fid.readline()
    # Strip out the comments
    comments = []
    while line.startswith('#'):
        comments.append(line[1:].strip())
        line = fid.readline()

    # Read the shape of the data
    shape,folded_major,folded_ancestral,extrap_x,extrap_t = line.split()
    shape = [int(shape)+1,int(shape)+1]

    data = numpy.fromstring(fid.readline().strip(), 
                            count=numpy.product(shape), sep=' ')
    # fromfile returns a 1-d array. Reshape it to the proper form.
    data = data.reshape(*shape)

    maskline = fid.readline().strip()
    mask = numpy.fromstring(maskline, 
                            count=numpy.product(shape), sep=' ')
    mask = mask.reshape(*shape)
    
    if folded_major == 'folded_major':
        folded_major = True
    else:
        folded_major = False
    if folded_ancestral == 'folded_ancestral':
        folded_ancestral = True
    else:
        folded_ancestral = False
    if extrap_x == 'None':
        extrap_x = None
    else:
        extrap_x = float(extrap_x)
    if extrap_t == 'None':
        extrap_t = None
    else:
        extrap_t = float(extrap_t)

    # If we opened a new file, clean it up.
    if newfile:
        fid.close()

    fs = TriSpectrum(data, mask, mask_infeasible, data_folded_ancestral=folded_ancestral,
                  data_folded_major=folded_major)
    fs.extrap_x = extrap_x
    fs.extrap_t = extrap_t
    if not return_comments:
        return fs
    else:
        return fs,comments
def from_phi(phi, ns, other_stuff)
Expand source code
@staticmethod
def from_phi(phi, ns, other_stuff):
    pass

Instance variables

var sample_size
Expand source code
def _get_sample_size(self):
    return numpy.asarray(self.shape)[0] - 1
var sample_sizes
Expand source code
def _get_sample_sizes(self):
    return [self._get_sample_size()]

Methods

def S(self)

Segregating sites

Expand source code
def S(self):
    """
    Segregating sites
    """
    return self.sum()
def fold_ancestral(self)
Expand source code
def fold_ancestral(self):
    if self.folded_ancestral:
        raise ValueError('Input Spectrum is already folded.')
    folded = 0*self
    ns = len(folded)-1
    for ii in range(ns):
        for jj in range(ns):
            kk = ns-ii-jj
            if self.mask[ii,jj] == True:
                continue
            elif ii <= kk and jj <= kk:
                if ii >= jj:
                    folded[ii,jj] += self[ii,jj]
                else:
                    folded[jj,ii] += self[ii,jj]
            elif ii > kk and jj <= kk:
                folded[kk,jj] += self[ii,jj]
            elif ii <= kk and jj > kk:
                folded[kk,ii] += self[ii,jj]
            else: # ii > kk and jj > kk
                if ii >= jj:
                    folded[jj,kk] += self[ii,jj]
                else:
                    folded[ii,kk] += self[ii,jj]
    # mask if not a valid entry for ancestrally folded spectrum
    for ii in range(ns):
        for jj in range(ns):
            kk = ns-ii-jj
            if not (kk>=ii>=jj):
                folded.mask[ii,jj] = True
    
    folded.folded_major = True
    folded.folded_ancestral = True
    folded.extrap_x = self.extrap_x
    folded.extrap_t = self.extrap_t
    return folded
def fold_major(self)
Expand source code
def fold_major(self):
    if self.folded_major:
        raise ValueError('Input Spectrum is already folded.')
    folded = self + np.transpose(self)
    for ii in range(len(folded)):
        folded[ii,ii] /= 2
    folded.mask[0,:] = True
    folded.mask[:,0] = True
    for ii in range(len(folded)):
        folded[ii,ii+1:] = 0
        folded.mask[ii,ii+1:] = True
        folded.mask[ii,len(self)-1-ii:] = True
    
    folded.folded_major = True
    folded.folded_ancestral = self.folded_ancestral
    folded.extrap_x = self.extrap_x
    folded.extrap_t = self.extrap_t
    return folded
def log(self)

Return the natural logarithm of the entries of the frequency spectrum.

Only necessary because numpy.ma.log now fails to propagate extra attributes after numpy 1.10.

Expand source code
def log(self):
    """
    Return the natural logarithm of the entries of the frequency spectrum.

    Only necessary because numpy.ma.log now fails to propagate extra
    attributes after numpy 1.10.
    """
    logfs = numpy.ma.log(self)
    logfs.folded_major = self.folded_major
    logfs.folded_ancestral = self.folded_ancestral
    logfs.extrap_x = self.extrap_x
    logfs.extrap_t = self.extrap_t
    return logfs
def mask_infeasible(self)

Mask any infeasible entries.

Expand source code
def mask_infeasible(self):
    """
    Mask any infeasible entries.
    """
    self.mask[:,0] = True
    self.mask[0,:] = True
    for ii in range(len(self))[1:]:
        self.mask[ii,len(self)-ii-1:] = True
def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True)

Write frequency spectrum to file.

fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.

g, where

is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file. foldmaskinfo: If False, folding and mask and population label information will not be saved. This conforms to the file format for dadi versions prior to 1.3.0.

The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 samples.) On the same line, the string 'folded_major' or 'unfolded_major' denoting the folding status of the array On the same line, the string 'folded_ancestral' or 'unfolded_ancestral' denoting the folding status of the array A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] … fs[0,1,0] fs[0,1,1] … A single line giving the elements of the mask in the same order as the data line. '1' indicates masked, '0' indicates unmasked.

Expand source code
def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True):
    """
    Write frequency spectrum to file.

    fid: string with file name to write to or an open file object.
    precision: precision with which to write out entries of the SFS. (They 
               are formated via %.<p>g, where <p> is the precision.)
    comment lines: list of strings to be used as comment lines in the header
                   of the output file.
    foldmaskinfo: If False, folding and mask and population label
                  information will not be saved. This conforms to the file
                  format for dadi versions prior to 1.3.0.

    The file format is:
        # Any number of comment lines beginning with a '#'
        A single line containing N integers giving the dimensions of the fs
          array. So this line would be '5 5 3' for an SFS that was 5x5x3.
          (That would be 4x4x2 *samples*.)
        On the *same line*, the string 'folded_major' or 'unfolded_major' 
          denoting the folding status of the array
        On the *same line*, the string 'folded_ancestral' or 'unfolded_ancestral' 
          denoting the folding status of the array
        A single line giving the array elements. The order of elements is 
          e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ...
        A single line giving the elements of the mask in the same order as
          the data line. '1' indicates masked, '0' indicates unmasked.
    """
    # Open the file object.
    newfile = False
    if not hasattr(fid, 'write'):
        newfile = True
        fid = open(fid, 'w')

    # Write comments
    for line in comment_lines:
        fid.write('# ')
        fid.write(line.strip())
        fid.write(os.linesep)

    # Write out the shape of the fs
    fid.write('{0} '.format(self.sample_size))

    if foldmaskinfo:
        if not self.folded_major:
            fid.write('unfolded_major ')
        else:
            fid.write('folded_major ')
        if not self.folded_ancestral:
            fid.write('unfolded_ancestral ')
        else:
            fid.write('folded_ancestral ')
    
    if extrapinfo:
        if not self.extrap_x:
            fid.write('None ')
        else:
            fid.write('{0} '.format(self.extrap_x))
        if not self.extrap_t:
            fid.write('None')
        else:
            fid.write('{0}'.format(self.extrap_t))

    fid.write(os.linesep)

    # Write the data to the file
    self.data.tofile(fid, ' ', '%%.%ig' % precision)
    fid.write(os.linesep)

    if foldmaskinfo:
        # Write the mask to the file
        numpy.asarray(self.mask,int).tofile(fid, ' ')
        fid.write(os.linesep)

    # Close file
    if newfile:
        fid.close()
def tofile(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True)

Write frequency spectrum to file.

fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.

g, where

is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file. foldmaskinfo: If False, folding and mask and population label information will not be saved. This conforms to the file format for dadi versions prior to 1.3.0.

The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 samples.) On the same line, the string 'folded_major' or 'unfolded_major' denoting the folding status of the array On the same line, the string 'folded_ancestral' or 'unfolded_ancestral' denoting the folding status of the array A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] … fs[0,1,0] fs[0,1,1] … A single line giving the elements of the mask in the same order as the data line. '1' indicates masked, '0' indicates unmasked.

Expand source code
def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True):
    """
    Write frequency spectrum to file.

    fid: string with file name to write to or an open file object.
    precision: precision with which to write out entries of the SFS. (They 
               are formated via %.<p>g, where <p> is the precision.)
    comment lines: list of strings to be used as comment lines in the header
                   of the output file.
    foldmaskinfo: If False, folding and mask and population label
                  information will not be saved. This conforms to the file
                  format for dadi versions prior to 1.3.0.

    The file format is:
        # Any number of comment lines beginning with a '#'
        A single line containing N integers giving the dimensions of the fs
          array. So this line would be '5 5 3' for an SFS that was 5x5x3.
          (That would be 4x4x2 *samples*.)
        On the *same line*, the string 'folded_major' or 'unfolded_major' 
          denoting the folding status of the array
        On the *same line*, the string 'folded_ancestral' or 'unfolded_ancestral' 
          denoting the folding status of the array
        A single line giving the array elements. The order of elements is 
          e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ...
        A single line giving the elements of the mask in the same order as
          the data line. '1' indicates masked, '0' indicates unmasked.
    """
    # Open the file object.
    newfile = False
    if not hasattr(fid, 'write'):
        newfile = True
        fid = open(fid, 'w')

    # Write comments
    for line in comment_lines:
        fid.write('# ')
        fid.write(line.strip())
        fid.write(os.linesep)

    # Write out the shape of the fs
    fid.write('{0} '.format(self.sample_size))

    if foldmaskinfo:
        if not self.folded_major:
            fid.write('unfolded_major ')
        else:
            fid.write('folded_major ')
        if not self.folded_ancestral:
            fid.write('unfolded_ancestral ')
        else:
            fid.write('folded_ancestral ')
    
    if extrapinfo:
        if not self.extrap_x:
            fid.write('None ')
        else:
            fid.write('{0} '.format(self.extrap_x))
        if not self.extrap_t:
            fid.write('None')
        else:
            fid.write('{0}'.format(self.extrap_t))

    fid.write(os.linesep)

    # Write the data to the file
    self.data.tofile(fid, ' ', '%%.%ig' % precision)
    fid.write(os.linesep)

    if foldmaskinfo:
        # Write the mask to the file
        numpy.asarray(self.mask,int).tofile(fid, ' ')
        fid.write(os.linesep)

    # Close file
    if newfile:
        fid.close()
def unfold(self)
Expand source code
def unfold(self):
    if not self.folded_major:
        raise ValueError('Input Spectrum is not folded.')
    data = self.data
    unfolded = TriSpectrum(data, mask_infeasible=True)
    unfolded.extrap_x = self.extrap_x
    unfolded.extrap_t = self.extrap_t
    return unfolded