Module dadi.Numerics

Numerically useful functions, including extrapolation and default grid.

"""
Numerically useful functions, including extrapolation and default grid.
"""
import logging
logger = logging.getLogger('Numerics')

import functools, os
import numpy
# Account for difference in scipy installations.
from scipy.special import comb
from scipy.special import gammaln, betaln, beta

_multinomln_cache = {}
def multinomln(N):
    """
    Log of the multinomial coefficient for an array N.
    
    N: array of integers.
    """
    if tuple(N) not in _multinomln_cache:
        N_sum = numpy.sum(N)
        res = gammaln(N_sum+1)
        for n in N:
            res -= gammaln(n+1)
        _multinomln_cache[tuple(N)] = res
    return _multinomln_cache[tuple(N)]

_BetaBinomln_cache = {}
def BetaBinomln(i,n,a,b):
    """
    Log of the probability of i successes in n trials under a beta-binomial
    distribution with parameters a and b.
    """
    if (i,n,a,b) not in _BetaBinomln_cache:
        _BetaBinomln_cache[i,n,a,b] = _lncomb(n,i) + betaln(i+a,n-i+b) - betaln(a,b)
    return _BetaBinomln_cache[i,n,a,b]

_part_cache = {}
def cached_part(x,n,minval=0,maxval=2):
    """
    Returns a list of all integer partitions of x with n entries, with each
    entry between minval and maxval (by default 0 and 2, i.e. the ploidy level).

    This version uses a cache to speed up repeated evaluations.
    
    x: integer summand.
    n: number of partition entries.
    minval: minimum value allowed for partition entries.
    maxval: maximum value allowed for partition entries.
    """
    if (x,n,minval,maxval) not in _part_cache:
        _part_cache[x,n,minval,maxval] = list(part(x,n,minval,maxval))
    return _part_cache[x,n,minval,maxval]

def part(x, n, minval=0, maxval=2):
    """
    Yields all integer partitions of x with n entries, with each entry
    between minval and maxval (by default 0 and 2, i.e. the ploidy level).
    
    x: integer summand.
    n: number of partition entries.
    minval: minimum value allowed for partition entries.
    maxval: maximum value allowed for partition entries.
    """
    if not n * minval <= x <= n * maxval:
        return
    elif n == 0:
        yield []
    else:
        for val in range(minval, maxval + 1):
            for p in part(x - val, n - 1, val, maxval):
                yield [val] + p

def BetaBinomConvolution(i,n,alpha,beta,ploidy=2):
    """
    Returns the probability of observing i 'successes' across
    n beta binomial random variables with equal alpha,
    beta, and number of trials.
    """
    res=0.0
    # Create list of BetaBinomln results, because list indexing
    # is much faster than the function call and hashing inside
    # the cached BetaBinomln. Caching in BetaBinomln is still
    # marginally useful, even after caching at this level.
    BetaBinomln_cache = [BetaBinomln(_,ploidy,alpha,beta) for _ in range(ploidy+1)]
    partitions = cached_part(i,n,maxval=ploidy)
    for prt in partitions:
        tmp=0.0
        # Partitions p have many repeated elements. It's faster
        # to count each unique element and use that, rather than 
        # iterating through all elements
        coeff = [prt.count(p) for p in range(ploidy+1)]
        for p in range(ploidy+1):
            tmp += BetaBinomln_cache[p]*coeff[p]
        tmp += multinomln(coeff)
        res += numpy.exp(tmp)
    return res

def apply_anc_state_misid(fs, p_misid):
    """
    Model ancestral state misidentification in a frequency spectrum.

    fs: Input frequency spectrum.
    p_misid: Fraction of sites assumed to suffer from ancestral
             state misidentification.
    """
    return (1-p_misid)*fs + p_misid*reverse_array(fs)

def make_anc_state_misid_func(func):
    """
    Generate a version of func accounting for ancestral state misidentification.

    func: The function to which misidentification should be incorporated. It
          is assumed that the first argument of the function is a params
          vector, to which the misidentification parameter will be added.

    Returns a new function which takes in a params vector that is one entry
    longer than the original function. The fraction misidentification will
    be the last entry in the new params vector.
    """
    def misid_func(*args, **kwargs):
        all_params = args[0]
        p_misid = all_params[-1]
        args = list(args)
        args[0] = all_params[:-1]
        fs = func(*args, **kwargs)
        return apply_anc_state_misid(fs, p_misid)
    misid_func.__name__ = func.__name__ + '_misid'
    misid_func.__doc__ = func.__doc__
    return misid_func

def quadratic_grid(num_pts):
    """
    A nonuniform grid of points on [0,1] with a quadratic pattern of spacings.

    The grid is weighted to be denser near 0 and 1, which is useful for
    population genetic simulations. In between, it smoothly increases and
    then decreases the step size.
    """
    # Rounds down...
    small_pts = int(num_pts / 10)
    large_pts = num_pts - small_pts+1

    grid = numpy.linspace(0, 0.05, small_pts)

    # The interior grid spacings are a quadratic function of x, being
    # approximately x1 at the boundaries. To achieve this, our actual grid
    # positions must come from a cubic function.
    # Here I calculate and apply the appropriate conditions:
    #   x[q = 0] = x_start  =>  d = x_start
    #   dx/dq [q = 0] = dx_start => c = dx_start/dq
    #   x[q = 1] = 1
    #   dx/dq [q = 1] = dx_start
    
    q = numpy.linspace(0, 1, large_pts)
    dq = q[1] - q[0]
    x_start = grid[-1]
    dx_start = grid[-1] - grid[-2]

    d = x_start
    c = dx_start/dq
    b = -3*(-dq + dx_start + dq*x_start)/dq
    a = -2 * b/3
    grid = numpy.concatenate((grid[:-1], a*q**3 + b*q**2 + c*q + d))

    return grid

def estimate_best_exp_grid_crwd(ns):
    """
    Empirical "best" values for exponential grid crowding.

    These functional forms were estimated by running many simulations at
    different parameter values and asking when a simulation at pts_l = [max(ns),
    max(ns)+10, max(ns)+20] was most accurate.

    These cannot be considered absolute best values, as that may depend on the
    model. It does seem broadly true that the optimal value of crwd increases
    with system size, up to a point.
    """
    xx = numpy.mean(ns)
    if len(ns) == 1:
        return min(max(xx**0.4 / 1.3, 1), 9)
    elif len(ns) == 2:
        return min(max(1.5 * xx**0.4, 1), 8)
    else:
        raise ValueError('Due to computational expense, no optimum has been '
                         'determined for 3D or more models. Try sticking with '
                         'crwd=8.')

def exponential_grid(pts, crwd=8.):
    """
    An exponentially spaced grid. This is now the default grid.

    crwd controls the degree to which grid points crowd against x=0 or x=1.
    The value of crwd=8 seems to be a good default for integration with large
    systems. See estimate_best_exp_grid_crwd for some empirical optimizations
    beyond this.

    This grid was contributed by Simon Gravel.
    """
    unif = numpy.linspace(-1,1,pts)
    grid = 1./(1. + numpy.exp(-crwd*unif))

    # Normalize
    grid = (grid-grid[0])/(grid[-1]-grid[0])
    return grid

default_grid = exponential_grid

def end_point_first_derivs(xx):
    """
    Coefficients for a 5-point one-sided approximation of the first derivative.

    xx: grid on which the data to be differentiated lives

    Returns ret, a 2x5 array. ret[0] is the coefficients for an approximation
    of the derivative at xx[0]. It is used by deriv = numpy.dot(ret[0],
    yy[:5]). ret[1] is the coefficients for the derivative at xx[-1]. It can be
    used by deriv = dot(ret[1][::-1], yy[-5:]). (Note that we need to reverse
    the coefficient array here.)
    """
    output = numpy.zeros((2,5))

    # These are the coefficients for a one-sided 1st derivative of f[0].
    # So f'[0] = d10_0 f[0] + d10_1 f[1] + d10_2 f[2] + d10_3 f[3] + d10_4 f[4]
    # This expression is 4th order accurate.
    # To derive it in Mathematica, use NDSolve`FiniteDifferenceDerivative[1, {xx[0], xx[1], xx[2], xx[3], xx[4]}, {f[0], f[1], f[2], f[3], f[4]}, DifferenceOrder -> 4][[1]] // Simplify
    d10_0 = (-1 + ((-1 + ((-2*xx[0] + xx[1] + xx[2]) * (-xx[0] + xx[3]))/ ((xx[0] - xx[1])*(-xx[0] + xx[2])))* (-xx[0] + xx[4]))/(-xx[0] + xx[3]))/ (-xx[0] + xx[4])
    d10_1 = -(((xx[0] - xx[2])*(xx[0] - xx[3])*(xx[0] - xx[4]))/ ((xx[0] - xx[1])*(xx[1] - xx[2])*(xx[1] - xx[3])* (xx[1] - xx[4])))
    d10_2 = ((xx[0] - xx[1])*(xx[0] - xx[3])*(xx[0] - xx[4]))/ ((xx[0] - xx[2])*(xx[1] - xx[2])*(xx[2] - xx[3])* (xx[2] - xx[4]))
    d10_3 = -(((xx[0] - xx[1])*(xx[0] - xx[2])*(xx[0] - xx[4]))/ ((xx[0] - xx[3])*(-xx[1] + xx[3])* (-xx[2] + xx[3])*(xx[3] - xx[4])))
    d10_4 = ((xx[0] - xx[1])*(xx[0] - xx[2])*(xx[0] - xx[3]))/ ((xx[0] - xx[4])*(xx[1] - xx[4])*(xx[2] - xx[4])* (xx[3] - xx[4]))

    output[0] = (d10_0, d10_1, d10_2, d10_3, d10_4)

    # These are the coefficients for a one-sided 1st derivative of f[-1].
    # So f'[-1] = d1m1_m1 f[-1] + d1m1_m2 f[-2] + d1m1_m3 f[-3] + d1m1_m4 f[-4]
    #             + d1m1_m5 f[-5]
    d1m1_m1 = (xx[-1]*((3*xx[-2] - 4*xx[-1])*xx[-1] + xx[-3]*(-2*xx[-2] + 3*xx[-1])) + xx[-4]*(xx[-3]*(xx[-2] - 2*xx[-1]) + xx[-1]*(-2*xx[-2] + 3*xx[-1])) + xx[-5]*(xx[-3]*(xx[-2] - 2*xx[-1]) + xx[-4]*(xx[-3] + xx[-2] - 2*xx[-1]) + xx[-1]*(-2*xx[-2] + 3*xx[-1])))/ ((xx[-4] - xx[-1])*(-xx[-5] + xx[-1])* (-xx[-3] + xx[-1])*(-xx[-2] + xx[-1]))
    d1m1_m2 = ((xx[-5] - xx[-1])*(xx[-4] - xx[-1])* (xx[-3] - xx[-1]))/ ((xx[-5] - xx[-2])*(-xx[-4] + xx[-2])*(-xx[-3] + xx[-2])*(xx[-2] - xx[-1]))
    d1m1_m3 = ((xx[-5] - xx[-1])*(xx[-4] - xx[-1])* (xx[-2] - xx[-1]))/ ((xx[-5] - xx[-3])*(-xx[-4] + xx[-3])* (xx[-3] - xx[-2])*(xx[-3] - xx[-1]))
    d1m1_m4 = ((xx[-5] - xx[-1])*(xx[-3] - xx[-1])* (xx[-2] - xx[-1]))/ ((xx[-5] - xx[-4])*(xx[-4] - xx[-3])* (xx[-4] - xx[-2])*(xx[-4] - xx[-1])) 
    d1m1_m5 = ((xx[-4] - xx[-1])*(xx[-3] - xx[-1])* (xx[-2] - xx[-1]))/ ((xx[-5] - xx[-4])*(xx[-5] - xx[-3])* (xx[-5] - xx[-2])*(-xx[-5] + xx[-1]))

    output[1] = (d1m1_m1, d1m1_m2, d1m1_m3, d1m1_m4, d1m1_m5)

    return output
def reverse_array(arr):
    """
    Reverse an array along all axes, so arr[i,j] -> arr[-(i+1),-(j+1)].
    """
    reverse_slice = tuple(slice(None, None, -1) for ii in arr.shape)
    return arr[reverse_slice]

def intersect_masks(m1, m2):
    """
    Versions of m1 and m2 that are masked where either m1 or m2 were masked.

    If neither m1 nor m2 is masked, just returns m1 and m2. Otherwise returns
    m1 and m2 wrapped as masked_arrays with identical masks.
    """
    ma = numpy.ma
    if ma.isMaskedArray(m1) and ma.isMaskedArray(m2)\
       and numpy.all(m1.mask == m2.mask):
        return m1,m2

    if ma.isMaskedArray(m1) or ma.isMaskedArray(m2):
        joint_mask = ma.mask_or(ma.getmask(m1), ma.getmask(m2))

        import dadi
        m1 = dadi.Spectrum(m1, mask=joint_mask.copy())
        m2 = dadi.Spectrum(m2, mask=joint_mask.copy())
    return m1,m2

def trapz(yy, xx=None, dx=None, axis=-1):
    """
    Integrate yy(xx) along given axis using the composite trapezoidal rule.
    
    xx must be one-dimensional and len(xx) must equal yy.shape[axis].

    This is modified from the SciPy version to work with n-D yy and 1-D xx.
    """
    if (xx is None and dx is None)\
       or (xx is not None and dx is not None):
        raise ValueError('One and only one of xx or dx must be specified.')
    elif (xx is not None) and (dx is None):
        dx = numpy.diff(xx)
    yy = numpy.asanyarray(yy)
    nd = yy.ndim

    if yy.shape[axis] != (len(dx)+1):
        raise ValueError('Length of xx must be equal to length of yy along '
                         'specified axis. Here len(xx) = %i and '
                         'yy.shape[axis] = %i.' % (len(dx)+1, yy.shape[axis]))

    slice1 = [slice(None)]*nd
    slice2 = [slice(None)]*nd
    slice1[axis] = slice(1,None)
    slice2[axis] = slice(None,-1)
    sliceX = [numpy.newaxis]*nd
    sliceX[axis] = slice(None)
    slice1, slice2, sliceX = tuple(slice1), tuple(slice2), tuple(sliceX)

    return numpy.sum(dx[sliceX] * (yy[slice1]+yy[slice2])/2.0, axis=axis)

def make_extrap_func(func, extrap_x_l=None, extrap_log=False, fail_mag=10):
    """
    Generate a version of func that extrapolates to infinitely many gridpoints.

    func: A function that returns a single scalar or array and whose last
        non-keyword argument is 'pts': the number of default_grid points to use
        in calculation.  
    extrap_x_l: An explicit list of x values to use for extrapolation. If not 
        provided, the extrapolation routine will look for '.extrap_x'
        attributes on the results of func. The method Spectrum.from_phi will
        add an extrap_x attribute to resulting Spectra, equal to the x-value
        of the first non-zero grid point. An explicit list is useful if you
        want to override this behavior for testing.
    fail_mag:  Simon Gravel noted that there can be numerical instabilities in
        extrapolation when working with large spectra that have very small
        entries (of order 1e-24). To avoid these instabilities, we ignore the 
        extrapolation values (and use the input result with the smallest x) 
        if the extrapolation is more than fail_mag orders of magnitude away
        from the smallest x input result.

    Returns a new function whose last argument is a list of numbers of grid
    points and that returns a result extrapolated to infinitely many grid
    points.
    """
    x_l_from_results = (extrap_x_l is None)

    def extrap_func(*args, **kwargs):
        # Separate pts (or pts_l) from arguments
        if 'pts' not in kwargs:
            other_args, pts_l = args[:-1], args[-1]
        else:
            other_args = args
            pts_l = kwargs['pts']
            del kwargs['pts']

        if 'no_extrap' in kwargs:
            no_extrap = True
            del kwargs['no_extrap']
        else:
            no_extrap = False

        if numpy.isscalar(pts_l):
            pts_l = [pts_l]

        # Create a sub-function that fixes all other arguments and only
        # takes in pts.
        partial_func = functools.partial(func, *other_args, **kwargs)

        ##
        ## The commented-out code implements distribution of fs calculations
        ## among multiple processors. Unfortunately, it doesn't work with
        ## iPython, because pickling functions is very fragile in the
        ## interactive interpreter. If we had some sort of data structure for
        ## defining models, then this would be much easier to implement. Note
        ## that there might still be issues on Windows systems, because of its
        ## poor-man's version of fork().
        ##
        #import cPickle, multiprocessing
        #try:
        #    # Test whether sub-function is picklable. If not, pool.map will
        #    # hang.
        #    cPickle.dumps(partial_func)
        #    pool = multiprocessing.Pool()
        #    result_l = pool.map(partial_func, pts_l)
        #    pool.close()
        #except (cPickle.PicklingError, TypeError):
        #    print('Function passed to extrap func must be picklable for '
        #          'multi-processor execution.')
        #    import sys
        #    sys.exit()

        result_l = list(map(partial_func, pts_l))
        if no_extrap:
            return result_l

        if x_l_from_results:
            try:
                x_l = [r.extrap_x for r in result_l]
            except AttributeError:
                raise ValueError("Extrapolation function error: No explicit extrapolation x_l provided, and results do not have 'extrap_x' attributes. If this is an FS extrapolation, check your from_phi method.")
        else:
            x_l = extrap_x_l

        if extrap_log:
            result_l = [numpy.log(r) for r in result_l]

        # Extrapolate
        if len(pts_l) == 1:
            ex_result = result_l[0]
        elif len(pts_l) == 2:
            ex_result = linear_extrap(result_l, x_l)
        elif len(pts_l) == 3:
            ex_result = quadratic_extrap(result_l, x_l)
        elif len(pts_l) == 4:
            ex_result = cubic_extrap(result_l, x_l)
        elif len(pts_l) == 5:
            ex_result = quartic_extrap(result_l, x_l)
        elif len(pts_l) == 6:
            ex_result = quintic_extrap(result_l, x_l)
        else:
            raise ValueError('Number of calculations to use for extrapolation '
                             'must be between 1 and 6')

        if extrap_log:
            ex_result = numpy.exp(ex_result)

        # Simon Gravel noted that there can be numerical instabilities in
        # extrapolation when working with large spectra that have very small
        # entries (of order 1e-24).
        # To avoid these instabilities, we ignore the extrapolation values
        # if it is too different from the input values.
        if len(pts_l) > 1:
            # Assume the best input value comes from the smallest grid.
            best_result = result_l[numpy.argmin(x_l)]
            if extrap_log:
                best_result = numpy.exp(best_result)

            # The extrapolation is deemed to have failed if it results in a
            # value more than fail_mag orders of magnitude away from the 
            # best input value.
            extrap_failed = abs(numpy.log10(ex_result/best_result)) > fail_mag
            if numpy.any(extrap_failed):
                logger.warning('Extrapolation may have failed. Check resulting '
                            'frequency spectrum for unexpected results.')

            # For entries that fail, use the "best" input result.
            ex_result[extrap_failed] = best_result[extrap_failed]

        try:
            ex_result.pop_ids = result_l[0].pop_ids
        except AttributeError:
            pass
        return ex_result

    extrap_func.__name__ = func.__name__
    extrap_func.__doc__ = func.__doc__

    return extrap_func

def make_extrap_log_func(func, extrap_x_l=None):
    """
    Generate a version of func that extrapolates to infinitely many gridpoints.

    Note that extrapolation here is done on the *log* of the function result,
    so this will fail if any returned values are < 0. It does seem to be better
    behaved for SFS calculation.

    func: A function whose last argument is the number of Numerics.default_grid 
          points to use in calculation and that returns a single scalar or 
          array.
    extrap_x_l: An explicit list of x values to use for extrapolation. If not 
         provided, the extrapolation routine will look for '.extrap_x'
         attributes on the results of func. The method Spectrum.from_phi will
         add an extrap_x attribute to resulting Spectra, equal to the x-value
         of the first non-zero grid point. An explicit list is useful if you
         want to override this behavior for testing.

    Returns a new function whose last argument is a list of numbers of grid
    points and that returns a result extrapolated to infinitely many grid
    points.
    """
    return make_extrap_func(func, extrap_x_l=extrap_x_l, extrap_log=True)

_projection_cache = {}
def _lncomb(N,k):
    """
    Log of N choose k.
    """
    return gammaln(N+1) - gammaln(k+1) - gammaln(N-k+1)

def _cached_projection(proj_to, proj_from, hits):
    """
    Coefficients for projection from a different fs size.

    proj_to: Number of samples to project down to.
    proj_from: Number of samples to project from.
    hits: Number of derived alleles projecting from.
    """
    key = (proj_to, proj_from, hits)
    try:
        return _projection_cache[key]
    except KeyError:
        pass

    if numpy.isscalar(proj_to) and numpy.isscalar(proj_from)\
       and proj_from < proj_to:
        # Short-circuit calculation.
        contrib = numpy.zeros(proj_to+1)
    else:
        # We set numpy's error reporting so that it will ignore underflows, 
        # because those just imply that contrib is 0.
        previous_err_state = numpy.seterr(under='ignore', divide='raise',
                                          over='raise', invalid='raise')
        proj_hits = numpy.arange(proj_to+1)
        # For large sample sizes, we need to do the calculation in logs, and it
        # is accurate enough for small sizes as well.
        lncontrib = _lncomb(proj_to,proj_hits)
        lncontrib += _lncomb(proj_from-proj_to,hits-proj_hits)
        lncontrib -= _lncomb(proj_from, hits)
        contrib = numpy.exp(lncontrib)
        numpy.seterr(**previous_err_state)
    _projection_cache[key] = contrib
    return contrib

def array_from_file(fid, return_comments=False):
    """
    Read array from file.

    fid: string with file name to read from or an open file object.
    return_comments: If True, the return value is (fs, comments), where
                     comments is a list of strings containing the comments
                     from the file (without #'s).

    The file format is:
        # Any number of comment lines beginning with a '#'
        A single line containing N integers giving the dimensions of the fs
          array. So this line would be '5 5 3' for an SFS that was 5x5x3.
          (That would be 4x4x2 *samples*.)
        A single line giving the array elements. The order of elements is 
          e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ...
    """
    newfile = False
    # Try to read from fid. If we can't, assume it's something that we can
    # use to open a file.
    if not hasattr(fid, 'read'):
        newfile = True
        fid = open(fid, 'r')

    line = fid.readline()
    # Strip out the comments
    comments = []
    while line.startswith('#'):
        comments.append(line[1:].strip())
        line = fid.readline()

    # Read the shape of the data
    shape = tuple([int(d) for d in line.split()])

    data = numpy.fromfile(fid, count=numpy.prod(shape), sep=' ')
    # fromfile returns a 1-d array. Reshape it to the proper form.
    data = data.reshape(*shape)

    # If we opened a new file, clean it up.
    if newfile:
        fid.close()

    if not return_comments:
        return data
    else:
        return data,comments

def array_to_file(data, fid, precision=16, comment_lines = []):
    """
    Write array to file.

    data: array to write
    fid: string with file name to write to or an open file object.
    precision: precision with which to write out entries of the SFS. (They 
               are formatted via %.<p>g, where <p> is the precision.)
    comment_lines: list of strings to be used as comment lines in the header
                   of the output file.

    The file format is:
        # Any number of comment lines beginning with a '#'
        A single line containing N integers giving the dimensions of the fs
          array. So this line would be '5 5 3' for an SFS that was 5x5x3.
          (That would be 4x4x2 *samples*.)
        A single line giving the array elements. The order of elements is 
          e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ...
    """
    # Open the file object.
    newfile = False
    if not hasattr(fid, 'write'):
        newfile = True
        fid = open(fid, 'w')

    # Write comments
    for line in comment_lines:
        fid.write('# ')
        fid.write(line.strip())
        fid.write(os.linesep)

    # Write out the shape of the fs
    for elem in data.shape:
        fid.write('%i ' % elem)
    fid.write(os.linesep)

    if hasattr(data, 'filled'):
        # Masked entries in the fs will go in as 'nan'
        data = data.filled()
    # Write to file
    data.tofile(fid, ' ', '%%.%ig' % precision)
    fid.write(os.linesep)

    # Close file
    if newfile:
        fid.close()

def linear_extrap(ys, xs):
    """
    Linearly extrapolate from two x,y pairs to x = 0.

    ys: y values from x,y pairs. Note that these can be arrays of values.
    xs: x values from x,y pairs. These should be scalars.

    Returns extrapolated y at x=0.
    """
    y1,y2 = ys
    x1,x2 = xs
    return (x2 * y1 - x1 * y2)/(x2 - x1)

def quadratic_extrap(ys, xs):
    """
    Quadratically extrapolate from three x,y pairs to x = 0.

    ys: y values from x,y pairs. Note that these can be arrays of values.
    xs: x values from x,y pairs. These should be scalars.

    Returns extrapolated y at x=0.
    """
    y1,y2,y3 = ys
    x1,x2,x3 = xs
    return x2*x3/((x1-x2)*(x1-x3)) * y1 + x1*x3/((x2-x1)*(x2-x3)) * y2\
            + x1*x2/((x3-x1)*(x3-x2)) * y3

Functions

def BetaBinomConvolution(i, n, alpha, beta, ploidy=2)

Returns the probability of observing i 'successes' across n beta binomial random variables with equal alpha, beta, and number of trials.
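
A minimal sanity-check sketch, assuming dadi is importable: with alpha = beta = 1, each beta-binomial draw with 2 trials is uniform on {0, 1, 2}, so the probabilities over i = 0..6 should sum to 1.

from dadi import Numerics

probs = [Numerics.BetaBinomConvolution(i, 3, 1.0, 1.0) for i in range(7)]
print(probs[2])    # 6/27: the 6 length-3 sequences from {0,1,2} that sum to 2
print(sum(probs))  # ~1.0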

def BetaBinomln(i, n, a, b)

Log of the probability of i successes in n trials under a beta-binomial distribution with parameters a and b.
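
A quick numerical check, assuming dadi is importable: with a = b = 1, the beta-binomial with 2 trials is uniform on {0, 1, 2}.

import numpy
from dadi import Numerics

print(numpy.exp(Numerics.BetaBinomln(1, 2, 1.0, 1.0)))  # ~1/3
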
def apply_anc_state_misid(fs, p_misid)

Model ancestral state misidentification in a frequency spectrum.

fs: Input frequency spectrum. p_misid: Fraction of sites assumed to suffer from ancestral state misidentification.
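
A short usage sketch, assuming dadi is importable (the spectrum entries here are made up for illustration):

import dadi

fs = dadi.Spectrum([0, 10.0, 5.0, 1.0, 0])
# Assume 2% of sites have their ancestral state misidentified
fs_misid = dadi.Numerics.apply_anc_state_misid(fs, 0.02)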

def array_from_file(fid, return_comments=False)

Read array from file.

fid: string with file name to read from or an open file object.
return_comments: If True, the return value is (fs, comments), where comments is a list of strings containing the comments from the file (without #'s).

The file format is:
    # Any number of comment lines beginning with a '#'
    A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 samples.)
    A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ...
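
For instance, a hypothetical file 'fs.txt' in this format could be read back as follows (the file name and contents are illustrative only):

# Contents of a hypothetical 'fs.txt' for a 2x2 fs (1x1 samples):
#     # an example comment
#     2 2
#     0.0 1.5 2.5 0.0
from dadi import Numerics

data, comments = Numerics.array_from_file('fs.txt', return_comments=True)
# data.shape == (2, 2); comments == ['an example comment']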

def array_to_file(data, fid, precision=16, comment_lines=[])

Write array to file.

data: array to write.
fid: string with file name to write to or an open file object.
precision: precision with which to write out entries of the SFS. (They are formatted via %.<p>g, where <p> is the precision.)
comment_lines: list of strings to be used as comment lines in the header of the output file.

The file format is:
    # Any number of comment lines beginning with a '#'
    A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 samples.)
    A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ...
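
A round-trip sketch with a made-up 2x2 array (the file name is illustrative):

import numpy
from dadi import Numerics

data = numpy.array([[0.0, 1.5], [2.5, 0.0]])
Numerics.array_to_file(data, 'fs_out.txt', precision=6,
                       comment_lines=['written by array_to_file'])
round_trip = Numerics.array_from_file('fs_out.txt')
# round_trip equals data, up to the requested precision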

def cached_part(x, n, minval=0, maxval=2)

Returns a list of all integer partitions of x with n entries, with each entry between minval and maxval (by default 0 and 2, i.e. the ploidy level).

This version uses a cache to speed up repeated evaluations.

x: integer summand. n: number of partition entries. minval: minimum value allowed for partition entries. maxval: maximum value allowed for partition entries.
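
For example, the ways to distribute 2 derived alleles among 3 diploid genotypes:

from dadi import Numerics

print(Numerics.cached_part(2, 3))  # [[0, 0, 2], [0, 1, 1]]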

def default_grid(pts, crwd=8.0)

An exponentially spaced grid. This is now the default grid.

crwd controls the degree to which grid points crowd against x=0 or x=1. The value of crwd=8 seems to be a good default for integration with large systems. See estimate_best_exp_grid_crwd for some empirical optimizations beyond this.

This grid was contributed by Simon Gravel.
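
A quick look at the resulting grid, here with 50 points:

import numpy
from dadi import Numerics

xx = Numerics.default_grid(50)
print(xx[0], xx[-1])       # 0.0 1.0
print(numpy.diff(xx)[:3])  # spacings are smallest near the boundaries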

def end_point_first_derivs(xx)

Coefficients for a 5-point one-sided approximation of the first derivative.

xx: grid on which the data to be differentiated lives

Returns ret, a 2x5 array. ret[0] is the coefficients for an approximation of the derivative at xx[0]. It is used by deriv = numpy.dot(ret[0], yy[:5]). ret[1] is the coefficients for the derivative at xx[-1]. It can be used by deriv = dot(ret[1][::-1], yy[-5:]). (Note that we need to reverse the coefficient array here.)
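
A check on a polynomial, for which the 5-point formula is exact up to roundoff:

import numpy
from dadi import Numerics

xx = numpy.linspace(0.0, 1.0, 5)
yy = xx**2
coefs = Numerics.end_point_first_derivs(xx)
print(numpy.dot(coefs[0], yy[:5]))         # ~0.0, d(x^2)/dx at x=0
print(numpy.dot(coefs[1][::-1], yy[-5:]))  # ~2.0, d(x^2)/dx at x=1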

def estimate_best_exp_grid_crwd(ns)

Empirical "best" values for exponential grid crowding.

These functional forms were estimated by running many simulations at different parameter values and asking when a simulation at pts_l = [max(ns), max(ns)+10, max(ns)+20] was most accurate.

These cannot be considered absolute best values, as that may depend on the model. It does seem broadly true that the optimal value of crwd increases with system size, up to a point.
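
For example, for a two-population model with 20 samples in each population:

from dadi import Numerics

print(Numerics.estimate_best_exp_grid_crwd([20, 20]))  # ~5.0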

def exponential_grid(pts, crwd=8.0)

An exponentially spaced grid. This is now the default grid.

crwd controls the degree to which grid points crowd against x=0 or x=1. The value of crwd=8 seems to be a good default for integration with large systems. See estimate_best_exp_grid_crwd for some empirical optimizations beyond this.

This grid was contributed by Simon Gravel.

def intersect_masks(m1, m2)

Versions of m1 and m2 that are masked where either m1 or m2 were masked.

If neither m1 nor m2 is masked, just returns m1 and m2. Otherwise returns m1 and m2 wrapped as masked_arrays with identical masks.
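
A small sketch, assuming dadi is importable (dadi.Spectrum masks its corner entries by default):

import numpy, dadi

m1 = dadi.Spectrum([0, 1.0, 2.0, 0])                      # entries 0 and 3 masked
m2 = numpy.ma.masked_values([5.0, -1.0, 3.0, 4.0], -1.0)  # entry 1 masked
m1, m2 = dadi.Numerics.intersect_masks(m1, m2)
# Both now carry the joint mask: entries 0, 1, and 3 masked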

def linear_extrap(ys, xs)

Linearly extrapolate from two x,y pairs to x = 0.

ys: y values from x,y pairs. Note that these can be arrays of values. xs: x values from x,y pairs. These should be scalars.

Returns extrapolated y at x=0.
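
For points on the line y = 1 + 2x, extrapolating back to x = 0 recovers the intercept:

from dadi import Numerics

print(Numerics.linear_extrap((3.0, 5.0), (1.0, 2.0)))  # 1.0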

def make_anc_state_misid_func(func)

Generate a version of func accounting for ancestral state misidentification.

func: The function to which misidentification should be incorporated. It is assumed that the first argument of the function is a params vector, to which the misidentification parameter will be added.

Returns a new function which takes in a params vector that is one entry longer than the original function. The fraction misidentification will be the last entry in the new params vector.
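
A usage sketch with dadi's built-in two_epoch model (the parameter values are arbitrary):

import dadi

func = dadi.Numerics.make_anc_state_misid_func(dadi.Demographics1D.two_epoch)
# two_epoch's params are (nu, T); the wrapped version appends p_misid
fs = func([2.0, 0.1, 0.02], [20], 40)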

def make_extrap_func(func, extrap_x_l=None, extrap_log=False, fail_mag=10)

Generate a version of func that extrapolates to infinitely many gridpoints.

func: A function that returns a single scalar or array and whose last non-keyword argument is 'pts': the number of default_grid points to use in calculation.
extrap_x_l: An explicit list of x values to use for extrapolation. If not provided, the extrapolation routine will look for '.extrap_x' attributes on the results of func. The method Spectrum.from_phi will add an extrap_x attribute to resulting Spectra, equal to the x-value of the first non-zero grid point. An explicit list is useful if you want to override this behavior for testing.
fail_mag: Simon Gravel noted that there can be numerical instabilities in extrapolation when working with large spectra that have very small entries (of order 1e-24). To avoid these instabilities, we ignore the extrapolated values (and use the input result with the smallest x) if the extrapolation is more than fail_mag orders of magnitude away from the smallest-x input result.

Returns a new function whose last argument is a list of numbers of grid points and that returns a result extrapolated to infinitely many grid points.
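
A typical usage sketch; dadi model functions such as Demographics1D.two_epoch take pts as their last argument (the parameter values are arbitrary):

import dadi

func_ex = dadi.Numerics.make_extrap_func(dadi.Demographics1D.two_epoch)
# Three grid sizes -> quadratic extrapolation to infinitely many grid points
fs = func_ex([2.0, 0.1], [20], [40, 50, 60])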

def make_extrap_log_func(func, extrap_x_l=None)

Generate a version of func that extrapolates to infinitely many gridpoints.

Note that extrapolation here is done on the log of the function result, so this will fail if any returned values are < 0. It does seem to be better behaved for SFS calculation.

func: A function whose last argument is the number of Numerics.default_grid points to use in calculation and that returns a single scalar or array.
extrap_x_l: An explicit list of x values to use for extrapolation. If not provided, the extrapolation routine will look for '.extrap_x' attributes on the results of func. The method Spectrum.from_phi will add an extrap_x attribute to resulting Spectra, equal to the x-value of the first non-zero grid point. An explicit list is useful if you want to override this behavior for testing.

Returns a new function whose last argument is a list of numbers of grid points and that returns a result extrapolated to infinitely many grid points.

def multinomln(N)

Log of the multinomial coefficient for an array N.

N: array of integers.
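
For example, 4!/(1! 1! 2!) = 12:

import numpy
from dadi import Numerics

print(numpy.exp(Numerics.multinomln([1, 1, 2])))  # ~12.0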

def part(x, n, minval=0, maxval=2)

Yields all integer partitions of x with n entries, with each entry between minval and maxval (by default 0 and 2, i.e. the ploidy level).

x: integer summand. n: number of partition entries. minval: minimum value allowed for partition entries. maxval: maximum value allowed for partition entries.

def quadratic_extrap(ys, xs)

Quadratically extrapolate from three x,y pairs to x = 0.

ys: y values from x,y pairs. Note that these can be arrays of values. xs: x values from x,y pairs. These should be scalars.

Returns extrapolated y at x=0.

def quadratic_grid(num_pts)

A nonuniform grid of points on [0,1] with a quadratic pattern of spacings.

The grid is weighted to be denser near 0 and 1, which is useful for population genetic simulations. In between, it smoothly increases and then decreases the step size.
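
A quick look at the spacing pattern, here with 30 points:

import numpy
from dadi import Numerics

xx = Numerics.quadratic_grid(30)
dx = numpy.diff(xx)
print(xx[0], xx[-1])            # 0.0 1.0
print(dx[0], dx[-1], dx.max())  # steps are smallest near the boundaries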

def reverse_array(arr)

Reverse an array along all axes, so arr[i,j] -> arr[-(i+1),-(j+1)].
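
For example:

import numpy
from dadi import Numerics

arr = numpy.arange(6).reshape(2, 3)
print(Numerics.reverse_array(arr))
# [[5 4 3]
#  [2 1 0]]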

def trapz(yy, xx=None, dx=None, axis=-1)

Integrate yy(xx) along given axis using the composite trapezoidal rule.

xx must be one-dimensional and len(xx) must equal yy.shape[axis].

This is modified from the SciPy version to work with n-D yy and 1-D xx.
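
For example, integrating the rows of a 2-D array along axis=1:

import numpy
from dadi import Numerics

xx = numpy.linspace(0, 1, 101)
yy = numpy.array([xx**2, xx**3])
print(Numerics.trapz(yy, xx, axis=1))  # ~[0.3333, 0.25]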
