numerics

`advance1D(u, P)`

tridiag breakdown for integration along the diagonal boundary

Parameters:

Name	Type	Description	Default
`u`		density along that diagonal	required
`P`	`array`	transition matrix	required

Source code in dadi/Triallele/numerics.py

def advance1D(u,P):
    """
    tridiag breakdown for integration along the diagonal boundary

    Args:
        u (): density along that diagonal
        P (array): transition matrix
    """
    a = np.concatenate((np.array([0]),np.diag(P,-1)))
    b = np.diag(P)
    c = np.concatenate((np.diag(P,1),np.array([0])))
    u = dadi.Integration.tridiag.tridiag(a,b,c,u)
    return u

`advance_adi(U, U01, P1, P2, x, ii)`

Integrate the ADI components forward in time, alternating which direction occurs first

Parameters:

Name	Type	Description	Default
`U`	`array - like`	density function	required
`U01`	`array - like`	stores which points are in the domain	required
`P1`	`array`	transition matrix	required
`P2`	`array`	transition matrix	required
`x`	`array - like`	grid	required
`ii`	`int`	count of integration step	required

Source code in dadi/Triallele/numerics.py

def advance_adi(U,U01,P1,P2,x,ii):
    """
    Integrate the ADI components forward in time, alternating which direction occurs first

    Args:
        U (array-like): density function
        U01 (array-like): stores which points are in the domain
        P1 (array): transition matrix
        P2 (array): transition matrix
        x (array-like): grid
        ii (int): count of integration step
    """
    if np.mod(ii,2) == 0:
        for jj in range(len(x)):
            if np.sum(U01[:,jj]) > 1:
                U[:,jj] = dadi.Integration.tridiag.tridiag(P1[jj,0,:],P1[jj,1,:],P1[jj,2,:],U[:,jj])
        for ii in range(len(x)):
            if np.sum(U01[ii,:]) > 1:
                U[ii,:] = dadi.Integration.tridiag.tridiag(P2[ii,0,:],P2[ii,1,:],P2[ii,2,:],U[ii,:])
    else:
        for ii in range(len(x)):
            if np.sum(U01[ii,:]) > 1:
                U[ii,:] = dadi.Integration.tridiag.tridiag(P2[ii,0,:],P2[ii,1,:],P2[ii,2,:],U[ii,:])
        for jj in range(len(x)):
            if np.sum(U01[:,jj]) > 1:
                U[:,jj] = dadi.Integration.tridiag.tridiag(P1[jj,0,:],P1[jj,1,:],P1[jj,2,:],U[:,jj])
    return U

`advance_cov(U, C, x, dx)`

Explicit integration of the covariance term, using scipy's sparse matrix for C

Parameters:

Name	Type	Description	Default
`U`	`array - like`	density function	required
`C`	`array - like`	transition matrix	required
`x`	`array - like`	grid	required
`dx`	`array - like`	grid spacing	required

Returns:

Name	Type	Description
`U`	`array - like`	updated density function

Source code in dadi/Triallele/numerics.py

def advance_cov(U,C,x,dx):
    """
    Explicit integration of the covariance term, using scipy's sparse matrix for C

    Args:
        U (array-like): density function
        C (array-like): transition matrix
        x (array-like): grid
        dx (array-like): grid spacing

    Returns:
        U (array-like): updated density function
    """
    U = ( C * U.reshape(len(x)**2)).reshape(len(x),len(x))
    return U

`advance_line(x, phi, P)`

Integrate along the diagonal boundary. Density gets fixed along the boundary, and then diffuses along that boundary until being fixed in one of the two corners

Parameters:

Name	Type	Description	Default
`x`	`array - like`	grid	required
`phi`	`array - like`	density function	required
`P`	`array`	one dimensional transition matrix for the diagonal boundary	required

Source code in dadi/Triallele/numerics.py

def advance_line(x,phi,P):
    """
    Integrate along the diagonal boundary. Density gets fixed along the boundary, and then diffuses along that boundary until being fixed in one of the two corners

    Args:
        x (array-like): grid
        phi (array-like): density function
        P (array): one dimensional transition matrix for the diagonal boundary
    """
    u = np.diag(np.fliplr(phi))
    u = advance1D(u,P)
    for ii in range(len(x)):
        phi[ii,len(x) - ii - 1] = u[ii]
    return phi

`bivariate_lognormal_pdf(xx, params)`

mu_i = mu_yi and sigma_i = sigma_yi are the associated means and variances of the bivariate normal distr which gets exponentiated. We assume for our application that mu1=mu2 and sigma1=sigma2, though this isn't necessary for the general bivariate lognormal distribution.

Source code in dadi/Triallele/numerics.py

def bivariate_lognormal_pdf(xx, params):
    """
    mu_i = mu_yi and sigma_i = sigma_yi are the associated means and variances of the bivariate normal distr which gets exponentiated.
    We assume for our application that mu1=mu2 and sigma1=sigma2, though this isn't necessary for the general bivariate lognormal distribution.
    """
    mu1, mu2, sigma1, sigma2, rho = params
    norm = 1./(2 * np.pi * (sigma1*sigma2) * np.sqrt(1-rho**2) * np.outer(xx,xx) )
    q = 1/(1-rho**2) * ( ((np.log(xx[nuax,:])-mu1)/sigma1)**2 - 2*rho*((np.log(xx[nuax,:])-mu1)/sigma1)*((np.log(xx[:,nuax])-mu2)/sigma2) + ((np.log(xx[:,nuax])-mu2)/sigma2)**2 )
    prob = norm * np.exp( -q/2. )

    return prob

`cached_projection(proj_to, proj_from, hits)`

Coefficients for projection from a larger size to smaller

Parameters:

Name	Type	Description	Default
`proj_to`	`int`	Number of samples to project down to	required
`proj_from`	`int`	Number of samples to project from	required
`hits`	`int`	Number of derived alleles projecting from - tuple of (n1,n3)	required

Source code in dadi/Triallele/numerics.py

def cached_projection(proj_to,proj_from,hits):
    """
    Coefficients for projection from a larger size to smaller

    Args:
        proj_to (int): Number of samples to project down to
        proj_from (int): Number of samples to project from
        hits (int): Number of derived alleles projecting from - tuple of (n1,n3)
    """
    key = (proj_to, proj_from, hits)
    try:
        return projection_cache[key]
    except KeyError:
        pass

    X1, X2 = hits
    X3 = proj_from - X1 - X2
    proj_weights = np.zeros((proj_to+1,proj_to+1))
    for ii in range(X1+1):
        for jj in range(X2+1):
            kk = proj_to - ii - jj
            if kk > X3 or kk <0:
                continue
            f = ln_binomial(X1,ii) + ln_binomial(X2,jj) + ln_binomial(X3,kk) - ln_binomial(proj_from,proj_to)
            proj_weights[ii,jj] = np.exp(f)

    projection_cache[key] = proj_weights
    return proj_weights

`domain(x)`

Constructs a matrix with the same dimension as the density function discretization, a 1 indicates that the corresponding point is inside the triangular domain or on the boundary, while a 0 indicates that point falls outside the domain

Source code in dadi/Triallele/numerics.py

def domain(x):
    """
    Constructs a matrix with the same dimension as the density function discretization, a 1 indicates that the corresponding point is inside the triangular domain or on the boundary, while a 0 indicates that point falls outside the domain
    """
    tol = 1e-12
    U01 = np.ones((len(x),len(x)))
    XX = x[:,nuax] + x[nuax,:]
    U01[np.where(XX > 1+tol)] = 0
    return U01

`fold(spectrum)`

Note

This is now handled in the TriSpectrum class

Given a frequency spectrum over the full domain, fold into a spectrum with major and minor derived alleles

Source code in dadi/Triallele/numerics.py

def fold(spectrum):
    """
    Note: 
        This is now handled in the TriSpectrum class

    Given a frequency spectrum over the full domain, fold into a spectrum with major and minor derived alleles
    """
    spectrum = TriSpectrum(spectrum)
    if spectrum.folded_major == True:
        print("error: trying to fold a spectrum that is already folded")
        return spectrum
    else:
        spectrum = (spectrum + np.transpose(spectrum))
        for ii in range(len(spectrum)):
            spectrum[ii,ii] = spectrum[ii,ii]/2
        spectrum.mask[0,:] = True
        spectrum.mask[:,0] = True
        for ii in range(len(spectrum)):
            spectrum.mask[ii,ii+1:] = True
            spectrum.mask[ii,len(spectrum)-1-ii:] = True
        return spectrum

`fold_ancestral(F)`

Note

This is now handled by the TriSpectrum class.

Don't know ancestral state, so track minor frequencies. Store spectrum of two minor allele frequencies.

Source code in dadi/Triallele/numerics.py

def fold_ancestral(F):
    """
    Note: 
        This is now handled by the TriSpectrum class.

    Don't know ancestral state, so track minor frequencies.
    Store spectrum of two minor allele frequencies.
    """
    F_new = 0*F
    F_new = TriSpectrum(F_new)
    ns = len(F)-1
    for ii in range(ns):
        for jj in range(ns):
            kk = ns-ii-jj
            if F.mask[ii,jj] == True:
                continue
            elif ii <= kk and jj <= kk:
                if ii >= jj:
                    F_new[ii,jj] += F[ii,jj]
                else:
                    F_new[jj,ii] += F[ii,jj]
            elif ii > kk and jj <= kk:
                F_new[kk,jj] += F[ii,jj]
            elif ii <= kk and jj > kk:
                F_new[kk,ii] += F[ii,jj]
            else: # ii > kk and jj > kk
                if ii >= jj:
                    F_new[jj,kk] += F[ii,jj]
                else:
                    F_new[ii,kk] += F[ii,jj]
    # mask if not a valid entry for ancestrally folded spectrum
    for ii in range(ns):
        for jj in range(ns):
            kk = ns-ii-jj
            if not (kk>=ii>=jj):
                F_new.mask[ii,jj] = True
    return F_new

`grid_dx(x)`

We use uniform grids in x, using np.linspace(0,1,numpts) Grid spacing Delta, which is halved at the first and last grid points.

Source code in dadi/Triallele/numerics.py

def grid_dx(x):
    """
    We use uniform grids in x, using np.linspace(0,1,numpts)
    Grid spacing Delta, which is halved at the first and last grid points.
    """
    return (np.concatenate((np.diff(x),np.array([0]))) + np.concatenate((np.array([0]),np.diff(x))))/2

`grid_dx_2d(x, dx)`

The two dimensional grid spacing over the domain. Grid points lie along the diagonal boundary, and Delta for those points is halved

Source code in dadi/Triallele/numerics.py

def grid_dx_2d(x,dx):
    """
    The two dimensional grid spacing over the domain.
    Grid points lie along the diagonal boundary, and Delta for those points is halved
    """
    DXX = dx[:,nuax]*dx[nuax,:]
    for ii in range(len(x)):
        DXX[ii,len(x)-ii-1] *= 1./2
    return DXX

`int2(DXX, U)`

Integrate the density function over the domain

Parameters:

Name	Type	Description	Default
`DXX`	`array - like`	two dimensional grid	required
`U`		density function	required

Source code in dadi/Triallele/numerics.py

def int2(DXX,U):
    """
    Integrate the density function over the domain

    Args:
        DXX (array-like): two dimensional grid
        U (): density function
    """
    return np.sum(DXX*U)

`misidentification(F, p)`

Given folded spectrum, and probability p that one of the derived alleles is the actual ancestral allele Then refold to return folded spectrum

Source code in dadi/Triallele/numerics.py

def misidentification(F, p):
    """
    Given folded spectrum, and probability p that one of the derived alleles is the actual ancestral allele
    Then refold to return folded spectrum
    """
    F = TriF(np.zeros((len(F),len(F))))
    for ii in range(len(F))[1:-1]:
        for jj in range(len(F))[1:ii+1]:
            if ii+jj < len(F):
                F[ii,jj] += (1 - p) * F[ii,jj]
                F[len(F)-1-ii-jj,jj] += p/2. * F[ii,jj]
                F[ii,len(F)-1-ii-jj] += p/2. * F[ii,jj]

    return F.fold()

`move_density_to_bdry(x, phi, P)`

P tells us how much should be removed, by multiplying phi*P Take that density and instead of deleting it, move straight to boundary P - stores how much denstity from each grid point should be moved to diagonal

Source code in dadi/Triallele/numerics.py

def move_density_to_bdry(x,phi,P):
    """
    P tells us how much should be removed, by multiplying phi*P
    Take that density and instead of deleting it, move straight to boundary
    P - stores how much denstity from each grid point should be moved to diagonal
    """
    for ii in range(len(x)):
        for jj in range(len(x)):
            if P[ii,jj] == 0:
                continue
            else:
                amnt = P[ii,jj]
                s = ii+jj
                if ii == 1 and jj == len(x)-3:
                    phi[ii+1,jj] += phi[ii,jj]*amnt/4. * 2
                    phi[ii,jj+1] += phi[ii,jj]*amnt/4. * 2
                    phi[ii-1,jj] += phi[ii,jj]*amnt/2 * 2
                    phi[ii,jj] *= (1-amnt)
                elif ii == len(x)-3 and jj == 1:
                    phi[ii+1,jj] += phi[ii,jj]*amnt/4. * 2
                    phi[ii,jj+1] += phi[ii,jj]*amnt/4. * 2
                    phi[ii,jj-1] += phi[ii,jj]*amnt/2 * 2
                    phi[ii,jj] *= (1-amnt)
                elif (len(x)-1-s) % 2 == 1:
                    # split between two points
                    dist = (len(x)-1-s) // 2
                    phi[ii+dist+1,jj+dist] += phi[ii,jj]*amnt/2. * 2
                    phi[ii+dist,jj+dist+1] += phi[ii,jj]*amnt/2. * 2
                    phi[ii,jj] *= (1-amnt)
                else:
                    # straight to boundary grid point
                    dist = (len(x)-1-s) // 2
                    phi[ii+dist,jj+dist] += phi[ii,jj]*amnt * 2
                    phi[ii,jj] *= (1-amnt)

    return phi

`ms_demo_params_to_dadi(nu_ms, tau_ms)`

convert from ms parameters (which use current pop size) for nu and tau to dadi parameters (which use ancestral pop size)

Source code in dadi/Triallele/numerics.py

def ms_demo_params_to_dadi(nu_ms,tau_ms):
    """
    convert from ms parameters (which use current pop size) for nu and tau to dadi parameters (which use ancestral pop size)
    """
    return 1./nu_ms,2*tau_ms/nu_ms

`remove_diag_density_weights_nonneutral(x, dt, nu, sig1, sig2)`

Numerically determine the amount of density that should be lost to the diagonal boundary. Numerically integrate 1D array with initial point mass at z0, where z0 is the frequency x+y, integrated for time step dt. We then check the fraction of density that is lost to z=1. If sig1 or sig2 are nonzero, estimate the selection pressure on z as sig = sig1x/(x+y) + sig2y/(x+y)

Parameters:

Name	Type	Description	Default
`x`	`array - like`	grid.	required
`dt`	`float`	time step for integration.	required
`nu`	`float`	population size scaling factor.	required
`sig1`	`float`	population scaled selection coefficient for the first derived allele.	required
`sig2`	`float`	population scaled selection coefficients for the second derived allele.	required

Source code in dadi/Triallele/numerics.py

def remove_diag_density_weights_nonneutral(x,dt,nu,sig1,sig2):
    """
    Numerically determine the amount of density that should be lost to the diagonal boundary.
    Numerically integrate 1D array with initial point mass at z0, where z0 is the frequency x+y, integrated for time step dt.
    We then check the fraction of density that is lost to z=1.
    If sig1 or sig2 are nonzero, estimate the selection pressure on z as sig = sig1*x/(x+y) + sig2*y/(x+y)

    Args:
        x (array-like): grid.
        dt (float): time step for integration.
        nu (float): population size scaling factor.
        sig1 (float): population scaled selection coefficient for the first derived allele.
        sig2 (float): population scaled selection coefficients for the second derived allele.
    """
    dx = grid_dx(x)
    P = np.zeros((len(x),len(x)))
    for ii in range(len(x)):
        for jj in range(len(x)):
            if x[ii]+x[jj] < 1.0 and x[ii]+x[jj] > .75:
                sig = sig1*x[ii]/(x[ii]+x[jj]) + sig2*x[jj]/(x[ii]+x[jj])
                V,M = cached_transition1D(len(x)-1,sig)
                P1D = np.eye(len(x)) + dt*(V/nu+M)
                y = np.zeros(len(x))
                y[ii+jj] = 1./dx[ii+jj]
                y = advance1D(y,P1D)
                prob = y[-1]*dx[-1]
                P[ii,jj] = prob
                if ii+jj == len(x)-2:
                    if ii==1:
                        V,M = cached_transition1D(len(x)-1,sig1)
                        P1D = np.eye(len(x)) + dt*(V/nu+M)
                        y = np.zeros(len(x))
                        y[ii] = 1./dx[ii]
                        y = advance1D(y,P1D)
                        prob2 = y[0]*dx[0]
                        P[ii,jj] += prob2
                    elif jj==1:
                        V,M = cached_transition1D(len(x)-1,sig2)
                        P1D = np.eye(len(x)) + dt*(V/nu+M)
                        y = np.zeros(len(x))
                        y[jj] = 1./dx[jj]
                        y = advance1D(y,P1D)
                        prob2 = y[0]*dx[0]
                        P[ii,jj] += prob2

    P[:,0] = 0
    P[0,:] = 0
    return P

`sample(phi, ns, x)`

Obtain the expected sample frequency spectrum from the density function

Source code in dadi/Triallele/numerics.py

def sample(phi, ns, x):
    """
    Obtain the expected sample frequency spectrum from the density function
    """
    dx = grid_dx(x)
    DXX = grid_dx_2d(x,dx)

    # Assume ns is typically a list of length 1, if not, make it into one.
    try:
        ns = tuple(ns)
    except TypeError:
        ns = (ns,)

    # We cache calculations of several big matrices that will be re-used 
    # within and between integrations.
    key = (ns, tuple(x))
    if key not in sample_cache:
        this_cache = {}
        for ii in range(1,ns[0]-1):
            # Create our cache
            this_cache[ii] = (1-x[:,nuax]-x[nuax,:])**ii
            # Somewhat ugly hack to use negative values to store second array
            # to cache.
            this_cache[-ii] = x[nuax,:]**ii
        sample_cache[key] = this_cache
    else:
        this_cache = sample_cache[key]

    #dx = grid_dx(x)
    F = np.zeros((ns[0]+1,ns[0]+1))
    prod_phi = DXX*phi
    for ii in range(len(F)):
        prod_x = prod_phi * x[:,nuax]**ii
        for jj in range(len(F)):
            if ii+jj < ns[0] and ii != 0 and jj != 0:
                #F[ii,jj] = math.factorial(ns)/(math.factorial(ii)*math.factorial(jj)*math.factorial(ns-ii-jj)) * int2(x, dx, phi*x[:,nuax]**ii*x[nuax,:]**jj*(1-x[:,nuax]-x[nuax,:])**(ns-ii-jj) )
                F[ii,jj] = trinomial(ns[0],ii,jj) * np.sum(prod_x * this_cache[-jj] * this_cache[ns[0]-ii-jj])
    F = TriSpectrum(F)
    F.folded_major = False
    F.folded_ancestral = False
    F.extrap_x = x[1]
    return F

`trinomial(ns, ii, jj)`

Return ns!/(ii! * jj! * (ns-ii-jj)!) for large values

Source code in dadi/Triallele/numerics.py

def trinomial(ns,ii,jj):
    """
    Return ns!/(ii! * jj! * (ns-ii-jj)!) for large values
    """
    return np.exp(math.lgamma(ns+1) - math.lgamma(ii+1) - math.lgamma(jj+1) - math.lgamma(ns-ii-jj+1))

`univariate_lognormal_pdf(x, sigma, mu)`

Can compare to scipy.stats.lognorm.pdf(x,sigma,0,np.exp(mu))

Source code in dadi/Triallele/numerics.py

def univariate_lognormal_pdf(x,sigma,mu):
    """
    Can compare to scipy.stats.lognorm.pdf(x,sigma,0,np.exp(mu))
    """
    return 1./(x*sigma*np.sqrt(2*math.pi)) * np.exp ( -(np.log(x) - mu)**2 / (2*sigma**2) )