Skip to content

numerics

advance1D(u, P)

tridiag breakdown for integration along the diagonal boundary

Parameters:

Name Type Description Default
u

density along that diagonal

required
P array

transition matrix

required
Source code in dadi/Triallele/numerics.py
423
424
425
426
427
428
429
430
431
432
433
434
435
def advance1D(u,P):
    """
    tridiag breakdown for integration along the diagonal boundary

    Args:
        u (): density along that diagonal
        P (array): transition matrix
    """
    a = np.concatenate((np.array([0]),np.diag(P,-1)))
    b = np.diag(P)
    c = np.concatenate((np.diag(P,1),np.array([0])))
    u = dadi.Integration.tridiag.tridiag(a,b,c,u)
    return u

advance_adi(U, U01, P1, P2, x, ii)

Integrate the ADI components forward in time, alternating which direction occurs first

Parameters:

Name Type Description Default
U array - like

density function

required
U01 array - like

stores which points are in the domain

required
P1 array

transition matrix

required
P2 array

transition matrix

required
x array - like

grid

required
ii int

count of integration step

required
Source code in dadi/Triallele/numerics.py
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
def advance_adi(U,U01,P1,P2,x,ii):
    """
    Integrate the ADI components forward in time, alternating which direction occurs first

    Args:
        U (array-like): density function
        U01 (array-like): stores which points are in the domain
        P1 (array): transition matrix
        P2 (array): transition matrix
        x (array-like): grid
        ii (int): count of integration step
    """
    if np.mod(ii,2) == 0:
        for jj in range(len(x)):
            if np.sum(U01[:,jj]) > 1:
                U[:,jj] = dadi.Integration.tridiag.tridiag(P1[jj,0,:],P1[jj,1,:],P1[jj,2,:],U[:,jj])
        for ii in range(len(x)):
            if np.sum(U01[ii,:]) > 1:
                U[ii,:] = dadi.Integration.tridiag.tridiag(P2[ii,0,:],P2[ii,1,:],P2[ii,2,:],U[ii,:])
    else:
        for ii in range(len(x)):
            if np.sum(U01[ii,:]) > 1:
                U[ii,:] = dadi.Integration.tridiag.tridiag(P2[ii,0,:],P2[ii,1,:],P2[ii,2,:],U[ii,:])
        for jj in range(len(x)):
            if np.sum(U01[:,jj]) > 1:
                U[:,jj] = dadi.Integration.tridiag.tridiag(P1[jj,0,:],P1[jj,1,:],P1[jj,2,:],U[:,jj])
    return U

advance_cov(U, C, x, dx)

Explicit integration of the covariance term, using scipy's sparse matrix for C

Parameters:

Name Type Description Default
U array - like

density function

required
C array - like

transition matrix

required
x array - like

grid

required
dx array - like

grid spacing

required

Returns:

Name Type Description
U array - like

updated density function

Source code in dadi/Triallele/numerics.py
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
def advance_cov(U,C,x,dx):
    """
    Explicit integration of the covariance term, using scipy's sparse matrix for C

    Args:
        U (array-like): density function
        C (array-like): transition matrix
        x (array-like): grid
        dx (array-like): grid spacing

    Returns:
        U (array-like): updated density function
    """
    U = ( C * U.reshape(len(x)**2)).reshape(len(x),len(x))
    return U

advance_line(x, phi, P)

Integrate along the diagonal boundary. Density gets fixed along the boundary, and then diffuses along that boundary until being fixed in one of the two corners

Parameters:

Name Type Description Default
x array - like

grid

required
phi array - like

density function

required
P array

one dimensional transition matrix for the diagonal boundary

required
Source code in dadi/Triallele/numerics.py
437
438
439
440
441
442
443
444
445
446
447
448
449
450
def advance_line(x,phi,P):
    """
    Integrate along the diagonal boundary. Density gets fixed along the boundary, and then diffuses along that boundary until being fixed in one of the two corners

    Args:
        x (array-like): grid
        phi (array-like): density function
        P (array): one dimensional transition matrix for the diagonal boundary
    """
    u = np.diag(np.fliplr(phi))
    u = advance1D(u,P)
    for ii in range(len(x)):
        phi[ii,len(x) - ii - 1] = u[ii]
    return phi

bivariate_lognormal_pdf(xx, params)

mu_i = mu_yi and sigma_i = sigma_yi are the associated means and variances of the bivariate normal distr which gets exponentiated. We assume for our application that mu1=mu2 and sigma1=sigma2, though this isn't necessary for the general bivariate lognormal distribution.

Source code in dadi/Triallele/numerics.py
550
551
552
553
554
555
556
557
558
559
560
def bivariate_lognormal_pdf(xx, params):
    """
    mu_i = mu_yi and sigma_i = sigma_yi are the associated means and variances of the bivariate normal distr which gets exponentiated.
    We assume for our application that mu1=mu2 and sigma1=sigma2, though this isn't necessary for the general bivariate lognormal distribution.
    """
    mu1, mu2, sigma1, sigma2, rho = params
    norm = 1./(2 * np.pi * (sigma1*sigma2) * np.sqrt(1-rho**2) * np.outer(xx,xx) )
    q = 1/(1-rho**2) * ( ((np.log(xx[nuax,:])-mu1)/sigma1)**2 - 2*rho*((np.log(xx[nuax,:])-mu1)/sigma1)*((np.log(xx[:,nuax])-mu2)/sigma2) + ((np.log(xx[:,nuax])-mu2)/sigma2)**2 )
    prob = norm * np.exp( -q/2. )

    return prob

cached_projection(proj_to, proj_from, hits)

Coefficients for projection from a larger size to smaller

Parameters:

Name Type Description Default
proj_to int

Number of samples to project down to

required
proj_from int

Number of samples to project from

required
hits int

Number of derived alleles projecting from - tuple of (n1,n3)

required
Source code in dadi/Triallele/numerics.py
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
def cached_projection(proj_to,proj_from,hits):
    """
    Coefficients for projection from a larger size to smaller

    Args:
        proj_to (int): Number of samples to project down to
        proj_from (int): Number of samples to project from
        hits (int): Number of derived alleles projecting from - tuple of (n1,n3)
    """
    key = (proj_to, proj_from, hits)
    try:
        return projection_cache[key]
    except KeyError:
        pass

    X1, X2 = hits
    X3 = proj_from - X1 - X2
    proj_weights = np.zeros((proj_to+1,proj_to+1))
    for ii in range(X1+1):
        for jj in range(X2+1):
            kk = proj_to - ii - jj
            if kk > X3 or kk <0:
                continue
            f = ln_binomial(X1,ii) + ln_binomial(X2,jj) + ln_binomial(X3,kk) - ln_binomial(proj_from,proj_to)
            proj_weights[ii,jj] = np.exp(f)

    projection_cache[key] = proj_weights
    return proj_weights

domain(x)

Constructs a matrix with the same dimension as the density function discretization, a 1 indicates that the corresponding point is inside the triangular domain or on the boundary, while a 0 indicates that point falls outside the domain

Source code in dadi/Triallele/numerics.py
43
44
45
46
47
48
49
50
51
def domain(x):
    """
    Constructs a matrix with the same dimension as the density function discretization, a 1 indicates that the corresponding point is inside the triangular domain or on the boundary, while a 0 indicates that point falls outside the domain
    """
    tol = 1e-12
    U01 = np.ones((len(x),len(x)))
    XX = x[:,nuax] + x[nuax,:]
    U01[np.where(XX > 1+tol)] = 0
    return U01

fold(spectrum)

Note

This is now handled in the TriSpectrum class

Given a frequency spectrum over the full domain, fold into a spectrum with major and minor derived alleles

Source code in dadi/Triallele/numerics.py
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
def fold(spectrum):
    """
    Note: 
        This is now handled in the TriSpectrum class

    Given a frequency spectrum over the full domain, fold into a spectrum with major and minor derived alleles
    """
    spectrum = TriSpectrum(spectrum)
    if spectrum.folded_major == True:
        print("error: trying to fold a spectrum that is already folded")
        return spectrum
    else:
        spectrum = (spectrum + np.transpose(spectrum))
        for ii in range(len(spectrum)):
            spectrum[ii,ii] = spectrum[ii,ii]/2
        spectrum.mask[0,:] = True
        spectrum.mask[:,0] = True
        for ii in range(len(spectrum)):
            spectrum.mask[ii,ii+1:] = True
            spectrum.mask[ii,len(spectrum)-1-ii:] = True
        return spectrum

fold_ancestral(F)

Note

This is now handled by the TriSpectrum class.

Don't know ancestral state, so track minor frequencies. Store spectrum of two minor allele frequencies.

Source code in dadi/Triallele/numerics.py
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
def fold_ancestral(F):
    """
    Note: 
        This is now handled by the TriSpectrum class.

    Don't know ancestral state, so track minor frequencies.
    Store spectrum of two minor allele frequencies.
    """
    F_new = 0*F
    F_new = TriSpectrum(F_new)
    ns = len(F)-1
    for ii in range(ns):
        for jj in range(ns):
            kk = ns-ii-jj
            if F.mask[ii,jj] == True:
                continue
            elif ii <= kk and jj <= kk:
                if ii >= jj:
                    F_new[ii,jj] += F[ii,jj]
                else:
                    F_new[jj,ii] += F[ii,jj]
            elif ii > kk and jj <= kk:
                F_new[kk,jj] += F[ii,jj]
            elif ii <= kk and jj > kk:
                F_new[kk,ii] += F[ii,jj]
            else: # ii > kk and jj > kk
                if ii >= jj:
                    F_new[jj,kk] += F[ii,jj]
                else:
                    F_new[ii,kk] += F[ii,jj]
    # mask if not a valid entry for ancestrally folded spectrum
    for ii in range(ns):
        for jj in range(ns):
            kk = ns-ii-jj
            if not (kk>=ii>=jj):
                F_new.mask[ii,jj] = True
    return F_new

grid_dx(x)

We use uniform grids in x, using np.linspace(0,1,numpts) Grid spacing Delta, which is halved at the first and last grid points.

Source code in dadi/Triallele/numerics.py
16
17
18
19
20
21
def grid_dx(x):
    """
    We use uniform grids in x, using np.linspace(0,1,numpts)
    Grid spacing Delta, which is halved at the first and last grid points.
    """
    return (np.concatenate((np.diff(x),np.array([0]))) + np.concatenate((np.array([0]),np.diff(x))))/2

grid_dx_2d(x, dx)

The two dimensional grid spacing over the domain. Grid points lie along the diagonal boundary, and Delta for those points is halved

Source code in dadi/Triallele/numerics.py
23
24
25
26
27
28
29
30
31
def grid_dx_2d(x,dx):
    """
    The two dimensional grid spacing over the domain.
    Grid points lie along the diagonal boundary, and Delta for those points is halved
    """
    DXX = dx[:,nuax]*dx[nuax,:]
    for ii in range(len(x)):
        DXX[ii,len(x)-ii-1] *= 1./2
    return DXX

int2(DXX, U)

Integrate the density function over the domain

Parameters:

Name Type Description Default
DXX array - like

two dimensional grid

required
U

density function

required
Source code in dadi/Triallele/numerics.py
33
34
35
36
37
38
39
40
41
def int2(DXX,U):
    """
    Integrate the density function over the domain

    Args:
        DXX (array-like): two dimensional grid
        U (): density function
    """
    return np.sum(DXX*U)

misidentification(F, p)

Given folded spectrum, and probability p that one of the derived alleles is the actual ancestral allele Then refold to return folded spectrum

Source code in dadi/Triallele/numerics.py
507
508
509
510
511
512
513
514
515
516
517
518
519
520
def misidentification(F, p):
    """
    Given folded spectrum, and probability p that one of the derived alleles is the actual ancestral allele
    Then refold to return folded spectrum
    """
    F = TriF(np.zeros((len(F),len(F))))
    for ii in range(len(F))[1:-1]:
        for jj in range(len(F))[1:ii+1]:
            if ii+jj < len(F):
                F[ii,jj] += (1 - p) * F[ii,jj]
                F[len(F)-1-ii-jj,jj] += p/2. * F[ii,jj]
                F[ii,len(F)-1-ii-jj] += p/2. * F[ii,jj]

    return F.fold()

move_density_to_bdry(x, phi, P)

P tells us how much should be removed, by multiplying phi*P Take that density and instead of deleting it, move straight to boundary P - stores how much denstity from each grid point should be moved to diagonal

Source code in dadi/Triallele/numerics.py
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
def move_density_to_bdry(x,phi,P):
    """
    P tells us how much should be removed, by multiplying phi*P
    Take that density and instead of deleting it, move straight to boundary
    P - stores how much denstity from each grid point should be moved to diagonal
    """
    for ii in range(len(x)):
        for jj in range(len(x)):
            if P[ii,jj] == 0:
                continue
            else:
                amnt = P[ii,jj]
                s = ii+jj
                if ii == 1 and jj == len(x)-3:
                    phi[ii+1,jj] += phi[ii,jj]*amnt/4. * 2
                    phi[ii,jj+1] += phi[ii,jj]*amnt/4. * 2
                    phi[ii-1,jj] += phi[ii,jj]*amnt/2 * 2
                    phi[ii,jj] *= (1-amnt)
                elif ii == len(x)-3 and jj == 1:
                    phi[ii+1,jj] += phi[ii,jj]*amnt/4. * 2
                    phi[ii,jj+1] += phi[ii,jj]*amnt/4. * 2
                    phi[ii,jj-1] += phi[ii,jj]*amnt/2 * 2
                    phi[ii,jj] *= (1-amnt)
                elif (len(x)-1-s) % 2 == 1:
                    # split between two points
                    dist = (len(x)-1-s) // 2
                    phi[ii+dist+1,jj+dist] += phi[ii,jj]*amnt/2. * 2
                    phi[ii+dist,jj+dist+1] += phi[ii,jj]*amnt/2. * 2
                    phi[ii,jj] *= (1-amnt)
                else:
                    # straight to boundary grid point
                    dist = (len(x)-1-s) // 2
                    phi[ii+dist,jj+dist] += phi[ii,jj]*amnt * 2
                    phi[ii,jj] *= (1-amnt)

    return phi

ms_demo_params_to_dadi(nu_ms, tau_ms)

convert from ms parameters (which use current pop size) for nu and tau to dadi parameters (which use ancestral pop size)

Source code in dadi/Triallele/numerics.py
562
563
564
565
566
def ms_demo_params_to_dadi(nu_ms,tau_ms):
    """
    convert from ms parameters (which use current pop size) for nu and tau to dadi parameters (which use ancestral pop size)
    """
    return 1./nu_ms,2*tau_ms/nu_ms

remove_diag_density_weights_nonneutral(x, dt, nu, sig1, sig2)

Numerically determine the amount of density that should be lost to the diagonal boundary. Numerically integrate 1D array with initial point mass at z0, where z0 is the frequency x+y, integrated for time step dt. We then check the fraction of density that is lost to z=1. If sig1 or sig2 are nonzero, estimate the selection pressure on z as sig = sig1x/(x+y) + sig2y/(x+y)

Parameters:

Name Type Description Default
x array - like

grid.

required
dt float

time step for integration.

required
nu float

population size scaling factor.

required
sig1 float

population scaled selection coefficient for the first derived allele.

required
sig2 float

population scaled selection coefficients for the second derived allele.

required
Source code in dadi/Triallele/numerics.py
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
def remove_diag_density_weights_nonneutral(x,dt,nu,sig1,sig2):
    """
    Numerically determine the amount of density that should be lost to the diagonal boundary.
    Numerically integrate 1D array with initial point mass at z0, where z0 is the frequency x+y, integrated for time step dt.
    We then check the fraction of density that is lost to z=1.
    If sig1 or sig2 are nonzero, estimate the selection pressure on z as sig = sig1*x/(x+y) + sig2*y/(x+y)

    Args:
        x (array-like): grid.
        dt (float): time step for integration.
        nu (float): population size scaling factor.
        sig1 (float): population scaled selection coefficient for the first derived allele.
        sig2 (float): population scaled selection coefficients for the second derived allele.
    """
    dx = grid_dx(x)
    P = np.zeros((len(x),len(x)))
    for ii in range(len(x)):
        for jj in range(len(x)):
            if x[ii]+x[jj] < 1.0 and x[ii]+x[jj] > .75:
                sig = sig1*x[ii]/(x[ii]+x[jj]) + sig2*x[jj]/(x[ii]+x[jj])
                V,M = cached_transition1D(len(x)-1,sig)
                P1D = np.eye(len(x)) + dt*(V/nu+M)
                y = np.zeros(len(x))
                y[ii+jj] = 1./dx[ii+jj]
                y = advance1D(y,P1D)
                prob = y[-1]*dx[-1]
                P[ii,jj] = prob
                if ii+jj == len(x)-2:
                    if ii==1:
                        V,M = cached_transition1D(len(x)-1,sig1)
                        P1D = np.eye(len(x)) + dt*(V/nu+M)
                        y = np.zeros(len(x))
                        y[ii] = 1./dx[ii]
                        y = advance1D(y,P1D)
                        prob2 = y[0]*dx[0]
                        P[ii,jj] += prob2
                    elif jj==1:
                        V,M = cached_transition1D(len(x)-1,sig2)
                        P1D = np.eye(len(x)) + dt*(V/nu+M)
                        y = np.zeros(len(x))
                        y[jj] = 1./dx[jj]
                        y = advance1D(y,P1D)
                        prob2 = y[0]*dx[0]
                        P[ii,jj] += prob2

    P[:,0] = 0
    P[0,:] = 0
    return P

sample(phi, ns, x)

Obtain the expected sample frequency spectrum from the density function

Source code in dadi/Triallele/numerics.py
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
def sample(phi, ns, x):
    """
    Obtain the expected sample frequency spectrum from the density function
    """
    dx = grid_dx(x)
    DXX = grid_dx_2d(x,dx)

    # Assume ns is typically a list of length 1, if not, make it into one.
    try:
        ns = tuple(ns)
    except TypeError:
        ns = (ns,)

    # We cache calculations of several big matrices that will be re-used 
    # within and between integrations.
    key = (ns, tuple(x))
    if key not in sample_cache:
        this_cache = {}
        for ii in range(1,ns[0]-1):
            # Create our cache
            this_cache[ii] = (1-x[:,nuax]-x[nuax,:])**ii
            # Somewhat ugly hack to use negative values to store second array
            # to cache.
            this_cache[-ii] = x[nuax,:]**ii
        sample_cache[key] = this_cache
    else:
        this_cache = sample_cache[key]

    #dx = grid_dx(x)
    F = np.zeros((ns[0]+1,ns[0]+1))
    prod_phi = DXX*phi
    for ii in range(len(F)):
        prod_x = prod_phi * x[:,nuax]**ii
        for jj in range(len(F)):
            if ii+jj < ns[0] and ii != 0 and jj != 0:
                #F[ii,jj] = math.factorial(ns)/(math.factorial(ii)*math.factorial(jj)*math.factorial(ns-ii-jj)) * int2(x, dx, phi*x[:,nuax]**ii*x[nuax,:]**jj*(1-x[:,nuax]-x[nuax,:])**(ns-ii-jj) )
                F[ii,jj] = trinomial(ns[0],ii,jj) * np.sum(prod_x * this_cache[-jj] * this_cache[ns[0]-ii-jj])
    F = TriSpectrum(F)
    F.folded_major = False
    F.folded_ancestral = False
    F.extrap_x = x[1]
    return F

trinomial(ns, ii, jj)

Return ns!/(ii! * jj! * (ns-ii-jj)!) for large values

Source code in dadi/Triallele/numerics.py
498
499
500
501
502
def trinomial(ns,ii,jj):
    """
    Return ns!/(ii! * jj! * (ns-ii-jj)!) for large values
    """
    return np.exp(math.lgamma(ns+1) - math.lgamma(ii+1) - math.lgamma(jj+1) - math.lgamma(ns-ii-jj+1))

univariate_lognormal_pdf(x, sigma, mu)

Can compare to scipy.stats.lognorm.pdf(x,sigma,0,np.exp(mu))

Source code in dadi/Triallele/numerics.py
544
545
546
547
548
def univariate_lognormal_pdf(x,sigma,mu):
    """
    Can compare to scipy.stats.lognorm.pdf(x,sigma,0,np.exp(mu))
    """
    return 1./(x*sigma*np.sqrt(2*math.pi)) * np.exp ( -(np.log(x) - mu)**2 / (2*sigma**2) )