Two Locus Model
import dadi
import dadi.TwoLocus
import numpy as np
Loading and manipulating two-locus spectrum
## 1. We'll first import a saved two-locus spectrum
# this was simulated/sampled with rho=1, nu=2.0, T=0.1
fs = dadi.TwoLocus.TLSpectrum.from_file('example_observed_fs.fs')
# get the sample size of the spectrum
ns = fs.sample_size
# fold the spectrum (ancestral state unknown)
fs_folded = fs.fold()
# project to a smaller sample size
fs_project = dadi.TwoLocus.numerics.project(fs, ns//2)
# compute mean r squared for observed variable sites
r2 = fs.mean_r2()
Calculating expected spectrum using demographic history functions
## 2. Create a two locus spectrum under a demographic history model
# two epoch model, with a recent size change of 2.0 for 0.1 (2*N_e) time units
nu,T = 2.0, 0.1
# sample size
ns = 20
# population size-scaled recombination rate between loci
rho = 1.0
# set grid points and time steps to integrate over
# we want the number of grid points to be larger than the sample size (here 20)
gridpts = [40,50,60]
# we also extrapolate over dt values, which improves accuracy
dts = [0.005,0.0025,0.001]
# note that the first time computing the equilibrium spectrum for a given
# dt and pts can take a while, which is why we cache those spectra
# Here we copy the precached spectra we have provided to the central cache
# location (by default, ~/.dadi/TwoLocus_cache). In general, a user won't need
# to do this. But if you are moving between machines, you may want to copy your
# cache over as well, to save substantial calculation time.
import glob, shutil
for fname in glob.glob('TwoLocus_cache/*.npz'):
shutil.copy(fname, dadi.TwoLocus.demographics.cache_path)
spectra = {}
for dt in dts:
spectra.setdefault(dt,{})
for pts in gridpts:
print('computing for dt, pts = {0}, {1}'.format(dt,pts))
spectra[dt][pts] = dadi.TwoLocus.demographics.two_epoch((nu,T), pts, ns, rho=rho, dt=dt)
# extrap_dt_pts takes a dictionary in the form spectra[dts][pts] and
# extrapolates the calculated spectra over dt and pts to output the expected
# frequency spectrum
F = dadi.TwoLocus.numerics.extrap_dt_pts(spectra)
# save this spectrum to working directory
F.to_file('./two_epoch_example.fs')
computing for dt, pts = 0.005, 40
computing for dt, pts = 0.005, 50
computing for dt, pts = 0.005, 60
computing for dt, pts = 0.0025, 40
computing for dt, pts = 0.0025, 50
computing for dt, pts = 0.0025, 60
computing for dt, pts = 0.001, 40
computing for dt, pts = 0.001, 50
computing for dt, pts = 0.001, 60
Inference demographic parameters from example spectrum
## 3. Fit a two-epoch model to the spectrum that we loaded in 1.
fs = dadi.TwoLocus.TLSpectrum.from_file('example_observed_fs.fs')
# The inference/optimize functions takes a list of two-locus frequency spectra,
# and a list of the rho values for each of the spectra in the list, in the same
# order as the spectra in the list. Here, we only are optimizing over a single
# frequency spectrum for rho = 1.
data_list = [fs]
rho_list = [1.0]
# we know that the sample spectrum was generated under a demography with
# nu = 2.0, T = 0.1, so for illustration purposes we pick initial values close
# to the true values
initial_guess = dadi.Misc.perturb_params([2.0,0.1], fold=0.2)
gridpts = [40,50,60]
dts = [0.005,0.0025,0.001]
# do_calculation computes the expected frequency spectrum for a given sample
# size and rho
def do_calculation(arguments):
params, pts, ns, rho, dt = arguments
# create spectrum for given param set, rho, etc
temps = {}
for dt in dts:
temps.setdefault(dt,{})
for numpts in pts:
temps[dt][numpts] = dadi.TwoLocus.demographics.two_epoch(params,numpts,ns,rho=rho,dt=dt)
return dadi.TwoLocus.TLSpectrum(dadi.TwoLocus.numerics.extrap_dt_pts(temps))
# optimize functions needs a function to output a list of spectra over the rho-values in the rhos list
def model_func(params, ns, pts, dts, rhos=[0]):
inputs = [(params,pts,ns,rho,dts) for rho in rhos]
output = [do_calculation(input) for input in inputs]
return output
opt_params = dadi.TwoLocus.inference.optimize_log_fmin(initial_guess, data_list, model_func, gridpts, dts, rhos=rho_list, verbose=1)
1 , -3413.32 , array([ 1.84088 , 0.093625 ])
2 , -3405.48 , array([ 1.89791 , 0.093625 ])
3 , -3430.38 , array([ 1.84088 , 0.083169 ])
4 , -3396.61 , array([ 1.89791 , 0.105396 ])
5 , -3396.23 , array([ 1.92709 , 0.118646 ])
6 , -3399.13 , array([ 1.98679 , 0.118646 ])
7 , -3446.44 , array([ 2.01734 , 0.150353 ])
8 , -3395.18 , array([ 1.92709 , 0.105396 ])
9 , -3398.65 , array([ 1.86918 , 0.105396 ])
10 , -3395.77 , array([ 1.89791 , 0.108562 ])
11 , -3401.33 , array([ 1.89791 , 0.0964382 ])
12 , -3394.74 , array([ 1.91975 , 0.112655 ])
13 , -3394.55 , array([ 1.94926 , 0.109369 ])
14 , -3394.83 , array([ 1.97546 , 0.109774 ])
15 , -3396.39 , array([ 1.94184 , 0.116902 ])
16 , -3394.67 , array([ 1.93077 , 0.108161 ])
17 , -3394.27 , array([ 1.96045 , 0.105006 ])
18 , -3394.26 , array([ 1.98112 , 0.101379 ])
19 , -3394.09 , array([ 2.0001 , 0.10251 ])
20 , -3393.92 , array([ 2.03569 , 0.0997966 ])
21 , -3395.43 , array([ 2.06896 , 0.0925057 ])
22 , -3394.09 , array([ 1.97852 , 0.104885 ])
23 , -3394.22 , array([ 2.03303 , 0.103248 ])
24 , -3393.95 , array([ 2.01992 , 0.102778 ])
25 , -3394.06 , array([ 2.07829 , 0.0977916 ])
26 , -3394.05 , array([ 2.05289 , 0.0995186 ])
27 , -3393.97 , array([ 2.00301 , 0.103065 ])
28 , -3394.22 , array([ 2.01536 , 0.102166 ])
29 , -3394.08 , array([ 2.02779 , 0.101276 ])
30 , -3393.97 , array([ 2.04427 , 0.0996575 ])
31 , -3393.96 , array([ 2.05224 , 0.0982017 ])
32 , -3393.97 , array([ 2.04362 , 0.0983388 ])