Skip to content

Cache1D_mod

Initially developed by Bernard Kim and published as fitdadi. Kim, Huber, Lohmueller (2017) Genetics. Modified version of the script found at: https://doi.org/10.1534/genetics.116.197145. Based on scripts from: https://groups.google.com/forum/#!topic/dadi-user/4xspqlITcvc.

Cache1D(params, ns, demo_sel_func, pts, gamma_bounds=(0.0001, 2000), gamma_pts=500, additional_gammas=[], cpus=None, gpus=0, verbose=False)

Initialize the Cache1D object.

Parameters:

Name Type Description Default
params tuple

Optimized demographic parameters.

required
ns tuple

Sample size(s) for cached spectra.

required
demo_sel_func function

dadi demographic function with selection. Gamma must be the last argument.

required
pts tuple

Integration/extrapolation grid points settings for demo_sel_func.

required
gamma_bounds tuple

Range of gammas to cache spectra for. Defaults to (1e-4, 2000).

(0.0001, 2000)
gamma_pts int

Number of gamma grid points over which to integrate. Defaults to 500.

500
additional_gammas list

Additional positive values of gamma to cache for. Defaults to [].

[]
cpus int

Number of CPU jobs to launch for multiprocessing. Defaults to None.

None
gpus int

Number of GPU jobs to launch for multiprocessing. Defaults to 0.

0
verbose bool

If True, print messages to track progress of cache generation. Defaults to False.

False
Source code in dadi/DFE/Cache1D_mod.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def __init__(self, params, ns, demo_sel_func, pts, 
             gamma_bounds=(1e-4, 2000), gamma_pts=500, 
             additional_gammas=[],
             cpus=None, gpus=0, verbose=False):
    """
    Initialize the Cache1D object.

    Args:
        params (tuple): Optimized demographic parameters.

        ns (tuple): Sample size(s) for cached spectra.

        demo_sel_func (function): dadi demographic function with selection. 
                                  Gamma must be the last argument.

        pts (tuple): Integration/extrapolation grid points settings for demo_sel_func.

        gamma_bounds (tuple, optional): Range of gammas to cache spectra for. Defaults to (1e-4, 2000).

        gamma_pts (int, optional): Number of gamma grid points over which to integrate. Defaults to 500.

        additional_gammas (list, optional): Additional positive values of gamma to cache for. Defaults to [].

        cpus (int, optional): Number of CPU jobs to launch for multiprocessing. Defaults to None.

        gpus (int, optional): Number of GPU jobs to launch for multiprocessing. Defaults to 0.

        verbose (bool, optional): If True, print messages to track progress of cache generation. Defaults to False.
    """
    self.params, self.ns, self.pts = tuple(params), tuple(ns), tuple(pts)

    # Create a vector of gammas that are log-spaced over an interval
    self.gammas = -np.logspace(np.log10(gamma_bounds[1]),
                               np.log10(gamma_bounds[0]), gamma_pts)

    # Record negative gammas, for later use
    self.neg_gammas = self.gammas
    # Add additional gammas to cache
    self.gammas = np.concatenate((self.gammas, additional_gammas))
    self.spectra = [None]*len(self.gammas)
    self.params = params
    self.ns = ns
    self.pts = pts
    self.func_name = demo_sel_func.__name__

    if cpus is None:
        import multiprocessing
        cpus = multiprocessing.cpu_count()

    if not cpus > 1 or gpus > 0:  # For running with a single thread
        self._single_process(verbose, demo_sel_func)
    else:  # For running with multiple cores
        self._multiple_processes(cpus, gpus, verbose, demo_sel_func)

    demo_sel_extrap_func = Numerics.make_extrap_func(demo_sel_func)
    self.neu_spec = demo_sel_extrap_func(tuple(self.params)+(0,), self.ns, self.pts)
    self.spectra = np.array(self.spectra)

integrate(params, ns, sel_dist, theta, pts=None, exterior_int=True)

Integrate spectra over a univariate probability distribution for negative gammas.

Parameters:

Name Type Description Default
params tuple

Parameters for sel_dist.

required
ns tuple

Ignored. Will be retrieved from original caching.

required
sel_dist function

Univariate probability distribution, taking in arguments (xx, params).

required
theta float

Population-scaled mutation rate.

required
pts tuple

Ignored. Defaults to None, will use pts from original caching.

None
exterior_int bool

If False, do not integrate outside sampled domain. Defaults to True.

True

Returns:

Name Type Description
fs Spectrum

Integrated spectrum.

Source code in dadi/DFE/Cache1D_mod.py
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
def integrate(self, params, ns, sel_dist, theta, pts=None, exterior_int=True):
    """
    Integrate spectra over a univariate probability distribution for negative gammas.

    Args:
        params (tuple): Parameters for sel_dist.

        ns (tuple): Ignored. Will be retrieved from original caching.

        sel_dist (function): Univariate probability distribution, taking in arguments (xx, params).

        theta (float): Population-scaled mutation rate.

        pts (tuple, optional): Ignored. Defaults to None, will use pts from original caching.

        exterior_int (bool, optional): If False, do not integrate outside sampled domain. Defaults to True.

    Returns:
        fs (Spectrum): Integrated spectrum.
    """
    # Restrict ourselves to negative gammas
    Nneg = len(self.neg_gammas)
    spectra = self.spectra[:Nneg]

    # Weights for integration
    weights = sel_dist(-self.neg_gammas, params)

    weighted_spectra = 0*spectra
    for ii, w in enumerate(weights):
        weighted_spectra[ii] = w*spectra[ii]

    fs = np.trapz(weighted_spectra, self.neg_gammas, axis=0)
    if not exterior_int:
        return Spectrum(theta*fs)

    smallest_gamma = self.neg_gammas[-1]
    largest_gamma = self.neg_gammas[0]
    # Compute weight for the effectively neutral portion. Not using
    # CDF function because want this to be able to compute weight
    # for arbitrary mass functions
    weight_neu, err_neu = scipy.integrate.quad(sel_dist, 0, -smallest_gamma,
                                               args=params)
    # compute weight for the effectively lethal portion
    weight_del, err = scipy.integrate.quad(sel_dist, -largest_gamma, np.inf,
                                           args=params)

    fs += self.neu_spec*weight_neu
    fs += spectra[0]*weight_del

    return Spectrum(theta*fs)

integrate_point_pos(params, ns, sel_dist, theta, demo_sel_func=None, Npos=1, pts=None, exterior_int=True)

Integrate spectra over a univariate probability distribution for negative gammas, plus one or more point masses of positive selection.

Parameters:

Name Type Description Default
params tuple

Parameters. The last Npos*2 are assumed to be the proportion of positive selection and the gamma for each point mass, in the order (ppos1, gammapos1, ppos2, gammapos2, ...). The remaining parameters are for the continuous point mass.

required
ns tuple

Ignored. Will be retrieved from original caching.

required
sel_dist function

Univariate probability distribution, taking in arguments (xx, params).

required
theta float

Population-scaled mutation rate.

required
demo_sel_func function

DaDi demographic function with selection. Defaults to None.

None
Npos int

Number of positive point masses to model. Defaults to 1.

1
pts tuple

Ignored. Defaults to None and will use pts from original caching.

None
exterior_int bool

If False, do not integrate outside sampled domain. Defaults to True.

True

Returns:

Name Type Description
fs Spectrum

Integrated spectrum.

Source code in dadi/DFE/Cache1D_mod.py
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
def integrate_point_pos(self, params, ns, sel_dist, theta, demo_sel_func=None, 
                        Npos=1, pts=None, exterior_int=True):
    """
    Integrate spectra over a univariate probability distribution for negative gammas,
    plus one or more point masses of positive selection.

    Args:
        params (tuple): Parameters. The last Npos*2 are assumed to be the proportion of
                        positive selection and the gamma for each point mass, in the order
                        (ppos1, gammapos1, ppos2, gammapos2, ...).
                        The remaining parameters are for the continuous point mass.

        ns (tuple): Ignored. Will be retrieved from original caching.

        sel_dist (function): Univariate probability distribution, taking in arguments (xx, params).

        theta (float): Population-scaled mutation rate.

        demo_sel_func (function, optional): DaDi demographic function with selection. Defaults to None.

        Npos (int, optional): Number of positive point masses to model. Defaults to 1.

        pts (tuple, optional): Ignored. Defaults to None and will use pts from original caching.

        exterior_int (bool, optional): If False, do not integrate outside sampled domain. Defaults to True.

    Returns:
        fs (Spectrum): Integrated spectrum.
    """
    pdf_params, ppos, gammapos = params[:-2*Npos], params[-2], params[-1]
    ppos_l, gammapos_l = params[-2*Npos::2], params[-2*Npos+1::2]

    pdf_fs = self.integrate(pdf_params, None, sel_dist, theta, None,
                            exterior_int=exterior_int)
    result = (1-np.sum(ppos_l))*pdf_fs

    if demo_sel_func is not None:
        demo_sel_func = Numerics.make_extrap_func(demo_sel_func)

    for ppos, gammapos in zip(ppos_l, gammapos_l):
        if gammapos not in self.gammas:
            if demo_sel_func is None:
                raise IndexError('Failed to find requested gammapos={0:.4f} '
                                 'in Cache1D spectra. Was it included in '
                                 'additional_gammas during cache generation?'.format(gammapos))
            pos_fs = theta*demo_sel_func(tuple(self.params) + (gammapos,),
                                         self.ns, self.pts)
            self.gammas = np.append(self.gammas, gammapos)
            self.spectra = np.append(self.spectra, [pos_fs.data], axis=0)
        ii = list(self.gammas).index(gammapos)
        pos_fs = Spectrum(self.spectra[ii])
        result += ppos*pos_fs

    return result