Module dadi.Numerics
Numerically useful functions, including extrapolation and default grid.
Expand source code
"""
Numerically useful functions, including extrapolation and default grid.
"""
import logging
logger = logging.getLogger('Numerics')
import functools, os
import numpy
# Account for difference in scipy installations.
from scipy.special import comb
from scipy.special import gammaln, betaln, beta
_multinomln_cache = {}
def multinomln(N):
"""
Get the multinomial coefficient for an array N.
N: array of integers.
"""
if tuple(N) not in _multinomln_cache:
N_sum = numpy.sum(N)
res = gammaln(N_sum+1)
for n in N:
res -= gammaln(n+1)
_multinomln_cache[tuple(N)] = res
return _multinomln_cache[tuple(N)]
_BetaBinomln_cache = {}
def BetaBinomln(i,n,a,b):
if (i,n,a,b) not in _BetaBinomln_cache:
_BetaBinomln_cache[i,n,a,b] = _lncomb(n,i) + betaln(i+a,n-i+b) - betaln(a,b)
return _BetaBinomln_cache[i,n,a,b]
_part_cache = {}
def cached_part(x,n,minval=0,maxval=2):
"""
Returns the integer partition summing to x with n entries and
min and max equal to 0 and 2 (or ploidy level), respectively.
This version uses a cache to speed up repeated evaluations.
x: integer summand.
n: number of partition entries.
minval: minimum value allowed for partition entries.
maxval: maximum value allowed for partition entries.
"""
if (x,n,minval,maxval) not in _part_cache:
_part_cache[x,n,minval,maxval] = list(part(x,n,minval,maxval))
return _part_cache[x,n,minval,maxval]
def part(x, n, minval=0, maxval=2):
"""
Returns the integer partition summing to x with n entries and
min and max equal to 0 and 2 (or ploidy level), respectively.
x: integer summand.
n: number of partition entries.
minval: minimum value allowed for partition entries.
maxval: maximum value allowed for partition entries.
"""
if not n * minval <= x <= n * maxval:
return
elif n == 0:
yield []
else:
for val in range(minval, maxval + 1):
for p in part(x - val, n - 1, val, maxval):
yield [val] + p
def BetaBinomConvolution(i,n,alpha,beta,ploidy=2):
"""
Returns the probability of observing i 'successes' across
n beta binomial random variables with equal alpha,
beta, and number of trials.
"""
res=0.0
# Create list of BetaBinomln results, because list indexing
# is much faster than the function call and hashing inside
# the cached BetaBinomln. Caching in BetaBinomln is still
# marginally useful, even after caching at this level.
BetaBinomln_cache = [BetaBinomln(_,ploidy,alpha,beta) for _ in range(ploidy+1)]
partitions = cached_part(i,n,maxval=ploidy)
for prt in partitions:
tmp=0.0
# Partitions p have many repeated elements. It's faster
# to count each unique element and use that, rather than
# iterating through all elements
coeff = [prt.count(p) for p in range(ploidy+1)]
for p in range(ploidy+1):
tmp += BetaBinomln_cache[p]*coeff[p]
tmp += multinomln(coeff)
res += numpy.exp(tmp)
return res
def apply_anc_state_misid(fs, p_misid):
"""
Model ancestral state misidentification in a frequency spectrum.
fs: Input frequency spectrum.
p_misid: Fraction of sites assumed to suffer from ancestral
state misidentification.
"""
return (1-p_misid)*fs + p_misid*reverse_array(fs)
def make_anc_state_misid_func(func):
"""
Generate a version of func accounting for ancestral state misidentification.
func: The function to which misidentification should be incorporated. It
is assumed that the first argument of the function is a params
vector, to which the misidentification parameter will be added.
Returns a new function which takes in a params vector that is one entry
longer than the original function. The fraction misidentification will
be the last entry in the new params vector.
"""
def misid_func(*args, **kwargs):
all_params = args[0]
p_misid = all_params[-1]
args = list(args)
args[0] = all_params[:-1]
fs = func(*args, **kwargs)
return apply_anc_state_misid(fs, p_misid)
misid_func.__name__ = func.__name__ + '_misid'
misid_func.__doc__ = func.__doc__
return misid_func
def quadratic_grid(num_pts):
"""
A nonuniform grid of points on [0,1] with a quadratic pattern of spacings.
The grid is weighted to be denser near 0 and 1, which is useful for
population genetic simulations. In between, it smoothly increases and
then decreases the step size.
"""
# Rounds down...
small_pts = int(num_pts / 10)
large_pts = num_pts - small_pts+1
grid = numpy.linspace(0, 0.05, small_pts)
# The interior grid spacings are a quadratic function of x, being
# approximately x1 at the boundaries. To achieve this, our actual grid
# positions must come from a cubic function.
# Here I calculate and apply the appropriate conditions:
# x[q = 0] = x_start => d = x_start
# dx/dq [q = 0] = dx_start => c = dx_start/dq
# x[q = 1] = 1
# dx/dq [q = 1] = dx_start
q = numpy.linspace(0, 1, large_pts)
dq = q[1] - q[0]
x_start = grid[-1]
dx_start = grid[-1] - grid[-2]
d = x_start
c = dx_start/dq
b = -3*(-dq + dx_start + dq*x_start)/dq
a = -2 * b/3
grid = numpy.concatenate((grid[:-1], a*q**3 + b*q**2 + c*q + d))
return grid
def estimate_best_exp_grid_crwd(ns):
"""
Emperical "best" values for exponential grid crowding.
These functional forms were estimated by running many simulations at
different parameter values and asking when a simulation at pts_l = [max(ns),
max(ns)+10, max(ns)+20] was most accurate.
These cannot be considered absolute best values, as that may depend on the
model. It does seem broadly true that the optimal value of crwd increases
with system size, up to a point.
"""
xx = numpy.mean(ns)
if len(ns) == 1:
return min(max(xx**0.4 / 1.3, 1), 9)
elif len(ns) == 2:
return min(max(1.5 * xx**0.4, 1), 8)
else:
raise ValueError('Due to computational expense, no optimum has been '
'determined for 3D or more models. Try sticking with '
'crwd=8.')
def exponential_grid(pts, crwd=8.):
"""
An exponentially spaced grid. This is now the default grid.
crwd controls the degree to which grid points crowd against x=0 or x=1.
The value of crwd=8 seems to be a good default for integration with large
systems. See estimate_best_exp_grid_crwd for some empirical optimizations
beyond this.
This grid was contributed by Simon Gravel.
"""
unif = numpy.linspace(-1,1,pts)
grid = 1./(1. + numpy.exp(-crwd*unif))
# Normalize
grid = (grid-grid[0])/(grid[-1]-grid[0])
return grid
default_grid = exponential_grid
def end_point_first_derivs(xx):
"""
Coefficients for a 5-point one-sided approximation of the first derivative.
xx: grid on which the data to be differentiated lives
Returns ret, a 2x5 array. ret[0] is the coefficients for an approximation
of the derivative at xx[0]. It is used by deriv = numpy.dot(ret[0],
yy[:5]). ret[1] is the coefficients for the derivative at xx[-1]. It can be
used by deriv = dot(ret[1][::-1], yy[-5:]). (Note that we need to reverse
the coefficient array here.
"""
output = numpy.zeros((2,5))
# These are the coefficients for a one-sided 1st derivative of f[0].
# So f'[0] = d10_0 f[0] + d10_1 f[1] + d10_2 f[2] + d10_3 f[3] + d10_4 f[4]
# This expression is 4th order accurate.
# To derive it in Mathematica, use NDSolve`FiniteDifferenceDerivative[1, {xx[0], xx[1], xx[2], xx[3], xx[4]}, {f[0], f[1], f[2], f[3], f[4]}, DifferenceOrder -> 4][[1]] // Simplify
d10_0 = (-1 + ((-1 + ((-2*xx[0] + xx[1] + xx[2]) * (-xx[0] + xx[3]))/ ((xx[0] - xx[1])*(-xx[0] + xx[2])))* (-xx[0] + xx[4]))/(-xx[0] + xx[3]))/ (-xx[0] + xx[4])
d10_1 = -(((xx[0] - xx[2])*(xx[0] - xx[3])*(xx[0] - xx[4]))/ ((xx[0] - xx[1])*(xx[1] - xx[2])*(xx[1] - xx[3])* (xx[1] - xx[4])))
d10_2 = ((xx[0] - xx[1])*(xx[0] - xx[3])*(xx[0] - xx[4]))/ ((xx[0] - xx[2])*(xx[1] - xx[2])*(xx[2] - xx[3])* (xx[2] - xx[4]))
d10_3 = -(((xx[0] - xx[1])*(xx[0] - xx[2])*(xx[0] - xx[4]))/ ((xx[0] - xx[3])*(-xx[1] + xx[3])* (-xx[2] + xx[3])*(xx[3] - xx[4])))
d10_4 = ((xx[0] - xx[1])*(xx[0] - xx[2])*(xx[0] - xx[3]))/ ((xx[0] - xx[4])*(xx[1] - xx[4])*(xx[2] - xx[4])* (xx[3] - xx[4]))
output[0] = (d10_0, d10_1, d10_2, d10_3, d10_4)
# These are the coefficients for a one-sided 1st derivative of f[-1].
# So f'[-1] = d1m1_m1 f[-1] + d1m1_m2 f[-2] + d1m1_m3 f[-3] + d1m1_m4 f[-4]
# + d1m1_m5 f[-5]
d1m1_m1 = (xx[-1]*((3*xx[-2] - 4*xx[-1])*xx[-1] + xx[-3]*(-2*xx[-2] + 3*xx[-1])) + xx[-4]*(xx[-3]*(xx[-2] - 2*xx[-1]) + xx[-1]*(-2*xx[-2] + 3*xx[-1])) + xx[-5]*(xx[-3]*(xx[-2] - 2*xx[-1]) + xx[-4]*(xx[-3] + xx[-2] - 2*xx[-1]) + xx[-1]*(-2*xx[-2] + 3*xx[-1])))/ ((xx[-4] - xx[-1])*(-xx[-5] + xx[-1])* (-xx[-3] + xx[-1])*(-xx[-2] + xx[-1]))
d1m1_m2 = ((xx[-5] - xx[-1])*(xx[-4] - xx[-1])* (xx[-3] - xx[-1]))/ ((xx[-5] - xx[-2])*(-xx[-4] + xx[-2])*(-xx[-3] + xx[-2])*(xx[-2] - xx[-1]))
d1m1_m3 = ((xx[-5] - xx[-1])*(xx[-4] - xx[-1])* (xx[-2] - xx[-1]))/ ((xx[-5] - xx[-3])*(-xx[-4] + xx[-3])* (xx[-3] - xx[-2])*(xx[-3] - xx[-1]))
d1m1_m4 = ((xx[-5] - xx[-1])*(xx[-3] - xx[-1])* (xx[-2] - xx[-1]))/ ((xx[-5] - xx[-4])*(xx[-4] - xx[-3])* (xx[-4] - xx[-2])*(xx[-4] - xx[-1]))
d1m1_m5 = ((xx[-4] - xx[-1])*(xx[-3] - xx[-1])* (xx[-2] - xx[-1]))/ ((xx[-5] - xx[-4])*(xx[-5] - xx[-3])* (xx[-5] - xx[-2])*(-xx[-5] + xx[-1]))
output[1] = (d1m1_m1, d1m1_m2, d1m1_m3, d1m1_m4, d1m1_m5)
return output
#
def reverse_array(arr):
"""
Reverse an array along all axes, so arr[i,j] -> arr[-(i+1),-(j+1)].
"""
reverse_slice = tuple(slice(None, None, -1) for ii in arr.shape)
return arr[reverse_slice]
def intersect_masks(m1, m2):
"""
Versions of m1 and m2 that are masked where either m1 or m2 were masked.
If neither m1 or m2 is masked, just returns m1 and m2. Otherwise returns
m1 and m2 wrapped as masked_arrays with identical masks.
"""
ma = numpy.ma
if ma.isMaskedArray(m1) and ma.isMaskedArray(m2)\
and numpy.all(m1.mask == m2.mask):
return m1,m2
if ma.isMaskedArray(m1) or ma.isMaskedArray(m2):
joint_mask = ma.mask_or(ma.getmask(m1), ma.getmask(m2))
import dadi
m1 = dadi.Spectrum(m1, mask=joint_mask.copy())
m2 = dadi.Spectrum(m2, mask=joint_mask.copy())
return m1,m2
def trapz(yy, xx=None, dx=None, axis=-1):
"""
Integrate yy(xx) along given axis using the composite trapezoidal rule.
xx must be one-dimensional and len(xx) must equal yy.shape[axis].
This is modified from the SciPy version to work with n-D yy and 1-D xx.
"""
if (xx is None and dx is None)\
or (xx is not None and dx is not None):
raise ValueError('One and only one of xx or dx must be specified.')
elif (xx is not None) and (dx is None):
dx = numpy.diff(xx)
yy = numpy.asanyarray(yy)
nd = yy.ndim
if yy.shape[axis] != (len(dx)+1):
raise ValueError('Length of xx must be equal to length of yy along '
'specified axis. Here len(xx) = %i and '
'yy.shape[axis] = %i.' % (len(dx)+1, yy.shape[axis]))
slice1 = [slice(None)]*nd
slice2 = [slice(None)]*nd
slice1[axis] = slice(1,None)
slice2[axis] = slice(None,-1)
sliceX = [numpy.newaxis]*nd
sliceX[axis] = slice(None)
slice1, slice2, sliceX = tuple(slice1), tuple(slice2), tuple(sliceX)
return numpy.sum(dx[sliceX] * (yy[slice1]+yy[slice2])/2.0, axis=axis)
def make_extrap_func(func, extrap_x_l=None, extrap_log=False, fail_mag=10):
"""
Generate a version of func that extrapolates to infinitely many gridpoints.
func: A function that returns a single scalar or array and whose last
non-keyword argument is 'pts': the number of default_grid points to use
in calculation.
extrap_x_l: An explict list of x values to use for extrapolation. If not
provided, the extrapolation routine will look for '.extrap_x'
attributes on the results of func. The method Spectrum.from_phi will
add an extrap_x attribute to resulting Spectra, equal to the x-value
of the first non-zero grid point. An explicit list is useful if you
want to override this behavior for testing.
fail_mag: Simon Gravel noted that there can be numerical instabilities in
extrapolation when working with large spectra that have very small
entires (of order 1e-24). To avoid these instabilities, we ignore the
extrapolation values (and use the input result with the smallest x)
if the extrapolation is more than fail_mag orders of magnitude away
from the smallest x input result.
Returns a new function whose last argument is a list of numbers of grid
points and that returns a result extrapolated to infinitely many grid
points.
"""
x_l_from_results = (extrap_x_l is None)
def extrap_func(*args, **kwargs):
# Separate pts (or pts_l) from arguments
if 'pts' not in kwargs:
other_args, pts_l = args[:-1], args[-1]
else:
other_args = args
pts_l = kwargs['pts']
del kwargs['pts']
if 'no_extrap' in kwargs:
no_extrap = True
del kwargs['no_extrap']
else:
no_extrap = False
if numpy.isscalar(pts_l):
pts_l = [pts_l]
# Create a sub-function that fixes all other arguments and only
# takes in pts.
partial_func = functools.partial(func, *other_args, **kwargs)
##
## The commented-out code implements distribution of fs calculations
## among multiple processors. Unfortunately, it doesn't work with
## iPython, because pickling functions is very fragile in the
## interactive interpreter. If we had some sort of data structure for
## defining models, then this would be much easier to implement. Note
## that there might still be issues on Windows systems, because of its
## poor-man's version of fork().
##
#import cPickle, multiprocessing
#try:
# # Test whether sub-function is picklable. If not, pool.map will
# # hang.
# cPickle.dumps(partial_func)
# pool = multiprocessing.Pool()
# result_l = pool.map(partial_func, pts_l)
# pool.close()
#except (cPickle.PicklingError, TypeError):
# print('Function passed to extrap func must be picklable for '
# 'multi-processor executation.')
# import sys
# sys.exit()
result_l = list(map(partial_func, pts_l))
if no_extrap:
return result_l
if x_l_from_results:
try:
x_l = [r.extrap_x for r in result_l]
except AttributeError:
raise ValueError("Extrapolation function error: No explicit extrapolation x_l provided, and results do not have 'extrap_x' attributes. If this is an FS extrapolation, check your from_phi method.")
else:
x_l = extrap_x_l
if extrap_log:
result_l = [numpy.log(r) for r in result_l]
# Extrapolate
if len(pts_l) == 1:
ex_result = result_l[0]
elif len(pts_l) == 2:
ex_result = linear_extrap(result_l, x_l)
elif len(pts_l) == 3:
ex_result = quadratic_extrap(result_l, x_l)
elif len(pts_l) == 4:
ex_result = cubic_extrap(result_l, x_l)
elif len(pts_l) == 5:
ex_result = quartic_extrap(result_l, x_l)
elif len(pts_l) == 6:
ex_result = quintic_extrap(result_l, x_l)
else:
raise ValueError('Number of calculations to use for extrapolation '
'must be between 1 and 6')
if extrap_log:
ex_result = numpy.exp(ex_result)
# Simon Gravel noted that there can be numerical instabilities in
# extrapolation when working with large spectra that have very small
# entires (of order 1e-24).
# To avoid these instabilities, we ignore the extrapolation values
# if it is too different from the input values.
if len(pts_l) > 1:
# Assume the best input value comes from the smallest grid.
best_result = result_l[numpy.argmin(x_l)]
if extrap_log:
best_result = numpy.exp(best_result)
# The extrapolation is deemed to have failed if it results in a
# value more than fail_mag orders of magnitude away from the
# best input value.
extrap_failed = abs(numpy.log10(ex_result/best_result)) > fail_mag
if numpy.any(extrap_failed):
logger.warning('Extrapolation may have failed. Check resulting '
'frequency spectrum for unexpected results.')
# For entries that fail, use the "best" input result.
ex_result[extrap_failed] = best_result[extrap_failed]
try:
ex_result.pop_ids = result_l[0].pop_ids
except AttributeError:
pass
return ex_result
extrap_func.__name__ = func.__name__
extrap_func.__doc__ = func.__doc__
return extrap_func
def make_extrap_log_func(func, extrap_x_l=None):
"""
Generate a version of func that extrapolates to infinitely many gridpoints.
Note that extrapolation here is done on the *log* of the function result,
so this will fail if any returned values are < 0. It does seem to be better
behaved for SFS calculation.
func: A function whose last argument is the number of Numerics.default_grid
points to use in calculation and that returns a single scalar or
array.
extrap_x_l: An explict list of x values to use for extrapolation. If not
provided, the extrapolation routine will look for '.extrap_x'
attributes on the results of func. The method Spectrum.from_phi will
add an extrap_x attribute to resulting Spectra, equal to the x-value
of the first non-zero grid point. An explicit list is useful if you
want to override this behavior for testing.
Returns a new function whose last argument is a list of numbers of grid
points and that returns a result extrapolated to infinitely many grid
points.
"""
return make_extrap_func(func, extrap_x_l=extrap_x_l, extrap_log=True)
_projection_cache = {}
def _lncomb(N,k):
"""
Log of N choose k.
"""
return gammaln(N+1) - gammaln(k+1) - gammaln(N-k+1)
def _cached_projection(proj_to, proj_from, hits):
"""
Coefficients for projection from a different fs size.
proj_to: Numper of samples to project down to.
proj_from: Numper of samples to project from.
hits: Number of derived alleles projecting from.
"""
key = (proj_to, proj_from, hits)
try:
return _projection_cache[key]
except KeyError:
pass
if numpy.isscalar(proj_to) and numpy.isscalar(proj_from)\
and proj_from < proj_to:
# Short-circuit calculation.
contrib = numpy.zeros(proj_to+1)
else:
# We set numpy's error reporting so that it will ignore underflows,
# because those just imply that contrib is 0.
previous_err_state = numpy.seterr(under='ignore', divide='raise',
over='raise', invalid='raise')
proj_hits = numpy.arange(proj_to+1)
# For large sample sizes, we need to do the calculation in logs, and it
# is accurate enough for small sizes as well.
lncontrib = _lncomb(proj_to,proj_hits)
lncontrib += _lncomb(proj_from-proj_to,hits-proj_hits)
lncontrib -= _lncomb(proj_from, hits)
contrib = numpy.exp(lncontrib)
numpy.seterr(**previous_err_state)
_projection_cache[key] = contrib
return contrib
def array_from_file(fid, return_comments=False):
"""
Read array from file.
fid: string with file name to read from or an open file object.
return_comments: If True, the return value is (fs, comments), where
comments is a list of strings containing the comments
from the file (without #'s).
The file format is:
# Any number of comment lines beginning with a '#'
A single line containing N integers giving the dimensions of the fs
array. So this line would be '5 5 3' for an SFS that was 5x5x3.
(That would be 4x4x2 *samples*.)
A single line giving the array elements. The order of elements is
e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ...
"""
newfile = False
# Try to read from fid. If we can't, assume it's something that we can
# use to open a file.
if not hasattr(fid, 'read'):
newfile = True
fid = open(fid, 'r')
line = fid.readline()
# Strip out the comments
comments = []
while line.startswith('#'):
comments.append(line[1:].strip())
line = fid.readline()
# Read the shape of the data
shape = tuple([int(d) for d in line.split()])
data = numpy.fromfile(fid, count=numpy.product(shape), sep=' ')
# fromfile returns a 1-d array. Reshape it to the proper form.
data = data.reshape(*shape)
# If we opened a new file, clean it up.
if newfile:
fid.close()
if not return_comments:
return data
else:
return data,comments
def array_to_file(data, fid, precision=16, comment_lines = []):
"""
Write array to file.
data: array to write
fid: string with file name to write to or an open file object.
precision: precision with which to write out entries of the SFS. (They
are formated via %.<p>g, where <p> is the precision.)
comment lines: list of strings to be used as comment lines in the header
of the output file.
The file format is:
# Any number of comment lines beginning with a '#'
A single line containing N integers giving the dimensions of the fs
array. So this line would be '5 5 3' for an SFS that was 5x5x3.
(That would be 4x4x2 *samples*.)
A single line giving the array elements. The order of elements is
e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ...
"""
# Open the file object.
newfile = False
if not hasattr(fid, 'write'):
newfile = True
fid = open(fid, 'w')
# Write comments
for line in comment_lines:
fid.write('# ')
fid.write(line.strip())
fid.write(os.linesep)
# Write out the shape of the fs
for elem in data.shape:
fid.write('%i ' % elem)
fid.write(os.linesep)
if hasattr(data, 'filled'):
# Masked entries in the fs will go in as 'nan'
data = data.filled()
# Write to file
data.tofile(fid, ' ', '%%.%ig' % precision)
fid.write(os.linesep)
# Close file
if newfile:
fid.close()
def linear_extrap(ys, xs):
"""
Linearly extrapolate from two x,y pairs to x = 0.
ys: y values from x,y pairs. Note that these can be arrays of values.
xs: x values from x,y pairs. These should be scalars.
Returns extrapolated y at x=0.
"""
y1,y2 = ys
x1,x2 = xs
return (x2 * y1 - x1 * y2)/(x2 - x1)
def quadratic_extrap(ys, xs):
"""
Quadratically extrapolate from three x,y pairs to x = 0.
ys: y values from x,y pairs. Note that these can be arrays of values.
xs: x values from x,y pairs. These should be scalars.
Returns extrapolated y at x=0.
"""
y1,y2,y3 = ys
x1,x2,x3 = xs
return x2*x3/((x1-x2)*(x1-x3)) * y1 + x1*x3/((x2-x1)*(x2-x3)) * y2\
+ x1*x2/((x3-x1)*(x3-x2)) * y3
Functions
def BetaBinomConvolution(i, n, alpha, beta, ploidy=2)
-
Returns the probability of observing i 'successes' across n beta binomial random variables with equal alpha, beta, and number of trials.
Expand source code
def BetaBinomConvolution(i,n,alpha,beta,ploidy=2): """ Returns the probability of observing i 'successes' across n beta binomial random variables with equal alpha, beta, and number of trials. """ res=0.0 # Create list of BetaBinomln results, because list indexing # is much faster than the function call and hashing inside # the cached BetaBinomln. Caching in BetaBinomln is still # marginally useful, even after caching at this level. BetaBinomln_cache = [BetaBinomln(_,ploidy,alpha,beta) for _ in range(ploidy+1)] partitions = cached_part(i,n,maxval=ploidy) for prt in partitions: tmp=0.0 # Partitions p have many repeated elements. It's faster # to count each unique element and use that, rather than # iterating through all elements coeff = [prt.count(p) for p in range(ploidy+1)] for p in range(ploidy+1): tmp += BetaBinomln_cache[p]*coeff[p] tmp += multinomln(coeff) res += numpy.exp(tmp) return res
def BetaBinomln(i, n, a, b)
-
Expand source code
def BetaBinomln(i,n,a,b): if (i,n,a,b) not in _BetaBinomln_cache: _BetaBinomln_cache[i,n,a,b] = _lncomb(n,i) + betaln(i+a,n-i+b) - betaln(a,b) return _BetaBinomln_cache[i,n,a,b]
def apply_anc_state_misid(fs, p_misid)
-
Model ancestral state misidentification in a frequency spectrum.
fs: Input frequency spectrum. p_misid: Fraction of sites assumed to suffer from ancestral state misidentification.
Expand source code
def apply_anc_state_misid(fs, p_misid): """ Model ancestral state misidentification in a frequency spectrum. fs: Input frequency spectrum. p_misid: Fraction of sites assumed to suffer from ancestral state misidentification. """ return (1-p_misid)*fs + p_misid*reverse_array(fs)
def array_from_file(fid, return_comments=False)
-
Read array from file.
fid: string with file name to read from or an open file object. return_comments: If True, the return value is (fs, comments), where comments is a list of strings containing the comments from the file (without #'s).
The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 samples.) A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] … fs[0,1,0] fs[0,1,1] …
Expand source code
def array_from_file(fid, return_comments=False): """ Read array from file. fid: string with file name to read from or an open file object. return_comments: If True, the return value is (fs, comments), where comments is a list of strings containing the comments from the file (without #'s). The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 *samples*.) A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ... """ newfile = False # Try to read from fid. If we can't, assume it's something that we can # use to open a file. if not hasattr(fid, 'read'): newfile = True fid = open(fid, 'r') line = fid.readline() # Strip out the comments comments = [] while line.startswith('#'): comments.append(line[1:].strip()) line = fid.readline() # Read the shape of the data shape = tuple([int(d) for d in line.split()]) data = numpy.fromfile(fid, count=numpy.product(shape), sep=' ') # fromfile returns a 1-d array. Reshape it to the proper form. data = data.reshape(*shape) # If we opened a new file, clean it up. if newfile: fid.close() if not return_comments: return data else: return data,comments
def array_to_file(data, fid, precision=16, comment_lines=[])
-
Write array to file.
data: array to write fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.
g, where
is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file.
The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 samples.) A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] … fs[0,1,0] fs[0,1,1] …
Expand source code
def array_to_file(data, fid, precision=16, comment_lines = []): """ Write array to file. data: array to write fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.<p>g, where <p> is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file. The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 *samples*.) A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ... """ # Open the file object. newfile = False if not hasattr(fid, 'write'): newfile = True fid = open(fid, 'w') # Write comments for line in comment_lines: fid.write('# ') fid.write(line.strip()) fid.write(os.linesep) # Write out the shape of the fs for elem in data.shape: fid.write('%i ' % elem) fid.write(os.linesep) if hasattr(data, 'filled'): # Masked entries in the fs will go in as 'nan' data = data.filled() # Write to file data.tofile(fid, ' ', '%%.%ig' % precision) fid.write(os.linesep) # Close file if newfile: fid.close()
def cached_part(x, n, minval=0, maxval=2)
-
Returns the integer partition summing to x with n entries and min and max equal to 0 and 2 (or ploidy level), respectively.
This version uses a cache to speed up repeated evaluations.
x: integer summand. n: number of partition entries. minval: minimum value allowed for partition entries. maxval: maximum value allowed for partition entries.
Expand source code
def cached_part(x,n,minval=0,maxval=2): """ Returns the integer partition summing to x with n entries and min and max equal to 0 and 2 (or ploidy level), respectively. This version uses a cache to speed up repeated evaluations. x: integer summand. n: number of partition entries. minval: minimum value allowed for partition entries. maxval: maximum value allowed for partition entries. """ if (x,n,minval,maxval) not in _part_cache: _part_cache[x,n,minval,maxval] = list(part(x,n,minval,maxval)) return _part_cache[x,n,minval,maxval]
def default_grid(pts, crwd=8.0)
-
An exponentially spaced grid. This is now the default grid.
crwd controls the degree to which grid points crowd against x=0 or x=1. The value of crwd=8 seems to be a good default for integration with large systems. See estimate_best_exp_grid_crwd for some empirical optimizations beyond this.
This grid was contributed by Simon Gravel.
Expand source code
def exponential_grid(pts, crwd=8.): """ An exponentially spaced grid. This is now the default grid. crwd controls the degree to which grid points crowd against x=0 or x=1. The value of crwd=8 seems to be a good default for integration with large systems. See estimate_best_exp_grid_crwd for some empirical optimizations beyond this. This grid was contributed by Simon Gravel. """ unif = numpy.linspace(-1,1,pts) grid = 1./(1. + numpy.exp(-crwd*unif)) # Normalize grid = (grid-grid[0])/(grid[-1]-grid[0]) return grid
def end_point_first_derivs(xx)
-
Coefficients for a 5-point one-sided approximation of the first derivative.
xx: grid on which the data to be differentiated lives
Returns ret, a 2x5 array. ret[0] is the coefficients for an approximation of the derivative at xx[0]. It is used by deriv = numpy.dot(ret[0], yy[:5]). ret[1] is the coefficients for the derivative at xx[-1]. It can be used by deriv = dot(ret[1][::-1], yy[-5:]). (Note that we need to reverse the coefficient array here.
Expand source code
def end_point_first_derivs(xx): """ Coefficients for a 5-point one-sided approximation of the first derivative. xx: grid on which the data to be differentiated lives Returns ret, a 2x5 array. ret[0] is the coefficients for an approximation of the derivative at xx[0]. It is used by deriv = numpy.dot(ret[0], yy[:5]). ret[1] is the coefficients for the derivative at xx[-1]. It can be used by deriv = dot(ret[1][::-1], yy[-5:]). (Note that we need to reverse the coefficient array here. """ output = numpy.zeros((2,5)) # These are the coefficients for a one-sided 1st derivative of f[0]. # So f'[0] = d10_0 f[0] + d10_1 f[1] + d10_2 f[2] + d10_3 f[3] + d10_4 f[4] # This expression is 4th order accurate. # To derive it in Mathematica, use NDSolve`FiniteDifferenceDerivative[1, {xx[0], xx[1], xx[2], xx[3], xx[4]}, {f[0], f[1], f[2], f[3], f[4]}, DifferenceOrder -> 4][[1]] // Simplify d10_0 = (-1 + ((-1 + ((-2*xx[0] + xx[1] + xx[2]) * (-xx[0] + xx[3]))/ ((xx[0] - xx[1])*(-xx[0] + xx[2])))* (-xx[0] + xx[4]))/(-xx[0] + xx[3]))/ (-xx[0] + xx[4]) d10_1 = -(((xx[0] - xx[2])*(xx[0] - xx[3])*(xx[0] - xx[4]))/ ((xx[0] - xx[1])*(xx[1] - xx[2])*(xx[1] - xx[3])* (xx[1] - xx[4]))) d10_2 = ((xx[0] - xx[1])*(xx[0] - xx[3])*(xx[0] - xx[4]))/ ((xx[0] - xx[2])*(xx[1] - xx[2])*(xx[2] - xx[3])* (xx[2] - xx[4])) d10_3 = -(((xx[0] - xx[1])*(xx[0] - xx[2])*(xx[0] - xx[4]))/ ((xx[0] - xx[3])*(-xx[1] + xx[3])* (-xx[2] + xx[3])*(xx[3] - xx[4]))) d10_4 = ((xx[0] - xx[1])*(xx[0] - xx[2])*(xx[0] - xx[3]))/ ((xx[0] - xx[4])*(xx[1] - xx[4])*(xx[2] - xx[4])* (xx[3] - xx[4])) output[0] = (d10_0, d10_1, d10_2, d10_3, d10_4) # These are the coefficients for a one-sided 1st derivative of f[-1]. # So f'[-1] = d1m1_m1 f[-1] + d1m1_m2 f[-2] + d1m1_m3 f[-3] + d1m1_m4 f[-4] # + d1m1_m5 f[-5] d1m1_m1 = (xx[-1]*((3*xx[-2] - 4*xx[-1])*xx[-1] + xx[-3]*(-2*xx[-2] + 3*xx[-1])) + xx[-4]*(xx[-3]*(xx[-2] - 2*xx[-1]) + xx[-1]*(-2*xx[-2] + 3*xx[-1])) + xx[-5]*(xx[-3]*(xx[-2] - 2*xx[-1]) + xx[-4]*(xx[-3] + xx[-2] - 2*xx[-1]) + xx[-1]*(-2*xx[-2] + 3*xx[-1])))/ ((xx[-4] - xx[-1])*(-xx[-5] + xx[-1])* (-xx[-3] + xx[-1])*(-xx[-2] + xx[-1])) d1m1_m2 = ((xx[-5] - xx[-1])*(xx[-4] - xx[-1])* (xx[-3] - xx[-1]))/ ((xx[-5] - xx[-2])*(-xx[-4] + xx[-2])*(-xx[-3] + xx[-2])*(xx[-2] - xx[-1])) d1m1_m3 = ((xx[-5] - xx[-1])*(xx[-4] - xx[-1])* (xx[-2] - xx[-1]))/ ((xx[-5] - xx[-3])*(-xx[-4] + xx[-3])* (xx[-3] - xx[-2])*(xx[-3] - xx[-1])) d1m1_m4 = ((xx[-5] - xx[-1])*(xx[-3] - xx[-1])* (xx[-2] - xx[-1]))/ ((xx[-5] - xx[-4])*(xx[-4] - xx[-3])* (xx[-4] - xx[-2])*(xx[-4] - xx[-1])) d1m1_m5 = ((xx[-4] - xx[-1])*(xx[-3] - xx[-1])* (xx[-2] - xx[-1]))/ ((xx[-5] - xx[-4])*(xx[-5] - xx[-3])* (xx[-5] - xx[-2])*(-xx[-5] + xx[-1])) output[1] = (d1m1_m1, d1m1_m2, d1m1_m3, d1m1_m4, d1m1_m5) return output
def estimate_best_exp_grid_crwd(ns)
-
Emperical "best" values for exponential grid crowding.
These functional forms were estimated by running many simulations at different parameter values and asking when a simulation at pts_l = [max(ns), max(ns)+10, max(ns)+20] was most accurate.
These cannot be considered absolute best values, as that may depend on the model. It does seem broadly true that the optimal value of crwd increases with system size, up to a point.
Expand source code
def estimate_best_exp_grid_crwd(ns): """ Emperical "best" values for exponential grid crowding. These functional forms were estimated by running many simulations at different parameter values and asking when a simulation at pts_l = [max(ns), max(ns)+10, max(ns)+20] was most accurate. These cannot be considered absolute best values, as that may depend on the model. It does seem broadly true that the optimal value of crwd increases with system size, up to a point. """ xx = numpy.mean(ns) if len(ns) == 1: return min(max(xx**0.4 / 1.3, 1), 9) elif len(ns) == 2: return min(max(1.5 * xx**0.4, 1), 8) else: raise ValueError('Due to computational expense, no optimum has been ' 'determined for 3D or more models. Try sticking with ' 'crwd=8.')
def exponential_grid(pts, crwd=8.0)
-
An exponentially spaced grid. This is now the default grid.
crwd controls the degree to which grid points crowd against x=0 or x=1. The value of crwd=8 seems to be a good default for integration with large systems. See estimate_best_exp_grid_crwd for some empirical optimizations beyond this.
This grid was contributed by Simon Gravel.
Expand source code
def exponential_grid(pts, crwd=8.): """ An exponentially spaced grid. This is now the default grid. crwd controls the degree to which grid points crowd against x=0 or x=1. The value of crwd=8 seems to be a good default for integration with large systems. See estimate_best_exp_grid_crwd for some empirical optimizations beyond this. This grid was contributed by Simon Gravel. """ unif = numpy.linspace(-1,1,pts) grid = 1./(1. + numpy.exp(-crwd*unif)) # Normalize grid = (grid-grid[0])/(grid[-1]-grid[0]) return grid
def intersect_masks(m1, m2)
-
Versions of m1 and m2 that are masked where either m1 or m2 were masked.
If neither m1 or m2 is masked, just returns m1 and m2. Otherwise returns m1 and m2 wrapped as masked_arrays with identical masks.
Expand source code
def intersect_masks(m1, m2): """ Versions of m1 and m2 that are masked where either m1 or m2 were masked. If neither m1 or m2 is masked, just returns m1 and m2. Otherwise returns m1 and m2 wrapped as masked_arrays with identical masks. """ ma = numpy.ma if ma.isMaskedArray(m1) and ma.isMaskedArray(m2)\ and numpy.all(m1.mask == m2.mask): return m1,m2 if ma.isMaskedArray(m1) or ma.isMaskedArray(m2): joint_mask = ma.mask_or(ma.getmask(m1), ma.getmask(m2)) import dadi m1 = dadi.Spectrum(m1, mask=joint_mask.copy()) m2 = dadi.Spectrum(m2, mask=joint_mask.copy()) return m1,m2
def linear_extrap(ys, xs)
-
Linearly extrapolate from two x,y pairs to x = 0.
ys: y values from x,y pairs. Note that these can be arrays of values. xs: x values from x,y pairs. These should be scalars.
Returns extrapolated y at x=0.
Expand source code
def linear_extrap(ys, xs): """ Linearly extrapolate from two x,y pairs to x = 0. ys: y values from x,y pairs. Note that these can be arrays of values. xs: x values from x,y pairs. These should be scalars. Returns extrapolated y at x=0. """ y1,y2 = ys x1,x2 = xs return (x2 * y1 - x1 * y2)/(x2 - x1)
def make_anc_state_misid_func(func)
-
Generate a version of func accounting for ancestral state misidentification.
func: The function to which misidentification should be incorporated. It is assumed that the first argument of the function is a params vector, to which the misidentification parameter will be added.
Returns a new function which takes in a params vector that is one entry longer than the original function. The fraction misidentification will be the last entry in the new params vector.
Expand source code
def make_anc_state_misid_func(func): """ Generate a version of func accounting for ancestral state misidentification. func: The function to which misidentification should be incorporated. It is assumed that the first argument of the function is a params vector, to which the misidentification parameter will be added. Returns a new function which takes in a params vector that is one entry longer than the original function. The fraction misidentification will be the last entry in the new params vector. """ def misid_func(*args, **kwargs): all_params = args[0] p_misid = all_params[-1] args = list(args) args[0] = all_params[:-1] fs = func(*args, **kwargs) return apply_anc_state_misid(fs, p_misid) misid_func.__name__ = func.__name__ + '_misid' misid_func.__doc__ = func.__doc__ return misid_func
def make_extrap_func(func, extrap_x_l=None, extrap_log=False, fail_mag=10)
-
Generate a version of func that extrapolates to infinitely many gridpoints.
func: A function that returns a single scalar or array and whose last non-keyword argument is 'pts': the number of default_grid points to use in calculation.
extrap_x_l: An explict list of x values to use for extrapolation. If not provided, the extrapolation routine will look for '.extrap_x' attributes on the results of func. The method Spectrum.from_phi will add an extrap_x attribute to resulting Spectra, equal to the x-value of the first non-zero grid point. An explicit list is useful if you want to override this behavior for testing. fail_mag: Simon Gravel noted that there can be numerical instabilities in extrapolation when working with large spectra that have very small entires (of order 1e-24). To avoid these instabilities, we ignore the extrapolation values (and use the input result with the smallest x) if the extrapolation is more than fail_mag orders of magnitude away from the smallest x input result.Returns a new function whose last argument is a list of numbers of grid points and that returns a result extrapolated to infinitely many grid points.
Expand source code
def make_extrap_func(func, extrap_x_l=None, extrap_log=False, fail_mag=10): """ Generate a version of func that extrapolates to infinitely many gridpoints. func: A function that returns a single scalar or array and whose last non-keyword argument is 'pts': the number of default_grid points to use in calculation. extrap_x_l: An explict list of x values to use for extrapolation. If not provided, the extrapolation routine will look for '.extrap_x' attributes on the results of func. The method Spectrum.from_phi will add an extrap_x attribute to resulting Spectra, equal to the x-value of the first non-zero grid point. An explicit list is useful if you want to override this behavior for testing. fail_mag: Simon Gravel noted that there can be numerical instabilities in extrapolation when working with large spectra that have very small entires (of order 1e-24). To avoid these instabilities, we ignore the extrapolation values (and use the input result with the smallest x) if the extrapolation is more than fail_mag orders of magnitude away from the smallest x input result. Returns a new function whose last argument is a list of numbers of grid points and that returns a result extrapolated to infinitely many grid points. """ x_l_from_results = (extrap_x_l is None) def extrap_func(*args, **kwargs): # Separate pts (or pts_l) from arguments if 'pts' not in kwargs: other_args, pts_l = args[:-1], args[-1] else: other_args = args pts_l = kwargs['pts'] del kwargs['pts'] if 'no_extrap' in kwargs: no_extrap = True del kwargs['no_extrap'] else: no_extrap = False if numpy.isscalar(pts_l): pts_l = [pts_l] # Create a sub-function that fixes all other arguments and only # takes in pts. partial_func = functools.partial(func, *other_args, **kwargs) ## ## The commented-out code implements distribution of fs calculations ## among multiple processors. Unfortunately, it doesn't work with ## iPython, because pickling functions is very fragile in the ## interactive interpreter. If we had some sort of data structure for ## defining models, then this would be much easier to implement. Note ## that there might still be issues on Windows systems, because of its ## poor-man's version of fork(). ## #import cPickle, multiprocessing #try: # # Test whether sub-function is picklable. If not, pool.map will # # hang. # cPickle.dumps(partial_func) # pool = multiprocessing.Pool() # result_l = pool.map(partial_func, pts_l) # pool.close() #except (cPickle.PicklingError, TypeError): # print('Function passed to extrap func must be picklable for ' # 'multi-processor executation.') # import sys # sys.exit() result_l = list(map(partial_func, pts_l)) if no_extrap: return result_l if x_l_from_results: try: x_l = [r.extrap_x for r in result_l] except AttributeError: raise ValueError("Extrapolation function error: No explicit extrapolation x_l provided, and results do not have 'extrap_x' attributes. If this is an FS extrapolation, check your from_phi method.") else: x_l = extrap_x_l if extrap_log: result_l = [numpy.log(r) for r in result_l] # Extrapolate if len(pts_l) == 1: ex_result = result_l[0] elif len(pts_l) == 2: ex_result = linear_extrap(result_l, x_l) elif len(pts_l) == 3: ex_result = quadratic_extrap(result_l, x_l) elif len(pts_l) == 4: ex_result = cubic_extrap(result_l, x_l) elif len(pts_l) == 5: ex_result = quartic_extrap(result_l, x_l) elif len(pts_l) == 6: ex_result = quintic_extrap(result_l, x_l) else: raise ValueError('Number of calculations to use for extrapolation ' 'must be between 1 and 6') if extrap_log: ex_result = numpy.exp(ex_result) # Simon Gravel noted that there can be numerical instabilities in # extrapolation when working with large spectra that have very small # entires (of order 1e-24). # To avoid these instabilities, we ignore the extrapolation values # if it is too different from the input values. if len(pts_l) > 1: # Assume the best input value comes from the smallest grid. best_result = result_l[numpy.argmin(x_l)] if extrap_log: best_result = numpy.exp(best_result) # The extrapolation is deemed to have failed if it results in a # value more than fail_mag orders of magnitude away from the # best input value. extrap_failed = abs(numpy.log10(ex_result/best_result)) > fail_mag if numpy.any(extrap_failed): logger.warning('Extrapolation may have failed. Check resulting ' 'frequency spectrum for unexpected results.') # For entries that fail, use the "best" input result. ex_result[extrap_failed] = best_result[extrap_failed] try: ex_result.pop_ids = result_l[0].pop_ids except AttributeError: pass return ex_result extrap_func.__name__ = func.__name__ extrap_func.__doc__ = func.__doc__ return extrap_func
def make_extrap_log_func(func, extrap_x_l=None)
-
Generate a version of func that extrapolates to infinitely many gridpoints.
Note that extrapolation here is done on the log of the function result, so this will fail if any returned values are < 0. It does seem to be better behaved for SFS calculation.
func: A function whose last argument is the number of Numerics.default_grid points to use in calculation and that returns a single scalar or array. extrap_x_l: An explict list of x values to use for extrapolation. If not provided, the extrapolation routine will look for '.extrap_x' attributes on the results of func. The method Spectrum.from_phi will add an extrap_x attribute to resulting Spectra, equal to the x-value of the first non-zero grid point. An explicit list is useful if you want to override this behavior for testing.
Returns a new function whose last argument is a list of numbers of grid points and that returns a result extrapolated to infinitely many grid points.
Expand source code
def make_extrap_log_func(func, extrap_x_l=None): """ Generate a version of func that extrapolates to infinitely many gridpoints. Note that extrapolation here is done on the *log* of the function result, so this will fail if any returned values are < 0. It does seem to be better behaved for SFS calculation. func: A function whose last argument is the number of Numerics.default_grid points to use in calculation and that returns a single scalar or array. extrap_x_l: An explict list of x values to use for extrapolation. If not provided, the extrapolation routine will look for '.extrap_x' attributes on the results of func. The method Spectrum.from_phi will add an extrap_x attribute to resulting Spectra, equal to the x-value of the first non-zero grid point. An explicit list is useful if you want to override this behavior for testing. Returns a new function whose last argument is a list of numbers of grid points and that returns a result extrapolated to infinitely many grid points. """ return make_extrap_func(func, extrap_x_l=extrap_x_l, extrap_log=True)
def multinomln(N)
-
Get the multinomial coefficient for an array N.
N: array of integers.
Expand source code
def multinomln(N): """ Get the multinomial coefficient for an array N. N: array of integers. """ if tuple(N) not in _multinomln_cache: N_sum = numpy.sum(N) res = gammaln(N_sum+1) for n in N: res -= gammaln(n+1) _multinomln_cache[tuple(N)] = res return _multinomln_cache[tuple(N)]
def part(x, n, minval=0, maxval=2)
-
Returns the integer partition summing to x with n entries and min and max equal to 0 and 2 (or ploidy level), respectively.
x: integer summand. n: number of partition entries. minval: minimum value allowed for partition entries. maxval: maximum value allowed for partition entries.
Expand source code
def part(x, n, minval=0, maxval=2): """ Returns the integer partition summing to x with n entries and min and max equal to 0 and 2 (or ploidy level), respectively. x: integer summand. n: number of partition entries. minval: minimum value allowed for partition entries. maxval: maximum value allowed for partition entries. """ if not n * minval <= x <= n * maxval: return elif n == 0: yield [] else: for val in range(minval, maxval + 1): for p in part(x - val, n - 1, val, maxval): yield [val] + p
def quadratic_extrap(ys, xs)
-
Quadratically extrapolate from three x,y pairs to x = 0.
ys: y values from x,y pairs. Note that these can be arrays of values. xs: x values from x,y pairs. These should be scalars.
Returns extrapolated y at x=0.
Expand source code
def quadratic_extrap(ys, xs): """ Quadratically extrapolate from three x,y pairs to x = 0. ys: y values from x,y pairs. Note that these can be arrays of values. xs: x values from x,y pairs. These should be scalars. Returns extrapolated y at x=0. """ y1,y2,y3 = ys x1,x2,x3 = xs return x2*x3/((x1-x2)*(x1-x3)) * y1 + x1*x3/((x2-x1)*(x2-x3)) * y2\ + x1*x2/((x3-x1)*(x3-x2)) * y3
def quadratic_grid(num_pts)
-
A nonuniform grid of points on [0,1] with a quadratic pattern of spacings.
The grid is weighted to be denser near 0 and 1, which is useful for population genetic simulations. In between, it smoothly increases and then decreases the step size.
Expand source code
def quadratic_grid(num_pts): """ A nonuniform grid of points on [0,1] with a quadratic pattern of spacings. The grid is weighted to be denser near 0 and 1, which is useful for population genetic simulations. In between, it smoothly increases and then decreases the step size. """ # Rounds down... small_pts = int(num_pts / 10) large_pts = num_pts - small_pts+1 grid = numpy.linspace(0, 0.05, small_pts) # The interior grid spacings are a quadratic function of x, being # approximately x1 at the boundaries. To achieve this, our actual grid # positions must come from a cubic function. # Here I calculate and apply the appropriate conditions: # x[q = 0] = x_start => d = x_start # dx/dq [q = 0] = dx_start => c = dx_start/dq # x[q = 1] = 1 # dx/dq [q = 1] = dx_start q = numpy.linspace(0, 1, large_pts) dq = q[1] - q[0] x_start = grid[-1] dx_start = grid[-1] - grid[-2] d = x_start c = dx_start/dq b = -3*(-dq + dx_start + dq*x_start)/dq a = -2 * b/3 grid = numpy.concatenate((grid[:-1], a*q**3 + b*q**2 + c*q + d)) return grid
def reverse_array(arr)
-
Reverse an array along all axes, so arr[i,j] -> arr[-(i+1),-(j+1)].
Expand source code
def reverse_array(arr): """ Reverse an array along all axes, so arr[i,j] -> arr[-(i+1),-(j+1)]. """ reverse_slice = tuple(slice(None, None, -1) for ii in arr.shape) return arr[reverse_slice]
def trapz(yy, xx=None, dx=None, axis=-1)
-
Integrate yy(xx) along given axis using the composite trapezoidal rule.
xx must be one-dimensional and len(xx) must equal yy.shape[axis].
This is modified from the SciPy version to work with n-D yy and 1-D xx.
Expand source code
def trapz(yy, xx=None, dx=None, axis=-1): """ Integrate yy(xx) along given axis using the composite trapezoidal rule. xx must be one-dimensional and len(xx) must equal yy.shape[axis]. This is modified from the SciPy version to work with n-D yy and 1-D xx. """ if (xx is None and dx is None)\ or (xx is not None and dx is not None): raise ValueError('One and only one of xx or dx must be specified.') elif (xx is not None) and (dx is None): dx = numpy.diff(xx) yy = numpy.asanyarray(yy) nd = yy.ndim if yy.shape[axis] != (len(dx)+1): raise ValueError('Length of xx must be equal to length of yy along ' 'specified axis. Here len(xx) = %i and ' 'yy.shape[axis] = %i.' % (len(dx)+1, yy.shape[axis])) slice1 = [slice(None)]*nd slice2 = [slice(None)]*nd slice1[axis] = slice(1,None) slice2[axis] = slice(None,-1) sliceX = [numpy.newaxis]*nd sliceX[axis] = slice(None) slice1, slice2, sliceX = tuple(slice1), tuple(slice2), tuple(sliceX) return numpy.sum(dx[sliceX] * (yy[slice1]+yy[slice2])/2.0, axis=axis)