"""
Copyright 2015-2023 Stephane De Mita, Mathieu Siol
This file is part of EggLib.
EggLib is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
EggLib is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with EggLib. If not, see <http://www.gnu.org/licenses/>.
"""
import sys
from .. import eggwrapper as _eggwrapper, random
from .. import _interface, _tree, alphabets
from . import _param_helpers
[docs]class Simulator(object):
"""
Manager of the coalescent simulator. The constructor takes arguments
controlling the demographic and mutation models used for simulation.
Only the number of populations is required at the time of
construction. Once it is set, it can be never modified. Other
constructor arguments are all optional and can be set or modified
later using the :attr:`.params` instance attribute (either using its
:meth:`~.ParamDict.update` method or the ``[]`` operator, such as in
``simulator.params['theta'] = 2.85``. Keyword arguments are passed as is to
:meth:`~.ParamDict.update`. List-based parameters can be set if all
values are provided in a sequence. Matrix-based parameters cannot be
set here.
:param num_pop: number of populations.
:param migr: migration rate.
Other keyword arguments can be any of the parameters defined in the
table below.
+-----------------+------------------------------------+-----------------+
|Parameter | Definition | Default |
+=================+====================================+=================+
|``num_pop`` | Number of populations | None, required |
+-----------------+------------------------------------+-----------------+
|``num_sites`` | Number of sites | 0 (ISM) |
+-----------------+------------------------------------+-----------------+
|``num_mut`` | Fixed number of mutations | 0 |
+-----------------+------------------------------------+-----------------+
|``theta`` | :math:`4N_0\mu` parameter | 0.0 |
+-----------------+------------------------------------+-----------------+
|``recomb`` | :math:`4N_0c` parameter | 0.0 |
+-----------------+------------------------------------+-----------------+
|``mut_model`` | Mutation model (among ``KAM``, | ``KAM`` |
| | ``IAM``, ``SMM`` and ``TPM``) | |
+-----------------+------------------------------------+-----------------+
|``TPM_proba`` | Probability parameter of TPM | 0.5 |
+-----------------+------------------------------------+-----------------+
|``TPM_param`` | Shape parameter of TPM | 0.5 |
+-----------------+------------------------------------+-----------------+
|``num_alleles`` | Number of alleles for KAM | 2 |
+-----------------+------------------------------------+-----------------+
|``rand_start`` | Pick start allele randomly | False |
| | for KAM (boolean) | |
+-----------------+------------------------------------+-----------------+
|``num_chrom`` | Number of sampled chromosomes | 0 for all |
| | (per population) | |
+-----------------+------------------------------------+-----------------+
|``num_indiv`` | Number of sampled individuals | 0 for all |
| | (per population) | |
+-----------------+------------------------------------+-----------------+
|``N`` | Population size, expressed | 1.0 for all |
| | relatively to :math:`N_0` | |
| | (per population) | |
+-----------------+------------------------------------+-----------------+
|``G`` | Exponential growth/decline rate, | 0.0 for all |
| | negative values mean decline | |
| | (per population) | |
+-----------------+------------------------------------+-----------------+
|``s`` | Population selfing probability | 0.0 for all |
| | (per population) | |
+-----------------+------------------------------------+-----------------+
|``site_pos`` | Site position, as values | Equally spread |
| | between 0.0 and 1.0 | |
| | (per site) | |
+-----------------+------------------------------------+-----------------+
|``site_weight`` | Site mutation weight, controlling | 1.0 for all |
| | the relative probability of sites | |
| | (per site) | |
+-----------------+------------------------------------+-----------------+
|``migr_matrix`` | Pairwise migration rate matrix | 0.0 for all |
| | (the diagonal cannot be set) | |
+-----------------+------------------------------------+-----------------+
|``trans_matrix`` | Matrix of transition weights | 1.0 for all |
| | between pairs of alleles | |
+-----------------+------------------------------------+-----------------+
|``events`` | List of events added by the user | Empty |
+-----------------+------------------------------------+-----------------+
|``max_iter`` | Maximum number of iterations | 100,000 |
+-----------------+------------------------------------+-----------------+
The following table presents the categories of events that can be
added to the ``events`` list using :meth:`~.EventList.add`.
+------------------+-------------------------+----------------------------------+
|Event code | Description | Parameters |
+==================+=========================+==================================+
|``size`` | Change population size | ``T`` -- date (1) |
| | +----------------------------------+
| | | ``N`` -- new size |
| | +----------------------------------+
| | | ``idx`` -- population index (2) |
+------------------+-------------------------+----------------------------------+
|``migr`` | Change all migration | ``T`` -- date (1) |
| | rate +----------------------------------+
| | | ``M`` -- migration rate (all |
| | | pairwise migration rates are |
| | | set to ``M/(num_pop-1)``) |
+------------------+-------------------------+----------------------------------+
|``pair_migr`` | Change pairwise | ``T`` -- date (1) |
| | migration rate +----------------------------------+
| | | ``M`` -- pairwise migration |
| | | rate |
| | +----------------------------------+
| | | ``src`` -- source population |
| | | index |
| | +----------------------------------+
| | | ``dst`` -- destination |
| | | population index |
+------------------+-------------------------+----------------------------------+
|``growth`` | Change population | ``T`` -- date (1) |
| | exponential +----------------------------------+
| | growth/decline rate | ``G`` -- new rate |
| | +----------------------------------+
| | | ``idx`` -- population index (2) |
+------------------+-------------------------+----------------------------------+
|``selfing`` | Change population | ``T`` -- date (1) |
| | self-fertilization +----------------------------------+
| | rate | ``s`` -- new rate |
| | +----------------------------------+
| | | ``idx`` -- population index (2) |
+------------------+-------------------------+----------------------------------+
|``recombination`` | Change recombination | ``T`` -- date (1) |
| | rate +----------------------------------+
| | | ``R`` -- new rate |
+------------------+-------------------------+----------------------------------+
|``bottleneck`` | Apply a bottleneck | ``T`` -- date (1) |
| | +----------------------------------+
| | | ``S`` -- bottleneck strength (3) |
| | +----------------------------------+
| | | ``idx`` -- population index (2) |
+------------------+-------------------------+----------------------------------+
|``admixture`` | Move lineages from one | ``T`` -- date (1) |
| | population to another +----------------------------------+
| | | ``proba`` -- migration |
| | | probability (in [0,1] range |
| | +----------------------------------+
| | | ``src`` -- source population |
| | | index |
| | +----------------------------------+
| | | ``dst`` -- destination |
| | | population index |
+------------------+-------------------------+----------------------------------+
|``merge`` | Merge a population to | ``T`` -- date (1) |
| | another (take all +----------------------------------+
| | lineages from ``src``, | ``src`` -- source population |
| | move them to ``dst``, | index |
| | and remove ``src`` +----------------------------------+
| | | ``dst`` -- destination |
| | | population index |
+------------------+-------------------------+----------------------------------+
|``sample`` | Perform a delayed | ``T`` -- date (1) |
| | a some point in the +----------------------------------+
| | past in one of the | ``idx`` -- population index |
| | populations | |
| | +----------------------------------+
| | | ``label`` -- group label (4) |
| | +----------------------------------+
| | | ``num_chrom`` -- number of |
| | | sampled chromosomes |
| | +----------------------------------+
| | | ``num_indiv`` -- number of |
| | | sampled individuals |
+------------------+-------------------------+----------------------------------+
1. Time is expressed in units of :math:`4N_0` generations.
2. If ``idx`` is omitted, the event is applied to all populations
at once.
3. Bottleneck strength is expressed in time units. A bottleneck is
implemented as a period of time during which coalescences are the
only event allowed to occur.
4. The label of delayed sample can be set to the same value than the
populations index (in this case, delayed samples will have the
same label than normal samples from the same population), or to a
different label, as the user's option.
The parameters ``num_chrom``, ``num_indiv``, ``N``, ``G``, ``s``,
``site_pos``, and ``site_weight`` are represented by
:class:`.ParamList` instances that behave like lists (except that
their length cannot be changed). In particular, :class:`.ParamList`
support subscript indexing and it can also be initialized by passing
a sequence. The parameters ``migr_matrix`` and ``trans_matrix`` are
represented by :class:`.ParamMatrix` instances that support a
double-index subscript system to read/changes values (as in
``params['migr_matrix'][i,j]`` to access the value at row *i* and
column *j*. Diagonal values are read as ``None`` and cannot be
changed. Finally, ``events`` is represented by a :class:`.EventList`
instance that exhibit limited list functionality and provides
methods to add, read, and modify events. See this class for more
information about out editing the list of events.
"""
def __init__(self, num_pop, migr=0.0, **kwargs):
self._alphabet_unltd = alphabets.Alphabet('range', (-_eggwrapper.MAX_ALLELE_RANGE, _eggwrapper.MAX_ALLELE_RANGE+1), (0, 0), name='unlimited')
self._alphabet_positive = alphabets.Alphabet('range', (0, _eggwrapper.MAX_ALLELE_RANGE), (0, 0), name='positive unlimited')
self._alphabets_K = {}
self._params = _param_helpers.ParamDict(num_pop)
self._params.set_migr(migr)
self._params.update(**kwargs)
self._coalesce = _eggwrapper.Coalesce()
self._align = _interface.Align._create_from_data_holder(self._coalesce.data(), self._alphabet_unltd)
@property
def params(self):
"""
Simulation parameters of this instance. This object is a
instance of :class:`.ParamDict`, which is a clone
of :class:`dict` that does not let the users add or remove
parameters, but lets them modify values of parameters
(similarly, the number of items of per-population or per-site
parameters cannot be modified).
"""
return self._params
def _reset_align(self):
self._align._ns = self._params._params.get_nsam()
if self._params['mut_model'] == 'KAM':
K = self._params['num_alleles']
if K not in self._alphabets_K:
self._alphabets_K[K] = alphabets.Alphabet('int', range(K), [], name='KAM:{0}'.format(K))
alph = self._alphabets_K[K]
elif self._params['mut_model'] == 'IAM': alph = self._alphabet_positive
else: alph = self._alphabet_unltd
self._align._alphabet = alph
[docs] def simul(self, dest=None):
"""
Perform a single simulation. The simulation is conditioned on
the current value of parameters stored in :attr:`.params`.
:param dest: An :class:`.Align` to reset using simulated data. All
previous data will be lost.
:return: A new :class:`.Align` instance containing the simulated
data unless *dest* is specified (otherwise ``None``).
.. note::
The method :meth:`.iter_simul` can be much more efficient.
For performance-critical applications, its use is
recommended.
"""
# run the simulation
self._coalesce.simul(self._params._params, True)
# reset the local align instance
self._reset_align()
# return a deep copy of the alignment
if dest is None: return _interface.Align.create(_interface.Align._create_from_data_holder(self._coalesce.data(), self._align._alphabet))
else: dest._reset_from_data_holder(self._coalesce.data())
[docs] def iter_simul(self, nrepet, cs=None, dest_trees=None, **feed_params):
"""
Perform several simulations. Simulations are conditioned on the
current value of parameters
stored in :attr:`.params`. Return an iterator that will loop
over the requested number of simulations. The simulated
alignments are always available at each iteration loop as the
instance attribute :attr:`.align`. By default, all iterations
return a reference to this instance, but if *cs* is specified a
dictionary of statistics is returned at each simulation.
:param nrepet: number of simulations. If ``None``, iterate for
ever (you are required to use ``break`` in loop with your
own stopping criterion is reach).
:param cs: :class:`.ComputeStats` instance properly configured
to compute statistics from simulated data. If *cs* is specified,
each iteration round will yield the dictionary of statistics
obtained from :meth:`~.ComputeStats.process_align` called on
this object. See also *cs_filter* and *cs_struct*.
:param dest_trees: a :class:`list` in which simulated trees will
be appended. Since each simulation can yield several trees
(because of recombination), a new sub-list will be appended
for each simulation, with one item for each tree. Each tree
covered a defined region of the simulated chromosome, so
each item of each sub-list will be ``(tree, start, stop)``
:class:`!tuple` with ``tree`` as a new :class:`.Tree` instance, and
``start`` and ``stop`` as the start and stop positions (real
numbers comprised between 0 and 1). If recombination
occurred, trees will not be sorted within their sub-list. If
recombination did not occur, each simulation will be
represented by a list with a single :class:`!tuple`. Any previously
data contained in *dest_tree* is left untouched.
:param feed_params: other arguments provide sequences of values
for any parameter (except ``num_pop``),
allowing to modify any set of parameters between
simulations. All changes are permanent and affect any later
simulation. All additional options must be in the
``key=value`` format, with have one of the parameters as
``key``, and a sequence of values as ``value``. All
sequences must be of length at least equal to *nrepet*. If
longer, additional values are ignored. For list-based
parameters, each value must be a ``(index, value)`` tuple
and, for matrix-based parameters, each value must be a
``(index1, index2, value)`` tuple.
:return: An iterator, yielding a reference to :attr:`.align` if
*cs* is ``None``, or otherwise a dictionary of computed
statistics.
"""
# reset the local align instance
self._reset_align()
# safety checking
for key, values in feed_params.items():
if key not in self._params._keys: raise ValueError('invalid parameter: `{0}`'.format(key))
if len(values) < nrepet: raise ValueError('not enough values provided for: `{0}`'.format(key))
# main loop
i = 0
stop = -1 if nrepet is None else nrepet
while i != nrepet:
# set the provided params
for key, values in feed_params.items():
value = values[i]
self._params._set(key, value)
# run the simulation
self._coalesce.simul(self._params._params, False)
# export tree if needed
if dest_trees != None:
dest_trees.append([
( _tree.Tree._from_coalesce(self._coalesce, i, str),
self._coalesce.tree_start(i),
self._coalesce.tree_stop(i) )
for i in range(self._coalesce.number_of_trees()) ])
# mutate
self._coalesce.mutate()
# return what is requested
if cs != None: yield cs.process_align(self._align)
else: yield self._align
i += 1
# reset object
self._align.reset()
@property
def align(self):
"""
An :class:`.Align` instance containing simulated data for the
current iteration of :meth:`.iter_simul`. The data in this
instance will be updated at each iteration round, and deleted at
the end of the iteration. If it must be copied, a deep copy is
required (typically with :meth:`.Align.create`).
"""
return self._align