# Source code for egglib.stats._baudry

```
"""
Copyright 2016-2021 Stephane De Mita, Mathieu Siol
This file is part of EggLib.
EggLib is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
EggLib is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with EggLib. If not, see <http://www.gnu.org/licenses/>.
"""
from .. import eggwrapper as _eggwrapper
from .. import alphabets
from .. import _site
from .. import _freq
[docs]class ProbaMisoriented(object):
"""
Error rate of polymorphism orientation using an outgroup.
Only sites that are either variable
within the ingroup or have a fixed difference with respect to the
outgroup are considered. Sites with more than two different alleles
in the ingroup, or more than one allele in the outgroup, are
ignored.
This function is an implementation of the method mentioned in Baudry
and Depaulis (2003), allowing to estimate the probability that a
site oriented using the provided outgroup have be misoriented due to
a homoplasic mutation in the branch leading to the outgroup. Note
that this estimation neglects the probability of shared
polymorphism.
Reference: Baudry, E. & F. Depaulis. 2003. Effect of misoriented
sites on neutrality tests with outgroup.
*Genetics*\ \xa0\ **165**\ : 1619-1622.
:param align: an :class:`.Align` containing the sites to analyse.
:param struct: a :class:`.Structure` instance.
If the instance is created with an alignment as constructor argument,
then the statistics are computed. The method :meth:`.load_align`
does the same from an existing :class:`.ProbaMisoriented` instance.
Otherwise, individual sites can be loaded with :meth:`.load_site`,
and then the statistics can be computed using :meth:`.compute` (the
latter is preferable if generate :class:`.Freq` instances for an
other use).
.. versionadded:: 3.0.0
"""
def __init__(self, align=None, struct=None):
self._A = alphabets.DNA.get_code('A')
self._C = alphabets.DNA.get_code('C')
self._G = alphabets.DNA.get_code('G')
self._T = alphabets.DNA.get_code('T')
self._sd = _eggwrapper.SiteDiversity()
self._site = _site.Site()
self._freq = _freq.Freq()
self.reset()
if align is not None: self.load_align(align, struct)
[docs] def reset(self):
"""
Clear all loaded or computed data.
"""
self._S = 0
self._D = 0
self._Ti = 0
self._Ti_cnt = 0
self._TiTv = None
self._pM = None
self._M = 0
self._M_cnt = 0
[docs] def load_align(self, align, struct):
"""
Load all sites of align that meet criteria. If there are previously
loaded data, they are discarded. This method computes statistics.
Data are required to be DNA sequences.
:param align: an :class:`.Align` instance.
:param struct: a :class:`.Structure` instance.
"""
if align._alphabet._obj.get_type() != 'DNA': raise ValueError('data must be DNA sequences')
self.reset()
for i in range(align._obj.get_nsit_all()):
self._site.from_align(align, i)
self._freq.from_site(self._site, struct)
self.load_site(self._freq)
self.compute()
[docs] def load_site(self, freq):
"""
Load a single site. If there are previously loaded data, they
are retained. To actually compute the misorientation
probability, the user must call :meth:`.compute`.
:param freq: a :class:`.Freq` instance.
.. warning::
The origin of data must be DNA sequences. This is currently
not checked here.
"""
flag = self._sd.process(freq._obj)
if (flag&2) == 0: return
if self._sd.Aglob() < 2: return # skip sites that are fixed overall
if self._sd.Aing() > 2: return # skip sites with 3+ alleles ingroup
if self._sd.Aout() > 1: return # skip sites with 2+ alleles outgroup
if self._sd.Aing() == 2:
self._S += 1
if freq._obj.frq_outgroup().nseff() > 0:
self._M_cnt += 1
if self._sd.Aglob() > self._sd.Aing():
self._M += 1
else:
self._D += 1
if self._sd.Aglob() == 2:
self._Ti_cnt += 1
if self._sd.global_allele(0) == self._A :
if self._sd.global_allele(1) == self._G: self._Ti += 1
elif self._sd.global_allele(0) == self._G:
if self._sd.global_allele(1) == self._A: self._Ti += 1
elif self._sd.global_allele(0) == self._C:
if self._sd.global_allele(1) == self._T: self._Ti += 1
elif self._sd.global_allele(0) == self._T:
if self._sd.global_allele(1) == self._C: self._Ti += 1
[docs] def compute(self):
"""
Compute :attr:`.pM` and :attr:`.TiTv` statistics. Requires that
sites have been loaded using :meth:`.load_site`. This method
does not reset the instance.
"""
self._pM = None
self._TiTv = None
if self._Ti_cnt > 0 and self._M_cnt > 0:
Ti = self._Ti / self._Ti_cnt
a = Ti / 4
b = (1-Ti) / 8
if b > 0:
self._TiTv = a/b
self._pM = ((self._M/self._M_cnt) * a**2 + 2*b**2) / (2*b*(2*a+b))
@property
def S(self):
""" Number of loaded polymorphic sites. Only the ingroup is
considered to classify a site as polymorphic. """
return self._S
@property
def D(self):
"""
Number of loaded sites with a fixed difference to the outgroup.
"""
return self._D
@property
def TiTv(self):
"""
Transition and transversion rate ratio. ``None`` if
the value cannot be computed (no loaded data or null
transversion rate). Requires that :meth:`.compute` has been
called.
"""
return self._TiTv
@property
def pM(self):
"""
Probability of misorientation. ``None`` if the value cannot be
computed (no loaded data, no valid polymorphism, null
transversion rate). Requires that :meth:`.compute` has been
called.
"""
return self._pM
```