Egglib 2.1.11
C++ library reference manual
Consensus.hpp
1 /*
2  Copyright 2008-2009 Stéphane De Mita, Mathieu Siol
3 
4  This file is part of the EggLib library.
5 
6  EggLib is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  EggLib is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with EggLib. If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #ifndef EGGLIB_CONSENSUS_HPP
21 #define EGGLIB_CONSENSUS_HPP
22 
23 #include "Align.hpp"
24 #include <sstream>
25 #include <string>
26 #include <vector>
27 
28 namespace egglib {
29 
105  class Consensus {
106  public:
110  Consensus();
111 
115  virtual ~Consensus() {}
116 
118  void setMissing(char);
119 
121  void setDisagreement(char);
122 
124  bool check_sequences(Align& align);
125 
139  Align consensus(Align& align, char separator='_', bool rigorous=true);
140 
142  const std::vector<std::string>& firstSequenceNames();
143 
145  const std::vector<std::string>& secondSequenceNames();
146 
148  const std::vector<std::string>& roots();
149 
151  const std::vector<int>& consistentPositions();
152 
154  const std::vector<int>& complementaryPositions();
155 
157  const std::vector<int>& uninformativePositions();
158 
160  const std::vector<int>& ambiguousPositions();
161 
163  const std::vector<int>& atLeastPartiallyResolvedAmbiguities();
164 
166  const std::vector<std::vector<int> >& inconsistentPositions();
167 
168 
169  private:
170 
174  Consensus(const Consensus& source) { }
175 
179  Consensus& operator=(const Consensus& source) { return *this; }
180 
181  // A private helper
182  class PairwiseConsensus;
183 
184  // report data
185  std::vector<std::string> t_firstSequenceNames;
186  std::vector<std::string> t_secondSequenceNames;
187  std::vector<std::string> t_roots;
188  std::vector<int> t_consistentPositions;
189  std::vector<int> t_complementaryPositions;
190  std::vector<int> t_uninformativePositions;
191  std::vector<int> t_ambiguousPositions;
192  std::vector<int> t_atLeastPartiallyResolvedAmbiguities;
193  std::vector<std::vector<int> > t_inconsistentPositions;
194 
195  // Code for missing data (usually ?)
196  char MISSING;
197 
198  // Code for disgrement
199  char DISAGREEMENT;
200 
201  // Helper class managing a single pair
202  class PairwiseConsensus {
203  public:
204  // Default object creation
205  PairwiseConsensus();
206 
207  // Object destruction
208  virtual ~PairwiseConsensus() {}
209 
210  // Usual object creation
211  PairwiseConsensus(std::string, std::string);
212 
213  // Fills an object created with the default constructor
214  void load(std::string,std::string);
215 
216  // Changes the MISSING character
217  void setUndeterminedCharacter(char);
218 
219  // Changes the DISAGREEMENT character
220  void setDisagreementCharacter(char);
221 
222  /* Uses the conservative mode of consensus
223  *
224  * Tries to avoid to make decisions, and adds the
225  * character set by DISAGREEMENT upon inconsistencies
226  *
227  * return The number of inconsistencies.
228  */
229  int generateSoftConsensus();
230 
231  /* Strict mode of consensus
232  *
233  * The number of inconsistencies.
234  */
235  int generateHardConsensus();
236 
237  // Two fully resolved (including gap) and identical characters
238  int getConsistentPositions();
239 
240  // One informative (including gap) and one missing
241  int getComplementaryPositions();
242 
243  // None missing, but different and incompatible
244  int getInconsistentPositions();
245 
246  // Both are missing
247  int getUninformativePositions();
248 
249  // Both identical or one missing, but not fully resolved
250  int getAmbiguousPositions();
251 
252  // Different, not missing, complementary.
253  int getAtLeastPartiallyResolvedAmbiguities();
254 
255  // Accessor
256  int getThisInconsistentPosition(unsigned int);
257 
258  // Generates the consensus sequence
259  std::string getConsensus();
260 
261  private:
262 
263  inline bool isValid(char c) {
264  switch (c) {
265  case 'A': case 'a':
266  case 'C': case 'c':
267  case 'G': case 'g':
268  case 'T': case 't':
269  return true;
270  default:
271  return false;
272  }
273  }
274 
275  // This initiates a series of embedded objects
276  void setCharacterContainers();
277 
278  // The first sequence
279  std::string seqA;
280 
281  // The second sequence
282  std::string seqB;
283 
284  // The resulting consensus
285  std::string cons;
286 
287  // The vecotr storing the inconsistent positions
288  std::vector<int> posIncons;
289 
290  // The length of the sequences
291  unsigned int ls;
292 
293  // Counter
294  int cntConsistentPositions;
295 
296  // Counter
297  int cntComplementaryPositions;
298 
299  // Counter
300  int cntAmbiguousPositions;
301 
302  // Counter
303  int cntInconsistentPositions;
304 
305  // Counter
306  int cntUninformativePositions;
307 
308  // Counter
309  int cntAtLeastPartiallyResolvedAmbiguities;
310 
311  // Code for missing data (usually ?)
312  char MISSING;
313 
314  // Code for disgrement
315  char DISAGREEMENT;
316 
317  public:
318  // This class manages relationships different symbols
320  public:
321  // Default value: @
323 
324  // Initiates to a given symbol
325  CharacterContainer(const char&);
326 
327  // Assignment operator
328  CharacterContainer& operator=(const char&);
329 
330  // Sets the symbol
331  void setValue(char);
332 
333  // Set the descendants
334  void setSons(std::vector<CharacterContainer>);
335 
336  // Tests whether the symbol is the same
337  bool is(CharacterContainer);
338 
339  // Tests if the query is contained amongst the sons
340  bool has(CharacterContainer);
341 
342  // Tests if the query is contained amongst the sons
343  bool has(char);
344 
345  /* Tests whether the left character has the left one
346  * Should be called on the N object only.
347  */
349 
350  /* Creates the object with the proper sons
351  * Should be called on the N object only.
352  */
353  CharacterContainer init(char);
354 
355  // The symbol
356  char value;
357 
358  // The descendants
359  std::vector<CharacterContainer> sons;
360  };
361 
362  private:
363  // Symbol ?
364  CharacterContainer ccQ;
365 
366  // Symbol A
367  CharacterContainer ccA;
368 
369  // Symbol C
370  CharacterContainer ccC;
371 
372  // Symbol G
373  CharacterContainer ccG;
374 
375  // Symbol T
376  CharacterContainer ccT;
377 
378  // Symbol U
379  CharacterContainer ccU;
380 
381  // Symbol M
382  CharacterContainer ccM;
383 
384  // Symbol R
385  CharacterContainer ccR;
386 
387  // Symbol W
388  CharacterContainer ccW;
389 
390  // Symbol S
391  CharacterContainer ccS;
392 
393  // Symbol Y
394  CharacterContainer ccY;
395 
396  // Symbol K
397  CharacterContainer ccK;
398 
399  // Symbol B
400  CharacterContainer ccB;
401 
402  // Symbol D
403  CharacterContainer ccD;
404 
405  // Symbol H
406  CharacterContainer ccH;
407 
408  // Symbol V
409  CharacterContainer ccV;
410 
411  // Symbol N
412  CharacterContainer ccN;
413 
414  // Symbol -
415  CharacterContainer ccGAP;
416  };
417  };
418 }
419 
420 #endif
421 
const std::vector< int > & complementaryPositions()
Number of complementary positions for all consensed pairs.
Definition: Consensus.cpp:479
const std::vector< std::string > & secondSequenceNames()
Second names of consensed pairs.
Definition: Consensus.cpp:467
void setDisagreement(char)
Sets the character used to point to disagreements (default: Z)
Definition: Consensus.cpp:34
virtual ~Consensus()
Destructor.
Definition: Consensus.hpp:115
const std::vector< int > & uninformativePositions()
Number of uninformative positions for all consensed pairs.
Definition: Consensus.cpp:483
Consensus()
Constructor.
Definition: Consensus.cpp:25
const std::vector< int > & ambiguousPositions()
Number of ambiguous positions for all consensed pairs.
Definition: Consensus.cpp:487
const std::vector< std::vector< int > > & inconsistentPositions()
Vector of inconsistent positions ofr all consensed pairs.
Definition: Consensus.cpp:495
const std::vector< std::string > & roots()
Root names of consensed pairs.
Definition: Consensus.cpp:471
const std::vector< int > & atLeastPartiallyResolvedAmbiguities()
Number of at least partially resolved ambiguities for all consensed pairs.
Definition: Consensus.cpp:491
Align consensus(Align &align, char separator='_', bool rigorous=true)
Reduces the sequence alignment by making consensus sequences.
Definition: Consensus.cpp:38
void setMissing(char)
Sets the character interpreted as missing (default: ?)
Definition: Consensus.cpp:30
bool check_sequences(Align &align)
Checks all the characters.
Definition: Consensus.cpp:437
const std::vector< int > & consistentPositions()
Number of consistent positions for all consensed pairs.
Definition: Consensus.cpp:475
Definition: ABC.cpp:37
Handles a sequence alignment.
Definition: Align.hpp:76
Generates consensus sequences.
Definition: Consensus.hpp:105
const std::vector< std::string > & firstSequenceNames()
First name of consensed pairs.
Definition: Consensus.cpp:463

Hosted by 
Get EggLib at SourceForge.net. Fast, secure and Free Open Source software downloads