-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaligner.hpp
541 lines (439 loc) · 31.7 KB
/
aligner.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
#ifndef VG_ALIGNER_HPP_INCLUDED
#define VG_ALIGNER_HPP_INCLUDED
#include <algorithm>
#include <utility>
#include <vector>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vg/vg.pb.h>
#include "gssw.h"
#include "Variant.h"
#include "Fasta.h"
#include "handle.hpp"
#include "path.hpp"
#include "dozeu_interface.hpp"
#include "deletion_aligner.hpp"
// #define BENCH
// #include "bench.h"
namespace vg {
static constexpr int8_t default_match = 1;
static constexpr int8_t default_mismatch = 4;
static constexpr int8_t default_score_matrix[16] = {
default_match, -default_mismatch, -default_mismatch, -default_mismatch,
-default_mismatch, default_match, -default_mismatch, -default_mismatch,
-default_mismatch, -default_mismatch, default_match, -default_mismatch,
-default_mismatch, -default_mismatch, -default_mismatch, default_match
};
static constexpr int8_t default_gap_open = 6;
static constexpr int8_t default_gap_extension = 1;
static constexpr int8_t default_full_length_bonus = 5;
static constexpr double default_gc_content = 0.5;
/// Score a gap with the given open and extension scores.
int32_t score_gap(size_t gap_length, int32_t gap_open, int32_t gap_extension);
/**
* The abstract interface that any Aligner should implement.
*/
class BaseAligner {
public:
/// Store optimal local alignment against a graph in the Alignment object.
/// Gives the full length bonus separately on each end of the alignment.
virtual void align(Alignment& alignment, const HandleGraph& g, bool traceback_aln) const = 0;
};
/**
* The basic GSSW-based core aligner implementation, which can then be quality-adjusted or not.
*/
class GSSWAligner : public BaseAligner {
protected:
GSSWAligner() = default;
GSSWAligner(const int8_t* _score_matrix,
int8_t _gap_open,
int8_t _gap_extension,
int8_t _full_length_bonus,
double _gc_content);
~GSSWAligner();
// for construction
// needed when constructing an alignable graph from the nodes
gssw_graph* create_gssw_graph(const HandleGraph& g) const;
// identify the IDs of nodes that should be used as pinning points in GSSW for pinned
// alignment ((i.e. non-empty nodes as close as possible to sinks))
unordered_set<id_t> identify_pinning_points(const HandleGraph& graph) const;
// convert graph mapping back into unreversed node positions
void unreverse_graph_mapping(gssw_graph_mapping* gm) const;
// convert from graph sequences back into unrereversed form
void unreverse_graph(gssw_graph* graph) const;
// alignment functions
void gssw_mapping_to_alignment(gssw_graph* graph,
gssw_graph_mapping* gm,
Alignment& alignment,
bool pinned,
bool pin_left) const;
string graph_cigar(gssw_graph_mapping* gm) const;
public:
/// Given a nonempty vector of nonnegative scaled alignment scores,
/// compute the mapping quality of the maximal score in the vector.
/// Sets max_idx_out to the index of that score in the vector.
/// Optionally includes a vector of implicit counts >= 1 for the scores, but
/// the mapping quality is always calculated as if its multiplicity is 1.
static double maximum_mapping_quality_exact(const vector<double>& scaled_scores, size_t* max_idx_out,
const vector<double>* multiplicities = nullptr);
/// Given a nonempty vector of nonnegative scaled alignment scores,
/// approximate the mapping quality of the maximal score in the vector.
/// Sets max_idx_out to the index of that score in the vector.
/// Optionally includes a vector of implicit counts >= 1 for the scores, but
/// the mapping quality is always calculated as if its multiplicity is 1.
static double maximum_mapping_quality_approx(const vector<double>& scaled_scores, size_t* max_idx_out,
const vector<double>* multiplicities = nullptr);
/// Same as maximum_mapping_quality_exact except alway s computes mapping
/// quality for the first score
static double first_mapping_quality_exact(const vector<double>& scaled_scores,
const vector<double>* multiplicities = nullptr);
/// Same as maximum_mapping_quality_approx except alway s computes mapping
/// quality for the first score
static double first_mapping_quality_approx(const vector<double>& scaled_scores,
const vector<double>* multiplicities = nullptr);
protected:
double group_mapping_quality_exact(const vector<double>& scaled_scores, const vector<size_t>& group,
const vector<double>* multiplicities = nullptr) const;
double estimate_next_best_score(int length, double min_diffs) const;
double recover_log_base(const int8_t* score_matrix, double gc_content, double tol) const;
bool verify_valid_log_odds_score_matrix(const int8_t* score_matrix, const double* nt_freqs) const;
double alignment_score_partition_function(double lambda, const int8_t* score_matrix,
const double* nt_freqs) const;
vector<double> all_mapping_qualities_exact(const vector<double>& scaled_scores,
const vector<double>* multiplicities = nullptr) const;
public:
double max_possible_mapping_quality(int length) const;
double estimate_max_possible_mapping_quality(int length, double min_diffs, double next_min_diffs) const;
/// store optimal alignment against a graph in the Alignment object with one end of the sequence
/// guaranteed to align to a source/sink node. if xdrop is selected, use the xdrop heuristic, which
/// does not guarantee an optimal alignment.
///
/// pinning left means that that the alignment starts with the first base of the read sequence and
/// the first base of a source node sequence, pinning right means that the alignment starts with
/// the final base of the read sequence and the final base of a sink node sequence
///
/// Gives the full length bonus only on the non-pinned end of the alignment.
virtual void align_pinned(Alignment& alignment, const HandleGraph& g, bool pin_left, bool xdrop = false,
uint16_t xdrop_max_gap_length = default_xdrop_max_gap_length) const = 0;
/// store the top scoring pinned alignments in the vector in descending score order up to a maximum
/// number of alignments (including the optimal one). if there are fewer than the maximum number in
/// the return value, then it includes all alignments with a positive score. the optimal alignment
/// will be stored in both the vector and in the main alignment object
virtual void align_pinned_multi(Alignment& alignment, vector<Alignment>& alt_alignments, const HandleGraph& g,
bool pin_left, int32_t max_alt_alns) const = 0;
/// store optimal global alignment against a graph within a specified band in the Alignment object
/// permissive banding auto detects the width of band needed so that paths can travel
/// through every node in the graph
virtual void align_global_banded(Alignment& alignment, const HandleGraph& g,
int32_t band_padding = 0, bool permissive_banding = true,
const unordered_map<handle_t, bool>* left_align_strand = nullptr) const = 0;
/// store top scoring global alignments in the vector in descending score order up to a maximum number
/// of alternate alignments (including the optimal alignment). if there are fewer than the maximum
/// number of alignments in the return value, then the vector contains all possible alignments. the
/// optimal alignment will be stored in both the vector and the original alignment object
virtual void align_global_banded_multi(Alignment& alignment, vector<Alignment>& alt_alignments,
const HandleGraph& g, int32_t max_alt_alns, int32_t band_padding = 0,
bool permissive_banding = true,
const unordered_map<handle_t, bool>* left_align_strand = nullptr) const = 0;
/// xdrop aligner
virtual void align_xdrop(Alignment& alignment, const HandleGraph& g, const vector<MaximalExactMatch>& mems,
bool reverse_complemented, uint16_t max_gap_length = default_xdrop_max_gap_length) const = 0;
/// xdrop aligner, but with a precomputed topological order on the graph, which need not include
/// all of the graph's handles and which may contain both orientations of a handle
virtual void align_xdrop(Alignment& alignment, const HandleGraph& g, const vector<handle_t>& order,
const vector<MaximalExactMatch>& mems, bool reverse_complemented,
uint16_t max_gap_length = default_xdrop_max_gap_length) const = 0;
/// Compute the score of an exact match in the given alignment, from the
/// given offset, of the given length.
virtual int32_t score_exact_match(const Alignment& aln, size_t read_offset, size_t length) const = 0;
/// Compute the score of an exact match of the given sequence with the given qualities.
/// Qualities may be ignored by some implementations.
virtual int32_t score_exact_match(const string& sequence, const string& base_quality) const = 0;
/// Compute the score of an exact match of the given range of sequence with the given qualities.
/// Qualities may be ignored by some implementations.
virtual int32_t score_exact_match(string::const_iterator seq_begin, string::const_iterator seq_end,
string::const_iterator base_qual_begin) const = 0;
/// Compute the score of a mismatch of the given range of sequence with the given qualities.
/// Qualities may be ignored by some implementations.
/// Note that the return value is SIGNED, and almost certainly NEGATIVE, because mismatches are bad.
virtual int32_t score_mismatch(string::const_iterator seq_begin, string::const_iterator seq_end,
string::const_iterator base_qual_begin) const = 0;
virtual int32_t score_full_length_bonus(bool left_side, string::const_iterator seq_begin,
string::const_iterator seq_end,
string::const_iterator base_qual_begin) const = 0;
virtual int32_t score_full_length_bonus(bool left_side, const Alignment& alignment) const = 0;
/// Compute the score of a path against the given range of subsequence with the given qualities.
virtual int32_t score_partial_alignment(const Alignment& alignment, const HandleGraph& graph, const path_t& path,
string::const_iterator seq_begin, bool no_read_end_scoring = false) const = 0;
/// Returns the score of an insert or deletion of the given length
int32_t score_gap(size_t gap_length) const;
/// stores -10 * log_10(P_err) in alignment mapping_quality field where P_err is the
/// probability that the alignment is not the correct one (assuming that one of the alignments
/// in the vector is correct). alignments must have been created with this Aligner for quality
/// score to be valid
void compute_mapping_quality(vector<Alignment>& alignments,
int max_mapping_quality,
bool fast_approximation,
double cluster_mq,
bool use_cluster_mq,
int overlap_count,
double mq_estimate,
double maybe_mq_threshold,
double identity_weight) const;
/// same function for paired reads, mapping qualities are stored in both alignments in the pair
void compute_paired_mapping_quality(pair<vector<Alignment>, vector<Alignment>>& alignment_pairs,
const vector<double>& frag_weights,
int max_mapping_quality1,
int max_mapping_quality2,
bool fast_approximation,
double cluster_mq,
bool use_cluster_mq,
int overlap_count1,
int overlap_count2,
double mq_estimate1,
double mq_estimate2,
double maybe_mq_threshold,
double identity_weight) const;
/// Computes mapping quality for the first score in a vector of scores.
/// Optionally includes a vector of implicit counts >= 1 for the scores, but
/// only 1 count can apply toward the mapping quality.
int32_t compute_first_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities = nullptr) const;
/// Computes mapping quality for the optimal score in a vector of scores.
/// Optionally includes a vector of implicit counts >= 1 for the scores, but
/// only 1 count can apply toward the mapping quality.
int32_t compute_max_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities = nullptr) const;
/// Computes mapping quality for a group of scores in a vector of scores (group given by indexes).
/// Optionally includes a vector of implicit counts >= 1 for the score, but the mapping quality is always
/// calculated as if each member of the group has a count of 1.
int32_t compute_group_mapping_quality(const vector<double>& scores, const vector<size_t>& group,
const vector<double>* multiplicities = nullptr) const;
/// Computes mapping quality for all of a vector of scores.
/// Optionally includes a vector of implicit counts >= 1 for the scores, but
/// only 1 count can apply toward the mapping quality.
vector<int32_t> compute_all_mapping_qualities(const vector<double>& scores,
const vector<double>* multiplicities = nullptr) const;
/// Returns the difference between an optimal and second-best alignment scores that would
/// result in this mapping quality using the fast mapping quality approximation
double mapping_quality_score_diff(double mapping_quality) const;
/// Convert a score to an unnormalized log likelihood for the sequence.
/// Requires log_base to have been set.
double score_to_unnormalized_likelihood_ln(double score) const;
/// The longest gap detectable from a read position without soft-clipping
size_t longest_detectable_gap(const Alignment& alignment, const string::const_iterator& read_pos) const;
/// The longest gap detectable from a read position without soft-clipping, for a generic read.
size_t longest_detectable_gap(size_t read_length, size_t read_pos) const;
/// The longest gap detectable from any read position without soft-clipping
size_t longest_detectable_gap(const Alignment& alignment) const;
/// The longest gap detectable from any read position without soft-clipping, for a generic read.
size_t longest_detectable_gap(size_t read_length) const;
/// Use the score values in the aligner to score the given alignment,
/// scoring gaps caused by jumping between between nodes using a custom
/// gap length estimation function (which takes the from position, the
/// to position, and a search limit in bp that happens to be the read
/// length).
///
/// May include full length bonus or not. TODO: bool flags are bad.
virtual int32_t score_discontiguous_alignment(const Alignment& aln,
const function<size_t(pos_t, pos_t, size_t)>& estimate_distance,
bool strip_bonuses = false) const;
/// Use the score values in the aligner to score the given alignment assuming
/// that there are no gaps between Mappings in the Path
virtual int32_t score_contiguous_alignment(const Alignment& aln,
bool strip_bonuses = false) const;
/// Without necessarily rescoring the entire alignment, return the score
/// of the given alignment with bonuses removed. Assumes that bonuses
/// are actually included in the score.
/// Needs to know if the alignment was pinned-end or not, and, if so, which end was pinned.
virtual int32_t remove_bonuses(const Alignment& aln, bool pinned = false, bool pin_left = false) const;
// members
DeletionAligner deletion_aligner;
int8_t* nt_table = nullptr;
int8_t* score_matrix = nullptr;
int8_t match;
int8_t mismatch;
int8_t gap_open;
int8_t gap_extension;
int8_t full_length_bonus;
// log of the base of the logarithm underlying the log-odds interpretation of the scores
double log_base = 0.0;
};
/**
* An ordinary aligner.
*/
class Aligner : public GSSWAligner {
public:
Aligner(const int8_t* _score_matrix = default_score_matrix,
int8_t _gap_open = default_gap_open,
int8_t _gap_extension = default_gap_extension,
int8_t _full_length_bonus = default_full_length_bonus,
double _gc_content = default_gc_content);
~Aligner(void) = default;
/// Store optimal local alignment against a graph in the Alignment object.
/// Gives the full length bonus separately on each end of the alignment.
void align(Alignment& alignment, const HandleGraph& g, bool traceback_aln) const;
/// Align against a subgraph induced by a subset of nodes. The topological
/// order of the handles in the subgraph must be provided.
/// Store optimal local alignment in the Alignment object.
/// Gives the full length bonus separately on each end of the alignment.
void align(Alignment& alignment, const HandleGraph& g,
const std::vector<handle_t>& topological_order) const;
/// store optimal alignment against a graph in the Alignment object with one end of the sequence
/// guaranteed to align to a source/sink node. if xdrop is selected, use the xdrop heuristic, which
/// does not guarantee an optimal alignment.
///
/// pinning left means that that the alignment starts with the first base of the read sequence and
/// the first base of a source node sequence, pinning right means that the alignment starts with
/// the final base of the read sequence and the final base of a sink node sequence
///
/// Gives the full length bonus only on the non-pinned end of the alignment.
void align_pinned(Alignment& alignment, const HandleGraph& g, bool pin_left, bool xdrop = false,
uint16_t xdrop_max_gap_length = default_xdrop_max_gap_length) const;
/// store the top scoring pinned alignments in the vector in descending score order up to a maximum
/// number of alignments (including the optimal one). if there are fewer than the maximum number in
/// the return value, then it includes all alignments with a positive score. the optimal alignment
/// will be stored in both the vector and in the main alignment object
void align_pinned_multi(Alignment& alignment, vector<Alignment>& alt_alignments, const HandleGraph& g,
bool pin_left, int32_t max_alt_alns) const;
/// store optimal global alignment against a graph within a specified band in the Alignment object
/// permissive banding auto detects the width of band needed so that paths can travel
/// through every node in the graph
void align_global_banded(Alignment& alignment, const HandleGraph& g,
int32_t band_padding = 0, bool permissive_banding = true,
const unordered_map<handle_t, bool>* left_align_strand = nullptr) const;
/// store top scoring global alignments in the vector in descending score order up to a maximum number
/// of alternate alignments (including the optimal alignment). if there are fewer than the maximum
/// number of alignments in the return value, then the vector contains all possible alignments. the
/// optimal alignment will be stored in both the vector and the original alignment object
void align_global_banded_multi(Alignment& alignment, vector<Alignment>& alt_alignments, const HandleGraph& g,
int32_t max_alt_alns, int32_t band_padding = 0, bool permissive_banding = true,
const unordered_map<handle_t, bool>* left_align_strand = nullptr) const;
/// xdrop aligner
void align_xdrop(Alignment& alignment, const HandleGraph& g, const vector<MaximalExactMatch>& mems,
bool reverse_complemented, uint16_t max_gap_length = default_xdrop_max_gap_length) const;
/// xdrop aligner, but with a precomputed topological order on the graph, which need not include
/// all of the graph's handles and which may contain both orientations of a handle
void align_xdrop(Alignment& alignment, const HandleGraph& g, const vector<handle_t>& order,
const vector<MaximalExactMatch>& mems, bool reverse_complemented,
uint16_t max_gap_length = default_xdrop_max_gap_length) const;
int32_t score_exact_match(const Alignment& aln, size_t read_offset, size_t length) const;
int32_t score_exact_match(const string& sequence, const string& base_quality) const;
int32_t score_exact_match(string::const_iterator seq_begin, string::const_iterator seq_end,
string::const_iterator base_qual_begin) const;
int32_t score_exact_match(const string& sequence) const;
int32_t score_exact_match(string::const_iterator seq_begin, string::const_iterator seq_end) const;
int32_t score_mismatch(string::const_iterator seq_begin, string::const_iterator seq_end,
string::const_iterator base_qual_begin) const;
/// Score a mismatch given just the length. Only possible since we ignore qualities.
/// Return value is SIGNED, and almost certainly NEGATIVE
int32_t score_mismatch(size_t length) const;
int32_t score_full_length_bonus(bool left_side, string::const_iterator seq_begin,
string::const_iterator seq_end,
string::const_iterator base_qual_begin) const;
int32_t score_full_length_bonus(bool left_side, const Alignment& alignment) const;
int32_t score_partial_alignment(const Alignment& alignment, const HandleGraph& graph, const path_t& path,
string::const_iterator seq_begin, bool no_read_end_scoring = false) const;
private:
// internal function interacting with gssw for pinned and local alignment
void align_internal(Alignment& alignment, vector<Alignment>* multi_alignments, const HandleGraph& g,
bool pinned, bool pin_left, int32_t max_alt_alns,
bool traceback_aln) const;
// members
vector<XdropAligner> xdrops;
};
/**
* An aligner that uses read base qualities to adjust its scores and alignments.
*/
class QualAdjAligner : public GSSWAligner {
public:
QualAdjAligner(const int8_t* _score_matrix = default_score_matrix,
int8_t _gap_open = default_gap_open,
int8_t _gap_extension = default_gap_extension,
int8_t _full_length_bonus = default_full_length_bonus,
double _gc_content = default_gc_content);
~QualAdjAligner(void);
// base quality adjusted counterparts to functions of same name from Aligner
void align(Alignment& alignment, const HandleGraph& g, bool traceback_aln) const;
void align_global_banded(Alignment& alignment, const HandleGraph& g,
int32_t band_padding = 0, bool permissive_banding = true,
const unordered_map<handle_t, bool>* left_align_strand = nullptr) const;
void align_pinned(Alignment& alignment, const HandleGraph& g, bool pin_left, bool xdrop = false,
uint16_t xdrop_max_gap_length = default_xdrop_max_gap_length) const;
void align_global_banded_multi(Alignment& alignment, vector<Alignment>& alt_alignments, const HandleGraph& g,
int32_t max_alt_alns, int32_t band_padding = 0, bool permissive_banding = true,
const unordered_map<handle_t, bool>* left_align_strand = nullptr) const;
void align_pinned_multi(Alignment& alignment, vector<Alignment>& alt_alignments, const HandleGraph& g,
bool pin_left, int32_t max_alt_alns) const;
void align_xdrop(Alignment& alignment, const HandleGraph& g, const vector<MaximalExactMatch>& mems,
bool reverse_complemented, uint16_t max_gap_length = default_xdrop_max_gap_length) const;
void align_xdrop(Alignment& alignment, const HandleGraph& g, const vector<handle_t>& order,
const vector<MaximalExactMatch>& mems, bool reverse_complemented,
uint16_t max_gap_length = default_xdrop_max_gap_length) const;
int32_t score_exact_match(const Alignment& aln, size_t read_offset, size_t length) const;
int32_t score_exact_match(const string& sequence, const string& base_quality) const;
int32_t score_exact_match(string::const_iterator seq_begin, string::const_iterator seq_end,
string::const_iterator base_qual_begin) const;
int32_t score_mismatch(string::const_iterator seq_begin, string::const_iterator seq_end,
string::const_iterator base_qual_begin) const;
int32_t score_full_length_bonus(bool left_side, string::const_iterator seq_begin,
string::const_iterator seq_end,
string::const_iterator base_qual_begin) const;
int32_t score_full_length_bonus(bool left_side, const Alignment& alignment) const;
int32_t score_partial_alignment(const Alignment& alignment, const HandleGraph& graph, const path_t& path,
string::const_iterator seq_begin, bool no_read_end_scoring = false) const;
protected:
int8_t* qual_adjusted_matrix(const int8_t* score_matrix, double gc_content, uint32_t max_qual) const;
int8_t* qual_adjusted_bonuses(int8_t _full_length_bonus, uint32_t max_qual) const;
// internal function interacting with gssw for pinned and local alignment
void align_internal(Alignment& alignment, vector<Alignment>* multi_alignments, const HandleGraph& g,
bool pinned, bool pin_left, int32_t max_alt_alns,
bool traceback_aln) const;
int8_t* qual_adj_full_length_bonuses = nullptr;
// members
vector<QualAdjXdropAligner> xdrops;
};
/**
* Holds a set of alignment scores, and has methods to produce aligners of various types on demand, using those scores.
* Provides a get_aligner() method to get ahold of a useful, possibly quality-adjusted Aligner.
* Base functionality that is shared between alignment and surjections
*/
class AlignerClient {
protected:
/// Create an AlignerClient, which creates the default aligner instances,
/// which can depend on a GC content estimate.
AlignerClient(double gc_content_estimate = vg::default_gc_content);
/// Get the appropriate aligner to use, based on
/// adjust_alignments_for_base_quality. By setting have_qualities to false,
/// you can force the non-quality-adjusted aligner, for reads that lack
/// quality scores.
const GSSWAligner* get_aligner(bool have_qualities = true) const;
// Sometimes you really do need the two kinds of aligners, to pass to code
// that expects one or the other.
const QualAdjAligner* get_qual_adj_aligner() const;
const Aligner* get_regular_aligner() const;
public:
/// Set all the aligner scoring parameters and create the stored aligner instances.
virtual void set_alignment_scores(int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus);
/// Set the algner scoring parameters and create the stored aligner instances. The
/// stream should contain a 4 x 4 whitespace-separated substitution matrix (in the
/// order ACGT)
virtual void set_alignment_scores(std::istream& matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus);
/// Set the algner scoring parameters and create the stored aligner instances. The
/// score matrix should by a 4 x 4 array in the order (ACGT).
/// Other overloads of set_alignment_scores all call this one.
virtual void set_alignment_scores(const int8_t* score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus);
/// Allocates an array to hold a 4x4 substitution matrix and returns it
static int8_t* parse_matrix(std::istream& matrix_stream);
bool adjust_alignments_for_base_quality = false; // use base quality adjusted alignments
private:
// GSSW aligners
unique_ptr<QualAdjAligner> qual_adj_aligner;
unique_ptr<Aligner> regular_aligner;
// GC content estimate that we need for building the aligners.
double gc_content_estimate;
};
} // end namespace vg
#endif