Skip to content

Commit

Permalink
use aligner interface that scales scores
Browse files Browse the repository at this point in the history
jeizenga committed Sep 10, 2021
1 parent fd6ac5c commit c7069f5
Showing 4 changed files with 31 additions and 21 deletions.
18 changes: 14 additions & 4 deletions src/aligner.cpp
Original file line number Diff line number Diff line change
@@ -449,13 +449,13 @@ double GSSWAligner::first_mapping_quality_exact(const vector<double>& scaled_sco
}

double GSSWAligner::first_mapping_quality_approx(const vector<double>& scaled_scores,
const vector<double>* multiplicities) {
const vector<double>* multiplicities) {
return maximum_mapping_quality_approx(scaled_scores, nullptr, multiplicities);
}

double GSSWAligner::maximum_mapping_quality_exact(const vector<double>& scaled_scores, size_t* max_idx_out,
const vector<double>* multiplicities) {

// TODO: this isn't very well-named now that it also supports computing non-maximum
// mapping qualities

@@ -726,8 +726,8 @@ void GSSWAligner::compute_mapping_quality(vector<Alignment>& alignments,
}
}

int32_t GSSWAligner::compute_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities) const {
int32_t GSSWAligner::compute_max_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities) const {

vector<double> scaled_scores(scores.size());
for (size_t i = 0; i < scores.size(); i++) {
@@ -738,6 +738,16 @@ int32_t GSSWAligner::compute_mapping_quality(const vector<double>& scores, bool
: maximum_mapping_quality_exact(scaled_scores, &idx, multiplicities));
}

int32_t GSSWAligner::compute_first_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities) const {
vector<double> scaled_scores(scores.size());
for (size_t i = 0; i < scores.size(); i++) {
scaled_scores[i] = log_base * scores[i];
}
return (int32_t) (fast_approximation ? first_mapping_quality_approx(scaled_scores, multiplicities)
: first_mapping_quality_exact(scaled_scores, multiplicities));
}

int32_t GSSWAligner::compute_group_mapping_quality(const vector<double>& scores, const vector<size_t>& group,
const vector<double>* multiplicities) const {

10 changes: 8 additions & 2 deletions src/aligner.hpp
Original file line number Diff line number Diff line change
@@ -226,11 +226,17 @@ namespace vg {
double maybe_mq_threshold,
double identity_weight) const;

/// Computes mapping quality for the first score in a vector of scores.
/// Optionally includes a vector of implicit counts >= 1 for the scores, but
/// the first mapping quality is always calculated as if it multiplicity is 1.
int32_t compute_first_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities = nullptr) const;

/// Computes mapping quality for the optimal score in a vector of scores.
/// Optionally includes a vector of implicit counts >= 1 for the scores, but
/// the mapping quality is always calculated as if it multiplicity is 1.
int32_t compute_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities = nullptr) const;
int32_t compute_max_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities = nullptr) const;

/// Computes mapping quality for a group of scores in a vector of scores (group given by indexes).
/// Optionally includes a vector of implicit counts >= 1 for the score, but the mapping quality is always
10 changes: 5 additions & 5 deletions src/minimizer_mapper.cpp
Original file line number Diff line number Diff line change
@@ -636,7 +636,7 @@ vector<Alignment> MinimizerMapper::map(Alignment& aln) {
// Compute MAPQ if not unmapped. Otherwise use 0 instead of the 50% this would give us.
// Use exact mapping quality
double mapq = (mappings.front().path().mapping_size() == 0) ? 0 :
get_regular_aligner()->compute_mapping_quality(scores, false) ;
get_regular_aligner()->compute_max_mapping_quality(scores, false) ;

#ifdef print_minimizer_table
double uncapped_mapq = mapq;
@@ -2009,7 +2009,7 @@ pair<vector<Alignment>, vector<Alignment>> MinimizerMapper::map_paired(Alignment
// If all of the alignment pairs were found with rescue, use the multiplicities to determine mapq
// Use exact mapping quality
uncapped_mapq = scores[0] == 0 ? 0 :
get_regular_aligner()->compute_mapping_quality(scores, false, multiplicities);
get_regular_aligner()->compute_max_mapping_quality(scores, false, multiplicities);

//Cap mapq at 1 - 1 / # equivalent or better fragment clusters, including self
if (better_cluster_count_by_mappings.front() > 1) {
@@ -2020,8 +2020,8 @@ pair<vector<Alignment>, vector<Alignment>> MinimizerMapper::map_paired(Alignment

//If one alignment was duplicated in other pairs, cap the mapq for that alignment at the mapq
//of the group of duplicated alignments. Always compute this even if not quite sensible.
mapq_score_groups[0] = get_regular_aligner()->compute_mapping_quality(scores_group_1, false);
mapq_score_groups[1] = get_regular_aligner()->compute_mapping_quality(scores_group_2, false);
mapq_score_groups[0] = get_regular_aligner()->compute_max_mapping_quality(scores_group_1, false);
mapq_score_groups[1] = get_regular_aligner()->compute_max_mapping_quality(scores_group_2, false);

for (auto read_num : {0, 1}) {
// For each fragment
@@ -2062,7 +2062,7 @@ pair<vector<Alignment>, vector<Alignment>> MinimizerMapper::map_paired(Alignment
if (types.front() == unpaired) {
//If this pair came from two different fragment cluster, then cap mapq at the mapq
//from only unpaired alignments of this read
mapq_cap = std::min(mapq_cap, (double)get_regular_aligner()->compute_mapping_quality(unpaired_scores[read_num], false));
mapq_cap = std::min(mapq_cap, (double)get_regular_aligner()->compute_max_mapping_quality(unpaired_scores[read_num], false));
}

// Find the MAPQ to cap
14 changes: 4 additions & 10 deletions src/multipath_mapper.cpp
Original file line number Diff line number Diff line change
@@ -729,9 +729,9 @@ namespace vg {
identify_start_subpaths(rescue_multipath_aln);

vector<double> score(1, aln.score());
int32_t solo_mapq = mapq_scaling_factor * aligner->compute_mapping_quality(score,
mapping_quality_method == None
|| mapping_quality_method == Approx);
int32_t solo_mapq = mapq_scaling_factor * aligner->compute_max_mapping_quality(score,
mapping_quality_method == None
|| mapping_quality_method == Approx);
int32_t adjusted_mapq = min<int32_t>(solo_mapq, min(max_mapping_quality, multipath_aln.mapping_quality()));
rescue_multipath_aln.set_mapping_quality(adjusted_mapq);

@@ -5914,13 +5914,7 @@ namespace vg {
use_exact = true;
}

int32_t raw_mapq;
if (use_exact) {
raw_mapq = aligner->first_mapping_quality_exact(scores, multiplicities);
}
else {
raw_mapq = aligner->first_mapping_quality_approx(scores, multiplicities);
}
int32_t raw_mapq = aligner->compute_first_mapping_quality(scores, !use_exact, multiplicities);

// arbitrary scaling, seems to help performance
raw_mapq *= mapq_scaling_factor;

0 comments on commit c7069f5

Please sign in to comment.