Skip to content

Commit

Permalink
hook up to qual adj dozeu interface
Browse files Browse the repository at this point in the history
jeizenga committed Oct 12, 2020
1 parent eaa0456 commit c5e4d93
Showing 7 changed files with 76 additions and 72 deletions.
2 changes: 1 addition & 1 deletion deps/dozeu
Submodule dozeu updated 1 files
+67 −36 dozeu.h
39 changes: 20 additions & 19 deletions src/aligner.cpp
Original file line number Diff line number Diff line change
@@ -634,10 +634,7 @@ void GSSWAligner::compute_mapping_quality(vector<Alignment>& alignments,
double maybe_mq_threshold,
double identity_weight) const {

if (log_base <= 0.0) {
cerr << "error:[Aligner] must call init_mapping_quality before computing mapping qualities" << endl;
exit(EXIT_FAILURE);
}
assert(log_base > 0.0);

if (alignments.empty()) {
return;
@@ -738,10 +735,7 @@ void GSSWAligner::compute_paired_mapping_quality(pair<vector<Alignment>, vector<
double maybe_mq_threshold,
double identity_weight) const {

if (log_base <= 0.0) {
cerr << "error:[Aligner] must call init_mapping_quality before computing mapping qualities" << endl;
exit(EXIT_FAILURE);
}
assert(log_base > 0.0);

size_t size = min(alignment_pairs.first.size(),
alignment_pairs.second.size());
@@ -1008,7 +1002,7 @@ Aligner::Aligner(const int8_t* _score_matrix,
int num_threads = get_thread_count();
xdrops.reserve(num_threads);
for (size_t i = 0; i < num_threads; ++i) {
xdrops.emplace_back(_score_matrix, _gap_open, _gap_extension, _full_length_bonus);
xdrops.emplace_back(_score_matrix, _gap_open, _gap_extension);
}
}

@@ -1338,7 +1332,7 @@ void Aligner::align_pinned(Alignment& alignment, const HandleGraph& g, bool pin_
}
else {
// do the alignment
xdrop.align_pinned(alignment, overlay, pin_left, xdrop_max_gap_length);
xdrop.align_pinned(alignment, overlay, pin_left, full_length_bonus, xdrop_max_gap_length);

if (overlay.performed_duplications()) {
// the overlay is not a strict subset of the underlying graph, so we may
@@ -1481,7 +1475,8 @@ void Aligner::align_global_banded_multi(Alignment& alignment, vector<Alignment>&
void Aligner::align_xdrop(Alignment& alignment, const HandleGraph& g, const vector<MaximalExactMatch>& mems,
bool reverse_complemented, uint16_t max_gap_length) const
{
align_xdrop(alignment, g, algorithms::lazier_topological_order(&g), mems, reverse_complemented, max_gap_length);
align_xdrop(alignment, g, algorithms::lazier_topological_order(&g), mems, reverse_complemented,
max_gap_length);
}

void Aligner::align_xdrop(Alignment& alignment, const HandleGraph& g, const vector<handle_t>& order,
@@ -1491,7 +1486,7 @@ void Aligner::align_xdrop(Alignment& alignment, const HandleGraph& g, const vect
// for every alignment, which meshes poorly with its stack implementation. We achieve
// thread-safety by having one per thread, which makes this method const-ish.
XdropAligner& xdrop = const_cast<XdropAligner&>(xdrops[omp_get_thread_num()]);
xdrop.align(alignment, g, order, mems, reverse_complemented, max_gap_length);
xdrop.align(alignment, g, order, mems, reverse_complemented, full_length_bonus, max_gap_length);
if (!alignment.has_path() && mems.empty()) {
// dozeu couldn't find an alignment, probably because it's seeding heuristic failed
// we'll just fall back on GSSW
@@ -1615,7 +1610,7 @@ QualAdjAligner::QualAdjAligner(const int8_t* _score_matrix,
int num_threads = get_thread_count();
xdrops.reserve(num_threads);
for (size_t i = 0; i < num_threads; ++i) {
xdrops.emplace_back(_score_matrix, score_matrix, _gap_open, _gap_extension, _full_length_bonus);
xdrops.emplace_back(_score_matrix, score_matrix, _gap_open, _gap_extension);
}
}

@@ -1956,9 +1951,6 @@ void QualAdjAligner::align_pinned(Alignment& alignment, const HandleGraph& g, bo
// thread-safety by having one per thread, which makes this method const-ish.
QualAdjXdropAligner& xdrop = const_cast<QualAdjXdropAligner&>(xdrops[omp_get_thread_num()]);

// dozeu declines to produce an alignment when the gap is set to 0
xdrop_max_gap_length = max<uint16_t>(xdrop_max_gap_length, 1);

// wrap the graph so that empty pinning points are handled correctly
DozeuPinningOverlay overlay(&g, !pin_left);
if (overlay.get_node_count() == 0 && g.get_node_count() > 0) {
@@ -1988,9 +1980,14 @@ void QualAdjAligner::align_pinned(Alignment& alignment, const HandleGraph& g, bo
});
}
else {
//

xdrop.align_pinned(alignment, overlay, pin_left, xdrop_max_gap_length);
// dozeu declines to produce an alignment when the gap is set to 0
xdrop_max_gap_length = max<uint16_t>(xdrop_max_gap_length, 1);

// get the quality adjusted bonus
int8_t bonus = qual_adj_full_length_bonuses[pin_left ? alignment.quality().back() : alignment.quality().front()];

xdrop.align_pinned(alignment, overlay, pin_left, bonus, xdrop_max_gap_length);

if (overlay.performed_duplications()) {
// the overlay is not a strict subset of the underlying graph, so we may
@@ -2136,7 +2133,11 @@ void QualAdjAligner::align_xdrop(Alignment& alignment, const HandleGraph& g, con
// for every alignment, which meshes poorly with its stack implementation. We achieve
// thread-safety by having one per thread, which makes this method const-ish.
QualAdjXdropAligner& xdrop = const_cast<QualAdjXdropAligner&>(xdrops[omp_get_thread_num()]);
xdrop.align(alignment, g, order, mems, reverse_complemented, max_gap_length);

// get the quality adjusted bonus
int8_t bonus = qual_adj_full_length_bonuses[reverse_complemented ? alignment.quality().front() : alignment.quality().back()];

xdrop.align(alignment, g, order, mems, reverse_complemented, bonus, max_gap_length);
if (!alignment.has_path() && mems.empty()) {
// dozeu couldn't find an alignment, probably because it's seeding heuristic failed
// we'll just fall back on GSSW
2 changes: 1 addition & 1 deletion src/aligner.hpp
Original file line number Diff line number Diff line change
@@ -421,7 +421,7 @@ namespace vg {
string::const_iterator seq_begin, bool no_read_end_scoring = false) const;


private:
protected:

int8_t* qual_adjusted_matrix(const int8_t* score_matrix, double gc_content, uint32_t max_qual) const;

34 changes: 19 additions & 15 deletions src/dozeu_interface.cpp
Original file line number Diff line number Diff line change
@@ -144,7 +144,7 @@ DozeuInterface::graph_pos_s DozeuInterface::calculate_max_position(const Ordered

pair<DozeuInterface::graph_pos_s, bool> DozeuInterface::scan_seed_position(const OrderedGraph& graph, const Alignment& alignment,
bool direction, vector<const dz_forefront_s*>& forefronts,
uint16_t max_gap_length)
int8_t full_length_bonus, uint16_t max_gap_length)
{
const string& query_seq = alignment.sequence();
const string& query_qual = alignment.quality();
@@ -158,8 +158,8 @@ pair<DozeuInterface::graph_pos_s, bool> DozeuInterface::scan_seed_position(const
}

const dz_query_s* packed_query = (direction
? pack_query_reverse(pack_seq, pack_qual, scan_len)
: pack_query_forward(pack_seq, pack_qual, scan_len)
? pack_query_reverse(pack_seq, pack_qual, full_length_bonus, scan_len)
: pack_query_forward(pack_seq, pack_qual, full_length_bonus, scan_len)
);

// make a root forefront
@@ -604,14 +604,15 @@ void DozeuInterface::debug_print(const Alignment& alignment, const OrderedGraph&
* Then we extend the head seed backing-downstream, and trace that back to find the optimal alignment.
*/
void DozeuInterface::align(Alignment& alignment, const HandleGraph& graph, const vector<MaximalExactMatch>& mems,
bool reverse_complemented, uint16_t max_gap_length)
bool reverse_complemented, int8_t full_length_bonus, uint16_t max_gap_length)
{
vector<handle_t> topological_order = algorithms::lazy_topological_order(&graph);
return align(alignment, graph, topological_order, mems, reverse_complemented, max_gap_length);
}

void DozeuInterface::align(Alignment& alignment, const HandleGraph& graph, const vector<handle_t>& order,
const vector<MaximalExactMatch>& mems, bool reverse_complemented, uint16_t max_gap_length)
const vector<MaximalExactMatch>& mems, bool reverse_complemented,
int8_t full_length_bonus, uint16_t max_gap_length)
{

const OrderedGraph ordered_graph(graph, order);
@@ -635,7 +636,8 @@ void DozeuInterface::align(Alignment& alignment, const HandleGraph& graph, const

// scan seed position mems is empty
bool scan_success;
tie(head_pos, scan_success) = scan_seed_position(ordered_graph, alignment, direction, forefronts, max_gap_length);
tie(head_pos, scan_success) = scan_seed_position(ordered_graph, alignment, direction, forefronts,
full_length_bonus, max_gap_length);
if (!scan_success) {
// we failed to find a seed, so we will not attempt an alignment
// clear the path just in case we're realigning a GAM
@@ -657,8 +659,8 @@ void DozeuInterface::align(Alignment& alignment, const HandleGraph& graph, const

// pack query (upward)
const dz_query_s* packed_query_seq_up = (direction
? pack_query_reverse(pack_seq, pack_qual, seed_pos.query_offset)
: pack_query_forward(pack_seq, pack_qual, query_seq.size() - seed_pos.query_offset)
? pack_query_reverse(pack_seq, pack_qual, full_length_bonus, seed_pos.query_offset)
: pack_query_forward(pack_seq, pack_qual, full_length_bonus, query_seq.size() - seed_pos.query_offset)
);
// upward extension
head_pos = calculate_max_position(ordered_graph, seed_pos,
@@ -669,7 +671,7 @@ void DozeuInterface::align(Alignment& alignment, const HandleGraph& graph, const
// fprintf(stderr, "head_node_index(%lu), rpos(%lu, %u), qpos(%u), direction(%d)\n", head_pos.node_index, head_pos.node_index, head_pos.ref_offset, head_pos.query_offset, direction);

// Now that we have determined head_pos, do the downward alignment from there, and the traceback.
align_downward(alignment, ordered_graph, {head_pos}, reverse_complemented, forefronts, max_gap_length);
align_downward(alignment, ordered_graph, {head_pos}, reverse_complemented, forefronts, full_length_bonus, max_gap_length);

#ifdef DEBUG
if (mems.empty()) {
@@ -680,8 +682,9 @@ void DozeuInterface::align(Alignment& alignment, const HandleGraph& graph, const
// bench_end(bench);
}

void DozeuInterface::align_downward(Alignment &alignment, const OrderedGraph& graph, const vector<graph_pos_s>& head_positions,
bool left_to_right, vector<const dz_forefront_s*>& forefronts, uint16_t max_gap_length)
void DozeuInterface::align_downward(Alignment& alignment, const OrderedGraph& graph, const vector<graph_pos_s>& head_positions,
bool left_to_right, vector<const dz_forefront_s*>& forefronts,
int8_t full_length_bonus, uint16_t max_gap_length)
{

// we're now allowing multiple graph start positions, but not multiple read start positions
@@ -702,8 +705,8 @@ void DozeuInterface::align_downward(Alignment &alignment, const OrderedGraph& gr

// pack query (downward)
const dz_query_s* packed_query_seq_dn = (left_to_right
? pack_query_forward(pack_seq, pack_qual, qlen - head_positions.front().query_offset)
: pack_query_reverse(pack_seq, pack_qual, head_positions.front().query_offset)
? pack_query_forward(pack_seq, pack_qual, full_length_bonus, qlen - head_positions.front().query_offset)
: pack_query_reverse(pack_seq, pack_qual, full_length_bonus, head_positions.front().query_offset)
);

// downward extension
@@ -716,7 +719,8 @@ void DozeuInterface::align_downward(Alignment &alignment, const OrderedGraph& gr
flush();
}

void DozeuInterface::align_pinned(Alignment& alignment, const HandleGraph& g, bool pin_left, uint16_t max_gap_length)
void DozeuInterface::align_pinned(Alignment& alignment, const HandleGraph& g, bool pin_left,
int8_t full_length_bonus, uint16_t max_gap_length)
{
// Compute our own topological order
vector<handle_t> order = algorithms::lazy_topological_order(&g);
@@ -756,7 +760,7 @@ void DozeuInterface::align_pinned(Alignment& alignment, const HandleGraph& g, bo
vector<const dz_forefront_s*> forefronts(ordered.order.size(), nullptr);

// Do the left-to-right alignment from the fixed head_pos seed, and then do the traceback.
align_downward(alignment, ordered, head_positions, pin_left, forefronts, max_gap_length);
align_downward(alignment, ordered, head_positions, pin_left, forefronts, full_length_bonus, max_gap_length);
}

/**
33 changes: 17 additions & 16 deletions src/dozeu_interface.hpp
Original file line number Diff line number Diff line change
@@ -90,16 +90,17 @@ class DozeuInterface {
* true, and the last occurrence of the first MEM otherwise.
*/
void align(Alignment& alignment, const HandleGraph& graph, const vector<MaximalExactMatch>& mems,
bool reverse_complemented, uint16_t max_gap_length = default_xdrop_max_gap_length);
bool reverse_complemented, int8_t full_length_bonus,
uint16_t max_gap_length = default_xdrop_max_gap_length);

/**
* Same as above except using a precomputed topological order, which
* need not include all handles in the graph, and which may contain both
* orientations of a handle.
*/
void align(Alignment& alignment, const HandleGraph& graph, const vector<handle_t>& order,
const vector<MaximalExactMatch>& mems, bool reverse_complemented,
uint16_t max_gap_length = default_xdrop_max_gap_length);
const vector<MaximalExactMatch>& mems, bool reverse_complemented,
int8_t full_length_bonus, uint16_t max_gap_length = default_xdrop_max_gap_length);

/**
* Compute a pinned alignment, where the start (pin_left=true) or end
@@ -111,7 +112,7 @@ class DozeuInterface {
* order; whichever comes first/last ends up being used for the pin.
*/
void align_pinned(Alignment& alignment, const HandleGraph& g, bool pin_left,
uint16_t max_gap_length = default_xdrop_max_gap_length);
int8_t full_length_bonus, uint16_t max_gap_length = default_xdrop_max_gap_length);

protected:
/**
@@ -142,8 +143,10 @@ class DozeuInterface {

// wrappers for dozeu functions that can be used to toggle between between quality
// adjusted and standard alignments
virtual dz_query_s* pack_query_forward(const char* seq, const uint8_t* qual, size_t len) = 0;
virtual dz_query_s* pack_query_reverse(const char* seq, const uint8_t* qual, size_t len) = 0;
virtual dz_query_s* pack_query_forward(const char* seq, const uint8_t* qual,
int8_t full_length_bonus, size_t len) = 0;
virtual dz_query_s* pack_query_reverse(const char* seq, const uint8_t* qual,
int8_t full_length_bonus, size_t len) = 0;
virtual const dz_forefront_s* scan(const dz_query_s* query, const dz_forefront_s** forefronts,
size_t n_forefronts, const char* ref, int32_t rlen, uint32_t rid,
uint16_t xt) = 0;
@@ -178,7 +181,7 @@ class DozeuInterface {
/// If the scan failed, then the alignment should not be attempted.
pair<graph_pos_s, bool> scan_seed_position(const OrderedGraph& graph, const Alignment& alignment,
bool direction, vector<const dz_forefront_s*>& forefronts,
uint16_t max_gap_length);
int8_t full_length_bonus, uint16_t max_gap_length);

/// Append an edit at the end of the current mapping array.
/// Returns the length passed in.
@@ -234,7 +237,7 @@ class DozeuInterface {
void align_downward(Alignment &alignment, const OrderedGraph& graph,
const vector<graph_pos_s>& head_positions,
bool left_to_right, vector<const dz_forefront_s*>& forefronts,
uint16_t max_gap_length);
int8_t full_length_bonus, uint16_t max_gap_length);


/// The core dozeu class, which does the alignments
@@ -251,17 +254,16 @@ class XdropAligner : public DozeuInterface {
/// Main constructor. Expects a 4 x 4 score matrix.
XdropAligner(const int8_t* _score_matrix,
int8_t _gap_open,
int8_t _gap_extension,
int32_t _full_length_bonus);
int8_t _gap_extension);

// see DozeuInterface::align and DozeuInterface::align_pinned below for alignment
// interface

private:

// implementations of virtual functions from DozeuInterface
dz_query_s* pack_query_forward(const char* seq, const uint8_t* qual, size_t len);
dz_query_s* pack_query_reverse(const char* seq, const uint8_t* qual, size_t len);
dz_query_s* pack_query_forward(const char* seq, const uint8_t* qual, int8_t full_length_bonus, size_t len);
dz_query_s* pack_query_reverse(const char* seq, const uint8_t* qual, int8_t full_length_bonus, size_t len);
const dz_forefront_s* scan(const dz_query_s* query, const dz_forefront_s** forefronts,
size_t n_forefronts, const char* ref, int32_t rlen, uint32_t rid,
uint16_t xt);
@@ -297,8 +299,7 @@ class QualAdjXdropAligner : public DozeuInterface {
QualAdjXdropAligner(const int8_t* _score_matrix,
const int8_t* _qual_adj_score_matrix,
int8_t _gap_open,
int8_t _gap_extension,
int32_t _full_length_bonus);
int8_t _gap_extension);


// see DozeuInterface::align and DozeuInterface::align_pinned below for alignment
@@ -307,8 +308,8 @@ class QualAdjXdropAligner : public DozeuInterface {
private:

// implementations of virtual functions from DozeuInterface
dz_query_s* pack_query_forward(const char* seq, const uint8_t* qual, size_t len);
dz_query_s* pack_query_reverse(const char* seq, const uint8_t* qual, size_t len);
dz_query_s* pack_query_forward(const char* seq, const uint8_t* qual, int8_t full_length_bonus, size_t len);
dz_query_s* pack_query_reverse(const char* seq, const uint8_t* qual, int8_t full_length_bonus, size_t len);
const dz_forefront_s* scan(const dz_query_s* query, const dz_forefront_s** forefronts,
size_t n_forefronts, const char* ref, int32_t rlen, uint32_t rid,
uint16_t xt);
Loading
Oops, something went wrong.

0 comments on commit c5e4d93

Please sign in to comment.