Skip to content

Commit

Permalink
r846: added --hard-mask-level for lh3#244
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Sep 27, 2018
1 parent c57b59f commit 1077b7d
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 8 deletions.
4 changes: 3 additions & 1 deletion hit.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a)
r->split |= 1, r2->split |= 2;
}

void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff) // and compute mm_reg1_t::subsc
void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff, int hard_mask_level) // and compute mm_reg1_t::subsc
{
int i, j, k, *w;
uint64_t *cov;
Expand All @@ -118,6 +118,7 @@ void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff
for (i = 1, k = 1; i < n; ++i) {
mm_reg1_t *ri = &r[i];
int si = ri->qs, ei = ri->qe, n_cov = 0, uncov_len = 0;
if (hard_mask_level) goto skip_uncov;
for (j = 0; j < k; ++j) { // traverse existing primary hits to find overlapping hits
mm_reg1_t *rp = &r[w[j]];
int sj = rp->qs, ej = rp->qe;
Expand All @@ -137,6 +138,7 @@ void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff
}
if (ei > x) uncov_len += ei - x;
}
skip_uncov:
for (j = 0; j < k; ++j) { // traverse existing primary hits again
mm_reg1_t *rp = &r[w[j]];
int sj = rp->qs, ej = rp->qe, min, max, ol;
Expand Down
4 changes: 3 additions & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "ketopt.h"

#define MM_VERSION "2.12-r845-dirty"
#define MM_VERSION "2.12-r846-dirty"

#ifdef __linux__
#include <sys/resource.h>
Expand Down Expand Up @@ -59,6 +59,7 @@ static ko_longopt_t long_options[] = {
{ "paf-no-hit", ko_no_argument, 333 },
{ "split-prefix", ko_required_argument, 334 },
{ "no-end-flt", ko_no_argument, 335 },
{ "hard-mask-level",ko_no_argument, 336 },
{ "help", ko_no_argument, 'h' },
{ "max-intron-len", ko_required_argument, 'G' },
{ "version", ko_no_argument, 'V' },
Expand Down Expand Up @@ -188,6 +189,7 @@ int main(int argc, char *argv[])
else if (c == 333) opt.flag |= MM_F_PAF_NO_HIT; // --paf-no-hit
else if (c == 334) opt.split_prefix = o.arg; // --split-prefix
else if (c == 335) opt.flag |= MM_F_NO_END_FLT; // --no-end-flt
else if (c == 336) opt.flag |= MM_F_HARD_MLEVEL; // --hard-mask-level
else if (c == 314) { // --frag
yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1);
} else if (c == 315) { // --secondary
Expand Down
8 changes: 4 additions & 4 deletions map.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ static mm128_t *collect_seed_hits(void *km, const mm_mapopt_t *opt, int max_occ,
static void chain_post(const mm_mapopt_t *opt, int max_chain_gap_ref, const mm_idx_t *mi, void *km, int qlen, int n_segs, const int *qlens, int *n_regs, mm_reg1_t *regs, mm128_t *a)
{
if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s)
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b);
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL);
if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, max_chain_gap_ref, mi->k*2, opt->best_n, n_segs, qlens, n_regs, regs);
if (!(opt->flag & (MM_F_SPLICE|MM_F_SR|MM_F_NO_LJOIN))) // long join not working well without primary chains
Expand All @@ -261,7 +261,7 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k
if (!(opt->flag & MM_F_CIGAR)) return regs;
regs = mm_align_skeleton(km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs()
if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s)
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b);
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL);
mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
mm_set_sam_pri(*n_regs, regs);
}
Expand Down Expand Up @@ -359,7 +359,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
seg = mm_seg_gen(b->km, hash, n_segs, qlens, n_regs0, regs0, n_regs, regs, a); // split fragment chain to separate segment chains
free(regs0);
for (i = 0; i < n_segs; ++i) {
mm_set_parent(b->km, opt->mask_level, n_regs[i], regs[i], opt->a * 2 + opt->b); // update mm_reg1_t::parent
mm_set_parent(b->km, opt->mask_level, n_regs[i], regs[i], opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL); // update mm_reg1_t::parent
regs[i] = align_regs(opt, mi, b->km, qlens[i], seqs[i], &n_regs[i], regs[i], seg[i].a);
mm_set_mapq(b->km, n_regs[i], regs[i], opt->min_chain_score, opt->a, rep_len, is_sr);
}
Expand Down Expand Up @@ -502,7 +502,7 @@ static void merge_hits(step_t *s)
}
}
mm_hit_sort(km, &s->n_reg[k], s->reg[k]);
mm_set_parent(km, opt->mask_level, s->n_reg[k], s->reg[k], opt->a * 2 + opt->b);
mm_set_parent(km, opt->mask_level, s->n_reg[k], s->reg[k], opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL);
if (!(opt->flag & MM_F_ALL_CHAINS)) {
mm_select_sub(km, opt->pri_ratio, s->p->mi->k*2, opt->best_n, &s->n_reg[k], s->reg[k]);
mm_set_sam_pri(s->n_reg[k], s->reg[k]);
Expand Down
1 change: 1 addition & 0 deletions minimap.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#define MM_F_EQX 0x4000000 // use =/X instead of M
#define MM_F_PAF_NO_HIT 0x8000000 // output unmapped reads to PAF
#define MM_F_NO_END_FLT 0x10000000
#define MM_F_HARD_MLEVEL 0x20000000

#define MM_I_HPC 0x1
#define MM_I_NO_SEQ 0x2
Expand Down
7 changes: 6 additions & 1 deletion minimap2.1
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ Primarily used for all-vs-all read overlapping.
.BI -p \ FLOAT
Minimal secondary-to-primary score ratio to output secondary mappings [0.8].
Between two chains overlaping over half of the shorter chain (controlled by
.BR --mask-level ),
.BR -M ),
the chain with a lower score is secondary to the chain with a higher score.
If the ratio of the scores is below
.IR FLOAT ,
Expand Down Expand Up @@ -226,6 +226,11 @@ Mark as secondary a chain that overlaps with a better chain by
.I FLOAT
or more of the shorter chain [0.5]
.TP
.B --hard-mask-level
Honor option
.B -M
and disable a heurstic to save unmapped subsequences.
.TP
.BI --max-chain-skip \ INT
A heuristics that stops chaining early [50]. Minimap2 uses dynamic programming
for chaining. The time complexity is quadratic in the number of seeds. This
Expand Down
2 changes: 1 addition & 1 deletion mmpriv.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a);
void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs);
int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a);
int mm_set_sam_pri(int n, mm_reg1_t *r);
void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff);
void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff, int hard_mask_level);
void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r);
void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r);
void mm_filter_regs(const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs);
Expand Down

0 comments on commit 1077b7d

Please sign in to comment.