Skip to content

Commit

Permalink
r1161: merged the simple and complex models
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Apr 7, 2023
1 parent 7ced0f1 commit 1834b1f
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 43 deletions.
46 changes: 8 additions & 38 deletions ksw2_exts2_sse.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
uint8_t *qr, *sf, *mem, *mem2 = 0;
__m128i q_, q2_, qe_, zero_, sc_mch_, sc_mis_, sc_N_, m1_;
__m128i *u, *v, *x, *y, *x2, *s, *p = 0, *donor, *acceptor;
const int sp0[4] = { 8, 15, 21, 30 };

ksw_reset_extz(ez);
if (m <= 1 || qlen <= 0 || tlen <= 0 || q2 <= q + e) return;
Expand Down Expand Up @@ -119,45 +118,16 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
memcpy(sf, target, tlen);

// set the donor and acceptor arrays. TODO: this assumes 0/1/2/3 encoding!
if ((flag & (KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV)) && !(flag & KSW_EZ_SPLICE_CMPLX)) {
int semi_cost = flag&KSW_EZ_SPLICE_FLANK? -noncan/2 : 0; // GTr or yAG is worth 0.5 bit; see PMID:18688272
memset(donor, -noncan, tlen_ * 16);
memset(acceptor, -noncan, tlen_ * 16);
if (!(flag & KSW_EZ_REV_CIGAR)) {
for (t = 0; t < tlen - 4; ++t) {
int can_type = 0; // type of canonical site: 0=none, 1=GT/AG only, 2=GTr/yAG
if ((flag & KSW_EZ_SPLICE_FOR) && target[t+1] == 2 && target[t+2] == 3) can_type = 1; // GTr...
if ((flag & KSW_EZ_SPLICE_REV) && target[t+1] == 1 && target[t+2] == 3) can_type = 1; // CTr...
if (can_type && (target[t+3] == 0 || target[t+3] == 2)) can_type = 2;
if (can_type) ((int8_t*)donor)[t] = can_type == 2? 0 : semi_cost;
}
for (t = 2; t < tlen; ++t) {
int can_type = 0;
if ((flag & KSW_EZ_SPLICE_FOR) && target[t-1] == 0 && target[t] == 2) can_type = 1; // ...yAG
if ((flag & KSW_EZ_SPLICE_REV) && target[t-1] == 0 && target[t] == 1) can_type = 1; // ...yAC
if (can_type && (target[t-2] == 1 || target[t-2] == 3)) can_type = 2;
if (can_type) ((int8_t*)acceptor)[t] = can_type == 2? 0 : semi_cost;
}
if (flag & (KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV)) {
const int sp0[4] = { 8, 15, 21, 30 };
int sp[4];
if (flag & KSW_EZ_SPLICE_CMPLX) {
for (t = 0; t < 4; ++t)
sp[t] = (int)((double)sp0[t] / 4. + .499);
} else {
for (t = 0; t < tlen - 4; ++t) {
int can_type = 0; // type of canonical site: 0=none, 1=GT/AG only, 2=GTr/yAG
if ((flag & KSW_EZ_SPLICE_FOR) && target[t+1] == 2 && target[t+2] == 0) can_type = 1; // GAy...
if ((flag & KSW_EZ_SPLICE_REV) && target[t+1] == 1 && target[t+2] == 0) can_type = 1; // CAy...
if (can_type && (target[t+3] == 1 || target[t+3] == 3)) can_type = 2;
if (can_type) ((int8_t*)donor)[t] = can_type == 2? 0 : semi_cost;
}
for (t = 2; t < tlen; ++t) {
int can_type = 0;
if ((flag & KSW_EZ_SPLICE_FOR) && target[t-1] == 3 && target[t] == 2) can_type = 1; // ...rTG
if ((flag & KSW_EZ_SPLICE_REV) && target[t-1] == 3 && target[t] == 1) can_type = 1; // ...rTC
if (can_type && (target[t-2] == 0 || target[t-2] == 2)) can_type = 2;
if (can_type) ((int8_t*)acceptor)[t] = can_type == 2? 0 : semi_cost;
}
sp[0] = flag&KSW_EZ_SPLICE_FLANK? noncan / 2 : 0;
sp[1] = sp[2] = sp[3] = noncan;
}
} else if (flag & (KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV)) { // use the complex splice model
int sp[4];
for (t = 0; t < 4; ++t)
sp[t] = (int)((double)sp0[t] / 4. + .499);
memset(donor, -sp[3], tlen_ * 16);
memset(acceptor, -sp[3], tlen_ * 16);
if (!(flag & KSW_EZ_REV_CIGAR)) {
Expand Down
8 changes: 4 additions & 4 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ static ko_longopt_t long_options[] = {
{ "min-dp-len", ko_required_argument, 308 },
{ "print-aln-seq", ko_no_argument, 309 },
{ "splice", ko_no_argument, 310 },
{ "splice-model", ko_no_argument, 311 },
{ "splice-model", ko_no_argument, 'J' },
{ "cost-non-gt-ag", ko_required_argument, 'C' },
{ "no-long-join", ko_no_argument, 312 },
{ "sr", ko_no_argument, 313 },
Expand Down Expand Up @@ -120,7 +120,7 @@ static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const

int main(int argc, char *argv[])
{
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:j:";
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:j:J";
ketopt_t o = KETOPT_INIT;
mm_mapopt_t opt;
mm_idxopt_t ipt;
Expand Down Expand Up @@ -187,6 +187,7 @@ int main(int argc, char *argv[])
else if (c == 'R') rg = o.arg;
else if (c == 'h') fp_help = stdout;
else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
else if (c == 'J') opt.flag |= MM_F_SPLICE_CMPLX; // --splice-model
else if (c == 'o') {
if (strcmp(o.arg, "-") != 0) {
if (freopen(o.arg, "wb", stdout) == NULL) {
Expand All @@ -205,7 +206,6 @@ int main(int argc, char *argv[])
else if (c == 308) opt.min_ksw_len = atoi(o.arg); // --min-dp-len
else if (c == 309) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ, n_threads = 1; // --print-aln-seq
else if (c == 310) opt.flag |= MM_F_SPLICE; // --splice
else if (c == 311) opt.flag |= MM_F_SPLICE_CMPLX; // --splice-model
else if (c == 312) opt.flag |= MM_F_NO_LJOIN; // --no-long-join
else if (c == 313) opt.flag |= MM_F_SR; // --sr
else if (c == 317) opt.end_bonus = atoi(o.arg); // --end-bonus
Expand Down Expand Up @@ -324,7 +324,7 @@ int main(int argc, char *argv[])
fprintf(fp_help, " -H use homopolymer-compressed k-mer (preferrable for PacBio)\n");
fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", ipt.k);
fprintf(fp_help, " -w INT minimizer window size [%d]\n", ipt.w);
fprintf(fp_help, " -j INT syncmer submer size (overriding -w) []\n");
// fprintf(fp_help, " -j INT syncmer submer size (overriding -w) []\n");
fprintf(fp_help, " -I NUM split index for every ~NUM input bases [4G]\n");
fprintf(fp_help, " -d FILE dump index to FILE []\n");
fprintf(fp_help, " Mapping:\n");
Expand Down
2 changes: 1 addition & 1 deletion minimap.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include <stdio.h>
#include <sys/types.h>

#define MM_VERSION "2.24-r1160-dirty"
#define MM_VERSION "2.24-r1161-dirty"

#define MM_F_NO_DIAG 0x001 // no exact diagonal hit
#define MM_F_NO_DUAL 0x002 // skip pairs where query name is lexicographically larger than target name
Expand Down

0 comments on commit 1834b1f

Please sign in to comment.