Skip to content

Commit df77d9e

Browse files
committed
Expose Smith-Waterman-based prefilter
1 parent 1d62fa0 commit df77d9e

File tree

5 files changed

+65
-3
lines changed

5 files changed

+65
-3
lines changed

src/CommandDeclarations.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ extern int profile2repseq(int argc, const char **argv, const Command& command);
9494
extern int proteinaln2nucl(int argc, const char **argv, const Command& command);
9595
extern int rescorediagonal(int argc, const char **argv, const Command& command);
9696
extern int ungappedprefilter(int argc, const char **argv, const Command& command);
97+
extern int gappedprefilter(int argc, const char **argv, const Command& command);
9798
extern int unpackdb(int argc, const char **argv, const Command& command);
9899
extern int rbh(int argc, const char **argv, const Command& command);
99100
extern int result2flat(int argc, const char **argv, const Command& command);

src/MMseqsBase.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,14 @@ std::vector<Command> baseCommands = {
583583
CITATION_MMSEQS2, {{"queryDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb },
584584
{"targetDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb },
585585
{"prefilterDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::prefilterDb }}},
586+
{"gappedprefilter", gappedprefilter, &par.gappedprefilter, COMMAND_PREFILTER,
587+
"Optimal Smith-Waterman-based prefiltering (slow)",
588+
NULL,
589+
"Martin Steinegger <[email protected]>",
590+
"<i:queryDB> <i:targetDB> <o:prefilterDB>",
591+
CITATION_MMSEQS2, {{"queryDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb },
592+
{"targetDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb },
593+
{"prefilterDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::prefilterDb }}},
586594
{"kmermatcher", kmermatcher, &par.kmermatcher, COMMAND_PREFILTER,
587595
"Find bottom-m-hashed k-mer matches within sequence DB",
588596
NULL,

src/commons/Parameters.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,23 @@ Parameters::Parameters():
449449
ungappedprefilter.push_back(&PARAM_COMPRESSED);
450450
ungappedprefilter.push_back(&PARAM_V);
451451
452+
// gappedprefilter
453+
gappedprefilter.push_back(&PARAM_SUB_MAT);
454+
gappedprefilter.push_back(&PARAM_GAP_OPEN);
455+
gappedprefilter.push_back(&PARAM_GAP_EXTEND);
456+
gappedprefilter.push_back(&PARAM_E);
457+
gappedprefilter.push_back(&PARAM_C);
458+
gappedprefilter.push_back(&PARAM_COV_MODE);
459+
gappedprefilter.push_back(&PARAM_NO_COMP_BIAS_CORR);
460+
gappedprefilter.push_back(&PARAM_NO_COMP_BIAS_CORR_SCALE);
461+
gappedprefilter.push_back(&PARAM_MIN_DIAG_SCORE);
462+
gappedprefilter.push_back(&PARAM_MAX_SEQS);
463+
gappedprefilter.push_back(&PARAM_TAXON_LIST);
464+
gappedprefilter.push_back(&PARAM_PRELOAD_MODE);
465+
gappedprefilter.push_back(&PARAM_THREADS);
466+
gappedprefilter.push_back(&PARAM_COMPRESSED);
467+
gappedprefilter.push_back(&PARAM_V);
468+
452469
// clustering
453470
clust.push_back(&PARAM_CLUSTER_MODE);
454471
clust.push_back(&PARAM_MAXITERATIONS);

src/commons/Parameters.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,7 @@ class Parameters {
758758
PARAMETER(PARAM_LOCAL_TMP)
759759
std::vector<MMseqsParameter*> prefilter;
760760
std::vector<MMseqsParameter*> ungappedprefilter;
761+
std::vector<MMseqsParameter*> gappedprefilter;
761762

762763
// alignment
763764
PARAMETER(PARAM_ALIGNMENT_MODE)

src/prefiltering/ungappedprefilter.cpp

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@
2020
#include <omp.h>
2121
#endif
2222

23-
24-
int ungappedprefilter(int argc, const char **argv, const Command &command) {
23+
int prefilterInternal(int argc, const char **argv, const Command &command, int mode) {
2524
Parameters &par = Parameters::getInstance();
2625
par.parseParameters(argc, argv, command, true, 0, 0);
2726
DBWriter resultWriter(par.db3.c_str(), par.db3Index.c_str(), 1, par.compressed, Parameters::DBTYPE_PREFILTER_RES);
@@ -141,7 +140,37 @@ int ungappedprefilter(int argc, const char **argv, const Command &command) {
141140
continue;
142141
}
143142

144-
int score = aligner.ungapped_alignment(tSeq.numSequence, tSeq.L);
143+
int score;
144+
if (mode == 0) {
145+
score = aligner.ungapped_alignment(tSeq.numSequence, tSeq.L);
146+
} else {
147+
std::string backtrace;
148+
s_align res;
149+
if (isIdentity) {
150+
res = aligner.scoreIdentical(
151+
tSeq.numSequence, tSeq.L, evaluer, Matcher::SCORE_ONLY, backtrace
152+
);
153+
} else {
154+
res = aligner.ssw_align(
155+
tSeq.numSequence,
156+
tSeq.numConsensusSequence,
157+
tSeq.getAlignmentProfile(),
158+
tSeq.L,
159+
backtrace,
160+
par.gapOpen.values.aminoacid(),
161+
par.gapExtend.values.aminoacid(),
162+
Matcher::SCORE_ONLY,
163+
par.evalThr,
164+
evaluer,
165+
par.covMode,
166+
par.covThr,
167+
par.correlationScoreWeight,
168+
qSeq.L / 2,
169+
tId
170+
);
171+
}
172+
score = res.score1;
173+
}
145174
bool hasDiagScore = (score > par.minDiagScoreThr);
146175
double evalue = 0.0;
147176
// check if evalThr != inf
@@ -199,4 +228,10 @@ int ungappedprefilter(int argc, const char **argv, const Command &command) {
199228
return 0;
200229
}
201230

231+
int ungappedprefilter(int argc, const char **argv, const Command &command) {
232+
return prefilterInternal(argc, argv, command, 0);
233+
}
202234

235+
int gappedprefilter(int argc, const char **argv, const Command &command) {
236+
return prefilterInternal(argc, argv, command, 1);
237+
}

0 commit comments

Comments
 (0)