Skip to content

Commit 5e119e9

Browse files
Allow different prefilter modes
1 parent 5edc79b commit 5e119e9

File tree

5 files changed

+132
-15
lines changed

5 files changed

+132
-15
lines changed

data/workflow/blastp.sh

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,33 @@ fail() {
55
exit 1
66
}
77

8+
abspath() {
9+
if [ -d "$1" ]; then
10+
(cd "$1"; pwd)
11+
elif [ -f "$1" ]; then
12+
if [ -z "${1##*/*}" ]; then
13+
echo "$(cd "${1%/*}"; pwd)/${1##*/}"
14+
else
15+
echo "$(pwd)/$1"
16+
fi
17+
elif [ -d "$(dirname "$1")" ]; then
18+
echo "$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
19+
fi
20+
}
21+
22+
fake_pref() {
23+
QDB="$1"
24+
TDB="$2"
25+
RES="$3"
26+
# create link to data file which contains a list of all targets that should be aligned
27+
ln -s "$(abspath "${TDB}.index")" "${RES}"
28+
# create new index repeatedly pointing to same entry
29+
INDEX_SIZE="$(wc -c < "${TDB}.index")"
30+
awk -v size="$INDEX_SIZE" '{ print $1"\t0\t"size; }' "${QDB}.index" > "${RES}.index"
31+
# create dbtype (7)
32+
awk 'BEGIN { printf("%c%c%c%c",7,0,0,0); exit; }' > "${RES}.dbtype"
33+
}
34+
835
notExists() {
936
[ ! -f "$1" ]
1037
}
@@ -27,14 +54,23 @@ ALN_RES_MERGE="$TMP_PATH/aln_0"
2754
while [ "$STEP" -lt "$STEPS" ]; do
2855
SENS_PARAM=SENSE_${STEP}
2956
eval SENS="\$$SENS_PARAM"
30-
# call prefilter module
57+
58+
# 1. Prefilter hits
3159
if notExists "$TMP_PATH/pref_$STEP.dbtype"; then
32-
# shellcheck disable=SC2086
33-
$RUNNER "$MMSEQS" prefilter "$INPUT" "$TARGET" "$TMP_PATH/pref_$STEP" $PREFILTER_PAR -s "$SENS" \
34-
|| fail "Prefilter died"
60+
if [ "$PREFMODE" = "EXHAUSTIVE" ]; then
61+
fake_pref "${INPUT}" "${TARGET}" "$TMP_PATH/pref_$STEP"
62+
elif [ "$PREFMODE" = "UNGAPPED" ]; then
63+
# shellcheck disable=SC2086
64+
$RUNNER "$MMSEQS" ungappedprefilter "$INPUT" "$TARGET" "$TMP_PATH/pref_$STEP" $UNGAPPEDPREFILTER_PAR \
65+
|| fail "Ungapped prefilter died"
66+
else
67+
# shellcheck disable=SC2086
68+
$RUNNER "$MMSEQS" prefilter "$INPUT" "$TARGET" "$TMP_PATH/pref_$STEP" $PREFILTER_PAR -s "$SENS" \
69+
|| fail "Prefilter died"
70+
fi
3571
fi
3672

37-
# call alignment module
73+
# 2. alignment module
3874
if [ "$STEPS" -eq 1 ]; then
3975
if notExists "$3.dbtype"; then
4076
# shellcheck disable=SC2086

data/workflow/blastpgp.sh

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,33 @@ fail() {
55
exit 1
66
}
77

8+
abspath() {
9+
if [ -d "$1" ]; then
10+
(cd "$1"; pwd)
11+
elif [ -f "$1" ]; then
12+
if [ -z "${1##*/*}" ]; then
13+
echo "$(cd "${1%/*}"; pwd)/${1##*/}"
14+
else
15+
echo "$(pwd)/$1"
16+
fi
17+
elif [ -d "$(dirname "$1")" ]; then
18+
echo "$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
19+
fi
20+
}
21+
22+
fake_pref() {
23+
QDB="$1"
24+
TDB="$2"
25+
RES="$3"
26+
# create link to data file which contains a list of all targets that should be aligned
27+
ln -s "$(abspath "${TDB}.index")" "${RES}"
28+
# create new index repeatedly pointing to same entry
29+
INDEX_SIZE="$(wc -c < "${TDB}.index")"
30+
awk -v size="$INDEX_SIZE" '{ print $1"\t0\t"size; }' "${QDB}.index" > "${RES}.index"
31+
# create dbtype (7)
32+
awk 'BEGIN { printf("%c%c%c%c",7,0,0,0); exit; }' > "${RES}.dbtype"
33+
}
34+
835
notExists() {
936
[ ! -f "$1" ]
1037
}
@@ -28,15 +55,26 @@ STEP=0
2855
while [ "$STEP" -lt "$NUM_IT" ]; do
2956
# call prefilter module
3057
if notExists "$TMP_PATH/pref_tmp_${STEP}.done"; then
31-
PARAM="PREFILTER_PAR_$STEP"
32-
eval TMP="\$$PARAM"
58+
if [ "$PREFMODE" = "EXHAUSTIVE" ]; then
59+
TMP=""
60+
PREF="fake_pref"
61+
elif [ "$PREFMODE" = "UNGAPPED" ]; then
62+
PARAM="UNGAPPEDPREFILTER_PAR_$STEP"
63+
eval TMP="\$$PARAM"
64+
PREF="${MMSEQS} ungappedprefilter"
65+
else
66+
PARAM="PREFILTER_PAR_$STEP"
67+
eval TMP="\$$PARAM"
68+
PREF="${MMSEQS} prefilter"
69+
fi
70+
3371
if [ "$STEP" -eq 0 ]; then
3472
# shellcheck disable=SC2086
35-
$RUNNER "$MMSEQS" prefilter "$QUERYDB" "$2" "$TMP_PATH/pref_$STEP" ${TMP} \
73+
$RUNNER $PREF "$QUERYDB" "$2" "$TMP_PATH/pref_$STEP" ${TMP} \
3674
|| fail "Prefilter died"
3775
else
3876
# shellcheck disable=SC2086
39-
$RUNNER "$MMSEQS" prefilter "$QUERYDB" "$2" "$TMP_PATH/pref_tmp_$STEP" ${TMP} \
77+
$RUNNER $PREF "$QUERYDB" "$2" "$TMP_PATH/pref_tmp_$STEP" ${TMP} \
4078
|| fail "Prefilter died"
4179
fi
4280
touch "$TMP_PATH/pref_tmp_${STEP}.done"

src/commons/Parameters.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ Parameters::Parameters():
163163
PARAM_NUM_ITERATIONS(PARAM_NUM_ITERATIONS_ID, "--num-iterations", "Search iterations", "Number of iterative profile search iterations", typeid(int), (void *) &numIterations, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_PROFILE),
164164
PARAM_START_SENS(PARAM_START_SENS_ID, "--start-sens", "Start sensitivity", "Start sensitivity", typeid(float), (void *) &startSens, "^[0-9]*(\\.[0-9]+)?$"),
165165
PARAM_SENS_STEPS(PARAM_SENS_STEPS_ID, "--sens-steps", "Search steps", "Number of search steps performed from --start-sens to -s", typeid(int), (void *) &sensSteps, "^[1-9]{1}$"),
166+
PARAM_PREF_MODE(PARAM_PREF_MODE_ID,"--prefilter-mode", "Prefilter mode", "prefilter mode: 0: kmer/ungapped 1: ungapped, 2: nofilter",typeid(int), (void *) &prefMode, "^[0-2]{1}$"),
166167
PARAM_EXHAUSTIVE_SEARCH(PARAM_EXHAUSTIVE_SEARCH_ID, "--exhaustive-search", "Exhaustive search mode", "For bigger profile DB, run iteratively the search by greedily swapping the search results", typeid(bool), (void *) &exhaustiveSearch, "", MMseqsParameter::COMMAND_PROFILE | MMseqsParameter::COMMAND_EXPERT),
167168
PARAM_EXHAUSTIVE_SEARCH_FILTER(PARAM_EXHAUSTIVE_SEARCH_FILTER_ID, "--exhaustive-search-filter", "Filter results during exhaustive search", "Filter result during search: 0: do not filter, 1: filter", typeid(int), (void *) &exhaustiveFilterMsa, "^[0-1]{1}$", MMseqsParameter::COMMAND_ALIGN | MMseqsParameter::COMMAND_EXPERT),
168169
@@ -1253,6 +1254,7 @@ Parameters::Parameters():
12531254
searchworkflow.push_back(&PARAM_NUM_ITERATIONS);
12541255
searchworkflow.push_back(&PARAM_START_SENS);
12551256
searchworkflow.push_back(&PARAM_SENS_STEPS);
1257+
searchworkflow.push_back(&PARAM_PREF_MODE);
12561258
searchworkflow.push_back(&PARAM_EXHAUSTIVE_SEARCH);
12571259
searchworkflow.push_back(&PARAM_EXHAUSTIVE_SEARCH_FILTER);
12581260
searchworkflow.push_back(&PARAM_STRAND);
@@ -2260,6 +2262,7 @@ void Parameters::setDefaults() {
22602262

22612263
// search workflow
22622264
numIterations = 1;
2265+
prefMode = PREF_MODE_KMER;
22632266
startSens = 4;
22642267
sensSteps = 1;
22652268
exhaustiveSearch = false;

src/commons/Parameters.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,11 @@ class Parameters {
306306
static const int ID_MODE_KEYS = 0;
307307
static const int ID_MODE_LOOKUP = 1;
308308

309+
// prefilter mode
310+
static const int PREF_MODE_KMER = 0;
311+
static const int PREF_MODE_UNGAPPED = 1;
312+
static const int PREF_MODE_EXHAUSTIVE = 2;
313+
309314
// unpackdb
310315
static const int UNPACK_NAME_KEY = 0;
311316
static const int UNPACK_NAME_ACCESSION = 1;
@@ -450,6 +455,7 @@ class Parameters {
450455

451456
// SEARCH WORKFLOW
452457
int numIterations;
458+
int prefMode;
453459
float startSens;
454460
int sensSteps;
455461
bool exhaustiveSearch;
@@ -872,6 +878,7 @@ class Parameters {
872878
PARAMETER(PARAM_NUM_ITERATIONS)
873879
PARAMETER(PARAM_START_SENS)
874880
PARAMETER(PARAM_SENS_STEPS)
881+
PARAMETER(PARAM_PREF_MODE)
875882
PARAMETER(PARAM_EXHAUSTIVE_SEARCH)
876883
PARAMETER(PARAM_EXHAUSTIVE_SEARCH_FILTER)
877884
PARAMETER(PARAM_STRAND)

src/workflow/Search.cpp

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,8 @@ int search(int argc, const char **argv, const Command& command) {
256256
}
257257
}
258258

259+
260+
259261
const bool isUngappedMode = par.alignmentMode == Parameters::ALIGNMENT_MODE_UNGAPPED;
260262
if (isUngappedMode && (searchMode & (Parameters::SEARCH_MODE_FLAG_QUERY_PROFILE |Parameters::SEARCH_MODE_FLAG_TARGET_PROFILE ))) {
261263
par.printUsageMessage(command, MMseqsParameter::COMMAND_ALIGN | MMseqsParameter::COMMAND_PREFILTER);
@@ -316,6 +318,19 @@ int search(int argc, const char **argv, const Command& command) {
316318
} else {
317319
cmd.addVariable("ALIGN_MODULE", "align");
318320
}
321+
322+
switch(par.prefMode){
323+
case Parameters::PREF_MODE_KMER:
324+
cmd.addVariable("PREFMODE", "KMER");
325+
break;
326+
case Parameters::PREF_MODE_UNGAPPED:
327+
cmd.addVariable("PREFMODE", "UNGAPPED");
328+
break;
329+
case Parameters::PREF_MODE_EXHAUSTIVE:
330+
cmd.addVariable("PREFMODE", "EXHAUSTIVE");
331+
break;
332+
}
333+
319334
cmd.addVariable("REMOVE_TMP", par.removeTmpFiles ? "TRUE" : NULL);
320335
std::string program;
321336
cmd.addVariable("RUNNER", par.runner.c_str());
@@ -334,7 +349,11 @@ int search(int argc, const char **argv, const Command& command) {
334349
par.covMode = Util::swapCoverageMode(par.covMode);
335350
size_t maxResListLen = par.maxResListLen;
336351
par.maxResListLen = std::max((size_t)300, queryDbSize);
337-
cmd.addVariable("PREFILTER_PAR", par.createParameterString(par.prefilter).c_str());
352+
if(par.prefMode == Parameters::PREF_MODE_KMER){
353+
cmd.addVariable("PREFILTER_PAR", par.createParameterString(par.prefilter).c_str());
354+
} else if (par.prefMode == Parameters::PREF_MODE_UNGAPPED) {
355+
cmd.addVariable("UNGAPPEDPREFILTER_PAR", par.createParameterString(par.ungappedprefilter).c_str());
356+
}
338357
par.maxResListLen = maxResListLen;
339358
double originalEvalThr = par.evalThr;
340359
par.evalThr = std::numeric_limits<double>::max();
@@ -385,8 +404,13 @@ int search(int argc, const char **argv, const Command& command) {
385404
if (i == (par.numIterations - 1)) {
386405
par.evalThr = originalEval;
387406
}
388-
cmd.addVariable(std::string("PREFILTER_PAR_" + SSTR(i)).c_str(),
389-
par.createParameterString(par.prefilter).c_str());
407+
if (par.prefMode == Parameters::PREF_MODE_KMER) {
408+
cmd.addVariable(std::string("PREFILTER_PAR_" + SSTR(i)).c_str(),
409+
par.createParameterString(par.prefilter).c_str());
410+
} else if (par.prefMode == Parameters::PREF_MODE_UNGAPPED) {
411+
cmd.addVariable(std::string("UNGAPPEDPREFILTER_PAR_" + SSTR(i)).c_str(),
412+
par.createParameterString(par.ungappedprefilter).c_str());
413+
}
390414
if (isUngappedMode) {
391415
par.rescoreMode = Parameters::RESCORE_MODE_ALIGNMENT;
392416
cmd.addVariable(std::string("ALIGNMENT_PAR_" + SSTR(i)).c_str(),
@@ -439,8 +463,13 @@ int search(int argc, const char **argv, const Command& command) {
439463
par.evalThr = originalEval;
440464
}
441465

442-
cmd.addVariable(std::string("PREFILTER_PAR_" + SSTR(i)).c_str(),
443-
par.createParameterString(par.prefilter).c_str());
466+
if (par.prefMode == Parameters::PREF_MODE_KMER) {
467+
cmd.addVariable(std::string("PREFILTER_PAR_" + SSTR(i)).c_str(),
468+
par.createParameterString(par.prefilter).c_str());
469+
} else if (par.prefMode == Parameters::PREF_MODE_UNGAPPED) {
470+
cmd.addVariable(std::string("UNGAPPEDPREFILTER_PAR_" + SSTR(i)).c_str(),
471+
par.createParameterString(par.ungappedprefilter).c_str());
472+
}
444473
if (isUngappedMode) {
445474
par.rescoreMode = Parameters::RESCORE_MODE_ALIGNMENT;
446475
cmd.addVariable(std::string("ALIGNMENT_PAR_" + SSTR(i)).c_str(),
@@ -487,7 +516,11 @@ int search(int argc, const char **argv, const Command& command) {
487516
prefilterWithoutS.push_back(par.prefilter[i]);
488517
}
489518
}
490-
cmd.addVariable("PREFILTER_PAR", par.createParameterString(prefilterWithoutS).c_str());
519+
if (par.prefMode == Parameters::PREF_MODE_KMER) {
520+
cmd.addVariable("PREFILTER_PAR", par.createParameterString(prefilterWithoutS).c_str());
521+
} else if (par.prefMode == Parameters::PREF_MODE_UNGAPPED) {
522+
cmd.addVariable("UNGAPPEDPREFILTER_PAR", par.createParameterString(par.ungappedprefilter).c_str());
523+
}
491524
if (isUngappedMode) {
492525
par.rescoreMode = Parameters::RESCORE_MODE_ALIGNMENT;
493526
cmd.addVariable("ALIGNMENT_PAR", par.createParameterString(par.rescorediagonal).c_str());

0 commit comments

Comments
 (0)