Skip to content

Commit 5edc79b

Browse files
authored
Add ppos format-output to convertalis for count of positive substitution scores (#723)
Co-authored-by: dohyun-s <ts25>
1 parent fa6c093 commit 5edc79b

File tree

3 files changed

+33
-1
lines changed

3 files changed

+33
-1
lines changed

src/commons/Parameters.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ Parameters::Parameters():
102102
PARAM_V(PARAM_V_ID, "-v", "Verbosity", "Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info", typeid(int), (void *) &verbosity, "^[0-3]{1}$", MMseqsParameter::COMMAND_COMMON),
103103
// convertalignments
104104
PARAM_FORMAT_MODE(PARAM_FORMAT_MODE_ID, "--format-mode", "Alignment format", "Output format:\n0: BLAST-TAB\n1: SAM\n2: BLAST-TAB + query/db length\n3: Pretty HTML\n4: BLAST-TAB + column headers\nBLAST-TAB (0) and BLAST-TAB + column headers (4) support custom output formats (--format-output)", typeid(int), (void *) &formatAlignmentMode, "^[0-4]{1}$"),
105-
PARAM_FORMAT_OUTPUT(PARAM_FORMAT_OUTPUT_ID, "--format-output", "Format alignment output", "Choose comma separated list of output columns from: query,target,evalue,gapopen,pident,fident,nident,qstart,qend,qlen\ntstart,tend,tlen,alnlen,raw,bits,cigar,qseq,tseq,qheader,theader,qaln,taln,qframe,tframe,mismatch,qcov,tcov\nqset,qsetid,tset,tsetid,taxid,taxname,taxlineage,qorfstart,qorfend,torfstart,torfend", typeid(std::string), (void *) &outfmt, ""),
105+
PARAM_FORMAT_OUTPUT(PARAM_FORMAT_OUTPUT_ID, "--format-output", "Format alignment output", "Choose comma separated list of output columns from: query,target,evalue,gapopen,pident,fident,nident,qstart,qend,qlen\ntstart,tend,tlen,alnlen,raw,bits,cigar,qseq,tseq,qheader,theader,qaln,taln,qframe,tframe,mismatch,qcov,tcov\nqset,qsetid,tset,tsetid,taxid,taxname,taxlineage,qorfstart,qorfend,torfstart,torfend,ppos", typeid(std::string), (void *) &outfmt, ""),
106106
PARAM_DB_OUTPUT(PARAM_DB_OUTPUT_ID, "--db-output", "Database output", "Return a result DB instead of a text file", typeid(bool), (void *) &dbOut, "", MMseqsParameter::COMMAND_EXPERT),
107107
// --include-only-extendablediagonal
108108
PARAM_RESCORE_MODE(PARAM_RESCORE_MODE_ID, "--rescore-mode", "Rescore mode", "Rescore diagonals with:\n0: Hamming distance\n1: local alignment (score only)\n2: local alignment\n3: global alignment\n4: longest alignment fulfilling window quality criterion", typeid(int), (void *) &rescoreMode, "^[0-4]{1}$"),
@@ -2828,6 +2828,7 @@ std::vector<int> Parameters::getOutputFormat(int formatMode, const std::string &
28282828
else if (outformatSplit[i].compare("qorfend") == 0){ code = Parameters::OUTFMT_QORFEND;}
28292829
else if (outformatSplit[i].compare("torfstart") == 0){ code = Parameters::OUTFMT_TORFSTART;}
28302830
else if (outformatSplit[i].compare("torfend") == 0){ code = Parameters::OUTFMT_TORFEND;}
2831+
else if (outformatSplit[i].compare("ppos") == 0){ needSequences = true; needBacktrace = true; code = Parameters::OUTFMT_PPOS;}
28312832
else if (outformatSplit[i].compare("empty") == 0){ code = Parameters::OUTFMT_EMPTY;}
28322833
else {
28332834
Debug(Debug::ERROR) << "Format code " << outformatSplit[i] << " does not exist.";

src/commons/Parameters.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ class Parameters {
182182
static const int OUTFMT_TORFSTART = 37;
183183
static const int OUTFMT_TORFEND = 38;
184184
static const int OUTFMT_FIDENT = 39;
185+
static const int OUTFMT_PPOS = 40;
185186

186187
static const int INDEX_SUBSET_NORMAL = 0;
187188
static const int INDEX_SUBSET_NO_HEADERS = 1;

src/util/convertalignments.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,36 @@ int convertalignments(int argc, const char **argv, const Command &command) {
636636
case Parameters::OUTFMT_TORFEND:
637637
result.append(SSTR(res.dbOrfEndPos));
638638
break;
639+
case Parameters::OUTFMT_PPOS: {
640+
float pPositive = 0;
641+
int matchCount = 0;
642+
if (res.backtrace.empty() == false) {
643+
int qPos = 0;
644+
int tPos = 0;
645+
for (size_t pos = 0; pos < res.backtrace.size(); pos++) {
646+
switch (res.backtrace[pos]) {
647+
case 'M': {
648+
char qRes = queryProfile ? queryProfData[qPos] : querySeqData[qPos];
649+
char tRes = targetProfile ? targetProfData[tPos] : targetSeqData[tPos];
650+
pPositive += (subMat->subMatrix[subMat->aa2num[(int)qRes]][subMat->aa2num[(int)tRes]] > 0);
651+
matchCount += 1;
652+
qPos++;
653+
tPos++;
654+
break;
655+
}
656+
case 'D':
657+
qPos++;
658+
break;
659+
case 'I':
660+
tPos++;
661+
break;
662+
}
663+
}
664+
pPositive /= matchCount;
665+
}
666+
result.append(SSTR(pPositive));
667+
break;
668+
}
639669
}
640670
if (i < outcodes.size() - 1) {
641671
result.push_back('\t');

0 commit comments

Comments
 (0)