@@ -44,34 +44,29 @@ int pairaln(int argc, const char **argv, const Command& command) {
4444 std::string output;
4545 output.reserve (100000 );
4646
47- unsigned int referenceDbKey = 0 ;
48- size_t id = alnDbr.getId (referenceDbKey);
49- if (id == UINT_MAX) {
50- break ;
51- }
52- char * data = alnDbr.getData (id, thread_idx);
53- Matcher::readAlignmentResults (result, data, true );
54- for (size_t resIdx = 0 ; resIdx < result.size (); ++resIdx) {
55- unsigned int taxon = mapping->lookup (result[resIdx].dbKey );
56- if (findPair.find (taxon) == findPair.end ()) {
57- findPair.emplace (taxon, 1 );
58- }
59- }
60-
6147 // find intersection between all proteins
6248 for (size_t i = 0 ; i < alnDbr.getSize (); i++) {
63- unsigned int dbKey = alnDbr.getDbKey (i);
64- if (dbKey == referenceDbKey) {
65- continue ;
66- }
6749 result.clear ();
6850 Matcher::readAlignmentResults (result, alnDbr.getData (i, thread_idx), true );
51+ for (size_t resIdx = 0 ; resIdx < result.size (); ++resIdx) {
52+ unsigned int taxon = mapping->lookup (result[resIdx].dbKey );
53+ // we don't want to introduce a new field, reuse existing unused field here
54+ result[resIdx].dbOrfStartPos = taxon;
55+ }
56+ std::stable_sort (result.begin (), result.end (), compareByTaxId);
57+ unsigned int prevTaxon = UINT_MAX;
6958 // find pairs
7059 for (size_t resIdx = 0 ; resIdx < result.size (); ++resIdx) {
7160 unsigned int taxon = mapping->lookup (result[resIdx].dbKey );
61+ if (taxon == prevTaxon){
62+ continue ;
63+ }
7264 if (findPair.find (taxon) != findPair.end ()) {
7365 findPair[taxon]++;
66+ }else {
67+ findPair.emplace (taxon, 1 );
7468 }
69+ prevTaxon = taxon;
7570 }
7671 }
7772
0 commit comments