Skip to content

Commit 07ca4a7

Browse files
committed
Allow kseq_t to read sequences larger than 2^31 bytes
1 parent 3e43617 commit 07ca4a7

File tree

3 files changed: 17 additions and 20 deletions.

lib/ksw2/kseq.h

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
#include <ctype.h>
3232
#include <string.h>
3333
#include <stdlib.h>
34+
#include <stdint.h>
3435

3536
#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
3637
#define KS_SEP_TAB 1 // isspace() && !' '
@@ -39,8 +40,9 @@
3940

4041
#define __KS_TYPE(type_t) \
4142
typedef struct __kstream_t { \
42-
char *buf; \
43-
int begin, end, is_eof; \
43+
char *buf; \
44+
int64_t begin, end; \
45+
int is_eof; \
4446
size_t cur_buf_pos; \
4547
size_t newline; \
4648
type_t f; \
@@ -94,13 +96,13 @@ typedef struct __kstring_t {
9496
#endif
9597

9698
#define __KS_GETUNTIL(__read, __bufsize) \
97-
static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
99+
static int64_t ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
98100
{ \
99101
int gotany = 0; \
100102
if (dret) *dret = 0; \
101103
str->l = append? str->l : 0; \
102104
for (;;) { \
103-
int i; \
105+
int64_t i; \
104106
if (ks_err(ks)) return -3; \
105107
if (ks->begin >= ks->end) { \
106108
if (!ks->is_eof) { \
@@ -146,7 +148,7 @@ typedef struct __kstring_t {
146148
str->s[str->l] = '\0'; \
147149
return str->l; \
148150
} \
149-
static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
151+
static inline int64_t ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
150152
{ return ks_getuntil2(ks, delimiter, str, dret, 0); }
151153

152154
#define KSTREAM_INIT(type_t, __read, __bufsize) \
@@ -182,9 +184,10 @@ typedef struct __kstring_t {
182184
-3 error reading stream
183185
*/
184186
#define __KSEQ_READ(SCOPE) \
185-
SCOPE int kseq_read(kseq_t *seq) \
187+
SCOPE int64_t kseq_read(kseq_t *seq) \
186188
{ \
187-
int c,r; \
189+
int c; \
190+
int64_t r; \
188191
kstream_t *ks = seq->f; \
189192
ks->newline = 0; \
190193
if (seq->last_char == 0) { /* then jump to the next header line */ \
@@ -255,6 +258,6 @@ typedef struct __kstring_t {
255258
__KSEQ_TYPE(type_t) \
256259
extern kseq_t *kseq_init(type_t fd); \
257260
void kseq_destroy(kseq_t *ks); \
258-
int kseq_read(kseq_t *seq);
261+
int64_t kseq_read(kseq_t *seq);
259262

260263
#endif

src/commons/KSeqWrapper.cpp

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,7 @@ KSeqFile::KSeqFile(const char* fileName) {
1717

1818
bool KSeqFile::ReadEntry() {
1919
KSEQFILE::kseq_t* s = (KSEQFILE::kseq_t*) seq;
20-
int result = KSEQFILE::kseq_read(s);
21-
if (result < 0)
20+
if (KSEQFILE::kseq_read(s) < 0)
2221
return false;
2322
entry.headerOffset = s->headerOffset;
2423
entry.sequenceOffset = s->sequenceOffset;
@@ -51,8 +50,7 @@ KSeqStream::KSeqStream() {
5150

5251
bool KSeqStream::ReadEntry() {
5352
KSEQSTREAM::kseq_t* s = (KSEQSTREAM::kseq_t*) seq;
54-
int result = KSEQSTREAM::kseq_read(s);
55-
if (result < 0)
53+
if (KSEQSTREAM::kseq_read(s) < 0)
5654
return false;
5755

5856
entry.name = s->name;
@@ -90,8 +88,7 @@ KSeqGzip::KSeqGzip(const char* fileName) {
9088

9189
bool KSeqGzip::ReadEntry() {
9290
KSEQGZIP::kseq_t* s = (KSEQGZIP::kseq_t*) seq;
93-
int result = KSEQGZIP::kseq_read(s);
94-
if (result < 0)
91+
if (KSEQGZIP::kseq_read(s) < 0)
9592
return false;
9693

9794
entry.name = s->name;
@@ -135,8 +132,7 @@ KSeqBzip::KSeqBzip(const char* fileName) {
135132

136133
bool KSeqBzip::ReadEntry() {
137134
KSEQBZIP::kseq_t* s = (KSEQBZIP::kseq_t*) seq;
138-
int result = KSEQBZIP::kseq_read(s);
139-
if (result < 0)
135+
if (KSEQBZIP::kseq_read(s) < 0)
140136
return false;
141137

142138
entry.name = s->name;
@@ -209,8 +205,7 @@ KSeqBuffer::KSeqBuffer(const char* buffer, size_t length) {
209205

210206
bool KSeqBuffer::ReadEntry() {
211207
KSEQBUFFER::kseq_t* s = (KSEQBUFFER::kseq_t*) seq;
212-
int result = KSEQBUFFER::kseq_read(s);
213-
if (result < 0)
208+
if (KSEQBUFFER::kseq_read(s) < 0)
214209
return false;
215210
entry.headerOffset = s->headerOffset;
216211
entry.sequenceOffset = s->sequenceOffset;

src/test/TestAlignmentPerformance.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,9 +36,8 @@ std::vector<std::string> readData(std::string fasta_filename){
3636
FILE* fasta_file = fopen(fasta_filename.c_str(), "r");
3737
if(fasta_file == NULL) {std::cout << "Could not open " << fasta_filename<<std::endl; EXIT(1); }
3838
seq = kseq_init(fileno(fasta_file));
39-
int l;
4039
size_t entries_num = 0;
41-
while ((l = kseq_read(seq)) >= 0) {
40+
while (kseq_read(seq) >= 0) {
4241
if (entries_num > 1000)
4342
break;
4443
if (seq->seq.l > 500) {

0 commit comments

Comments
 (0)