forked from andreas-wilm/nimreadfq
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathreadfx.nim
109 lines (86 loc) · 2.54 KB
/
readfx.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import zip/zlib
# https://forum.nim-lang.org/t/2668
from os import splitPath
const kseqh = currentSourcePath().splitPath.head & "/readfx/kseq.h"
# https://github.com/nim-lang/nimble/issues/157
{.passL: "-lz".}
type
kstring_t = object
ll: int
m: int
s: ptr char
kstream_t = object
begin: int
endd: int
is_eof: int
kseq_t = object
name: kstring_t
comment: kstring_t
seq: kstring_t
qual: kstring_t
last_char: int
f: ptr kstream_t
gzFile = pointer
# convenience type for FastQ or Fasta records
FQRecordPtr* = object
name*: ptr char
comment*: ptr char# optional
sequence*: ptr char
quality*: ptr char# optional
FQRecord* = object
name*: string
comment*: string# optional
sequence*: string
quality*: string# optional
proc kseq_init*(fp: gzFile): ptr kseq_t {.header: kseqh, importc: "kseq_init".}
proc kseq_rewind*(seq: ptr kseq_t) {.header: kseqh, importc: "kseq_rewind".}
proc kseq_read*(seq: ptr kseq_t): int {.header: kseqh, importc: "kseq_read".}
iterator readFQPtr*(path: string): FQRecordPtr =
# - ptr char will be reused on next iteration
# - for stdin use "-" as path
# - gz[d]open default even for flat file format
var result: FQRecordPtr# 'result' not implicit in iterators
var fp: GzFile
if path == "-":
fp = gzdopen(0, "r")
else:
fp = gzopen(path, "r")
try:
doAssert fp != nil
except:
# Handle the exception here
echo "Download the test data first"
let rec = kseq_init(fp)
while true:
if kseq_read(rec) < 0:
break
result.name = rec.name.s
result.comment = rec.comment.s
result.sequence = rec.seq.s
result.quality = rec.qual.s
yield result
discard gzclose(fp)
iterator readFQ*(path: string): FQRecord =
var result: FQRecord# 'result' not implicit in iterators
for rec in readFQPtr(path):
result.name = $rec.name
result.comment = $rec.comment
result.sequence = $rec.sequence
result.quality = $rec.quality
yield result
proc fqfmt(name: string, comment: string, sequence: string, quality: string): string =
var fastq = false
var header = ">"
if len(quality) > 0:
fastq = true
header = "@"
result = header & name
if comment != "":
result = result & " " & comment
result = result & "\n" & sequence
if fastq:
result = result & "\n+\n" & quality
proc `$`*(rec: FQRecord): string =
return fqfmt(rec.name, rec.comment, rec.sequence, rec.quality)
proc `$`*(rec: FQRecordPtr): string =
return fqfmt($rec.name, $rec.comment, $rec.sequence, $rec.quality)