-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathPattern.cpp
122 lines (108 loc) · 2.58 KB
/
Pattern.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#include "Pattern.h"
#include <sstream> // stringstream
#include <cassert>
// Class-wide flag, off by default, selecting SOLiD color-code handling.
// Within this file it is read by truncate() (which drops the first two
// symbols of a read that starts with A/C/G/T when the flag is set) and by
// normalize() (which expects '0'-'3' and '.' only when the flag is set and
// picks '.' instead of 'N' as the replacement for invalid symbols).
bool Pattern::color = false; // SOLiD color codes?
// Construct a read from its identifier, sequence, quality string and name.
//
// id_    numeric identifier; also used as the name when name_ is empty.
// p_     sequence; stored normalized (see normalize()).
// q_     quality string; may be empty, otherwise it must end up with the
//        same length as the stored sequence (checked by the assertion below).
// name_  read name; an empty string selects the id-based fallback.
Pattern::Pattern(unsigned id_, std::string const & p_,
                 std::string const & q_,
                 std::string const & name_)
    : id(id_), p(p_), q(q_), name(name_), origq(q_),
      rc(false), truncated(0)
{
    // Unnamed reads are labelled with the decimal form of their id. This has
    // to happen before normalize(), whose warning message prints the name.
    if (name.empty())
    {
        std::ostringstream label;
        label << id;
        name = label.str();
    }

    // Normalize the sequence in place and remember the result as the baseline
    // that truncate() starts from on every call.
    normalize(p);
    origp = p;

    // A zero-length truncation still applies truncate()'s color-mode prefix
    // handling; it is skipped for an empty sequence.
    if (p.size() > 0)
        truncate(0);

    assert(q.empty() || q.size() == p.size());
}
// Restore the read from origp/origq and trim n symbols from both ends.
//
// When Pattern::color is set and the original sequence starts with a base
// letter (A/C/G/T), the first two symbols are removed from the sequence and,
// if qualities are present, from the quality string before trimming.
//
// If nothing would remain (2*n >= remaining sequence length) a warning is
// written to std::cerr, the sequence is set to the empty string and the
// function returns early: `truncated` is not updated and the reverse
// complement is not re-applied in that case.
//
// Otherwise `truncated` is set to n, and if the read was reverse-complemented
// before the call the reverse complement is re-applied to the freshly
// trimmed data.
//
// n  number of symbols to drop from each end.
void Pattern::truncate(std::size_t n)
{
    p = origp;
    q = origq;

    // Check adapter
    if (color && !p.empty()
        && (p[0] == 'A' || p[0] == 'C' || p[0] == 'G' || p[0] == 'T'))
    {
        // erase(0, 2) removes at most two characters and never throws here,
        // whereas substr(2) throws std::out_of_range on a string shorter
        // than two characters.
        p.erase(0, 2);
        // Trim the qualities themselves. Previously the trimmed quality
        // string was assigned to p, replacing the sequence with quality
        // characters and leaving q untrimmed.
        if (!q.empty())
            q.erase(0, 2);
    }

    if (2 * n >= p.size())
    {
        std::cerr << "Warning: truncated read " << name << " is empty!" << std::endl;
        p = "";
        return;
    }

    p = p.substr(n, p.size() - 2*n);
    if (!q.empty())
        q = q.substr(n, q.size() - 2*n);
    truncated = n;

    // The data was rebuilt in forward orientation above; restore the
    // orientation the read had before this call.
    if (rc)
    {
        rc = false;
        this->reverseComplement();
    }
}
// Flip the read to the opposite strand in place.
//
// Toggles the rc flag, reverses both the sequence and the quality string,
// and swaps A<->T and C<->G in the sequence. Any other symbol in the
// sequence is left as it is.
void Pattern::reverseComplement()
{
    rc = !rc;
    reverse(p);
    reverse(q);

    for (std::string::size_type i = 0; i < p.size(); ++i)
    {
        char &base = p[i];
        if (base == 'A')
            base = 'T';
        else if (base == 'C')
            base = 'G';
        else if (base == 'G')
            base = 'C';
        else if (base == 'T')
            base = 'A';
    }
}
// Normalize a sequence string in place.
//
//  - lower-case a/c/g/t/n become upper-case;
//  - upper-case A/C/G/T/N are kept;
//  - '0'-'3' and '.' are kept, with an assertion that Pattern::color is set;
//  - every other character is replaced by '.' when Pattern::color is set and
//    by 'N' otherwise.
//
// If at least one character had to be replaced, a single warning containing
// the read name and the already-normalized string is written to std::cerr.
//
// t  string to normalize; modified in place.
void Pattern::normalize(std::string &t)
{
    // Parallel tables: lowerBases[k] maps to upperBases[k].
    const std::string lowerBases("acgtn");
    const std::string upperBases("ACGTN");
    const std::string colorSymbols("0123.");

    bool sawInvalid = false;
    for (std::string::size_type i = 0; i < t.size(); ++i)
    {
        const char symbol = t[i];

        const std::string::size_type lowerPos = lowerBases.find(symbol);
        if (lowerPos != std::string::npos)
        {
            t[i] = upperBases[lowerPos];
            continue;
        }

        if (upperBases.find(symbol) != std::string::npos)
            continue; // already in normalized form

        if (colorSymbols.find(symbol) != std::string::npos)
        {
            assert(color);
            continue;
        }

        // Unknown symbol: substitute the placeholder for the current mode.
        t[i] = color ? '.' : 'N';
        sawInvalid = true;
    }

    if (sawInvalid)
        std::cerr << "Warning: read " << name << " contains invalid symbols: " << t << std::endl;
}