-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathesa.hxx
130 lines (120 loc) · 4.32 KB
/
esa.hxx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*
* esa.hxx
* Copyright (c) 2010 Daisuke Okanohara All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _ESA_HXX
#define _ESA_HXX
#include <vector>
#include <utility>
#include <cassert>
#include "sais.hxx"
namespace esaxx_private {
template<typename string_type, typename sarray_type, typename index_type>
index_type suffixtree(string_type T, sarray_type SA, sarray_type L, sarray_type R, sarray_type D, index_type n){
if (n == 0){
return 0;
}
sarray_type Psi = L;
Psi[SA[0]] = SA[n-1];
for (index_type i = 1; i < n; ++i){
Psi[SA[i]] = SA[i-1];
}
// Compare at most 2n log n charcters. Practically fastest
// "Permuted Longest-Common-Prefix Array", Juha Karkkainen, CPM 09
sarray_type PLCP = R;
index_type h = 0;
for (index_type i = 0; i < n; ++i){
index_type j = Psi[i];
while (i+h < n && j+h < n &&
T[i+h] == T[j+h]){
++h;
}
PLCP[i] = h;
if (h > 0) --h;
}
sarray_type H = L;
for (index_type i = 0; i < n; ++i){
H[i] = PLCP[SA[i]];
}
H[0] = -1;
std::vector<std::pair<index_type, index_type> > S;
S.push_back(std::make_pair((index_type)-1, (index_type)-1));
size_t nodeNum = 0;
for (index_type i = 0; ; ++i){
std::pair<index_type, index_type> cur (i, (i == n) ? -1 : H[i]);
std::pair<index_type, index_type> cand(S.back());
while (cand.second > cur.second){
if (i - cand.first > 1){
L[nodeNum] = cand.first;
R[nodeNum] = i;
D[nodeNum] = cand.second;
++nodeNum;
}
cur.first = cand.first;
S.pop_back();
cand = S.back();
}
if (cand.second < cur.second){
S.push_back(cur);
}
if (i == n) break;
S.push_back(std::make_pair(i, n - SA[i] + 1));
}
return nodeNum;
}
}
/**
* @brief Build an enhanced suffix array of a given string in linear time
* For an input text T, esaxx() builds an enhancd suffix array in linear time.
* i-th internal node is represented as a triple (L[i], R[i], D[i]);
* L[i] and R[i] is the left/right boundary of the suffix array as SA[L[i]....R[i]-1]
* D[i] is the depth of the internal node
* The number of internal node is at most N-1 and return the actual number by
* @param T[0...n-1] The input string. (random access iterator)
* @param SA[0...n-1] The output suffix array (random access iterator)
* @param L[0...n-1] The output left boundary of internal node (random access iterator)
* @param R[0...n-1] The output right boundary of internal node (random access iterator)
* @param D[0...n-1] The output depth of internal node (random access iterator)
* @param n The length of the input string
* @param k The alphabet size
* @pram nodeNum The output the number of internal node
* @return 0 if succeded, -1 or -2 otherwise
*/
template<typename string_type, typename sarray_type, typename index_type>
int sais_xx(string_type T, sarray_type SA, index_type n, index_type k) {
if ((n < 0) || (k <= 0)) return -1;
int err = saisxx(T, SA, n, k);
if (err != 0){
return err;
}
return 0;
}
template<typename string_type, typename sarray_type, typename index_type>
int esa_xx(string_type T, sarray_type SA, sarray_type L, sarray_type R, sarray_type D,
index_type n, index_type k, index_type& nodeNum) {
if ((n < 0) || (k <= 0)) return -1;
nodeNum = esaxx_private::suffixtree(T, SA, L, R, D, n);
return 0;
}
#endif // _ESA_HXX