-
-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathlevenshtein.go
183 lines (161 loc) · 4.89 KB
/
levenshtein.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
package edlib
import "github.com/hbollon/go-edlib/internal/utils"
// LevenshteinDistance calculate the distance between two string
// This algorithm allow insertions, deletions and substitutions to change one string to the second
// Compatible with non-ASCII characters
func LevenshteinDistance(str1, str2 string) int {
// Convert string parameters to rune arrays to be compatible with non-ASCII
runeStr1 := []rune(str1)
runeStr2 := []rune(str2)
// Get and store length of these strings
runeStr1len := len(runeStr1)
runeStr2len := len(runeStr2)
if runeStr1len == 0 {
return runeStr2len
} else if runeStr2len == 0 {
return runeStr1len
} else if utils.Equal(runeStr1, runeStr2) {
return 0
}
column := make([]int, runeStr1len+1)
for y := 1; y <= runeStr1len; y++ {
column[y] = y
}
for x := 1; x <= runeStr2len; x++ {
column[0] = x
lastkey := x - 1
for y := 1; y <= runeStr1len; y++ {
oldkey := column[y]
var i int
if runeStr1[y-1] != runeStr2[x-1] {
i = 1
}
column[y] = utils.Min(
utils.Min(column[y]+1, // insert
column[y-1]+1), // delete
lastkey+i) // substitution
lastkey = oldkey
}
}
return column[runeStr1len]
}
// OSADamerauLevenshteinDistance calculate the distance between two string
// Optimal string alignment distance variant that use extension of the Wagner-Fisher dynamic programming algorithm
// Doesn't allow multiple transformations on a same substring
// Allowing insertions, deletions, substitutions and transpositions to change one string to the second
// Compatible with non-ASCII characters
func OSADamerauLevenshteinDistance(str1, str2 string) int {
// Convert string parameters to rune arrays to be compatible with non-ASCII
runeStr1 := []rune(str1)
runeStr2 := []rune(str2)
// Get and store length of these strings
runeStr1len := len(runeStr1)
runeStr2len := len(runeStr2)
if runeStr1len == 0 {
return runeStr2len
} else if runeStr2len == 0 {
return runeStr1len
} else if utils.Equal(runeStr1, runeStr2) {
return 0
} else if runeStr1len < runeStr2len {
return OSADamerauLevenshteinDistance(str2, str1)
}
// 2D Array
row := utils.Min(runeStr1len+1, 3)
matrix := make([][]int, row)
for i := 0; i < row; i++ {
matrix[i] = make([]int, runeStr2len+1)
matrix[i][0] = i
}
for j := 0; j <= runeStr2len; j++ {
matrix[0][j] = j
}
var count int
for i := 1; i <= runeStr1len; i++ {
matrix[i%3][0] = i
for j := 1; j <= runeStr2len; j++ {
if runeStr1[i-1] == runeStr2[j-1] {
count = 0
} else {
count = 1
}
matrix[i%3][j] = utils.Min(utils.Min(matrix[(i-1)%3][j]+1, matrix[i%3][j-1]+1),
matrix[(i-1)%3][j-1]+count) // insertion, deletion, substitution
if i > 1 && j > 1 && runeStr1[i-1] == runeStr2[j-2] && runeStr1[i-2] == runeStr2[j-1] {
matrix[i%3][j] = utils.Min(matrix[i%3][j], matrix[(i-2)%3][j-2]+1) // translation
}
}
}
return matrix[runeStr1len%3][runeStr2len]
}
// DamerauLevenshteinDistance calculate the distance between two string
// This algorithm computes the true Damerau–Levenshtein distance with adjacent transpositions
// Allowing insertions, deletions, substitutions and transpositions to change one string to the second
// Compatible with non-ASCII characters
func DamerauLevenshteinDistance(str1, str2 string) int {
// Convert string parameters to rune arrays to be compatible with non-ASCII
runeStr1 := []rune(str1)
runeStr2 := []rune(str2)
// Get and store length of these strings
runeStr1len := len(runeStr1)
runeStr2len := len(runeStr2)
if runeStr1len == 0 {
return runeStr2len
} else if runeStr2len == 0 {
return runeStr1len
} else if utils.Equal(runeStr1, runeStr2) {
return 0
}
// Create alphabet based on input strings
da := make(map[rune]int)
for i := 0; i < runeStr1len; i++ {
da[runeStr1[i]] = 0
}
for i := 0; i < runeStr2len; i++ {
da[runeStr2[i]] = 0
}
// 2D Array for distance matrix : matrix[0..str1.length+2][0..s2.length+2]
matrix := make([][]int, runeStr1len+2)
for i := 0; i <= runeStr1len+1; i++ {
matrix[i] = make([]int, runeStr2len+2)
for j := 0; j <= runeStr2len+1; j++ {
matrix[i][j] = 0
}
}
// Maximum possible distance
maxDist := runeStr1len + runeStr2len
// Initialize matrix
matrix[0][0] = maxDist
for i := 0; i <= runeStr1len; i++ {
matrix[i+1][0] = maxDist
matrix[i+1][1] = i
}
for i := 0; i <= runeStr2len; i++ {
matrix[0][i+1] = maxDist
matrix[1][i+1] = i
}
// Process edit distance
var cost int
for i := 1; i <= runeStr1len; i++ {
db := 0
for j := 1; j <= runeStr2len; j++ {
i1 := da[runeStr2[j-1]]
j1 := db
if runeStr1[i-1] == runeStr2[j-1] {
cost = 0
db = j
} else {
cost = 1
}
matrix[i+1][j+1] = utils.Min(
utils.Min(
matrix[i+1][j]+1, // Addition
matrix[i][j+1]+1), // Deletion
utils.Min(
matrix[i][j]+cost, // Substitution
matrix[i1][j1]+(i-i1-1)+1+(j-j1-1))) // Transposition
}
da[runeStr1[i-1]] = i
}
return matrix[runeStr1len+1][runeStr2len+1]
}