-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathtruncate.go
282 lines (239 loc) · 7 KB
/
truncate.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
package ansi
import (
"bytes"
"github.com/charmbracelet/x/ansi/parser"
"github.com/mattn/go-runewidth"
"github.com/rivo/uniseg"
)
// Cut the string, without adding any prefix or tail strings. This function is
// aware of ANSI escape codes and will not break them, and accounts for
// wide-characters (such as East-Asian characters and emojis). Note that the
// [left] parameter is inclusive, while [right] isn't.
// This treats the text as a sequence of graphemes.
func Cut(s string, left, right int) string {
return cut(GraphemeWidth, s, left, right)
}
// CutWc the string, without adding any prefix or tail strings. This function is
// aware of ANSI escape codes and will not break them, and accounts for
// wide-characters (such as East-Asian characters and emojis). Note that the
// [left] parameter is inclusive, while [right] isn't.
// This treats the text as a sequence of wide characters and runes.
func CutWc(s string, left, right int) string {
return cut(WcWidth, s, left, right)
}
func cut(m Method, s string, left, right int) string {
if right <= left {
return ""
}
truncate := Truncate
truncateLeft := TruncateLeft
if m == WcWidth {
truncate = TruncateWc
truncateLeft = TruncateWc
}
if left == 0 {
return truncate(s, right, "")
}
return truncateLeft(Truncate(s, right, ""), left, "")
}
// Truncate truncates a string to a given length, adding a tail to the end if
// the string is longer than the given length. This function is aware of ANSI
// escape codes and will not break them, and accounts for wide-characters (such
// as East-Asian characters and emojis).
// This treats the text as a sequence of graphemes.
func Truncate(s string, length int, tail string) string {
return truncate(GraphemeWidth, s, length, tail)
}
// TruncateWc truncates a string to a given length, adding a tail to the end if
// the string is longer than the given length. This function is aware of ANSI
// escape codes and will not break them, and accounts for wide-characters (such
// as East-Asian characters and emojis).
// This treats the text as a sequence of wide characters and runes.
func TruncateWc(s string, length int, tail string) string {
return truncate(WcWidth, s, length, tail)
}
func truncate(m Method, s string, length int, tail string) string {
if sw := StringWidth(s); sw <= length {
return s
}
tw := StringWidth(tail)
length -= tw
if length < 0 {
return ""
}
var cluster []byte
var buf bytes.Buffer
curWidth := 0
ignoring := false
pstate := parser.GroundState // initial state
b := []byte(s)
i := 0
// Here we iterate over the bytes of the string and collect printable
// characters and runes. We also keep track of the width of the string
// in cells.
//
// Once we reach the given length, we start ignoring characters and only
// collect ANSI escape codes until we reach the end of string.
for i < len(b) {
state, action := parser.Table.Transition(pstate, b[i])
if state == parser.Utf8State {
// This action happens when we transition to the Utf8State.
var width int
cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
if m == WcWidth {
width = runewidth.StringWidth(string(cluster))
}
// increment the index by the length of the cluster
i += len(cluster)
// Are we ignoring? Skip to the next byte
if ignoring {
continue
}
// Is this gonna be too wide?
// If so write the tail and stop collecting.
if curWidth+width > length && !ignoring {
ignoring = true
buf.WriteString(tail)
}
if curWidth+width > length {
continue
}
curWidth += width
buf.Write(cluster)
// Done collecting, now we're back in the ground state.
pstate = parser.GroundState
continue
}
switch action {
case parser.PrintAction:
// Is this gonna be too wide?
// If so write the tail and stop collecting.
if curWidth >= length && !ignoring {
ignoring = true
buf.WriteString(tail)
}
// Skip to the next byte if we're ignoring
if ignoring {
i++
continue
}
// collects printable ASCII
curWidth++
fallthrough
default:
buf.WriteByte(b[i])
i++
}
// Transition to the next state.
pstate = state
// Once we reach the given length, we start ignoring runes and write
// the tail to the buffer.
if curWidth > length && !ignoring {
ignoring = true
buf.WriteString(tail)
}
}
return buf.String()
}
// TruncateLeft truncates a string from the left side by removing n characters,
// adding a prefix to the beginning if the string is longer than n.
// This function is aware of ANSI escape codes and will not break them, and
// accounts for wide-characters (such as East-Asian characters and emojis).
// This treats the text as a sequence of graphemes.
func TruncateLeft(s string, n int, prefix string) string {
return truncateLeft(GraphemeWidth, s, n, prefix)
}
// TruncateLeftWc truncates a string from the left side by removing n characters,
// adding a prefix to the beginning if the string is longer than n.
// This function is aware of ANSI escape codes and will not break them, and
// accounts for wide-characters (such as East-Asian characters and emojis).
// This treats the text as a sequence of wide characters and runes.
func TruncateLeftWc(s string, n int, prefix string) string {
return truncateLeft(WcWidth, s, n, prefix)
}
func truncateLeft(m Method, s string, n int, prefix string) string {
if n <= 0 {
return s
}
var cluster []byte
var buf bytes.Buffer
curWidth := 0
ignoring := true
pstate := parser.GroundState
b := []byte(s)
i := 0
for i < len(b) {
if !ignoring {
buf.Write(b[i:])
break
}
state, action := parser.Table.Transition(pstate, b[i])
if state == parser.Utf8State {
var width int
cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
if m == WcWidth {
width = runewidth.StringWidth(string(cluster))
}
i += len(cluster)
curWidth += width
if curWidth > n && ignoring {
ignoring = false
buf.WriteString(prefix)
}
if ignoring {
continue
}
if curWidth > n {
buf.Write(cluster)
}
pstate = parser.GroundState
continue
}
switch action {
case parser.PrintAction:
curWidth++
if curWidth > n && ignoring {
ignoring = false
buf.WriteString(prefix)
}
if ignoring {
i++
continue
}
fallthrough
default:
buf.WriteByte(b[i])
i++
}
pstate = state
if curWidth > n && ignoring {
ignoring = false
buf.WriteString(prefix)
}
}
return buf.String()
}
// ByteToGraphemeRange takes start and stop byte positions and converts them to
// grapheme-aware char positions.
// You can use this with [Truncate], [TruncateLeft], and [Cut].
func ByteToGraphemeRange(str string, byteStart, byteStop int) (charStart, charStop int) {
bytePos, charPos := 0, 0
gr := uniseg.NewGraphemes(str)
for byteStart > bytePos {
if !gr.Next() {
break
}
bytePos += len(gr.Str())
charPos += max(1, gr.Width())
}
charStart = charPos
for byteStop > bytePos {
if !gr.Next() {
break
}
bytePos += len(gr.Str())
charPos += max(1, gr.Width())
}
charStop = charPos
return
}