-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmysqltsv.go
230 lines (212 loc) · 5.71 KB
/
mysqltsv.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
// Package mysqltsv encodes values for usage in LOAD DATA INFILE's tab separated values.
package mysqltsv
// TODO: Heap escape analyses
import (
"bufio"
"database/sql/driver"
"encoding/json"
"fmt"
"io"
"strconv"
"time"
)
// Escaping is the clause text, for inclusion in a LOAD DATA INFILE statement, that describes
// the format this package writes: binary character set, tab-terminated fields optionally
// enclosed in double quotes, backslash as the escape character, and newline-terminated lines
// with no line prefix.
const Escaping = `CHARACTER SET binary FIELDS TERMINATED BY '\t' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\' LINES TERMINATED BY '\n' STARTING BY ''`
/*
type Options struct {
FieldsTerminatedBy string
FieldsEnclosedBy string
FieldsOptionallyEnclosed bool
FieldsEscapedBy string
LinesTerminedBy string
LinesStartingBy string
// Character set?
}
func DefaultOptions() Options {
return Options{
FieldsTerminatedBy: "\t",
FieldsEnclosedBy: `"`,
FieldsEscapedBy: `\`,
LinesTerminedBy: "\n",
}
}
*/
// EncoderOptions are settings that affect encoding.
// A nil *EncoderOptions is accepted wherever one is taken and means "no overrides".
type EncoderOptions struct {
// Location is the timezone each time.Time will be converted to before being serialized.
// When nil, a time.Time is formatted in whatever location it already carries.
Location *time.Location
}
// Encoder encodes values into a tab separated file suitable for consumption by LOAD DATA INFILE.
// The number of columns per row must be fixed, and it will automatically advance to the next row once all columns were appended.
// Any errors during appending will be stored and future calls will be ignored.
// The encoder must be Close()d once done to flush and to read any errors that might have occurred.
type Encoder struct {
// w buffers encoded output in front of the destination writer.
w *bufio.Writer
// numColumnsPerRow is the fixed number of fields that make up one row.
numColumnsPerRow int
// colsLeftInRow counts the fields still missing from the current row; when it
// reaches zero the row is terminated and the counter is reset to numColumnsPerRow.
colsLeftInRow int
// err is the stored error; while it is non-nil the Append methods do nothing.
err error
// encoderOptions holds the optional settings passed to NewEncoder; it may be nil.
encoderOptions *EncoderOptions
}
// NewEncoder returns an Encoder that writes rows of numColumns fields to w through a
// 16 KiB buffer. Append exactly numColumns fields per row; the Encoder terminates each
// row itself once that many fields have been appended.
// Close must be called to flush the buffer and to learn whether any error occurred.
// cfg is optional and may be nil.
func NewEncoder(w io.Writer, numColumns int, cfg *EncoderOptions) *Encoder {
	const bufferSize = 16 * 1024

	enc := new(Encoder)
	enc.w = bufio.NewWriterSize(w, bufferSize)
	enc.numColumnsPerRow = numColumns
	enc.colsLeftInRow = numColumns
	enc.encoderOptions = cfg
	return enc
}
// writeField escapes b, writes it as the next field and then writes the delimiter that
// follows it: a newline when the field completes the row (resetting the column counter),
// a tab otherwise. Any write error is stored in e.err; if writing the field itself fails,
// the column counter is left untouched and no delimiter is written.
func (e *Encoder) writeField(b []byte) {
	// Escape straight into the writer's spare buffer space so the following Write
	// can usually avoid an extra copy.
	escaped := escapeField(e.w.AvailableBuffer(), b)
	if _, e.err = e.w.Write(escaped); e.err != nil {
		return
	}

	delim := byte('\t')
	e.colsLeftInRow--
	if e.colsLeftInRow == 0 {
		// Last column of the row: end the line and start counting a fresh row.
		delim = '\n'
		e.colsLeftInRow = e.numColumnsPerRow
	}
	e.err = e.w.WriteByte(delim)
}
// AppendString appends s as the next field of the current row.
// It does nothing if an earlier call has already stored an error.
func (e *Encoder) AppendString(s string) {
	if e.err != nil {
		return
	}
	e.writeField([]byte(s))
}
// AppendBytes appends b as the next field of the current row.
// A nil slice is written as the NULL marker \N.
// It does nothing if an earlier call has already stored an error.
func (e *Encoder) AppendBytes(b []byte) {
	if e.err == nil {
		e.writeField(b)
	}
}
// AppendValue converts v to its field representation and appends it as the next field of
// the current row. If v cannot be converted, the conversion error is stored and nothing
// is written. It does nothing if an earlier call has already stored an error.
func (e *Encoder) AppendValue(v any) {
	if e.err != nil {
		return
	}

	field, convErr := valueToBytes(v, e.encoderOptions)
	if convErr == nil {
		e.writeField(field)
		return
	}
	e.err = convErr
}
// Close flushes buffered output to the underlying writer and reports the encoder's error state.
//
// If an error was stored by an earlier call, that error is returned and nothing is flushed.
// Otherwise the buffer is flushed; a flush failure is both returned and stored, so that
// Error() and later calls agree with what Close reported.
// Close does not close the underlying writer.
func (e *Encoder) Close() error {
	if e.err != nil {
		return e.err
	}
	// Flush returns nil on success, so storing its result keeps e.err nil in that case.
	e.err = e.w.Flush()
	return e.err
}
// Error returns the error currently stored on the encoder, or nil if none has been recorded.
// It does not flush buffered output.
func (e *Encoder) Error() error {
return e.err
}
// escapeField appends the LOAD DATA INFILE representation of data to appendTo and returns
// the extended slice. Bytes already present in appendTo are preserved.
//
// A nil data slice is encoded as the unquoted NULL marker \N. Any other slice, including
// an empty non-nil one, is enclosed in double quotes with NUL, backspace, newline, carriage
// return, tab, ASCII 26 (Control+Z), backslash and double quote written as backslash
// escape sequences.
//
// Per https://dev.mysql.com/doc/refman/8.0/en/load-data.html#load-data-field-line-handling
func escapeField(appendTo, data []byte) []byte {
	if data == nil {
		return append(appendTo, '\\', 'N')
	}

	// Make sure there is room for the data and both quotes before starting; the small
	// extra slack absorbs a few escape sequences without another reallocation.
	if free := cap(appendTo) - len(appendTo); free < len(data)+2 {
		grown := make([]byte, len(appendTo), len(appendTo)+len(data)+5)
		copy(grown, appendTo)
		appendTo = grown
	}

	appendTo = append(appendTo, '"')
	for _, c := range data {
		switch c {
		case 0:
			appendTo = append(appendTo, '\\', '0')
		case '\b':
			appendTo = append(appendTo, '\\', 'b')
		case '\n':
			appendTo = append(appendTo, '\\', 'n')
		case '\r':
			appendTo = append(appendTo, '\\', 'r')
		case '\t':
			appendTo = append(appendTo, '\\', 't')
		case 26: // Control+Z
			appendTo = append(appendTo, '\\', 'Z')
		case '\\':
			appendTo = append(appendTo, '\\', '\\')
		case '"':
			appendTo = append(appendTo, '\\', '"')
		default:
			appendTo = append(appendTo, c)
		}
	}
	return append(appendTo, '"')
}
// valueToBytes converts v into the raw, not yet escaped bytes of a single field.
//
// If v implements driver.Valuer, its Value method is called first and the returned value
// is converted instead. Supported types are string, []byte, json.RawMessage, the sized and
// unsized signed and unsigned integers, bool (written as 1 or 0), float32, float64,
// time.Time and nil. A nil value yields a nil slice. Any other type results in an error.
//
// A time.Time is first moved into cfg.Location when both cfg and its Location are non-nil.
// It is then formatted as a bare date when its clock reads exactly midnight, with whole
// seconds when it has no sub-second part, and with fractional seconds otherwise.
func valueToBytes(v any, cfg *EncoderOptions) ([]byte, error) {
	if valuer, ok := v.(driver.Valuer); ok {
		resolved, err := valuer.Value()
		if err != nil {
			return nil, err
		}
		v = resolved
	}

	switch val := v.(type) {
	case nil:
		return nil, nil
	case []byte:
		return val, nil
	case json.RawMessage:
		return val, nil
	case string:
		return []byte(val), nil
	case bool:
		if val {
			return []byte("1"), nil
		}
		return []byte("0"), nil

	case int:
		return strconv.AppendInt(nil, int64(val), 10), nil
	case int8:
		return strconv.AppendInt(nil, int64(val), 10), nil
	case int16:
		return strconv.AppendInt(nil, int64(val), 10), nil
	case int32:
		return strconv.AppendInt(nil, int64(val), 10), nil
	case int64:
		return strconv.AppendInt(nil, val, 10), nil

	case uint:
		return strconv.AppendUint(nil, uint64(val), 10), nil
	case uint8:
		return strconv.AppendUint(nil, uint64(val), 10), nil
	case uint16:
		return strconv.AppendUint(nil, uint64(val), 10), nil
	case uint32:
		return strconv.AppendUint(nil, uint64(val), 10), nil
	case uint64:
		return strconv.AppendUint(nil, val, 10), nil

	case float32:
		return strconv.AppendFloat(nil, float64(val), 'f', -1, 32), nil
	case float64:
		return strconv.AppendFloat(nil, val, 'f', -1, 64), nil

	case time.Time:
		if cfg != nil && cfg.Location != nil {
			val = val.In(cfg.Location)
		}
		hh, mm, ss := val.Clock()
		frac := val.Nanosecond()

		// Pick the shortest layout that loses no information.
		layout := "2006-01-02 15:04:05.999999999"
		switch {
		case frac != 0:
			// Keep the fractional-second layout.
		case hh == 0 && mm == 0 && ss == 0:
			layout = "2006-01-02"
		default:
			layout = "2006-01-02 15:04:05"
		}
		return []byte(val.Format(layout)), nil

	default:
		return nil, fmt.Errorf("can't encode type %T to TSV", val)
	}
}
// EscapeValue converts v to a single field and escapes it for use in a MySQL tab separated
// file, using the format described by the constant Escaping. It returns a newly built
// slice, or an error if v has a type that cannot be encoded.
// cfg is optional and may be nil.
func EscapeValue(v any, cfg *EncoderOptions) ([]byte, error) {
	raw, err := valueToBytes(v, cfg)
	if err == nil {
		return escapeField(nil, raw), nil
	}
	return nil, err
}