-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdecode.go
218 lines (197 loc) · 6.46 KB
/
decode.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
package csvutil
import (
"encoding"
"encoding/csv"
"fmt"
"io"
"reflect"
"regexp"
"strconv"
"strings"
)
// Decoder manages reading data from a CSV into tagged structs.
//
// A Decoder is built by NewDecoder or NewDecoderFromCSVReader, which consume
// the header row and line the struct field mappings up with the CSV columns.
type Decoder struct {
// r is the underlying CSV reader that data rows are pulled from.
r *csv.Reader
// mappings holds one entry per CSV column, in column order. An entry whose
// fieldName is empty marks a column with no matching struct field.
mappings []csvField
// numColumns is the number of columns in the header row; every data row
// must contain exactly this many values.
numColumns int
}
// NewDecoder builds a Decoder that reads CSV data from r through a csv.Reader
// with default settings. The header row is consumed immediately and matched
// against the fields of dest to build the column-to-field mappings.
//
// Use NewDecoderFromCSVReader when the csv.Reader needs custom options.
func NewDecoder(r io.Reader, dest interface{}) (Decoder, error) {
	return NewDecoderFromCSVReader(csv.NewReader(r), dest)
}
// NewDecoderFromCSVReader initializes a decoder using the given csv.Reader.
// This allows the caller to configure options on the csv.Reader (e.g. what
// delimiter to use) instead of using the defaults.
//
// The first record read from csvR is treated as the header row. Headers and
// struct field names are compared after normalizeHeader has been applied to
// both. An error is returned when:
//   - the field mappings cannot be derived from dest,
//   - a field has an unsupported type and no custom text unmarshaler,
//   - csvR is nil or the header row cannot be read,
//   - two headers normalize to the same name,
//   - no header matches any struct field, or
//   - a required field has no matching header.
func NewDecoderFromCSVReader(csvR *csv.Reader, dest interface{}) (Decoder, error) {
	mappings, err := structureFromStruct(dest)
	if err != nil {
		return Decoder{}, err
	}

	// Ensure that all "unknown" types have their own text unmarshaler.
	for _, m := range mappings {
		if m.fieldType == reflect.Invalid && !m.customUnmarshaler {
			return Decoder{}, fmt.Errorf("unsupported field type found that does not "+
				"implement the encoding.TextUnmarshaler interface: %s", m.fieldName)
		}
	}

	if csvR == nil {
		return Decoder{}, fmt.Errorf("csv reader cannot be nil")
	}
	headers, err := csvR.Read()
	if err != nil {
		return Decoder{}, fmt.Errorf("failed to find headers: %w", err)
	}

	// Normalize every field name exactly once instead of once per
	// (header, field) pair. When several fields normalize to the same name
	// the last one wins, matching the behavior of a linear scan without break.
	normalizedNames := make([]string, len(mappings))
	fieldsByHeader := make(map[string]csvField, len(mappings))
	for i, f := range mappings {
		normalizedNames[i] = normalizeHeader(f.fieldName)
		fieldsByHeader[normalizedNames[i]] = f
	}

	numColumns := len(headers)
	// sortedMappings is parallel to the CSV columns; unmatched columns keep the
	// zero csvField (empty fieldName).
	sortedMappings := make([]csvField, numColumns)
	headersSeen := make(map[string]bool, numColumns)
	matchedAny := false

	for i, h := range headers {
		h = normalizeHeader(h)

		// Ensure unique CSV headers.
		if headersSeen[h] {
			return Decoder{}, fmt.Errorf("saw header column '%s' twice, CSV headers must be unique", h)
		}
		headersSeen[h] = true

		// Slot field info into the position parallel to the CSV column. A field
		// with an empty name can never count as a match.
		if f, ok := fieldsByHeader[h]; ok && f.fieldName != "" {
			sortedMappings[i] = f
			matchedAny = true
		}
	}

	// Ensure that at least one column maps onto a struct field.
	if !matchedAny {
		return Decoder{}, fmt.Errorf("all struct fields do not match any CSV headers")
	}

	// Ensure that all required columns are present.
	for i, f := range mappings {
		if f.required && !headersSeen[normalizedNames[i]] {
			return Decoder{}, fmt.Errorf("column '%s' required but not found", f.fieldName)
		}
	}

	return Decoder{
		r:          csvR,
		mappings:   sortedMappings,
		numColumns: numColumns,
	}, nil
}
// nonASCIIPattern matches any single character outside the ASCII range. It is
// compiled once at package initialization rather than on every call.
var nonASCIIPattern = regexp.MustCompile("[[:^ascii:]]")

// normalizeHeader returns header with all non-ASCII characters removed,
// surrounding whitespace trimmed, and the remainder lowercased, in that order.
// Stripping happens first so that non-ASCII characters at the edges of a header
// (for example a byte order mark) do not survive the trim.
func normalizeHeader(header string) string {
	asciiOnly := nonASCIIPattern.ReplaceAllLiteralString(header, "")
	return strings.ToLower(strings.TrimSpace(asciiOnly))
}
// Read decodes data from the next CSV row into a struct. The struct must be
// passed as a non-nil pointer into Read; field indices recorded when the
// Decoder was built are used to address its fields, so it is expected to be
// the same struct type that was given to the constructor.
//
// Each cell is trimmed of surrounding whitespace before decoding. An empty
// cell resets a non-required field to its zero value and is an error for a
// required field. Fields with a custom text unmarshaler are decoded through
// encoding.TextUnmarshaler; otherwise string, int, bool, []string and []int
// fields are supported, with slice cells split on ",".
//
// When there is no data left in the reader, an `io.EOF` is returned unwrapped.
// Any other failure is returned as an error; on error dest may have been
// partially populated by the columns decoded before the failure.
func (d Decoder) Read(dest interface{}) error {
	if dest == nil {
		return fmt.Errorf("Destination struct passed in cannot be nil")
	}
	destPtr := reflect.ValueOf(dest)
	if destPtr.Kind() != reflect.Ptr {
		return fmt.Errorf("Destination struct passed in must be pointer")
	}
	// A typed nil pointer is a non-nil interface value; dereferencing it would
	// make the reflect calls below panic.
	if destPtr.IsNil() {
		return fmt.Errorf("Destination struct passed in cannot be nil")
	}
	target := destPtr.Elem()
	switch target.Kind() {
	case reflect.Struct:
		// ok
	case reflect.Interface:
		return fmt.Errorf("Destination struct cannot be an interface")
	default:
		return fmt.Errorf("destination must be a pointer to a struct, found pointer to %s", target.Kind())
	}

	// The zero Decoder (as returned alongside a constructor error) has no reader.
	if d.r == nil {
		return fmt.Errorf("decoder is not initialized")
	}

	row, err := d.r.Read()
	if err == io.EOF {
		return io.EOF
	}
	if err != nil {
		return fmt.Errorf("failed to read CSV row: %w", err)
	}
	if len(row) != d.numColumns {
		return fmt.Errorf("expected %d columns, found %d", d.numColumns, len(row))
	}

	for col, strValue := range row {
		m := d.mappings[col]
		// Skip column if we have no mapping.
		if m.fieldName == "" {
			continue
		}
		strValue = strings.TrimSpace(strValue)
		field := target.Field(m.fieldIndex)

		if strValue == "" {
			if m.required {
				return fmt.Errorf("column %s required but no value found", m.fieldName)
			}
			// Reset so that a reused destination does not keep a stale value.
			field.Set(reflect.Zero(field.Type()))
			continue
		}

		if m.customUnmarshaler {
			if field.Kind() != reflect.Ptr {
				// If value is not a pointer we need an addressable value for Unmarshal.
				field = field.Addr()
			} else if field.IsNil() {
				// If the value is a pointer, but is nil, instantiate the underlying type.
				field.Set(reflect.New(field.Type().Elem()))
			}
			u, ok := field.Interface().(encoding.TextUnmarshaler)
			if !ok {
				return fmt.Errorf("field %s does not implement the encoding.TextUnmarshaler interface", m.fieldName)
			}
			if err := u.UnmarshalText([]byte(strValue)); err != nil {
				return fmt.Errorf("failed to coerce value '%s' using custom marshaler for field %s: %w",
					strValue, m.fieldName, err)
			}
			continue
		}

		switch m.fieldType {
		case reflect.String:
			field.SetString(strValue)
		case reflect.Int:
			n, err := strconv.Atoi(strValue)
			if err != nil {
				return fmt.Errorf("failed to coerce value '%s' into integer for field %s: %w",
					strValue, m.fieldName, err)
			}
			field.SetInt(int64(n))
		case reflect.Bool:
			b, err := strconv.ParseBool(strValue)
			if err != nil {
				return fmt.Errorf("failed to coerce value '%s' into boolean for field %s: %w",
					strValue, m.fieldName, err)
			}
			field.SetBool(b)
		case reflect.Slice:
			parts := strings.Split(strValue, ",")
			switch m.sliceType {
			case reflect.String:
				field.Set(reflect.ValueOf(parts))
			case reflect.Int:
				ints := make([]int, len(parts))
				for j, p := range parts {
					// Tolerate whitespace around separators, e.g. "1, 2, 3".
					p = strings.TrimSpace(p)
					n, err := strconv.Atoi(p)
					if err != nil {
						return fmt.Errorf("failed to coerce value '%s' (indexed %d) into integer for field %s: %w",
							p, j, m.fieldName, err)
					}
					ints[j] = n
				}
				field.Set(reflect.ValueOf(ints))
			default:
				return fmt.Errorf("unsupported slice element type %v for field %s: only string and int slices are supported",
					m.sliceType, m.fieldName)
			}
		default:
			return fmt.Errorf("type not found: %s", m.fieldType)
		}
	}
	return nil
}
// MatchedHeaders reports the struct field names that were matched to a CSV
// column when the Decoder was initialized, in CSV column order. Columns without
// a matching field are omitted. The result is an empty, non-nil slice when
// nothing matched.
func (d Decoder) MatchedHeaders() []string {
	names := []string{}
	for i := range d.mappings {
		name := d.mappings[i].fieldName
		if name == "" {
			continue
		}
		names = append(names, name)
	}
	return names
}