Skip to content

Commit

Permalink
Merge pull request #2 from clear-street/skurella/trim-codepoint-indices-when-trimming-line-data
Browse files (browse the repository at this point in the history)

Trim codepoint indices when trimming line data
  • Loading branch information
sidkurella authored Feb 8, 2024
2 parents ee607f5 + 4fb0249 commit 26c0724
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 16 deletions.
56 changes: 40 additions & 16 deletions decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,20 +197,26 @@ func (d *Decoder) readLine(v reflect.Value) (err error, ok bool) {
}

func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawValue {
var trimFunc func(string) string
var trimFunc func(in string) (out string, leftRemoved int, rightRemoved int)

switch format.alignment {
case left:
trimFunc = func(s string) string {
return strings.TrimRight(s, string(format.padChar))
trimFunc = func(s string) (out string, leftRemoved int, rightRemoved int) {
out = strings.TrimRight(s, string(format.padChar))
return out, 0, len(s) - len(out)
}
case right:
trimFunc = func(s string) string {
return strings.TrimLeft(s, string(format.padChar))
trimFunc = func(s string) (out string, leftRemoved int, rightRemoved int) {
out = strings.TrimLeft(s, string(format.padChar))
return out, len(s) - len(out), 0
}
default:
trimFunc = func(s string) string {
return strings.Trim(s, string(format.padChar))
trimFunc = func(s string) (out string, leftRemoved int, rightRemoved int) {
leftTrimmed := strings.TrimLeft(s, string(format.padChar))
leftRemoved = len(s) - len(leftTrimmed)
rightTrimmed := strings.TrimRight(leftTrimmed, string(format.padChar))
rightRemoved = len(leftTrimmed) - len(rightTrimmed)
return rightTrimmed, leftRemoved, rightRemoved
}
}

Expand All @@ -228,17 +234,34 @@ func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawVa
lineData = value.data[relevantIndices[0]:value.codepointIndices[endPos]]
}

// We trimmed data from the front of the string.
// We need to adjust the codepoint indices to reflect this, as they have shifted.
removedFromFront := relevantIndices[0]
newIndices := make([]int, 0, len(relevantIndices))
for _, idx := range relevantIndices {
newIndices = append(newIndices, idx-removedFromFront)
newIndices := relevantIndices
if relevantIndices[0] > 0 {
// We trimmed data from the front of the string.
// We need to adjust the codepoint indices to reflect this, as they have shifted.
removedFromFront := relevantIndices[0]
newIndices = make([]int, 0, len(relevantIndices))
for _, idx := range relevantIndices {
newIndices = append(newIndices, idx-removedFromFront)
}
}

// Trim the new line data.
newLineData, leftRemovedBytes, rightRemovedBytes := trimFunc(lineData)
trimmedIndices := newIndices
if leftRemovedBytes > 0 || rightRemovedBytes > 0 {
// We must trim our codepoint indices list in order to match
// the newly trimmed line data string.
trimmedIndices = []int{}
for _, idx := range newIndices {
if idx >= leftRemovedBytes && idx < len(lineData)-rightRemovedBytes {
trimmedIndices = append(trimmedIndices, idx-leftRemovedBytes)
}
}
}

return rawValue{
data: trimFunc(lineData),
codepointIndices: newIndices,
data: newLineData,
codepointIndices: trimmedIndices,
}
} else {
if len(value.data) == 0 || startPos > len(value.data) {
Expand All @@ -247,8 +270,9 @@ func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawVa
if endPos > len(value.data) {
endPos = len(value.data)
}
newLineData, _, _ := trimFunc(value.data[startPos-1 : endPos])
return rawValue{
data: trimFunc(value.data[startPos-1 : endPos]),
data: newLineData,
}
}
}
Expand Down
59 changes: 59 additions & 0 deletions decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,65 @@ func TestDecodeSetUseCodepointIndices_Nested(t *testing.T) {
}
}

// TestDecodeSetUseCodepointIndices_PaddingTrimmed checks that decoding with
// codepoint indices enabled produces the expected values when fields of a
// nested struct consist only of padding that is trimmed away.
//
// Every record is 10 columns wide: the nested struct is tagged for columns
// 1-8 (an integer in 1-2 tagged right-aligned with '0' padding, followed by
// three two-column strings) and a plain string is tagged for columns 9-10.
func TestDecodeSetUseCodepointIndices_PaddingTrimmed(t *testing.T) {
	type Nested struct {
		First  int64  `fixed:"1,2,right,0"`
		Second string `fixed:"3,4"`
		Third  string `fixed:"5,6"`
		Fourth string `fixed:"7,8"`
	}
	type Test struct {
		First  Nested `fixed:"1,8"`
		Second string `fixed:"9,10"`
	}

	for _, tt := range []struct {
		name     string
		raw      []byte
		expected Test
	}{
		{
			name: "All ASCII characters",
			// Columns 3-8 are six spaces of padding. They must be kept
			// verbatim: with fewer spaces the record is too short for the
			// value expected in columns 9-10.
			raw: []byte("00      11"),
			expected: Test{
				First: Nested{
					First:  0,
					Second: "",
					Third:  "",
					Fourth: "",
				},
				Second: "11",
			},
		},
		{
			name: "Multi-byte characters",
			// Same layout as above, but columns 9-10 hold multi-byte
			// codepoints, so byte offsets and codepoint offsets differ.
			raw: []byte("00      ☃☃"),
			expected: Test{
				First: Nested{
					First:  0,
					Second: "",
					Third:  "",
					Fourth: "",
				},
				Second: "☃☃",
			},
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			d := NewDecoder(bytes.NewReader(tt.raw))
			d.SetUseCodepointIndices(true)
			var s Test
			err := d.Decode(&s)
			if err != nil {
				t.Errorf("Unexpected err: %v", err)
			}
			if !reflect.DeepEqual(tt.expected, s) {
				t.Errorf("Decode(%v) want %v, have %v", tt.raw, tt.expected, s)
			}
		})
	}
}

// Verify the behavior of Decoder.Decode at the end of a file. See
// https://github.com/ianlopshire/go-fixedwidth/issues/6 for more details.
func TestDecode_EOF(t *testing.T) {
Expand Down

0 comments on commit 26c0724

Please sign in to comment.