From cffe9d3d464055682ecde81f5de329b7eea37c55 Mon Sep 17 00:00:00 2001 From: Akshay Bharambe Date: Wed, 12 Feb 2020 22:56:40 +0530 Subject: [PATCH] Add: DecodeBytes and DecodeString APIs 1. Add: DecodeBytes API to convert jsonc byte array. 2. Add: DecodeString API to convert jsonc string. 3. Add: Tests. 100% Coverage. 4. Refactor: make decode() reusable. 5. Update: Readme and examples. --- README.md | 8 ++- examples/README.md | 3 ++ jsonc.go | 77 ++++++++++++++++++++++----- jsonc_test.go | 128 ++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 200 insertions(+), 16 deletions(-) create mode 100644 examples/README.md diff --git a/README.md b/README.md index 16593bd..811bf69 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,10 @@ Gets converted to (spaces omitted) { "string": "foo", "bool": false, "number": 42, "array": [1, 2, 3] } ``` +## Motivation + +[jsonc](https://github.com/muhammadmuzzammil1998/jsonc) is great. But this package provides significant performance improvements and simple API to use it with standard library. 
+ ## Usage Get this package @@ -43,8 +47,8 @@ go get github.com/akshaybharambe14/go-jsonc ## Example -see [examples](https://github.com/akshaybharambe14/go-jsonc/examples) +see [examples](https://github.com/akshaybharambe14/go-jsonc/tree/master/examples) ## License -`go-jsonc` is available under [MIT License](License.md) +`go-jsonc` is open source and available under [MIT License](License.md) diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..c643159 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,3 @@ +# JSONC examples + +Examples for go-jsonc diff --git a/jsonc.go b/jsonc.go index 1225445..79cffb5 100644 --- a/jsonc.go +++ b/jsonc.go @@ -3,6 +3,8 @@ package jsonc import ( "errors" "io" + "reflect" + "unsafe" ) type ( @@ -41,10 +43,10 @@ const ( ) var ( - ErrUnexpectedEndOfJSON = errors.New("unexpected end of json") + ErrUnexpectedEndOfComment = errors.New("unexpected end of comment") ) -// New a new io.Reader wrapping the provided one. +// NewDecoder returns a new Decoder wrapping the provided io.Reader. The returned decoder implements io.Reader. func NewDecoder(r io.Reader) *Decoder { return &Decoder{ c: comment{}, @@ -52,31 +54,31 @@ func NewDecoder(r io.Reader) *Decoder { } } -// Read reads from underlying writer and processes the stream to omit comments. +// Read reads from underlying reader and processes the stream to omit comments. // A single read doesn't guaranttee a valid JSON. Depends on length of passed slice. // -// Produces ErrUnexpectedEndOfJSON for incomplete comments +// Produces ErrUnexpectedEndOfComment for incomplete comments. 
func (d *Decoder) Read(p []byte) (int, error) { n, err := d.r.Read(p) if err != nil { - return n, err + return 0, err } shortRead := n <= len(p) - n = d.decode(p[:n]) + n = decode(p[:n], &d.c) - if shortRead && d.c.state != stopped { - return 0, ErrUnexpectedEndOfJSON + if shortRead && !d.c.complete() { + return 0, ErrUnexpectedEndOfComment } return n, nil } -func (d *Decoder) decode(p []byte) int { +func decode(p []byte, c *comment) int { i := 0 for _, s := range p { - if d.c.handle(s) { + if c.handle(s) { p[i] = s i++ } @@ -124,17 +126,66 @@ func (c *comment) handle(s byte) bool { } if s == newLine && !c.multiLn { - c.state = stopped + c.reset() } case canStop: if s == fwdSlash || s == charN { - c.state = stopped - c.multiLn = false + c.reset() } } return false } + +func (c *comment) reset() { + c.state = stopped + c.multiLn = false +} + +func (c *comment) complete() bool { + return c.state == stopped +} + +// DecodeBytes decodes passed commented json byte slice to normal json. +// It modifies the passed slice. The passed slice must be refferred till returned count, if there is no error. +// +// The error doesn't include errors related to invalid json. If not nil, it must be ErrUnexpectedEndOfComment. +// +// The returned json must be checked for validity. +func DecodeBytes(p []byte) (int, error) { + c := &comment{} + n := decode(p, c) + + if !c.complete() { + return 0, ErrUnexpectedEndOfComment + } + + return n, nil +} + +// DecodeString decodes passed commented json to normal json. +// It uses "unsafe" way to convert a byte slice to result string. This saves allocations and improves performance is case of large json. +// +// The error doesn't include errors related to invalid json. If not nil, it must be ErrUnexpectedEndOfComment. +// +// The returned json must be checked for validity. 
+func DecodeString(s string) (string, error) { + p := []byte(s) + + n, err := DecodeBytes(p) + if err != nil { + return "", err + } + + p = p[:n] + + // following operation is safe to do till p is not being changed. This reduces allocations. + sh := *(*reflect.SliceHeader)(unsafe.Pointer(&p)) + return *(*string)(unsafe.Pointer(&reflect.StringHeader{ + Data: sh.Data, + Len: sh.Len, + })), nil +} diff --git a/jsonc_test.go b/jsonc_test.go index c279ba8..02824ce 100644 --- a/jsonc_test.go +++ b/jsonc_test.go @@ -2,15 +2,22 @@ package jsonc import ( "bytes" + "io" + "reflect" "testing" + "testing/iotest" ) -func ts(b []byte) *Decoder { return &Decoder{r: bytes.NewBuffer(b)} } +func ts(b []byte) *Decoder { return &Decoder{r: bytes.NewBuffer(b)} } +func tsErr(b []byte) *Decoder { return &Decoder{r: iotest.DataErrReader(bytes.NewBuffer(b))} } var ( validSingle = []byte(`{"foo": // this is a single line comment\n"bar foo", "true": false, "number": 42, "object": { "test": "done" }, "array" : [1, 2, 3], "url" : "https://github.com" }`) invalidSingle = []byte(`{"foo": // this is a single line comment "bar foo", "true": false, "number": 42, "object": { "test": "done" }, "array" : [1, 2, 3], "url" : "https://github.com" }`) + validSingleESC = []byte("{\"foo\": // this is a single line comment\n\"bar foo\", \"true\": false, \"number\": 42, \"object\": { \"test\": \"done\" }, \"array\" : [1, 2, 3], \"url\" : \"https://github.com\" }") + invalidSingleESC = []byte("{\"foo\": // this is a single line comment\"bar foo\", \"true\": false, \"number\": 42, \"object\": { \"test\": \"done\" }, \"array\" : [1, 2, 3], \"url\" : \"https://github.com\" }") + validBlock = []byte(`{"foo": /* this is a block comment */ "bar foo", "true": false, "number": 42, "object": { "test": "done" }, "array" : [1, 2, 3], "url" : "https://github.com" }`) invalidBlock = []byte(`{"foo": /* this is a block comment "bar foo", "true": false, "number": 42, "object": { "test": "done" }, "array" : [1, 2, 3], "url" : 
"https://github.com" }`) ) @@ -42,6 +49,20 @@ func Test_Decoder_Read(t *testing.T) { want: 0, wantErr: true, }, + { + name: "Valid single line comment (escaped json)", + d: ts(validSingleESC), + args: args{p: make([]byte, len(validSingleESC))}, + want: 110, // (163(total) - 34(comments) - 19(spaces)) + wantErr: false, + }, + { + name: "Invalid single line comment (escaped json)", + d: ts(invalidSingleESC), + args: args{p: make([]byte, len(invalidSingleESC))}, + want: 0, + wantErr: true, + }, { name: "Valid block comment", d: ts(validBlock), @@ -56,6 +77,13 @@ func Test_Decoder_Read(t *testing.T) { want: 0, wantErr: true, }, + { + name: "Invalid Read", + d: tsErr(validBlock), + args: args{p: make([]byte, len(validBlock))}, + want: 0, + wantErr: true, + }, } for _, tt := range tests { @@ -71,3 +99,101 @@ func Test_Decoder_Read(t *testing.T) { }) } } + +func TestNewDecoder(t *testing.T) { + type args struct { + r io.Reader + } + tests := []struct { + name string + args args + want *Decoder + }{ + { + name: "Valid Decoder", + args: args{r: nil}, + want: &Decoder{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := NewDecoder(tt.args.r); !reflect.DeepEqual(got, tt.want) { + t.Errorf("NewDecoder() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestDecodeBytes(t *testing.T) { + type args struct { + p []byte + } + tests := []struct { + name string + args args + want int + wantErr bool + }{ + { + name: "Valid input", + args: args{p: []byte(string(validBlock))}, + want: 110, + wantErr: false, + }, + { + name: "Invalid input", + args: args{p: []byte(string(invalidBlock))}, + want: 0, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := DecodeBytes(tt.args.p) + if (err != nil) != tt.wantErr { + t.Errorf("DecodeBytes() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("DecodeBytes() = %v, want %v", got, tt.want) + } + }) + } +} + +func 
TestDecodeString(t *testing.T) { + type args struct { + s string + } + tests := []struct { + name string + args args + want string + wantErr bool + }{ + { + name: "Valid input", + args: args{s: string(validBlock)}, + want: `{"foo":"bar foo","true":false,"number":42,"object":{"test":"done"},"array":[1,2,3],"url":"https://github.com"}`, + wantErr: false, + }, + { + name: "Invalid input", + args: args{s: string(invalidBlock)}, + want: "", + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := DecodeString(tt.args.s) + if (err != nil) != tt.wantErr { + t.Errorf("DecodeString() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("DecodeString() = %v, want %v", got, tt.want) + } + }) + } +}