From cffe9d3d464055682ecde81f5de329b7eea37c55 Mon Sep 17 00:00:00 2001
From: Akshay Bharambe <akshaybharambe14@gmail.com>
Date: Wed, 12 Feb 2020 22:56:40 +0530
Subject: [PATCH] Add: DecodeBytes and DecodeString APIs

1.  Add: DecodeBytes API to convert jsonc byte array.
2.  Add: DecodeString API to convert jsonc string.
3. Add: Tests. 100% Coverage.
4. Refactor: make decode() reusable.
5. Update: Readme and examples.
---
 README.md          |   8 ++-
 examples/README.md |   3 ++
 jsonc.go           |  77 ++++++++++++++++++++++-----
 jsonc_test.go      | 128 ++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 200 insertions(+), 16 deletions(-)
 create mode 100644 examples/README.md

diff --git a/README.md b/README.md
index 16593bd..811bf69 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,10 @@ Gets converted to (spaces omitted)
 { "string": "foo", "bool": false, "number": 42, "array": [1, 2, 3] }
 ```
 
+## Motivation
+
+[jsonc](https://github.com/muhammadmuzzammil1998/jsonc) is great, but this package provides significant performance improvements and a simple API that works with the standard library.
+
 ## Usage
 
 Get this package
@@ -43,8 +47,8 @@ go get github.com/akshaybharambe14/go-jsonc
 
 ## Example
 
-see [examples](https://github.com/akshaybharambe14/go-jsonc/examples)
+see [examples](https://github.com/akshaybharambe14/go-jsonc/tree/master/examples)
 
 ## License
 
-`go-jsonc` is available under [MIT License](License.md)
+`go-jsonc` is open source and available under [MIT License](License.md)
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..c643159
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,3 @@
+# JSONC examples
+
+Examples for go-jsonc
diff --git a/jsonc.go b/jsonc.go
index 1225445..79cffb5 100644
--- a/jsonc.go
+++ b/jsonc.go
@@ -3,6 +3,8 @@ package jsonc
 import (
 	"errors"
 	"io"
+	"reflect"
+	"unsafe"
 )
 
 type (
@@ -41,10 +43,10 @@ const (
 )
 
 var (
-	ErrUnexpectedEndOfJSON = errors.New("unexpected end of json")
+	ErrUnexpectedEndOfComment = errors.New("unexpected end of comment")
 )
 
-// New a new io.Reader wrapping the provided one.
+// NewDecoder returns a new Decoder wrapping the provided io.Reader. The returned decoder implements io.Reader.
 func NewDecoder(r io.Reader) *Decoder {
 	return &Decoder{
 		c: comment{},
@@ -52,31 +54,31 @@ func NewDecoder(r io.Reader) *Decoder {
 	}
 }
 
-// Read reads from underlying writer and processes the stream to omit comments.
+// Read reads from underlying reader and processes the stream to omit comments.
 // A single read doesn't guaranttee a valid JSON. Depends on length of passed slice.
 //
-// Produces ErrUnexpectedEndOfJSON for incomplete comments
+// Produces ErrUnexpectedEndOfComment for incomplete comments.
 func (d *Decoder) Read(p []byte) (int, error) {
 
 	n, err := d.r.Read(p)
 	if err != nil {
-		return n, err
+		return 0, err
 	}
 
 	shortRead := n <= len(p)
-	n = d.decode(p[:n])
+	n = decode(p[:n], &d.c)
 
-	if shortRead && d.c.state != stopped {
-		return 0, ErrUnexpectedEndOfJSON
+	if shortRead && !d.c.complete() {
+		return 0, ErrUnexpectedEndOfComment
 	}
 
 	return n, nil
 }
 
-func (d *Decoder) decode(p []byte) int {
+func decode(p []byte, c *comment) int {
 	i := 0
 	for _, s := range p {
-		if d.c.handle(s) {
+		if c.handle(s) {
 			p[i] = s
 			i++
 		}
@@ -124,17 +126,66 @@ func (c *comment) handle(s byte) bool {
 		}
 
 		if s == newLine && !c.multiLn {
-			c.state = stopped
+			c.reset()
 		}
 
 	case canStop:
 
 		if s == fwdSlash || s == charN {
-			c.state = stopped
-			c.multiLn = false
+			c.reset()
 		}
 
 	}
 
 	return false
 }
+
+// reset puts the tracker back into the stopped state and clears the multi-line flag.
+func (c *comment) reset() {
+	c.state, c.multiLn = stopped, false
+}
+
+// complete reports whether the tracker is in the stopped state.
+// DecodeBytes and Read treat any other state as an unterminated comment.
+func (c *comment) complete() bool { return stopped == c.state }
+
+// DecodeBytes strips comments from the commented json held in p, rewriting p in place.
+// On success only p[:n] holds the decoded output, where n is the returned count.
+//
+// Invalid json is not reported. A non-nil error is always ErrUnexpectedEndOfComment,
+// returned with a count of 0 when the comment tracker is not stopped after the last byte.
+// The returned json must be checked for validity by the caller.
+func DecodeBytes(p []byte) (int, error) {
+	var tracker comment
+	written := decode(p, &tracker)
+
+	if tracker.complete() {
+		return written, nil
+	}
+
+	return 0, ErrUnexpectedEndOfComment
+}
+
+// DecodeString decodes passed commented json to normal json.
+// The result shares memory with a private copy of s instead of being copied a second time, which saves an allocation for large json.
+//
+// The error doesn't include errors related to invalid json. If not nil, it must be ErrUnexpectedEndOfComment.
+//
+// The returned json must be checked for validity.
+func DecodeString(s string) (string, error) {
+	p := []byte(s)
+
+	n, err := DecodeBytes(p)
+	if err != nil {
+		return "", err
+	}
+
+	// p is never written after this point, so out may share its backing array without a copy.
+	// Headers are used only as pointers to a real slice and string; a stand-alone header does not keep Data alive.
+	var out string
+	sh := (*reflect.SliceHeader)(unsafe.Pointer(&p))
+	oh := (*reflect.StringHeader)(unsafe.Pointer(&out))
+	oh.Data = sh.Data
+	oh.Len = n
+	return out, nil
+}
diff --git a/jsonc_test.go b/jsonc_test.go
index c279ba8..02824ce 100644
--- a/jsonc_test.go
+++ b/jsonc_test.go
@@ -2,15 +2,22 @@ package jsonc
 
 import (
 	"bytes"
+	"io"
+	"reflect"
 	"testing"
+	"testing/iotest"
 )
 
-func ts(b []byte) *Decoder { return &Decoder{r: bytes.NewBuffer(b)} }
+func ts(b []byte) *Decoder    { return &Decoder{r: bytes.NewBuffer(b)} }
+func tsErr(b []byte) *Decoder { return &Decoder{r: iotest.DataErrReader(bytes.NewBuffer(b))} }
 
 var (
 	validSingle   = []byte(`{"foo": // this is a single line comment\n"bar foo", "true": false, "number": 42, "object": { "test": "done" }, "array" : [1, 2, 3], "url" : "https://github.com" }`)
 	invalidSingle = []byte(`{"foo": // this is a single line comment "bar foo", "true": false, "number": 42, "object": { "test": "done" }, "array" : [1, 2, 3], "url" : "https://github.com" }`)
 
+	validSingleESC   = []byte("{\"foo\": // this is a single line comment\n\"bar foo\", \"true\": false, \"number\": 42, \"object\": { \"test\": \"done\" }, \"array\" : [1, 2, 3], \"url\" : \"https://github.com\" }")
+	invalidSingleESC = []byte("{\"foo\": // this is a single line comment\"bar foo\", \"true\": false, \"number\": 42, \"object\": { \"test\": \"done\" }, \"array\" : [1, 2, 3], \"url\" : \"https://github.com\" }")
+
 	validBlock   = []byte(`{"foo": /* this is a block comment */ "bar foo", "true": false, "number": 42, "object": { "test": "done" }, "array" : [1, 2, 3], "url" : "https://github.com" }`)
 	invalidBlock = []byte(`{"foo": /* this is a block comment "bar foo", "true": false, "number": 42, "object": { "test": "done" }, "array" : [1, 2, 3], "url" : "https://github.com" }`)
 )
@@ -42,6 +49,20 @@ func Test_Decoder_Read(t *testing.T) {
 			want:    0,
 			wantErr: true,
 		},
+		{
+			name:    "Valid single line comment (escaped json)",
+			d:       ts(validSingleESC),
+			args:    args{p: make([]byte, len(validSingleESC))},
+			want:    110, // (163(total) - 34(comments) - 19(spaces))
+			wantErr: false,
+		},
+		{
+			name:    "Invalid single line comment (escaped json)",
+			d:       ts(invalidSingleESC),
+			args:    args{p: make([]byte, len(invalidSingleESC))},
+			want:    0,
+			wantErr: true,
+		},
 		{
 			name:    "Valid block comment",
 			d:       ts(validBlock),
@@ -56,6 +77,13 @@ func Test_Decoder_Read(t *testing.T) {
 			want:    0,
 			wantErr: true,
 		},
+		{
+			name:    "Invalid Read",
+			d:       tsErr(validBlock),
+			args:    args{p: make([]byte, len(validBlock))},
+			want:    0,
+			wantErr: true,
+		},
 	}
 
 	for _, tt := range tests {
@@ -71,3 +99,101 @@ func Test_Decoder_Read(t *testing.T) {
 		})
 	}
 }
+
+// TestNewDecoder checks that wrapping a nil reader yields a Decoder deep-equal to the zero value.
+func TestNewDecoder(t *testing.T) {
+	cases := []struct {
+		name   string
+		reader io.Reader
+		want   *Decoder
+	}{
+		{
+			name:   "Valid Decoder",
+			reader: nil,
+			want:   &Decoder{},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := NewDecoder(tc.reader)
+			if !reflect.DeepEqual(got, tc.want) {
+				t.Errorf("NewDecoder() = %v, want %v", got, tc.want)
+			}
+		})
+	}
+}
+
+// TestDecodeBytes checks the byte count and error reported by DecodeBytes
+// for a terminated and an unterminated block comment.
+func TestDecodeBytes(t *testing.T) {
+	cases := []struct {
+		name    string
+		input   []byte
+		wantN   int
+		wantErr bool
+	}{
+		{
+			name:    "Valid input",
+			input:   []byte(string(validBlock)),
+			wantN:   110,
+			wantErr: false,
+		},
+		{
+			name:    "Invalid input",
+			input:   []byte(string(invalidBlock)),
+			wantN:   0,
+			wantErr: true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			n, err := DecodeBytes(tc.input)
+			if gotErr := err != nil; gotErr != tc.wantErr {
+				t.Errorf("DecodeBytes() error = %v, wantErr %v", err, tc.wantErr)
+				return
+			}
+			if n != tc.wantN {
+				t.Errorf("DecodeBytes() = %v, want %v", n, tc.wantN)
+			}
+		})
+	}
+}
+
+// TestDecodeString checks the decoded text and error reported by DecodeString
+// for a terminated and an unterminated block comment.
+func TestDecodeString(t *testing.T) {
+	cases := []struct {
+		name    string
+		input   string
+		want    string
+		wantErr bool
+	}{
+		{
+			name:    "Valid input",
+			input:   string(validBlock),
+			want:    `{"foo":"bar foo","true":false,"number":42,"object":{"test":"done"},"array":[1,2,3],"url":"https://github.com"}`,
+			wantErr: false,
+		},
+		{
+			name:    "Invalid input",
+			input:   string(invalidBlock),
+			want:    "",
+			wantErr: true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := DecodeString(tc.input)
+			if gotErr := err != nil; gotErr != tc.wantErr {
+				t.Errorf("DecodeString() error = %v, wantErr %v", err, tc.wantErr)
+				return
+			}
+			if got != tc.want {
+				t.Errorf("DecodeString() = %v, want %v", got, tc.want)
+			}
+		})
+	}
+}