diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..333c4d1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +*jp +*algo.md + + +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib +*.test + +# Vendor directory (for dependencies) +vendor/ + +# Logs +logs/ + +# IDE/editor specific files +.vscode/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..681e01c --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +build: + go build -o jp main.go + +test: + go build -o jp main.go && ./test.sh + +run: + go run main.go demo.json diff --git a/README.md b/README.md new file mode 100644 index 0000000..b942802 --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +### JSON PARSER (from the coding challenges) + +- A JSON parser comprises mainly two parts: + - **Lexical Analysis**: In this step, the input is converted into an array of tokens. + - **Syntactic Analysis**: In this step, the tokens are parsed to check whether they align with the rules of the specified language. + +### Application Details + +- Currently, the software outputs whether the given JSON file is valid or not. +- To test the application yourself, you can put your JSON content inside the `demo.json` file and run `make run`. It will tell you whether the content is valid JSON or not. +- Additionally, there are plenty of test cases inside the `tests` directory. You can run them individually using the command `go run main.go relative-filepath`. +- I have also prepared a bash script for running all the test cases inside the `tests` directory. You can run it using the `./test.sh` command, but first make sure you run `make build`. You can also run `make test`, which simply runs these two commands one after the other. + +### Working of the Application + +- In the first step, the input file content is broken into an array of tokens. +- In the next step, the tokens are cleaned up by trimming the whitespace around them. 
+- The tokens are then passed into a parser function that parses the tokens to see if they align with the specified language rules. diff --git a/demo.json b/demo.json new file mode 100644 index 0000000..70ddac0 --- /dev/null +++ b/demo.json @@ -0,0 +1,70 @@ +[ + { + "id": "0001", + "type": "donut", + "name": "Cake", + "ppu": 0.55, + "batters": + { + "batter": + [ + { "id": "1001", "type": "Regular" }, + { "id": "1002", "type": "Chocolate" }, + { "id": "1003", "type": "Blueberry" }, + { "id": "1004", "type": "Devil's Food" } + ] + }, + "topping": + [ + { "id": "5001", "type": "None" }, + { "id": "5002", "type": "Glazed" }, + { "id": "5005", "type": "Sugar" }, + { "id": "5007", "type": "Powdered Sugar" }, + { "id": "5006", "type": "Chocolate with Sprinkles" }, + { "id": "5003", "type": "Chocolate" }, + { "id": "5004", "type": "Maple" } + ] + }, + { + "id": "0002", + "type": "donut", + "name": "Raised", + "ppu": 0.55, + "batters": + { + "batter": + [ + { "id": "1001", "type": "Regular" } + ] + }, + "topping": + [ + { "id": "5001", "type": "None" }, + { "id": "5002", "type": "Glazed" }, + { "id": "5005", "type": "Sugar" }, + { "id": "5003", "type": "Chocolate" }, + { "id": "5004", "type": "Maple" } + ] + }, + { + "id": "0003", + "type": "donut", + "name": "Old Fashioned", + "ppu": 0.55, + "batters": + { + "batter": + [ + { "id": "1001", "type": "Regular" }, + { "id": "1002", "type": "Chocolate" } + ] + }, + "topping": + [ + { "id": "5001", "type": "None" }, + { "id": "5002", "type": "Glazed" }, + { "id": "5003", "type": "Chocolate" }, + { "id": "5004", "type": "Maple" } + ] + } +] diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..50b1703 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/melsonic/json-parser + +go 1.21.6 + +require github.com/golang-collections/collections v0.0.0-20130729185459-604e922904d3 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..3a6eee6 --- /dev/null +++ b/go.sum @@ -0,0 +1,2 @@ 
+github.com/golang-collections/collections v0.0.0-20130729185459-604e922904d3 h1:zN2lZNZRflqFyxVaTIU61KNKQ9C0055u9CAfpmqUvo4= +github.com/golang-collections/collections v0.0.0-20130729185459-604e922904d3/go.mod h1:nPpo7qLxd6XL3hWJG/O60sR8ZKfMCiIoNap5GvD12KU= diff --git a/main.go b/main.go new file mode 100644 index 0000000..04b4dee --- /dev/null +++ b/main.go @@ -0,0 +1,30 @@ +package main + +// _ +// (_)___ ___ _ __ _ __ __ _ _ __ ___ ___ _ __ +// | / __|/ _ \| '_ \ _____| '_ \ / _` | '__/ __|/ _ \ '__| +// | \__ \ (_) | | | |_____| |_) | (_| | | \__ \ __/ | +// _/ |___/\___/|_| |_| | .__/ \__,_|_| |___/\___|_| +// |__/ |_| +// + +import ( + "log" + "os" + + "github.com/melsonic/json-parser/util" +) + +// main reads the JSON file named by the first command-line argument and +// prints whether its content is valid JSON. +func main() { + // os.Args[1] panics when no argument is given, so check the count first. + if len(os.Args) < 2 { + log.Fatalf("usage: %s <json-file>", os.Args[0]) + } + content, err := os.ReadFile(os.Args[1]) + if err != nil { + log.Fatal(err) + } + util.PrintResult(util.Validate(content)) +} diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..73115f2 --- /dev/null +++ b/test.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +for file in tests/* +do + if [ -d $file ] + then + for rfile in $file/* + do + echo "$rfile" + ./jp $rfile + echo "" + done + else + echo "$file" + ./jp $file + echo "" + fi +done + diff --git a/tests/step1/invalid.json b/tests/step1/invalid.json new file mode 100644 index 0000000..e69de29 diff --git a/tests/step1/valid.json b/tests/step1/valid.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/tests/step1/valid.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tests/step2/invalid.json b/tests/step2/invalid.json new file mode 100644 index 0000000..d7e32b8 --- /dev/null +++ b/tests/step2/invalid.json @@ -0,0 +1 @@ +{"key": "value",} \ No newline at end of file diff --git a/tests/step2/invalid2.json b/tests/step2/invalid2.json new file mode 100644 index 0000000..eff13a5 --- /dev/null +++ b/tests/step2/invalid2.json @@ -0,0 +1,4 @@ +{ + "key": "value", + key2: "value" +} \ No newline at end
of file diff --git a/tests/step2/valid.json b/tests/step2/valid.json new file mode 100644 index 0000000..76519fa --- /dev/null +++ b/tests/step2/valid.json @@ -0,0 +1 @@ +{"key": "value"} diff --git a/tests/step2/valid2.json b/tests/step2/valid2.json new file mode 100644 index 0000000..3c88601 --- /dev/null +++ b/tests/step2/valid2.json @@ -0,0 +1,4 @@ +{ + "key": "value", + "key2": "value" +} \ No newline at end of file diff --git a/tests/step3/invalid.json b/tests/step3/invalid.json new file mode 100644 index 0000000..94d2214 --- /dev/null +++ b/tests/step3/invalid.json @@ -0,0 +1,7 @@ +{ + "key1": true, + "key2": False, + "key3": null, + "key4": "value", + "key5": 101 +} \ No newline at end of file diff --git a/tests/step3/valid.json b/tests/step3/valid.json new file mode 100644 index 0000000..6f99bea --- /dev/null +++ b/tests/step3/valid.json @@ -0,0 +1,7 @@ +{ + "key1": true, + "key2": false, + "key3": null, + "key4": "value", + "key5": 101 +} \ No newline at end of file diff --git a/tests/step4/invalid.json b/tests/step4/invalid.json new file mode 100644 index 0000000..304c553 --- /dev/null +++ b/tests/step4/invalid.json @@ -0,0 +1,8 @@ +{ + "key": "value", + "key-n": 101, + "key-o": { + "inner key": "inner value" + }, + "key-l": ['list value'] +} \ No newline at end of file diff --git a/tests/step4/valid.json b/tests/step4/valid.json new file mode 100644 index 0000000..0299c4e --- /dev/null +++ b/tests/step4/valid.json @@ -0,0 +1,6 @@ +{ + "key": "value", + "key-n": 101, + "key-o": {}, + "key-l": [] +} \ No newline at end of file diff --git a/tests/step4/valid2.json b/tests/step4/valid2.json new file mode 100644 index 0000000..f07abfa --- /dev/null +++ b/tests/step4/valid2.json @@ -0,0 +1,8 @@ +{ + "key": "value", + "key-n": 101, + "key-o": { + "inner key": "inner value" + }, + "key-l": ["list value"] +} diff --git a/util/constants.go b/util/constants.go new file mode 100644 index 0000000..d16e30f --- /dev/null +++ b/util/constants.go @@ -0,0 +1,32 @@ 
// ---- util/constants.go ----

// Single-byte forms of the characters the lexer and the validators look for.
const (
	EmptyLineByte          byte = '\n' // newline
	WhiteSpaceByte         byte = ' '
	ColonByte              byte = ':'
	CommaByte              byte = ','
	LeftCurlyBraceByte     byte = '{'
	RightCurlyBraceByte    byte = '}'
	LeftSquareBracketByte  byte = '['
	RightSquareBracketByte byte = ']'
	DoubleQuoteByte        byte = '"'
	BackSlashByte          byte = '\\'
	ForwardSlashByte       byte = '/'
)

// String forms of the structural tokens, as they appear in the token slice
// produced by Lexer.
const (
	LeftCurlyBrace   string = "{"
	RightCurlyBrace  string = "}"
	LeftSquareBrace  string = "["
	RightSquareBrace string = "]"
	Comma            string = ","
	Colon            string = ":"
	DoubleQuote      string = "\""
)

// BackSlashRune is the backslash character as a rune.
const BackSlashRune rune = '\\'

// jsonWhitespace is the set of insignificant whitespace characters JSON
// allows between tokens (RFC 8259, section 2).
const jsonWhitespace = " \t\n\r"

var (
	// CheckSlice lists the structural bytes that Lexer emits as standalone
	// tokens when they occur outside a string.
	CheckSlice = []byte{LeftCurlyBraceByte, LeftSquareBracketByte, ColonByte, CommaByte, RightSquareBracketByte, RightCurlyBraceByte}
	// AllowedCharsAfterEscapeChar lists the single characters that may follow
	// a backslash inside a string. The \uXXXX form is checked separately by
	// IsValidString.
	AllowedCharsAfterEscapeChar = []rune{'"', '\\', '/', 'b', 'f', 'n', 'r', 't'}
)

// ---- util/lexer.go ----

// isStructural reports whether b is one of the bytes listed in CheckSlice.
func isStructural(b byte) bool {
	for _, cs := range CheckSlice {
		if cs == b {
			return true
		}
	}
	return false
}

// Lexer splits content into raw tokens.
//
// Outside a string, every byte in CheckSlice becomes a token of its own and
// flushes whatever was accumulated before it. A double quote opens a string,
// which runs up to and including the next unescaped double quote. All other
// bytes (including whitespace) are accumulated into the current token, so the
// result still needs CleanUp before it is handed to Parser.
//
// Bytes left over when the input ends (a top-level scalar, trailing garbage,
// or an unterminated string) are emitted as a final token so that the parser
// can see and judge them instead of silently losing them.
func Lexer(content []byte) []string {
	var (
		tokens   []string
		current  []byte
		inString bool
		escaped  bool // previous byte was an unconsumed backslash inside a string
	)
	flush := func() {
		if len(current) > 0 {
			tokens = append(tokens, string(current))
			current = current[:0] // string() copied the bytes; reuse the buffer
		}
	}
	for _, b := range content {
		switch {
		case inString:
			current = append(current, b)
			switch {
			case escaped:
				// This byte is the target of an escape; it cannot end the
				// string or start another escape (handles `\\"` correctly).
				escaped = false
			case b == BackSlashByte:
				escaped = true
			case b == DoubleQuoteByte:
				flush()
				inString = false
			}
		case b == DoubleQuoteByte:
			current = append(current, b)
			inString = true
		case isStructural(b):
			flush()
			tokens = append(tokens, string(b))
		default:
			current = append(current, b)
		}
	}
	flush()
	return tokens
}

// ---- util/parser.go ----

// isValidScalar reports whether token is a string, number, boolean or null.
func isValidScalar(token string) bool {
	return IsValidString(token) || IsValidNumber(token) || IsValidBoolean(token) || IsValidNull(token)
}

// Parser consumes exactly one JSON value from the front of lexToken.
//
// It returns the tokens that follow the value and whether the value was
// well-formed. An empty token slice is not a value, so it yields false.
func Parser(lexToken []string) ([]string, bool) {
	if len(lexToken) == 0 {
		return lexToken, false
	}
	token, rest := lexToken[0], lexToken[1:]
	switch token {
	case LeftCurlyBrace:
		return IsValidObject(rest)
	case LeftSquareBrace:
		return IsValidArray(rest)
	default:
		return rest, isValidScalar(token)
	}
}

// ---- util/util.go ----

// Validate reports whether content is exactly one well-formed JSON value,
// optionally surrounded by whitespace. Empty input and input with tokens left
// over after the first value are both invalid.
func Validate(content []byte) bool {
	tokens := CleanUp(Lexer(content))
	rest, ok := Parser(tokens)
	return ok && len(rest) == 0
}

// CleanUp trims JSON whitespace (space, tab, newline, carriage return) from
// both ends of every token and drops tokens that become empty. Other
// characters, such as form feed, are left in place so the parser rejects them.
func CleanUp(lexToken []string) []string {
	var trimmed []string
	for _, tok := range lexToken {
		if t := strings.Trim(tok, jsonWhitespace); t != "" {
			trimmed = append(trimmed, t)
		}
	}
	return trimmed
}

// PrintResult writes a one-line verdict for result to standard output.
func PrintResult(result bool) {
	if result {
		fmt.Printf("The file is a valid\n")
	} else {
		fmt.Printf("The file is invalid\n")
	}
}

// ---- util/valid.go ----

// IsValidObject validates the remainder of an object whose opening "{" has
// already been consumed. It returns the tokens following the closing "}" and
// true on success; on failure the returned slice is not meaningful.
func IsValidObject(lexToken []string) ([]string, bool) {
	if len(lexToken) == 0 {
		return lexToken, false
	}
	if lexToken[0] == RightCurlyBrace {
		return lexToken[1:], true
	}
	for {
		// A member needs at least a key and a colon before its value.
		if len(lexToken) < 2 {
			return lexToken, false
		}
		key, colon := lexToken[0], lexToken[1]
		if !IsValidString(key) || colon != Colon {
			return lexToken, false
		}

		// The member value is any JSON value; Parser also rejects a missing one.
		var ok bool
		lexToken, ok = Parser(lexToken[2:])
		if !ok || len(lexToken) == 0 {
			return lexToken, false
		}

		separator := lexToken[0]
		lexToken = lexToken[1:]
		switch separator {
		case RightCurlyBrace:
			return lexToken, true
		case Comma:
			// Another member must follow; a trailing comma fails the key check.
		default:
			return lexToken, false
		}
	}
}

// IsValidArray validates the remainder of an array whose opening "[" has
// already been consumed. It returns the tokens following the closing "]" and
// true on success; on failure the returned slice is not meaningful.
func IsValidArray(lexToken []string) ([]string, bool) {
	if len(lexToken) == 0 {
		return lexToken, false
	}
	if lexToken[0] == RightSquareBrace {
		return lexToken[1:], true
	}
	for {
		var ok bool
		lexToken, ok = Parser(lexToken)
		// An element must be followed by "," or "]"; running out of tokens
		// here means the array was never closed.
		if !ok || len(lexToken) == 0 {
			return lexToken, false
		}

		separator := lexToken[0]
		lexToken = lexToken[1:]
		switch separator {
		case RightSquareBrace:
			return lexToken, true
		case Comma:
			// Another element must follow; a trailing comma fails in Parser.
		default:
			return lexToken, false
		}
	}
}

// isAllowedEscape reports whether ch is listed in AllowedCharsAfterEscapeChar.
func isAllowedEscape(ch rune) bool {
	for _, allowed := range AllowedCharsAfterEscapeChar {
		if allowed == ch {
			return true
		}
	}
	return false
}

// IsValidString reports whether input is a complete JSON string token: it
// starts and ends with an unescaped double quote, contains no raw control
// characters or unescaped quotes in between, and every backslash introduces
// either one of AllowedCharsAfterEscapeChar or a \u followed by four hex digits.
func IsValidString(input string) bool {
	n := len(input)
	if n < 2 || input[0] != DoubleQuoteByte || input[n-1] != DoubleQuoteByte {
		return false
	}
	// Scanning bytes is safe: every byte of a multi-byte UTF-8 sequence is
	// >= 0x80 and so can never be mistaken for a quote, backslash or control.
	body := input[1 : n-1]
	for i := 0; i < len(body); i++ {
		c := body[i]
		switch {
		case c < 0x20:
			return false // control characters must be written as escapes
		case c == DoubleQuoteByte:
			return false // an unescaped quote would have ended the string
		case c == BackSlashByte:
			i++
			if i >= len(body) {
				// The backslash escapes the final quote: string is unterminated.
				return false
			}
			if body[i] == 'u' {
				if i+5 > len(body) {
					return false
				}
				// Base-16 ParseUint accepts hex digits only (no sign, prefix
				// or underscores), which is exactly the \uXXXX requirement.
				if _, err := strconv.ParseUint(body[i+1:i+5], 16, 16); err != nil {
					return false
				}
				i += 4
			} else if !isAllowedEscape(rune(body[i])) {
				return false
			}
		}
	}
	return true
}

// skipDigits returns the index of the first byte at or after i in s that is
// not an ASCII digit.
func skipDigits(s string, i int) int {
	for i < len(s) && s[i] >= '0' && s[i] <= '9' {
		i++
	}
	return i
}

// IsValidNumber reports whether input matches the JSON number grammar:
//
//	-? (0 | [1-9][0-9]*) (. [0-9]+)? ([eE] [+-]? [0-9]+)?
//
// Unlike strconv parsing this rejects Go-only spellings such as "+1", "01",
// ".5", "NaN" and "Inf", and it does not reject numbers merely for being out
// of float64 range.
func IsValidNumber(input string) bool {
	i, n := 0, len(input)
	if i < n && input[i] == '-' {
		i++
	}

	// Integer part: a lone zero, or a non-zero digit followed by digits.
	switch {
	case i >= n:
		return false
	case input[i] == '0':
		i++
	case input[i] >= '1' && input[i] <= '9':
		i = skipDigits(input, i)
	default:
		return false
	}

	// Optional fraction: "." followed by at least one digit.
	if i < n && input[i] == '.' {
		end := skipDigits(input, i+1)
		if end == i+1 {
			return false
		}
		i = end
	}

	// Optional exponent: e/E, optional sign, at least one digit.
	if i < n && (input[i] == 'e' || input[i] == 'E') {
		i++
		if i < n && (input[i] == '+' || input[i] == '-') {
			i++
		}
		end := skipDigits(input, i)
		if end == i {
			return false
		}
		i = end
	}
	return i == n
}

// IsValidBoolean reports whether input is the literal true or false.
func IsValidBoolean(input string) bool {
	return input == "true" || input == "false"
}

// IsValidNull reports whether input is the literal null.
func IsValidNull(input string) bool {
	return input == "null"
}