Skip to content

Commit

Permalink
addressed PR comments
Browse files Browse the repository at this point in the history
Signed-off-by: Kenrick Yap <[email protected]>
  • Loading branch information
kenrickyap committed Feb 14, 2025
1 parent 95e996b commit 591fb71
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 33 deletions.
4 changes: 1 addition & 3 deletions core/src/main/java/org/opensearch/sql/utils/JsonUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
package org.opensearch.sql.utils;

import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_FALSE;
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_MISSING;
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_NULL;
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE;

Expand Down Expand Up @@ -93,10 +92,9 @@ public static ExprValue castJson(ExprValue json) {
* @return ExprValue of value at given path of json string.
*/
public static ExprValue extractJson(ExprValue json, ExprValue... paths) {
List<ExprValue> resultList = new ArrayList<>();
List<ExprValue> resultList = new ArrayList<>(paths.length);

for (ExprValue path : paths) {
System.out.println("Processing path: " + path);
if (json.isNull() || json.isMissing()) {
return json;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_NULL;
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE;

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.extension.TestInstantiationException;
import org.mockito.junit.jupiter.MockitoExtension;
import org.opensearch.sql.data.model.ExprBooleanValue;
import org.opensearch.sql.data.model.ExprCollectionValue;
Expand Down Expand Up @@ -234,7 +236,7 @@ void json_returnsSemanticCheckException() {
@Test
void json_extract_search() {
ExprValue expected = new ExprIntegerValue(1);
execute_extract_json(expected, "{\"a\":1}", "$.a");
assert_equals_extract_json(expected, "{\"a\":1}", "$.a");
}

@Test
Expand All @@ -256,17 +258,17 @@ void json_extract_search_arrays() {
// extract specific index from JSON list
for (int i = 0; i < expectedExprValues.size(); i++) {
String path = String.format("$.a[%d]", i);
execute_extract_json(expectedExprValues.get(i), jsonArray, path);
assert_equals_extract_json(expectedExprValues.get(i), jsonArray, path);
}

// extract nested object
ExprValue nestedExpected =
ExprTupleValue.fromExprValueMap(Map.of("d", new ExprIntegerValue(1)));
execute_extract_json(nestedExpected, jsonArray, "$.a[5].c");
assert_equals_extract_json(nestedExpected, jsonArray, "$.a[5].c");

// extract * from JSON list
ExprValue starExpected = new ExprCollectionValue(expectedExprValues);
execute_extract_json(starExpected, jsonArray, "$.a[*]");
assert_equals_extract_json(starExpected, jsonArray, "$.a[*]");
}

@Test
Expand All @@ -285,10 +287,11 @@ void json_extract_returns_null() {
"false",
"");

jsonStrings.forEach(str -> execute_extract_json(LITERAL_NULL, str, "$.a.path_not_found_key"));
jsonStrings.forEach(
str -> assert_equals_extract_json(LITERAL_NULL, str, "$.a.path_not_found_key"));

// null string literal
assertEquals(LITERAL_NULL, DSL.jsonExtract(DSL.literal("null"), DSL.literal("$.a")).valueOf());
assert_equals_extract_json(LITERAL_NULL, "null", "$.a");

// null json
assertEquals(
Expand All @@ -300,7 +303,7 @@ void json_extract_returns_null() {
DSL.jsonExtract(DSL.literal(LITERAL_MISSING), DSL.literal("$.a")).valueOf());

// array out of bounds
execute_extract_json(LITERAL_NULL, "{\"a\":[1,2,3]}", "$.a[4]");
assert_equals_extract_json(LITERAL_NULL, "{\"a\":[1,2,3]}", "$.a[4]");
}

@Test
Expand Down Expand Up @@ -334,14 +337,10 @@ void json_extract_throws_SemanticCheckException() {
@Test
void json_extract_throws_ExpressionEvaluationException() {
// null path
assertThrows(
ExpressionEvaluationException.class,
() -> DSL.jsonExtract(DSL.literal("{\"a\":1}"), DSL.literal(LITERAL_NULL)).valueOf());
assert_throws_extract_json(ExpressionEvaluationException.class, "{\"a\":1}", LITERAL_NULL);

// missing path
assertThrows(
ExpressionEvaluationException.class,
() -> DSL.jsonExtract(DSL.literal("{\"a\":1}"), DSL.literal(LITERAL_MISSING)).valueOf());
assert_throws_extract_json(ExpressionEvaluationException.class, "{\"a\":1}", LITERAL_MISSING);
}

@Test
Expand All @@ -350,21 +349,59 @@ void json_extract_search_list_of_paths() {
"{\"foo\": \"foo\", \"fuzz\": true, \"bar\": 1234, \"bar2\": 12.34, \"baz\": null, "
+ "\"obj\": {\"internal\": \"value\"}, \"arr\": [\"string\", true, null]}";

ExprValue expected =
// scalar results with one invalid path
ExprValue expected_scalar_results =
new ExprCollectionValue(
List.of(new ExprStringValue("foo"), new ExprFloatValue(12.34), LITERAL_NULL));
Expression pathExpr1 = DSL.literal(ExprValueUtils.stringValue("$.foo"));
Expression pathExpr2 = DSL.literal(ExprValueUtils.stringValue("$.bar2"));
Expression pathExpr3 = DSL.literal(ExprValueUtils.stringValue("$.potato"));
Expression jsonExpr = DSL.literal(ExprValueUtils.stringValue(objectJson));
ExprValue actual = DSL.jsonExtract(jsonExpr, pathExpr1, pathExpr2, pathExpr3).valueOf();
assertEquals(expected, actual);

assert_equals_extract_json(expected_scalar_results, objectJson, "$.foo", "$.bar2", "$.potato");

ExprValue expected_multivalued_results =
new ExprCollectionValue(
List.of(
new ExprCollectionValue(
List.of(new ExprStringValue("string"), LITERAL_TRUE, LITERAL_NULL)),
ExprTupleValue.fromExprValueMap(Map.of("internal", new ExprStringValue("value"))),
new ExprFloatValue(12.34)));

// path returns array and struct
assert_equals_extract_json(
expected_multivalued_results, objectJson, "$.arr", "$.obj", "$.bar2");

// path returns multivalued result
assert_equals_extract_json(
expected_multivalued_results, objectJson, "$.arr[*]", "$.obj", "$.bar2");
}

private static void execute_extract_json(ExprValue expected, String json, String path) {
Expression pathExpr = DSL.literal(ExprValueUtils.stringValue(path));
Expression jsonExpr = DSL.literal(ExprValueUtils.stringValue(json));
ExprValue actual = DSL.jsonExtract(jsonExpr, pathExpr).valueOf();
private static void assert_equals_extract_json(ExprValue expected, Object json, Object... paths) {
ExprValue actual = execute_extract_json(json, paths);
assertEquals(expected, actual);
}

private static <T extends Throwable> void assert_throws_extract_json(
Class<T> expectedError, Object json, Object... paths) {
assertThrows(expectedError, () -> execute_extract_json(json, paths));
}

private static ExprValue execute_extract_json(Object json, Object[] paths) {
Expression jsonExpr = object_to_expr(json);
List<Expression> pathExpressions =
Arrays.stream(paths).map(JsonFunctionsTest::object_to_expr).toList();

return switch (paths.length) {
case 1 -> DSL.jsonExtract(jsonExpr, pathExpressions.getFirst()).valueOf();
case 2 -> DSL.jsonExtract(jsonExpr, pathExpressions.getFirst(), pathExpressions.get(1))
.valueOf();
case 3 -> DSL.jsonExtract(
jsonExpr, pathExpressions.getFirst(), pathExpressions.get(1), pathExpressions.get(2))
.valueOf();
default -> throw new TestInstantiationException("Invalid number of paths provided.");
};
}

private static Expression object_to_expr(Object val) {
return (val instanceof String)
? DSL.literal(ExprValueUtils.stringValue((String) val))
: DSL.literal((ExprValue) val);
}
}
13 changes: 7 additions & 6 deletions docs/user/ppl/functions/json.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,18 @@ ____________
Description
>>>>>>>>>>>

Usage: `json_extract(doc, path[, path])` Extracts a JSON value from a json document based on the path specified.
Usage: `json_extract(doc, path[, path[, path])` Extracts a JSON value from a json document based on the path(s) specified.

Argument type: STRING, STRING
Argument type: STRING, STRING[, STRING[, STRING]]

Return type: STRING/BOOLEAN/DOUBLE/INTEGER/NULL/STRUCT/ARRAY

- Up to 3 paths can be provided, and results of each `path` with be returned in an ARRAY.
- Returns an ARRAY if `path` points to multiple results (e.g. $.a[*]) or if the `path` points to an array.
- Up to 3 paths can be provided, and results of all possible `path`s will be returned in an ARRAY.
- If only one `path` is provided, returns an ARRAY if `path` points to multiple results (e.g. $.a[*]) or if the `path` points to an array.
- Return null if `path` is not valid, or if JSON `doc` is MISSING or NULL.
- Throws SemanticCheckException if `doc` or `path` is malformed.
- Throws ExpressionEvaluationException if `path` is missing.
- If multiple paths are provided with paths that are not valid, will return an ARRAY where results of invalid paths are null.
- Throws SemanticCheckException if `doc` or any `path` is malformed.
- Throws ExpressionEvaluationException if any `path` is missing.

Example::

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.json.JSONArray;
import org.json.JSONObject;
import org.junit.jupiter.api.Test;
import org.opensearch.client.ResponseException;

public class JsonFunctionsIT extends PPLIntegTestCase {
@Override
Expand Down Expand Up @@ -130,6 +131,20 @@ public void test_json() throws IOException {
new JSONObject(Map.of("a", "1", "b", List.of(Map.of("c", "2"), Map.of("c", "3"))))));
}

@Test
public void test_json_throws_SemanticCheckException() throws IOException {
// Invalid json provided
try {
executeQuery(
String.format(
"source=%s | where not json_valid(json_string) | eval"
+ " casted=json(json_string) | fields test_name, casted",
TEST_INDEX_JSON_TEST));
} catch (ResponseException invalidJsonException) {
assertTrue(invalidJsonException.getMessage().contains("SemanticCheckException"));
}
}

@Test
public void test_cast_json_scalar_to_type() throws IOException {
// cast to integer
Expand Down Expand Up @@ -217,4 +232,74 @@ public void test_json_extract() throws IOException {
rows("json empty string", null),
rows("json nested list", new JSONArray(List.of(Map.of("c", "2"), Map.of("c", "3")))));
}

@Test
public void test_json_extract_multiple_paths() throws IOException {
JSONObject resultTwoPaths =
executeQuery(
String.format(
"source=%s | where test_name = 'json nested list' | eval"
+ " extracted=json_extract(json_string, '$.a', '$.b') | fields test_name,"
+ " extracted",
TEST_INDEX_JSON_TEST));
verifySchema(
resultTwoPaths,
schema("test_name", null, "string"),
schema("extracted", null, "undefined"));
verifyDataRows(
resultTwoPaths,
rows(
"json nested list",
List.of("1", new JSONArray(List.of(Map.of("c", "2"), Map.of("c", "3"))))));

JSONObject resultThreePaths =
executeQuery(
String.format(
"source=%s | where test_name = 'json nested list' | eval"
+ " extracted=json_extract(json_string, '$.a', '$.b[0].c', '$.b[1].c') | fields"
+ " test_name, extracted",
TEST_INDEX_JSON_TEST));
verifySchema(
resultThreePaths,
schema("test_name", null, "string"),
schema("extracted", null, "undefined"));
verifyDataRows(resultThreePaths, rows("json nested list", List.of("1", "2", "3")));
}

@Test
public void test_json_extract_throws_SemanticCheckException() throws IOException {
// Invalid json provided
try {
executeQuery(
String.format(
"source=%s | where not json_valid(json_string) | eval"
+ " extracted=json_extract(json_string, '$.a') | fields test_name, extracted",
TEST_INDEX_JSON_TEST));
} catch (ResponseException invalidJsonException) {
assertTrue(invalidJsonException.getMessage().contains("SemanticCheckException"));
}

// Invalid path provided
try {
executeQuery(
String.format(
"source=%s | where test_name = 'json nested list' | eval"
+ " extracted=json_extract(json_string, '$a') | fields test_name, extracted",
TEST_INDEX_JSON_TEST));
} catch (ResponseException invalidPathException) {
assertTrue(invalidPathException.getMessage().contains("SemanticCheckException"));
}

// Invalid path with multiple paths provided
try {
executeQuery(
String.format(
"source=%s | where test_name = 'json nested list' | eval"
+ " extracted=json_extract(json_string, '$a', '$.b') | fields test_name,"
+ " extracted",
TEST_INDEX_JSON_TEST));
} catch (ResponseException invalidPathException) {
assertTrue(invalidPathException.getMessage().contains("SemanticCheckException"));
}
}
}

0 comments on commit 591fb71

Please sign in to comment.