From 6114478c9cbaac920d88d1167e6d7c54d394ab09 Mon Sep 17 00:00:00 2001 From: cip999 Date: Fri, 13 Aug 2021 18:19:05 +0200 Subject: [PATCH 1/4] Improved support of jdbc format for nested fields in SELECT statements --- .../sql/legacy/PrettyFormatResponseIT.java | 29 ++++ .../sql/legacy/SQLIntegTestCase.java | 7 +- .../sql/legacy/TestUtils.java | 5 + .../sql/legacy/TestsConstants.java | 1 + .../executor/format/SelectResultSet.java | 144 ++++++++---------- 5 files changed, 108 insertions(+), 78 deletions(-) diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/PrettyFormatResponseIT.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/PrettyFormatResponseIT.java index 5039e9e61e..5f6378ff7a 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/PrettyFormatResponseIT.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/PrettyFormatResponseIT.java @@ -76,6 +76,7 @@ protected void init() throws Exception { loadIndex(Index.PHRASE); loadIndex(Index.GAME_OF_THRONES); loadIndex(Index.NESTED); + loadIndex(Index.BOOKS); } @Override @@ -518,6 +519,34 @@ public void fieldOrderOther() throws IOException { testFieldOrder(expectedFields, expectedValues); } + @Test + public void selectSubSubFieldOfNestedField() throws IOException { + JSONObject response = executeQuery( + String.format(Locale.ROOT, "SELECT nested(authors.info.name, authors), " + + "performance.revenue " + + "FROM %s", TestsConstants.TEST_INDEX_BOOKS)); + + List fields = Arrays.asList("authors.info.name", "performance.revenue"); + JSONArray dataRows = getDataRows(response); + assertContainsColumns(getSchema(response), fields); + assertContainsData(dataRows, fields); + assertEquals(3, dataRows.length()); + } + + @Test + public void selectMultipleNestedFields() throws IOException { + JSONObject response = executeQuery( + String.format(Locale.ROOT, "SELECT nested(authors.info.name, authors), " + + 
"nested(performance.sells.year) " + + "FROM %s", TestsConstants.TEST_INDEX_BOOKS)); + + List fields = Arrays.asList("authors.info.name", "performance.sells.year"); + JSONArray dataRows = getDataRows(response); + assertContainsColumns(getSchema(response), fields); + assertContainsData(dataRows, fields); + assertEquals(4, dataRows.length()); + } + private void testFieldOrder(final String[] expectedFields, final Object[] expectedValues) throws IOException { diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java index 9a10d3f054..df10a4f1bf 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java @@ -41,6 +41,7 @@ import static com.amazon.opendistroforelasticsearch.sql.legacy.TestUtils.getResponseBody; import static com.amazon.opendistroforelasticsearch.sql.legacy.TestUtils.getStringIndexMapping; import static com.amazon.opendistroforelasticsearch.sql.legacy.TestUtils.getWeblogsIndexMapping; +import static com.amazon.opendistroforelasticsearch.sql.legacy.TestUtils.getBooksIndexMapping; import static com.amazon.opendistroforelasticsearch.sql.legacy.TestUtils.isIndexExist; import static com.amazon.opendistroforelasticsearch.sql.legacy.TestUtils.loadDataByRestClient; import static com.amazon.opendistroforelasticsearch.sql.legacy.plugin.RestSqlAction.CURSOR_CLOSE_ENDPOINT; @@ -556,7 +557,11 @@ public enum Index { DATA_TYPE_NONNUMERIC(TestsConstants.TEST_INDEX_DATATYPE_NONNUMERIC, "_doc", getDataTypeNonnumericIndexMapping(), - "src/test/resources/datatypes.json"); + "src/test/resources/datatypes.json"), + BOOKS(TestsConstants.TEST_INDEX_BOOKS, + "nestedType", + getBooksIndexMapping(), + "src/test/resources/books.json"); private final String name; private final String type; 
diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestUtils.java index 1bb43bfb8d..d7662cd494 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestUtils.java @@ -253,6 +253,11 @@ public static String getDataTypeNonnumericIndexMapping() { return getMappingFile(mappingFile); } + public static String getBooksIndexMapping() { + String mappingFile = "books_index_mapping.json"; + return getMappingFile(mappingFile); + } + public static void loadBulk(Client client, String jsonPath, String defaultIndex) throws Exception { System.out.println(String.format("Loading file %s into elasticsearch cluster", jsonPath)); diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestsConstants.java index a4aec50d98..120de484e0 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestsConstants.java @@ -60,6 +60,7 @@ public class TestsConstants { public final static String TEST_INDEX_STRINGS = TEST_INDEX + "_strings"; public final static String TEST_INDEX_DATATYPE_NUMERIC = TEST_INDEX + "_datatypes_numeric"; public final static String TEST_INDEX_DATATYPE_NONNUMERIC = TEST_INDEX + "_datatypes_nonnumeric"; + public final static String TEST_INDEX_BOOKS = TEST_INDEX + "_nested"; public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; public final static String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS"; diff --git a/legacy/src/main/java/com/amazon/opendistroforelasticsearch/sql/legacy/executor/format/SelectResultSet.java 
b/legacy/src/main/java/com/amazon/opendistroforelasticsearch/sql/legacy/executor/format/SelectResultSet.java index a6f4dadeef..231236a5d4 100644 --- a/legacy/src/main/java/com/amazon/opendistroforelasticsearch/sql/legacy/executor/format/SelectResultSet.java +++ b/legacy/src/main/java/com/amazon/opendistroforelasticsearch/sql/legacy/executor/format/SelectResultSet.java @@ -579,6 +579,7 @@ private void extractData() { if (queryResult instanceof SearchHits) { SearchHits searchHits = (SearchHits) queryResult; + this.rows = new ArrayList<>(); this.rows = populateRows(searchHits); this.size = rows.size(); this.internalTotalHits = Optional.ofNullable(searchHits.getTotalHits()).map(th -> th.value).orElse(0L); @@ -643,14 +644,30 @@ private long rowsLeft(Integer fetchSize, Integer limit) { private List populateRows(SearchHits searchHits) { List rows = new ArrayList<>(); - Set newKeys = new HashSet<>(head); + List> rowsAsMap = populateRows(head, searchHits); + for (Map rowAsMap : rowsAsMap) { + rows.add(new DataRows.Row(rowAsMap)); + } + return rows; + } + + /** + * Returns a list of "flattened" rows as (path, value) hashmaps. + * The core idea is to flatten non-nested hits, and recur on inner hits + * by calling the method flatNestedField(), which in turn calls back + * populateRows. + */ + private List> populateRows(List keys, SearchHits searchHits) { + List> rows = new ArrayList<>(); for (SearchHit hit : searchHits) { Map rowSource = hit.getSourceAsMap(); - List result; + List> result = new ArrayList<>(); if (!isJoinQuery()) { // Row already flatten in source in join. And join doesn't support nested fields for now. 
- rowSource = flatRow(head, rowSource); + result = flatNestedField(keys, hit.getInnerHits()); + + rowSource = flatRow(keys, rowSource); rowSource.put(SCORE, hit.getScore()); for (Map.Entry field : hit.getFields().entrySet()) { @@ -659,13 +676,15 @@ private List populateRows(SearchHits searchHits) { if (formatType.equalsIgnoreCase(Format.JDBC.getFormatName())) { dateFieldFormatter.applyJDBCDateFormat(rowSource); } - result = flatNestedField(newKeys, rowSource, hit.getInnerHits()); + + for (Map row : result) { + row.putAll(rowSource); + } } else { if (formatType.equalsIgnoreCase(Format.JDBC.getFormatName())) { dateFieldFormatter.applyJDBCDateFormat(rowSource); } - result = new ArrayList<>(); - result.add(new DataRows.Row(rowSource)); + result.add(rowSource); } rows.addAll(result); @@ -781,6 +800,7 @@ private Map addNumericAggregation(List aggs, Map flatRow(List keys, Map row) { Map flattenedRow = new HashMap<>(); for (String key : keys) { @@ -813,94 +833,64 @@ private Map flatRow(List keys, Map row) } /** - * If innerHits associated with column name exists, flatten both the inner field name and the inner rows in it. - *

- * Sample input: - * newKeys = {'region', 'employees.age'}, row = {'region': 'US'} - * innerHits = employees: { - * hits: [{ - * source: { - * age: 26, - * firstname: 'Hank' - * } - * },{ - * source: { - * age: 30, - * firstname: 'John' - * } - * }] - * } + * Flattens the inner hits passed as argument. + * For each inner hit, iterates on its column names colName and computes a list of new keys. + * The new keys are the original keys that begin with colName + ".", stripped of that prefix. + * Then, calls populateRows on the new keys and the hits associated to colName. + * This also works for nested fields which contain another nested field as a subfield. */ - private List flatNestedField(Set newKeys, Map row, - Map innerHits) { - List result = new ArrayList<>(); - result.add(new DataRows.Row(row)); + private List> flatNestedField(List keys, Map innerHits) { + List> result = new ArrayList<>(); + result.add(new HashMap<>()); if (innerHits == null) { return result; } for (String colName : innerHits.keySet()) { - SearchHit[] colValue = innerHits.get(colName).getHits(); - doFlatNestedFieldName(colName, colValue, newKeys); - result = doFlatNestedFieldValue(colName, colValue, result); - } + List newKeys = new ArrayList<>(); + for (String key : keys) { + if (key.startsWith(colName) && !key.equals(colName)) { + newKeys.add(key.substring(colName.length() + 1)); + } + } - return result; - } + List> innerResult = populateRows(newKeys, innerHits.get(colName)); + for (Map innerRow : innerResult) { + Map row = new HashMap<>(); + for (String path : innerRow.keySet()) { + if (!path.equals(SCORE)) { + row.put(colName + "." + path, innerRow.get(path)); + } else { + row.put(path, innerRow.get(path)); + } + } + innerRow.clear(); + innerRow.putAll(row); + } - private void doFlatNestedFieldName(String colName, SearchHit[] colValue, Set keys) { - Map innerRow = colValue[0].getSourceAsMap(); - for (String field : innerRow.keySet()) { - String innerName = colName + "." 
+ field; - keys.add(innerName); + // In the case of multiple sets of inner hits, returns all possible combinations of entries + result = cartesianProduct(result, innerResult); } - keys.remove(colName); + return result; } /** - * Do Cartesian Product between current outer row and inner rows by nested loop and remove original outer row. - *

- * Sample input: - * colName = 'employees', rows = [{region: 'US'}] - * colValue= [{ - * source: { - * age: 26, - * firstname: 'Hank' - * } - * },{ - * source: { - * age: 30, - * firstname: 'John' - * } - * }] - *

- * Return: - * [ - * {region:'US', employees.age:26, employees.firstname:'Hank'}, - * {region:'US', employees.age:30, employees.firstname:'John'} - * ] + * Performs the "cartesian product" between two sets of rows, + * which is the set of all possible unions of a row in rowsLeft and a row in rowsRight. */ - private List doFlatNestedFieldValue(String colName, SearchHit[] colValue, List rows) { - List result = new ArrayList<>(); - for (DataRows.Row row : rows) { - for (SearchHit hit : colValue) { - Map innerRow = hit.getSourceAsMap(); - Map copy = new HashMap<>(); - - for (String field : row.getContents().keySet()) { - copy.put(field, row.getData(field)); - } - for (String field : innerRow.keySet()) { - copy.put(colName + "." + field, innerRow.get(field)); - } - - copy.remove(colName); - result.add(new DataRows.Row(copy)); + List> cartesianProduct(List> rowsLeft, + List> rowsRight) { + List> result = new ArrayList<>(); + for (Map rowLeft : rowsLeft) { + for (Map rowRight : rowsRight) { + Map union = new HashMap<>(); + union.putAll(rowLeft); + union.putAll(rowRight); + result.add(union); } } - return result; } From 1a9d1aa477d72ffd1289a59e311ed7bf928328cb Mon Sep 17 00:00:00 2001 From: cip999 Date: Fri, 13 Aug 2021 18:20:23 +0200 Subject: [PATCH 2/4] Improved support of jdbc format for nested fields in SELECT statements --- integ-test/src/test/resources/books.json | 4 ++ .../indexDefinitions/books_index_mapping.json | 61 +++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 integ-test/src/test/resources/books.json create mode 100644 integ-test/src/test/resources/indexDefinitions/books_index_mapping.json diff --git a/integ-test/src/test/resources/books.json b/integ-test/src/test/resources/books.json new file mode 100644 index 0000000000..f6dd1c3286 --- /dev/null +++ b/integ-test/src/test/resources/books.json @@ -0,0 +1,4 @@ +{"index": {"_id": "1"}} +{"authors": [{"id": 8634, "info": {"name": "Andrea", "age": 22}}, {"id": 836, "info": {"name": 
"Carmen", "age": 19}}], "performance": {"revenue": 932.88, "sells": [{"year": 2018, "quantity": {"italy": 1000000, "abroad": 500000}}, {"year": 2020, "quantity": {"italy": 2001002, "abroad": 0}}]}} +{"index": {"_id": "2"}} +{"authors": [{"id": 8, "info": {"name": "Mark", "age": 80}}], "performance": {"revenue": 555.0, "sells": []}} diff --git a/integ-test/src/test/resources/indexDefinitions/books_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/books_index_mapping.json new file mode 100644 index 0000000000..2db565c72a --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/books_index_mapping.json @@ -0,0 +1,61 @@ +{ + "mappings": { + "properties": { + "authors": { + "type": "nested", + "properties": { + "id": { + "type": "long", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "info": { + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "age": { + "type": "long" + } + } + } + } + }, + "performance": { + "properties": { + "revenue": { + "type": "double" + }, + "sells": { + "type": "nested", + "properties": { + "year": { + "type": "long" + }, + "quantity": { + "properties": { + "italy": { + "type": "long" + }, + "abroad": { + "type": "long" + } + } + } + } + } + } + } + } + } +} From 880c258e154fe126313fe2285122158c16d7d303 Mon Sep 17 00:00:00 2001 From: cip999 Date: Sun, 15 Aug 2021 13:40:38 +0200 Subject: [PATCH 3/4] Now jdbc format takes into account limit (if it exists) when flattening rows --- .../sql/legacy/PrettyFormatResponseIT.java | 18 +++++++ .../executor/format/SelectResultSet.java | 50 +++++++++++++++---- 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/PrettyFormatResponseIT.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/PrettyFormatResponseIT.java index 5f6378ff7a..c496bbf95b 100644 
--- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/PrettyFormatResponseIT.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/PrettyFormatResponseIT.java @@ -547,6 +547,24 @@ public void selectMultipleNestedFields() throws IOException { assertEquals(4, dataRows.length()); } + @Test + public void nestedFieldWithLimit() throws IOException { + JSONObject response = executeQuery( + String.format(Locale.ROOT, "SELECT nested(authors.info.name, authors) " + + "FROM %s LIMIT 1", TestsConstants.TEST_INDEX_BOOKS)); + assertEquals(1, getDataRows(response).length()); + } + + @Test + public void multipleNestedFieldsWithLimit() throws IOException, InterruptedException { + JSONObject response = executeQuery( + String.format(Locale.ROOT, "SELECT nested(authors.info.name, authors), " + + "nested(performance.sells.year)" + + "FROM %s LIMIT 3", TestsConstants.TEST_INDEX_BOOKS)); + assertEquals(3, getDataRows(response).length()); + } + + private void testFieldOrder(final String[] expectedFields, final Object[] expectedValues) throws IOException { diff --git a/legacy/src/main/java/com/amazon/opendistroforelasticsearch/sql/legacy/executor/format/SelectResultSet.java b/legacy/src/main/java/com/amazon/opendistroforelasticsearch/sql/legacy/executor/format/SelectResultSet.java index 231236a5d4..dfdf53fb58 100644 --- a/legacy/src/main/java/com/amazon/opendistroforelasticsearch/sql/legacy/executor/format/SelectResultSet.java +++ b/legacy/src/main/java/com/amazon/opendistroforelasticsearch/sql/legacy/executor/format/SelectResultSet.java @@ -87,6 +87,7 @@ public class SelectResultSet extends ResultSet { private long size; private long totalHits; private long internalTotalHits; + private Integer rowCount; private List rows; private Cursor cursor; @@ -118,6 +119,9 @@ public SelectResultSet(Client client, this.schema = new Schema(indexName, typeName, columns); this.head = schema.getHeaders(); this.dateFieldFormatter = new 
DateFieldFormatter(indexName, columns, fieldAliasMap); + if (query instanceof Select) { + this.rowCount = ((Select) query).getRowCount(); + } extractData(); populateCursor(); @@ -579,7 +583,6 @@ private void extractData() { if (queryResult instanceof SearchHits) { SearchHits searchHits = (SearchHits) queryResult; - this.rows = new ArrayList<>(); this.rows = populateRows(searchHits); this.size = rows.size(); this.internalTotalHits = Optional.ofNullable(searchHits.getTotalHits()).map(th -> th.value).orElse(0L); @@ -644,7 +647,7 @@ private long rowsLeft(Integer fetchSize, Integer limit) { private List populateRows(SearchHits searchHits) { List rows = new ArrayList<>(); - List> rowsAsMap = populateRows(head, searchHits); + List> rowsAsMap = populateRows(head, searchHits, rowCount); for (Map rowAsMap : rowsAsMap) { rows.add(new DataRows.Row(rowAsMap)); } @@ -657,15 +660,23 @@ private List populateRows(SearchHits searchHits) { * by calling the method flatNestedField(), which in turn calls back * populateRows. */ - private List> populateRows(List keys, SearchHits searchHits) { + private List> populateRows(List keys, SearchHits searchHits, Integer rowsLeft) { List> rows = new ArrayList<>(); + for (SearchHit hit : searchHits) { + if (rowsLeft != null && rowsLeft == 0) { + return rows; + } + Map rowSource = hit.getSourceAsMap(); List> result = new ArrayList<>(); if (!isJoinQuery()) { // Row already flatten in source in join. And join doesn't support nested fields for now. 
- result = flatNestedField(keys, hit.getInnerHits()); + result = flatNestedField(keys, hit.getInnerHits(), rowsLeft); + if (rowsLeft != null) { + rowsLeft -= result.size(); + } rowSource = flatRow(keys, rowSource); rowSource.put(SCORE, hit.getScore()); @@ -685,6 +696,9 @@ private List> populateRows(List keys, SearchHits sea dateFieldFormatter.applyJDBCDateFormat(rowSource); } result.add(rowSource); + if (rowsLeft != null) { + rowsLeft--; + } } rows.addAll(result); @@ -839,7 +853,9 @@ private Map flatRow(List keys, Map row) * Then, calls populateRows on the new keys and the hits associated to colName. * This also works for nested fields which contain another nested field as a subfield. */ - private List> flatNestedField(List keys, Map innerHits) { + private List> flatNestedField(List keys, + Map innerHits, + Integer rowsLeft) { List> result = new ArrayList<>(); result.add(new HashMap<>()); @@ -855,7 +871,9 @@ private List> flatNestedField(List keys, Map> innerResult = populateRows(newKeys, innerHits.get(colName)); + List> innerResult = populateRows(newKeys, innerHits.get(colName), + (rowsLeft == null) ? null : + ((result.isEmpty() ? 
0 : ratioCeil(rowsLeft, result.size())))); for (Map innerRow : innerResult) { Map row = new HashMap<>(); for (String path : innerRow.keySet()) { @@ -870,7 +888,7 @@ private List> flatNestedField(List keys, Map> flatNestedField(List keys, Map> cartesianProduct(List> rowsLeft, - List> rowsRight) { + private List> cartesianProduct(List> rowsLeft, + List> rowsRight, + Integer maxRows) { List> result = new ArrayList<>(); for (Map rowLeft : rowsLeft) { + if (maxRows != null && result.size() == maxRows) { + break; + } + for (Map rowRight : rowsRight) { + if (maxRows != null && result.size() == maxRows) { + break; + } + Map union = new HashMap<>(); union.putAll(rowLeft); union.putAll(rowRight); result.add(union); } } + return result; } + private int ratioCeil(int a, int b) { + return (a + b - 1) / b; + } + private Map addMap(String field, Object term) { Map data = new HashMap<>(); data.put(field, term); From 5072613f9e7f4aa1bce4119a103827593e31293f Mon Sep 17 00:00:00 2001 From: cip999 Date: Sun, 15 Aug 2021 16:43:54 +0200 Subject: [PATCH 4/4] Updated some variable names for clarity's sake --- .../sql/legacy/PrettyFormatResponseIT.java | 2 +- .../sql/legacy/SQLIntegTestCase.java | 2 +- .../sql/legacy/TestsConstants.java | 2 +- .../executor/format/SelectResultSet.java | 49 ++++++++++++------- 4 files changed, 35 insertions(+), 20 deletions(-) diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/PrettyFormatResponseIT.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/PrettyFormatResponseIT.java index c496bbf95b..0cff5986ee 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/PrettyFormatResponseIT.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/PrettyFormatResponseIT.java @@ -76,7 +76,7 @@ protected void init() throws Exception { loadIndex(Index.PHRASE); loadIndex(Index.GAME_OF_THRONES); loadIndex(Index.NESTED); - loadIndex(Index.BOOKS); + 
loadIndex(Index.BOOKS_NESTED_WITH_SUBPROPERTIES); } @Override diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java index df10a4f1bf..63f7e4b4f4 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java @@ -558,7 +558,7 @@ public enum Index { "_doc", getDataTypeNonnumericIndexMapping(), "src/test/resources/datatypes.json"), - BOOKS(TestsConstants.TEST_INDEX_BOOKS, + BOOKS_NESTED_WITH_SUBPROPERTIES(TestsConstants.TEST_INDEX_BOOKS, "nestedType", getBooksIndexMapping(), "src/test/resources/books.json"); diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestsConstants.java index 120de484e0..c8a184ce97 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestsConstants.java @@ -60,7 +60,7 @@ public class TestsConstants { public final static String TEST_INDEX_STRINGS = TEST_INDEX + "_strings"; public final static String TEST_INDEX_DATATYPE_NUMERIC = TEST_INDEX + "_datatypes_numeric"; public final static String TEST_INDEX_DATATYPE_NONNUMERIC = TEST_INDEX + "_datatypes_nonnumeric"; - public final static String TEST_INDEX_BOOKS = TEST_INDEX + "_nested"; + public final static String TEST_INDEX_BOOKS = TEST_INDEX + "_nested_subproperties"; public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; public final static String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS"; diff --git a/legacy/src/main/java/com/amazon/opendistroforelasticsearch/sql/legacy/executor/format/SelectResultSet.java 
b/legacy/src/main/java/com/amazon/opendistroforelasticsearch/sql/legacy/executor/format/SelectResultSet.java index dfdf53fb58..18ced20fa4 100644 --- a/legacy/src/main/java/com/amazon/opendistroforelasticsearch/sql/legacy/executor/format/SelectResultSet.java +++ b/legacy/src/main/java/com/amazon/opendistroforelasticsearch/sql/legacy/executor/format/SelectResultSet.java @@ -87,7 +87,7 @@ public class SelectResultSet extends ResultSet { private long size; private long totalHits; private long internalTotalHits; - private Integer rowCount; + private Integer limitRowCount; private List rows; private Cursor cursor; @@ -120,7 +120,7 @@ public SelectResultSet(Client client, this.head = schema.getHeaders(); this.dateFieldFormatter = new DateFieldFormatter(indexName, columns, fieldAliasMap); if (query instanceof Select) { - this.rowCount = ((Select) query).getRowCount(); + this.limitRowCount = ((Select) query).getRowCount(); } extractData(); @@ -647,7 +647,7 @@ private long rowsLeft(Integer fetchSize, Integer limit) { private List populateRows(SearchHits searchHits) { List rows = new ArrayList<>(); - List> rowsAsMap = populateRows(head, searchHits, rowCount); + List> rowsAsMap = populateRows(head, searchHits, limitRowCount); for (Map rowAsMap : rowsAsMap) { rows.add(new DataRows.Row(rowAsMap)); } @@ -659,12 +659,27 @@ private List populateRows(SearchHits searchHits) { * The core idea is to flatten non-nested hits, and recur on inner hits * by calling the method flatNestedField(), which in turn calls back * populateRows. + *

+ * Sample input: + * keys = {"authors.info.name"} (authors is nested) + * searchHits = { + * { + * key = "authors" + * innerHits = { + * {"info.name": "Andrea"}, + * {"info.name": "Carmen"} + * } + * } + * } + * Sample output: + * rows = [{"authors.info.name": "Andrea"}, {"authors.info.name": "Carmen"}] + *

*/ - private List> populateRows(List keys, SearchHits searchHits, Integer rowsLeft) { + private List> populateRows(List keys, SearchHits searchHits, Integer remainingRows) { List> rows = new ArrayList<>(); for (SearchHit hit : searchHits) { - if (rowsLeft != null && rowsLeft == 0) { + if (remainingRows != null && remainingRows == 0) { return rows; } @@ -673,9 +688,9 @@ private List> populateRows(List keys, SearchHits sea if (!isJoinQuery()) { // Row already flatten in source in join. And join doesn't support nested fields for now. - result = flatNestedField(keys, hit.getInnerHits(), rowsLeft); - if (rowsLeft != null) { - rowsLeft -= result.size(); + result = flatNestedField(keys, hit.getInnerHits(), remainingRows); + if (remainingRows != null) { + remainingRows -= result.size(); } rowSource = flatRow(keys, rowSource); @@ -696,8 +711,8 @@ private List> populateRows(List keys, SearchHits sea dateFieldFormatter.applyJDBCDateFormat(rowSource); } result.add(rowSource); - if (rowsLeft != null) { - rowsLeft--; + if (remainingRows != null) { + remainingRows--; } } @@ -855,7 +870,7 @@ private Map flatRow(List keys, Map row) */ private List> flatNestedField(List keys, Map innerHits, - Integer rowsLeft) { + Integer remainingRows) { List> result = new ArrayList<>(); result.add(new HashMap<>()); @@ -872,8 +887,8 @@ private List> flatNestedField(List keys, } List> innerResult = populateRows(newKeys, innerHits.get(colName), - (rowsLeft == null) ? null : - ((result.isEmpty() ? 0 : ratioCeil(rowsLeft, result.size())))); + (remainingRows == null) ? null : + ((result.isEmpty() ? 
0 : ratioCeil(remainingRows, result.size())))); for (Map innerRow : innerResult) { Map row = new HashMap<>(); for (String path : innerRow.keySet()) { @@ -888,7 +903,7 @@ private List> flatNestedField(List keys, } // In the case of multiple sets of inner hits, returns all possible combinations of entries - result = cartesianProduct(result, innerResult, rowsLeft); + result = cartesianProduct(result, innerResult, remainingRows); } return result; @@ -900,15 +915,15 @@ private List> flatNestedField(List keys, */ private List> cartesianProduct(List> rowsLeft, List> rowsRight, - Integer maxRows) { + Integer remainingRows) { List> result = new ArrayList<>(); for (Map rowLeft : rowsLeft) { - if (maxRows != null && result.size() == maxRows) { + if (remainingRows != null && result.size() == remainingRows) { break; } for (Map rowRight : rowsRight) { - if (maxRows != null && result.size() == maxRows) { + if (remainingRows != null && result.size() == remainingRows) { break; }