From 681bd433ba910ab2d81c288933a1b396ae87bbdd Mon Sep 17 00:00:00 2001 From: vector-of-bool Date: Mon, 1 Aug 2022 23:38:08 +0000 Subject: [PATCH 1/5] Detect iteration failure in bson_validate() If bson_iter_visit_all() sets an error offset in the iterator, store that as the error offset for the validation. --- src/libbson/src/bson/bson.c | 13 ++++++++++++- src/libbson/tests/binary/invalid-utf8.bson | Bin 0 -> 18 bytes src/libbson/tests/test-bson.c | 8 ++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 src/libbson/tests/binary/invalid-utf8.bson diff --git a/src/libbson/src/bson/bson.c b/src/libbson/src/bson/bson.c index 87e76c3ae9..93b540d232 100644 --- a/src/libbson/src/bson/bson.c +++ b/src/libbson/src/bson/bson.c @@ -3584,7 +3584,18 @@ _bson_iter_validate_document (const bson_iter_t *iter, state->phase = BSON_VALIDATE_PHASE_LF_REF_KEY; } - (void) bson_iter_visit_all (&child, &bson_validate_funcs, state); + bson_iter_visit_all (&child, &bson_validate_funcs, state); + + if (child.err_off > 0 && state->err_offset < 0) { + // Iteration on a direct element of 'child' failed + state->err_offset = child.err_off; + bson_set_error (&state->error, + BSON_ERROR_INVALID, + BSON_VALIDATE_NONE, + "Iteration failed on element at offset %d", + (int) (iter->off + child.err_off)); + return true; + } if (state->phase == BSON_VALIDATE_PHASE_LF_ID_KEY || state->phase == BSON_VALIDATE_PHASE_LF_REF_UTF8 || diff --git a/src/libbson/tests/binary/invalid-utf8.bson b/src/libbson/tests/binary/invalid-utf8.bson new file mode 100644 index 0000000000000000000000000000000000000000..18a2674aa60d354690a8ef62a8a10de9dd8adb21 GIT binary patch literal 18 XcmWe)U|?WMWMBhQiHZLc6B!r)6EFh* literal 0 HcmV?d00001 diff --git a/src/libbson/tests/test-bson.c b/src/libbson/tests/test-bson.c index 33443642da..0259b022c6 100644 --- a/src/libbson/tests/test-bson.c +++ b/src/libbson/tests/test-bson.c @@ -1198,6 +1198,14 @@ test_bson_validate (void) 9, BSON_VALIDATE_NONE, "corrupt BSON"); + // bson_iter_visit_all does UTF-8 validation before the validator gets a + // chance to do its own checks, so we can't actually use the UTF8 validation + // flag, and we can't tell what actually caused the error. + VALIDATE_TEST ("invalid-utf8.bson", + BSON_VALIDATE_NONE, + 4, + BSON_VALIDATE_NONE, + "Iteration failed on element at offset 4"); VALIDATE_TEST ("overflow4.bson", BSON_VALIDATE_NONE, 9, From fb382614f401d37a94487ee5ec08aa17db1a64ee Mon Sep 17 00:00:00 2001 From: vector-of-bool Date: Tue, 2 Aug 2022 17:26:19 +0000 Subject: [PATCH 2/5] More control over bson_iter_visit validation --- src/libbson/src/bson/bson-iter.c | 71 +++++++++++++++++++++----------- src/libbson/src/bson/bson-iter.h | 45 +++++++++++++++++++- 2 files changed, 91 insertions(+), 25 deletions(-) diff --git a/src/libbson/src/bson/bson-iter.c b/src/libbson/src/bson/bson-iter.c index 7b4630f94c..3b4bf2dd00 100644 --- a/src/libbson/src/bson/bson-iter.c +++ b/src/libbson/src/bson/bson-iter.c @@ -1888,23 +1888,34 @@ bson_iter_array (const bson_iter_t *iter, /* IN */ #define VISIT_MAXKEY VISIT_FIELD (maxkey) #define VISIT_MINKEY VISIT_FIELD (minkey) +bool +bson_iter_visit_all (bson_iter_t *iter, + const bson_visitor_t *visitor, + void *data) +{ + return bson_iter_visit_all_v2 (iter, visitor, BSON_ITER_VISIT_DEFAULT, data); +} bool -bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ - const bson_visitor_t *visitor, /* IN */ - void *data) /* IN */ +bson_iter_visit_all_v2 (bson_iter_t *iter, + const bson_visitor_t *visitor, + enum bson_iter_visit_flags flags, + void *data) { uint32_t bson_type = 0; const char *key = NULL; bool unsupported; + const bool allow_nul = flags & BSON_ITER_VISIT_ALLOW_NUL_IN_UTF8; BSON_ASSERT (iter); BSON_ASSERT (visitor); while (_bson_iter_next_internal (iter, 0, &key, &bson_type, &unsupported)) { - if (*key && !bson_utf8_validate (key, strlen (key), false)) { - iter->err_off = iter->off; - break; + if (flags & BSON_ITER_VISIT_VALIDATE_KEYS) { + if (*key && !bson_utf8_validate (key, strlen (key), false)) { + iter->err_off = iter->off; + break; + } } if (VISIT_BEFORE (iter, key, data)) { @@ -1925,9 +1936,11 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ utf8 = bson_iter_utf8 (iter, &utf8_len); - if (!bson_utf8_validate (utf8, utf8_len, true)) { - iter->err_off = iter->off; - return true; + if (flags & BSON_ITER_VISIT_VALIDATE_UTF8) { + if (!bson_utf8_validate (utf8, utf8_len, allow_nul)) { + iter->err_off = iter->off; + return true; + } } if (VISIT_UTF8 (iter, key, utf8_len, utf8, data)) { @@ -2009,9 +2022,11 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ const char *options = NULL; regex = bson_iter_regex (iter, &options); - if (!bson_utf8_validate (regex, strlen (regex), true)) { - iter->err_off = iter->off; - return true; + if (flags & BSON_ITER_VISIT_VALIDATE_REGEX) { + if (!bson_utf8_validate (regex, strlen (regex), allow_nul)) { + iter->err_off = iter->off; + return true; + } } if (VISIT_REGEX (iter, key, regex, options, data)) { @@ -2025,9 +2040,11 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ bson_iter_dbpointer (iter, &collection_len, &collection, &oid); - if (!bson_utf8_validate (collection, collection_len, true)) { - iter->err_off = iter->off; - return true; + if (flags & BSON_ITER_VISIT_VALIDATE_DBPOINTER) { + if (!bson_utf8_validate (collection, collection_len, allow_nul)) { + iter->err_off = iter->off; + return true; + } } if (VISIT_DBPOINTER ( @@ -2041,9 +2058,11 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ code = bson_iter_code (iter, &code_len); - if (!bson_utf8_validate (code, code_len, true)) { - iter->err_off = iter->off; - return true; + if (flags & BSON_ITER_VISIT_VALIDATE_CODE) { + if (!bson_utf8_validate (code, code_len, allow_nul)) { + iter->err_off = iter->off; + return true; + } } if (VISIT_CODE (iter, key, code_len, code, data)) { @@ -2056,9 +2075,11 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ symbol = bson_iter_symbol (iter, &symbol_len); - if (!bson_utf8_validate (symbol, symbol_len, true)) { - iter->err_off = iter->off; - return true; + if (flags & BSON_ITER_VISIT_VALIDATE_SYMBOL) { + if (!bson_utf8_validate (symbol, symbol_len, allow_nul)) { + iter->err_off = iter->off; + return true; + } } if (VISIT_SYMBOL (iter, key, symbol_len, symbol, data)) { @@ -2074,9 +2095,11 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ code = bson_iter_codewscope (iter, &length, &doclen, &docbuf); - if (!bson_utf8_validate (code, length, true)) { - iter->err_off = iter->off; - return true; + if (flags & BSON_ITER_VISIT_VALIDATE_CODE) { + if (!bson_utf8_validate (code, length, allow_nul)) { + iter->err_off = iter->off; + return true; + } } if (bson_init_static (&b, docbuf, doclen) && diff --git a/src/libbson/src/bson/bson-iter.h b/src/libbson/src/bson/bson-iter.h index 92e1968f74..ed8ee004c1 100644 --- a/src/libbson/src/bson/bson-iter.h +++ b/src/libbson/src/bson/bson-iter.h @@ -511,7 +511,8 @@ bson_iter_overwrite_double (bson_iter_t *iter, double value); BSON_EXPORT (void) -bson_iter_overwrite_decimal128 (bson_iter_t *iter, const bson_decimal128_t *value); +bson_iter_overwrite_decimal128 (bson_iter_t *iter, + const bson_decimal128_t *value); BSON_EXPORT (void) @@ -537,6 +538,48 @@ bson_iter_visit_all (bson_iter_t *iter, const bson_visitor_t *visitor, void *data); +/** + * @brief Flag options to control bson_iter_visit APIs + */ +enum bson_iter_visit_flags { + BSON_ITER_VISIT_NOFLAGS = 0, + /// Validate element keys to be valid UTF-8 + BSON_ITER_VISIT_VALIDATE_KEYS = 1 << 0, + /// Validate UTF-8 elements to be valid UTF-8 + BSON_ITER_VISIT_VALIDATE_UTF8 = 1 << 1, + /// Validate regular expression elements to contain valid UTF-8 + BSON_ITER_VISIT_VALIDATE_REGEX = 1 << 2, + /// Validate code and code_w_scope elements to contain valid UTF-8 + BSON_ITER_VISIT_VALIDATE_CODE = 1 << 3, + /// Validate dbPointer elements to contain valid UTF-8 + BSON_ITER_VISIT_VALIDATE_DBPOINTER = 1 << 4, + /// Validate symbol elements to contain valid UTF-8 + BSON_ITER_VISIT_VALIDATE_SYMBOL = 1 << 5, + /// When checking UTF-8 strings, allow a NUL as a valid codepoint + BSON_ITER_VISIT_ALLOW_NUL_IN_UTF8 = 1 << 6, + /// Validate all strings in the visited BSON to be valid UTF-8 strings + BSON_ITER_VISIT_VALIDATE_STRINGS = + BSON_ITER_VISIT_VALIDATE_KEYS | BSON_ITER_VISIT_VALIDATE_UTF8 | + BSON_ITER_VISIT_VALIDATE_REGEX | BSON_ITER_VISIT_VALIDATE_CODE | + BSON_ITER_VISIT_VALIDATE_DBPOINTER | BSON_ITER_VISIT_VALIDATE_SYMBOL, + /// Validate all element values to be valid (does not validate keys) + BSON_ITER_VISIT_VALIDATE_VALUES = + BSON_ITER_VISIT_VALIDATE_UTF8 | BSON_ITER_VISIT_VALIDATE_REGEX | + BSON_ITER_VISIT_VALIDATE_CODE | BSON_ITER_VISIT_VALIDATE_DBPOINTER | + BSON_ITER_VISIT_VALIDATE_SYMBOL, + /// The default option for bson_iter_visit_all_v2 (validates all keys and + /// values, allows NUL in UTF-8 strings) + BSON_ITER_VISIT_DEFAULT = BSON_ITER_VISIT_VALIDATE_KEYS | + BSON_ITER_VISIT_VALIDATE_VALUES | + BSON_ITER_VISIT_ALLOW_NUL_IN_UTF8, +}; + +BSON_EXPORT (bool) +bson_iter_visit_all_v2 (bson_iter_t *iter, + const bson_visitor_t *visitor, + enum bson_iter_visit_flags flags, + void *data); + BSON_EXPORT (uint32_t) bson_iter_offset (bson_iter_t *iter); From be101cdb2a8420f426291b0f3610e80af3016a3b Mon Sep 17 00:00:00 2001 From: vector-of-bool Date: Thu, 4 Aug 2022 19:57:14 +0000 Subject: [PATCH 3/5] Document visit_all_v2 API --- src/libbson/doc/bson_iter_t.rst | 1 + src/libbson/doc/bson_iter_visit_all.rst | 6 +- src/libbson/doc/bson_iter_visit_all_v2.rst | 118 +++++++++++++++++++++ 3 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 src/libbson/doc/bson_iter_visit_all_v2.rst diff --git a/src/libbson/doc/bson_iter_t.rst b/src/libbson/doc/bson_iter_t.rst index 620388f775..89832351ab 100644 --- a/src/libbson/doc/bson_iter_t.rst +++ b/src/libbson/doc/bson_iter_t.rst @@ -132,6 +132,7 @@ The :symbol:`bson_t` *MUST* be valid for the lifetime of the iter and it is an e bson_iter_utf8 bson_iter_value bson_iter_visit_all + bson_iter_visit_all_v2 Examples -------- diff --git a/src/libbson/doc/bson_iter_visit_all.rst b/src/libbson/doc/bson_iter_visit_all.rst index e7e35231fe..f2a763e5d0 100644 --- a/src/libbson/doc/bson_iter_visit_all.rst +++ b/src/libbson/doc/bson_iter_visit_all.rst @@ -23,7 +23,11 @@ Parameters Description ----------- -A convenience function to iterate all remaining fields of ``iter`` using the callback vtable provided by ``visitor``. +A convenience function to iterate all remaining fields of ``iter`` using the +callback vtable provided by ``visitor``. + +This function is equivalent to :symbol:`bson_iter_visit_all_v2` with +``BSON_ITER_VISIT_DEFAULT`` given for its ``flags`` argument. Returns ------- diff --git a/src/libbson/doc/bson_iter_visit_all_v2.rst b/src/libbson/doc/bson_iter_visit_all_v2.rst new file mode 100644 index 0000000000..4fadd03c8a --- /dev/null +++ b/src/libbson/doc/bson_iter_visit_all_v2.rst @@ -0,0 +1,118 @@ +:man_page: bson_iter_visit_all_v2 + +bson_iter_visit_all_v2() +======================== + +Synopsis +-------- + +.. code-block:: c + + enum bson_iter_visit_flags { + BSON_ITER_VISIT_NOFLAGS = 0, + BSON_ITER_VISIT_VALIDATE_KEYS = 1 << 0, + BSON_ITER_VISIT_VALIDATE_UTF8 = 1 << 1, + BSON_ITER_VISIT_VALIDATE_REGEX = 1 << 2, + BSON_ITER_VISIT_VALIDATE_CODE = 1 << 3, + BSON_ITER_VISIT_VALIDATE_DBPOINTER = 1 << 4, + BSON_ITER_VISIT_VALIDATE_SYMBOL = 1 << 5, + BSON_ITER_VISIT_ALLOW_NUL_IN_UTF8 = 1 << 6, + BSON_ITER_VISIT_VALIDATE_STRINGS = + BSON_ITER_VISIT_VALIDATE_KEYS | BSON_ITER_VISIT_VALIDATE_UTF8 | + BSON_ITER_VISIT_VALIDATE_REGEX | BSON_ITER_VISIT_VALIDATE_CODE | + BSON_ITER_VISIT_VALIDATE_DBPOINTER | BSON_ITER_VISIT_VALIDATE_SYMBOL, + BSON_ITER_VISIT_VALIDATE_VALUES = + BSON_ITER_VISIT_VALIDATE_UTF8 | BSON_ITER_VISIT_VALIDATE_REGEX | + BSON_ITER_VISIT_VALIDATE_CODE | BSON_ITER_VISIT_VALIDATE_DBPOINTER | + BSON_ITER_VISIT_VALIDATE_SYMBOL, + BSON_ITER_VISIT_DEFAULT = BSON_ITER_VISIT_VALIDATE_KEYS | + BSON_ITER_VISIT_VALIDATE_VALUES | + BSON_ITER_VISIT_ALLOW_NUL_IN_UTF8, + }; + + bool + bson_iter_visit_all_v2 (bson_iter_t *iter, + const bson_visitor_t *visitor, + const bson_iter_visit_flags flags. + void *data); + +Parameters +---------- + +* ``iter``: A :symbol:`bson_iter_t`. +* ``visitor``: A :symbol:`bson_visitor_t`. +* ``flags``: A set of bit flags from ``bson_iter_visit_flags`` to control + visitation (See below). +* ``data``: Optional data for ``visitor``. + +Description +----------- + +A convenience function to iterate all remaining fields of ``iter`` using the +callback vtable provided by ``visitor``. + +This function is an extension of :symbol:`bson_iter_visit_all`, with the +additional ``flags`` parameter that can be used to control the behavior of the +visit iteration. The following flags are defined: + +``BSON_ITER_VISIT_NOFLAGS`` + + None of the behaviors requested by any other options are used. + +``BSON_ITER_VISIT_VALIDATE_KEYS`` + + Validate that element keys are valid UTF-8-encoded strings. + +``BSON_ITER_VISIT_VALIDATE_UTF8`` + + Validate that UTF-8 text elements are valid UTF-8-encoded strings. + +``BSON_ITER_VISIT_VALIDATE_REGEX`` + + Validate regular expression elements to contain valid UTF-8 encoded strings. + +``BSON_ITER_VISIT_VALIDATE_CODE`` + + Validate code elements that the code component is a valid UTF-8-encoded + string. + +``BSON_ITER_VISIT_VALIDATE_DBPOINTER`` + + Validate that DBPointer element names are valid UTF-8-encoded strings. + +``BSON_ITER_VISIT_VALIDATE_SYMBOL`` + + Validate that symbol elements are valid UTF-8-encoded strings. + +``BSON_ITER_VISIT_ALLOW_NUL_IN_UTF8`` + + When validating any UTF-8 strings, permit a zero ``0x00`` code unit as a valid + UTF-8 code unit. + +``BSON_ITER_VISIT_VALIDATE_STRINGS`` + + Validate all strings in all components in top-level elements of the document. + +``BSON_ITER_VISIT_VALIDATE_VALUES`` + + Validate all values in all top-level elements of the document. + +``BSON_ITER_VISIT_DEFAULT`` + + Validate all keys and values. Permits a zero ``0x00`` UTF-8 code unit in + strings. + + This flag has the same behavior as :symbol:`bson_iter_visit_all`. + +.. note:: + + If a requested element fails validation, the ``bson_iter_visit_all_v2`` call + will return and indicate the position of the erring element via ``iter``. + +Returns +------- + +Returns true if visitation was prematurely stopped by a callback function. Returns false either because all elements were visited *or* due to corrupt BSON. + +See :symbol:`bson_visitor_t` for examples of how to set your own callbacks to provide information about the location of corrupt or unsupported BSON document entries. + From d1ac515d9aba628cdfd5111aa8bdcb5cf4a4865c Mon Sep 17 00:00:00 2001 From: vector-of-bool Date: Thu, 4 Aug 2022 20:03:03 +0000 Subject: [PATCH 4/5] Now we can validate UTF-8 properly Fixes CDRIVER-4448 --- src/libbson/src/bson/bson.c | 3 ++- src/libbson/tests/test-bson.c | 9 +++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/libbson/src/bson/bson.c b/src/libbson/src/bson/bson.c index 93b540d232..3940ceaf97 100644 --- a/src/libbson/src/bson/bson.c +++ b/src/libbson/src/bson/bson.c @@ -3584,7 +3584,8 @@ _bson_iter_validate_document (const bson_iter_t *iter, state->phase = BSON_VALIDATE_PHASE_LF_REF_KEY; } - bson_iter_visit_all (&child, &bson_validate_funcs, state); + bson_iter_visit_all_v2 ( + &child, &bson_validate_funcs, BSON_ITER_VISIT_NOFLAGS, state); if (child.err_off > 0 && state->err_offset < 0) { // Iteration on a direct element of 'child' failed diff --git a/src/libbson/tests/test-bson.c b/src/libbson/tests/test-bson.c index 0259b022c6..095cfbb4c3 100644 --- a/src/libbson/tests/test-bson.c +++ b/src/libbson/tests/test-bson.c @@ -1198,14 +1198,11 @@ test_bson_validate (void) 9, BSON_VALIDATE_NONE, "corrupt BSON"); - // bson_iter_visit_all does UTF-8 validation before the validator gets a - // chance to do its own checks, so we can't actually use the UTF8 validation - // flag, and we can't tell what actually caused the error. VALIDATE_TEST ("invalid-utf8.bson", - BSON_VALIDATE_NONE, + BSON_VALIDATE_UTF8, 4, - BSON_VALIDATE_NONE, - "Iteration failed on element at offset 4"); + BSON_VALIDATE_UTF8, + "Invalid utf8 string for key \"a\""); VALIDATE_TEST ("overflow4.bson", BSON_VALIDATE_NONE, 9, From a4b94f2ff60d6a01d6f9bf80827a0293f7352156 Mon Sep 17 00:00:00 2001 From: vector-of-bool Date: Thu, 4 Aug 2022 23:07:16 +0000 Subject: [PATCH 5/5] bson_validate() should check keys for validity: - If bson_iter finds an invalid key string, it will indicate this as a corrupt element. This differs from the rest of its element validation, which will otherwise just halt visitation. - If bson_iter generates an error, we assert that it also informed the validator about this error, rather than trying to recover that error information ourselves. --- src/libbson/src/bson/bson.c | 20 ++++++++------------ src/libbson/tests/binary/invalid-key.bson | Bin 0 -> 18 bytes src/libbson/tests/test-bson.c | 5 +++++ 3 files changed, 13 insertions(+), 12 deletions(-) create mode 100644 src/libbson/tests/binary/invalid-key.bson diff --git a/src/libbson/src/bson/bson.c b/src/libbson/src/bson/bson.c index 3940ceaf97..30d7600c35 100644 --- a/src/libbson/src/bson/bson.c +++ b/src/libbson/src/bson/bson.c @@ -3585,18 +3585,14 @@ _bson_iter_validate_document (const bson_iter_t *iter, } bson_iter_visit_all_v2 ( - &child, &bson_validate_funcs, BSON_ITER_VISIT_NOFLAGS, state); - - if (child.err_off > 0 && state->err_offset < 0) { - // Iteration on a direct element of 'child' failed - state->err_offset = child.err_off; - bson_set_error (&state->error, - BSON_ERROR_INVALID, - BSON_VALIDATE_NONE, - "Iteration failed on element at offset %d", - (int) (iter->off + child.err_off)); - return true; - } + &child, &bson_validate_funcs, BSON_ITER_VISIT_VALIDATE_KEYS, state); + + // Assert: bson_iter_visit_all_v2 will never generate an error and not give + // that error to our visitor as a "corrupt" BSON element. This implies that + // if child.err_off is greater than zero, our own error offset MUST ALSO be + // greater than zero, because the state will have been told about that + // invalid element. + BSON_ASSERT (child.err_off <= 0 || state->err_offset > 0); if (state->phase == BSON_VALIDATE_PHASE_LF_ID_KEY || state->phase == BSON_VALIDATE_PHASE_LF_REF_UTF8 || diff --git a/src/libbson/tests/binary/invalid-key.bson b/src/libbson/tests/binary/invalid-key.bson new file mode 100644 index 0000000000000000000000000000000000000000..54f451dd809923446c728247367a0f830204a10d GIT binary patch literal 18 XcmWe)U|?YS&%g$x5))Gr6B!r)7%u|? literal 0 HcmV?d00001 diff --git a/src/libbson/tests/test-bson.c b/src/libbson/tests/test-bson.c index 095cfbb4c3..1ac314280b 100644 --- a/src/libbson/tests/test-bson.c +++ b/src/libbson/tests/test-bson.c @@ -1203,6 +1203,11 @@ test_bson_validate (void) 4, BSON_VALIDATE_UTF8, "Invalid utf8 string for key \"a\""); + VALIDATE_TEST ("invalid-key.bson", + BSON_VALIDATE_UTF8, + 4, + BSON_VALIDATE_NONE, + "corrupt BSON"); VALIDATE_TEST ("overflow4.bson", BSON_VALIDATE_NONE, 9,