Skip to content

Commit

Permalink
Implement a flat map container for JSON objects (#1352)
Browse files Browse the repository at this point in the history
Signed-off-by: Juan Cruz Viotti <[email protected]>
  • Loading branch information
jviotti authored Nov 29, 2024
1 parent 377e315 commit b36b73f
Show file tree
Hide file tree
Showing 6 changed files with 573 additions and 28 deletions.
2 changes: 1 addition & 1 deletion src/json/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
noa_library(NAMESPACE sourcemeta PROJECT jsontoolkit NAME json
FOLDER "JSON Toolkit/JSON"
PRIVATE_HEADERS array.h error.h object.h value.h hash.h
PRIVATE_HEADERS array.h error.h object.h value.h hash.h flat_map.h
SOURCES grammar.h parser.h stringify.h json.cc json_value.cc)

if(JSONTOOLKIT_INSTALL)
Expand Down
235 changes: 235 additions & 0 deletions src/json/include/sourcemeta/jsontoolkit/json_flat_map.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
#ifndef SOURCEMETA_JSONTOOLKIT_JSON_FLAT_MAP_H_
#define SOURCEMETA_JSONTOOLKIT_JSON_FLAT_MAP_H_

#include <algorithm> // std::swap
#include <cassert> // assert
#include <cstddef> // std::size_t
#include <functional> // std::hash
#include <initializer_list> // std::initializer_list
#include <iterator> // std::advance
#include <utility> // std::pair, std::move
#include <vector> // std::vector

namespace sourcemeta::jsontoolkit {

/// @ingroup json
/// An insertion-ordered associative container backed by two parallel
/// vectors: one holding the key/value entries and one holding the hash of
/// each key. Lookups are linear scans that compare the cached hashes first
/// and only compare the keys themselves on a hash match.
///
/// Only constant iterators are exposed. Mutation goes through `assign`,
/// the non-const `at`, `erase` and `clear`.
template <typename Key, typename Value, typename Hash = std::hash<Key>>
class FlatMap {
public:
  FlatMap() = default;

  using key_type = Key;
  using mapped_type = Value;
  using value_type = std::pair<Key, Value>;
  using underlying_type = std::vector<value_type>;
  using size_type = typename underlying_type::size_type;
  using difference_type = typename underlying_type::difference_type;
  using allocator_type = typename underlying_type::allocator_type;
  using reference = typename underlying_type::reference;
  using const_reference = typename underlying_type::const_reference;
  using pointer = typename underlying_type::pointer;
  using const_pointer = typename underlying_type::const_pointer;
  using const_iterator = typename underlying_type::const_iterator;
  using hash_type = std::size_t;

  /// Construct a map out of a list of entries. If a key is repeated, the
  /// last value given for it wins.
  FlatMap(std::initializer_list<value_type> entries) {
    this->hashes.reserve(entries.size());
    this->data.reserve(entries.size());
    // The elements of an initializer list are constant, so they can only
    // ever be copied. Moving from them would silently copy anyway
    for (const auto &entry : entries) {
      this->assign(entry.first, entry.second);
    }
  }

  auto begin() const noexcept -> const_iterator { return this->data.begin(); }
  auto end() const noexcept -> const_iterator { return this->data.end(); }
  auto cbegin() const noexcept -> const_iterator { return this->data.cbegin(); }
  auto cend() const noexcept -> const_iterator { return this->data.cend(); }

  /// Calculate the hash of a key using the hasher of this container
  inline auto hash(const key_type &key) const noexcept -> hash_type {
    return this->hasher(key);
  }

  // TODO: Add an assign overload for const key, rvalue

  /// Set the value of a key by moving, overwriting the existing value if the
  /// key is already present. Returns the hash of the key.
  auto assign(key_type &&key, mapped_type &&value) -> hash_type {
    const auto key_hash{this->hash(key)};
    const auto index{this->index_of(key, key_hash)};
    if (index < this->data.size()) {
      this->data[index].second = std::move(value);
    } else {
      this->append(key_hash, std::move(key), std::move(value));
    }

    return key_hash;
  }

  /// Set the value of a key by copying, overwriting the existing value if
  /// the key is already present. Returns the hash of the key.
  auto assign(const key_type &key, const mapped_type &value) -> hash_type {
    const auto key_hash{this->hash(key)};
    const auto index{this->index_of(key, key_hash)};
    if (index < this->data.size()) {
      this->data[index].second = value;
    } else {
      this->append(key_hash, key, value);
    }

    return key_hash;
  }

  /// Find an entry by key, returning `cend()` if the key is not present.
  /// As a performance optimisation if the hash is known. The given hash
  /// must be the hash of the given key.
  inline auto find(const key_type &key, const hash_type key_hash) const
      -> const_iterator {
    assert(this->hash(key) == key_hash);
    // A missing key yields an index equal to the size of the container,
    // which is exactly the offset of the end iterator
    return this->cbegin() +
           static_cast<difference_type>(this->index_of(key, key_hash));
  }

  /// Find an entry by key, returning `cend()` if the key is not present
  inline auto find(const key_type &key) const -> const_iterator {
    return this->find(key, this->hash(key));
  }

  /// Check whether a key is present.
  /// As a performance optimisation if the hash is known. The given hash
  /// must be the hash of the given key.
  inline auto contains(const key_type &key, const hash_type key_hash) const
      -> bool {
    assert(this->hash(key) == key_hash);
    return this->index_of(key, key_hash) < this->data.size();
  }

  /// Check whether a key is present
  inline auto contains(const key_type &key) const -> bool {
    return this->contains(key, this->hash(key));
  }

  /// Access the value of a key that MUST be present. Looking up a missing
  /// key is undefined behaviour (and an assertion failure on debug builds).
  /// As a performance optimisation if the hash is known. The given hash
  /// must be the hash of the given key.
  inline auto at(const key_type &key, const hash_type key_hash) const
      -> const mapped_type & {
    assert(this->hash(key) == key_hash);
    const auto index{this->index_of(key, key_hash)};
    if (index < this->data.size()) {
      return this->data[index].second;
    }

    assert(false && "The key must exist in the container");
// See https://en.cppreference.com/w/cpp/utility/unreachable
#if defined(_MSC_VER) && !defined(__clang__)
    __assume(false);
#else
    __builtin_unreachable();
#endif
  }

  /// Mutable counterpart of the lookup above, with the same preconditions
  inline auto at(const key_type &key, const hash_type key_hash)
      -> mapped_type & {
    // Casting the constness away is safe here, as this overload can only be
    // selected on a container that is not constant to begin with
    return const_cast<mapped_type &>(
        static_cast<const FlatMap &>(*this).at(key, key_hash));
  }

  /// Access the value of a key that MUST be present
  inline auto at(const key_type &key) const -> const mapped_type & {
    return this->at(key, this->hash(key));
  }

  /// Access the value of a key that MUST be present
  inline auto at(const key_type &key) -> mapped_type & {
    return this->at(key, this->hash(key));
  }

  /// Remove a key, if present. Note that this returns the size of the
  /// container after the operation (NOT the amount of removed entries), and
  /// that the last entry takes the position of the removed one, so the
  /// relative order of the entries is not preserved.
  auto erase(const key_type &key, const hash_type key_hash) -> size_type {
    assert(this->hash(key) == key_hash);
    const auto current_size{this->size()};
    const auto index{this->index_of(key, key_hash)};
    if (index == current_size) {
      return current_size;
    }

    // Move the last entry into the hole, unless the entry being removed
    // already is the last one, to avoid swapping an entry with itself
    if (index + 1 != current_size) {
      std::swap(this->hashes[index], this->hashes.back());
      std::swap(this->data[index], this->data.back());
    }

    this->hashes.pop_back();
    this->data.pop_back();
    return current_size - 1;
  }

  /// Remove a key, if present. See the overload that takes a hash for the
  /// meaning of the return value.
  inline auto erase(const key_type &key) -> size_type {
    return this->erase(key, this->hash(key));
  }

  inline auto size() const noexcept -> size_type {
    assert(this->data.size() == this->hashes.size());
    return this->data.size();
  }

  inline auto empty() const noexcept -> bool {
    assert(this->data.size() == this->hashes.size());
    return this->data.empty();
  }

  inline auto clear() noexcept -> void {
    this->data.clear();
    this->hashes.clear();
  }

  // Spelled out rather than defaulted, as defaulting this operator requires
  // C++20 and the result is the same
  auto operator!=(const FlatMap &other) const -> bool {
    return !(*this == other);
  }

  /// Two maps are equal if they hold the same keys with equal values,
  /// irrespective of the order of their entries
  auto operator==(const FlatMap &other) const -> bool {
    if (this->size() != other.size()) {
      return false;
    }

    for (size_type index = 0; index < this->hashes.size(); index++) {
      const auto iterator{
          other.find(this->data[index].first, this->hashes[index])};
      if (iterator == other.cend() ||
          iterator->second != this->data[index].second) {
        return false;
      }
    }

    return true;
  }

private:
  // Get the position of a key, or the size of the container if the key is
  // not present. The cached hashes are compared first, so that the
  // (potentially expensive) key comparison only runs on a hash match
  inline auto index_of(const key_type &key, const hash_type key_hash) const
      -> size_type {
    assert(this->data.size() == this->hashes.size());
    const auto count{this->hashes.size()};
    for (size_type index = 0; index < count; index++) {
      if (this->hashes[index] == key_hash && this->data[index].first == key) {
        return index;
      }
    }

    return count;
  }

  // Append a new entry, making sure that we either grow both vectors or
  // none. The hash goes first, as undoing that is a trivial operation that
  // cannot fail, while a failure to append the entry leaves the entries
  // vector untouched
  template <typename K, typename V>
  auto append(const hash_type key_hash, K &&key, V &&value) -> void {
    this->hashes.emplace_back(key_hash);
    try {
      this->data.emplace_back(std::forward<K>(key), std::forward<V>(value));
    } catch (...) {
      this->hashes.pop_back();
      throw;
    }
  }

  underlying_type data;
  // So that we can loop over hashes faster, potentially vectorizing loops
  std::vector<hash_type> hashes;
  Hash hasher;
};

} // namespace sourcemeta::jsontoolkit

#endif
36 changes: 15 additions & 21 deletions src/json/include/sourcemeta/jsontoolkit/json_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,15 @@
#include <functional> // std::equal_to, std::less
#include <initializer_list> // std::initializer_list

#if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 12)
#include <map> // std::map
#else
#include <unordered_map> // std::unordered_map
#endif
#include <sourcemeta/jsontoolkit/json_flat_map.h>

namespace sourcemeta::jsontoolkit {

/// @ingroup json
template <typename Key, typename Value, typename Hash> class JSONObject {
public:
// Constructors

// Older versions of GCC don't allow `std::unordered_map` to incomplete
// types, and in this case, `Value` is an incomplete type.
using Container =
#if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 12)
std::map<Key, Value, std::less<Key>,
#else
std::unordered_map<Key, Value, Hash, std::equal_to<Key>,
#endif
typename Value::template Allocator<
std::pair<const typename Value::String, Value>>>;
using Container = FlatMap<Key, Value, Hash>;

JSONObject() : data{} {}
JSONObject(std::initializer_list<typename Container::value_type> values)
Expand Down Expand Up @@ -92,16 +78,24 @@ template <typename Key, typename Value, typename Hash> class JSONObject {
using const_pointer = typename Container::const_pointer;
using const_iterator = typename Container::const_iterator;

auto begin() const noexcept -> const_iterator { return this->data.begin(); }
inline auto begin() const noexcept -> const_iterator {
return this->data.begin();
}
/// Get a constant end iterator on the object
auto end() const noexcept -> const_iterator { return this->data.end(); }
inline auto end() const noexcept -> const_iterator {
return this->data.end();
}
/// Get a constant begin iterator on the object
auto cbegin() const noexcept -> const_iterator { return this->data.cbegin(); }
inline auto cbegin() const noexcept -> const_iterator {
return this->data.cbegin();
}
/// Get a constant end iterator on the object
auto cend() const noexcept -> const_iterator { return this->data.cend(); }
inline auto cend() const noexcept -> const_iterator {
return this->data.cend();
}

/// Attempt to find an entry by key
auto find(const Key &key) const -> const_iterator {
inline auto find(const Key &key) const -> const_iterator {
return this->data.find(key);
}

Expand Down
11 changes: 5 additions & 6 deletions src/json/json_value.cc
Original file line number Diff line number Diff line change
Expand Up @@ -320,13 +320,13 @@ auto JSON::operator-=(const JSON &substractive) -> JSON & {
[[nodiscard]] auto JSON::at(const JSON::String &key) const -> const JSON & {
assert(this->is_object());
assert(this->defines(key));
return std::get<Object>(this->data).data.find(key)->second;
return std::get<Object>(this->data).data.at(key);
}

[[nodiscard]] auto JSON::at(const JSON::String &key) -> JSON & {
assert(this->is_object());
assert(this->defines(key));
return std::get<Object>(this->data).data.find(key)->second;
return std::get<Object>(this->data).data.at(key);
}

[[nodiscard]] auto JSON::front() -> JSON & {
Expand Down Expand Up @@ -506,8 +506,7 @@ auto JSON::operator-=(const JSON &substractive) -> JSON & {

[[nodiscard]] auto JSON::defines(const JSON::String &key) const -> bool {
assert(this->is_object());
return std::get<Object>(this->data).find(key) !=
std::get<Object>(this->data).cend();
return std::get<Object>(this->data).data.contains(key);
}

[[nodiscard]] auto
Expand Down Expand Up @@ -592,12 +591,12 @@ auto JSON::push_back_if_unique(JSON &&value)

auto JSON::assign(const JSON::String &key, const JSON &value) -> void {
assert(this->is_object());
std::get<Object>(this->data).data.insert_or_assign(key, value);
std::get<Object>(this->data).data.assign(key, value);
}

auto JSON::assign(const JSON::String &key, JSON &&value) -> void {
assert(this->is_object());
std::get<Object>(this->data).data.insert_or_assign(key, std::move(value));
std::get<Object>(this->data).data.assign(key, std::move(value));
}

auto JSON::assign_if_missing(const JSON::String &key, const JSON &value)
Expand Down
1 change: 1 addition & 0 deletions test/json/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ add_executable(sourcemeta_jsontoolkit_json_unit
json_array_test.cc
json_boolean_test.cc
json_error_test.cc
json_flat_map_test.cc
json_integer_test.cc
json_null_test.cc
json_number_test.cc
Expand Down
Loading

4 comments on commit b36b73f

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (macos/llvm)

Benchmark suite Current: b36b73f Previous: 377e315 Ratio
JSON_Array_Of_Objects_Unique 360.8053594234045 ns/iter 382.69442984689624 ns/iter 0.94
JSON_Parse_1 21426.56837863615 ns/iter 21520.534873136898 ns/iter 1.00
JSON_Fast_Hash_Helm_Chart_Lock 46.746859374473416 ns/iter 48.54943604768604 ns/iter 0.96
JSON_Equality_Helm_Chart_Lock 181.59081452550697 ns/iter 303.77746421382824 ns/iter 0.60
Regex_Lower_S_Or_Upper_S_Asterisk 1.6608361637763973 ns/iter 1.6825297514600892 ns/iter 0.99
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 1.6716447994004775 ns/iter 1.6188664148252556 ns/iter 1.03
Regex_Period_Asterisk 1.6896408722398444 ns/iter 1.6160517107906283 ns/iter 1.05
Regex_Group_Period_Asterisk_Group 1.667259200806724 ns/iter 1.5974829926408705 ns/iter 1.04
Regex_Period_Plus 1.990617266096109 ns/iter 1.9286718452454914 ns/iter 1.03
Regex_Period 2.001168800277934 ns/iter 1.9413508729507076 ns/iter 1.03
Regex_Caret_Period_Plus_Dollar 1.9998650434884764 ns/iter 1.9394329114964082 ns/iter 1.03
Regex_Caret_Group_Period_Plus_Group_Dollar 1.9486926601088146 ns/iter 1.998837766200905 ns/iter 0.97
Regex_Caret_Period_Asterisk_Dollar 1.610102218317659 ns/iter 1.6345024281418776 ns/iter 0.99
Regex_Caret_Group_Period_Asterisk_Group_Dollar 1.6674963196522123 ns/iter 1.6065368878825994 ns/iter 1.04
Regex_Caret_X_Hyphen 6.1232208590154915 ns/iter 6.36891109609723 ns/iter 0.96
Regex_Period_Md_Dollar 68.98436520172537 ns/iter 69.05395270875583 ns/iter 1.00
Regex_Caret_Slash_Period_Asterisk 4.596032799560272 ns/iter 4.899448173665108 ns/iter 0.94
Regex_Caret_Period_Range_Dollar 2.328584394179153 ns/iter 2.262724475649273 ns/iter 1.03
Regex_Nested_Backtrack 705.7922050955449 ns/iter 744.0728987471672 ns/iter 0.95
Pointer_Object_Traverse 66.4337277172198 ns/iter 52.535683000369495 ns/iter 1.26
Pointer_Object_Try_Traverse 63.47252993586622 ns/iter 50.84197740337757 ns/iter 1.25

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (linux/llvm)

Benchmark suite Current: b36b73f Previous: 377e315 Ratio
JSON_Array_Of_Objects_Unique 483.999593949099 ns/iter 555.5726957163877 ns/iter 0.87
JSON_Parse_1 30745.70631334537 ns/iter 32677.432990171845 ns/iter 0.94
JSON_Fast_Hash_Helm_Chart_Lock 56.96503937731857 ns/iter 69.8694454592228 ns/iter 0.82
JSON_Equality_Helm_Chart_Lock 233.00584248496244 ns/iter 223.8231201872805 ns/iter 1.04
Regex_Lower_S_Or_Upper_S_Asterisk 2.7837273419727864 ns/iter 2.7863295294829573 ns/iter 1.00
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 2.7855394679375833 ns/iter 2.7854279341099115 ns/iter 1.00
Regex_Period_Asterisk 2.7905675760497504 ns/iter 2.784737032887891 ns/iter 1.00
Regex_Group_Period_Asterisk_Group 2.7900690465023574 ns/iter 2.7828720360513364 ns/iter 1.00
Regex_Period_Plus 2.8278984580084834 ns/iter 2.790476679991872 ns/iter 1.01
Regex_Period 2.7852363752503666 ns/iter 2.7910395314052114 ns/iter 1.00
Regex_Caret_Period_Plus_Dollar 2.8388271485227676 ns/iter 2.788147036245607 ns/iter 1.02
Regex_Caret_Group_Period_Plus_Group_Dollar 2.785393726887076 ns/iter 2.782488814272368 ns/iter 1.00
Regex_Caret_Period_Asterisk_Dollar 4.0394731734175835 ns/iter 4.020357765508417 ns/iter 1.00
Regex_Caret_Group_Period_Asterisk_Group_Dollar 4.022037648277625 ns/iter 4.021283506616653 ns/iter 1.00
Regex_Caret_X_Hyphen 12.38583803414965 ns/iter 10.302891805551694 ns/iter 1.20
Regex_Period_Md_Dollar 105.7262934315212 ns/iter 107.0189073463492 ns/iter 0.99
Regex_Caret_Slash_Period_Asterisk 7.442841217549305 ns/iter 7.580157850996015 ns/iter 0.98
Regex_Caret_Period_Range_Dollar 4.038317477250506 ns/iter 4.022262013827309 ns/iter 1.00
Regex_Nested_Backtrack 804.3331165585756 ns/iter 886.5699579074721 ns/iter 0.91
Pointer_Object_Traverse 70.01082060692866 ns/iter 96.97970746337131 ns/iter 0.72
Pointer_Object_Try_Traverse 83.95938984044852 ns/iter 106.58235132738353 ns/iter 0.79

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (linux/gcc)

Benchmark suite Current: b36b73f Previous: 377e315 Ratio
Pointer_Object_Traverse 110.54919249181793 ns/iter 129.07036872821908 ns/iter 0.86
Pointer_Object_Try_Traverse 70.04008120193295 ns/iter 99.37535479485834 ns/iter 0.70
Regex_Lower_S_Or_Upper_S_Asterisk 1.5478724389282659 ns/iter 1.5465205005199538 ns/iter 1.00
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 1.2376151594304394 ns/iter 1.237392443882773 ns/iter 1.00
Regex_Period_Asterisk 1.5467772403468072 ns/iter 1.5475569541093757 ns/iter 1.00
Regex_Group_Period_Asterisk_Group 1.2390426444949472 ns/iter 1.2390039743293608 ns/iter 1.00
Regex_Period_Plus 1.6352855671272852 ns/iter 1.5496243298396402 ns/iter 1.06
Regex_Period 1.2660951943594225 ns/iter 1.2795390048252429 ns/iter 0.99
Regex_Caret_Period_Plus_Dollar 1.5470686805240512 ns/iter 1.5474778623958074 ns/iter 1.00
Regex_Caret_Group_Period_Plus_Group_Dollar 1.2426966670339057 ns/iter 1.237457386595694 ns/iter 1.00
Regex_Caret_Period_Asterisk_Dollar 1.5465757282516368 ns/iter 1.5464248818191997 ns/iter 1.00
Regex_Caret_Group_Period_Asterisk_Group_Dollar 1.2404322770299563 ns/iter 1.2378746765200093 ns/iter 1.00
Regex_Caret_X_Hyphen 12.675654199553572 ns/iter 12.672893827610963 ns/iter 1.00
Regex_Period_Md_Dollar 104.066631041823 ns/iter 94.19144379382497 ns/iter 1.10
Regex_Caret_Slash_Period_Asterisk 4.645952845380502 ns/iter 4.638015762126131 ns/iter 1.00
Regex_Caret_Period_Range_Dollar 1.5477915046918567 ns/iter 1.5480436911857052 ns/iter 1.00
Regex_Nested_Backtrack 880.4003438921709 ns/iter 874.7182507483815 ns/iter 1.01
JSON_Array_Of_Objects_Unique 436.50033881726205 ns/iter 565.5516714448338 ns/iter 0.77
JSON_Parse_1 35561.07372287811 ns/iter 38493.017337222125 ns/iter 0.92
JSON_Fast_Hash_Helm_Chart_Lock 68.08541390964501 ns/iter 107.11214560712443 ns/iter 0.64
JSON_Equality_Helm_Chart_Lock 258.0426181174469 ns/iter 329.1308254359724 ns/iter 0.78

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (windows/msvc)

Benchmark suite Current: b36b73f Previous: 377e315 Ratio
JSON_Array_Of_Objects_Unique 525.1273214286454 ns/iter 527.3346428570643 ns/iter 1.00
JSON_Parse_1 81188.6830357013 ns/iter 80884.05133927421 ns/iter 1.00
JSON_Fast_Hash_Helm_Chart_Lock 63.43834821428069 ns/iter 62.42858035713133 ns/iter 1.02
JSON_Equality_Helm_Chart_Lock 288.2504201674177 ns/iter 328.4677232142842 ns/iter 0.88
Regex_Lower_S_Or_Upper_S_Asterisk 5.599755357142807 ns/iter 5.4882767857143175 ns/iter 1.02
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 5.785964285712859 ns/iter 5.30532699999867 ns/iter 1.09
Regex_Period_Asterisk 6.406999999999385 ns/iter 5.599661607143551 ns/iter 1.14
Regex_Group_Period_Asterisk_Group 6.678510714284666 ns/iter 5.998249107142735 ns/iter 1.11
Regex_Period_Plus 6.070776785713455 ns/iter 5.985805357143801 ns/iter 1.01
Regex_Period 5.6454357142854406 ns/iter 6.16989107142883 ns/iter 0.91
Regex_Caret_Period_Plus_Dollar 6.514907366071807 ns/iter 5.760315178570912 ns/iter 1.13
Regex_Caret_Group_Period_Plus_Group_Dollar 6.429756249998166 ns/iter 6.6245687500006465 ns/iter 0.97
Regex_Caret_Period_Asterisk_Dollar 5.415309821427984 ns/iter 5.964831473215883 ns/iter 0.91
Regex_Caret_Group_Period_Asterisk_Group_Dollar 5.573633928571146 ns/iter 5.305420535713828 ns/iter 1.05
Regex_Caret_X_Hyphen 14.824572924890427 ns/iter 13.060073214286863 ns/iter 1.14
Regex_Period_Md_Dollar 133.34773065408893 ns/iter 149.62609823097455 ns/iter 0.89
Regex_Caret_Slash_Period_Asterisk 8.499495497768828 ns/iter 9.084069602302073 ns/iter 0.94
Regex_Caret_Period_Range_Dollar 6.145523437499634 ns/iter 5.680679464284871 ns/iter 1.08
Regex_Nested_Backtrack 598.1163999999808 ns/iter 597.3702999999659 ns/iter 1.00
Pointer_Object_Traverse 84.0291792844056 ns/iter 89.31232636998624 ns/iter 0.94
Pointer_Object_Try_Traverse 87.1095982142681 ns/iter 87.80271036594705 ns/iter 0.99

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.