-
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement a flat map container for JSON objects (#1352)
Signed-off-by: Juan Cruz Viotti <[email protected]>
- Loading branch information
Showing
6 changed files
with
573 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
235 changes: 235 additions & 0 deletions
235
src/json/include/sourcemeta/jsontoolkit/json_flat_map.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,235 @@ | ||
#ifndef SOURCEMETA_JSONTOOLKIT_JSON_FLAT_MAP_H_ | ||
#define SOURCEMETA_JSONTOOLKIT_JSON_FLAT_MAP_H_ | ||
|
||
#include <algorithm> // std::swap | ||
#include <cassert> // assert | ||
#include <cstddef> // std::size_t | ||
#include <functional> // std::hash | ||
#include <initializer_list> // std::initializer_list | ||
#include <iterator> // std::advance | ||
#include <utility> // std::pair, std::move | ||
#include <vector> // std::vector | ||
|
||
namespace sourcemeta::jsontoolkit { | ||
|
||
/// @ingroup json
/// An associative container that keeps its entries in a contiguous vector
/// and the hash of every key in a second, parallel vector. Lookups are a
/// linear scan over the hashes, only comparing keys when the hashes match.
///
/// Invariant: `data` and `hashes` always have the same length, and
/// `hashes[i]` is the hash of `data[i].first`.
template <typename Key, typename Value, typename Hash = std::hash<Key>>
class FlatMap {
public:
  FlatMap() = default;

  using key_type = Key;
  using mapped_type = Value;
  using value_type = std::pair<Key, Value>;
  using underlying_type = std::vector<value_type>;
  using size_type = typename underlying_type::size_type;
  using difference_type = typename underlying_type::difference_type;
  using allocator_type = typename underlying_type::allocator_type;
  using reference = typename underlying_type::reference;
  using const_reference = typename underlying_type::const_reference;
  using pointer = typename underlying_type::pointer;
  using const_pointer = typename underlying_type::const_pointer;
  using const_iterator = typename underlying_type::const_iterator;
  using hash_type = std::size_t;

  /// Construct a map out of a list of entries. If a key appears more than
  /// once, the value of its last occurrence wins.
  FlatMap(std::initializer_list<value_type> entries) {
    this->hashes.reserve(entries.size());
    this->data.reserve(entries.size());
    // The elements of an initializer list are const, so they can only be
    // copied. Applying `std::move` to them would still select the copying
    // overload, so make the copy explicit
    for (const auto &entry : entries) {
      this->assign(entry.first, entry.second);
    }
  }

  auto begin() const noexcept -> const_iterator { return this->data.begin(); }
  auto end() const noexcept -> const_iterator { return this->data.end(); }
  auto cbegin() const noexcept -> const_iterator { return this->data.cbegin(); }
  auto cend() const noexcept -> const_iterator { return this->data.cend(); }

  /// Compute the hash of a key using the hasher of this container
  inline auto hash(const key_type &key) const noexcept -> hash_type {
    return this->hasher(key);
  }

  // TODO: Add an assign overload for const key, rvalue

  /// Set the value of a key, moving from the arguments. If the key already
  /// exists, only its value is replaced. Otherwise a new entry is appended.
  /// Returns the hash of the key.
  auto assign(key_type &&key, mapped_type &&value) -> hash_type {
    const auto key_hash{this->hash(key)};
    const auto index{this->index_of(key, key_hash)};
    if (index < this->data.size()) {
      this->data[index].second = std::move(value);
    } else {
      this->append(key_hash, std::move(key), std::move(value));
    }

    return key_hash;
  }

  /// Set the value of a key, copying the arguments. If the key already
  /// exists, only its value is replaced. Otherwise a new entry is appended.
  /// Returns the hash of the key.
  auto assign(const key_type &key, const mapped_type &value) -> hash_type {
    const auto key_hash{this->hash(key)};
    const auto index{this->index_of(key, key_hash)};
    if (index < this->data.size()) {
      this->data[index].second = value;
    } else {
      this->append(key_hash, key, value);
    }

    return key_hash;
  }

  /// Find the entry of a key given its precomputed hash, which must be the
  /// hash of that key. Returns the end iterator if the key is not present.
  // As a performance optimisation if the hash is known
  inline auto find(const key_type &key, const hash_type key_hash) const
      -> const_iterator {
    assert(this->hash(key) == key_hash);
    // A missing key yields an index equal to the size, which is precisely
    // the end iterator
    return this->cbegin() +
           static_cast<difference_type>(this->index_of(key, key_hash));
  }

  /// Find the entry of a key. Returns the end iterator if it is not present.
  inline auto find(const key_type &key) const -> const_iterator {
    return this->find(key, this->hash(key));
  }

  /// Check whether a key is present given its precomputed hash, which must
  /// be the hash of that key.
  // As a performance optimisation if the hash is known
  inline auto contains(const key_type &key, const hash_type key_hash) const
      -> bool {
    assert(this->hash(key) == key_hash);
    return this->index_of(key, key_hash) < this->data.size();
  }

  /// Check whether a key is present
  inline auto contains(const key_type &key) const -> bool {
    return this->contains(key, this->hash(key));
  }

  /// Access the value of a key given its precomputed hash, which must be the
  /// hash of that key. The key MUST exist. Otherwise the behavior is
  /// undefined (and an assertion fires when assertions are enabled).
  // As a performance optimisation if the hash is known
  inline auto at(const key_type &key, const hash_type key_hash) const
      -> const mapped_type & {
    assert(this->hash(key) == key_hash);
    const auto index{this->index_of(key, key_hash)};
    if (index >= this->data.size()) {
      assert(false && "The key must exist in the map");
      FlatMap::unreachable();
    }

    return this->data[index].second;
  }

  /// Access the value of a key for modification given its precomputed hash,
  /// which must be the hash of that key. The key MUST exist. Otherwise the
  /// behavior is undefined (and an assertion fires when assertions are
  /// enabled).
  inline auto at(const key_type &key, const hash_type key_hash)
      -> mapped_type & {
    assert(this->hash(key) == key_hash);
    const auto index{this->index_of(key, key_hash)};
    if (index >= this->data.size()) {
      assert(false && "The key must exist in the map");
      FlatMap::unreachable();
    }

    return this->data[index].second;
  }

  /// Access the value of a key. The key MUST exist.
  inline auto at(const key_type &key) const -> const mapped_type & {
    return this->at(key, this->hash(key));
  }

  /// Access the value of a key for modification. The key MUST exist.
  inline auto at(const key_type &key) -> mapped_type & {
    return this->at(key, this->hash(key));
  }

  /// Remove a key given its precomputed hash, which must be the hash of that
  /// key. The last entry takes the place of the removed one, so the relative
  /// order of the entries is not preserved.
  ///
  /// Note that this returns the size of the container AFTER the operation,
  /// not the number of removed entries: the previous size minus one if the
  /// key was present, or the unchanged size otherwise.
  auto erase(const key_type &key, const hash_type key_hash) -> size_type {
    assert(this->hash(key) == key_hash);
    const auto current_size{this->size()};
    const auto index{this->index_of(key, key_hash)};
    if (index == current_size) {
      return current_size;
    }

    const auto last{current_size - 1};
    // Nothing to relocate if we are removing the last entry. This also
    // avoids a self-assignment
    if (index != last) {
      this->hashes[index] = this->hashes[last];
      this->data[index] = std::move(this->data[last]);
    }

    this->hashes.pop_back();
    this->data.pop_back();
    return last;
  }

  /// Remove a key. See the overload that takes a hash for the meaning of the
  /// return value.
  inline auto erase(const key_type &key) -> size_type {
    return this->erase(key, this->hash(key));
  }

  /// The number of entries in the container
  inline auto size() const noexcept -> size_type {
    assert(this->data.size() == this->hashes.size());
    return this->data.size();
  }

  /// Whether the container has no entries
  inline auto empty() const noexcept -> bool {
    assert(this->data.size() == this->hashes.size());
    return this->data.empty();
  }

  /// Remove every entry
  inline auto clear() noexcept -> void {
    this->data.clear();
    this->hashes.clear();
  }

  /// The negation of the equality operator
  auto operator!=(const FlatMap &other) const -> bool {
    return !(*this == other);
  }

  /// Two maps are equal if they hold the same keys with equal values,
  /// regardless of the order in which the entries are stored
  auto operator==(const FlatMap &other) const -> bool {
    if (this->size() != other.size()) {
      return false;
    }

    // Keys are unique and both sides have the same size, so it is enough
    // to check that every entry on this side has an equal peer on the other
    for (size_type index = 0; index < this->hashes.size(); index++) {
      const auto match{
          other.find(this->data[index].first, this->hashes[index])};
      if (match == other.cend() ||
          match->second != this->data[index].second) {
        return false;
      }
    }

    return true;
  }

private:
  // Return the position of the entry that matches the given key and hash,
  // or the number of entries if there is no such entry
  inline auto index_of(const key_type &key, const hash_type key_hash) const
      -> size_type {
    assert(this->data.size() == this->hashes.size());
    const auto total{this->hashes.size()};
    for (size_type index = 0; index < total; index++) {
      // Compare the hashes first, as that is cheaper than comparing keys
      if (this->hashes[index] == key_hash && this->data[index].first == key) {
        return index;
      }
    }

    return total;
  }

  // Append a new entry while making sure that we either grow both vectors
  // or none. The hash goes first, as storing it leaves the key and the value
  // untouched if it fails, and it is trivial to roll back if storing the
  // entry fails afterwards
  template <typename K, typename V>
  auto append(const hash_type key_hash, K &&key, V &&value) -> void {
    this->hashes.emplace_back(key_hash);
    try {
      this->data.emplace_back(std::forward<K>(key), std::forward<V>(value));
    } catch (...) {
      this->hashes.pop_back();
      throw;
    }
  }

  // Tell the compiler that the calling code path can never be taken
  // See https://en.cppreference.com/w/cpp/utility/unreachable
  [[noreturn]] static inline auto unreachable() noexcept -> void {
#if defined(_MSC_VER) && !defined(__clang__)
    __assume(false);
#else
    __builtin_unreachable();
#endif
  }

  underlying_type data;
  // So that we can loop over hashes faster, potentially vectorizing loops
  std::vector<hash_type> hashes;
  Hash hasher;
};
|
||
} // namespace sourcemeta::jsontoolkit | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
b36b73f
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Benchmark (macos/llvm)
JSON_Array_Of_Objects_Unique
360.8053594234045
ns/iter382.69442984689624
ns/iter0.94
JSON_Parse_1
21426.56837863615
ns/iter21520.534873136898
ns/iter1.00
JSON_Fast_Hash_Helm_Chart_Lock
46.746859374473416
ns/iter48.54943604768604
ns/iter0.96
JSON_Equality_Helm_Chart_Lock
181.59081452550697
ns/iter303.77746421382824
ns/iter0.60
Regex_Lower_S_Or_Upper_S_Asterisk
1.6608361637763973
ns/iter1.6825297514600892
ns/iter0.99
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar
1.6716447994004775
ns/iter1.6188664148252556
ns/iter1.03
Regex_Period_Asterisk
1.6896408722398444
ns/iter1.6160517107906283
ns/iter1.05
Regex_Group_Period_Asterisk_Group
1.667259200806724
ns/iter1.5974829926408705
ns/iter1.04
Regex_Period_Plus
1.990617266096109
ns/iter1.9286718452454914
ns/iter1.03
Regex_Period
2.001168800277934
ns/iter1.9413508729507076
ns/iter1.03
Regex_Caret_Period_Plus_Dollar
1.9998650434884764
ns/iter1.9394329114964082
ns/iter1.03
Regex_Caret_Group_Period_Plus_Group_Dollar
1.9486926601088146
ns/iter1.998837766200905
ns/iter0.97
Regex_Caret_Period_Asterisk_Dollar
1.610102218317659
ns/iter1.6345024281418776
ns/iter0.99
Regex_Caret_Group_Period_Asterisk_Group_Dollar
1.6674963196522123
ns/iter1.6065368878825994
ns/iter1.04
Regex_Caret_X_Hyphen
6.1232208590154915
ns/iter6.36891109609723
ns/iter0.96
Regex_Period_Md_Dollar
68.98436520172537
ns/iter69.05395270875583
ns/iter1.00
Regex_Caret_Slash_Period_Asterisk
4.596032799560272
ns/iter4.899448173665108
ns/iter0.94
Regex_Caret_Period_Range_Dollar
2.328584394179153
ns/iter2.262724475649273
ns/iter1.03
Regex_Nested_Backtrack
705.7922050955449
ns/iter744.0728987471672
ns/iter0.95
Pointer_Object_Traverse
66.4337277172198
ns/iter52.535683000369495
ns/iter1.26
Pointer_Object_Try_Traverse
63.47252993586622
ns/iter50.84197740337757
ns/iter1.25
This comment was automatically generated by workflow using github-action-benchmark.
b36b73f
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Benchmark (linux/llvm)
JSON_Array_Of_Objects_Unique
483.999593949099
ns/iter555.5726957163877
ns/iter0.87
JSON_Parse_1
30745.70631334537
ns/iter32677.432990171845
ns/iter0.94
JSON_Fast_Hash_Helm_Chart_Lock
56.96503937731857
ns/iter69.8694454592228
ns/iter0.82
JSON_Equality_Helm_Chart_Lock
233.00584248496244
ns/iter223.8231201872805
ns/iter1.04
Regex_Lower_S_Or_Upper_S_Asterisk
2.7837273419727864
ns/iter2.7863295294829573
ns/iter1.00
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar
2.7855394679375833
ns/iter2.7854279341099115
ns/iter1.00
Regex_Period_Asterisk
2.7905675760497504
ns/iter2.784737032887891
ns/iter1.00
Regex_Group_Period_Asterisk_Group
2.7900690465023574
ns/iter2.7828720360513364
ns/iter1.00
Regex_Period_Plus
2.8278984580084834
ns/iter2.790476679991872
ns/iter1.01
Regex_Period
2.7852363752503666
ns/iter2.7910395314052114
ns/iter1.00
Regex_Caret_Period_Plus_Dollar
2.8388271485227676
ns/iter2.788147036245607
ns/iter1.02
Regex_Caret_Group_Period_Plus_Group_Dollar
2.785393726887076
ns/iter2.782488814272368
ns/iter1.00
Regex_Caret_Period_Asterisk_Dollar
4.0394731734175835
ns/iter4.020357765508417
ns/iter1.00
Regex_Caret_Group_Period_Asterisk_Group_Dollar
4.022037648277625
ns/iter4.021283506616653
ns/iter1.00
Regex_Caret_X_Hyphen
12.38583803414965
ns/iter10.302891805551694
ns/iter1.20
Regex_Period_Md_Dollar
105.7262934315212
ns/iter107.0189073463492
ns/iter0.99
Regex_Caret_Slash_Period_Asterisk
7.442841217549305
ns/iter7.580157850996015
ns/iter0.98
Regex_Caret_Period_Range_Dollar
4.038317477250506
ns/iter4.022262013827309
ns/iter1.00
Regex_Nested_Backtrack
804.3331165585756
ns/iter886.5699579074721
ns/iter0.91
Pointer_Object_Traverse
70.01082060692866
ns/iter96.97970746337131
ns/iter0.72
Pointer_Object_Try_Traverse
83.95938984044852
ns/iter106.58235132738353
ns/iter0.79
This comment was automatically generated by workflow using github-action-benchmark.
b36b73f
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Benchmark (linux/gcc)
Pointer_Object_Traverse
110.54919249181793
ns/iter129.07036872821908
ns/iter0.86
Pointer_Object_Try_Traverse
70.04008120193295
ns/iter99.37535479485834
ns/iter0.70
Regex_Lower_S_Or_Upper_S_Asterisk
1.5478724389282659
ns/iter1.5465205005199538
ns/iter1.00
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar
1.2376151594304394
ns/iter1.237392443882773
ns/iter1.00
Regex_Period_Asterisk
1.5467772403468072
ns/iter1.5475569541093757
ns/iter1.00
Regex_Group_Period_Asterisk_Group
1.2390426444949472
ns/iter1.2390039743293608
ns/iter1.00
Regex_Period_Plus
1.6352855671272852
ns/iter1.5496243298396402
ns/iter1.06
Regex_Period
1.2660951943594225
ns/iter1.2795390048252429
ns/iter0.99
Regex_Caret_Period_Plus_Dollar
1.5470686805240512
ns/iter1.5474778623958074
ns/iter1.00
Regex_Caret_Group_Period_Plus_Group_Dollar
1.2426966670339057
ns/iter1.237457386595694
ns/iter1.00
Regex_Caret_Period_Asterisk_Dollar
1.5465757282516368
ns/iter1.5464248818191997
ns/iter1.00
Regex_Caret_Group_Period_Asterisk_Group_Dollar
1.2404322770299563
ns/iter1.2378746765200093
ns/iter1.00
Regex_Caret_X_Hyphen
12.675654199553572
ns/iter12.672893827610963
ns/iter1.00
Regex_Period_Md_Dollar
104.066631041823
ns/iter94.19144379382497
ns/iter1.10
Regex_Caret_Slash_Period_Asterisk
4.645952845380502
ns/iter4.638015762126131
ns/iter1.00
Regex_Caret_Period_Range_Dollar
1.5477915046918567
ns/iter1.5480436911857052
ns/iter1.00
Regex_Nested_Backtrack
880.4003438921709
ns/iter874.7182507483815
ns/iter1.01
JSON_Array_Of_Objects_Unique
436.50033881726205
ns/iter565.5516714448338
ns/iter0.77
JSON_Parse_1
35561.07372287811
ns/iter38493.017337222125
ns/iter0.92
JSON_Fast_Hash_Helm_Chart_Lock
68.08541390964501
ns/iter107.11214560712443
ns/iter0.64
JSON_Equality_Helm_Chart_Lock
258.0426181174469
ns/iter329.1308254359724
ns/iter0.78
This comment was automatically generated by workflow using github-action-benchmark.
b36b73f
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Benchmark (windows/msvc)
JSON_Array_Of_Objects_Unique
525.1273214286454
ns/iter527.3346428570643
ns/iter1.00
JSON_Parse_1
81188.6830357013
ns/iter80884.05133927421
ns/iter1.00
JSON_Fast_Hash_Helm_Chart_Lock
63.43834821428069
ns/iter62.42858035713133
ns/iter1.02
JSON_Equality_Helm_Chart_Lock
288.2504201674177
ns/iter328.4677232142842
ns/iter0.88
Regex_Lower_S_Or_Upper_S_Asterisk
5.599755357142807
ns/iter5.4882767857143175
ns/iter1.02
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar
5.785964285712859
ns/iter5.30532699999867
ns/iter1.09
Regex_Period_Asterisk
6.406999999999385
ns/iter5.599661607143551
ns/iter1.14
Regex_Group_Period_Asterisk_Group
6.678510714284666
ns/iter5.998249107142735
ns/iter1.11
Regex_Period_Plus
6.070776785713455
ns/iter5.985805357143801
ns/iter1.01
Regex_Period
5.6454357142854406
ns/iter6.16989107142883
ns/iter0.91
Regex_Caret_Period_Plus_Dollar
6.514907366071807
ns/iter5.760315178570912
ns/iter1.13
Regex_Caret_Group_Period_Plus_Group_Dollar
6.429756249998166
ns/iter6.6245687500006465
ns/iter0.97
Regex_Caret_Period_Asterisk_Dollar
5.415309821427984
ns/iter5.964831473215883
ns/iter0.91
Regex_Caret_Group_Period_Asterisk_Group_Dollar
5.573633928571146
ns/iter5.305420535713828
ns/iter1.05
Regex_Caret_X_Hyphen
14.824572924890427
ns/iter13.060073214286863
ns/iter1.14
Regex_Period_Md_Dollar
133.34773065408893
ns/iter149.62609823097455
ns/iter0.89
Regex_Caret_Slash_Period_Asterisk
8.499495497768828
ns/iter9.084069602302073
ns/iter0.94
Regex_Caret_Period_Range_Dollar
6.145523437499634
ns/iter5.680679464284871
ns/iter1.08
Regex_Nested_Backtrack
598.1163999999808
ns/iter597.3702999999659
ns/iter1.00
Pointer_Object_Traverse
84.0291792844056
ns/iter89.31232636998624
ns/iter0.94
Pointer_Object_Try_Traverse
87.1095982142681
ns/iter87.80271036594705
ns/iter0.99
This comment was automatically generated by workflow using github-action-benchmark.