diff --git a/include/openvic-dataloader/AbstractSyntaxTree.hpp b/include/openvic-dataloader/AbstractSyntaxTree.hpp new file mode 100644 index 0000000..f085148 --- /dev/null +++ b/include/openvic-dataloader/AbstractSyntaxTree.hpp @@ -0,0 +1,105 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +namespace ovdl { + struct AbstractSyntaxTree { + struct SymbolId; + using index_type = std::uint32_t; + using symbol_type = dryad::symbol; + using symbol_interner_type = dryad::symbol_interner; + + symbol_type intern(const char* str, std::size_t length); + symbol_type intern(std::string_view str); + const char* intern_cstr(const char* str, std::size_t length); + const char* intern_cstr(std::string_view str); + symbol_interner_type& symbol_interner(); + const symbol_interner_type& symbol_interner() const; + + protected: + symbol_interner_type _symbol_interner; + }; + + template + concept IsAst = std::derived_from && requires(T t, const T::node_type* node, NodeLocation loc) { + requires IsFile; + typename T::root_node_type; + typename T::node_type; + requires std::derived_from; + { t.set_location(node, loc) } -> std::same_as; + { t.location_of(node) } -> std::same_as; + { t.root() } -> std::same_as; + { const_cast(t).root() } -> std::same_as; + { t.file() } -> std::same_as; + { const_cast(t).file() } -> std::same_as; + }; + + template RootNodeT> + struct BasicAbstractSyntaxTree : AbstractSyntaxTree { + using file_type = FileT; + using root_node_type = RootNodeT; + using node_type = file_type::node_type; + + explicit BasicAbstractSyntaxTree(file_type&& file) : _file(std::move(file)) {} + explicit BasicAbstractSyntaxTree(lexy::buffer&& buffer) : _file(std::move(buffer)) {} + + void set_location(const node_type* n, NodeLocation loc) { + _file.set_location(n, loc); + } + + NodeLocation location_of(const node_type* n) const { + return _file.location_of(n); + } + + 
root_node_type* root() { + return _tree.root(); + } + + const root_node_type* root() const { + return _tree.root(); + } + + file_type& file() { + return _file; + } + + const file_type& file() const { + return _file; + } + + template + T* create(NodeLocation loc, Args&&... args) { + auto node = _tree.template create(DRYAD_FWD(args)...); + set_location(node, loc); + return node; + } + + template + T* create(const char* begin, const char* end, Args&&... args) { + return create(NodeLocation::make_from(begin, end), DRYAD_FWD(args)...); + } + + void set_root(root_node_type* node) { + _tree.set_root(node); + } + + protected: + dryad::tree _tree; + file_type _file; + }; +} \ No newline at end of file diff --git a/include/openvic-dataloader/DiagnosticLogger.hpp b/include/openvic-dataloader/DiagnosticLogger.hpp new file mode 100644 index 0000000..54b5222 --- /dev/null +++ b/include/openvic-dataloader/DiagnosticLogger.hpp @@ -0,0 +1,144 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include + +namespace ovdl { + struct DiagnosticLogger { + using AnnotationKind = lexy_ext::annotation_kind; + using DiagnosticKind = lexy_ext::diagnostic_kind; + + explicit operator bool() const; + bool errored() const; + bool warned() const; + + protected: + bool _errored = false; + bool _warned = false; + }; + + template + struct BasicDiagnosticLogger : DiagnosticLogger { + using file_type = FileT; + + explicit BasicDiagnosticLogger(const file_type& file, std::ostream& error_stream) + : _file(&file), + _stream_iterator(detail::OStreamOutputIterator { error_stream }) {} + + class Writer { + public: + template + [[nodiscard]] Writer& annotation(AnnotationKind kind, + NodeLocation loc, + const CharT* fmt, Args&&... 
args) { + + auto begin_loc = lexy::get_input_location(_file->buffer(), loc.begin()); + + _impl.write_empty_annotation(_stream_iterator); + _impl.write_annotation(_stream_iterator, kind, begin_loc, loc.end(), + [&](auto out, lexy::visualization_options) { + return lexy::_detail::write_str(out, fmt::format(fmt::runtime(fmt), std::forward(args)...).c_str()); + }); + return *this; + } + + template + [[nodiscard]] Writer& primary(NodeLocation loc, const CharT* fmt, Args&&... args) { + return annotation(AnnotationKind::primary, loc, fmt, std::forward(args)...); + } + + template + [[nodiscard]] Writer& secondary(NodeLocation loc, const CharT* fmt, Args&&... args) { + return annotation(AnnotationKind::secondary, loc, fmt, std::forward(args)...); + } + + void finish() {} + + private: + Writer(const file_type* file, detail::OStreamOutputIterator& stream_iterator) + : _file(file), + _impl(file->buffer(), { lexy::visualize_fancy }), + _stream_iterator(stream_iterator) {} + + const file_type* _file; + lexy_ext::diagnostic_writer> _impl; + detail::OStreamOutputIterator& _stream_iterator; + + friend BasicDiagnosticLogger; + }; + + template + Writer log(DiagnosticKind kind, const CharT* fmt, Args&&... args) { + Writer result(_file, _stream_iterator); + + result._impl.write_message(_stream_iterator, kind, + [&](auto out, lexy::visualization_options) { + return lexy::_detail::write_str(out, fmt::format(fmt::runtime(fmt), std::forward(args)...).c_str()); + }); + result._impl.write_path(_stream_iterator, _file->path()); + + if (kind == DiagnosticKind::error) + _errored = true; + if (kind == DiagnosticKind::warning) + _warned = true; + return result; + } + + template + Writer error(const CharT* fmt, Args&&... args) { + return log(DiagnosticKind::error, fmt, std::forward(args)...); + } + + template + Writer warning(const CharT* fmt, Args&&... args) { + return log(DiagnosticKind::warning, fmt, std::forward(args)...); + } + + template + Writer note(const CharT* fmt, Args&&... 
args) { + return log(DiagnosticKind::note, fmt, std::forward(args)...); + } + + template + Writer info(const CharT* fmt, Args&&... args) { + return log(DiagnosticKind::info, fmt, std::forward(args)...); + } + + template + Writer debug(const CharT* fmt, Args&&... args) { + return log(DiagnosticKind::debug, fmt, std::forward(args)...); + } + + template + Writer fixit(const CharT* fmt, Args&&... args) { + return log(DiagnosticKind::fixit, fmt, std::forward(args)...); + } + + template + Writer help(const CharT* fmt, Args&&... args) { + return log(DiagnosticKind::help, fmt, std::forward(args)...); + } + + auto error_callback() const { + return ovdl::detail::ReporError.path(_file->path()).to(_stream_iterator); + } + + private: + const file_type* _file; + detail::OStreamOutputIterator _stream_iterator; + }; +} \ No newline at end of file diff --git a/include/openvic-dataloader/File.hpp b/include/openvic-dataloader/File.hpp new file mode 100644 index 0000000..ccf09e9 --- /dev/null +++ b/include/openvic-dataloader/File.hpp @@ -0,0 +1,71 @@ +#pragma once + +#include +#include + +#include + +#include + +#include + +namespace ovdl { + template + concept IsEncoding = requires(T t) { + typename T::char_type; + typename T::int_type; + { T::template is_secondary_char_type() } -> std::same_as; + { T::eof() } -> std::same_as; + { T::to_int_type(typename T::char_type {}) } -> std::same_as; + }; + + struct File { + explicit File(const char* path); + + const char* path() const noexcept; + + protected: + const char* _path; + }; + + template + concept IsFile = + std::derived_from && IsEncoding && + requires(T t, const T::node_type* node, NodeLocation location) { + { t.buffer() } -> std::same_as&>; + { t.set_location(node, location) } -> std::same_as; + { t.location_of(node) } -> std::same_as; + }; + + template + struct BasicFile : File { + using encoding_type = EncodingT; + using node_type = NodeT; + + explicit BasicFile(const char* path, lexy::buffer&& buffer) + : File(path), + 
_buffer(LEXY_MOV(buffer)) {} + + explicit BasicFile(lexy::buffer&& buffer) + : File(""), + _buffer(LEXY_MOV(buffer)) {} + + const lexy::buffer& buffer() const { + return _buffer; + } + + void set_location(const node_type* n, NodeLocation loc) { + _map.insert(n, loc); + } + + NodeLocation location_of(const node_type* n) const { + auto result = _map.lookup(n); + DRYAD_ASSERT(result != nullptr, "every Node should have a NodeLocation"); + return *result; + } + + protected: + lexy::buffer _buffer; + dryad::node_map _map; + }; +} \ No newline at end of file diff --git a/include/openvic-dataloader/NodeLocation.hpp b/include/openvic-dataloader/NodeLocation.hpp new file mode 100644 index 0000000..a253b1c --- /dev/null +++ b/include/openvic-dataloader/NodeLocation.hpp @@ -0,0 +1,33 @@ +#pragma once + +#include + +namespace ovdl { + struct NodeLocation { + const char* _begin = nullptr; + const char* _end = nullptr; + + NodeLocation(); + NodeLocation(const char* pos); + NodeLocation(const char* begin, const char* end); + + NodeLocation(const NodeLocation&) noexcept; + NodeLocation& operator=(const NodeLocation&); + + NodeLocation(NodeLocation&&); + NodeLocation& operator=(NodeLocation&&); + + const char* begin() const; + const char* end() const; + + bool is_synthesized() const; + + static NodeLocation make_from(const char* begin, const char* end); + }; + + struct FilePosition { + std::uint32_t start_line = -1, end_line = -1, start_column = -1, end_column = -1; + + inline constexpr bool is_empty() const { /* read-only query: true while all four fields still hold their -1 default */ return start_line == -1 && end_line == -1 && start_column == -1 && end_column == -1; } + }; +} \ No newline at end of file diff --git a/include/openvic-dataloader/ParseState.hpp b/include/openvic-dataloader/ParseState.hpp new file mode 100644 index 0000000..262c362 --- /dev/null +++ b/include/openvic-dataloader/ParseState.hpp @@ -0,0 +1,121 @@ +#pragma once + +#include +#include + +#include +#include + +#include + +namespace ovdl { + template + concept IsParseState = requires( + T 
t, + typename T::ast_type::file_type&& file, + std::ostream& error_stream, + lexy::buffer&& buffer, + const char* path // + ) { + requires IsAst; + requires std::derived_from; + { T { std::move(file), error_stream } } -> std::same_as; + { T { std::move(buffer), error_stream } } -> std::same_as; + { T { path, std::move(buffer), error_stream } } -> std::same_as; + { t.ast() } -> std::same_as; + { const_cast(t).ast() } -> std::same_as; + { t.logger() } -> std::same_as; + { const_cast(t).logger() } -> std::same_as; + }; + + template + struct ParseState { + using ast_type = AstT; + using diagnostic_logger_type = BasicDiagnosticLogger; + + ParseState(ast_type::file_type&& file, std::ostream& error_stream) + : _ast { std::move(file) }, + _logger { _ast.file(), error_stream } {} + + ParseState(lexy::buffer&& buffer, std::ostream& error_stream) + : ParseState(typename ast_type::file_type { std::move(buffer) }, error_stream) {} + + ParseState(const char* path, lexy::buffer&& buffer, std::ostream& error_stream) + : ParseState(typename ast_type::file_type { path, std::move(buffer) }, error_stream) {} + + ast_type& ast() { + return _ast; + } + + const ast_type& ast() const { + return _ast; + } + + diagnostic_logger_type& logger() { + return _logger; + } + + const diagnostic_logger_type& logger() const { + return _logger; + } + + private: + ast_type _ast; + diagnostic_logger_type _logger; + }; + + template + concept IsFileParseState = requires( + T t, + typename T::file_type&& file, + std::ostream& error_stream, + lexy::buffer&& buffer, + const char* path // + ) { + requires IsFile; + requires std::derived_from; + { T { std::move(file), error_stream } } -> std::same_as; + { T { std::move(buffer), error_stream } } -> std::same_as; + { T { path, std::move(buffer), error_stream } } -> std::same_as; + { t.file() } -> std::same_as; + { const_cast(t).file() } -> std::same_as; + { t.logger() } -> std::same_as; + { const_cast(t).logger() } -> std::same_as; + }; + + template + struct 
FileParseState { + using file_type = FileT; + using diagnostic_logger_type = BasicDiagnosticLogger; + + FileParseState(file_type&& file, std::ostream& error_stream) + : _file { std::move(file) }, /* `file` is a named rvalue reference, so it must be moved explicitly */ + _logger { _file, error_stream } {} /* log against the stored member; the parameter `file` hides file() here */ + + FileParseState(lexy::buffer&& buffer, std::ostream& error_stream) + : FileParseState(file_type { std::move(buffer) }, error_stream) {} + + FileParseState(const char* path, lexy::buffer&& buffer, std::ostream& error_stream) + : FileParseState(file_type { path, std::move(buffer) }, error_stream) {} + + file_type& file() { + return _file; + } + + const file_type& file() const { + return _file; + } + + diagnostic_logger_type& logger() { + return _logger; + } + + const diagnostic_logger_type& logger() const { + return _logger; + } + + private: + file_type _file; + diagnostic_logger_type _logger; + }; +} \ No newline at end of file diff --git a/include/openvic-dataloader/csv/LineObject.hpp b/include/openvic-dataloader/csv/LineObject.hpp index c6d8e2a..b0c0eff 100644 --- a/include/openvic-dataloader/csv/LineObject.hpp +++ b/include/openvic-dataloader/csv/LineObject.hpp @@ -14,7 +14,7 @@ #include #include -#include +#include namespace ovdl::csv { /// LineObject should be able to recognize the differences between: diff --git a/include/openvic-dataloader/csv/Parser.hpp b/include/openvic-dataloader/csv/Parser.hpp index fffd92a..f1e0dd1 100644 --- a/include/openvic-dataloader/csv/Parser.hpp +++ b/include/openvic-dataloader/csv/Parser.hpp @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace ovdl::csv { enum class EncodingType { @@ -26,7 +26,9 @@ namespace ovdl::csv { constexpr Parser& load_from_file(const char* path); Parser& load_from_file(const std::filesystem::path& path); - constexpr Parser& load_from_file(const detail::Has_c_str auto& path); + constexpr Parser& load_from_file(const detail::HasCstr auto& path) { + return load_from_file(path.c_str()); + } bool parse_csv(bool handle_strings = false); diff --git 
a/include/openvic-dataloader/detail/ClassExport.hpp b/include/openvic-dataloader/detail/ClassExport.hpp deleted file mode 100644 index 27098ed..0000000 --- a/include/openvic-dataloader/detail/ClassExport.hpp +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#ifdef _MSC_VER -#define OVDL_EXPORT __declspec(dllexport) -#elif defined(__GNUC__) -#define OVDL_EXPORT __attribute__((visibility("default"))) -#else -#define OVDL_EXPORT -#endif \ No newline at end of file diff --git a/include/openvic-dataloader/detail/LexyReportError.hpp b/include/openvic-dataloader/detail/LexyReportError.hpp new file mode 100644 index 0000000..684b5db --- /dev/null +++ b/include/openvic-dataloader/detail/LexyReportError.hpp @@ -0,0 +1,102 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include + +namespace ovdl::detail { + template + struct _ReportError { + OutputIterator _iter; + lexy::visualization_options _opts; + const char* _path; + + struct _sink { + OutputIterator _iter; + lexy::visualization_options _opts; + const char* _path; + std::size_t _count; + std::vector _errors; + + using return_type = std::vector; + + template + void operator()(const lexy::error_context& context, const lexy::error& error) { + _iter = lexy_ext::_detail::write_error(_iter, context, error, _opts, _path); + ++_count; + + // Convert the context location and error location into line/column information. + auto context_location = lexy::get_input_location(context.input(), context.position()); + auto location = lexy::get_input_location(context.input(), error.position(), context_location.anchor()); + + std::basic_stringstream message; + + // Write the main annotation. 
+ if constexpr (std::is_same_v) { + auto string = lexy::_detail::make_literal_lexeme(error.string(), error.length()); + + message << "expected '" << string.data() << '\''; + } else if constexpr (std::is_same_v) { + auto string = lexy::_detail::make_literal_lexeme(error.string(), error.length()); + + message << "expected keyword '" << string.data() << '\''; + } else if constexpr (std::is_same_v) { + message << "expected " << error.name(); + } else { + message << error.message(); + } + + _errors.push_back( + ParseError { + ParseError::Type::Fatal, // TODO: distinguish recoverable errors from fatal errors + std::move(message.str()), + 0, // TODO: implement proper error codes + ParseData { + context.production(), + context_location.line_nr(), + context_location.column_nr(), + }, + location.line_nr(), + location.column_nr(), + }); + } + + return_type finish() && { + if (_count != 0) + *_iter++ = '\n'; + return _errors; + } + }; + constexpr auto sink() const { + return _sink { _iter, _opts, _path, 0 }; + } + + /// Specifies a path that will be printed alongside the diagnostic. + constexpr _ReportError path(const char* path) const { + return { _iter, _opts, path }; + } + + /// Specifies an output iterator where the errors are written to. + template + constexpr _ReportError to(OI out) const { + return { out, _opts, _path }; + } + + /// Overrides visualization options. 
+ constexpr _ReportError opts(lexy::visualization_options opts) const { + return { _iter, opts, _path }; + } + }; + + constexpr auto ReporError = _ReportError {}; +} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/OStreamOutputIterator.hpp b/include/openvic-dataloader/detail/OStreamOutputIterator.hpp new file mode 100644 index 0000000..81f6c89 --- /dev/null +++ b/include/openvic-dataloader/detail/OStreamOutputIterator.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include + +namespace ovdl::detail { + struct OStreamOutputIterator { + std::reference_wrapper _stream; + + auto operator*() const noexcept { + return *this; + } + auto operator++(int) const noexcept { + return *this; + } + + OStreamOutputIterator& operator=(char c) { + _stream.get().put(c); + return *this; + } + }; +} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/Concepts.hpp b/include/openvic-dataloader/detail/utility/Concepts.hpp new file mode 100644 index 0000000..c149edb --- /dev/null +++ b/include/openvic-dataloader/detail/utility/Concepts.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include +#include + +namespace ovdl { + struct NodeLocation; + struct File; + namespace detail { + enum class buffer_error : std::uint8_t; + } +} + +namespace ovdl::detail { + template + concept any_of = (std::same_as || ...); + + template + concept HasCstr = + requires(T t) { + { t.c_str() } -> std::same_as; + }; + + template + concept HasPath = requires(const T* t) { + { t->path() } -> std::convertible_to; + }; + + template + concept LoadCallback = + requires(T&& t, Self&& self, Args&&... args) { + { std::invoke(std::forward(t), std::forward(self), std::forward(args)...) 
} -> std::same_as; + }; + + template + concept IsEncoding = requires(T t) { + typename T::char_type; + typename T::int_type; + { T::template is_secondary_char_type() } -> std::same_as; + { T::eof() } -> std::same_as; + { T::to_int_type(typename T::char_type {}) } -> std::same_as; + }; +} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/Constexpr.hpp b/include/openvic-dataloader/detail/utility/Constexpr.hpp new file mode 100644 index 0000000..49479c5 --- /dev/null +++ b/include/openvic-dataloader/detail/utility/Constexpr.hpp @@ -0,0 +1,15 @@ +#pragma once + +// THANK YOU APPLE FOR YOUR UTTER DISREGARD FOR C++20 + +#if __cpp_lib_optional >= 202106L +#define OVDL_OPTIONAL_CONSTEXPR constexpr +#else +#define OVDL_OPTIONAL_CONSTEXPR inline +#endif + +#if __cpp_lib_constexpr_vector >= 201907L +#define OVDL_VECTOR_CONSTEXPR constexpr +#else +#define OVDL_VECTOR_CONSTEXPR inline +#endif \ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/TypeName.hpp b/include/openvic-dataloader/detail/utility/TypeName.hpp new file mode 100644 index 0000000..1a34a0f --- /dev/null +++ b/include/openvic-dataloader/detail/utility/TypeName.hpp @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include +#include + +namespace ovdl::detail { + + template + constexpr auto substring_as_array(std::string_view str, std::index_sequence) { + return std::array { str[Idxs]... 
}; + } + + template + constexpr auto type_name_array() { +#if defined(__clang__) + constexpr auto prefix = std::string_view { "[T = " }; + constexpr auto suffix = std::string_view { "]" }; + constexpr auto function = std::string_view { __PRETTY_FUNCTION__ }; +#elif defined(__GNUC__) + constexpr auto prefix = std::string_view { "with T = " }; + constexpr auto suffix = std::string_view { "]" }; + constexpr auto function = std::string_view { __PRETTY_FUNCTION__ }; +#elif defined(_MSC_VER) + constexpr auto prefix = std::string_view { "type_name_array<" }; + constexpr auto suffix = std::string_view { ">(void)" }; + constexpr auto function = std::string_view { __FUNCSIG__ }; +#else +#error Unsupported compiler +#endif + + constexpr auto start = function.find(prefix) + prefix.size(); + constexpr auto end = function.rfind(suffix); + + static_assert(start < end); + + constexpr auto name = function.substr(start, (end - start)); + return substring_as_array(name, std::make_index_sequence {}); + } + + template + struct type_name_holder { + static inline constexpr auto value = type_name_array(); + }; + + template + constexpr auto type_name() -> std::string_view { + constexpr auto& value = type_name_holder::value; + return std::string_view { value.data(), value.size() }; + } +} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/Utility.hpp b/include/openvic-dataloader/detail/utility/Utility.hpp new file mode 100644 index 0000000..138a029 --- /dev/null +++ b/include/openvic-dataloader/detail/utility/Utility.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include +#include + +#include "openvic-dataloader/detail/utility/TypeName.hpp" + +namespace ovdl::detail { + [[noreturn]] inline void unreachable() { + // Uses compiler specific extensions if possible. + // Even if no extension is used, undefined behavior is still raised by + // an empty function body and the noreturn attribute. 
+#ifdef __GNUC__ // GCC, Clang, ICC + __builtin_unreachable(); +#elif defined(_MSC_VER) // MSVC + __assume(false); +#endif + } + + template + constexpr std::string_view get_kind_name() { + constexpr auto name = type_name(); + + return name; + } + + template + requires std::is_enum_v + constexpr std::underlying_type_t to_underlying(EnumT e) { + return static_cast>(e); + } + + template + requires std::is_enum_v + constexpr EnumT from_underlying(std::underlying_type_t ut) { + return static_cast(ut); + } +} \ No newline at end of file diff --git a/include/openvic-dataloader/v2script/Parser.hpp b/include/openvic-dataloader/v2script/Parser.hpp index 5e9717f..fbea184 100644 --- a/include/openvic-dataloader/v2script/Parser.hpp +++ b/include/openvic-dataloader/v2script/Parser.hpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include namespace ovdl::v2script { @@ -35,7 +35,9 @@ namespace ovdl::v2script { constexpr Parser& load_from_file(const char* path); Parser& load_from_file(const std::filesystem::path& path); - constexpr Parser& load_from_file(const detail::Has_c_str auto& path); + constexpr Parser& load_from_file(const detail::HasCstr auto& path) { + return load_from_file(path.c_str()); + } bool simple_parse(); bool event_parse(); diff --git a/src/openvic-dataloader/AbstractSyntaxTree.cpp b/src/openvic-dataloader/AbstractSyntaxTree.cpp new file mode 100644 index 0000000..11a90dc --- /dev/null +++ b/src/openvic-dataloader/AbstractSyntaxTree.cpp @@ -0,0 +1,27 @@ +#include + +using namespace ovdl; + +AbstractSyntaxTree::symbol_type AbstractSyntaxTree::intern(const char* str, std::size_t length) { + return _symbol_interner.intern(str, length); +} + +AbstractSyntaxTree::symbol_type AbstractSyntaxTree::intern(std::string_view str) { + return intern(str.data(), str.size()); +} + +const char* AbstractSyntaxTree::intern_cstr(const char* str, std::size_t length) { + return intern(str, length).c_str(_symbol_interner); +} + +const char* 
AbstractSyntaxTree::intern_cstr(std::string_view str) { + return intern_cstr(str.data(), str.size()); +} + +AbstractSyntaxTree::symbol_interner_type& AbstractSyntaxTree::symbol_interner() { + return _symbol_interner; +} + +const AbstractSyntaxTree::symbol_interner_type& AbstractSyntaxTree::symbol_interner() const { + return _symbol_interner; +} \ No newline at end of file diff --git a/src/openvic-dataloader/DiagnosticLogger.cpp b/src/openvic-dataloader/DiagnosticLogger.cpp new file mode 100644 index 0000000..d303d14 --- /dev/null +++ b/src/openvic-dataloader/DiagnosticLogger.cpp @@ -0,0 +1,10 @@ +#include + +using namespace ovdl; + +DiagnosticLogger::operator bool() const { + return !_errored; +} + +bool DiagnosticLogger::errored() const { return _errored; } +bool DiagnosticLogger::warned() const { return _warned; } \ No newline at end of file diff --git a/src/openvic-dataloader/File.cpp b/src/openvic-dataloader/File.cpp new file mode 100644 index 0000000..9b27bf0 --- /dev/null +++ b/src/openvic-dataloader/File.cpp @@ -0,0 +1,9 @@ +#include + +using namespace ovdl; + +File::File(const char* path) : _path(path) {} + +const char* File::path() const noexcept { + return _path; +} \ No newline at end of file diff --git a/src/openvic-dataloader/NodeLocation.cpp b/src/openvic-dataloader/NodeLocation.cpp new file mode 100644 index 0000000..9e4f669 --- /dev/null +++ b/src/openvic-dataloader/NodeLocation.cpp @@ -0,0 +1,26 @@ +#include + +using namespace ovdl; + +NodeLocation::NodeLocation() = default; +NodeLocation::NodeLocation(const char* pos) : _begin(pos), + _end(pos) {} +NodeLocation::NodeLocation(const char* begin, const char* end) : _begin(begin), + _end(end) {} + +NodeLocation::NodeLocation(const NodeLocation&) noexcept = default; +NodeLocation& NodeLocation::operator=(const NodeLocation&) = default; + +NodeLocation::NodeLocation(NodeLocation&&) = default; +NodeLocation& NodeLocation::operator=(NodeLocation&&) = default; + +const char* NodeLocation::begin() const { 
return _begin; } +const char* NodeLocation::end() const { return _end; } + +bool NodeLocation::is_synthesized() const { return _begin == nullptr && _end == nullptr; } + +NodeLocation NodeLocation::make_from(const char* begin, const char* end) { + end++; + if (begin >= end) return NodeLocation(begin); + return NodeLocation(begin, end); +} diff --git a/src/openvic-dataloader/csv/CsvGrammar.hpp b/src/openvic-dataloader/csv/CsvGrammar.hpp index aef513b..6e4b463 100644 --- a/src/openvic-dataloader/csv/CsvGrammar.hpp +++ b/src/openvic-dataloader/csv/CsvGrammar.hpp @@ -11,7 +11,7 @@ #include #include -#include "detail/LexyLitRange.hpp" +#include "detail/dsl.hpp" // Grammar Definitions // namespace ovdl::csv::grammar { diff --git a/src/openvic-dataloader/csv/Parser.cpp b/src/openvic-dataloader/csv/Parser.cpp index 14ef553..da0dfa6 100644 --- a/src/openvic-dataloader/csv/Parser.cpp +++ b/src/openvic-dataloader/csv/Parser.cpp @@ -3,7 +3,8 @@ #include #include -#include +#include +#include #include #include @@ -159,11 +160,6 @@ Parser& Parser::load_from_file(const std::filesystem::path& return load_from_file(path.string().c_str()); } -template -constexpr Parser& Parser::load_from_file(const detail::Has_c_str auto& path) { - return load_from_file(path.c_str()); -} - template bool Parser::parse_csv(bool handle_strings) { if (!_buffer_handler->is_valid()) { diff --git a/src/openvic-dataloader/detail/BufferError.hpp b/src/openvic-dataloader/detail/BufferError.hpp new file mode 100644 index 0000000..1fbb0f4 --- /dev/null +++ b/src/openvic-dataloader/detail/BufferError.hpp @@ -0,0 +1,17 @@ +#pragma once + +#include + +namespace ovdl::detail { + enum class buffer_error : std::uint8_t { + success, + /// An internal OS error, such as failure to read from the file. + os_error, + /// The file was not found. + file_not_found, + /// The file cannot be opened. 
+ permission_denied, + /// The buffer failed to handle the data + buffer_is_null + }; +} \ No newline at end of file diff --git a/src/openvic-dataloader/detail/DetectUtf8.hpp b/src/openvic-dataloader/detail/DetectUtf8.hpp index 2045b3c..e9d0350 100644 --- a/src/openvic-dataloader/detail/DetectUtf8.hpp +++ b/src/openvic-dataloader/detail/DetectUtf8.hpp @@ -3,7 +3,7 @@ #include #include -#include "detail/LexyLitRange.hpp" +#include "detail/dsl.hpp" namespace ovdl::detail { namespace detect_utf8 { @@ -18,15 +18,15 @@ namespace ovdl::detail { constexpr auto is_not_ascii_flag = lexy::dsl::context_flag; // & 0b10000000 == 0b00000000 - constexpr auto ascii_values = lexydsl::make_range<0b00000000, 0b01111111>(); + constexpr auto ascii_values = dsl::make_range<0b00000000, 0b01111111>(); // & 0b11100000 == 0b11000000 - constexpr auto two_byte = lexydsl::make_range<0b11000000, 0b11011111>(); + constexpr auto two_byte = dsl::make_range<0b11000000, 0b11011111>(); // & 0b11110000 == 0b11100000 - constexpr auto three_byte = lexydsl::make_range<0b11100000, 0b11101111>(); + constexpr auto three_byte = dsl::make_range<0b11100000, 0b11101111>(); // & 0b11111000 == 0b11110000 - constexpr auto four_byte = lexydsl::make_range<0b11110000, 0b11110111>(); + constexpr auto four_byte = dsl::make_range<0b11110000, 0b11110111>(); // & 0b11000000 == 0b10000000 - constexpr auto check_bytes = lexydsl::make_range<0b10000000, 0b10111111>(); + constexpr auto check_bytes = dsl::make_range<0b10000000, 0b10111111>(); constexpr auto utf8_check = ((four_byte >> lexy::dsl::times<3>(check_bytes)) | diff --git a/src/openvic-dataloader/detail/LexyLitRange.hpp b/src/openvic-dataloader/detail/LexyLitRange.hpp deleted file mode 100644 index a6761a8..0000000 --- a/src/openvic-dataloader/detail/LexyLitRange.hpp +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include - -namespace ovdl::detail::lexydsl { - template - consteval auto make_range() { - if constexpr (LOW == HIGH) { - return lexy::dsl::lit_c; - } else 
if constexpr (LOW == (HIGH - 1)) { - return lexy::dsl::lit_c / lexy::dsl::lit_c; - } else { - return lexy::dsl::lit_c / make_range(); - } - } -} \ No newline at end of file diff --git a/src/openvic-dataloader/detail/NullBuff.hpp b/src/openvic-dataloader/detail/NullBuff.hpp index baf9e1b..e5c6f4a 100644 --- a/src/openvic-dataloader/detail/NullBuff.hpp +++ b/src/openvic-dataloader/detail/NullBuff.hpp @@ -22,9 +22,9 @@ namespace ovdl::detail { basic_nullbuf m_sbuf; }; - typedef basic_onullstream onullstream; - typedef basic_onullstream wonullstream; + using onullstream = basic_onullstream; + using wonullstream = basic_onullstream; - inline onullstream cnull; - inline onullstream wcnull; + static inline onullstream cnull; + static inline onullstream wcnull; } \ No newline at end of file diff --git a/src/openvic-dataloader/detail/ParseHandler.hpp b/src/openvic-dataloader/detail/ParseHandler.hpp new file mode 100644 index 0000000..2464b3c --- /dev/null +++ b/src/openvic-dataloader/detail/ParseHandler.hpp @@ -0,0 +1,150 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "detail/BufferError.hpp" + +namespace ovdl::detail { + template + struct ParseHandler { + std::string make_error_from(buffer_error error) + requires HasPath + { + switch (error) { + using enum ovdl::detail::buffer_error; + case buffer_is_null: + return "Buffer could not be loaded."; + case os_error: + return "OS file error for '" + std::string(static_cast(this)->path()) + "'."; + case file_not_found: + return "'" + std::string(static_cast(this)->path()) + "' was not found."; + case permission_denied: + return "Permission denied for reading '" + std::string(static_cast(this)->path()) + "' ."; + default: + return ""; + } + } + + template + constexpr void _run_load_func(detail::LoadCallback auto func, Args... 
args); + }; + + template + struct BasicFileParseHandler : ParseHandler> { + using parse_state_type = ParseState; + using encoding_type = typename parse_state_type::file_type::encoding_type; + + constexpr bool is_valid() const { + if (!_parse_state) return false; + return buffer().data() != nullptr; + } + + constexpr buffer_error load_buffer_size(std::ostream& error_stream, const char* data, std::size_t size) { + lexy::buffer buffer(data, size); + if (buffer.data() == nullptr) return buffer_error::buffer_is_null; + _parse_state.reset(new parse_state_type { std::move(buffer), error_stream }); + return is_valid() ? buffer_error::success : buffer_error::buffer_is_null; + } + + constexpr buffer_error load_buffer(std::ostream& error_stream, const char* start, const char* end) { + lexy::buffer buffer(start, end); + if (buffer.data() == nullptr) return buffer_error::buffer_is_null; + _parse_state.reset(new parse_state_type { std::move(buffer), error_stream }); + return is_valid() ? buffer_error::success : buffer_error::buffer_is_null; + } + + buffer_error load_file(std::ostream& error_stream, const char* path) { + lexy::read_file_result file = lexy::read_file(path); + if (!file) { + return ovdl::detail::from_underlying(ovdl::detail::to_underlying(file.error())); + } + + _parse_state.reset(new parse_state_type { path, std::move(file).buffer() }); + return is_valid() ? 
buffer_error::success : buffer_error::buffer_is_null; + } + + /* Path reported by the parse state file, or an empty string when no parse state exists. */ const char* path() const { + if (!_parse_state) return ""; + return _parse_state->file().path(); + } + + /* Dereferences _parse_state without a null check. */ parse_state_type& parse_state() { + return *_parse_state; + } + + const parse_state_type& parse_state() const { + return *_parse_state; + } + + /* Buffer of the parse state file; also dereferences _parse_state without a null check. */ constexpr const auto& buffer() const { + return _parse_state->file().buffer(); + } + + protected: + std::unique_ptr _parse_state; + }; + + template + /* Parse handler with the same member functions as BasicFileParseHandler; here encoding_type is taken from the parse state ast_type. */ struct BasicStateParseHandler : ParseHandler> { + using parse_state_type = ParseState; + using encoding_type = typename parse_state_type::ast_type::file_type::encoding_type; + + /* True when a parse state exists and its buffer data pointer is non-null. */ constexpr bool is_valid() const { + if (!_parse_state) return false; + return buffer().data() != nullptr; + } + + /* Builds a lexy::buffer from data and size and installs a new parse state; there is no early null check, so a null buffer is reported by is_valid() after the reset. */ constexpr buffer_error load_buffer_size(std::ostream& error_stream, const char* data, std::size_t size) { + lexy::buffer buffer(data, size); + _parse_state.reset(new parse_state_type { std::move(buffer), error_stream }); + return is_valid() ? buffer_error::success : buffer_error::buffer_is_null; + } + + /* Same as load_buffer_size, but the input is given as a start and end pointer pair. */ constexpr buffer_error load_buffer(std::ostream& error_stream, const char* start, const char* end) { + lexy::buffer buffer(start, end); + _parse_state.reset(new parse_state_type { std::move(buffer), error_stream }); + return is_valid() ? buffer_error::success : buffer_error::buffer_is_null; + } + + /* Reads path with lexy::read_file; on failure the lexy error value is converted to buffer_error through its underlying value, otherwise a parse state is built from path, the buffer and error_stream. */ buffer_error load_file(std::ostream& error_stream, const char* path) { + lexy::read_file_result file = lexy::read_file(path); + if (!file) { + return ovdl::detail::from_underlying(ovdl::detail::to_underlying(file.error())); + } + + _parse_state.reset(new parse_state_type { path, std::move(file).buffer(), error_stream }); + return is_valid() ?
buffer_error::success : buffer_error::buffer_is_null; + } + + /* Path reported through ast().file(), or an empty string when no parse state exists. */ const char* path() const { + if (!_parse_state) return ""; + return _parse_state->ast().file().path(); + } + + /* Dereferences _parse_state without a null check. */ parse_state_type& parse_state() { + return *_parse_state; + } + + const parse_state_type& parse_state() const { + return *_parse_state; + } + + /* Buffer reached through ast().file(); also dereferences _parse_state without a null check. */ constexpr const auto& buffer() const { + return _parse_state->ast().file().buffer(); + } + + protected: + std::unique_ptr _parse_state; + }; +} \ No newline at end of file diff --git a/src/openvic-dataloader/detail/StringLiteral.hpp b/src/openvic-dataloader/detail/StringLiteral.hpp new file mode 100644 index 0000000..ae172a1 --- /dev/null +++ b/src/openvic-dataloader/detail/StringLiteral.hpp @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace ovdl::detail { + + template + struct string_literal; + + /* Non-template base holding the helpers that convert inputs to string_literal (_to_string) and build one string_literal from several inputs (_concat). */ struct _string_literal { + protected: + static LEXY_CONSTEVAL auto _to_string(const auto& input) { return string_literal(input); } + static LEXY_CONSTEVAL auto _concat(const auto&... input) { return string_literal(_to_string(input)...); } + }; + + template + /* Fixed-size array of N CharT elements; every constructor is marked LEXY_CONSTEVAL. */ struct string_literal : _string_literal { + CharT data[N]; + + using char_type = CharT; + + /* Copies exactly N elements from str. */ LEXY_CONSTEVAL string_literal(const CharT* str) : data {} { + for (auto i = 0u; i != N; ++i) + data[i] = str[i]; + } + /* Stores c in data[0]; the remaining elements stay value-initialized. */ LEXY_CONSTEVAL string_literal(CharT c) : data {} { + data[0] = c; + } + + /* Concatenating constructor, enabled only for two or more inputs. */ LEXY_CONSTEVAL string_literal(const auto&... input) + requires(sizeof...(input) > 1) + { + _copy(_to_string(input)...); + } + + template + /* Appends each input without its last element (Ni - 1 elements), then writes a single terminating 0. */ LEXY_CONSTEVAL void _copy(const string_literal(&... input)) { + auto pos = data; + ((pos = std::copy_n(input.data, Ni - 1, pos)), ...); + *pos = 0; + } + + template typename container, typename... T> + /* Concatenates the elements of a tuple-like container by expanding it with std::apply. */ LEXY_CONSTEVAL string_literal(const container& input) { + std::apply([this](const auto&...
s) constexpr { _copy(_to_string(s)...); }, input); + } + + /* Concatenation: both operands are handed to _concat, which converts each through _to_string. */ LEXY_CONSTEVAL auto operator+(const auto& rhs) const { + return _concat(*this, rhs); + } + + friend LEXY_CONSTEVAL auto operator+(const auto& lhs, string_literal rhs) { + return _concat(lhs, rhs); + } + + /* NOTE(review): the conversion target is const char* regardless of CharT - confirm that non-char instantiations never use it. */ LEXY_CONSTEVAL operator const char*() const { return data; } + + /* Returns N, the extent of data. */ static LEXY_CONSTEVAL auto size() { + return N; + } + }; + template + string_literal(const CharT (&)[N]) -> string_literal; + template + string_literal(CharT) -> string_literal<1, CharT>; + + /* Deduction guide for concatenation: the length is the sum of sizeof(data) - 1 over all inputs, plus 1, and the character type is fixed to char. */ string_literal(const auto&... input) + -> string_literal<((sizeof(input.data) - 1) + ... + 1), char>; + + template typename T, string_literal Str, std::size_t... Idx> + auto _to_type_string(lexy::_detail::index_sequence) { + return T {}; + } + template typename T, string_literal Str> + using to_type_string = decltype(_to_type_string(lexy::_detail::make_index_sequence {})); +} + +namespace ovdl::dsl { + template + constexpr auto keyword(lexyd::_id) { + return ovdl::detail::to_type_string>::template get, Str> {}; + } +} \ No newline at end of file diff --git a/src/openvic-dataloader/detail/dsl.hpp b/src/openvic-dataloader/detail/dsl.hpp new file mode 100644 index 0000000..bd78b95 --- /dev/null +++ b/src/openvic-dataloader/detail/dsl.hpp @@ -0,0 +1,42 @@ +#pragma once + +#include + +#include +#include +#include + +namespace ovdl::dsl { + template + /* Combines the given callbacks with lexy::callback and binds the result with lexy::bind to lexy::parse_state followed by lexy::values. */ constexpr auto callback(Callback... cb) { + return lexy::bind(lexy::callback(cb...), lexy::parse_state, lexy::values); + } + + template + /* Binds the given sink to lexy::parse_state with lexy::bind_sink. */ constexpr auto sink(Sink&& sink) { + return lexy::bind_sink(sink, lexy::parse_state); + } + + template + /* Callback that creates a node through state.ast().create at loc; a single lexy::nullopt argument is dropped, any other arguments are forwarded. */ constexpr auto construct = callback( + [](StateType& state, ovdl::NodeLocation loc, auto&& arg) { + if constexpr (std::is_same_v, lexy::nullopt>) + return state.ast().template create(loc); + else + return state.ast().template create(loc, DRYAD_FWD(arg)); + }, + [](StateType& state, ovdl::NodeLocation loc, auto&& arg, auto&&...
args) { + return state.ast().template create(loc, DRYAD_FWD(arg), DRYAD_FWD(args)...); + }); + + template + /* Builds an alternation (operator/) of lexy::dsl::lit_c rules by compile-time recursion; the LOW == HIGH and LOW == HIGH - 1 branches end the recursion. Presumably one literal per value from LOW to HIGH - the template arguments are not visible here, confirm. */ consteval auto make_range() { + if constexpr (LOW == HIGH) { + return ::lexy::dsl::lit_c; + } else if constexpr (LOW == (HIGH - 1)) { + return ::lexy::dsl::lit_c / ::lexy::dsl::lit_c; + } else { + return ::lexy::dsl::lit_c / make_range(); + } + } +} \ No newline at end of file diff --git a/src/openvic-dataloader/v2script/Parser.cpp b/src/openvic-dataloader/v2script/Parser.cpp index d7e4106..94c90f8 100644 --- a/src/openvic-dataloader/v2script/Parser.cpp +++ b/src/openvic-dataloader/v2script/Parser.cpp @@ -10,7 +10,9 @@ #include #include -#include +#include +#include +#include #include #include diff --git a/src/openvic-dataloader/v2script/SimpleGrammar.hpp b/src/openvic-dataloader/v2script/SimpleGrammar.hpp index 99bbfca..96a7bf5 100644 --- a/src/openvic-dataloader/v2script/SimpleGrammar.hpp +++ b/src/openvic-dataloader/v2script/SimpleGrammar.hpp @@ -9,7 +9,7 @@ #include #include -#include "detail/LexyLitRange.hpp" +#include "detail/dsl.hpp" // Grammar Definitions // /* REQUIREMENTS: @@ -48,10 +48,10 @@ namespace ovdl::v2script::grammar { lexy::dsl::ascii::alpha_digit_underscore / LEXY_ASCII_ONE_OF("+:@%&'-.") / lexy::dsl::lit_b<0x8A> / lexy::dsl::lit_b<0x8C> / lexy::dsl::lit_b<0x8E> / lexy::dsl::lit_b<0x92> / lexy::dsl::lit_b<0x97> / lexy::dsl::lit_b<0x9A> / lexy::dsl::lit_b<0x9C> / - detail::lexydsl::make_range<0x9E, 0x9F>() / - detail::lexydsl::make_range<0xC0, 0xD6>() / - detail::lexydsl::make_range<0xD8, 0xF6>() / - detail::lexydsl::make_range<0xF8, 0xFF>(); + dsl::make_range<0x9E, 0x9F>() / + dsl::make_range<0xC0, 0xD6>() / + dsl::make_range<0xD8, 0xF6>() / + dsl::make_range<0xF8, 0xFF>(); static constexpr auto data_char_class = LEXY_CHAR_CLASS("DataSpecifier", data_specifier); @@ -84,11 +84,11 @@ namespace ovdl::v2script::grammar { struct StringExpression { static constexpr auto rule = [] { if constexpr (Options.NoStringEscape) { - auto c =
ovdl::detail::lexydsl::make_range<0x20, 0xFF>() / lexy::dsl::lit_b<0x07> / lexy::dsl::lit_b<0x09> / lexy::dsl::lit_b<0x0A> / lexy::dsl::lit_b<0x0D>; + auto c = dsl::make_range<0x20, 0xFF>() / lexy::dsl::lit_b<0x07> / lexy::dsl::lit_b<0x09> / lexy::dsl::lit_b<0x0A> / lexy::dsl::lit_b<0x0D>; return lexy::dsl::delimited(lexy::dsl::position(lexy::dsl::lit_b<'"'>))(c); } else { // Arbitrary code points that aren't control characters. - auto c = ovdl::detail::lexydsl::make_range<0x20, 0xFF>() - lexy::dsl::ascii::control; + auto c = dsl::make_range<0x20, 0xFF>() - lexy::dsl::ascii::control; // Escape sequences start with a backlash. // They either map one of the symbols,