From d5895cf739ec2bcf6dc541cc62e192a16cb9a48d Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Thu, 23 Jan 2025 00:33:21 +0000 Subject: [PATCH 1/4] Update vendored DuckDB sources to 036aade4 --- CMakeLists.txt | 2 +- .../core_functions/function_list.cpp | 3 + .../core_functions/scalar/date_functions.hpp | 24 ++++ .../core_functions/scalar/date/current.cpp | 38 +++++++ .../scalar/generic/can_implicitly_cast.cpp | 4 +- .../core_functions/scalar/generic/typeof.cpp | 2 +- .../json/json_functions/json_structure.cpp | 4 +- .../parquet/include/resizable_buffer.hpp | 1 + .../extension/parquet/parquet_extension.cpp | 24 ++-- .../src/catalog/default/default_functions.cpp | 3 - src/duckdb/src/common/adbc/adbc.cpp | 57 +--------- .../src/common/compressed_file_system.cpp | 2 +- src/duckdb/src/common/gzip_file_system.cpp | 16 +-- .../aggregate/physical_streaming_window.cpp | 106 ++++++++++-------- .../scanner/column_count_scanner.cpp | 18 +++ .../scanner/string_value_scanner.cpp | 41 ++++--- .../csv_scanner/sniffer/header_detection.cpp | 14 ++- .../src/execution/sample/reservoir_sample.cpp | 4 - src/duckdb/src/function/function_binder.cpp | 2 +- src/duckdb/src/function/table/read_csv.cpp | 2 +- .../table/system/duckdb_extensions.cpp | 4 + .../function/table/version/pragma_version.cpp | 6 +- .../operator/csv_scanner/base_scanner.hpp | 37 +++++- .../csv_scanner/column_count_scanner.hpp | 3 + .../csv_scanner/string_value_scanner.hpp | 33 +----- .../duckdb/function/scalar_function.hpp | 6 +- .../src/include/duckdb/main/connection.hpp | 8 -- .../src/include/duckdb/main/extension.hpp | 1 + .../include/duckdb/main/extension_entries.hpp | 31 ++--- src/duckdb/src/main/connection.cpp | 31 +---- src/duckdb/src/main/extension.cpp | 36 ++++-- .../src/main/extension/extension_helper.cpp | 3 +- .../src/main/extension/extension_load.cpp | 10 +- .../expression/bind_columnref_expression.cpp | 12 +- .../binder/tableref/bind_table_function.cpp | 4 +- .../expression/bound_function_expression.cpp | 18 ++- .../ub_extension_icu_third_party_icu_i18n.cpp | 12 +- 37 files changed, 339 insertions(+), 283 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index db9f68653..09b82076d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ add_definitions(-DDUCKDB_EXTENSION_AUTOLOAD_DEFAULT=1 -DDUCKDB_EXTENSION_AUTOINS file(GLOB_RECURSE JAVA_SRC_FILES src/main/java/org/duckdb/*.java) file(GLOB_RECURSE JAVA_TEST_FILES src/test/java/org/duckdb/*.java) -set(DUCKDB_SRC_FILES src/duckdb/ub_src_catalog.cpp src/duckdb/ub_src_catalog_catalog_entry.cpp src/duckdb/ub_src_catalog_catalog_entry_dependency.cpp src/duckdb/ub_src_catalog_default.cpp src/duckdb/ub_src_common_adbc.cpp src/duckdb/ub_src_common_adbc_nanoarrow.cpp src/duckdb/ub_src_common.cpp src/duckdb/ub_src_common_arrow_appender.cpp src/duckdb/ub_src_common_arrow.cpp src/duckdb/ub_src_common_crypto.cpp src/duckdb/ub_src_common_enums.cpp src/duckdb/ub_src_common_exception.cpp src/duckdb/ub_src_common_operator.cpp src/duckdb/ub_src_common_progress_bar.cpp src/duckdb/ub_src_common_row_operations.cpp src/duckdb/ub_src_common_serializer.cpp src/duckdb/ub_src_common_sort.cpp src/duckdb/ub_src_common_tree_renderer.cpp src/duckdb/ub_src_common_types.cpp src/duckdb/ub_src_common_types_column.cpp src/duckdb/ub_src_common_types_row.cpp src/duckdb/ub_src_common_value_operations.cpp src/duckdb/src/common/vector_operations/boolean_operators.cpp src/duckdb/src/common/vector_operations/comparison_operators.cpp src/duckdb/src/common/vector_operations/generators.cpp src/duckdb/src/common/vector_operations/is_distinct_from.cpp src/duckdb/src/common/vector_operations/null_operations.cpp src/duckdb/src/common/vector_operations/numeric_inplace_operators.cpp src/duckdb/src/common/vector_operations/vector_cast.cpp src/duckdb/src/common/vector_operations/vector_copy.cpp src/duckdb/src/common/vector_operations/vector_hash.cpp src/duckdb/src/common/vector_operations/vector_storage.cpp src/duckdb/ub_src_execution.cpp src/duckdb/ub_src_execution_expression_executor.cpp src/duckdb/ub_src_execution_index_art.cpp src/duckdb/ub_src_execution_index.cpp src/duckdb/ub_src_execution_nested_loop_join.cpp src/duckdb/ub_src_execution_operator_aggregate.cpp src/duckdb/ub_src_execution_operator_csv_scanner_buffer_manager.cpp src/duckdb/ub_src_execution_operator_csv_scanner_encode.cpp src/duckdb/ub_src_execution_operator_csv_scanner_scanner.cpp src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp src/duckdb/ub_src_execution_operator_csv_scanner_state_machine.cpp src/duckdb/ub_src_execution_operator_csv_scanner_table_function.cpp src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp src/duckdb/ub_src_execution_operator_filter.cpp src/duckdb/ub_src_execution_operator_helper.cpp src/duckdb/ub_src_execution_operator_join.cpp src/duckdb/ub_src_execution_operator_order.cpp src/duckdb/ub_src_execution_operator_persistent.cpp src/duckdb/ub_src_execution_operator_projection.cpp src/duckdb/ub_src_execution_operator_scan.cpp src/duckdb/ub_src_execution_operator_schema.cpp src/duckdb/ub_src_execution_operator_set.cpp src/duckdb/ub_src_execution_physical_plan.cpp src/duckdb/ub_src_execution_sample.cpp src/duckdb/ub_src_function_aggregate_distributive.cpp src/duckdb/ub_src_function_aggregate.cpp src/duckdb/ub_src_function.cpp src/duckdb/ub_src_function_cast.cpp src/duckdb/ub_src_function_cast_union.cpp src/duckdb/ub_src_function_pragma.cpp src/duckdb/ub_src_function_scalar_compressed_materialization.cpp src/duckdb/ub_src_function_scalar.cpp src/duckdb/ub_src_function_scalar_date.cpp src/duckdb/ub_src_function_scalar_generic.cpp src/duckdb/ub_src_function_scalar_list.cpp src/duckdb/ub_src_function_scalar_map.cpp src/duckdb/ub_src_function_scalar_operator.cpp src/duckdb/ub_src_function_scalar_sequence.cpp src/duckdb/ub_src_function_scalar_string.cpp src/duckdb/ub_src_function_scalar_string_regexp.cpp src/duckdb/ub_src_function_scalar_struct.cpp src/duckdb/ub_src_function_scalar_system.cpp src/duckdb/ub_src_function_table_arrow.cpp src/duckdb/ub_src_function_table.cpp src/duckdb/ub_src_function_table_system.cpp src/duckdb/ub_src_function_table_version.cpp src/duckdb/ub_src_function_window.cpp src/duckdb/ub_src_logging.cpp src/duckdb/ub_src_main.cpp src/duckdb/ub_src_main_buffered_data.cpp src/duckdb/ub_src_main_capi.cpp src/duckdb/ub_src_main_capi_cast.cpp src/duckdb/ub_src_main_chunk_scan_state.cpp src/duckdb/ub_src_main_extension.cpp src/duckdb/ub_src_main_relation.cpp src/duckdb/ub_src_main_secret.cpp src/duckdb/ub_src_main_settings.cpp src/duckdb/ub_src_optimizer.cpp src/duckdb/ub_src_optimizer_compressed_materialization.cpp src/duckdb/ub_src_optimizer_join_order.cpp src/duckdb/ub_src_optimizer_matcher.cpp src/duckdb/ub_src_optimizer_pullup.cpp src/duckdb/ub_src_optimizer_pushdown.cpp src/duckdb/ub_src_optimizer_rule.cpp src/duckdb/ub_src_optimizer_statistics_expression.cpp src/duckdb/ub_src_optimizer_statistics_operator.cpp src/duckdb/ub_src_parallel.cpp src/duckdb/ub_src_parser.cpp src/duckdb/ub_src_parser_constraints.cpp src/duckdb/ub_src_parser_expression.cpp src/duckdb/ub_src_parser_parsed_data.cpp src/duckdb/ub_src_parser_query_node.cpp src/duckdb/ub_src_parser_statement.cpp src/duckdb/ub_src_parser_tableref.cpp src/duckdb/ub_src_parser_transform_constraint.cpp src/duckdb/ub_src_parser_transform_expression.cpp src/duckdb/ub_src_parser_transform_helpers.cpp src/duckdb/ub_src_parser_transform_statement.cpp src/duckdb/ub_src_parser_transform_tableref.cpp src/duckdb/ub_src_planner.cpp src/duckdb/ub_src_planner_binder_expression.cpp src/duckdb/ub_src_planner_binder_query_node.cpp src/duckdb/ub_src_planner_binder_statement.cpp src/duckdb/ub_src_planner_binder_tableref.cpp src/duckdb/ub_src_planner_expression.cpp src/duckdb/ub_src_planner_expression_binder.cpp src/duckdb/ub_src_planner_filter.cpp src/duckdb/ub_src_planner_operator.cpp src/duckdb/ub_src_planner_subquery.cpp src/duckdb/ub_src_storage.cpp src/duckdb/ub_src_storage_buffer.cpp src/duckdb/ub_src_storage_checkpoint.cpp src/duckdb/ub_src_storage_compression_alp.cpp src/duckdb/ub_src_storage_compression.cpp src/duckdb/ub_src_storage_compression_chimp.cpp src/duckdb/ub_src_storage_compression_dictionary.cpp src/duckdb/ub_src_storage_compression_roaring.cpp src/duckdb/ub_src_storage_metadata.cpp src/duckdb/ub_src_storage_serialization.cpp src/duckdb/ub_src_storage_statistics.cpp src/duckdb/ub_src_storage_table.cpp src/duckdb/ub_src_transaction.cpp src/duckdb/src/verification/copied_statement_verifier.cpp src/duckdb/src/verification/deserialized_statement_verifier.cpp src/duckdb/src/verification/external_statement_verifier.cpp src/duckdb/src/verification/fetch_row_verifier.cpp src/duckdb/src/verification/no_operator_caching_verifier.cpp src/duckdb/src/verification/parsed_statement_verifier.cpp src/duckdb/src/verification/prepared_statement_verifier.cpp src/duckdb/src/verification/statement_verifier.cpp src/duckdb/src/verification/unoptimized_statement_verifier.cpp src/duckdb/third_party/fmt/format.cc src/duckdb/third_party/fsst/libfsst.cpp src/duckdb/third_party/miniz/miniz.cpp src/duckdb/third_party/re2/re2/bitmap256.cc src/duckdb/third_party/re2/re2/bitstate.cc src/duckdb/third_party/re2/re2/compile.cc src/duckdb/third_party/re2/re2/dfa.cc src/duckdb/third_party/re2/re2/filtered_re2.cc src/duckdb/third_party/re2/re2/mimics_pcre.cc src/duckdb/third_party/re2/re2/nfa.cc src/duckdb/third_party/re2/re2/onepass.cc src/duckdb/third_party/re2/re2/parse.cc src/duckdb/third_party/re2/re2/perl_groups.cc src/duckdb/third_party/re2/re2/prefilter.cc src/duckdb/third_party/re2/re2/prefilter_tree.cc src/duckdb/third_party/re2/re2/prog.cc src/duckdb/third_party/re2/re2/re2.cc src/duckdb/third_party/re2/re2/regexp.cc src/duckdb/third_party/re2/re2/set.cc src/duckdb/third_party/re2/re2/simplify.cc src/duckdb/third_party/re2/re2/stringpiece.cc src/duckdb/third_party/re2/re2/tostring.cc src/duckdb/third_party/re2/re2/unicode_casefold.cc src/duckdb/third_party/re2/re2/unicode_groups.cc src/duckdb/third_party/re2/util/rune.cc src/duckdb/third_party/re2/util/strutil.cc src/duckdb/third_party/hyperloglog/hyperloglog.cpp src/duckdb/third_party/hyperloglog/sds.cpp src/duckdb/third_party/skiplist/SkipList.cpp src/duckdb/third_party/fastpforlib/bitpacking.cpp src/duckdb/third_party/utf8proc/utf8proc.cpp src/duckdb/third_party/utf8proc/utf8proc_wrapper.cpp src/duckdb/third_party/libpg_query/pg_functions.cpp src/duckdb/third_party/libpg_query/postgres_parser.cpp src/duckdb/third_party/libpg_query/src_backend_nodes_list.cpp src/duckdb/third_party/libpg_query/src_backend_nodes_makefuncs.cpp src/duckdb/third_party/libpg_query/src_backend_nodes_value.cpp src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp src/duckdb/third_party/libpg_query/src_common_keywords.cpp src/duckdb/third_party/mbedtls/library/aes.cpp src/duckdb/third_party/mbedtls/library/aria.cpp src/duckdb/third_party/mbedtls/library/asn1parse.cpp src/duckdb/third_party/mbedtls/library/base64.cpp src/duckdb/third_party/mbedtls/library/bignum.cpp src/duckdb/third_party/mbedtls/library/camellia.cpp src/duckdb/third_party/mbedtls/library/cipher.cpp src/duckdb/third_party/mbedtls/library/cipher_wrap.cpp src/duckdb/third_party/mbedtls/library/constant_time.cpp src/duckdb/third_party/mbedtls/library/entropy.cpp src/duckdb/third_party/mbedtls/library/entropy_poll.cpp src/duckdb/third_party/mbedtls/library/gcm.cpp src/duckdb/third_party/mbedtls/library/md.cpp src/duckdb/third_party/mbedtls/library/oid.cpp src/duckdb/third_party/mbedtls/library/pem.cpp src/duckdb/third_party/mbedtls/library/pk.cpp src/duckdb/third_party/mbedtls/library/pk_wrap.cpp src/duckdb/third_party/mbedtls/library/pkparse.cpp src/duckdb/third_party/mbedtls/library/platform_util.cpp src/duckdb/third_party/mbedtls/library/rsa.cpp src/duckdb/third_party/mbedtls/library/rsa_alt_helpers.cpp src/duckdb/third_party/mbedtls/library/sha1.cpp src/duckdb/third_party/mbedtls/library/sha256.cpp src/duckdb/third_party/mbedtls/library/sha512.cpp src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp src/duckdb/third_party/yyjson/yyjson.cpp src/duckdb/third_party/zstd/common/debug.cpp src/duckdb/third_party/zstd/common/entropy_common.cpp src/duckdb/third_party/zstd/common/error_private.cpp src/duckdb/third_party/zstd/common/fse_decompress.cpp src/duckdb/third_party/zstd/common/pool.cpp src/duckdb/third_party/zstd/common/threading.cpp src/duckdb/third_party/zstd/common/xxhash.cpp src/duckdb/third_party/zstd/common/zstd_common.cpp src/duckdb/third_party/zstd/compress/fse_compress.cpp src/duckdb/third_party/zstd/compress/hist.cpp src/duckdb/third_party/zstd/compress/huf_compress.cpp src/duckdb/third_party/zstd/compress/zstd_compress.cpp src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp src/duckdb/third_party/zstd/compress/zstd_fast.cpp src/duckdb/third_party/zstd/compress/zstd_lazy.cpp src/duckdb/third_party/zstd/compress/zstd_ldm.cpp src/duckdb/third_party/zstd/compress/zstd_opt.cpp src/duckdb/third_party/zstd/compress/zstdmt_compress.cpp src/duckdb/third_party/zstd/decompress/huf_decompress.cpp src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp src/duckdb/third_party/zstd/deprecated/zbuff_common.cpp src/duckdb/third_party/zstd/deprecated/zbuff_compress.cpp src/duckdb/third_party/zstd/deprecated/zbuff_decompress.cpp src/duckdb/third_party/zstd/dict/cover.cpp src/duckdb/third_party/zstd/dict/divsufsort.cpp src/duckdb/third_party/zstd/dict/fastcover.cpp src/duckdb/third_party/zstd/dict/zdict.cpp src/duckdb/extension/core_functions/core_functions_extension.cpp src/duckdb/extension/core_functions/lambda_functions.cpp src/duckdb/extension/core_functions/function_list.cpp src/duckdb/ub_extension_core_functions_aggregate_regression.cpp src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp src/duckdb/ub_extension_core_functions_aggregate_nested.cpp src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp src/duckdb/ub_extension_core_functions_scalar_random.cpp src/duckdb/ub_extension_core_functions_scalar_string.cpp src/duckdb/ub_extension_core_functions_scalar_math.cpp src/duckdb/ub_extension_core_functions_scalar_generic.cpp src/duckdb/ub_extension_core_functions_scalar_enum.cpp src/duckdb/ub_extension_core_functions_scalar_map.cpp src/duckdb/ub_extension_core_functions_scalar_operators.cpp src/duckdb/ub_extension_core_functions_scalar_date.cpp src/duckdb/ub_extension_core_functions_scalar_list.cpp src/duckdb/ub_extension_core_functions_scalar_blob.cpp src/duckdb/ub_extension_core_functions_scalar_debug.cpp src/duckdb/ub_extension_core_functions_scalar_array.cpp src/duckdb/ub_extension_core_functions_scalar_union.cpp src/duckdb/ub_extension_core_functions_scalar_struct.cpp src/duckdb/ub_extension_core_functions_scalar_bit.cpp src/duckdb/extension/parquet/column_reader.cpp src/duckdb/extension/parquet/column_writer.cpp src/duckdb/extension/parquet/parquet_crypto.cpp src/duckdb/extension/parquet/parquet_extension.cpp src/duckdb/extension/parquet/parquet_metadata.cpp src/duckdb/extension/parquet/parquet_reader.cpp src/duckdb/extension/parquet/parquet_statistics.cpp src/duckdb/extension/parquet/parquet_timestamp.cpp src/duckdb/extension/parquet/parquet_writer.cpp src/duckdb/extension/parquet/serialize_parquet.cpp src/duckdb/extension/parquet/zstd_file_system.cpp src/duckdb/extension/parquet/geo_parquet.cpp src/duckdb/third_party/parquet/parquet_types.cpp src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp src/duckdb/third_party/thrift/thrift/transport/TTransportException.cpp src/duckdb/third_party/thrift/thrift/transport/TBufferTransports.cpp src/duckdb/third_party/snappy/snappy.cc src/duckdb/third_party/snappy/snappy-sinksource.cc src/duckdb/third_party/lz4/lz4.cpp src/duckdb/third_party/brotli/common/constants.cpp src/duckdb/third_party/brotli/common/context.cpp src/duckdb/third_party/brotli/common/dictionary.cpp src/duckdb/third_party/brotli/common/platform.cpp src/duckdb/third_party/brotli/common/shared_dictionary.cpp src/duckdb/third_party/brotli/common/transform.cpp src/duckdb/third_party/brotli/dec/bit_reader.cpp src/duckdb/third_party/brotli/dec/decode.cpp src/duckdb/third_party/brotli/dec/huffman.cpp src/duckdb/third_party/brotli/dec/state.cpp src/duckdb/third_party/brotli/enc/backward_references.cpp src/duckdb/third_party/brotli/enc/backward_references_hq.cpp src/duckdb/third_party/brotli/enc/bit_cost.cpp src/duckdb/third_party/brotli/enc/block_splitter.cpp src/duckdb/third_party/brotli/enc/brotli_bit_stream.cpp src/duckdb/third_party/brotli/enc/cluster.cpp src/duckdb/third_party/brotli/enc/command.cpp src/duckdb/third_party/brotli/enc/compound_dictionary.cpp src/duckdb/third_party/brotli/enc/compress_fragment.cpp src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.cpp src/duckdb/third_party/brotli/enc/dictionary_hash.cpp src/duckdb/third_party/brotli/enc/encode.cpp src/duckdb/third_party/brotli/enc/encoder_dict.cpp src/duckdb/third_party/brotli/enc/entropy_encode.cpp src/duckdb/third_party/brotli/enc/fast_log.cpp src/duckdb/third_party/brotli/enc/histogram.cpp src/duckdb/third_party/brotli/enc/literal_cost.cpp src/duckdb/third_party/brotli/enc/memory.cpp src/duckdb/third_party/brotli/enc/metablock.cpp src/duckdb/third_party/brotli/enc/static_dict.cpp src/duckdb/third_party/brotli/enc/utf8_util.cpp src/duckdb/extension/icu/./icu-table-range.cpp src/duckdb/extension/icu/./icu-datefunc.cpp src/duckdb/extension/icu/./icu-datepart.cpp src/duckdb/extension/icu/./icu-datetrunc.cpp src/duckdb/extension/icu/./icu_extension.cpp src/duckdb/extension/icu/./icu-timezone.cpp src/duckdb/extension/icu/./icu-timebucket.cpp src/duckdb/extension/icu/./icu-makedate.cpp src/duckdb/extension/icu/./icu-datesub.cpp src/duckdb/extension/icu/./icu-dateadd.cpp src/duckdb/extension/icu/./icu-list-range.cpp src/duckdb/extension/icu/./icu-strptime.cpp src/duckdb/ub_extension_icu_third_party_icu_common.cpp src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp src/duckdb/extension/json/buffered_json_reader.cpp src/duckdb/extension/json/json_enums.cpp src/duckdb/extension/json/json_extension.cpp src/duckdb/extension/json/json_common.cpp src/duckdb/extension/json/json_functions.cpp src/duckdb/extension/json/json_scan.cpp src/duckdb/extension/json/json_serializer.cpp src/duckdb/extension/json/json_deserializer.cpp src/duckdb/extension/json/serialize_json.cpp src/duckdb/ub_extension_json_json_functions.cpp) +set(DUCKDB_SRC_FILES src/duckdb/ub_src_catalog.cpp src/duckdb/ub_src_catalog_catalog_entry.cpp src/duckdb/ub_src_catalog_catalog_entry_dependency.cpp src/duckdb/ub_src_catalog_default.cpp src/duckdb/ub_src_common_adbc.cpp src/duckdb/ub_src_common_adbc_nanoarrow.cpp src/duckdb/ub_src_common.cpp src/duckdb/ub_src_common_arrow_appender.cpp src/duckdb/ub_src_common_arrow.cpp src/duckdb/ub_src_common_crypto.cpp src/duckdb/ub_src_common_enums.cpp src/duckdb/ub_src_common_exception.cpp src/duckdb/ub_src_common_operator.cpp src/duckdb/ub_src_common_progress_bar.cpp src/duckdb/ub_src_common_row_operations.cpp src/duckdb/ub_src_common_serializer.cpp src/duckdb/ub_src_common_sort.cpp src/duckdb/ub_src_common_tree_renderer.cpp src/duckdb/ub_src_common_types.cpp src/duckdb/ub_src_common_types_column.cpp src/duckdb/ub_src_common_types_row.cpp src/duckdb/ub_src_common_value_operations.cpp src/duckdb/src/common/vector_operations/boolean_operators.cpp src/duckdb/src/common/vector_operations/comparison_operators.cpp src/duckdb/src/common/vector_operations/generators.cpp src/duckdb/src/common/vector_operations/is_distinct_from.cpp src/duckdb/src/common/vector_operations/null_operations.cpp src/duckdb/src/common/vector_operations/numeric_inplace_operators.cpp src/duckdb/src/common/vector_operations/vector_cast.cpp src/duckdb/src/common/vector_operations/vector_copy.cpp src/duckdb/src/common/vector_operations/vector_hash.cpp src/duckdb/src/common/vector_operations/vector_storage.cpp src/duckdb/ub_src_execution.cpp src/duckdb/ub_src_execution_expression_executor.cpp src/duckdb/ub_src_execution_index_art.cpp src/duckdb/ub_src_execution_index.cpp src/duckdb/ub_src_execution_nested_loop_join.cpp src/duckdb/ub_src_execution_operator_aggregate.cpp src/duckdb/ub_src_execution_operator_csv_scanner_buffer_manager.cpp src/duckdb/ub_src_execution_operator_csv_scanner_encode.cpp src/duckdb/ub_src_execution_operator_csv_scanner_scanner.cpp src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp src/duckdb/ub_src_execution_operator_csv_scanner_state_machine.cpp src/duckdb/ub_src_execution_operator_csv_scanner_table_function.cpp src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp src/duckdb/ub_src_execution_operator_filter.cpp src/duckdb/ub_src_execution_operator_helper.cpp src/duckdb/ub_src_execution_operator_join.cpp src/duckdb/ub_src_execution_operator_order.cpp src/duckdb/ub_src_execution_operator_persistent.cpp src/duckdb/ub_src_execution_operator_projection.cpp src/duckdb/ub_src_execution_operator_scan.cpp src/duckdb/ub_src_execution_operator_schema.cpp src/duckdb/ub_src_execution_operator_set.cpp src/duckdb/ub_src_execution_physical_plan.cpp src/duckdb/ub_src_execution_sample.cpp src/duckdb/ub_src_function_aggregate_distributive.cpp src/duckdb/ub_src_function_aggregate.cpp src/duckdb/ub_src_function.cpp src/duckdb/ub_src_function_cast.cpp src/duckdb/ub_src_function_cast_union.cpp src/duckdb/ub_src_function_pragma.cpp src/duckdb/ub_src_function_scalar_compressed_materialization.cpp src/duckdb/ub_src_function_scalar.cpp src/duckdb/ub_src_function_scalar_date.cpp src/duckdb/ub_src_function_scalar_generic.cpp src/duckdb/ub_src_function_scalar_list.cpp src/duckdb/ub_src_function_scalar_map.cpp src/duckdb/ub_src_function_scalar_operator.cpp src/duckdb/ub_src_function_scalar_sequence.cpp src/duckdb/ub_src_function_scalar_string.cpp src/duckdb/ub_src_function_scalar_string_regexp.cpp src/duckdb/ub_src_function_scalar_struct.cpp src/duckdb/ub_src_function_scalar_system.cpp src/duckdb/ub_src_function_table_arrow.cpp src/duckdb/ub_src_function_table.cpp src/duckdb/ub_src_function_table_system.cpp src/duckdb/ub_src_function_table_version.cpp src/duckdb/ub_src_function_window.cpp src/duckdb/ub_src_logging.cpp src/duckdb/ub_src_main.cpp src/duckdb/ub_src_main_buffered_data.cpp src/duckdb/ub_src_main_capi.cpp src/duckdb/ub_src_main_capi_cast.cpp src/duckdb/ub_src_main_chunk_scan_state.cpp src/duckdb/ub_src_main_extension.cpp src/duckdb/ub_src_main_relation.cpp src/duckdb/ub_src_main_secret.cpp src/duckdb/ub_src_main_settings.cpp src/duckdb/ub_src_optimizer.cpp src/duckdb/ub_src_optimizer_compressed_materialization.cpp src/duckdb/ub_src_optimizer_join_order.cpp src/duckdb/ub_src_optimizer_matcher.cpp src/duckdb/ub_src_optimizer_pullup.cpp src/duckdb/ub_src_optimizer_pushdown.cpp src/duckdb/ub_src_optimizer_rule.cpp src/duckdb/ub_src_optimizer_statistics_expression.cpp src/duckdb/ub_src_optimizer_statistics_operator.cpp src/duckdb/ub_src_parallel.cpp src/duckdb/ub_src_parser.cpp src/duckdb/ub_src_parser_constraints.cpp src/duckdb/ub_src_parser_expression.cpp src/duckdb/ub_src_parser_parsed_data.cpp src/duckdb/ub_src_parser_query_node.cpp src/duckdb/ub_src_parser_statement.cpp src/duckdb/ub_src_parser_tableref.cpp src/duckdb/ub_src_parser_transform_constraint.cpp src/duckdb/ub_src_parser_transform_expression.cpp src/duckdb/ub_src_parser_transform_helpers.cpp src/duckdb/ub_src_parser_transform_statement.cpp src/duckdb/ub_src_parser_transform_tableref.cpp src/duckdb/ub_src_planner.cpp src/duckdb/ub_src_planner_binder_expression.cpp src/duckdb/ub_src_planner_binder_query_node.cpp src/duckdb/ub_src_planner_binder_statement.cpp src/duckdb/ub_src_planner_binder_tableref.cpp src/duckdb/ub_src_planner_expression.cpp src/duckdb/ub_src_planner_expression_binder.cpp src/duckdb/ub_src_planner_filter.cpp src/duckdb/ub_src_planner_operator.cpp src/duckdb/ub_src_planner_subquery.cpp src/duckdb/ub_src_storage.cpp src/duckdb/ub_src_storage_buffer.cpp src/duckdb/ub_src_storage_checkpoint.cpp src/duckdb/ub_src_storage_compression_alp.cpp src/duckdb/ub_src_storage_compression.cpp src/duckdb/ub_src_storage_compression_chimp.cpp src/duckdb/ub_src_storage_compression_dictionary.cpp src/duckdb/ub_src_storage_compression_roaring.cpp src/duckdb/ub_src_storage_metadata.cpp src/duckdb/ub_src_storage_serialization.cpp src/duckdb/ub_src_storage_statistics.cpp src/duckdb/ub_src_storage_table.cpp src/duckdb/ub_src_transaction.cpp src/duckdb/src/verification/copied_statement_verifier.cpp src/duckdb/src/verification/deserialized_statement_verifier.cpp src/duckdb/src/verification/external_statement_verifier.cpp src/duckdb/src/verification/fetch_row_verifier.cpp src/duckdb/src/verification/no_operator_caching_verifier.cpp src/duckdb/src/verification/parsed_statement_verifier.cpp src/duckdb/src/verification/prepared_statement_verifier.cpp src/duckdb/src/verification/statement_verifier.cpp src/duckdb/src/verification/unoptimized_statement_verifier.cpp src/duckdb/third_party/fmt/format.cc src/duckdb/third_party/fsst/libfsst.cpp src/duckdb/third_party/miniz/miniz.cpp src/duckdb/third_party/re2/re2/bitmap256.cc src/duckdb/third_party/re2/re2/bitstate.cc src/duckdb/third_party/re2/re2/compile.cc src/duckdb/third_party/re2/re2/dfa.cc src/duckdb/third_party/re2/re2/filtered_re2.cc src/duckdb/third_party/re2/re2/mimics_pcre.cc src/duckdb/third_party/re2/re2/nfa.cc src/duckdb/third_party/re2/re2/onepass.cc src/duckdb/third_party/re2/re2/parse.cc src/duckdb/third_party/re2/re2/perl_groups.cc src/duckdb/third_party/re2/re2/prefilter.cc src/duckdb/third_party/re2/re2/prefilter_tree.cc src/duckdb/third_party/re2/re2/prog.cc src/duckdb/third_party/re2/re2/re2.cc src/duckdb/third_party/re2/re2/regexp.cc src/duckdb/third_party/re2/re2/set.cc src/duckdb/third_party/re2/re2/simplify.cc src/duckdb/third_party/re2/re2/stringpiece.cc src/duckdb/third_party/re2/re2/tostring.cc src/duckdb/third_party/re2/re2/unicode_casefold.cc src/duckdb/third_party/re2/re2/unicode_groups.cc src/duckdb/third_party/re2/util/rune.cc src/duckdb/third_party/re2/util/strutil.cc src/duckdb/third_party/hyperloglog/hyperloglog.cpp src/duckdb/third_party/hyperloglog/sds.cpp src/duckdb/third_party/skiplist/SkipList.cpp src/duckdb/third_party/fastpforlib/bitpacking.cpp src/duckdb/third_party/utf8proc/utf8proc.cpp src/duckdb/third_party/utf8proc/utf8proc_wrapper.cpp src/duckdb/third_party/libpg_query/pg_functions.cpp src/duckdb/third_party/libpg_query/postgres_parser.cpp src/duckdb/third_party/libpg_query/src_backend_nodes_list.cpp src/duckdb/third_party/libpg_query/src_backend_nodes_makefuncs.cpp src/duckdb/third_party/libpg_query/src_backend_nodes_value.cpp src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp src/duckdb/third_party/libpg_query/src_common_keywords.cpp src/duckdb/third_party/mbedtls/library/aes.cpp src/duckdb/third_party/mbedtls/library/aria.cpp src/duckdb/third_party/mbedtls/library/asn1parse.cpp src/duckdb/third_party/mbedtls/library/base64.cpp src/duckdb/third_party/mbedtls/library/bignum.cpp src/duckdb/third_party/mbedtls/library/camellia.cpp src/duckdb/third_party/mbedtls/library/cipher.cpp src/duckdb/third_party/mbedtls/library/cipher_wrap.cpp src/duckdb/third_party/mbedtls/library/constant_time.cpp src/duckdb/third_party/mbedtls/library/entropy.cpp src/duckdb/third_party/mbedtls/library/entropy_poll.cpp src/duckdb/third_party/mbedtls/library/gcm.cpp src/duckdb/third_party/mbedtls/library/md.cpp src/duckdb/third_party/mbedtls/library/oid.cpp src/duckdb/third_party/mbedtls/library/pem.cpp src/duckdb/third_party/mbedtls/library/pk.cpp src/duckdb/third_party/mbedtls/library/pk_wrap.cpp src/duckdb/third_party/mbedtls/library/pkparse.cpp src/duckdb/third_party/mbedtls/library/platform_util.cpp src/duckdb/third_party/mbedtls/library/rsa.cpp src/duckdb/third_party/mbedtls/library/rsa_alt_helpers.cpp src/duckdb/third_party/mbedtls/library/sha1.cpp src/duckdb/third_party/mbedtls/library/sha256.cpp src/duckdb/third_party/mbedtls/library/sha512.cpp src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp src/duckdb/third_party/yyjson/yyjson.cpp src/duckdb/third_party/zstd/common/debug.cpp src/duckdb/third_party/zstd/common/entropy_common.cpp src/duckdb/third_party/zstd/common/error_private.cpp src/duckdb/third_party/zstd/common/fse_decompress.cpp src/duckdb/third_party/zstd/common/pool.cpp src/duckdb/third_party/zstd/common/threading.cpp src/duckdb/third_party/zstd/common/xxhash.cpp src/duckdb/third_party/zstd/common/zstd_common.cpp src/duckdb/third_party/zstd/compress/fse_compress.cpp src/duckdb/third_party/zstd/compress/hist.cpp src/duckdb/third_party/zstd/compress/huf_compress.cpp src/duckdb/third_party/zstd/compress/zstd_compress.cpp src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp src/duckdb/third_party/zstd/compress/zstd_fast.cpp src/duckdb/third_party/zstd/compress/zstd_lazy.cpp src/duckdb/third_party/zstd/compress/zstd_ldm.cpp src/duckdb/third_party/zstd/compress/zstd_opt.cpp src/duckdb/third_party/zstd/compress/zstdmt_compress.cpp src/duckdb/third_party/zstd/decompress/huf_decompress.cpp src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp src/duckdb/third_party/zstd/deprecated/zbuff_common.cpp src/duckdb/third_party/zstd/deprecated/zbuff_compress.cpp src/duckdb/third_party/zstd/deprecated/zbuff_decompress.cpp src/duckdb/third_party/zstd/dict/cover.cpp src/duckdb/third_party/zstd/dict/divsufsort.cpp src/duckdb/third_party/zstd/dict/fastcover.cpp src/duckdb/third_party/zstd/dict/zdict.cpp src/duckdb/extension/core_functions/function_list.cpp src/duckdb/extension/core_functions/lambda_functions.cpp src/duckdb/extension/core_functions/core_functions_extension.cpp src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp src/duckdb/ub_extension_core_functions_aggregate_regression.cpp src/duckdb/ub_extension_core_functions_aggregate_nested.cpp src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp src/duckdb/ub_extension_core_functions_scalar_generic.cpp src/duckdb/ub_extension_core_functions_scalar_math.cpp src/duckdb/ub_extension_core_functions_scalar_date.cpp src/duckdb/ub_extension_core_functions_scalar_struct.cpp src/duckdb/ub_extension_core_functions_scalar_list.cpp src/duckdb/ub_extension_core_functions_scalar_enum.cpp src/duckdb/ub_extension_core_functions_scalar_operators.cpp src/duckdb/ub_extension_core_functions_scalar_union.cpp src/duckdb/ub_extension_core_functions_scalar_map.cpp src/duckdb/ub_extension_core_functions_scalar_array.cpp src/duckdb/ub_extension_core_functions_scalar_random.cpp src/duckdb/ub_extension_core_functions_scalar_blob.cpp src/duckdb/ub_extension_core_functions_scalar_bit.cpp src/duckdb/ub_extension_core_functions_scalar_debug.cpp src/duckdb/ub_extension_core_functions_scalar_string.cpp src/duckdb/extension/parquet/column_reader.cpp src/duckdb/extension/parquet/column_writer.cpp src/duckdb/extension/parquet/parquet_crypto.cpp src/duckdb/extension/parquet/parquet_extension.cpp src/duckdb/extension/parquet/parquet_metadata.cpp src/duckdb/extension/parquet/parquet_reader.cpp src/duckdb/extension/parquet/parquet_statistics.cpp src/duckdb/extension/parquet/parquet_timestamp.cpp src/duckdb/extension/parquet/parquet_writer.cpp src/duckdb/extension/parquet/serialize_parquet.cpp src/duckdb/extension/parquet/zstd_file_system.cpp src/duckdb/extension/parquet/geo_parquet.cpp src/duckdb/third_party/parquet/parquet_types.cpp src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp src/duckdb/third_party/thrift/thrift/transport/TTransportException.cpp src/duckdb/third_party/thrift/thrift/transport/TBufferTransports.cpp src/duckdb/third_party/snappy/snappy.cc src/duckdb/third_party/snappy/snappy-sinksource.cc src/duckdb/third_party/lz4/lz4.cpp src/duckdb/third_party/brotli/common/constants.cpp src/duckdb/third_party/brotli/common/context.cpp src/duckdb/third_party/brotli/common/dictionary.cpp src/duckdb/third_party/brotli/common/platform.cpp src/duckdb/third_party/brotli/common/shared_dictionary.cpp src/duckdb/third_party/brotli/common/transform.cpp src/duckdb/third_party/brotli/dec/bit_reader.cpp src/duckdb/third_party/brotli/dec/decode.cpp src/duckdb/third_party/brotli/dec/huffman.cpp src/duckdb/third_party/brotli/dec/state.cpp src/duckdb/third_party/brotli/enc/backward_references.cpp src/duckdb/third_party/brotli/enc/backward_references_hq.cpp src/duckdb/third_party/brotli/enc/bit_cost.cpp src/duckdb/third_party/brotli/enc/block_splitter.cpp src/duckdb/third_party/brotli/enc/brotli_bit_stream.cpp src/duckdb/third_party/brotli/enc/cluster.cpp src/duckdb/third_party/brotli/enc/command.cpp src/duckdb/third_party/brotli/enc/compound_dictionary.cpp src/duckdb/third_party/brotli/enc/compress_fragment.cpp src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.cpp src/duckdb/third_party/brotli/enc/dictionary_hash.cpp src/duckdb/third_party/brotli/enc/encode.cpp src/duckdb/third_party/brotli/enc/encoder_dict.cpp src/duckdb/third_party/brotli/enc/entropy_encode.cpp src/duckdb/third_party/brotli/enc/fast_log.cpp src/duckdb/third_party/brotli/enc/histogram.cpp src/duckdb/third_party/brotli/enc/literal_cost.cpp src/duckdb/third_party/brotli/enc/memory.cpp src/duckdb/third_party/brotli/enc/metablock.cpp src/duckdb/third_party/brotli/enc/static_dict.cpp src/duckdb/third_party/brotli/enc/utf8_util.cpp src/duckdb/extension/icu/./icu-timebucket.cpp src/duckdb/extension/icu/./icu-datetrunc.cpp src/duckdb/extension/icu/./icu-makedate.cpp src/duckdb/extension/icu/./icu-list-range.cpp src/duckdb/extension/icu/./icu-datepart.cpp src/duckdb/extension/icu/./icu-datefunc.cpp src/duckdb/extension/icu/./icu-strptime.cpp src/duckdb/extension/icu/./icu-table-range.cpp src/duckdb/extension/icu/./icu_extension.cpp src/duckdb/extension/icu/./icu-timezone.cpp src/duckdb/extension/icu/./icu-datesub.cpp src/duckdb/extension/icu/./icu-dateadd.cpp src/duckdb/ub_extension_icu_third_party_icu_common.cpp src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp src/duckdb/extension/json/buffered_json_reader.cpp src/duckdb/extension/json/json_enums.cpp src/duckdb/extension/json/json_extension.cpp src/duckdb/extension/json/json_common.cpp src/duckdb/extension/json/json_functions.cpp src/duckdb/extension/json/json_scan.cpp src/duckdb/extension/json/json_serializer.cpp src/duckdb/extension/json/json_deserializer.cpp src/duckdb/extension/json/serialize_json.cpp src/duckdb/ub_extension_json_json_functions.cpp) set(CMAKE_JAVA_COMPILE_FLAGS -source 1.8 -target 1.8 -encoding utf-8) diff --git a/src/duckdb/extension/core_functions/function_list.cpp b/src/duckdb/extension/core_functions/function_list.cpp index fb8550c67..9051c6004 100644 --- a/src/duckdb/extension/core_functions/function_list.cpp +++ b/src/duckdb/extension/core_functions/function_list.cpp @@ -136,6 +136,7 @@ static const StaticFunctionDefinition core_functions[] = { DUCKDB_AGGREGATE_FUNCTION(CovarPopFun), DUCKDB_AGGREGATE_FUNCTION(CovarSampFun), DUCKDB_SCALAR_FUNCTION(CurrentDatabaseFun), + DUCKDB_SCALAR_FUNCTION(CurrentDateFun), DUCKDB_SCALAR_FUNCTION(CurrentQueryFun), DUCKDB_SCALAR_FUNCTION(CurrentSchemaFun), DUCKDB_SCALAR_FUNCTION(CurrentSchemasFun), @@ -192,6 +193,7 @@ static const StaticFunctionDefinition core_functions[] = { DUCKDB_SCALAR_FUNCTION_ALIAS(GenRandomUuidFun), DUCKDB_SCALAR_FUNCTION_SET(GenerateSeriesFun), DUCKDB_SCALAR_FUNCTION(GetBitFun), + DUCKDB_SCALAR_FUNCTION(CurrentTimeFun), DUCKDB_SCALAR_FUNCTION(GetCurrentTimestampFun), DUCKDB_SCALAR_FUNCTION_SET_ALIAS(GradeUpFun), DUCKDB_SCALAR_FUNCTION_SET(GreatestFun), @@ -369,6 +371,7 @@ static const StaticFunctionDefinition core_functions[] = { DUCKDB_SCALAR_FUNCTION(ToTimestampFun), DUCKDB_SCALAR_FUNCTION(ToWeeksFun), DUCKDB_SCALAR_FUNCTION(ToYearsFun), + DUCKDB_SCALAR_FUNCTION_ALIAS(TodayFun), DUCKDB_SCALAR_FUNCTION_ALIAS(TransactionTimestampFun), DUCKDB_SCALAR_FUNCTION(TranslateFun), DUCKDB_SCALAR_FUNCTION_SET(TrimFun), diff --git a/src/duckdb/extension/core_functions/include/core_functions/scalar/date_functions.hpp b/src/duckdb/extension/core_functions/include/core_functions/scalar/date_functions.hpp index 7256502a9..efa9821d1 100644 --- a/src/duckdb/extension/core_functions/include/core_functions/scalar/date_functions.hpp +++ b/src/duckdb/extension/core_functions/include/core_functions/scalar/date_functions.hpp @@ -33,6 +33,21 @@ struct CenturyFun { static ScalarFunctionSet GetFunctions(); }; +struct CurrentDateFun { + static constexpr const char *Name = "current_date"; + static constexpr const char *Parameters = ""; + static constexpr const char *Description = "Returns the current date"; + static constexpr const char *Example = "current_date()"; + + static ScalarFunction GetFunction(); +}; + +struct TodayFun { + using ALIAS = CurrentDateFun; + + static constexpr const char *Name = "today"; +}; + struct DateDiffFun { static constexpr const char *Name = "date_diff"; static constexpr const char *Parameters = "part,startdate,enddate"; @@ -183,6 +198,15 @@ struct EpochNsFun { static ScalarFunctionSet GetFunctions(); }; +struct CurrentTimeFun { + static constexpr const char *Name = "get_current_time"; + static constexpr const char *Parameters = ""; + static constexpr const char *Description = "Returns the current time"; + static constexpr const char *Example = "get_current_time()"; + + static ScalarFunction GetFunction(); +}; + struct EraFun { static constexpr const char *Name = "era"; static constexpr const char *Parameters = "ts"; diff --git a/src/duckdb/extension/core_functions/scalar/date/current.cpp b/src/duckdb/extension/core_functions/scalar/date/current.cpp index 3d25ee80a..61867f271 100644 --- a/src/duckdb/extension/core_functions/scalar/date/current.cpp +++ b/src/duckdb/extension/core_functions/scalar/date/current.cpp @@ -6,6 +6,7 @@ #include "duckdb/main/client_context.hpp" #include "duckdb/planner/expression/bound_function_expression.hpp" #include "duckdb/transaction/meta_transaction.hpp" +#include "duckdb/planner/expression/bound_cast_expression.hpp" namespace duckdb { @@ -26,4 +27,41 @@ ScalarFunction GetCurrentTimestampFun::GetFunction() { return current_timestamp; } +static unique_ptr CurrentTimeExpr(FunctionBindExpressionInput &input) { + auto timestamp = GetCurrentTimestampFun::GetFunction(); + timestamp.name = GetCurrentTimestampFun::Name; + + vector> args; + + auto func = make_uniq_base(LogicalType::TIMESTAMP_TZ, timestamp, + std::move(args), nullptr); + + return BoundCastExpression::AddCastToType(input.context, std::move(func), LogicalType::TIME_TZ); +} + +static unique_ptr CurrentDateExpr(FunctionBindExpressionInput &input) { + auto timestamp = GetCurrentTimestampFun::GetFunction(); + timestamp.name = GetCurrentTimestampFun::Name; + + vector> args; + + auto func = make_uniq_base(LogicalType::TIMESTAMP_TZ, timestamp, + std::move(args), nullptr); + return BoundCastExpression::AddCastToType(input.context, std::move(func), LogicalType::DATE); +} + +ScalarFunction CurrentTimeFun::GetFunction() { + ScalarFunction current_time({}, LogicalType::TIME_TZ, nullptr); + current_time.bind_expression = CurrentTimeExpr; + current_time.stability = FunctionStability::CONSISTENT_WITHIN_QUERY; + return current_time; +} + +ScalarFunction CurrentDateFun::GetFunction() { + ScalarFunction current_date({}, LogicalType::DATE, nullptr); + current_date.bind_expression = CurrentDateExpr; + current_date.stability = FunctionStability::CONSISTENT_WITHIN_QUERY; + return current_date; +} + } // namespace duckdb diff --git a/src/duckdb/extension/core_functions/scalar/generic/can_implicitly_cast.cpp b/src/duckdb/extension/core_functions/scalar/generic/can_implicitly_cast.cpp index 5db38d601..5949dcc37 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/can_implicitly_cast.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/can_implicitly_cast.cpp @@ -18,8 +18,8 @@ static void CanCastImplicitlyFunction(DataChunk &args, ExpressionState &state, V } unique_ptr BindCanCastImplicitlyExpression(FunctionBindExpressionInput &input) { - auto &source_type = input.function.children[0]->return_type; - auto &target_type = input.function.children[1]->return_type; + auto &source_type = input.children[0]->return_type; + auto &target_type = input.children[1]->return_type; if (source_type.id() == LogicalTypeId::UNKNOWN || source_type.id() == LogicalTypeId::SQLNULL || target_type.id() == LogicalTypeId::UNKNOWN || target_type.id() == LogicalTypeId::SQLNULL) { // parameter - unknown return type diff --git a/src/duckdb/extension/core_functions/scalar/generic/typeof.cpp b/src/duckdb/extension/core_functions/scalar/generic/typeof.cpp index 1f7caef84..895d474f4 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/typeof.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/typeof.cpp @@ -10,7 +10,7 @@ static void TypeOfFunction(DataChunk &args, ExpressionState &state, Vector &resu } unique_ptr BindTypeOfFunctionExpression(FunctionBindExpressionInput &input) { - auto &return_type = input.function.children[0]->return_type; + auto &return_type = input.children[0]->return_type; if (return_type.id() == LogicalTypeId::UNKNOWN || return_type.id() == LogicalTypeId::SQLNULL) { // parameter - unknown return type return nullptr; diff --git a/src/duckdb/extension/json/json_functions/json_structure.cpp b/src/duckdb/extension/json/json_functions/json_structure.cpp index fbc71366e..260f7984a 100644 --- a/src/duckdb/extension/json/json_functions/json_structure.cpp +++ b/src/duckdb/extension/json/json_functions/json_structure.cpp @@ -662,7 +662,9 @@ static double CalculateTypeSimilarity(const LogicalType &merged, const LogicalTy } // Only maps and structs can be merged into a map - D_ASSERT(type.id() == LogicalTypeId::STRUCT); + if (type.id() != LogicalTypeId::STRUCT) { + return -1; + } return CalculateMapAndStructSimilarity(merged, type, false, max_depth, depth); } case LogicalTypeId::LIST: { diff --git a/src/duckdb/extension/parquet/include/resizable_buffer.hpp b/src/duckdb/extension/parquet/include/resizable_buffer.hpp index 14658ecee..1e225d510 100644 --- a/src/duckdb/extension/parquet/include/resizable_buffer.hpp +++ b/src/duckdb/extension/parquet/include/resizable_buffer.hpp @@ -98,6 +98,7 @@ class ResizeableBuffer : public ByteBuffer { } if (new_size > alloc_len) { alloc_len = NextPowerOfTwo(new_size); + allocated_data.Reset(); // Have to reset before allocating new buffer (otherwise we use ~2x the memory) allocated_data = allocator.Allocate(alloc_len); ptr = allocated_data.get(); } diff --git a/src/duckdb/extension/parquet/parquet_extension.cpp b/src/duckdb/extension/parquet/parquet_extension.cpp index 8ec9924a6..a9e8b20e4 100644 --- a/src/duckdb/extension/parquet/parquet_extension.cpp +++ b/src/duckdb/extension/parquet/parquet_extension.cpp @@ -1497,18 +1497,19 @@ static void ParquetCopySerialize(Serializer &serializer, const FunctionData &bin // We have to std::move them, otherwise MSVC will complain that it's not a "const T &&" const auto compression_level = SerializeCompressionLevel(bind_data.compression_level); D_ASSERT(DeserializeCompressionLevel(compression_level) == bind_data.compression_level); + ParquetWriteBindData default_value; serializer.WritePropertyWithDefault(109, "compression_level", compression_level); serializer.WritePropertyWithDefault(110, "row_groups_per_file", bind_data.row_groups_per_file, - std::move(ParquetWriteBindData().row_groups_per_file)); + std::move(default_value.row_groups_per_file)); serializer.WritePropertyWithDefault(111, "debug_use_openssl", bind_data.debug_use_openssl, - std::move(ParquetWriteBindData().debug_use_openssl)); + std::move(default_value.debug_use_openssl)); serializer.WritePropertyWithDefault(112, "dictionary_size_limit", bind_data.dictionary_size_limit, - std::move(ParquetWriteBindData().dictionary_size_limit)); + std::move(default_value.dictionary_size_limit)); serializer.WritePropertyWithDefault(113, "bloom_filter_false_positive_ratio", bind_data.bloom_filter_false_positive_ratio, - std::move(ParquetWriteBindData().bloom_filter_false_positive_ratio)); + std::move(default_value.bloom_filter_false_positive_ratio)); serializer.WritePropertyWithDefault(114, "parquet_version", bind_data.parquet_version, - std::move(ParquetWriteBindData().parquet_version)); + std::move(default_value.parquet_version)); } static unique_ptr ParquetCopyDeserialize(Deserializer &deserializer, CopyFunction &function) { @@ -1528,16 +1529,17 @@ static unique_ptr ParquetCopyDeserialize(Deserializer &deserialize deserializer.ReadPropertyWithDefault(109, "compression_level", compression_level); data->compression_level = DeserializeCompressionLevel(compression_level); D_ASSERT(SerializeCompressionLevel(data->compression_level) == compression_level); + ParquetWriteBindData default_value; data->row_groups_per_file = deserializer.ReadPropertyWithExplicitDefault( - 110, "row_groups_per_file", std::move(ParquetWriteBindData().row_groups_per_file)); + 110, "row_groups_per_file", std::move(default_value.row_groups_per_file)); data->debug_use_openssl = deserializer.ReadPropertyWithExplicitDefault( - 111, "debug_use_openssl", std::move(ParquetWriteBindData().debug_use_openssl)); + 111, "debug_use_openssl", std::move(default_value.debug_use_openssl)); data->dictionary_size_limit = deserializer.ReadPropertyWithExplicitDefault( - 112, "dictionary_size_limit", std::move(ParquetWriteBindData().dictionary_size_limit)); + 112, "dictionary_size_limit", std::move(default_value.dictionary_size_limit)); data->bloom_filter_false_positive_ratio = deserializer.ReadPropertyWithExplicitDefault( - 113, "bloom_filter_false_positive_ratio", std::move(ParquetWriteBindData().bloom_filter_false_positive_ratio)); - data->parquet_version = deserializer.ReadPropertyWithExplicitDefault( - 114, "parquet_version", std::move(ParquetWriteBindData().parquet_version)); + 113, "bloom_filter_false_positive_ratio", std::move(default_value.bloom_filter_false_positive_ratio)); + data->parquet_version = + deserializer.ReadPropertyWithExplicitDefault(114, "parquet_version", std::move(default_value.parquet_version)); return std::move(data); } diff --git a/src/duckdb/src/catalog/default/default_functions.cpp b/src/duckdb/src/catalog/default/default_functions.cpp index 7702bc4e0..480e05f81 100644 --- a/src/duckdb/src/catalog/default/default_functions.cpp +++ b/src/duckdb/src/catalog/default/default_functions.cpp @@ -163,9 +163,6 @@ static const DefaultMacro internal_macros[] = { // date functions {DEFAULT_SCHEMA, "date_add", {"date", "interval", nullptr}, {{nullptr, nullptr}}, "date + interval"}, - {DEFAULT_SCHEMA, "current_date", {nullptr}, {{nullptr, nullptr}}, "current_timestamp::DATE"}, - {DEFAULT_SCHEMA, "today", {nullptr}, {{nullptr, nullptr}}, "current_timestamp::DATE"}, - {DEFAULT_SCHEMA, "get_current_time", {nullptr}, {{nullptr, nullptr}}, "current_timestamp::TIMETZ"}, // regexp functions {DEFAULT_SCHEMA, "regexp_split_to_table", {"text", "pattern", nullptr}, {{nullptr, nullptr}}, "unnest(string_split_regex(text, pattern))"}, diff --git a/src/duckdb/src/common/adbc/adbc.cpp b/src/duckdb/src/common/adbc/adbc.cpp index 7323f3b1f..35ceb2f34 100644 --- a/src/duckdb/src/common/adbc/adbc.cpp +++ b/src/duckdb/src/common/adbc/adbc.cpp @@ -53,7 +53,6 @@ AdbcStatusCode duckdb_adbc_init(int version, void *driver, struct AdbcError *err adbc_driver->ConnectionGetInfo = duckdb_adbc::ConnectionGetInfo; adbc_driver->StatementGetParameterSchema = duckdb_adbc::StatementGetParameterSchema; adbc_driver->ConnectionGetTableSchema = duckdb_adbc::ConnectionGetTableSchema; - adbc_driver->StatementSetSubstraitPlan = duckdb_adbc::StatementSetSubstraitPlan; return ADBC_STATUS_OK; } @@ -70,7 +69,6 @@ struct DuckDBAdbcStatementWrapper { ArrowArrayStream ingestion_stream; IngestionMode ingestion_mode = IngestionMode::CREATE; bool temporary_table = false; - uint8_t *substrait_plan; uint64_t plan_length; }; @@ -157,36 +155,6 @@ AdbcStatusCode DatabaseNew(struct AdbcDatabase *database, struct AdbcError *erro return CheckResult(res, error, "Failed to allocate"); } -AdbcStatusCode StatementSetSubstraitPlan(struct AdbcStatement *statement, const uint8_t *plan, size_t length, - struct AdbcError *error) { - if (!statement) { - SetError(error, "Statement is not set"); - return ADBC_STATUS_INVALID_ARGUMENT; - } - if (!plan) { - SetError(error, "Substrait Plan is not set"); - return ADBC_STATUS_INVALID_ARGUMENT; - } - if (length == 0) { - SetError(error, "Can't execute plan with size = 0"); - return ADBC_STATUS_INVALID_ARGUMENT; - } - auto wrapper = static_cast(statement->private_data); - if (wrapper->ingestion_stream.release) { - // Release any resources currently held by the ingestion stream before we overwrite it - wrapper->ingestion_stream.release(&wrapper->ingestion_stream); - wrapper->ingestion_stream.release = nullptr; - } - if (wrapper->statement) { - duckdb_destroy_prepare(&wrapper->statement); - wrapper->statement = nullptr; - } - wrapper->substrait_plan = static_cast(malloc(sizeof(uint8_t) * length)); - wrapper->plan_length = length; - memcpy(wrapper->substrait_plan, plan, length); - return ADBC_STATUS_OK; -} - AdbcStatusCode DatabaseSetOption(struct AdbcDatabase *database, const char *key, const char *value, struct AdbcError *error) { if (!database) { @@ -677,7 +645,6 @@ AdbcStatusCode StatementNew(struct AdbcConnection *connection, struct AdbcStatem statement_wrapper->ingestion_stream.release = nullptr; statement_wrapper->ingestion_table_name = nullptr; statement_wrapper->db_schema = nullptr; - statement_wrapper->substrait_plan = nullptr; statement_wrapper->temporary_table = false; statement_wrapper->ingestion_mode = IngestionMode::CREATE; @@ -709,10 +676,6 @@ AdbcStatusCode StatementRelease(struct AdbcStatement *statement, struct AdbcErro free(wrapper->db_schema); wrapper->db_schema = nullptr; } - if (wrapper->substrait_plan) { - free(wrapper->substrait_plan); - wrapper->substrait_plan = nullptr; - } free(statement->private_data); statement->private_data = nullptr; return ADBC_STATUS_OK; @@ -805,25 +768,7 @@ AdbcStatusCode StatementExecuteQuery(struct AdbcStatement *statement, struct Arr if (has_stream && to_table) { return IngestToTableFromBoundStream(wrapper, error); } - if (wrapper->substrait_plan != nullptr) { - auto plan_str = std::string(reinterpret_cast(wrapper->substrait_plan), wrapper->plan_length); - duckdb::vector params; - params.emplace_back(duckdb::Value::BLOB_RAW(plan_str)); - duckdb::unique_ptr query_result; - try { - query_result = reinterpret_cast(wrapper->connection) - ->TableFunction("from_substrait", params) - ->Execute(); - } catch (duckdb::Exception &e) { - std::string error_msg = "It was not possible to execute substrait query. " + std::string(e.what()); - SetError(error, error_msg); - return ADBC_STATUS_INVALID_ARGUMENT; - } - auto arrow_wrapper = new duckdb::ArrowResultWrapper(); - arrow_wrapper->result = - duckdb::unique_ptr_cast(std::move(query_result)); - wrapper->result = reinterpret_cast(arrow_wrapper); - } else if (has_stream) { + if (has_stream) { // A stream was bound to the statement, use that to bind parameters duckdb::unique_ptr result; ArrowArrayStream stream = wrapper->ingestion_stream; diff --git a/src/duckdb/src/common/compressed_file_system.cpp b/src/duckdb/src/common/compressed_file_system.cpp index ddc325cf1..7d2e2cfde 100644 --- a/src/duckdb/src/common/compressed_file_system.cpp +++ b/src/duckdb/src/common/compressed_file_system.cpp @@ -44,7 +44,7 @@ int64_t CompressedFile::ReadData(void *buffer, int64_t remaining) { auto available = MinValue(UnsafeNumericCast(remaining), UnsafeNumericCast(stream_data.out_buff_end - stream_data.out_buff_start)); - memcpy(data_ptr_t(buffer) + total_read, stream_data.out_buff_start, available); + memcpy(static_cast(buffer) + total_read, stream_data.out_buff_start, available); // increment the total read variables as required stream_data.out_buff_start += available; diff --git a/src/duckdb/src/common/gzip_file_system.cpp b/src/duckdb/src/common/gzip_file_system.cpp index ee0a21580..edb72bf91 100644 --- a/src/duckdb/src/common/gzip_file_system.cpp +++ b/src/duckdb/src/common/gzip_file_system.cpp @@ -82,7 +82,7 @@ struct MiniZStreamWrapper : public StreamWrapper { void Close() override; - void FlushStream(); + void FlushStream() const; }; MiniZStreamWrapper::~MiniZStreamWrapper() { @@ -146,7 +146,7 @@ void MiniZStreamWrapper::Initialize(CompressedFile &file, bool write) { bool MiniZStreamWrapper::Read(StreamData &sd) { // Handling for the concatenated files if (sd.refresh) { - auto available = (uint32_t)(sd.in_buff_end - sd.in_buff_start); + auto available = static_cast(sd.in_buff_end - sd.in_buff_start); if (available <= GZIP_FOOTER_SIZE) { // Only footer is available so we just close and return finished Close(); @@ -173,7 +173,7 @@ bool MiniZStreamWrapper::Read(StreamData &sd) { c = UnsafeNumericCast(*body_ptr); body_ptr++; } while (c != '\0' && body_ptr < sd.in_buff_end); - if ((idx_t)(body_ptr - sd.in_buff_start) >= GZIP_HEADER_MAXSIZE) { + if (static_cast(body_ptr - sd.in_buff_start) >= GZIP_HEADER_MAXSIZE) { throw InternalException("Filename resulting in GZIP header larger than defined maximum (%d)", GZIP_HEADER_MAXSIZE); } @@ -193,9 +193,9 @@ bool MiniZStreamWrapper::Read(StreamData &sd) { // actually decompress mz_stream_ptr->next_in = sd.in_buff_start; D_ASSERT(sd.in_buff_end - sd.in_buff_start < NumericLimits::Maximum()); - mz_stream_ptr->avail_in = (uint32_t)(sd.in_buff_end - sd.in_buff_start); + mz_stream_ptr->avail_in = static_cast(sd.in_buff_end - sd.in_buff_start); mz_stream_ptr->next_out = data_ptr_cast(sd.out_buff_end); - mz_stream_ptr->avail_out = (uint32_t)((sd.out_buff.get() + sd.out_buf_size) - sd.out_buff_end); + mz_stream_ptr->avail_out = static_cast((sd.out_buff.get() + sd.out_buf_size) - sd.out_buff_end); auto ret = duckdb_miniz::mz_inflate(mz_stream_ptr.get(), duckdb_miniz::MZ_NO_FLUSH); if (ret != duckdb_miniz::MZ_OK && ret != duckdb_miniz::MZ_STREAM_END) { throw IOException("Failed to decode gzip stream: %s", duckdb_miniz::mz_error(ret)); @@ -248,7 +248,7 @@ void MiniZStreamWrapper::Write(CompressedFile &file, StreamData &sd, data_ptr_t } } -void MiniZStreamWrapper::FlushStream() { +void MiniZStreamWrapper::FlushStream() const { auto &sd = file->stream_data; mz_stream_ptr->next_in = nullptr; mz_stream_ptr->avail_in = 0; @@ -371,7 +371,7 @@ string GZipFileSystem::UncompressGZIPString(const char *data, idx_t size) { do { c = *body_ptr; body_ptr++; - } while (c != '\0' && (idx_t)(body_ptr - data) < size); + } while (c != '\0' && static_cast(body_ptr - data) < size); } // stream is now set to beginning of payload data @@ -384,10 +384,10 @@ string GZipFileSystem::UncompressGZIPString(const char *data, idx_t size) { mz_stream_ptr->next_in = const_uchar_ptr_cast(body_ptr); mz_stream_ptr->avail_in = NumericCast(bytes_remaining); - unsigned char decompress_buffer[BUFSIZ]; string decompressed; while (status == duckdb_miniz::MZ_OK) { + unsigned char decompress_buffer[BUFSIZ]; mz_stream_ptr->next_out = decompress_buffer; mz_stream_ptr->avail_out = sizeof(decompress_buffer); status = mz_inflate(mz_stream_ptr.get(), duckdb_miniz::MZ_NO_FLUSH); diff --git a/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp b/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp index 8d78e6653..aa165678b 100644 --- a/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +++ b/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp @@ -153,8 +153,6 @@ class StreamingWindowState : public OperatorState { ComputeOffset(context, wexpr, offset); ComputeDefault(context, wexpr, dflt); - curr_chunk.Initialize(context, {wexpr.return_type}); - buffered = idx_t(std::abs(offset)); prev.Reference(dflt); prev.Flatten(buffered); @@ -162,6 +160,10 @@ class StreamingWindowState : public OperatorState { } void Execute(ExecutionContext &context, DataChunk &input, DataChunk &delayed, Vector &result) { + if (!curr_chunk.ColumnCount()) { + curr_chunk.Initialize(context.client, {result.GetType()}, delayed.GetCapacity()); + } + if (offset >= 0) { ExecuteLag(context, input, result); } else { @@ -212,7 +214,7 @@ class StreamingWindowState : public OperatorState { idx_t pos = 0; idx_t unified_offset = buffered; if (unified_offset < count) { - curr_chunk.Reset(); + Reset(curr_chunk); executor.Execute(input, curr_chunk); VectorOperations::Copy(curr, result, count, unified_offset, pos); pos += count - unified_offset; @@ -221,7 +223,7 @@ class StreamingWindowState : public OperatorState { // Copy unified[unified_offset:] => result[pos:] idx_t unified_count = count + delayed.size(); if (unified_offset < unified_count) { - curr_chunk.Reset(); + Reset(curr_chunk); executor.Execute(delayed, curr_chunk); idx_t delayed_offset = unified_offset - count; // Only copy as many values as we need @@ -312,6 +314,13 @@ class StreamingWindowState : public OperatorState { initialized = true; } + static inline void Reset(DataChunk &chunk) { + // Reset trashes the capacity... + const auto capacity = chunk.GetCapacity(); + chunk.Reset(); + chunk.SetCapacity(capacity); + } + public: //! We can't initialise until we have an input chunk bool initialized; @@ -470,34 +479,34 @@ void StreamingWindowState::AggregateState::Execute(ExecutionContext &context, Da } } -void PhysicalStreamingWindow::ExecuteFunctions(ExecutionContext &context, DataChunk &chunk, DataChunk &delayed, +void PhysicalStreamingWindow::ExecuteFunctions(ExecutionContext &context, DataChunk &output, DataChunk &delayed, GlobalOperatorState &gstate_p, OperatorState &state_p) const { auto &gstate = gstate_p.Cast(); auto &state = state_p.Cast(); // Compute window functions - const idx_t count = chunk.size(); + const idx_t count = output.size(); const column_t input_width = children[0]->GetTypes().size(); for (column_t expr_idx = 0; expr_idx < select_list.size(); expr_idx++) { column_t col_idx = input_width + expr_idx; auto &expr = *select_list[expr_idx]; - auto &result = chunk.data[col_idx]; + auto &result = output.data[col_idx]; switch (expr.GetExpressionType()) { case ExpressionType::WINDOW_AGGREGATE: - state.aggregate_states[expr_idx]->Execute(context, chunk, result); + state.aggregate_states[expr_idx]->Execute(context, output, result); break; case ExpressionType::WINDOW_FIRST_VALUE: case ExpressionType::WINDOW_PERCENT_RANK: case ExpressionType::WINDOW_RANK: case ExpressionType::WINDOW_RANK_DENSE: { // Reference constant vector - chunk.data[col_idx].Reference(*state.const_vectors[expr_idx]); + output.data[col_idx].Reference(*state.const_vectors[expr_idx]); break; } case ExpressionType::WINDOW_ROW_NUMBER: { // Set row numbers int64_t start_row = gstate.row_number; - auto rdata = FlatVector::GetData(chunk.data[col_idx]); + auto rdata = FlatVector::GetData(output.data[col_idx]); for (idx_t i = 0; i < count; i++) { rdata[i] = NumericCast(start_row + NumericCast(i)); } @@ -505,7 +514,7 @@ void PhysicalStreamingWindow::ExecuteFunctions(ExecutionContext &context, DataCh } case ExpressionType::WINDOW_LAG: case ExpressionType::WINDOW_LEAD: - state.lead_lag_states[expr_idx]->Execute(context, chunk, delayed, result); + state.lead_lag_states[expr_idx]->Execute(context, output, delayed, result); break; default: throw NotImplementedException("%s for StreamingWindow", ExpressionTypeToString(expr.GetExpressionType())); @@ -515,13 +524,13 @@ void PhysicalStreamingWindow::ExecuteFunctions(ExecutionContext &context, DataCh } void PhysicalStreamingWindow::ExecuteInput(ExecutionContext &context, DataChunk &delayed, DataChunk &input, - DataChunk &chunk, GlobalOperatorState &gstate_p, + DataChunk &output, GlobalOperatorState &gstate_p, OperatorState &state_p) const { auto &state = state_p.Cast(); // Put payload columns in place for (idx_t col_idx = 0; col_idx < input.data.size(); col_idx++) { - chunk.data[col_idx].Reference(input.data[col_idx]); + output.data[col_idx].Reference(input.data[col_idx]); } idx_t count = input.size(); @@ -531,51 +540,53 @@ void PhysicalStreamingWindow::ExecuteInput(ExecutionContext &context, DataChunk count -= state.lead_count; input.Copy(delayed, count); } - chunk.SetCardinality(count); + output.SetCardinality(count); - ExecuteFunctions(context, chunk, state.delayed, gstate_p, state_p); + ExecuteFunctions(context, output, state.delayed, gstate_p, state_p); } void PhysicalStreamingWindow::ExecuteShifted(ExecutionContext &context, DataChunk &delayed, DataChunk &input, - DataChunk &chunk, GlobalOperatorState &gstate_p, + DataChunk &output, GlobalOperatorState &gstate_p, OperatorState &state_p) const { auto &state = state_p.Cast(); auto &shifted = state.shifted; - idx_t i = input.size(); - idx_t d = delayed.size(); - shifted.Reset(); + idx_t out = output.size(); + idx_t in = input.size(); + idx_t delay = delayed.size(); + D_ASSERT(out <= delay); + + state.Reset(shifted); // shifted = delayed delayed.Copy(shifted); - delayed.Reset(); + state.Reset(delayed); for (idx_t col_idx = 0; col_idx < delayed.data.size(); ++col_idx) { - // chunk[0:i] = shifted[0:i] - chunk.data[col_idx].Reference(shifted.data[col_idx]); - // delayed[0:i] = chunk[i:d-i] - VectorOperations::Copy(shifted.data[col_idx], delayed.data[col_idx], d, i, 0); - // delayed[d-i:d] = input[0:i] - VectorOperations::Copy(input.data[col_idx], delayed.data[col_idx], i, 0, d - i); + // output[0:out] = delayed[0:out] + output.data[col_idx].Reference(shifted.data[col_idx]); + // delayed[0:out] = delayed[out:delay-out] + VectorOperations::Copy(shifted.data[col_idx], delayed.data[col_idx], delay, out, 0); + // delayed[delay-out:delay-out+in] = input[0:in] + VectorOperations::Copy(input.data[col_idx], delayed.data[col_idx], in, 0, delay - out); } - chunk.SetCardinality(i); - delayed.SetCardinality(d); + delayed.SetCardinality(delay - out + in); - ExecuteFunctions(context, chunk, delayed, gstate_p, state_p); + ExecuteFunctions(context, output, delayed, gstate_p, state_p); } void PhysicalStreamingWindow::ExecuteDelayed(ExecutionContext &context, DataChunk &delayed, DataChunk &input, - DataChunk &chunk, GlobalOperatorState &gstate_p, + DataChunk &output, GlobalOperatorState &gstate_p, OperatorState &state_p) const { // Put payload columns in place for (idx_t col_idx = 0; col_idx < delayed.data.size(); col_idx++) { - chunk.data[col_idx].Reference(delayed.data[col_idx]); + output.data[col_idx].Reference(delayed.data[col_idx]); } idx_t count = delayed.size(); - chunk.SetCardinality(count); + output.SetCardinality(count); - ExecuteFunctions(context, chunk, input, gstate_p, state_p); + ExecuteFunctions(context, output, input, gstate_p, state_p); } -OperatorResultType PhysicalStreamingWindow::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk, +OperatorResultType PhysicalStreamingWindow::Execute(ExecutionContext &context, DataChunk &input, DataChunk &output, GlobalOperatorState &gstate_p, OperatorState &state_p) const { auto &state = state_p.Cast(); if (!state.initialized) { @@ -585,37 +596,37 @@ OperatorResultType PhysicalStreamingWindow::Execute(ExecutionContext &context, D auto &delayed = state.delayed; // We can Reset delayed now that no one can be referencing it. if (!delayed.size()) { - delayed.Reset(); + state.Reset(delayed); } - const idx_t available = delayed.size() + input.size(); - if (available <= state.lead_count) { + if (delayed.size() < state.lead_count) { // If we don't have enough to produce a single row, // then just delay more rows, return nothing // and ask for more data. delayed.Append(input); - chunk.SetCardinality(0); + output.SetCardinality(0); return OperatorResultType::NEED_MORE_INPUT; } else if (input.size() < delayed.size()) { // If we can't consume all of the delayed values, // we need to split them instead of referencing them all - ExecuteShifted(context, delayed, input, chunk, gstate_p, state_p); + output.SetCardinality(input.size()); + ExecuteShifted(context, delayed, input, output, gstate_p, state_p); // We delayed the unused input so ask for more return OperatorResultType::NEED_MORE_INPUT; } else if (delayed.size()) { // We have enough delayed rows so flush them - ExecuteDelayed(context, delayed, input, chunk, gstate_p, state_p); + ExecuteDelayed(context, delayed, input, output, gstate_p, state_p); // Defer resetting delayed as it may be referenced. delayed.SetCardinality(0); // Come back to process the input return OperatorResultType::HAVE_MORE_OUTPUT; } else { // No delayed rows, so emit what we can and delay the rest. - ExecuteInput(context, delayed, input, chunk, gstate_p, state_p); + ExecuteInput(context, delayed, input, output, gstate_p, state_p); return OperatorResultType::NEED_MORE_INPUT; } } -OperatorFinalizeResultType PhysicalStreamingWindow::FinalExecute(ExecutionContext &context, DataChunk &chunk, +OperatorFinalizeResultType PhysicalStreamingWindow::FinalExecute(ExecutionContext &context, DataChunk &output, GlobalOperatorState &gstate_p, OperatorState &state_p) const { auto &state = state_p.Cast(); @@ -624,8 +635,15 @@ OperatorFinalizeResultType PhysicalStreamingWindow::FinalExecute(ExecutionContex auto &delayed = state.delayed; // There are no more input rows auto &input = state.shifted; - input.Reset(); - ExecuteDelayed(context, delayed, input, chunk, gstate_p, state_p); + state.Reset(input); + + if (output.GetCapacity() < delayed.size()) { + // More than one output buffer was delayed, so shift in what we can + output.SetCardinality(output.GetCapacity()); + ExecuteShifted(context, delayed, input, output, gstate_p, state_p); + return OperatorFinalizeResultType::HAVE_MORE_OUTPUT; + } + ExecuteDelayed(context, delayed, input, output, gstate_p, state_p); } return OperatorFinalizeResultType::FINISHED; diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp index 906ace12b..2b3361533 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp @@ -39,7 +39,13 @@ idx_t ColumnCountResult::GetMostFrequentColumnCount() const { } bool ColumnCountResult::AddRow(ColumnCountResult &result, idx_t buffer_pos) { + const LinePosition cur_position(result.cur_buffer_idx, buffer_pos + 1, result.current_buffer_size); + if (cur_position - result.last_position > result.state_machine.options.maximum_line_size.GetValue() && + buffer_pos != NumericLimits::Maximum()) { + result.error = true; + } result.InternalAddRow(); + result.last_position = cur_position; if (!result.states.EmptyLastValue()) { idx_t col_count_idx = result.result_position; for (idx_t i = 0; i < result.result_position + 1; i++) { @@ -99,6 +105,7 @@ ColumnCountScanner::ColumnCountScanner(shared_ptr buffer_manag : BaseScanner(std::move(buffer_manager), state_machine, std::move(error_handler), true, nullptr, iterator), result(states, *state_machine, result_size_p), column_count(1), result_size(result_size_p) { sniffing = true; + result.last_position = {0, 0, cur_buffer_handle->actual_size}; } unique_ptr ColumnCountScanner::UpgradeToStringValueScanner() { @@ -117,6 +124,7 @@ unique_ptr ColumnCountScanner::UpgradeToStringValueScanner() ColumnCountResult &ColumnCountScanner::ParseChunk() { result.result_position = 0; column_count = 1; + result.current_buffer_size = cur_buffer_handle->actual_size; ParseChunkInternal(result); return result; } @@ -139,6 +147,7 @@ void ColumnCountScanner::FinalizeChunkProcess() { if (iterator.pos.buffer_pos == cur_buffer_handle->actual_size) { // Move to next buffer cur_buffer_handle = buffer_manager->GetBuffer(++iterator.pos.buffer_idx); + if (!cur_buffer_handle) { buffer_handle_ptr = nullptr; if (states.IsQuotedCurrent() && !states.IsUnquoted()) { @@ -158,6 +167,15 @@ void ColumnCountScanner::FinalizeChunkProcess() { result.AddRow(result, NumericLimits::Maximum()); } return; + } else { + result.cur_buffer_idx = iterator.pos.buffer_idx; + result.current_buffer_size = cur_buffer_handle->actual_size; + // Do a quick check that the line is still sane + const LinePosition cur_position(result.cur_buffer_idx, 0, result.current_buffer_size); + if (cur_position - result.last_position > result.state_machine.options.maximum_line_size.GetValue()) { + result.error = true; + return; + } } iterator.pos.buffer_pos = 0; buffer_handle_ptr = cur_buffer_handle->Ptr(); diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 29838280e..f48339a02 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -148,7 +148,7 @@ inline bool IsValueNull(const char *null_str_ptr, const char *value_ptr, const i } bool StringValueResult::HandleTooManyColumnsError(const char *value_ptr, const idx_t size) { - if (cur_col_id >= number_of_columns) { + if (cur_col_id >= number_of_columns && state_machine.state_machine_options.rfc_4180.GetValue()) { bool error = true; if (cur_col_id == number_of_columns && ((quoted && state_machine.options.allow_quoted_nulls) || !quoted)) { // we make an exception if the first over-value is null @@ -220,6 +220,9 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size return; } if (cur_col_id >= number_of_columns) { + if (!state_machine.state_machine_options.rfc_4180.GetValue()) { + return; + } bool error = true; if (cur_col_id == number_of_columns && ((quoted && state_machine.options.allow_quoted_nulls) || !quoted)) { // we make an exception if the first over-value is null @@ -511,6 +514,10 @@ void StringValueResult::AddPossiblyEscapedValue(StringValueResult &result, const return; } } + if (result.cur_col_id >= result.number_of_columns && + !result.state_machine.state_machine_options.rfc_4180.GetValue()) { + return; + } if (!result.HandleTooManyColumnsError(value_ptr, length)) { // If it's an escaped value we have to remove all the escapes, this is not really great // If we are going to escape, this vector must be a varchar vector @@ -520,7 +527,6 @@ void StringValueResult::AddPossiblyEscapedValue(StringValueResult &result, const // We have to write the cast error message. std::ostringstream error; // Casting Error Message - error << "Could not convert string \"" << std::string(value_ptr, length) << "\" to \'" << LogicalTypeIdToString(result.parse_types[result.chunk_col_id].type_id) << "\'"; auto error_string = error.str(); @@ -533,6 +539,7 @@ void StringValueResult::AddPossiblyEscapedValue(StringValueResult &result, const auto value = StringValueScanner::RemoveEscape( value_ptr, length, result.state_machine.dialect_options.state_machine_options.escape.GetValue(), result.state_machine.dialect_options.state_machine_options.quote.GetValue(), + result.state_machine.dialect_options.state_machine_options.rfc_4180.GetValue(), result.parse_chunk.data[result.chunk_col_id]); result.AddValueToVector(value.GetData(), value.GetSize()); } @@ -806,7 +813,7 @@ bool StringValueResult::AddRowInternal() { quoted_new_line = false; // We need to check if we are getting the correct number of columns here. // If columns are correct, we add it, and that's it. - if (cur_col_id != number_of_columns) { + if (cur_col_id < number_of_columns) { // We have too few columns: if (null_padding) { while (cur_col_id < number_of_columns) { @@ -1231,7 +1238,8 @@ void StringValueScanner::ProcessExtraRow() { } } -string_t StringValueScanner::RemoveEscape(const char *str_ptr, idx_t end, char escape, char quote, Vector &vector) { +string_t StringValueScanner::RemoveEscape(const char *str_ptr, idx_t end, char escape, char quote, bool rfc_4180, + Vector &vector) { // Figure out the exact size idx_t str_pos = 0; bool just_escaped = false; @@ -1239,7 +1247,7 @@ string_t StringValueScanner::RemoveEscape(const char *str_ptr, idx_t end, char e if (str_ptr[cur_pos] == escape && !just_escaped) { just_escaped = true; } else if (str_ptr[cur_pos] == quote) { - if (just_escaped) { + if (just_escaped || !rfc_4180) { str_pos++; } just_escaped = false; @@ -1259,7 +1267,7 @@ string_t StringValueScanner::RemoveEscape(const char *str_ptr, idx_t end, char e if (c == escape && !just_escaped) { just_escaped = true; } else if (str_ptr[cur_pos] == quote) { - if (just_escaped) { + if (just_escaped || !rfc_4180) { removed_escapes_ptr[str_pos++] = c; } just_escaped = false; @@ -1289,10 +1297,8 @@ void StringValueScanner::ProcessOverBufferValue() { } if (states.NewRow() || states.NewValue()) { break; - } else { - if (!result.comment) { - over_buffer_string += previous_buffer[i]; - } + } else if (!result.comment) { + over_buffer_string += previous_buffer[i]; } if (states.IsQuoted()) { result.SetQuoted(result, j); @@ -1323,16 +1329,13 @@ void StringValueScanner::ProcessOverBufferValue() { if (states.EmptyLine()) { if (state_machine->dialect_options.num_cols == 1) { break; - } else { - continue; } + continue; } if (states.NewRow() || states.NewValue()) { break; - } else { - if (!result.comment && !states.IsComment()) { - over_buffer_string += buffer_handle_ptr[iterator.pos.buffer_pos]; - } + } else if (!result.comment && !states.IsComment()) { + over_buffer_string += buffer_handle_ptr[iterator.pos.buffer_pos]; } if (states.IsQuoted()) { result.SetQuoted(result, j); @@ -1357,7 +1360,7 @@ void StringValueScanner::ProcessOverBufferValue() { } if (!skip_value) { string_t value; - if (result.quoted) { + if (result.quoted && !result.comment) { value = string_t(over_buffer_string.c_str() + result.quoted_position, UnsafeNumericCast(over_buffer_string.size() - 1 - result.quoted_position)); if (result.escaped) { @@ -1366,6 +1369,7 @@ void StringValueScanner::ProcessOverBufferValue() { value = RemoveEscape(str_ptr, over_buffer_string.size() - 2, state_machine->dialect_options.state_machine_options.escape.GetValue(), state_machine->dialect_options.state_machine_options.quote.GetValue(), + result.state_machine.dialect_options.state_machine_options.rfc_4180.GetValue(), result.parse_chunk.data[result.chunk_col_id]); } } @@ -1376,6 +1380,7 @@ void StringValueScanner::ProcessOverBufferValue() { value = RemoveEscape(over_buffer_string.c_str(), over_buffer_string.size(), state_machine->dialect_options.state_machine_options.escape.GetValue(), state_machine->dialect_options.state_machine_options.quote.GetValue(), + result.state_machine.dialect_options.state_machine_options.rfc_4180.GetValue(), result.parse_chunk.data[result.chunk_col_id]); } } @@ -1436,7 +1441,7 @@ bool StringValueScanner::MoveToNextBuffer() { // This means we reached the end of the file, we must add a last line if there is any to be added if (states.EmptyLine() || states.NewRow() || result.added_last_line || states.IsCurrentNewRow() || states.IsNotSet()) { - if (result.cur_col_id == result.number_of_columns) { + if (result.cur_col_id == result.number_of_columns && !result.IsStateCurrent(CSVState::INVALID)) { result.number_of_rows++; } result.cur_col_id = 0; diff --git a/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp b/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp index f973e8784..fa48e5ef1 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp @@ -117,9 +117,7 @@ static void ReplaceNames(vector &detected_names, CSVStateMachine &state_ detected_names.push_back(GenerateColumnName(options.name_list.size(), col++)); best_sql_types_candidates_per_column_idx[i] = {LogicalType::VARCHAR}; } - dialect_options.num_cols = options.name_list.size(); - } else { // we throw an error const auto error = CSVError::HeaderSniffingError( @@ -128,8 +126,16 @@ static void ReplaceNames(vector &detected_names, CSVStateMachine &state_ error_handler.Error(error); } } - for (idx_t i = 0; i < options.name_list.size(); i++) { - detected_names[i] = options.name_list[i]; + if (options.name_list.size() > detected_names.size()) { + // we throw an error + const auto error = + CSVError::HeaderSniffingError(options, best_header_row, options.name_list.size(), + state_machine.dialect_options.state_machine_options.delimiter.GetValue()); + error_handler.Error(error); + } else { + for (idx_t i = 0; i < options.name_list.size(); i++) { + detected_names[i] = options.name_list[i]; + } } } } diff --git a/src/duckdb/src/execution/sample/reservoir_sample.cpp b/src/duckdb/src/execution/sample/reservoir_sample.cpp index ba777b609..402d84a80 100644 --- a/src/duckdb/src/execution/sample/reservoir_sample.cpp +++ b/src/duckdb/src/execution/sample/reservoir_sample.cpp @@ -225,10 +225,6 @@ vector ReservoirSample::GetRandomizedVector(uint32_t range, uint32_t s for (uint32_t i = 0; i < range; i++) { ret.push_back(i); } - if (size == FIXED_SAMPLE_SIZE) { - std::shuffle(ret.begin(), ret.end(), base_reservoir_sample->random); - return ret; - } for (uint32_t i = 0; i < size; i++) { uint32_t random_shuffle = base_reservoir_sample->random.NextRandomInteger32(i, range); if (random_shuffle == i) { diff --git a/src/duckdb/src/function/function_binder.cpp b/src/duckdb/src/function/function_binder.cpp index 671441825..b0e3bbc73 100644 --- a/src/duckdb/src/function/function_binder.cpp +++ b/src/duckdb/src/function/function_binder.cpp @@ -457,7 +457,7 @@ unique_ptr FunctionBinder::BindScalarFunction(ScalarFunction bound_f std::move(children), std::move(bind_info), is_operator); if (result_func->function.bind_expression) { // if a bind_expression callback is registered - call it and emit the resulting expression - FunctionBindExpressionInput input(context, result_func->bind_info.get(), *result_func); + FunctionBindExpressionInput input(context, result_func->bind_info.get(), result_func->children); result = result_func->function.bind_expression(input); } if (!result) { diff --git a/src/duckdb/src/function/table/read_csv.cpp b/src/duckdb/src/function/table/read_csv.cpp index 706712f81..517c7a266 100644 --- a/src/duckdb/src/function/table/read_csv.cpp +++ b/src/duckdb/src/function/table/read_csv.cpp @@ -124,7 +124,7 @@ void SchemaDiscovery(ClientContext &context, ReadCSVData &result, CSVReaderOptio names = best_schema.GetNames(); return_types = best_schema.GetTypes(); } - if (only_header_or_empty_files == current_file) { + if (only_header_or_empty_files == current_file && !options.columns_set) { for (auto &type : return_types) { D_ASSERT(type.id() == LogicalTypeId::BOOLEAN); // we default to varchar if all files are empty or only have a header after all the sniffing diff --git a/src/duckdb/src/function/table/system/duckdb_extensions.cpp b/src/duckdb/src/function/table/system/duckdb_extensions.cpp index 64d26dea9..0edc2c2ff 100644 --- a/src/duckdb/src/function/table/system/duckdb_extensions.cpp +++ b/src/duckdb/src/function/table/system/duckdb_extensions.cpp @@ -149,11 +149,15 @@ unique_ptr DuckDBExtensionsInit(ClientContext &context auto entry = installed_extensions.find(ext_name); if (entry == installed_extensions.end() || !entry->second.installed) { ExtensionInformation &info = installed_extensions[ext_name]; + info.name = ext_name; info.loaded = true; info.extension_version = ext_install_info->version; info.installed = ext_install_info->mode == ExtensionInstallMode::STATICALLY_LINKED; info.install_mode = ext_install_info->mode; + if (ext_data.install_info->mode == ExtensionInstallMode::STATICALLY_LINKED && info.file_path.empty()) { + info.file_path = "(BUILT-IN)"; + } } else { entry->second.loaded = true; entry->second.extension_version = ext_install_info->version; diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 901e62c89..5cdd0b306 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4815" +#define DUCKDB_PATCH_VERSION "4-dev4889" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4815" +#define DUCKDB_VERSION "v1.1.4-dev4889" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "8e68a3e34a" +#define DUCKDB_SOURCE_ID "5d02d69e5c" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp index 93b8bf9ac..b2d9dae68 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp @@ -17,6 +17,35 @@ namespace duckdb { class CSVFileScan; + +//! Class that keeps track of line starts, used for line size verification +class LinePosition { +public: + LinePosition() { + } + LinePosition(idx_t buffer_idx_p, idx_t buffer_pos_p, idx_t buffer_size_p) + : buffer_pos(buffer_pos_p), buffer_size(buffer_size_p), buffer_idx(buffer_idx_p) { + } + + idx_t operator-(const LinePosition &other) const { + if (other.buffer_idx == buffer_idx) { + return buffer_pos - other.buffer_pos; + } + return other.buffer_size - other.buffer_pos + buffer_pos; + } + + bool operator==(const LinePosition &other) const { + return buffer_pos == other.buffer_pos && buffer_idx == other.buffer_idx && buffer_size == other.buffer_size; + } + + idx_t GetGlobalPosition(idx_t requested_buffer_size, bool first_char_nl = false) const { + return requested_buffer_size * buffer_idx + buffer_pos + first_char_nl; + } + idx_t buffer_pos = 0; + idx_t buffer_size = 0; + idx_t buffer_idx = 0; +}; + class ScannerResult { public: ScannerResult(CSVStates &states, CSVStateMachine &state_machine, idx_t result_size); @@ -52,6 +81,10 @@ class ScannerResult { return result.comment == true; } + inline bool IsStateCurrent(CSVState state) const { + return states.states[1] == state; + } + //! Variable to keep information regarding quoted and escaped values bool quoted = false; //! If the current quoted value is unquoted @@ -62,6 +95,8 @@ class ScannerResult { bool comment = false; idx_t quoted_position = 0; + LinePosition last_position; + //! Size of the result const idx_t result_size; @@ -88,7 +123,7 @@ class BaseScanner { //! Returns true if the scanner is finished bool FinishedFile() const; - //! Parses data into a output_chunk + //! Parses data into an output_chunk virtual ScannerResult &ParseChunk(); //! Returns the result from the last Parse call. Shouts at you if you call it wrong diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp index 8cecfe500..5da4d3037 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp @@ -41,6 +41,9 @@ class ColumnCountResult : public ScannerResult { bool error = false; idx_t result_position = 0; bool cur_line_starts_as_comment = false; + + idx_t cur_buffer_idx = 0; + idx_t current_buffer_size = 0; //! How many rows fit a given column count map rows_per_column_count; //! Adds a Value to the result diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index a2a3d5372..12fd0f427 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -27,34 +27,6 @@ struct CSVBufferUsage { idx_t buffer_idx; }; -//! Class that keeps track of line starts, used for line size verification -class LinePosition { -public: - LinePosition() { - } - LinePosition(idx_t buffer_idx_p, idx_t buffer_pos_p, idx_t buffer_size_p) - : buffer_pos(buffer_pos_p), buffer_size(buffer_size_p), buffer_idx(buffer_idx_p) { - } - - idx_t operator-(const LinePosition &other) const { - if (other.buffer_idx == buffer_idx) { - return buffer_pos - other.buffer_pos; - } - return other.buffer_size - other.buffer_pos + buffer_pos; - } - - bool operator==(const LinePosition &other) const { - return buffer_pos == other.buffer_pos && buffer_idx == other.buffer_idx && buffer_size == other.buffer_size; - } - - idx_t GetGlobalPosition(idx_t requested_buffer_size, bool first_char_nl = false) const { - return requested_buffer_size * buffer_idx + buffer_pos + first_char_nl; - } - idx_t buffer_pos = 0; - idx_t buffer_size = 0; - idx_t buffer_idx = 0; -}; - //! Keeps track of start and end of line positions in regard to the CSV file class FullLinePosition { public: @@ -181,7 +153,7 @@ class StringValueResult : public ScannerResult { unsafe_vector validity_mask; //! Variables to iterate over the CSV buffers - LinePosition last_position; + char *buffer_ptr; idx_t buffer_size; idx_t position_before_comment; @@ -322,7 +294,8 @@ class StringValueScanner : public BaseScanner { bool FinishedIterator() const; //! Creates a new string with all escaped values removed - static string_t RemoveEscape(const char *str_ptr, idx_t end, char escape, char quote, Vector &vector); + static string_t RemoveEscape(const char *str_ptr, idx_t end, char escape, char quote, bool rfc_4180, + Vector &vector); //! If we can directly cast the type when consuming the CSV file, or we have to do it later static bool CanDirectlyCast(const LogicalType &type, bool icu_loaded); diff --git a/src/duckdb/src/include/duckdb/function/scalar_function.hpp b/src/duckdb/src/include/duckdb/function/scalar_function.hpp index 236356b9a..f46bb6042 100644 --- a/src/duckdb/src/include/duckdb/function/scalar_function.hpp +++ b/src/duckdb/src/include/duckdb/function/scalar_function.hpp @@ -78,13 +78,13 @@ struct FunctionModifiedDatabasesInput { struct FunctionBindExpressionInput { FunctionBindExpressionInput(ClientContext &context_p, optional_ptr bind_data_p, - BoundFunctionExpression &function_p) - : context(context_p), bind_data(bind_data_p), function(function_p) { + vector> &children_p) + : context(context_p), bind_data(bind_data_p), children(children_p) { } ClientContext &context; optional_ptr bind_data; - BoundFunctionExpression &function; + vector> &children; }; struct ScalarFunctionBindInput { diff --git a/src/duckdb/src/include/duckdb/main/connection.hpp b/src/duckdb/src/include/duckdb/main/connection.hpp index d0935ca8e..f5b46717c 100644 --- a/src/duckdb/src/include/duckdb/main/connection.hpp +++ b/src/duckdb/src/include/duckdb/main/connection.hpp @@ -166,14 +166,6 @@ class Connection { DUCKDB_API shared_ptr RelationFromQuery(unique_ptr select_stmt, const string &alias = "queryrelation", const string &query = ""); - //! Returns a substrait BLOB from a valid query - DUCKDB_API string GetSubstrait(const string &query); - //! Returns a Query Result from a substrait blob - DUCKDB_API unique_ptr FromSubstrait(const string &proto); - //! Returns a substrait BLOB from a valid query - DUCKDB_API string GetSubstraitJSON(const string &query); - //! Returns a Query Result from a substrait JSON - DUCKDB_API unique_ptr FromSubstraitJSON(const string &json); DUCKDB_API void BeginTransaction(); DUCKDB_API void Commit(); DUCKDB_API void Rollback(); diff --git a/src/duckdb/src/include/duckdb/main/extension.hpp b/src/duckdb/src/include/duckdb/main/extension.hpp index 53de1481d..b623daeff 100644 --- a/src/duckdb/src/include/duckdb/main/extension.hpp +++ b/src/duckdb/src/include/duckdb/main/extension.hpp @@ -24,6 +24,7 @@ class Extension { DUCKDB_API virtual std::string Version() const { return ""; } + DUCKDB_API static const char *DefaultVersion(); }; enum class ExtensionABIType : uint8_t { diff --git a/src/duckdb/src/include/duckdb/main/extension_entries.hpp b/src/duckdb/src/include/duckdb/main/extension_entries.hpp index c8ce3db3a..530f388b2 100644 --- a/src/duckdb/src/include/duckdb/main/extension_entries.hpp +++ b/src/duckdb/src/include/duckdb/main/extension_entries.hpp @@ -135,6 +135,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"covar_samp", "core_functions", CatalogType::AGGREGATE_FUNCTION_ENTRY}, {"create_fts_index", "fts", CatalogType::PRAGMA_FUNCTION_ENTRY}, {"current_database", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, + {"current_date", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"current_localtime", "icu", CatalogType::SCALAR_FUNCTION_ENTRY}, {"current_localtimestamp", "icu", CatalogType::SCALAR_FUNCTION_ENTRY}, {"current_query", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, @@ -179,8 +180,6 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"from_hex", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"from_json", "json", CatalogType::SCALAR_FUNCTION_ENTRY}, {"from_json_strict", "json", CatalogType::SCALAR_FUNCTION_ENTRY}, - {"from_substrait", "substrait", CatalogType::TABLE_FUNCTION_ENTRY}, - {"from_substrait_json", "substrait", CatalogType::TABLE_FUNCTION_ENTRY}, {"fsum", "core_functions", CatalogType::AGGREGATE_FUNCTION_ENTRY}, {"fuzz_all_functions", "sqlsmith", CatalogType::TABLE_FUNCTION_ENTRY}, {"fuzzyduck", "sqlsmith", CatalogType::TABLE_FUNCTION_ENTRY}, @@ -188,9 +187,8 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"gcd", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"gen_random_uuid", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"get_bit", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, + {"get_current_time", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"get_current_timestamp", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, - {"get_substrait", "substrait", CatalogType::TABLE_FUNCTION_ENTRY}, - {"get_substrait_json", "substrait", CatalogType::TABLE_FUNCTION_ENTRY}, {"grade_up", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"greatest", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"greatest_common_divisor", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, @@ -683,6 +681,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"to_timestamp", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"to_weeks", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"to_years", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, + {"today", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"tpcds", "tpcds", CatalogType::PRAGMA_FUNCTION_ENTRY}, {"tpcds_answers", "tpcds", CatalogType::TABLE_FUNCTION_ENTRY}, {"tpcds_queries", "tpcds", CatalogType::TABLE_FUNCTION_ENTRY}, @@ -1069,24 +1068,10 @@ static constexpr ExtensionEntry EXTENSION_SECRET_PROVIDERS[] = { {"mysql/config", "mysql_scanner"}, {"postgres/config", "postgres_scanner"}}; // EXTENSION_SECRET_PROVIDERS -static constexpr const char *AUTOLOADABLE_EXTENSIONS[] = {"aws", - "azure", - "autocomplete", - "core_functions", - "delta", - "excel", - "fts", - "httpfs", - "inet", - "icu", - "json", - "motherduck", - "mysql_scanner", - "parquet", - "sqlite_scanner", - "sqlsmith", - "postgres_scanner", - "tpcds", - "tpch"}; // END_OF_AUTOLOADABLE_EXTENSIONS +static constexpr const char *AUTOLOADABLE_EXTENSIONS[] = { + "aws", "azure", "autocomplete", "core_functions", "delta", "excel", + "fts", "httpfs", "iceberg", "inet", "icu", "json", + "motherduck", "mysql_scanner", "parquet", "sqlite_scanner", "sqlsmith", "postgres_scanner", + "tpcds", "tpch", "uc_catalog"}; // END_OF_AUTOLOADABLE_EXTENSIONS } // namespace duckdb diff --git a/src/duckdb/src/main/connection.cpp b/src/duckdb/src/main/connection.cpp index 119b39c89..a5742dbfd 100644 --- a/src/duckdb/src/main/connection.cpp +++ b/src/duckdb/src/main/connection.cpp @@ -28,9 +28,10 @@ Connection::Connection(DatabaseInstance &database) } Connection::Connection(DuckDB &database) : Connection(*database.instance) { + // Initialization of warning_cb happens in the other constructor } -Connection::Connection(Connection &&other) noexcept { +Connection::Connection(Connection &&other) noexcept : warning_cb(nullptr) { std::swap(context, other.context); std::swap(warning_cb, other.warning_cb); } @@ -98,34 +99,6 @@ unique_ptr Connection::Query(const string &query) { return unique_ptr_cast(std::move(result)); } -DUCKDB_API string Connection::GetSubstrait(const string &query) { - vector params; - params.emplace_back(query); - auto result = TableFunction("get_substrait", params)->Execute(); - auto protobuf = result->FetchRaw()->GetValue(0, 0); - return protobuf.GetValueUnsafe().GetString(); -} - -DUCKDB_API unique_ptr Connection::FromSubstrait(const string &proto) { - vector params; - params.emplace_back(Value::BLOB_RAW(proto)); - return TableFunction("from_substrait", params)->Execute(); -} - -DUCKDB_API string Connection::GetSubstraitJSON(const string &query) { - vector params; - params.emplace_back(query); - auto result = TableFunction("get_substrait_json", params)->Execute(); - auto protobuf = result->FetchRaw()->GetValue(0, 0); - return protobuf.GetValueUnsafe().GetString(); -} - -DUCKDB_API unique_ptr Connection::FromSubstraitJSON(const string &json) { - vector params; - params.emplace_back(json); - return TableFunction("from_substrait_json", params)->Execute(); -} - unique_ptr Connection::Query(unique_ptr statement) { auto result = context->Query(std::move(statement), false); D_ASSERT(result->type == QueryResultType::MATERIALIZED_RESULT); diff --git a/src/duckdb/src/main/extension.cpp b/src/duckdb/src/main/extension.cpp index c13971b0c..e07ce4c53 100644 --- a/src/duckdb/src/main/extension.cpp +++ b/src/duckdb/src/main/extension.cpp @@ -50,18 +50,29 @@ string ParsedExtensionMetaData::GetInvalidMetadataError() { const string engine_version = string(ExtensionHelper::GetVersionDirectoryName()); if (engine_version != duckdb_version) { - result += StringUtil::Format("The file was built for DuckDB version '%s', but we can only load extensions " - "built for DuckDB version '%s'.", + result += StringUtil::Format("The file was built specifically for DuckDB version '%s' and can only be " + "loaded with that version of DuckDB. (this version of DuckDB is '%s')", PrettyPrintString(duckdb_version), engine_version); } - // C_STRUCT ABI versioning works when current duckdb version >= required version + // C_STRUCT ABI versioning } else if (abi_type == ExtensionABIType::C_STRUCT) { - - if (!VersioningUtils::IsSupportedCAPIVersion(duckdb_capi_version)) { - result += StringUtil::Format("The file was built for DuckDB version '%s', but we can only load extensions " - "built for DuckDB C API 'v%lld.%lld.%lld' and lower.", - duckdb_capi_version, DUCKDB_EXTENSION_API_VERSION_MAJOR, - DUCKDB_EXTENSION_API_VERSION_MINOR, DUCKDB_EXTENSION_API_VERSION_PATCH); + idx_t major, minor, patch; + if (!VersioningUtils::ParseSemver(duckdb_capi_version, major, minor, patch)) { + result += StringUtil::Format("The file was built for DuckDB C API version '%s', which failed to parse as a " + "recognized version string", + duckdb_capi_version, DUCKDB_EXTENSION_API_VERSION_MAJOR); + } else if (major != DUCKDB_EXTENSION_API_VERSION_MAJOR) { + // Special case where the extension is built for a completely unsupported API + result += + StringUtil::Format("The file was built for DuckDB C API version '%s', but we can only load extensions " + "built for DuckDB C API 'v%lld.x.y'.", + duckdb_capi_version, DUCKDB_EXTENSION_API_VERSION_MAJOR); + } else if (!VersioningUtils::IsSupportedCAPIVersion(major, minor, patch)) { + result += + StringUtil::Format("The file was built for DuckDB C API version '%s', but we can only load extensions " + "built for DuckDB C API 'v%lld.%lld.%lld' and lower.", + duckdb_capi_version, DUCKDB_EXTENSION_API_VERSION_MAJOR, + DUCKDB_EXTENSION_API_VERSION_MINOR, DUCKDB_EXTENSION_API_VERSION_PATCH); } } else { throw InternalException("Unknown ABI type for extension: " + extension_abi_metadata); @@ -137,4 +148,11 @@ bool VersioningUtils::ParseSemver(string &semver, idx_t &major_out, idx_t &minor return true; } +const char *Extension::DefaultVersion() { + if (ExtensionHelper::IsRelease(DuckDB::LibraryVersion())) { + return DuckDB::LibraryVersion(); + } + return DuckDB::SourceID(); +} + } // namespace duckdb diff --git a/src/duckdb/src/main/extension/extension_helper.cpp b/src/duckdb/src/main/extension/extension_helper.cpp index 015d75882..c7b613226 100644 --- a/src/duckdb/src/main/extension/extension_helper.cpp +++ b/src/duckdb/src/main/extension/extension_helper.cpp @@ -114,7 +114,6 @@ static const DefaultExtension internal_extensions[] = { {"postgres_scanner", "Adds support for connecting to a Postgres database", false}, {"inet", "Adds support for IP-related data types and functions", false}, {"spatial", "Geospatial extension that adds support for working with spatial data and functions", false}, - {"substrait", "Adds support for the Substrait integration", false}, {"aws", "Provides features that depend on the AWS SDK", false}, {"arrow", "A zero-copy data integration between Apache Arrow and DuckDB", false}, {"azure", "Adds a filesystem abstraction for Azure blob storage to DuckDB", false}, @@ -140,7 +139,7 @@ DefaultExtension ExtensionHelper::GetDefaultExtension(idx_t index) { // Allow Auto-Install Extensions //===--------------------------------------------------------------------===// static const char *const auto_install[] = {"motherduck", "postgres_scanner", "mysql_scanner", "sqlite_scanner", - nullptr}; + "delta", "iceberg", "uc_catalog", nullptr}; // TODO: unify with new autoload mechanism bool ExtensionHelper::AllowAutoInstall(const string &extension) { diff --git a/src/duckdb/src/main/extension/extension_load.cpp b/src/duckdb/src/main/extension/extension_load.cpp index 23101f722..84b28fef0 100644 --- a/src/duckdb/src/main/extension/extension_load.cpp +++ b/src/duckdb/src/main/extension/extension_load.cpp @@ -399,15 +399,13 @@ bool ExtensionHelper::TryInitialLoad(DatabaseInstance &db, FileSystem &fs, const signature_valid = false; } - if (!signature_valid) { - throw IOException(db.config.error_manager->FormatException(ErrorType::UNSIGNED_EXTENSION, filename) + - metadata_mismatch_error); - } - if (!metadata_mismatch_error.empty()) { - // Signed extensions perform the full check throw InvalidInputException(metadata_mismatch_error); } + + if (!signature_valid) { + throw IOException(db.config.error_manager->FormatException(ErrorType::UNSIGNED_EXTENSION, filename)); + } } else if (!db.config.options.allow_extensions_metadata_mismatch) { if (!metadata_mismatch_error.empty()) { // Unsigned extensions AND configuration allowing n, loading allowed, mainly for diff --git a/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp b/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp index 1c9bc238b..e5995d291 100644 --- a/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +++ b/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp @@ -436,11 +436,13 @@ BindResult ExpressionBinder::BindExpression(ColumnRefExpression &col_ref_p, idx_ if (found_alias) { return alias_result; } - - // column was not found - check if it is a SQL value function - auto value_function = GetSQLValueFunction(col_ref_p.GetColumnName()); - if (value_function) { - return BindExpression(value_function, depth); + found_alias = QualifyColumnAlias(col_ref_p); + if (!found_alias) { + // column was not found - check if it is a SQL value function + auto value_function = GetSQLValueFunction(col_ref_p.GetColumnName()); + if (value_function) { + return BindExpression(value_function, depth); + } } } error.AddQueryLocation(col_ref_p); diff --git a/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp b/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp index 3f4c249bd..26dd86c9d 100644 --- a/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +++ b/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp @@ -203,7 +203,9 @@ unique_ptr Binder::BindTableFunctionInternal(TableFunction &tab table_function.function_info.get(), this, table_function, ref); if (table_function.bind_replace) { auto new_plan = table_function.bind_replace(context, bind_input); - if (new_plan != nullptr) { + if (new_plan) { + new_plan->alias = ref.alias; + new_plan->column_name_alias = ref.column_name_alias; return CreatePlan(*Bind(*new_plan)); } else if (!table_function.bind) { throw BinderException("Failed to bind \"%s\": nullptr returned from bind_replace without bind function", diff --git a/src/duckdb/src/planner/expression/bound_function_expression.cpp b/src/duckdb/src/planner/expression/bound_function_expression.cpp index 1be272540..be146bd50 100644 --- a/src/duckdb/src/planner/expression/bound_function_expression.cpp +++ b/src/duckdb/src/planner/expression/bound_function_expression.cpp @@ -109,12 +109,28 @@ void BoundFunctionExpression::Serialize(Serializer &serializer) const { unique_ptr BoundFunctionExpression::Deserialize(Deserializer &deserializer) { auto return_type = deserializer.ReadProperty(200, "return_type"); auto children = deserializer.ReadProperty>>(201, "children"); + auto entry = FunctionSerializer::Deserialize( deserializer, CatalogType::SCALAR_FUNCTION_ENTRY, children, return_type); auto function_return_type = entry.first.return_type; + + auto is_operator = deserializer.ReadProperty(202, "is_operator"); + + if (entry.first.bind_expression) { + // bind the function expression + auto &context = deserializer.Get(); + auto bind_input = FunctionBindExpressionInput(context, entry.second, children); + // replace the function expression with the bound expression + auto bound_expression = entry.first.bind_expression(bind_input); + if (bound_expression) { + return bound_expression; + } + // Otherwise, fall thorugh and continue on normally + } + auto result = make_uniq(std::move(function_return_type), std::move(entry.first), std::move(children), std::move(entry.second)); - deserializer.ReadProperty(202, "is_operator", result->is_operator); + result->is_operator = is_operator; if (result->return_type != return_type) { // return type mismatch - push a cast auto &context = deserializer.Get(); diff --git a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp index de69a2f94..07c7c280c 100644 --- a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +++ b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp @@ -348,17 +348,17 @@ #include "extension/icu/third_party/icu/i18n/wintzimpl.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp" - #include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp" - -#include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp" - #include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp" #include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp" + #include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp" + +#include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp" + From 7c6926dc730cc40ddf85c9c7b2d5f7b8518ab442 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Fri, 24 Jan 2025 00:33:17 +0000 Subject: [PATCH 2/4] Update vendored DuckDB sources to d5895cf7 --- .../extension/parquet/parquet_extension.cpp | 22 ++--- .../src/common/serializer/serializer.cpp | 2 +- .../scanner/column_count_scanner.cpp | 12 ++- .../scanner/string_value_scanner.cpp | 11 ++- .../operator/csv_scanner/util/csv_error.cpp | 13 ++- .../function/table/version/pragma_version.cpp | 6 +- .../duckdb/common/multi_file_reader.hpp | 2 - .../duckdb/common/serializer/deserializer.hpp | 6 +- .../duckdb/common/serializer/serializer.hpp | 6 +- .../src/include/duckdb/planner/binder.hpp | 2 + .../src/main/extension/extension_install.cpp | 13 ++- .../planner/binder/statement/bind_create.cpp | 89 +++++++++++-------- 12 files changed, 109 insertions(+), 75 deletions(-) diff --git a/src/duckdb/extension/parquet/parquet_extension.cpp b/src/duckdb/extension/parquet/parquet_extension.cpp index a9e8b20e4..d4a08fb82 100644 --- a/src/duckdb/extension/parquet/parquet_extension.cpp +++ b/src/duckdb/extension/parquet/parquet_extension.cpp @@ -1500,16 +1500,16 @@ static void ParquetCopySerialize(Serializer &serializer, const FunctionData &bin ParquetWriteBindData default_value; serializer.WritePropertyWithDefault(109, "compression_level", compression_level); serializer.WritePropertyWithDefault(110, "row_groups_per_file", bind_data.row_groups_per_file, - std::move(default_value.row_groups_per_file)); + default_value.row_groups_per_file); serializer.WritePropertyWithDefault(111, "debug_use_openssl", bind_data.debug_use_openssl, - std::move(default_value.debug_use_openssl)); + default_value.debug_use_openssl); serializer.WritePropertyWithDefault(112, "dictionary_size_limit", bind_data.dictionary_size_limit, - std::move(default_value.dictionary_size_limit)); + default_value.dictionary_size_limit); serializer.WritePropertyWithDefault(113, "bloom_filter_false_positive_ratio", bind_data.bloom_filter_false_positive_ratio, - std::move(default_value.bloom_filter_false_positive_ratio)); + default_value.bloom_filter_false_positive_ratio); serializer.WritePropertyWithDefault(114, "parquet_version", bind_data.parquet_version, - std::move(default_value.parquet_version)); + default_value.parquet_version); } static unique_ptr ParquetCopyDeserialize(Deserializer &deserializer, CopyFunction &function) { @@ -1531,15 +1531,15 @@ static unique_ptr ParquetCopyDeserialize(Deserializer &deserialize D_ASSERT(SerializeCompressionLevel(data->compression_level) == compression_level); ParquetWriteBindData default_value; data->row_groups_per_file = deserializer.ReadPropertyWithExplicitDefault( - 110, "row_groups_per_file", std::move(default_value.row_groups_per_file)); - data->debug_use_openssl = deserializer.ReadPropertyWithExplicitDefault( - 111, "debug_use_openssl", std::move(default_value.debug_use_openssl)); + 110, "row_groups_per_file", default_value.row_groups_per_file); + data->debug_use_openssl = + deserializer.ReadPropertyWithExplicitDefault(111, "debug_use_openssl", default_value.debug_use_openssl); data->dictionary_size_limit = deserializer.ReadPropertyWithExplicitDefault( - 112, "dictionary_size_limit", std::move(default_value.dictionary_size_limit)); + 112, "dictionary_size_limit", default_value.dictionary_size_limit); data->bloom_filter_false_positive_ratio = deserializer.ReadPropertyWithExplicitDefault( - 113, "bloom_filter_false_positive_ratio", std::move(default_value.bloom_filter_false_positive_ratio)); + 113, "bloom_filter_false_positive_ratio", default_value.bloom_filter_false_positive_ratio); data->parquet_version = - deserializer.ReadPropertyWithExplicitDefault(114, "parquet_version", std::move(default_value.parquet_version)); + deserializer.ReadPropertyWithExplicitDefault(114, "parquet_version", default_value.parquet_version); return std::move(data); } diff --git a/src/duckdb/src/common/serializer/serializer.cpp b/src/duckdb/src/common/serializer/serializer.cpp index 91a7f4772..d0f242a3e 100644 --- a/src/duckdb/src/common/serializer/serializer.cpp +++ b/src/duckdb/src/common/serializer/serializer.cpp @@ -15,7 +15,7 @@ void Serializer::WriteValue(const vector &vec) { template <> void Serializer::WritePropertyWithDefault(const field_id_t field_id, const char *tag, const Value &value, - const Value &&default_value) { + const Value &default_value) { // If current value is default, don't write it if (!options.serialize_default_values && ValueOperations::NotDistinctFrom(value, default_value)) { OnOptionalPropertyBegin(field_id, tag, false); diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp index 2b3361533..b4b25f008 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp @@ -105,7 +105,13 @@ ColumnCountScanner::ColumnCountScanner(shared_ptr buffer_manag : BaseScanner(std::move(buffer_manager), state_machine, std::move(error_handler), true, nullptr, iterator), result(states, *state_machine, result_size_p), column_count(1), result_size(result_size_p) { sniffing = true; - result.last_position = {0, 0, cur_buffer_handle->actual_size}; + idx_t actual_size = 0; + if (cur_buffer_handle) { + actual_size = cur_buffer_handle->actual_size; + } + result.last_position = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, actual_size}; + result.current_buffer_size = actual_size; + result.cur_buffer_idx = iterator.pos.buffer_idx; } unique_ptr ColumnCountScanner::UpgradeToStringValueScanner() { @@ -124,7 +130,9 @@ unique_ptr ColumnCountScanner::UpgradeToStringValueScanner() ColumnCountResult &ColumnCountScanner::ParseChunk() { result.result_position = 0; column_count = 1; - result.current_buffer_size = cur_buffer_handle->actual_size; + if (cur_buffer_handle) { + result.current_buffer_size = cur_buffer_handle->actual_size; + } ParseChunkInternal(result); return result; } diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index f48339a02..670b3fda9 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -488,11 +488,15 @@ void StringValueResult::Reset() { cur_buffer = buffer_handles[iterator.GetBufferIdx()]; } buffer_handles.clear(); + idx_t actual_size = 0; if (cur_buffer) { buffer_handles[cur_buffer->buffer_idx] = cur_buffer; + actual_size = cur_buffer->actual_size; } current_errors.Reset(); borked_rows.clear(); + current_line_position.begin = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, actual_size}; + current_line_position.end = current_line_position.begin; } void StringValueResult::AddQuotedValue(StringValueResult &result, const idx_t buffer_pos) { @@ -1524,8 +1528,8 @@ bool StringValueScanner::FirstValueEndsOnQuote(CSVIterator iterator) const { const idx_t to_pos = iterator.GetEndPos(); while (iterator.pos.buffer_pos < to_pos) { state_machine->Transition(current_state, buffer_handle_ptr[iterator.pos.buffer_pos++]); - if ((current_state.IsState(CSVState::DELIMITER) || current_state.IsState(CSVState::CARRIAGE_RETURN) || - current_state.IsState(CSVState::RECORD_SEPARATOR))) { + if (current_state.IsState(CSVState::DELIMITER) || current_state.IsState(CSVState::CARRIAGE_RETURN) || + current_state.IsState(CSVState::RECORD_SEPARATOR)) { return buffer_handle_ptr[iterator.pos.buffer_pos - 2] == state_machine->dialect_options.state_machine_options.quote.GetValue(); } @@ -1704,6 +1708,9 @@ void StringValueScanner::SetStart() { if (!best_row.is_valid && !quoted_row.is_valid && best_row.start_pos < quoted_row.start_pos) { best_row = quoted_row; } + if (quoted_row.is_valid && quoted_row.start_pos < best_row.start_pos) { + best_row = quoted_row; + } } // 3. We are in an escaped value if (!best_row.is_valid && state_machine->dialect_options.state_machine_options.escape.GetValue() != '\0' && diff --git a/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp index d4443c173..35e691153 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -194,11 +194,18 @@ void CSVErrorHandler::FillRejectsTable(InternalAppender &errors_appender, const errors_appender.Append(Value()); break; case CSVErrorType::TOO_FEW_COLUMNS: - D_ASSERT(bind_data.return_names.size() > col_idx + 1); - errors_appender.Append(string_t(bind_data.return_names[col_idx + 1])); + if (col_idx + 1 < bind_data.return_names.size()) { + errors_appender.Append(string_t(bind_data.return_names[col_idx + 1])); + } else { + errors_appender.Append(Value()); + } break; default: - errors_appender.Append(string_t(bind_data.return_names[col_idx])); + if (col_idx < bind_data.return_names.size()) { + errors_appender.Append(string_t(bind_data.return_names[col_idx])); + } else { + errors_appender.Append(Value()); + } } // 8. Error Type errors_appender.Append(string_t(CSVErrorTypeToEnum(error.type))); diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 5cdd0b306..f64967275 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4889" +#define DUCKDB_PATCH_VERSION "4-dev4923" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4889" +#define DUCKDB_VERSION "v1.1.4-dev4923" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "5d02d69e5c" +#define DUCKDB_SOURCE_ID "d0c4cf8a28" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp b/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp index 5778d77ba..1c73b855c 100644 --- a/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +++ b/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp @@ -47,7 +47,6 @@ struct MultiFileReaderColumnDefinition { identifier(other.identifier) { } - MultiFileReaderColumnDefinition(MultiFileReaderColumnDefinition &&other) noexcept = default; MultiFileReaderColumnDefinition &operator=(const MultiFileReaderColumnDefinition &other) { if (this != &other) { name = other.name; @@ -58,7 +57,6 @@ struct MultiFileReaderColumnDefinition { } return *this; } - MultiFileReaderColumnDefinition &operator=(MultiFileReaderColumnDefinition &&other) noexcept = default; public: static vector ColumnsFromNamesAndTypes(const vector &names, diff --git a/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp b/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp index fbfde9fe3..03f30544f 100644 --- a/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +++ b/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp @@ -81,7 +81,7 @@ class Deserializer { } template - inline T ReadPropertyWithExplicitDefault(const field_id_t field_id, const char *tag, T &&default_value) { + inline T ReadPropertyWithExplicitDefault(const field_id_t field_id, const char *tag, T default_value) { if (!OnOptionalPropertyBegin(field_id, tag)) { OnOptionalPropertyEnd(false); return std::forward(default_value); @@ -104,7 +104,7 @@ class Deserializer { } template - inline void ReadPropertyWithExplicitDefault(const field_id_t field_id, const char *tag, T &ret, T &&default_value) { + inline void ReadPropertyWithExplicitDefault(const field_id_t field_id, const char *tag, T &ret, T default_value) { if (!OnOptionalPropertyBegin(field_id, tag)) { ret = std::forward(default_value); OnOptionalPropertyEnd(false); @@ -116,7 +116,7 @@ class Deserializer { template inline void ReadPropertyWithExplicitDefault(const field_id_t field_id, const char *tag, CSVOption &ret, - T &&default_value) { + T default_value) { if (!OnOptionalPropertyBegin(field_id, tag)) { ret = std::forward(default_value); OnOptionalPropertyEnd(false); diff --git a/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp b/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp index 2c2ea5bd7..97aeef51a 100644 --- a/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +++ b/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp @@ -105,7 +105,7 @@ class Serializer { } template - void WritePropertyWithDefault(const field_id_t field_id, const char *tag, const T &value, const T &&default_value) { + void WritePropertyWithDefault(const field_id_t field_id, const char *tag, const T &value, const T &default_value) { // If current value is default, don't write it if (!options.serialize_default_values && (value == default_value)) { OnOptionalPropertyBegin(field_id, tag, false); @@ -120,7 +120,7 @@ class Serializer { // Specialization for Value (default Value comparison throws when comparing nulls) template void WritePropertyWithDefault(const field_id_t field_id, const char *tag, const CSVOption &value, - const T &&default_value) { + const T &default_value) { // If current value is default, don't write it if (!options.serialize_default_values && (value == default_value)) { OnOptionalPropertyBegin(field_id, tag, false); @@ -383,7 +383,7 @@ void Serializer::WriteValue(const vector &vec); // Specialization for Value (default Value comparison throws when comparing nulls) template <> void Serializer::WritePropertyWithDefault(const field_id_t field_id, const char *tag, const Value &value, - const Value &&default_value); + const Value &default_value); // List Impl template diff --git a/src/duckdb/src/include/duckdb/planner/binder.hpp b/src/duckdb/src/include/duckdb/planner/binder.hpp index 0cd7c0e6a..c0364c88a 100644 --- a/src/duckdb/src/include/duckdb/planner/binder.hpp +++ b/src/duckdb/src/include/duckdb/planner/binder.hpp @@ -419,6 +419,8 @@ class Binder : public enable_shared_from_this { const string BindCatalog(string &catalog_name); SchemaCatalogEntry &BindCreateSchema(CreateInfo &info); + LogicalType BindLogicalTypeInternal(const LogicalType &type, optional_ptr catalog, const string &schema); + unique_ptr BindSelectNode(SelectNode &statement, unique_ptr from_table); unique_ptr BindCopyDatabaseSchema(Catalog &source_catalog, const string &target_database_name); diff --git a/src/duckdb/src/main/extension/extension_install.cpp b/src/duckdb/src/main/extension/extension_install.cpp index 5675bb9ac..e8ab595ab 100644 --- a/src/duckdb/src/main/extension/extension_install.cpp +++ b/src/duckdb/src/main/extension/extension_install.cpp @@ -90,6 +90,12 @@ string ExtensionHelper::GetExtensionDirectoryPath(DatabaseInstance &db, FileSyst extension_directory = fs.ConvertSeparators(extension_directory); // expand ~ in extension directory extension_directory = fs.ExpandPath(extension_directory); + + auto path_components = PathComponents(); + for (auto &path_ele : path_components) { + extension_directory = fs.JoinPath(extension_directory, path_ele); + } + return extension_directory; } @@ -117,13 +123,6 @@ string ExtensionHelper::ExtensionDirectory(DatabaseInstance &db, FileSystem &fs) } D_ASSERT(fs.DirectoryExists(extension_directory)); - auto path_components = PathComponents(); - for (auto &path_ele : path_components) { - extension_directory = fs.JoinPath(extension_directory, path_ele); - if (!fs.DirectoryExists(extension_directory)) { - fs.CreateDirectory(extension_directory); - } - } return extension_directory; } diff --git a/src/duckdb/src/planner/binder/statement/bind_create.cpp b/src/duckdb/src/planner/binder/statement/bind_create.cpp index 49c2f37b7..4f1a43a30 100644 --- a/src/duckdb/src/planner/binder/statement/bind_create.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_create.cpp @@ -41,6 +41,7 @@ #include "duckdb/planner/tableref/bound_basetableref.hpp" #include "duckdb/storage/storage_extension.hpp" #include "duckdb/common/extension_type_info.hpp" +#include "duckdb/common/type_visitor.hpp" namespace duckdb { @@ -253,54 +254,56 @@ static bool IsValidUserType(optional_ptr entry) { return entry->Cast().user_type.id() != LogicalTypeId::INVALID; } -void Binder::BindLogicalType(LogicalType &type, optional_ptr catalog, const string &schema) { +LogicalType Binder::BindLogicalTypeInternal(const LogicalType &type, optional_ptr catalog, + const string &schema) { if (type.id() != LogicalTypeId::USER) { - // Recursive types, make sure to bind any nested user types recursively - auto alias = type.GetAlias(); - auto ext_info = type.HasExtensionInfo() ? make_uniq(*type.GetExtensionInfo()) : nullptr; - + // Nested type, make sure to bind any nested user types recursively + LogicalType result; switch (type.id()) { case LogicalTypeId::LIST: { - auto child_type = ListType::GetChildType(type); - BindLogicalType(child_type, catalog, schema); - type = LogicalType::LIST(child_type); - } break; + auto child_type = BindLogicalTypeInternal(ListType::GetChildType(type), catalog, schema); + result = LogicalType::LIST(child_type); + break; + } case LogicalTypeId::MAP: { - auto key_type = MapType::KeyType(type); - BindLogicalType(key_type, catalog, schema); - auto value_type = MapType::ValueType(type); - BindLogicalType(value_type, catalog, schema); - type = LogicalType::MAP(key_type, value_type); - } break; + auto key_type = BindLogicalTypeInternal(MapType::KeyType(type), catalog, schema); + auto value_type = BindLogicalTypeInternal(MapType::ValueType(type), catalog, schema); + result = LogicalType::MAP(std::move(key_type), std::move(value_type)); + break; + } case LogicalTypeId::ARRAY: { - auto child_type = ArrayType::GetChildType(type); + auto child_type = BindLogicalTypeInternal(ArrayType::GetChildType(type), catalog, schema); auto array_size = ArrayType::GetSize(type); - BindLogicalType(child_type, catalog, schema); - type = LogicalType::ARRAY(child_type, array_size); - } break; + result = LogicalType::ARRAY(child_type, array_size); + break; + } case LogicalTypeId::STRUCT: { auto child_types = StructType::GetChildTypes(type); - for (auto &child_type : child_types) { - BindLogicalType(child_type.second, catalog, schema); + child_list_t new_child_types; + for (auto &entry : child_types) { + new_child_types.emplace_back(entry.first, BindLogicalTypeInternal(entry.second, catalog, schema)); } - type = LogicalType::STRUCT(child_types); - } break; + result = LogicalType::STRUCT(std::move(new_child_types)); + break; + } case LogicalTypeId::UNION: { - auto member_types = UnionType::CopyMemberTypes(type); - for (auto &member_type : member_types) { - BindLogicalType(member_type.second, catalog, schema); + child_list_t member_types; + for (idx_t i = 0; i < UnionType::GetMemberCount(type); i++) { + auto child_type = BindLogicalTypeInternal(UnionType::GetMemberType(type, i), catalog, schema); + member_types.emplace_back(UnionType::GetMemberName(type, i), std::move(child_type)); } - type = LogicalType::UNION(member_types); - } break; - default: + result = LogicalType::UNION(std::move(member_types)); break; } + default: + return type; + } // Set the alias and extension info back - type.SetAlias(alias); - type.SetExtensionInfo(std::move(ext_info)); - - return; + result.SetAlias(type.GetAlias()); + auto ext_info = type.HasExtensionInfo() ? make_uniq(*type.GetExtensionInfo()) : nullptr; + result.SetExtensionInfo(std::move(ext_info)); + return result; } // User type, bind the user type @@ -310,6 +313,7 @@ void Binder::BindLogicalType(LogicalType &type, optional_ptr catalog, c bind_logical_type_function_t user_bind_modifiers_func = nullptr; + LogicalType result; if (catalog) { // The search order is: // 1) In the explicitly set schema (my_schema.my_type) @@ -335,7 +339,7 @@ void Binder::BindLogicalType(LogicalType &type, optional_ptr catalog, c OnEntryNotFound::THROW_EXCEPTION); } auto &type_entry = entry->Cast(); - type = type_entry.user_type; + result = type_entry.user_type; user_bind_modifiers_func = type_entry.bind_function; } else { string type_catalog = UserType::GetCatalog(type); @@ -344,24 +348,33 @@ void Binder::BindLogicalType(LogicalType &type, optional_ptr catalog, c BindSchemaOrCatalog(context, type_catalog, type_schema); auto entry = entry_retriever.GetEntry(CatalogType::TYPE_ENTRY, type_catalog, type_schema, user_type_name); auto &type_entry = entry->Cast(); - type = type_entry.user_type; + result = type_entry.user_type; user_bind_modifiers_func = type_entry.bind_function; } // Now we bind the inner user type - BindLogicalType(type, catalog, schema); + BindLogicalType(result, catalog, schema); // Apply the type modifiers (if any) if (user_bind_modifiers_func) { // If an explicit bind_modifiers function was provided, use that to construct the type - BindLogicalTypeInput input {context, type, user_type_mods}; - type = user_bind_modifiers_func(input); + BindLogicalTypeInput input {context, result, user_type_mods}; + result = user_bind_modifiers_func(input); } else { if (!user_type_mods.empty()) { throw BinderException("Type '%s' does not take any type modifiers", user_type_name); } } + return result; +} + +void Binder::BindLogicalType(LogicalType &type, optional_ptr catalog, const string &schema) { + // check if we need to bind this type at all + if (!TypeVisitor::Contains(type, LogicalTypeId::USER)) { + return; + } + type = BindLogicalTypeInternal(type, catalog, schema); } unique_ptr DuckCatalog::BindCreateIndex(Binder &binder, CreateStatement &stmt, From c415a3321b99831aa142b3016efa62443561fd20 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Sat, 25 Jan 2025 00:32:23 +0000 Subject: [PATCH 3/4] Update vendored DuckDB sources to 7c6926dc --- .../extension/parquet/column_writer.cpp | 2 +- .../src/common/arrow/arrow_converter.cpp | 4 +- .../src/common/arrow/arrow_type_extension.cpp | 51 +++++++++----- src/duckdb/src/common/error_data.cpp | 2 +- src/duckdb/src/common/types.cpp | 2 +- .../execution/index/fixed_size_allocator.cpp | 60 ++++++++-------- .../src/execution/index/fixed_size_buffer.cpp | 3 +- .../scanner/string_value_scanner.cpp | 52 +++++++------- .../csv_scanner/sniffer/dialect_detection.cpp | 10 +-- .../state_machine/csv_state_machine_cache.cpp | 16 ++--- .../operator/csv_scanner/util/csv_error.cpp | 35 +++++++--- .../csv_scanner/util/csv_reader_options.cpp | 21 +++--- .../src/execution/sample/reservoir_sample.cpp | 6 +- .../table/arrow/arrow_duck_schema.cpp | 7 +- .../src/function/table/arrow_conversion.cpp | 20 +++--- src/duckdb/src/function/table/copy_csv.cpp | 2 +- src/duckdb/src/function/table/read_csv.cpp | 2 +- .../function/table/version/pragma_version.cpp | 6 +- .../src/function/window/window_token_tree.cpp | 7 +- .../common/arrow/arrow_type_extension.hpp | 4 +- .../execution/index/fixed_size_allocator.hpp | 8 +-- .../execution/index/fixed_size_buffer.hpp | 69 ++++++++++--------- .../csv_scanner/state_machine_options.hpp | 10 +-- .../csv_scanner/string_value_scanner.hpp | 2 +- .../src/include/duckdb/logging/logging.hpp | 2 - src/duckdb/src/include/duckdb/main/config.hpp | 1 + .../duckdb/storage/string_uncompressed.hpp | 7 +- src/duckdb/src/main/client_context.cpp | 4 +- src/duckdb/src/storage/compression/fsst.cpp | 4 +- .../compression/string_uncompressed.cpp | 17 +++-- src/duckdb/src/storage/magic_bytes.cpp | 2 +- .../storage/serialization/serialize_nodes.cpp | 6 +- 32 files changed, 246 insertions(+), 198 deletions(-) diff --git a/src/duckdb/extension/parquet/column_writer.cpp b/src/duckdb/extension/parquet/column_writer.cpp index 5ba51a7c1..2502484b5 100644 --- a/src/duckdb/extension/parquet/column_writer.cpp +++ b/src/duckdb/extension/parquet/column_writer.cpp @@ -1173,7 +1173,7 @@ void WriteValue(DlbaEncoder &encoder, WriteStream &writer, const string_t &value // helpers to get size from strings template -static constexpr idx_t GetDlbaStringSize(const SRC &src_value) { +static idx_t GetDlbaStringSize(const SRC &src_value) { return 0; } diff --git a/src/duckdb/src/common/arrow/arrow_converter.cpp b/src/duckdb/src/common/arrow/arrow_converter.cpp index c4ad7b01c..02b3999a0 100644 --- a/src/duckdb/src/common/arrow/arrow_converter.cpp +++ b/src/duckdb/src/common/arrow/arrow_converter.cpp @@ -80,8 +80,8 @@ void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, bool SetArrowExtension(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type, ClientContext &context) { auto &config = DBConfig::GetConfig(context); - if (config.HasArrowExtension(type.id())) { - auto arrow_extension = config.GetArrowExtension(type.id()); + if (config.HasArrowExtension(type)) { + auto arrow_extension = config.GetArrowExtension(type); arrow_extension.PopulateArrowSchema(root_holder, child, type, context, arrow_extension); return true; } diff --git a/src/duckdb/src/common/arrow/arrow_type_extension.cpp b/src/duckdb/src/common/arrow/arrow_type_extension.cpp index de2e06970..dee514cad 100644 --- a/src/duckdb/src/common/arrow/arrow_type_extension.cpp +++ b/src/duckdb/src/common/arrow/arrow_type_extension.cpp @@ -126,14 +126,14 @@ ArrowExtensionMetadata ArrowTypeExtension::GetInfo() const { return extension_metadata; } -shared_ptr ArrowTypeExtension::GetType(const ArrowSchema &schema, +unique_ptr ArrowTypeExtension::GetType(const ArrowSchema &schema, const ArrowSchemaMetadata &schema_metadata) const { if (get_type) { return get_type(schema, schema_metadata); } // FIXME: THis is not good auto duckdb_type = type_extension->GetDuckDBType(); - return make_shared_ptr(duckdb_type); + return make_uniq(duckdb_type); } shared_ptr ArrowTypeExtension::GetTypeExtension() const { @@ -239,18 +239,33 @@ bool DBConfig::HasArrowExtension(const LogicalType &type) const { return !arrow_extensions->type_to_info[type_info].empty(); } +bool DBConfig::HasArrowExtension(ArrowExtensionMetadata info) const { + lock_guard l(arrow_extensions->lock); + auto type_extensions = arrow_extensions->type_extensions; + + if (type_extensions.find(info) != type_extensions.end()) { + return true; + } + + auto og_info = info; + info.SetArrowFormat(""); + if (type_extensions.find(info) != type_extensions.end()) { + return true; + } + + return false; +} + struct ArrowJson { - static shared_ptr GetType(const ArrowSchema &schema, const ArrowSchemaMetadata &schema_metadata) { + static unique_ptr GetType(const ArrowSchema &schema, const ArrowSchemaMetadata &schema_metadata) { const auto format = string(schema.format); if (format == "u") { - return make_shared_ptr(LogicalType::JSON(), - make_uniq(ArrowVariableSizeType::NORMAL)); + return make_uniq(LogicalType::JSON(), make_uniq(ArrowVariableSizeType::NORMAL)); } else if (format == "U") { - return make_shared_ptr(LogicalType::JSON(), - make_uniq(ArrowVariableSizeType::NORMAL)); + return make_uniq(LogicalType::JSON(), + make_uniq(ArrowVariableSizeType::SUPER_SIZE)); } else if (format == "vu") { - return make_shared_ptr(LogicalType::JSON(), - make_uniq(ArrowVariableSizeType::NORMAL)); + return make_uniq(LogicalType::JSON(), make_uniq(ArrowVariableSizeType::VIEW)); } throw InvalidInputException("Arrow extension type \"%s\" not supported for arrow.json", format.c_str()); } @@ -275,14 +290,13 @@ struct ArrowJson { }; struct ArrowBit { - static shared_ptr GetType(const ArrowSchema &schema, const ArrowSchemaMetadata &schema_metadata) { + static unique_ptr GetType(const ArrowSchema &schema, const ArrowSchemaMetadata &schema_metadata) { const auto format = string(schema.format); if (format == "z") { - return make_shared_ptr(LogicalType::BIT, - make_uniq(ArrowVariableSizeType::NORMAL)); + return make_uniq(LogicalType::BIT, make_uniq(ArrowVariableSizeType::NORMAL)); } else if (format == "Z") { - return make_shared_ptr(LogicalType::BIT, - make_uniq(ArrowVariableSizeType::SUPER_SIZE)); + return make_uniq(LogicalType::BIT, + make_uniq(ArrowVariableSizeType::SUPER_SIZE)); } throw InvalidInputException("Arrow extension type \"%s\" not supported for BIT type", format.c_str()); } @@ -303,14 +317,13 @@ struct ArrowBit { }; struct ArrowVarint { - static shared_ptr GetType(const ArrowSchema &schema, const ArrowSchemaMetadata &schema_metadata) { + static unique_ptr GetType(const ArrowSchema &schema, const ArrowSchemaMetadata &schema_metadata) { const auto format = string(schema.format); if (format == "z") { - return make_shared_ptr(LogicalType::VARINT, - make_uniq(ArrowVariableSizeType::NORMAL)); + return make_uniq(LogicalType::VARINT, make_uniq(ArrowVariableSizeType::NORMAL)); } else if (format == "Z") { - return make_shared_ptr(LogicalType::VARINT, - make_uniq(ArrowVariableSizeType::SUPER_SIZE)); + return make_uniq(LogicalType::VARINT, + make_uniq(ArrowVariableSizeType::SUPER_SIZE)); } throw InvalidInputException("Arrow extension type \"%s\" not supported for Varint", format.c_str()); } diff --git a/src/duckdb/src/common/error_data.cpp b/src/duckdb/src/common/error_data.cpp index ee1da95a6..d79799876 100644 --- a/src/duckdb/src/common/error_data.cpp +++ b/src/duckdb/src/common/error_data.cpp @@ -95,7 +95,7 @@ bool ErrorData::operator==(const ErrorData &other) const { } void ErrorData::ConvertErrorToJSON() { - if (raw_message.empty() || raw_message[0] == '{') { + if (!raw_message.empty() && raw_message[0] == '{') { // empty or already JSON return; } diff --git a/src/duckdb/src/common/types.cpp b/src/duckdb/src/common/types.cpp index 342a84bc0..4ec1ce3f3 100644 --- a/src/duckdb/src/common/types.cpp +++ b/src/duckdb/src/common/types.cpp @@ -1314,7 +1314,7 @@ void LogicalType::Verify() const { switch (id_) { case LogicalTypeId::DECIMAL: D_ASSERT(DecimalType::GetWidth(*this) >= 1 && DecimalType::GetWidth(*this) <= Decimal::MAX_WIDTH_DECIMAL); - D_ASSERT(DecimalType::GetScale(*this) >= 0 && DecimalType::GetScale(*this) <= DecimalType::GetWidth(*this)); + D_ASSERT(DecimalType::GetScale(*this) <= DecimalType::GetWidth(*this)); break; case LogicalTypeId::STRUCT: { // verify child types diff --git a/src/duckdb/src/execution/index/fixed_size_allocator.cpp b/src/duckdb/src/execution/index/fixed_size_allocator.cpp index 24ea09181..f026ddd98 100644 --- a/src/duckdb/src/execution/index/fixed_size_allocator.cpp +++ b/src/duckdb/src/execution/index/fixed_size_allocator.cpp @@ -49,14 +49,13 @@ IndexPointer FixedSizeAllocator::New() { // add a new buffer auto buffer_id = GetAvailableBufferId(); - FixedSizeBuffer new_buffer(block_manager); - buffers.insert(make_pair(buffer_id, std::move(new_buffer))); + buffers[buffer_id] = make_uniq(block_manager); buffers_with_free_space.insert(buffer_id); // set the bitmask D_ASSERT(buffers.find(buffer_id) != buffers.end()); auto &buffer = buffers.find(buffer_id)->second; - ValidityMask mask(reinterpret_cast(buffer.Get()), available_segments_per_buffer); + ValidityMask mask(reinterpret_cast(buffer->Get()), available_segments_per_buffer); // zero-initialize the bitmask to avoid leaking memory to disk auto data = mask.GetData(); @@ -74,16 +73,16 @@ IndexPointer FixedSizeAllocator::New() { D_ASSERT(buffers.find(buffer_id) != buffers.end()); auto &buffer = buffers.find(buffer_id)->second; - auto offset = buffer.GetOffset(bitmask_count, available_segments_per_buffer); + auto offset = buffer->GetOffset(bitmask_count, available_segments_per_buffer); total_segment_count++; - buffer.segment_count++; - if (buffer.segment_count == available_segments_per_buffer) { + buffer->segment_count++; + if (buffer->segment_count == available_segments_per_buffer) { buffers_with_free_space.erase(buffer_id); } // zero-initialize that segment - auto buffer_ptr = buffer.Get(); + auto buffer_ptr = buffer->Get(); auto offset_in_buffer = buffer_ptr + offset * segment_size + bitmask_offset; memset(offset_in_buffer, 0, segment_size); @@ -98,24 +97,21 @@ void FixedSizeAllocator::Free(const IndexPointer ptr) { D_ASSERT(buffers.find(buffer_id) != buffers.end()); auto &buffer = buffers.find(buffer_id)->second; - auto bitmask_ptr = reinterpret_cast(buffer.Get()); + auto bitmask_ptr = reinterpret_cast(buffer->Get()); ValidityMask mask(bitmask_ptr, offset + 1); // FIXME D_ASSERT(!mask.RowIsValid(offset)); mask.SetValid(offset); D_ASSERT(total_segment_count > 0); - D_ASSERT(buffer.segment_count > 0); + D_ASSERT(buffer->segment_count > 0); // adjust the allocator fields buffers_with_free_space.insert(buffer_id); total_segment_count--; - buffer.segment_count--; + buffer->segment_count--; } void FixedSizeAllocator::Reset() { - for (auto &buffer : buffers) { - buffer.second.Destroy(); - } buffers.clear(); buffers_with_free_space.clear(); total_segment_count = 0; @@ -124,7 +120,7 @@ void FixedSizeAllocator::Reset() { idx_t FixedSizeAllocator::GetInMemorySize() const { idx_t memory_usage = 0; for (auto &buffer : buffers) { - if (buffer.second.InMemory()) { + if (buffer.second->InMemory()) { memory_usage += block_manager.GetBlockSize(); } } @@ -179,9 +175,9 @@ bool FixedSizeAllocator::InitializeVacuum() { idx_t available_segments_in_memory = 0; for (auto &buffer : buffers) { - buffer.second.vacuum = false; - if (buffer.second.InMemory()) { - auto available_segments_in_buffer = available_segments_per_buffer - buffer.second.segment_count; + buffer.second->vacuum = false; + if (buffer.second->InMemory()) { + auto available_segments_in_buffer = available_segments_per_buffer - buffer.second->segment_count; available_segments_in_memory += available_segments_in_buffer; temporary_vacuum_buffers.emplace(available_segments_in_buffer, buffer.first); } @@ -216,7 +212,7 @@ bool FixedSizeAllocator::InitializeVacuum() { for (auto &vacuum_buffer : temporary_vacuum_buffers) { auto buffer_id = vacuum_buffer.second; D_ASSERT(buffers.find(buffer_id) != buffers.end()); - buffers.find(buffer_id)->second.vacuum = true; + buffers.find(buffer_id)->second->vacuum = true; buffers_with_free_space.erase(buffer_id); } @@ -232,8 +228,7 @@ void FixedSizeAllocator::FinalizeVacuum() { for (auto &buffer_id : vacuum_buffers) { D_ASSERT(buffers.find(buffer_id) != buffers.end()); auto &buffer = buffers.find(buffer_id)->second; - D_ASSERT(buffer.InMemory()); - buffer.Destroy(); + D_ASSERT(buffer->InMemory()); buffers.erase(buffer_id); } vacuum_buffers.clear(); @@ -259,9 +254,9 @@ FixedSizeAllocatorInfo FixedSizeAllocator::GetInfo() const { for (const auto &buffer : buffers) { info.buffer_ids.push_back(buffer.first); - info.block_pointers.push_back(buffer.second.block_pointer); - info.segment_counts.push_back(buffer.second.segment_count); - info.allocation_sizes.push_back(buffer.second.allocation_size); + info.block_pointers.push_back(buffer.second->block_pointer); + info.segment_counts.push_back(buffer.second->segment_count); + info.allocation_sizes.push_back(buffer.second->allocation_size); } for (auto &buffer_id : buffers_with_free_space) { @@ -273,7 +268,7 @@ FixedSizeAllocatorInfo FixedSizeAllocator::GetInfo() const { void FixedSizeAllocator::SerializeBuffers(PartialBlockManager &partial_block_manager) { for (auto &buffer : buffers) { - buffer.second.Serialize(partial_block_manager, available_segments_per_buffer, segment_size, bitmask_offset); + buffer.second->Serialize(partial_block_manager, available_segments_per_buffer, segment_size, bitmask_offset); } } @@ -281,8 +276,8 @@ vector FixedSizeAllocator::InitSerializationToWAL() { vector buffer_infos; for (auto &buffer : buffers) { - buffer.second.SetAllocationSize(available_segments_per_buffer, segment_size, bitmask_offset); - buffer_infos.emplace_back(buffer.second.Get(), buffer.second.allocation_size); + buffer.second->SetAllocationSize(available_segments_per_buffer, segment_size, bitmask_offset); + buffer_infos.emplace_back(buffer.second->Get(), buffer.second->allocation_size); } return buffer_infos; } @@ -300,8 +295,8 @@ void FixedSizeAllocator::Init(const FixedSizeAllocatorInfo &info) { auto allocation_size = info.allocation_sizes[i]; // create the FixedSizeBuffer - FixedSizeBuffer new_buffer(block_manager, segment_count, allocation_size, buffer_block_pointer); - buffers.insert(make_pair(buffer_id, std::move(new_buffer))); + buffers[buffer_id] = + make_uniq(block_manager, segment_count, allocation_size, buffer_block_pointer); total_segment_count += segment_count; } @@ -324,8 +319,8 @@ void FixedSizeAllocator::Deserialize(MetadataManager &metadata_manager, const Bl auto buffer_block_pointer = reader.Read(); auto segment_count = reader.Read(); auto allocation_size = reader.Read(); - FixedSizeBuffer new_buffer(block_manager, segment_count, allocation_size, buffer_block_pointer); - buffers.insert(make_pair(buffer_id, std::move(new_buffer))); + buffers[buffer_id] = + make_uniq(block_manager, segment_count, allocation_size, buffer_block_pointer); total_segment_count += segment_count; } for (idx_t i = 0; i < buffers_with_free_space_count; i++) { @@ -346,13 +341,12 @@ void FixedSizeAllocator::RemoveEmptyBuffers() { auto buffer_it = buffers.begin(); while (buffer_it != buffers.end()) { - if (buffer_it->second.segment_count != 0) { - buffer_it++; + if (buffer_it->second->segment_count != 0) { + ++buffer_it; continue; } buffers_with_free_space.erase(buffer_it->first); - buffer_it->second.Destroy(); buffer_it = buffers.erase(buffer_it); } } diff --git a/src/duckdb/src/execution/index/fixed_size_buffer.cpp b/src/duckdb/src/execution/index/fixed_size_buffer.cpp index 336d96396..d334541f6 100644 --- a/src/duckdb/src/execution/index/fixed_size_buffer.cpp +++ b/src/duckdb/src/execution/index/fixed_size_buffer.cpp @@ -54,7 +54,8 @@ FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager, const idx_t segmen D_ASSERT(block_handle->BlockId() < MAXIMUM_BLOCK); } -void FixedSizeBuffer::Destroy() { +FixedSizeBuffer::~FixedSizeBuffer() { + lock_guard l(lock); if (InMemory()) { // we can have multiple readers on a pinned block, and unpinning the buffer handle // decrements the reader count on the underlying block handle (Destroy() unpins) diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 670b3fda9..c922f7294 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -148,7 +148,7 @@ inline bool IsValueNull(const char *null_str_ptr, const char *value_ptr, const i } bool StringValueResult::HandleTooManyColumnsError(const char *value_ptr, const idx_t size) { - if (cur_col_id >= number_of_columns && state_machine.state_machine_options.rfc_4180.GetValue()) { + if (cur_col_id >= number_of_columns && state_machine.state_machine_options.strict_mode.GetValue()) { bool error = true; if (cur_col_id == number_of_columns && ((quoted && state_machine.options.allow_quoted_nulls) || !quoted)) { // we make an exception if the first over-value is null @@ -220,7 +220,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size return; } if (cur_col_id >= number_of_columns) { - if (!state_machine.state_machine_options.rfc_4180.GetValue()) { + if (!state_machine.state_machine_options.strict_mode.GetValue()) { return; } bool error = true; @@ -248,9 +248,9 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size } if (((quoted && state_machine.options.allow_quoted_nulls) || !quoted)) { - // Check for the occurrence of escaped null string like \N only if RFC 4180 conformance is disabled + // Check for the occurrence of escaped null string like \N only if strict_mode is disabled const bool check_unquoted_escaped_null = - state_machine.state_machine_options.rfc_4180.GetValue() == false && escaped && !quoted && size == 1; + state_machine.state_machine_options.strict_mode.GetValue() == false && escaped && !quoted && size == 1; for (idx_t i = 0; i < null_str_count; i++) { bool is_null = false; if (null_str_size[i] == 2 && null_str_ptr[i][0] == state_machine.state_machine_options.escape.GetValue()) { @@ -519,7 +519,7 @@ void StringValueResult::AddPossiblyEscapedValue(StringValueResult &result, const } } if (result.cur_col_id >= result.number_of_columns && - !result.state_machine.state_machine_options.rfc_4180.GetValue()) { + !result.state_machine.state_machine_options.strict_mode.GetValue()) { return; } if (!result.HandleTooManyColumnsError(value_ptr, length)) { @@ -543,7 +543,7 @@ void StringValueResult::AddPossiblyEscapedValue(StringValueResult &result, const auto value = StringValueScanner::RemoveEscape( value_ptr, length, result.state_machine.dialect_options.state_machine_options.escape.GetValue(), result.state_machine.dialect_options.state_machine_options.quote.GetValue(), - result.state_machine.dialect_options.state_machine_options.rfc_4180.GetValue(), + result.state_machine.dialect_options.state_machine_options.strict_mode.GetValue(), result.parse_chunk.data[result.chunk_col_id]); result.AddValueToVector(value.GetData(), value.GetSize()); } @@ -1242,7 +1242,7 @@ void StringValueScanner::ProcessExtraRow() { } } -string_t StringValueScanner::RemoveEscape(const char *str_ptr, idx_t end, char escape, char quote, bool rfc_4180, +string_t StringValueScanner::RemoveEscape(const char *str_ptr, idx_t end, char escape, char quote, bool strict_mode, Vector &vector) { // Figure out the exact size idx_t str_pos = 0; @@ -1251,7 +1251,7 @@ string_t StringValueScanner::RemoveEscape(const char *str_ptr, idx_t end, char e if (str_ptr[cur_pos] == escape && !just_escaped) { just_escaped = true; } else if (str_ptr[cur_pos] == quote) { - if (just_escaped || !rfc_4180) { + if (just_escaped || !strict_mode) { str_pos++; } just_escaped = false; @@ -1271,7 +1271,7 @@ string_t StringValueScanner::RemoveEscape(const char *str_ptr, idx_t end, char e if (c == escape && !just_escaped) { just_escaped = true; } else if (str_ptr[cur_pos] == quote) { - if (just_escaped || !rfc_4180) { + if (just_escaped || !strict_mode) { removed_escapes_ptr[str_pos++] = c; } just_escaped = false; @@ -1370,22 +1370,24 @@ void StringValueScanner::ProcessOverBufferValue() { if (result.escaped) { if (!result.HandleTooManyColumnsError(over_buffer_string.c_str(), over_buffer_string.size())) { const auto str_ptr = over_buffer_string.c_str() + result.quoted_position; - value = RemoveEscape(str_ptr, over_buffer_string.size() - 2, - state_machine->dialect_options.state_machine_options.escape.GetValue(), - state_machine->dialect_options.state_machine_options.quote.GetValue(), - result.state_machine.dialect_options.state_machine_options.rfc_4180.GetValue(), - result.parse_chunk.data[result.chunk_col_id]); + value = + RemoveEscape(str_ptr, over_buffer_string.size() - 2, + state_machine->dialect_options.state_machine_options.escape.GetValue(), + state_machine->dialect_options.state_machine_options.quote.GetValue(), + result.state_machine.dialect_options.state_machine_options.strict_mode.GetValue(), + result.parse_chunk.data[result.chunk_col_id]); } } } else { value = string_t(over_buffer_string.c_str(), UnsafeNumericCast(over_buffer_string.size())); if (result.escaped) { if (!result.HandleTooManyColumnsError(over_buffer_string.c_str(), over_buffer_string.size())) { - value = RemoveEscape(over_buffer_string.c_str(), over_buffer_string.size(), - state_machine->dialect_options.state_machine_options.escape.GetValue(), - state_machine->dialect_options.state_machine_options.quote.GetValue(), - result.state_machine.dialect_options.state_machine_options.rfc_4180.GetValue(), - result.parse_chunk.data[result.chunk_col_id]); + value = + RemoveEscape(over_buffer_string.c_str(), over_buffer_string.size(), + state_machine->dialect_options.state_machine_options.escape.GetValue(), + state_machine->dialect_options.state_machine_options.quote.GetValue(), + result.state_machine.dialect_options.state_machine_options.strict_mode.GetValue(), + result.parse_chunk.data[result.chunk_col_id]); } } } @@ -1462,7 +1464,7 @@ bool StringValueScanner::MoveToNextBuffer() { } lines_read++; } else if (states.IsQuotedCurrent() && - state_machine->dialect_options.state_machine_options.rfc_4180.GetValue()) { + state_machine->dialect_options.state_machine_options.strict_mode.GetValue()) { // Unterminated quote LinePosition current_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, result.buffer_size}; @@ -1474,7 +1476,7 @@ bool StringValueScanner::MoveToNextBuffer() { result.UnsetComment(result, iterator.pos.buffer_pos); } else { if (result.quoted && states.IsDelimiterBytes() && - state_machine->dialect_options.state_machine_options.rfc_4180.GetValue()) { + state_machine->dialect_options.state_machine_options.strict_mode.GetValue()) { result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id, result.last_position); } @@ -1684,9 +1686,9 @@ void StringValueScanner::SetStart() { // We need to initialize our strict state machine auto &state_machine_cache = CSVStateMachineCache::Get(buffer_manager->context); auto state_options = state_machine->state_machine_options; - // To set the state machine to be strict we ensure that rfc_4180 is set to true - if (!state_options.rfc_4180.IsSetByUser()) { - state_options.rfc_4180 = true; + // To set the state machine to be strict we ensure that strict_mode is set to true + if (!state_options.strict_mode.IsSetByUser()) { + state_options.strict_mode = true; } state_machine_strict = make_shared_ptr(state_machine_cache.Get(state_options), state_machine->options); @@ -1806,7 +1808,7 @@ void StringValueScanner::FinalizeChunkProcess() { } } if (states.IsQuotedCurrent() && !found_error && - state_machine->dialect_options.state_machine_options.rfc_4180.GetValue()) { + state_machine->dialect_options.state_machine_options.strict_mode.GetValue()) { // If we finish the execution of a buffer, and we end in a quoted state, it means we have unterminated // quotes result.current_errors.Insert(type, result.cur_col_id, result.chunk_col_id, result.last_position); diff --git a/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp b/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp index 5cced646d..37d8e5835 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp @@ -156,11 +156,6 @@ void CSVSniffer::GenerateStateMachineSearchSpace(vector(options, state_machine_options, state_machine_cache); if (options.dialect_options.skip_rows.IsSetByUser()) { diff --git a/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp b/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp index 29fda8863..d4e552454 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp @@ -31,7 +31,7 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op InitializeTransitionArray(transition_array, cur_state, CSVState::QUOTED); break; case CSVState::UNQUOTED: - if (state_machine_options.rfc_4180.GetValue()) { + if (state_machine_options.strict_mode.GetValue()) { // If we have an unquoted state, following rfc 4180, our base state is invalid InitializeTransitionArray(transition_array, cur_state, CSVState::INVALID); } else { @@ -58,7 +58,7 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op const bool multi_byte_delimiter = delimiter_value.size() != 1; - const bool enable_unquoted_escape = state_machine_options.rfc_4180.GetValue() == false && + const bool enable_unquoted_escape = state_machine_options.strict_mode.GetValue() == false && state_machine_options.quote != state_machine_options.escape && state_machine_options.escape != '\0'; // Now set values depending on configuration @@ -75,7 +75,7 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op transition_array[static_cast('\r')][state] = CSVState::CARRIAGE_RETURN; if (state == static_cast(CSVState::STANDARD_NEWLINE)) { transition_array[static_cast('\n')][state] = CSVState::STANDARD; - } else if (!state_machine_options.rfc_4180.GetValue()) { + } else if (!state_machine_options.strict_mode.GetValue()) { transition_array[static_cast('\n')][state] = CSVState::RECORD_SEPARATOR; } else { transition_array[static_cast('\n')][state] = CSVState::INVALID; @@ -227,7 +227,7 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op if (state_machine_options.quote == state_machine_options.escape) { transition_array[quote][static_cast(CSVState::UNQUOTED)] = CSVState::QUOTED; } - if (state_machine_options.rfc_4180 == false) { + if (state_machine_options.strict_mode == false) { if (escape == '\0') { // If escape is defined, it limits a bit how relaxed quotes can be in a reliable way. transition_array[quote][static_cast(CSVState::UNQUOTED)] = CSVState::MAYBE_QUOTED; @@ -413,10 +413,10 @@ CSVStateMachineCache::CSVStateMachineCache() { const auto &escape_candidates = default_escape[static_cast(quote_rule)]; for (const auto &escape : escape_candidates) { for (const auto &comment : default_comment) { - for (const bool rfc_4180 : {true, false}) { - Insert({delimiter, quote, escape, comment, NewLineIdentifier::SINGLE_N, rfc_4180}); - Insert({delimiter, quote, escape, comment, NewLineIdentifier::SINGLE_R, rfc_4180}); - Insert({delimiter, quote, escape, comment, NewLineIdentifier::CARRY_ON, rfc_4180}); + for (const bool strict_mode : {true, false}) { + Insert({delimiter, quote, escape, comment, NewLineIdentifier::SINGLE_N, strict_mode}); + Insert({delimiter, quote, escape, comment, NewLineIdentifier::SINGLE_R, strict_mode}); + Insert({delimiter, quote, escape, comment, NewLineIdentifier::CARRY_ON, strict_mode}); } } } diff --git a/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp index 35e691153..1f206f797 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -328,11 +328,13 @@ CSVError CSVError::InvalidState(const CSVReaderOptions &options, idx_t current_c std::ostringstream error; error << "The CSV Parser state machine reached an invalid state.\nThis can happen when is not possible to parse " "your CSV File with the given options, or the CSV File is not RFC 4180 compliant "; - std::ostringstream how_to_fix_it; - how_to_fix_it << "Possible fixes:" << '\n'; - how_to_fix_it << "* Enable scanning files that are not RFC 4180 compliant (rfc_4180=false)." << '\n'; - + if (options.dialect_options.state_machine_options.strict_mode.GetValue()) { + how_to_fix_it << "Possible fixes:" << '\n'; + how_to_fix_it << "* Disable the parser's strict mode (strict_mode=false) to allow reading rows that do not " + "comply with the CSV standard." + << '\n'; + } return CSVError(error.str(), INVALID_STATE, current_column, csv_row, error_info, row_byte_position, byte_position, options, how_to_fix_it.str(), current_path); } @@ -363,6 +365,11 @@ CSVError CSVError::HeaderSniffingError(const CSVReaderOptions &options, const ve // 3. Suggest how to fix it! error << "Possible fixes:" << '\n'; + if (options.dialect_options.state_machine_options.strict_mode.GetValue()) { + error << "* Disable the parser's strict mode (strict_mode=false) to allow reading rows that do not comply with " + "the CSV standard." + << '\n'; + } // header if (!options.dialect_options.header.IsSetByUser()) { error << "* Set header (header = true) if your CSV has a header, or (header = false) if it doesn't" << '\n'; @@ -402,6 +409,11 @@ CSVError CSVError::SniffingError(const CSVReaderOptions &options, const string & // 3. Suggest how to fix it! error << "Possible fixes:" << '\n'; // 3.1 Inform the reader of the dialect + if (options.dialect_options.state_machine_options.strict_mode.GetValue()) { + error << "* Disable the parser's strict mode (strict_mode=false) to allow reading rows that do not comply with " + "the CSV standard." + << '\n'; + } // delimiter if (!options.dialect_options.state_machine_options.delimiter.IsSetByUser()) { error << "* Set delimiter (e.g., delim=\',\')" << '\n'; @@ -447,11 +459,6 @@ CSVError CSVError::SniffingError(const CSVReaderOptions &options, const string & error << "* Be sure that the maximum line size is set to an appropriate value, otherwise set it (e.g., " "max_line_size=10000000)" << "\n"; - - if (options.dialect_options.state_machine_options.rfc_4180.GetValue() != false || - !options.dialect_options.state_machine_options.rfc_4180.IsSetByUser()) { - error << "* Enable scanning files that are not RFC 4180 compliant (rfc_4180=false). " << '\n'; - } return CSVError(error.str(), SNIFFING, {}); } @@ -473,6 +480,11 @@ CSVError CSVError::UnterminatedQuotesError(const CSVReaderOptions &options, idx_ error << "Value with unterminated quote found." << '\n'; std::ostringstream how_to_fix_it; how_to_fix_it << "Possible fixes:" << '\n'; + if (options.dialect_options.state_machine_options.strict_mode.GetValue()) { + how_to_fix_it << "* Disable the parser's strict mode (strict_mode=false) to allow reading rows that do not " + "comply with the CSV standard." + << '\n'; + } how_to_fix_it << "* Enable ignore errors (ignore_errors=true) to skip this row" << '\n'; how_to_fix_it << "* Set quote to empty or to a different value (e.g., quote=\'\')" << '\n'; return CSVError(error.str(), UNTERMINATED_QUOTES, current_column, csv_row, error_info, row_byte_position, @@ -486,6 +498,11 @@ CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, i // We don't have a fix for this std::ostringstream how_to_fix_it; how_to_fix_it << "Possible fixes:" << '\n'; + if (options.dialect_options.state_machine_options.strict_mode.GetValue()) { + how_to_fix_it << "* Disable the parser's strict mode (strict_mode=false) to allow reading rows that do not " + "comply with the CSV standard." + << '\n'; + } if (!options.null_padding) { how_to_fix_it << "* Enable null padding (null_padding=true) to replace missing values with NULL" << '\n'; } diff --git a/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp b/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp index ac18d42eb..7957f2c47 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp @@ -189,11 +189,11 @@ void CSVReaderOptions::SetNewline(const string &input) { } bool CSVReaderOptions::GetRFC4180() const { - return this->dialect_options.state_machine_options.rfc_4180.GetValue(); + return this->dialect_options.state_machine_options.strict_mode.GetValue(); } void CSVReaderOptions::SetRFC4180(bool input) { - this->dialect_options.state_machine_options.rfc_4180.Set(input); + this->dialect_options.state_machine_options.strict_mode.Set(input); } bool CSVReaderOptions::IgnoreErrors() const { @@ -413,7 +413,7 @@ bool CSVReaderOptions::SetBaseOption(const string &loption, const Value &value, } else if (loption == "compression") { SetCompression(ParseString(value, loption)); - } else if (loption == "rfc_4180") { + } else if (loption == "strict_mode") { SetRFC4180(ParseBoolean(value, loption)); } else { // unrecognized option in base CSV @@ -440,7 +440,7 @@ string CSVReaderOptions::ToString(const string ¤t_file_path) const { auto &escape = dialect_options.state_machine_options.escape; auto &comment = dialect_options.state_machine_options.comment; auto &new_line = dialect_options.state_machine_options.new_line; - auto &rfc_4180 = dialect_options.state_machine_options.rfc_4180; + auto &strict_mode = dialect_options.state_machine_options.strict_mode; auto &skip_rows = dialect_options.skip_rows; auto &header = dialect_options.header; @@ -460,8 +460,8 @@ string CSVReaderOptions::ToString(const string ¤t_file_path) const { error += FormatOptionLine("skip_rows", skip_rows); // comment error += FormatOptionLine("comment", comment); - // rfc_4180 - error += FormatOptionLine("rfc_4180", rfc_4180); + // strict_mode + error += FormatOptionLine("strict_mode", strict_mode); // date format error += FormatOptionLine("date_format", dialect_options.date_format.at(LogicalType::DATE)); // timestamp format @@ -638,6 +638,9 @@ void CSVReaderOptions::FromNamedParameters(const named_parameter_map_t &in, Clie } auto &children = ListValue::GetChildren(kv.second); for (auto &child : children) { + if (child.IsNull()) { + throw BinderException("read_csv %s parameter cannot have a NULL value", kv.first); + } name_list.push_back(StringValue::Get(child)); } for (auto &name : name_list) { @@ -716,7 +719,7 @@ void CSVReaderOptions::ToNamedParameters(named_parameter_map_t &named_params) co auto "e = dialect_options.state_machine_options.quote; auto &escape = dialect_options.state_machine_options.escape; auto &comment = dialect_options.state_machine_options.comment; - auto &rfc_4180 = dialect_options.state_machine_options.rfc_4180; + auto &strict_mode = dialect_options.state_machine_options.strict_mode; auto &header = dialect_options.header; if (delimiter.IsSetByUser()) { named_params["delim"] = Value(GetDelimiter()); @@ -736,8 +739,8 @@ void CSVReaderOptions::ToNamedParameters(named_parameter_map_t &named_params) co if (header.IsSetByUser()) { named_params["header"] = Value(GetHeader()); } - if (rfc_4180.IsSetByUser()) { - named_params["rfc_4180"] = Value(GetRFC4180()); + if (strict_mode.IsSetByUser()) { + named_params["strict_mode"] = Value(GetRFC4180()); } named_params["max_line_size"] = Value::BIGINT(NumericCast(maximum_line_size.GetValue())); if (dialect_options.skip_rows.IsSetByUser()) { diff --git a/src/duckdb/src/execution/sample/reservoir_sample.cpp b/src/duckdb/src/execution/sample/reservoir_sample.cpp index 402d84a80..334b613d1 100644 --- a/src/duckdb/src/execution/sample/reservoir_sample.cpp +++ b/src/duckdb/src/execution/sample/reservoir_sample.cpp @@ -50,7 +50,10 @@ ReservoirSample::ReservoirSample(idx_t sample_count, unique_ptr if (reservoir_chunk) { this->reservoir_chunk = std::move(reservoir_chunk); sel_size = this->reservoir_chunk->chunk.size(); - sel = SelectionVector(0, sel_size); + sel = SelectionVector(FIXED_SAMPLE_SIZE); + for (idx_t i = 0; i < sel_size; i++) { + sel.set_index(i, i); + } ExpandSerializedSample(); } stats_sample = true; @@ -301,6 +304,7 @@ void ReservoirSample::SimpleMerge(ReservoirSample &other) { auto offset = reservoir_chunk->chunk.size(); for (idx_t i = keep_from_this; i < size_after_merge; i++) { if (i >= GetActiveSampleCount()) { + D_ASSERT(sel_size >= GetActiveSampleCount()); sel.set_index(GetActiveSampleCount(), offset); sel_size += 1; } else { diff --git a/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp b/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp index 2220c3636..964f9d4a5 100644 --- a/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +++ b/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp @@ -369,8 +369,13 @@ unique_ptr ArrowType::GetTypeFromSchema(DBConfig &config, ArrowSchema auto arrow_type = GetTypeFromFormat(config, schema, format); if (schema_metadata.HasExtension()) { auto extension_info = schema_metadata.GetExtensionInfo(string(format)); - arrow_type->extension_data = config.GetArrowExtension(extension_info).GetTypeExtension(); + if (config.HasArrowExtension(extension_info)) { + auto extension = config.GetArrowExtension(extension_info); + arrow_type = extension.GetType(schema, schema_metadata); + arrow_type->extension_data = extension.GetTypeExtension(); + } } + return arrow_type; } diff --git a/src/duckdb/src/function/table/arrow_conversion.cpp b/src/duckdb/src/function/table/arrow_conversion.cpp index 666712ffc..3ba7d2b05 100644 --- a/src/duckdb/src/function/table/arrow_conversion.cpp +++ b/src/duckdb/src/function/table/arrow_conversion.cpp @@ -118,7 +118,8 @@ static void ColumnArrowToDuckDBRunEndEncoded(Vector &vector, const ArrowArray &a static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size, const ArrowType &arrow_type, int64_t nested_offset = -1, - ValidityMask *parent_mask = nullptr, uint64_t parent_offset = 0); + ValidityMask *parent_mask = nullptr, uint64_t parent_offset = 0, + bool ignore_extensions = false); static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size, const ArrowType &arrow_type, int64_t nested_offset = -1, @@ -765,17 +766,15 @@ static void ColumnArrowToDuckDBRunEndEncoded(Vector &vector, const ArrowArray &a static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size, const ArrowType &arrow_type, int64_t nested_offset, ValidityMask *parent_mask, - uint64_t parent_offset) { + uint64_t parent_offset, bool ignore_extensions) { auto &scan_state = array_state.state; D_ASSERT(!array.dictionary); - if (arrow_type.HasExtension()) { + if (!ignore_extensions && arrow_type.HasExtension()) { if (arrow_type.extension_data->arrow_to_duckdb) { - // We allocate with the internal type, and cast to the end result + // Convert the storage and then call the cast function Vector input_data(arrow_type.extension_data->GetInternalType()); - // FIXME do we need this? - auto input_arrow_type = ArrowType(arrow_type.extension_data->GetInternalType()); - ColumnArrowToDuckDB(input_data, array, array_state, size, input_arrow_type, nested_offset, parent_mask, - parent_offset); + ColumnArrowToDuckDB(input_data, array, array_state, size, arrow_type, nested_offset, parent_mask, + parent_offset, /*ignore_extensions*/ true); arrow_type.extension_data->arrow_to_duckdb(array_state.context, input_data, vector, size); return; } @@ -1105,7 +1104,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca break; case ArrowArrayPhysicalType::DEFAULT: ColumnArrowToDuckDB(child_entry, child_array, child_state, size, child_type, nested_offset, - &struct_validity_mask, NumericCast(array.offset)); + &struct_validity_mask, NumericCast(array.offset), false); break; default: throw NotImplementedException("ArrowArrayPhysicalType not recognized"); @@ -1138,7 +1137,8 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca ColumnArrowToDuckDBRunEndEncoded(child, child_array, child_state, size, child_type); break; case ArrowArrayPhysicalType::DEFAULT: - ColumnArrowToDuckDB(child, child_array, child_state, size, child_type, nested_offset, &validity_mask); + ColumnArrowToDuckDB(child, child_array, child_state, size, child_type, nested_offset, &validity_mask, + false); break; default: throw NotImplementedException("ArrowArrayPhysicalType not recognized"); diff --git a/src/duckdb/src/function/table/copy_csv.cpp b/src/duckdb/src/function/table/copy_csv.cpp index 42e3f0610..27706065f 100644 --- a/src/duckdb/src/function/table/copy_csv.cpp +++ b/src/duckdb/src/function/table/copy_csv.cpp @@ -97,7 +97,7 @@ void BaseCSVData::Finalize() { const char escape = options.dialect_options.state_machine_options.escape.GetValue(); // Allow nullstr to be escape character + some non-special character, e.g., "\N" (MySQL default). // In this case, only unquoted occurrences of the nullstr will be recognized as null values. - if (options.dialect_options.state_machine_options.rfc_4180 == false && null_str.size() == 2 && + if (options.dialect_options.state_machine_options.strict_mode == false && null_str.size() == 2 && null_str[0] == escape && null_str[1] != '\0') { continue; } diff --git a/src/duckdb/src/function/table/read_csv.cpp b/src/duckdb/src/function/table/read_csv.cpp index 517c7a266..db9bf4d8d 100644 --- a/src/duckdb/src/function/table/read_csv.cpp +++ b/src/duckdb/src/function/table/read_csv.cpp @@ -350,7 +350,7 @@ void ReadCSVTableFunction::ReadCSVAddNamedParameters(TableFunction &table_functi table_function.named_parameters["column_names"] = LogicalType::LIST(LogicalType::VARCHAR); table_function.named_parameters["comment"] = LogicalType::VARCHAR; table_function.named_parameters["encoding"] = LogicalType::VARCHAR; - table_function.named_parameters["rfc_4180"] = LogicalType::BOOLEAN; + table_function.named_parameters["strict_mode"] = LogicalType::BOOLEAN; MultiFileReader::AddParameters(table_function); } diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index f64967275..cc12550c4 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4923" +#define DUCKDB_PATCH_VERSION "4-dev4987" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4923" +#define DUCKDB_VERSION "v1.1.4-dev4987" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "d0c4cf8a28" +#define DUCKDB_SOURCE_ID "dc4b8892e2" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/function/window/window_token_tree.cpp b/src/duckdb/src/function/window/window_token_tree.cpp index 82b5124e4..85170f7b6 100644 --- a/src/duckdb/src/function/window/window_token_tree.cpp +++ b/src/duckdb/src/function/window/window_token_tree.cpp @@ -122,13 +122,14 @@ idx_t WindowTokenTree::Rank(const idx_t lower, const idx_t upper, const idx_t ro template static idx_t NextPeer(const TREE &tree, const idx_t lower, const idx_t upper, const idx_t row_idx) { - idx_t rank = 0; + // We return an index, not a relative position + idx_t idx = lower; // Because tokens are dense, we can find the next peer by adding 1 to the probed token value const auto needle = tree.LowestLevel()[row_idx] + 1; tree.AggregateLowerBound(lower, upper, needle, [&](idx_t level, const idx_t run_begin, const idx_t run_pos) { - rank += run_pos - run_begin; + idx += run_pos - run_begin; }); - return rank; + return idx; } idx_t WindowTokenTree::PeerEnd(const idx_t lower, const idx_t upper, const idx_t row_idx) const { diff --git a/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp b/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp index a8fd35bc9..87ccd900c 100644 --- a/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +++ b/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp @@ -65,7 +65,7 @@ typedef void (*populate_arrow_schema_t)(DuckDBArrowSchemaHolder &root_holder, Ar const LogicalType &type, ClientContext &context, const ArrowTypeExtension &extension); -typedef shared_ptr (*get_type_t)(const ArrowSchema &schema, const ArrowSchemaMetadata &schema_metadata); +typedef unique_ptr (*get_type_t)(const ArrowSchema &schema, const ArrowSchemaMetadata &schema_metadata); class ArrowTypeExtension { public: @@ -86,7 +86,7 @@ class ArrowTypeExtension { ArrowExtensionMetadata GetInfo() const; - shared_ptr GetType(const ArrowSchema &schema, const ArrowSchemaMetadata &schema_metadata) const; + unique_ptr GetType(const ArrowSchema &schema, const ArrowSchemaMetadata &schema_metadata) const; shared_ptr GetTypeExtension() const; diff --git a/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp b/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp index 520e24376..d4a1b708d 100644 --- a/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp @@ -55,7 +55,7 @@ class FixedSizeAllocator { D_ASSERT(buffers.find(ptr.GetBufferId()) != buffers.end()); auto &buffer = buffers.find(ptr.GetBufferId())->second; - auto buffer_ptr = buffer.Get(dirty); + auto buffer_ptr = buffer->Get(dirty); return buffer_ptr + ptr.GetOffset() * segment_size + bitmask_offset; } @@ -71,11 +71,11 @@ class FixedSizeAllocator { D_ASSERT(buffers.find(ptr.GetBufferId()) != buffers.end()); auto &buffer = buffers.find(ptr.GetBufferId())->second; - if (!buffer.InMemory()) { + if (!buffer->InMemory()) { return nullptr; } - auto buffer_ptr = buffer.Get(); + auto buffer_ptr = buffer->Get(); auto raw_ptr = buffer_ptr + ptr.GetOffset() * segment_size + bitmask_offset; return raw_ptr; } @@ -152,7 +152,7 @@ class FixedSizeAllocator { idx_t total_segment_count; //! Buffers containing the segments - unordered_map buffers; + unordered_map> buffers; //! Buffers with free space unordered_set buffers_with_free_space; //! Buffers qualifying for a vacuum (helper field to allow for fast NeedsVacuum checks) diff --git a/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp b/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp index bf9b35d14..edeb0ab17 100644 --- a/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp @@ -34,6 +34,8 @@ struct PartialBlockForIndex : public PartialBlock { //! yet in memory, and it only serializes dirty and non-written buffers to disk during //! serialization. class FixedSizeBuffer { + friend class FixedSizeAllocator; + public: //! Constants for fast offset calculations in the bitmask static constexpr idx_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1}; @@ -46,33 +48,12 @@ class FixedSizeBuffer { FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const idx_t allocation_size, const BlockPointer &block_pointer); - //! Block manager of the database instance - BlockManager &block_manager; - - //! The number of allocated segments - idx_t segment_count; - //! The size of allocated memory in this buffer (necessary for copying while pinning) - idx_t allocation_size; - - //! True: the in-memory buffer is no longer consistent with a (possibly existing) copy on disk - bool dirty; - //! True: can be vacuumed after the vacuum operation - bool vacuum; - - //! Partial block id and offset - BlockPointer block_pointer; + ~FixedSizeBuffer(); -public: - //! Returns true, if the buffer is in-memory - inline bool InMemory() const { - return buffer_handle.IsValid(); - } - //! Returns true, if the block is on-disk - inline bool OnDisk() const { - return block_pointer.IsValid(); - } +private: //! Returns a pointer to the buffer in memory, and calls Deserialize, if the buffer is not in memory - inline data_ptr_t Get(const bool dirty_p = true) { + data_ptr_t Get(const bool dirty_p = true) { + lock_guard l(lock); if (!InMemory()) { Pin(); } @@ -81,8 +62,17 @@ class FixedSizeBuffer { } return buffer_handle.Ptr(); } - //! Destroys the in-memory buffer and the on-disk block - void Destroy(); + + //! Returns true, if the buffer is in-memory + bool InMemory() const { + return buffer_handle.IsValid(); + } + + //! Returns true, if the block is on-disk + bool OnDisk() const { + return block_pointer.IsValid(); + } + //! Serializes a buffer (if dirty or not on disk) void Serialize(PartialBlockManager &partial_block_manager, const idx_t available_segments, const idx_t segment_size, const idx_t bitmask_offset); @@ -92,17 +82,32 @@ class FixedSizeBuffer { uint32_t GetOffset(const idx_t bitmask_count, const idx_t available_segments); //! Sets the allocation size, if dirty void SetAllocationSize(const idx_t available_segments, const idx_t segment_size, const idx_t bitmask_offset); + //! Sets all uninitialized regions of a buffer in the respective partial block allocation + void SetUninitializedRegions(PartialBlockForIndex &p_block_for_index, const idx_t segment_size, const idx_t offset, + const idx_t bitmask_offset, const idx_t available_segments); private: + //! Block manager of the database instance + BlockManager &block_manager; + + //! The number of allocated segments + idx_t segment_count; + //! The size of allocated memory in this buffer (necessary for copying while pinning) + idx_t allocation_size; + + //! True: the in-memory buffer is no longer consistent with a (possibly existing) copy on disk + bool dirty; + //! True: can be vacuumed after the vacuum operation + bool vacuum; + + //! Partial block id and offset + BlockPointer block_pointer; //! The buffer handle of the in-memory buffer BufferHandle buffer_handle; //! The block handle of the on-disk buffer shared_ptr block_handle; - -private: - //! Sets all uninitialized regions of a buffer in the respective partial block allocation - void SetUninitializedRegions(PartialBlockForIndex &p_block_for_index, const idx_t segment_size, const idx_t offset, - const idx_t bitmask_offset, const idx_t available_segments); + //! The lock for this fixed size buffer handle + mutex lock; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp index 53b306585..d00ff3465 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp @@ -16,9 +16,9 @@ namespace duckdb { struct CSVStateMachineOptions { CSVStateMachineOptions() {}; CSVStateMachineOptions(string delimiter_p, char quote_p, char escape_p, char comment_p, - NewLineIdentifier new_line_p, bool rfc_4180_p) + NewLineIdentifier new_line_p, bool strict_mode_p) : delimiter(std::move(delimiter_p)), quote(quote_p), escape(escape_p), comment(comment_p), new_line(new_line_p), - rfc_4180(rfc_4180_p) {}; + strict_mode(strict_mode_p) {}; //! Delimiter to separate columns within each line CSVOption delimiter {","}; @@ -30,12 +30,12 @@ struct CSVStateMachineOptions { CSVOption comment = '\0'; //! New Line separator CSVOption new_line = NewLineIdentifier::NOT_SET; - //! RFC 4180 conformance - CSVOption rfc_4180 = false; + //! How Strict the parser should be + CSVOption strict_mode = true; bool operator==(const CSVStateMachineOptions &other) const { return delimiter == other.delimiter && quote == other.quote && escape == other.escape && - new_line == other.new_line && comment == other.comment && rfc_4180 == other.rfc_4180; + new_line == other.new_line && comment == other.comment && strict_mode == other.strict_mode; } }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index 12fd0f427..7ce7ab577 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -294,7 +294,7 @@ class StringValueScanner : public BaseScanner { bool FinishedIterator() const; //! Creates a new string with all escaped values removed - static string_t RemoveEscape(const char *str_ptr, idx_t end, char escape, char quote, bool rfc_4180, + static string_t RemoveEscape(const char *str_ptr, idx_t end, char escape, char quote, bool strict_mode, Vector &vector); //! If we can directly cast the type when consuming the CSV file, or we have to do it later diff --git a/src/duckdb/src/include/duckdb/logging/logging.hpp b/src/duckdb/src/include/duckdb/logging/logging.hpp index ff354afcd..6d6c4f2e8 100644 --- a/src/duckdb/src/include/duckdb/logging/logging.hpp +++ b/src/duckdb/src/include/duckdb/logging/logging.hpp @@ -71,8 +71,6 @@ struct LoggingContext { optional_idx thread; optional_idx client_context; optional_idx transaction_id; - - const char *default_log_type = "default"; }; struct RegisteredLoggingContext { diff --git a/src/duckdb/src/include/duckdb/main/config.hpp b/src/duckdb/src/include/duckdb/main/config.hpp index 8cc69b13e..064c794b6 100644 --- a/src/duckdb/src/include/duckdb/main/config.hpp +++ b/src/duckdb/src/include/duckdb/main/config.hpp @@ -384,6 +384,7 @@ struct DBConfig { DUCKDB_API ArrowTypeExtension GetArrowExtension(ArrowExtensionMetadata info) const; DUCKDB_API ArrowTypeExtension GetArrowExtension(const LogicalType &type) const; DUCKDB_API bool HasArrowExtension(const LogicalType &type) const; + DUCKDB_API bool HasArrowExtension(ArrowExtensionMetadata info) const; DUCKDB_API void RegisterArrowExtension(const ArrowTypeExtension &extension) const; bool operator==(const DBConfig &other); diff --git a/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp b/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp index 981d4e4ef..9873b6761 100644 --- a/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +++ b/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp @@ -207,6 +207,7 @@ struct UncompressedStringStorage { static void SetDictionary(ColumnSegment &segment, BufferHandle &handle, StringDictionaryContainer dict); static StringDictionaryContainer GetDictionary(ColumnSegment &segment, BufferHandle &handle); + static uint32_t GetDictionaryEnd(ColumnSegment &segment, BufferHandle &handle); static idx_t RemainingSpace(ColumnSegment &segment, BufferHandle &handle); static void WriteString(ColumnSegment &segment, string_t string, block_id_t &result_block, int32_t &result_offset); static void WriteStringMemory(ColumnSegment &segment, string_t string, block_id_t &result_block, @@ -217,12 +218,12 @@ struct UncompressedStringStorage { static void WriteStringMarker(data_ptr_t target, block_id_t block_id, int32_t offset); static void ReadStringMarker(data_ptr_t target, block_id_t &block_id, int32_t &offset); - inline static string_t FetchStringFromDict(ColumnSegment &segment, StringDictionaryContainer dict, Vector &result, + inline static string_t FetchStringFromDict(ColumnSegment &segment, uint32_t dict_end_offset, Vector &result, data_ptr_t base_ptr, int32_t dict_offset, uint32_t string_length) { D_ASSERT(dict_offset <= NumericCast(segment.GetBlockManager().GetBlockSize())); if (DUCKDB_LIKELY(dict_offset >= 0)) { // regular string - fetch from dictionary - auto dict_end = base_ptr + dict.end; + auto dict_end = base_ptr + dict_end_offset; auto dict_pos = dict_end - dict_offset; auto str_ptr = char_ptr_cast(dict_pos); @@ -231,7 +232,7 @@ struct UncompressedStringStorage { // read overflow string block_id_t block_id; int32_t offset; - ReadStringMarker(base_ptr + dict.end - AbsValue(dict_offset), block_id, offset); + ReadStringMarker(base_ptr + dict_end_offset - AbsValue(dict_offset), block_id, offset); return ReadOverflowString(segment, result, block_id, offset); } diff --git a/src/duckdb/src/main/client_context.cpp b/src/duckdb/src/main/client_context.cpp index 3b9bc8f72..473178dfe 100644 --- a/src/duckdb/src/main/client_context.cpp +++ b/src/duckdb/src/main/client_context.cpp @@ -1049,7 +1049,9 @@ unique_ptr ClientContext::PendingQuery(const string &query, return PendingQueryInternal(*lock, std::move(statements[0]), params, true); } catch (std::exception &ex) { - return make_uniq(ErrorData(ex)); + ErrorData error(ex); + ProcessError(error, query); + return make_uniq(std::move(error)); } } diff --git a/src/duckdb/src/storage/compression/fsst.cpp b/src/duckdb/src/storage/compression/fsst.cpp index bc5c52228..209bf8a5a 100644 --- a/src/duckdb/src/storage/compression/fsst.cpp +++ b/src/duckdb/src/storage/compression/fsst.cpp @@ -658,7 +658,7 @@ void FSSTStorage::StringScanPartial(ColumnSegment &segment, ColumnScanState &sta for (idx_t i = 0; i < scan_count; i++) { uint32_t string_length = bitunpack_buffer[i + offsets.scan_offset]; result_data[i] = UncompressedStringStorage::FetchStringFromDict( - segment, dict, result, baseptr, + segment, dict.end, result, baseptr, UnsafeNumericCast(delta_decode_buffer[i + offsets.unused_delta_decoded_values]), string_length); FSSTVector::SetCount(result, scan_count); @@ -736,7 +736,7 @@ void FSSTStorage::StringFetchRow(ColumnSegment &segment, ColumnFetchState &state uint32_t string_length = bitunpack_buffer[offsets.scan_offset]; string_t compressed_string = UncompressedStringStorage::FetchStringFromDict( - segment, dict, result, base_ptr, + segment, dict.end, result, base_ptr, UnsafeNumericCast(delta_decode_buffer[offsets.unused_delta_decoded_values]), string_length); vector uncompress_buffer; diff --git a/src/duckdb/src/storage/compression/string_uncompressed.cpp b/src/duckdb/src/storage/compression/string_uncompressed.cpp index a2279529f..fe9cf7035 100644 --- a/src/duckdb/src/storage/compression/string_uncompressed.cpp +++ b/src/duckdb/src/storage/compression/string_uncompressed.cpp @@ -94,7 +94,7 @@ void UncompressedStringStorage::StringScanPartial(ColumnSegment &segment, Column auto start = segment.GetRelativeIndex(state.row_index); auto baseptr = scan_state.handle.Ptr() + segment.GetBlockOffset(); - auto dict = GetDictionary(segment, scan_state.handle); + auto dict_end = GetDictionaryEnd(segment, scan_state.handle); auto base_data = reinterpret_cast(baseptr + DICTIONARY_HEADER_SIZE); auto result_data = FlatVector::GetData(result); @@ -105,7 +105,7 @@ void UncompressedStringStorage::StringScanPartial(ColumnSegment &segment, Column auto current_offset = base_data[start + i]; auto string_length = UnsafeNumericCast(std::abs(current_offset) - std::abs(previous_offset)); result_data[result_offset + i] = - FetchStringFromDict(segment, dict, result, baseptr, current_offset, string_length); + FetchStringFromDict(segment, dict_end, result, baseptr, current_offset, string_length); previous_offset = base_data[start + i]; } } @@ -125,7 +125,7 @@ void UncompressedStringStorage::Select(ColumnSegment &segment, ColumnScanState & auto start = segment.GetRelativeIndex(state.row_index); auto baseptr = scan_state.handle.Ptr() + segment.GetBlockOffset(); - auto dict = GetDictionary(segment, scan_state.handle); + auto dict_end = GetDictionaryEnd(segment, scan_state.handle); auto base_data = reinterpret_cast(baseptr + DICTIONARY_HEADER_SIZE); auto result_data = FlatVector::GetData(result); @@ -134,7 +134,7 @@ void UncompressedStringStorage::Select(ColumnSegment &segment, ColumnScanState & auto current_offset = base_data[index]; auto prev_offset = index > 0 ? base_data[index - 1] : 0; auto string_length = UnsafeNumericCast(std::abs(current_offset) - std::abs(prev_offset)); - result_data[i] = FetchStringFromDict(segment, dict, result, baseptr, current_offset, string_length); + result_data[i] = FetchStringFromDict(segment, dict_end, result, baseptr, current_offset, string_length); } } @@ -164,7 +164,7 @@ void UncompressedStringStorage::StringFetchRow(ColumnSegment &segment, ColumnFet auto &handle = state.GetOrInsertHandle(segment); auto baseptr = handle.Ptr() + segment.GetBlockOffset(); - auto dict = GetDictionary(segment, handle); + auto dict_end = GetDictionaryEnd(segment, handle); auto base_data = reinterpret_cast(baseptr + DICTIONARY_HEADER_SIZE); auto result_data = FlatVector::GetData(result); @@ -176,7 +176,7 @@ void UncompressedStringStorage::StringFetchRow(ColumnSegment &segment, ColumnFet } else { string_length = NumericCast(std::abs(dict_offset) - std::abs(base_data[row_id - 1])); } - result_data[result_idx] = FetchStringFromDict(segment, dict, result, baseptr, dict_offset, string_length); + result_data[result_idx] = FetchStringFromDict(segment, dict_end, result, baseptr, dict_offset, string_length); } //===--------------------------------------------------------------------===// @@ -301,6 +301,11 @@ StringDictionaryContainer UncompressedStringStorage::GetDictionary(ColumnSegment return container; } +uint32_t UncompressedStringStorage::GetDictionaryEnd(ColumnSegment &segment, BufferHandle &handle) { + auto startptr = handle.Ptr() + segment.GetBlockOffset(); + return Load(startptr + sizeof(uint32_t)); +} + idx_t UncompressedStringStorage::RemainingSpace(ColumnSegment &segment, BufferHandle &handle) { auto dictionary = GetDictionary(segment, handle); D_ASSERT(dictionary.end == segment.SegmentSize()); diff --git a/src/duckdb/src/storage/magic_bytes.cpp b/src/duckdb/src/storage/magic_bytes.cpp index 303094108..602e7ffef 100644 --- a/src/duckdb/src/storage/magic_bytes.cpp +++ b/src/duckdb/src/storage/magic_bytes.cpp @@ -14,7 +14,7 @@ DataFileType MagicBytes::CheckMagicBytes(FileSystem &fs, const string &path) { } constexpr const idx_t MAGIC_BYTES_READ_SIZE = 16; - char buffer[MAGIC_BYTES_READ_SIZE]; + char buffer[MAGIC_BYTES_READ_SIZE] = {}; handle->Read(buffer, MAGIC_BYTES_READ_SIZE); if (memcmp(buffer, "SQLite format 3\0", 16) == 0) { diff --git a/src/duckdb/src/storage/serialization/serialize_nodes.cpp b/src/duckdb/src/storage/serialization/serialize_nodes.cpp index 6c0bc4ab1..10fe3f949 100644 --- a/src/duckdb/src/storage/serialization/serialize_nodes.cpp +++ b/src/duckdb/src/storage/serialization/serialize_nodes.cpp @@ -209,7 +209,7 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault>(137, "comment", dialect_options.state_machine_options.comment, CSVOption('\0')); serializer.WritePropertyWithDefault(138, "rows_until_header", dialect_options.rows_until_header); serializer.WritePropertyWithDefault(139, "encoding", encoding); - serializer.WriteProperty>(140, "rfc_4180", dialect_options.state_machine_options.rfc_4180); + serializer.WriteProperty>(140, "strict_mode", dialect_options.state_machine_options.strict_mode); serializer.WriteProperty>(141, "multi_byte_delimiter", GetMultiByteDelimiter()); serializer.WritePropertyWithDefault(142, "multi_file_reader", multi_file_reader); serializer.WriteProperty>(143, "buffer_size_option", buffer_size_option); @@ -256,7 +256,7 @@ CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { auto dialect_options_state_machine_options_comment = deserializer.ReadPropertyWithExplicitDefault>(137, "comment", CSVOption('\0')); auto dialect_options_rows_until_header = deserializer.ReadPropertyWithDefault(138, "rows_until_header"); auto encoding = deserializer.ReadPropertyWithDefault(139, "encoding"); - auto dialect_options_state_machine_options_rfc_4180 = deserializer.ReadProperty>(140, "rfc_4180"); + auto dialect_options_state_machine_options_strict_mode = deserializer.ReadProperty>(140, "strict_mode"); auto multi_byte_delimiter = deserializer.ReadProperty>(141, "multi_byte_delimiter"); CSVReaderOptions result(dialect_options_state_machine_options_delimiter, multi_byte_delimiter); result.ignore_errors = ignore_errors; @@ -295,7 +295,7 @@ CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { result.dialect_options.state_machine_options.comment = dialect_options_state_machine_options_comment; result.dialect_options.rows_until_header = dialect_options_rows_until_header; result.encoding = std::move(encoding); - result.dialect_options.state_machine_options.rfc_4180 = dialect_options_state_machine_options_rfc_4180; + result.dialect_options.state_machine_options.strict_mode = dialect_options_state_machine_options_strict_mode; deserializer.ReadPropertyWithDefault(142, "multi_file_reader", result.multi_file_reader); deserializer.ReadProperty>(143, "buffer_size_option", result.buffer_size_option); return result; From 97d9063a6ea4265ab7957b6e7ace01890b59e51b Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Mon, 27 Jan 2025 00:34:25 +0000 Subject: [PATCH 4/4] Update vendored DuckDB sources to c415a332 --- .../src/common/compressed_file_system.cpp | 6 +++- src/duckdb/src/common/multi_file_reader.cpp | 2 +- .../execution/index/fixed_size_allocator.cpp | 3 +- src/duckdb/src/function/table/read_csv.cpp | 3 +- .../function/table/version/pragma_version.cpp | 6 ++-- .../expression/transform_function.cpp | 28 +++++++++++++++++-- 6 files changed, 38 insertions(+), 10 deletions(-) diff --git a/src/duckdb/src/common/compressed_file_system.cpp b/src/duckdb/src/common/compressed_file_system.cpp index 7d2e2cfde..b8f032a65 100644 --- a/src/duckdb/src/common/compressed_file_system.cpp +++ b/src/duckdb/src/common/compressed_file_system.cpp @@ -11,7 +11,11 @@ CompressedFile::CompressedFile(CompressedFileSystem &fs, unique_ptr } CompressedFile::~CompressedFile() { - CompressedFile::Close(); + try { + // stream_wrapper->Close() might throw + CompressedFile::Close(); + } catch (...) { // NOLINT - cannot throw in exception + } } void CompressedFile::Initialize(bool write) { diff --git a/src/duckdb/src/common/multi_file_reader.cpp b/src/duckdb/src/common/multi_file_reader.cpp index 813d13f70..e03f667f3 100644 --- a/src/duckdb/src/common/multi_file_reader.cpp +++ b/src/duckdb/src/common/multi_file_reader.cpp @@ -125,7 +125,7 @@ bool MultiFileReader::ParseOption(const string &key, const Value &val, MultiFile "'hive_types' only accepts a STRUCT('name':VARCHAR, ...), but '%s' was provided", val.type().ToString()); } - // verify that that all the children of the struct value are VARCHAR + // verify that all the children of the struct value are VARCHAR auto &children = StructValue::GetChildren(val); for (idx_t i = 0; i < children.size(); i++) { const Value &child = children[i]; diff --git a/src/duckdb/src/execution/index/fixed_size_allocator.cpp b/src/duckdb/src/execution/index/fixed_size_allocator.cpp index f026ddd98..860e45d46 100644 --- a/src/duckdb/src/execution/index/fixed_size_allocator.cpp +++ b/src/duckdb/src/execution/index/fixed_size_allocator.cpp @@ -227,8 +227,7 @@ void FixedSizeAllocator::FinalizeVacuum() { for (auto &buffer_id : vacuum_buffers) { D_ASSERT(buffers.find(buffer_id) != buffers.end()); - auto &buffer = buffers.find(buffer_id)->second; - D_ASSERT(buffer->InMemory()); + D_ASSERT(buffers.find(buffer_id)->second->InMemory()); buffers.erase(buffer_id); } vacuum_buffers.clear(); diff --git a/src/duckdb/src/function/table/read_csv.cpp b/src/duckdb/src/function/table/read_csv.cpp index db9bf4d8d..e5bce2264 100644 --- a/src/duckdb/src/function/table/read_csv.cpp +++ b/src/duckdb/src/function/table/read_csv.cpp @@ -374,7 +374,8 @@ void CSVComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionD MultiFileReader().ComplexFilterPushdown(context, file_list, data.options.file_options, info, filters); if (filtered_list) { data.files = filtered_list->GetAllFiles(); - MultiFileReader::PruneReaders(data, file_list); + SimpleMultiFileList simple_filtered_list(data.files); + MultiFileReader::PruneReaders(data, simple_filtered_list); } else { data.files = file_list.GetAllFiles(); } diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index cc12550c4..0edf48f50 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4987" +#define DUCKDB_PATCH_VERSION "4-dev5006" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4987" +#define DUCKDB_VERSION "v1.1.4-dev5006" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "dc4b8892e2" +#define DUCKDB_SOURCE_ID "e70015aeac" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/parser/transform/expression/transform_function.cpp b/src/duckdb/src/parser/transform/expression/transform_function.cpp index d33bf00d0..6684d489e 100644 --- a/src/duckdb/src/parser/transform/expression/transform_function.cpp +++ b/src/duckdb/src/parser/transform/expression/transform_function.cpp @@ -1,8 +1,6 @@ #include "duckdb/common/enum_util.hpp" #include "duckdb/common/string_util.hpp" -#include "duckdb/common/to_string.hpp" #include "duckdb/parser/expression/case_expression.hpp" -#include "duckdb/parser/expression/cast_expression.hpp" #include "duckdb/parser/expression/constant_expression.hpp" #include "duckdb/parser/expression/function_expression.hpp" @@ -47,6 +45,27 @@ static inline WindowBoundary TransformFrameOption(const int frameOptions, const } } +static bool IsExcludableWindowFunction(ExpressionType type) { + switch (type) { + case ExpressionType::WINDOW_FIRST_VALUE: + case ExpressionType::WINDOW_LAST_VALUE: + case ExpressionType::WINDOW_NTH_VALUE: + case ExpressionType::WINDOW_AGGREGATE: + return true; + case ExpressionType::WINDOW_RANK_DENSE: + case ExpressionType::WINDOW_RANK: + case ExpressionType::WINDOW_PERCENT_RANK: + case ExpressionType::WINDOW_ROW_NUMBER: + case ExpressionType::WINDOW_NTILE: + case ExpressionType::WINDOW_CUME_DIST: + case ExpressionType::WINDOW_LEAD: + case ExpressionType::WINDOW_LAG: + return false; + default: + throw InternalException("Unknown excludable window type %s", ExpressionTypeToString(type).c_str()); + } +} + void Transformer::TransformWindowFrame(duckdb_libpgquery::PGWindowDef &window_spec, WindowExpression &expr) { // finally: specifics of bounds expr.start_expr = TransformExpression(window_spec.startOffset); @@ -101,6 +120,11 @@ void Transformer::TransformWindowFrame(duckdb_libpgquery::PGWindowDef &window_sp } else { expr.exclude_clause = WindowExcludeMode::NO_OTHER; } + + if (expr.exclude_clause != WindowExcludeMode::NO_OTHER && !expr.arg_orders.empty() && + !IsExcludableWindowFunction(expr.type)) { + throw ParserException("EXCLUDE is not supported for the window function \"%s\"", expr.function_name.c_str()); + } } bool Transformer::ExpressionIsEmptyStar(ParsedExpression &expr) {