diff --git a/src/duckdb/extension/core_functions/scalar/generic/type_functions.cpp b/src/duckdb/extension/core_functions/scalar/generic/type_functions.cpp index b1e087562..b43d2660e 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/type_functions.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/type_functions.cpp @@ -111,8 +111,8 @@ static unique_ptr BindMakeTypeFunctionExpression(FunctionBindExpress auto type_name = args.front().second.GetValue(); auto qualified_name = QualifiedName::Parse(type_name); - auto unbound_type = LogicalType::UNBOUND(make_uniq(qualified_name.catalog, qualified_name.schema, - qualified_name.name, std::move(type_args))); + auto unbound_type = LogicalType::UNBOUND(make_uniq( + qualified_name.Catalog(), qualified_name.Schema(), qualified_name.Name(), std::move(type_args))); // Bind the unbound type auto binder = Binder::CreateBinder(input.context); diff --git a/src/duckdb/extension/parquet/column_reader.cpp b/src/duckdb/extension/parquet/column_reader.cpp index f82f8f238..c9ad5bd72 100644 --- a/src/duckdb/extension/parquet/column_reader.cpp +++ b/src/duckdb/extension/parquet/column_reader.cpp @@ -1,6 +1,7 @@ #include "column_reader.hpp" #include "duckdb/common/vector/flat_vector.hpp" +#include "duckdb/common/vector/constant_vector.hpp" #include #include @@ -206,6 +207,19 @@ idx_t ColumnReader::GroupRowsAvailable() { return group_rows_available; } +bool ColumnReader::AllValuesAreNull() const { + // for repeated columns the null_count/num_values statistics do not reliably indicate that every value is NULL + // (num_values counts leaf slots, not rows), so we only trust this for non-repeated columns + if (MaxRepeat() != 0 || !chunk || !chunk->__isset.meta_data) { + return false; + } + auto &chunk_meta = chunk->meta_data; + if (!chunk_meta.__isset.statistics || !chunk_meta.statistics.__isset.null_count) { + return false; + } + return chunk_meta.statistics.null_count == chunk_meta.num_values; +} + void 
ColumnReader::PlainSkip(ByteBuffer &plain_data, uint8_t *defines, idx_t num_values) { throw NotImplementedException("PlainSkip not implemented"); } @@ -731,6 +745,20 @@ void ColumnReader::ReadData(idx_t read_now, data_ptr_t define_out, data_ptr_t re } // read the defines/repeats const auto all_valid = PrepareRead(read_now, define_out, repeat_out, result_offset); + if (!IsRoot() && AllValuesAreNull()) { + // every value is NULL: the parent still needs the define/repeat levels we just read, but there are no + // values to decode - set the result to NULL and skip the encoding read + if (result_offset == 0) { + // we own the entire vector - emit a constant NULL + ConstantVector::SetNull(result, count_t(read_now)); + } else { + for (idx_t i = 0; i < read_now; i++) { + FlatVector::SetNull(result, result_offset + i, true); + } + } + page_rows_available -= read_now; + return; + } // read the data according to the encoder const auto define_ptr = all_valid ? nullptr : static_cast(define_out); switch (encoding) { @@ -790,6 +818,12 @@ idx_t ColumnReader::ReadInternal(ColumnReaderInput &input, Vector &result) { } idx_t ColumnReader::Read(ColumnReaderInput &input, Vector &result) { + if (IsRoot() && AllValuesAreNull()) { + // a top-level column that is entirely NULL - emit a constant NULL vector without reading anything. 
+ // (nested columns are handled in ReadData: they still need to emit their define/repeat levels) + ConstantVector::SetNull(result, count_t(input.num_values)); + return input.num_values; + } BeginRead(input.define_out, input.repeat_out); return ReadInternal(input, result); } @@ -797,7 +831,7 @@ idx_t ColumnReader::Read(ColumnReaderInput &input, Vector &result) { void ColumnReader::Select(ColumnReaderInput &input, Vector &result, const SelectionVector &sel, idx_t approved_tuple_count) { auto &num_values = input.num_values; - if (SupportsDirectSelect() && approved_tuple_count < num_values) { + if (SupportsDirectSelect() && approved_tuple_count < num_values && !(IsRoot() && AllValuesAreNull())) { DirectSelect(input, result, sel, approved_tuple_count); return; } @@ -834,7 +868,7 @@ void ColumnReader::Filter(ColumnReaderInput &input, Vector &result, const TableF TableFilterState &filter_state, SelectionVector &sel, idx_t &approved_tuple_count, bool is_first_filter) { auto &num_values = input.num_values; - if (SupportsDirectFilter() && is_first_filter) { + if (SupportsDirectFilter() && is_first_filter && !(IsRoot() && AllValuesAreNull())) { DirectFilter(input, result, filter, filter_state, sel, approved_tuple_count); return; } diff --git a/src/duckdb/extension/parquet/decoder/delta_byte_array_decoder.cpp b/src/duckdb/extension/parquet/decoder/delta_byte_array_decoder.cpp index 781c9cdaa..16842d46b 100644 --- a/src/duckdb/extension/parquet/decoder/delta_byte_array_decoder.cpp +++ b/src/duckdb/extension/parquet/decoder/delta_byte_array_decoder.cpp @@ -6,7 +6,9 @@ #include "column_reader.hpp" #include "parquet_reader.hpp" +#include "duckdb/common/exception.hpp" #include "duckdb/common/helper.hpp" +#include "duckdb/common/operator/multiply.hpp" #include "duckdb/common/unique_ptr.hpp" #include "duckdb/common/vector.hpp" #include "parquet_column_schema.hpp" @@ -26,7 +28,12 @@ void DeltaByteArrayDecoder::ReadDbpData(Allocator &allocator, ResizeableBuffer & auto decoder = 
make_uniq(buffer.ptr, buffer.len); value_count = decoder->TotalValues(); result_buffer.reset(); - result_buffer.resize(allocator, sizeof(uint32_t) * value_count); + // value_count is read from the file, so the buffer size can overflow on a corrupt input + idx_t result_size; + if (!TryMultiplyOperator::Operation(value_count, sizeof(uint32_t), result_size)) { + throw InvalidInputException("DELTA_BYTE_ARRAY value count is too large - corrupt file?"); + } + result_buffer.resize(allocator, result_size); decoder->GetBatch(result_buffer.ptr, value_count); decoder->Finalize(); buffer.inc(buffer.len - decoder->BufferPtr().len); diff --git a/src/duckdb/extension/parquet/include/column_reader.hpp b/src/duckdb/extension/parquet/include/column_reader.hpp index 40bef960d..a99c81f21 100644 --- a/src/duckdb/extension/parquet/include/column_reader.hpp +++ b/src/duckdb/extension/parquet/include/column_reader.hpp @@ -140,6 +140,16 @@ class ColumnReader { inline bool IsSkipped() const { return !chunk; } + //! Set the parent reader (the composite reader this reader is a child of). Top-level readers have no parent. + void SetParent(ColumnReader &parent_p) { + parent = parent_p; + } + //! Whether this is a top-level reader (i.e. it has no parent reader) + bool IsRoot() const { + return !parent; + } + //! Whether every value in the current row group's column chunk is NULL (according to its statistics) + bool AllValuesAreNull() const; void InitializeCryptoMetadata(const duckdb_parquet::EncryptionAlgorithm &encryption_algorithm, idx_t row_group_ordinal_p) { @@ -364,6 +374,8 @@ class ColumnReader { void DecompressInternal(CompressionCodec::type codec, const_data_ptr_t src, idx_t src_size, data_ptr_t dst, idx_t dst_size); const ColumnChunk *chunk = nullptr; + //! The composite reader this reader is a child of (struct/list/variant/expression). Null for top-level readers. 
+ optional_ptr parent; TProtocol *protocol; idx_t page_rows_available; diff --git a/src/duckdb/extension/parquet/include/parquet_reader.hpp b/src/duckdb/extension/parquet/include/parquet_reader.hpp index 35c254970..1bdfefd02 100644 --- a/src/duckdb/extension/parquet/include/parquet_reader.hpp +++ b/src/duckdb/extension/parquet/include/parquet_reader.hpp @@ -188,28 +188,21 @@ struct ParquetPrefetchMetrics { } }; -//! Where the scan is in its async execution. -enum class ParquetScanState : uint8_t { - SCHEDULE, //! schedule the next row group's I/O - PROCESS, //! process the current row group into a output chunk - RESUME_PAYLOAD, //! resume decoding the payload columns after the filter-column I/O blocked - FINISHED //! the scan is done -}; - struct ParquetReaderScanState { public: ColumnReader &GetColumnReader(idx_t i); public: - vector group_idx_list; - int64_t current_group; + //! The row group index this scan state decodes + idx_t group_index; idx_t offset_in_group; idx_t group_offset; shared_ptr file_handle; vector> column_readers; duckdb_base_std::unique_ptr thrift_file_proto; - ParquetScanState scan_state; + //! 
Set while resuming payload-column decode after the filter-column I/O blocked (vs a fresh row-group pass) + bool resuming_payload = false; SelectionVector sel; ResizeableBuffer define_buf; @@ -334,14 +327,15 @@ class ParquetReader : public BaseFileReader { LocalTableFunctionState &lstate) override; void PrepareScan(ClientContext &context, GlobalTableFunctionState &gstate_p, LocalTableFunctionState &lstate_p) override; + AsyncResult ScheduleIO(ClientContext &context, GlobalTableFunctionState &gstate, + LocalTableFunctionState &lstate) override; AsyncResult Scan(ClientContext &context, GlobalTableFunctionState &global_state, LocalTableFunctionState &local_state, DataChunk &chunk) override; void FinishFile(ClientContext &context, GlobalTableFunctionState &gstate_p) override; double GetProgressInFile(ClientContext &context) override; public: - void InitializeScan(ClientContext &context, ParquetReaderScanState &state, vector groups_to_read) const; - AsyncResult Scan(ClientContext &context, ParquetReaderScanState &state, DataChunk &output); + void InitializeScan(ClientContext &context, ParquetReaderScanState &state, idx_t group_to_read) const; idx_t NumRows() const; idx_t NumRowGroups() const; @@ -410,10 +404,14 @@ class ParquetReader : public BaseFileReader { ParquetPrefetchStrategy ColumnWisePrefetch(ParquetReaderScanState &state, ThriftFileTransport &trans, const duckdb_parquet::RowGroup &group, bool filters_look_unselective, bool log_prefetch) const; - //! Switch to the next row group and schedule its I/O (prepare column buffers, prefetch the bytes). - AsyncResult Schedule(ClientContext &context, ParquetReaderScanState &state, DataChunk &result, bool log_prefetch); + //! Register the read-heads to fetch, and select prefetch strategy + ParquetPrefetchStrategy RegisterRowGroupReads(ClientContext &context, ParquetReaderScanState &state); + //! 
Build the async I/O tasks for the registered read-heads + AsyncResult ScheduleRowGroupReads(ParquetReaderScanState &state, ParquetPrefetchStrategy strategy); //! Process up to STANDARD_VECTOR_SIZE rows of the current row group into result. - AsyncResult Process(ParquetReaderScanState &state, DataChunk &result, bool log_prefetch); + AsyncResult Process(ClientContext &context, ParquetReaderScanState &state, DataChunk &result); + //! Log and finalize the row group's prefetch metrics + void FinishRowGroup(ClientContext &context, ParquetReaderScanState &state, bool log_prefetch); //! Process filters AsyncResult ProcessFilters(ParquetReaderScanState &state, DataChunk &result, idx_t scan_count, uint8_t *define_ptr, uint8_t *repeat_ptr, bool log_prefetch); diff --git a/src/duckdb/extension/parquet/include/reader/variant/parquet_variant_iterator.hpp b/src/duckdb/extension/parquet/include/reader/variant/parquet_variant_iterator.hpp index 28393e5cb..c4097f625 100644 --- a/src/duckdb/extension/parquet/include/reader/variant/parquet_variant_iterator.hpp +++ b/src/duckdb/extension/parquet/include/reader/variant/parquet_variant_iterator.hpp @@ -35,6 +35,9 @@ struct ShreddedGroupView { bool has_typed_value = false; ParquetGroupKind kind = ParquetGroupKind::LEAF; LogicalType typed_type; + //! The raw 'typed_value' Vector (LEAF primitive / OBJECT struct / ARRAY list) - used to Reference it + //! directly into a shredded output (see ParquetVariantConversion::ConvertToShredded) + optional_ptr typed_value_vec; //! LEAF: the typed primitive values (type-erased; read typed via GetData where T is known) UnifiedVectorFormat leaf_format; @@ -222,6 +225,13 @@ class ParquetVariantIterator { //! The (lazily-decoded) Variant metadata of the current row const VariantMetadata &GetMetadata() const; + //! The recursive view of the Parquet group tree (used by the shredded-conversion path) + const ShreddedGroupView &GetRootView() const { + return root_view; + } + //! 
Emit the binary value in ['data', 'end') of the current row into the builder (BeginRow must precede) + void EmitBinary(const_data_ptr_t data, const_data_ptr_t end, VariantBuilder &builder) const; + private: ShreddedGroupView root_view; @@ -231,20 +241,11 @@ class ParquetVariantIterator { mutable unique_ptr current_metadata; }; -//! BuildVariant source wrapping a ParquetVariantIterator (mirrors VariantIteratorSource in core) -struct ParquetVariantIteratorSource { - explicit ParquetVariantIteratorSource(ParquetVariantIterator &iterator) : iterator(iterator) { - } - bool Emit(idx_t row, VariantBuilder &builder); - - ParquetVariantIterator &iterator; -}; - -//! Convert a shredded Parquet VARIANT (metadata + group) into the canonical VARIANT 'result' in a single -//! pass through the shared VariantBuilder +//! Convert a Parquet VARIANT (metadata + group) into DuckDB's SHREDDED VARIANT format: the Parquet +//! typed_value columns are referenced directly where they map exactly, and leftover/binary 'value' data +//! (including the entire value when there is no 'typed_value' at all) goes into the unshredded component. class ParquetVariantConversion { public: - static void Convert(Vector &metadata, Vector &group, Vector &result, idx_t count); //! Convert binary Variant values (each row being the metadata blob followed by the value blob) into the //! 
canonical VARIANT 'result' in a single pass static void ConvertBinary(Vector &metadata_and_value, Vector &result, idx_t count); diff --git a/src/duckdb/extension/parquet/include/reader/variant_column_reader.hpp b/src/duckdb/extension/parquet/include/reader/variant_column_reader.hpp index 55865f536..5e82931c5 100644 --- a/src/duckdb/extension/parquet/include/reader/variant_column_reader.hpp +++ b/src/duckdb/extension/parquet/include/reader/variant_column_reader.hpp @@ -10,6 +10,7 @@ #include "column_reader.hpp" #include "reader/templated_column_reader.hpp" +#include "duckdb/common/types/data_chunk.hpp" namespace duckdb { @@ -33,6 +34,8 @@ class VariantColumnReader : public ColumnReader { void Skip(idx_t num_values) override; idx_t GroupRowsAvailable() override; + void Convert(Vector &metadata, Vector &group, Vector &result, idx_t count); + void PrepareChunk(DataChunk &chunk, idx_t &capacity, const vector &types, idx_t count); uint64_t TotalCompressedSize() override; void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) override; static bool TypedValueLayoutToType(const LogicalType &typed_value, LogicalType &logical_type); @@ -40,6 +43,12 @@ class VariantColumnReader : public ColumnReader { protected: idx_t metadata_reader_idx; idx_t value_reader_idx; + + DataChunk intermediate_chunk; + idx_t intermediate_capacity = 0; + + DataChunk shredded_chunk; + idx_t shredded_capacity = 0; }; } // namespace duckdb diff --git a/src/duckdb/extension/parquet/parquet_multi_file_info.cpp b/src/duckdb/extension/parquet/parquet_multi_file_info.cpp index 258dead1a..7383d3228 100644 --- a/src/duckdb/extension/parquet/parquet_multi_file_info.cpp +++ b/src/duckdb/extension/parquet/parquet_multi_file_info.cpp @@ -84,20 +84,17 @@ struct ParquetReadBindData : public TableFunctionData { }; struct ParquetReadGlobalState : public GlobalTableFunctionState { - explicit ParquetReadGlobalState(optional_ptr op_p) - : row_group_index(0), batch_index(0), op(op_p) { + explicit 
ParquetReadGlobalState(optional_ptr op_p) : row_group_index(0), op(op_p) { } //! Index of row group within file currently up for scanning idx_t row_group_index; - //! Batch index of the next row group to be scanned - idx_t batch_index; //! (Optional) pointer to physical operator performing the scan optional_ptr op; }; struct ParquetReadLocalState : public LocalTableFunctionState { ParquetReaderScanState scan_state; - vector group_indexes; + idx_t group_index; }; static void ParseFileRowNumberOption(MultiFileReaderBindData &bind_data, ParquetOptions &options, @@ -763,15 +760,24 @@ bool ParquetReader::TryInitializeScan(ClientContext &context, GlobalTableFunctio return false; } // The current reader has rowgroups left to be scanned - lstate.group_indexes = {gstate.row_group_index}; + lstate.group_index = gstate.row_group_index; gstate.row_group_index++; return true; } void ParquetReader::PrepareScan(ClientContext &context, GlobalTableFunctionState &gstate_p, LocalTableFunctionState &lstate_p) { + auto &gstate = gstate_p.Cast(); + auto &lstate = lstate_p.Cast(); + lstate.scan_state.op = gstate.op; + InitializeScan(context, lstate.scan_state, lstate.group_index); +} + +AsyncResult ParquetReader::ScheduleIO(ClientContext &context, GlobalTableFunctionState &gstate_p, + LocalTableFunctionState &lstate_p) { auto &lstate = lstate_p.Cast(); - InitializeScan(context, lstate.scan_state, lstate.group_indexes); + auto strategy = RegisterRowGroupReads(context, lstate.scan_state); + return ScheduleRowGroupReads(lstate.scan_state, strategy); } void ParquetReader::FinishFile(ClientContext &context, GlobalTableFunctionState &gstate_p) { @@ -789,10 +795,8 @@ AsyncResult ParquetReader::Scan(ClientContext &context, GlobalTableFunctionState } } #endif - auto &gstate = gstate_p.Cast(); auto &local_state = local_state_p.Cast(); - local_state.scan_state.op = gstate.op; - return Scan(context, local_state.scan_state, chunk); + return Process(context, local_state.scan_state, chunk); } 
unique_ptr ParquetMultiFileInfo::Copy() { diff --git a/src/duckdb/extension/parquet/parquet_reader.cpp b/src/duckdb/extension/parquet/parquet_reader.cpp index facb371e5..6fcf2cb71 100644 --- a/src/duckdb/extension/parquet/parquet_reader.cpp +++ b/src/duckdb/extension/parquet/parquet_reader.cpp @@ -1259,9 +1259,8 @@ static idx_t GetRowGroupOffset(const ParquetReader &reader, idx_t group_idx) { const ParquetRowGroup &ParquetReader::GetGroup(ParquetReaderScanState &state) { auto file_meta_data = GetFileMetadata(); - D_ASSERT(state.current_group >= 0 && (idx_t)state.current_group < state.group_idx_list.size()); - D_ASSERT(state.group_idx_list[state.current_group] < file_meta_data->row_groups.size()); - return file_meta_data->row_groups[state.group_idx_list[state.current_group]]; + D_ASSERT(state.group_index < file_meta_data->row_groups.size()); + return file_meta_data->row_groups[state.group_index]; } uint64_t ParquetReader::GetGroupCompressedSize(ParquetReaderScanState &state) { @@ -1371,7 +1370,7 @@ void ParquetReader::PrepareRowGroupBuffer(ClientContext &context, ParquetReaderS } if (filters) { - auto stats = column_reader.Stats(state.group_idx_list[state.current_group], group.columns); + auto stats = column_reader.Stats(state.group_index, group.columns); // filters contain output chunk index, not file col idx! 
auto filter_entry = filters->TryGetFilterByColumnIndex(col_idx); if (stats && filter_entry) { @@ -1420,7 +1419,7 @@ void ParquetReader::PrepareRowGroupBuffer(ClientContext &context, ParquetReaderS } } - column_reader.InitializeRead(state.group_idx_list[state.current_group], group.columns, *state.thrift_file_proto); + column_reader.InitializeRead(state.group_index, group.columns, *state.thrift_file_proto); } idx_t ParquetReader::NumRows() const { @@ -1451,13 +1450,11 @@ ParquetScanFilter::ParquetScanFilter(ClientContext &context, ProjectionIndex fil ParquetScanFilter::~ParquetScanFilter() { } -void ParquetReader::InitializeScan(ClientContext &context, ParquetReaderScanState &state, - vector groups_to_read) const { - state.current_group = -1; - state.scan_state = ParquetScanState::SCHEDULE; +void ParquetReader::InitializeScan(ClientContext &context, ParquetReaderScanState &state, idx_t group_to_read) const { + state.resuming_payload = false; state.offset_in_group = 0; state.filter_count = 0; - state.group_idx_list = std::move(groups_to_read); + state.group_index = group_to_read; state.sel.Initialize(STANDARD_VECTOR_SIZE); if (!state.file_handle || state.file_handle->GetPath() != file_handle->GetPath()) { auto flags = FileFlags::FILE_FLAGS_READ; @@ -1716,31 +1713,11 @@ ParquetPrefetchStrategy ParquetReader::ColumnWisePrefetch(ParquetReaderScanState return strategy; } -AsyncResult ParquetReader::Scan(ClientContext &context, ParquetReaderScanState &state, DataChunk &result) { +ParquetPrefetchStrategy ParquetReader::RegisterRowGroupReads(ClientContext &context, ParquetReaderScanState &state) { const bool log_prefetch = Logger::Get(context).ShouldLog(ParquetPrefetchLogType::NAME, ParquetPrefetchLogType::LEVEL); - - switch (state.scan_state) { - case ParquetScanState::FINISHED: - result.Reset(); - return SourceResultType::FINISHED; - case ParquetScanState::SCHEDULE: - result.Reset(); - return Schedule(context, state, result, log_prefetch); - case ParquetScanState::PROCESS: 
- result.Reset(); - return Process(state, result, log_prefetch); - case ParquetScanState::RESUME_PAYLOAD: - return Process(state, result, log_prefetch); - default: - throw InternalException("Unexpected ParquetScanState"); - } -} - -AsyncResult ParquetReader::Schedule(ClientContext &context, ParquetReaderScanState &state, DataChunk &result, - bool log_prefetch) { - state.current_group++; state.offset_in_group = 0; + ParquetPrefetchStrategy strategy = ParquetPrefetchStrategy::NONE; auto &trans = reinterpret_cast(*state.thrift_file_proto->getTransport()); trans.ClearPrefetch(); @@ -1750,18 +1727,8 @@ AsyncResult ParquetReader::Schedule(ClientContext &context, ParquetReaderScanSta trans.SetAcceptedColumnGap(DetermineAcceptedColumnGap(context, state)); } - if (log_prefetch && state.prefetch_metrics.filter_ran && state.current_group > 0) { - LogRowGroupPrefetch(context, file.path, state.group_idx_list[state.current_group - 1], state); - } - state.prefetch_metrics.FinalizeRowGroupSelectivity(); - - if ((idx_t)state.current_group == state.group_idx_list.size()) { - state.scan_state = ParquetScanState::FINISHED; - return SourceResultType::FINISHED; - } - // TODO: only need this if we have a deletion vector? - state.group_offset = GetRowGroupOffset(*this, state.group_idx_list[state.current_group]); + state.group_offset = GetRowGroupOffset(*this, state.group_index); uint64_t to_scan_compressed_bytes = 0; for (idx_t i = 0; i < column_ids.size(); i++) { @@ -1780,11 +1747,10 @@ AsyncResult ParquetReader::Schedule(ClientContext &context, ParquetReaderScanSta if (state.op) { DUCKDB_LOG(context, PhysicalOperatorLogType, *state.op, "ParquetReader", row_group_skipped ? 
"SkipRowGroup" : "ReadRowGroup", - {{"file", file.path}, {"row_group_id", to_string(state.group_idx_list[state.current_group])}}); + {{"file", file.path}, {"row_group_id", to_string(state.group_index)}}); } - vector> io_tasks; - if (state.prefetch_mode && state.offset_in_group != (idx_t)group.num_rows) { + if (state.prefetch_mode && !row_group_skipped) { uint64_t total_row_group_span = GetGroupSpan(state); double scan_percentage = (double)(to_scan_compressed_bytes) / static_cast(total_row_group_span); @@ -1802,7 +1768,6 @@ AsyncResult ParquetReader::Schedule(ClientContext &context, ParquetReaderScanSta } if (state.prefetch_mode) { // whole group and column wise prefetch fetch eagerly, filter prefetch fetches lazily - ParquetPrefetchStrategy strategy = ParquetPrefetchStrategy::NONE; if (parquet_options.prefetch_strategy == ParquetPrefetchStrategyOption::WHOLE_GROUP) { strategy = WholeGroupPrefetch(state, trans, group, total_row_group_span, log_prefetch); } else { @@ -1824,35 +1789,49 @@ AsyncResult ParquetReader::Schedule(ClientContext &context, ParquetReaderScanSta strategy = ColumnWisePrefetch(state, trans, group, filters_look_unselective, log_prefetch); } } - auto read_head_count = trans.GetReadHeads().size(); - switch (strategy) { - case ParquetPrefetchStrategy::PREFETCH_FILTERS: - // schedule only the filter columns' I/O, they are last in our list - io_tasks = CollectIOTasks(trans, state.file_handle, read_head_count - state.filter_head_count, - read_head_count); - break; - case ParquetPrefetchStrategy::WHOLE_GROUP: - case ParquetPrefetchStrategy::COLUMN_WISE_EAGER: - // schedule the I/O for all columns up front - io_tasks = CollectIOTasks(trans, state.file_handle, 0, read_head_count); - break; - default: - throw InternalException("Unexpected parquet prefetch strategy when scheduling I/O"); - } if (log_prefetch) { state.prefetch_metrics.logger.accepted_column_gap = trans.GetAcceptedColumnGap(); } } } - result.Reset(); - // a skipped row group has no rows to 
decode, so we loop back to schedule the next one - state.scan_state = row_group_skipped ? ParquetScanState::SCHEDULE : ParquetScanState::PROCESS; + state.resuming_payload = false; + return strategy; +} + +AsyncResult ParquetReader::ScheduleRowGroupReads(ParquetReaderScanState &state, ParquetPrefetchStrategy strategy) { + if (strategy == ParquetPrefetchStrategy::NONE) { + // nothing to prefetch + return SourceResultType::HAVE_MORE_OUTPUT; + } + auto &trans = reinterpret_cast(*state.thrift_file_proto->getTransport()); + auto read_head_count = trans.GetReadHeads().size(); + vector> io_tasks; + switch (strategy) { + case ParquetPrefetchStrategy::PREFETCH_FILTERS: + // schedule only the filter columns' I/O, they are last in our list + io_tasks = CollectIOTasks(trans, state.file_handle, read_head_count - state.filter_head_count, read_head_count); + break; + case ParquetPrefetchStrategy::WHOLE_GROUP: + case ParquetPrefetchStrategy::COLUMN_WISE_EAGER: + // schedule the I/O for all columns up front + io_tasks = CollectIOTasks(trans, state.file_handle, 0, read_head_count); + break; + default: + throw InternalException("Unexpected parquet prefetch strategy when scheduling I/O"); + } if (!io_tasks.empty()) { return AsyncResult(std::move(io_tasks), TaskSchedulerType::ASYNC); } return SourceResultType::HAVE_MORE_OUTPUT; } +void ParquetReader::FinishRowGroup(ClientContext &context, ParquetReaderScanState &state, bool log_prefetch) { + if (log_prefetch && state.prefetch_metrics.filter_ran) { + LogRowGroupPrefetch(context, file.path, state.group_index, state); + } + state.prefetch_metrics.FinalizeRowGroupSelectivity(); +} + idx_t ParquetReader::EvaluateFilters(ParquetReaderScanState &state, DataChunk &result, idx_t scan_count, uint8_t *define_ptr, uint8_t *repeat_ptr, bool log_prefetch) { idx_t filter_count = result.size(); @@ -1956,13 +1935,13 @@ vector> ParquetReader::ScheduleRemainingColumns(ParquetRea // no payload to do return {}; } - state.scan_state = 
ParquetScanState::RESUME_PAYLOAD; + state.resuming_payload = true; return io_tasks; } AsyncResult ParquetReader::ProcessFilters(ParquetReaderScanState &state, DataChunk &result, idx_t scan_count, uint8_t *define_ptr, uint8_t *repeat_ptr, bool log_prefetch) { - if (state.scan_state == ParquetScanState::PROCESS) { + if (!state.resuming_payload) { state.filter_count = EvaluateFilters(state, result, scan_count, define_ptr, repeat_ptr, log_prefetch); auto io_tasks = ScheduleRemainingColumns(state, result, scan_count); if (!io_tasks.empty()) { @@ -1976,16 +1955,18 @@ AsyncResult ParquetReader::ProcessFilters(ParquetReaderScanState &state, DataChu return SourceResultType::HAVE_MORE_OUTPUT; } -AsyncResult ParquetReader::Process(ParquetReaderScanState &state, DataChunk &result, bool log_prefetch) { +AsyncResult ParquetReader::Process(ClientContext &context, ParquetReaderScanState &state, DataChunk &result) { + const bool log_prefetch = + Logger::Get(context).ShouldLog(ParquetPrefetchLogType::NAME, ParquetPrefetchLogType::LEVEL); const idx_t group_num_rows = GetGroup(state).num_rows; auto scan_count = MinValue(STANDARD_VECTOR_SIZE, group_num_rows - state.offset_in_group); - if (state.scan_state == ParquetScanState::PROCESS) { + if (!state.resuming_payload) { result.SetChildCardinality(scan_count); } if (scan_count == 0) { - state.scan_state = ParquetScanState::FINISHED; - // end of last group, we are done + // the row group is fully consumed + FinishRowGroup(context, state, log_prefetch); return SourceResultType::FINISHED; } @@ -2025,8 +2006,7 @@ AsyncResult ParquetReader::Process(ParquetReaderScanState &state, DataChunk &res result.SetChildCardinality(result.size()); rows_read += scan_count; state.offset_in_group += scan_count; - // once the group is fully consumed we schedule the next one, otherwise we keep processing this group - state.scan_state = state.offset_in_group >= group_num_rows ? 
ParquetScanState::SCHEDULE : ParquetScanState::PROCESS; + state.resuming_payload = false; return SourceResultType::HAVE_MORE_OUTPUT; } diff --git a/src/duckdb/extension/parquet/parquet_statistics.cpp b/src/duckdb/extension/parquet/parquet_statistics.cpp index e789f657c..587dc6d41 100644 --- a/src/duckdb/extension/parquet/parquet_statistics.cpp +++ b/src/duckdb/extension/parquet/parquet_statistics.cpp @@ -361,21 +361,30 @@ bool IsVariantNull(const string &str) { return str.size() == 1 && str[0] == '\0'; } -static bool ConvertUnshreddedStats(BaseStatistics &result, optional_ptr input_p) { +// The conversion is best-effort and non-fatal: when the statistics of a particular (sub)node cannot be +// converted, that node is left as UNKNOWN stats (which makes it "not fully shredded", so consumers fall +// back to a full scan for that field) instead of discarding the statistics of the entire variant. +static void ConvertUnshreddedStats(BaseStatistics &result, optional_ptr input_p) { D_ASSERT(result.GetType().id() == LogicalTypeId::UINTEGER); if (!input_p) { - return false; + //! No overlay statistics -> conservatively unknown (this node is not "fully shredded") + result.Copy(BaseStatistics::CreateUnknown(LogicalType::UINTEGER)); + return; } auto &input = *input_p; D_ASSERT(input.GetType().id() == LogicalTypeId::BLOB); result.CopyValidity(input); if (!result.CanHaveNoNull()) { - return true; + //! The overlay is entirely NULL -> no overlay values -> fully shredded + return; } if (!StringStats::HasMinMax(input)) { - return false; + //! The overlay may contain values but we can't tell what they are (e.g. the writer dropped min/max for + //! 
a large blob value) -> conservatively unknown so this node is treated as not fully shredded + result.Copy(BaseStatistics::CreateUnknown(LogicalType::UINTEGER)); + return; } auto min = StringStats::Min(input); @@ -386,12 +395,12 @@ static bool ConvertUnshreddedStats(BaseStatistics &result, optional_ptr(result, 0); result.SetHasNoNull(); } - return true; + //! else: there are real overlay values -> leave min/max unset, so this node is not fully shredded } -static bool ConvertShreddedStats(BaseStatistics &result, optional_ptr input_p); +static void ConvertShreddedStats(BaseStatistics &result, optional_ptr input_p); -static bool ConvertShreddedStatsItem(BaseStatistics &result, BaseStatistics &input) { +static void ConvertShreddedStatsItem(BaseStatistics &result, BaseStatistics &input) { D_ASSERT(result.GetType().id() == LogicalTypeId::STRUCT); D_ASSERT(input.GetType().id() == LogicalTypeId::STRUCT); @@ -403,41 +412,37 @@ static bool ConvertShreddedStatsItem(BaseStatistics &result, BaseStatistics &inp auto &value_stats = StructStats::GetChildStats(input, 0); auto &typed_value_input = StructStats::GetChildStats(input, 1); - if (!ConvertUnshreddedStats(untyped_value_index_stats, value_stats)) { - return false; - } - if (!ConvertShreddedStats(typed_value_result, typed_value_input)) { - return false; - } - return true; + ConvertUnshreddedStats(untyped_value_index_stats, value_stats); + ConvertShreddedStats(typed_value_result, typed_value_input); } -static bool ConvertShreddedStats(BaseStatistics &result, optional_ptr input_p) { +static void ConvertShreddedStats(BaseStatistics &result, optional_ptr input_p) { if (!input_p) { - return false; + //! 
No statistics for this shredded subtree -> leave it unknown (conservative) + result.Copy(BaseStatistics::CreateUnknown(result.GetType())); + return; } auto &input = *input_p; result.CopyValidity(input); auto type_id = result.GetType().id(); if (type_id == LogicalTypeId::LIST) { - auto &child_result = ListStats::GetChildStats(result); - auto &child_input = ListStats::GetChildStats(input); - return ConvertShreddedStatsItem(child_result, child_input); + ConvertShreddedStatsItem(ListStats::GetChildStats(result), ListStats::GetChildStats(input)); + return; } if (type_id == LogicalTypeId::STRUCT) { auto field_count = StructType::GetChildCount(result.GetType()); for (idx_t i = 0; i < field_count; i++) { - auto &result_field = StructStats::GetChildStats(result, i); - auto &input_field = StructStats::GetChildStats(input, i); - if (!ConvertShreddedStatsItem(result_field, input_field)) { - return false; - } + ConvertShreddedStatsItem(StructStats::GetChildStats(result, i), StructStats::GetChildStats(input, i)); } - return true; + return; + } + //! Primitive leaf - copy the parquet stats if the types line up, otherwise leave it unknown + if (result.GetType() == input.GetType()) { + result.Copy(input); + } else { + result.Copy(BaseStatistics::CreateUnknown(result.GetType())); } - result.Copy(input); - return true; } bool StringStatsAreValid(const string &stats, bool is_varchar, StringStatsType stats_type) { @@ -629,17 +634,13 @@ unique_ptr ParquetStatisticsUtils::TransformColumnStatistics(con auto &value = schema.children[1]; D_ASSERT(value.name == "value"); auto value_stats = ParquetStatisticsUtils::TransformColumnStatistics(value, columns, can_have_nan); - if (!ConvertUnshreddedStats(untyped_value_index_stats, value_stats.get())) { - //! Couldn't convert the stats, or there are no stats - return nullptr; - } + //! Best-effort: nodes whose stats can't be converted are left UNKNOWN (not fully shredded) rather + //! 
than discarding the statistics for the entire variant column + ConvertUnshreddedStats(untyped_value_index_stats, value_stats.get()); auto parquet_typed_value_stats = ParquetStatisticsUtils::TransformColumnStatistics(typed_value, columns, can_have_nan); - if (!ConvertShreddedStats(typed_value_stats, parquet_typed_value_stats.get())) { - //! Couldn't convert the stats, or there are no stats - return nullptr; - } + ConvertShreddedStats(typed_value_stats, parquet_typed_value_stats.get()); //! Set validity to UNKNOWN variant_stats.SetHasNoNull(); variant_stats.SetHasNull(); diff --git a/src/duckdb/extension/parquet/reader/expression_column_reader.cpp b/src/duckdb/extension/parquet/reader/expression_column_reader.cpp index 5e8cdf1f0..0fed70af4 100644 --- a/src/duckdb/extension/parquet/reader/expression_column_reader.cpp +++ b/src/duckdb/extension/parquet/reader/expression_column_reader.cpp @@ -30,6 +30,11 @@ ExpressionColumnReader::ExpressionColumnReader(ClientContext &context, vectorSetParent(*this); + } + } InitializeChunk(); } @@ -41,6 +46,11 @@ ExpressionColumnReader::ExpressionColumnReader(ClientContext &context, vectorSetParent(*this); + } + } InitializeChunk(); } diff --git a/src/duckdb/extension/parquet/reader/list_column_reader.cpp b/src/duckdb/extension/parquet/reader/list_column_reader.cpp index 8bedf4219..7a1737281 100644 --- a/src/duckdb/extension/parquet/reader/list_column_reader.cpp +++ b/src/duckdb/extension/parquet/reader/list_column_reader.cpp @@ -200,6 +200,9 @@ ListColumnReader::ListColumnReader(const ParquetReader &reader, const ParquetCol child_repeats.resize(reader.allocator, STANDARD_VECTOR_SIZE); child_defines_ptr = (uint8_t *)child_defines.ptr; child_repeats_ptr = (uint8_t *)child_repeats.ptr; + if (child_column_reader) { + child_column_reader->SetParent(*this); + } } void ListColumnReader::ApplyPendingSkips(data_ptr_t define_out, data_ptr_t repeat_out) { diff --git a/src/duckdb/extension/parquet/reader/struct_column_reader.cpp 
b/src/duckdb/extension/parquet/reader/struct_column_reader.cpp index fee57a4ee..4bf69db2e 100644 --- a/src/duckdb/extension/parquet/reader/struct_column_reader.cpp +++ b/src/duckdb/extension/parquet/reader/struct_column_reader.cpp @@ -42,6 +42,11 @@ StructColumnReader::StructColumnReader(const ParquetReader &reader, const Parque vector> child_readers_p) : ColumnReader(reader, schema), child_readers(std::move(child_readers_p)) { D_ASSERT(Type().InternalType() == PhysicalType::STRUCT); + for (auto &child : child_readers) { + if (child) { + child->SetParent(*this); + } + } } ColumnReader &StructColumnReader::GetChildReader(idx_t child_idx) { diff --git a/src/duckdb/extension/parquet/reader/variant/parquet_variant_iterator.cpp b/src/duckdb/extension/parquet/reader/variant/parquet_variant_iterator.cpp index cad60478b..307c8b042 100644 --- a/src/duckdb/extension/parquet/reader/variant/parquet_variant_iterator.cpp +++ b/src/duckdb/extension/parquet/reader/variant/parquet_variant_iterator.cpp @@ -335,6 +335,7 @@ void ShreddedGroupView::Build(Vector &group) { } has_typed_value = true; typed_type = typed_vec->GetType(); + typed_value_vec = typed_vec; switch (typed_type.id()) { case LogicalTypeId::STRUCT: { @@ -667,30 +668,8 @@ ParquetVariantNode ParquetArrayIterator::operator[](idx_t i) const { return ParquetVariantNode::MakeBinary(state.get(), child, binary_end); } -//===--------------------------------------------------------------------===// -// ParquetVariantIteratorSource -//===--------------------------------------------------------------------===// -bool ParquetVariantIteratorSource::Emit(idx_t row, VariantBuilder &builder) { - iterator.BeginRow(row); - auto root = iterator.Root(row); - if (root.IsNull()) { - return true; - } - //! 
A root that resolves to a (variant) NULL is a genuine SQL NULL row - if (root.GetTypeId() == VariantLogicalType::VARIANT_NULL) { - return true; - } - EmitIterator(root, builder); - return false; -} - -//===--------------------------------------------------------------------===// -// ParquetVariantConversion -//===--------------------------------------------------------------------===// -void ParquetVariantConversion::Convert(Vector &metadata, Vector &group, Vector &result, idx_t count) { - ParquetVariantIterator iterator(metadata, group); - ParquetVariantIteratorSource source(iterator); - BuildVariant(source, count, result); +void ParquetVariantIterator::EmitBinary(const_data_ptr_t data, const_data_ptr_t end, VariantBuilder &builder) const { + EmitIterator(ParquetVariantNode::MakeBinary(*this, data, end), builder); } namespace { diff --git a/src/duckdb/extension/parquet/reader/variant/parquet_variant_shredding.cpp b/src/duckdb/extension/parquet/reader/variant/parquet_variant_shredding.cpp new file mode 100644 index 000000000..762f41724 --- /dev/null +++ b/src/duckdb/extension/parquet/reader/variant/parquet_variant_shredding.cpp @@ -0,0 +1,396 @@ +#include "reader/variant/parquet_variant_iterator.hpp" +#include "reader/variant_column_reader.hpp" + +#include "duckdb/common/types/variant/variant_builder.hpp" +#include "duckdb/function/variant/variant_shredding.hpp" +#include "duckdb/common/vector/struct_vector.hpp" +#include "duckdb/common/vector/list_vector.hpp" +#include "duckdb/common/vector/flat_vector.hpp" + +#include + +namespace duckdb { + +namespace { + +//! A shredded node is either a "picked off" bare primitive (fully convertible - referenced directly), or a +//! wrapper STRUCT("typed_value" , "untyped_value_index" UINTEGER) (see SetShreddedType in +//! variant_shredding.cpp). The (row-local, 1-based) untyped_value_index points at the leftover value in the +//! row's unshredded pool; NULL means "no leftover" (VARIANT_NULL / missing -> see is_object_field). 
+constexpr idx_t SHRED_TYPED_VALUE = 0; +constexpr idx_t SHRED_UNTYPED_INDEX = 1; + +//===--------------------------------------------------------------------===// +// Analysis: which subtrees can be "picked off" (fully shredded -> flattened + referenced) +//===--------------------------------------------------------------------===// +//! Whether the group's binary 'value' column is entirely NULL (no leftover anywhere in the chunk) +bool ValueAllNull(const ShreddedGroupView &view, idx_t count) { + for (idx_t i = 0; i < count; i++) { + if ((*view.value)[i].IsValid()) { + return false; + } + } + return true; +} + +//! A plan mirroring the group tree: 'flat' marks a primitive leaf that is fully convertible and is emitted +//! as a bare (referenced) column instead of a wrapper. +struct ShredPlan { + bool flat = false; + vector fields; //! OBJECT + unique_ptr element; +}; + +ShredPlan AnalyzeShred(const ShreddedGroupView &view, idx_t count, bool is_object_field) { + ShredPlan plan; + if (!view.has_typed_value) { + return plan; //! everything is leftover -> wrapper + } + switch (view.kind) { + case ParquetGroupKind::LEAF: { + //! A leaf is fully convertible if there is no binary leftover; an OBJECT field additionally must never + //! 
be missing (a NULL typed_value of a flat field would read as VARIANT_NULL, not "missing") + bool no_leftover = ValueAllNull(view, count); + bool no_missing = !is_object_field || view.leaf_format.validity.CheckAllValid(count); + plan.flat = no_leftover && no_missing; + break; + } + case ParquetGroupKind::OBJECT: { + plan.fields.reserve(view.fields.size()); + for (auto &field : view.fields) { + plan.fields.push_back(AnalyzeShred(*field, count, true)); + } + break; + } + default: { + D_ASSERT(view.kind == ParquetGroupKind::ARRAY); + auto child_count = ListVector::GetListSize(*view.typed_value_vec); + plan.element = make_uniq(AnalyzeShred(*view.element, child_count, false)); + break; + } + } + return plan; +} + +//===--------------------------------------------------------------------===// +// Stage 1: derive the shred type +//===--------------------------------------------------------------------===// +LogicalType DeriveShredNodeType(const ShreddedGroupView &view, const ShredPlan &plan); + +//! The 'typed_value' () of a node: a primitive, a STRUCT of field nodes, or a LIST of an element node. +LogicalType DeriveTypedValueType(const ShreddedGroupView &view, const ShredPlan &plan) { + if (!view.has_typed_value) { + //! Nothing is shredded here - a placeholder all-NULL column, everything goes to the unshredded pool + return LogicalType::INTEGER; + } + switch (view.kind) { + case ParquetGroupKind::LEAF: + return view.typed_type; + case ParquetGroupKind::OBJECT: { + child_list_t fields; + for (idx_t i = 0; i < view.fields.size(); i++) { + fields.emplace_back(view.field_names[i], DeriveShredNodeType(*view.fields[i], plan.fields[i])); + } + return LogicalType::STRUCT(std::move(fields)); + } + default: + D_ASSERT(view.kind == ParquetGroupKind::ARRAY); + return LogicalType::LIST(DeriveShredNodeType(*view.element, *plan.element)); + } +} + +LogicalType DeriveShredNodeType(const ShreddedGroupView &view, const ShredPlan &plan) { + if (plan.flat) { + //! 
picked off: a bare primitive column (no untyped_value_index needed) + return DeriveTypedValueType(view, plan); + } + child_list_t children; + children.emplace_back("typed_value", DeriveTypedValueType(view, plan)); + children.emplace_back("untyped_value_index", LogicalType::UINTEGER); + return LogicalType::STRUCT(std::move(children)); +} + +//===--------------------------------------------------------------------===// +// Stage 2a: fill the typed_value tree (referencing Parquet leaves where possible) +//===--------------------------------------------------------------------===// +//! Reference the Parquet leaf into 'out' (BLOB stays raw to preserve the type; base64 happens at JSON time) +void FillLeafTypedValue(const ShreddedGroupView &view, Vector &out, idx_t count) { + out.Reference(*view.typed_value_vec); +} + +//! Captures the (per-node) untyped_value_index target so the leftover pass can wire it up. A "flat" node has +//! no untyped_value_index (untyped_index_data stays null) and never contributes a leftover. +struct ShredNodeWriter { + uint32_t *untyped_index_data = nullptr; + ValidityMask *untyped_index_validity = nullptr; + vector> fields; //! OBJECT + unique_ptr element; //! ARRAY + //! Whether this node OR any descendant carries a binary 'value' leftover anywhere in the chunk. When + //! false the whole subtree is fully shredded and the per-row leftover pass can skip it entirely. + bool subtree_has_leftover = false; +}; + +//! Recursively NULL a shred node (and all its descendants) at 'row' - a child of a NULL struct must itself be +//! NULL. Only the synthesized parts are written: the wrapper struct validity, its untyped_value_index, and the +//! validity of any nested STRUCT/LIST typed_value. A picked-off (flat) leaf and a LEAF's referenced Parquet +//! leaf are skipped: they already read as NULL wherever their object is absent. 
+void SetShredNodeNull(const ShreddedGroupView &view, const ShredPlan &plan, Vector &node, idx_t row) { + if (plan.flat) { + return; + } + auto &entries = StructVector::GetEntries(node); + auto &typed_value = entries[SHRED_TYPED_VALUE]; + auto &untyped_index = entries[SHRED_UNTYPED_INDEX]; + FlatVector::ValidityMutable(node).SetInvalid(row); + FlatVector::ValidityMutable(untyped_index).SetInvalid(row); + if (!view.has_typed_value) { + //! constant-NULL placeholder typed_value + return; + } + switch (view.kind) { + case ParquetGroupKind::LEAF: + //! typed_value references the Parquet leaf, already NULL where its object is absent + break; + case ParquetGroupKind::OBJECT: { + FlatVector::ValidityMutable(typed_value).SetInvalid(row); + auto &field_entries = StructVector::GetEntries(typed_value); + for (idx_t f = 0; f < view.fields.size(); f++) { + SetShredNodeNull(*view.fields[f], plan.fields[f], field_entries[f], row); + } + break; + } + default: + D_ASSERT(view.kind == ParquetGroupKind::ARRAY); + //! typed_value is a LIST; the entry at this row is an empty/NULL list (no element rows to recurse into) + FlatVector::ValidityMutable(typed_value).SetInvalid(row); + break; + } +} + +//! 'is_object_field': whether this node is a field of an OBJECT. For object fields the "no leftover" default +//! is 0 (== a missing field), since a NULL untyped_value_index means a present VARIANT_NULL value. For the +//! root / array elements (no notion of "missing") the default is NULL (== VARIANT_NULL). +void FillShredNode(const ShreddedGroupView &view, const ShredPlan &plan, Vector &node, idx_t count, + ShredNodeWriter &writer, bool is_object_field) { + if (plan.flat) { + //! 
picked off: 'node' is the bare primitive column - reference it, no untyped_value_index + FillLeafTypedValue(view, node, count); + return; + } + + auto &entries = StructVector::GetEntries(node); + auto &typed_value = entries[SHRED_TYPED_VALUE]; + auto &untyped_index = entries[SHRED_UNTYPED_INDEX]; + + writer.untyped_index_data = FlatVector::GetDataMutable(untyped_index); + writer.untyped_index_validity = &FlatVector::ValidityMutable(untyped_index); + if (is_object_field) { + memset(writer.untyped_index_data, 0, count * sizeof(uint32_t)); + } else { + writer.untyped_index_validity->SetAllInvalid(count); + } + + //! This node has a leftover if its own binary 'value' is present for any row; OR'd below with its children + bool has_leftover = !ValueAllNull(view, count); + + if (!view.has_typed_value) { + //! Nothing is shredded here - the typed_value is a never-read all-NULL placeholder, so make it constant + ConstantVector::SetNull(typed_value, count_t(count)); + writer.subtree_has_leftover = has_leftover; + return; + } + + switch (view.kind) { + case ParquetGroupKind::LEAF: + FillLeafTypedValue(view, typed_value, count); + break; + case ParquetGroupKind::OBJECT: { + //! typed_value is a STRUCT of field nodes; its validity is the object's shredded-ness + auto &dst_validity = FlatVector::ValidityMutable(typed_value); + auto &field_entries = StructVector::GetEntries(typed_value); + writer.fields.resize(view.fields.size()); + for (idx_t i = 0; i < view.fields.size(); i++) { + writer.fields[i] = make_uniq(); + FillShredNode(*view.fields[i], plan.fields[i], field_entries[i], count, *writer.fields[i], true); + has_leftover |= writer.fields[i]->subtree_has_leftover; + } + //! Where the object is not shredded, the typed_value struct is NULL - recursively NULL the field nodes + //! 
too, since a child of a NULL struct must itself be NULL + for (idx_t i = 0; i < count; i++) { + if (view.typed_validity->IsValid(i)) { + continue; + } + dst_validity.SetInvalid(i); + for (idx_t f = 0; f < view.fields.size(); f++) { + SetShredNodeNull(*view.fields[f], plan.fields[f], field_entries[f], i); + } + } + break; + } + default: { + D_ASSERT(view.kind == ParquetGroupKind::ARRAY); + //! typed_value is a LIST of element nodes; copy the Parquet list entries (the element child is built + //! 1:1, so the offsets align) and recurse into the element node over the list child + auto out_data = FlatVector::GetDataMutable(typed_value); + auto &out_validity = FlatVector::ValidityMutable(typed_value); + for (idx_t i = 0; i < count; i++) { + auto entry = (*view.list)[i]; + if (!entry.IsValid()) { + out_validity.SetInvalid(i); + out_data[i] = list_entry_t(0, 0); + } else { + out_data[i] = entry.GetValueUnsafe(); + } + } + auto child_count = ListVector::GetListSize(*view.typed_value_vec); + ListVector::Reserve(typed_value, child_count); + ListVector::SetListSize(typed_value, child_count); + writer.element = make_uniq(); + FillShredNode(*view.element, *plan.element, ListVector::GetChildMutable(typed_value), child_count, + *writer.element, false); + has_leftover |= writer.element->subtree_has_leftover; + break; + } + } + writer.subtree_has_leftover = has_leftover; +} + +//===--------------------------------------------------------------------===// +// Stage 2b: build the unshredded pool + wire untyped_value_index (per row) +//===--------------------------------------------------------------------===// +bool TypedValid(const ShreddedGroupView &view, idx_t index) { + switch (view.kind) { + case ParquetGroupKind::LEAF: + return view.leaf_format.validity.RowIsValid(view.leaf_format.sel->get_index(index)); + case ParquetGroupKind::ARRAY: + return (*view.list)[index].IsValid(); + default: + D_ASSERT(view.kind == ParquetGroupKind::OBJECT); + return 
view.typed_validity->IsValid(index); + } +} + +//! A root that resolves to a (variant) NULL / missing is a genuine SQL NULL row (matches the unshredded path) +bool IsRowNull(const ParquetVariantNode &root) { + return root.IsNull() || root.GetTypeId() == VariantLogicalType::VARIANT_NULL; +} + +//! Set the shredded struct validity for the SQL NULL rows (used when there is no leftover to build, so the +//! per-row null marking otherwise done by ShreddedLeftoverSource still has to happen) +void MarkShreddedNullRows(ParquetVariantIterator &iterator, idx_t count, ValidityMask &shredded_validity) { + for (idx_t row = 0; row < count; row++) { + iterator.BeginRow(row); + if (IsRowNull(iterator.Root(row))) { + shredded_validity.SetInvalid(row); + } + } +} + +struct ShreddedLeftoverSource { + ParquetVariantIterator &iterator; + ShredNodeWriter &root_writer; + ValidityMask &shredded_validity; + + bool Emit(idx_t row, VariantBuilder &builder) { + iterator.BeginRow(row); + auto root = iterator.Root(row); + if (IsRowNull(root)) { + shredded_validity.SetInvalid(row); + return true; + } + EmitNode(iterator.GetRootView(), root_writer, row, builder); + return false; + } + + //! Emit the binary value (one subtree) into the unshredded pool, recording its 1-based row-local index in + //! 
this node's untyped_value_index + void EmitLeftover(const string_t &value, ShredNodeWriter &writer, idx_t index, VariantBuilder &builder) { + auto data = const_data_ptr_cast(value.GetData()); + auto local = builder.LocalValue(); + iterator.EmitBinary(data, data + value.GetSize(), builder); + writer.untyped_index_data[index] = local + 1; + writer.untyped_index_validity->SetValid(index); + } + + void EmitNode(const ShreddedGroupView &view, ShredNodeWriter &writer, idx_t index, VariantBuilder &builder) { + auto value_entry = (*view.value)[index]; + bool value_present = value_entry.IsValid(); + bool typed_valid = view.has_typed_value && TypedValid(view, index); + + if (typed_valid) { + if (view.kind == ParquetGroupKind::OBJECT) { + //! A partially-shredded object: the 'value' blob holds the leftover (overlay) fields + if (value_present) { + EmitLeftover(value_entry.GetValueUnsafe(), writer, index, builder); + } + //! Descend only into fields whose subtree carries a leftover somewhere in the chunk; a + //! fully-shredded field contributes nothing and its untyped_value_index keeps its default + for (idx_t i = 0; i < view.fields.size(); i++) { + if (writer.fields[i]->subtree_has_leftover) { + EmitNode(*view.fields[i], *writer.fields[i], index, builder); + } + } + } else if (view.kind == ParquetGroupKind::ARRAY) { + //! Same pruning for arrays: a fully-shredded element type means no element can have a leftover + if (writer.element->subtree_has_leftover) { + auto entry = (*view.list)[index].GetValueUnsafe(); + for (idx_t j = 0; j < entry.length; j++) { + EmitNode(*view.element, *writer.element, entry.offset + j, builder); + } + } + } + //! LEAF (incl. picked-off): fully shredded, no leftover + } else if (value_present) { + //! Not shredded - the whole value is a leftover (do not recurse: the subtree lives in this value) + EmitLeftover(value_entry.GetValueUnsafe(), writer, index, builder); + } + //! 
else: missing / VARIANT_NULL - leave untyped_value_index at its default + } +}; + +} // namespace + +void VariantColumnReader::Convert(Vector &metadata, Vector &group, Vector &result, idx_t count) { + ParquetVariantIterator iterator(metadata, group); + auto &root_view = iterator.GetRootView(); + + //! Analysis + Stage 1: pick off fully-convertible subtrees, derive STRUCT("unshredded", "shredded") + auto root_plan = AnalyzeShred(root_view, count, false); + child_list_t shredded_data_children; + shredded_data_children.emplace_back("unshredded", VariantShredding::GetUnshreddedType()); + shredded_data_children.emplace_back("shredded", DeriveShredNodeType(root_view, root_plan)); + auto shredded_data_type = LogicalType::STRUCT(std::move(shredded_data_children)); + + PrepareChunk(shredded_chunk, shredded_capacity, {shredded_data_type}, count); + auto &shredded_data = shredded_chunk.data[0]; + + auto &shredded_data_entries = StructVector::GetEntries(shredded_data); + auto &unshredded = shredded_data_entries[0]; + auto &shredded = shredded_data_entries[1]; + + if (root_plan.flat) { + //! The whole column is a fully-shredded primitive: reference it; the unshredded pool is unused + FillLeafTypedValue(root_view, shredded, count); + BuildEmptyVariant(count, unshredded); + } else { + //! Stage 2a: fill the typed_value tree (referencing leaves that map exactly); this also computes, per + //! node, whether its subtree carries any leftover (root_writer.subtree_has_leftover for the whole vector) + ShredNodeWriter root_writer; + FillShredNode(root_view, root_plan, shredded, count, root_writer, false); + if (root_writer.subtree_has_leftover) { + //! Stage 2b: build the unshredded pool from leftovers, wire untyped_value_index, set row validity + ShreddedLeftoverSource source {iterator, root_writer, FlatVector::ValidityMutable(shredded)}; + BuildVariant(source, count, unshredded); + } else { + //! 
No leftover anywhere in this vector: skip building the (never-consulted) unshredded pool, only + //! mark the SQL NULL rows on the shredded component + MarkShreddedNullRows(iterator, count, FlatVector::ValidityMutable(shredded)); + BuildEmptyVariant(count, unshredded); + } + } + + FlatVector::SetSize(shredded_data, count_t(count)); + result.Shred(shredded_data, count); +} + +} // namespace duckdb diff --git a/src/duckdb/extension/parquet/reader/variant_column_reader.cpp b/src/duckdb/extension/parquet/reader/variant_column_reader.cpp index 16a863fdd..841b123dc 100644 --- a/src/duckdb/extension/parquet/reader/variant_column_reader.cpp +++ b/src/duckdb/extension/parquet/reader/variant_column_reader.cpp @@ -45,6 +45,12 @@ VariantColumnReader::VariantColumnReader(ClientContext &context, const ParquetRe : ColumnReader(reader, schema), context(context), child_readers(std::move(child_readers_p)) { D_ASSERT(Type().InternalType() == PhysicalType::STRUCT); + for (auto &child : child_readers) { + if (child) { + child->SetParent(*this); + } + } + if (child_readers[0]->Schema().name == "metadata" && child_readers[1]->Schema().name == "value") { metadata_reader_idx = 0; value_reader_idx = 1; @@ -83,6 +89,21 @@ static LogicalType GetIntermediateGroupType(optional_ptr typed_val return LogicalType::STRUCT(std::move(children)); } +void VariantColumnReader::PrepareChunk(DataChunk &chunk, idx_t &capacity, const vector &types, + idx_t count) { + bool needs_init = chunk.ColumnCount() != types.size() || count > capacity; + for (idx_t i = 0; !needs_init && i < types.size(); i++) { + needs_init = chunk.data[i].GetType() != types[i]; + } + if (needs_init) { + chunk.Destroy(); + chunk.Initialize(context, types, count); + capacity = count; + } else { + chunk.Reset(); + } +} + idx_t VariantColumnReader::Read(ColumnReaderInput &input, Vector &result) { if (pending_skips > 0) { throw InternalException("VariantColumnReader cannot have pending skips"); @@ -97,10 +118,10 @@ idx_t 
VariantColumnReader::Read(ColumnReaderInput &input, Vector &result) { // So, we just initialize them to all be valid beforehand std::fill_n(define_out, num_values, MaxDefine()); - optional_idx read_count; - - Vector metadata_intermediate(LogicalType::BLOB, num_values); - Vector intermediate_group(GetIntermediateGroupType(typed_value_reader), num_values); + auto group_type = GetIntermediateGroupType(typed_value_reader); + PrepareChunk(intermediate_chunk, intermediate_capacity, {LogicalType::BLOB, group_type}, num_values); + auto &metadata_intermediate = intermediate_chunk.data[0]; + auto &intermediate_group = intermediate_chunk.data[1]; auto &group_entries = StructVector::GetEntries(intermediate_group); auto &value_intermediate = group_entries[0]; @@ -126,10 +147,10 @@ idx_t VariantColumnReader::Read(ColumnReaderInput &input, Vector &result) { "The shredded Variant column did not contain the same amount of values for 'typed_value' and 'value'"); } } - ParquetVariantConversion::Convert(metadata_intermediate, intermediate_group, result, num_values); + // convert the actual columns + Convert(metadata_intermediate, intermediate_group, result, num_values); - read_count = value_values; - return read_count.GetIndex(); + return value_values; } void VariantColumnReader::Skip(idx_t num_values) { diff --git a/src/duckdb/src/catalog/catalog.cpp b/src/duckdb/src/catalog/catalog.cpp index 50f5920ff..1a591b509 100644 --- a/src/duckdb/src/catalog/catalog.cpp +++ b/src/duckdb/src/catalog/catalog.cpp @@ -148,7 +148,7 @@ optional_ptr Catalog::CreateTable(CatalogTransaction transaction, } optional_ptr Catalog::CreateTable(CatalogTransaction transaction, BoundCreateTableInfo &info) { - auto &schema = GetSchema(transaction, info.base->schema); + auto &schema = GetSchema(transaction, info.base->Schema()); return CreateTable(transaction, schema, info); } @@ -156,7 +156,7 @@ optional_ptr Catalog::CreateTable(CatalogTransaction transaction, // View 
//===--------------------------------------------------------------------===// optional_ptr Catalog::CreateView(CatalogTransaction transaction, CreateViewInfo &info) { - auto &schema = GetSchema(transaction, info.schema); + auto &schema = GetSchema(transaction, info.Schema()); return CreateView(transaction, schema, info); } @@ -173,7 +173,7 @@ optional_ptr Catalog::CreateView(CatalogTransaction transaction, S // Sequence //===--------------------------------------------------------------------===// optional_ptr Catalog::CreateSequence(CatalogTransaction transaction, CreateSequenceInfo &info) { - auto &schema = GetSchema(transaction, info.schema); + auto &schema = GetSchema(transaction, info.Schema()); return CreateSequence(transaction, schema, info); } @@ -190,7 +190,7 @@ optional_ptr Catalog::CreateSequence(CatalogTransaction transactio // Type //===--------------------------------------------------------------------===// optional_ptr Catalog::CreateType(CatalogTransaction transaction, CreateTypeInfo &info) { - auto &schema = GetSchema(transaction, info.schema); + auto &schema = GetSchema(transaction, info.Schema()); return CreateType(transaction, schema, info); } @@ -207,7 +207,7 @@ optional_ptr Catalog::CreateType(CatalogTransaction transaction, S // Table Function //===--------------------------------------------------------------------===// optional_ptr Catalog::CreateTableFunction(CatalogTransaction transaction, CreateTableFunctionInfo &info) { - auto &schema = GetSchema(transaction, info.schema); + auto &schema = GetSchema(transaction, info.Schema()); return CreateTableFunction(transaction, schema, info); } @@ -229,7 +229,7 @@ optional_ptr Catalog::CreateTableFunction(ClientContext &context, // Copy Function //===--------------------------------------------------------------------===// optional_ptr Catalog::CreateCopyFunction(CatalogTransaction transaction, CreateCopyFunctionInfo &info) { - auto &schema = GetSchema(transaction, info.schema); + auto &schema = 
GetSchema(transaction, info.Schema()); return CreateCopyFunction(transaction, schema, info); } @@ -247,7 +247,7 @@ optional_ptr Catalog::CreateCopyFunction(CatalogTransaction transa //===--------------------------------------------------------------------===// optional_ptr Catalog::CreatePragmaFunction(CatalogTransaction transaction, CreatePragmaFunctionInfo &info) { - auto &schema = GetSchema(transaction, info.schema); + auto &schema = GetSchema(transaction, info.Schema()); return CreatePragmaFunction(transaction, schema, info); } @@ -264,7 +264,7 @@ optional_ptr Catalog::CreatePragmaFunction(CatalogTransaction tran // Function //===--------------------------------------------------------------------===// optional_ptr Catalog::CreateFunction(CatalogTransaction transaction, CreateFunctionInfo &info) { - auto &schema = GetSchema(transaction, info.schema); + auto &schema = GetSchema(transaction, info.Schema()); return CreateFunction(transaction, schema, info); } @@ -286,7 +286,7 @@ optional_ptr Catalog::AddFunction(ClientContext &context, CreateFu // Collation //===--------------------------------------------------------------------===// optional_ptr Catalog::CreateCollation(CatalogTransaction transaction, CreateCollationInfo &info) { - auto &schema = GetSchema(transaction, info.schema); + auto &schema = GetSchema(transaction, info.Schema()); return CreateCollation(transaction, schema, info); } @@ -304,7 +304,7 @@ optional_ptr Catalog::CreateCollation(CatalogTransaction transacti //===--------------------------------------------------------------------===// optional_ptr Catalog::CreateCoordinateSystem(CatalogTransaction transaction, CreateCoordinateSystemInfo &info) { - auto &schema = GetSchema(transaction, info.schema); + auto &schema = GetSchema(transaction, info.Schema()); return CreateCoordinateSystem(transaction, schema, info); } @@ -321,7 +321,7 @@ optional_ptr Catalog::CreateCoordinateSystem(CatalogTransaction tr // Index 
//===--------------------------------------------------------------------===// optional_ptr Catalog::CreateIndex(CatalogTransaction transaction, CreateIndexInfo &info) { - auto &schema = GetSchema(transaction, info.schema); + auto &schema = GetSchema(transaction, info.Schema()); auto &table = schema.GetEntry(transaction, CatalogType::TABLE_ENTRY, info.table)->Cast(); return schema.CreateIndex(transaction, info, table); } @@ -397,8 +397,8 @@ void Catalog::DropEntry(ClientContext &context, DropInfo &info) { } CatalogEntryRetriever retriever(context); - EntryLookupInfo lookup_info(info.type, info.name); - auto lookup = LookupEntry(retriever, info.schema.GetIdentifierName(), lookup_info, info.if_not_found); + EntryLookupInfo lookup_info(info.type, info.Name()); + auto lookup = LookupEntry(retriever, info.Schema().GetIdentifierName(), lookup_info, info.if_not_found); if (!lookup.Found()) { return; } @@ -1239,15 +1239,15 @@ vector> Catalog::GetAllEntries(ClientContext &context, C void Catalog::Alter(CatalogTransaction transaction, AlterInfo &info) { if (transaction.HasContext()) { CatalogEntryRetriever retriever(transaction.GetContext()); - EntryLookupInfo lookup_info(info.GetCatalogType(), info.name); - auto lookup = LookupEntry(retriever, info.schema.GetIdentifierName(), lookup_info, info.if_not_found); + EntryLookupInfo lookup_info(info.GetCatalogType(), info.Name()); + auto lookup = LookupEntry(retriever, info.Schema().GetIdentifierName(), lookup_info, info.if_not_found); if (!lookup.Found()) { return; } return lookup.schema->Alter(transaction, info); } D_ASSERT(info.if_not_found == OnEntryNotFound::THROW_EXCEPTION); - auto &schema = GetSchema(transaction, info.schema); + auto &schema = GetSchema(transaction, info.Schema()); return schema.Alter(transaction, info); } diff --git a/src/duckdb/src/catalog/catalog_entry/copy_function_catalog_entry.cpp b/src/duckdb/src/catalog/catalog_entry/copy_function_catalog_entry.cpp index 1f6604a29..15a5a0521 100644 --- 
a/src/duckdb/src/catalog/catalog_entry/copy_function_catalog_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/copy_function_catalog_entry.cpp @@ -7,7 +7,8 @@ constexpr const char *CopyFunctionCatalogEntry::Name; CopyFunctionCatalogEntry::CopyFunctionCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateCopyFunctionInfo &info) - : StandardEntry(CatalogType::COPY_FUNCTION_ENTRY, schema, catalog, info.name), function(info.function) { + : StandardEntry(CatalogType::COPY_FUNCTION_ENTRY, schema, catalog, info.GetCopyFunctionName()), + function(info.function) { this->extension_name = info.extension_name; } diff --git a/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp b/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp index cc0fe3afb..6c2600e60 100644 --- a/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp @@ -57,7 +57,7 @@ static void FindForeignKeyInformation(TableCatalogEntry &table, AlterForeignKeyT } auto &fk = cond->Cast(); if (fk.info.type == ForeignKeyType::FK_TYPE_FOREIGN_KEY_TABLE) { - AlterEntryData alter_data(catalog.GetName(), fk.info.schema, fk.info.table, + AlterEntryData alter_data(QualifiedName(catalog.GetName(), fk.info.schema, fk.info.table), OnEntryNotFound::THROW_EXCEPTION); fk_arrays.push_back(make_uniq(std::move(alter_data), name, fk.pk_columns, fk.fk_columns, fk.info.pk_keys, fk.info.fk_keys, @@ -163,7 +163,7 @@ optional_ptr DuckSchemaEntry::CreateTable(CatalogTransaction trans // make a dependency between this table and referenced table auto &set = GetCatalogSet(CatalogType::TABLE_ENTRY); - info.dependencies.AddDependency(*set.GetEntry(transaction, fk_info.name)); + info.dependencies.AddDependency(*set.GetEntry(transaction, fk_info.Name())); } for (auto &dep : info.dependencies.Set()) { table->dependencies.AddDependency(dep); @@ -181,7 +181,7 @@ optional_ptr DuckSchemaEntry::CreateFunction(CatalogTransaction tr if (info.on_conflict == 
OnCreateConflict::ALTER_ON_CONFLICT) { // check if the original entry exists auto &catalog_set = GetCatalogSet(info.type); - auto current_entry = catalog_set.GetEntry(transaction, info.name); + auto current_entry = catalog_set.GetEntry(transaction, info.GetFunctionName()); if (current_entry) { // the current entry exists - alter it instead auto alter_info = info.GetAlterInfo(); @@ -255,8 +255,8 @@ optional_ptr DuckSchemaEntry::CreateIndex(CatalogTransaction trans // currently, we can not alter PK/FK/UNIQUE constraints // concurrency-safe name checks against other INDEX catalog entries happens in the catalog if (info.on_conflict != OnCreateConflict::IGNORE_ON_CONFLICT && - !table.GetStorage().IndexNameIsUnique(info.index_name.GetIdentifierName())) { - throw CatalogException("An index with the name " + info.index_name + " already exists!"); + !table.GetStorage().IndexNameIsUnique(info.GetIndexName().GetIdentifierName())) { + throw CatalogException("An index with the name " + info.GetIndexName() + " already exists!"); } auto index = make_uniq(catalog, *this, info, table); @@ -307,7 +307,7 @@ void DuckSchemaEntry::Alter(CatalogTransaction transaction, AlterInfo &info) { throw CatalogException("Couldn't change ownership!"); } } else { - auto &name = info.name; + auto &name = info.Name(); if (!set.AlterEntry(transaction, name, info)) { throw CatalogException::MissingEntry(type, name, string()); } @@ -333,12 +333,12 @@ void DuckSchemaEntry::DropEntry(ClientContext &context, DropInfo &info) { // first find the entry auto transaction = GetCatalogTransaction(context); - auto existing_entry = set.GetEntry(transaction, info.name); + auto existing_entry = set.GetEntry(transaction, info.Name()); if (!existing_entry) { - throw InternalException("Failed to drop entry \"%s\" - entry could not be found", info.name); + throw InternalException("Failed to drop entry \"%s\" - entry could not be found", info.Name()); } if (existing_entry->type != info.type) { - throw 
CatalogException("Existing object %s is of type %s, trying to drop type %s", info.name, + throw CatalogException("Existing object %s is of type %s, trying to drop type %s", info.Name(), CatalogTypeToString(existing_entry->type), CatalogTypeToString(info.type)); } @@ -350,7 +350,7 @@ void DuckSchemaEntry::DropEntry(ClientContext &context, DropInfo &info) { } OnDropEntry(transaction, *existing_entry); - if (!set.DropEntry(transaction, info.name, info.cascade, info.allow_drop_internal)) { + if (!set.DropEntry(transaction, info.Name(), info.cascade, info.allow_drop_internal)) { throw InternalException("Could not drop element because of an internal error"); } diff --git a/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp b/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp index 8f8c88ab1..e1c399b63 100644 --- a/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp @@ -35,7 +35,7 @@ IndexStorageInfo GetIndexInfo(const IndexConstraintType type, const bool v1_0_0_ const idx_t id) { auto &table_info = info->Cast(); auto constraint_name = EnumUtil::ToString(type) + "_"; - auto name = constraint_name + table_info.table + "_" + to_string(id); + auto name = constraint_name + table_info.GetTableName() + "_" + to_string(id); IndexStorageInfo index_info {Identifier(name)}; if (!v1_0_0_storage) { index_info.options.emplace("v1_0_0_storage", v1_0_0_storage); @@ -1216,7 +1216,7 @@ unique_ptr DuckTableEntry::AddForeignKeyConstraint(AlterForeignKey } ForeignKeyInfo fk_info; fk_info.type = ForeignKeyType::FK_TYPE_PRIMARY_KEY_TABLE; - fk_info.schema = info.schema; + fk_info.schema = info.Schema(); fk_info.table = info.fk_table; fk_info.pk_keys = info.pk_keys; fk_info.fk_keys = info.fk_keys; @@ -1320,7 +1320,8 @@ unique_ptr DuckTableEntry::AddConstraint(ClientContext &context, A // We create a physical table with a new constraint and a new unique index. 
const auto binder = Binder::CreateBinder(context); - const auto bound_constraint = binder->BindConstraint(*info.constraint, table_info.table, table_info.columns); + const auto bound_constraint = + binder->BindConstraint(*info.constraint, table_info.GetTableName(), table_info.columns); const auto bound_create_info = binder->BindCreateTableInfo(std::move(create_info), schema, info.bind_mode); auto new_storage = make_shared_ptr(context, *storage, *bound_constraint); diff --git a/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp b/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp index 7ca1f369d..9b3519377 100644 --- a/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp @@ -3,8 +3,9 @@ namespace duckdb { IndexCatalogEntry::IndexCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateIndexInfo &info) - : StandardEntry(CatalogType::INDEX_ENTRY, schema, catalog, info.index_name), sql(info.sql), options(info.options), - index_type(info.index_type), index_constraint_type(info.constraint_type), column_ids(info.column_ids) { + : StandardEntry(CatalogType::INDEX_ENTRY, schema, catalog, info.GetIndexName()), sql(info.sql), + options(info.options), index_type(info.index_type), index_constraint_type(info.constraint_type), + column_ids(info.column_ids) { this->temporary = info.temporary; this->dependencies = info.dependencies; this->comment = info.comment; @@ -20,12 +21,12 @@ IndexCatalogEntry::IndexCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schem unique_ptr IndexCatalogEntry::GetInfo() const { auto result = make_uniq(); - result->schema = GetSchemaName(); + result->SchemaMutable() = GetSchemaName(); result->table = GetTableName(); result->temporary = temporary; result->sql = sql; - result->index_name = name; + result->SetIndexName(name); result->index_type = index_type; result->constraint_type = index_constraint_type; result->column_ids = column_ids; diff --git 
a/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp b/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp index 72c29cda6..77dc20362 100644 --- a/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp @@ -41,9 +41,9 @@ unique_ptr TableMacroCatalogEntry::Copy(ClientContext &context) co unique_ptr MacroCatalogEntry::GetInfo() const { auto info = make_uniq(type); - info->catalog = catalog.GetName(); - info->schema = schema.name; - info->name = name; + info->CatalogMutable() = catalog.GetName(); + info->SchemaMutable() = schema.name; + info->SetFunctionName(name); for (auto &function : macros) { info->macros.push_back(function->Copy()); } diff --git a/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp b/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp index a5c29faeb..7073b4acd 100644 --- a/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp @@ -11,7 +11,7 @@ namespace duckdb { SchemaCatalogEntry::SchemaCatalogEntry(Catalog &catalog, CreateSchemaInfo &info) - : InCatalogEntry(CatalogType::SCHEMA_ENTRY, catalog, info.schema) { + : InCatalogEntry(CatalogType::SCHEMA_ENTRY, catalog, info.Schema()) { this->internal = info.internal; this->comment = info.comment; this->tags = info.tags; @@ -61,7 +61,7 @@ CatalogSet::EntryLookup SchemaCatalogEntry::LookupEntryDetailed(CatalogTransacti unique_ptr SchemaCatalogEntry::GetInfo() const { auto result = make_uniq(); - result->schema = name; + result->SchemaMutable() = name; result->comment = comment; result->tags = tags; return std::move(result); diff --git a/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp b/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp index 43b771f43..1c8e4f59a 100644 --- a/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +++ 
b/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp @@ -21,7 +21,7 @@ SequenceData::SequenceData(CreateSequenceInfo &info) } SequenceCatalogEntry::SequenceCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateSequenceInfo &info) - : StandardEntry(CatalogType::SEQUENCE_ENTRY, schema, catalog, info.name), data(info) { + : StandardEntry(CatalogType::SEQUENCE_ENTRY, schema, catalog, info.GetSequenceName()), data(info) { this->temporary = info.temporary; this->comment = info.comment; this->tags = info.tags; @@ -93,9 +93,9 @@ unique_ptr SequenceCatalogEntry::GetInfo() const { auto seq_data = GetData(); auto result = make_uniq(); - result->catalog = catalog.GetName(); - result->schema = schema.name; - result->name = name; + result->CatalogMutable() = catalog.GetName(); + result->SchemaMutable() = schema.name; + result->SetSequenceName(name); result->usage_count = seq_data.usage_count; result->increment = seq_data.increment; result->min_value = seq_data.min_value; diff --git a/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp b/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp index d42a388d0..6360b884c 100644 --- a/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp @@ -27,7 +27,7 @@ namespace duckdb { constexpr const char *TableCatalogEntry::Name; TableCatalogEntry::TableCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateTableInfo &info) - : StandardEntry(CatalogType::TABLE_ENTRY, schema, catalog, info.table), columns(std::move(info.columns)), + : StandardEntry(CatalogType::TABLE_ENTRY, schema, catalog, info.GetTableName()), columns(std::move(info.columns)), constraints(std::move(info.constraints)) { this->temporary = info.temporary; this->dependencies = info.dependencies; @@ -96,9 +96,9 @@ vector TableCatalogEntry::GetTypes() const { unique_ptr TableCatalogEntry::GetInfo() const { auto result = make_uniq(); - result->catalog = 
catalog.GetName(); - result->schema = schema.name; - result->table = name; + result->CatalogMutable() = catalog.GetName(); + result->SchemaMutable() = schema.name; + result->SetTableName(name); result->columns = columns.Copy(); result->constraints.reserve(constraints.size()); result->dependencies = dependencies; diff --git a/src/duckdb/src/catalog/catalog_entry/trigger_catalog_entry.cpp b/src/duckdb/src/catalog/catalog_entry/trigger_catalog_entry.cpp index 8ac215d86..62b4ea665 100644 --- a/src/duckdb/src/catalog/catalog_entry/trigger_catalog_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/trigger_catalog_entry.cpp @@ -9,7 +9,7 @@ namespace duckdb { TriggerCatalogEntry::TriggerCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateTriggerInfo &info) - : StandardEntry(CatalogType::TRIGGER_ENTRY, schema, catalog, info.trigger_name), + : StandardEntry(CatalogType::TRIGGER_ENTRY, schema, catalog, info.GetTriggerName()), base_table(unique_ptr_cast(info.base_table->Copy())), timing(info.timing), event_type(info.event_type), columns(info.columns), for_each(info.for_each), referencing_new_table(info.referencing_new_table), referencing_old_table(info.referencing_old_table), @@ -27,9 +27,9 @@ unique_ptr TriggerCatalogEntry::Copy(ClientContext &context) const unique_ptr TriggerCatalogEntry::GetInfo() const { auto result = make_uniq(); - result->catalog = catalog.GetName(); - result->schema = schema.name; - result->trigger_name = name; + result->CatalogMutable() = catalog.GetName(); + result->SchemaMutable() = schema.name; + result->SetTriggerName(name); result->base_table = unique_ptr_cast(base_table->Copy()); result->timing = timing; result->event_type = event_type; @@ -62,7 +62,7 @@ string TriggerCatalogEntry::ToSQL() const { } } ss << " ON "; - ss << ParseInfo::QualifierToString(base_table->catalog_name, base_table->schema_name, base_table->table_name); + ss << ParseInfo::QualifierToString(base_table->Catalog(), base_table->Schema(), base_table->Table()); if 
(!referencing_new_table.empty() || !referencing_old_table.empty()) { ss << " REFERENCING"; if (!referencing_new_table.empty()) { diff --git a/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp b/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp index 573200c5d..54a316f6d 100644 --- a/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp @@ -12,7 +12,7 @@ namespace duckdb { constexpr const char *TypeCatalogEntry::Name; TypeCatalogEntry::TypeCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateTypeInfo &info) - : StandardEntry(CatalogType::TYPE_ENTRY, schema, catalog, info.name), user_type(info.type), + : StandardEntry(CatalogType::TYPE_ENTRY, schema, catalog, info.GetTypeName()), user_type(info.type), bind_function(info.bind_function) { this->temporary = info.temporary; this->internal = info.internal; @@ -31,9 +31,9 @@ unique_ptr TypeCatalogEntry::Copy(ClientContext &context) const { unique_ptr TypeCatalogEntry::GetInfo() const { auto result = make_uniq(); - result->catalog = catalog.GetName(); - result->schema = schema.name; - result->name = name; + result->CatalogMutable() = catalog.GetName(); + result->SchemaMutable() = schema.name; + result->SetTypeName(name); result->type = user_type; result->extension_name = extension_name; result->dependencies = dependencies; diff --git a/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp b/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp index c0df1cbf9..03e3d6b65 100644 --- a/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp @@ -42,14 +42,14 @@ void ViewCatalogEntry::Initialize(CreateViewInfo &info) { } ViewCatalogEntry::ViewCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateViewInfo &info) - : StandardEntry(CatalogType::VIEW_ENTRY, schema, catalog, info.view_name), bind_state(ViewBindState::UNBOUND) { + : 
StandardEntry(CatalogType::VIEW_ENTRY, schema, catalog, info.GetViewName()), bind_state(ViewBindState::UNBOUND) { Initialize(info); } unique_ptr ViewCatalogEntry::GetInfo() const { auto result = make_uniq(); - result->schema = schema.name; - result->view_name = name; + result->SchemaMutable() = schema.name; + result->SetViewName(name); result->sql = sql; result->query = query ? unique_ptr_cast(query->Copy()) : nullptr; result->aliases = aliases; diff --git a/src/duckdb/src/catalog/catalog_set.cpp b/src/duckdb/src/catalog/catalog_set.cpp index fd931168f..7e4cad533 100644 --- a/src/duckdb/src/catalog/catalog_set.cpp +++ b/src/duckdb/src/catalog/catalog_set.cpp @@ -245,7 +245,7 @@ bool CatalogSet::AlterOwnership(CatalogTransaction transaction, ChangeOwnershipI // lock the catalog for writing unique_lock write_lock(catalog.GetWriteLock()); - auto entry = GetEntryInternal(transaction, info.name); + auto entry = GetEntryInternal(transaction, info.Name()); if (!entry) { return false; } diff --git a/src/duckdb/src/catalog/default/default_functions.cpp b/src/duckdb/src/catalog/default/default_functions.cpp index 454802727..277df44b0 100644 --- a/src/duckdb/src/catalog/default/default_functions.cpp +++ b/src/duckdb/src/catalog/default/default_functions.cpp @@ -250,8 +250,8 @@ unique_ptr DefaultFunctionGenerator::CreateInternalMacroInfo(co } } bind_info->macros = std::move(macro_info.macros); - bind_info->schema = Identifier(default_macro.schema); - bind_info->name = Identifier(default_macro.name); + bind_info->SchemaMutable() = Identifier(default_macro.schema); + bind_info->SetFunctionName(Identifier(default_macro.name)); bind_info->temporary = true; bind_info->internal = true; return bind_info; diff --git a/src/duckdb/src/catalog/default/default_schemas.cpp b/src/duckdb/src/catalog/default/default_schemas.cpp index b1f66d9a8..ed97c30af 100644 --- a/src/duckdb/src/catalog/default/default_schemas.cpp +++ b/src/duckdb/src/catalog/default/default_schemas.cpp @@ -27,7 +27,7 @@ 
unique_ptr DefaultSchemaGenerator::CreateDefaultEntry(CatalogTrans const Identifier &entry_name) { if (IsDefaultSchema(entry_name)) { CreateSchemaInfo info; - info.schema = Identifier(StringUtil::Lower(entry_name.GetIdentifierName())); + info.SchemaMutable() = Identifier(StringUtil::Lower(entry_name.GetIdentifierName())); info.internal = true; return make_uniq_base(catalog, info); } diff --git a/src/duckdb/src/catalog/default/default_table_functions.cpp b/src/duckdb/src/catalog/default/default_table_functions.cpp index 5f5c403e5..3b859fe02 100644 --- a/src/duckdb/src/catalog/default/default_table_functions.cpp +++ b/src/duckdb/src/catalog/default/default_table_functions.cpp @@ -110,8 +110,8 @@ DefaultTableFunctionGenerator::CreateInternalTableMacroInfo(const DefaultTableMa auto type = CatalogType::TABLE_MACRO_ENTRY; auto bind_info = make_uniq(type); - bind_info->schema = Identifier(default_macro.schema); - bind_info->name = Identifier(default_macro.name); + bind_info->SchemaMutable() = Identifier(default_macro.schema); + bind_info->SetFunctionName(Identifier(default_macro.name)); bind_info->temporary = true; bind_info->internal = true; bind_info->macros.push_back(std::move(function)); diff --git a/src/duckdb/src/catalog/default/default_types.cpp b/src/duckdb/src/catalog/default/default_types.cpp index f19c913e2..2a2dd617e 100644 --- a/src/duckdb/src/catalog/default/default_types.cpp +++ b/src/duckdb/src/catalog/default/default_types.cpp @@ -604,7 +604,7 @@ unique_ptr DefaultTypeGenerator::CreateDefaultEntry(ClientContext return nullptr; } CreateTypeInfo info; - info.name = entry_name; + info.SetTypeName(entry_name); info.type = LogicalType(entry->type); info.internal = true; info.temporary = true; diff --git a/src/duckdb/src/catalog/default/default_views.cpp b/src/duckdb/src/catalog/default/default_views.cpp index 6323beb10..5a8f7f99f 100644 --- a/src/duckdb/src/catalog/default/default_views.cpp +++ b/src/duckdb/src/catalog/default/default_views.cpp @@ -227,8 
+227,8 @@ static unique_ptr GetDefaultView(ClientContext &context, const I for (idx_t index = 0; internal_views[index].name != nullptr; index++) { if (internal_views[index].schema == schema && internal_views[index].name == name) { auto result = make_uniq(); - result->schema = Identifier(schema); - result->view_name = Identifier(name); + result->SchemaMutable() = Identifier(schema); + result->SetViewName(Identifier(name)); result->sql = internal_views[index].sql; result->temporary = true; result->internal = true; diff --git a/src/duckdb/src/catalog/duck_catalog.cpp b/src/duckdb/src/catalog/duck_catalog.cpp index 44bb3054a..6a2511b08 100644 --- a/src/duckdb/src/catalog/duck_catalog.cpp +++ b/src/duckdb/src/catalog/duck_catalog.cpp @@ -29,7 +29,7 @@ void DuckCatalog::Initialize(bool load_builtin) { // create the default schema CreateSchemaInfo info; - info.schema = Identifier::DefaultSchema(); + info.SchemaMutable() = Identifier::DefaultSchema(); info.internal = true; info.on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT; CreateSchema(data, info); @@ -63,29 +63,29 @@ optional_ptr DuckCatalog::GetDependencyManager() { optional_ptr DuckCatalog::CreateSchemaInternal(CatalogTransaction transaction, CreateSchemaInfo &info) { LogicalDependencyList dependencies; - if (!info.internal && DefaultSchemaGenerator::IsDefaultSchema(info.schema)) { + if (!info.internal && DefaultSchemaGenerator::IsDefaultSchema(info.Schema())) { return nullptr; } auto entry = make_uniq(*this, info); auto result = entry.get(); - if (!schemas->CreateEntry(transaction, info.schema, std::move(entry), dependencies)) { + if (!schemas->CreateEntry(transaction, info.Schema(), std::move(entry), dependencies)) { return nullptr; } return result; } optional_ptr DuckCatalog::CreateSchema(CatalogTransaction transaction, CreateSchemaInfo &info) { - D_ASSERT(!info.schema.empty()); + D_ASSERT(!info.Schema().empty()); auto result = CreateSchemaInternal(transaction, info); if (!result) { switch (info.on_conflict) { 
case OnCreateConflict::ERROR_ON_CONFLICT: - throw CatalogException::EntryAlreadyExists(CatalogType::SCHEMA_ENTRY, info.schema); + throw CatalogException::EntryAlreadyExists(CatalogType::SCHEMA_ENTRY, info.Schema()); case OnCreateConflict::REPLACE_ON_CONFLICT: { DropInfo drop_info; drop_info.type = CatalogType::SCHEMA_ENTRY; - drop_info.catalog = info.catalog; - drop_info.name = info.schema; + drop_info.CatalogMutable() = info.Catalog(); + drop_info.NameMutable() = info.Schema(); DropSchema(transaction, drop_info); result = CreateSchemaInternal(transaction, info); if (!result) { @@ -104,10 +104,10 @@ optional_ptr DuckCatalog::CreateSchema(CatalogTransaction transact } void DuckCatalog::DropSchema(CatalogTransaction transaction, DropInfo &info) { - D_ASSERT(!info.name.empty()); - if (!schemas->DropEntry(transaction, info.name, info.cascade)) { + D_ASSERT(!info.Name().empty()); + if (!schemas->DropEntry(transaction, info.Name(), info.cascade)) { if (info.if_not_found == OnEntryNotFound::THROW_EXCEPTION) { - throw CatalogException::MissingEntry(CatalogType::SCHEMA_ENTRY, info.name, string()); + throw CatalogException::MissingEntry(CatalogType::SCHEMA_ENTRY, info.Name(), string()); } } } diff --git a/src/duckdb/src/common/arrow/arrow_type_extension.cpp b/src/duckdb/src/common/arrow/arrow_type_extension.cpp index bb43c8d29..6c362e671 100644 --- a/src/duckdb/src/common/arrow/arrow_type_extension.cpp +++ b/src/duckdb/src/common/arrow/arrow_type_extension.cpp @@ -7,8 +7,7 @@ #include "duckdb/common/arrow/schema_metadata.hpp" #include "duckdb/common/types/vector.hpp" #include "duckdb/common/types/geometry_crs.hpp" - -#include "yyjson.hpp" +#include "duckdb/common/json_document.hpp" namespace duckdb { @@ -381,44 +380,29 @@ struct ArrowGeometry { unique_ptr duckdb_crs; if (!extension_metadata.empty()) { - unique_ptr doc( - duckdb_yyjson::yyjson_read(extension_metadata.data(), extension_metadata.size(), - duckdb_yyjson::YYJSON_READ_NOFLAG), - duckdb_yyjson::yyjson_doc_free); + 
JSONParseError error; + auto doc = JSONDocument::TryParse(extension_metadata.data(), extension_metadata.size(), error); if (!doc) { throw SerializationException("Invalid JSON in GeoArrow metadata"); } - duckdb_yyjson::yyjson_val *val = yyjson_doc_get_root(doc.get()); - if (!yyjson_is_obj(val)) { + auto val = doc->GetRoot(); + if (!val.IsObject()) { throw SerializationException("Invalid GeoArrow metadata: not a JSON object"); } - duckdb_yyjson::yyjson_val *edges = yyjson_obj_get(val, "edges"); - if (edges && yyjson_is_str(edges) && std::strcmp(yyjson_get_str(edges), "planar") != 0) { + auto edges = val.GetMember("edges"); + if (edges.IsString() && edges.GetString() != "planar") { throw NotImplementedException("Can't import non-planar edges"); } // Pick out the CRS if present - duckdb_yyjson::yyjson_val *crs = yyjson_obj_get(val, "crs"); - - if (crs) { - if (duckdb_yyjson::yyjson_is_str(crs)) { - const char *crs_str = duckdb_yyjson::yyjson_get_str(crs); - duckdb_crs = CoordinateReferenceSystem::TryIdentify(context, crs_str); - } else if (duckdb_yyjson::yyjson_is_obj(crs)) { - // Stringify the object - duckdb_yyjson::yyjson_write_flag write_flags = duckdb_yyjson::YYJSON_WRITE_NOFLAG; - size_t len = 0; - const auto crs_str = duckdb_yyjson::yyjson_val_write(crs, write_flags, &len); - if (crs_str) { - const auto str = string(crs_str, len); - free(crs_str); - duckdb_crs = CoordinateReferenceSystem::TryIdentify(context, str); - } else { - throw SerializationException("Could not serialize CRS object from GeoArrow metadata"); - } - } + auto crs = val.GetMember("crs"); + if (crs.IsString()) { + duckdb_crs = CoordinateReferenceSystem::TryIdentify(context, crs.GetString()); + } else if (crs.IsObject()) { + // Stringify the object + duckdb_crs = CoordinateReferenceSystem::TryIdentify(context, crs.ToString(JSONWriteFlags::NONE)); } } @@ -439,7 +423,7 @@ struct ArrowGeometry { throw InvalidInputException("Arrow extension type \"%s\" not supported for geoarrow.wkb", 
format.c_str()); } - static void WriteCRS(duckdb_yyjson::yyjson_mut_doc *doc, const CoordinateReferenceSystem &crs, + static void WriteCRS(JSONWriter &writer, JSONMutableValue &root, const CoordinateReferenceSystem &crs, ClientContext &context) { // Try to convert to preferred formats, in order auto converted = CoordinateReferenceSystem::TryConvert(context, crs, CoordinateReferenceSystemType::PROJJSON); @@ -459,35 +443,28 @@ struct ArrowGeometry { const auto &crs_def = converted ? converted->GetDefinition() : crs.GetDefinition(); const auto &crs_type = converted ? converted->GetType() : crs.GetType(); - const auto root = duckdb_yyjson::yyjson_mut_doc_get_root(doc); - switch (crs_type) { case CoordinateReferenceSystemType::PROJJSON: { - const auto projjson_doc = - duckdb_yyjson::yyjson_read(crs_def.c_str(), crs_def.size(), duckdb_yyjson::YYJSON_READ_NOFLAG); + JSONParseError error; + auto projjson_doc = JSONDocument::TryParse(crs_def.c_str(), crs_def.size(), error); if (projjson_doc) { - const auto projjson_val = duckdb_yyjson::yyjson_doc_get_root(projjson_doc); - const auto projjson_obj = duckdb_yyjson::yyjson_val_mut_copy(doc, projjson_val); - - duckdb_yyjson::yyjson_mut_obj_add_str(doc, root, "crs_type", "projjson"); - duckdb_yyjson::yyjson_mut_obj_add_val(doc, root, "crs", projjson_obj); - - duckdb_yyjson::yyjson_doc_free(projjson_doc); + root.AddString("crs_type", "projjson"); + root.Add("crs", writer.CreateCopy(projjson_doc->GetRoot())); } else { throw SerializationException("Could not parse PROJJSON CRS for GeoArrow metadata"); } } break; case CoordinateReferenceSystemType::AUTH_CODE: { - duckdb_yyjson::yyjson_mut_obj_add_str(doc, root, "crs_type", "authority_code"); - duckdb_yyjson::yyjson_mut_obj_add_str(doc, root, "crs", crs_def.c_str()); + root.AddString("crs_type", "authority_code"); + root.AddString("crs", crs_def); } break; case CoordinateReferenceSystemType::SRID: { - duckdb_yyjson::yyjson_mut_obj_add_str(doc, root, "crs_type", "srid"); - 
duckdb_yyjson::yyjson_mut_obj_add_str(doc, root, "crs", crs_def.c_str()); + root.AddString("crs_type", "srid"); + root.AddString("crs", crs_def); } break; case CoordinateReferenceSystemType::WKT2_2019: { - duckdb_yyjson::yyjson_mut_obj_add_str(doc, root, "crs_type", "wkt2:2019"); - duckdb_yyjson::yyjson_mut_obj_add_str(doc, root, "crs", crs_def.c_str()); + root.AddString("crs_type", "wkt2:2019"); + root.AddString("crs", crs_def); } break; default: throw SerializationException("Could not serialize CRS of type %d for GeoArrow metadata", @@ -502,29 +479,15 @@ struct ArrowGeometry { schema_metadata.AddOption(ArrowSchemaMetadata::ARROW_EXTENSION_NAME, "geoarrow.wkb"); // Make a CRS entry if the type has a CRS - const auto doc = duckdb_yyjson::yyjson_mut_doc_new(nullptr); - const auto root = duckdb_yyjson::yyjson_mut_obj(doc); - duckdb_yyjson::yyjson_mut_doc_set_root(doc, root); + JSONWriter writer; + auto root = writer.CreateObject(); + writer.SetRoot(root); if (GeoType::HasCRS(type)) { - try { - WriteCRS(doc, GeoType::GetCRS(type), context); - } catch (...) 
{ - duckdb_yyjson::yyjson_mut_doc_free(doc); - throw; - } + WriteCRS(writer, root, GeoType::GetCRS(type), context); } - size_t json_size = 0; - const auto json_text = duckdb_yyjson::yyjson_mut_write(doc, duckdb_yyjson::YYJSON_WRITE_NOFLAG, &json_size); - if (json_text) { - schema_metadata.AddOption(ArrowSchemaMetadata::ARROW_METADATA_KEY, json_text); - duckdb_yyjson::yyjson_mut_doc_free(doc); - free(json_text); - } else { - duckdb_yyjson::yyjson_mut_doc_free(doc); - schema_metadata.AddOption(ArrowSchemaMetadata::ARROW_METADATA_KEY, "{}"); - } + schema_metadata.AddOption(ArrowSchemaMetadata::ARROW_METADATA_KEY, writer.ToString(JSONWriteFlags::NONE)); root_holder.metadata_info.emplace_back(schema_metadata.SerializeMetadata()); schema.metadata = root_holder.metadata_info.back().get(); diff --git a/src/duckdb/src/common/arrow/schema_metadata.cpp b/src/duckdb/src/common/arrow/schema_metadata.cpp index 4e17c4a47..59ea6a0ee 100644 --- a/src/duckdb/src/common/arrow/schema_metadata.cpp +++ b/src/duckdb/src/common/arrow/schema_metadata.cpp @@ -38,8 +38,6 @@ ArrowSchemaMetadata::ArrowSchemaMetadata(const char *metadata) { } ArrowSchemaMetadata::ArrowSchemaMetadata() { - // Always initialize out metadata map - extension_metadata_map = make_uniq(); } void ArrowSchemaMetadata::AddOption(const string &key, const string &value) { @@ -66,9 +64,9 @@ ArrowSchemaMetadata ArrowSchemaMetadata::NonCanonicalType(const string &type_nam ArrowSchemaMetadata metadata; metadata.AddOption(ARROW_EXTENSION_NAME, ArrowExtensionMetadata::ARROW_EXTENSION_NON_CANONICAL); // We have to set the metadata key with type_name and vendor_name. 
- metadata.extension_metadata_map->AddObjectEntry("vendor_name", make_uniq(vendor_name)); - metadata.extension_metadata_map->AddObjectEntry("type_name", make_uniq(type_name)); - metadata.AddOption(ARROW_METADATA_KEY, StringUtil::ToComplexJSONMap(*metadata.extension_metadata_map)); + metadata.extension_metadata_map["vendor_name"] = vendor_name; + metadata.extension_metadata_map["type_name"] = type_name; + metadata.AddOption(ARROW_METADATA_KEY, StringUtil::ToJSONMap(metadata.extension_metadata_map)); return metadata; } @@ -77,9 +75,14 @@ bool ArrowSchemaMetadata::HasExtension() const { return !arrow_extension.empty(); } +static string GetMapValue(const unordered_map &map, const string &key) { + auto it = map.find(key); + return it == map.end() ? string() : it->second; +} + ArrowExtensionMetadata ArrowSchemaMetadata::GetExtensionInfo(string format) { - return {schema_metadata_map[ARROW_EXTENSION_NAME], extension_metadata_map->GetValue("vendor_name"), - extension_metadata_map->GetValue("type_name"), std::move(format)}; + return {schema_metadata_map[ARROW_EXTENSION_NAME], GetMapValue(extension_metadata_map, "vendor_name"), + GetMapValue(extension_metadata_map, "type_name"), std::move(format)}; } unsafe_unique_array ArrowSchemaMetadata::SerializeMetadata() const { diff --git a/src/duckdb/src/common/box_renderer.cpp b/src/duckdb/src/common/box_renderer.cpp index b2b839a7b..3a36a1682 100644 --- a/src/duckdb/src/common/box_renderer.cpp +++ b/src/duckdb/src/common/box_renderer.cpp @@ -103,6 +103,33 @@ const string &StringResultRenderer::str() { return result; } +PrinterResultRenderer::PrinterResultRenderer(OutputStream stream) : stream(stream) { +} + +void PrinterResultRenderer::RenderLayout(const string &text) { + Printer::RawPrint(stream, text); +} + +void PrinterResultRenderer::RenderColumnName(const string &text) { + Printer::RawPrint(stream, text); +} + +void PrinterResultRenderer::RenderType(const string &text) { + Printer::RawPrint(stream, text); +} + +void 
PrinterResultRenderer::RenderValue(const string &text, const LogicalType &type) { + Printer::RawPrint(stream, text); +} + +void PrinterResultRenderer::RenderNull(const string &text, const LogicalType &type) { + Printer::RawPrint(stream, text); +} + +void PrinterResultRenderer::RenderFooter(const string &text) { + Printer::RawPrint(stream, text); +} + //===--------------------------------------------------------------------===// // Box Renderer Implementation //===--------------------------------------------------------------------===// diff --git a/src/duckdb/src/common/complex_json.cpp b/src/duckdb/src/common/complex_json.cpp deleted file mode 100644 index e72efb983..000000000 --- a/src/duckdb/src/common/complex_json.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include "duckdb/common/complex_json.hpp" - -namespace duckdb { -ComplexJSON::ComplexJSON(const string &str) : str_value(str), type(ComplexJSONType::VALUE) { -} - -ComplexJSON::ComplexJSON() : type(ComplexJSONType::VALUE) { -} - -void ComplexJSON::AddObjectEntry(const string &key, unique_ptr object) { - type = ComplexJSONType::OBJECT; - obj_value[key] = std::move(object); -} - -void ComplexJSON::AddArrayElement(unique_ptr object) { - type = ComplexJSONType::ARRAY; - arr_value.push_back(std::move(object)); -} - -ComplexJSON &ComplexJSON::GetObject(const string &key) { - if (type == ComplexJSONType::OBJECT) { - if (obj_value.find(key) == obj_value.end()) { - throw InvalidInputException("Complex JSON Key not found"); - } - return *obj_value[key]; - } - throw InvalidInputException("ComplexJson is not an object"); -} - -ComplexJSON &ComplexJSON::GetArrayElement(const idx_t &index) { - if (type == ComplexJSONType::ARRAY) { - if (index >= arr_value.size()) { - throw InvalidInputException("Complex JSON array element out of bounds"); - } - return *arr_value[index]; - } - throw InvalidInputException("ComplexJson is not an array"); -} - -unordered_map ComplexJSON::Flatten() const { - unordered_map result; - for (auto &obj : 
obj_value) { - result[obj.first] = obj.second->GetValueRecursive(*obj.second); - } - return result; -} - -} // namespace duckdb diff --git a/src/duckdb/src/common/encryption_state.cpp b/src/duckdb/src/common/encryption_state.cpp index a63bd9456..5cd3ee499 100644 --- a/src/duckdb/src/common/encryption_state.cpp +++ b/src/duckdb/src/common/encryption_state.cpp @@ -2,6 +2,46 @@ namespace duckdb { +idx_t CryptoHash::GetDigestSize(CryptoHashFunction function) { + switch (function) { + case CryptoHashFunction::MD5: + return 16; + case CryptoHashFunction::SHA1: + return 20; + case CryptoHashFunction::SHA256: + return 32; + default: + throw InternalException("Unsupported crypto hash function"); + } +} + +idx_t CryptoHash::GetHexDigestSize(CryptoHashFunction function) { + return GetDigestSize(function) * 2; +} + +void CryptoHash::ToHex(const_data_ptr_t input, idx_t input_len, char *output) { + static constexpr char HEX_CODES[] = "0123456789abcdef"; + for (idx_t input_idx = 0, output_idx = 0; input_idx < input_len; input_idx++) { + auto byte = input[input_idx]; + output[output_idx++] = HEX_CODES[(byte >> 4) & 0xf]; + output[output_idx++] = HEX_CODES[byte & 0xf]; + } +} + +CryptoHashState::CryptoHashState(CryptoHashFunction function_p) : function(function_p) { +} + +CryptoHashState::~CryptoHashState() { +} + +void CryptoHashState::HashHex(const_data_ptr_t input, idx_t input_len, char *output) { + auto digest_size = CryptoHash::GetDigestSize(function); + data_t digest[CryptoHash::MAX_DIGEST_SIZE]; + D_ASSERT(digest_size <= CryptoHash::MAX_DIGEST_SIZE); + Hash(input, input_len, digest); + CryptoHash::ToHex(digest, digest_size, output); +} + EncryptionState::EncryptionState(unique_ptr metadata_p) : metadata(std::move(metadata_p)) { } @@ -28,4 +68,46 @@ void EncryptionState::GenerateRandomData(data_ptr_t, idx_t) { throw NotImplementedException("EncryptionState Abstract Class is called"); } +void EncryptionUtil::Hash(CryptoHashFunction, const_data_ptr_t, idx_t, data_ptr_t) const { 
+ throw NotImplementedException("EncryptionUtil does not implement hashing"); +} + +void EncryptionUtil::HashHex(CryptoHashFunction function, const_data_ptr_t input, idx_t input_len, char *output) const { + auto digest_size = CryptoHash::GetDigestSize(function); + data_t digest[CryptoHash::MAX_DIGEST_SIZE]; + D_ASSERT(digest_size <= CryptoHash::MAX_DIGEST_SIZE); + Hash(function, input, input_len, digest); + CryptoHash::ToHex(digest, digest_size, output); +} + +class EncryptionUtilCryptoHashState : public CryptoHashState { +public: + EncryptionUtilCryptoHashState(const EncryptionUtil &encryption_util_p, CryptoHashFunction function) + : CryptoHashState(function), encryption_util(encryption_util_p) { + } + + void Hash(const_data_ptr_t input, idx_t input_len, data_ptr_t output) override { + encryption_util.Hash(GetFunction(), input, input_len, output); + } + +private: + const EncryptionUtil &encryption_util; +}; + +unique_ptr EncryptionUtil::CreateHashState(CryptoHashFunction function) const { + return make_uniq(*this, function); +} + +void EncryptionUtil::Hmac(CryptoHashFunction, const_data_ptr_t, idx_t, const_data_ptr_t, idx_t, data_ptr_t) const { + throw NotImplementedException("EncryptionUtil does not implement HMAC"); +} + +bool EncryptionUtil::SupportsHash(CryptoHashFunction) const { + return false; +} + +bool EncryptionUtil::SupportsHmac(CryptoHashFunction) const { + return false; +} + } // namespace duckdb diff --git a/src/duckdb/src/common/enum_util.cpp b/src/duckdb/src/common/enum_util.cpp index c07a15708..41d5502f5 100644 --- a/src/duckdb/src/common/enum_util.cpp +++ b/src/duckdb/src/common/enum_util.cpp @@ -15,6 +15,7 @@ #include "duckdb/catalog/catalog_entry/table_column_type.hpp" #include "duckdb/common/box_renderer.hpp" #include "duckdb/common/column_index.hpp" +#include "duckdb/common/encryption_state.hpp" #include "duckdb/common/enums/access_mode.hpp" #include "duckdb/common/enums/aggregate_handling.hpp" #include 
"duckdb/common/enums/allow_parser_override.hpp" @@ -62,6 +63,7 @@ #include "duckdb/common/enums/prepared_statement_mode.hpp" #include "duckdb/common/enums/preserve_order.hpp" #include "duckdb/common/enums/quantile_enum.hpp" +#include "duckdb/common/enums/regex_match_operator_semantics.hpp" #include "duckdb/common/enums/relation_type.hpp" #include "duckdb/common/enums/row_group_append_mode.hpp" #include "duckdb/common/enums/set_operation_type.hpp" @@ -91,6 +93,7 @@ #include "duckdb/common/multi_file/multi_file_data.hpp" #include "duckdb/common/multi_file/multi_file_list.hpp" #include "duckdb/common/multi_file/multi_file_options.hpp" +#include "duckdb/common/multi_file/multi_file_states.hpp" #include "duckdb/common/operator/decimal_cast_operators.hpp" #include "duckdb/common/printer.hpp" #include "duckdb/common/sorting/sort_key.hpp" @@ -1494,6 +1497,25 @@ CopyToType EnumUtil::FromString(const char *value) { return static_cast(StringUtil::StringToEnum(GetCopyToTypeValues(), 2, "CopyToType", value)); } +const StringUtil::EnumStringLiteral *GetCryptoHashFunctionValues() { + static constexpr StringUtil::EnumStringLiteral values[] { + { static_cast(CryptoHashFunction::MD5), "MD5" }, + { static_cast(CryptoHashFunction::SHA1), "SHA1" }, + { static_cast(CryptoHashFunction::SHA256), "SHA256" } + }; + return values; +} + +template<> +const char* EnumUtil::ToChars(CryptoHashFunction value) { + return StringUtil::EnumToString(GetCryptoHashFunctionValues(), 3, "CryptoHashFunction", static_cast(value)); +} + +template<> +CryptoHashFunction EnumUtil::FromString(const char *value) { + return static_cast(StringUtil::StringToEnum(GetCryptoHashFunctionValues(), 3, "CryptoHashFunction", value)); +} + const StringUtil::EnumStringLiteral *GetDataFileTypeValues() { static constexpr StringUtil::EnumStringLiteral values[] { { static_cast(DataFileType::FILE_DOES_NOT_EXIST), "FILE_DOES_NOT_EXIST" }, @@ -3498,6 +3520,24 @@ MultiFileFileState EnumUtil::FromString(const char *value) { return 
static_cast(StringUtil::StringToEnum(GetMultiFileFileStateValues(), 5, "MultiFileFileState", value)); } +const StringUtil::EnumStringLiteral *GetMultiFileScanPhaseValues() { + static constexpr StringUtil::EnumStringLiteral values[] { + { static_cast(MultiFileScanPhase::SCHEDULE), "SCHEDULE" }, + { static_cast(MultiFileScanPhase::DECODE), "DECODE" } + }; + return values; +} + +template<> +const char* EnumUtil::ToChars(MultiFileScanPhase value) { + return StringUtil::EnumToString(GetMultiFileScanPhaseValues(), 2, "MultiFileScanPhase", static_cast(value)); +} + +template<> +MultiFileScanPhase EnumUtil::FromString(const char *value) { + return static_cast(StringUtil::StringToEnum(GetMultiFileScanPhaseValues(), 2, "MultiFileScanPhase", value)); +} + const StringUtil::EnumStringLiteral *GetNTypeValues() { static constexpr StringUtil::EnumStringLiteral values[] { { static_cast(NType::PREFIX), "PREFIX" }, @@ -4488,6 +4528,24 @@ RecursiveProbeSidePreference EnumUtil::FromString( return static_cast(StringUtil::StringToEnum(GetRecursiveProbeSidePreferenceValues(), 3, "RecursiveProbeSidePreference", value)); } +const StringUtil::EnumStringLiteral *GetRegexMatchOperatorSemanticsValues() { + static constexpr StringUtil::EnumStringLiteral values[] { + { static_cast(RegexMatchOperatorSemantics::PARTIAL), "PARTIAL" }, + { static_cast(RegexMatchOperatorSemantics::FULL), "FULL" } + }; + return values; +} + +template<> +const char* EnumUtil::ToChars(RegexMatchOperatorSemantics value) { + return StringUtil::EnumToString(GetRegexMatchOperatorSemanticsValues(), 2, "RegexMatchOperatorSemantics", static_cast(value)); +} + +template<> +RegexMatchOperatorSemantics EnumUtil::FromString(const char *value) { + return static_cast(StringUtil::StringToEnum(GetRegexMatchOperatorSemanticsValues(), 2, "RegexMatchOperatorSemantics", value)); +} + const StringUtil::EnumStringLiteral *GetRelationTypeValues() { static constexpr StringUtil::EnumStringLiteral values[] { { 
static_cast(RelationType::INVALID_RELATION), "INVALID_RELATION" }, diff --git a/src/duckdb/src/common/error_data.cpp b/src/duckdb/src/common/error_data.cpp index dda2da4e3..55aa1784a 100644 --- a/src/duckdb/src/common/error_data.cpp +++ b/src/duckdb/src/common/error_data.cpp @@ -42,7 +42,7 @@ ErrorData::ErrorData(const string &message) } // JSON-formatted message. - auto info = StringUtil::ParseJSONMap(message)->Flatten(); + auto info = StringUtil::ParseJSONMap(message); for (auto &entry : info) { if (entry.first == "exception_type") { type = Exception::StringToExceptionType(entry.second); diff --git a/src/duckdb/src/common/json_document.cpp b/src/duckdb/src/common/json_document.cpp new file mode 100644 index 000000000..5b0ce012d --- /dev/null +++ b/src/duckdb/src/common/json_document.cpp @@ -0,0 +1,334 @@ +#include "duckdb/common/json_document.hpp" + +#include "duckdb/common/exception.hpp" +#include "yyjson.hpp" + +using namespace duckdb_yyjson; // NOLINT + +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Flag translation +//===--------------------------------------------------------------------===// +static yyjson_read_flag TranslateReadFlags(JSONReadFlags flags) { + const auto raw = static_cast(flags); + yyjson_read_flag result = 0; + if (raw & static_cast(JSONReadFlags::ALLOW_INVALID_UNICODE)) { + result |= YYJSON_READ_ALLOW_INVALID_UNICODE; + } + if (raw & static_cast(JSONReadFlags::ALLOW_INF_AND_NAN)) { + result |= YYJSON_READ_ALLOW_INF_AND_NAN; + } + if (raw & static_cast(JSONReadFlags::ALLOW_TRAILING_COMMAS)) { + result |= YYJSON_READ_ALLOW_TRAILING_COMMAS; + } + if (raw & static_cast(JSONReadFlags::BIGNUM_AS_RAW)) { + result |= YYJSON_READ_BIGNUM_AS_RAW; + } + return result; +} + +static yyjson_write_flag TranslateWriteFlags(JSONWriteFlags flags) { + const auto raw = static_cast(flags); + yyjson_write_flag result = 0; + if (raw & static_cast(JSONWriteFlags::ALLOW_INVALID_UNICODE)) { + result |= 
YYJSON_WRITE_ALLOW_INVALID_UNICODE; + } + if (raw & static_cast(JSONWriteFlags::ALLOW_INF_AND_NAN)) { + result |= YYJSON_WRITE_ALLOW_INF_AND_NAN; + } + if (raw & static_cast(JSONWriteFlags::PRETTY)) { + result |= YYJSON_WRITE_PRETTY; + } + return result; +} + +//===--------------------------------------------------------------------===// +// JSONValue +//===--------------------------------------------------------------------===// +JSONValue::JSONValue() : val(nullptr) { +} + +JSONValue::JSONValue(yyjson_val *val_p) : val(val_p) { +} + +bool JSONValue::IsValid() const { + return val != nullptr; +} + +JSONValueType JSONValue::GetType() const { + if (!val) { + return JSONValueType::INVALID; + } + switch (yyjson_get_tag(val)) { + case YYJSON_TYPE_ARR | YYJSON_SUBTYPE_NONE: + return JSONValueType::ARRAY; + case YYJSON_TYPE_OBJ | YYJSON_SUBTYPE_NONE: + return JSONValueType::OBJECT; + case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NOESC: + case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE: + return JSONValueType::STRING; + case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE: + case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE: + return JSONValueType::BOOLEAN; + case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT: + return JSONValueType::UNSIGNED_INTEGER; + case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT: + return JSONValueType::SIGNED_INTEGER; + case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL: + return JSONValueType::DOUBLE; + case YYJSON_TYPE_RAW | YYJSON_SUBTYPE_NONE: + return JSONValueType::RAW; + case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE: + return JSONValueType::JSON_NULL; + default: + return JSONValueType::INVALID; + } +} + +bool JSONValue::IsNull() const { + return val && yyjson_is_null(val); +} + +bool JSONValue::IsString() const { + return val && yyjson_is_str(val); +} + +bool JSONValue::IsArray() const { + return val && yyjson_is_arr(val); +} + +bool JSONValue::IsObject() const { + return val && yyjson_is_obj(val); +} + +bool JSONValue::IsInteger() const { + return val && yyjson_is_int(val); +} + +string 
JSONValue::GetString() const { + const auto str = yyjson_get_str(val); + const auto len = yyjson_get_len(val); + return string(str, len); +} + +bool JSONValue::GetBoolean() const { + return yyjson_get_bool(val); +} + +uint64_t JSONValue::GetUnsignedInteger() const { + return unsafe_yyjson_get_uint(val); +} + +int64_t JSONValue::GetSignedInteger() const { + return unsafe_yyjson_get_sint(val); +} + +double JSONValue::GetDouble() const { + return unsafe_yyjson_get_real(val); +} + +void JSONValue::IterateArray(const std::function &callback) const { + size_t idx, max; + yyjson_val *child; + yyjson_arr_foreach(val, idx, max, child) { + callback(JSONValue(child)); + } +} + +void JSONValue::IterateObject(const std::function &callback) const { + size_t idx, max; + yyjson_val *key, *value; + yyjson_obj_foreach(val, idx, max, key, value) { + callback(string(yyjson_get_str(key), yyjson_get_len(key)), JSONValue(value)); + } +} + +JSONValue JSONValue::GetMember(const string &key) const { + if (!val || !yyjson_is_obj(val)) { + return JSONValue(); + } + return JSONValue(yyjson_obj_get(val, key.c_str())); +} + +string JSONValue::ToString(JSONWriteFlags flags) const { + size_t len; + auto json = yyjson_val_write(val, TranslateWriteFlags(flags), &len); + if (!json) { + throw SerializationException("Failed to write JSON string"); + } + string result(json, len); + free(json); + return result; +} + +//===--------------------------------------------------------------------===// +// JSONDocument +//===--------------------------------------------------------------------===// +JSONDocument::JSONDocument() : doc(nullptr) { +} + +JSONDocument::~JSONDocument() { + if (doc) { + yyjson_doc_free(doc); + } +} + +JSONDocument::JSONDocument(JSONDocument &&other) noexcept : doc(other.doc) { + other.doc = nullptr; +} + +JSONDocument &JSONDocument::operator=(JSONDocument &&other) noexcept { + std::swap(doc, other.doc); + return *this; +} + +unique_ptr JSONDocument::TryParse(const char *data, idx_t len, 
JSONParseError &error, + JSONReadFlags flags) { + yyjson_read_err read_error; + auto parsed = yyjson_read_opts((char *)data, len, TranslateReadFlags(flags), nullptr, &read_error); // NOLINT + if (!parsed || read_error.code != YYJSON_READ_SUCCESS) { + error.has_error = true; + error.position = read_error.pos; + error.message = read_error.msg ? read_error.msg : "Unknown error"; + if (parsed) { + yyjson_doc_free(parsed); + } + return nullptr; + } + auto result = make_uniq(); + result->doc = parsed; + return result; +} + +unique_ptr JSONDocument::Parse(const char *data, idx_t len, JSONReadFlags flags) { + JSONParseError error; + auto result = TryParse(data, len, error, flags); + if (!result) { + throw InvalidInputException("Failed to parse JSON at byte %llu of input: %s", error.position, error.message); + } + return result; +} + +JSONValue JSONDocument::GetRoot() const { + return JSONValue(doc ? yyjson_doc_get_root(doc) : nullptr); +} + +string JSONDocument::ToString(JSONWriteFlags flags) const { + size_t len; + auto json = yyjson_write(doc, TranslateWriteFlags(flags), &len); + if (!json) { + throw SerializationException("Failed to write JSON string"); + } + string result(json, len); + free(json); + return result; +} + +//===--------------------------------------------------------------------===// +// JSONMutableValue +//===--------------------------------------------------------------------===// +JSONMutableValue::JSONMutableValue() : doc(nullptr), val(nullptr) { +} + +JSONMutableValue::JSONMutableValue(yyjson_mut_doc *doc_p, yyjson_mut_val *val_p) : doc(doc_p), val(val_p) { +} + +bool JSONMutableValue::IsValid() const { + return val != nullptr; +} + +void JSONMutableValue::Add(const string &key, JSONMutableValue value) { + auto key_val = yyjson_mut_strncpy(doc, key.c_str(), key.size()); + yyjson_mut_obj_add(val, key_val, value.val); +} + +void JSONMutableValue::AddString(const string &key, const string &value) { + auto key_val = yyjson_mut_strncpy(doc, key.c_str(), 
key.size()); + auto value_val = yyjson_mut_strncpy(doc, value.c_str(), value.size()); + yyjson_mut_obj_add(val, key_val, value_val); +} + +void JSONMutableValue::Append(JSONMutableValue value) { + yyjson_mut_arr_append(val, value.val); +} + +void JSONMutableValue::AppendString(const string &value) { + auto value_val = yyjson_mut_strncpy(doc, value.c_str(), value.size()); + yyjson_mut_arr_append(val, value_val); +} + +//===--------------------------------------------------------------------===// +// JSONWriter +//===--------------------------------------------------------------------===// +JSONWriter::JSONWriter() : doc(yyjson_mut_doc_new(nullptr)) { +} + +JSONWriter::~JSONWriter() { + if (doc) { + yyjson_mut_doc_free(doc); + } +} + +JSONWriter::JSONWriter(JSONWriter &&other) noexcept : doc(other.doc) { + other.doc = nullptr; +} + +JSONWriter &JSONWriter::operator=(JSONWriter &&other) noexcept { + std::swap(doc, other.doc); + return *this; +} + +JSONMutableValue JSONWriter::CreateObject() { + return JSONMutableValue(doc, yyjson_mut_obj(doc)); +} + +JSONMutableValue JSONWriter::CreateArray() { + return JSONMutableValue(doc, yyjson_mut_arr(doc)); +} + +JSONMutableValue JSONWriter::CreateString(const string &value) { + return JSONMutableValue(doc, yyjson_mut_strncpy(doc, value.c_str(), value.size())); +} + +JSONMutableValue JSONWriter::CreateNull() { + return JSONMutableValue(doc, yyjson_mut_null(doc)); +} + +JSONMutableValue JSONWriter::CreateBoolean(bool value) { + return JSONMutableValue(doc, yyjson_mut_bool(doc, value)); +} + +JSONMutableValue JSONWriter::CreateUnsignedInteger(uint64_t value) { + return JSONMutableValue(doc, yyjson_mut_uint(doc, value)); +} + +JSONMutableValue JSONWriter::CreateSignedInteger(int64_t value) { + return JSONMutableValue(doc, yyjson_mut_sint(doc, value)); +} + +JSONMutableValue JSONWriter::CreateDouble(double value) { + return JSONMutableValue(doc, yyjson_mut_real(doc, value)); +} + +JSONMutableValue JSONWriter::CreateCopy(const 
JSONValue &value) { + return JSONMutableValue(doc, yyjson_val_mut_copy(doc, value.val)); +} + +void JSONWriter::SetRoot(JSONMutableValue value) { + yyjson_mut_doc_set_root(doc, value.val); +} + +string JSONWriter::ToString(JSONWriteFlags flags) const { + yyjson_write_err err; + size_t len; + auto json = yyjson_mut_write_opts(doc, TranslateWriteFlags(flags), nullptr, &len, &err); + if (!json) { + throw SerializationException("Failed to write JSON string: %s", err.msg); + } + string result(json, len); + free(json); + return result; +} + +} // namespace duckdb diff --git a/src/duckdb/src/common/multi_file/base_file_reader.cpp b/src/duckdb/src/common/multi_file/base_file_reader.cpp index caf569afa..0c03f8957 100644 --- a/src/duckdb/src/common/multi_file/base_file_reader.cpp +++ b/src/duckdb/src/common/multi_file/base_file_reader.cpp @@ -1,5 +1,6 @@ #include "duckdb/common/multi_file/base_file_reader.hpp" #include "duckdb/storage/statistics/base_statistics.hpp" +#include "duckdb/parallel/async_result.hpp" namespace duckdb { @@ -14,6 +15,10 @@ shared_ptr BaseFileReader::GetUnionData(idx_t file_idx) { void BaseFileReader::PrepareScan(ClientContext &, GlobalTableFunctionState &, LocalTableFunctionState &) { } +AsyncResult BaseFileReader::ScheduleIO(ClientContext &, GlobalTableFunctionState &, LocalTableFunctionState &) { + return SourceResultType::HAVE_MORE_OUTPUT; +} + void BaseFileReader::PrepareReader(ClientContext &context, GlobalTableFunctionState &) { } diff --git a/src/duckdb/src/common/radix_partitioning.cpp b/src/duckdb/src/common/radix_partitioning.cpp index 695b36aa1..fc125f896 100644 --- a/src/duckdb/src/common/radix_partitioning.cpp +++ b/src/duckdb/src/common/radix_partitioning.cpp @@ -10,17 +10,8 @@ namespace duckdb { //! Templated radix partitioning constants, can be templated to the number of radix bits template struct RadixPartitioningConstants { -public: - //! 
Bitmask of the upper bits starting at the 5th byte - static constexpr idx_t NUM_PARTITIONS = RadixPartitioning::NumberOfPartitions(radix_bits); - static constexpr idx_t SHIFT = RadixPartitioning::Shift(radix_bits); - static constexpr hash_t MASK = RadixPartitioning::Mask(radix_bits); - -public: - //! Apply bitmask and right shift to get a number between 0 and NUM_PARTITIONS static hash_t ApplyMask(const hash_t hash) { - D_ASSERT((hash & MASK) >> SHIFT < NUM_PARTITIONS); - return (hash & MASK) >> SHIFT; + return RadixPartitioning::ApplyMask(hash, radix_bits); } }; diff --git a/src/duckdb/src/common/sort/hashed_sort.cpp b/src/duckdb/src/common/sort/hashed_sort.cpp index ef1e75f16..4ef11cafc 100644 --- a/src/duckdb/src/common/sort/hashed_sort.cpp +++ b/src/duckdb/src/common/sort/hashed_sort.cpp @@ -1,7 +1,9 @@ #include "duckdb/common/sorting/hashed_sort.hpp" +#include "duckdb/common/sorting/partition_key_tracker.hpp" #include "duckdb/common/sorting/sorted_run.hpp" #include "duckdb/common/radix_partitioning.hpp" #include "duckdb/common/types/hyperloglog.hpp" +#include "duckdb/common/vector_operations/vector_operations.hpp" #include "duckdb/parallel/thread_context.hpp" #include "duckdb/planner/expression/bound_reference_expression.hpp" @@ -29,6 +31,7 @@ class HashedSortGroup { TupleDataParallelScanState parallel_scan; atomic tasks_completed; unique_ptr sort_source; + unique_ptr direct_column_data; }; HashedSortGroup::HashedSortGroup(ClientContext &client, Sort &sort, idx_t group_idx) @@ -52,10 +55,14 @@ class HashedSortGlobalSinkState : public GlobalSinkState { // OVER(PARTITION BY...) 
(hash grouping) unique_ptr CreatePartition(idx_t new_bits) const; + unique_ptr CreateRepartitionKeyTracker(PartitionKeyTracker &tracker) const; void SyncPartitioning(const HashedSortGlobalSinkState &other); - void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &partition_append); - void CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append); + void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &partition_append, + optional_ptr local_tracker); + void CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append, + optional_ptr local_tracker); ProgressData GetSinkProgress(ClientContext &context, const ProgressData source_progress) const; + bool CanBypassSort(idx_t hash_bin) const; //! System and query state ClientContext &client; @@ -68,6 +75,7 @@ class HashedSortGlobalSinkState : public GlobalSinkState { GroupingPartition grouping_data; //! Payload plus hash column shared_ptr grouping_types_ptr; + unique_ptr single_key_tracker; //! 
The number of radix bits if this partition is being synced with another idx_t fixed_bits; @@ -80,12 +88,17 @@ class HashedSortGlobalSinkState : public GlobalSinkState { private: void Rehash(idx_t cardinality); - void SyncLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append); + void SyncLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append, + optional_ptr local_tracker); }; HashedSortGlobalSinkState::HashedSortGlobalSinkState(ClientContext &client, const HashedSort &hashed_sort) : client(client), hashed_sort(hashed_sort), buffer_manager(BufferManager::GetBufferManager(client)), allocator(Allocator::Get(client)), fixed_bits(0), max_bits(1), count(0) { + if (hashed_sort.can_bypass_single_key_sort) { + single_key_tracker = make_uniq(allocator, hashed_sort.partition_key_types); + } + const auto memory_per_thread = PhysicalOperator::GetMaxThreadMemory(client); const auto thread_pages = PreviousPowerOfTwo(memory_per_thread / (4 * buffer_manager.GetBlockAllocSize())); while (max_bits < 8 && (thread_pages >> max_bits) > 1) { @@ -107,6 +120,12 @@ unique_ptr HashedSortGlobalSinkState::CreatePartition hash_col_idx); } +unique_ptr +HashedSortGlobalSinkState::CreateRepartitionKeyTracker(PartitionKeyTracker &tracker) const { + return make_uniq(allocator, tracker, hashed_sort.partition_key_types, + hashed_sort.partition_key_ids); +} + void HashedSortGlobalSinkState::Rehash(idx_t cardinality) { // Have we started to combine? Then just live with it. 
if (fixed_bits) { @@ -123,10 +142,14 @@ void HashedSortGlobalSinkState::Rehash(idx_t cardinality) { // Repartition the grouping data if (new_bits != bits) { grouping_data = CreatePartition(new_bits); + if (single_key_tracker) { + single_key_tracker->Reset(new_bits); + } } } -void HashedSortGlobalSinkState::SyncLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append) { +void HashedSortGlobalSinkState::SyncLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append, + optional_ptr local_tracker) { // We are done if the local_partition is right sized. const auto new_bits = grouping_data->GetRadixBits(); if (local_partition->GetRadixBits() == new_bits) { @@ -136,7 +159,12 @@ void HashedSortGlobalSinkState::SyncLocalPartition(GroupingPartition &local_part // If the local partition is now too small, flush it and reallocate auto new_partition = CreatePartition(new_bits); local_partition->FlushAppendState(*local_append); - local_partition->Repartition(client, *new_partition); + unique_ptr key_tracker; + if (local_tracker) { + local_tracker->Reset(new_bits); + key_tracker = CreateRepartitionKeyTracker(*local_tracker); + } + local_partition->Repartition(client, *new_partition, key_tracker.get()); local_partition = std::move(new_partition); local_append = make_uniq(); @@ -144,13 +172,17 @@ void HashedSortGlobalSinkState::SyncLocalPartition(GroupingPartition &local_part } void HashedSortGlobalSinkState::UpdateLocalPartition(GroupingPartition &local_partition, - GroupingAppend &partition_append) { + GroupingAppend &partition_append, + optional_ptr local_tracker) { // First call: initialize the local partition if (!local_partition) { lock_guard guard(lock); local_partition = CreatePartition(grouping_data->GetRadixBits()); partition_append = make_uniq(); local_partition->InitializeAppendState(*partition_append); + if (local_tracker) { + local_tracker->Reset(local_partition->GetRadixBits()); + } return; } @@ -169,7 +201,12 @@ void 
HashedSortGlobalSinkState::UpdateLocalPartition(GroupingPartition &local_pa auto new_partition = CreatePartition(new_bits); local_partition->FlushAppendState(*partition_append); - local_partition->Repartition(client, *new_partition); + unique_ptr key_tracker; + if (local_tracker) { + local_tracker->Reset(new_bits); + key_tracker = CreateRepartitionKeyTracker(*local_tracker); + } + local_partition->Repartition(client, *new_partition, key_tracker.get()); local_partition = std::move(new_partition); partition_append = make_uniq(); local_partition->InitializeAppendState(*partition_append); @@ -182,11 +219,14 @@ void HashedSortGlobalSinkState::SyncPartitioning(const HashedSortGlobalSinkState const auto old_bits = grouping_data ? grouping_data->GetRadixBits() : 0; if (fixed_bits != old_bits) { grouping_data = CreatePartition(fixed_bits); + if (single_key_tracker) { + single_key_tracker->Reset(fixed_bits); + } } } -void HashedSortGlobalSinkState::CombineLocalPartition(GroupingPartition &local_partition, - GroupingAppend &local_append) { +void HashedSortGlobalSinkState::CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append, + optional_ptr local_tracker) { if (!local_partition) { return; } @@ -195,7 +235,7 @@ void HashedSortGlobalSinkState::CombineLocalPartition(GroupingPartition &local_p // Make sure grouping_data doesn't change under us. // Combine has an internal mutex, so this is single-threaded anyway. 
lock_guard guard(lock); - SyncLocalPartition(local_partition, local_append); + SyncLocalPartition(local_partition, local_append, local_tracker); fixed_bits = true; // We now know the number of hash_groups (some may be empty) @@ -218,9 +258,16 @@ void HashedSortGlobalSinkState::CombineLocalPartition(GroupingPartition &local_p } // Combine the thread data into the global data + if (single_key_tracker && local_tracker) { + single_key_tracker->Combine(*local_tracker); + } grouping_data->Combine(*local_partition); } +bool HashedSortGlobalSinkState::CanBypassSort(idx_t hash_bin) const { + return single_key_tracker && single_key_tracker->CanBypass(hash_bin); +} + ProgressData HashedSortGlobalSinkState::GetSinkProgress(ClientContext &client, const ProgressData source) const { ProgressData result; result.done = source.done / 2; @@ -311,6 +358,7 @@ class HashedSortLocalSinkState : public LocalSinkState { //! OVER(PARTITION BY...) (hash grouping) GroupingPartition local_grouping; GroupingAppend grouping_append; + unique_ptr single_key_tracker; //! 
(optional) HyperLogLog state optional_ptr hll; @@ -320,9 +368,10 @@ HashedSortLocalSinkState::HashedSortLocalSinkState(ExecutionContext &context, co : hashed_sort(hashed_sort), allocator(Allocator::Get(context.client)), hash_exec(context.client), sort_exec(context.client) { vector group_types; + group_types.reserve(hashed_sort.partitions.size()); for (idx_t prt_idx = 0; prt_idx < hashed_sort.partitions.size(); prt_idx++) { auto &pexpr = *hashed_sort.partitions[prt_idx].expression.get(); - group_types.push_back(pexpr.GetReturnType()); + group_types.push_back(hashed_sort.partition_key_types[prt_idx]); hash_exec.AddExpression(pexpr); } @@ -337,6 +386,10 @@ HashedSortLocalSinkState::HashedSortLocalSinkState(ExecutionContext &context, co group_chunk.Initialize(allocator, group_types); hash_types.emplace_back(LogicalType::HASH); payload_chunk.Initialize(allocator, hash_types); + + if (hashed_sort.can_bypass_single_key_sort) { + single_key_tracker = make_uniq(allocator, group_types); + } } void HashedSort::Synchronize(const GlobalSinkState &source, GlobalSinkState &target) const { @@ -404,8 +457,11 @@ SinkResultType HashedSort::Sink(ExecutionContext &context, DataChunk &input_chun auto &local_grouping = lstate.local_grouping; auto &grouping_append = lstate.grouping_append; - gstate.UpdateLocalPartition(local_grouping, grouping_append); + gstate.UpdateLocalPartition(local_grouping, grouping_append, lstate.single_key_tracker.get()); local_grouping->Append(*grouping_append, payload_chunk); + if (lstate.single_key_tracker) { + lstate.single_key_tracker->Update(lstate.group_chunk, hash_vector, *grouping_append, input_chunk.size()); + } return SinkResultType::NEED_MORE_INPUT; } @@ -422,49 +478,85 @@ SinkCombineResultType HashedSort::Combine(ExecutionContext &context, OperatorSin // Flush our data and lock the bit count auto &grouping_append = lstate.grouping_append; - gstate.CombineLocalPartition(local_grouping, grouping_append); + gstate.CombineLocalPartition(local_grouping, 
grouping_append, lstate.single_key_tracker.get()); return SinkCombineResultType::FINISHED; } +static void BuildDirectColumnData(ClientContext &client, const vector &payload_types, + TupleDataCollection &partition, HashedSortGroup &hash_group) { + DataChunk chunk; + partition.InitializeScanChunk(hash_group.parallel_scan.scan_state, chunk); + TupleDataLocalScanState local_scan; + partition.InitializeScan(local_scan); + + auto local_column_data = + make_uniq(client, payload_types, ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR); + ColumnDataAppendState append_state; + local_column_data->InitializeAppend(append_state); + + idx_t combined = 0; + while (partition.Scan(hash_group.parallel_scan, local_scan, chunk)) { + local_column_data->Append(append_state, chunk); + combined += chunk.size(); + } + + lock_guard direct_guard(hash_group.scan_lock); + if (combined) { + if (!hash_group.direct_column_data) { + hash_group.direct_column_data = std::move(local_column_data); + } else { + hash_group.direct_column_data->Combine(*local_column_data); + } + hash_group.count += combined; + } +} + void HashedSort::SortColumnData(ExecutionContext &context, hash_t hash_bin, OperatorSinkFinalizeInput &finalize) const { auto &gstate = finalize.global_state.Cast(); // Loop over the partitions and add them to each hash group's global sort state auto &partitions = gstate.grouping_data->GetPartitions(); - if (hash_bin < partitions.size()) { - auto &partition = *partitions[hash_bin]; - if (!partition.Count()) { - return; - } + if (hash_bin >= partitions.size()) { + return; + } - auto &hash_group = *gstate.hash_groups[hash_bin]; - auto ¶llel_scan = hash_group.parallel_scan; + auto &partition = *partitions[hash_bin]; + if (!partition.Count()) { + return; + } - DataChunk chunk; - partition.InitializeScanChunk(parallel_scan.scan_state, chunk); - TupleDataLocalScanState local_scan; - partition.InitializeScan(local_scan); + auto &hash_group = *gstate.hash_groups[hash_bin]; - auto sort_local = 
sort->GetLocalSinkState(context); - OperatorSinkInput sink {*hash_group.sort_global, *sort_local, finalize.interrupt_state}; - idx_t combined = 0; - while (partition.Scan(hash_group.parallel_scan, local_scan, chunk)) { - sort->Sink(context, chunk, sink); - combined += chunk.size(); - } + if (gstate.CanBypassSort(hash_bin)) { + BuildDirectColumnData(context.client, payload_types, partition, hash_group); + return; + } - OperatorSinkCombineInput combine {*hash_group.sort_global, *sort_local, finalize.interrupt_state}; - sort->Combine(context, combine); - hash_group.count += combined; + auto ¶llel_scan = hash_group.parallel_scan; + DataChunk chunk; + partition.InitializeScanChunk(parallel_scan.scan_state, chunk); + TupleDataLocalScanState local_scan; + partition.InitializeScan(local_scan); - // Whoever finishes last can Finalize - lock_guard finalize_guard(hash_group.scan_lock); - if (hash_group.count == partition.Count() && !hash_group.sort_source) { - OperatorSinkFinalizeInput lfinalize {*hash_group.sort_global, finalize.interrupt_state}; - sort->Finalize(context.client, lfinalize); - hash_group.sort_source = sort->GetGlobalSourceState(context.client, *hash_group.sort_global); - } + auto sort_local = sort->GetLocalSinkState(context); + OperatorSinkInput sink {*hash_group.sort_global, *sort_local, finalize.interrupt_state}; + idx_t combined = 0; + while (partition.Scan(hash_group.parallel_scan, local_scan, chunk)) { + sort->Sink(context, chunk, sink); + combined += chunk.size(); + } + + OperatorSinkCombineInput combine {*hash_group.sort_global, *sort_local, finalize.interrupt_state}; + sort->Combine(context, combine); + hash_group.count += combined; + + // Whoever finishes last can Finalize + lock_guard finalize_guard(hash_group.scan_lock); + if (hash_group.count == partition.Count() && !hash_group.sort_source) { + OperatorSinkFinalizeInput lfinalize {*hash_group.sort_global, finalize.interrupt_state}; + sort->Finalize(context.client, lfinalize); + 
hash_group.sort_source = sort->GetGlobalSourceState(context.client, *hash_group.sort_global); } } @@ -535,6 +627,12 @@ HashedSort::HashedSort(ClientContext &client, const vector 0 && order_bys.empty(); + partition_key_types.reserve(partition_key_count); + for (idx_t key_idx = 0; key_idx < partition_key_count; key_idx++) { + partition_key_types.push_back(partitions[key_idx].expression->GetReturnType()); + } // We have to compute ordering expressions ourselves and materialise them. // To do this, we scan the orders and add generate extra payload columns that we can reference. @@ -555,6 +653,9 @@ HashedSort::HashedSort(ClientContext &client, const vector reset_guard(hash_group.scan_lock); @@ -613,6 +716,10 @@ static SourceResultType MaterializeHashGroupData(ExecutionContext &context, idx_ } } + if (!build_runs && gsink.CanBypassSort(hash_bin)) { + return SourceResultType::FINISHED; + } + auto &sort = hash_group.sort; auto &sort_global = *hash_group.sort_source; auto sort_local = sort.GetLocalSourceState(context, sort_global); @@ -635,6 +742,14 @@ HashedSort::HashGroupPtr HashedSort::GetColumnData(idx_t hash_bin, OperatorSourc auto &gsink = gsource.gsink; auto &hash_group = *gsink.hash_groups[hash_bin]; + if (gsink.CanBypassSort(hash_bin)) { + auto result = std::move(hash_group.direct_column_data); + if (result && result->Count() == hash_group.count) { + return result; + } + return nullptr; + } + auto &sort = hash_group.sort; auto &sort_global = *hash_group.sort_source; @@ -662,6 +777,8 @@ HashedSort::SortedRunPtr HashedSort::GetSortedRun(ClientContext &client, idx_t h auto &gsink = gsource.gsink; auto &hash_group = *gsink.hash_groups[hash_bin]; + D_ASSERT(!gsink.CanBypassSort(hash_bin)); + auto &sort = hash_group.sort; auto &sort_global = *hash_group.sort_source; diff --git a/src/duckdb/src/common/sort/partition_key_tracker.cpp b/src/duckdb/src/common/sort/partition_key_tracker.cpp new file mode 100644 index 000000000..58c6db75e --- /dev/null +++ 
b/src/duckdb/src/common/sort/partition_key_tracker.cpp @@ -0,0 +1,277 @@ +#include "duckdb/common/sorting/partition_key_tracker.hpp" + +#include "duckdb/common/radix_partitioning.hpp" +#include "duckdb/common/vector_operations/vector_operations.hpp" +#include "duckdb/common/vector/flat_vector.hpp" + +namespace duckdb { + +template +static idx_t GetPartitionRowIndex(PartitionedTupleDataAppendState &append_state, idx_t idx) { + if constexpr (USE_PARTITION_SEL) { + return append_state.partition_sel.get_index_unsafe(idx); + } + return idx; +} + +PartitionKeyTracker::PartitionKeyTracker(Allocator &allocator_p, const vector &key_types_p) + : key_count(key_types_p.size()) { + single_value_sel.Initialize(); + candidate_input_sel.Initialize(); + candidate_rep_sel.Initialize(); + mismatch_sel.Initialize(); + representatives.Initialize(allocator_p, key_types_p, 4096); +} + +void PartitionKeyTracker::Reset(idx_t radix_bits_p) { + radix_bits = radix_bits_p; + const auto partition_count = RadixPartitioning::NumberOfPartitions(radix_bits); + D_ASSERT(partition_count <= STANDARD_VECTOR_SIZE); + states.clear(); + states.resize(partition_count, PartitionKeyTrackerState::EMPTY); + hashes.clear(); + hashes.resize(partition_count); + representatives.Reset(); + representatives.SetChildCardinality(partition_count); +} + +bool PartitionKeyTracker::CanBypass(idx_t hash_bin) const { + return hash_bin < states.size() && states[hash_bin] == PartitionKeyTrackerState::SINGLE_KEY; +} + +void PartitionKeyTracker::Update(DataChunk &keys, Vector &input_hashes, PartitionedTupleDataAppendState &append_state, + idx_t count) { + if (states.empty() || !count) { + return; + } + + D_ASSERT(keys.ColumnCount() == key_count); + D_ASSERT(count <= STANDARD_VECTOR_SIZE); + + idx_t candidate_count; + if (append_state.fixed_partition_entries.size()) { + if (append_state.fixed_partition_entries.size() > 1) { + candidate_count = BuildCandidates(keys, input_hashes, append_state, count); + } else { + candidate_count 
= BuildCandidates(keys, input_hashes, append_state, count); + } + } else { + if (append_state.partition_entries.size() > 1) { + candidate_count = BuildCandidates(keys, input_hashes, append_state, count); + } else { + candidate_count = BuildCandidates(keys, input_hashes, append_state, count); + } + } + if (candidate_count) { + CompareCandidates(keys, candidate_count); + } +} + +void PartitionKeyTracker::Combine(const PartitionKeyTracker &other) { + if (other.states.empty()) { + return; + } + if (states.empty()) { + Reset(other.radix_bits); + } + D_ASSERT(radix_bits == other.radix_bits); + D_ASSERT(states.size() == other.states.size()); + D_ASSERT(states.size() <= STANDARD_VECTOR_SIZE); + + idx_t candidate_count = 0; + for (idx_t bin_idx = 0; bin_idx < states.size(); bin_idx++) { + CombineBin(other, bin_idx, candidate_count); + } + if (candidate_count) { + CompareTrackerCandidates(other, candidate_count); + } +} + +void PartitionKeyTracker::StoreRepresentative(DataChunk &keys, idx_t row_idx, hash_t hash, idx_t bin_idx) { + states[bin_idx] = PartitionKeyTrackerState::SINGLE_KEY; + this->hashes[bin_idx] = hash; + single_value_sel.set_index(0, row_idx); + for (idx_t col_idx = 0; col_idx < key_count; col_idx++) { + representatives.data[col_idx].Copy(keys.data[col_idx], single_value_sel, 1, 0, bin_idx, 1); + } +} + +void PartitionKeyTracker::StoreRepresentative(const PartitionKeyTracker &source, idx_t source_bin, idx_t target_bin) { + states[target_bin] = PartitionKeyTrackerState::SINGLE_KEY; + hashes[target_bin] = source.hashes[source_bin]; + single_value_sel.set_index(0, source_bin); + for (idx_t col_idx = 0; col_idx < key_count; col_idx++) { + representatives.data[col_idx].Copy(source.representatives.data[col_idx], single_value_sel, 1, 0, target_bin, 1); + } +} + +void PartitionKeyTracker::MarkMixed(idx_t bin_idx) { + states[bin_idx] = PartitionKeyTrackerState::MULTIPLE_KEYS; +} + +template +idx_t PartitionKeyTracker::BuildCandidates(DataChunk &keys, Vector 
&input_hashes, + PartitionedTupleDataAppendState &append_state, const idx_t count) { + using GETTER = TemplatedMapGetter; + auto &partition_entries = append_state.GetMap(); + UnifiedVectorFormat hash_data; + input_hashes.ToUnifiedFormat(hash_data); + const auto hash_values = UnifiedVectorFormat::GetData(hash_data); + idx_t candidate_count = 0; + for (auto it = partition_entries.begin(); it != partition_entries.end(); ++it) { + const auto bin_idx = GETTER::GetKey(it); + const auto &entry = GETTER::GetValue(it); + + if (states[bin_idx] == PartitionKeyTrackerState::MULTIPLE_KEYS) { + continue; + } + + idx_t entry_idx = 0; + if (states[bin_idx] == PartitionKeyTrackerState::EMPTY) { + const auto row_idx = GetPartitionRowIndex(append_state, entry.offset); + D_ASSERT(row_idx < count); + const auto hash_idx = hash_data.sel->get_index(row_idx); + StoreRepresentative(keys, row_idx, hash_values[hash_idx], bin_idx); + entry_idx++; + } + + const auto candidate_start = candidate_count; + bool hash_mismatch = false; + for (; entry_idx < entry.length; entry_idx++) { + const auto row_idx = GetPartitionRowIndex(append_state, entry.offset + entry_idx); + D_ASSERT(row_idx < count); + const auto hash_idx = hash_data.sel->get_index(row_idx); + const auto hash_match = this->hashes[bin_idx] == hash_values[hash_idx]; + hash_mismatch |= !hash_match; + candidate_input_sel.set_index(candidate_count, row_idx); + candidate_rep_sel.set_index(candidate_count, bin_idx); + candidate_count += hash_match; + } + states[bin_idx] = hash_mismatch ? PartitionKeyTrackerState::MULTIPLE_KEYS : states[bin_idx]; + candidate_count = hash_mismatch ? 
candidate_start : candidate_count; + } + return candidate_count; +} + +idx_t PartitionKeyTracker::CompactCandidates(idx_t candidate_count) { + idx_t new_count = 0; + for (idx_t candidate_idx = 0; candidate_idx < candidate_count; candidate_idx++) { + const auto bin_idx = candidate_rep_sel.get_index_unsafe(candidate_idx); + const auto input_idx = candidate_input_sel.get_index_unsafe(candidate_idx); + const auto keep = states[bin_idx] == PartitionKeyTrackerState::SINGLE_KEY; + candidate_input_sel.set_index(new_count, input_idx); + candidate_rep_sel.set_index(new_count, bin_idx); + new_count += keep; + } + return new_count; +} + +void PartitionKeyTracker::CompareCandidates(DataChunk &keys, idx_t candidate_count) { + D_ASSERT(candidate_count <= STANDARD_VECTOR_SIZE); + for (idx_t col_idx = 0; col_idx < key_count && candidate_count; col_idx++) { + Vector input_slice(keys.data[col_idx], candidate_input_sel, candidate_count); + Vector representative_slice(representatives.data[col_idx], candidate_rep_sel, candidate_count); + const auto mismatch_count = VectorOperations::DistinctFrom(input_slice, representative_slice, nullptr, + candidate_count, &mismatch_sel, nullptr); + for (idx_t mismatch_idx = 0; mismatch_idx < mismatch_count; mismatch_idx++) { + const auto candidate_idx = mismatch_sel.get_index_unsafe(mismatch_idx); + MarkMixed(candidate_rep_sel.get_index_unsafe(candidate_idx)); + } + if (mismatch_count && col_idx + 1 < key_count) { + candidate_count = CompactCandidates(candidate_count); + } + } +} + +void PartitionKeyTracker::CombineBin(const PartitionKeyTracker &source, idx_t bin_idx, idx_t &candidate_count) { + if (source.states[bin_idx] == PartitionKeyTrackerState::EMPTY || + states[bin_idx] == PartitionKeyTrackerState::MULTIPLE_KEYS) { + return; + } + if (source.states[bin_idx] == PartitionKeyTrackerState::MULTIPLE_KEYS) { + MarkMixed(bin_idx); + return; + } + D_ASSERT(source.states[bin_idx] == PartitionKeyTrackerState::SINGLE_KEY); + if (states[bin_idx] == 
PartitionKeyTrackerState::EMPTY) { + StoreRepresentative(source, bin_idx, bin_idx); + return; + } + D_ASSERT(states[bin_idx] == PartitionKeyTrackerState::SINGLE_KEY); + if (hashes[bin_idx] != source.hashes[bin_idx]) { + MarkMixed(bin_idx); + return; + } + candidate_input_sel.set_index(candidate_count, bin_idx); + candidate_rep_sel.set_index(candidate_count, bin_idx); + candidate_count++; +} + +idx_t PartitionKeyTracker::CompactTrackerCandidates(idx_t candidate_count) { + idx_t new_count = 0; + for (idx_t candidate_idx = 0; candidate_idx < candidate_count; candidate_idx++) { + const auto bin_idx = candidate_input_sel.get_index_unsafe(candidate_idx); + const auto source_bin_idx = candidate_rep_sel.get_index_unsafe(candidate_idx); + const auto keep = states[bin_idx] == PartitionKeyTrackerState::SINGLE_KEY; + candidate_input_sel.set_index(new_count, bin_idx); + candidate_rep_sel.set_index(new_count, source_bin_idx); + new_count += keep; + } + return new_count; +} + +void PartitionKeyTracker::CompareTrackerCandidates(const PartitionKeyTracker &source, idx_t candidate_count) { + D_ASSERT(candidate_count <= STANDARD_VECTOR_SIZE); + for (idx_t col_idx = 0; col_idx < key_count && candidate_count; col_idx++) { + Vector target_slice(representatives.data[col_idx], candidate_input_sel, candidate_count); + Vector source_slice(source.representatives.data[col_idx], candidate_rep_sel, candidate_count); + const auto mismatch_count = VectorOperations::DistinctFrom(target_slice, source_slice, nullptr, candidate_count, + &mismatch_sel, nullptr); + for (idx_t mismatch_idx = 0; mismatch_idx < mismatch_count; mismatch_idx++) { + const auto candidate_idx = mismatch_sel.get_index_unsafe(mismatch_idx); + MarkMixed(candidate_input_sel.get_index_unsafe(candidate_idx)); + } + if (mismatch_count && col_idx + 1 < key_count) { + candidate_count = CompactTrackerCandidates(candidate_count); + } + } +} + +template idx_t PartitionKeyTracker::BuildCandidates(DataChunk &, Vector &, + 
PartitionedTupleDataAppendState &, idx_t); +template idx_t PartitionKeyTracker::BuildCandidates(DataChunk &, Vector &, + PartitionedTupleDataAppendState &, idx_t); +template idx_t PartitionKeyTracker::BuildCandidates(DataChunk &, Vector &, + PartitionedTupleDataAppendState &, idx_t); +template idx_t PartitionKeyTracker::BuildCandidates(DataChunk &, Vector &, + PartitionedTupleDataAppendState &, idx_t); + +RepartitionKeyTracker::RepartitionKeyTracker(Allocator &allocator, PartitionKeyTracker &tracker_p, + const vector &key_types, + const vector &partition_key_ids_p) + : tracker(tracker_p), partition_key_ids(partition_key_ids_p) { + keys.Initialize(allocator, key_types); +} + +void RepartitionKeyTracker::RepartitionChunk(TupleDataCollection &source_partition, TupleDataChunkState &source_chunk, + PartitionedTupleDataAppendState &target_append, idx_t count) { + if (!count) { + return; + } + + if (key_gather_state.column_ids.empty()) { + source_partition.InitializeChunkState(key_gather_state, partition_key_ids); + } + + keys.Reset(); + TupleDataCollection::ResetCachedCastVectors(key_gather_state, partition_key_ids); + source_partition.Gather(source_chunk.row_locations, *FlatVector::IncrementalSelectionVector(), count, + partition_key_ids, keys, *FlatVector::IncrementalSelectionVector(), + key_gather_state.cached_cast_vectors); + keys.SetChildCardinality(count); + + D_ASSERT(target_append.utility_vector); + tracker.Update(keys, *target_append.utility_vector, target_append, count); +} + +} // namespace duckdb diff --git a/src/duckdb/src/common/string_util.cpp b/src/duckdb/src/common/string_util.cpp index f6a29d76f..141a0e630 100644 --- a/src/duckdb/src/common/string_util.cpp +++ b/src/duckdb/src/common/string_util.cpp @@ -23,9 +23,7 @@ #include #include -#include "yyjson.hpp" - -using namespace duckdb_yyjson; // NOLINT +#include "duckdb/common/json_document.hpp" namespace duckdb { @@ -716,182 +714,75 @@ string StringUtil::CandidatesErrorMessage(const vector &strings, 
const s return StringUtil::CandidatesMessage(closest_strings, message_prefix); } -static unique_ptr ParseJSON(const string &json, yyjson_doc *doc, yyjson_val *root, bool ignore_errors) { - auto result = make_uniq(); - switch (yyjson_get_tag(root)) { - case YYJSON_TYPE_ARR | YYJSON_SUBTYPE_NONE: { - size_t idx, max; - yyjson_val *val; - yyjson_arr_foreach(root, idx, max, val) { - result->AddArrayElement(ParseJSON(json, doc, val, ignore_errors)); - } - return result; - } - case YYJSON_TYPE_OBJ | YYJSON_SUBTYPE_NONE: { - size_t idx, max; - yyjson_val *key, *value; - yyjson_obj_foreach(root, idx, max, key, value) { - const auto key_val = yyjson_get_str(key); - const auto key_len = yyjson_get_len(key); - result->AddObjectEntry(string(key_val, key_len), ParseJSON(json, doc, value, ignore_errors)); - } - return result; - } - case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NOESC: - case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE: { - // Since this is a string, we can directly add the value - const auto value_val = yyjson_get_str(root); - const auto value_len = yyjson_get_len(root); - return make_uniq(string(value_val, value_len)); - } - case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE: - case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE: { - // boolean values - const bool bool_val = yyjson_get_bool(root); - return make_uniq(bool_val ? "true" : "false"); - } - case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT: - return make_uniq(to_string(unsafe_yyjson_get_uint(root))); - case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT: - return make_uniq(to_string(unsafe_yyjson_get_sint(root))); - case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL: - case YYJSON_TYPE_RAW | YYJSON_SUBTYPE_NONE: - return make_uniq(to_string(unsafe_yyjson_get_real(root))); - case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE: - return make_uniq("null"); +//! Converts a single JSON value to its string representation: scalars become their literal value, nested +//! objects/arrays are re-serialized as a JSON string. 
+static string JSONValueToString(const string &json, JSONValue value) { + switch (value.GetType()) { + case JSONValueType::STRING: + return value.GetString(); + case JSONValueType::BOOLEAN: + return value.GetBoolean() ? "true" : "false"; + case JSONValueType::UNSIGNED_INTEGER: + return to_string(value.GetUnsignedInteger()); + case JSONValueType::SIGNED_INTEGER: + return to_string(value.GetSignedInteger()); + case JSONValueType::DOUBLE: + case JSONValueType::RAW: + return to_string(value.GetDouble()); + case JSONValueType::JSON_NULL: + return "null"; + case JSONValueType::OBJECT: + case JSONValueType::ARRAY: + return value.ToString(JSONWriteFlags::ALLOW_INVALID_UNICODE); default: - yyjson_doc_free(doc); throw SerializationException("Failed to parse JSON string: %s", json); } } -unique_ptr StringUtil::ParseJSONMap(const string &json, bool ignore_errors) { - auto result = make_uniq(json); +unordered_map StringUtil::ParseJSONMap(const string &json, bool ignore_errors) { + unordered_map result; if (json.empty()) { return result; } - yyjson_read_flag flags = YYJSON_READ_ALLOW_INVALID_UNICODE; - yyjson_doc *doc = yyjson_read(json.c_str(), json.size(), flags); + JSONParseError error; + auto doc = JSONDocument::TryParse(json.c_str(), json.size(), error, JSONReadFlags::ALLOW_INVALID_UNICODE); if (!doc) { if (ignore_errors) { return result; } throw SerializationException("Failed to parse JSON string: %s", json); } - yyjson_val *root = yyjson_doc_get_root(doc); - if (!root || yyjson_get_type(root) != YYJSON_TYPE_OBJ) { - yyjson_doc_free(doc); + auto root = doc->GetRoot(); + if (!root.IsObject()) { if (ignore_errors) { return result; } throw SerializationException("Failed to parse JSON string: %s", json); } - - result = ParseJSON(json, doc, root, ignore_errors); - yyjson_doc_free(doc); - return result; -} - -string WriteJsonToString(yyjson_mut_doc *doc) { - yyjson_write_err err; - size_t len; - constexpr yyjson_write_flag flags = YYJSON_WRITE_ALLOW_INVALID_UNICODE; - char *json 
= yyjson_mut_write_opts(doc, flags, nullptr, &len, &err); - if (!json) { - yyjson_mut_doc_free(doc); - throw SerializationException("Failed to write JSON string: %s", err.msg); - } - // Create a string from the JSON - string result(json, len); - - // Free the JSON and the document - free(json); - yyjson_mut_doc_free(doc); - - // Return the result + root.IterateObject([&](const string &key, JSONValue value) { result[key] = JSONValueToString(json, value); }); return result; } -string ToJsonMapInternal(const unordered_map &map, yyjson_mut_doc *doc, yyjson_mut_val *root) { - for (auto &entry : map) { - auto key = yyjson_mut_strncpy(doc, entry.first.c_str(), entry.first.size()); - auto value = yyjson_mut_strncpy(doc, entry.second.c_str(), entry.second.size()); - yyjson_mut_obj_add(root, key, value); - } - return WriteJsonToString(doc); -} string StringUtil::ToJSONMap(const unordered_map &map) { - yyjson_mut_doc *doc = yyjson_mut_doc_new(nullptr); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - return ToJsonMapInternal(map, doc, root); -} - -string ComplexJSON::GetValue(const string &key) const { - if (type == ComplexJSONType::OBJECT) { - if (obj_value.find(key) != obj_value.end()) { - return GetValueRecursive(*obj_value.at(key)); - } - } - // Object either doesn't exist or this is just a string - return ""; -} - -string ComplexJSON::GetValue(const idx_t &index) const { - if (type == ComplexJSONType::ARRAY) { - if (index >= arr_value.size()) { - return ""; - } - return GetValueRecursive(*arr_value[index]); - } - return ""; -} - -string ComplexJSON::GetValueRecursive(const ComplexJSON &child) { - if (child.type == ComplexJSONType::OBJECT) { - // We have to construct the nested json - yyjson_mut_doc *doc = yyjson_mut_doc_new(nullptr); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - for (const auto &object : child.obj_value) { - auto key = yyjson_mut_strncpy(doc, object.first.c_str(), 
object.first.size()); - auto value_str = GetValueRecursive(*object.second); - auto value = yyjson_mut_strncpy(doc, value_str.c_str(), value_str.size()); - yyjson_mut_obj_add(root, key, value); - } - return WriteJsonToString(doc); - } else if (child.type == ComplexJSONType::ARRAY) { - yyjson_mut_doc *doc = yyjson_mut_doc_new(nullptr); - yyjson_mut_val *root = yyjson_mut_arr(doc); - yyjson_mut_doc_set_root(doc, root); - for (const auto &elem : child.arr_value) { - auto value_str = GetValueRecursive(*elem); - auto value = yyjson_mut_strncpy(doc, value_str.c_str(), value_str.size()); - yyjson_mut_arr_append(root, value); - } - return WriteJsonToString(doc); - } else { - // simple string we can just write - return child.str_value; + JSONWriter writer; + auto obj = writer.CreateObject(); + for (auto &entry : map) { + obj.AddString(entry.first, entry.second); } -} -string StringUtil::ToComplexJSONMap(const ComplexJSON &complex_json) { - return ComplexJSON::GetValueRecursive(complex_json); + writer.SetRoot(obj); + return writer.ToString(JSONWriteFlags::ALLOW_INVALID_UNICODE); } string StringUtil::ValidateJSON(const char *data, const idx_t &len) { // Same flags as in JSON extension - static constexpr auto READ_FLAG = - YYJSON_READ_ALLOW_INF_AND_NAN | YYJSON_READ_ALLOW_TRAILING_COMMAS | YYJSON_READ_BIGNUM_AS_RAW; - yyjson_read_err error; - yyjson_doc *doc = yyjson_read_opts((char *)data, len, READ_FLAG, nullptr, &error); // NOLINT: for yyjson - if (error.code != YYJSON_READ_SUCCESS) { - return StringUtil::Format("Malformed JSON at byte %lld of input: %s. Input: \"%s\"", error.pos, error.msg, - string(data, len)); + static constexpr auto READ_FLAGS = + JSONReadFlags::ALLOW_INF_AND_NAN | JSONReadFlags::ALLOW_TRAILING_COMMAS | JSONReadFlags::BIGNUM_AS_RAW; + JSONParseError error; + auto doc = JSONDocument::TryParse(data, len, error, READ_FLAGS); + if (error.HasError()) { + return StringUtil::Format("Malformed JSON at byte %lld of input: %s. 
Input: \"%s\"", error.position, + error.message, string(data, len)); } - - yyjson_doc_free(doc); return string(); } @@ -900,15 +791,15 @@ string StringUtil::ExceptionToJSONMap(ExceptionType type, const string &message, D_ASSERT(map.find("exception_type") == map.end()); D_ASSERT(map.find("exception_message") == map.end()); - yyjson_mut_doc *doc = yyjson_mut_doc_new(nullptr); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - auto except_str = Exception::ExceptionTypeToString(type); - yyjson_mut_obj_add_strncpy(doc, root, "exception_type", except_str.c_str(), except_str.size()); - yyjson_mut_obj_add_strncpy(doc, root, "exception_message", message.c_str(), message.size()); - - return ToJsonMapInternal(map, doc, root); + JSONWriter writer; + auto obj = writer.CreateObject(); + obj.AddString("exception_type", Exception::ExceptionTypeToString(type)); + obj.AddString("exception_message", message); + for (auto &entry : map) { + obj.AddString(entry.first, entry.second); + } + writer.SetRoot(obj); + return writer.ToString(JSONWriteFlags::ALLOW_INVALID_UNICODE); } string StringUtil::GetFileName(const string &file_path) { diff --git a/src/duckdb/src/common/tree_renderer.cpp b/src/duckdb/src/common/tree_renderer.cpp index 4b9f5c927..45a285770 100644 --- a/src/duckdb/src/common/tree_renderer.cpp +++ b/src/duckdb/src/common/tree_renderer.cpp @@ -9,15 +9,22 @@ #include "duckdb/common/exception.hpp" #include "duckdb/common/string_util.hpp" #include "duckdb/main/query_profiler.hpp" +#include "duckdb/main/client_config.hpp" +#include "duckdb/main/profiler_extension.hpp" +#include "duckdb/common/box_renderer.hpp" namespace duckdb { //===--------------------------------------------------------------------===// // Profiler output (base implementations) //===--------------------------------------------------------------------===// -string TreeRenderer::RenderProfiler(const QueryProfiler &profiler) { +void TreeRenderer::RenderProfiler(const 
QueryProfiler &profiler, BaseResultRenderer &ss) { // by default, render the profiling node tree using this renderer (covers HTML/GraphViz/Mermaid) - return profiler.RenderProfilingNodeTree(*this); + profiler.RenderProfilingNodeTree(*this, ss); +} + +unique_ptr TreeRenderer::GetPrintRenderer() { + return make_uniq(); } string TreeRenderer::RenderProfilerDisabled() { @@ -73,11 +80,6 @@ static const ProfilerPrintFormatEntry &LookupProfilerPrintFormat(const string &n StringUtil::Join(options, ", ")); } -ProfilerPrintFormat ProfilerPrintFormat::FromString(const string &name) { - // return the canonical (lowercase) name for the matched entry - return ProfilerPrintFormat(LookupProfilerPrintFormat(name).name); -} - unique_ptr TreeRenderer::CreateRenderer(const string &name) { return LookupProfilerPrintFormat(name).create_renderer(); } @@ -86,4 +88,21 @@ unique_ptr TreeRenderer::CreateRenderer(const ProfilerPrintFormat return CreateRenderer(format.ToString()); } +unique_ptr TreeRenderer::CreateRenderer(ClientContext &context, const string &name) { + // registered renderers take precedence over the built-in formats + auto extension = ProfilerExtension::Find(context, name); + if (extension && extension->create_renderer) { + return extension->create_renderer(context); + } + auto renderer = CreateRenderer(name); + if (renderer) { + renderer->Configure(ClientConfig::GetConfig(context).profiling_renderer_settings); + } + return renderer; +} + +unique_ptr TreeRenderer::CreateRenderer(ClientContext &context, const ProfilerPrintFormat &format) { + return CreateRenderer(context, format.ToString()); +} + } // namespace duckdb diff --git a/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp b/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp index 6f36b1cfd..aee40d819 100644 --- a/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp +++ b/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp @@ -1,5 +1,6 @@ #include 
"duckdb/common/tree_renderer/graphviz_tree_renderer.hpp" +#include "duckdb/common/box_renderer.hpp" #include "duckdb/common/pair.hpp" #include "duckdb/common/string_util.hpp" #include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp" @@ -16,50 +17,50 @@ namespace duckdb { string GRAPHVIZTreeRenderer::ToString(const LogicalOperator &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string GRAPHVIZTreeRenderer::ToString(const PhysicalOperator &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string GRAPHVIZTreeRenderer::ToString(const ProfilingNode &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string GRAPHVIZTreeRenderer::ToString(const Pipeline &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } -void GRAPHVIZTreeRenderer::Render(const LogicalOperator &op, std::ostream &ss) { +void GRAPHVIZTreeRenderer::Render(const LogicalOperator &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void GRAPHVIZTreeRenderer::Render(const PhysicalOperator &op, std::ostream &ss) { +void GRAPHVIZTreeRenderer::Render(const PhysicalOperator &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void GRAPHVIZTreeRenderer::Render(const ProfilingNode &op, std::ostream &ss) { +void GRAPHVIZTreeRenderer::Render(const ProfilingNode &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void GRAPHVIZTreeRenderer::Render(const Pipeline &op, std::ostream &ss) { +void GRAPHVIZTreeRenderer::Render(const Pipeline &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void GRAPHVIZTreeRenderer::ToStreamInternal(RenderTree &root, std::ostream &ss) { +void GRAPHVIZTreeRenderer::ToStreamInternal(RenderTree &root, 
BaseResultRenderer &ss) { const string digraph_format = R"( digraph G { node [shape=box, style=rounded, fontname="Courier New", fontsize=10]; diff --git a/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp b/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp index cc07f0039..e4efae986 100644 --- a/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp +++ b/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp @@ -1,5 +1,6 @@ #include "duckdb/common/tree_renderer/html_tree_renderer.hpp" +#include "duckdb/common/box_renderer.hpp" #include "duckdb/common/pair.hpp" #include "duckdb/common/string_util.hpp" #include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp" @@ -15,45 +16,45 @@ namespace duckdb { string HTMLTreeRenderer::ToString(const LogicalOperator &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string HTMLTreeRenderer::ToString(const PhysicalOperator &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string HTMLTreeRenderer::ToString(const ProfilingNode &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string HTMLTreeRenderer::ToString(const Pipeline &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } -void HTMLTreeRenderer::Render(const LogicalOperator &op, std::ostream &ss) { +void HTMLTreeRenderer::Render(const LogicalOperator &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void HTMLTreeRenderer::Render(const PhysicalOperator &op, std::ostream &ss) { +void HTMLTreeRenderer::Render(const PhysicalOperator &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void HTMLTreeRenderer::Render(const ProfilingNode &op, std::ostream &ss) { +void HTMLTreeRenderer::Render(const ProfilingNode &op, BaseResultRenderer &ss) { auto tree = 
RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void HTMLTreeRenderer::Render(const Pipeline &op, std::ostream &ss) { +void HTMLTreeRenderer::Render(const Pipeline &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } @@ -257,7 +258,7 @@ function toggleDisplay(button) { return StringUtil::Format(body_section, CreateTreeRecursive(root, 0, 0)); } -void HTMLTreeRenderer::ToStreamInternal(RenderTree &root, std::ostream &ss) { +void HTMLTreeRenderer::ToStreamInternal(RenderTree &root, BaseResultRenderer &ss) { string result; result += CreateHeadSection(root); result += CreateBodySection(root); diff --git a/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp b/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp index 067ab65a4..62c9d18b5 100644 --- a/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp +++ b/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp @@ -1,5 +1,6 @@ #include "duckdb/common/tree_renderer/json_tree_renderer.hpp" +#include "duckdb/common/box_renderer.hpp" #include "duckdb/common/pair.hpp" #include "duckdb/main/query_profiler.hpp" #include "duckdb/common/string_util.hpp" @@ -11,112 +12,98 @@ #include "duckdb/planner/logical_operator.hpp" #include "utf8proc_wrapper.hpp" -#include "yyjson.hpp" +#include "duckdb/common/json_document.hpp" #include -using namespace duckdb_yyjson; // NOLINT - namespace duckdb { string JSONTreeRenderer::ToString(const LogicalOperator &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string JSONTreeRenderer::ToString(const PhysicalOperator &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string JSONTreeRenderer::ToString(const ProfilingNode &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string JSONTreeRenderer::ToString(const Pipeline &op) { - duckdb::stringstream ss; + StringResultRenderer ss; 
Render(op, ss); return ss.str(); } -void JSONTreeRenderer::Render(const LogicalOperator &op, std::ostream &ss) { +void JSONTreeRenderer::Render(const LogicalOperator &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void JSONTreeRenderer::Render(const PhysicalOperator &op, std::ostream &ss) { +void JSONTreeRenderer::Render(const PhysicalOperator &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void JSONTreeRenderer::Render(const ProfilingNode &op, std::ostream &ss) { +void JSONTreeRenderer::Render(const ProfilingNode &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void JSONTreeRenderer::Render(const Pipeline &op, std::ostream &ss) { +void JSONTreeRenderer::Render(const Pipeline &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -static yyjson_mut_val *RenderRecursive(yyjson_mut_doc *doc, RenderTree &tree, idx_t x, idx_t y) { +static JSONMutableValue RenderRecursive(JSONWriter &writer, RenderTree &tree, idx_t x, idx_t y) { auto node_p = tree.GetNode(x, y); D_ASSERT(node_p); auto &node = *node_p; - auto object = yyjson_mut_obj(doc); - auto children = yyjson_mut_arr(doc); + auto object = writer.CreateObject(); + auto children = writer.CreateArray(); for (auto &child_pos : node.child_positions) { - auto child_object = RenderRecursive(doc, tree, child_pos.x, child_pos.y); - yyjson_mut_arr_append(children, child_object); + children.Append(RenderRecursive(writer, tree, child_pos.x, child_pos.y)); } - yyjson_mut_obj_add_str(doc, object, "name", node.name.c_str()); - yyjson_mut_obj_add_val(doc, object, "children", children); - auto extra_info = yyjson_mut_obj(doc); + object.AddString("name", node.name); + object.Add("children", children); + auto extra_info = writer.CreateObject(); for (auto &it : node.extra_text) { auto &key = it.first; auto &value = it.second; auto splits 
= StringUtil::Split(value, "\n"); if (splits.size() > 1) { - auto list_items = yyjson_mut_arr(doc); + auto list_items = writer.CreateArray(); for (auto &split : splits) { - yyjson_mut_arr_add_strcpy(doc, list_items, split.c_str()); + list_items.AppendString(split); } - yyjson_mut_obj_add_val(doc, extra_info, key.c_str(), list_items); + extra_info.Add(key, list_items); } else { - yyjson_mut_obj_add_strcpy(doc, extra_info, key.c_str(), value.c_str()); + extra_info.AddString(key, value); } } - yyjson_mut_obj_add_val(doc, object, "extra_info", extra_info); + object.Add("extra_info", extra_info); return object; } -void JSONTreeRenderer::ToStreamInternal(RenderTree &root, std::ostream &ss) { - auto doc = yyjson_mut_doc_new(nullptr); - auto result_obj = yyjson_mut_arr(doc); - yyjson_mut_doc_set_root(doc, result_obj); - - auto plan = RenderRecursive(doc, root, 0, 0); - yyjson_mut_arr_append(result_obj, plan); - - auto data = yyjson_mut_val_write_opts(result_obj, YYJSON_WRITE_ALLOW_INF_AND_NAN | YYJSON_WRITE_PRETTY, nullptr, - nullptr, nullptr); - if (!data) { - yyjson_mut_doc_free(doc); - throw InternalException("The plan could not be rendered as JSON, yyjson failed"); - } - ss << string(data); - free(data); - yyjson_mut_doc_free(doc); +void JSONTreeRenderer::ToStreamInternal(RenderTree &root, BaseResultRenderer &ss) { + JSONWriter writer; + auto result_obj = writer.CreateArray(); + result_obj.Append(RenderRecursive(writer, root, 0, 0)); + writer.SetRoot(result_obj); + ss << writer.ToString(JSONWriteFlags::ALLOW_INF_AND_NAN | JSONWriteFlags::PRETTY); } -string JSONTreeRenderer::RenderProfiler(const QueryProfiler &profiler) { +void JSONTreeRenderer::RenderProfiler(const QueryProfiler &profiler, BaseResultRenderer &ss) { // the JSON profiler output is the full query profile result tree (including query-level metrics) - return profiler.ToJSON(); + ss << profiler.ToJSON(); } string JSONTreeRenderer::RenderProfilerDisabled() { diff --git 
a/src/duckdb/src/common/tree_renderer/mermaid_tree_renderer.cpp b/src/duckdb/src/common/tree_renderer/mermaid_tree_renderer.cpp index 3da70ac61..ff5fb30c9 100644 --- a/src/duckdb/src/common/tree_renderer/mermaid_tree_renderer.cpp +++ b/src/duckdb/src/common/tree_renderer/mermaid_tree_renderer.cpp @@ -1,5 +1,6 @@ #include "duckdb/common/tree_renderer/mermaid_tree_renderer.hpp" +#include "duckdb/common/box_renderer.hpp" #include "duckdb/common/pair.hpp" #include "duckdb/common/string_util.hpp" #include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp" @@ -16,45 +17,45 @@ namespace duckdb { string MermaidTreeRenderer::ToString(const LogicalOperator &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string MermaidTreeRenderer::ToString(const PhysicalOperator &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string MermaidTreeRenderer::ToString(const ProfilingNode &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string MermaidTreeRenderer::ToString(const Pipeline &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } -void MermaidTreeRenderer::Render(const LogicalOperator &op, std::ostream &ss) { +void MermaidTreeRenderer::Render(const LogicalOperator &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void MermaidTreeRenderer::Render(const PhysicalOperator &op, std::ostream &ss) { +void MermaidTreeRenderer::Render(const PhysicalOperator &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void MermaidTreeRenderer::Render(const ProfilingNode &op, std::ostream &ss) { +void MermaidTreeRenderer::Render(const ProfilingNode &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void MermaidTreeRenderer::Render(const Pipeline &op, std::ostream 
&ss) { +void MermaidTreeRenderer::Render(const Pipeline &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } @@ -80,7 +81,7 @@ static string SanitizeMermaidLabel(const string &text) { return result; } -void MermaidTreeRenderer::ToStreamInternal(RenderTree &root, std::ostream &ss) { +void MermaidTreeRenderer::ToStreamInternal(RenderTree &root, BaseResultRenderer &ss) { vector nodes; vector edges; diff --git a/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp b/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp index 504d71be9..529190467 100644 --- a/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +++ b/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp @@ -1,5 +1,6 @@ #include "duckdb/common/tree_renderer/text_tree_renderer.hpp" +#include "duckdb/common/box_renderer.hpp" #include "duckdb/common/pair.hpp" #include "duckdb/main/query_profiler.hpp" #include "duckdb/common/string_util.hpp" @@ -50,7 +51,7 @@ void TextTreeRenderer::Configure(const unordered_map &settings) { } } -void TextTreeRenderer::RenderTopLayer(RenderTree &root, std::ostream &ss, idx_t y) { +void TextTreeRenderer::RenderTopLayer(RenderTree &root, BaseResultRenderer &ss, idx_t y) { for (idx_t x = 0; x < root.width; x++) { if (x * config.node_render_width >= config.maximum_render_width) { break; @@ -110,7 +111,7 @@ static bool ShouldRenderWhitespace(RenderTree &root, idx_t x, idx_t y) { return false; } -void TextTreeRenderer::RenderBottomLayer(RenderTree &root, std::ostream &ss, idx_t y) { +void TextTreeRenderer::RenderBottomLayer(RenderTree &root, BaseResultRenderer &ss, idx_t y) { for (idx_t x = 0; x <= root.width; x++) { if (x * config.node_render_width >= config.maximum_render_width) { break; @@ -219,7 +220,7 @@ string TextTreeRenderer::FormatNumber(const string &input) { return result; } -void TextTreeRenderer::RenderBoxContent(RenderTree &root, std::ostream &ss, idx_t y) { +void 
TextTreeRenderer::RenderBoxContent(RenderTree &root, BaseResultRenderer &ss, idx_t y) { // we first need to figure out how high our boxes are going to be vector> extra_info; idx_t extra_height = 0; @@ -329,8 +330,35 @@ void TextTreeRenderer::RenderBoxContent(RenderTree &root, std::ostream &ss, idx_ } } } + // determine the type of this cell: the box title (render_y 0) is the operator name, the + // separator line between the title and the extra info is layout, everything else is a value + ResultRenderType content_type; + if (render_y == 0) { + content_type = ResultRenderType::COLUMN_NAME; + } else if (render_text == ExtraInfoSeparator()) { + content_type = ResultRenderType::LAYOUT; + } else { + content_type = ResultRenderType::VALUE; + } + // center the text in the box, rendering the surrounding padding as layout and the text with its type render_text = AdjustTextForRendering(render_text, config.node_render_width - 2); - ss << render_text; + idx_t content_start = 0; + while (content_start < render_text.size() && render_text[content_start] == ' ') { + content_start++; + } + idx_t content_end = render_text.size(); + while (content_end > content_start && render_text[content_end - 1] == ' ') { + content_end--; + } + if (content_start > 0) { + ss << render_text.substr(0, content_start); + } + if (content_end > content_start) { + ss.Render(content_type, render_text.substr(content_start, content_end - content_start)); + } + if (content_end < render_text.size()) { + ss << render_text.substr(content_end); + } if (render_y == halfway_point && NodeHasMultipleChildren(*node)) { ss << config.LMIDDLE; @@ -344,50 +372,50 @@ void TextTreeRenderer::RenderBoxContent(RenderTree &root, std::ostream &ss, idx_ } string TextTreeRenderer::ToString(const LogicalOperator &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string TextTreeRenderer::ToString(const PhysicalOperator &op) { - duckdb::stringstream ss; + StringResultRenderer ss; 
Render(op, ss); return ss.str(); } string TextTreeRenderer::ToString(const ProfilingNode &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string TextTreeRenderer::ToString(const Pipeline &op) { - duckdb::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } -void TextTreeRenderer::Render(const LogicalOperator &op, std::ostream &ss) { +void TextTreeRenderer::Render(const LogicalOperator &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void TextTreeRenderer::Render(const PhysicalOperator &op, std::ostream &ss) { +void TextTreeRenderer::Render(const PhysicalOperator &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void TextTreeRenderer::Render(const ProfilingNode &op, std::ostream &ss) { +void TextTreeRenderer::Render(const ProfilingNode &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void TextTreeRenderer::Render(const Pipeline &op, std::ostream &ss) { +void TextTreeRenderer::Render(const Pipeline &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void TextTreeRenderer::ToStreamInternal(RenderTree &root, std::ostream &ss) { +void TextTreeRenderer::ToStreamInternal(RenderTree &root, BaseResultRenderer &ss) { while (root.width * config.node_render_width > config.maximum_render_width) { if (config.node_render_width - 2 < config.minimum_render_width) { break; @@ -552,9 +580,9 @@ string TextTreeRenderer::ExtraInfoSeparator() { return StringUtil::Repeat(string(config.HORIZONTAL), (config.node_render_width - 9)); } -string TextTreeRenderer::RenderProfiler(const QueryProfiler &profiler) { +void TextTreeRenderer::RenderProfiler(const QueryProfiler &profiler, BaseResultRenderer &ss) { // the text profiler output is the framed query tree (header, total time, phase timings, operator tree) - return 
profiler.QueryTreeToString(); + profiler.RenderQueryTree(ss); } } // namespace duckdb diff --git a/src/duckdb/src/common/tree_renderer/tree_renderer.cpp b/src/duckdb/src/common/tree_renderer/tree_renderer.cpp index b0a58b597..c270e4c2e 100644 --- a/src/duckdb/src/common/tree_renderer/tree_renderer.cpp +++ b/src/duckdb/src/common/tree_renderer/tree_renderer.cpp @@ -1,12 +1,13 @@ #include "duckdb/common/tree_renderer.hpp" +#include "duckdb/common/box_renderer.hpp" namespace duckdb { -void TreeRenderer::ToStream(RenderTree &root, std::ostream &ss) { +void TreeRenderer::ToStream(RenderTree &root, BaseResultRenderer &ss) { if (!UsesRawKeyNames()) { root.SanitizeKeyNames(); } - return ToStreamInternal(root, ss); + ToStreamInternal(root, ss); } } // namespace duckdb diff --git a/src/duckdb/src/common/tree_renderer/yaml_tree_renderer.cpp b/src/duckdb/src/common/tree_renderer/yaml_tree_renderer.cpp index 3eb49a14d..a66adef51 100644 --- a/src/duckdb/src/common/tree_renderer/yaml_tree_renderer.cpp +++ b/src/duckdb/src/common/tree_renderer/yaml_tree_renderer.cpp @@ -1,5 +1,6 @@ #include "duckdb/common/tree_renderer/yaml_tree_renderer.hpp" +#include "duckdb/common/box_renderer.hpp" #include "duckdb/common/string_util.hpp" #include "duckdb/common/typedefs.hpp" #include "duckdb/execution/physical_operator.hpp" @@ -7,106 +8,95 @@ #include "duckdb/planner/logical_operator.hpp" #include "fmt/printf.h" -#include -#include - namespace duckdb { string YAMLTreeRenderer::ToString(const LogicalOperator &op) { - std::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string YAMLTreeRenderer::ToString(const PhysicalOperator &op) { - std::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string YAMLTreeRenderer::ToString(const ProfilingNode &op) { - std::stringstream ss; + StringResultRenderer ss; Render(op, ss); return ss.str(); } string YAMLTreeRenderer::ToString(const Pipeline &op) { - std::stringstream ss; + StringResultRenderer 
ss; Render(op, ss); return ss.str(); } -void YAMLTreeRenderer::Render(const LogicalOperator &op, std::ostream &ss) { +void YAMLTreeRenderer::Render(const LogicalOperator &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void YAMLTreeRenderer::Render(const PhysicalOperator &op, std::ostream &ss) { +void YAMLTreeRenderer::Render(const PhysicalOperator &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void YAMLTreeRenderer::Render(const ProfilingNode &op, std::ostream &ss) { +void YAMLTreeRenderer::Render(const ProfilingNode &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void YAMLTreeRenderer::Render(const Pipeline &op, std::ostream &ss) { +void YAMLTreeRenderer::Render(const Pipeline &op, BaseResultRenderer &ss) { auto tree = RenderTree::CreateRenderTree(op); ToStream(*tree, ss); } -void YAMLTreeRenderer::ToStreamInternal(RenderTree &root, std::ostream &ss) { +void YAMLTreeRenderer::ToStreamInternal(RenderTree &root, BaseResultRenderer &ss) { RenderRecursive(root, ss, 0, 0, 0); } -struct EscapedString { - explicit EscapedString(const string &str) : str(str) {}; - const string &str; -}; - -std::ostream &operator<<(std::ostream &out, const EscapedString &es) { - out << '"'; - - // escape - for (auto &ch : es.str) { +static string EscapedString(const string &str) { + string out = "\""; + for (auto &ch : str) { switch (ch) { case '\b': - out << "\\b"; + out += "\\b"; break; case '\f': - out << "\\f"; + out += "\\f"; break; case '\n': - out << "\\n"; + out += "\\n"; break; case '\r': - out << "\\r"; + out += "\\r"; break; case '\t': - out << "\\t"; + out += "\\t"; break; case '"': - out << "\\\""; + out += "\\\""; break; case '\\': - out << "\\\\"; + out += "\\\\"; break; default: if ((unsigned char)ch < ' ') { - out << duckdb_fmt::sprintf("\\u%04x", (int)ch); + out += duckdb_fmt::sprintf("\\u%04x", (int)ch); } else { 
- out << ch; + out += ch; } break; } } - - out << '"'; + out += "\""; return out; } -void YAMLTreeRenderer::RenderRecursive(RenderTree &node, std::ostream &ss, idx_t indent, idx_t x, idx_t y) { +void YAMLTreeRenderer::RenderRecursive(RenderTree &node, BaseResultRenderer &ss, idx_t indent, idx_t x, idx_t y) { auto node_p = node.GetNode(x, y); D_ASSERT(node_p); auto &current_node = *node_p; diff --git a/src/duckdb/src/common/types/geometry_crs.cpp b/src/duckdb/src/common/types/geometry_crs.cpp index 8916561ae..7ad383052 100644 --- a/src/duckdb/src/common/types/geometry_crs.cpp +++ b/src/duckdb/src/common/types/geometry_crs.cpp @@ -8,7 +8,7 @@ #include "duckdb/common/helper.hpp" #include "duckdb/catalog/catalog.hpp" -#include "yyjson.hpp" +#include "duckdb/common/json_document.hpp" #include "duckdb/catalog/catalog_entry/coordinate_system_catalog_entry.hpp" #include "fast_float/fast_float.h" @@ -458,31 +458,28 @@ bool CoordinateReferenceSystem::TryParseWKT2(const string &text, CoordinateRefer // PROJJSON Parsing //---------------------------------------------------------------------------------------------------------------------- bool CoordinateReferenceSystem::TryParsePROJJSON(const string &text, CoordinateReferenceSystem &result) { - using namespace duckdb_yyjson; // NOLINT - - unique_ptr doc(yyjson_read(text.c_str(), text.size(), YYJSON_READ_NOFLAG), - yyjson_doc_free); - + JSONParseError error; + auto doc = JSONDocument::TryParse(text.c_str(), text.size(), error); if (!doc) { // Not a valid JSON return false; } - yyjson_val *root = yyjson_doc_get_root(doc.get()); - if (!root || !yyjson_is_obj(root)) { + auto root = doc->GetRoot(); + if (!root.IsObject()) { // The root is not an object return false; } // Get the "type" field from the root object - yyjson_val *type_val = yyjson_obj_get(root, "type"); - if (!type_val || !yyjson_is_str(type_val)) { + auto type_val = root.GetMember("type"); + if (!type_val.IsString()) { return false; } // Check that the type is one of the
PROJJSON CRS types // There are other (derived CRS) types, but they can not be used as root CRS definitions - const string type_str = yyjson_get_str(type_val); + const string type_str = type_val.GetString(); const auto projjson_crs_types = {"GeographicCRS", "GeodeticCRS", "ProjectedCRS", "CompoundCRS", "BoundCRS", "VerticalCRS", "EngineeringCRS", "TemporalCRS", "ParametricCRS"}; @@ -499,12 +496,12 @@ bool CoordinateReferenceSystem::TryParsePROJJSON(const string &text, CoordinateR } // Start out with the root object - yyjson_val *target_val = root; + auto target_val = root; // Special case for BoundCRS, use the name of the transformation instead if (StringUtil::CIEquals(type_str, "BoundCRS")) { - const auto trans_val = yyjson_obj_get(root, "transformation"); - if (!trans_val || !yyjson_is_obj(trans_val)) { + auto trans_val = root.GetMember("transformation"); + if (!trans_val.IsObject()) { return false; } @@ -513,35 +510,24 @@ bool CoordinateReferenceSystem::TryParsePROJJSON(const string &text, CoordinateR } // Try to get the "name" field from the target object - yyjson_val *name_val = yyjson_obj_get(target_val, "name"); - if (name_val && yyjson_is_str(name_val)) { - const char *name_str = yyjson_get_str(name_val); - if (name_str) { - result.identifier = string(name_str); - } + auto name_val = target_val.GetMember("name"); + if (name_val.IsString()) { + result.identifier = name_val.GetString(); } // Try to get the "id" field from the target object - yyjson_val *id_val = yyjson_obj_get(target_val, "id"); - if (id_val && yyjson_is_obj(id_val)) { - const auto auth_val = yyjson_obj_get(id_val, "authority"); - if (auth_val && yyjson_is_str(auth_val)) { - const auto auth_str = yyjson_get_str(auth_val); - - if (auth_str) { - result.identifier = string(auth_str); - - const auto code_val = yyjson_obj_get(id_val, "code"); - if (code_val && yyjson_is_int(code_val)) { - const auto code_int = yyjson_get_int(code_val); - result.identifier += ":" + StringUtil::Format("%d", 
code_int); - } - if (code_val && yyjson_is_str(code_val)) { - const auto code_str = yyjson_get_str(code_val); - if (code_str) { - result.identifier += ":" + string(code_str); - } - } + auto id_val = target_val.GetMember("id"); + if (id_val.IsObject()) { + auto auth_val = id_val.GetMember("authority"); + if (auth_val.IsString()) { + result.identifier = auth_val.GetString(); + + auto code_val = id_val.GetMember("code"); + if (code_val.IsInteger()) { + result.identifier += ":" + StringUtil::Format("%d", code_val.GetSignedInteger()); + } + if (code_val.IsString()) { + result.identifier += ":" + code_val.GetString(); } } } @@ -550,14 +536,7 @@ bool CoordinateReferenceSystem::TryParsePROJJSON(const string &text, CoordinateR // Print the PROJJSON back to a string to normalize it // TODO: We should actually normalize the PROJJSON here (e.g. sort fields) to ensure consistent equality checks - size_t json_size = 0; - const auto json_text = yyjson_write(doc.get(), YYJSON_WRITE_NOFLAG, &json_size); - if (!json_text) { - return false; - } - - result.definition = string(json_text, json_size); - free(json_text); + result.definition = doc->ToString(JSONWriteFlags::NONE); return true; } diff --git a/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp b/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp index 1f7ad5fff..219c7b5bc 100644 --- a/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +++ b/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp @@ -322,7 +322,8 @@ void PartitionedTupleData::Reset() { Verify(); } -void PartitionedTupleData::Repartition(ClientContext &context, PartitionedTupleData &new_partitioned_data) { +void PartitionedTupleData::Repartition(ClientContext &context, PartitionedTupleData &new_partitioned_data, + optional_ptr key_tracker) { D_ASSERT(layout.GetTypes() == new_partitioned_data.layout.GetTypes()); if (partitions.size() == new_partitioned_data.partitions.size()) { @@ -342,7 +343,11 @@ void 
PartitionedTupleData::Repartition(ClientContext &context, PartitionedTupleD do { // Check for interrupts with each chunk context.InterruptCheck(); - new_partitioned_data.Append(append_state, chunk_state, iterator.GetCurrentChunkCount()); + const auto count = iterator.GetCurrentChunkCount(); + new_partitioned_data.Append(append_state, chunk_state, count); + if (key_tracker) { + key_tracker->RepartitionChunk(partition, chunk_state, append_state, count); + } } while (iterator.Next()); RepartitionFinalizeStates(*this, new_partitioned_data, append_state, partition_idx); diff --git a/src/duckdb/src/common/util/util_parsed_expression.cpp b/src/duckdb/src/common/util/util_parsed_expression.cpp index 724b834fb..651c0c9e8 100644 --- a/src/duckdb/src/common/util/util_parsed_expression.cpp +++ b/src/duckdb/src/common/util/util_parsed_expression.cpp @@ -581,10 +581,7 @@ bool FunctionExpression::Equals(const ParsedExpression &other) const { return false; } auto &other_p = other.Cast(); - if (function_name != other_p.function_name) { - return false; - } - if (schema != other_p.schema) { + if (qualified_name != other_p.qualified_name) { return false; } if (!ParsedExpression::Equals(filter, other_p.filter)) { @@ -599,9 +596,6 @@ bool FunctionExpression::Equals(const ParsedExpression &other) const { if (export_state != other_p.export_state) { return false; } - if (catalog != other_p.catalog) { - return false; - } if (arguments.size() != other_p.arguments.size()) { return false; } @@ -615,25 +609,21 @@ bool FunctionExpression::Equals(const ParsedExpression &other) const { hash_t FunctionExpression::Hash() const { hash_t hash = ParsedExpression::Hash(); - hash = CombineHash(hash, function_name.Hash()); - hash = CombineHash(hash, schema.Hash()); + hash = CombineHash(hash, qualified_name.Hash()); hash = CombineHash(hash, duckdb::Hash(distinct)); hash = CombineHash(hash, duckdb::Hash(export_state)); - hash = CombineHash(hash, catalog.Hash()); return hash; } unique_ptr 
FunctionExpression::Copy() const { auto copy = duckdb::unique_ptr(new FunctionExpression()); copy->is_legacy_function_call = is_legacy_function_call; - copy->function_name = function_name; - copy->schema = schema; + copy->qualified_name = qualified_name; copy->filter = filter ? filter->Copy() : nullptr; copy->order_bys = order_bys ? unique_ptr_cast(order_bys->Copy()) : nullptr; copy->distinct = distinct; copy->is_operator = is_operator; copy->export_state = export_state; - copy->catalog = catalog; for (auto &arg : arguments) { copy->arguments.emplace_back(arg.Copy()); } @@ -832,13 +822,7 @@ bool WindowExpression::Equals(const ParsedExpression &other) const { return false; } auto &other_p = other.Cast(); - if (function_name != other_p.function_name) { - return false; - } - if (schema != other_p.schema) { - return false; - } - if (catalog != other_p.catalog) { + if (qualified_name != other_p.qualified_name) { return false; } if (!ParsedExpression::ListEquals(partitions, other_p.partitions)) { @@ -912,9 +896,7 @@ bool WindowExpression::Equals(const ParsedExpression &other) const { hash_t WindowExpression::Hash() const { hash_t hash = ParsedExpression::Hash(); - hash = CombineHash(hash, function_name.Hash()); - hash = CombineHash(hash, schema.Hash()); - hash = CombineHash(hash, catalog.Hash()); + hash = CombineHash(hash, qualified_name.Hash()); for (idx_t i = 0; i < orders.size(); i++) { hash = CombineHash(hash, duckdb::Hash(static_cast(orders[i].type))); hash = CombineHash(hash, duckdb::Hash(static_cast(orders[i].null_order))); @@ -935,9 +917,7 @@ hash_t WindowExpression::Hash() const { unique_ptr WindowExpression::Copy() const { auto copy = duckdb::unique_ptr(new WindowExpression()); copy->is_legacy_function_call = is_legacy_function_call; - copy->function_name = function_name; - copy->schema = schema; - copy->catalog = catalog; + copy->qualified_name = qualified_name; for (auto &child : partitions) { copy->partitions.push_back(child->Copy()); } @@ -968,13 +948,7 @@ 
bool TypeExpression::Equals(const ParsedExpression &other) const { return false; } auto &other_p = other.Cast(); - if (catalog != other_p.catalog) { - return false; - } - if (schema != other_p.schema) { - return false; - } - if (type_name != other_p.type_name) { + if (qualified_name != other_p.qualified_name) { return false; } if (!ParsedExpression::ListEquals(children, other_p.children)) { @@ -985,17 +959,13 @@ bool TypeExpression::Equals(const ParsedExpression &other) const { hash_t TypeExpression::Hash() const { hash_t hash = ParsedExpression::Hash(); - hash = CombineHash(hash, catalog.Hash()); - hash = CombineHash(hash, schema.Hash()); - hash = CombineHash(hash, type_name.Hash()); + hash = CombineHash(hash, qualified_name.Hash()); return hash; } unique_ptr TypeExpression::Copy() const { auto copy = duckdb::unique_ptr(new TypeExpression()); - copy->catalog = catalog; - copy->schema = schema; - copy->type_name = type_name; + copy->qualified_name = qualified_name; for (auto &child : children) { copy->children.push_back(child->Copy()); } diff --git a/src/duckdb/src/execution/index/art/art_index.cpp b/src/duckdb/src/execution/index/art/art_index.cpp index f8c032e55..0e82653ea 100644 --- a/src/duckdb/src/execution/index/art/art_index.cpp +++ b/src/duckdb/src/execution/index/art/art_index.cpp @@ -53,7 +53,7 @@ unique_ptr ARTBuildGlobalInit(IndexBuildInitGlobalStateIn auto state = make_uniq(); auto &storage = input.table.GetStorage(); - state->global_index = make_uniq(input.info.index_name, input.info.constraint_type, input.storage_ids, + state->global_index = make_uniq(input.info.GetIndexName(), input.info.constraint_type, input.storage_ids, TableIOManager::Get(storage), input.expressions, storage.db); return std::move(state); @@ -78,7 +78,7 @@ unique_ptr ARTBuildLocalInit(IndexBuildInitLocalStateInput auto state = make_uniq(input.context); auto &storage = input.table.GetStorage(); - state->local_index = make_uniq(input.info.index_name, input.info.constraint_type, 
input.storage_ids, + state->local_index = make_uniq(input.info.GetIndexName(), input.info.constraint_type, input.storage_ids, TableIOManager::Get(storage), input.expressions, storage.db); // Initialize the local sink state. @@ -114,7 +114,7 @@ void ARTBuildSinkSorted(IndexBuildSinkInput &input, DataChunk &key_chunk, DataCh auto &l_index = l_state.local_index; // Construct an ART for this chunk. - auto art = make_uniq(input.info.index_name, l_index->GetConstraintType(), l_index->GetColumnIds(), + auto art = make_uniq(input.info.GetIndexName(), l_index->GetConstraintType(), l_index->GetColumnIds(), l_index->table_io_manager, l_index->unbound_expressions, storage.db, l_index->Cast().allocators); if (art->Build(l_state.keys, l_state.row_ids, key_chunk.size()) != ARTConflictType::NO_CONFLICT) { diff --git a/src/duckdb/src/execution/join_hashtable.cpp b/src/duckdb/src/execution/join_hashtable.cpp index b0217e24f..d8b761ad8 100644 --- a/src/duckdb/src/execution/join_hashtable.cpp +++ b/src/duckdb/src/execution/join_hashtable.cpp @@ -1312,8 +1312,9 @@ bool JoinHashTable::TryProbeConstant(ScanStructure &scan_structure, DataChunk &k unique_values.InitializeEmpty(vector {equality_types[0]}); TupleDataCollection::InitializeChunkState(dict_state.unique_key_state, {equality_types[0]}); } - unique_values.data[0].Reference(constant_col); - unique_values.SetChildCardinality(1); + SelectionVector sel(1); + sel.set_index(0, 0); + unique_values.data[0].Slice(constant_col, sel, 1); unique_values.Flatten(); TupleDataCollection::ToUnifiedFormat(dict_state.unique_key_state, unique_values); diff --git a/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp b/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp index 5e527af08..c326f487f 100644 --- a/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +++ b/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp @@ -75,10 +75,10 @@ SourceResultType 
PhysicalCopyDatabase::GetDataInternal(ExecutionContext &context auto &create_index_info = create_info->Cast(); auto &table_entry = - catalog.GetEntry(context.client, create_index_info.schema, create_index_info.table); + catalog.GetEntry(context.client, create_index_info.Schema(), create_index_info.table); auto &data_table = table_entry.GetStorage(); - IndexStorageInfo storage_info(create_index_info.index_name); + IndexStorageInfo storage_info(create_index_info.GetIndexName()); storage_info.options.emplace("v1_0_0_storage", false); auto unbound_index = make_uniq(create_index_info.Copy(), std::move(storage_info), data_table.GetTableIOManager(), catalog.GetAttached()); diff --git a/src/duckdb/src/execution/operator/persistent/physical_export.cpp b/src/duckdb/src/execution/operator/persistent/physical_export.cpp index 1ac9eae1f..1c010b1dd 100644 --- a/src/duckdb/src/execution/operator/persistent/physical_export.cpp +++ b/src/duckdb/src/execution/operator/persistent/physical_export.cpp @@ -36,7 +36,7 @@ static void WriteCatalogEntries(stringstream &ss, catalog_entry_vector_t &entrie auto create_info = entry.get().GetInfo(); try { // Strip the catalog from the info - create_info->catalog.clear(); + create_info->CatalogMutable().clear(); auto to_string = create_info->ToString(); ss << to_string; } catch (const NotImplementedException &) { @@ -215,7 +215,7 @@ SourceResultType PhysicalExport::GetDataInternal(ExecutionContext &context, Data auto &ccontext = context.client; auto &fs = FileSystem::GetFileSystem(ccontext); - auto &catalog = Catalog::GetCatalog(ccontext, info->catalog); + auto &catalog = Catalog::GetCatalog(ccontext, info->Catalog()); catalog_entry_vector_t catalog_entries; catalog_entries = GetNaiveExportOrder(context.client, catalog); diff --git a/src/duckdb/src/execution/operator/schema/physical_alter.cpp b/src/duckdb/src/execution/operator/schema/physical_alter.cpp index b41a15a8b..dde4e0cef 100644 --- 
a/src/duckdb/src/execution/operator/schema/physical_alter.cpp +++ b/src/duckdb/src/execution/operator/schema/physical_alter.cpp @@ -16,7 +16,7 @@ SourceResultType PhysicalAlter::GetDataInternal(ExecutionContext &context, DataC auto &db_manager = DatabaseManager::Get(context.client); db_manager.Alter(context.client, db_info); } else { - auto &catalog = Catalog::GetCatalog(context.client, info->catalog); + auto &catalog = Catalog::GetCatalog(context.client, info->Catalog()); catalog.Alter(context.client, *info); } return SourceResultType::FINISHED; diff --git a/src/duckdb/src/execution/operator/schema/physical_create_function.cpp b/src/duckdb/src/execution/operator/schema/physical_create_function.cpp index cbc9d6df1..e1c777c92 100644 --- a/src/duckdb/src/execution/operator/schema/physical_create_function.cpp +++ b/src/duckdb/src/execution/operator/schema/physical_create_function.cpp @@ -10,7 +10,7 @@ namespace duckdb { //===--------------------------------------------------------------------===// SourceResultType PhysicalCreateFunction::GetDataInternal(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const { - auto &catalog = Catalog::GetCatalog(context.client, info->catalog); + auto &catalog = Catalog::GetCatalog(context.client, info->Catalog()); catalog.CreateFunction(context.client, *info); return SourceResultType::FINISHED; diff --git a/src/duckdb/src/execution/operator/schema/physical_create_index.cpp b/src/duckdb/src/execution/operator/schema/physical_create_index.cpp index 89929be4b..a9a05239c 100644 --- a/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +++ b/src/duckdb/src/execution/operator/schema/physical_create_index.cpp @@ -93,7 +93,7 @@ SinkResultType PhysicalCreateIndex::Sink(ExecutionContext &context, DataChunk &c if (alter_table_info) { for (idx_t i = 0; i < lstate.key_chunk.ColumnCount(); i++) { if (VectorOperations::HasNull(lstate.key_chunk.data[i])) { - throw ConstraintException("NOT NULL constraint 
failed: %s", info->index_name); + throw ConstraintException("NOT NULL constraint failed: %s", info->GetIndexName()); } } } @@ -143,10 +143,11 @@ SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event, if (!alter_table_info) { // Ensure that the index does not yet exist in the catalog. - auto entry = schema.GetEntry(schema.GetCatalogTransaction(context), CatalogType::INDEX_ENTRY, info->index_name); + auto entry = + schema.GetEntry(schema.GetCatalogTransaction(context), CatalogType::INDEX_ENTRY, info->GetIndexName()); if (entry) { if (info->on_conflict != OnCreateConflict::IGNORE_ON_CONFLICT) { - throw CatalogException("Index with name \"%s\" already exists!", info->index_name); + throw CatalogException("Index with name \"%s\" already exists!", info->GetIndexName()); } // IF NOT EXISTS on existing index. We are done. return SinkFinalizeType::READY; @@ -161,12 +162,13 @@ SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event, // Ensure that there are no other indexes with that name on this table. 
auto &indexes = storage.GetDataTableInfo()->GetIndexes(); for (auto &index : indexes.Indexes()) { - if (index.GetIndexName() == info->index_name) { - throw CatalogException("an index with that name already exists for this table: %s", info->index_name); + if (index.GetIndexName() == info->GetIndexName()) { + throw CatalogException("an index with that name already exists for this table: %s", + info->GetIndexName()); } } - auto &catalog = Catalog::GetCatalog(context, info->catalog); + auto &catalog = Catalog::GetCatalog(context, info->Catalog()); catalog.Alter(context, *alter_table_info); } diff --git a/src/duckdb/src/execution/operator/schema/physical_create_schema.cpp b/src/duckdb/src/execution/operator/schema/physical_create_schema.cpp index 50fb4b24b..1eb0f2143 100644 --- a/src/duckdb/src/execution/operator/schema/physical_create_schema.cpp +++ b/src/duckdb/src/execution/operator/schema/physical_create_schema.cpp @@ -9,7 +9,7 @@ namespace duckdb { //===--------------------------------------------------------------------===// SourceResultType PhysicalCreateSchema::GetDataInternal(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const { - auto &catalog = Catalog::GetCatalog(context.client, info->catalog); + auto &catalog = Catalog::GetCatalog(context.client, info->Catalog()); if (catalog.IsSystemCatalog()) { throw BinderException("Cannot create schema in system catalog"); } diff --git a/src/duckdb/src/execution/operator/schema/physical_create_sequence.cpp b/src/duckdb/src/execution/operator/schema/physical_create_sequence.cpp index d0a3cbcb5..ab85237c9 100644 --- a/src/duckdb/src/execution/operator/schema/physical_create_sequence.cpp +++ b/src/duckdb/src/execution/operator/schema/physical_create_sequence.cpp @@ -8,7 +8,7 @@ namespace duckdb { //===--------------------------------------------------------------------===// SourceResultType PhysicalCreateSequence::GetDataInternal(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput 
&input) const { - auto &catalog = Catalog::GetCatalog(context.client, info->catalog); + auto &catalog = Catalog::GetCatalog(context.client, info->Catalog()); catalog.CreateSequence(context.client, *info); return SourceResultType::FINISHED; diff --git a/src/duckdb/src/execution/operator/schema/physical_create_trigger.cpp b/src/duckdb/src/execution/operator/schema/physical_create_trigger.cpp index f17a39bbc..018e5d04b 100644 --- a/src/duckdb/src/execution/operator/schema/physical_create_trigger.cpp +++ b/src/duckdb/src/execution/operator/schema/physical_create_trigger.cpp @@ -6,9 +6,9 @@ namespace duckdb { SourceResultType PhysicalCreateTrigger::GetDataInternal(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const { - auto &catalog = Catalog::GetCatalog(context.client, info->catalog); - auto &table = - Catalog::GetEntry(context.client, info->catalog, info->schema, info->base_table->table_name); + auto &catalog = Catalog::GetCatalog(context.client, info->Catalog()); + auto &table = Catalog::GetEntry(context.client, info->Catalog(), info->Schema(), + info->base_table->Table()); auto transaction = catalog.GetCatalogTransaction(context.client); table.CreateTrigger(transaction, *info); diff --git a/src/duckdb/src/execution/operator/schema/physical_create_type.cpp b/src/duckdb/src/execution/operator/schema/physical_create_type.cpp index 93a0aa64b..065771f54 100644 --- a/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +++ b/src/duckdb/src/execution/operator/schema/physical_create_type.cpp @@ -78,7 +78,7 @@ SourceResultType PhysicalCreateType::GetDataInternal(ExecutionContext &context, info->type = LogicalType::ENUM(g_sink_state.result, g_sink_state.size); } - auto &catalog = Catalog::GetCatalog(context.client, info->catalog); + auto &catalog = Catalog::GetCatalog(context.client, info->Catalog()); catalog.CreateType(context.client, *info); return SourceResultType::FINISHED; } diff --git 
a/src/duckdb/src/execution/operator/schema/physical_create_view.cpp b/src/duckdb/src/execution/operator/schema/physical_create_view.cpp index c3d4ba21f..b34d72bec 100644 --- a/src/duckdb/src/execution/operator/schema/physical_create_view.cpp +++ b/src/duckdb/src/execution/operator/schema/physical_create_view.cpp @@ -8,7 +8,7 @@ namespace duckdb { //===--------------------------------------------------------------------===// SourceResultType PhysicalCreateView::GetDataInternal(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const { - auto &catalog = Catalog::GetCatalog(context.client, info->catalog); + auto &catalog = Catalog::GetCatalog(context.client, info->Catalog()); catalog.CreateView(context.client, *info); return SourceResultType::FINISHED; diff --git a/src/duckdb/src/execution/operator/schema/physical_drop.cpp b/src/duckdb/src/execution/operator/schema/physical_drop.cpp index dbed2efa7..e0a17a3e5 100644 --- a/src/duckdb/src/execution/operator/schema/physical_drop.cpp +++ b/src/duckdb/src/execution/operator/schema/physical_drop.cpp @@ -21,14 +21,14 @@ SourceResultType PhysicalDrop::GetDataInternal(ExecutionContext &context, DataCh case CatalogType::PREPARED_STATEMENT: { // DEALLOCATE silently ignores errors auto &statements = ClientData::Get(context.client).prepared_statements; - auto stmt_iter = statements.find(info->name); + auto stmt_iter = statements.find(info->Name()); if (stmt_iter != statements.end()) { statements.erase(stmt_iter); } break; } case CatalogType::SCHEMA_ENTRY: { - auto &catalog = Catalog::GetCatalog(context.client, info->catalog); + auto &catalog = Catalog::GetCatalog(context.client, info->Catalog()); catalog.DropEntry(context.client, *info); // Check if the dropped schema was set as the current schema @@ -36,9 +36,9 @@ SourceResultType PhysicalDrop::GetDataInternal(ExecutionContext &context, DataCh auto &default_entry = client_data.catalog_search_path->GetDefault(); auto &current_catalog = default_entry.catalog; auto
&current_schema = default_entry.schema; - D_ASSERT(info->name != DEFAULT_SCHEMA); + D_ASSERT(info->Name() != DEFAULT_SCHEMA); - if (info->catalog == current_catalog && current_schema == info->name) { + if (info->Catalog() == current_catalog && current_schema == info->Name()) { // Reset the schema to default SchemaSetting::SetLocal(context.client, DEFAULT_SCHEMA); } @@ -49,7 +49,7 @@ SourceResultType PhysicalDrop::GetDataInternal(ExecutionContext &context, DataCh D_ASSERT(info->extra_drop_info); auto &extra_info = info->extra_drop_info->Cast(); SecretManager::Get(context.client) - .DropSecretByName(context.client, info->name, info->if_not_found, extra_info.persist_mode, + .DropSecretByName(context.client, info->Name(), info->if_not_found, extra_info.persist_mode, Identifier(extra_info.secret_storage)); break; } @@ -63,20 +63,20 @@ SourceResultType PhysicalDrop::GetDataInternal(ExecutionContext &context, DataCh throw InternalException("DROP TRIGGER: ExtraDropTriggerInfo has no base_table"); } auto &base_table_ref = trigger_extra.base_table->Cast(); - auto &table_entry = Catalog::GetEntry(context.client, info->catalog, info->schema, - base_table_ref.table_name); + auto &table_entry = Catalog::GetEntry(context.client, info->Catalog(), info->Schema(), + base_table_ref.Table()); auto &duck_table = table_entry.Cast(); auto transaction = duck_table.catalog.GetCatalogTransaction(context.client); - if (!duck_table.DropTrigger(transaction, info->name, info->cascade)) { + if (!duck_table.DropTrigger(transaction, info->Name(), info->cascade)) { if (info->if_not_found == OnEntryNotFound::THROW_EXCEPTION) { - throw CatalogException("Trigger with name \"%s\" does not exist on table \"%s\"", info->name, - base_table_ref.table_name); + throw CatalogException("Trigger with name \"%s\" does not exist on table \"%s\"", info->Name(), + base_table_ref.Table()); } } break; } default: { - auto &catalog = Catalog::GetCatalog(context.client, info->catalog); + auto &catalog =
Catalog::GetCatalog(context.client, info->Catalog()); catalog.DropEntry(context.client, *info); break; } diff --git a/src/duckdb/src/execution/physical_operator.cpp b/src/duckdb/src/execution/physical_operator.cpp index 6fa6c6395..3a0c68c1f 100644 --- a/src/duckdb/src/execution/physical_operator.cpp +++ b/src/duckdb/src/execution/physical_operator.cpp @@ -3,6 +3,7 @@ #include "duckdb/function/table_function.hpp" #include "duckdb/common/printer.hpp" +#include "duckdb/common/box_renderer.hpp" #include "duckdb/common/render_tree.hpp" #include "duckdb/common/string_util.hpp" #include "duckdb/common/tree_renderer.hpp" @@ -29,13 +30,13 @@ string PhysicalOperator::GetName() const { return PhysicalOperatorToString(type); } -string PhysicalOperator::ToString(const ProfilerPrintFormat &format) const { - auto renderer = TreeRenderer::CreateRenderer(format); +string PhysicalOperator::ToString(optional_ptr context, const ProfilerPrintFormat &format) const { + auto renderer = context ? TreeRenderer::CreateRenderer(*context, format) : TreeRenderer::CreateRenderer(format); if (!renderer) { // formats without output (e.g. "no_output") render nothing return string(); } - stringstream ss; + StringResultRenderer ss; auto tree = RenderTree::CreateRenderTree(*this); renderer->ToStream(*tree, ss); return ss.str(); diff --git a/src/duckdb/src/execution/physical_plan/plan_create_index.cpp b/src/duckdb/src/execution/physical_plan/plan_create_index.cpp index b71ad0593..54a9f5d34 100644 --- a/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +++ b/src/duckdb/src/execution/physical_plan/plan_create_index.cpp @@ -96,10 +96,11 @@ static PhysicalOperator &AddSort(PhysicalPlanGenerator &plan, LogicalCreateIndex PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalCreateIndex &op) { // Early-out, if the index already exists. 
auto &schema = op.table.schema; - auto entry = schema.GetEntry(schema.GetCatalogTransaction(context), CatalogType::INDEX_ENTRY, op.info->index_name); + auto entry = + schema.GetEntry(schema.GetCatalogTransaction(context), CatalogType::INDEX_ENTRY, op.info->GetIndexName()); if (entry) { if (op.info->on_conflict != OnCreateConflict::IGNORE_ON_CONFLICT) { - throw CatalogException("Index with name \"%s\" already exists!", op.info->index_name); + throw CatalogException("Index with name \"%s\" already exists!", op.info->GetIndexName()); } return Make(op.types, op.estimated_cardinality); } diff --git a/src/duckdb/src/execution/physical_plan/plan_create_table.cpp b/src/duckdb/src/execution/physical_plan/plan_create_table.cpp index c8688450d..b86134548 100644 --- a/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +++ b/src/duckdb/src/execution/physical_plan/plan_create_table.cpp @@ -37,8 +37,8 @@ PhysicalOperator &DuckCatalog::PlanCreateTableAs(ClientContext &context, Physica PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalCreateTable &op) { const auto &create_info = op.info->base->Cast(); auto &catalog = op.info->schema.catalog; - auto existing_entry = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, create_info.schema, create_info.table, - OnEntryNotFound::RETURN_NULL); + auto existing_entry = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, create_info.Schema(), + create_info.GetTableName(), OnEntryNotFound::RETURN_NULL); bool replace = op.info->Base().on_conflict == OnCreateConflict::REPLACE_ON_CONFLICT; if ((!existing_entry || replace) && !op.children.empty()) { auto &plan = CreatePlan(*op.children[0]); diff --git a/src/duckdb/src/execution/physical_plan/plan_explain.cpp b/src/duckdb/src/execution/physical_plan/plan_explain.cpp index acfed115b..630251993 100644 --- a/src/duckdb/src/execution/physical_plan/plan_explain.cpp +++ b/src/duckdb/src/execution/physical_plan/plan_explain.cpp @@ -10,7 +10,7 @@ namespace duckdb { PhysicalOperator 
&PhysicalPlanGenerator::CreatePlan(LogicalExplain &op) { D_ASSERT(op.children.size() == 1); - auto logical_plan_opt = op.children[0]->ToString(op.format); + auto logical_plan_opt = op.children[0]->ToString(context, op.format); auto &plan = CreatePlan(*op.children[0]); if (op.explain_type == ExplainType::EXPLAIN_ANALYZE) { auto &explain = Make(op.types, op.format); @@ -19,7 +19,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalExplain &op) { } // Format the plan and set the output of the EXPLAIN. - op.physical_plan = plan.ToString(op.format); + op.physical_plan = plan.ToString(context, op.format); vector keys, values; switch (Settings::Get(context)) { case ExplainOutputType::OPTIMIZED_ONLY: diff --git a/src/duckdb/src/function/scalar/sequence/nextval.cpp b/src/duckdb/src/function/scalar/sequence/nextval.cpp index cfc02424e..df22a81fa 100644 --- a/src/duckdb/src/function/scalar/sequence/nextval.cpp +++ b/src/duckdb/src/function/scalar/sequence/nextval.cpp @@ -46,7 +46,7 @@ SequenceCatalogEntry &BindSequenceFromContext(ClientContext &context, Identifier SequenceCatalogEntry &BindSequence(Binder &binder, const Identifier &name) { auto qname = QualifiedName::Parse(name.GetIdentifierName()); - return BindSequence(binder, qname.catalog, qname.schema, qname.name); + return BindSequence(binder, qname.CatalogMutable(), qname.SchemaMutable(), qname.NameMutable()); } struct NextValLocalState : public FunctionLocalState { @@ -124,7 +124,8 @@ unique_ptr Deserialize(Deserializer &deserializer, BoundScalarFunc } auto &seq_info = create_info->Cast(); auto &context = deserializer.Get(); - auto &sequence = BindSequenceFromContext(context, seq_info.catalog, seq_info.schema, seq_info.name); + auto &sequence = BindSequenceFromContext(context, seq_info.CatalogMutable(), seq_info.SchemaMutable(), + seq_info.GetSequenceName()); return make_uniq(sequence); } diff --git a/src/duckdb/src/function/scalar/string/md5.cpp b/src/duckdb/src/function/scalar/string/md5.cpp index 
af7db74c4..06529dfb7 100644 --- a/src/duckdb/src/function/scalar/string/md5.cpp +++ b/src/duckdb/src/function/scalar/string/md5.cpp @@ -1,4 +1,4 @@ -#include "duckdb/common/crypto/md5.hpp" +#include "duckdb/function/scalar/crypto_hash.hpp" #include "duckdb/function/scalar/string_functions.hpp" #include "duckdb/common/exception.hpp" @@ -8,55 +8,54 @@ namespace duckdb { namespace { -struct MD5Operator { - template - static RESULT_TYPE Operation(INPUT_TYPE input, StringHeap &heap) { - auto hash = heap.EmptyString(MD5Context::MD5_HASH_LENGTH_TEXT); - MD5Context context; - context.Add(input); - context.FinishHex(hash.GetDataWriteable()); - hash.Finalize(); - return hash; - } -}; - struct MD5Number128Operator { template - static RESULT_TYPE Operation(INPUT_TYPE input) { - data_t digest[MD5Context::MD5_HASH_LENGTH_BINARY]; - - MD5Context context; - context.Add(input); - context.Finish(digest); - return BSwapIfBE(*reinterpret_cast(digest)); + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &, idx_t, crypto_hash_scalar::NumberData &data) { + data_t digest[16]; + uhugeint_t result; + D_ASSERT(CryptoHash::GetDigestSize(CryptoHashFunction::MD5) == sizeof(digest)); + D_ASSERT(sizeof(result) == sizeof(digest)); + data.hash_state.Hash(const_data_ptr_cast(input.GetData()), input.GetSize(), digest); + memcpy(&result, digest, sizeof(result)); + return BSwapIfBE(result); } }; void MD5Function(DataChunk &args, ExpressionState &state, Vector &result) { const auto &input = args.data[0]; + auto &local_state = crypto_hash_scalar::GetLocalState(state); + auto &heap = StringVector::GetStringHeap(result); + crypto_hash_scalar::StringData data(*local_state.hash_state, heap); - UnaryExecutor::ExecuteString(input, result); + UnaryExecutor::GenericExecute>( + input, result, data); } void MD5NumberFunction(DataChunk &args, ExpressionState &state, Vector &result) { const auto &input = args.data[0]; + auto &local_state = crypto_hash_scalar::GetLocalState(state); + 
crypto_hash_scalar::NumberData data(*local_state.hash_state); - UnaryExecutor::Execute(input, result); + UnaryExecutor::GenericExecute(input, result, data); } } // namespace ScalarFunctionSet MD5Fun::GetFunctions() { ScalarFunctionSet set("md5"); - set.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, MD5Function)); - set.AddFunction(ScalarFunction({LogicalType::BLOB}, LogicalType::VARCHAR, MD5Function)); + set.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, MD5Function, nullptr, nullptr, + crypto_hash_scalar::InitLocalState)); + set.AddFunction(ScalarFunction({LogicalType::BLOB}, LogicalType::VARCHAR, MD5Function, nullptr, nullptr, + crypto_hash_scalar::InitLocalState)); return set; } ScalarFunctionSet MD5NumberFun::GetFunctions() { ScalarFunctionSet set("md5_number"); - set.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::UHUGEINT, MD5NumberFunction)); - set.AddFunction(ScalarFunction({LogicalType::BLOB}, LogicalType::UHUGEINT, MD5NumberFunction)); + set.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::UHUGEINT, MD5NumberFunction, nullptr, nullptr, + crypto_hash_scalar::InitLocalState)); + set.AddFunction(ScalarFunction({LogicalType::BLOB}, LogicalType::UHUGEINT, MD5NumberFunction, nullptr, nullptr, + crypto_hash_scalar::InitLocalState)); return set; } diff --git a/src/duckdb/src/function/scalar/string/regexp.cpp b/src/duckdb/src/function/scalar/string/regexp.cpp index b21e37425..01c11a6cb 100644 --- a/src/duckdb/src/function/scalar/string/regexp.cpp +++ b/src/duckdb/src/function/scalar/string/regexp.cpp @@ -198,11 +198,16 @@ static void RegexReplaceFunction(DataChunk &args, ExpressionState &state, Vector auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast(); BinaryExecutor::Execute( strings, replaces, result, [&](string_t input, string_t replace) { + auto replace_piece = CreateStringPiece(replace); + std::string rewrite_error; + if 
(!lstate.constant_pattern.CheckRewriteString(replace_piece, &rewrite_error)) { + throw InvalidInputException("Invalid replacement string for regexp_replace: %s", rewrite_error); + } std::string sstring = input.GetString(); if (info.global_replace) { - RE2::GlobalReplace(&sstring, lstate.constant_pattern, CreateStringPiece(replace)); + RE2::GlobalReplace(&sstring, lstate.constant_pattern, replace_piece); } else { - RE2::Replace(&sstring, lstate.constant_pattern, CreateStringPiece(replace)); + RE2::Replace(&sstring, lstate.constant_pattern, replace_piece); } return heap.AddString(sstring); }); @@ -213,11 +218,16 @@ static void RegexReplaceFunction(DataChunk &args, ExpressionState &state, Vector if (!re.ok()) { throw InvalidInputException(re.error()); } + auto replace_piece = CreateStringPiece(replace); + std::string rewrite_error; + if (!re.CheckRewriteString(replace_piece, &rewrite_error)) { + throw InvalidInputException("Invalid replacement string for regexp_replace: %s", rewrite_error); + } std::string sstring = input.GetString(); if (info.global_replace) { - RE2::GlobalReplace(&sstring, re, CreateStringPiece(replace)); + RE2::GlobalReplace(&sstring, re, replace_piece); } else { - RE2::Replace(&sstring, re, CreateStringPiece(replace)); + RE2::Replace(&sstring, re, replace_piece); } return heap.AddString(sstring); }); diff --git a/src/duckdb/src/function/scalar/string/sha1.cpp b/src/duckdb/src/function/scalar/string/sha1.cpp index 0af67cbd1..f4b8f14e0 100644 --- a/src/duckdb/src/function/scalar/string/sha1.cpp +++ b/src/duckdb/src/function/scalar/string/sha1.cpp @@ -1,38 +1,31 @@ +#include "duckdb/function/scalar/crypto_hash.hpp" + #include "duckdb/function/scalar/string_functions.hpp" #include "duckdb/common/exception.hpp" #include "duckdb/common/vector_operations/unary_executor.hpp" -#include "mbedtls_wrapper.hpp" namespace duckdb { namespace { -struct SHA1Operator { - template - static RESULT_TYPE Operation(INPUT_TYPE input, StringHeap &heap) { - auto hash = 
heap.EmptyString(duckdb_mbedtls::MbedTlsWrapper::SHA1_HASH_LENGTH_TEXT); - - duckdb_mbedtls::MbedTlsWrapper::SHA1State state; - state.AddString(input.GetString()); - state.FinishHex(hash.GetDataWriteable()); - - hash.Finalize(); - return hash; - } -}; - void SHA1Function(DataChunk &args, ExpressionState &state, Vector &result) { const auto &input = args.data[0]; + auto &local_state = crypto_hash_scalar::GetLocalState(state); + auto &heap = StringVector::GetStringHeap(result); + crypto_hash_scalar::StringData data(*local_state.hash_state, heap); - UnaryExecutor::ExecuteString(input, result); + UnaryExecutor::GenericExecute>( + input, result, data); } } // namespace ScalarFunctionSet SHA1Fun::GetFunctions() { ScalarFunctionSet set("sha1"); - set.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, SHA1Function)); - set.AddFunction(ScalarFunction({LogicalType::BLOB}, LogicalType::VARCHAR, SHA1Function)); + set.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, SHA1Function, nullptr, nullptr, + crypto_hash_scalar::InitLocalState)); + set.AddFunction(ScalarFunction({LogicalType::BLOB}, LogicalType::VARCHAR, SHA1Function, nullptr, nullptr, + crypto_hash_scalar::InitLocalState)); return set; } diff --git a/src/duckdb/src/function/scalar/string/sha256.cpp b/src/duckdb/src/function/scalar/string/sha256.cpp index e3fabbdce..f22052b9c 100644 --- a/src/duckdb/src/function/scalar/string/sha256.cpp +++ b/src/duckdb/src/function/scalar/string/sha256.cpp @@ -1,38 +1,31 @@ +#include "duckdb/function/scalar/crypto_hash.hpp" + #include "duckdb/function/scalar/string_functions.hpp" #include "duckdb/common/exception.hpp" #include "duckdb/common/vector_operations/unary_executor.hpp" -#include "mbedtls_wrapper.hpp" namespace duckdb { namespace { -struct SHA256Operator { - template - static RESULT_TYPE Operation(INPUT_TYPE input, StringHeap &heap) { - auto hash = heap.EmptyString(duckdb_mbedtls::MbedTlsWrapper::SHA256_HASH_LENGTH_TEXT); - - 
duckdb_mbedtls::MbedTlsWrapper::SHA256State state; - state.AddString(input.GetString()); - state.FinishHex(hash.GetDataWriteable()); - - hash.Finalize(); - return hash; - } -}; - void SHA256Function(DataChunk &args, ExpressionState &state, Vector &result) { const auto &input = args.data[0]; + auto &local_state = crypto_hash_scalar::GetLocalState(state); + auto &heap = StringVector::GetStringHeap(result); + crypto_hash_scalar::StringData data(*local_state.hash_state, heap); - UnaryExecutor::ExecuteString(input, result); + UnaryExecutor::GenericExecute>( + input, result, data); } } // namespace ScalarFunctionSet SHA256Fun::GetFunctions() { ScalarFunctionSet set("sha256"); - set.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, SHA256Function)); - set.AddFunction(ScalarFunction({LogicalType::BLOB}, LogicalType::VARCHAR, SHA256Function)); + set.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, SHA256Function, nullptr, nullptr, + crypto_hash_scalar::InitLocalState)); + set.AddFunction(ScalarFunction({LogicalType::BLOB}, LogicalType::VARCHAR, SHA256Function, nullptr, nullptr, + crypto_hash_scalar::InitLocalState)); return set; } diff --git a/src/duckdb/src/function/table/read_duckdb.cpp b/src/duckdb/src/function/table/read_duckdb.cpp index d96565d20..6f5cc0b6c 100644 --- a/src/duckdb/src/function/table/read_duckdb.cpp +++ b/src/duckdb/src/function/table/read_duckdb.cpp @@ -308,7 +308,6 @@ bool DuckDBReader::TryInitializeScan(ClientContext &context, GlobalTableFunction AsyncResult DuckDBReader::Scan(ClientContext &context, GlobalTableFunctionState &gstate_p, LocalTableFunctionState &lstate_p, DataChunk &chunk) { - chunk.Reset(); auto &lstate = lstate_p.Cast(); TableFunctionInput input(bind_data.get(), lstate.local_state, global_state); diff --git a/src/duckdb/src/function/table/system/duckdb_tables.cpp b/src/duckdb/src/function/table/system/duckdb_tables.cpp index 2b31e44c4..2e98971cb 100644 --- 
a/src/duckdb/src/function/table/system/duckdb_tables.cpp +++ b/src/duckdb/src/function/table/system/duckdb_tables.cpp @@ -168,7 +168,7 @@ void DuckDBTablesFunction(ClientContext &context, TableFunctionInput &data_p, Da index_count.Append(Value::BIGINT(NumericCast(storage_info.index_info.size()))); check_constraint_count.Append(Value::BIGINT(NumericCast(CheckConstraintCount(table)))); auto table_info = table.GetInfo(); - table_info->catalog.clear(); + table_info->CatalogMutable().clear(); sql.Append(Value(table_info->ToString())); count++; } diff --git a/src/duckdb/src/function/table/system/duckdb_triggers.cpp b/src/duckdb/src/function/table/system/duckdb_triggers.cpp index ceab6f2b0..7075e3f80 100644 --- a/src/duckdb/src/function/table/system/duckdb_triggers.cpp +++ b/src/duckdb/src/function/table/system/duckdb_triggers.cpp @@ -129,7 +129,7 @@ void DuckDBTriggersFunction(ClientContext &context, TableFunctionInput &data_p, schema_oid.Append(Value::BIGINT(NumericCast(trigger.schema.oid))); trigger_name.Append(Value(trigger.name)); trigger_oid.Append(Value::BIGINT(NumericCast(trigger.oid))); - table_name.Append(Value(trigger.base_table->table_name)); + table_name.Append(Value(trigger.base_table->Table())); action_timing.Append(Value(EnumUtil::ToString(trigger.timing))); event_manipulation.Append(Value(EnumUtil::ToString(trigger.event_type))); vector col_vals; diff --git a/src/duckdb/src/function/table/system/pragma_storage_info.cpp b/src/duckdb/src/function/table/system/pragma_storage_info.cpp index d6cba1649..3ac18997d 100644 --- a/src/duckdb/src/function/table/system/pragma_storage_info.cpp +++ b/src/duckdb/src/function/table/system/pragma_storage_info.cpp @@ -111,8 +111,8 @@ static unique_ptr PragmaStorageInfoBind(ClientContext &context, Ta auto qname = QualifiedName::Parse(input.inputs[0].GetValue()); // look up the table name in the catalog - Binder::BindSchemaOrCatalog(context, qname.catalog, qname.schema); - auto &table_entry = Catalog::GetEntry(context, 
qname.catalog, qname.schema, qname.name); + Binder::BindSchemaOrCatalog(context, qname); + auto &table_entry = Catalog::GetEntry(context, qname.Catalog(), qname.Schema(), qname.Name()); return make_uniq(table_entry, options); } diff --git a/src/duckdb/src/function/table/system/pragma_table_info.cpp b/src/duckdb/src/function/table/system/pragma_table_info.cpp index 0d8bf9eb6..cd9ddfba1 100644 --- a/src/duckdb/src/function/table/system/pragma_table_info.cpp +++ b/src/duckdb/src/function/table/system/pragma_table_info.cpp @@ -170,8 +170,8 @@ static unique_ptr PragmaTableInfoBind(ClientContext &context, Tabl auto qname = QualifiedName::Parse(input.inputs[0].GetValue()); // look up the table name in the catalog - Binder::BindSchemaOrCatalog(context, qname.catalog, qname.schema); - auto &entry = Catalog::GetEntry(context, CatalogType::TABLE_ENTRY, qname.catalog, qname.schema, qname.name); + Binder::BindSchemaOrCatalog(context, qname); + auto &entry = Catalog::GetEntry(context, CatalogType::TABLE_ENTRY, qname.Catalog(), qname.Schema(), qname.Name()); return make_uniq(entry, IS_PRAGMA_TABLE_INFO); } diff --git a/src/duckdb/src/function/table/system/pragma_table_sample.cpp b/src/duckdb/src/function/table/system/pragma_table_sample.cpp index b8b04ad15..36db1da93 100644 --- a/src/duckdb/src/function/table/system/pragma_table_sample.cpp +++ b/src/duckdb/src/function/table/system/pragma_table_sample.cpp @@ -34,9 +34,9 @@ static unique_ptr DuckDBTableSampleBind(ClientContext &context, Ta vector &return_types, vector &names) { // look up the table name in the catalog auto qname = QualifiedName::Parse(input.inputs[0].GetValue()); - Binder::BindSchemaOrCatalog(context, qname.catalog, qname.schema); + Binder::BindSchemaOrCatalog(context, qname); - auto &entry = Catalog::GetEntry(context, qname.catalog, qname.schema, qname.name); + auto &entry = Catalog::GetEntry(context, qname.Catalog(), qname.Schema(), qname.Name()); if (entry.type != CatalogType::TABLE_ENTRY) { throw 
NotImplementedException("Invalid Catalog type passed to table_sample()"); } diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 32afadfca..04c59b32d 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "0-dev9575" +#define DUCKDB_PATCH_VERSION "0-dev9687" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 6 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.6.0-dev9575" +#define DUCKDB_VERSION "v1.6.0-dev9687" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "8abfd3a6ea" +#define DUCKDB_SOURCE_ID "d9fdc2f20b" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/catalog/catalog_entry/collate_catalog_entry.hpp b/src/duckdb/src/include/duckdb/catalog/catalog_entry/collate_catalog_entry.hpp index 67e61293c..e5bc8dd6d 100644 --- a/src/duckdb/src/include/duckdb/catalog/catalog_entry/collate_catalog_entry.hpp +++ b/src/duckdb/src/include/duckdb/catalog/catalog_entry/collate_catalog_entry.hpp @@ -22,8 +22,9 @@ class CollateCatalogEntry : public StandardEntry { public: CollateCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateCollationInfo &info) - : StandardEntry(CatalogType::COLLATION_ENTRY, schema, catalog, info.name), function(info.function), - combinable(info.combinable), not_required_for_equality(info.not_required_for_equality) { + : StandardEntry(CatalogType::COLLATION_ENTRY, schema, catalog, info.GetCollationName()), + function(info.function), combinable(info.combinable), + not_required_for_equality(info.not_required_for_equality) { } //! 
The collation function to push in case collation is required diff --git a/src/duckdb/src/include/duckdb/catalog/catalog_entry/coordinate_system_catalog_entry.hpp b/src/duckdb/src/include/duckdb/catalog/catalog_entry/coordinate_system_catalog_entry.hpp index f4d46db48..7cc910675 100644 --- a/src/duckdb/src/include/duckdb/catalog/catalog_entry/coordinate_system_catalog_entry.hpp +++ b/src/duckdb/src/include/duckdb/catalog/catalog_entry/coordinate_system_catalog_entry.hpp @@ -22,8 +22,8 @@ class CoordinateSystemCatalogEntry : public StandardEntry { public: CoordinateSystemCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateCoordinateSystemInfo &info) - : StandardEntry(CatalogType::COORDINATE_SYSTEM_ENTRY, schema, catalog, info.name), authority(info.authority), - code(info.code), projjson_definition(info.projjson_definition), + : StandardEntry(CatalogType::COORDINATE_SYSTEM_ENTRY, schema, catalog, info.GetCoordinateSystemName()), + authority(info.authority), code(info.code), projjson_definition(info.projjson_definition), wkt2_2019_definition(info.wkt2_2019_definition) { } diff --git a/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp b/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp index 53c9728a0..d693545e0 100644 --- a/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp +++ b/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp @@ -17,7 +17,7 @@ namespace duckdb { class FunctionEntry : public StandardEntry { public: FunctionEntry(CatalogType type, Catalog &catalog, SchemaCatalogEntry &schema, CreateFunctionInfo &info) - : StandardEntry(type, schema, catalog, info.name) { + : StandardEntry(type, schema, catalog, info.GetFunctionName()) { descriptions = std::move(info.descriptions); alias_of = std::move(info.alias_of); this->dependencies = info.dependencies; diff --git a/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp 
b/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp index 4b23fd385..b87b81198 100644 --- a/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +++ b/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp @@ -12,7 +12,6 @@ #include "duckdb/common/arrow/arrow_wrapper.hpp" #include "duckdb/main/chunk_scan_state.hpp" #include "duckdb/common/arrow/arrow_type_extension.hpp" -#include "duckdb/common/complex_json.hpp" namespace duckdb { class ArrowSchemaMetadata { @@ -45,7 +44,8 @@ class ArrowSchemaMetadata { private: //! The unordered map that holds the metadata unordered_map schema_metadata_map; - //! The extension metadata map, currently only used for internal types in arrow.opaque - unique_ptr extension_metadata_map; + //! The extension metadata, parsed into a flat key -> value map, currently only used for internal types in + //! arrow.opaque + unordered_map extension_metadata_map; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/common/box_renderer.hpp b/src/duckdb/src/include/duckdb/common/box_renderer.hpp index 7ab48d92f..18a4035cf 100644 --- a/src/duckdb/src/include/duckdb/common/box_renderer.hpp +++ b/src/duckdb/src/include/duckdb/common/box_renderer.hpp @@ -10,6 +10,7 @@ #include "duckdb/common/constants.hpp" #include "duckdb/common/vector.hpp" +#include "duckdb/common/printer.hpp" #include "duckdb/main/query_profiler.hpp" #include "duckdb/common/list.hpp" #include "duckdb/common/column_data_collection_render_interface.hpp" @@ -69,6 +70,22 @@ class StringResultRenderer : public BaseResultRenderer { string result; }; +//! A result renderer that prints directly to an output stream (stdout/stderr) as it renders. 
+class PrinterResultRenderer : public BaseResultRenderer { +public: + explicit PrinterResultRenderer(OutputStream stream = OutputStream::STREAM_STDERR); + + void RenderLayout(const string &text) override; + void RenderColumnName(const string &text) override; + void RenderType(const string &text) override; + void RenderValue(const string &text, const LogicalType &type) override; + void RenderNull(const string &text, const LogicalType &type) override; + void RenderFooter(const string &text) override; + +private: + OutputStream stream; +}; + enum class LargeNumberRendering { NONE = 0, // render all numbers as-is FOOTER = 1, // if there is a single row, adds a second footer row with a readable summarization of large numbers diff --git a/src/duckdb/src/include/duckdb/common/complex_json.hpp b/src/duckdb/src/include/duckdb/common/complex_json.hpp deleted file mode 100644 index e57112db3..000000000 --- a/src/duckdb/src/include/duckdb/common/complex_json.hpp +++ /dev/null @@ -1,55 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/complex_json.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include -#include "duckdb/common/exception.hpp" -#include "duckdb/common/vector.hpp" -#include "duckdb/common/unique_ptr.hpp" - -namespace duckdb { - -enum class ComplexJSONType : uint8_t { VALUE = 0, OBJECT = 1, ARRAY = 2 }; - -//! Custom struct to handle both strings and nested JSON objects -struct ComplexJSON { - //! Constructor for string values - explicit ComplexJSON(const string &str); - //! Basic empty constructor - ComplexJSON(); - //! Adds entry to the underlying map, also sets the type to OBJECT - void AddObjectEntry(const string &key, unique_ptr object); - //! Adds element to the underlying list, also sets the type to ARRAY - void AddArrayElement(unique_ptr object); - //! 
Gets a ComplexJSON object from the map - ComplexJSON &GetObject(const string &key); - //! Gets a ComplexJSON element from the list - ComplexJSON &GetArrayElement(const idx_t &index); - //! Gets a string version of the underlying ComplexJSON object from the map - string GetValue(const string &key) const; - //! Gets a string version of the underlying ComplexJSON array from the list - string GetValue(const idx_t &index) const; - //! Recursive function for GetValue - static string GetValueRecursive(const ComplexJSON &child); - //! Flattens this json to a top level key -> nested json - unordered_map Flatten() const; - -private: - //! Basic string value, in case this is the last value of a nested json - string str_value; - //! If this is a json object a map of key/value - unordered_map> obj_value; - //! If this is a json array a list of values - vector> arr_value; - //! If this json is an object (i.e., map or not) - ComplexJSONType type; -}; - -} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/common/encryption_state.hpp b/src/duckdb/src/include/duckdb/common/encryption_state.hpp index 4f3bfbcc9..3103e897d 100644 --- a/src/duckdb/src/include/duckdb/common/encryption_state.hpp +++ b/src/duckdb/src/include/duckdb/common/encryption_state.hpp @@ -18,6 +18,32 @@ namespace duckdb { struct EncryptionNonce; +enum class CryptoHashFunction : uint8_t { MD5, SHA1, SHA256 }; + +struct CryptoHash { + static constexpr idx_t MAX_DIGEST_SIZE = 32; + + DUCKDB_API static idx_t GetDigestSize(CryptoHashFunction function); + DUCKDB_API static idx_t GetHexDigestSize(CryptoHashFunction function); + DUCKDB_API static void ToHex(const_data_ptr_t input, idx_t input_len, char *output); +}; + +class CryptoHashState { +public: + DUCKDB_API explicit CryptoHashState(CryptoHashFunction function); + DUCKDB_API virtual ~CryptoHashState(); + + DUCKDB_API virtual void Hash(const_data_ptr_t input, idx_t input_len, data_ptr_t output) = 0; + DUCKDB_API virtual void HashHex(const_data_ptr_t 
input, idx_t input_len, char *output); + + CryptoHashFunction GetFunction() const { + return function; + } + +private: + CryptoHashFunction function; +}; + struct EncryptionStateMetadata { private: const EncryptionTypes::CipherType cipher; @@ -85,6 +111,16 @@ class EncryptionUtil { return make_shared_ptr(std::move(metadata)); } + DUCKDB_API virtual void Hash(CryptoHashFunction function, const_data_ptr_t input, idx_t input_len, + data_ptr_t output) const; + DUCKDB_API virtual void HashHex(CryptoHashFunction function, const_data_ptr_t input, idx_t input_len, + char *output) const; + DUCKDB_API virtual unique_ptr CreateHashState(CryptoHashFunction function) const; + DUCKDB_API virtual void Hmac(CryptoHashFunction function, const_data_ptr_t key, idx_t key_len, + const_data_ptr_t input, idx_t input_len, data_ptr_t output) const; + DUCKDB_API virtual bool SupportsHash(CryptoHashFunction function) const; + DUCKDB_API virtual bool SupportsHmac(CryptoHashFunction function) const; + virtual ~EncryptionUtil() { } diff --git a/src/duckdb/src/include/duckdb/common/enum_util.hpp b/src/duckdb/src/include/duckdb/common/enum_util.hpp index 83554cd72..bf7f101f9 100644 --- a/src/duckdb/src/include/duckdb/common/enum_util.hpp +++ b/src/duckdb/src/include/duckdb/common/enum_util.hpp @@ -158,6 +158,8 @@ enum class CopyOverwriteMode : uint8_t; enum class CopyToType : uint8_t; +enum class CryptoHashFunction : uint8_t; + enum class DataFileType : uint8_t; enum class DateCastResult : uint8_t; @@ -322,6 +324,8 @@ enum class MultiFileColumnMappingMode : uint8_t; enum class MultiFileFileState : uint8_t; +enum class MultiFileScanPhase : uint8_t; + enum class NType : uint8_t; enum class NewLineIdentifier : uint8_t; @@ -406,6 +410,8 @@ enum class RecursiveCTEInlineStageType : uint8_t; enum class RecursiveProbeSidePreference : uint8_t; +enum class RegexMatchOperatorSemantics : uint8_t; + enum class RelationType : uint8_t; enum class RemoteCapability : uint8_t; @@ -750,6 +756,9 @@ const char* 
EnumUtil::ToChars(CopyOverwriteMode value); template<> const char* EnumUtil::ToChars(CopyToType value); +template<> +const char* EnumUtil::ToChars(CryptoHashFunction value); + template<> const char* EnumUtil::ToChars(DataFileType value); @@ -996,6 +1005,9 @@ const char* EnumUtil::ToChars(MultiFileColumnMapping template<> const char* EnumUtil::ToChars(MultiFileFileState value); +template<> +const char* EnumUtil::ToChars(MultiFileScanPhase value); + template<> const char* EnumUtil::ToChars(NType value); @@ -1122,6 +1134,9 @@ const char* EnumUtil::ToChars(RecursiveCTEInlineSta template<> const char* EnumUtil::ToChars(RecursiveProbeSidePreference value); +template<> +const char* EnumUtil::ToChars(RegexMatchOperatorSemantics value); + template<> const char* EnumUtil::ToChars(RelationType value); @@ -1543,6 +1558,9 @@ CopyOverwriteMode EnumUtil::FromString(const char *value); template<> CopyToType EnumUtil::FromString(const char *value); +template<> +CryptoHashFunction EnumUtil::FromString(const char *value); + template<> DataFileType EnumUtil::FromString(const char *value); @@ -1789,6 +1807,9 @@ MultiFileColumnMappingMode EnumUtil::FromString(cons template<> MultiFileFileState EnumUtil::FromString(const char *value); +template<> +MultiFileScanPhase EnumUtil::FromString(const char *value); + template<> NType EnumUtil::FromString(const char *value); @@ -1915,6 +1936,9 @@ RecursiveCTEInlineStageType EnumUtil::FromString(co template<> RecursiveProbeSidePreference EnumUtil::FromString(const char *value); +template<> +RegexMatchOperatorSemantics EnumUtil::FromString(const char *value); + template<> RelationType EnumUtil::FromString(const char *value); diff --git a/src/duckdb/src/include/duckdb/common/enums/regex_match_operator_semantics.hpp b/src/duckdb/src/include/duckdb/common/enums/regex_match_operator_semantics.hpp new file mode 100644 index 000000000..d6a478a11 --- /dev/null +++ b/src/duckdb/src/include/duckdb/common/enums/regex_match_operator_semantics.hpp @@ -0,0 +1,17 
@@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/enums/regex_match_operator_semantics.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/common/constants.hpp" + +namespace duckdb { + +enum class RegexMatchOperatorSemantics : uint8_t { PARTIAL = 0, FULL = 1 }; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/common/json_document.hpp b/src/duckdb/src/include/duckdb/common/json_document.hpp new file mode 100644 index 000000000..4e0cbefcc --- /dev/null +++ b/src/duckdb/src/include/duckdb/common/json_document.hpp @@ -0,0 +1,221 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/json_document.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/common/typedefs.hpp" +#include "duckdb/common/string.hpp" +#include "duckdb/common/unique_ptr.hpp" +#include + +//! Forward declaration of the yyjson types - the actual definitions live in the yyjson headers, which are only +//! included in the implementation. This keeps yyjson an internal detail of the wrapper. +namespace duckdb_yyjson { // NOLINT +struct yyjson_doc; +struct yyjson_val; +struct yyjson_mut_doc; +struct yyjson_mut_val; +} // namespace duckdb_yyjson + +namespace duckdb { + +//! The type of a JSON value +//! NOTE: no explicit storage type so that EnumUtil ToString/FromString code is not generated for it +enum class JSONValueType { + INVALID, //!< Not a valid value (e.g. 
the result of a failed lookup) + JSON_NULL, //!< JSON null + BOOLEAN, //!< true / false + UNSIGNED_INTEGER, //!< an integer that fits in a uint64_t + SIGNED_INTEGER, //!< an integer that fits in an int64_t + DOUBLE, //!< a floating point number + STRING, //!< a string + ARRAY, //!< a JSON array + OBJECT, //!< a JSON object + RAW //!< raw (unparsed) number, e.g. a bignum +}; + +//! Flags controlling how JSON is parsed +enum class JSONReadFlags { + NONE = 0, + ALLOW_INVALID_UNICODE = 1 << 0, + ALLOW_INF_AND_NAN = 1 << 1, + ALLOW_TRAILING_COMMAS = 1 << 2, + BIGNUM_AS_RAW = 1 << 3 +}; + +constexpr JSONReadFlags operator|(JSONReadFlags a, JSONReadFlags b) { + return static_cast(static_cast(a) | static_cast(b)); +} + +//! Flags controlling how JSON is written +enum class JSONWriteFlags { NONE = 0, ALLOW_INVALID_UNICODE = 1 << 0, ALLOW_INF_AND_NAN = 1 << 1, PRETTY = 1 << 2 }; + +constexpr JSONWriteFlags operator|(JSONWriteFlags a, JSONWriteFlags b) { + return static_cast(static_cast(a) | static_cast(b)); +} + +//! A read-only handle to a single value inside a JSONDocument. +//! NOTE: a JSONValue is only valid for as long as the JSONDocument it originates from is alive. +class JSONValue { + friend class JSONDocument; + friend class JSONWriter; + +public: + //! Constructs an invalid value + JSONValue(); + + //! Whether or not this refers to an actual value + bool IsValid() const; + //! The type of this value + JSONValueType GetType() const; + + bool IsNull() const; + bool IsString() const; + bool IsArray() const; + bool IsObject() const; + //! Whether or not this is an integer (signed or unsigned) + bool IsInteger() const; + + //! Get the value as a string (only valid if IsString()) + string GetString() const; + //! Get the value as a boolean + bool GetBoolean() const; + //! Get the value as an unsigned integer + uint64_t GetUnsignedInteger() const; + //! Get the value as a signed integer + int64_t GetSignedInteger() const; + //! 
Get the value as a double + double GetDouble() const; + + //! Look up a member of an object by key - returns an invalid value if this is not an object or the key is absent + JSONValue GetMember(const string &key) const; + + //! Iterate over the elements of an array + void IterateArray(const std::function &callback) const; + //! Iterate over the key/value pairs of an object + void IterateObject(const std::function &callback) const; + + //! Serialize this value to a string + string ToString(JSONWriteFlags flags = JSONWriteFlags::NONE) const; + +private: + explicit JSONValue(duckdb_yyjson::yyjson_val *val); + +private: + //! The wrapped yyjson value - owned by the originating JSONDocument + duckdb_yyjson::yyjson_val *val; +}; + +//! Error information from parsing a JSON document +struct JSONParseError { + //! Whether or not an error occurred + bool HasError() const { + return has_error; + } + + bool has_error = false; + //! Byte position of the error in the input + idx_t position = 0; + //! Human-readable error message + string message; +}; + +//! Owns an immutable, parsed JSON document. Values obtained from it are valid for as long as the document is alive. +class JSONDocument { +public: + JSONDocument(); + ~JSONDocument(); + // non-copyable, movable + JSONDocument(const JSONDocument &) = delete; + JSONDocument &operator=(const JSONDocument &) = delete; + JSONDocument(JSONDocument &&other) noexcept; + JSONDocument &operator=(JSONDocument &&other) noexcept; + + //! Parse the given input. Throws an InvalidInputException on failure. + static unique_ptr Parse(const char *data, idx_t len, JSONReadFlags flags = JSONReadFlags::NONE); + //! Parse the given input. Returns nullptr on failure, in which case "error" is populated. + static unique_ptr TryParse(const char *data, idx_t len, JSONParseError &error, + JSONReadFlags flags = JSONReadFlags::NONE); + + //! The root value of the document + JSONValue GetRoot() const; + + //! 
Serialize the (immutable) document to a string + string ToString(JSONWriteFlags flags = JSONWriteFlags::NONE) const; + +private: + //! The wrapped yyjson document + duckdb_yyjson::yyjson_doc *doc; +}; + +//! A handle to a value being built inside a JSONWriter. The value is owned by the JSONWriter that created it - it must +//! not outlive that writer. +class JSONMutableValue { + friend class JSONWriter; + +public: + //! Constructs an invalid value + JSONMutableValue(); + + bool IsValid() const; + + //! Add a key/value pair to this (object) value + void Add(const string &key, JSONMutableValue value); + //! Add a string key/value pair to this (object) value + void AddString(const string &key, const string &value); + + //! Append a value to this (array) value + void Append(JSONMutableValue value); + //! Append a string value to this (array) value + void AppendString(const string &value); + +private: + JSONMutableValue(duckdb_yyjson::yyjson_mut_doc *doc, duckdb_yyjson::yyjson_mut_val *val); + +private: + //! The document this value belongs to - not owned + duckdb_yyjson::yyjson_mut_doc *doc; + //! The wrapped mutable yyjson value - owned by "doc" + duckdb_yyjson::yyjson_mut_val *val; +}; + +//! Builds a JSON document out of JSONMutableValues and serializes it to a string. +class JSONWriter { +public: + JSONWriter(); + ~JSONWriter(); + // non-copyable, movable + JSONWriter(const JSONWriter &) = delete; + JSONWriter &operator=(const JSONWriter &) = delete; + JSONWriter(JSONWriter &&other) noexcept; + JSONWriter &operator=(JSONWriter &&other) noexcept; + + //! Create values that belong to this document + JSONMutableValue CreateObject(); + JSONMutableValue CreateArray(); + JSONMutableValue CreateString(const string &value); + JSONMutableValue CreateNull(); + JSONMutableValue CreateBoolean(bool value); + JSONMutableValue CreateUnsignedInteger(uint64_t value); + JSONMutableValue CreateSignedInteger(int64_t value); + JSONMutableValue CreateDouble(double value); + //! 
Create a (deep) copy of an immutable value (e.g. from a parsed JSONDocument) belonging to this document + JSONMutableValue CreateCopy(const JSONValue &value); + + //! Set the root value of the document + void SetRoot(JSONMutableValue value); + + //! Serialize the document to a string + string ToString(JSONWriteFlags flags = JSONWriteFlags::NONE) const; + +private: + //! The wrapped mutable yyjson document + duckdb_yyjson::yyjson_mut_doc *doc; +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/common/multi_file/base_file_reader.hpp b/src/duckdb/src/include/duckdb/common/multi_file/base_file_reader.hpp index 23d8bc6fb..103d6e150 100644 --- a/src/duckdb/src/include/duckdb/common/multi_file/base_file_reader.hpp +++ b/src/duckdb/src/include/duckdb/common/multi_file/base_file_reader.hpp @@ -95,7 +95,10 @@ class BaseFileReader : public enable_shared_from_this { LocalTableFunctionState &lstate) = 0; //! Prepare a scan - called after TryInitializeScan succeeds - this is done without any lock held virtual void PrepareScan(ClientContext &context, GlobalTableFunctionState &gstate, LocalTableFunctionState &lstate); - //! Scan a chunk from the read state + //! Function to schedule IO tasks, if Reader supports that + DUCKDB_API virtual AsyncResult ScheduleIO(ClientContext &context, GlobalTableFunctionState &gstate, + LocalTableFunctionState &lstate); + //! Scan a chunk virtual AsyncResult Scan(ClientContext &context, GlobalTableFunctionState &global_state, LocalTableFunctionState &local_state, DataChunk &chunk) = 0; //! 
Finish scanning a given file diff --git a/src/duckdb/src/include/duckdb/common/multi_file/multi_file_function.hpp b/src/duckdb/src/include/duckdb/common/multi_file/multi_file_function.hpp index 37687d26c..f2b68279b 100644 --- a/src/duckdb/src/include/duckdb/common/multi_file/multi_file_function.hpp +++ b/src/duckdb/src/include/duckdb/common/multi_file/multi_file_function.hpp @@ -480,6 +480,8 @@ class MultiFileFunction : public TableFunction { if (old_file_index != scan_data.file_index) { InitializeFileScanState(context, current_reader_data, scan_data, gstate.projection_ids); } + // Initializing a batch is always a schedule phase + scan_data.phase = MultiFileScanPhase::SCHEDULE; return true; } else { // Set state to the next file @@ -662,6 +664,89 @@ class MultiFileFunction : public TableFunction { return partition_data; } + static bool HandleBlocked(TableFunctionInput &data_p, AsyncResult &res) { + D_ASSERT(res.GetResultType() == AsyncResultType::BLOCKED); + switch (data_p.results_execution_mode) { + case AsyncResultsExecutionMode::TASK_EXECUTOR: + data_p.async_result = std::move(res); + return true; + case AsyncResultsExecutionMode::SYNCHRONOUS: + // run the I/O synchronously, then loop again to resume + res.ExecuteTasksSynchronously(); + if (res.GetResultType() != AsyncResultType::HAVE_MORE_OUTPUT) { + throw InternalException("Unexpected behaviour from ExecuteTasksSynchronously"); + } + return false; + default: + throw InternalException("Unexpected AsyncResultsExecutionMode in MultiFileScan"); + } + } + + //! Emit the current output to the caller, or signal the loop to continue when there is nothing to emit yet. 
+ static bool EmitOutput(TableFunctionInput &data_p, DataChunk &output) { + if (output.size() == 0 && data_p.results_execution_mode == AsyncResultsExecutionMode::SYNCHRONOUS) { + return false; + } + data_p.async_result = SourceResultType::HAVE_MORE_OUTPUT; + return true; + } + + static bool SchedulePhase(ClientContext &context, TableFunctionInput &data_p, MultiFileLocalState &data, + MultiFileGlobalState &gstate) { + auto scheduled = data.reader->ScheduleIO(context, *gstate.global_state, *data.local_state); + data.phase = MultiFileScanPhase::DECODE; + if (scheduled.GetResultType() == AsyncResultType::BLOCKED) { + return HandleBlocked(data_p, scheduled); + } + return false; + } + + static bool DecodePhase(ClientContext &context, TableFunctionInput &data_p, MultiFileLocalState &data, + MultiFileGlobalState &gstate, MultiFileBindData &bind_data, DataChunk &output) { + auto &scan_chunk = data.scan_chunk; + if (!data.resuming_blocked_scan) { + // A BLOCKED scan leaves partial data in the chunk (e.g. filter prefetch from parquet) that the resume must + // keep. Hence, we only reset if we are not resuming from a blocked scan. 
+ scan_chunk.Reset(); + } + auto res = data.reader->Scan(context, *gstate.global_state, *data.local_state, scan_chunk); + + data.resuming_blocked_scan = res.GetResultType() == AsyncResultType::BLOCKED; + if (res.GetResultType() == AsyncResultType::BLOCKED) { + return HandleBlocked(data_p, res); + } + + output.SetChildCardinality(scan_chunk.size()); + + if (scan_chunk.size() > 0) { + data.rows_scanned += scan_chunk.size(); + bind_data.multi_file_reader->FinalizeChunk(context, bind_data, *data.reader, *data.reader_data, scan_chunk, + output, data.executor, gstate.multi_file_reader_state); + output.SetChildCardinality(output.size()); + } + if (res.GetResultType() == AsyncResultType::HAVE_MORE_OUTPUT) { + // More chunks left on this batch, lets keep going + return EmitOutput(data_p, output); + } + + if (res.GetResultType() != AsyncResultType::FINISHED) { + throw InternalException("Unexpected result in MultiFileScan, must be FINISHED, is %s", + EnumUtil::ToChars(res.GetResultType())); + } + + // We are done with this batch + if (!TryInitializeNextBatch(context, bind_data, data, gstate)) { + if (output.size() > 0 && data_p.results_execution_mode == AsyncResultsExecutionMode::SYNCHRONOUS) { + gstate.finished = true; + data_p.async_result = SourceResultType::HAVE_MORE_OUTPUT; + } else { + data_p.async_result = SourceResultType::FINISHED; + } + return true; + } + return EmitOutput(data_p, output); + } + static void MultiFileScan(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { if (!data_p.local_state) { data_p.async_result = SourceResultType::FINISHED; @@ -677,68 +762,20 @@ class MultiFileFunction : public TableFunction { } do { - auto &scan_chunk = data.scan_chunk; - if (data.scan_blocked) { - data.scan_blocked = false; - } else { - scan_chunk.Reset(); - } - - auto res = data.reader->Scan(context, *gstate.global_state, *data.local_state, scan_chunk); - - if (res.GetResultType() == AsyncResultType::BLOCKED) { - data.scan_blocked = true; - switch 
(data_p.results_execution_mode) { - case AsyncResultsExecutionMode::TASK_EXECUTOR: - data_p.async_result = std::move(res); + switch (data.phase) { + case MultiFileScanPhase::SCHEDULE: + if (SchedulePhase(context, data_p, data, gstate)) { return; - case AsyncResultsExecutionMode::SYNCHRONOUS: - res.ExecuteTasksSynchronously(); - if (res.GetResultType() != AsyncResultType::HAVE_MORE_OUTPUT) { - throw InternalException("Unexpected behaviour from ExecuteTasksSynchronously"); - } - // no completed output yet, loop again to resume the Scan - continue; - } - } - - output.SetChildCardinality(scan_chunk.size()); - - if (scan_chunk.size() > 0) { - data.rows_scanned += scan_chunk.size(); - bind_data.multi_file_reader->FinalizeChunk(context, bind_data, *data.reader, *data.reader_data, - scan_chunk, output, data.executor, - gstate.multi_file_reader_state); - output.SetChildCardinality(output.size()); - } - if (res.GetResultType() == AsyncResultType::HAVE_MORE_OUTPUT) { - // Loop back to the same block - if (output.size() == 0 && data_p.results_execution_mode == AsyncResultsExecutionMode::SYNCHRONOUS) { - continue; - } - data_p.async_result = SourceResultType::HAVE_MORE_OUTPUT; - return; - } - - if (res.GetResultType() != AsyncResultType::FINISHED) { - throw InternalException("Unexpected result in MultiFileScan, must be FINISHED, is %s", - EnumUtil::ToChars(res.GetResultType())); - } - - if (!TryInitializeNextBatch(context, bind_data, data, gstate)) { - if (output.size() > 0 && data_p.results_execution_mode == AsyncResultsExecutionMode::SYNCHRONOUS) { - gstate.finished = true; - data_p.async_result = SourceResultType::HAVE_MORE_OUTPUT; - } else { - data_p.async_result = SourceResultType::FINISHED; } - } else { - if (output.size() == 0 && data_p.results_execution_mode == AsyncResultsExecutionMode::SYNCHRONOUS) { - continue; + break; + case MultiFileScanPhase::DECODE: + if (DecodePhase(context, data_p, data, gstate, bind_data, output)) { + return; } - data_p.async_result = 
SourceResultType::HAVE_MORE_OUTPUT; + break; + default: + throw InternalException("Unexpected MultiFileScanPhase in MultiFileScan"); } - return; } while (true); } diff --git a/src/duckdb/src/include/duckdb/common/multi_file/multi_file_states.hpp b/src/duckdb/src/include/duckdb/common/multi_file/multi_file_states.hpp index 58736f3e4..cdaf27a1c 100644 --- a/src/duckdb/src/include/duckdb/common/multi_file/multi_file_states.hpp +++ b/src/duckdb/src/include/duckdb/common/multi_file/multi_file_states.hpp @@ -184,6 +184,9 @@ struct MultiFileGlobalState : public GlobalTableFunctionState { } }; +//! Phase of the per-thread multi-file scan: we are either scheduling or decoding +enum class MultiFileScanPhase : uint8_t { SCHEDULE, DECODE }; + struct MultiFileLocalState : public LocalTableFunctionState { public: explicit MultiFileLocalState(ClientContext &context) : executor(context) { @@ -198,8 +201,10 @@ struct MultiFileLocalState : public LocalTableFunctionState { unique_ptr local_state; //! The chunk written to by the reader, handed to FinalizeChunk to transform to the global schema DataChunk scan_chunk; - //! Whether the last Scan call returned BLOCKED - bool scan_blocked = false; + //! Set when the previous Scan() returned BLOCKED, so the next Scan() preserves the partial chunk + bool resuming_blocked_scan = false; + //! Whether the current batch still needs its I/O scheduled or is ready to decode + MultiFileScanPhase phase = MultiFileScanPhase::SCHEDULE; //! The executor to transform scan_chunk into the final result with FinalizeChunk ExpressionExecutor executor; //! 
Number of rows scanned by this thread (for profiling) diff --git a/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp b/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp index dde8215e9..b0861b43c 100644 --- a/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +++ b/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp @@ -51,6 +51,14 @@ struct RadixPartitioning { return (hash_t(1 << radix_bits) - 1) << Shift(radix_bits); } + //! Apply the radix mask to a hash and return the partition index + static inline idx_t ApplyMask(const hash_t hash, idx_t radix_bits) { + D_ASSERT(radix_bits <= MAX_RADIX_BITS); + const auto result = (hash & Mask(radix_bits)) >> Shift(radix_bits); + D_ASSERT(result < NumberOfPartitions(radix_bits)); + return result; + } + //! Select using a cutoff on the radix bits of the hash static idx_t Select(const Vector &hashes, const SelectionVector *sel, idx_t count, idx_t radix_bits, const ValidityMask &partition_mask, SelectionVector *true_sel, SelectionVector *false_sel); diff --git a/src/duckdb/src/include/duckdb/common/sorting/hashed_sort.hpp b/src/duckdb/src/include/duckdb/common/sorting/hashed_sort.hpp index 626a04fd2..cc7c00c13 100644 --- a/src/duckdb/src/include/duckdb/common/sorting/hashed_sort.hpp +++ b/src/duckdb/src/include/duckdb/common/sorting/hashed_sort.hpp @@ -75,6 +75,14 @@ class HashedSort : public SortStrategy { Orders orders; //! The partition columns vector partition_ids; + //! The payload columns corresponding to the PARTITION BY keys + vector partition_key_ids; + //! The PARTITION BY key types + vector partition_key_types; + //! The number of PARTITION BY keys + idx_t partition_key_count = 0; + //! Whether single-key hash groups can skip sorting + bool can_bypass_single_key_sort = false; //! Are we creating a dummy payload column? 
bool force_payload = false; // Key columns that must be computed diff --git a/src/duckdb/src/include/duckdb/common/sorting/partition_key_tracker.hpp b/src/duckdb/src/include/duckdb/common/sorting/partition_key_tracker.hpp new file mode 100644 index 000000000..401ff2840 --- /dev/null +++ b/src/duckdb/src/include/duckdb/common/sorting/partition_key_tracker.hpp @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/sorting/partition_key_tracker.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/common/types/data_chunk.hpp" +#include "duckdb/common/types/row/partitioned_tuple_data.hpp" + +namespace duckdb { + +class PartitionKeyTracker { +public: + PartitionKeyTracker(Allocator &allocator, const vector &key_types); + + void Reset(idx_t radix_bits); + bool CanBypass(idx_t hash_bin) const; + void Update(DataChunk &keys, Vector &hashes, PartitionedTupleDataAppendState &append_state, idx_t count); + void Combine(const PartitionKeyTracker &other); + +private: + enum class PartitionKeyTrackerState : uint8_t { EMPTY, SINGLE_KEY, MULTIPLE_KEYS }; + + void StoreRepresentative(DataChunk &keys, idx_t row_idx, hash_t hash, idx_t bin_idx); + void StoreRepresentative(const PartitionKeyTracker &source, idx_t source_bin, idx_t target_bin); + void MarkMixed(idx_t bin_idx); + + template + idx_t BuildCandidates(DataChunk &keys, Vector &input_hashes, PartitionedTupleDataAppendState &append_state, + idx_t count); + idx_t CompactCandidates(idx_t candidate_count); + void CompareCandidates(DataChunk &keys, idx_t candidate_count); + void CombineBin(const PartitionKeyTracker &source, idx_t bin_idx, idx_t &candidate_count); + idx_t CompactTrackerCandidates(idx_t candidate_count); + void CompareTrackerCandidates(const PartitionKeyTracker &source, idx_t candidate_count); + +private: + idx_t key_count; + idx_t radix_bits = 0; + unsafe_vector 
states; + unsafe_vector hashes; + DataChunk representatives; + SelectionVector single_value_sel; + SelectionVector candidate_input_sel; + SelectionVector candidate_rep_sel; + SelectionVector mismatch_sel; +}; + +class RepartitionKeyTracker : public PartitionedTupleDataRepartitionKeyTracker { +public: + RepartitionKeyTracker(Allocator &allocator, PartitionKeyTracker &tracker, const vector &key_types, + const vector &partition_key_ids); + + void RepartitionChunk(TupleDataCollection &source_partition, TupleDataChunkState &source_chunk, + PartitionedTupleDataAppendState &target_append, idx_t count) override; + +private: + PartitionKeyTracker &tracker; + const vector &partition_key_ids; + DataChunk keys; + TupleDataChunkState key_gather_state; +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/common/string_util.hpp b/src/duckdb/src/include/duckdb/common/string_util.hpp index 71560fc5d..727e39b88 100644 --- a/src/duckdb/src/include/duckdb/common/string_util.hpp +++ b/src/duckdb/src/include/duckdb/common/string_util.hpp @@ -14,7 +14,7 @@ #include "duckdb/common/pair.hpp" #include "duckdb/common/set.hpp" #include "duckdb/common/vector.hpp" -#include "duckdb/common/complex_json.hpp" +#include "duckdb/common/unordered_map.hpp" #include "duckdb/common/exception/parser_exception.hpp" #include @@ -316,12 +316,12 @@ class StringUtil { static bool Equals(const string_t &s1, const char *s2); static bool Equals(const char *s1, const string_t &s2); - //! JSON method that parses a { string: value } JSON blob + //! JSON method that parses a { string: value } JSON blob into a flat key -> value map. Nested objects/arrays are + //! kept as their (re-serialized) JSON string value. //! NOTE: this method is not efficient //! NOTE: this method is used in Exception construction - as such it does NOT throw on invalid JSON, instead an //! empty map is returned - //! Parses complex (i.e., nested) Json maps, it also parses invalid JSONs, as a pure string. 
- DUCKDB_API static unique_ptr ParseJSONMap(const string &json, bool ignore_errors = false); + DUCKDB_API static unordered_map ParseJSONMap(const string &json, bool ignore_errors = false); //! JSON method that constructs a { string: value } JSON map //! This is the inverse of ParseJSONMap @@ -331,8 +331,6 @@ class StringUtil { //! Transforms an unordered map to a JSON string DUCKDB_API static string ToJSONMap(const unordered_map &map); - //! Transforms an complex JSON to a JSON string - DUCKDB_API static string ToComplexJSONMap(const ComplexJSON &complex_json); DUCKDB_API static string ValidateJSON(const char *data, const idx_t &len); diff --git a/src/duckdb/src/include/duckdb/common/tree_renderer.hpp b/src/duckdb/src/include/duckdb/common/tree_renderer.hpp index a33ba3e23..a917e68d7 100644 --- a/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +++ b/src/duckdb/src/include/duckdb/common/tree_renderer.hpp @@ -17,6 +17,8 @@ namespace duckdb { +class BaseResultRenderer; +class ClientContext; class QueryProfiler; //! TreeRenderer renders a plan/operator tree (for EXPLAIN) or a query profiler's output in a particular format. @@ -30,13 +32,21 @@ class TreeRenderer { } public: - void ToStream(RenderTree &root, std::ostream &ss); - virtual void ToStreamInternal(RenderTree &root, std::ostream &ss) = 0; - //! Create a TreeRenderer for the given format name (e.g. "json", "text"). The name is matched case-insensitively - //! and throws if it is not recognized. Returns nullptr for formats that render no output (i.e. "no_output"). - //! This is the primary, name-based factory; new render formats are added here. + //! Render the tree into a BaseResultRenderer (e.g. a StringResultRenderer, or a highlighting-aware sink) + void ToStream(RenderTree &root, BaseResultRenderer &ss); + virtual void ToStreamInternal(RenderTree &root, BaseResultRenderer &ss) = 0; + + //! Returns the sink to render into when printing this format's output directly. Only invoked when we are about + //! 
to print (the default renderer writes straight to the output stream), so it is never created for the + //! string-producing paths. Formats can override this to provide a highlighting-aware sink. + virtual unique_ptr GetPrintRenderer(); + //! Create a renderer for the given format, consulting the pluggable registry and configuring built-ins from the + //! client's "profiling_renderer_settings". Matched case-insensitively; throws if unknown, nullptr for "no_output". + static unique_ptr CreateRenderer(ClientContext &context, const string &name); + static unique_ptr CreateRenderer(ClientContext &context, const ProfilerPrintFormat &format); + + //! Create a built-in renderer without configuring it or consulting the registry (no ClientContext available) static unique_ptr CreateRenderer(const string &name); - //! Create a TreeRenderer for the given ProfilerPrintFormat (thin wrapper over the name-based factory). static unique_ptr CreateRenderer(const ProfilerPrintFormat &format); //! Generic configuration of the renderer: passes renderer settings (e.g. from the "profiling_renderer_settings" @@ -48,12 +58,12 @@ class TreeRenderer { virtual bool UsesRawKeyNames() { return false; } - virtual void Render(const ProfilingNode &op, std::ostream &ss) { + virtual void Render(const ProfilingNode &op, BaseResultRenderer &ss) { } - //! Render the profiler's output in this format. Only called when profiling is enabled. The base implementation - //! renders the profiling node tree; formats with richer output (text, JSON) override this. - virtual string RenderProfiler(const QueryProfiler &profiler); + //! Render the profiler's output into the given sink. Only called when profiling is enabled. The base + //! implementation renders the profiling node tree; formats with richer output (text, JSON) override this. + virtual void RenderProfiler(const QueryProfiler &profiler, BaseResultRenderer &ss); //! The message shown (in this format) when profiling is disabled. 
virtual string RenderProfilerDisabled(); }; diff --git a/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp b/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp index 9476107d5..832621e29 100644 --- a/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +++ b/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp @@ -33,12 +33,12 @@ class GRAPHVIZTreeRenderer : public TreeRenderer { string ToString(const ProfilingNode &op); string ToString(const Pipeline &op); - void Render(const LogicalOperator &op, std::ostream &ss); - void Render(const PhysicalOperator &op, std::ostream &ss); - void Render(const ProfilingNode &op, std::ostream &ss) override; - void Render(const Pipeline &op, std::ostream &ss); + void Render(const LogicalOperator &op, BaseResultRenderer &ss); + void Render(const PhysicalOperator &op, BaseResultRenderer &ss); + void Render(const ProfilingNode &op, BaseResultRenderer &ss) override; + void Render(const Pipeline &op, BaseResultRenderer &ss); - void ToStreamInternal(RenderTree &root, std::ostream &ss) override; + void ToStreamInternal(RenderTree &root, BaseResultRenderer &ss) override; string RenderProfilerDisabled() override; }; diff --git a/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp b/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp index d19319149..48bec0f6e 100644 --- a/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +++ b/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp @@ -33,12 +33,12 @@ class HTMLTreeRenderer : public TreeRenderer { string ToString(const ProfilingNode &op); string ToString(const Pipeline &op); - void Render(const LogicalOperator &op, std::ostream &ss); - void Render(const PhysicalOperator &op, std::ostream &ss); - void Render(const ProfilingNode &op, std::ostream &ss) override; - void Render(const Pipeline &op, 
std::ostream &ss); + void Render(const LogicalOperator &op, BaseResultRenderer &ss); + void Render(const PhysicalOperator &op, BaseResultRenderer &ss); + void Render(const ProfilingNode &op, BaseResultRenderer &ss) override; + void Render(const Pipeline &op, BaseResultRenderer &ss); - void ToStreamInternal(RenderTree &root, std::ostream &ss) override; + void ToStreamInternal(RenderTree &root, BaseResultRenderer &ss) override; string RenderProfilerDisabled() override; }; diff --git a/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp b/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp index 097b375b7..059329d43 100644 --- a/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +++ b/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp @@ -33,15 +33,15 @@ class JSONTreeRenderer : public TreeRenderer { string ToString(const ProfilingNode &op); string ToString(const Pipeline &op); - void Render(const LogicalOperator &op, std::ostream &ss); - void Render(const PhysicalOperator &op, std::ostream &ss); - void Render(const ProfilingNode &op, std::ostream &ss) override; - void Render(const Pipeline &op, std::ostream &ss); + void Render(const LogicalOperator &op, BaseResultRenderer &ss); + void Render(const PhysicalOperator &op, BaseResultRenderer &ss); + void Render(const ProfilingNode &op, BaseResultRenderer &ss) override; + void Render(const Pipeline &op, BaseResultRenderer &ss); - void ToStreamInternal(RenderTree &root, std::ostream &ss) override; + void ToStreamInternal(RenderTree &root, BaseResultRenderer &ss) override; //! 
Profiler JSON output: the full query profile result tree (with query-level metrics) - string RenderProfiler(const QueryProfiler &profiler) override; + void RenderProfiler(const QueryProfiler &profiler, BaseResultRenderer &ss) override; string RenderProfilerDisabled() override; }; diff --git a/src/duckdb/src/include/duckdb/common/tree_renderer/mermaid_tree_renderer.hpp b/src/duckdb/src/include/duckdb/common/tree_renderer/mermaid_tree_renderer.hpp index d7ff98cb5..3cc9549e5 100644 --- a/src/duckdb/src/include/duckdb/common/tree_renderer/mermaid_tree_renderer.hpp +++ b/src/duckdb/src/include/duckdb/common/tree_renderer/mermaid_tree_renderer.hpp @@ -33,12 +33,12 @@ class MermaidTreeRenderer : public TreeRenderer { string ToString(const ProfilingNode &op); string ToString(const Pipeline &op); - void Render(const LogicalOperator &op, std::ostream &ss); - void Render(const PhysicalOperator &op, std::ostream &ss); - void Render(const ProfilingNode &op, std::ostream &ss) override; - void Render(const Pipeline &op, std::ostream &ss); + void Render(const LogicalOperator &op, BaseResultRenderer &ss); + void Render(const PhysicalOperator &op, BaseResultRenderer &ss); + void Render(const ProfilingNode &op, BaseResultRenderer &ss) override; + void Render(const Pipeline &op, BaseResultRenderer &ss); - void ToStreamInternal(RenderTree &root, std::ostream &ss) override; + void ToStreamInternal(RenderTree &root, BaseResultRenderer &ss) override; string RenderProfilerDisabled() override; }; diff --git a/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp b/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp index 6cac2705a..f832cdde5 100644 --- a/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +++ b/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp @@ -75,15 +75,15 @@ class TextTreeRenderer : public TreeRenderer { string ToString(const ProfilingNode &op); string ToString(const Pipeline &op); 
- void Render(const LogicalOperator &op, std::ostream &ss); - void Render(const PhysicalOperator &op, std::ostream &ss); - void Render(const ProfilingNode &op, std::ostream &ss) override; - void Render(const Pipeline &op, std::ostream &ss); + void Render(const LogicalOperator &op, BaseResultRenderer &ss); + void Render(const PhysicalOperator &op, BaseResultRenderer &ss); + void Render(const ProfilingNode &op, BaseResultRenderer &ss) override; + void Render(const Pipeline &op, BaseResultRenderer &ss); - void ToStreamInternal(RenderTree &root, std::ostream &ss) override; + void ToStreamInternal(RenderTree &root, BaseResultRenderer &ss) override; //! Profiler text output: the framed query tree (with phase timings, total time, etc.) - string RenderProfiler(const QueryProfiler &profiler) override; + void RenderProfiler(const QueryProfiler &profiler, BaseResultRenderer &ss) override; void Configure(const unordered_map &settings) override; @@ -97,9 +97,9 @@ class TextTreeRenderer : public TreeRenderer { private: string ExtraInfoSeparator(); - void RenderTopLayer(RenderTree &root, std::ostream &ss, idx_t y); - void RenderBoxContent(RenderTree &root, std::ostream &ss, idx_t y); - void RenderBottomLayer(RenderTree &root, std::ostream &ss, idx_t y); + void RenderTopLayer(RenderTree &root, BaseResultRenderer &ss, idx_t y); + void RenderBoxContent(RenderTree &root, BaseResultRenderer &ss, idx_t y); + void RenderBottomLayer(RenderTree &root, BaseResultRenderer &ss, idx_t y); bool CanSplitOnThisChar(char l); bool IsPadding(char l); diff --git a/src/duckdb/src/include/duckdb/common/tree_renderer/yaml_tree_renderer.hpp b/src/duckdb/src/include/duckdb/common/tree_renderer/yaml_tree_renderer.hpp index 85d8b7af9..f07c6acdc 100644 --- a/src/duckdb/src/include/duckdb/common/tree_renderer/yaml_tree_renderer.hpp +++ b/src/duckdb/src/include/duckdb/common/tree_renderer/yaml_tree_renderer.hpp @@ -23,18 +23,18 @@ class YAMLTreeRenderer : public TreeRenderer { string ToString(const 
ProfilingNode &op); string ToString(const Pipeline &op); - void Render(const LogicalOperator &op, std::ostream &ss); - void Render(const PhysicalOperator &op, std::ostream &ss); - void Render(const ProfilingNode &op, std::ostream &ss) override; - void Render(const Pipeline &op, std::ostream &ss); + void Render(const LogicalOperator &op, BaseResultRenderer &ss); + void Render(const PhysicalOperator &op, BaseResultRenderer &ss); + void Render(const ProfilingNode &op, BaseResultRenderer &ss) override; + void Render(const Pipeline &op, BaseResultRenderer &ss); - void ToStreamInternal(RenderTree &root, std::ostream &ss) override; + void ToStreamInternal(RenderTree &root, BaseResultRenderer &ss) override; bool UsesRawKeyNames() override { return false; } private: - void RenderRecursive(RenderTree &node, std::ostream &ss, idx_t depth, idx_t x, idx_t y); + void RenderRecursive(RenderTree &node, BaseResultRenderer &ss, idx_t depth, idx_t x, idx_t y); }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp b/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp index 6e376fc2c..c36ceac3f 100644 --- a/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +++ b/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp @@ -10,6 +10,7 @@ #include "duckdb/common/fixed_size_map.hpp" #include "duckdb/common/optional_idx.hpp" +#include "duckdb/common/optional_ptr.hpp" #include "duckdb/common/perfect_map_set.hpp" #include "duckdb/common/types/row/tuple_data_allocator.hpp" #include "duckdb/common/types/row/tuple_data_collection.hpp" @@ -77,6 +78,14 @@ enum class PartitionedTupleDataType : uint8_t { RADIX }; +class PartitionedTupleDataRepartitionKeyTracker { +public: + virtual ~PartitionedTupleDataRepartitionKeyTracker() = default; + + virtual void RepartitionChunk(TupleDataCollection &source_partition, TupleDataChunkState &source_chunk, + PartitionedTupleDataAppendState 
&target_append, idx_t count) = 0; +}; + //! PartitionedTupleData represents partitioned row data, which serves as an interface for different types of //! partitioning, e.g., radix, hive class PartitionedTupleData { @@ -114,7 +123,8 @@ class PartitionedTupleData { //! Resets this PartitionedTupleData void Reset(); //! Repartition this PartitionedTupleData into the new PartitionedTupleData - void Repartition(ClientContext &context, PartitionedTupleData &new_partitioned_data); + void Repartition(ClientContext &context, PartitionedTupleData &new_partitioned_data, + optional_ptr key_tracker = nullptr); //! Unpins the data void Unpin(); //! Get the partitions in this PartitionedTupleData diff --git a/src/duckdb/src/include/duckdb/common/types/variant/variant_builder.hpp b/src/duckdb/src/include/duckdb/common/types/variant/variant_builder.hpp index 2d94fdbc6..3ca61f325 100644 --- a/src/duckdb/src/include/duckdb/common/types/variant/variant_builder.hpp +++ b/src/duckdb/src/include/duckdb/common/types/variant/variant_builder.hpp @@ -31,6 +31,7 @@ #include "duckdb/common/limits.hpp" #include +#include namespace duckdb { @@ -587,4 +588,16 @@ void BuildVariant(SOURCE &source, idx_t count, Vector &result) { result.Verify(); } +//! Build an all-NULL canonical (unshredded) VARIANT vector with no values/children/keys. Used as the +//! unshredded pool of a SHREDDED vector when a chunk has no leftover data: the pool is never consulted, so +//! only its structural validity matters. Far cheaper than BuildVariant (no per-row traversal / key finalize). +inline void BuildEmptyVariant(idx_t count, Vector &result) { + if (count == 0) { + return; + } + //! Every row is a (never-consulted) VARIANT NULL; the real row validity lives on the shredded component. The + //! pool is never read, so represent it as a single constant NULL (children come along as NULL for free). 
+ ConstantVector::SetNull(result, count_t(count)); +} + } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp b/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp index 09928e893..787bb3e4a 100644 --- a/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp @@ -89,7 +89,7 @@ class UnboundIndex final : public Index { return GetCreateInfo().index_type; } const Identifier &GetIndexName() const override { - return GetCreateInfo().index_name; + return GetCreateInfo().GetIndexName(); } IndexConstraintType GetConstraintType() const override { return GetCreateInfo().constraint_type; diff --git a/src/duckdb/src/include/duckdb/execution/physical_operator.hpp b/src/duckdb/src/include/duckdb/execution/physical_operator.hpp index 76752c47f..c74ce5321 100644 --- a/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +++ b/src/duckdb/src/include/duckdb/execution/physical_operator.hpp @@ -75,7 +75,8 @@ class PhysicalOperator { return InsertionOrderPreservingMap(); } static void SetEstimatedCardinality(InsertionOrderPreservingMap &result, idx_t estimated_cardinality); - virtual string ToString(const ProfilerPrintFormat &format = ProfilerPrintFormat::Default()) const; + virtual string ToString(optional_ptr context = nullptr, + const ProfilerPrintFormat &format = ProfilerPrintFormat::Default()) const; void Print() const; virtual vector> GetChildren() const; diff --git a/src/duckdb/src/include/duckdb/function/scalar/crypto_hash.hpp b/src/duckdb/src/include/duckdb/function/scalar/crypto_hash.hpp new file mode 100644 index 000000000..1ffe1e989 --- /dev/null +++ b/src/duckdb/src/include/duckdb/function/scalar/crypto_hash.hpp @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// crypto_hash.hpp +// +// +//===----------------------------------------------------------------------===// 
+ +#pragma once + +#include "duckdb/common/encryption_state.hpp" +#include "duckdb/common/vector/string_vector.hpp" +#include "duckdb/execution/expression_executor_state.hpp" +#include "duckdb/function/scalar_function.hpp" +#include "duckdb/main/client_context.hpp" +#include "duckdb/main/database.hpp" + +namespace duckdb { +namespace crypto_hash_scalar { + +struct LocalState : public FunctionLocalState { + LocalState(shared_ptr encryption_util_p, CryptoHashFunction function) + : encryption_util(std::move(encryption_util_p)) { + D_ASSERT(encryption_util); + hash_state = encryption_util->CreateHashState(function); + D_ASSERT(hash_state); + } + + shared_ptr encryption_util; + unique_ptr hash_state; +}; + +template +unique_ptr InitLocalState(ExpressionState &state, const BoundFunctionExpression &, FunctionData *) { + auto &context = state.GetContext(); + auto &config = DBConfig::GetConfig(context); + if (!config.options.force_mbedtls && config.encryption_util && config.encryption_util->SupportsHash(FUNCTION)) { + return make_uniq(config.encryption_util, FUNCTION); + } + return make_uniq(context.db->GetMbedTLSUtil(config.options.force_mbedtls), FUNCTION); +} + +struct StringData { + StringData(CryptoHashState &hash_state, StringHeap &heap) : hash_state(hash_state), heap(heap) { + } + + CryptoHashState &hash_state; + StringHeap &heap; +}; + +struct NumberData { + explicit NumberData(CryptoHashState &hash_state) : hash_state(hash_state) { + } + + CryptoHashState &hash_state; +}; + +template +struct StringOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &, idx_t, StringData &data) { + auto hash = data.heap.EmptyString(CryptoHash::GetHexDigestSize(FUNCTION)); + data.hash_state.HashHex(const_data_ptr_cast(input.GetData()), input.GetSize(), hash.GetDataWriteable()); + hash.Finalize(); + return hash; + } +}; + +inline LocalState &GetLocalState(ExpressionState &state) { + return ExecuteFunctionState::GetFunctionState(state)->Cast(); +} + +} // 
namespace crypto_hash_scalar +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/main/extension_callback_manager.hpp b/src/duckdb/src/include/duckdb/main/extension_callback_manager.hpp index a3502a4c1..0b1c99219 100644 --- a/src/duckdb/src/include/duckdb/main/extension_callback_manager.hpp +++ b/src/duckdb/src/include/duckdb/main/extension_callback_manager.hpp @@ -22,6 +22,7 @@ class OperatorExtension; class OptimizerExtension; class ParserExtension; class PlannerExtension; +class ProfilerExtension; class StorageExtension; struct ExtensionCallbackRegistry; @@ -46,6 +47,7 @@ class ExtensionCallbackManager { void Register(shared_ptr extension); void Register(const string &name, shared_ptr extension); void Register(shared_ptr extension); + void Register(const string &name, shared_ptr extension); ExtensionCallbackIteratorHelper> OperatorExtensions() const; ExtensionCallbackIteratorHelper OptimizerExtensions() const; @@ -53,6 +55,7 @@ class ExtensionCallbackManager { ExtensionCallbackIteratorHelper PlannerExtensions() const; ExtensionCallbackIteratorHelper> ExtensionCallbacks() const; optional_ptr FindStorageExtension(const string &name) const; + optional_ptr FindProfilerExtension(const string &name) const; bool HasParserExtensions() const; private: diff --git a/src/duckdb/src/include/duckdb/main/profiler/profiler_print_format.hpp b/src/duckdb/src/include/duckdb/main/profiler/profiler_print_format.hpp index fcf9452b4..f659d6d2a 100644 --- a/src/duckdb/src/include/duckdb/main/profiler/profiler_print_format.hpp +++ b/src/duckdb/src/include/duckdb/main/profiler/profiler_print_format.hpp @@ -42,10 +42,6 @@ struct ProfilerPrintFormat { return ProfilerPrintFormat("mermaid"); } - //! Resolve a (case-insensitive) format name against the registry, returning its canonical form. Throws - //! InvalidInputException listing the valid format names when the name is not recognized. 
- static ProfilerPrintFormat FromString(const string &name); - bool operator==(const ProfilerPrintFormat &other) const { return format == other.format; } diff --git a/src/duckdb/src/include/duckdb/main/profiler_extension.hpp b/src/duckdb/src/include/duckdb/main/profiler_extension.hpp new file mode 100644 index 000000000..d22a83cd7 --- /dev/null +++ b/src/duckdb/src/include/duckdb/main/profiler_extension.hpp @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/main/profiler_extension.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/common/common.hpp" +#include "duckdb/main/extension_callback_manager.hpp" + +#include + +namespace duckdb { +struct DBConfig; +class ClientContext; +class TreeRenderer; + +//! Factory that creates and configures a TreeRenderer for a given client context +using create_tree_renderer_t = std::function(ClientContext &context)>; + +//! A ProfilerExtension registers a pluggable tree renderer for a profiler / EXPLAIN output format +class ProfilerExtension { +public: + //! 
Creates the renderer for the registered format name + create_tree_renderer_t create_renderer = nullptr; + + static void Register(DBConfig &config, const string &format_name, shared_ptr extension); + static optional_ptr Find(const ClientContext &context, const string &format_name); +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/main/query_profiler.hpp b/src/duckdb/src/include/duckdb/main/query_profiler.hpp index 41ae07c0b..89eadeaf9 100644 --- a/src/duckdb/src/include/duckdb/main/query_profiler.hpp +++ b/src/duckdb/src/include/duckdb/main/query_profiler.hpp @@ -29,6 +29,7 @@ namespace duckdb { +class BaseResultRenderer; class ClientContext; class ExpressionExecutor; class ProfilingNode; @@ -137,6 +138,8 @@ class QueryProfiler { DUCKDB_API string QueryTreeToString() const; DUCKDB_API void QueryTreeToStream(std::ostream &str) const; + //! Render the framed query tree (header, total time, phase timings, operator tree) into the given sink. + DUCKDB_API void RenderQueryTree(BaseResultRenderer &ss) const; DUCKDB_API void Print(); //! Render the profiler output as a string, formatted based on the given ProfilerPrintFormat (or the configured @@ -145,9 +148,9 @@ class QueryProfiler { //! Render the profiler output for the given profiler format name (e.g. "json", "query_tree"), handling the //! profiling-disabled and no-output cases. DUCKDB_API string ToString(const string &profiler_format_name) const; - //! Render the profiling node tree using the given renderer. Returns an empty string when there is no tree to - //! render. Called by TreeRenderer::RenderProfiler for the formats that render the node tree directly. - DUCKDB_API string RenderProfilingNodeTree(TreeRenderer &renderer) const; + //! Render the profiling node tree using the given renderer into the sink (renders nothing when there is no tree). + //! Called by TreeRenderer::RenderProfiler for the formats that render the node tree directly. 
+ DUCKDB_API void RenderProfilingNodeTree(TreeRenderer &renderer, BaseResultRenderer &ss) const; // Sanitize a Value::MAP static Value JSONSanitize(const Value &input); @@ -166,9 +169,13 @@ class QueryProfiler { private: unique_ptr CreateTree(const PhysicalOperator &root, const idx_t depth = 0); - void Render(const ProfilingNode &node, std::ostream &str) const; - //! Render the profiler output via the given renderer (nullptr renders nothing), handling the disabled case. + void Render(const ProfilingNode &node, BaseResultRenderer &str) const; + //! Render the profiler output to a string via the given renderer (nullptr renders nothing), handling the disabled + //! case. Used for the programmatic / string paths. string RenderProfilerOutput(optional_ptr renderer) const; + //! Print the profiler output directly via the renderer's print sink (nullptr prints nothing), handling the + //! disabled case. Only used on the terminal-print paths. + void PrintProfilerOutput(optional_ptr renderer) const; private: ClientContext &context; diff --git a/src/duckdb/src/include/duckdb/main/settings.hpp b/src/duckdb/src/include/duckdb/main/settings.hpp index 50eea65bf..108ae8143 100644 --- a/src/duckdb/src/include/duckdb/main/settings.hpp +++ b/src/duckdb/src/include/duckdb/main/settings.hpp @@ -1781,6 +1781,18 @@ struct ProgressBarTimeSetting { static Value GetSetting(const ClientContext &context); }; +struct RegexMatchOperatorSemanticsSetting { + using RETURN_TYPE = RegexMatchOperatorSemantics; + static constexpr const char *Name = "regex_match_operator_semantics"; + static constexpr const char *Description = + "Configures whether regex match operators use partial or full string matching"; + static constexpr const char *InputType = "VARCHAR"; + static constexpr const char *DefaultValue = "partial"; + static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; + static constexpr idx_t SettingIndex = NEXT_SETTING_INDEX(); + static void OnSet(SettingCallbackInfo &info, 
Value &input); +}; + struct ScalarSubqueryErrorOnMultipleRowsSetting { using RETURN_TYPE = bool; static constexpr const char *Name = "scalar_subquery_error_on_multiple_rows"; diff --git a/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp b/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp index d511f123a..e156c1e54 100644 --- a/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +++ b/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp @@ -11,6 +11,7 @@ #include "duckdb/common/identifier.hpp" #include "duckdb/common/vector.hpp" #include "duckdb/parser/parsed_expression.hpp" +#include "duckdb/parser/qualified_name.hpp" #include "duckdb/parser/result_modifier.hpp" #include "duckdb/parser/keyword_helper.hpp" @@ -99,26 +100,32 @@ class FunctionExpression : public ParsedExpression { bool is_operator = false, bool export_state = false); public: + const QualifiedName &GetQualifiedName() const { + return qualified_name; + } + QualifiedName &GetQualifiedNameMutable() { + return qualified_name; + } const Identifier &Catalog() const { - return catalog; + return qualified_name.Catalog(); } Identifier &CatalogMutable() { - return catalog; + return qualified_name.CatalogMutable(); } const Identifier &Schema() const { - return schema; + return qualified_name.Schema(); } Identifier &SchemaMutable() { - return schema; + return qualified_name.SchemaMutable(); } const Identifier &FunctionName() const { - return function_name; + return qualified_name.Name(); } Identifier &FunctionNameMutable() { - return function_name; + return qualified_name.NameMutable(); } void SetFunctionName(string function_name_p) { - function_name = Identifier(std::move(function_name_p)); + qualified_name.NameMutable() = Identifier(std::move(function_name_p)); } bool IsOperator() const { return is_operator; @@ -178,12 +185,8 @@ class FunctionExpression : public ParsedExpression { } private: - //! 
Catalog of the function - Identifier catalog; - //! Schema of the function - Identifier schema; - //! Function name - Identifier function_name; + //! Qualified name of the function (catalog.schema.name) + QualifiedName qualified_name; //! Whether or not the function is an operator, only used for rendering bool is_operator; //! List of arguments to the function diff --git a/src/duckdb/src/include/duckdb/parser/expression/type_expression.hpp b/src/duckdb/src/include/duckdb/parser/expression/type_expression.hpp index 1cf7b52e1..ce7f77e9d 100644 --- a/src/duckdb/src/include/duckdb/parser/expression/type_expression.hpp +++ b/src/duckdb/src/include/duckdb/parser/expression/type_expression.hpp @@ -11,6 +11,7 @@ #include "duckdb/common/identifier.hpp" #include "duckdb/common/vector.hpp" #include "duckdb/parser/parsed_expression.hpp" +#include "duckdb/parser/qualified_name.hpp" #include "duckdb/parser/keyword_helper.hpp" namespace duckdb { @@ -25,20 +26,26 @@ class TypeExpression : public ParsedExpression { TypeExpression(const string &type_name, vector> children); public: + const QualifiedName &GetQualifiedName() const { + return qualified_name; + } + QualifiedName &GetQualifiedNameMutable() { + return qualified_name; + } const Identifier &GetTypeName() const { - return type_name; + return qualified_name.Name(); } const Identifier &GetSchema() const { - return schema; + return qualified_name.Schema(); } void SetSchema(Identifier new_schema) { - schema = std::move(new_schema); + qualified_name.SchemaMutable() = std::move(new_schema); } const Identifier &GetCatalog() const { - return catalog; + return qualified_name.Catalog(); } void SetCatalog(Identifier new_catalog) { - catalog = std::move(new_catalog); + qualified_name.CatalogMutable() = std::move(new_catalog); } const vector> &GetChildren() const { return children; @@ -63,10 +70,8 @@ class TypeExpression : public ParsedExpression { private: TypeExpression(); - //! 
Qualified name parts - Identifier catalog; - Identifier schema; - Identifier type_name; + //! Qualified name of the type (catalog.schema.name) + QualifiedName qualified_name; //! Children of the type expression (e.g. type parameters) vector> children; diff --git a/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp b/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp index ae76e407c..397ab3661 100644 --- a/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +++ b/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp @@ -67,23 +67,29 @@ class WindowExpression : public ParsedExpression { static ExpressionType WindowToExpressionType(const string &fun_name); public: + const QualifiedName &GetQualifiedName() const { + return qualified_name; + } + QualifiedName &GetQualifiedNameMutable() { + return qualified_name; + } const Identifier &Catalog() const { - return catalog; + return qualified_name.Catalog(); } Identifier &CatalogMutable() { - return catalog; + return qualified_name.CatalogMutable(); } const Identifier &Schema() const { - return schema; + return qualified_name.Schema(); } Identifier &SchemaMutable() { - return schema; + return qualified_name.SchemaMutable(); } const Identifier &FunctionName() const { - return function_name; + return qualified_name.Name(); } Identifier &FunctionNameMutable() { - return function_name; + return qualified_name.NameMutable(); } const vector> &Partitions() const { return partitions; @@ -374,12 +380,8 @@ class WindowExpression : public ParsedExpression { } private: - //! Catalog of the aggregate function - Identifier catalog; - //! Schema of the aggregate function - Identifier schema; - //! Name of the aggregate function - Identifier function_name; + //! Qualified name of the aggregate function (catalog.schema.name) + QualifiedName qualified_name; //! The child expression of the main window function vector arguments; //! 
The set of expressions to partition by diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp index 3cea24761..0b2be8b89 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp @@ -11,6 +11,7 @@ #include "duckdb/common/identifier.hpp" #include "duckdb/common/enums/catalog_type.hpp" #include "duckdb/parser/parsed_data/parse_info.hpp" +#include "duckdb/parser/qualified_name.hpp" #include "duckdb/common/enums/on_entry_not_found.hpp" #include "duckdb/catalog/dependency_list.hpp" @@ -34,14 +35,24 @@ enum class AlterBindMode { BIND_ON_ALTER, SKIP_BINDING }; struct AlterEntryData { AlterEntryData() { } - AlterEntryData(Identifier catalog_p, Identifier schema_p, Identifier name_p, OnEntryNotFound if_not_found) - : catalog(std::move(catalog_p)), schema(std::move(schema_p)), name(std::move(name_p)), - if_not_found(if_not_found) { + AlterEntryData(QualifiedName qualified_name_p, OnEntryNotFound if_not_found) + : qualified_name(std::move(qualified_name_p)), if_not_found(if_not_found) { } - Identifier catalog; - Identifier schema; - Identifier name; + const Identifier &Catalog() const { + return qualified_name.Catalog(); + } + const Identifier &Schema() const { + return qualified_name.Schema(); + } + const Identifier &Name() const { + return qualified_name.Name(); + } + const QualifiedName &GetQualifiedName() const { + return qualified_name; + } + + QualifiedName qualified_name; OnEntryNotFound if_not_found; }; @@ -51,17 +62,12 @@ struct AlterInfo : public ParseInfo { public: AlterInfo(AlterType type, Identifier catalog, Identifier schema, Identifier name, OnEntryNotFound if_not_found); + AlterInfo(AlterType type, QualifiedName qualified_name, OnEntryNotFound if_not_found); ~AlterInfo() override; AlterType type; //! if exists OnEntryNotFound if_not_found; - //! 
Catalog name to alter - Identifier catalog; - //! Schema name to alter - Identifier schema; - //! Entry name to alter - Identifier name; //! Allow altering internal entries bool allow_internal; //! Determine whether to skip Bind @@ -69,6 +75,32 @@ struct AlterInfo : public ParseInfo { //! New dependencies for the altered entry (set during binding) unique_ptr new_dependencies; +public: + const QualifiedName &GetQualifiedName() const { + return qualified_name; + } + QualifiedName &GetQualifiedNameMutable() { + return qualified_name; + } + const Identifier &Catalog() const { + return qualified_name.Catalog(); + } + Identifier &CatalogMutable() { + return qualified_name.CatalogMutable(); + } + const Identifier &Schema() const { + return qualified_name.Schema(); + } + Identifier &SchemaMutable() { + return qualified_name.SchemaMutable(); + } + const Identifier &Name() const { + return qualified_name.Name(); + } + Identifier &NameMutable() { + return qualified_name.NameMutable(); + } + public: virtual CatalogType GetCatalogType() const = 0; virtual unique_ptr Copy() const = 0; @@ -86,6 +118,9 @@ struct AlterInfo : public ParseInfo { protected: explicit AlterInfo(AlterType type); + + //! Qualified name of the entry to alter (catalog.schema.name) + QualifiedName qualified_name; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp index 606d8654a..297a1be8b 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp @@ -10,6 +10,7 @@ #include "duckdb/common/identifier.hpp" #include "duckdb/parser/parsed_data/parse_info.hpp" +#include "duckdb/parser/qualified_name.hpp" #include "duckdb/common/vector.hpp" #include "duckdb/common/unordered_map.hpp" #include "duckdb/common/types/value.hpp" @@ -27,12 +28,6 @@ struct CopyInfo : public ParseInfo { public: CopyInfo(); - //! 
The catalog name to copy to/from - Identifier catalog; - //! The schema name to copy to/from - Identifier schema; - //! The table name to copy to/from - Identifier table; //! List of columns to copy to/from vector select_list; //! Whether or not this is a copy to file (false) or copy from a file (true) @@ -52,6 +47,32 @@ struct CopyInfo : public ParseInfo { //! The SQL statement used instead of a table when copying data out to a file unique_ptr select_statement; +public: + const QualifiedName &GetQualifiedName() const { + return qualified_name; + } + QualifiedName &GetQualifiedNameMutable() { + return qualified_name; + } + const Identifier &Catalog() const { + return qualified_name.Catalog(); + } + Identifier &CatalogMutable() { + return qualified_name.CatalogMutable(); + } + const Identifier &Schema() const { + return qualified_name.Schema(); + } + Identifier &SchemaMutable() { + return qualified_name.SchemaMutable(); + } + const Identifier &Table() const { + return qualified_name.Name(); + } + Identifier &TableMutable() { + return qualified_name.NameMutable(); + } + public: string CopyOptionsToString() const; @@ -62,6 +83,10 @@ struct CopyInfo : public ParseInfo { void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); + +private: + //! Qualified name of the table to copy to/from (catalog.schema.table) + QualifiedName qualified_name; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/create_collation_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/create_collation_info.hpp index bc3f85cba..d4f672ab4 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/create_collation_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/create_collation_info.hpp @@ -19,7 +19,12 @@ struct CreateCollationInfo : public CreateInfo { bool not_required_for_equality_p); //! 
The name of the collation - Identifier name; + const Identifier &GetCollationName() const { + return qualified_name.Name(); + } + void SetCollationName(Identifier name) { + qualified_name.NameMutable() = std::move(name); + } //! The collation function to push in case collation is required ScalarFunction function; //! Whether or not the collation can be combined with other collations. diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/create_coordinate_system_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/create_coordinate_system_info.hpp index 179dac63a..ad4025ceb 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/create_coordinate_system_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/create_coordinate_system_info.hpp @@ -19,7 +19,12 @@ struct CreateCoordinateSystemInfo : public CreateInfo { //! The name of the coordinate system //! This is typically in the format "AUTH:CODE", e.g. "OGC:CRS84" - Identifier name; + const Identifier &GetCoordinateSystemName() const { + return qualified_name.Name(); + } + void SetCoordinateSystemName(Identifier name) { + qualified_name.NameMutable() = std::move(name); + } //! The authority identifier of the coordinate system (e.g. "EPSG") string authority; diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/create_copy_function_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/create_copy_function_info.hpp index 97d77fb8a..363b49db9 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/create_copy_function_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/create_copy_function_info.hpp @@ -18,7 +18,12 @@ struct CreateCopyFunctionInfo : public CreateInfo { DUCKDB_API explicit CreateCopyFunctionInfo(CopyFunction function); //! Function name - Identifier name; + const Identifier &GetCopyFunctionName() const { + return qualified_name.Name(); + } + void SetCopyFunctionName(Identifier name) { + qualified_name.NameMutable() = std::move(name); + } //! 
The table function CopyFunction function; diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp index b42e9d3dd..3d304f246 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp @@ -29,13 +29,18 @@ struct FunctionDescription { struct CreateFunctionInfo : public CreateInfo { explicit CreateFunctionInfo(CatalogType type, Identifier schema = Identifier::DefaultSchema()); - //! Function name - Identifier name; //! The function name of which this function is an alias Identifier alias_of; //! Function description vector descriptions; + const Identifier &GetFunctionName() const { + return qualified_name.Name(); + } + void SetFunctionName(Identifier name) { + qualified_name.NameMutable() = std::move(name); + } + DUCKDB_API void CopyFunctionProperties(CreateFunctionInfo &other) const; }; diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp index 995654a3b..8795f8107 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp @@ -25,7 +25,12 @@ struct CreateIndexInfo : public CreateInfo { //! The table name of the underlying table Identifier table; //! The name of the index - Identifier index_name; + const Identifier &GetIndexName() const { + return qualified_name.Name(); + } + void SetIndexName(Identifier name) { + qualified_name.NameMutable() = std::move(name); + } //! Options values (WITH ...) 
case_insensitive_map_t options; diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp index 77c39c6e7..3e8e35b31 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp @@ -11,6 +11,7 @@ #include "duckdb/common/enums/catalog_type.hpp" #include "duckdb/common/identifier.hpp" #include "duckdb/parser/parsed_data/parse_info.hpp" +#include "duckdb/parser/qualified_name.hpp" #include "duckdb/common/enum_util.hpp" #include "duckdb/common/enums/on_create_conflict.hpp" #include "duckdb/common/types/value.hpp" @@ -26,18 +27,14 @@ struct CreateInfo : public ParseInfo { public: explicit CreateInfo(CatalogType type, Identifier schema = Identifier::DefaultSchema(), Identifier catalog_p = Identifier::InvalidCatalog()) - : ParseInfo(TYPE), type(type), catalog(std::move(catalog_p)), schema(std::move(schema)), - on_conflict(OnCreateConflict::ERROR_ON_CONFLICT), temporary(false), internal(false) { + : ParseInfo(TYPE), type(type), on_conflict(OnCreateConflict::ERROR_ON_CONFLICT), temporary(false), + internal(false), qualified_name(std::move(catalog_p), std::move(schema), Identifier()) { } ~CreateInfo() override { } //! The to-be-created catalog type CatalogType type; - //! The catalog name of the entry - Identifier catalog; - //! The schema name of the entry - Identifier schema; //! What to do on create conflict OnCreateConflict on_conflict; //! Whether or not the entry is temporary @@ -55,6 +52,26 @@ struct CreateInfo : public ParseInfo { //! 
Key-value tags with additional metadata InsertionOrderPreservingMap tags; +public: + const QualifiedName &GetQualifiedName() const { + return qualified_name; + } + QualifiedName &GetQualifiedNameMutable() { + return qualified_name; + } + const Identifier &Catalog() const { + return qualified_name.Catalog(); + } + Identifier &CatalogMutable() { + return qualified_name.CatalogMutable(); + } + const Identifier &Schema() const { + return qualified_name.Schema(); + } + Identifier &SchemaMutable() { + return qualified_name.SchemaMutable(); + } + public: void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); @@ -71,6 +88,10 @@ struct CreateInfo : public ParseInfo { throw NotImplementedException("ToString not supported for this type of CreateInfo: '%s'", EnumUtil::ToString(info_type)); } + +protected: + //! Qualified name of the created entry (catalog.schema.name) + QualifiedName qualified_name; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/create_secret_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/create_secret_info.hpp index 2dfee3158..9130ecacf 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/create_secret_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/create_secret_info.hpp @@ -33,7 +33,12 @@ struct CreateSecretInfo : public CreateInfo { // NOLINT: work-around bug in clan //! (optionally) the provider of the secret credentials unique_ptr provider; //! (optionally) the name of the secret - Identifier name; + const Identifier &GetSecretName() const { + return qualified_name.Name(); + } + void SetSecretName(Identifier name) { + qualified_name.NameMutable() = std::move(name); + } //! (optionally) the scope of the secret unique_ptr scope; //! 
Named parameter list (if any) diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp index 066ef7e06..7bb2f049a 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp @@ -33,7 +33,12 @@ struct CreateSequenceInfo : public CreateInfo { CreateSequenceInfo(); //! Sequence name to create - Identifier name; + const Identifier &GetSequenceName() const { + return qualified_name.Name(); + } + void SetSequenceName(Identifier name) { + qualified_name.NameMutable() = std::move(name); + } //! Usage count of the sequence uint64_t usage_count; //! The increment value diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_info.hpp index fcc3e723c..66e90a558 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_info.hpp @@ -23,7 +23,12 @@ struct CreateTableInfo : public CreateInfo { DUCKDB_API CreateTableInfo(SchemaCatalogEntry &schema, Identifier name); //! Table name to insert to - Identifier table; + const Identifier &GetTableName() const { + return qualified_name.Name(); + } + void SetTableName(Identifier name) { + qualified_name.NameMutable() = std::move(name); + } //! List of columns of the table ColumnList columns; //! 
List of constraints on the table diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/create_trigger_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/create_trigger_info.hpp index 74bdacaab..68b6c213c 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/create_trigger_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/create_trigger_info.hpp @@ -20,7 +20,12 @@ struct CreateTriggerInfo : public CreateInfo { CreateTriggerInfo(); //! Trigger name - Identifier trigger_name; + const Identifier &GetTriggerName() const { + return qualified_name.Name(); + } + void SetTriggerName(Identifier name) { + qualified_name.NameMutable() = std::move(name); + } //! The table the trigger is on unique_ptr base_table; //! When the trigger fires (BEFORE/AFTER/INSTEAD OF) diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp index 3ff18a58c..0bf7fcefa 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp @@ -56,7 +56,12 @@ struct CreateTypeInfo : public CreateInfo { CreateTypeInfo(string name_p, LogicalType type_p, bind_logical_type_function_t bind_function_p = nullptr); //! Name of the Type - Identifier name; + const Identifier &GetTypeName() const { + return qualified_name.Name(); + } + void SetTypeName(Identifier name) { + qualified_name.NameMutable() = std::move(name); + } //! Logical Type LogicalType type; //! 
Used by create enum from query diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp index 604bf3462..f0cd9c2a6 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp @@ -25,7 +25,12 @@ struct CreateViewInfo : public CreateInfo { public: //! View name - Identifier view_name; + const Identifier &GetViewName() const { + return qualified_name.Name(); + } + void SetViewName(Identifier name) { + qualified_name.NameMutable() = std::move(name); + } //! Aliases of the view vector aliases; //! Return types diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp index 58dbff964..232c6b437 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp @@ -12,6 +12,7 @@ #include "duckdb/common/identifier.hpp" #include "duckdb/parser/parsed_data/parse_info.hpp" #include "duckdb/parser/parsed_data/extra_drop_info.hpp" +#include "duckdb/parser/qualified_name.hpp" #include "duckdb/common/enums/on_entry_not_found.hpp" namespace duckdb { @@ -27,12 +28,6 @@ struct DropInfo : public ParseInfo { //! The catalog type to drop CatalogType type; - //! Catalog name to drop from, if any - Identifier catalog; - //! Schema name to drop from, if any - Identifier schema; - //! Element name to drop - Identifier name; //! Ignore if the entry does not exist instead of failing OnEntryNotFound if_not_found = OnEntryNotFound::THROW_EXCEPTION; //! Cascade drop (drop all dependents instead of throwing an error if there @@ -43,12 +38,42 @@ struct DropInfo : public ParseInfo { //! 
Extra info related to this drop unique_ptr extra_drop_info; +public: + const QualifiedName &GetQualifiedName() const { + return qualified_name; + } + QualifiedName &GetQualifiedNameMutable() { + return qualified_name; + } + const Identifier &Catalog() const { + return qualified_name.Catalog(); + } + Identifier &CatalogMutable() { + return qualified_name.CatalogMutable(); + } + const Identifier &Schema() const { + return qualified_name.Schema(); + } + Identifier &SchemaMutable() { + return qualified_name.SchemaMutable(); + } + const Identifier &Name() const { + return qualified_name.Name(); + } + Identifier &NameMutable() { + return qualified_name.NameMutable(); + } + public: virtual unique_ptr Copy() const; string ToString() const; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); + +private: + //! Qualified name of the entry to drop (catalog.schema.name) + QualifiedName qualified_name; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/parser/parser_options.hpp b/src/duckdb/src/include/duckdb/parser/parser_options.hpp index ab3df7151..94a725cb8 100644 --- a/src/duckdb/src/include/duckdb/parser/parser_options.hpp +++ b/src/duckdb/src/include/duckdb/parser/parser_options.hpp @@ -10,6 +10,7 @@ #include "duckdb/common/common.hpp" #include "duckdb/common/enums/allow_parser_override.hpp" +#include "duckdb/common/enums/regex_match_operator_semantics.hpp" #include "duckdb/common/optional_ptr.hpp" namespace duckdb { @@ -20,6 +21,7 @@ struct ParserCache; struct ParserOptions { bool preserve_identifier_case = true; bool integer_division = false; + RegexMatchOperatorSemantics regex_match_operator_semantics = RegexMatchOperatorSemantics::PARTIAL; idx_t max_expression_depth = 1000; optional_ptr extensions; AllowParserOverride parser_override_setting = AllowParserOverride::DEFAULT_OVERRIDE; diff --git a/src/duckdb/src/include/duckdb/parser/peg/inlined_grammar.hpp 
b/src/duckdb/src/include/duckdb/parser/peg/inlined_grammar.hpp index 4af010369..a2bedd748 100644 --- a/src/duckdb/src/include/duckdb/parser/peg/inlined_grammar.hpp +++ b/src/duckdb/src/include/duckdb/parser/peg/inlined_grammar.hpp @@ -911,13 +911,16 @@ const char INLINED_PEG_GRAMMAR[] = { "BetweenInLikeOpExpression <- BetweenClause / InClause / LikeClause\n" "LikeClause <- LikeVariations OtherOperatorExpression EscapeClause?\n" "EscapeClause <- 'ESCAPE' ComparisonExpression\n" - "LikeVariations <- SimilarToToken / ILikeToken / LikeToken / GlobToken / NotILikeOp / NotLikeOp / NotSimilarToOp\n" + "LikeVariations <- SimilarToToken / RegexInsensitiveMatchToken / RegexMatchToken / ILikeToken / LikeToken / GlobToken / NotILikeOp / NotLikeOp / NotRegexInsensitiveMatchOp / NotSimilarToOp\n" "LikeToken <- 'LIKE' / '~~'\n" "ILikeToken <- 'ILIKE' / '~~*'\n" "GlobToken <- 'GLOB' / '~~~'\n" - "SimilarToToken <- ('SIMILAR' 'TO') / '~'\n" + "SimilarToToken <- 'SIMILAR' 'TO'\n" + "RegexMatchToken <- '~'\n" + "RegexInsensitiveMatchToken <- '~*'\n" "NotILikeOp <- '!~~*'\n" "NotLikeOp <- '!~~'\n" + "NotRegexInsensitiveMatchOp <- '!~*'\n" "NotSimilarToOp <- '!~'\n" "InClause <- 'IN' InExpression\n" "InExpression <- InExpressionList / InSelectStatement / InContainsExpression\n" @@ -941,7 +944,7 @@ const char INLINED_PEG_GRAMMAR[] = { "StringOperator <- '^@' / '||'\n" "QualifiedOperator <- 'OPERATOR' Parens(QualifiedOperatorContents)\n" "QualifiedOperatorContents <- ColIdDot* AnyOp\n" - "AnyOp <- '!~~*' / '>>=' / '<<=' / '->>' / '!~~' / '~~*' / '~~~' / '!~' / '^@' / '||' / '&&' / '@>' / '<@' / '<=' / '>=' / '<>' / '!=' / '==' / '<<' / '>>' / '//' / '**' / '->' / '~~' / '+' / '-' / '*' / '/' / '%' / '^' / '<' / '>' / '=' / '&' / '|' / '~' / '!'\n" + "AnyOp <- '!~~*' / '>>=' / '<<=' / '->>' / '!~~' / '!~*' / '~~*' / '~~~' / '~*' / '!~' / '^@' / '||' / '&&' / '@>' / '<@' / '<=' / '>=' / '<>' / '!=' / '==' / '<<' / '>>' / '//' / '**' / '->' / '~~' / '+' / '-' / '*' / '/' / '%' / '^' / '<' / 
'>' / '=' / '&' / '|' / '~' / '!'\n" "# LEVEL 9\n" "BitwiseExpression <- AdditiveExpression BitwiseExpressionTail*\n" "BitwiseExpressionTail <- BitOperator AdditiveExpression\n" diff --git a/src/duckdb/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp b/src/duckdb/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp index 50eb27c1f..047f8c9f6 100644 --- a/src/duckdb/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp +++ b/src/duckdb/src/include/duckdb/parser/peg/transformer/peg_transformer.hpp @@ -810,7 +810,7 @@ class PEGTransformerFactory { static unique_ptr TransformRowTypeInternal(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformRowType(PEGTransformer &transformer, - const child_list_t &col_id_type_list); + const optional> &col_id_type_list); static unique_ptr TransformSetofTypeInternal(PEGTransformer &transformer, ParseResult &parse_result); static unique_ptr TransformSetofType(PEGTransformer &transformer, const LogicalType &type); @@ -2410,12 +2410,21 @@ class PEGTransformerFactory { static unique_ptr TransformSimilarToTokenInternal(PEGTransformer &transformer, ParseResult &parse_result); static string TransformSimilarToToken(PEGTransformer &transformer); + static unique_ptr TransformRegexMatchTokenInternal(PEGTransformer &transformer, + ParseResult &parse_result); + static string TransformRegexMatchToken(PEGTransformer &transformer); + static unique_ptr TransformRegexInsensitiveMatchTokenInternal(PEGTransformer &transformer, + ParseResult &parse_result); + static string TransformRegexInsensitiveMatchToken(PEGTransformer &transformer); static unique_ptr TransformNotILikeOpInternal(PEGTransformer &transformer, ParseResult &parse_result); static string TransformNotILikeOp(PEGTransformer &transformer); static unique_ptr TransformNotLikeOpInternal(PEGTransformer &transformer, ParseResult &parse_result); static string TransformNotLikeOp(PEGTransformer &transformer); + static unique_ptr 
TransformNotRegexInsensitiveMatchOpInternal(PEGTransformer &transformer, + ParseResult &parse_result); + static string TransformNotRegexInsensitiveMatchOp(PEGTransformer &transformer); static unique_ptr TransformNotSimilarToOpInternal(PEGTransformer &transformer, ParseResult &parse_result); static string TransformNotSimilarToOp(PEGTransformer &transformer); diff --git a/src/duckdb/src/include/duckdb/parser/qualified_name.hpp b/src/duckdb/src/include/duckdb/parser/qualified_name.hpp index e19f62c88..8c0416fbe 100644 --- a/src/duckdb/src/include/duckdb/parser/qualified_name.hpp +++ b/src/duckdb/src/include/duckdb/parser/qualified_name.hpp @@ -11,19 +11,84 @@ #include "duckdb/common/string.hpp" #include "duckdb/planner/binding_alias.hpp" #include "duckdb/parser/keyword_helper.hpp" +#include "duckdb/common/vector.hpp" namespace duckdb { struct QualifiedName { - Identifier catalog; - Identifier schema; - Identifier name; + QualifiedName() = default; + QualifiedName(Identifier catalog_p, Identifier schema_p, Identifier name_p) : name(std::move(name_p)) { + // store the catalog/schema as a schema path - in preparation for multi-level schema support + // for now we only support a single schema level, so the path is at most [catalog, schema] + if (!catalog_p.empty()) { + schema_path.push_back(std::move(catalog_p)); + schema_path.push_back(std::move(schema_p)); + } else if (!schema_p.empty()) { + schema_path.push_back(std::move(schema_p)); + } + } + + //! The catalog is the first element of the schema path, but only when the path is fully qualified (size 2) + const Identifier &Catalog() const { + return schema_path.size() == 2 ? schema_path[0] : empty; + } + Identifier &CatalogMutable() { + EnsureQualified(); + return schema_path[0]; + } + //! 
The schema is the last element of the schema path (or empty if there is no schema) + const Identifier &Schema() const { + if (schema_path.size() == 1) { + return schema_path[0]; + } + if (schema_path.size() == 2) { + return schema_path[1]; + } + return empty; + } + Identifier &SchemaMutable() { + EnsureQualified(); + return schema_path[1]; + } + const Identifier &Name() const { + return name; + } + Identifier &NameMutable() { + return name; + } + //! The full schema path of the qualified name (catalog/schema components) + const vector &SchemaPath() const { + return schema_path; + } //! Parse the (optional) schema and a name from a string in the format of e.g. "schema"."table"; if there is no dot //! the schema will be set to INVALID_SCHEMA static QualifiedName Parse(const string &input); static vector ParseComponents(const string &input); string ToString() const; + + hash_t Hash() const; + bool operator==(const QualifiedName &rhs) const; + bool operator!=(const QualifiedName &rhs) const; + +private: + //! Normalize the schema path to be fully qualified ([catalog, schema]) so that CatalogMutable()/SchemaMutable() + //! return stable references - the catalog lives at [0] and the schema at [1] + void EnsureQualified() { + if (schema_path.empty()) { + schema_path.resize(2); + } else if (schema_path.size() == 1) { + schema_path.insert(schema_path.begin(), Identifier()); + } + } + +private: + //! The schema path (catalog/schema). For now at most [catalog, schema] (single schema level). + vector schema_path; + //! The name of the entry + Identifier name; + //! 
Always-empty identifier, returned by the accessors when a catalog/schema component is absent + Identifier empty; }; struct QualifiedColumnName { diff --git a/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp b/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp index 89ae24aea..0e76786a9 100644 --- a/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +++ b/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp @@ -11,6 +11,7 @@ #include "duckdb/main/table_description.hpp" #include "duckdb/parser/tableref.hpp" #include "duckdb/parser/tableref/at_clause.hpp" +#include "duckdb/parser/qualified_name.hpp" namespace duckdb { @@ -21,28 +22,53 @@ class BaseTableRef : public TableRef { public: BaseTableRef() - : TableRef(TableReferenceType::BASE_TABLE), catalog_name(INVALID_CATALOG), schema_name(INVALID_SCHEMA) { + : TableRef(TableReferenceType::BASE_TABLE), + qualified_name(Identifier(INVALID_CATALOG), Identifier(INVALID_SCHEMA), Identifier()) { } explicit BaseTableRef(const TableDescription &description) - : TableRef(TableReferenceType::BASE_TABLE), catalog_name(description.database), schema_name(description.schema), - table_name(description.table) { + : TableRef(TableReferenceType::BASE_TABLE), + qualified_name(description.database, description.schema, description.table) { } - //! The catalog name. - Identifier catalog_name; - //! The schema name. - Identifier schema_name; - //! The table name. - Identifier table_name; //! 
The timestamp/version at which to read this table entry (if any) unique_ptr at_clause; +public: + const QualifiedName &GetQualifiedName() const { + return qualified_name; + } + QualifiedName &GetQualifiedNameMutable() { + return qualified_name; + } + const Identifier &Catalog() const { + return qualified_name.Catalog(); + } + Identifier &CatalogMutable() { + return qualified_name.CatalogMutable(); + } + const Identifier &Schema() const { + return qualified_name.Schema(); + } + Identifier &SchemaMutable() { + return qualified_name.SchemaMutable(); + } + const Identifier &Table() const { + return qualified_name.Name(); + } + Identifier &TableMutable() { + return qualified_name.NameMutable(); + } + public: string ToString() const override; bool Equals(const TableRef &other_p) const override; unique_ptr Copy() override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &source); + +private: + //! Qualified name of the base table (catalog.schema.table). 
+ QualifiedName qualified_name; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/planner/binder.hpp b/src/duckdb/src/include/duckdb/planner/binder.hpp index 57ac66174..9a1e389b0 100644 --- a/src/duckdb/src/include/duckdb/planner/binder.hpp +++ b/src/duckdb/src/include/duckdb/planner/binder.hpp @@ -63,6 +63,7 @@ class BoundAtClause; struct CreateInfo; struct CreateTriggerInfo; +struct QualifiedName; struct BoundCreateTableInfo; struct BoundOnConflictInfo; struct CommonTableExpressionInfo; @@ -311,6 +312,7 @@ class Binder : public enable_shared_from_this { void BindVacuumTable(LogicalVacuum &vacuum, unique_ptr &root); static void BindSchemaOrCatalog(ClientContext &context, Identifier &catalog, Identifier &schema); + static void BindSchemaOrCatalog(ClientContext &context, QualifiedName &qualified_name); void BindLogicalType(LogicalType &type); @@ -478,11 +480,15 @@ class Binder : public enable_shared_from_this { TableCatalogEntry &table, TriggerEventType event_type); BoundStatement ExpandRowTriggers(QueryNode &node, vector> &returning_list, const TableCatalogEntry &table, - const vector> &triggers); - //! Registers NEW as a generic binding so child binders resolve NEW.col at depth=1. The returned binder is - //! pushed onto GetActiveBinders(). the caller must keep it alive until the matching pop_back(). - unique_ptr SetupNewRowScope(TableIndex table_index, const vector &col_names, - const vector &col_types); + const vector> &triggers, + TriggerEventType event_type); + //! Registers a row scope binding (named "new" for INSERT, "old" for DELETE) so child binders resolve + //! NEW.col / OLD.col at depth=1. The returned binder is pushed onto GetActiveBinders(). + //! The caller must keep it alive until the matching pop_back(). + unique_ptr SetupRowScope(TableIndex table_index, const vector &col_names, + const vector &col_types, const string &scope_name); + //! 
Returns the correlated-column scope name for a given event type ("new" for INSERT, "old" for DELETE). + static string RowScopeName(TriggerEventType event_type); BoundStatement BindNode(UpdateQueryNode &node); BoundStatement BindNode(DeleteQueryNode &node); BoundStatement BindNode(MergeQueryNode &node); @@ -576,6 +582,9 @@ class Binder : public enable_shared_from_this { //! If only a schema name is provided (e.g. "a.b") then figure out if "a" is a schema or a catalog name void BindSchemaOrCatalog(Identifier &catalog_name, Identifier &schema_name); static void BindSchemaOrCatalog(CatalogEntryRetriever &retriever, Identifier &catalog, Identifier &schema); + //! Resolve the (optional) schema/catalog of a qualified name in-place, overwriting it with the resolved name + void BindSchemaOrCatalog(QualifiedName &qualified_name); + static void BindSchemaOrCatalog(CatalogEntryRetriever &retriever, QualifiedName &qualified_name); Identifier BindCatalog(const Identifier &catalog_name); SchemaCatalogEntry &BindCreateSchema(CreateInfo &info); diff --git a/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp b/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp index 06dd83125..54b33c706 100644 --- a/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +++ b/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp @@ -8,7 +8,6 @@ #pragma once -#include "duckdb/common/case_insensitive_map.hpp" #include "duckdb/parser/expression_map.hpp" #include "duckdb/planner/expression_binder.hpp" @@ -34,6 +33,8 @@ class BaseSelectBinder : public ExpressionBinder { this->bound_columns.clear(); } + static bool IsFunctionallyDependent(const unique_ptr &expr, const vector> &deps); + protected: BindResult BindExpression(unique_ptr &expr_ptr, idx_t depth, bool root_expression = false) override; diff --git a/src/duckdb/src/include/duckdb/planner/logical_operator.hpp 
b/src/duckdb/src/include/duckdb/planner/logical_operator.hpp index 55d35f717..e6286f433 100644 --- a/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +++ b/src/duckdb/src/include/duckdb/planner/logical_operator.hpp @@ -64,7 +64,8 @@ class LogicalOperator { virtual string GetName() const; virtual InsertionOrderPreservingMap ParamsToString() const; - virtual string ToString(const ProfilerPrintFormat &format = ProfilerPrintFormat::Default()) const; + virtual string ToString(optional_ptr context = nullptr, + const ProfilerPrintFormat &format = ProfilerPrintFormat::Default()) const; DUCKDB_API void Print(); //! Debug method: verify that the integrity of expressions & child nodes are maintained virtual void Verify(ClientContext &context); diff --git a/src/duckdb/src/main/client_context.cpp b/src/duckdb/src/main/client_context.cpp index 082cb569c..709b0ef75 100644 --- a/src/duckdb/src/main/client_context.cpp +++ b/src/duckdb/src/main/client_context.cpp @@ -1304,7 +1304,7 @@ void ClientContext::DisableProfiling() { void ClientContext::RegisterFunction(CreateFunctionInfo &info) { RunFunctionInTransaction([&]() { auto existing_function = Catalog::GetEntry( - *this, Identifier::InvalidCatalog(), info.schema, info.name, OnEntryNotFound::RETURN_NULL); + *this, Identifier::InvalidCatalog(), info.Schema(), info.GetFunctionName(), OnEntryNotFound::RETURN_NULL); if (existing_function) { auto &new_info = info.Cast(); if (new_info.functions.MergeFunctionSet(existing_function->functions)) { @@ -1540,6 +1540,7 @@ ParserOptions ClientContext::GetParserOptions() const { ParserOptions options; options.preserve_identifier_case = Settings::Get(*this); options.integer_division = Settings::Get(*this); + options.regex_match_operator_semantics = Settings::Get(*this); options.max_expression_depth = Settings::Get(*this); options.extensions = DBConfig::GetConfig(*this).GetCallbackManager(); options.parser_override_setting = Settings::Get(*this); diff --git 
a/src/duckdb/src/main/config.cpp b/src/duckdb/src/main/config.cpp index 879a313a5..5403fbae4 100644 --- a/src/duckdb/src/main/config.cpp +++ b/src/duckdb/src/main/config.cpp @@ -219,6 +219,7 @@ static const ConfigurationOption internal_options[] = { DUCKDB_LOCAL(ProfilingOutputSetting), DUCKDB_LOCAL(ProfilingRendererSettingsSetting), DUCKDB_LOCAL(ProgressBarTimeSetting), + DUCKDB_SETTING_CALLBACK(RegexMatchOperatorSemanticsSetting), DUCKDB_SETTING(ScalarSubqueryErrorOnMultipleRowsSetting), DUCKDB_SETTING(SchedulerProcessPartialSetting), DUCKDB_LOCAL(SchemaSetting), @@ -249,9 +250,9 @@ static const ConfigurationAlias setting_aliases[] = {DUCKDB_SETTING_ALIAS("confi DUCKDB_SETTING_ALIAS("memory_limit", 127), DUCKDB_SETTING_ALIAS("null_order", 60), DUCKDB_SETTING_ALIAS("profile_output", 150), - DUCKDB_SETTING_ALIAS("user", 167), + DUCKDB_SETTING_ALIAS("user", 168), DUCKDB_SETTING_ALIAS("wal_autocheckpoint", 28), - DUCKDB_SETTING_ALIAS("worker_threads", 165), + DUCKDB_SETTING_ALIAS("worker_threads", 166), FINAL_ALIAS}; vector DBConfig::GetOptions() { diff --git a/src/duckdb/src/main/database_manager.cpp b/src/duckdb/src/main/database_manager.cpp index 45dd1dcc2..f779b93d3 100644 --- a/src/duckdb/src/main/database_manager.cpp +++ b/src/duckdb/src/main/database_manager.cpp @@ -132,8 +132,8 @@ shared_ptr DatabaseManager::AttachDatabase(ClientContext &cont throw BinderException("Database \"%s\" is already attached in %s mode, cannot re-attach in %s mode", info.name, existing_mode_str, attached_mode); } - if (!options.default_table.name.empty()) { - existing_db->GetCatalog().SetDefaultTable(options.default_table.schema, options.default_table.name); + if (!options.default_table.Name().empty()) { + existing_db->GetCatalog().SetDefaultTable(options.default_table.Schema(), options.default_table.Name()); } if (info.on_conflict == OnCreateConflict::REPLACE_ON_CONFLICT) { // we require the vacuuming threshold for indexed tables to be the same as the already attached db @@ -214,8 
+214,8 @@ shared_ptr DatabaseManager::AttachDatabase(ClientContext &cont attached_db->Initialize(context); } else { attached_db->Initialize(context); - if (!options.default_table.name.empty()) { - attached_db->GetCatalog().SetDefaultTable(options.default_table.schema, options.default_table.name); + if (!options.default_table.Name().empty()) { + attached_db->GetCatalog().SetDefaultTable(options.default_table.Schema(), options.default_table.Name()); } attached_db->FinalizeLoad(context); } @@ -289,7 +289,7 @@ void DatabaseManager::Alter(ClientContext &context, AlterInfo &info) { switch (db_info.alter_database_type) { case AlterDatabaseType::RENAME_DATABASE: { auto &rename_info = db_info.Cast(); - RenameDatabase(context, db_info.catalog, rename_info.new_name, db_info.if_not_found); + RenameDatabase(context, db_info.Catalog(), rename_info.new_name, db_info.if_not_found); break; } default: diff --git a/src/duckdb/src/main/extension/extension_loader.cpp b/src/duckdb/src/main/extension/extension_loader.cpp index b7210336e..d19503ab8 100644 --- a/src/duckdb/src/main/extension/extension_loader.cpp +++ b/src/duckdb/src/main/extension/extension_loader.cpp @@ -60,7 +60,7 @@ void ExtensionLoader::CreateSchema(const Identifier &name) const { auto data = CatalogTransaction::GetSystemTransaction(db); CreateSchemaInfo info; - info.schema = name; + info.SchemaMutable() = name; info.internal = true; // TODO; we can give the user more control here info.on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; @@ -127,7 +127,7 @@ void ExtensionLoader::RegisterFunction(ScalarFunction function) { void ExtensionLoader::RegisterFunction(ScalarFunctionSet function) { CreateScalarFunctionInfo info(std::move(function)); - info.schema = loader_info.extension_schema; + info.SchemaMutable() = loader_info.extension_schema; info.on_conflict = OnCreateConflict::ALTER_ON_CONFLICT; RegisterFunction(std::move(info)); } @@ -135,8 +135,8 @@ void ExtensionLoader::RegisterFunction(ScalarFunctionSet function) { 
void ExtensionLoader::RegisterFunction(CreateScalarFunctionInfo function) { D_ASSERT(!function.functions.name.empty()); function.extension_name = GetRegisteredExtensionName(); - if (function.schema == DEFAULT_SCHEMA) { - function.schema = loader_info.extension_schema; + if (function.Schema() == DEFAULT_SCHEMA) { + function.SchemaMutable() = loader_info.extension_schema; } auto &system_catalog = Catalog::GetSystemCatalog(db); auto data = CatalogTransaction::GetSystemTransaction(db); @@ -151,15 +151,15 @@ void ExtensionLoader::RegisterFunction(AggregateFunction function) { void ExtensionLoader::RegisterFunction(AggregateFunctionSet function) { CreateAggregateFunctionInfo info(std::move(function)); - info.schema = loader_info.extension_schema; + info.SchemaMutable() = loader_info.extension_schema; info.on_conflict = OnCreateConflict::ALTER_ON_CONFLICT; RegisterFunction(std::move(info)); } void ExtensionLoader::RegisterFunction(CreateAggregateFunctionInfo function) { D_ASSERT(!function.functions.name.empty()); - if (function.schema == DEFAULT_SCHEMA) { - function.schema = loader_info.extension_schema; + if (function.Schema() == DEFAULT_SCHEMA) { + function.SchemaMutable() = loader_info.extension_schema; } function.extension_name = GetRegisteredExtensionName(); auto &system_catalog = Catalog::GetSystemCatalog(db); @@ -175,15 +175,15 @@ void ExtensionLoader::RegisterFunction(WindowFunction function) { void ExtensionLoader::RegisterFunction(WindowFunctionSet function) { CreateWindowFunctionInfo info(std::move(function)); - info.schema = loader_info.extension_schema; + info.SchemaMutable() = loader_info.extension_schema; info.on_conflict = OnCreateConflict::ALTER_ON_CONFLICT; RegisterFunction(std::move(info)); } void ExtensionLoader::RegisterFunction(CreateWindowFunctionInfo function) { D_ASSERT(!function.functions.name.empty()); - if (function.schema == DEFAULT_SCHEMA) { - function.schema = loader_info.extension_schema; + if (function.Schema() == DEFAULT_SCHEMA) { + 
function.SchemaMutable() = loader_info.extension_schema; } function.extension_name = GetRegisteredExtensionName(); auto &system_catalog = Catalog::GetSystemCatalog(db); @@ -206,7 +206,7 @@ void ExtensionLoader::RegisterFunction(TableFunction function) { void ExtensionLoader::RegisterFunction(TableFunctionSet function) { D_ASSERT(!function.name.empty()); CreateTableFunctionInfo info(std::move(function)); - info.schema = loader_info.extension_schema; + info.SchemaMutable() = loader_info.extension_schema; info.on_conflict = OnCreateConflict::ALTER_ON_CONFLICT; RegisterFunction(std::move(info)); } @@ -214,8 +214,8 @@ void ExtensionLoader::RegisterFunction(TableFunctionSet function) { void ExtensionLoader::RegisterFunction(CreateTableFunctionInfo info) { D_ASSERT(!info.functions.name.empty()); info.extension_name = GetRegisteredExtensionName(); - if (info.schema == DEFAULT_SCHEMA) { - info.schema = loader_info.extension_schema; + if (info.Schema() == DEFAULT_SCHEMA) { + info.SchemaMutable() = loader_info.extension_schema; } auto &system_catalog = Catalog::GetSystemCatalog(db); auto data = CatalogTransaction::GetSystemTransaction(db); @@ -233,7 +233,7 @@ void ExtensionLoader::RegisterFunction(PragmaFunctionSet function) { D_ASSERT(!function.name.empty()); CreatePragmaFunctionInfo info(std::move(function)); info.extension_name = GetRegisteredExtensionName(); - info.schema = loader_info.extension_schema; + info.SchemaMutable() = loader_info.extension_schema; auto &system_catalog = Catalog::GetSystemCatalog(db); auto data = CatalogTransaction::GetSystemTransaction(db); system_catalog.CreatePragmaFunction(data, info); @@ -242,7 +242,7 @@ void ExtensionLoader::RegisterFunction(PragmaFunctionSet function) { void ExtensionLoader::RegisterFunction(CopyFunction function) { CreateCopyFunctionInfo info(std::move(function)); info.extension_name = GetRegisteredExtensionName(); - info.schema = loader_info.extension_schema; + info.SchemaMutable() = loader_info.extension_schema; auto 
&system_catalog = Catalog::GetSystemCatalog(db); auto data = CatalogTransaction::GetSystemTransaction(db); system_catalog.CreateCopyFunction(data, info); @@ -250,8 +250,8 @@ void ExtensionLoader::RegisterFunction(CopyFunction function) { void ExtensionLoader::RegisterFunction(CreateMacroInfo &info) { info.extension_name = GetRegisteredExtensionName(); - if (info.schema == DEFAULT_SCHEMA) { - info.schema = loader_info.extension_schema; + if (info.Schema() == DEFAULT_SCHEMA) { + info.SchemaMutable() = loader_info.extension_schema; } auto &system_catalog = Catalog::GetSystemCatalog(db); auto data = CatalogTransaction::GetSystemTransaction(db); @@ -262,8 +262,8 @@ void ExtensionLoader::RegisterCollation(CreateCollationInfo &info) { info.extension_name = GetRegisteredExtensionName(); auto &system_catalog = Catalog::GetSystemCatalog(db); auto data = CatalogTransaction::GetSystemTransaction(db); - if (info.schema == DEFAULT_SCHEMA) { - info.schema = loader_info.extension_schema; + if (info.Schema() == DEFAULT_SCHEMA) { + info.SchemaMutable() = loader_info.extension_schema; } info.on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT; system_catalog.CreateCollation(data, info); @@ -271,7 +271,7 @@ void ExtensionLoader::RegisterCollation(CreateCollationInfo &info) { // Also register as a function for serialisation CreateScalarFunctionInfo finfo(info.function); finfo.extension_name = GetRegisteredExtensionName(); - finfo.schema = loader_info.extension_schema; + finfo.SchemaMutable() = loader_info.extension_schema; finfo.on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT; system_catalog.CreateFunction(data, finfo); } @@ -342,7 +342,7 @@ void ExtensionLoader::RegisterType(string type_name, LogicalType type, bind_logi info.temporary = true; info.internal = true; info.extension_name = GetRegisteredExtensionName(); - info.schema = loader_info.extension_schema; + info.SchemaMutable() = loader_info.extension_schema; auto &system_catalog = Catalog::GetSystemCatalog(db); auto data = 
CatalogTransaction::GetSystemTransaction(db); system_catalog.CreateType(data, info); diff --git a/src/duckdb/src/main/extension_callback_manager.cpp b/src/duckdb/src/main/extension_callback_manager.cpp index ff606ad70..35c2b18e4 100644 --- a/src/duckdb/src/main/extension_callback_manager.cpp +++ b/src/duckdb/src/main/extension_callback_manager.cpp @@ -5,6 +5,7 @@ #include "duckdb/planner/planner_extension.hpp" #include "duckdb/storage/storage_extension.hpp" #include "duckdb/planner/extension_callback.hpp" +#include "duckdb/main/profiler_extension.hpp" namespace duckdb { @@ -21,6 +22,8 @@ struct ExtensionCallbackRegistry { case_insensitive_map_t> storage_extensions; //! Set of callbacks that can be installed by extensions vector> extension_callbacks; + //! Pluggable profiler / EXPLAIN tree renderers, keyed by format name + case_insensitive_map_t> profiler_extensions; }; ExtensionCallbackManager &ExtensionCallbackManager::Get(ClientContext &context) { @@ -90,6 +93,13 @@ void ExtensionCallbackManager::Register(shared_ptr extension) callback_registry.atomic_store(new_registry); } +void ExtensionCallbackManager::Register(const string &name, shared_ptr extension) { + lock_guard guard(registry_lock); + auto new_registry = make_shared_ptr(*callback_registry); + new_registry->profiler_extensions[name] = std::move(extension); + callback_registry.atomic_store(new_registry); +} + template ExtensionCallbackIteratorHelper::ExtensionCallbackIteratorHelper( const vector &vec, shared_ptr callback_registry) @@ -139,6 +149,15 @@ optional_ptr ExtensionCallbackManager::FindStorageExtension(co return entry->second.get(); } +optional_ptr ExtensionCallbackManager::FindProfilerExtension(const string &name) const { + auto registry = callback_registry.atomic_load(); + auto entry = registry->profiler_extensions.find(name); + if (entry == registry->profiler_extensions.end()) { + return nullptr; + } + return entry->second.get(); +} + bool ExtensionCallbackManager::HasParserExtensions() const { 
auto registry = callback_registry.atomic_load(); return !registry->parser_extensions.empty(); @@ -173,6 +192,14 @@ void StorageExtension::Register(DBConfig &config, const string &extension_name, config.GetCallbackManager().Register(extension_name, std::move(extension)); } +void ProfilerExtension::Register(DBConfig &config, const string &format_name, shared_ptr extension) { + config.GetCallbackManager().Register(format_name, std::move(extension)); +} + +optional_ptr ProfilerExtension::Find(const ClientContext &context, const string &format_name) { + return ExtensionCallbackManager::Get(context).FindProfilerExtension(format_name); +} + template class ExtensionCallbackIteratorHelper>; template class ExtensionCallbackIteratorHelper>; template class ExtensionCallbackIteratorHelper; diff --git a/src/duckdb/src/main/query_profiler.cpp b/src/duckdb/src/main/query_profiler.cpp index 4ae991a35..03ba0f950 100644 --- a/src/duckdb/src/main/query_profiler.cpp +++ b/src/duckdb/src/main/query_profiler.cpp @@ -5,6 +5,7 @@ #include "duckdb/common/numeric_utils.hpp" #include "duckdb/common/optional_idx.hpp" #include "duckdb/common/printer.hpp" +#include "duckdb/common/box_renderer.hpp" #include "duckdb/common/string_util.hpp" #include "duckdb/common/tree_renderer.hpp" #include "duckdb/common/tree_renderer/text_tree_renderer.hpp" @@ -17,13 +18,10 @@ #include "duckdb/main/profiler/gathered_metrics.hpp" #include "duckdb/main/settings.hpp" #include "duckdb/storage/buffer/buffer_pool.hpp" -#include "yyjson.hpp" -#include "yyjson_utils.hpp" +#include "duckdb/common/json_document.hpp" #include -using namespace duckdb_yyjson; // NOLINT - namespace duckdb { void QueryProfileResult::AddValue(const string &k, Value val) { @@ -82,13 +80,7 @@ bool QueryProfiler::IsEnabled() const { } unique_ptr QueryProfiler::CreateProfiler(const string &name) const { - // formats are resolved through the renderer registry, which matches case-insensitively and throws on - // unrecognized formats - "no_output" has 
no renderer, for which CreateRenderer returns nullptr - auto renderer = TreeRenderer::CreateRenderer(name); - if (renderer) { - renderer->Configure(ClientConfig::GetConfig(context).profiling_renderer_settings); - } - return renderer; + return TreeRenderer::CreateRenderer(context, name); } unique_ptr QueryProfiler::GetRenderer(const ProfilerPrintFormat &format) const { @@ -236,13 +228,14 @@ void QueryProfiler::EndQuery() { guard.unlock(); if (emit_output) { - string tree = ToString(); auto save_location = GetSaveLocation(); - if (save_location.empty()) { - Printer::Print(tree); + // print directly through the renderer's print sink + auto renderer = GetRenderer(); + PrintProfilerOutput(renderer.get()); Printer::Print("\n"); } else { + string tree = ToString(); WriteToFile(save_location.c_str(), tree); } } @@ -315,19 +308,33 @@ string QueryProfiler::RenderProfilerOutput(optional_ptr renderer) if (!IsEnabled()) { return renderer->RenderProfilerDisabled(); } - return renderer->RenderProfiler(*this); + StringResultRenderer ss; + renderer->RenderProfiler(*this, ss); + return ss.str(); +} + +void QueryProfiler::PrintProfilerOutput(optional_ptr renderer) const { + if (!renderer) { + // "no_output" format: nothing is rendered, enabled or not + return; + } + // only created now that we are actually printing + auto sink = renderer->GetPrintRenderer(); + if (!IsEnabled()) { + *sink << renderer->RenderProfilerDisabled(); + return; + } + renderer->RenderProfiler(*this, *sink); } -string QueryProfiler::RenderProfilingNodeTree(TreeRenderer &renderer) const { +void QueryProfiler::RenderProfilingNodeTree(TreeRenderer &renderer, BaseResultRenderer &ss) const { lock_guard guard(lock); // checking the tree to ensure the query is really empty // the query string is empty when a logical plan is deserialized if (query_metrics.query_sql.empty() || !root) { - return ""; + return; } - stringstream str; - renderer.Render(*root, str); - return str.str(); + renderer.Render(*root, ss); } 
OperatorProfiler::OperatorProfiler(ClientContext &context) : context(context) { @@ -531,28 +538,54 @@ static string RenderTiming(double timing) { } string QueryProfiler::QueryTreeToString() const { - duckdb::stringstream str; - QueryTreeToStream(str); - return str.str(); + StringResultRenderer ss; + RenderQueryTree(ss); + return ss.str(); } -void RenderPhaseTimings(std::ostream &ss, const pair &head, map &timings, idx_t width) { +// renders a centered line: the surrounding box-drawing and padding is layout, the text itself is a value +static void RenderPaddedValue(BaseResultRenderer &ss, const string &border_left, const string &padded, + const string &border_right) { + idx_t start = 0; + while (start < padded.size() && padded[start] == ' ') { + start++; + } + idx_t end = padded.size(); + while (end > start && padded[end - 1] == ' ') { + end--; + } + ss << border_left; + if (start > 0) { + ss << padded.substr(0, start); + } + if (end > start) { + ss.Render(ResultRenderType::VALUE, padded.substr(start, end - start)); + } + if (end < padded.size()) { + ss << padded.substr(end); + } + ss << border_right; +} + +void RenderPhaseTimings(BaseResultRenderer &ss, const pair &head, map &timings, + idx_t width) { ss << "┌────────────────────────────────────────────────┐\n"; - ss << "│" + QueryProfiler::DrawPadded(RenderTitleCase(head.first) + ": " + RenderTiming(head.second), width - 2) + - "│\n"; + RenderPaddedValue( + ss, "│", QueryProfiler::DrawPadded(RenderTitleCase(head.first) + ": " + RenderTiming(head.second), width - 2), + "│\n"); ss << "│┌──────────────────────────────────────────────┐│\n"; for (const auto &entry : timings) { - ss << "││" + - QueryProfiler::DrawPadded(RenderTitleCase(entry.first) + ": " + RenderTiming(entry.second), - width - 4) + - "││\n"; + RenderPaddedValue( + ss, "││", + QueryProfiler::DrawPadded(RenderTitleCase(entry.first) + ": " + RenderTiming(entry.second), width - 4), + "││\n"); } ss << "│└──────────────────────────────────────────────┘│\n"; 
ss << "└────────────────────────────────────────────────┘\n"; } -void PrintPhaseTimingsToStream(std::ostream &ss, const GatheredMetrics &info, idx_t width) { +void PrintPhaseTimingsToStream(BaseResultRenderer &ss, const GatheredMetrics &info, idx_t width) { map optimizer_timings; map planner_timings; map parser_timings; @@ -600,6 +633,12 @@ void PrintPhaseTimingsToStream(std::ostream &ss, const GatheredMetrics &info, id } void QueryProfiler::QueryTreeToStream(std::ostream &ss) const { + StringResultRenderer renderer; + RenderQueryTree(renderer); + ss << renderer.str(); +} + +void QueryProfiler::RenderQueryTree(BaseResultRenderer &ss) const { lock_guard guard(lock); bool show_query_name = false; @@ -609,10 +648,13 @@ void QueryProfiler::QueryTreeToStream(std::ostream &ss) const { } ss << "┌─────────────────────────────────────┐\n"; ss << "│┌───────────────────────────────────┐│\n"; - ss << "││ Query Profiling Information ││\n"; + RenderPaddedValue(ss, "││", " Query Profiling Information ", "││\n"); ss << "│└───────────────────────────────────┘│\n"; ss << "└─────────────────────────────────────┘\n"; - ss << (show_query_name ? 
StringUtil::Replace(query_metrics.query_sql, "\n", " ") : "") + "\n"; + if (show_query_name) { + ss.Render(ResultRenderType::VALUE, StringUtil::Replace(query_metrics.query_sql, "\n", " ")); + } + ss << "\n"; // checking the tree to ensure the query is really empty // the query string is empty when a logical plan is deserialized @@ -620,15 +662,18 @@ void QueryProfiler::QueryTreeToStream(std::ostream &ss) const { return; } + // the registered states write profiling info through an ostream - capture it and emit as layout text + duckdb::stringstream state_info; for (auto &state : context.registered_state->States()) { - state->WriteProfilingInformation(ss); + state->WriteProfilingInformation(state_info); } + ss << state_info.str(); constexpr idx_t TOTAL_BOX_WIDTH = 50; ss << "┌────────────────────────────────────────────────┐\n"; ss << "│┌──────────────────────────────────────────────┐│\n"; string total_time = "Total Time: " + RenderTiming(query_metrics.GetStringMetricInSeconds("query.total_time")); - ss << "││" + DrawPadded(total_time, TOTAL_BOX_WIDTH - 4) + "││\n"; + RenderPaddedValue(ss, "││", DrawPadded(total_time, TOTAL_BOX_WIDTH - 4), "││\n"); ss << "│└──────────────────────────────────────────────┘│\n"; ss << "└────────────────────────────────────────────────┘\n"; // render the main operator tree @@ -725,55 +770,53 @@ profiler_metrics_t OperatorMetrics::GetMetrics(const GatheredMetrics &info) cons return result; } -static yyjson_mut_val *ValueToJSON(yyjson_mut_doc *doc, const Value &val) { +static JSONMutableValue ValueToJSON(JSONWriter &writer, const Value &val) { if (val.IsNull()) { - return yyjson_mut_null(doc); + return writer.CreateNull(); } auto &type = val.type(); if (type.id() == LogicalTypeId::MAP) { // MAP values (e.g. 
extra_info) become JSON objects; multiline string values become arrays - auto obj = yyjson_mut_obj(doc); + auto obj = writer.CreateObject(); for (auto &child : MapValue::GetChildren(val)) { auto kv = StructValue::GetChildren(child); auto k = kv[0].GetValue(); auto v = kv[1].GetValue(); - auto key_ptr = yyjson_mut_get_str(yyjson_mut_strcpy(doc, k.c_str())); auto splits = StringUtil::Split(v, "\n"); if (splits.size() > 1) { - auto arr = yyjson_mut_arr(doc); + auto arr = writer.CreateArray(); for (auto &s : splits) { - yyjson_mut_arr_add_strcpy(doc, arr, s.c_str()); + arr.AppendString(s); } - yyjson_mut_obj_add_val(doc, obj, key_ptr, arr); + obj.Add(k, arr); } else { - yyjson_mut_obj_add_strcpy(doc, obj, key_ptr, v.c_str()); + obj.AddString(k, v); } } return obj; } if (type.IsIntegral()) { - return yyjson_mut_uint(doc, val.GetValue()); + return writer.CreateUnsignedInteger(val.GetValue()); } if (type.IsNumeric()) { - return yyjson_mut_real(doc, val.GetValue()); + return writer.CreateDouble(val.GetValue()); } - auto str = val.GetValue(); - return yyjson_mut_strncpy(doc, str.c_str(), str.size()); + return writer.CreateString(val.GetValue()); } -static yyjson_mut_val *QueryProfileResultToJSON(yyjson_mut_doc *doc, const QueryProfileResult &node) { +static JSONMutableValue QueryProfileResultToJSON(JSONWriter &writer, const QueryProfileResult &node) { switch (node.kind) { case QueryProfileResultKind::VALUE: - return ValueToJSON(doc, node.value); + return ValueToJSON(writer, node.value); case QueryProfileResultKind::LIST: { - auto arr = yyjson_mut_arr(doc); + auto arr = writer.CreateArray(); for (auto &child : node.children) { - yyjson_mut_arr_add_val(arr, QueryProfileResultToJSON(doc, *child)); + arr.Append(QueryProfileResultToJSON(writer, *child)); } return arr; } case QueryProfileResultKind::OBJECT: { - auto obj = yyjson_mut_obj(doc); + auto obj = writer.CreateObject(); // Sort children alphabetically by key for deterministic output vector> sorted_children; 
sorted_children.reserve(node.children.size()); @@ -789,8 +832,7 @@ static yyjson_mut_val *QueryProfileResultToJSON(yyjson_mut_doc *doc, const Query }); for (const QueryProfileResult &child : sorted_children) { D_ASSERT(!child.key.empty()); - auto key_ptr = yyjson_mut_get_str(yyjson_mut_strcpy(doc, child.key.c_str())); - yyjson_mut_obj_add_val(doc, obj, key_ptr, QueryProfileResultToJSON(doc, child)); + obj.Add(child.key, QueryProfileResultToJSON(writer, child)); } return obj; } @@ -799,16 +841,6 @@ static yyjson_mut_val *QueryProfileResultToJSON(yyjson_mut_doc *doc, const Query } } -static string StringifyAndFree(ConvertedJSONHolder &json_holder, yyjson_mut_val *object) { - json_holder.stringified_json = yyjson_mut_val_write_opts( - object, YYJSON_WRITE_ALLOW_INF_AND_NAN | YYJSON_WRITE_PRETTY, nullptr, nullptr, nullptr); - if (!json_holder.stringified_json) { - throw InternalException("The plan could not be rendered as JSON, yyjson failed"); - } - auto result = string(json_holder.stringified_json); - return result; -} - void QueryProfiler::ToLogInternal() const { if (!root) { return; @@ -982,12 +1014,10 @@ bool QueryProfiler::HasRoot() const { string QueryProfiler::ToJSON() const { lock_guard guard(lock); - ConvertedJSONHolder json_holder; - json_holder.doc = yyjson_mut_doc_new(nullptr); + JSONWriter writer; auto result = ToResultTree(); - auto root_val = QueryProfileResultToJSON(json_holder.doc, *result); - yyjson_mut_doc_set_root(json_holder.doc, root_val); - return StringifyAndFree(json_holder, root_val); + writer.SetRoot(QueryProfileResultToJSON(writer, *result)); + return writer.ToString(JSONWriteFlags::ALLOW_INF_AND_NAN | JSONWriteFlags::PRETTY); } void QueryProfiler::WriteToFile(const char *path, string &info) const { @@ -1039,14 +1069,16 @@ void QueryProfiler::Initialize(const PhysicalOperator &root_op) { } } -void QueryProfiler::Render(const ProfilingNode &node, std::ostream &ss) const { +void QueryProfiler::Render(const ProfilingNode &node, 
BaseResultRenderer &ss) const { TextTreeRenderer renderer; renderer.Configure(ClientConfig::GetConfig(context).profiling_renderer_settings); renderer.Render(node, ss); } void QueryProfiler::Print() { - Printer::Print(QueryTreeToString()); + // print the framed text query tree directly through the renderer's print sink + auto renderer = CreateProfiler("query_tree"); + PrintProfilerOutput(renderer.get()); } static void MergeOperatorMeasurements(ProfilingNode &root, OperatorMetrics &result) { diff --git a/src/duckdb/src/main/relation/create_table_relation.cpp b/src/duckdb/src/main/relation/create_table_relation.cpp index 895cd8097..bf509aede 100644 --- a/src/duckdb/src/main/relation/create_table_relation.cpp +++ b/src/duckdb/src/main/relation/create_table_relation.cpp @@ -28,9 +28,9 @@ BoundStatement CreateTableRelation::Bind(Binder &binder) { CreateStatement stmt; auto info = make_uniq(); - info->catalog = catalog_name; - info->schema = schema_name; - info->table = table_name; + info->CatalogMutable() = catalog_name; + info->SchemaMutable() = schema_name; + info->SetTableName(table_name); info->query = std::move(select); info->on_conflict = on_conflict; info->temporary = temporary; diff --git a/src/duckdb/src/main/relation/create_view_relation.cpp b/src/duckdb/src/main/relation/create_view_relation.cpp index 3fb7fbfc6..c7b9b9a7a 100644 --- a/src/duckdb/src/main/relation/create_view_relation.cpp +++ b/src/duckdb/src/main/relation/create_view_relation.cpp @@ -27,9 +27,9 @@ BoundStatement CreateViewRelation::Bind(Binder &binder) { CreateStatement stmt; auto info = make_uniq(); info->query = std::move(select); - info->view_name = view_name; + info->SetViewName(view_name); info->temporary = temporary; - info->schema = schema_name; + info->SchemaMutable() = schema_name; info->on_conflict = replace ? 
OnCreateConflict::REPLACE_ON_CONFLICT : OnCreateConflict::ERROR_ON_CONFLICT; stmt.info = std::move(info); return binder.Bind(stmt.Cast()); diff --git a/src/duckdb/src/main/relation/delete_relation.cpp b/src/duckdb/src/main/relation/delete_relation.cpp index c7107a3ad..db5dfe31f 100644 --- a/src/duckdb/src/main/relation/delete_relation.cpp +++ b/src/duckdb/src/main/relation/delete_relation.cpp @@ -17,9 +17,7 @@ DeleteRelation::DeleteRelation(shared_ptr &context, unique BoundStatement DeleteRelation::Bind(Binder &binder) { auto basetable = make_uniq(); - basetable->catalog_name = catalog_name; - basetable->schema_name = schema_name; - basetable->table_name = table_name; + basetable->GetQualifiedNameMutable() = QualifiedName(catalog_name, schema_name, table_name); DeleteStatement stmt; auto &node = *stmt.node; diff --git a/src/duckdb/src/main/relation/table_relation.cpp b/src/duckdb/src/main/relation/table_relation.cpp index 6ad221df7..92d06f1bc 100644 --- a/src/duckdb/src/main/relation/table_relation.cpp +++ b/src/duckdb/src/main/relation/table_relation.cpp @@ -28,9 +28,8 @@ unique_ptr TableRelation::GetQueryNode() { unique_ptr TableRelation::GetTableRef() { auto table_ref = make_uniq(); - table_ref->schema_name = description->schema; - table_ref->table_name = description->table; - table_ref->catalog_name = description->database; + table_ref->GetQualifiedNameMutable() = + QualifiedName(description->database, description->schema, description->table); return std::move(table_ref); } diff --git a/src/duckdb/src/main/relation/update_relation.cpp b/src/duckdb/src/main/relation/update_relation.cpp index 8cb5c62f1..943dd1001 100644 --- a/src/duckdb/src/main/relation/update_relation.cpp +++ b/src/duckdb/src/main/relation/update_relation.cpp @@ -20,9 +20,7 @@ UpdateRelation::UpdateRelation(shared_ptr &context, unique BoundStatement UpdateRelation::Bind(Binder &binder) { auto basetable = make_uniq(); - basetable->catalog_name = catalog_name; - basetable->schema_name = 
schema_name; - basetable->table_name = table_name; + basetable->GetQualifiedNameMutable() = QualifiedName(catalog_name, schema_name, table_name); UpdateStatement stmt; auto &node = *stmt.node; diff --git a/src/duckdb/src/main/relation/view_relation.cpp b/src/duckdb/src/main/relation/view_relation.cpp index 5fbbf3d38..edc29be2a 100644 --- a/src/duckdb/src/main/relation/view_relation.cpp +++ b/src/duckdb/src/main/relation/view_relation.cpp @@ -38,8 +38,8 @@ unique_ptr ViewRelation::GetTableRef() { return premade_tableref->Copy(); } auto table_ref = make_uniq(); - table_ref->schema_name = schema_name; - table_ref->table_name = view_name; + table_ref->SchemaMutable() = schema_name; + table_ref->TableMutable() = view_name; return std::move(table_ref); } diff --git a/src/duckdb/src/main/settings/autogenerated_settings.cpp b/src/duckdb/src/main/settings/autogenerated_settings.cpp index b2090c098..536080d99 100644 --- a/src/duckdb/src/main/settings/autogenerated_settings.cpp +++ b/src/duckdb/src/main/settings/autogenerated_settings.cpp @@ -257,6 +257,16 @@ void PinThreadsSetting::OnSet(SettingCallbackInfo &info, Value ¶meter) { EnumUtil::FromString(StringValue::Get(parameter)); } +//===----------------------------------------------------------------------===// +// Regex Match Operator Semantics +//===----------------------------------------------------------------------===// +void RegexMatchOperatorSemanticsSetting::OnSet(SettingCallbackInfo &info, Value ¶meter) { + if (parameter.IsNull()) { + throw InvalidInputException("regex_match_operator_semantics setting cannot be NULL"); + } + EnumUtil::FromString(StringValue::Get(parameter)); +} + //===----------------------------------------------------------------------===// // Storage Block Prefetch //===----------------------------------------------------------------------===// diff --git a/src/duckdb/src/optimizer/common_subplan_optimizer.cpp b/src/duckdb/src/optimizer/common_subplan_optimizer.cpp index 4adeaf0cd..d04bcc975 
100644 --- a/src/duckdb/src/optimizer/common_subplan_optimizer.cpp +++ b/src/duckdb/src/optimizer/common_subplan_optimizer.cpp @@ -788,6 +788,7 @@ class CommonSubplanFinder { void ConvertSubplansToCTEs(Optimizer &optimizer, unique_ptr &op) { const auto sorted_subplans = GetSortedSubplans(); idx_t index = 1; + bool converted_subplans = false; for (auto &entry : sorted_subplans) { auto &subplan_info = entry.get().second; if (!ShouldMaterialize(subplan_info)) { @@ -956,6 +957,12 @@ class CommonSubplanFinder { } break; } + converted_subplans = true; + } + if (converted_subplans) { + // Subplan replacement changes child output bindings under existing positional projection maps. + // Invalidate them here; column lifetime runs again later and rebuilds the maps. + ClearProjectionMaps(*op); } } diff --git a/src/duckdb/src/optimizer/late_materialization_helper.cpp b/src/duckdb/src/optimizer/late_materialization_helper.cpp index f2d9f474c..d92e15290 100644 --- a/src/duckdb/src/optimizer/late_materialization_helper.cpp +++ b/src/duckdb/src/optimizer/late_materialization_helper.cpp @@ -1,9 +1,10 @@ #include "duckdb/optimizer/late_materialization_helper.hpp" +#include "duckdb/planner/filter/expression_filter.hpp" +#include "duckdb/planner/filter/table_filter_functions.hpp" namespace duckdb { unique_ptr LateMaterializationHelper::CreateLHSGet(const LogicalGet &rhs, Binder &binder) { - // we need to construct a new scan of the same table auto table_index = binder.GenerateTableIndex(); auto new_get = make_uniq(table_index, rhs.function, rhs.bind_data->Copy(), rhs.returned_types, rhs.names, rhs.virtual_columns); @@ -13,6 +14,18 @@ unique_ptr LateMaterializationHelper::CreateLHSGet(const LogicalGet new_get->named_parameters = rhs.named_parameters; new_get->input_table_types = rhs.input_table_types; new_get->input_table_names = rhs.input_table_names; + auto &column_ids = rhs.GetColumnIds(); + for (auto &filter_entry : rhs.table_filters) { + auto &expr_filter = 
filter_entry.Filter().Cast(); + if (ExpressionFilter::ContainsInternalFunction(*expr_filter.expr, DynamicFilterScalarFun::NAME)) { + continue; + } + auto col_idx = column_ids[filter_entry.GetIndex()].GetPrimaryIndex(); + auto &col_type = rhs.returned_types[col_idx]; + auto optional_expr = CreateOptionalFilterExpression(expr_filter.expr->Copy(), col_type); + new_get->table_filters.PushFilter(filter_entry.GetIndex(), + make_uniq(std::move(optional_expr))); + } return new_get; } diff --git a/src/duckdb/src/optimizer/remote_pushdown_optimizer.cpp b/src/duckdb/src/optimizer/remote_pushdown_optimizer.cpp index ae0ab7a89..4ee07e3c9 100644 --- a/src/duckdb/src/optimizer/remote_pushdown_optimizer.cpp +++ b/src/duckdb/src/optimizer/remote_pushdown_optimizer.cpp @@ -359,9 +359,9 @@ CatalogPushdownResult RemotePushdownOptimizer::RewriteNode(SelectNode &node) { CatalogPushdownResult RemotePushdownOptimizer::RewriteNode(InsertQueryNode &node) { // first bind the target table for the insert BaseTableRef target_ref; - target_ref.catalog_name = node.catalog; - target_ref.schema_name = node.schema; - target_ref.table_name = node.table; + target_ref.CatalogMutable() = node.catalog; + target_ref.SchemaMutable() = node.schema; + target_ref.TableMutable() = node.table; RemotePushdownOptimizer target_optimizer(this); auto result = target_optimizer.Rewrite(target_ref); @@ -697,7 +697,7 @@ void RemotePushdownOptimizer::TrackLocalTable(const BaseTableRef &ref) { if (!ref.alias.empty()) { local_table_names.insert(ref.alias); } else { - local_table_names.insert(ref.table_name); + local_table_names.insert(ref.Table()); } } @@ -731,14 +731,14 @@ bool RemotePushdownOptimizer::RefersToCTE(const Identifier &cte_name, CatalogPus CatalogPushdownResult RemotePushdownOptimizer::Rewrite(BaseTableRef &ref) { // Resolve schema_name-as-catalog ambiguity using the binder's own resolution logic - Identifier catalog_name = ref.catalog_name; - Identifier schema_name = ref.schema_name; + Identifier 
catalog_name = ref.Catalog(); + Identifier schema_name = ref.Schema(); Binder::BindSchemaOrCatalog(binder.context, catalog_name, schema_name); // Case 0: check if this is a CTE reference (must have no explicit catalog/schema) if (catalog_name.empty() && schema_name.empty()) { CatalogPushdownResult pushdown_result; - if (RefersToCTE(ref.table_name, pushdown_result)) { + if (RefersToCTE(ref.Table(), pushdown_result)) { if (pushdown_result.reference_type == CatalogReferenceType::UNKNOWN_CATALOG_REFERENCE) { // Local/unknown CTE - track as local for correlated subquery detection TrackLocalTable(ref); @@ -753,7 +753,7 @@ CatalogPushdownResult RemotePushdownOptimizer::Rewrite(BaseTableRef &ref) { if (catalog && catalog->Supports(RemoteCapability::EXECUTE_QUERY_NODE)) { // verify the table actually exists in the remote catalog - if it does not, fall back // to the binder so it can report a proper error message - EntryLookupInfo table_lookup(CatalogType::TABLE_ENTRY, ref.table_name); + EntryLookupInfo table_lookup(CatalogType::TABLE_ENTRY, ref.Table()); const auto &schema = schema_name.empty() ? 
Identifier(DEFAULT_SCHEMA) : schema_name; auto entry = Catalog::GetEntry(binder.context, catalog->GetName(), schema, table_lookup, OnEntryNotFound::RETURN_NULL); @@ -773,7 +773,7 @@ CatalogPushdownResult RemotePushdownOptimizer::Rewrite(BaseTableRef &ref) { // Case 2: no explicit catalog - lazily populate search path catalogs on first use FindRemoteCatalogsInSearchPath(); - EntryLookupInfo table_lookup(CatalogType::TABLE_ENTRY, ref.table_name); + EntryLookupInfo table_lookup(CatalogType::TABLE_ENTRY, ref.Table()); if (pushdown_state.remote_catalogs_in_search_path.size() != 1) { TrackLocalTable(ref); @@ -1059,11 +1059,11 @@ void RemotePushdownOptimizer::StripCatalogName(TableRef &ref, const Identifier & switch (ref.type) { case TableReferenceType::BASE_TABLE: { auto &base = ref.Cast(); - if (base.catalog_name == catalog_name) { - base.catalog_name = ""; - } else if (base.catalog_name.empty() && base.schema_name == catalog_name) { + if (base.Catalog() == catalog_name) { + base.CatalogMutable() = ""; + } else if (base.Catalog().empty() && base.Schema() == catalog_name) { // 2-part name (schema.table) where the schema is actually the catalog being pushed to - base.schema_name = ""; + base.SchemaMutable() = ""; } break; } diff --git a/src/duckdb/src/optimizer/window_self_join.cpp b/src/duckdb/src/optimizer/window_self_join.cpp index 087ec1313..41aeb9144 100644 --- a/src/duckdb/src/optimizer/window_self_join.cpp +++ b/src/duckdb/src/optimizer/window_self_join.cpp @@ -8,11 +8,41 @@ #include "duckdb/planner/expression/bound_columnref_expression.hpp" #include "duckdb/planner/expression/bound_aggregate_expression.hpp" #include "duckdb/function/aggregate_state.hpp" +#include "duckdb/planner/expression_binder/base_select_binder.hpp" #include "duckdb/planner/logical_operator_deep_copy.hpp" namespace duckdb { -static unique_ptr TranslateAggregate(const BoundWindowExpression &w_expr) { +static bool IsOrderableDistinctAggregate(const BoundWindowExpression &w_expr) { + // If the 
aggregate is order-sensitive and distinct, + // then the ORDER BYs need to be functional dependencies of the arguments. + auto agg_func = *w_expr.AggregateFunction(); + if (agg_func.GetOrderDependent() != AggregateOrderDependent::ORDER_DEPENDENT || !w_expr.Distinct()) { + return true; + } + + const auto &arguments = w_expr.GetChildren(); + vector> arg_refs; + arg_refs.reserve(arguments.size()); + for (auto &arg : arguments) { + arg_refs.emplace_back(*arg); + } + bool in_args = true; + + if (!w_expr.ArgOrders().empty()) { + for (auto &order : w_expr.ArgOrders()) { + in_args &= BaseSelectBinder::IsFunctionallyDependent(order.expression, arg_refs); + } + } else if (!w_expr.OrderBy().empty()) { + for (auto &order : w_expr.OrderBy()) { + in_args &= BaseSelectBinder::IsFunctionallyDependent(order.expression, arg_refs); + } + } + + return in_args; +} + +static unique_ptr TranslateAggregate(ClientContext &client, const BoundWindowExpression &w_expr) { auto agg_func = *w_expr.AggregateFunction(); unique_ptr bind_info; if (w_expr.BindInfo()) { @@ -32,11 +62,16 @@ static unique_ptr TranslateAggregate(const BoundWindowExpression &w_ filter = w_expr.Filter()->Copy(); } - auto aggr_type = w_expr.Distinct() ? AggregateType::DISTINCT : AggregateType::NON_DISTINCT; - + const auto aggr_type = w_expr.Distinct() ? AggregateType::DISTINCT : AggregateType::NON_DISTINCT; + const auto aggr_ordered = (agg_func.GetOrderDependent() == AggregateOrderDependent::ORDER_DEPENDENT); auto result = make_uniq(std::move(agg_func), std::move(children), std::move(filter), std::move(bind_info), aggr_type); + if (!aggr_ordered) { + // ORDER BY is a NOP, so drop it. 
+ return std::move(result); + } + if (!w_expr.ArgOrders().empty()) { result->GetOrderBysMutable() = make_uniq(); auto &orders = result->GetOrderBysMutable()->orders; @@ -112,6 +147,10 @@ bool WindowSelfJoinOptimizer::CanOptimize(const BoundWindowExpression &w_expr, return false; } + if (!IsOrderableDistinctAggregate(w_expr)) { + return false; + } + return true; } @@ -195,7 +234,7 @@ unique_ptr WindowSelfJoinOptimizer::OptimizeInternal(unique_ptr for (auto &expr : window.expressions) { auto &w_expr = expr->Cast(); - aggregates.emplace_back(TranslateAggregate(w_expr)); + aggregates.emplace_back(TranslateAggregate(optimizer.GetContext(), w_expr)); } // args: group_index, aggregate_index, ... diff --git a/src/duckdb/src/parser/expression/function_expression.cpp b/src/duckdb/src/parser/expression/function_expression.cpp index fcf4cbc55..dc6d3feab 100644 --- a/src/duckdb/src/parser/expression/function_expression.cpp +++ b/src/duckdb/src/parser/expression/function_expression.cpp @@ -16,8 +16,9 @@ FunctionExpression::FunctionExpression(Identifier catalog, Identifier schema, co vector> children_p, unique_ptr filter, unique_ptr order_bys_p, bool distinct, bool is_operator, bool export_state_p) - : ParsedExpression(ExpressionType::FUNCTION, ExpressionClass::FUNCTION), catalog(std::move(catalog)), - schema(std::move(schema)), function_name(StringUtil::Lower(function_name.GetIdentifierName())), + : ParsedExpression(ExpressionType::FUNCTION, ExpressionClass::FUNCTION), + qualified_name {std::move(catalog), std::move(schema), + Identifier(StringUtil::Lower(function_name.GetIdentifierName()))}, is_operator(is_operator), distinct(distinct), filter(std::move(filter)), order_bys(std::move(order_bys_p)), export_state(export_state_p) { arguments.reserve(children_p.size()); @@ -42,8 +43,9 @@ FunctionExpression::FunctionExpression(Identifier catalog_name, Identifier schem vector children, unique_ptr filter, unique_ptr order_bys_p, bool distinct, bool is_operator, bool export_state) - : 
ParsedExpression(ExpressionType::FUNCTION, ExpressionClass::FUNCTION), catalog(std::move(catalog_name)), - schema(std::move(schema_name)), function_name(StringUtil::Lower(function_name.GetIdentifierName())), + : ParsedExpression(ExpressionType::FUNCTION, ExpressionClass::FUNCTION), + qualified_name {std::move(catalog_name), std::move(schema_name), + Identifier(StringUtil::Lower(function_name.GetIdentifierName()))}, is_operator(is_operator), arguments(std::move(children)), distinct(distinct), filter(std::move(filter)), order_bys(std::move(order_bys_p)), export_state(export_state) { D_ASSERT(!function_name.empty()); @@ -64,26 +66,26 @@ string FunctionExpression::ToString() const { // built-in operator D_ASSERT(!distinct); if (arguments.size() == 1) { - if (StringUtil::Contains(function_name.GetIdentifierName(), "__postfix")) { + if (StringUtil::Contains(qualified_name.Name().GetIdentifierName(), "__postfix")) { return "((" + arguments[0].ToString() + ")" + - StringUtil::Replace(function_name.GetIdentifierName(), "__postfix", "") + ")"; + StringUtil::Replace(qualified_name.Name().GetIdentifierName(), "__postfix", "") + ")"; } - return function_name.GetIdentifierName() + "(" + arguments[0].ToString() + ")"; + return qualified_name.Name().GetIdentifierName() + "(" + arguments[0].ToString() + ")"; } if (arguments.size() == 2) { - return StringUtil::Format("(%s %s %s)", arguments[0].ToString(), function_name.GetIdentifierName(), + return StringUtil::Format("(%s %s %s)", arguments[0].ToString(), qualified_name.Name().GetIdentifierName(), arguments[1].ToString()); } } // standard function call string result; - if (!catalog.empty()) { - result += SQLIdentifier(catalog) + "."; + if (!qualified_name.Catalog().empty()) { + result += SQLIdentifier(qualified_name.Catalog()) + "."; } - if (!schema.empty()) { - result += SQLIdentifier(schema) + "."; + if (!qualified_name.Schema().empty()) { + result += SQLIdentifier(qualified_name.Schema()) + "."; } - result += 
SQLIdentifier(function_name); + result += SQLIdentifier(qualified_name.Name()); result += "("; if (distinct) { result += "DISTINCT "; @@ -118,12 +120,12 @@ string FunctionExpression::ToString() const { } void FunctionExpression::Verify() const { - D_ASSERT(!function_name.empty()); + D_ASSERT(!qualified_name.Name().empty()); } optional_ptr FunctionExpression::IsLambdaFunction() { // Ignore the ->> operator (JSON extension). - if (function_name == "->>") { + if (qualified_name.Name() == "->>") { return nullptr; } // Check the children for lambda expressions. @@ -137,8 +139,8 @@ optional_ptr FunctionExpression::IsLambdaFunction() { void FunctionExpression::Serialize(Serializer &serializer) const { ParsedExpression::Serialize(serializer); - serializer.WritePropertyWithDefault(200, "function_name", function_name); - serializer.WritePropertyWithDefault(201, "schema", schema); + serializer.WritePropertyWithDefault(200, "function_name", qualified_name.Name()); + serializer.WritePropertyWithDefault(201, "schema", qualified_name.Schema()); if (!serializer.ShouldSerialize(StorageVersion::V2_0_0)) { // Legacy serialization. 
@@ -156,7 +158,7 @@ void FunctionExpression::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault(205, "distinct", distinct); serializer.WritePropertyWithDefault(206, "is_operator", is_operator); serializer.WritePropertyWithDefault(207, "export_state", export_state); - serializer.WritePropertyWithDefault(208, "catalog", catalog); + serializer.WritePropertyWithDefault(208, "catalog", qualified_name.Catalog()); if (serializer.ShouldSerialize(StorageVersion::V2_0_0)) { serializer.WritePropertyWithDefault>(209, "arguments", arguments); @@ -165,8 +167,8 @@ void FunctionExpression::Serialize(Serializer &serializer) const { unique_ptr FunctionExpression::Deserialize(Deserializer &deserializer) { auto result = duckdb::unique_ptr(new FunctionExpression()); - deserializer.ReadPropertyWithDefault(200, "function_name", result->function_name); - deserializer.ReadPropertyWithDefault(201, "schema", result->schema); + deserializer.ReadPropertyWithDefault(200, "function_name", result->qualified_name.NameMutable()); + deserializer.ReadPropertyWithDefault(201, "schema", result->qualified_name.SchemaMutable()); // Legacy children deserialization vector> children; @@ -188,7 +190,7 @@ unique_ptr FunctionExpression::Deserialize(Deserializer &deser deserializer.ReadPropertyWithDefault(205, "distinct", result->distinct); deserializer.ReadPropertyWithDefault(206, "is_operator", result->is_operator); deserializer.ReadPropertyWithDefault(207, "export_state", result->export_state); - deserializer.ReadPropertyWithDefault(208, "catalog", result->catalog); + deserializer.ReadPropertyWithDefault(208, "catalog", result->qualified_name.CatalogMutable()); // New children deserialization if (children.empty()) { diff --git a/src/duckdb/src/parser/expression/type_expression.cpp b/src/duckdb/src/parser/expression/type_expression.cpp index 05aa8c9fc..5243d91dd 100644 --- a/src/duckdb/src/parser/expression/type_expression.cpp +++ 
b/src/duckdb/src/parser/expression/type_expression.cpp @@ -9,9 +9,11 @@ namespace duckdb { TypeExpression::TypeExpression(Identifier catalog, Identifier schema, Identifier type_name, vector> children_p) - : ParsedExpression(ExpressionType::TYPE, ExpressionClass::TYPE), catalog(std::move(catalog)), - schema(std::move(schema)), type_name(std::move(type_name)), children(std::move(children_p)) { - D_ASSERT(!this->type_name.empty()); + : ParsedExpression(ExpressionType::TYPE, ExpressionClass::TYPE), qualified_name {std::move(catalog), + std::move(schema), + std::move(type_name)}, + children(std::move(children_p)) { + D_ASSERT(!qualified_name.Name().empty()); } TypeExpression::TypeExpression(Identifier type_name, vector> children) @@ -28,11 +30,12 @@ TypeExpression::TypeExpression() : ParsedExpression(ExpressionType::TYPE, Expres string TypeExpression::ToString() const { string result; - if (!catalog.empty()) { - result += SQLIdentifier(catalog) + "."; + auto &type_name = qualified_name.Name(); + if (!qualified_name.Catalog().empty()) { + result += SQLIdentifier(qualified_name.Catalog()) + "."; } - if (!schema.empty()) { - result += SQLIdentifier(schema) + "."; + if (!qualified_name.Schema().empty()) { + result += SQLIdentifier(qualified_name.Schema()) + "."; } auto ¶ms = children; @@ -115,7 +118,7 @@ string TypeExpression::ToString() const { } void TypeExpression::Verify() const { - D_ASSERT(!type_name.empty()); + D_ASSERT(!qualified_name.Name().empty()); } } // namespace duckdb diff --git a/src/duckdb/src/parser/expression/window_expression.cpp b/src/duckdb/src/parser/expression/window_expression.cpp index a8de313b4..bc3de0c06 100644 --- a/src/duckdb/src/parser/expression/window_expression.cpp +++ b/src/duckdb/src/parser/expression/window_expression.cpp @@ -10,7 +10,8 @@ WindowExpression::WindowExpression() : ParsedExpression(ExpressionType::INVALID, vector> WindowExpression::SerializedChildren(Serializer &serializer) const { vector> result; idx_t nargs = 
arguments.size(); - if (!serializer.ShouldSerialize(StorageVersion::V2_0_0) && (function_name == "lead" || function_name == "lag")) { + if (!serializer.ShouldSerialize(StorageVersion::V2_0_0) && + (qualified_name.Name() == "lead" || qualified_name.Name() == "lag")) { nargs = 1; } @@ -23,7 +24,7 @@ vector> WindowExpression::SerializedChildren(Serial unique_ptr WindowExpression::SerializedOffset(Serializer &serializer) const { if (!serializer.ShouldSerialize(StorageVersion::V2_0_0) && arguments.size() > 1 && - (function_name == "lead" || function_name == "lag")) { + (qualified_name.Name() == "lead" || qualified_name.Name() == "lag")) { return arguments[1].GetExpression().Copy(); } @@ -32,7 +33,7 @@ unique_ptr WindowExpression::SerializedOffset(Serializer &seri unique_ptr WindowExpression::SerializedDefault(Serializer &serializer) const { if (!serializer.ShouldSerialize(StorageVersion::V2_0_0) && arguments.size() > 2 && - (function_name == "lead" || function_name == "lag")) { + (qualified_name.Name() == "lead" || qualified_name.Name() == "lag")) { return arguments[2].GetExpression().Copy(); } @@ -40,8 +41,9 @@ unique_ptr WindowExpression::SerializedDefault(Serializer &ser } WindowExpression::WindowExpression(const string &catalog_name, const string &schema, const string &function_name) - : ParsedExpression(WindowToExpressionType(function_name), ExpressionClass::WINDOW), catalog(catalog_name), - schema(schema), function_name(StringUtil::Lower(function_name)), ignore_nulls(false), distinct(false) { + : ParsedExpression(WindowToExpressionType(function_name), ExpressionClass::WINDOW), + qualified_name {Identifier(catalog_name), Identifier(schema), Identifier(StringUtil::Lower(function_name))}, + ignore_nulls(false), distinct(false) { } struct WindowFunctionDefinition { @@ -87,13 +89,13 @@ string WindowExpression::ExpressionTypeToWindow(ExpressionType expression_type) } void WindowExpression::SetFunctionName(const string &function_name_p) { - function_name = 
Identifier(function_name_p); - type = WindowToExpressionType(function_name.GetIdentifierName()); + qualified_name.NameMutable() = Identifier(function_name_p); + type = WindowToExpressionType(qualified_name.Name().GetIdentifierName()); } string WindowExpression::ToString() const { - return ToString(*this, schema.GetIdentifierName(), - function_name.GetIdentifierName()); + return ToString(*this, qualified_name.Schema().GetIdentifierName(), + qualified_name.Name().GetIdentifierName()); } bool WindowExpression::HasBoundedParts() const { @@ -124,9 +126,9 @@ bool WindowExpression::HasBoundedParts() const { void WindowExpression::Serialize(Serializer &serializer) const { ParsedExpression::Serialize(serializer); - serializer.WritePropertyWithDefault(200, "function_name", function_name); - serializer.WritePropertyWithDefault(201, "schema", schema); - serializer.WritePropertyWithDefault(202, "catalog", catalog); + serializer.WritePropertyWithDefault(200, "function_name", qualified_name.Name()); + serializer.WritePropertyWithDefault(201, "schema", qualified_name.Schema()); + serializer.WritePropertyWithDefault(202, "catalog", qualified_name.Catalog()); if (!serializer.ShouldSerialize(StorageVersion::V2_0_0)) { // Legacy serialization. 
@@ -165,9 +167,9 @@ void WindowExpression::Serialize(Serializer &serializer) const { unique_ptr WindowExpression::Deserialize(Deserializer &deserializer) { auto result = duckdb::unique_ptr(new WindowExpression()); - deserializer.ReadPropertyWithDefault(200, "function_name", result->function_name); - deserializer.ReadPropertyWithDefault(201, "schema", result->schema); - deserializer.ReadPropertyWithDefault(202, "catalog", result->catalog); + deserializer.ReadPropertyWithDefault(200, "function_name", result->qualified_name.NameMutable()); + deserializer.ReadPropertyWithDefault(201, "schema", result->qualified_name.SchemaMutable()); + deserializer.ReadPropertyWithDefault(202, "catalog", result->qualified_name.CatalogMutable()); // Legacy children deserialization vector> children; diff --git a/src/duckdb/src/parser/parsed_data/alter_database_info.cpp b/src/duckdb/src/parser/parsed_data/alter_database_info.cpp index 51e56f90c..7278a0931 100644 --- a/src/duckdb/src/parser/parsed_data/alter_database_info.cpp +++ b/src/duckdb/src/parser/parsed_data/alter_database_info.cpp @@ -30,7 +30,7 @@ RenameDatabaseInfo::RenameDatabaseInfo(Identifier catalog_p, Identifier new_name } unique_ptr RenameDatabaseInfo::Copy() const { - return make_uniq(catalog, new_name, if_not_found); + return make_uniq(Catalog(), new_name, if_not_found); } string RenameDatabaseInfo::ToString() const { @@ -39,7 +39,7 @@ string RenameDatabaseInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += StringUtil::Format("%s SET ALIAS TO %s", catalog, new_name); + result += StringUtil::Format("%s SET ALIAS TO %s", Catalog(), new_name); return result; } diff --git a/src/duckdb/src/parser/parsed_data/alter_info.cpp b/src/duckdb/src/parser/parsed_data/alter_info.cpp index 3aead3374..f8fff8aff 100644 --- a/src/duckdb/src/parser/parsed_data/alter_info.cpp +++ b/src/duckdb/src/parser/parsed_data/alter_info.cpp @@ -7,8 +7,13 @@ namespace duckdb { 
AlterInfo::AlterInfo(AlterType type, Identifier catalog_p, Identifier schema_p, Identifier name_p, OnEntryNotFound if_not_found) - : ParseInfo(TYPE), type(type), if_not_found(if_not_found), catalog(std::move(catalog_p)), - schema(std::move(schema_p)), name(std::move(name_p)), allow_internal(false) { + : ParseInfo(TYPE), type(type), if_not_found(if_not_found), allow_internal(false), + qualified_name(std::move(catalog_p), std::move(schema_p), std::move(name_p)) { +} + +AlterInfo::AlterInfo(AlterType type, QualifiedName qualified_name_p, OnEntryNotFound if_not_found) + : ParseInfo(TYPE), type(type), if_not_found(if_not_found), allow_internal(false), + qualified_name(std::move(qualified_name_p)) { } AlterInfo::AlterInfo(AlterType type) : ParseInfo(TYPE), type(type) { @@ -18,12 +23,7 @@ AlterInfo::~AlterInfo() { } AlterEntryData AlterInfo::GetAlterEntryData() const { - AlterEntryData data; - data.catalog = catalog; - data.schema = schema; - data.name = name; - data.if_not_found = if_not_found; - return data; + return AlterEntryData(GetQualifiedName(), if_not_found); } bool AlterInfo::IsAddPrimaryKey() const { diff --git a/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp b/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp index 7a596dbbb..84054f418 100644 --- a/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +++ b/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp @@ -7,8 +7,7 @@ namespace duckdb { // AlterScalarFunctionInfo //===--------------------------------------------------------------------===// AlterScalarFunctionInfo::AlterScalarFunctionInfo(AlterScalarFunctionType type, AlterEntryData data) - : AlterInfo(AlterType::ALTER_SCALAR_FUNCTION, std::move(data.catalog), std::move(data.schema), std::move(data.name), - data.if_not_found), + : AlterInfo(AlterType::ALTER_SCALAR_FUNCTION, std::move(data.qualified_name), data.if_not_found), alter_scalar_function_type(type) { } 
AlterScalarFunctionInfo::~AlterScalarFunctionInfo() { diff --git a/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp b/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp index e5e8a88ac..cc848b315 100644 --- a/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp +++ b/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp @@ -6,8 +6,7 @@ namespace duckdb { // AlterTableFunctionInfo //===--------------------------------------------------------------------===// AlterTableFunctionInfo::AlterTableFunctionInfo(AlterTableFunctionType type, AlterEntryData data) - : AlterInfo(AlterType::ALTER_TABLE_FUNCTION, std::move(data.catalog), std::move(data.schema), std::move(data.name), - data.if_not_found), + : AlterInfo(AlterType::ALTER_TABLE_FUNCTION, std::move(data.qualified_name), data.if_not_found), alter_table_function_type(type) { } AlterTableFunctionInfo::~AlterTableFunctionInfo() { diff --git a/src/duckdb/src/parser/parsed_data/alter_table_info.cpp b/src/duckdb/src/parser/parsed_data/alter_table_info.cpp index 0f92ff3a3..f94ccbe44 100644 --- a/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +++ b/src/duckdb/src/parser/parsed_data/alter_table_info.cpp @@ -25,7 +25,7 @@ CatalogType ChangeOwnershipInfo::GetCatalogType() const { } unique_ptr ChangeOwnershipInfo::Copy() const { - return make_uniq_base(entry_catalog_type, catalog, schema, name, owner_schema, + return make_uniq_base(entry_catalog_type, Catalog(), Schema(), Name(), owner_schema, owner_name, if_not_found); } @@ -38,9 +38,9 @@ string ChangeOwnershipInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " OWNED BY "; - result += QualifierToString(catalog, owner_schema, owner_name); + result += QualifierToString(Catalog(), owner_schema, owner_name); result += ";"; return result; } @@ -60,7 +60,7 
@@ CatalogType SetCommentInfo::GetCatalogType() const { } unique_ptr SetCommentInfo::Copy() const { - return make_uniq_base(entry_catalog_type, catalog, schema, name, comment_value, + return make_uniq_base(entry_catalog_type, Catalog(), Schema(), Name(), comment_value, if_not_found); } @@ -70,7 +70,7 @@ string SetCommentInfo::ToString() const { result += "COMMENT ON "; result += ParseInfo::TypeToString(entry_catalog_type); result += " "; - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " IS "; result += comment_value.ToSQLString(); @@ -88,9 +88,7 @@ AlterTableInfo::AlterTableInfo(AlterTableType type) : AlterInfo(AlterType::ALTER } AlterTableInfo::AlterTableInfo(AlterTableType type, AlterEntryData data) - : AlterInfo(AlterType::ALTER_TABLE, std::move(data.catalog), std::move(data.schema), std::move(data.name), - data.if_not_found), - alter_table_type(type) { + : AlterInfo(AlterType::ALTER_TABLE, std::move(data.qualified_name), data.if_not_found), alter_table_type(type) { } AlterTableInfo::~AlterTableInfo() { } @@ -122,7 +120,7 @@ string RenameColumnInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " RENAME COLUMN "; result += SQLIdentifier(old_name); result += " TO "; @@ -155,7 +153,7 @@ string RenameFieldInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " RENAME COLUMN "; for (idx_t i = 0; i < column_path.size(); i++) { if (i > 0) { @@ -192,7 +190,7 @@ string RenameTableInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += QualifierToString(catalog, schema, name); + result += 
QualifierToString(Catalog(), Schema(), Name()); result += " RENAME TO "; result += SQLIdentifier(new_table_name); result += ";"; @@ -224,7 +222,7 @@ string AddColumnInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " ADD COLUMN"; if (if_column_not_exists) { result += " IF NOT EXISTS"; @@ -265,7 +263,7 @@ string AddFieldInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " ADD COLUMN "; if (if_field_not_exists) { result += "IF NOT EXISTS "; @@ -303,7 +301,7 @@ string RemoveColumnInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " DROP COLUMN "; if (if_column_exists) { result += "IF EXISTS "; @@ -340,7 +338,7 @@ string RemoveFieldInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " DROP COLUMN "; if (if_column_exists) { result += "IF EXISTS "; @@ -383,7 +381,7 @@ string ChangeColumnTypeInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " ALTER COLUMN "; result += SQLIdentifier(column_name); result += " TYPE "; @@ -429,7 +427,7 @@ string SetDefaultInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += QualifierToString(catalog, schema, name); + 
result += QualifierToString(Catalog(), Schema(), Name()); result += " ALTER COLUMN "; result += SQLIdentifier(column_name); if (expression) { @@ -464,7 +462,7 @@ string SetNotNullInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " ALTER COLUMN "; result += SQLIdentifier(column_name); result += " SET NOT NULL"; @@ -494,7 +492,7 @@ string DropNotNullInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " ALTER COLUMN "; result += SQLIdentifier(column_name); result += " DROP NOT NULL"; @@ -534,9 +532,7 @@ AlterViewInfo::AlterViewInfo(AlterViewType type) : AlterInfo(AlterType::ALTER_VI } AlterViewInfo::AlterViewInfo(AlterViewType type, AlterEntryData data) - : AlterInfo(AlterType::ALTER_VIEW, std::move(data.catalog), std::move(data.schema), std::move(data.name), - data.if_not_found), - alter_view_type(type) { + : AlterInfo(AlterType::ALTER_VIEW, std::move(data.qualified_name), data.if_not_found), alter_view_type(type) { } AlterViewInfo::~AlterViewInfo() { } @@ -566,7 +562,7 @@ string RenameViewInfo::ToString() const { if (if_not_found == OnEntryNotFound::RETURN_NULL) { result += "IF EXISTS "; } - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " RENAME TO "; result += SQLIdentifier(new_view_name); result += ";"; @@ -592,7 +588,7 @@ unique_ptr AddConstraintInfo::Copy() const { string AddConstraintInfo::ToString() const { string result = "ALTER TABLE "; - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " ADD "; result += constraint->ToString(); result += ";"; @@ -622,7 +618,7 @@ 
unique_ptr SetPartitionedByInfo::Copy() const { string SetPartitionedByInfo::ToString() const { string result = "ALTER TABLE "; - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); if (partition_keys.empty()) { result += " RESET PARTITIONED BY"; } else { @@ -661,7 +657,7 @@ unique_ptr SetSortedByInfo::Copy() const { string SetSortedByInfo::ToString() const { string result = "ALTER TABLE "; - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); if (orders.empty()) { result += " RESET SORTED BY"; } else { @@ -701,7 +697,7 @@ unique_ptr SetTableOptionsInfo::Copy() const { string SetTableOptionsInfo::ToString() const { string result = "ALTER TABLE "; - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " SET ("; idx_t i = 0; for (auto &entry : table_options) { @@ -738,7 +734,7 @@ unique_ptr ResetTableOptionsInfo::Copy() const { string ResetTableOptionsInfo::ToString() const { string result = "ALTER TABLE "; - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += " RESET ("; idx_t i = 0; for (auto &entry : table_options) { diff --git a/src/duckdb/src/parser/parsed_data/comment_on_column_info.cpp b/src/duckdb/src/parser/parsed_data/comment_on_column_info.cpp index 6aaf4ad14..0bfe7fd01 100644 --- a/src/duckdb/src/parser/parsed_data/comment_on_column_info.cpp +++ b/src/duckdb/src/parser/parsed_data/comment_on_column_info.cpp @@ -18,7 +18,8 @@ SetColumnCommentInfo::SetColumnCommentInfo(Identifier catalog, Identifier schema } unique_ptr SetColumnCommentInfo::Copy() const { - auto result = make_uniq(catalog, schema, name, column_name, comment_value, if_not_found); + auto result = + make_uniq(Catalog(), Schema(), Name(), column_name, comment_value, if_not_found); result->type = type; return std::move(result); } @@ -28,7 
+29,7 @@ string SetColumnCommentInfo::ToString() const { D_ASSERT(catalog_entry_type == CatalogType::INVALID); result += "COMMENT ON COLUMN "; - result += QualifierToString(catalog, schema, name); + result += QualifierToString(Catalog(), Schema(), Name()); result += "." + SQLIdentifier(column_name); result += " IS "; result += comment_value.ToSQLString(); @@ -37,8 +38,8 @@ string SetColumnCommentInfo::ToString() const { } optional_ptr SetColumnCommentInfo::TryResolveCatalogEntry(CatalogEntryRetriever &retriever) { - EntryLookupInfo lookup_info(CatalogType::TABLE_ENTRY, name); - auto entry = retriever.GetEntry(catalog, schema, lookup_info, if_not_found); + EntryLookupInfo lookup_info(CatalogType::TABLE_ENTRY, Name()); + auto entry = retriever.GetEntry(Catalog(), Schema(), lookup_info, if_not_found); if (entry) { catalog_entry_type = entry->type; diff --git a/src/duckdb/src/parser/parsed_data/copy_info.cpp b/src/duckdb/src/parser/parsed_data/copy_info.cpp index f3228ab4f..9e85ff99d 100644 --- a/src/duckdb/src/parser/parsed_data/copy_info.cpp +++ b/src/duckdb/src/parser/parsed_data/copy_info.cpp @@ -4,14 +4,13 @@ namespace duckdb { CopyInfo::CopyInfo() - : ParseInfo(TYPE), catalog(INVALID_CATALOG), schema(DEFAULT_SCHEMA), is_from(false), is_format_auto_detected(true) { + : ParseInfo(TYPE), is_from(false), is_format_auto_detected(true), + qualified_name(Identifier(INVALID_CATALOG), Identifier(DEFAULT_SCHEMA), Identifier()) { } unique_ptr CopyInfo::Copy() const { auto result = make_uniq(); - result->catalog = catalog; - result->schema = schema; - result->table = table; + result->qualified_name = qualified_name; result->select_list = select_list; result->file_path_expression = file_path_expression ? 
file_path_expression->Copy() : nullptr; result->file_path = file_path; @@ -77,8 +76,8 @@ string CopyInfo::CopyOptionsToString() const { string CopyInfo::TablePartToString() const { string result; - D_ASSERT(!table.empty()); - result += QualifierToString(catalog, schema, table); + D_ASSERT(!Table().empty()); + result += qualified_name.ToString(); // (c1, c2, ..) if (!select_list.empty()) { diff --git a/src/duckdb/src/parser/parsed_data/create_aggregate_function_info.cpp b/src/duckdb/src/parser/parsed_data/create_aggregate_function_info.cpp index 4e7476b88..8e592ae75 100644 --- a/src/duckdb/src/parser/parsed_data/create_aggregate_function_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_aggregate_function_info.cpp @@ -4,14 +4,14 @@ namespace duckdb { CreateAggregateFunctionInfo::CreateAggregateFunctionInfo(AggregateFunction function) : CreateFunctionInfo(CatalogType::AGGREGATE_FUNCTION_ENTRY), functions(function.name) { - name = function.name; + SetFunctionName(function.name); functions.AddFunction(std::move(function)); internal = true; } CreateAggregateFunctionInfo::CreateAggregateFunctionInfo(AggregateFunctionSet set) : CreateFunctionInfo(CatalogType::AGGREGATE_FUNCTION_ENTRY), functions(std::move(set)) { - name = functions.name; + SetFunctionName(functions.name); for (auto &func : functions.functions) { func.name = functions.name; } diff --git a/src/duckdb/src/parser/parsed_data/create_collation_info.cpp b/src/duckdb/src/parser/parsed_data/create_collation_info.cpp index 18ce5e81b..8c7e3f94f 100644 --- a/src/duckdb/src/parser/parsed_data/create_collation_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_collation_info.cpp @@ -6,12 +6,12 @@ CreateCollationInfo::CreateCollationInfo(Identifier name_p, ScalarFunction funct bool not_required_for_equality_p) : CreateInfo(CatalogType::COLLATION_ENTRY), function(std::move(function_p)), combinable(combinable_p), not_required_for_equality(not_required_for_equality_p) { - this->name = std::move(name_p); + 
this->SetCollationName(std::move(name_p)); internal = true; } unique_ptr CreateCollationInfo::Copy() const { - auto result = make_uniq(name, function, combinable, not_required_for_equality); + auto result = make_uniq(GetCollationName(), function, combinable, not_required_for_equality); CopyProperties(*result); return std::move(result); } diff --git a/src/duckdb/src/parser/parsed_data/create_coordinate_system_info.cpp b/src/duckdb/src/parser/parsed_data/create_coordinate_system_info.cpp index 9e50ba435..9c55ce492 100644 --- a/src/duckdb/src/parser/parsed_data/create_coordinate_system_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_coordinate_system_info.cpp @@ -4,14 +4,15 @@ namespace duckdb { CreateCoordinateSystemInfo::CreateCoordinateSystemInfo(Identifier name_p, string authority, string code, string projjson, string wkt2_2019) - : CreateInfo(CatalogType::COORDINATE_SYSTEM_ENTRY), name(std::move(name_p)), authority(std::move(authority)), - code(std::move(code)), projjson_definition(std::move(projjson)), wkt2_2019_definition(std::move(wkt2_2019)) { + : CreateInfo(CatalogType::COORDINATE_SYSTEM_ENTRY), authority(std::move(authority)), code(std::move(code)), + projjson_definition(std::move(projjson)), wkt2_2019_definition(std::move(wkt2_2019)) { + SetCoordinateSystemName(std::move(name_p)); internal = true; } unique_ptr CreateCoordinateSystemInfo::Copy() const { - auto result = - make_uniq(name, authority, code, projjson_definition, wkt2_2019_definition); + auto result = make_uniq(GetCoordinateSystemName(), authority, code, projjson_definition, + wkt2_2019_definition); CopyProperties(*result); return std::move(result); } diff --git a/src/duckdb/src/parser/parsed_data/create_copy_function_info.cpp b/src/duckdb/src/parser/parsed_data/create_copy_function_info.cpp index 333f27520..2e8e3c7fe 100644 --- a/src/duckdb/src/parser/parsed_data/create_copy_function_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_copy_function_info.cpp @@ -4,7 +4,7 @@ namespace 
duckdb { CreateCopyFunctionInfo::CreateCopyFunctionInfo(CopyFunction function_p) : CreateInfo(CatalogType::COPY_FUNCTION_ENTRY), function(std::move(function_p)) { - this->name = function.name; + this->SetCopyFunctionName(function.name); internal = true; } diff --git a/src/duckdb/src/parser/parsed_data/create_function_info.cpp b/src/duckdb/src/parser/parsed_data/create_function_info.cpp index add9d76b4..2fc078d96 100644 --- a/src/duckdb/src/parser/parsed_data/create_function_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_function_info.cpp @@ -11,7 +11,7 @@ CreateFunctionInfo::CreateFunctionInfo(CatalogType type, Identifier schema) : Cr void CreateFunctionInfo::CopyFunctionProperties(CreateFunctionInfo &other) const { CopyProperties(other); - other.name = name; + other.SetFunctionName(GetFunctionName()); other.alias_of = alias_of; other.descriptions = descriptions; } diff --git a/src/duckdb/src/parser/parsed_data/create_index_info.cpp b/src/duckdb/src/parser/parsed_data/create_index_info.cpp index 99c2b82da..976658432 100644 --- a/src/duckdb/src/parser/parsed_data/create_index_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_index_info.cpp @@ -9,9 +9,10 @@ CreateIndexInfo::CreateIndexInfo() : CreateInfo(CatalogType::INDEX_ENTRY, Identi } CreateIndexInfo::CreateIndexInfo(const duckdb::CreateIndexInfo &info) - : CreateInfo(CatalogType::INDEX_ENTRY, info.schema), table(info.table), index_name(info.index_name), - options(info.options), index_type(info.index_type), constraint_type(info.constraint_type), - column_ids(info.column_ids), scan_types(info.scan_types), names(info.names) { + : CreateInfo(CatalogType::INDEX_ENTRY, info.Schema()), table(info.table), options(info.options), + index_type(info.index_type), constraint_type(info.constraint_type), column_ids(info.column_ids), + scan_types(info.scan_types), names(info.names) { + SetIndexName(info.GetIndexName()); } static void RemoveTableQualificationRecursive(unique_ptr &root_expr, const Identifier 
&table_name) { @@ -69,9 +70,9 @@ string CreateIndexInfo::ToString() const { if (on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT) { result += "IF NOT EXISTS "; } - result += SQLIdentifier(index_name); + result += SQLIdentifier(GetIndexName()); result += " ON "; - result += QualifierToString(temporary ? Identifier() : catalog, schema, table); + result += QualifierToString(temporary ? Identifier() : Catalog(), Schema(), table); if (index_type != "ART") { result += " USING "; result += SQLIdentifier(index_type); diff --git a/src/duckdb/src/parser/parsed_data/create_info.cpp b/src/duckdb/src/parser/parsed_data/create_info.cpp index 8ed715931..c29775e07 100644 --- a/src/duckdb/src/parser/parsed_data/create_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_info.cpp @@ -8,8 +8,8 @@ namespace duckdb { void CreateInfo::CopyProperties(CreateInfo &other) const { other.type = type; - other.catalog = catalog; - other.schema = schema; + other.CatalogMutable() = Catalog(); + other.SchemaMutable() = Schema(); other.on_conflict = on_conflict; other.temporary = temporary; other.internal = internal; diff --git a/src/duckdb/src/parser/parsed_data/create_macro_info.cpp b/src/duckdb/src/parser/parsed_data/create_macro_info.cpp index bdce364e8..4d9454193 100644 --- a/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_macro_info.cpp @@ -18,7 +18,7 @@ CreateMacroInfo::CreateMacroInfo(CatalogType type, unique_ptr fun string CreateMacroInfo::ToString() const { auto prefix = GetCreatePrefix("MACRO"); - prefix += QualifierToString(temporary ? Identifier() : catalog, schema, name) + " "; + prefix += QualifierToString(temporary ? 
Identifier() : Catalog(), Schema(), GetFunctionName()) + " "; string definitions; for (auto &function : macros) { if (!definitions.empty()) { @@ -34,7 +34,7 @@ unique_ptr CreateMacroInfo::Copy() const { for (auto ¯o : macros) { result->macros.push_back(macro->Copy()); } - result->name = name; + result->SetFunctionName(GetFunctionName()); CopyFunctionProperties(*result); return std::move(result); } diff --git a/src/duckdb/src/parser/parsed_data/create_pragma_function_info.cpp b/src/duckdb/src/parser/parsed_data/create_pragma_function_info.cpp index ec574e6c3..5448e0fb3 100644 --- a/src/duckdb/src/parser/parsed_data/create_pragma_function_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_pragma_function_info.cpp @@ -4,18 +4,18 @@ namespace duckdb { CreatePragmaFunctionInfo::CreatePragmaFunctionInfo(PragmaFunction function) : CreateFunctionInfo(CatalogType::PRAGMA_FUNCTION_ENTRY), functions(function.name) { - name = function.name; + SetFunctionName(function.name); functions.AddFunction(std::move(function)); internal = true; } CreatePragmaFunctionInfo::CreatePragmaFunctionInfo(PragmaFunctionSet functions_p) : CreateFunctionInfo(CatalogType::PRAGMA_FUNCTION_ENTRY), functions(std::move(functions_p)) { - name = functions.name; + SetFunctionName(functions.name); internal = true; } CreatePragmaFunctionInfo::CreatePragmaFunctionInfo(Identifier name, PragmaFunctionSet functions_p) : CreateFunctionInfo(CatalogType::PRAGMA_FUNCTION_ENTRY), functions(std::move(functions_p)) { - this->name = std::move(name); + this->SetFunctionName(std::move(name)); internal = true; } diff --git a/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp b/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp index 7fdfdc678..d78a4dd8f 100644 --- a/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp @@ -5,13 +5,13 @@ namespace duckdb { 
CreateScalarFunctionInfo::CreateScalarFunctionInfo(ScalarFunction function) : CreateFunctionInfo(CatalogType::SCALAR_FUNCTION_ENTRY), functions(function.name) { - name = function.name; + SetFunctionName(function.name); functions.AddFunction(std::move(function)); internal = true; } CreateScalarFunctionInfo::CreateScalarFunctionInfo(ScalarFunctionSet set) : CreateFunctionInfo(CatalogType::SCALAR_FUNCTION_ENTRY), functions(std::move(set)) { - name = functions.name; + SetFunctionName(functions.name); for (auto &func : functions.functions) { func.name = functions.name; } @@ -19,7 +19,7 @@ CreateScalarFunctionInfo::CreateScalarFunctionInfo(ScalarFunctionSet set) } unique_ptr CreateScalarFunctionInfo::Copy() const { - ScalarFunctionSet set {name}; + ScalarFunctionSet set {GetFunctionName()}; set.functions = functions.functions; auto result = make_uniq(std::move(set)); CopyFunctionProperties(*result); @@ -28,7 +28,7 @@ unique_ptr CreateScalarFunctionInfo::Copy() const { unique_ptr CreateScalarFunctionInfo::GetAlterInfo() const { return make_uniq_base( - AlterEntryData(catalog, schema, name, OnEntryNotFound::RETURN_NULL), + AlterEntryData(GetQualifiedName(), OnEntryNotFound::RETURN_NULL), unique_ptr_cast(Copy())); } diff --git a/src/duckdb/src/parser/parsed_data/create_schema_info.cpp b/src/duckdb/src/parser/parsed_data/create_schema_info.cpp index 889a73e85..ff2ff9da4 100644 --- a/src/duckdb/src/parser/parsed_data/create_schema_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_schema_info.cpp @@ -13,7 +13,7 @@ unique_ptr CreateSchemaInfo::Copy() const { string CreateSchemaInfo::ToString() const { string ret = ""; - string qualified = QualifierToString(temporary ? Identifier() : catalog, Identifier(), schema); + string qualified = QualifierToString(temporary ? 
Identifier() : Catalog(), Identifier(), Schema()); switch (on_conflict) { case OnCreateConflict::ALTER_ON_CONFLICT: { diff --git a/src/duckdb/src/parser/parsed_data/create_secret_info.cpp b/src/duckdb/src/parser/parsed_data/create_secret_info.cpp index a6f5b0962..641a03211 100644 --- a/src/duckdb/src/parser/parsed_data/create_secret_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_secret_info.cpp @@ -16,7 +16,7 @@ unique_ptr CreateSecretInfo::Copy() const { auto result = make_uniq(on_conflict, persist_type); result->storage_type = storage_type; - result->name = name; + result->SetSecretName(GetSecretName()); if (type) { result->type = type->Copy(); @@ -44,8 +44,8 @@ string CreateSecretInfo::ToString() const { create_type = "SECRET"; } result = GetCreatePrefix(create_type); - if (!name.empty()) { - result += " " + SQLIdentifier(name); + if (!GetSecretName().empty()) { + result += " " + SQLIdentifier(GetSecretName()); } if (!storage_type.empty()) { result += " IN" + SQLIdentifier(storage_type); diff --git a/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp b/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp index 1b504f270..2d380695d 100644 --- a/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp @@ -3,15 +3,15 @@ namespace duckdb { CreateSequenceInfo::CreateSequenceInfo() - : CreateInfo(CatalogType::SEQUENCE_ENTRY, Identifier::InvalidSchema()), name(string()), usage_count(0), - increment(1), min_value(1), max_value(NumericLimits::Maximum()), start_value(1), cycle(false) { + : CreateInfo(CatalogType::SEQUENCE_ENTRY, Identifier::InvalidSchema()), usage_count(0), increment(1), min_value(1), + max_value(NumericLimits::Maximum()), start_value(1), cycle(false) { } unique_ptr CreateSequenceInfo::Copy() const { auto result = make_uniq(); CopyProperties(*result); - result->name = name; - result->schema = schema; + result->SetSequenceName(GetSequenceName()); + 
result->SchemaMutable() = Schema(); result->usage_count = usage_count; result->increment = increment; result->min_value = min_value; @@ -35,7 +35,7 @@ string CreateSequenceInfo::ToString() const { if (on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT) { ss << " IF NOT EXISTS "; } - ss << QualifierToString(temporary ? Identifier() : catalog, schema, name); + ss << QualifierToString(temporary ? Identifier() : Catalog(), Schema(), GetSequenceName()); ss << " INCREMENT BY " << increment; ss << " MINVALUE " << min_value; ss << " MAXVALUE " << max_value; diff --git a/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp b/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp index b5b7f6f38..5b8364fcd 100644 --- a/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp @@ -5,13 +5,13 @@ namespace duckdb { CreateTableFunctionInfo::CreateTableFunctionInfo(TableFunction function) : CreateFunctionInfo(CatalogType::TABLE_FUNCTION_ENTRY), functions(function.name) { - name = function.name; + SetFunctionName(function.name); functions.AddFunction(std::move(function)); internal = true; } CreateTableFunctionInfo::CreateTableFunctionInfo(TableFunctionSet set) : CreateFunctionInfo(CatalogType::TABLE_FUNCTION_ENTRY), functions(std::move(set)) { - name = functions.name; + SetFunctionName(functions.name); for (auto &func : functions.functions) { func.name = functions.name; } @@ -19,7 +19,7 @@ CreateTableFunctionInfo::CreateTableFunctionInfo(TableFunctionSet set) } unique_ptr CreateTableFunctionInfo::Copy() const { - TableFunctionSet set {name}; + TableFunctionSet set {GetFunctionName()}; set.functions = functions.functions; auto result = make_uniq(std::move(set)); CopyFunctionProperties(*result); @@ -28,7 +28,7 @@ unique_ptr CreateTableFunctionInfo::Copy() const { unique_ptr CreateTableFunctionInfo::GetAlterInfo() const { return make_uniq_base( - AlterEntryData(catalog, schema, name, 
OnEntryNotFound::RETURN_NULL), functions); + AlterEntryData(GetQualifiedName(), OnEntryNotFound::RETURN_NULL), functions); } } // namespace duckdb diff --git a/src/duckdb/src/parser/parsed_data/create_table_info.cpp b/src/duckdb/src/parser/parsed_data/create_table_info.cpp index ef4af7f08..0e2370ebb 100644 --- a/src/duckdb/src/parser/parsed_data/create_table_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_table_info.cpp @@ -9,7 +9,8 @@ CreateTableInfo::CreateTableInfo() : CreateInfo(CatalogType::TABLE_ENTRY, Identi } CreateTableInfo::CreateTableInfo(Identifier catalog_p, Identifier schema_p, Identifier name_p) - : CreateInfo(CatalogType::TABLE_ENTRY, std::move(schema_p), std::move(catalog_p)), table(std::move(name_p)) { + : CreateInfo(CatalogType::TABLE_ENTRY, std::move(schema_p), std::move(catalog_p)) { + SetTableName(std::move(name_p)); } CreateTableInfo::CreateTableInfo(SchemaCatalogEntry &schema, Identifier name_p) @@ -17,7 +18,7 @@ CreateTableInfo::CreateTableInfo(SchemaCatalogEntry &schema, Identifier name_p) } unique_ptr CreateTableInfo::Copy() const { - auto result = make_uniq(catalog, schema, table); + auto result = make_uniq(Catalog(), Schema(), GetTableName()); CopyProperties(*result); result->columns = columns.Copy(); for (auto &constraint : constraints) { @@ -69,7 +70,7 @@ string CreateTableInfo::ExtraOptionsToString() const { string CreateTableInfo::ToString() const { string ret = GetCreatePrefix("TABLE"); - ret += QualifierToString(temporary ? Identifier() : catalog, schema, table); + ret += QualifierToString(temporary ? 
Identifier() : Catalog(), Schema(), GetTableName()); if (query != nullptr) { ret += TableCatalogEntry::ColumnNamesToSQL(columns); diff --git a/src/duckdb/src/parser/parsed_data/create_trigger_info.cpp b/src/duckdb/src/parser/parsed_data/create_trigger_info.cpp index 7b01ce71b..79595f6c0 100644 --- a/src/duckdb/src/parser/parsed_data/create_trigger_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_trigger_info.cpp @@ -12,7 +12,7 @@ CreateTriggerInfo::CreateTriggerInfo() unique_ptr CreateTriggerInfo::Copy() const { auto result = make_uniq(); CopyProperties(*result); - result->trigger_name = trigger_name; + result->SetTriggerName(GetTriggerName()); result->base_table = unique_ptr_cast(base_table->Copy()); result->timing = timing; result->event_type = event_type; @@ -34,10 +34,10 @@ string CreateTriggerInfo::ToString() const { if (on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT) { ss << "IF NOT EXISTS "; } - if (!IsInvalidSchema(schema)) { - ss << SQLIdentifier(schema) << "."; + if (!IsInvalidSchema(Schema())) { + ss << SQLIdentifier(Schema()) << "."; } - ss << SQLIdentifier(trigger_name); + ss << SQLIdentifier(GetTriggerName()); ss << " "; ss << EnumUtil::ToString(timing); ss << " "; diff --git a/src/duckdb/src/parser/parsed_data/create_type_info.cpp b/src/duckdb/src/parser/parsed_data/create_type_info.cpp index 5f473d23e..8264fb14a 100644 --- a/src/duckdb/src/parser/parsed_data/create_type_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_type_info.cpp @@ -6,14 +6,14 @@ namespace duckdb { CreateTypeInfo::CreateTypeInfo() : CreateInfo(CatalogType::TYPE_ENTRY), bind_function(nullptr) { } CreateTypeInfo::CreateTypeInfo(string name_p, LogicalType type_p, bind_logical_type_function_t bind_function_p) - : CreateInfo(CatalogType::TYPE_ENTRY), name(std::move(name_p)), type(std::move(type_p)), - bind_function(bind_function_p) { + : CreateInfo(CatalogType::TYPE_ENTRY), type(std::move(type_p)), bind_function(bind_function_p) { + 
SetTypeName(Identifier(std::move(name_p))); } unique_ptr CreateTypeInfo::Copy() const { auto result = make_uniq(); CopyProperties(*result); - result->name = name; + result->SetTypeName(GetTypeName()); result->type = type; if (query) { result->query = query->Copy(); @@ -24,7 +24,7 @@ unique_ptr CreateTypeInfo::Copy() const { string CreateTypeInfo::ToString() const { string result = GetCreatePrefix("TYPE"); - result += QualifierToString(temporary ? Identifier() : catalog, schema, name); + result += QualifierToString(temporary ? Identifier() : Catalog(), Schema(), GetTypeName()); if (type.id() == LogicalTypeId::ENUM) { auto &values_insert_order = EnumType::GetValuesInsertOrder(type); idx_t size = EnumType::GetSize(type); diff --git a/src/duckdb/src/parser/parsed_data/create_view_info.cpp b/src/duckdb/src/parser/parsed_data/create_view_info.cpp index a96e0c6b8..bf99a37bf 100644 --- a/src/duckdb/src/parser/parsed_data/create_view_info.cpp +++ b/src/duckdb/src/parser/parsed_data/create_view_info.cpp @@ -11,8 +11,8 @@ namespace duckdb { CreateViewInfo::CreateViewInfo() : CreateInfo(CatalogType::VIEW_ENTRY, Identifier::InvalidSchema()) { } CreateViewInfo::CreateViewInfo(Identifier catalog_p, Identifier schema_p, Identifier view_name_p) - : CreateInfo(CatalogType::VIEW_ENTRY, std::move(schema_p), std::move(catalog_p)), - view_name(std::move(view_name_p)) { + : CreateInfo(CatalogType::VIEW_ENTRY, std::move(schema_p), std::move(catalog_p)) { + SetViewName(std::move(view_name_p)); } CreateViewInfo::CreateViewInfo(SchemaCatalogEntry &schema, Identifier view_name) @@ -21,7 +21,7 @@ CreateViewInfo::CreateViewInfo(SchemaCatalogEntry &schema, Identifier view_name) string CreateViewInfo::ToString() const { string result = GetCreatePrefix("VIEW"); - result += QualifierToString(temporary ? Identifier() : catalog, schema, view_name); + result += QualifierToString(temporary ? 
Identifier() : Catalog(), Schema(), GetViewName()); if (!aliases.empty()) { result += " ("; result += @@ -38,7 +38,7 @@ string CreateViewInfo::ToString() const { } unique_ptr CreateViewInfo::Copy() const { - auto result = make_uniq(catalog, schema, view_name); + auto result = make_uniq(Catalog(), Schema(), GetViewName()); CopyProperties(*result); result->aliases = aliases; result->types = types; @@ -63,7 +63,7 @@ unique_ptr CreateViewInfo::ParseSelect(const string &sql) { unique_ptr CreateViewInfo::FromSelect(ClientContext &context, unique_ptr info) { D_ASSERT(info); - D_ASSERT(!info->view_name.empty()); + D_ASSERT(!info->GetViewName().empty()); D_ASSERT(!info->sql.empty()); D_ASSERT(!info->query); @@ -91,8 +91,8 @@ unique_ptr CreateViewInfo::FromCreateView(ClientContext &context } auto result = unique_ptr_cast(std::move(create_statement.info)); - result->catalog = schema.ParentCatalog().GetName(); - result->schema = schema.name; + result->CatalogMutable() = schema.ParentCatalog().GetName(); + result->SchemaMutable() = schema.name; auto view_binder = Binder::CreateBinder(context); view_binder->BindCreateViewInfo(*result); @@ -114,8 +114,8 @@ vector CreateViewInfo::GetColumnCommentsList() const { auto it = std::find_if(names.begin(), names.end(), [&](const Identifier &n) { return entry.first == n; }); if (it == names.end()) { throw InternalException( - "While serializing comments for view \"%s\" - did not find column \"%s\" in list of names", view_name, - entry.first.GetIdentifierName()); + "While serializing comments for view \"%s\" - did not find column \"%s\" in list of names", + GetViewName(), entry.first.GetIdentifierName()); } result[NumericCast(it - names.begin())] = entry.second; } diff --git a/src/duckdb/src/parser/parsed_data/create_window_function_info.cpp b/src/duckdb/src/parser/parsed_data/create_window_function_info.cpp index 75bfe6215..c5acfb764 100644 --- a/src/duckdb/src/parser/parsed_data/create_window_function_info.cpp +++ 
b/src/duckdb/src/parser/parsed_data/create_window_function_info.cpp @@ -4,14 +4,14 @@ namespace duckdb { CreateWindowFunctionInfo::CreateWindowFunctionInfo(WindowFunction function) : CreateFunctionInfo(CatalogType::WINDOW_FUNCTION_ENTRY), functions(function.name) { - name = function.name; + SetFunctionName(function.name); functions.AddFunction(std::move(function)); internal = true; } CreateWindowFunctionInfo::CreateWindowFunctionInfo(WindowFunctionSet set) : CreateFunctionInfo(CatalogType::WINDOW_FUNCTION_ENTRY), functions(std::move(set)) { - name = functions.name; + SetFunctionName(functions.name); for (auto &func : functions.functions) { func.name = functions.name; } diff --git a/src/duckdb/src/parser/parsed_data/drop_info.cpp b/src/duckdb/src/parser/parsed_data/drop_info.cpp index 501b80b53..d5e770656 100644 --- a/src/duckdb/src/parser/parsed_data/drop_info.cpp +++ b/src/duckdb/src/parser/parsed_data/drop_info.cpp @@ -4,13 +4,16 @@ namespace duckdb { -DropInfo::DropInfo() : ParseInfo(TYPE), catalog(INVALID_CATALOG), schema(INVALID_SCHEMA), cascade(false) { +DropInfo::DropInfo() + : ParseInfo(TYPE), cascade(false), + qualified_name(Identifier(INVALID_CATALOG), Identifier(INVALID_SCHEMA), Identifier()) { } DropInfo::DropInfo(const DropInfo &info) - : ParseInfo(info.info_type), type(info.type), catalog(info.catalog), schema(info.schema), name(info.name), - if_not_found(info.if_not_found), cascade(info.cascade), allow_drop_internal(info.allow_drop_internal), - extra_drop_info(info.extra_drop_info ? info.extra_drop_info->Copy() : nullptr) { + : ParseInfo(info.info_type), type(info.type), if_not_found(info.if_not_found), cascade(info.cascade), + allow_drop_internal(info.allow_drop_internal), + extra_drop_info(info.extra_drop_info ? 
info.extra_drop_info->Copy() : nullptr), + qualified_name(info.qualified_name) { } unique_ptr DropInfo::Copy() const { @@ -21,7 +24,7 @@ string DropInfo::ToString() const { string result = ""; if (type == CatalogType::PREPARED_STATEMENT) { result += "DEALLOCATE PREPARE "; - result += SQLIdentifier(name); + result += SQLIdentifier(Name()); } else { result += "DROP"; result += " " + ParseInfo::TypeToString(type); @@ -29,7 +32,7 @@ string DropInfo::ToString() const { result += " IF EXISTS"; } result += " "; - result += QualifierToString(catalog, schema, name); + result += qualified_name.ToString(); if (type == CatalogType::TRIGGER_ENTRY && extra_drop_info) { auto &trigger_info = extra_drop_info->Cast(); if (trigger_info.base_table) { diff --git a/src/duckdb/src/parser/peg/matcher.cpp b/src/duckdb/src/parser/peg/matcher.cpp index c0a2e53c6..b72069227 100644 --- a/src/duckdb/src/parser/peg/matcher.cpp +++ b/src/duckdb/src/parser/peg/matcher.cpp @@ -964,8 +964,8 @@ class OperatorMatcher : public Matcher { return false; } // Exclude LIKE/SIMILAR operators — handled by LikeVariations at a higher precedence level - if (token_text == "~~" || token_text == "~~*" || token_text == "~~~" || token_text == "!~~" || - token_text == "!~~*" || token_text == "!~") { + if (token_text == "~~" || token_text == "~~*" || token_text == "~~~" || token_text == "~*" || + token_text == "!~~" || token_text == "!~~*" || token_text == "!~" || token_text == "!~*") { return false; } for (auto &c : token_text) { diff --git a/src/duckdb/src/parser/peg/transformer/peg_transformer.cpp b/src/duckdb/src/parser/peg/transformer/peg_transformer.cpp index 44808f671..30a54ee62 100644 --- a/src/duckdb/src/parser/peg/transformer/peg_transformer.cpp +++ b/src/duckdb/src/parser/peg/transformer/peg_transformer.cpp @@ -67,9 +67,9 @@ unique_ptr PEGTransformer::GenerateCreateEnumStmt(unique_ptr(); info->temporary = true; info->internal = false; - info->catalog = Identifier::InvalidCatalog(); - info->schema = 
Identifier::InvalidSchema(); - info->name = Identifier(std::move(entry->enum_name)); + info->CatalogMutable() = Identifier::InvalidCatalog(); + info->SchemaMutable() = Identifier::InvalidSchema(); + info->SetTypeName(Identifier(std::move(entry->enum_name))); info->on_conflict = OnCreateConflict::REPLACE_ON_CONFLICT; // generate the query that will result in the enum creation diff --git a/src/duckdb/src/parser/peg/transformer/peg_transformer_factory.cpp b/src/duckdb/src/parser/peg/transformer/peg_transformer_factory.cpp index e39cad1da..22ff8aca6 100644 --- a/src/duckdb/src/parser/peg/transformer/peg_transformer_factory.cpp +++ b/src/duckdb/src/parser/peg/transformer/peg_transformer_factory.cpp @@ -220,26 +220,18 @@ bool PEGTransformerFactory::ExpressionIsEmptyStar(const ParsedExpression &expr) } QualifiedName PEGTransformerFactory::StringToQualifiedName(vector input) { - QualifiedName result; if (input.empty()) { throw InternalException("QualifiedName cannot be made with an empty input."); } if (input.size() == 1) { - result.catalog = Identifier::InvalidCatalog(); - result.schema = Identifier::InvalidSchema(); - result.name = Identifier(input[0]); + return QualifiedName(Identifier::InvalidCatalog(), Identifier::InvalidSchema(), Identifier(input[0])); } else if (input.size() == 2) { - result.catalog = Identifier::InvalidCatalog(); - result.schema = Identifier(input[0]); - result.name = Identifier(input[1]); + return QualifiedName(Identifier::InvalidCatalog(), Identifier(input[0]), Identifier(input[1])); } else if (input.size() == 3) { - result.catalog = Identifier(input[0]); - result.schema = Identifier(input[1]); - result.name = Identifier(input[2]); + return QualifiedName(Identifier(input[0]), Identifier(input[1]), Identifier(input[2])); } else { throw ParserException("Too many qualifications found - expected [catalog.schema.name] or [schema.name]"); } - return result; } LogicalType PEGTransformerFactory::GetIntervalTargetType(DatePartSpecifier date_part) { diff 
--git a/src/duckdb/src/parser/peg/transformer/transform_alter.cpp b/src/duckdb/src/parser/peg/transformer/transform_alter.cpp index 1bbe66149..dc3804c8c 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_alter.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_alter.cpp @@ -48,9 +48,7 @@ PEGTransformerFactory::TransformAlterTableStmt(PEGTransformer &transformer, cons } auto result = std::move(alter_table_options[0]); result->if_not_found = if_exists ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; - result->catalog = base_table_name->catalog_name; - result->schema = base_table_name->schema_name; - result->name = base_table_name->table_name; + result->GetQualifiedNameMutable() = base_table_name->GetQualifiedName(); return std::move(result); } @@ -72,9 +70,7 @@ unique_ptr PEGTransformerFactory::TransformAlterViewStmt(PEGTransform unique_ptr rename_alter) { auto rename_table = unique_ptr_cast(std::move(rename_alter)); auto result = make_uniq(AlterEntryData(), rename_table->new_table_name); - result->catalog = base_table_name->catalog_name; - result->schema = base_table_name->schema_name; - result->name = base_table_name->table_name; + result->GetQualifiedNameMutable() = base_table_name->GetQualifiedName(); result->if_not_found = if_exists ? 
OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; return std::move(result); } @@ -90,13 +86,13 @@ unique_ptr PEGTransformerFactory::TransformAlterSequenceStmt(PEGTrans const optional &if_exists, const QualifiedName &qualified_sequence_name, unique_ptr alter_sequence_options) { - if (qualified_sequence_name.schema.empty()) { - alter_sequence_options->schema = qualified_sequence_name.catalog; + if (qualified_sequence_name.Schema().empty()) { + alter_sequence_options->GetQualifiedNameMutable() = + QualifiedName(INVALID_CATALOG, qualified_sequence_name.Catalog(), qualified_sequence_name.Name()); } else { - alter_sequence_options->catalog = qualified_sequence_name.catalog; - alter_sequence_options->schema = qualified_sequence_name.schema; + alter_sequence_options->GetQualifiedNameMutable() = QualifiedName( + qualified_sequence_name.Catalog(), qualified_sequence_name.Schema(), qualified_sequence_name.Name()); } - alter_sequence_options->name = qualified_sequence_name.name; alter_sequence_options->if_not_found = if_exists ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; return alter_sequence_options; } @@ -105,10 +101,8 @@ QualifiedName PEGTransformerFactory::TransformQualifiedSequenceName(PEGTransform const optional &catalog_qualification, const optional &schema_qualification, const Identifier &sequence_name) { - QualifiedName result; - result.catalog = catalog_qualification ? *catalog_qualification : INVALID_CATALOG; - result.schema = schema_qualification ? *schema_qualification : INVALID_SCHEMA; - result.name = sequence_name; + QualifiedName result(catalog_qualification ? *catalog_qualification : INVALID_CATALOG, + schema_qualification ? 
*schema_qualification : INVALID_SCHEMA, sequence_name); return result; } @@ -132,11 +126,11 @@ PEGTransformerFactory::TransformSetSequenceOption(PEGTransformer &transformer, } has_owned = true; auto owned_by = unique_ptr_cast(std::move(seq_option.second)); - auto schema = - owned_by->qualified_name.schema.empty() ? Identifier::DefaultSchema() : owned_by->qualified_name.schema; + auto schema = owned_by->qualified_name.Schema().empty() ? Identifier::DefaultSchema() + : owned_by->qualified_name.Schema(); owned_info = make_uniq(CatalogType::SEQUENCE_ENTRY, "", "", "", schema, - owned_by->qualified_name.name, OnEntryNotFound::THROW_EXCEPTION); + owned_by->qualified_name.Name(), OnEntryNotFound::THROW_EXCEPTION); } } if (owned_info) { @@ -161,9 +155,7 @@ void PEGTransformerFactory::AddUpdateToMultiStatement(const unique_ptr(); - table_ref->catalog_name = table_data.catalog; - table_ref->schema_name = table_data.schema; - table_ref->table_name = table_data.name; + table_ref->GetQualifiedNameMutable() = table_data.GetQualifiedName(); node.table = std::move(table_ref); auto set_info = make_uniq(); diff --git a/src/duckdb/src/parser/peg/transformer/transform_call.cpp b/src/duckdb/src/parser/peg/transformer/transform_call.cpp index be8699c24..5992a6301 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_call.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_call.cpp @@ -11,8 +11,8 @@ PEGTransformerFactory::TransformCallStatement(PEGTransformer &transformer, vector table_function_arguments) { auto result = make_uniq(); auto function_expression = - make_uniq(qualified_table_function.catalog, qualified_table_function.schema, - qualified_table_function.name, std::move(table_function_arguments)); + make_uniq(qualified_table_function.Catalog(), qualified_table_function.Schema(), + qualified_table_function.Name(), std::move(table_function_arguments)); result->function = std::move(function_expression); return std::move(result); } diff --git 
a/src/duckdb/src/parser/peg/transformer/transform_comment.cpp b/src/duckdb/src/parser/peg/transformer/transform_comment.cpp index 583272458..08088b19d 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_comment.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_comment.cpp @@ -21,7 +21,7 @@ unique_ptr PEGTransformerFactory::TransformCommentStatement(PEGTra throw ParserException("Invalid column reference: '%s'", column_name.GetIdentifierName()); } auto qualified_name = StringToQualifiedName(identifier); - info = make_uniq(qualified_name.catalog, qualified_name.schema, qualified_name.name, + info = make_uniq(qualified_name.Catalog(), qualified_name.Schema(), qualified_name.Name(), column_name, comment_value, OnEntryNotFound::THROW_EXCEPTION); } else if (comment_on_type == CatalogType::DATABASE_ENTRY) { throw NotImplementedException("Adding comments to databases is not implemented"); @@ -29,8 +29,8 @@ unique_ptr PEGTransformerFactory::TransformCommentStatement(PEGTra throw NotImplementedException("Adding comments to schemas is not implemented"); } else { auto qualified_name = StringToQualifiedName(dotted_identifier); - info = make_uniq(comment_on_type, qualified_name.catalog, qualified_name.schema, - qualified_name.name, comment_value, OnEntryNotFound::THROW_EXCEPTION); + info = make_uniq(comment_on_type, qualified_name.Catalog(), qualified_name.Schema(), + qualified_name.Name(), comment_value, OnEntryNotFound::THROW_EXCEPTION); } if (!info) { throw NotImplementedException("Cannot comment on this type"); diff --git a/src/duckdb/src/parser/peg/transformer/transform_common.cpp b/src/duckdb/src/parser/peg/transformer/transform_common.cpp index ffab6d74f..f22a82e84 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_common.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_common.cpp @@ -266,43 +266,33 @@ PEGTransformerFactory::TransformQualifiedSimpleType(PEGTransformer &transformer, const QualifiedName &qualified_type_name, optional>> 
type_modifiers) { auto result = qualified_type_name; - if (result.schema.empty()) { - result.schema = result.catalog; - result.catalog = INVALID_CATALOG; + if (result.Schema().empty()) { + result = QualifiedName(INVALID_CATALOG, result.Catalog(), result.Name()); } vector> modifiers; if (type_modifiers) { modifiers = std::move(*type_modifiers); } - return make_uniq(result.catalog, result.schema, result.name, std::move(modifiers)); + return make_uniq(result.Catalog(), result.Schema(), result.Name(), std::move(modifiers)); } QualifiedName PEGTransformerFactory::TransformTypeNameAsQualifiedName(PEGTransformer &transformer, const Identifier &type_name) { - QualifiedName result; - result.catalog = INVALID_CATALOG; - result.schema = INVALID_SCHEMA; - result.name = type_name; + QualifiedName result(INVALID_CATALOG, INVALID_SCHEMA, type_name); return result; } QualifiedName PEGTransformerFactory::TransformSchemaReservedTypeName(PEGTransformer &transformer, const Identifier &schema_qualification, const Identifier &reserved_type_name) { - QualifiedName result; - result.catalog = INVALID_CATALOG; - result.schema = schema_qualification; - result.name = reserved_type_name; + QualifiedName result(INVALID_CATALOG, schema_qualification, reserved_type_name); return result; } QualifiedName PEGTransformerFactory::TransformCatalogReservedSchemaTypeName( PEGTransformer &transformer, const Identifier &catalog_qualification, const Identifier &reserved_schema_qualification, const Identifier &reserved_type_name) { - QualifiedName result; - result.catalog = catalog_qualification; - result.schema = reserved_schema_qualification; - result.name = reserved_type_name; + QualifiedName result(catalog_qualification, reserved_schema_qualification, reserved_type_name); return result; } @@ -319,13 +309,15 @@ unique_ptr PEGTransformerFactory::TransformMapType(PEGTransfor unique_ptr PEGTransformerFactory::TransformRowType(PEGTransformer &transformer, - const child_list_t &col_id_type_list) { + const 
optional> &col_id_type_list) { vector> struct_children; - for (auto &child : col_id_type_list) { - auto &type_expr = UnboundType::GetTypeExpression(child.second); - auto new_type_expr = type_expr->Copy(); - new_type_expr->SetAlias(child.first); - struct_children.push_back(std::move(new_type_expr)); + if (col_id_type_list) { + for (auto &child : *col_id_type_list) { + auto &type_expr = UnboundType::GetTypeExpression(child.second); + auto new_type_expr = type_expr->Copy(); + new_type_expr->SetAlias(child.first); + struct_children.push_back(std::move(new_type_expr)); + } } return make_uniq(Identifier("STRUCT"), std::move(struct_children)); } diff --git a/src/duckdb/src/parser/peg/transformer/transform_copy.cpp b/src/duckdb/src/parser/peg/transformer/transform_copy.cpp index 8f0bec34b..052bda0c7 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_copy.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_copy.cpp @@ -134,9 +134,7 @@ PEGTransformerFactory::TransformCopyTable(PEGTransformer &transformer, unique_pt auto result = make_uniq(); auto info = make_uniq(); - info->table = base_table_name->table_name; - info->schema = base_table_name->schema_name; - info->catalog = base_table_name->catalog_name; + info->GetQualifiedNameMutable() = base_table_name->GetQualifiedName(); if (insert_column_list) { info->select_list = StringsToIdentifiers(*insert_column_list); } diff --git a/src/duckdb/src/parser/peg/transformer/transform_create_index.cpp b/src/duckdb/src/parser/peg/transformer/transform_create_index.cpp index 3426e724d..098a37ea6 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_create_index.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_create_index.cpp @@ -18,17 +18,17 @@ unique_ptr PEGTransformerFactory::TransformCreateIndexStmt( if (!index_name) { throw NotImplementedException("Please provide an index name, e.g., CREATE INDEX my_name ..."); } - index_info->index_name = *index_name; - index_info->table = 
base_table_name->table_name; - index_info->catalog = base_table_name->catalog_name; - index_info->schema = base_table_name->schema_name; + index_info->SetIndexName(*index_name); + index_info->table = base_table_name->Table(); + index_info->CatalogMutable() = base_table_name->Catalog(); + index_info->SchemaMutable() = base_table_name->Schema(); index_info->index_type = index_type ? index_type->GetIdentifierName() : "ART"; if (insert_column_list) { for (auto &column : *insert_column_list) { index_info->expressions.push_back( - make_uniq(Identifier(column), base_table_name->table_name)); + make_uniq(Identifier(column), base_table_name->Table())); index_info->parsed_expressions.push_back( - make_uniq(Identifier(column), base_table_name->table_name)); + make_uniq(Identifier(column), base_table_name->Table())); } } if (index_element) { diff --git a/src/duckdb/src/parser/peg/transformer/transform_create_macro.cpp b/src/duckdb/src/parser/peg/transformer/transform_create_macro.cpp index 2dc7c8bfd..c3a6540d5 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_create_macro.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_create_macro.cpp @@ -11,13 +11,13 @@ unique_ptr PEGTransformerFactory::TransformCreateMacroStmt( auto result = make_uniq(); auto info = make_uniq(CatalogType::MACRO_ENTRY); - if (qualified_name.schema.empty()) { - info->schema = qualified_name.catalog; + if (qualified_name.Schema().empty()) { + info->SchemaMutable() = qualified_name.Catalog(); } else { - info->catalog = qualified_name.catalog; - info->schema = qualified_name.schema; + info->CatalogMutable() = qualified_name.Catalog(); + info->SchemaMutable() = qualified_name.Schema(); } - info->name = qualified_name.name; + info->SetFunctionName(qualified_name.Name()); info->on_conflict = if_not_exists ? 
OnCreateConflict::IGNORE_ON_CONFLICT : OnCreateConflict::ERROR_ON_CONFLICT; for (auto &macro_function : macro_definition) { diff --git a/src/duckdb/src/parser/peg/transformer/transform_create_schema.cpp b/src/duckdb/src/parser/peg/transformer/transform_create_schema.cpp index 5acff3941..ed9fc7c9f 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_create_schema.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_create_schema.cpp @@ -5,14 +5,14 @@ namespace duckdb { unique_ptr PEGTransformerFactory::TransformCreateSchemaStmt(PEGTransformer &transformer, const optional &if_not_exists, const QualifiedName &qualified_name) { - if (!qualified_name.catalog.empty()) { + if (!qualified_name.Catalog().empty()) { throw ParserException("CREATE SCHEMA too many dots: expected \"catalog.schema\" or \"schema\""); } auto result = make_uniq(); auto info = make_uniq(); info->on_conflict = if_not_exists ? OnCreateConflict::IGNORE_ON_CONFLICT : OnCreateConflict::ERROR_ON_CONFLICT; - info->catalog = qualified_name.schema; - info->schema = qualified_name.name; + info->CatalogMutable() = qualified_name.Schema(); + info->SchemaMutable() = qualified_name.Name(); result->info = std::move(info); return result; diff --git a/src/duckdb/src/parser/peg/transformer/transform_create_secret.cpp b/src/duckdb/src/parser/peg/transformer/transform_create_secret.cpp index 79e3a0843..8d4b15a62 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_create_secret.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_create_secret.cpp @@ -20,7 +20,7 @@ unique_ptr PEGTransformerFactory::TransformCreateSecretStmt( auto on_conflict = if_not_exists ?
OnCreateConflict::IGNORE_ON_CONFLICT : OnCreateConflict::ERROR_ON_CONFLICT; auto info = make_uniq(on_conflict, SecretPersistType::DEFAULT); if (secret_name) { - info->name = *secret_name; + info->SetSecretName(*secret_name); } if (secret_storage_specifier) { info->storage_type = Identifier(StringUtil::Lower(secret_storage_specifier->GetIdentifierName())); @@ -44,7 +44,7 @@ unique_ptr PEGTransformerFactory::TransformCreateSecretStmt( } info->options.insert({lower_name, option.GetFirstChildOrExpression()}); } - if (info->name.empty()) { + if (info->GetSecretName().empty()) { if (!info->type) { throw ParserException("Failed to create secret - secret must have a type defined"); } @@ -54,7 +54,7 @@ unique_ptr PEGTransformerFactory::TransformCreateSecretStmt( "Can not combine a non-constant expression for the secret type with a default-named secret. Either " "provide an explicit secret name or use a constant expression for the secret type."); } - info->name = Identifier("__default_" + StringUtil::Lower(value.ToString())); + info->SetSecretName(Identifier("__default_" + StringUtil::Lower(value.ToString()))); } result->info = std::move(info); return result; diff --git a/src/duckdb/src/parser/peg/transformer/transform_create_sequence.cpp b/src/duckdb/src/parser/peg/transformer/transform_create_sequence.cpp index 3e0dab0c5..72165c4d1 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_create_sequence.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_create_sequence.cpp @@ -9,9 +9,9 @@ unique_ptr PEGTransformerFactory::TransformCreateSequenceStmt( optional>>> sequence_option) { auto result = make_uniq(); auto info = make_uniq(); - info->catalog = qualified_name.catalog; - info->schema = qualified_name.schema; - info->name = qualified_name.name; + info->CatalogMutable() = qualified_name.Catalog(); + info->SchemaMutable() = qualified_name.Schema(); + info->SetSequenceName(qualified_name.Name()); info->on_conflict = if_not_exists ? 
OnCreateConflict::IGNORE_ON_CONFLICT : OnCreateConflict::ERROR_ON_CONFLICT; case_insensitive_map_t> sequence_options; if (sequence_option) { diff --git a/src/duckdb/src/parser/peg/transformer/transform_create_table.cpp b/src/duckdb/src/parser/peg/transformer/transform_create_table.cpp index 54ca47161..25d23b1b5 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_create_table.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_create_table.cpp @@ -57,11 +57,11 @@ unique_ptr PEGTransformerFactory::TransformCreateTableStmt( PEGTransformer &transformer, const optional &if_not_exists, const QualifiedName &qualified_name, CreateTableDefinition create_table_definition, const optional &commit_action) { auto result = make_uniq(); - if (qualified_name.name.empty()) { + if (qualified_name.Name().empty()) { throw ParserException("Empty table name not supported"); } // Use appropriate constructor - auto info = make_uniq(qualified_name.catalog, qualified_name.schema, qualified_name.name); + auto info = make_uniq(qualified_name.Catalog(), qualified_name.Schema(), qualified_name.Name()); info->on_conflict = if_not_exists ? 
OnCreateConflict::IGNORE_ON_CONFLICT : OnCreateConflict::ERROR_ON_CONFLICT; info->query = std::move(create_table_definition.select_statement); @@ -185,10 +185,7 @@ PEGTransformerFactory::TransformCreateTableConstraint(PEGTransformer &transforme QualifiedName PEGTransformerFactory::TransformIdentifierOrStringLiteral(PEGTransformer &transformer, const string &child) { - QualifiedName result; - result.catalog = INVALID_CATALOG; - result.schema = INVALID_SCHEMA; - result.name = Identifier(child); + QualifiedName result(INVALID_CATALOG, INVALID_SCHEMA, Identifier(child)); return result; } @@ -257,7 +254,7 @@ ConstraintColumnDefinition PEGTransformerFactory::TransformColumnDefinition( } } else if (cc_entry.constraint_name == "ForeignKeyConstraint") { auto &fk_constraint = cc_entry.constraint->Cast(); - fk_constraint.fk_columns.push_back(qualified_name.name); + fk_constraint.fk_columns.push_back(qualified_name.Name()); accumulated_constraints.constraints.push_back(std::move(cc_entry.constraint)); } else if (cc_entry.constraint_name == "ColumnCollation") { if (has_generated) { @@ -291,17 +288,17 @@ ConstraintColumnDefinition PEGTransformerFactory::TransformColumnDefinition( auto generated = std::move(*generated_column); if (generated.expr->HasSubquery()) { throw ParserException("Expression of generated column \"%s\" contains a subquery, which isn't allowed", - qualified_name.name); + qualified_name.Name()); } if (column_type != LogicalType::ANY) { generated.expr = make_uniq(column_type, std::move(generated.expr)); } if (generated.expr->HasSubquery()) { throw ParserException("Expression of generated column \"%s\" contains a subquery, which isn't allowed", - qualified_name.name); + qualified_name.Name()); } - ColumnDefinition col(qualified_name.name, column_type, std::move(generated.expr), TableColumnType::GENERATED); + ColumnDefinition col(qualified_name.Name(), column_type, std::move(generated.expr), TableColumnType::GENERATED); col.SetCompressionType(compression_type); if 
(accumulated_constraints.default_value) { throw ParserException("Not allowed to set default on a generated column"); @@ -311,7 +308,7 @@ ConstraintColumnDefinition PEGTransformerFactory::TransformColumnDefinition( return result; } - ColumnDefinition col(qualified_name.name, column_type); + ColumnDefinition col(qualified_name.Name(), column_type); if (accumulated_constraints.default_value) { col.SetDefaultValue(std::move(accumulated_constraints.default_value)); @@ -406,8 +403,8 @@ ColumnConstraintEntry PEGTransformerFactory::TransformForeignKeyConstraint(PEGTr const optional> &column_list, const KeyActions &key_actions) { ForeignKeyInfo fk_info; - fk_info.schema = base_table_name->schema_name; - fk_info.table = base_table_name->table_name; + fk_info.schema = base_table_name->Schema(); + fk_info.table = base_table_name->Table(); fk_info.type = ForeignKeyType::FK_TYPE_FOREIGN_KEY_TABLE; ColumnConstraintEntry entry; diff --git a/src/duckdb/src/parser/peg/transformer/transform_create_trigger.cpp b/src/duckdb/src/parser/peg/transformer/transform_create_trigger.cpp index b9b0dd3e5..4fd1ca96d 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_create_trigger.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_create_trigger.cpp @@ -34,7 +34,7 @@ unique_ptr PEGTransformerFactory::TransformCreateTriggerStmt( auto result = make_uniq(); auto info = make_uniq(); info->on_conflict = if_not_exists ? 
OnCreateConflict::IGNORE_ON_CONFLICT : OnCreateConflict::ERROR_ON_CONFLICT; - info->trigger_name = trigger_name; + info->SetTriggerName(trigger_name); info->timing = trigger_timing; info->event_type = trigger_event.event_type; info->columns = trigger_event.columns; diff --git a/src/duckdb/src/parser/peg/transformer/transform_create_type.cpp b/src/duckdb/src/parser/peg/transformer/transform_create_type.cpp index bdfffa0d0..cfa61818f 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_create_type.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_create_type.cpp @@ -10,9 +10,9 @@ unique_ptr PEGTransformerFactory::TransformCreateTypeStmt(PEGTr const QualifiedName &qualified_name, unique_ptr create_type) { auto result = make_uniq(); - create_type->catalog = qualified_name.catalog; - create_type->schema = qualified_name.schema; - create_type->name = qualified_name.name; + create_type->CatalogMutable() = qualified_name.Catalog(); + create_type->SchemaMutable() = qualified_name.Schema(); + create_type->SetTypeName(qualified_name.Name()); create_type->on_conflict = if_not_exists ? 
OnCreateConflict::IGNORE_ON_CONFLICT : OnCreateConflict::ERROR_ON_CONFLICT; result->info = std::move(create_type); diff --git a/src/duckdb/src/parser/peg/transformer/transform_create_view.cpp b/src/duckdb/src/parser/peg/transformer/transform_create_view.cpp index ea1c4373d..49701e67e 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_create_view.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_create_view.cpp @@ -66,13 +66,13 @@ void PEGTransformerFactory::WrapRecursiveView(unique_ptr &info, cte_info->query_node = std::move(inner_node); - outer_select->cte_map.map.insert(info->view_name, std::move(cte_info)); + outer_select->cte_map.map.insert(info->GetViewName(), std::move(cte_info)); for (const auto &column : info->aliases) { outer_select->select_list.push_back(make_uniq(column)); } - auto table_description = TableDescription(info->catalog, info->schema, info->view_name); + auto table_description = TableDescription(info->Catalog(), info->Schema(), info->GetViewName()); outer_select->from_table = make_uniq(table_description); auto outer_select_statement = make_uniq(); @@ -82,7 +82,7 @@ void PEGTransformerFactory::WrapRecursiveView(unique_ptr &info, void PEGTransformerFactory::ConvertToRecursiveView(unique_ptr &info, unique_ptr &node) { vector> empty_key_targets; - auto result_node = ToRecursiveCTE(std::move(node), info->view_name, info->aliases, empty_key_targets); + auto result_node = ToRecursiveCTE(std::move(node), info->GetViewName(), info->aliases, empty_key_targets); WrapRecursiveView(info, std::move(result_node)); } @@ -95,9 +95,9 @@ PEGTransformerFactory::TransformCreateViewStmt(PEGTransformer &transformer, cons auto result = make_uniq(); auto info = make_uniq(); info->on_conflict = if_not_exists ? 
OnCreateConflict::IGNORE_ON_CONFLICT : OnCreateConflict::ERROR_ON_CONFLICT; - info->catalog = qualified_name.catalog; - info->schema = qualified_name.schema; - info->view_name = qualified_name.name; + info->CatalogMutable() = qualified_name.Catalog(); + info->SchemaMutable() = qualified_name.Schema(); + info->SetViewName(qualified_name.Name()); if (insert_column_list) { info->aliases = StringsToIdentifiers(*insert_column_list); } diff --git a/src/duckdb/src/parser/peg/transformer/transform_deallocate.cpp b/src/duckdb/src/parser/peg/transformer/transform_deallocate.cpp index e93574225..17a949115 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_deallocate.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_deallocate.cpp @@ -8,7 +8,7 @@ unique_ptr PEGTransformerFactory::TransformDeallocateStatement(PEG const Identifier &identifier) { auto result = make_uniq(); result->info->type = CatalogType::PREPARED_STATEMENT; - result->info->name = identifier; + result->info->NameMutable() = identifier; return std::move(result); } diff --git a/src/duckdb/src/parser/peg/transformer/transform_describe.cpp b/src/duckdb/src/parser/peg/transformer/transform_describe.cpp index 570cecad6..6ebf75be0 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_describe.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_describe.cpp @@ -27,15 +27,15 @@ unique_ptr PEGTransformerFactory::TransformShowTables(PEGTransformer const QualifiedName &qualified_name) { auto showref = make_uniq(); showref->show_type = ShowType::SHOW_FROM; - if (!IsInvalidCatalog(qualified_name.catalog)) { + if (!IsInvalidCatalog(qualified_name.Catalog())) { throw ParserException("Expected \"SHOW TABLES FROM database\", \"SHOW TABLES FROM schema\", or " "\"SHOW TABLES FROM database.schema\""); } - if (IsInvalidSchema(qualified_name.schema)) { - showref->schema_name = qualified_name.name; + if (IsInvalidSchema(qualified_name.Schema())) { + showref->schema_name = qualified_name.Name(); } else { - 
showref->catalog_name = qualified_name.schema; - showref->schema_name = qualified_name.name; + showref->catalog_name = qualified_name.Schema(); + showref->schema_name = qualified_name.Name(); } auto select_node = make_uniq(); select_node->select_list.push_back(make_uniq()); @@ -75,15 +75,15 @@ unique_ptr PEGTransformerFactory::TransformShowQualifiedName(PEGTrans if (showref->show_type == ShowType::SHOW_FROM) { // Logic for SHOW TABLES FROM [database].[schema] - if (IsInvalidSchema(base_table.schema_name)) { - showref->schema_name = base_table.table_name; + if (IsInvalidSchema(base_table.Schema())) { + showref->schema_name = base_table.Table(); } else { - showref->catalog_name = base_table.schema_name; - showref->schema_name = base_table.table_name; + showref->catalog_name = base_table.Schema(); + showref->schema_name = base_table.Table(); } - } else if (IsInvalidSchema(base_table.schema_name)) { + } else if (IsInvalidSchema(base_table.Schema())) { // Logic for unqualified relations (databases, tables, variables) - auto table_name = StringUtil::Lower(base_table.table_name.GetIdentifierName()); + auto table_name = StringUtil::Lower(base_table.Table().GetIdentifierName()); if (table_name == "databases" || table_name == "tables" || table_name == "schemas" || table_name == "variables") { showref->table_name = Identifier("\"" + table_name + "\""); @@ -97,7 +97,7 @@ unique_ptr PEGTransformerFactory::TransformShowQualifiedName(PEGTrans if (target.is_table_name) { // Case: SHOW 'something' or DESCRIBE 'something' auto table_ref = make_uniq(); - table_ref->table_name = target.table_name; + table_ref->TableMutable() = target.table_name; show_select_node->from_table = std::move(table_ref); } else { // Case: A relation/table reference diff --git a/src/duckdb/src/parser/peg/transformer/transform_drop.cpp b/src/duckdb/src/parser/peg/transformer/transform_drop.cpp index 050d3a276..1a59769f2 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_drop.cpp +++ 
b/src/duckdb/src/parser/peg/transformer/transform_drop.cpp @@ -23,9 +23,7 @@ unique_ptr PEGTransformerFactory::TransformDropTable(PEGTransform throw NotImplementedException("Can only drop one object at a time"); } auto base_table = std::move(base_table_name[0]); - info->catalog = base_table->catalog_name; - info->schema = base_table->schema_name; - info->name = base_table->table_name; + info->GetQualifiedNameMutable() = base_table->GetQualifiedName(); info->type = table_or_view; info->if_not_found = if_exists ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; result->info = std::move(info); @@ -53,9 +51,7 @@ PEGTransformerFactory::TransformDropTableFunction(PEGTransformer &transformer, c if (table_function_name.size() > 1) { throw NotImplementedException("Can only drop one object at a time"); } - info->name = table_function_name[0]; - info->catalog = INVALID_CATALOG; - info->schema = INVALID_SCHEMA; + info->GetQualifiedNameMutable() = QualifiedName(INVALID_CATALOG, INVALID_SCHEMA, table_function_name[0]); info->type = comment_macro_table; info->if_not_found = if_exists ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; result->info = std::move(info); @@ -73,9 +69,7 @@ PEGTransformerFactory::TransformDropFunction(PEGTransformer &transformer, const throw NotImplementedException("Can only drop one object at a time"); } const auto &function = function_identifier[0]; - info->catalog = function.catalog.empty() ? INVALID_CATALOG : function.catalog; - info->schema = function.schema; - info->name = function.name; + info->GetQualifiedNameMutable() = function; info->if_not_found = if_exists ? 
OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; info->type = catalog_type; result->info = std::move(info); @@ -91,8 +85,8 @@ PEGTransformerFactory::TransformDropSchema(PEGTransformer &transformer, const op throw NotImplementedException("Can only drop one object at a time"); } const auto &schema = qualified_schema_name[0]; - info->catalog = schema.catalog; - info->name = schema.schema; + info->CatalogMutable() = schema.Catalog(); + info->NameMutable() = schema.Schema(); info->if_not_found = if_exists ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; info->type = CatalogType::SCHEMA_ENTRY; result->info = std::move(info); @@ -101,18 +95,14 @@ PEGTransformerFactory::TransformDropSchema(PEGTransformer &transformer, const op QualifiedName PEGTransformerFactory::TransformQualifiedSchemaNameString(PEGTransformer &transformer, const Identifier &schema_name) { - QualifiedName result; - result.catalog = INVALID_CATALOG; - result.schema = schema_name; + QualifiedName result(INVALID_CATALOG, schema_name, Identifier()); return result; } QualifiedName PEGTransformerFactory::TransformCatalogReservedSchema(PEGTransformer &transformer, const Identifier &catalog_qualification, const Identifier &reserved_schema_name) { - QualifiedName result; - result.catalog = catalog_qualification; - result.schema = reserved_schema_name; + QualifiedName result(catalog_qualification, reserved_schema_name, Identifier()); return result; } @@ -124,10 +114,7 @@ unique_ptr PEGTransformerFactory::TransformDropIndex(PEGTransform if (qualified_index_name.size() > 1) { throw NotImplementedException("Can only drop one object at a time"); } - const auto &index = qualified_index_name[0]; - info->catalog = index.catalog; - info->schema = index.schema; - info->name = index.name; + info->GetQualifiedNameMutable() = qualified_index_name[0]; info->type = CatalogType::INDEX_ENTRY; info->if_not_found = if_exists ? 
OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; result->info = std::move(info); @@ -136,30 +123,21 @@ unique_ptr PEGTransformerFactory::TransformDropIndex(PEGTransform QualifiedName PEGTransformerFactory::TransformQualifiedIndexNameString(PEGTransformer &transformer, const Identifier &index_name) { - QualifiedName result; - result.catalog = INVALID_CATALOG; - result.schema = INVALID_SCHEMA; - result.name = index_name; + QualifiedName result(INVALID_CATALOG, INVALID_SCHEMA, index_name); return result; } QualifiedName PEGTransformerFactory::TransformSchemaReservedIndex(PEGTransformer &transformer, const Identifier &schema_qualification, const Identifier &reserved_index_name) { - QualifiedName result; - result.catalog = INVALID_CATALOG; - result.schema = schema_qualification; - result.name = reserved_index_name; + QualifiedName result(INVALID_CATALOG, schema_qualification, reserved_index_name); return result; } QualifiedName PEGTransformerFactory::TransformCatalogReservedSchemaIndex( PEGTransformer &transformer, const Identifier &catalog_qualification, const Identifier &reserved_schema_qualification, const Identifier &reserved_index_name) { - QualifiedName result; - result.catalog = catalog_qualification; - result.schema = reserved_schema_qualification; - result.name = reserved_index_name; + QualifiedName result(catalog_qualification, reserved_schema_qualification, reserved_index_name); return result; } @@ -172,13 +150,11 @@ PEGTransformerFactory::TransformDropSequence(PEGTransformer &transformer, const throw NotImplementedException("Can only drop one object at a time"); } const auto &sequence = qualified_sequence_name[0]; - if (sequence.schema.empty()) { - info->schema = sequence.catalog; + if (sequence.Schema().empty()) { + info->GetQualifiedNameMutable() = QualifiedName(INVALID_CATALOG, sequence.Catalog(), sequence.Name()); } else { - info->catalog = sequence.catalog; - info->schema = sequence.schema; + info->GetQualifiedNameMutable() = sequence; } 
- info->name = sequence.name; info->if_not_found = if_exists ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; info->type = CatalogType::SEQUENCE_ENTRY; result->info = std::move(info); @@ -219,13 +195,11 @@ unique_ptr PEGTransformerFactory::TransformDropType(PEGTransforme throw NotImplementedException("Can only drop one object at a time"); } const auto &type = qualified_type_name[0]; - if (type.schema.empty()) { - info->schema = type.catalog; + if (type.Schema().empty()) { + info->GetQualifiedNameMutable() = QualifiedName(INVALID_CATALOG, type.Catalog(), type.Name()); } else { - info->catalog = type.catalog; - info->schema = type.schema; + info->GetQualifiedNameMutable() = type; } - info->name = type.name; info->if_not_found = if_exists ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; info->type = CatalogType::TYPE_ENTRY; result->info = std::move(info); @@ -258,7 +232,7 @@ unique_ptr PEGTransformerFactory::TransformDropSecret(PEGTransfor } info->if_not_found = if_exists ? OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; - info->name = secret_name; + info->NameMutable() = secret_name; if (drop_secret_storage) { extra_drop_info->secret_storage = drop_secret_storage->GetIdentifierName(); } @@ -281,7 +255,7 @@ unique_ptr PEGTransformerFactory::TransformDropTrigger(PEGTransfo info->type = CatalogType::TRIGGER_ENTRY; info->if_not_found = if_exists ? 
OnEntryNotFound::RETURN_NULL : OnEntryNotFound::THROW_EXCEPTION; - info->name = trigger_name; + info->NameMutable() = trigger_name; auto extra_info = make_uniq(); extra_info->base_table = std::move(base_table_name); diff --git a/src/duckdb/src/parser/peg/transformer/transform_explain.cpp b/src/duckdb/src/parser/peg/transformer/transform_explain.cpp index 4c28a5bfb..c489c3c46 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_explain.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_explain.cpp @@ -9,8 +9,8 @@ ProfilerPrintFormat ParseProfilerPrintFormat(const Value &val) { if (val.type().id() != LogicalTypeId::VARCHAR) { throw InvalidInputException("Expected a string as argument to FORMAT"); } - // resolve the format name through the shared explain format registry (see main/profiler/profiler_print_format.hpp) - return ProfilerPrintFormat::FromString(val.GetValue()); + // the format name is validated when the renderer is created (needs a ClientContext); only normalize it here + return ProfilerPrintFormat(StringUtil::Lower(val.GetValue())); } unique_ptr diff --git a/src/duckdb/src/parser/peg/transformer/transform_expression.cpp b/src/duckdb/src/parser/peg/transformer/transform_expression.cpp index 50bf4a901..d838825f9 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_expression.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_expression.cpp @@ -58,7 +58,7 @@ PEGTransformerFactory::TransformExpressionStatement(PEGTransformer &transformer, } } else { auto base_table = make_uniq(); - base_table->table_name = col_expr.GetColumnName(); + base_table->TableMutable() = col_expr.GetColumnName(); select_node->from_table = std::move(base_table); } select_node->select_list.push_back(make_uniq()); @@ -174,7 +174,7 @@ unique_ptr PEGTransformerFactory::TransformFunctionExpression( // COUNT(*) gets converted into COUNT() function_children.clear(); } - auto lowercase_name = StringUtil::Lower(qualified_function.name.GetIdentifierName()); + auto 
lowercase_name = StringUtil::Lower(qualified_function.Name().GetIdentifierName()); if (over_clause) { if (transformer.in_window_definition) { @@ -192,8 +192,8 @@ unique_ptr PEGTransformerFactory::TransformFunctionExpression( transformer.in_window_definition = true; auto expr = std::move(*over_clause); - expr->CatalogMutable() = qualified_function.catalog; - expr->SchemaMutable() = qualified_function.schema; + expr->CatalogMutable() = qualified_function.Catalog(); + expr->SchemaMutable() = qualified_function.Schema(); expr->SetFunctionName(lowercase_name); for (auto &arg : function_children) { @@ -323,12 +323,13 @@ unique_ptr PEGTransformerFactory::TransformFunctionExpression( } lowercase_name = "mode"; } else { - throw ParserException("Unknown ordered aggregate \"%s\".", qualified_function.name); + throw ParserException("Unknown ordered aggregate \"%s\".", qualified_function.Name()); } } - auto result = make_uniq( - qualified_function.catalog, qualified_function.schema, Identifier(lowercase_name), std::move(function_children), - std::move(filter_expr), std::move(order_modifier), distinct, false, export_clause); + auto result = + make_uniq(qualified_function.Catalog(), qualified_function.Schema(), + Identifier(lowercase_name), std::move(function_children), std::move(filter_expr), + std::move(order_modifier), distinct, false, export_clause); return std::move(result); } @@ -370,10 +371,7 @@ bool PEGTransformerFactory::TransformAllKeyword(PEGTransformer &transformer) { QualifiedName PEGTransformerFactory::TransformFunctionIdentifier(PEGTransformer &transformer, ParseResult &choice_result) { if (choice_result.type == ParseResultType::IDENTIFIER) { - QualifiedName result; - result.catalog = INVALID_CATALOG; - result.schema = INVALID_SCHEMA; - result.name = choice_result.Cast().identifier; + QualifiedName result(INVALID_CATALOG, INVALID_SCHEMA, choice_result.Cast().identifier); return result; } return transformer.Transform(choice_result); @@ -382,26 +380,18 @@ 
QualifiedName PEGTransformerFactory::TransformFunctionIdentifier(PEGTransformer QualifiedName PEGTransformerFactory::TransformSchemaReservedFunctionName(PEGTransformer &transformer, const Identifier &schema_qualification, const Identifier &reserved_function_name) { - QualifiedName result; - result.catalog = INVALID_CATALOG; - result.schema = schema_qualification; - result.name = reserved_function_name; + QualifiedName result(INVALID_CATALOG, schema_qualification, reserved_function_name); return result; } QualifiedName PEGTransformerFactory::TransformCatalogReservedSchemaFunctionName( PEGTransformer &transformer, const Identifier &catalog_qualification, const optional &reserved_schema_qualification, const Identifier &reserved_function_name) { - QualifiedName result; if (reserved_schema_qualification) { - result.catalog = catalog_qualification; - result.schema = *reserved_schema_qualification; + return QualifiedName(catalog_qualification, *reserved_schema_qualification, reserved_function_name); } else { - result.catalog = INVALID_CATALOG; - result.schema = catalog_qualification; + return QualifiedName(INVALID_CATALOG, catalog_qualification, reserved_function_name); } - result.name = reserved_function_name; - return result; } unique_ptr PEGTransformerFactory::TransformArrayBoundedListExpression( @@ -784,12 +774,150 @@ bool TryNegateLikeFunction(Identifier &function_name) { return true; } else if (function_name == "~~~") { return false; + } else if (function_name == "regexp_matches") { + return false; } else if (function_name == "regexp_full_match") { return false; } return false; } +static string RegexMatchOperatorFunctionName(PEGTransformer &transformer) { + if (transformer.options.regex_match_operator_semantics == RegexMatchOperatorSemantics::FULL) { + return "regexp_full_match"; + } + return "regexp_matches"; +} + +static bool IsRegexMatchFunctionName(const string &function_name) { + auto name = function_name; + if (StringUtil::StartsWith(name, "!")) { + name = 
name.substr(1); + } + return name == "regexp_matches" || name == "regexp_full_match"; +} + +static bool TryRemoveRegexOperatorNegation(Identifier &function_name) { + if (function_name == "!regexp_matches") { + function_name = "regexp_matches"; + return true; + } + if (function_name == "!regexp_full_match") { + function_name = "regexp_full_match"; + return true; + } + return false; +} + +static bool TryRemoveRegexCaseInsensitiveSuffix(string &function_name) { + static constexpr const char *REGEX_CASE_INSENSITIVE_SUFFIX = "__case_insensitive"; + if (!StringUtil::EndsWith(function_name, REGEX_CASE_INSENSITIVE_SUFFIX)) { + return false; + } + function_name = + function_name.substr(0, function_name.size() - std::char_traits::length(REGEX_CASE_INSENSITIVE_SUFFIX)); + return true; +} + +static bool TryGetRegexMatchOperator(const string &op_string, PEGTransformer &transformer, string &function_name, + bool &negated, bool &case_insensitive) { + if (op_string == "~") { + function_name = RegexMatchOperatorFunctionName(transformer); + negated = false; + case_insensitive = false; + return true; + } + if (op_string == "!~") { + function_name = RegexMatchOperatorFunctionName(transformer); + negated = true; + case_insensitive = false; + return true; + } + if (op_string == "~*") { + function_name = RegexMatchOperatorFunctionName(transformer); + negated = false; + case_insensitive = true; + return true; + } + if (op_string == "!~*") { + function_name = RegexMatchOperatorFunctionName(transformer); + negated = true; + case_insensitive = true; + return true; + } + return false; +} + +static unique_ptr MakeFunctionExpression(const string &name, + vector> children) { + return make_uniq(Identifier(name), std::move(children)); +} + +static unique_ptr MakeBooleanConstant(bool value) { + return make_uniq(Value::BOOLEAN(value)); +} + +static unique_ptr MakeListContains(unique_ptr list, bool value) { + vector> children; + children.push_back(std::move(list)); + 
children.push_back(MakeBooleanConstant(value)); + return MakeFunctionExpression("list_contains", std::move(children)); +} + +static unique_ptr MakeListHasNull(unique_ptr list) { + auto is_null = make_uniq(ExpressionType::OPERATOR_IS_NULL, + make_uniq("__regex_match")); + + auto null_check_lambda = make_uniq(vector {"__regex_match"}, std::move(is_null)); + vector> filter_children; + filter_children.push_back(std::move(list)); + filter_children.push_back(std::move(null_check_lambda)); + auto null_matches = MakeFunctionExpression("list_filter", std::move(filter_children)); + + vector> length_children; + length_children.push_back(std::move(null_matches)); + auto null_count = MakeFunctionExpression("len", std::move(length_children)); + + return make_uniq(ExpressionType::COMPARE_GREATERTHAN, std::move(null_count), + make_uniq(Value::INTEGER(0))); +} + +static unique_ptr TransformRegexAnyAllList(unique_ptr left_expr, + unique_ptr right_expr, + const string &function_name, bool negated, + bool case_insensitive, bool is_any) { + vector> regex_children; + regex_children.push_back(std::move(left_expr)); + regex_children.push_back(make_uniq("__regex_pattern")); + if (case_insensitive) { + regex_children.push_back(make_uniq(Value("i"))); + } + unique_ptr regex_match = MakeFunctionExpression(function_name, std::move(regex_children)); + if (negated) { + regex_match = make_uniq(ExpressionType::OPERATOR_NOT, std::move(regex_match)); + } + + auto pattern_lambda = make_uniq(vector {"__regex_pattern"}, std::move(regex_match)); + vector> transform_children; + transform_children.push_back(std::move(right_expr)); + transform_children.push_back(std::move(pattern_lambda)); + auto match_list = MakeFunctionExpression("list_transform", std::move(transform_children)); + + auto result = make_uniq(); + CaseCheck has_decisive_value; + has_decisive_value.when_expr = MakeListContains(match_list->Copy(), is_any); + has_decisive_value.then_expr = MakeBooleanConstant(is_any); + 
result->CaseChecksMutable().push_back(std::move(has_decisive_value)); + + CaseCheck has_null_value; + has_null_value.when_expr = MakeListHasNull(match_list->Copy()); + has_null_value.then_expr = make_uniq(Value()); + result->CaseChecksMutable().push_back(std::move(has_null_value)); + + result->ElseMutable() = MakeBooleanConstant(!is_any); + return std::move(result); +} + unique_ptr PEGTransformerFactory::TransformBetweenInLikeExpression(PEGTransformer &transformer, unique_ptr other_operator_expression, @@ -815,16 +943,17 @@ PEGTransformerFactory::TransformBetweenInLikeExpression(PEGTransformer &transfor } else { func_expr->GetArgumentsMutable().insert(func_expr->GetArgumentsMutable().begin(), std::move(expr)); } + auto regex_operator_negated = TryRemoveRegexOperatorNegation(func_expr->FunctionNameMutable()); if (has_not) { - if (!TryNegateLikeFunction(func_expr->FunctionNameMutable())) { + if (regex_operator_negated) { + expr = std::move(func_expr); + } else if (!TryNegateLikeFunction(func_expr->FunctionNameMutable())) { // If it wasn't a special "Like" function, wrap it in a standard NOT operator expr = make_uniq(ExpressionType::OPERATOR_NOT, std::move(func_expr)); } else { expr = std::move(func_expr); } - } else if (func_expr->FunctionName() == "!~") { - func_expr->FunctionNameMutable() = "regexp_full_match"; - func_expr->IsOperatorMutable() = false; + } else if (regex_operator_negated) { expr = make_uniq(ExpressionType::OPERATOR_NOT, std::move(func_expr)); } else { expr = std::move(func_expr); @@ -911,8 +1040,14 @@ PEGTransformerFactory::TransformLikeClause(PEGTransformer &transformer, const st unique_ptr other_operator_expression, optional> escape_clause) { string like_variation = like_variations; + bool case_insensitive_regex = TryRemoveRegexCaseInsensitiveSuffix(like_variation); + bool is_regex_operator = IsRegexMatchFunctionName(like_variation); vector> like_children; like_children.push_back(std::move(other_operator_expression)); + if (case_insensitive_regex 
&& escape_clause) { + throw ParserException( + "ESCAPE clause is not supported with case-insensitive regular expression match operators"); + } if (escape_clause) { if (like_variation == "~~") { like_variation = "like_escape"; @@ -921,8 +1056,11 @@ PEGTransformerFactory::TransformLikeClause(PEGTransformer &transformer, const st } like_children.push_back(std::move(*escape_clause)); } + if (case_insensitive_regex) { + like_children.push_back(make_uniq(Value("i"))); + } auto result = make_uniq(Identifier(like_variation), std::move(like_children)); - if (like_variation != "regexp_full_match") { + if (!is_regex_operator) { result->IsOperatorMutable() = true; } return std::move(result); @@ -950,6 +1088,14 @@ string PEGTransformerFactory::TransformSimilarToToken(PEGTransformer &transforme return "regexp_full_match"; } +string PEGTransformerFactory::TransformRegexMatchToken(PEGTransformer &transformer) { + return RegexMatchOperatorFunctionName(transformer); +} + +string PEGTransformerFactory::TransformRegexInsensitiveMatchToken(PEGTransformer &transformer) { + return RegexMatchOperatorFunctionName(transformer) + "__case_insensitive"; +} + string PEGTransformerFactory::TransformNotILikeOp(PEGTransformer &transformer) { return "!~~*"; } @@ -958,8 +1104,12 @@ string PEGTransformerFactory::TransformNotLikeOp(PEGTransformer &transformer) { return "!~~"; } +string PEGTransformerFactory::TransformNotRegexInsensitiveMatchOp(PEGTransformer &transformer) { + return "!" + RegexMatchOperatorFunctionName(transformer) + "__case_insensitive"; +} + string PEGTransformerFactory::TransformNotSimilarToOp(PEGTransformer &transformer) { - return "!~"; + return "!" 
+ RegexMatchOperatorFunctionName(transformer); } unique_ptr @@ -999,6 +1149,15 @@ PEGTransformerFactory::TransformOtherOperatorExpression(PEGTransformer &transfor } expr = std::move(subquery_expr); } else { + string regex_function_name; + bool regex_negated; + bool regex_case_insensitive; + if (TryGetRegexMatchOperator(op_string, transformer, regex_function_name, regex_negated, + regex_case_insensitive)) { + expr = TransformRegexAnyAllList(std::move(expr), std::move(right_expr), regex_function_name, + regex_negated, regex_case_insensitive, is_any); + continue; + } // left=ANY(right) // we turn this into left=ANY((SELECT UNNEST(right))) if (expression_type == ExpressionType::INVALID) { diff --git a/src/duckdb/src/parser/peg/transformer/transform_generated.cpp b/src/duckdb/src/parser/peg/transformer/transform_generated.cpp index 2c2ba3a58..a023fd708 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_generated.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_generated.cpp @@ -1196,7 +1196,7 @@ unique_ptr PEGTransformerFactory::TransformTypeModifiersIn unique_ptr PEGTransformerFactory::TransformRowTypeInternal(PEGTransformer &transformer, ParseResult &parse_result) { auto &list_pr = parse_result.Cast(); - child_list_t col_id_type_list {}; + optional> col_id_type_list {}; auto &col_id_type_list_opt = list_pr.GetChild(1).Cast(); if (col_id_type_list_opt.HasResult()) { auto col_id_type_list_value = @@ -5868,6 +5868,19 @@ unique_ptr PEGTransformerFactory::TransformSimilarToTokenI return make_uniq>(result); } +unique_ptr PEGTransformerFactory::TransformRegexMatchTokenInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto result = TransformRegexMatchToken(transformer); + return make_uniq>(result); +} + +unique_ptr +PEGTransformerFactory::TransformRegexInsensitiveMatchTokenInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto result = TransformRegexInsensitiveMatchToken(transformer); + return make_uniq>(result); +} 
+ unique_ptr PEGTransformerFactory::TransformNotILikeOpInternal(PEGTransformer &transformer, ParseResult &parse_result) { auto result = TransformNotILikeOp(transformer); @@ -5880,6 +5893,13 @@ unique_ptr PEGTransformerFactory::TransformNotLikeOpIntern return make_uniq>(result); } +unique_ptr +PEGTransformerFactory::TransformNotRegexInsensitiveMatchOpInternal(PEGTransformer &transformer, + ParseResult &parse_result) { + auto result = TransformNotRegexInsensitiveMatchOp(transformer); + return make_uniq>(result); +} + unique_ptr PEGTransformerFactory::TransformNotSimilarToOpInternal(PEGTransformer &transformer, ParseResult &parse_result) { auto result = TransformNotSimilarToOp(transformer); @@ -10558,8 +10578,11 @@ void PEGTransformerFactory::RegisterGenerated() { {"ILikeToken", &PEGTransformerFactory::TransformILikeTokenInternal}, {"GlobToken", &PEGTransformerFactory::TransformGlobTokenInternal}, {"SimilarToToken", &PEGTransformerFactory::TransformSimilarToTokenInternal}, + {"RegexMatchToken", &PEGTransformerFactory::TransformRegexMatchTokenInternal}, + {"RegexInsensitiveMatchToken", &PEGTransformerFactory::TransformRegexInsensitiveMatchTokenInternal}, {"NotILikeOp", &PEGTransformerFactory::TransformNotILikeOpInternal}, {"NotLikeOp", &PEGTransformerFactory::TransformNotLikeOpInternal}, + {"NotRegexInsensitiveMatchOp", &PEGTransformerFactory::TransformNotRegexInsensitiveMatchOpInternal}, {"NotSimilarToOp", &PEGTransformerFactory::TransformNotSimilarToOpInternal}, {"InClause", &PEGTransformerFactory::TransformInClauseInternal}, {"InExpression", &PEGTransformerFactory::TransformInExpressionInternal}, diff --git a/src/duckdb/src/parser/peg/transformer/transform_insert.cpp b/src/duckdb/src/parser/peg/transformer/transform_insert.cpp index 64066880e..714209e6b 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_insert.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_insert.cpp @@ -17,9 +17,9 @@ unique_ptr PEGTransformerFactory::TransformInsertStatement( 
if (with_clause) { node.cte_map = std::move(*with_clause); } - node.catalog = insert_target->catalog_name; - node.schema = insert_target->schema_name; - node.table = insert_target->table_name; + node.catalog = insert_target->Catalog(); + node.schema = insert_target->Schema(); + node.table = insert_target->Table(); node.column_order = by_name_or_position ? *by_name_or_position : InsertColumnOrder::INSERT_BY_POSITION; if (insert_column_list) { node.columns = StringsToIdentifiers(*insert_column_list); diff --git a/src/duckdb/src/parser/peg/transformer/transform_load.cpp b/src/duckdb/src/parser/peg/transformer/transform_load.cpp index 64e398b34..6fce3548f 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_load.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_load.cpp @@ -33,7 +33,7 @@ unique_ptr PEGTransformerFactory::TransformInstallStatement( auto result = make_uniq(); auto info = make_uniq(); info->load_type = LoadType::INSTALL; - info->filename = identifier_or_string_literal.name.GetIdentifierName(); + info->filename = identifier_or_string_literal.Name().GetIdentifierName(); info->repo_is_alias = false; if (from_source) { info->repository = from_source->name.GetIdentifierName(); @@ -76,7 +76,7 @@ PEGTransformerFactory::TransformUpdateExtensionsStatement(PEGTransformer &transf string PEGTransformerFactory::TransformVersionNumber(PEGTransformer &transformer, const QualifiedName &identifier_or_string_literal) { - return identifier_or_string_literal.name.GetIdentifierName(); + return identifier_or_string_literal.Name().GetIdentifierName(); } } // namespace duckdb diff --git a/src/duckdb/src/parser/peg/transformer/transform_select.cpp b/src/duckdb/src/parser/peg/transformer/transform_select.cpp index 7bac4239e..063e12cdb 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_select.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_select.cpp @@ -255,20 +255,14 @@ Identifier 
PEGTransformerFactory::TransformCatalogQualification(PEGTransformer & QualifiedName PEGTransformerFactory::TransformCatalogReservedSchemaIdentifier( PEGTransformer &transformer, const Identifier &catalog_qualification, const Identifier &reserved_schema_qualification, const Identifier &reserved_identifier_or_string_literal) { - QualifiedName result; - result.catalog = catalog_qualification; - result.schema = reserved_schema_qualification; - result.name = reserved_identifier_or_string_literal; + QualifiedName result(catalog_qualification, reserved_schema_qualification, reserved_identifier_or_string_literal); return result; } QualifiedName PEGTransformerFactory::TransformSchemaReservedIdentifierOrStringLiteral( PEGTransformer &transformer, const Identifier &schema_qualification, const Identifier &reserved_identifier_or_string_literal) { - QualifiedName result; - result.catalog = INVALID_CATALOG; - result.schema = schema_qualification; - result.name = reserved_identifier_or_string_literal; + QualifiedName result(INVALID_CATALOG, schema_qualification, reserved_identifier_or_string_literal); return result; } @@ -587,8 +581,8 @@ unique_ptr PEGTransformerFactory::TransformTableFunctionLateralOpt( result->with_ordinality = with_ordinality.value_or(false) ? OrdinalityType::WITH_ORDINALITY : OrdinalityType::WITHOUT_ORDINALITY; result->function = - make_uniq(qualified_table_function.catalog, qualified_table_function.schema, - qualified_table_function.name, std::move(table_function_arguments)); + make_uniq(qualified_table_function.Catalog(), qualified_table_function.Schema(), + qualified_table_function.Name(), std::move(table_function_arguments)); if (table_alias) { result->alias = table_alias->name; result->column_name_alias = table_alias->column_name_alias; @@ -604,8 +598,8 @@ unique_ptr PEGTransformerFactory::TransformTableFunctionAliasColon( result->with_ordinality = with_ordinality.value_or(false) ? 
OrdinalityType::WITH_ORDINALITY : OrdinalityType::WITHOUT_ORDINALITY; result->function = - make_uniq(qualified_table_function.catalog, qualified_table_function.schema, - qualified_table_function.name, std::move(table_function_arguments)); + make_uniq(qualified_table_function.Catalog(), qualified_table_function.Schema(), + qualified_table_function.Name(), std::move(table_function_arguments)); result->alias = table_alias_colon; if (sample_clause) { result->sample = std::move(*sample_clause); @@ -1276,14 +1270,13 @@ QualifiedName PEGTransformerFactory::TransformQualifiedTableFunction(PEGTransfor const optional &catalog_qualification, const optional &schema_qualification, const Identifier &table_function_name) { - QualifiedName result; - result.catalog = catalog_qualification ? *catalog_qualification : INVALID_CATALOG; - result.schema = schema_qualification ? *schema_qualification : INVALID_SCHEMA; - if (!result.catalog.empty() && result.schema.empty()) { - result.schema = result.catalog; - result.catalog = INVALID_CATALOG; - } - result.name = table_function_name; + QualifiedName result(catalog_qualification ? *catalog_qualification : INVALID_CATALOG, + schema_qualification ? 
*schema_qualification : INVALID_SCHEMA, Identifier()); + if (!result.Catalog().empty() && result.Schema().empty()) { + result.SchemaMutable() = result.Catalog(); + result.CatalogMutable() = INVALID_CATALOG; + } + result.NameMutable() = table_function_name; return result; } @@ -1300,7 +1293,7 @@ TableAlias PEGTransformerFactory::TransformTableAliasAs(PEGTransformer &transfor const QualifiedName &identifier_or_string_literal, const optional> &column_aliases) { TableAlias result; - result.name = identifier_or_string_literal.name; + result.name = identifier_or_string_literal.Name(); if (column_aliases) { result.column_name_alias = StringsToIdentifiers(*column_aliases); } diff --git a/src/duckdb/src/parser/peg/transformer/transform_use.cpp b/src/duckdb/src/parser/peg/transformer/transform_use.cpp index 50e23914a..32506c644 100644 --- a/src/duckdb/src/parser/peg/transformer/transform_use.cpp +++ b/src/duckdb/src/parser/peg/transformer/transform_use.cpp @@ -7,10 +7,10 @@ namespace duckdb { unique_ptr PEGTransformerFactory::TransformUseStatement(PEGTransformer &transformer, const QualifiedName &use_target) { string value_str; - if (IsInvalidSchema(use_target.schema)) { - value_str = SQLIdentifier::ToString(use_target.name.GetIdentifierName()); + if (IsInvalidSchema(use_target.Schema())) { + value_str = SQLIdentifier::ToString(use_target.Name().GetIdentifierName()); } else { - value_str = SQLIdentifier(use_target.schema) + "." + SQLIdentifier(use_target.name); + value_str = SQLIdentifier(use_target.Schema()) + "." 
+ SQLIdentifier(use_target.Name()); } auto value_expr = make_uniq(Value(value_str)); @@ -21,14 +21,14 @@ unique_ptr PEGTransformerFactory::TransformUseStatement(PEGTransfo QualifiedName PEGTransformerFactory::TransformSchemaNameAsUseTarget(PEGTransformer &transformer, const Identifier &schema_name) { QualifiedName result; - result.name = schema_name; + result.NameMutable() = schema_name; return result; } QualifiedName PEGTransformerFactory::TransformCatalogNameAsUseTarget(PEGTransformer &transformer, const Identifier &catalog_name) { QualifiedName result; - result.name = catalog_name; + result.NameMutable() = catalog_name; return result; } @@ -40,10 +40,7 @@ PEGTransformerFactory::TransformUseTargetCatalogSchema(PEGTransformer &transform if (dot_identifier && !dot_identifier->empty()) { throw ParserException("Expected \"USE database\" or \"USE database.schema\""); } - QualifiedName result; - result.catalog = Identifier::InvalidCatalog(); - result.schema = catalog_name; - result.name = reserved_schema_name; + QualifiedName result(Identifier::InvalidCatalog(), catalog_name, reserved_schema_name); return result; } diff --git a/src/duckdb/src/parser/qualified_name.cpp b/src/duckdb/src/parser/qualified_name.cpp index d8b94dacb..0ec9449b6 100644 --- a/src/duckdb/src/parser/qualified_name.cpp +++ b/src/duckdb/src/parser/qualified_name.cpp @@ -1,11 +1,12 @@ #include "duckdb/parser/qualified_name.hpp" #include "duckdb/parser/parsed_data/parse_info.hpp" #include "duckdb/common/exception/parser_exception.hpp" +#include "duckdb/common/types/hash.hpp" namespace duckdb { string QualifiedName::ToString() const { - return ParseInfo::QualifierToString(catalog, schema, name); + return ParseInfo::QualifierToString(Catalog(), Schema(), Name()); } vector QualifiedName::ParseComponents(const string &input) { @@ -48,6 +49,21 @@ vector QualifiedName::ParseComponents(const string &input) { return result; } +hash_t QualifiedName::Hash() const { + hash_t result = Catalog().Hash(); + result = 
CombineHash(result, Schema().Hash()); + result = CombineHash(result, Name().Hash()); + return result; +} + +bool QualifiedName::operator==(const QualifiedName &rhs) const { + return Catalog() == rhs.Catalog() && Schema() == rhs.Schema() && Name() == rhs.Name(); +} + +bool QualifiedName::operator!=(const QualifiedName &rhs) const { + return !(*this == rhs); +} + QualifiedName QualifiedName::Parse(const string &input) { Identifier catalog; Identifier schema; diff --git a/src/duckdb/src/parser/tableref/basetableref.cpp b/src/duckdb/src/parser/tableref/basetableref.cpp index abb9c1818..176368fe2 100644 --- a/src/duckdb/src/parser/tableref/basetableref.cpp +++ b/src/duckdb/src/parser/tableref/basetableref.cpp @@ -8,9 +8,9 @@ namespace duckdb { string BaseTableRef::ToString() const { string result; - result += catalog_name.empty() ? "" : (SQLIdentifier(catalog_name) + "."); - result += schema_name.empty() ? "" : (SQLIdentifier(schema_name) + "."); - result += SQLIdentifier(table_name); + result += Catalog().empty() ? "" : (SQLIdentifier(Catalog()) + "."); + result += Schema().empty() ? 
"" : (SQLIdentifier(Schema()) + "."); + result += SQLIdentifier(Table()); result += AliasToString(column_name_alias); if (at_clause) { result += " " + at_clause->ToString(); @@ -24,16 +24,14 @@ bool BaseTableRef::Equals(const TableRef &other_p) const { return false; } auto &other = other_p.Cast(); - return other.catalog_name == catalog_name && other.schema_name == schema_name && other.table_name == table_name && + return other.Catalog() == Catalog() && other.Schema() == Schema() && other.Table() == Table() && column_name_alias == other.column_name_alias && AtClause::Equals(at_clause.get(), other.at_clause.get()); } unique_ptr BaseTableRef::Copy() { auto copy = make_uniq(); - copy->catalog_name = catalog_name; - copy->schema_name = schema_name; - copy->table_name = table_name; + copy->GetQualifiedNameMutable() = GetQualifiedName(); copy->column_name_alias = column_name_alias; copy->at_clause = at_clause ? at_clause->Copy() : nullptr; CopyProperties(*copy); diff --git a/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp b/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp index 55c92b7d0..f9b17e78f 100644 --- a/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +++ b/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp @@ -3,7 +3,6 @@ #include "duckdb/common/pair.hpp" #include "duckdb/execution/expression_executor.hpp" #include "duckdb/function/function_binder.hpp" -#include "duckdb/function/scalar/generic_functions.hpp" #include "duckdb/function/scalar/generic_common.hpp" #include "duckdb/main/config.hpp" #include "duckdb/parser/expression/constant_expression.hpp" @@ -13,7 +12,6 @@ #include "duckdb/planner/expression/bound_cast_expression.hpp" #include "duckdb/planner/expression/bound_columnref_expression.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" -#include "duckdb/planner/expression/bound_function_expression.hpp" #include 
"duckdb/planner/expression_binder/base_select_binder.hpp" #include "duckdb/planner/expression_iterator.hpp" #include "duckdb/planner/query_node/bound_select_node.hpp" @@ -21,7 +19,8 @@ namespace duckdb { -static bool IsFunctionallyDependent(const unique_ptr &expr, const vector> &deps) { +bool BaseSelectBinder::IsFunctionallyDependent(const unique_ptr &expr, + const vector> &deps) { // Volatile expressions can't depend on anything else if (expr->IsVolatile()) { return false; diff --git a/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp b/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp index 137e9b37b..d19280535 100644 --- a/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +++ b/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp @@ -181,6 +181,7 @@ void TryTransformStarLike(unique_ptr &root) { "~~*", "!~~*", "regexp_full_match", + "regexp_matches", "not_like_escape", "ilike_escape", "not_ilike_escape", @@ -202,7 +203,8 @@ void TryTransformStarLike(unique_ptr &root) { auto original_alias = root->GetAlias(); auto star_expr = std::move(left); unique_ptr child_expr; - if (!inverse && function.FunctionName() == "regexp_full_match" && star.ExcludeList().empty()) { + if (!inverse && function.FunctionName() == "regexp_full_match" && function.GetArguments().size() == 2 && + star.ExcludeList().empty()) { // * SIMILAR TO '[regex]' is equivalent to COLUMNS('[regex]') so we can just move the expression directly child_expr = std::move(right.GetExpressionMutable()); } else { diff --git a/src/duckdb/src/planner/binder/query_node/bind_trigger_expansion.cpp b/src/duckdb/src/planner/binder/query_node/bind_trigger_expansion.cpp index 44cc76f29..2bf378828 100644 --- a/src/duckdb/src/planner/binder/query_node/bind_trigger_expansion.cpp +++ b/src/duckdb/src/planner/binder/query_node/bind_trigger_expansion.cpp @@ -115,7 +115,7 @@ static unique_ptr MakeTransitionTableAliasCTE(const I } 
alias_select->select_list.push_back(std::move(star)); auto alias_ref = make_uniq(); - alias_ref->table_name = base_cte_name; + alias_ref->TableMutable() = base_cte_name; alias_select->from_table = std::move(alias_ref); alias_cte->query_node = std::move(alias_select); alias_cte->materialized = CTEMaterialize::CTE_MATERIALIZE_DEFAULT; @@ -264,7 +264,7 @@ static unique_ptr BuildTriggerChain(const QueryNode &node, const Tab make_uniq("count_star", vector>())); } auto from_ref = make_uniq(); - from_ref->table_name = base_cte_name; + from_ref->TableMutable() = base_cte_name; from_ref->alias = GetTableAlias(node, table.name); outer->from_table = std::move(from_ref); @@ -457,14 +457,23 @@ unique_ptr Binder::TryExpandRowTriggers(QueryNode &node, throw NotImplementedException("RETURNING is not yet supported on tables with FOR EACH ROW triggers"); } expanded_tables.insert(table); - auto bound = ExpandRowTriggers(node, returning_list, table, triggers); + auto bound = ExpandRowTriggers(node, returning_list, table, triggers, event_type); expanded_tables.erase(table); return make_uniq(std::move(bound)); } -unique_ptr Binder::SetupNewRowScope(TableIndex table_index, const vector &col_names, - const vector &col_types) { - bind_context.AddGenericBinding(table_index, "new", col_names, col_types); +string Binder::RowScopeName(TriggerEventType event_type) { + switch (event_type) { + case TriggerEventType::DELETE_EVENT: + return "old"; + default: + return "new"; + } +} + +unique_ptr Binder::SetupRowScope(TableIndex table_index, const vector &col_names, + const vector &col_types, const string &scope_name) { + bind_context.AddGenericBinding(table_index, Identifier(scope_name), col_names, col_types); auto scope_binder = make_uniq(*this, context); GetActiveBinders().push_back(*scope_binder); return scope_binder; @@ -472,7 +481,8 @@ unique_ptr Binder::SetupNewRowScope(TableIndex table_index, co BoundStatement Binder::ExpandRowTriggers(QueryNode &node, vector> &returning_list, const 
TableCatalogEntry &table, - const vector> &triggers) { + const vector> &triggers, + TriggerEventType event_type) { D_ASSERT(!triggers.empty()); D_ASSERT(returning_list.empty()); returning_list.push_back(make_uniq()); @@ -488,7 +498,7 @@ BoundStatement Binder::ExpandRowTriggers(QueryNode &node, vector(); outer->select_list.push_back(make_uniq("count_star", vector>())); auto from_ref = make_uniq(); - from_ref->table_name = base_cte_name; + from_ref->TableMutable() = base_cte_name; outer->from_table = std::move(from_ref); outer->cte_map.map[base_cte_name] = std::move(base_cte); @@ -496,7 +506,7 @@ BoundStatement Binder::ExpandRowTriggers(QueryNode &node, vectorCast(); auto cte_table_idx = base_mat_cte.table_index; - // proj_idx is the binding source for NEW.col refs in trigger bodies. + // proj_idx is the binding source for NEW.col (INSERT) / OLD.col (DELETE) refs in trigger bodies. vector col_names; vector col_types; for (auto &col : table.GetColumns().Physical()) { @@ -508,7 +518,7 @@ BoundStatement Binder::ExpandRowTriggers(QueryNode &node, vector(cte_ref_idx, cte_table_idx, col_types, col_names, false); cte_ref->ResolveOperatorTypes(); - auto proj_idx = GenerateTableIndex(); // the table_index for NEW bindings + auto proj_idx = GenerateTableIndex(); // the table_index for NEW/OLD bindings vector> proj_exprs; for (idx_t i = 0; i < col_types.size(); i++) { proj_exprs.push_back(make_uniq(col_names[i], col_types[i], @@ -518,7 +528,8 @@ BoundStatement Binder::ExpandRowTriggers(QueryNode &node, vectorchildren.push_back(std::move(cte_ref)); new_rows_proj->ResolveOperatorTypes(); - auto new_scope_binder = SetupNewRowScope(proj_idx, col_names, col_types); + auto row_scope_name = RowScopeName(event_type); + auto new_scope_binder = SetupRowScope(proj_idx, col_names, col_types, row_scope_name); unique_ptr trigger_plan = std::move(new_rows_proj); for (idx_t i = 0; i < triggers.size(); i++) { @@ -530,7 +541,7 @@ BoundStatement Binder::ExpandRowTriggers(QueryNode &node, 
vectorcorrelated_columns); if (corr_cols.empty()) { - throw BinderException("FOR EACH ROW trigger \"%s\" on table \"%s\" must reference at least one NEW " + throw BinderException("FOR EACH ROW trigger \"%s\" on table \"%s\" must reference at least one NEW or OLD " "column in the trigger body (use FOR EACH STATEMENT if row data is not needed)", trigger.name, table.name); } @@ -554,7 +565,7 @@ BoundStatement Binder::ExpandRowTriggers(QueryNode &node, vectorResolveOperatorTypes(); trigger_plan = std::move(logi_trig); } - // remove new_scope_binder + // remove row_scope_binder GetActiveBinders().pop_back(); Identifier trigger_cte_name(string(TRIGGER_BODY_CTE_PREFIX) + "row_" + uuid_suffix); diff --git a/src/duckdb/src/planner/binder/statement/bind_copy.cpp b/src/duckdb/src/planner/binder/statement/bind_copy.cpp index 92ae253ab..95a461255 100644 --- a/src/duckdb/src/planner/binder/statement/bind_copy.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_copy.cpp @@ -504,7 +504,7 @@ BoundStatement Binder::BindCopyFrom(CopyStatement &stmt, const CopyFunction &fun result.types = {LogicalType::BIGINT}; result.names = {"Count"}; - if (stmt.info->table.empty()) { + if (stmt.info->Table().empty()) { throw ParserException("COPY FROM requires a table name to be specified"); } if (!function.copy_from_bind) { @@ -514,9 +514,9 @@ BoundStatement Binder::BindCopyFrom(CopyStatement &stmt, const CopyFunction &fun // generate an insert statement for the to-be-inserted table InsertStatement insert; auto &insert_node = *insert.node; - insert_node.table = stmt.info->table; - insert_node.schema = stmt.info->schema; - insert_node.catalog = stmt.info->catalog; + insert_node.table = stmt.info->Table(); + insert_node.schema = stmt.info->Schema(); + insert_node.catalog = stmt.info->Catalog(); insert_node.columns = stmt.info->select_list; // bind the insert statement to the base table @@ -526,9 +526,9 @@ BoundStatement Binder::BindCopyFrom(CopyStatement &stmt, const CopyFunction &fun auto 
&bound_insert = insert_statement.plan->Cast(); // lookup the table to copy into - BindSchemaOrCatalog(stmt.info->catalog, stmt.info->schema); + BindSchemaOrCatalog(stmt.info->GetQualifiedNameMutable()); auto &table = - Catalog::GetEntry(context, stmt.info->catalog, stmt.info->schema, stmt.info->table); + Catalog::GetEntry(context, stmt.info->Catalog(), stmt.info->Schema(), stmt.info->Table()); physical_index_vector_t column_index_map; vector named_column_map; vector expected_types; @@ -655,9 +655,7 @@ BoundStatement Binder::Bind(CopyStatement &stmt, CopyToType copy_to_type) { // copy table into file without a query // generate SELECT * FROM table; auto ref = make_uniq(); - ref->catalog_name = stmt.info->catalog; - ref->schema_name = stmt.info->schema; - ref->table_name = stmt.info->table; + ref->GetQualifiedNameMutable() = stmt.info->GetQualifiedName(); auto statement = make_uniq(); statement->from_table = std::move(ref); diff --git a/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp b/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp index 705b6ede6..918c535f4 100644 --- a/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp @@ -29,7 +29,7 @@ unique_ptr Binder::BindCopyDatabaseSchema(Catalog &from_databas auto info = make_uniq(target_database_name); for (auto &entry : catalog_entries) { auto create_info = entry.get().GetInfo(); - create_info->catalog = target_database_name; + create_info->CatalogMutable() = target_database_name; auto on_conflict = create_info->type == CatalogType::SCHEMA_ENTRY ? 
OnCreateConflict::IGNORE_ON_CONFLICT : OnCreateConflict::ERROR_ON_CONFLICT; // Update all the dependencies of the entry to point to the newly created entries on the target database @@ -66,9 +66,8 @@ unique_ptr Binder::BindCopyDatabaseData(Catalog &source_catalog insert_node.table = table.name; auto from_tbl = make_uniq(); - from_tbl->catalog_name = source_catalog.GetName(); - from_tbl->schema_name = table.ParentSchema().name; - from_tbl->table_name = table.name; + from_tbl->GetQualifiedNameMutable() = + QualifiedName(source_catalog.GetName(), table.ParentSchema().name, table.name); auto select_node = make_uniq(); auto &select_list = select_node->select_list; diff --git a/src/duckdb/src/planner/binder/statement/bind_create.cpp b/src/duckdb/src/planner/binder/statement/bind_create.cpp index 84416f015..dec6d4db0 100644 --- a/src/duckdb/src/planner/binder/statement/bind_create.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_create.cpp @@ -58,9 +58,7 @@ static unique_ptr MakeTriggerValidationCTE(const Tabl auto alias_select = make_uniq(); alias_select->select_list.push_back(make_uniq()); auto alias_table_ref = make_uniq(); - alias_table_ref->table_name = table.name; - alias_table_ref->schema_name = table.schema.name; - alias_table_ref->catalog_name = table.catalog.GetName(); + alias_table_ref->GetQualifiedNameMutable() = QualifiedName(table.catalog.GetName(), table.schema.name, table.name); alias_select->from_table = std::move(alias_table_ref); auto alias_cte = make_uniq(); alias_cte->query_node = std::move(alias_select); @@ -114,6 +112,22 @@ void Binder::BindSchemaOrCatalog(Identifier &catalog, Identifier &schema) { BindSchemaOrCatalog(context, catalog, schema); } +void Binder::BindSchemaOrCatalog(CatalogEntryRetriever &retriever, QualifiedName &qualified_name) { + auto catalog = qualified_name.Catalog(); + auto schema = qualified_name.Schema(); + BindSchemaOrCatalog(retriever, catalog, schema); + qualified_name = QualifiedName(std::move(catalog), 
std::move(schema), qualified_name.Name()); +} + +void Binder::BindSchemaOrCatalog(ClientContext &context, QualifiedName &qualified_name) { + CatalogEntryRetriever retriever(context); + BindSchemaOrCatalog(retriever, qualified_name); +} + +void Binder::BindSchemaOrCatalog(QualifiedName &qualified_name) { + BindSchemaOrCatalog(context, qualified_name); +} + Identifier Binder::BindCatalog(const Identifier &catalog) { auto &db_manager = DatabaseManager::Get(context); optional_ptr database = db_manager.GetDatabase(context, catalog); @@ -125,30 +139,30 @@ Identifier Binder::BindCatalog(const Identifier &catalog) { } void Binder::SearchSchema(CreateInfo &info) { - BindSchemaOrCatalog(info.catalog, info.schema); - if (IsInvalidCatalog(info.catalog) && info.temporary) { - info.catalog = Identifier::TempCatalog(); + BindSchemaOrCatalog(info.GetQualifiedNameMutable()); + if (IsInvalidCatalog(info.Catalog()) && info.temporary) { + info.CatalogMutable() = Identifier::TempCatalog(); } auto &search_path = ClientData::Get(context).catalog_search_path; - if (IsInvalidCatalog(info.catalog) && IsInvalidSchema(info.schema)) { + if (IsInvalidCatalog(info.Catalog()) && IsInvalidSchema(info.Schema())) { auto &default_entry = search_path->GetDefault(); - info.catalog = default_entry.catalog; - info.schema = default_entry.schema; - } else if (IsInvalidSchema(info.schema)) { - info.schema = Identifier(search_path->GetDefaultSchema(context, info.catalog)); - } else if (IsInvalidCatalog(info.catalog)) { - info.catalog = Identifier(search_path->GetDefaultCatalog(info.schema)); + info.CatalogMutable() = default_entry.catalog; + info.SchemaMutable() = default_entry.schema; + } else if (IsInvalidSchema(info.Schema())) { + info.SchemaMutable() = Identifier(search_path->GetDefaultSchema(context, info.Catalog())); + } else if (IsInvalidCatalog(info.Catalog())) { + info.CatalogMutable() = Identifier(search_path->GetDefaultCatalog(info.Schema())); } - if (IsInvalidCatalog(info.catalog)) { - 
info.catalog = DatabaseManager::GetDefaultDatabase(context); + if (IsInvalidCatalog(info.Catalog())) { + info.CatalogMutable() = DatabaseManager::GetDefaultDatabase(context); } if (!info.temporary) { // non-temporary create: not read only - if (info.catalog == TEMP_CATALOG) { + if (info.Catalog() == TEMP_CATALOG) { throw ParserException("Only TEMPORARY table names can use the \"%s\" catalog", TEMP_CATALOG); } } else { - if (info.catalog != TEMP_CATALOG) { + if (info.Catalog() != TEMP_CATALOG) { throw ParserException("TEMPORARY table names can *only* use the \"%s\" catalog", TEMP_CATALOG); } } @@ -157,9 +171,9 @@ void Binder::SearchSchema(CreateInfo &info) { SchemaCatalogEntry &Binder::BindSchema(CreateInfo &info) { SearchSchema(info); // fetch the schema in which we want to create the object - auto &schema_obj = Catalog::GetSchema(context, info.catalog, info.schema); + auto &schema_obj = Catalog::GetSchema(context, info.Catalog(), info.Schema()); D_ASSERT(schema_obj.type == CatalogType::SCHEMA_ENTRY); - info.schema = schema_obj.name; + info.SchemaMutable() = schema_obj.name; if (!info.temporary) { auto &properties = GetStatementProperties(); properties.RegisterDBModify(schema_obj.catalog, context, DatabaseModificationType::CREATE_CATALOG_ENTRY); @@ -230,7 +244,7 @@ void Binder::BindCreateViewInfo(CreateViewInfo &base) { if (Settings::Get(context)) { dependencies = base.dependencies; } - BindView(context, *base.query, base.catalog, base.schema, dependencies, base.aliases, base.types, base.names); + BindView(context, *base.query, base.Catalog(), base.Schema(), dependencies, base.aliases, base.types, base.names); } SchemaCatalogEntry &Binder::BindCreateFunctionInfo(CreateInfo &info) { @@ -264,7 +278,7 @@ SchemaCatalogEntry &Binder::BindCreateFunctionInfo(CreateInfo &info) { // Bind the catalog/schema SearchSchema(info); - auto &catalog = Catalog::GetCatalog(context, info.catalog); + auto &catalog = Catalog::GetCatalog(context, info.Catalog()); // Figure out if we can 
store typed macro parameters auto &attached = catalog.GetAttached(); @@ -365,10 +379,11 @@ SchemaCatalogEntry &Binder::BindCreateFunctionInfo(CreateInfo &info) { if (!type_overloads.insert(dummy_types).second) { throw BinderException( "Ambiguity in macro overloads - macro %s() has multiple definitions with the same parameters", - base.name.GetIdentifierName()); + base.GetFunctionName().GetIdentifierName()); } - auto this_macro_binding = make_uniq(dummy_types, dummy_names, base.name.GetIdentifierName()); + auto this_macro_binding = + make_uniq(dummy_types, dummy_names, base.GetFunctionName().GetIdentifierName()); macro_binding = this_macro_binding.get(); auto &dependencies = base.dependencies; @@ -480,9 +495,9 @@ bool BoundBodyContainsTrigger(const LogicalOperator &op); SchemaCatalogEntry &Binder::BindCreateTriggerInfo(CreateTriggerInfo &create_trigger_info) { // Resolve the base table first — triggers inherit catalog/schema from their table (like Postgres) - TableDescription table_description(create_trigger_info.base_table->catalog_name, - create_trigger_info.base_table->schema_name, - create_trigger_info.base_table->table_name); + TableDescription table_description(create_trigger_info.base_table->Catalog(), + create_trigger_info.base_table->Schema(), + create_trigger_info.base_table->Table()); auto table_ref = make_uniq(table_description); auto bound_table = Bind(*table_ref); if (bound_table.plan->type != LogicalOperatorType::LOGICAL_GET) { @@ -496,13 +511,13 @@ SchemaCatalogEntry &Binder::BindCreateTriggerInfo(CreateTriggerInfo &create_trig auto &table = *table_ptr; // Trigger inherits catalog/schema from the base table - create_trigger_info.catalog = table.catalog.GetName(); - create_trigger_info.schema = table.schema.name; + create_trigger_info.CatalogMutable() = table.catalog.GetName(); + create_trigger_info.SchemaMutable() = table.schema.name; auto &schema = BindCreateSchema(create_trigger_info); // Block trigger creation on databases with an older storage 
version - auto &catalog = Catalog::GetCatalog(context, create_trigger_info.catalog); + auto &catalog = Catalog::GetCatalog(context, create_trigger_info.Catalog()); auto &attached = catalog.GetAttached(); if (attached.HasStorageManager()) { auto &storage_manager = attached.GetStorageManager(); @@ -531,8 +546,8 @@ SchemaCatalogEntry &Binder::BindCreateTriggerInfo(CreateTriggerInfo &create_trig throw NotImplementedException("BEFORE FOR EACH ROW triggers are not yet supported"); } if (create_trigger_info.for_each == TriggerForEach::ROW && - create_trigger_info.event_type != TriggerEventType::INSERT_EVENT) { - throw NotImplementedException("UPDATE and DELETE FOR EACH ROW triggers are not yet supported"); + create_trigger_info.event_type == TriggerEventType::UPDATE_EVENT) { + throw NotImplementedException("UPDATE FOR EACH ROW triggers are not yet supported"); } if ((!create_trigger_info.referencing_new_table.empty() || !create_trigger_info.referencing_old_table.empty()) && create_trigger_info.timing != TriggerTiming::AFTER) { @@ -565,7 +580,7 @@ SchemaCatalogEntry &Binder::BindCreateTriggerInfo(CreateTriggerInfo &create_trig bool is_replace = create_trigger_info.on_conflict == OnCreateConflict::REPLACE_ON_CONFLICT; auto has_real_conflict = std::any_of(conflicting.begin(), conflicting.end(), [&](const_reference t) { - return !(is_replace && t.get().name == create_trigger_info.trigger_name); + return !(is_replace && t.get().name == create_trigger_info.GetTriggerName()); }); if (has_real_conflict) { throw NotImplementedException( @@ -578,7 +593,7 @@ SchemaCatalogEntry &Binder::BindCreateTriggerInfo(CreateTriggerInfo &create_trig auto validation_binder = Binder::CreateBinder(context); validation_binder->global_binder_state->trigger_expanded_tables.insert(table); validation_binder->global_binder_state->trigger_creation_table = &table; - validation_binder->global_binder_state->trigger_creation_name = create_trigger_info.trigger_name; + 
validation_binder->global_binder_state->trigger_creation_name = create_trigger_info.GetTriggerName(); auto body_copy = create_trigger_info.trigger_action->Copy(); for (const auto &alias : {create_trigger_info.referencing_new_table, create_trigger_info.referencing_old_table}) { @@ -586,8 +601,8 @@ SchemaCatalogEntry &Binder::BindCreateTriggerInfo(CreateTriggerInfo &create_trig body_copy->cte_map.map[alias] = MakeTriggerValidationCTE(table); } } - // For FOR EACH ROW: register NEW as a generic binding so BindCorrelatedColumns can resolve NEW.col column - // references. + // For FOR EACH ROW: register NEW (INSERT) or OLD (DELETE) as a generic binding so BindCorrelatedColumns can + // resolve NEW.col / OLD.col references. unique_ptr row_scope_binder; if (create_trigger_info.for_each == TriggerForEach::ROW) { if (table.HasGeneratedColumns()) { @@ -604,22 +619,23 @@ SchemaCatalogEntry &Binder::BindCreateTriggerInfo(CreateTriggerInfo &create_trig col_types.push_back(col.GetType()); } auto new_idx = validation_binder->GenerateTableIndex(); - row_scope_binder = validation_binder->SetupNewRowScope(new_idx, col_names, col_types); + auto scope_name = Binder::RowScopeName(create_trigger_info.event_type); + row_scope_binder = validation_binder->SetupRowScope(new_idx, col_names, col_types, scope_name); } if (row_scope_binder) { auto body_binder = Binder::CreateBinder(context, validation_binder.get()); auto bound_body = body_binder->Bind(*body_copy); validation_binder->GetActiveBinders().pop_back(); if (body_binder->correlated_columns.empty()) { - throw BinderException("FOR EACH ROW trigger \"%s\" on table \"%s\" must reference at least one NEW " + throw BinderException("FOR EACH ROW trigger \"%s\" on table \"%s\" must reference at least one NEW or OLD " "column in the trigger body (use FOR EACH STATEMENT if row data is not needed)", - create_trigger_info.trigger_name, table.name); + create_trigger_info.GetTriggerName(), table.name); } if 
(BoundBodyContainsTrigger(*bound_body.plan)) { throw NotImplementedException( "FOR EACH ROW trigger \"%s\" on table \"%s\" writes to a table that has its own FOR EACH ROW " "trigger (cascading row triggers are not yet supported)", - create_trigger_info.trigger_name, table.name); + create_trigger_info.GetTriggerName(), table.name); } } else { validation_binder->Bind(*body_copy); @@ -651,7 +667,7 @@ BoundStatement Binder::Bind(CreateStatement &stmt) { switch (catalog_type) { case CatalogType::SCHEMA_ENTRY: { auto &base = stmt.info->Cast(); - auto catalog = BindCatalog(base.catalog); + auto catalog = BindCatalog(base.Catalog()); properties.RegisterDBModify(Catalog::GetCatalog(context, catalog), context, DatabaseModificationType::CREATE_CATALOG_ENTRY); result.plan = make_uniq(LogicalOperatorType::LOGICAL_CREATE_SCHEMA, std::move(stmt.info)); @@ -663,7 +679,7 @@ BoundStatement Binder::Bind(CreateStatement &stmt) { auto &schema = BindCreateSchema(*stmt.info); if (stmt.info->on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT) { CatalogTransaction transaction(schema.ParentCatalog(), context); - auto existing_entry = schema.GetEntry(transaction, CatalogType::VIEW_ENTRY, base.view_name); + auto existing_entry = schema.GetEntry(transaction, CatalogType::VIEW_ENTRY, base.GetViewName()); if (existing_entry && existing_entry->type == CatalogType::VIEW_ENTRY) { // IF EXISTS and the view already exists - avoid binding base.binding_mode = CreateViewBindingMode::SKIP_BINDING; @@ -696,7 +712,7 @@ BoundStatement Binder::Bind(CreateStatement &stmt) { auto &create_index_info = stmt.info->Cast(); // Plan the table scan. 
- TableDescription table_description(create_index_info.catalog, create_index_info.schema, + TableDescription table_description(create_index_info.Catalog(), create_index_info.Schema(), create_index_info.table); auto table_ref = make_uniq(table_description); auto bound_table = Bind(*table_ref); @@ -738,7 +754,7 @@ BoundStatement Binder::Bind(CreateStatement &stmt) { auto &create_type_info = stmt.info->Cast(); result.plan = make_uniq(LogicalOperatorType::LOGICAL_CREATE_TYPE, std::move(stmt.info), &schema); - auto &catalog = Catalog::GetCatalog(context, create_type_info.catalog); + auto &catalog = Catalog::GetCatalog(context, create_type_info.Catalog()); auto &dependencies = create_type_info.dependencies; auto dependency_callback = [&dependencies, &catalog](CatalogEntry &entry) { if (&catalog != &entry.ParentCatalog()) { @@ -846,7 +862,7 @@ BoundStatement Binder::Bind(CreateStatement &stmt) { CreateSecretInput create_secret_input {Identifier(type_string), Identifier(provider_string), Identifier(info.storage_type), - info.name, + info.GetSecretName(), scope_strings, bound_options, info.on_conflict, diff --git a/src/duckdb/src/planner/binder/statement/bind_create_table.cpp b/src/duckdb/src/planner/binder/statement/bind_create_table.cpp index 33888ff47..53770b029 100644 --- a/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_create_table.cpp @@ -265,10 +265,10 @@ void Binder::BindGeneratedColumns(BoundCreateTableInfo &info) { // Create a new binder because we dont need (or want) these bindings in this scope auto binder = Binder::CreateBinder(context); binder->SetCatalogLookupCallback(entry_retriever.GetCallback()); - binder->bind_context.AddGenericBinding(table_index, base.table, names, types); + binder->bind_context.AddGenericBinding(table_index, base.GetTableName(), names, types); auto expr_binder = ExpressionBinder(*binder, context); ErrorData ignore; - auto table_binding = 
binder->bind_context.GetBinding(base.table, ignore); + auto table_binding = binder->bind_context.GetBinding(base.GetTableName(), ignore); D_ASSERT(table_binding && !ignore.HasError()); auto bind_order = info.column_dependency_manager.GetBindOrder(base.columns); @@ -556,7 +556,7 @@ static void BindCreateTableConstraints(CreateTableInfo &create_info, CatalogEntr FindForeignKeyIndexes(create_info.columns, fk.fk_columns, fk.info.fk_keys); // Resolve the self-reference. - if (create_info.table == fk.info.table) { + if (create_info.GetTableName() == fk.info.table) { fk.info.type = ForeignKeyType::FK_TYPE_SELF_REFERENCE_TABLE; FindMatchingPrimaryKeyColumns(create_info.columns, create_info.constraints, fk); FindForeignKeyIndexes(create_info.columns, fk.pk_columns, fk.info.pk_keys); @@ -692,7 +692,7 @@ unique_ptr Binder::BindCreateTableInfo(unique_ptr Binder::BindCreateTableInfo(unique_ptrdependencies.VerifyDependencies(schema.catalog, result->Base().table); + result->dependencies.VerifyDependencies(schema.catalog, result->Base().GetTableName()); #ifdef DEBUG // Ensure all types are bound diff --git a/src/duckdb/src/planner/binder/statement/bind_delete.cpp b/src/duckdb/src/planner/binder/statement/bind_delete.cpp index 3f2a30b20..00f56699b 100644 --- a/src/duckdb/src/planner/binder/statement/bind_delete.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_delete.cpp @@ -33,6 +33,9 @@ BoundStatement Binder::BindNode(DeleteQueryNode &node) { if (auto expanded = TryExpandTriggers(node, table, TriggerEventType::DELETE_EVENT)) { return std::move(*expanded); } + if (auto expanded = TryExpandRowTriggers(node, node.returning_list, table, TriggerEventType::DELETE_EVENT)) { + return std::move(*expanded); + } if (!table.temporary) { // delete from persistent table: not read only! 
diff --git a/src/duckdb/src/planner/binder/statement/bind_drop.cpp b/src/duckdb/src/planner/binder/statement/bind_drop.cpp index bf53a49ba..efca03304 100644 --- a/src/duckdb/src/planner/binder/statement/bind_drop.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_drop.cpp @@ -22,14 +22,14 @@ void Binder::BindDropTrigger(DropStatement &stmt, StatementProperties &propertie throw BinderException("DROP TRIGGER requires an ON clause specifying the table"); } auto &base_table_ref = trigger_extra.base_table->Cast(); - Identifier catalog_name = base_table_ref.catalog_name; - Identifier schema_name = base_table_ref.schema_name; + Identifier catalog_name = base_table_ref.Catalog(); + Identifier schema_name = base_table_ref.Schema(); BindSchemaOrCatalog(catalog_name, schema_name); // IF EXISTS only guards the trigger, not the table (PostgreSQL-compatible behavior). auto &table_entry = - Catalog::GetEntry(context, catalog_name, schema_name, base_table_ref.table_name); - stmt.info->catalog = table_entry.ParentCatalog().GetName(); - stmt.info->schema = table_entry.ParentSchema().name; + Catalog::GetEntry(context, catalog_name, schema_name, base_table_ref.Table()); + stmt.info->CatalogMutable() = table_entry.ParentCatalog().GetName(); + stmt.info->SchemaMutable() = table_entry.ParentSchema().name; properties.RegisterDBModify(table_entry.ParentCatalog(), context, DatabaseModificationType::DROP_CATALOG_ENTRY); } @@ -45,7 +45,7 @@ BoundStatement Binder::Bind(DropStatement &stmt) { break; case CatalogType::SCHEMA_ENTRY: { // dropping a schema is never read-only because there are no temporary schemas - auto &catalog = Catalog::GetCatalog(context, stmt.info->catalog); + auto &catalog = Catalog::GetCatalog(context, stmt.info->Catalog()); properties.RegisterDBModify(catalog, context, DatabaseModificationType::DROP_CATALOG_ENTRY); break; } @@ -56,8 +56,8 @@ BoundStatement Binder::Bind(DropStatement &stmt) { case CatalogType::INDEX_ENTRY: case CatalogType::TABLE_ENTRY: case 
CatalogType::TYPE_ENTRY: { - BindSchemaOrCatalog(stmt.info->catalog, stmt.info->schema); - auto catalog = Catalog::GetCatalogEntry(context, stmt.info->catalog); + BindSchemaOrCatalog(stmt.info->GetQualifiedNameMutable()); + auto catalog = Catalog::GetCatalogEntry(context, stmt.info->Catalog()); if (catalog) { // mark catalog as accessed properties.RegisterDBRead(*catalog, context); @@ -66,13 +66,13 @@ BoundStatement Binder::Bind(DropStatement &stmt) { if (stmt.info->type == CatalogType::MACRO_ENTRY) { // We also support "DROP MACRO" (instead of "DROP MACRO TABLE") for table macros // First try to drop a scalar macro - EntryLookupInfo macro_entry_lookup(stmt.info->type, stmt.info->name); - entry = Catalog::GetEntry(context, stmt.info->catalog, stmt.info->schema, macro_entry_lookup, + EntryLookupInfo macro_entry_lookup(stmt.info->type, stmt.info->Name()); + entry = Catalog::GetEntry(context, stmt.info->Catalog(), stmt.info->Schema(), macro_entry_lookup, OnEntryNotFound::RETURN_NULL); if (!entry) { // Unable to find a scalar macro, try to drop a table macro - EntryLookupInfo table_macro_entry_lookup(CatalogType::TABLE_MACRO_ENTRY, stmt.info->name); - entry = Catalog::GetEntry(context, stmt.info->catalog, stmt.info->schema, table_macro_entry_lookup, + EntryLookupInfo table_macro_entry_lookup(CatalogType::TABLE_MACRO_ENTRY, stmt.info->Name()); + entry = Catalog::GetEntry(context, stmt.info->Catalog(), stmt.info->Schema(), table_macro_entry_lookup, OnEntryNotFound::RETURN_NULL); if (entry) { // Change type to table macro so future lookups get the correct one @@ -82,12 +82,12 @@ BoundStatement Binder::Bind(DropStatement &stmt) { if (!entry) { // Unable to find table macro, try again with original OnEntryNotFound to ensure we throw if necessary - entry = Catalog::GetEntry(context, stmt.info->catalog, stmt.info->schema, macro_entry_lookup, + entry = Catalog::GetEntry(context, stmt.info->Catalog(), stmt.info->Schema(), macro_entry_lookup, stmt.info->if_not_found); } } else { 
- EntryLookupInfo entry_lookup(stmt.info->type, stmt.info->name); - entry = Catalog::GetEntry(context, stmt.info->catalog, stmt.info->schema, entry_lookup, + EntryLookupInfo entry_lookup(stmt.info->type, stmt.info->Name()); + entry = Catalog::GetEntry(context, stmt.info->Catalog(), stmt.info->Schema(), entry_lookup, stmt.info->if_not_found); } if (!entry) { @@ -96,12 +96,12 @@ BoundStatement Binder::Bind(DropStatement &stmt) { if (entry->internal) { throw CatalogException("Cannot drop internal catalog entry \"%s\"!", entry->name.GetIdentifierName()); } - stmt.info->catalog = entry->ParentCatalog().GetName(); + stmt.info->CatalogMutable() = entry->ParentCatalog().GetName(); if (!entry->temporary) { // we can only drop temporary schema entries in read-only mode properties.RegisterDBModify(entry->ParentCatalog(), context, DatabaseModificationType::DROP_CATALOG_ENTRY); } - stmt.info->schema = entry->ParentSchema().name; + stmt.info->SchemaMutable() = entry->ParentSchema().name; break; } case CatalogType::SECRET_ENTRY: { diff --git a/src/duckdb/src/planner/binder/statement/bind_explain.cpp b/src/duckdb/src/planner/binder/statement/bind_explain.cpp index 1141d30ba..7121970c5 100644 --- a/src/duckdb/src/planner/binder/statement/bind_explain.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_explain.cpp @@ -10,7 +10,7 @@ BoundStatement Binder::Bind(ExplainStatement &stmt) { // bind the underlying statement auto plan = Bind(*stmt.stmt); // get the unoptimized logical plan, and create the explain statement - auto logical_plan_unopt = plan.plan->ToString(stmt.format); + auto logical_plan_unopt = plan.plan->ToString(context, stmt.format); auto explain = make_uniq(std::move(plan.plan), stmt.explain_type, stmt.format); explain->logical_plan_unopt = logical_plan_unopt; diff --git a/src/duckdb/src/planner/binder/statement/bind_export.cpp b/src/duckdb/src/planner/binder/statement/bind_export.cpp index 3446be63f..db1fe21f2 100644 --- 
a/src/duckdb/src/planner/binder/statement/bind_export.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_export.cpp @@ -119,9 +119,7 @@ string CreateFileName(const string &id_suffix, TableCatalogEntry &table, const s static unique_ptr CreateSelectStatement(CopyStatement &stmt, child_list_t &select_list) { auto ref = make_uniq(); - ref->catalog_name = stmt.info->catalog; - ref->schema_name = stmt.info->schema; - ref->table_name = stmt.info->table; + ref->GetQualifiedNameMutable() = stmt.info->GetQualifiedName(); auto statement = make_uniq(); statement->from_table = std::move(ref); @@ -225,9 +223,7 @@ BoundStatement Binder::Bind(ExportStatement &stmt) { id++; } info->is_from = false; - info->catalog = Identifier(catalog); - info->schema = table.schema.name; - info->table = table.name; + info->GetQualifiedNameMutable() = QualifiedName(Identifier(catalog), table.schema.name, table.name); // We can not export generated columns child_list_t select_list; @@ -245,8 +241,8 @@ BoundStatement Binder::Bind(ExportStatement &stmt) { ExportedTableData exported_data; exported_data.database_name = Identifier(catalog); - exported_data.table_name = info->table; - exported_data.schema_name = info->schema; + exported_data.table_name = info->Table(); + exported_data.schema_name = info->Schema(); exported_data.file_path = info->file_path; @@ -274,7 +270,7 @@ BoundStatement Binder::Bind(ExportStatement &stmt) { fs.CreateDirectory(stmt.info->file_path); } - stmt.info->catalog = Identifier(catalog); + stmt.info->CatalogMutable() = Identifier(catalog); // prepare the options for export auto &format = stmt.info->format; auto &options = stmt.info->options; diff --git a/src/duckdb/src/planner/binder/statement/bind_simple.cpp b/src/duckdb/src/planner/binder/statement/bind_simple.cpp index 620c2db7e..b45ad0005 100644 --- a/src/duckdb/src/planner/binder/statement/bind_simple.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_simple.cpp @@ -39,29 +39,29 @@ BoundStatement 
Binder::BindAlterAddIndex(BoundStatement &result, CatalogEntry &e auto &table = entry.Cast(); auto &column_list = table.GetColumns(); - auto bound_constraint = BindUniqueConstraint(*constraint_info.constraint, table_info.name, column_list); + auto bound_constraint = BindUniqueConstraint(*constraint_info.constraint, table_info.Name(), column_list); auto &bound_unique = bound_constraint->Cast(); // Create the CreateIndexInfo. auto create_index_info = make_uniq(); - create_index_info->table = table_info.name; + create_index_info->table = table_info.Name(); create_index_info->index_type = ART::TYPE_NAME; create_index_info->constraint_type = IndexConstraintType::PRIMARY; for (const auto &physical_index : bound_unique.keys) { auto &col = column_list.GetColumn(physical_index); - unique_ptr parsed = make_uniq(col.GetName(), table_info.name); + unique_ptr parsed = make_uniq(col.GetName(), table_info.Name()); create_index_info->expressions.push_back(parsed->Copy()); create_index_info->parsed_expressions.push_back(parsed->Copy()); } auto unique_constraint = constraint_info.constraint->Cast(); - auto index_name = unique_constraint.GetName(table_info.name); - create_index_info->index_name = index_name; - D_ASSERT(!create_index_info->index_name.empty()); + auto index_name = unique_constraint.GetName(table_info.Name()); + create_index_info->SetIndexName(index_name); + D_ASSERT(!create_index_info->GetIndexName().empty()); // Plan the table scan. 
- TableDescription table_description(table_info.catalog, table_info.schema, table_info.name); + TableDescription table_description(table_info.Catalog(), table_info.Schema(), table_info.Name()); auto table_ref = make_uniq(table_description); auto bound_table = Bind(*table_ref); if (bound_table.plan->type != LogicalOperatorType::LOGICAL_GET) { @@ -112,7 +112,7 @@ BoundStatement Binder::Bind(AlterStatement &stmt) { return result; } - BindSchemaOrCatalog(stmt.info->catalog, stmt.info->schema); + BindSchemaOrCatalog(stmt.info->GetQualifiedNameMutable()); optional_ptr entry; if (stmt.info->type == AlterType::SET_COLUMN_COMMENT) { @@ -126,8 +126,9 @@ BoundStatement Binder::Bind(AlterStatement &stmt) { } } else { // For any other ALTER, we retrieve the catalog entry directly. - EntryLookupInfo lookup_info(stmt.info->GetCatalogType(), stmt.info->name); - entry = entry_retriever.GetEntry(stmt.info->catalog, stmt.info->schema, lookup_info, stmt.info->if_not_found); + EntryLookupInfo lookup_info(stmt.info->GetCatalogType(), stmt.info->Name()); + entry = + entry_retriever.GetEntry(stmt.info->Catalog(), stmt.info->Schema(), lookup_info, stmt.info->if_not_found); } auto &properties = GetStatementProperties(); @@ -145,7 +146,7 @@ BoundStatement Binder::Bind(AlterStatement &stmt) { // Bind types in the same catalog as the entry auto type_binder = Binder::CreateBinder(context, *this); - type_binder->SetSearchPath(catalog, stmt.info->schema); + type_binder->SetSearchPath(catalog, stmt.info->Schema()); BindAlterTypes(*type_binder, stmt); @@ -156,8 +157,8 @@ BoundStatement Binder::Bind(AlterStatement &stmt) { // We can only alter temporary tables and views in read-only mode. 
properties.RegisterDBModify(catalog, context, DatabaseModificationType::ALTER_TABLE); } - stmt.info->catalog = catalog.GetName(); - stmt.info->schema = entry->ParentSchema().name; + stmt.info->CatalogMutable() = catalog.GetName(); + stmt.info->SchemaMutable() = entry->ParentSchema().name; if (!stmt.info->IsAddPrimaryKey()) { result.plan = make_uniq(LogicalOperatorType::LOGICAL_ALTER, std::move(stmt.info)); diff --git a/src/duckdb/src/planner/binder/statement/bind_summarize.cpp b/src/duckdb/src/planner/binder/statement/bind_summarize.cpp index e81457d96..d01c9fef2 100644 --- a/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_summarize.cpp @@ -86,9 +86,7 @@ BoundStatement Binder::BindSummarize(ShowRef &ref) { auto node = make_uniq(); node->select_list.push_back(make_uniq()); auto basetableref = make_uniq(); - basetableref->catalog_name = table_name.catalog; - basetableref->schema_name = table_name.schema; - basetableref->table_name = table_name.name; + basetableref->GetQualifiedNameMutable() = table_name; node->from_table = std::move(basetableref); query = std::move(node); } diff --git a/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp b/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp index 2a6bb2f0b..b623fcd0f 100644 --- a/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +++ b/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp @@ -50,8 +50,8 @@ BoundStatement Binder::BindWithReplacementScan(ClientContext &context, BaseTable return BoundStatement(); } for (auto &scan : config.replacement_scans) { - ReplacementScanInput input(ref.catalog_name.GetIdentifierName(), ref.schema_name.GetIdentifierName(), - ref.table_name.GetIdentifierName()); + ReplacementScanInput input(ref.Catalog().GetIdentifierName(), ref.Schema().GetIdentifierName(), + ref.Table().GetIdentifierName()); auto replacement_function = scan.function(context, input, scan.data.get()); if 
(!replacement_function) { continue; @@ -61,7 +61,7 @@ BoundStatement Binder::BindWithReplacementScan(ClientContext &context, BaseTable replacement_function->alias = ref.alias; } else if (replacement_function->alias.empty()) { // if the replacement scan itself did not provide an alias we use the table name - replacement_function->alias = ref.table_name; + replacement_function->alias = ref.Table(); } if (replacement_function->type == TableReferenceType::TABLE_FUNCTION) { auto &table_function = replacement_function->Cast(); @@ -84,7 +84,7 @@ BoundStatement Binder::BindWithReplacementScan(ClientContext &context, BaseTable replacement_function = std::move(subquery); } if (GetBindingMode() == BindingMode::EXTRACT_REPLACEMENT_SCANS) { - AddReplacementScan(ref.table_name, replacement_function->Copy()); + AddReplacementScan(ref.Table(), replacement_function->Copy()); } return Bind(*replacement_function); } @@ -128,7 +128,7 @@ BoundStatement Binder::Bind(BaseTableRef &ref) { // CTE name should never be qualified (i.e. schema_name should be empty) // unless we want to refer to the recurring table of "using key". - BindingAlias binding_alias(ref.schema_name, ref.table_name); + BindingAlias binding_alias(ref.Schema(), ref.Table()); auto ctebinding = GetCTEBinding(binding_alias); if (ctebinding && ctebinding->CanBeReferenced()) { ctebinding->Reference(); @@ -138,12 +138,12 @@ BoundStatement Binder::Bind(BaseTableRef &ref) { // or a materialized CTE present. auto index = GenerateTableIndex(); - auto alias = ref.alias.empty() ? ref.table_name : ref.alias; + auto alias = ref.alias.empty() ? 
ref.Table() : ref.alias; auto names = BindContext::AliasColumnNames(alias, ctebinding->GetColumnNames(), ref.column_name_alias); bind_context.AddGenericBinding(index, alias, names, ctebinding->GetColumnTypes()); - bool is_recurring = ref.schema_name == "recurring"; + bool is_recurring = ref.Schema() == "recurring"; BoundStatement result; result.types = ctebinding->GetColumnTypes(); @@ -156,10 +156,10 @@ BoundStatement Binder::Bind(BaseTableRef &ref) { // extract a table or view from the catalog auto at_clause = BindAtClause(ref.at_clause); auto entry_at_clause = at_clause ? at_clause.get() : entry_retriever.GetAtClause(); - EntryLookupInfo table_lookup(CatalogType::TABLE_ENTRY, ref.table_name, entry_at_clause, error_context); - BindSchemaOrCatalog(entry_retriever, ref.catalog_name, ref.schema_name); + EntryLookupInfo table_lookup(CatalogType::TABLE_ENTRY, ref.Table(), entry_at_clause, error_context); + BindSchemaOrCatalog(entry_retriever, ref.GetQualifiedNameMutable()); auto table_or_view = - entry_retriever.GetEntry(ref.catalog_name, ref.schema_name, table_lookup, OnEntryNotFound::RETURN_NULL); + entry_retriever.GetEntry(ref.Catalog(), ref.Schema(), table_lookup, OnEntryNotFound::RETURN_NULL); // we still didn't find the table if (GetBindingMode() == BindingMode::EXTRACT_NAMES || GetBindingMode() == BindingMode::EXTRACT_QUALIFIED_NAMES) { if (!table_or_view || table_or_view->type == CatalogType::TABLE_ENTRY) { @@ -167,12 +167,12 @@ BoundStatement Binder::Bind(BaseTableRef &ref) { if (GetBindingMode() == BindingMode::EXTRACT_QUALIFIED_NAMES) { AddTableName(ref.ToString()); } else { - AddTableName(ref.table_name.GetIdentifierName()); + AddTableName(ref.Table().GetIdentifierName()); } // add a bind context entry auto table_index = GenerateTableIndex(); - auto ref_alias = ref.alias.empty() ? ref.table_name : ref.alias; + auto ref_alias = ref.alias.empty() ? 
ref.Table() : ref.alias; vector types {LogicalType::INTEGER}; vector names {Identifier("__dummy_col" + to_string(table_index.index))}; bind_context.AddGenericBinding(table_index, ref_alias, names, types); @@ -193,9 +193,8 @@ BoundStatement Binder::Bind(BaseTableRef &ref) { } // Try autoloading an extension, then retry the replacement scan bind - auto full_path = - ReplacementScan::GetFullPath(ref.catalog_name.GetIdentifierName(), ref.schema_name.GetIdentifierName(), - ref.table_name.GetIdentifierName()); + auto full_path = ReplacementScan::GetFullPath( + ref.Catalog().GetIdentifierName(), ref.Schema().GetIdentifierName(), ref.Table().GetIdentifierName()); auto extension_loaded = TryLoadExtensionForReplacementScan(context, full_path); if (extension_loaded) { replacement_scan_bind_result = BindWithReplacementScan(context, ref); @@ -221,13 +220,13 @@ BoundStatement Binder::Bind(BaseTableRef &ref) { throw BinderException(error_context, "Circular reference to CTE \"%s\", use WITH RECURSIVE to " "use recursive CTEs.", - ref.table_name.GetIdentifierName()); + ref.Table().GetIdentifierName()); } // could not find an alternative: bind again to get the error // note: this will always throw when using DuckDB as a catalog, but a second look-up might succeed // in catalogs that do not have transactional DDL table_or_view = - entry_retriever.GetEntry(ref.catalog_name, ref.schema_name, table_lookup, OnEntryNotFound::THROW_EXCEPTION); + entry_retriever.GetEntry(ref.Catalog(), ref.Schema(), table_lookup, OnEntryNotFound::THROW_EXCEPTION); } switch (table_or_view->type) { case CatalogType::TABLE_ENTRY: { @@ -256,7 +255,7 @@ BoundStatement Binder::Bind(BaseTableRef &ref) { return_types.push_back(col.Type()); return_names.emplace_back(col.Name()); } - table_names = BindContext::AliasColumnNames(ref.table_name, table_names, ref.column_name_alias); + table_names = BindContext::AliasColumnNames(ref.Table(), table_names, ref.column_name_alias); virtual_column_map_t virtual_columns; if 
(scan_function.get_virtual_columns) { @@ -270,7 +269,7 @@ BoundStatement Binder::Bind(BaseTableRef &ref) { auto table_entry = logical_get->GetTable(); auto &col_ids = logical_get->GetMutableColumnIds(); if (!table_entry) { - bind_context.AddBaseTable(table_index, ref.alias, table_names, table_types, col_ids, ref.table_name); + bind_context.AddBaseTable(table_index, ref.alias, table_names, table_types, col_ids, ref.Table()); } else { bind_context.AddBaseTable(table_index, ref.alias, table_names, table_types, col_ids, *table_entry); } diff --git a/src/duckdb/src/planner/column_qualifier.cpp b/src/duckdb/src/planner/column_qualifier.cpp index 46046007a..21575c7a7 100644 --- a/src/duckdb/src/planner/column_qualifier.cpp +++ b/src/duckdb/src/planner/column_qualifier.cpp @@ -261,7 +261,7 @@ optional_ptr ColumnQualifier::QualifyFunction(FunctionExpression & D_ASSERT(!ExpressionBinder::IsUnnestFunction(function.FunctionName())); // lookup the function in the catalog QueryErrorContext error_context(function.GetQueryLocation()); - binder.BindSchemaOrCatalog(function.CatalogMutable(), function.SchemaMutable()); + binder.BindSchemaOrCatalog(function.GetQualifiedNameMutable()); EntryLookupInfo function_lookup(CatalogType::SCALAR_FUNCTION_ENTRY, function.FunctionName(), error_context); auto func = diff --git a/src/duckdb/src/planner/expression_binder/index_binder.cpp b/src/duckdb/src/planner/expression_binder/index_binder.cpp index 6441e9392..93e88b8f3 100644 --- a/src/duckdb/src/planner/expression_binder/index_binder.cpp +++ b/src/duckdb/src/planner/expression_binder/index_binder.cpp @@ -42,7 +42,7 @@ unique_ptr IndexBinder::BindIndex(const UnboundIndex &unbound_index) } CreateIndexInput input(context, unbound_index.table_io_manager, unbound_index.db, create_info.constraint_type, - create_info.index_name, create_info.column_ids, unbound_expressions, storage_info, + create_info.GetIndexName(), create_info.column_ids, unbound_expressions, storage_info, create_info.options); 
return index_type->create_instance(input); @@ -61,8 +61,8 @@ void IndexBinder::InitCreateIndexInfo(LogicalGet &get, CreateIndexInfo &info, co info.scan_types.emplace_back(LogicalType::ROW_TYPE); info.names = get.names; - info.schema = Identifier(schema); - info.catalog = get.GetTable()->catalog.GetName(); + info.SchemaMutable() = Identifier(schema); + info.CatalogMutable() = get.GetTable()->catalog.GetName(); get.AddColumnId(COLUMN_IDENTIFIER_ROW_ID); } @@ -73,7 +73,7 @@ unique_ptr IndexBinder::BindCreateIndex(ClientContext &context, unique_ptr alter_table_info) { // Add the dependencies. auto &dependencies = create_index_info->dependencies; - auto &catalog = Catalog::GetCatalog(context, create_index_info->catalog); + auto &catalog = Catalog::GetCatalog(context, create_index_info->Catalog()); SetCatalogLookupCallback([&dependencies, &catalog](CatalogEntry &entry) { if (&catalog != &entry.ParentCatalog()) { return; diff --git a/src/duckdb/src/planner/logical_operator.cpp b/src/duckdb/src/planner/logical_operator.cpp index 3bb934da0..c239b546b 100644 --- a/src/duckdb/src/planner/logical_operator.cpp +++ b/src/duckdb/src/planner/logical_operator.cpp @@ -7,6 +7,7 @@ #include "duckdb/common/serializer/binary_serializer.hpp" #include "duckdb/common/serializer/memory_stream.hpp" #include "duckdb/common/string_util.hpp" +#include "duckdb/common/box_renderer.hpp" #include "duckdb/common/tree_renderer.hpp" #include "duckdb/parser/parser.hpp" #include "duckdb/planner/operator/list.hpp" @@ -156,13 +157,13 @@ vector LogicalOperator::MapBindings(const vector & } } -string LogicalOperator::ToString(const ProfilerPrintFormat &format) const { - auto renderer = TreeRenderer::CreateRenderer(format); +string LogicalOperator::ToString(optional_ptr context, const ProfilerPrintFormat &format) const { + auto renderer = context ? TreeRenderer::CreateRenderer(*context, format) : TreeRenderer::CreateRenderer(format); if (!renderer) { // formats without output (e.g. 
"no_output") render nothing return string(); } - duckdb::stringstream ss; + StringResultRenderer ss; auto tree = RenderTree::CreateRenderTree(*this); renderer->ToStream(*tree, ss); return ss.str(); diff --git a/src/duckdb/src/planner/operator/logical_create.cpp b/src/duckdb/src/planner/operator/logical_create.cpp index ff4aeb981..cb6a9baf7 100644 --- a/src/duckdb/src/planner/operator/logical_create.cpp +++ b/src/duckdb/src/planner/operator/logical_create.cpp @@ -9,7 +9,7 @@ LogicalCreate::LogicalCreate(LogicalOperatorType type, unique_ptr in LogicalCreate::LogicalCreate(LogicalOperatorType type, ClientContext &context, unique_ptr info_p) : LogicalOperator(type), info(std::move(info_p)) { - this->schema = Catalog::GetSchema(context, info->catalog, info->schema, OnEntryNotFound::RETURN_NULL); + this->schema = Catalog::GetSchema(context, info->Catalog(), info->Schema(), OnEntryNotFound::RETURN_NULL); } idx_t LogicalCreate::EstimateCardinality(ClientContext &context) { diff --git a/src/duckdb/src/planner/operator/logical_create_index.cpp b/src/duckdb/src/planner/operator/logical_create_index.cpp index 44dcab583..01239ce4e 100644 --- a/src/duckdb/src/planner/operator/logical_create_index.cpp +++ b/src/duckdb/src/planner/operator/logical_create_index.cpp @@ -37,8 +37,8 @@ void LogicalCreateIndex::ResolveTypes() { } TableCatalogEntry &LogicalCreateIndex::BindTable(ClientContext &context, CreateIndexInfo &info_p) { - auto &catalog = info_p.catalog; - auto &schema = info_p.schema; + auto &catalog = info_p.Catalog(); + auto &schema = info_p.Schema(); auto &table_name = info_p.table; return Catalog::GetEntry(context, catalog, schema, table_name); } diff --git a/src/duckdb/src/planner/operator/logical_create_table.cpp b/src/duckdb/src/planner/operator/logical_create_table.cpp index 76b2cc91f..acb0fdbac 100644 --- a/src/duckdb/src/planner/operator/logical_create_table.cpp +++ b/src/duckdb/src/planner/operator/logical_create_table.cpp @@ -10,7 +10,7 @@ 
LogicalCreateTable::LogicalCreateTable(SchemaCatalogEntry &schema, unique_ptr unbound_info) : LogicalOperator(LogicalOperatorType::LOGICAL_CREATE_TABLE), - schema(Catalog::GetSchema(context, unbound_info->catalog, unbound_info->schema)) { + schema(Catalog::GetSchema(context, unbound_info->Catalog(), unbound_info->Schema())) { D_ASSERT(unbound_info->type == CatalogType::TABLE_ENTRY); auto binder = Binder::CreateBinder(context); info = binder->BindCreateTableInfo(unique_ptr_cast(std::move(unbound_info))); diff --git a/src/duckdb/src/planner/operator/logical_delete.cpp b/src/duckdb/src/planner/operator/logical_delete.cpp index 57cbf15e1..4ce902250 100644 --- a/src/duckdb/src/planner/operator/logical_delete.cpp +++ b/src/duckdb/src/planner/operator/logical_delete.cpp @@ -14,8 +14,8 @@ LogicalDelete::LogicalDelete(TableCatalogEntry &table, TableIndex table_index) LogicalDelete::LogicalDelete(ClientContext &context, const unique_ptr &table_info) : LogicalOperator(LogicalOperatorType::LOGICAL_DELETE), - table(Catalog::GetEntry(context, table_info->catalog, table_info->schema, - table_info->Cast().table)) { + table(Catalog::GetEntry(context, table_info->Catalog(), table_info->Schema(), + table_info->Cast().GetTableName())) { auto binder = Binder::CreateBinder(context); bound_constraints = binder->BindConstraints(table); } diff --git a/src/duckdb/src/planner/operator/logical_insert.cpp b/src/duckdb/src/planner/operator/logical_insert.cpp index 51eb17bd7..189b0127c 100644 --- a/src/duckdb/src/planner/operator/logical_insert.cpp +++ b/src/duckdb/src/planner/operator/logical_insert.cpp @@ -18,8 +18,8 @@ LogicalInsert::LogicalInsert(TableCatalogEntry &table, TableIndex table_index) LogicalInsert::LogicalInsert(ClientContext &context, const unique_ptr table_info) : LogicalOperator(LogicalOperatorType::LOGICAL_INSERT), - table(Catalog::GetEntry(context, table_info->catalog, table_info->schema, - table_info->Cast().table)) { + table(Catalog::GetEntry(context, 
table_info->Catalog(), table_info->Schema(), + table_info->Cast().GetTableName())) { auto binder = Binder::CreateBinder(context); bound_constraints = binder->BindConstraints(table); } diff --git a/src/duckdb/src/planner/operator/logical_merge_into.cpp b/src/duckdb/src/planner/operator/logical_merge_into.cpp index d0de5ecf7..4d8bc2c67 100644 --- a/src/duckdb/src/planner/operator/logical_merge_into.cpp +++ b/src/duckdb/src/planner/operator/logical_merge_into.cpp @@ -12,8 +12,8 @@ LogicalMergeInto::LogicalMergeInto(TableCatalogEntry &table) LogicalMergeInto::LogicalMergeInto(ClientContext &context, const unique_ptr &table_info) : LogicalOperator(LogicalOperatorType::LOGICAL_MERGE_INTO), - table(Catalog::GetEntry(context, table_info->catalog, table_info->schema, - table_info->Cast().table)) { + table(Catalog::GetEntry(context, table_info->Catalog(), table_info->Schema(), + table_info->Cast().GetTableName())) { auto binder = Binder::CreateBinder(context); bound_constraints = binder->BindConstraints(table); } diff --git a/src/duckdb/src/planner/operator/logical_update.cpp b/src/duckdb/src/planner/operator/logical_update.cpp index a457d0754..6e436595a 100644 --- a/src/duckdb/src/planner/operator/logical_update.cpp +++ b/src/duckdb/src/planner/operator/logical_update.cpp @@ -15,8 +15,8 @@ LogicalUpdate::LogicalUpdate(TableCatalogEntry &table) LogicalUpdate::LogicalUpdate(ClientContext &context, const unique_ptr &table_info) : LogicalOperator(LogicalOperatorType::LOGICAL_UPDATE), - table(Catalog::GetEntry(context, table_info->catalog, table_info->schema, - table_info->Cast().table)) { + table(Catalog::GetEntry(context, table_info->Catalog(), table_info->Schema(), + table_info->Cast().GetTableName())) { auto binder = Binder::CreateBinder(context); bound_constraints = binder->BindConstraints(table); } diff --git a/src/duckdb/src/storage/checkpoint_manager.cpp b/src/duckdb/src/storage/checkpoint_manager.cpp index 1a369af45..b309ba66b 100644 --- 
a/src/duckdb/src/storage/checkpoint_manager.cpp +++ b/src/duckdb/src/storage/checkpoint_manager.cpp @@ -553,8 +553,8 @@ void CheckpointReader::ReadTrigger(CatalogTransaction transaction, Deserializer auto info = deserializer.ReadProperty>(100, "trigger"); auto &trigger_info = info->Cast(); trigger_info.on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT; - auto &schema = catalog.GetSchema(transaction, trigger_info.schema); - auto table_entry = schema.GetEntry(transaction, CatalogType::TABLE_ENTRY, trigger_info.base_table->table_name); + auto &schema = catalog.GetSchema(transaction, trigger_info.Schema()); + auto table_entry = schema.GetEntry(transaction, CatalogType::TABLE_ENTRY, trigger_info.base_table->Table()); if (!table_entry) { throw IOException("corrupt database file - trigger entry without table entry"); } @@ -600,7 +600,7 @@ void CheckpointReader::ReadIndex(CatalogTransaction transaction, Deserializer &d // create the index in the catalog // look for the table in the catalog - auto &schema = catalog.GetSchema(transaction, create_info->schema); + auto &schema = catalog.GetSchema(transaction, create_info->Schema()); auto catalog_table = schema.GetEntry(transaction, CatalogType::TABLE_ENTRY, info.table); if (!catalog_table) { // See internal issue 3663. 
@@ -702,7 +702,7 @@ void SingleFileCheckpointWriter::WriteTable(TableCatalogEntry &table, Serializer void CheckpointReader::ReadTable(CatalogTransaction transaction, Deserializer &deserializer) { // deserialize the table meta data auto info = deserializer.ReadProperty>(100, "table"); - auto &schema = catalog.GetSchema(transaction, info->schema); + auto &schema = catalog.GetSchema(transaction, info->Schema()); auto bound_info = Binder::BindCreateTableCheckpoint(std::move(info), schema); for (auto &dep : bound_info->Base().dependencies.Set()) { diff --git a/src/duckdb/src/storage/external_file_cache/caching_file_system.cpp b/src/duckdb/src/storage/external_file_cache/caching_file_system.cpp index a621b52d9..2cd7724a8 100644 --- a/src/duckdb/src/storage/external_file_cache/caching_file_system.cpp +++ b/src/duckdb/src/storage/external_file_cache/caching_file_system.cpp @@ -3,11 +3,13 @@ #include "duckdb/common/checksum.hpp" #include "duckdb/common/chrono.hpp" #include "duckdb/common/enums/cache_validation_mode.hpp" +#include "duckdb/common/enums/destroy_buffer_upon.hpp" #include "duckdb/common/enums/memory_tag.hpp" #include "duckdb/common/file_system.hpp" #include "duckdb/main/client_context.hpp" #include "duckdb/parallel/task_executor.hpp" #include "duckdb/parallel/task_scheduler.hpp" +#include "duckdb/storage/buffer/block_handle.hpp" #include "duckdb/storage/buffer_manager.hpp" #include "duckdb/storage/external_file_cache/external_file_cache.hpp" #include "duckdb/storage/external_file_cache/external_file_cache_util.hpp" @@ -17,6 +19,16 @@ namespace duckdb { // Forward declaration. class DatabaseInstance; +namespace { + +// Allocate an uncached read buffer to make sure it's de-allocated immediately, and its metadata is not stored in the +// eviction queue. 
+BufferHandle AllocateUncachedReadBuffer(BufferManager &buffer_manager, idx_t size) { + auto buffer = buffer_manager.Allocate(MemoryTag::EXTERNAL_FILE_CACHE, size); + buffer.GetBlockHandle()->GetMemory().SetDestroyBufferUpon(DestroyBufferUpon::UNPIN); + return buffer; +} + //===----------------------------------------------------------------------===// // FetchBlockTask //===----------------------------------------------------------------------===// @@ -110,6 +122,8 @@ class FetchBlockTask : public BaseExecutorTask { BufferHandle &result_pin; }; +} // namespace + //===----------------------------------------------------------------------===// // CachingFileSystem //===----------------------------------------------------------------------===// @@ -262,7 +276,7 @@ FileBufferHandleGroup CachingFileHandle::Read(const idx_t nr_bytes, const idx_t Validate() && version_tag.empty() && (!last_modified.IsFinite() || last_modified == timestamp_t(0)); if (!external_file_cache.IsEnabled() || !external_file_cache.ShouldCacheFile(path.path) || no_validation_metadata) { - auto buf = external_file_cache.GetBufferManager().Allocate(MemoryTag::EXTERNAL_FILE_CACHE, nr_bytes); + auto buf = AllocateUncachedReadBuffer(external_file_cache.GetBufferManager(), nr_bytes); ReadAndRecord(context, buf.GetDataMutable(), nr_bytes, location); vector mem_handles; mem_handles.push_back({std::move(buf), 0, nr_bytes}); @@ -327,7 +341,7 @@ FileBufferHandleGroup CachingFileHandle::Read(const idx_t nr_bytes, const idx_t FileBufferHandleGroup CachingFileHandle::Read(idx_t &nr_bytes) { if (!external_file_cache.IsEnabled() || !CanSeek()) { - auto buf = external_file_cache.GetBufferManager().Allocate(MemoryTag::EXTERNAL_FILE_CACHE, nr_bytes); + auto buf = AllocateUncachedReadBuffer(external_file_cache.GetBufferManager(), nr_bytes); nr_bytes = NumericCast(GetFileHandle().Read(context, buf.GetDataMutable(), nr_bytes)); vector mem_handles; mem_handles.push_back({std::move(buf), 0, nr_bytes}); diff --git 
a/src/duckdb/src/storage/open_file_storage_extension.cpp b/src/duckdb/src/storage/open_file_storage_extension.cpp index 818ef36a5..730a707c6 100644 --- a/src/duckdb/src/storage/open_file_storage_extension.cpp +++ b/src/duckdb/src/storage/open_file_storage_extension.cpp @@ -22,8 +22,8 @@ class OpenFileDefaultGenerator : public DefaultGenerator { for (auto &entry : view_names) { if (entry_name == entry) { auto result = make_uniq(); - result->schema = Identifier::DefaultSchema(); - result->view_name = entry; + result->SchemaMutable() = Identifier::DefaultSchema(); + result->SetViewName(entry); result->sql = StringUtil::Format("SELECT * FROM %s", SQLString(file)); auto view_info = CreateViewInfo::FromSelect(context, std::move(result)); return make_uniq_base(catalog, schema, *view_info); diff --git a/src/duckdb/src/storage/serialization/serialize_create_info.cpp b/src/duckdb/src/storage/serialization/serialize_create_info.cpp index 13de98d50..807c62bb8 100644 --- a/src/duckdb/src/storage/serialization/serialize_create_info.cpp +++ b/src/duckdb/src/storage/serialization/serialize_create_info.cpp @@ -19,8 +19,8 @@ namespace duckdb { void CreateInfo::Serialize(Serializer &serializer) const { serializer.WriteProperty(100, "type", type); - serializer.WritePropertyWithDefault(101, "catalog", catalog); - serializer.WritePropertyWithDefault(102, "schema", schema); + serializer.WritePropertyWithDefault(101, "catalog", qualified_name.Catalog()); + serializer.WritePropertyWithDefault(102, "schema", qualified_name.Schema()); serializer.WritePropertyWithDefault(103, "temporary", temporary); serializer.WritePropertyWithDefault(104, "internal", internal); serializer.WriteProperty(105, "on_conflict", on_conflict); @@ -79,8 +79,8 @@ unique_ptr CreateInfo::Deserialize(Deserializer &deserializer) { throw SerializationException("Unsupported type for deserialization of CreateInfo!"); } deserializer.Unset(); - result->catalog = std::move(catalog); - result->schema = std::move(schema); + 
result->qualified_name.CatalogMutable() = std::move(catalog); + result->qualified_name.SchemaMutable() = std::move(schema); result->temporary = temporary; result->internal = internal; result->on_conflict = on_conflict; @@ -94,7 +94,7 @@ unique_ptr CreateInfo::Deserialize(Deserializer &deserializer) { void CreateIndexInfo::Serialize(Serializer &serializer) const { CreateInfo::Serialize(serializer); - serializer.WritePropertyWithDefault(200, "name", index_name); + serializer.WritePropertyWithDefault(200, "name", qualified_name.Name()); serializer.WritePropertyWithDefault(201, "table", table); /* [Deleted] (DeprecatedIndexType) "index_type" */ serializer.WriteProperty(203, "constraint_type", constraint_type); @@ -108,7 +108,7 @@ void CreateIndexInfo::Serialize(Serializer &serializer) const { unique_ptr CreateIndexInfo::Deserialize(Deserializer &deserializer) { auto result = duckdb::unique_ptr(new CreateIndexInfo()); - deserializer.ReadPropertyWithDefault(200, "name", result->index_name); + deserializer.ReadPropertyWithDefault(200, "name", result->qualified_name.NameMutable()); deserializer.ReadPropertyWithDefault(201, "table", result->table); deserializer.ReadDeletedProperty(202, "index_type"); deserializer.ReadProperty(203, "constraint_type", result->constraint_type); @@ -123,7 +123,7 @@ unique_ptr CreateIndexInfo::Deserialize(Deserializer &deserializer) void CreateMacroInfo::Serialize(Serializer &serializer) const { CreateInfo::Serialize(serializer); - serializer.WritePropertyWithDefault(200, "name", name); + serializer.WritePropertyWithDefault(200, "name", qualified_name.Name()); serializer.WritePropertyWithDefault>(201, "function", macros[0]); serializer.WritePropertyWithDefault>>(202, "extra_functions", GetAllButFirstFunction()); } @@ -133,7 +133,7 @@ unique_ptr CreateMacroInfo::Deserialize(Deserializer &deserializer) auto function = deserializer.ReadPropertyWithDefault>(201, "function"); auto extra_functions = deserializer.ReadPropertyWithDefault>>(202, 
"extra_functions"); auto result = duckdb::unique_ptr(new CreateMacroInfo(deserializer.Get(), std::move(function), std::move(extra_functions))); - result->name = std::move(name); + result->qualified_name.NameMutable() = std::move(name); return std::move(result); } @@ -148,7 +148,7 @@ unique_ptr CreateSchemaInfo::Deserialize(Deserializer &deserializer) void CreateSequenceInfo::Serialize(Serializer &serializer) const { CreateInfo::Serialize(serializer); - serializer.WritePropertyWithDefault(200, "name", name); + serializer.WritePropertyWithDefault(200, "name", qualified_name.Name()); serializer.WritePropertyWithDefault(201, "usage_count", usage_count); serializer.WritePropertyWithDefault(202, "increment", increment); serializer.WritePropertyWithDefault(203, "min_value", min_value); @@ -162,7 +162,7 @@ void CreateSequenceInfo::Serialize(Serializer &serializer) const { unique_ptr CreateSequenceInfo::Deserialize(Deserializer &deserializer) { auto result = duckdb::unique_ptr(new CreateSequenceInfo()); - deserializer.ReadPropertyWithDefault(200, "name", result->name); + deserializer.ReadPropertyWithDefault(200, "name", result->qualified_name.NameMutable()); deserializer.ReadPropertyWithDefault(201, "usage_count", result->usage_count); deserializer.ReadPropertyWithDefault(202, "increment", result->increment); deserializer.ReadPropertyWithDefault(203, "min_value", result->min_value); @@ -175,7 +175,7 @@ unique_ptr CreateSequenceInfo::Deserialize(Deserializer &deserialize void CreateTableInfo::Serialize(Serializer &serializer) const { CreateInfo::Serialize(serializer); - serializer.WritePropertyWithDefault(200, "table", table); + serializer.WritePropertyWithDefault(200, "table", qualified_name.Name()); serializer.WriteProperty(201, "columns", columns); serializer.WritePropertyWithDefault>>(202, "constraints", constraints); serializer.WritePropertyWithDefault>(203, "query", query); @@ -186,7 +186,7 @@ void CreateTableInfo::Serialize(Serializer &serializer) const { unique_ptr 
CreateTableInfo::Deserialize(Deserializer &deserializer) { auto result = duckdb::unique_ptr(new CreateTableInfo()); - deserializer.ReadPropertyWithDefault(200, "table", result->table); + deserializer.ReadPropertyWithDefault(200, "table", result->qualified_name.NameMutable()); deserializer.ReadProperty(201, "columns", result->columns); deserializer.ReadPropertyWithDefault>>(202, "constraints", result->constraints); deserializer.ReadPropertyWithDefault>(203, "query", result->query); @@ -198,7 +198,7 @@ unique_ptr CreateTableInfo::Deserialize(Deserializer &deserializer) void CreateTriggerInfo::Serialize(Serializer &serializer) const { CreateInfo::Serialize(serializer); - serializer.WritePropertyWithDefault(200, "trigger_name", trigger_name); + serializer.WritePropertyWithDefault(200, "trigger_name", qualified_name.Name()); serializer.WritePropertyWithDefault>(201, "base_table", base_table); serializer.WriteProperty(204, "timing", timing); serializer.WriteProperty(205, "event_type", event_type); @@ -211,7 +211,7 @@ void CreateTriggerInfo::Serialize(Serializer &serializer) const { unique_ptr CreateTriggerInfo::Deserialize(Deserializer &deserializer) { auto result = duckdb::unique_ptr(new CreateTriggerInfo()); - deserializer.ReadPropertyWithDefault(200, "trigger_name", result->trigger_name); + deserializer.ReadPropertyWithDefault(200, "trigger_name", result->qualified_name.NameMutable()); auto base_table = deserializer.ReadPropertyWithDefault>(201, "base_table"); result->base_table = unique_ptr_cast(std::move(base_table)); deserializer.ReadProperty(204, "timing", result->timing); @@ -226,20 +226,20 @@ unique_ptr CreateTriggerInfo::Deserialize(Deserializer &deserializer void CreateTypeInfo::Serialize(Serializer &serializer) const { CreateInfo::Serialize(serializer); - serializer.WritePropertyWithDefault(200, "name", name); + serializer.WritePropertyWithDefault(200, "name", qualified_name.Name()); serializer.WriteProperty(201, "logical_type", type); } unique_ptr 
CreateTypeInfo::Deserialize(Deserializer &deserializer) { auto result = duckdb::unique_ptr(new CreateTypeInfo()); - deserializer.ReadPropertyWithDefault(200, "name", result->name); + deserializer.ReadPropertyWithDefault(200, "name", result->qualified_name.NameMutable()); deserializer.ReadProperty(201, "logical_type", result->type); return std::move(result); } void CreateViewInfo::Serialize(Serializer &serializer) const { CreateInfo::Serialize(serializer); - serializer.WritePropertyWithDefault(200, "view_name", view_name); + serializer.WritePropertyWithDefault(200, "view_name", qualified_name.Name()); serializer.WritePropertyWithDefault>(201, "aliases", aliases); serializer.WritePropertyWithDefault>(202, "types", types); serializer.WritePropertyWithDefault>(203, "query", query); @@ -261,7 +261,7 @@ unique_ptr CreateViewInfo::Deserialize(Deserializer &deserializer) { auto column_comments = deserializer.ReadPropertyWithDefault>(205, "column_comments"); auto column_comments_map = deserializer.ReadPropertyWithExplicitDefault>(206, "column_comments_map", identifier_map_t()); auto result = duckdb::unique_ptr(new CreateViewInfo(std::move(names), std::move(column_comments), std::move(column_comments_map))); - result->view_name = std::move(view_name); + result->qualified_name.NameMutable() = std::move(view_name); result->aliases = std::move(aliases); result->types = std::move(types); result->query = std::move(query); diff --git a/src/duckdb/src/storage/serialization/serialize_parse_info.cpp b/src/duckdb/src/storage/serialization/serialize_parse_info.cpp index 4cc985893..c8e5d8a83 100644 --- a/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +++ b/src/duckdb/src/storage/serialization/serialize_parse_info.cpp @@ -85,9 +85,9 @@ unique_ptr ParseInfo::Deserialize(Deserializer &deserializer) { void AlterInfo::Serialize(Serializer &serializer) const { ParseInfo::Serialize(serializer); serializer.WriteProperty(200, "type", type); - 
serializer.WritePropertyWithDefault(201, "catalog", catalog); - serializer.WritePropertyWithDefault(202, "schema", schema); - serializer.WritePropertyWithDefault(203, "name", name); + serializer.WritePropertyWithDefault(201, "catalog", qualified_name.Catalog()); + serializer.WritePropertyWithDefault(202, "schema", qualified_name.Schema()); + serializer.WritePropertyWithDefault(203, "name", qualified_name.Name()); serializer.WriteProperty(204, "if_not_found", if_not_found); serializer.WritePropertyWithDefault(205, "allow_internal", allow_internal); } @@ -122,9 +122,9 @@ unique_ptr AlterInfo::Deserialize(Deserializer &deserializer) { default: throw SerializationException("Unsupported type for deserialization of AlterInfo!"); } - result->catalog = std::move(catalog); - result->schema = std::move(schema); - result->name = std::move(name); + result->qualified_name.CatalogMutable() = std::move(catalog); + result->qualified_name.SchemaMutable() = std::move(schema); + result->qualified_name.NameMutable() = std::move(name); result->if_not_found = if_not_found; result->allow_internal = allow_internal; return std::move(result); @@ -380,9 +380,9 @@ unique_ptr CopyDatabaseInfo::Deserialize(Deserializer &deserializer) void CopyInfo::Serialize(Serializer &serializer) const { ParseInfo::Serialize(serializer); - serializer.WritePropertyWithDefault(200, "catalog", catalog); - serializer.WritePropertyWithDefault(201, "schema", schema); - serializer.WritePropertyWithDefault(202, "table", table); + serializer.WritePropertyWithDefault(200, "catalog", qualified_name.Catalog()); + serializer.WritePropertyWithDefault(201, "schema", qualified_name.Schema()); + serializer.WritePropertyWithDefault(202, "table", qualified_name.Name()); serializer.WritePropertyWithDefault>(203, "select_list", select_list); serializer.WritePropertyWithDefault(204, "is_from", is_from); serializer.WritePropertyWithDefault(205, "format", format); @@ -394,9 +394,9 @@ void CopyInfo::Serialize(Serializer &serializer) 
const { unique_ptr CopyInfo::Deserialize(Deserializer &deserializer) { auto result = duckdb::unique_ptr(new CopyInfo()); - deserializer.ReadPropertyWithDefault(200, "catalog", result->catalog); - deserializer.ReadPropertyWithDefault(201, "schema", result->schema); - deserializer.ReadPropertyWithDefault(202, "table", result->table); + deserializer.ReadPropertyWithDefault(200, "catalog", result->qualified_name.CatalogMutable()); + deserializer.ReadPropertyWithDefault(201, "schema", result->qualified_name.SchemaMutable()); + deserializer.ReadPropertyWithDefault(202, "table", result->qualified_name.NameMutable()); deserializer.ReadPropertyWithDefault>(203, "select_list", result->select_list); deserializer.ReadPropertyWithDefault(204, "is_from", result->is_from); deserializer.ReadPropertyWithDefault(205, "format", result->format); @@ -432,9 +432,9 @@ unique_ptr DisconnectInfo::Deserialize(Deserializer &deserializer) { void DropInfo::Serialize(Serializer &serializer) const { ParseInfo::Serialize(serializer); serializer.WriteProperty(200, "type", type); - serializer.WritePropertyWithDefault(201, "catalog", catalog); - serializer.WritePropertyWithDefault(202, "schema", schema); - serializer.WritePropertyWithDefault(203, "name", name); + serializer.WritePropertyWithDefault(201, "catalog", qualified_name.Catalog()); + serializer.WritePropertyWithDefault(202, "schema", qualified_name.Schema()); + serializer.WritePropertyWithDefault(203, "name", qualified_name.Name()); serializer.WriteProperty(204, "if_not_found", if_not_found); serializer.WritePropertyWithDefault(205, "cascade", cascade); serializer.WritePropertyWithDefault(206, "allow_drop_internal", allow_drop_internal); @@ -444,9 +444,9 @@ void DropInfo::Serialize(Serializer &serializer) const { unique_ptr DropInfo::Deserialize(Deserializer &deserializer) { auto result = duckdb::unique_ptr(new DropInfo()); deserializer.ReadProperty(200, "type", result->type); - deserializer.ReadPropertyWithDefault(201, "catalog", 
result->catalog); - deserializer.ReadPropertyWithDefault(202, "schema", result->schema); - deserializer.ReadPropertyWithDefault(203, "name", result->name); + deserializer.ReadPropertyWithDefault(201, "catalog", result->qualified_name.CatalogMutable()); + deserializer.ReadPropertyWithDefault(202, "schema", result->qualified_name.SchemaMutable()); + deserializer.ReadPropertyWithDefault(203, "name", result->qualified_name.NameMutable()); deserializer.ReadProperty(204, "if_not_found", result->if_not_found); deserializer.ReadPropertyWithDefault(205, "cascade", result->cascade); deserializer.ReadPropertyWithDefault(206, "allow_drop_internal", result->allow_drop_internal); diff --git a/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp b/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp index 167775b1b..350b02319 100644 --- a/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +++ b/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp @@ -312,17 +312,17 @@ unique_ptr SubqueryExpression::Deserialize(Deserializer &deser void TypeExpression::Serialize(Serializer &serializer) const { ParsedExpression::Serialize(serializer); - serializer.WritePropertyWithDefault(200, "catalog", catalog); - serializer.WritePropertyWithDefault(201, "schema", schema); - serializer.WritePropertyWithDefault(202, "type_name", type_name); + serializer.WritePropertyWithDefault(200, "catalog", qualified_name.Catalog()); + serializer.WritePropertyWithDefault(201, "schema", qualified_name.Schema()); + serializer.WritePropertyWithDefault(202, "type_name", qualified_name.Name()); serializer.WritePropertyWithDefault>>(203, "children", children); } unique_ptr TypeExpression::Deserialize(Deserializer &deserializer) { auto result = duckdb::unique_ptr(new TypeExpression()); - deserializer.ReadPropertyWithDefault(200, "catalog", result->catalog); - deserializer.ReadPropertyWithDefault(201, "schema", result->schema); - 
deserializer.ReadPropertyWithDefault(202, "type_name", result->type_name); + deserializer.ReadPropertyWithDefault(200, "catalog", result->qualified_name.CatalogMutable()); + deserializer.ReadPropertyWithDefault(201, "schema", result->qualified_name.SchemaMutable()); + deserializer.ReadPropertyWithDefault(202, "type_name", result->qualified_name.NameMutable()); deserializer.ReadPropertyWithDefault>>(203, "children", result->children); return std::move(result); } diff --git a/src/duckdb/src/storage/serialization/serialize_tableref.cpp b/src/duckdb/src/storage/serialization/serialize_tableref.cpp index 9d21f6f10..e0df38f0c 100644 --- a/src/duckdb/src/storage/serialization/serialize_tableref.cpp +++ b/src/duckdb/src/storage/serialization/serialize_tableref.cpp @@ -74,19 +74,19 @@ unique_ptr AtClause::Deserialize(Deserializer &deserializer) { void BaseTableRef::Serialize(Serializer &serializer) const { TableRef::Serialize(serializer); - serializer.WritePropertyWithDefault(200, "schema_name", schema_name); - serializer.WritePropertyWithDefault(201, "table_name", table_name); + serializer.WritePropertyWithDefault(200, "schema_name", qualified_name.Schema()); + serializer.WritePropertyWithDefault(201, "table_name", qualified_name.Name()); serializer.WritePropertyWithDefault>(202, "column_name_alias", column_name_alias); - serializer.WritePropertyWithDefault(203, "catalog_name", catalog_name); + serializer.WritePropertyWithDefault(203, "catalog_name", qualified_name.Catalog()); serializer.WritePropertyWithDefault>(204, "at_clause", at_clause); } unique_ptr BaseTableRef::Deserialize(Deserializer &deserializer) { auto result = duckdb::unique_ptr(new BaseTableRef()); - deserializer.ReadPropertyWithDefault(200, "schema_name", result->schema_name); - deserializer.ReadPropertyWithDefault(201, "table_name", result->table_name); + deserializer.ReadPropertyWithDefault(200, "schema_name", result->qualified_name.SchemaMutable()); + deserializer.ReadPropertyWithDefault(201, 
"table_name", result->qualified_name.NameMutable()); deserializer.ReadPropertyWithDefault>(202, "column_name_alias", result->column_name_alias); - deserializer.ReadPropertyWithDefault(203, "catalog_name", result->catalog_name); + deserializer.ReadPropertyWithDefault(203, "catalog_name", result->qualified_name.CatalogMutable()); deserializer.ReadPropertyWithDefault>(204, "at_clause", result->at_clause); return std::move(result); } diff --git a/src/duckdb/src/storage/wal_replay.cpp b/src/duckdb/src/storage/wal_replay.cpp index c443b0571..32412d053 100644 --- a/src/duckdb/src/storage/wal_replay.cpp +++ b/src/duckdb/src/storage/wal_replay.cpp @@ -723,7 +723,7 @@ void WriteAheadLogDeserializer::ReplayCreateTable() { } // bind the constraints to the table again auto binder = Binder::CreateBinder(context); - auto &schema = catalog.GetSchema(context, info->schema); + auto &schema = catalog.GetSchema(context, info->Schema()); auto bound_info = Binder::BindCreateTableCheckpoint(std::move(info), schema); catalog.CreateTable(context, *bound_info); @@ -733,8 +733,8 @@ void WriteAheadLogDeserializer::ReplayDropTable() { DropInfo info; info.type = CatalogType::TABLE_ENTRY; - info.schema = Identifier(deserializer.ReadProperty(101, "schema")); - info.name = Identifier(deserializer.ReadProperty(102, "name")); + info.SchemaMutable() = Identifier(deserializer.ReadProperty(101, "schema")); + info.NameMutable() = Identifier(deserializer.ReadProperty(102, "name")); if (DeserializeOnly()) { return; } @@ -742,8 +742,8 @@ void WriteAheadLogDeserializer::ReplayDropTable() { // Remove any replay indexes of this table. 
state.replay_index_infos.erase(std::remove_if(state.replay_index_infos.begin(), state.replay_index_infos.end(), [&info](const ReplayState::ReplayIndexInfo &replay_info) { - return replay_info.table_schema == info.schema && - replay_info.table_name == info.name; + return replay_info.table_schema == info.Schema() && + replay_info.table_name == info.Name(); }), state.replay_index_infos.end()); @@ -806,7 +806,7 @@ void WriteAheadLogDeserializer::ReplayAlter() { auto &unique_info = constraint_info.constraint->Cast(); auto &table = - catalog.GetEntry(context, table_info.schema, table_info.name).Cast(); + catalog.GetEntry(context, table_info.Schema(), table_info.Name()).Cast(); auto &column_list = table.GetColumns(); // Add the table to the bind context to bind the parsed expressions. @@ -829,7 +829,7 @@ void WriteAheadLogDeserializer::ReplayAlter() { auto logical_indexes = unique_info.GetLogicalIndexes(column_list); for (const auto &logical_index : logical_indexes) { auto &col = column_list.GetColumn(logical_index); - unique_ptr parsed = make_uniq(col.GetName(), table_info.name); + unique_ptr parsed = make_uniq(col.GetName(), table_info.Name()); unbound_expressions.push_back(idx_binder.Bind(parsed)); } @@ -847,8 +847,8 @@ void WriteAheadLogDeserializer::ReplayAlter() { auto index_instance = index_type->create_instance(input); auto &table_index_list = storage.GetDataTableInfo()->GetIndexes(); - state.replay_index_infos.emplace_back(table_index_list, std::move(index_instance), table_info.schema, - table_info.name); + state.replay_index_infos.emplace_back(table_index_list, std::move(index_instance), table_info.Schema(), + table_info.Name()); catalog.Alter(context, alter_info); } @@ -867,8 +867,8 @@ void WriteAheadLogDeserializer::ReplayCreateView() { void WriteAheadLogDeserializer::ReplayDropView() { DropInfo info; info.type = CatalogType::VIEW_ENTRY; - info.schema = Identifier(deserializer.ReadProperty(101, "schema")); - info.name = 
Identifier(deserializer.ReadProperty(102, "name")); + info.SchemaMutable() = Identifier(deserializer.ReadProperty(101, "schema")); + info.NameMutable() = Identifier(deserializer.ReadProperty(102, "name")); if (DeserializeOnly()) { return; } @@ -880,7 +880,7 @@ void WriteAheadLogDeserializer::ReplayDropView() { //===--------------------------------------------------------------------===// void WriteAheadLogDeserializer::ReplayCreateSchema() { CreateSchemaInfo info; - info.schema = Identifier(deserializer.ReadProperty(101, "schema")); + info.SchemaMutable() = Identifier(deserializer.ReadProperty(101, "schema")); if (DeserializeOnly()) { return; } @@ -892,7 +892,7 @@ void WriteAheadLogDeserializer::ReplayDropSchema() { DropInfo info; info.type = CatalogType::SCHEMA_ENTRY; - info.name = Identifier(deserializer.ReadProperty(101, "schema")); + info.NameMutable() = Identifier(deserializer.ReadProperty(101, "schema")); if (DeserializeOnly()) { return; } @@ -913,8 +913,8 @@ void WriteAheadLogDeserializer::ReplayDropType() { DropInfo info; info.type = CatalogType::TYPE_ENTRY; - info.schema = Identifier(deserializer.ReadProperty(101, "schema")); - info.name = Identifier(deserializer.ReadProperty(102, "name")); + info.SchemaMutable() = Identifier(deserializer.ReadProperty(101, "schema")); + info.NameMutable() = Identifier(deserializer.ReadProperty(102, "name")); if (DeserializeOnly()) { return; } @@ -932,8 +932,8 @@ void WriteAheadLogDeserializer::ReplayCreateTrigger() { return; } auto &trigger_info = info->Cast(); - auto &table = Catalog::GetEntry(context, trigger_info.catalog, trigger_info.schema, - trigger_info.base_table->table_name); + auto &table = Catalog::GetEntry(context, trigger_info.Catalog(), trigger_info.Schema(), + trigger_info.base_table->Table()); auto &duck_table = table.Cast(); auto transaction = catalog.GetCatalogTransaction(context); duck_table.CreateTrigger(transaction, trigger_info); @@ -942,19 +942,20 @@ void 
WriteAheadLogDeserializer::ReplayCreateTrigger() { void WriteAheadLogDeserializer::ReplayDropTrigger() { DropInfo info; info.type = CatalogType::TRIGGER_ENTRY; - info.schema = Identifier(deserializer.ReadProperty(101, "schema")); - info.name = Identifier(deserializer.ReadProperty(102, "name")); + info.SchemaMutable() = Identifier(deserializer.ReadProperty(101, "schema")); + info.NameMutable() = Identifier(deserializer.ReadProperty(102, "name")); auto table_name = deserializer.ReadPropertyWithDefault(103, "table"); if (DeserializeOnly()) { return; } if (table_name.empty()) { - throw InternalException("WAL replay: DROP TRIGGER entry has an empty table name for trigger \"%s\"", info.name); + throw InternalException("WAL replay: DROP TRIGGER entry has an empty table name for trigger \"%s\"", + info.Name()); } - auto &table = Catalog::GetEntry(context, catalog.GetName(), info.schema, table_name); + auto &table = Catalog::GetEntry(context, catalog.GetName(), info.Schema(), table_name); auto &duck_table = table.Cast(); auto transaction = catalog.GetCatalogTransaction(context); - duck_table.DropTrigger(transaction, info.name, info.cascade); + duck_table.DropTrigger(transaction, info.Name(), info.cascade); } //===--------------------------------------------------------------------===// @@ -972,8 +973,8 @@ void WriteAheadLogDeserializer::ReplayCreateSequence() { void WriteAheadLogDeserializer::ReplayDropSequence() { DropInfo info; info.type = CatalogType::SEQUENCE_ENTRY; - info.schema = Identifier(deserializer.ReadProperty(101, "schema")); - info.name = Identifier(deserializer.ReadProperty(102, "name")); + info.SchemaMutable() = Identifier(deserializer.ReadProperty(101, "schema")); + info.NameMutable() = Identifier(deserializer.ReadProperty(102, "name")); if (DeserializeOnly()) { return; } @@ -1012,8 +1013,8 @@ void WriteAheadLogDeserializer::ReplayCreateMacro() { void WriteAheadLogDeserializer::ReplayDropMacro() { DropInfo info; info.type = CatalogType::MACRO_ENTRY; - 
info.schema = Identifier(deserializer.ReadProperty(101, "schema")); - info.name = Identifier(deserializer.ReadProperty(102, "name")); + info.SchemaMutable() = Identifier(deserializer.ReadProperty(101, "schema")); + info.NameMutable() = Identifier(deserializer.ReadProperty(102, "name")); if (DeserializeOnly()) { return; } @@ -1035,8 +1036,8 @@ void WriteAheadLogDeserializer::ReplayCreateTableMacro() { void WriteAheadLogDeserializer::ReplayDropTableMacro() { DropInfo info; info.type = CatalogType::TABLE_MACRO_ENTRY; - info.schema = Identifier(deserializer.ReadProperty(101, "schema")); - info.name = Identifier(deserializer.ReadProperty(102, "name")); + info.SchemaMutable() = Identifier(deserializer.ReadProperty(101, "schema")); + info.NameMutable() = Identifier(deserializer.ReadProperty(102, "name")); if (DeserializeOnly()) { return; } @@ -1062,7 +1063,7 @@ void WriteAheadLogDeserializer::ReplayCreateIndex() { info.index_type = ART::TYPE_NAME; } - const auto schema_name = create_info->schema; + const auto schema_name = create_info->Schema(); const auto table_name = info.table; auto &entry = catalog.GetEntry(context, schema_name, table_name); @@ -1083,8 +1084,8 @@ void WriteAheadLogDeserializer::ReplayCreateIndex() { void WriteAheadLogDeserializer::ReplayDropIndex() { DropInfo info; info.type = CatalogType::INDEX_ENTRY; - info.schema = Identifier(deserializer.ReadProperty(101, "schema")); - info.name = Identifier(deserializer.ReadProperty(102, "name")); + info.SchemaMutable() = Identifier(deserializer.ReadProperty(101, "schema")); + info.NameMutable() = Identifier(deserializer.ReadProperty(102, "name")); if (DeserializeOnly()) { return; } @@ -1092,8 +1093,8 @@ void WriteAheadLogDeserializer::ReplayDropIndex() { // Remove the replay index, if any. 
state.replay_index_infos.erase(std::remove_if(state.replay_index_infos.begin(), state.replay_index_infos.end(), [&info](const ReplayState::ReplayIndexInfo &replay_info) { - return replay_info.table_schema == info.schema && - replay_info.index->GetIndexName() == info.name; + return replay_info.table_schema == info.Schema() && + replay_info.index->GetIndexName() == info.Name(); }), state.replay_index_infos.end()); diff --git a/src/duckdb/src/storage/write_ahead_log.cpp b/src/duckdb/src/storage/write_ahead_log.cpp index ee3aa483e..22d00e1be 100644 --- a/src/duckdb/src/storage/write_ahead_log.cpp +++ b/src/duckdb/src/storage/write_ahead_log.cpp @@ -451,7 +451,7 @@ void WriteAheadLog::WriteDropTrigger(const TriggerCatalogEntry &entry) { WriteAheadLogSerializer serializer(*this, WALType::DROP_TRIGGER); serializer.WriteProperty(101, "schema", entry.schema.name); serializer.WriteProperty(102, "name", entry.name); - serializer.WriteProperty(103, "table", entry.base_table->table_name); + serializer.WriteProperty(103, "table", entry.base_table->Table()); serializer.End(); } diff --git a/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp b/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp index 8633517c9..891c014ae 100644 --- a/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +++ b/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp @@ -117,6 +117,16 @@ class AESStateMBEDTLS : public duckdb::EncryptionState { return mbedtls_state; } + DUCKDB_API void Hash(duckdb::CryptoHashFunction function, duckdb::const_data_ptr_t input, + duckdb::idx_t input_len, duckdb::data_ptr_t output) const override; + DUCKDB_API duckdb::unique_ptr + CreateHashState(duckdb::CryptoHashFunction function) const override; + DUCKDB_API void Hmac(duckdb::CryptoHashFunction function, duckdb::const_data_ptr_t key, duckdb::idx_t key_len, + duckdb::const_data_ptr_t input, duckdb::idx_t input_len, + duckdb::data_ptr_t output) const override; + DUCKDB_API bool 
SupportsHash(duckdb::CryptoHashFunction function) const override; + DUCKDB_API bool SupportsHmac(duckdb::CryptoHashFunction function) const override; + ~AESStateMBEDTLSFactory() override {} // public: diff --git a/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp b/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp index 7694b0193..bf35b7d32 100644 --- a/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +++ b/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp @@ -3,6 +3,7 @@ // otherwise we have different definitions for mbedtls_pk_context / mbedtls_sha256_context #define MBEDTLS_ALLOW_PRIVATE_ACCESS +#include "duckdb/common/crypto/md5.hpp" #include "duckdb/common/helper.hpp" #include "mbedtls/md.h" #include "mbedtls/pk.h" @@ -110,6 +111,123 @@ void MbedTlsWrapper::ToBase16(char *in, char *out, size_t len) { } } +class MbedTLSCryptoHashState : public duckdb::CryptoHashState { +public: + explicit MbedTLSCryptoHashState(duckdb::CryptoHashFunction function) : duckdb::CryptoHashState(function) { + switch (function) { + case duckdb::CryptoHashFunction::MD5: + break; + case duckdb::CryptoHashFunction::SHA1: + mbedtls_sha1_init(&sha1_context); + break; + case duckdb::CryptoHashFunction::SHA256: + mbedtls_sha256_init(&sha256_context); + break; + default: + throw duckdb::InternalException("Unsupported crypto hash function"); + } + } + + ~MbedTLSCryptoHashState() override { + switch (GetFunction()) { + case duckdb::CryptoHashFunction::MD5: + break; + case duckdb::CryptoHashFunction::SHA1: + mbedtls_sha1_free(&sha1_context); + break; + case duckdb::CryptoHashFunction::SHA256: + mbedtls_sha256_free(&sha256_context); + break; + default: + break; + } + } + + void Hash(duckdb::const_data_ptr_t input, duckdb::idx_t input_len, duckdb::data_ptr_t output) override { + switch (GetFunction()) { + case duckdb::CryptoHashFunction::MD5: { + duckdb::MD5Context context; + context.Add(input, input_len); + context.Finish(output); + return; + } + case duckdb::CryptoHashFunction::SHA1: + if 
(mbedtls_sha1_starts(&sha1_context) || mbedtls_sha1_update(&sha1_context, input, input_len) || + mbedtls_sha1_finish(&sha1_context, output)) { + throw std::runtime_error("SHA1 Error"); + } + return; + case duckdb::CryptoHashFunction::SHA256: + if (mbedtls_sha256_starts(&sha256_context, false) || + mbedtls_sha256_update(&sha256_context, input, input_len) || + mbedtls_sha256_finish(&sha256_context, output)) { + throw std::runtime_error("SHA256 Error"); + } + return; + default: + throw duckdb::InternalException("Unsupported crypto hash function"); + } + } + +private: + mbedtls_sha1_context sha1_context; + mbedtls_sha256_context sha256_context; +}; + +void MbedTlsWrapper::AESStateMBEDTLSFactory::Hash(duckdb::CryptoHashFunction function, duckdb::const_data_ptr_t input, + duckdb::idx_t input_len, duckdb::data_ptr_t output) const { + switch (function) { + case duckdb::CryptoHashFunction::MD5: { + duckdb::MD5Context context; + context.Add(input, input_len); + context.Finish(output); + return; + } + case duckdb::CryptoHashFunction::SHA1: + if (mbedtls_sha1(input, input_len, output)) { + throw std::runtime_error("SHA1 Error"); + } + return; + case duckdb::CryptoHashFunction::SHA256: + if (mbedtls_sha256(input, input_len, output, false)) { + throw std::runtime_error("SHA256 Error"); + } + return; + default: + throw duckdb::InternalException("Unsupported crypto hash function"); + } +} + +duckdb::unique_ptr +MbedTlsWrapper::AESStateMBEDTLSFactory::CreateHashState(duckdb::CryptoHashFunction function) const { + return duckdb::make_uniq(function); +} + +void MbedTlsWrapper::AESStateMBEDTLSFactory::Hmac(duckdb::CryptoHashFunction function, duckdb::const_data_ptr_t key, + duckdb::idx_t key_len, duckdb::const_data_ptr_t input, + duckdb::idx_t input_len, duckdb::data_ptr_t output) const { + if (function != duckdb::CryptoHashFunction::SHA256) { + throw duckdb::NotImplementedException("MbedTLS HMAC currently only supports SHA256"); + } + 
MbedTlsWrapper::Hmac256(duckdb::const_char_ptr_cast(key), key_len, duckdb::const_char_ptr_cast(input), input_len, + duckdb::char_ptr_cast(output)); +} + +bool MbedTlsWrapper::AESStateMBEDTLSFactory::SupportsHash(duckdb::CryptoHashFunction function) const { + switch (function) { + case duckdb::CryptoHashFunction::MD5: + case duckdb::CryptoHashFunction::SHA1: + case duckdb::CryptoHashFunction::SHA256: + return true; + default: + return false; + } +} + +bool MbedTlsWrapper::AESStateMBEDTLSFactory::SupportsHmac(duckdb::CryptoHashFunction function) const { + return function == duckdb::CryptoHashFunction::SHA256; +} + MbedTlsWrapper::SHA256State::SHA256State() : sha_context(new mbedtls_sha256_context()) { auto context = reinterpret_cast(sha_context); diff --git a/src/duckdb/ub_extension_parquet_reader_variant.cpp b/src/duckdb/ub_extension_parquet_reader_variant.cpp index 8a5fc94ad..b0e0398b5 100644 --- a/src/duckdb/ub_extension_parquet_reader_variant.cpp +++ b/src/duckdb/ub_extension_parquet_reader_variant.cpp @@ -1,4 +1,6 @@ #include "extension/parquet/reader/variant/parquet_variant_iterator.cpp" +#include "extension/parquet/reader/variant/parquet_variant_shredding.cpp" + #include "extension/parquet/reader/variant/variant_binary_decoder.cpp" diff --git a/src/duckdb/ub_src_common.cpp b/src/duckdb/ub_src_common.cpp index c09dc26a7..3cd95a6fa 100644 --- a/src/duckdb/ub_src_common.cpp +++ b/src/duckdb/ub_src_common.cpp @@ -20,8 +20,6 @@ #include "src/common/column_index.cpp" -#include "src/common/complex_json.cpp" - #include "src/common/compressed_file_system.cpp" #include "src/common/constants.cpp" @@ -60,6 +58,8 @@ #include "src/common/identifier.cpp" +#include "src/common/json_document.cpp" + #include "src/common/local_file_system.cpp" #include "src/common/memory_mapped_file.cpp" diff --git a/src/duckdb/ub_src_common_sort.cpp b/src/duckdb/ub_src_common_sort.cpp index cac9ac0d6..38ddfc54f 100644 --- a/src/duckdb/ub_src_common_sort.cpp +++ 
b/src/duckdb/ub_src_common_sort.cpp @@ -4,6 +4,8 @@ #include "src/common/sort/natural_sort.cpp" +#include "src/common/sort/partition_key_tracker.cpp" + #include "src/common/sort/sort.cpp" #include "src/common/sort/sort_strategy.cpp"