From 9afb062de713e413369a331e55f443e818e4f657 Mon Sep 17 00:00:00 2001 From: wuguowei1994 <185086661@qq.com> Date: Tue, 2 Jun 2026 23:57:01 +0800 Subject: [PATCH] [fix](be) Fix variant inverted-index cast pushdown for int and boolean --- .../core/data_type/convert_field_to_type.cpp | 46 +++- be/src/core/data_type/convert_field_to_type.h | 6 + be/src/exprs/function/functions_comparison.h | 19 +- be/src/exprs/function/in.h | 24 +- be/src/exprs/vcast_expr.cpp | 54 ++++ be/src/exprs/vcast_expr.h | 1 + be/src/exprs/vexpr.cpp | 17 +- .../index/inverted/inverted_index_reader.cpp | 102 ++++++++ .../index/inverted/inverted_index_reader.h | 18 ++ .../storage/predicate/comparison_predicate.h | 7 +- be/src/storage/predicate/in_list_predicate.h | 12 +- .../segment/inverted_index_reader_test.cpp | 67 +++++ .../test_variant_inverted_index_cast.groovy | 243 ++++++++++++++++++ 13 files changed, 591 insertions(+), 25 deletions(-) create mode 100644 regression-test/suites/inverted_index_p0/test_variant_inverted_index_cast.groovy diff --git a/be/src/core/data_type/convert_field_to_type.cpp b/be/src/core/data_type/convert_field_to_type.cpp index 8873c09346b2c0..140a49f357aadb 100644 --- a/be/src/core/data_type/convert_field_to_type.cpp +++ b/be/src/core/data_type/convert_field_to_type.cpp @@ -49,6 +49,43 @@ namespace doris { +namespace { + +int int_byte_size(PrimitiveType type) { + switch (type) { + case TYPE_TINYINT: + return 1; + case TYPE_SMALLINT: + return 2; + case TYPE_INT: + return 4; + case TYPE_BIGINT: + return 8; + case TYPE_LARGEINT: + return 16; + default: + throw Exception(ErrorCode::INTERNAL_ERROR, "Unexpected non-integer type {}", + type_to_string(type)); + } +} + +bool is_lossless_int_widen(PrimitiveType storage_type, PrimitiveType query_type) { + if (!is_int(storage_type) || !is_int(query_type)) { + return false; + } + return int_byte_size(storage_type) < int_byte_size(query_type); +} + +bool is_lossless_float_widen(PrimitiveType storage_type, PrimitiveType query_type) 
{ + return storage_type == TYPE_FLOAT && query_type == TYPE_DOUBLE; +} + +bool is_int_to_decimal_cast(PrimitiveType storage_type, PrimitiveType query_type) { + return is_int(storage_type) && is_decimal(query_type); +} + +} // namespace + template /// Field template parameter may be const or non-const Field. void dispatch(F&& f, const Field& field) { switch (field.get_type()) { @@ -828,4 +865,11 @@ void convert_field_to_type(const Field& from_value, const IDataType& to_type, Fi return convert_field_to_typeImpl(from_value, to_type, from_type_hint, to); } } -} // namespace doris \ No newline at end of file + +bool is_cast_compatible_for_field_conversion(PrimitiveType storage_type, PrimitiveType query_type) { + return is_lossless_int_widen(storage_type, query_type) || + is_lossless_float_widen(storage_type, query_type) || + is_int_to_decimal_cast(storage_type, query_type) || + (is_string_type(storage_type) && is_string_type(query_type)); +} +} // namespace doris diff --git a/be/src/core/data_type/convert_field_to_type.h b/be/src/core/data_type/convert_field_to_type.h index cd8c5469b1c0ff..a1e3d8b40214c0 100644 --- a/be/src/core/data_type/convert_field_to_type.h +++ b/be/src/core/data_type/convert_field_to_type.h @@ -20,6 +20,7 @@ #pragma once #include "common/status.h" +#include "core/data_type/define_primitive_type.h" #include "core/field.h" namespace doris { @@ -36,4 +37,9 @@ class IDataType; void convert_field_to_type(const Field& from_value, const IDataType& to_type, Field* field, const IDataType* from_type_hint = nullptr); +// Return whether this storage/query primitive-type pair is eligible for cross-type +// Field conversion in index-pushdown paths. Exact safety is still enforced by the +// caller's value-level convert + round-trip checks. 
+bool is_cast_compatible_for_field_conversion(PrimitiveType storage_type, PrimitiveType query_type); + } // namespace doris diff --git a/be/src/exprs/function/functions_comparison.h b/be/src/exprs/function/functions_comparison.h index 5d891248e08c11..252d92598e20f6 100644 --- a/be/src/exprs/function/functions_comparison.h +++ b/be/src/exprs/function/functions_comparison.h @@ -41,6 +41,7 @@ #include "exprs/function/function_helpers.h" #include "exprs/function/functions_logical.h" #include "storage/index/index_reader_helper.h" +#include "storage/index/inverted/inverted_index_reader.h" namespace doris { @@ -487,24 +488,22 @@ class FunctionComparison : public IFunction { if (param_value.is_null()) { return Status::OK(); } + Field query_value; + const bool allow_int_cross_width = name_view == NameEquals::name; + RETURN_IF_ERROR(segment_v2::inverted_index_query_param::convert_to_storage_value( + arguments[0].type, param_value, data_type_with_name.second, &query_value, + allow_int_cross_width)); segment_v2::InvertedIndexParam param; param.column_name = data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = param_value; + param.query_value = query_value; param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); param.analyzer_ctx = analyzer_ctx; RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param})); - std::shared_ptr null_bitmap = std::make_shared(); - if (iter->has_null()) { - segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle; - RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); - null_bitmap = null_bitmap_cache_handle.get_bitmap(); - } - segment_v2::InvertedIndexResultBitmap result(param.roaring, null_bitmap); - bitmap_result = result; - bitmap_result.mask_out_null(); + RETURN_IF_ERROR(segment_v2::inverted_index_query_param::build_result_bitmap( + iter, param.roaring, &bitmap_result)); if (name_view == NameNotEquals::name) { roaring::Roaring 
full_result; diff --git a/be/src/exprs/function/in.h b/be/src/exprs/function/in.h index 55c709a2a58bb2..4768cb1b9f2fc3 100644 --- a/be/src/exprs/function/in.h +++ b/be/src/exprs/function/in.h @@ -47,6 +47,7 @@ #include "exprs/function_context.h" #include "exprs/hybrid_set.h" #include "storage/index/index_reader_helper.h" +#include "storage/index/inverted/inverted_index_reader.h" namespace doris { @@ -154,11 +155,6 @@ class FunctionIn : public IFunction { //NOT support in list when parser is FULLTEXT for expr inverted index evaluate. return Status::OK(); } - if (iter->has_null()) { - segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle; - RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); - null_bitmap = null_bitmap_cache_handle.get_bitmap(); - } for (const auto& arg : arguments) { Field param_value; arg.column->get(0, param_value); @@ -167,14 +163,25 @@ class FunctionIn : public IFunction { if (negative) { return Status::OK(); } + RETURN_IF_ERROR(segment_v2::inverted_index_query_param::read_null_bitmap( + iter, &null_bitmap)); *roaring |= *null_bitmap; continue; } + Field query_value; + auto convert_status = segment_v2::inverted_index_query_param::convert_to_storage_value( + arg.type, param_value, data_type_with_name.second, &query_value, !negative); + if (convert_status.code() == ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED) { + // The literal cannot map to any storage value that would round-trip to it, so it + // contributes no hits to the positive IN union. 
+ continue; + } + RETURN_IF_ERROR(convert_status); InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY; segment_v2::InvertedIndexParam param; param.column_name = data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = param_value; + param.query_value = query_value; param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); @@ -182,9 +189,8 @@ class FunctionIn : public IFunction { RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param})); *roaring |= *param.roaring; } - segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap); - bitmap_result = result; - bitmap_result.mask_out_null(); + RETURN_IF_ERROR(segment_v2::inverted_index_query_param::build_result_bitmap( + iter, roaring, &bitmap_result)); if constexpr (negative) { roaring::Roaring full_result; full_result.addRange(0, num_rows); diff --git a/be/src/exprs/vcast_expr.cpp b/be/src/exprs/vcast_expr.cpp index 1435b40c58c58d..6c457f84061716 100644 --- a/be/src/exprs/vcast_expr.cpp +++ b/be/src/exprs/vcast_expr.cpp @@ -37,7 +37,10 @@ #include "exprs/function/simple_function_factory.h" #include "exprs/vexpr.h" #include "exprs/vexpr_context.h" +#include "exprs/vslot_ref.h" #include "runtime/runtime_state.h" +#include "storage/index/index_reader_helper.h" +#include "storage/index/inverted/inverted_index_iterator.h" namespace doris { class RowDescriptor; @@ -127,6 +130,57 @@ Status VCastExpr::execute_column_impl(VExprContext* context, const Block* block, return Status::OK(); } +Status VCastExpr::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) { + auto target_type = remove_nullable(get_target_type()); + if (target_type->get_primitive_type() != TYPE_BOOLEAN) { + return Status::OK(); + } + DCHECK_EQ(get_num_children(), 1); + if (!get_child(0)->is_slot_ref()) { + return Status::OK(); + } + + auto* column_slot_ref = assert_cast(get_child(0).get()); + auto column_id = 
column_slot_ref->column_id(); + auto index_context = context->get_index_context(); + auto* iter = index_context->get_inverted_index_iterator_by_column_id(column_id); + if (iter == nullptr) { + return Status::OK(); + } + if (!segment_v2::IndexReaderHelper::has_string_or_bkd_index(iter)) { + return Status::OK(); + } + + const auto* storage_name_type = + index_context->get_storage_name_and_type_by_column_id(column_id); + if (storage_name_type == nullptr) { + return Status::OK(); + } + + Field query_value; + RETURN_IF_ERROR(segment_v2::inverted_index_query_param::convert_to_storage_value( + get_target_type(), Field::create_field(1), storage_name_type->second, + &query_value)); + + segment_v2::InvertedIndexParam param; + param.column_name = storage_name_type->first; + param.column_type = storage_name_type->second; + param.query_value = query_value; + param.query_type = segment_v2::InvertedIndexQueryType::EQUAL_QUERY; + param.num_rows = segment_num_rows; + param.roaring = std::make_shared(); + RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param})); + + segment_v2::InvertedIndexResultBitmap result; + RETURN_IF_ERROR(segment_v2::inverted_index_query_param::build_result_bitmap(iter, param.roaring, + &result)); + if (!result.is_empty()) { + index_context->set_index_result_for_expr(this, result); + index_context->set_true_for_index_status(this, column_id); + } + return Status::OK(); +} + bool cast_error_code(Status& st) { //There may be more error codes that need to be captured by try cast in the future. 
if (st.is()) { diff --git a/be/src/exprs/vcast_expr.h b/be/src/exprs/vcast_expr.h index c3f2526794b3b8..8e0fce470e8d3e 100644 --- a/be/src/exprs/vcast_expr.h +++ b/be/src/exprs/vcast_expr.h @@ -48,6 +48,7 @@ class VCastExpr : public VExpr { ~VCastExpr() override = default; Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, size_t count, ColumnPtr& result_column) const override; + Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override; Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override; Status open(RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; diff --git a/be/src/exprs/vexpr.cpp b/be/src/exprs/vexpr.cpp index 61157a5dd00574..96842d7f053c6b 100644 --- a/be/src/exprs/vexpr.cpp +++ b/be/src/exprs/vexpr.cpp @@ -36,6 +36,7 @@ #include "common/status.h" #include "core/column/column_nothing.h" #include "core/column/column_vector.h" +#include "core/data_type/convert_field_to_type.h" #include "core/data_type/data_type_array.h" #include "core/data_type/data_type_decimal.h" #include "core/data_type/data_type_factory.hpp" @@ -885,10 +886,20 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase continue; } } + const bool string_to_string = is_string_type(origin_primitive_type) && + is_string_type(target_primitive_type); + const auto& function_name = function->get_name(); + // This only decides whether peeling the cast is worth trying. The converted + // literal still has to pass convert_to_storage_value() and its round-trip check. 
+ const bool int_cross_width_for_equal_or_in = + (function_name == "eq" || function_name == "in") && + is_int(origin_primitive_type) && is_int(target_primitive_type); if (origin_primitive_type != TYPE_VARIANT && - (storage_type->equals(*target_type) || - (is_string_type(target_primitive_type) && - is_string_type(origin_primitive_type)))) { + (storage_type->equals(*target_type) || string_to_string || + (!is_complex_type(storage_type->get_primitive_type()) && + (is_cast_compatible_for_field_conversion(origin_primitive_type, + target_primitive_type) || + int_cross_width_for_equal_or_in)))) { children_exprs.emplace_back(expr_without_cast(child)); } } else { diff --git a/be/src/storage/index/inverted/inverted_index_reader.cpp b/be/src/storage/index/inverted/inverted_index_reader.cpp index ec11fb5b28c9f3..f152c77998faf2 100644 --- a/be/src/storage/index/inverted/inverted_index_reader.cpp +++ b/be/src/storage/index/inverted/inverted_index_reader.cpp @@ -39,6 +39,9 @@ #include "common/exception.h" #include "common/logging.h" #include "common/status.h" +#include "core/data_type/convert_field_to_type.h" +#include "core/data_type/data_type_factory.hpp" +#include "core/data_type/data_type_nullable.h" #include "core/data_type/primitive_type.h" #include "core/string_ref.h" #include "core/type_limit.h" @@ -151,6 +154,105 @@ static doris::Status encode_bkd_max_ascending(doris::FieldType ft, const doris:: namespace doris::segment_v2 { +// Normalize the query literal to the segment storage type before index probing. +// The convert + round-trip check prevents cross-width/type mismatches from being +// treated as valid index hits when original predicates may be pruned afterwards. 
+namespace inverted_index_query_param { + +Status convert_to_storage_value(const DataTypePtr& query_type, const Field& query_value, + const DataTypePtr& storage_type, Field* storage_value, + bool allow_int_cross_width) { + DORIS_CHECK(storage_value != nullptr); + DORIS_CHECK(query_type != nullptr); + DORIS_CHECK(storage_type != nullptr); + DataTypePtr normalized_query_type = remove_nullable(query_type); + DataTypePtr normalized_storage_type = remove_nullable(storage_type); + PrimitiveType query_primitive_type = normalized_query_type->get_primitive_type(); + PrimitiveType storage_primitive_type = normalized_storage_type->get_primitive_type(); + // Reaching this path with the same primitive type means either column predicate construction + // from the same column DataType, or expr cast pushdown peeled a cast only because the storage + // and target DataTypes are identical modulo nullable wrappers. Cross-primitive peeling falls + // through to the conversion branch below. + if (storage_primitive_type == query_primitive_type) { + *storage_value = query_value; + return Status::OK(); + } + const bool int_cross_width = + allow_int_cross_width && is_int(storage_primitive_type) && is_int(query_primitive_type); + if (!is_cast_compatible_for_field_conversion(storage_primitive_type, query_primitive_type) && + !int_cross_width) { + return Status::Error( + "Inverted index evaluate skipped, incompatible cast from storage type {} to query " + "type {}", + type_to_string(storage_primitive_type), type_to_string(query_primitive_type)); + } + + try { + convert_field_to_type(query_value, *normalized_storage_type, storage_value); + if (storage_value->is_null()) { + return Status::Error( + "Inverted index evaluate skipped, query value cannot be represented by " + "storage type {}", + normalized_storage_type->get_name()); + } + + // A successful conversion is not sufficient. Every cross-primitive value must round-trip + // bit-exactly to the original literal before probing the index. 
This is what makes + // FLOAT/DOUBLE widening, integer cross-width conversion, and overflow-to-NULL casts safe: + // if cast(storage_value AS query_type) can equal the literal, the round-trip proves the + // storage value is unique and index matches are identical to the original cast results. + Field roundtrip_value; + convert_field_to_type(*storage_value, *normalized_query_type, &roundtrip_value); + if (roundtrip_value.is_null() || !(roundtrip_value == query_value)) { + return Status::Error( + "Inverted index evaluate skipped, query value cannot round-trip between " + "query type {} and storage type {}", + normalized_query_type->get_name(), normalized_storage_type->get_name()); + } + } catch (const Exception& e) { + return Status::Error( + "Inverted index evaluate skipped, failed to convert query value to storage type: " + "{}", + e.what()); + } + + return Status::OK(); +} + +Status convert_to_storage_value(PrimitiveType query_type, const Field& query_value, + const DataTypePtr& storage_type, Field* storage_value, + bool allow_int_cross_width) { + auto query_data_type = + DataTypeFactory::instance().create_data_type(query_type, false /* is_nullable */); + return convert_to_storage_value(query_data_type, query_value, storage_type, storage_value, + allow_int_cross_width); +} + +Status read_null_bitmap(IndexIterator* iter, std::shared_ptr* null_bitmap) { + DORIS_CHECK(iter != nullptr); + DORIS_CHECK(null_bitmap != nullptr); + *null_bitmap = std::make_shared(); + if (iter->has_null()) { + InvertedIndexQueryCacheHandle null_bitmap_cache_handle; + RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); + *null_bitmap = null_bitmap_cache_handle.get_bitmap(); + } + return Status::OK(); +} + +Status build_result_bitmap(IndexIterator* iter, std::shared_ptr data_bitmap, + InvertedIndexResultBitmap* result) { + DORIS_CHECK(data_bitmap != nullptr); + DORIS_CHECK(result != nullptr); + std::shared_ptr null_bitmap; + RETURN_IF_ERROR(read_null_bitmap(iter, 
&null_bitmap)); + *result = InvertedIndexResultBitmap(data_bitmap, null_bitmap); + result->mask_out_null(); + return Status::OK(); +} + +} // namespace inverted_index_query_param + std::string InvertedIndexReader::get_index_file_path() { return _index_file_reader->get_index_file_path(&_index_meta); } diff --git a/be/src/storage/index/inverted/inverted_index_reader.h b/be/src/storage/index/inverted/inverted_index_reader.h index 0e2f6a120d41e3..d02cc91f868465 100644 --- a/be/src/storage/index/inverted/inverted_index_reader.h +++ b/be/src/storage/index/inverted/inverted_index_reader.h @@ -25,6 +25,7 @@ #include #include "common/status.h" +#include "core/data_type/data_type.h" #include "core/data_type/primitive_type.h" #include "core/field.h" #include "io/fs/file_system.h" @@ -396,5 +397,22 @@ class BkdIndexReader : public InvertedIndexReader { const KeyCoder* _value_key_coder {}; }; +namespace inverted_index_query_param { + +Status convert_to_storage_value(const DataTypePtr& query_type, const Field& query_value, + const DataTypePtr& storage_type, Field* storage_value, + bool allow_int_cross_width = false); + +Status convert_to_storage_value(PrimitiveType query_type, const Field& query_value, + const DataTypePtr& storage_type, Field* storage_value, + bool allow_int_cross_width = false); + +Status read_null_bitmap(IndexIterator* iter, std::shared_ptr* null_bitmap); + +Status build_result_bitmap(IndexIterator* iter, std::shared_ptr data_bitmap, + InvertedIndexResultBitmap* result); + +} // namespace inverted_index_query_param + } // namespace segment_v2 } // namespace doris diff --git a/be/src/storage/predicate/comparison_predicate.h b/be/src/storage/predicate/comparison_predicate.h index e1ebae39f8f9d4..dafb288de0a41f 100644 --- a/be/src/storage/predicate/comparison_predicate.h +++ b/be/src/storage/predicate/comparison_predicate.h @@ -22,6 +22,7 @@ #include "common/compare.h" #include "core/column/column_dictionary.h" +#include "core/data_type/data_type_nullable.h" 
#include "core/field.h" #include "storage/index/bloom_filter/bloom_filter.h" #include "storage/index/inverted/inverted_index_cache.h" // IWYU pragma: keep @@ -93,10 +94,14 @@ class ComparisonPredicateBase final : public ColumnPredicate { return Status::InvalidArgument("invalid comparison predicate type {}", PT); } + Field field_value = Field::create_field(_value); + Field query_value; + RETURN_IF_ERROR(inverted_index_query_param::convert_to_storage_value( + Type, field_value, name_with_type.second, &query_value)); InvertedIndexParam param; param.column_name = name_with_type.first; param.column_type = name_with_type.second; - param.query_value = Field::create_field(_value); + param.query_value = query_value; param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/storage/predicate/in_list_predicate.h b/be/src/storage/predicate/in_list_predicate.h index 6c92290e5be009..4687fd9589a605 100644 --- a/be/src/storage/predicate/in_list_predicate.h +++ b/be/src/storage/predicate/in_list_predicate.h @@ -24,6 +24,7 @@ #include "common/exception.h" #include "core/column/column_dictionary.h" #include "core/data_type/data_type.h" +#include "core/data_type/data_type_nullable.h" #include "core/data_type/define_primitive_type.h" #include "core/data_type/primitive_type.h" #include "core/decimal12.h" @@ -171,11 +172,20 @@ class InListPredicateBase final : public ColumnPredicate { const T* value = (const T*)(iter->get_value()); field_value = Field::create_field(*value); } + Field query_value; + auto convert_status = inverted_index_query_param::convert_to_storage_value( + Type, field_value, name_with_type.second, &query_value, + PT == PredicateType::IN_LIST); + if (convert_status.code() == ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED) { + iter->next(); + continue; + } + RETURN_IF_ERROR(convert_status); InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY; InvertedIndexParam param; param.column_name = 
name_with_type.first; param.column_type = name_with_type.second; - param.query_value = field_value; + param.query_value = query_value; param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/test/storage/segment/inverted_index_reader_test.cpp b/be/test/storage/segment/inverted_index_reader_test.cpp index 2305833a402778..a49ae7e7a9717a 100644 --- a/be/test/storage/segment/inverted_index_reader_test.cpp +++ b/be/test/storage/segment/inverted_index_reader_test.cpp @@ -29,6 +29,7 @@ #include #include +#include "core/data_type/data_type_factory.hpp" #include "core/field.h" #include "core/value/vdatetime_value.h" #include "runtime/runtime_state.h" @@ -4131,6 +4132,72 @@ TEST_F(InvertedIndexReaderTest, BkdRangeIPv6RangeQuery) { test_bkd_range_ipv6(); } +TEST(InvertedIndexQueryParamTest, ConvertToStorageValue) { + auto make_type = [](PrimitiveType type) { + return DataTypeFactory::instance().create_data_type(type, false); + }; + auto make_type_with_scale = [](PrimitiveType type, int precision, int scale) { + return DataTypeFactory::instance().create_data_type(type, false, precision, scale); + }; + auto expect_skipped_with_type = [](const DataTypePtr& query_type, const Field& query_value, + const DataTypePtr& storage_type, + bool allow_int_cross_width = false) { + Field storage_value; + auto status = inverted_index_query_param::convert_to_storage_value( + query_type, query_value, storage_type, &storage_value, allow_int_cross_width); + ASSERT_FALSE(status.ok()); + EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED); + }; + auto expect_ok_with_type = [](const DataTypePtr& query_type, const Field& query_value, + const DataTypePtr& storage_type, const Field& expected_value, + bool allow_int_cross_width = true) { + Field storage_value; + auto status = inverted_index_query_param::convert_to_storage_value( + query_type, query_value, storage_type, &storage_value, allow_int_cross_width); + 
ASSERT_TRUE(status.ok()) << status; + EXPECT_EQ(storage_value, expected_value); + }; + + auto storage_type_bigint = make_type(TYPE_BIGINT); + auto storage_type_tinyint = make_type(TYPE_TINYINT); + auto storage_type_float = make_type(TYPE_FLOAT); + auto storage_type_int = make_type(TYPE_INT); + auto storage_type_string = make_type(TYPE_STRING); + + // Positive IN/equality call-sites can normalize integer query literals to the segment + // storage type when the value round-trips exactly. + expect_ok_with_type(make_type(TYPE_TINYINT), Field::create_field(13), + storage_type_bigint, Field::create_field(13)); + expect_ok_with_type(make_type(TYPE_INT), Field::create_field(13), + storage_type_tinyint, Field::create_field(13)); + + // The default path remains conservative, and out-of-range values are skipped even for + // positive IN/equality call-sites. + expect_skipped_with_type(make_type(TYPE_TINYINT), Field::create_field(13), + storage_type_bigint); + expect_skipped_with_type(make_type(TYPE_INT), Field::create_field(128), + storage_type_tinyint, true); + expect_skipped_with_type(make_type(TYPE_BIGINT), Field::create_field(1000), + storage_type_tinyint); + + // The same round-trip check also gates non-integer cross-primitive conversions. + expect_ok_with_type(make_type(TYPE_DOUBLE), Field::create_field(0.5), + storage_type_float, Field::create_field(0.5F), false); + expect_skipped_with_type(make_type(TYPE_DOUBLE), Field::create_field(0.1), + storage_type_float); + + expect_ok_with_type(make_type_with_scale(TYPE_DECIMAL128I, 38, 0), + Field::create_field(Decimal128V3(13)), storage_type_int, + Field::create_field(13), false); + expect_ok_with_type(make_type(TYPE_VARCHAR), + Field::create_field(std::string("abc")), storage_type_string, + Field::create_field(std::string("abc")), false); + + // BOOLEAN casts are not treated as integer cross-width conversions. 
+ expect_skipped_with_type(make_type(TYPE_BOOLEAN), Field::create_field(true), + storage_type_int, true); +} + // Verifies that KeyCoder produces byte-identical // output regardless of whether the input pointer is to int64_t or uint64_t. // This is what makes TypedInvertedIndexQueryParam::storage_val diff --git a/regression-test/suites/inverted_index_p0/test_variant_inverted_index_cast.groovy b/regression-test/suites/inverted_index_p0/test_variant_inverted_index_cast.groovy new file mode 100644 index 00000000000000..1ac74e37396420 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_variant_inverted_index_cast.groovy @@ -0,0 +1,243 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import java.util.regex.Pattern +import org.apache.doris.regression.action.ProfileAction + +suite("test_variant_inverted_index_cast", "nonConcurrent") { + def fetchProfileText = { sqlText -> + def profileAction = new ProfileAction(context) + def profiles = profileAction.getProfileList() + assertTrue(profiles.size() > 0) + + def profileId = null + for (def profile in profiles) { + if (profile["Sql Statement"].contains(sqlText)) { + profileId = profile["Profile ID"] + break + } + } + assertTrue(profileId != null) + return profileAction.getProfile(profileId) + } + + def maxCounter = { profileText, counterName -> + def matcher = Pattern.compile( + "${counterName}(?:_[^:\\s]+)?:\\s*(?:sum\\s+)?(\\d+)\\b").matcher(profileText) + int maxVal = 0 + while (matcher.find()) { + int val = matcher.group(1).toInteger() + if (val > maxVal) { + maxVal = val + } + } + return maxVal + } + + def assertIndexFilterUsed = { profileText -> + assertTrue(maxCounter(profileText, "RowsInvertedIndexFiltered") > 0) + } + + def assertScanRows = { profileText, expectedScanRows -> + assertTrue(Pattern.compile("ScanRows:\\s*(?:sum\\s+)?${expectedScanRows}\\b").matcher(profileText).find()) + } + + def assertIndexFilterHit = { profileText -> + assertIndexFilterUsed(profileText) + assertScanRows(profileText, 1) + } + + def assertRows = { expected, actual -> + assertEquals(expected.size(), actual.size()) + for (int i = 0; i < expected.size(); i++) { + assertEquals(expected[i].size(), actual[i].size()) + for (int j = 0; j < expected[i].size(); j++) { + assertEquals(expected[i][j] as long, actual[i][j] as long) + } + } + } + + sql """ set enable_profile = true """ + sql """ set profile_level = 2 """ + sql """ set enable_common_expr_pushdown = true """ + sql """ set enable_common_expr_pushdown_for_inverted_index = true """ + sql """ set inverted_index_skip_threshold = 0 """ + + sql """ DROP TABLE IF EXISTS test_variant_inverted_index_cast_auto_storage """ + sql """ + CREATE TABLE 
test_variant_inverted_index_cast_auto_storage ( + row_id BIGINT, + v VARIANT COMMENT 'auto inferred variant', + INDEX idx_v(v) USING INVERTED COMMENT '' + ) + ENGINE=OLAP + DUPLICATE KEY(row_id) + DISTRIBUTED BY HASH(row_id) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true", + "inverted_index_storage_format" = "v2" + ) + """ + + sql """ + INSERT INTO test_variant_inverted_index_cast_auto_storage + SELECT number, CONCAT( + '{"int_key": ', number, ', "bigint_key": ', 1300000000000 + number, '}') + FROM numbers("number" = "20") + """ + sql """ + INSERT INTO test_variant_inverted_index_cast_auto_storage VALUES + (269, '{"int_key": 269}'), + (-243, '{"int_key": -243}'), + (128, '{"int_key": 128}'), + (-129, '{"int_key": -129}') + """ + + sql """ clean all profile """ + def autoBigintToIntSql = """SELECT row_id +FROM test_variant_inverted_index_cast_auto_storage +WHERE cast(v["int_key"] as int) = 13""" + def autoBigintToIntResult = sql """ ${autoBigintToIntSql} """ + assertRows([[13]], autoBigintToIntResult) + assertIndexFilterHit(fetchProfileText(autoBigintToIntSql)) + + sql """ clean all profile """ + def autoBigintToTinyintSql = """SELECT row_id +FROM test_variant_inverted_index_cast_auto_storage +WHERE cast(v["int_key"] as tinyint) = 13 +ORDER BY row_id""" + def autoBigintToTinyintResult = sql """ ${autoBigintToTinyintSql} """ + assertRows([[13]], autoBigintToTinyintResult) + assertIndexFilterHit(fetchProfileText(autoBigintToTinyintSql)) + + sql """ clean all profile """ + def autoBigintToTinyintInSql = """SELECT row_id +FROM test_variant_inverted_index_cast_auto_storage +WHERE cast(v["int_key"] as tinyint) IN (13) +ORDER BY row_id""" + def autoBigintToTinyintInResult = sql """ ${autoBigintToTinyintInSql} """ + assertRows([[13]], autoBigintToTinyintInResult) + assertIndexFilterHit(fetchProfileText(autoBigintToTinyintInSql)) + + def tinyintNotEqExpectedRows = [ + [0], [1], [2], [3], [4], [5], [6], 
[7], [8], [9], [10], [11], [12], + [14], [15], [16], [17], [18], [19] + ] + + sql """ clean all profile """ + def autoBigintToTinyintNotEqSql = """SELECT row_id +FROM test_variant_inverted_index_cast_auto_storage +WHERE cast(v["int_key"] as tinyint) != 13 +ORDER BY row_id""" + def autoBigintToTinyintNotEqResult = sql """ ${autoBigintToTinyintNotEqSql} """ + assertRows(tinyintNotEqExpectedRows, autoBigintToTinyintNotEqResult) + + sql """ clean all profile """ + def autoBigintToTinyintNotInSql = """SELECT row_id +FROM test_variant_inverted_index_cast_auto_storage +WHERE cast(v["int_key"] as tinyint) NOT IN (13) +ORDER BY row_id""" + def autoBigintToTinyintNotInResult = sql """ ${autoBigintToTinyintNotInSql} """ + assertRows(tinyintNotEqExpectedRows, autoBigintToTinyintNotInResult) + + sql """ clean all profile """ + def autoBigintToTinyintPositiveOverflowSql = """SELECT row_id +FROM test_variant_inverted_index_cast_auto_storage +WHERE cast(v["int_key"] as tinyint) = 128 +ORDER BY row_id""" + def autoBigintToTinyintPositiveOverflowResult = + sql """ ${autoBigintToTinyintPositiveOverflowSql} """ + assertRows([], autoBigintToTinyintPositiveOverflowResult) + + sql """ clean all profile """ + def autoBigintToTinyintNegativeOverflowSql = """SELECT row_id +FROM test_variant_inverted_index_cast_auto_storage +WHERE cast(v["int_key"] as tinyint) = -129 +ORDER BY row_id""" + def autoBigintToTinyintNegativeOverflowResult = + sql """ ${autoBigintToTinyintNegativeOverflowSql} """ + assertRows([], autoBigintToTinyintNegativeOverflowResult) + + sql """ clean all profile """ + def autoBigintToSmallintSql = """SELECT row_id +FROM test_variant_inverted_index_cast_auto_storage +WHERE cast(v["int_key"] as smallint) = 13""" + def autoBigintToSmallintResult = sql """ ${autoBigintToSmallintSql} """ + assertRows([[13]], autoBigintToSmallintResult) + assertIndexFilterHit(fetchProfileText(autoBigintToSmallintSql)) + + sql """ clean all profile """ + def autoBigintToBigintSql = """SELECT row_id 
+FROM test_variant_inverted_index_cast_auto_storage +WHERE cast(v["int_key"] as bigint) = 13""" + def autoBigintToBigintResult = sql """ ${autoBigintToBigintSql} """ + assertRows([[13]], autoBigintToBigintResult) + assertIndexFilterHit(fetchProfileText(autoBigintToBigintSql)) + + sql """ clean all profile """ + // BIGINT -> BIGINT cast pushdown was already supported on old master. Keep this + // case as a regression guard so the existing same-type large-value index path + // is not broken while fixing cross-width cast pushdown. + def autoBigintLargeValueSql = """SELECT row_id +FROM test_variant_inverted_index_cast_auto_storage +WHERE cast(v["bigint_key"] as bigint) = 1300000000013""" + def autoBigintLargeValueResult = sql """ ${autoBigintLargeValueSql} """ + assertRows([[13]], autoBigintLargeValueResult) + assertIndexFilterUsed(fetchProfileText(autoBigintLargeValueSql)) + + sql """ clean all profile """ + def autoBigintToLargeintSql = """SELECT row_id +FROM test_variant_inverted_index_cast_auto_storage +WHERE cast(v["int_key"] as largeint) = 13""" + def autoBigintToLargeintResult = sql """ ${autoBigintToLargeintSql} """ + assertRows([[13]], autoBigintToLargeintResult) + assertIndexFilterHit(fetchProfileText(autoBigintToLargeintSql)) + + sql """ DROP TABLE IF EXISTS test_variant_inverted_index_cast_bool_storage """ + sql """ + CREATE TABLE test_variant_inverted_index_cast_bool_storage ( + row_id BIGINT, + v VARIANT COMMENT 'auto inferred variant', + INDEX idx_v(v) USING INVERTED COMMENT '' + ) + ENGINE=OLAP + DUPLICATE KEY(row_id) + DISTRIBUTED BY HASH(row_id) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true", + "inverted_index_storage_format" = "v2" + ) + """ + + sql """ + INSERT INTO test_variant_inverted_index_cast_bool_storage VALUES + (1, '{"bool_key": true}'), + (2, '{"bool_key": false}') + """ + + sql """ clean all profile """ + def boolCastSql = """SELECT row_id +FROM 
test_variant_inverted_index_cast_bool_storage +WHERE cast(v["bool_key"] as boolean)""" + def boolCastResult = sql """ ${boolCastSql} """ + assertRows([[1]], boolCastResult) + assertIndexFilterHit(fetchProfileText(boolCastSql)) + + sql """ set enable_profile = false """ +}