Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 45 additions & 1 deletion be/src/core/data_type/convert_field_to_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,43 @@

namespace doris {

namespace {

// Maps an integer primitive type to its width in bytes:
// TINYINT -> 1, SMALLINT -> 2, INT -> 4, BIGINT -> 8, LARGEINT -> 16.
// Any other type is a caller error and raises an INTERNAL_ERROR Exception.
int int_byte_size(PrimitiveType type) {
    if (type == TYPE_TINYINT) {
        return 1;
    }
    if (type == TYPE_SMALLINT) {
        return 2;
    }
    if (type == TYPE_INT) {
        return 4;
    }
    if (type == TYPE_BIGINT) {
        return 8;
    }
    if (type == TYPE_LARGEINT) {
        return 16;
    }
    // Not one of the five integer types handled above.
    throw Exception(ErrorCode::INTERNAL_ERROR, "Unexpected non-integer type {}",
                    type_to_string(type));
}

// True when both types are integer types and the storage type occupies strictly
// fewer bytes than the query type. Returns false if either side is not an integer
// type, so int_byte_size() is only reached with integer inputs.
bool is_lossless_int_widen(PrimitiveType storage_type, PrimitiveType query_type) {
    const bool both_integers = is_int(storage_type) && is_int(query_type);
    return both_integers && int_byte_size(storage_type) < int_byte_size(query_type);
}

// True only for the FLOAT (storage) -> DOUBLE (query) pair.
bool is_lossless_float_widen(PrimitiveType storage_type, PrimitiveType query_type) {
    if (storage_type != TYPE_FLOAT) {
        return false;
    }
    return query_type == TYPE_DOUBLE;
}

// True when the storage type is an integer type and the query type is a decimal type.
bool is_int_to_decimal_cast(PrimitiveType storage_type, PrimitiveType query_type) {
    if (!is_int(storage_type)) {
        return false;
    }
    return is_decimal(query_type);
}

} // namespace

template <typename F> /// Field template parameter may be const or non-const Field.
void dispatch(F&& f, const Field& field) {
switch (field.get_type()) {
Expand Down Expand Up @@ -828,4 +865,11 @@ void convert_field_to_type(const Field& from_value, const IDataType& to_type, Fi
return convert_field_to_typeImpl(from_value, to_type, from_type_hint, to);
}
}
} // namespace doris

// Reports whether a (storage_type, query_type) pair belongs to one of the accepted
// cross-type categories. The categories are checked in this order:
//   1. integer -> wider integer
//   2. FLOAT -> DOUBLE
//   3. integer -> decimal
//   4. string type -> string type
bool is_cast_compatible_for_field_conversion(PrimitiveType storage_type, PrimitiveType query_type) {
    if (is_lossless_int_widen(storage_type, query_type)) {
        return true;
    }
    if (is_lossless_float_widen(storage_type, query_type)) {
        return true;
    }
    if (is_int_to_decimal_cast(storage_type, query_type)) {
        return true;
    }
    return is_string_type(storage_type) && is_string_type(query_type);
}
} // namespace doris
6 changes: 6 additions & 0 deletions be/src/core/data_type/convert_field_to_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#pragma once
#include "common/status.h"
#include "core/data_type/define_primitive_type.h"
#include "core/field.h"

namespace doris {
Expand All @@ -36,4 +37,9 @@ class IDataType;
void convert_field_to_type(const Field& from_value, const IDataType& to_type, Field* field,
const IDataType* from_type_hint = nullptr);

// Return whether this storage/query primitive-type pair is eligible for cross-type
// Field conversion in index-pushdown paths. Exact safety is still enforced by the
// caller's value-level convert + round-trip checks.
//
// The accepted pairs (storage_type -> query_type) are:
//   - integer type -> strictly wider integer type
//   - TYPE_FLOAT -> TYPE_DOUBLE
//   - integer type -> decimal type
//   - string type -> string type
// Every other pair yields false.
bool is_cast_compatible_for_field_conversion(PrimitiveType storage_type, PrimitiveType query_type);

} // namespace doris
19 changes: 9 additions & 10 deletions be/src/exprs/function/functions_comparison.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "exprs/function/function_helpers.h"
#include "exprs/function/functions_logical.h"
#include "storage/index/index_reader_helper.h"
#include "storage/index/inverted/inverted_index_reader.h"

namespace doris {

Expand Down Expand Up @@ -487,24 +488,22 @@ class FunctionComparison : public IFunction {
if (param_value.is_null()) {
return Status::OK();
}
Field query_value;
const bool allow_int_cross_width = name_view == NameEquals::name;
RETURN_IF_ERROR(segment_v2::inverted_index_query_param::convert_to_storage_value(
arguments[0].type, param_value, data_type_with_name.second, &query_value,
allow_int_cross_width));
segment_v2::InvertedIndexParam param;
param.column_name = data_type_with_name.first;
param.column_type = data_type_with_name.second;
param.query_value = param_value;
param.query_value = query_value;
param.query_type = query_type;
param.num_rows = num_rows;
param.roaring = std::make_shared<roaring::Roaring>();
param.analyzer_ctx = analyzer_ctx;
RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param}));
std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
if (iter->has_null()) {
segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
null_bitmap = null_bitmap_cache_handle.get_bitmap();
}
segment_v2::InvertedIndexResultBitmap result(param.roaring, null_bitmap);
bitmap_result = result;
bitmap_result.mask_out_null();
RETURN_IF_ERROR(segment_v2::inverted_index_query_param::build_result_bitmap(
iter, param.roaring, &bitmap_result));

if (name_view == NameNotEquals::name) {
roaring::Roaring full_result;
Expand Down
24 changes: 15 additions & 9 deletions be/src/exprs/function/in.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#include "exprs/function_context.h"
#include "exprs/hybrid_set.h"
#include "storage/index/index_reader_helper.h"
#include "storage/index/inverted/inverted_index_reader.h"

namespace doris {

Expand Down Expand Up @@ -154,11 +155,6 @@ class FunctionIn : public IFunction {
//NOT support in list when parser is FULLTEXT for expr inverted index evaluate.
return Status::OK();
}
if (iter->has_null()) {
segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
null_bitmap = null_bitmap_cache_handle.get_bitmap();
}
for (const auto& arg : arguments) {
Field param_value;
arg.column->get(0, param_value);
Expand All @@ -167,24 +163,34 @@ class FunctionIn : public IFunction {
if (negative) {
return Status::OK();
}
RETURN_IF_ERROR(segment_v2::inverted_index_query_param::read_null_bitmap(
iter, &null_bitmap));
*roaring |= *null_bitmap;
continue;
}
Field query_value;
auto convert_status = segment_v2::inverted_index_query_param::convert_to_storage_value(
arg.type, param_value, data_type_with_name.second, &query_value, !negative);
if (convert_status.code() == ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED) {
// The literal cannot map to any storage value that would round-trip to it, so it
// contributes no hits to the positive IN union.
continue;
}
RETURN_IF_ERROR(convert_status);
InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
segment_v2::InvertedIndexParam param;
param.column_name = data_type_with_name.first;
param.column_type = data_type_with_name.second;
param.query_value = param_value;
param.query_value = query_value;
param.query_type = query_type;
param.num_rows = num_rows;
param.roaring = std::make_shared<roaring::Roaring>();
param.analyzer_ctx = analyzer_ctx;
RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param}));
*roaring |= *param.roaring;
}
segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
bitmap_result = result;
bitmap_result.mask_out_null();
RETURN_IF_ERROR(segment_v2::inverted_index_query_param::build_result_bitmap(
iter, roaring, &bitmap_result));
if constexpr (negative) {
roaring::Roaring full_result;
full_result.addRange(0, num_rows);
Expand Down
54 changes: 54 additions & 0 deletions be/src/exprs/vcast_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@
#include "exprs/function/simple_function_factory.h"
#include "exprs/vexpr.h"
#include "exprs/vexpr_context.h"
#include "exprs/vslot_ref.h"
#include "runtime/runtime_state.h"
#include "storage/index/index_reader_helper.h"
#include "storage/index/inverted/inverted_index_iterator.h"

namespace doris {
class RowDescriptor;
Expand Down Expand Up @@ -127,6 +130,57 @@ Status VCastExpr::execute_column_impl(VExprContext* context, const Block* block,
return Status::OK();
}

// Attempts to evaluate a cast-to-BOOLEAN over a single slot reference through the
// column's inverted index.
//
// The function returns Status::OK() without recording any result when:
//   - the cast target (with nullable removed) is not BOOLEAN,
//   - the only child is not a slot reference,
//   - no inverted index iterator exists for the column, or it has no string/BKD index,
//   - the storage name/type of the column is unavailable.
// Otherwise it converts the boolean literal 1 to the storage type, runs an EQUAL_QUERY
// against the index, and, if the resulting bitmap is non-empty, registers it on the
// index context for this expression. Errors from the conversion, the index read and
// the bitmap construction are propagated to the caller.
//
// NOTE(review): the index lookup matches rows whose stored value equals the converted
// `true` literal; confirm this is the intended semantics for every storage type that
// convert_to_storage_value() accepts here.
Status VCastExpr::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
    const auto cast_target = remove_nullable(get_target_type());
    if (cast_target->get_primitive_type() != TYPE_BOOLEAN) {
        return Status::OK();
    }

    DCHECK_EQ(get_num_children(), 1);
    const auto& operand = get_child(0);
    if (!operand->is_slot_ref()) {
        return Status::OK();
    }

    auto* slot_ref = assert_cast<VSlotRef*>(operand.get());
    auto slot_column_id = slot_ref->column_id();
    auto idx_ctx = context->get_index_context();

    // An iterator must exist and must be backed by a string or BKD index.
    auto* index_iter = idx_ctx->get_inverted_index_iterator_by_column_id(slot_column_id);
    if (index_iter == nullptr ||
        !segment_v2::IndexReaderHelper::has_string_or_bkd_index(index_iter)) {
        return Status::OK();
    }

    const auto* name_and_type = idx_ctx->get_storage_name_and_type_by_column_id(slot_column_id);
    if (name_and_type == nullptr) {
        return Status::OK();
    }

    // Express boolean `true` in the column's storage type before querying the index.
    Field storage_literal;
    RETURN_IF_ERROR(segment_v2::inverted_index_query_param::convert_to_storage_value(
            get_target_type(), Field::create_field<TYPE_BOOLEAN>(1), name_and_type->second,
            &storage_literal));

    segment_v2::InvertedIndexParam query;
    query.roaring = std::make_shared<roaring::Roaring>();
    query.num_rows = segment_num_rows;
    query.query_type = segment_v2::InvertedIndexQueryType::EQUAL_QUERY;
    query.query_value = storage_literal;
    query.column_type = name_and_type->second;
    query.column_name = name_and_type->first;
    RETURN_IF_ERROR(index_iter->read_from_index(segment_v2::IndexParam {&query}));

    segment_v2::InvertedIndexResultBitmap matched;
    RETURN_IF_ERROR(segment_v2::inverted_index_query_param::build_result_bitmap(
            index_iter, query.roaring, &matched));
    if (matched.is_empty()) {
        // Nothing to record; the expression is left without an index result.
        return Status::OK();
    }
    idx_ctx->set_index_result_for_expr(this, matched);
    idx_ctx->set_true_for_index_status(this, slot_column_id);
    return Status::OK();
}

bool cast_error_code(Status& st) {
//There may be more error codes that need to be captured by try cast in the future.
if (st.is<ErrorCode::INVALID_ARGUMENT>()) {
Expand Down
1 change: 1 addition & 0 deletions be/src/exprs/vcast_expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class VCastExpr : public VExpr {
~VCastExpr() override = default;
Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
size_t count, ColumnPtr& result_column) const override;
Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override;
Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override;
Status open(RuntimeState* state, VExprContext* context,
FunctionContext::FunctionStateScope scope) override;
Expand Down
17 changes: 14 additions & 3 deletions be/src/exprs/vexpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "common/status.h"
#include "core/column/column_nothing.h"
#include "core/column/column_vector.h"
#include "core/data_type/convert_field_to_type.h"
#include "core/data_type/data_type_array.h"
#include "core/data_type/data_type_decimal.h"
#include "core/data_type/data_type_factory.hpp"
Expand Down Expand Up @@ -885,10 +886,20 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase
continue;
}
}
const bool string_to_string = is_string_type(origin_primitive_type) &&
is_string_type(target_primitive_type);
const auto& function_name = function->get_name();
// This only decides whether peeling the cast is worth trying. The converted
// literal still has to pass convert_to_storage_value() and its round-trip check.
const bool int_cross_width_for_equal_or_in =
(function_name == "eq" || function_name == "in") &&
is_int(origin_primitive_type) && is_int(target_primitive_type);
if (origin_primitive_type != TYPE_VARIANT &&
(storage_type->equals(*target_type) ||
(is_string_type(target_primitive_type) &&
is_string_type(origin_primitive_type)))) {
(storage_type->equals(*target_type) || string_to_string ||
(!is_complex_type(storage_type->get_primitive_type()) &&
(is_cast_compatible_for_field_conversion(origin_primitive_type,
target_primitive_type) ||
int_cross_width_for_equal_or_in)))) {
children_exprs.emplace_back(expr_without_cast(child));
}
} else {
Expand Down
Loading
Loading