Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions vortex-array/src/stats/stats_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -459,11 +459,28 @@ impl MutTypedStatsSetRef<'_, '_> {
) {
(Some(m1), Some(m2)) => {
// If the combine sum is exact, then we can sum them.
if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
s1.as_primitive()
.checked_add(&s2.as_primitive())
.and_then(|pscalar| pscalar.pvalue().map(ScalarValue::Primitive))
}) {
if let Some(scalar_value) =
m1.zip(m2).as_exact().and_then(|(s1, s2)| match s1.dtype() {
DType::Primitive(..) => s1
.as_primitive()
.checked_add(&s2.as_primitive())
.and_then(|pscalar| pscalar.pvalue().map(ScalarValue::Primitive)),
DType::Decimal(..) => s1
.as_decimal()
.checked_binary_numeric(
&s2.as_decimal(),
crate::scalar::NumericOperator::Add,
)
.map(|scalar| {
ScalarValue::Decimal(
scalar
.decimal_value()
.vortex_expect("no decimal value in scalar"),
)
}),
_ => None,
})
{
self.set(Stat::Sum, Precision::Exact(scalar_value));
}
}
Expand Down
15 changes: 14 additions & 1 deletion vortex-duckdb/cpp/include/duckdb_vx/table_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,15 @@ typedef struct {
bool has_max_cardinality;
} duckdb_vx_node_statistics;

// Per-column statistics exchanged with the table function's `statistics` callback.
//
// The C++ glue zero-initializes this struct before invoking the callback, so a field the
// callback leaves untouched reads as "not provided".
typedef struct {
// Lower bound for the column, or null when unknown. When non-null the C++ glue reads it and
// then releases it with duckdb_destroy_value.
duckdb_value min;
// Upper bound for the column, or null when unknown. Same ownership handling as `min`.
duckdb_value max;
// upper bit: "length is set". lower 32 bits: DuckDB's max string length.
// The C++ glue tests the flag with `>> 63` and narrows the value to uint32_t; it only reads
// this field for string-like columns.
uint64_t max_string_length;
} duckdb_column_statistics;

// Column index type; an alias of idx_t.
typedef idx_t column_t;

// A transparent DuckDB table function vtable, which can be used to configure a table function.
// See duckdb/include/function/tfunc.hpp for details on each field.
typedef struct {
Expand Down Expand Up @@ -137,7 +146,11 @@ typedef struct {

// void *in_out_function;
// void *in_out_function_final;
void *statistics;

void (*statistics)(duckdb_client_context context,
const void *bind_data,
size_t column_index,
duckdb_column_statistics *stats_out);

// void *dependency;
void (*cardinality)(void *bind_data, duckdb_vx_node_statistics *node_stats_out);
Expand Down
88 changes: 84 additions & 4 deletions vortex-duckdb/cpp/table_function.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#include "duckdb_vx/table_function.h"
#include "duckdb_vx/duckdb_diagnostics.h"

DUCKDB_INCLUDES_BEGIN
Expand Down Expand Up @@ -30,8 +31,10 @@ struct CTableFunctionInfo final : TableFunctionInfo {
};

struct CTableBindData final : TableFunctionData {
CTableBindData(unique_ptr<CTableFunctionInfo> info_p, unique_ptr<vortex::CData> ffi_data_p)
: info(std::move(info_p)), ffi_data(std::move(ffi_data_p)) {
CTableBindData(unique_ptr<CTableFunctionInfo> info_p,
unique_ptr<vortex::CData> ffi_data_p,
const vector<LogicalType> &types)
: info(std::move(info_p)), ffi_data(std::move(ffi_data_p)), types(types) {
}

unique_ptr<FunctionData> Copy() const override {
Expand All @@ -43,11 +46,13 @@ struct CTableBindData final : TableFunctionData {
throw BinderException(IntoErrString(error_out));
}
return make_uniq<CTableBindData>(make_uniq<CTableFunctionInfo>(info->vtab),
unique_ptr<CData>(reinterpret_cast<CData *>(copied_ffi_data)));
unique_ptr<CData>(reinterpret_cast<CData *>(copied_ffi_data)),
types);
}

unique_ptr<CTableFunctionInfo> info;
unique_ptr<CData> ffi_data;
vector<LogicalType> types;
};

struct CTableGlobalData final : GlobalTableFunctionState {
Expand Down Expand Up @@ -88,6 +93,79 @@ double c_table_scan_progress(ClientContext &context,
return bind.info->vtab.table_scan_progress(c_ctx, c_bind_data, c_global_state);
}

/// Reinterprets an opaque C-API `duckdb_value` handle as the C++ `Value` it points to.
/// The handle is dereferenced unconditionally, so callers must check it for null first.
static Value &UnwrapValue(duckdb_value value) {
    auto *const cpp_value = reinterpret_cast<Value *>(value);
    return *cpp_value;
}

/// Builds DuckDB numeric statistics for a column of `type` from the FFI statistics struct.
///
/// @param stats Statistics filled in by the FFI callback. Any non-null `min`/`max` handle is
///              consumed: its value is copied into the result and the handle is then destroyed.
/// @param type  Logical type of the column the statistics describe.
/// @return Heap-allocated statistics with min/max set only for the bounds that were provided.
unique_ptr<BaseStatistics> numeric_stats(duckdb_column_statistics &stats, LogicalType type) {
    // Start from *numeric* unknown statistics: the object is mutated below through
    // NumericStats::SetMin/SetMax, so it must be initialised by the same statistics family
    // (not StringStats).
    BaseStatistics out = NumericStats::CreateUnknown(type);
    if (stats.min) {
        NumericStats::SetMin(out, UnwrapValue(stats.min));
        duckdb_destroy_value(&stats.min);
    }
    if (stats.max) {
        NumericStats::SetMax(out, UnwrapValue(stats.max));
        duckdb_destroy_value(&stats.max);
    }
    return out.ToUnique();
}

/// Builds DuckDB string statistics for a column of `type` from the FFI statistics struct.
///
/// Non-null `min`/`max` handles are read and then destroyed. The maximum string length is
/// applied only when the top bit of `max_string_length` is set; the value is narrowed to
/// 32 bits.
unique_ptr<BaseStatistics> string_stats(duckdb_column_statistics &stats, LogicalType type) {
    BaseStatistics result = StringStats::CreateUnknown(type);

    if (stats.min != nullptr) {
        const auto &lower = UnwrapValue(stats.min);
        StringStats::SetMin(result, StringValue::Get(lower));
        duckdb_destroy_value(&stats.min);
    }

    if (stats.max != nullptr) {
        const auto &upper = UnwrapValue(stats.max);
        StringStats::SetMax(result, StringValue::Get(upper));
        duckdb_destroy_value(&stats.max);
    }

    // Bit 63 is the "length is set" flag; the low 32 bits carry the length itself.
    const bool has_max_length = (stats.max_string_length >> 63) != 0;
    if (has_max_length) {
        StringStats::SetMaxStringLength(result, uint32_t(stats.max_string_length));
    }

    return result.ToUnique();
}

/// DuckDB `statistics` callback: returns per-column statistics obtained from the FFI vtable.
///
/// @param context      Client context, forwarded to the FFI callback as an opaque handle.
/// @param bind_data    Bind data created by `c_bind`; must be a `CTableBindData`.
/// @param column_index Index of the column whose statistics are requested.
/// @return Numeric or string statistics for supported column types, "unknown" statistics for
///         unsupported types or when no FFI callback is installed, and `nullptr` (no
///         statistics) when `column_index` does not refer to one of the bound columns.
unique_ptr<BaseStatistics>
c_statistics(ClientContext &context, const FunctionData *bind_data, column_t column_index) {
    if (column_index == COLUMN_IDENTIFIER_EMPTY) {
        return BaseStatistics::CreateUnknown(LogicalTypeId::INVALID).ToUnique();
    }

    const auto &bind = bind_data->Cast<CTableBindData>();

    // Any index outside the bound column types (e.g. a virtual column identifier) has no entry
    // in `types`; report "no statistics" instead of indexing out of range.
    if (column_index >= bind.types.size()) {
        return nullptr;
    }
    const LogicalType &type = bind.types[column_index];

    bool is_numeric = false;
    switch (type.id()) {
    case LogicalTypeId::BOOLEAN:
    case LogicalTypeId::TINYINT:
    case LogicalTypeId::SMALLINT:
    case LogicalTypeId::INTEGER:
    case LogicalTypeId::BIGINT:
    case LogicalTypeId::FLOAT:
    case LogicalTypeId::DOUBLE:
    case LogicalTypeId::UTINYINT:
    case LogicalTypeId::USMALLINT:
    case LogicalTypeId::UINTEGER:
    case LogicalTypeId::UBIGINT:
    case LogicalTypeId::UHUGEINT:
    case LogicalTypeId::HUGEINT:
        is_numeric = true;
        break;
    case LogicalTypeId::VARCHAR:
    case LogicalTypeId::BLOB:
        break;
    default:
        // Types without a min/max mapping never reach the FFI callback.
        return BaseStatistics::CreateUnknown(type).ToUnique();
    }

    // Without an installed callback there is nothing to ask; fall back to unknown statistics.
    if (bind.info->vtab.statistics == nullptr) {
        return BaseStatistics::CreateUnknown(type).ToUnique();
    }

    void *const ffi_bind = bind.ffi_data->DataPtr();
    duckdb_client_context c_ctx = reinterpret_cast<duckdb_client_context>(&context);

    // Zero-initialised so that fields the callback leaves untouched read as "not provided".
    duckdb_column_statistics statistics = {};
    bind.info->vtab.statistics(c_ctx, ffi_bind, column_index, &statistics);

    return is_numeric ? numeric_stats(statistics, type) : string_stats(statistics, type);
}

unique_ptr<FunctionData> c_bind(ClientContext &context,
TableFunctionBindInput &input,
vector<LogicalType> &return_types,
Expand All @@ -111,7 +189,8 @@ unique_ptr<FunctionData> c_bind(ClientContext &context,
}

return make_uniq<CTableBindData>(make_uniq<CTableFunctionInfo>(info.vtab),
unique_ptr<CData>(reinterpret_cast<CData *>(ffi_bind_data)));
unique_ptr<CData>(reinterpret_cast<CData *>(ffi_bind_data)),
return_types);
}

unique_ptr<GlobalTableFunctionState> c_init_global(ClientContext &context, TableFunctionInitInput &input) {
Expand Down Expand Up @@ -363,6 +442,7 @@ extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_database ffi_db, const d
tf.get_virtual_columns = c_get_virtual_columns;
tf.to_string = c_to_string;
tf.table_scan_progress = c_table_scan_progress;
tf.statistics = c_statistics;

// Set up the parameters
tf.arguments.reserve(vtab->parameter_count);
Expand Down
Loading
Loading