diff --git a/rust/perspective-js/test/arrow/bad_row_count.arrow b/rust/perspective-js/test/arrow/bad_row_count.arrow new file mode 100644 index 0000000000..6993013fce Binary files /dev/null and b/rust/perspective-js/test/arrow/bad_row_count.arrow differ diff --git a/rust/perspective-js/test/js/aggregates.spec.js b/rust/perspective-js/test/js/aggregates.spec.js index 69dde8740d..67390c63b1 100644 --- a/rust/perspective-js/test/js/aggregates.spec.js +++ b/rust/perspective-js/test/js/aggregates.spec.js @@ -164,6 +164,52 @@ const std = (nums) => { table.delete(); }); + test("`first` aggregate with a hidden sort column does not crash", async function () { + const table = await perspective.table([ + { g: "x", v: 1 }, + { g: "x", v: 2 }, + { g: "y", v: 3 }, + ]); + const view = await table.view({ + group_by: ["g"], + columns: ["g"], + aggregates: { v: "first" }, + sort: [["v", "desc"]], + }); + + const result = await view.to_json(); + const paths = result.map((r) => r.__ROW_PATH__); + expect(paths.length).toEqual(3); + expect(paths).toContainEqual([]); + expect(paths).toContainEqual(["x"]); + expect(paths).toContainEqual(["y"]); + await view.delete(); + await table.delete(); + }); + + test("`max by` with a non-visible by-column", async function () { + const table = await perspective.table([ + { g: "x", v: 1, w: 10 }, + { g: "x", v: 2, w: 20 }, + { g: "y", v: 3, w: 5 }, + ]); + const view = await table.view({ + group_by: ["g"], + columns: ["v"], + aggregates: { v: ["max by", ["w"]] }, + }); + + const result = await view.to_json(); + // `max by` picks the `v` at the row with the maximum `w`. + expect(result).toEqual([ + { __ROW_PATH__: [], v: 2 }, + { __ROW_PATH__: ["x"], v: 2 }, + { __ROW_PATH__: ["y"], v: 3 }, + ]); + await view.delete(); + await table.delete(); + }); + test("Aggregates are not in columns are ignored", async function () { const table = await perspective.table(data); const view = await table.view({ diff --git a/rust/perspective-js/test/js/constructors.spec.js b/rust/perspective-js/test/js/constructors.spec.js index 7af0fe54d6..5dec9b300e 100644 --- a/rust/perspective-js/test/js/constructors.spec.js +++ b/rust/perspective-js/test/js/constructors.spec.js @@ -732,6 +732,78 @@ function validate_typed_array(typed_array, column_data) { table.delete(); } }); + + test("Table constructor pads short trailing columns with null", async function () { + const table = await perspective.table({ + overflow: [1, 2, 3, 4, 5, 6, 7, 8, 9], + short: [1], + }); + const view = await table.view(); + const result = await view.to_columns(); + expect(result).toEqual({ + overflow: [1, 2, 3, 4, 5, 6, 7, 8, 9], + short: [1, null, null, null, null, null, null, null, null], + }); + await view.delete(); + await table.delete(); + }); + + test("Table constructor pads short leading columns with null", async function () { + const table = await perspective.table({ + short: [1], + overflow: [1, 2, 3, 4, 5, 6, 7, 8, 9], + }); + const view = await table.view(); + const result = await view.to_columns(); + expect(result).toEqual({ + short: [1, null, null, null, null, null, null, null, null], + overflow: [1, 2, 3, 4, 5, 6, 7, 8, 9], + }); + await view.delete(); + await table.delete(); + }); + + test("Table constructor handles columns of equal length", async function () { + const table = await perspective.table({ + a: [1, 2, 3], + b: ["x", "y", "z"], + }); + const view = await table.view(); + const result = await view.to_columns(); + expect(result).toEqual({ + a: [1, 2, 3], + b: ["x", "y", "z"], + }); + await view.delete(); + await table.delete(); + }); + + test("Table update pads short columns with null", async function () { + const table = await perspective.table({ + a: "integer", + b: "integer", + }); + await table.update({ + a: [1, 2, 3, 4, 5], + b: [1], + }); + const view = await table.view(); + const result = await view.to_columns(); + expect(result).toEqual({ + a: [1, 2, 3, 4, 5], + b: [1, null, null, null, null], + }); + + // The table must remain usable for subsequent updates. + await table.update({ a: [10], b: [20] }); + const result2 = await view.to_columns(); + expect(result2).toEqual({ + a: [1, 2, 3, 4, 5, 10], + b: [1, null, null, null, null, 20], + }); + await view.delete(); + await table.delete(); + }); }); test.describe("Formatters", function () { @@ -918,6 +990,20 @@ function validate_typed_array(typed_array, column_data) { view.delete(); table.delete(); }); + + test("Handles many objects without newline separators", async function () { + const expected = []; + for (let i = 0; i < 64; i++) { + expected.push({ a: `row-${i}` }); + } + const data = expected.map(JSON.stringify).join(""); + const table = await perspective.table(data, { format: "ndjson" }); + const view = await table.view(); + expect(await table.size()).toEqual(64); + expect(await view.to_json()).toEqual(expected); + await view.delete(); + await table.delete(); + }); }); test.describe("Constructors", function () { diff --git a/rust/perspective-js/test/js/constructors/arrow.spec.ts b/rust/perspective-js/test/js/constructors/arrow.spec.ts index fe4f0b2165..711666bc98 100644 --- a/rust/perspective-js/test/js/constructors/arrow.spec.ts +++ b/rust/perspective-js/test/js/constructors/arrow.spec.ts @@ -162,5 +162,30 @@ test.describe("Arrow", function () { __ROW_PATH__: [[], [null], [""], ["AAAA"]], }); }); + + test("Loads a time32 (millisecond) column without overflow", async function () { + const expected = Array.from({ length: 64 }, (_, i) => i); + const tableData = arrow.tableFromArrays({ + t: arrow.vectorFromArray(expected, new arrow.TimeMillisecond()), + }); + + const table = await perspective.table(arrow.tableToIPC(tableData)); + const view = await table.view(); + expect(await table.size()).toEqual(64); + expect(await view.to_columns()).toEqual({ t: expected }); + await view.delete(); + await table.delete(); + }); + }); + + test.describe("Malformed input", function () { + test("Rejects an Arrow with a row count that exceeds 32 bits", async function () { + const bytes = new Uint8Array( + fs.readFileSync(`${__dirname}/../../arrow/bad_row_count.arrow`), + ); + await expect(perspective.table(bytes)).rejects.toThrow( + /row count exceeds maximum supported size/, + ); + }); }); }); diff --git a/rust/perspective-js/test/js/expressions/vectors.spec.js b/rust/perspective-js/test/js/expressions/vectors.spec.js index 2ab8f3c890..3a5094825b 100644 --- a/rust/perspective-js/test/js/expressions/vectors.spec.js +++ b/rust/perspective-js/test/js/expressions/vectors.spec.js @@ -117,5 +117,38 @@ import * as common from "./common.js"; await view.delete(); await table.delete(); }); + + test("`norm3` with a vector shorter than 3 is rejected", async () => { + const table = await perspective.table(common.int_float_data); + await expect( + table.view({ + expressions: { a: `var v[2] := {1, 2}; norm3(v)` }, + }), + ).rejects.toThrow(); + await table.delete(); + }); + + test("`diff3` with vectors shorter than 3 is rejected", async () => { + const table = await perspective.table(common.int_float_data); + await expect( + table.view({ + expressions: { + a: `var x[2] := {1, 2}; var y[2] := {3, 4}; var o[2]; diff3(x, y, o)`, + }, + }), + ).rejects.toThrow(); + await table.delete(); + }); + + test("`norm3` with a 3-element vector still computes", async () => { + const table = await perspective.table(common.int_float_data); + const view = await table.view({ + expressions: { a: `var v[3] := {3, 4, 0}; norm3(v)` }, + }); + const result = await view.to_columns(); + expect(result["a"]).toEqual(Array(4).fill(5)); // sqrt(9+16+0) + await view.delete(); + await table.delete(); + }); }); })(perspective); diff --git a/rust/perspective-server/cpp/perspective/CMakeLists.txt b/rust/perspective-server/cpp/perspective/CMakeLists.txt index e2dc3f3b16..dd7e666f74 100644 --- a/rust/perspective-server/cpp/perspective/CMakeLists.txt +++ b/rust/perspective-server/cpp/perspective/CMakeLists.txt @@ -186,6 +186,9 @@ string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWER) if(CMAKE_BUILD_TYPE_LOWER STREQUAL debug) set(BUILD_MESSAGE "${BUILD_MESSAGE}\n${Red}Building DEBUG${ColorReset}") add_definitions(-DPSP_DEBUG) + add_definitions(-DPSP_STORAGE_VERIFY) + add_definitions(-D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_FAST) + add_definitions(-D_GLIBCXX_ASSERTIONS) else() set(BUILD_MESSAGE "${BUILD_MESSAGE}\n${Cyan}Building RELEASE${ColorReset}") endif() diff --git a/rust/perspective-server/cpp/perspective/src/cpp/aggregate.cpp b/rust/perspective-server/cpp/perspective/src/cpp/aggregate.cpp index 51f57f73ed..3abb77ae08 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/aggregate.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/aggregate.cpp @@ -168,10 +168,9 @@ t_aggregate::init() { case AGGTYPE_COUNT: { switch (m_icolumns[0]->get_dtype()) { case DTYPE_STR: { - build_aggregate>(); + build_aggregate< + t_aggimpl_count>( + ); } break; case DTYPE_TIME: case DTYPE_INT64: { diff --git a/rust/perspective-server/cpp/perspective/src/cpp/arrow_loader.cpp b/rust/perspective-server/cpp/perspective/src/cpp/arrow_loader.cpp index c8201f873c..c7b55f693e 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/arrow_loader.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/arrow_loader.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -211,6 +212,11 @@ ArrowLoader::initialize(const std::uint8_t* ptr, const uint32_t length) { load_stream(ptr, length, m_table); } + const auto validation = m_table->ValidateFull(); + if (!validation.ok()) { + PSP_COMPLAIN_AND_ABORT(validation.ToString()); + } + std::shared_ptr schema = m_table->schema(); std::vector> fields = schema->fields(); @@ -782,9 +788,9 @@ copy_array( case arrow::Time32Type::type_id: { auto scol = std::static_pointer_cast(src); std::memcpy( - dest->get_nth(offset), + dest->get_nth(offset), (void*)scol->raw_values(), - len * 8 + len * 4 ); } break; // case arrow::Type { @@ -972,7 +978,16 @@ ArrowLoader::fill_column( std::uint32_t ArrowLoader::row_count() const { - return m_table->num_rows(); + const std::int64_t n = m_table->num_rows(); + if (n < 0 + || static_cast(n) + > std::numeric_limits::max()) { + PSP_COMPLAIN_AND_ABORT( + "Arrow table row count exceeds maximum supported size" + ); + } + + return static_cast(n); } std::vector diff --git a/rust/perspective-server/cpp/perspective/src/cpp/column.cpp b/rust/perspective-server/cpp/perspective/src/cpp/column.cpp index c9983ce577..369058281c 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/column.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/column.cpp @@ -341,6 +341,7 @@ t_column::size() const { void t_column::set_size(t_uindex size) { + reserve(size); #ifdef PSP_COLUMN_VERIFY PSP_VERBOSE_ASSERT( size * get_dtype_size(m_dtype) <= m_data->capacity(), diff --git a/rust/perspective-server/cpp/perspective/src/cpp/computed_function.cpp b/rust/perspective-server/cpp/perspective/src/cpp/computed_function.cpp index a81f4593e4..4f27aa9f98 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/computed_function.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/computed_function.cpp @@ -1885,6 +1885,11 @@ diff3::operator()(t_parameter_list parameters) { t_vector_view v2(parameters[1]); t_vector_view out(parameters[2]); + if (v1.size() < 3 || v2.size() < 3 || out.size() < 3) { + rval.m_status = STATUS_CLEAR; + return rval; + } + t_tscalar o1; o1.set(v1[0] - v2[0]); @@ -1912,6 +1917,10 @@ norm3::operator()(t_parameter_list parameters) { rval.clear(); rval.m_type = DTYPE_FLOAT64; t_vector_view v1(parameters[0]); + if (v1.size() < 3) { + rval.m_status = STATUS_CLEAR; + return rval; + } double a = v1[0].to_double(); double b = v1[1].to_double(); double c = v1[2].to_double(); @@ -1935,6 +1944,11 @@ cross_product3::operator()(t_parameter_list parameters) { t_vector_view v2(parameters[1]); t_vector_view out(parameters[2]); + if (v1.size() < 3 || v2.size() < 3 || out.size() < 3) { + rval.m_status = STATUS_CLEAR; + return rval; + } + // a2 * b3 - a3 * b2 t_tscalar o1; o1.set(v1[1] * v2[2] - v1[2] * v2[1]); @@ -1969,6 +1983,11 @@ dot_product3::operator()(t_parameter_list parameters) { t_vector_view v1(parameters[0]); t_vector_view v2(parameters[1]); + if (v1.size() < 3 || v2.size() < 3) { + rval.m_status = STATUS_CLEAR; + return rval; + } + rval.set(v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]); return rval; } diff --git a/rust/perspective-server/cpp/perspective/src/cpp/residency.cpp b/rust/perspective-server/cpp/perspective/src/cpp/residency.cpp index bc1123591b..be593c6d23 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/residency.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/residency.cpp @@ -91,15 +91,15 @@ t_residency_manager::resident_bytes() { std::size_t t_residency_manager::prepare() { + refresh_config(); + std::lock_guard lk(m_mutex); m_pending.clear(); m_pending_fnames.clear(); - refresh_config(); if (!g_residency_active) { return 0; } - std::lock_guard lk(m_mutex); g_residency_tick = ++m_tick; std::size_t resident = 0; std::vector candidates; @@ -157,36 +157,18 @@ t_residency_manager::commit() { } n = m_pending.size(); + m_pending.clear(); + m_pending_fnames.clear(); } - m_pending.clear(); - m_pending_fnames.clear(); if (n == 0) { return; } - - // // TODO: No diagnostics hooks - // // Test/diagnostic hook: dump cumulative stats so a harness can confirm - // // eviction is actually occurring. - // const char* stats_file = std::getenv("PSP_RESIDENCY_STATS_FILE"); - // if (stats_file != nullptr) { - // FILE* f = std::fopen(stats_file, "w"); - // if (f != nullptr) { - // std::fprintf( - // f, - // "evictions=%llu restores=%llu budget=%zu\n", - // static_cast(m_evictions), - // static_cast(m_restores), - // m_budget - // ); - // std::fclose(f); - // } - // } } void t_residency_manager::safepoint() { - // Native (mmap) path: no async handle setup, so both phases run inline. + std::lock_guard lk(m_safepoint_mutex); prepare(); commit(); } diff --git a/rust/perspective-server/cpp/perspective/src/cpp/sparse_tree.cpp b/rust/perspective-server/cpp/perspective/src/cpp/sparse_tree.cpp index d2198b9371..7b3691447d 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/sparse_tree.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/sparse_tree.cpp @@ -1238,6 +1238,11 @@ t_stree::update_agg_table( case AGGTYPE_WEIGHTED_MEAN: { auto pkeys = get_pkeys(nidx); + if (spec.get_dependencies().size() < 2) { + dst->set_valid(dst_ridx, false); + break; + } + double nr = 0; double dr = 0; std::vector values; @@ -1618,6 +1623,11 @@ t_stree::update_agg_table( break; } + if (spec.get_dependencies().size() < 2) { + dst->set_scalar(dst_ridx, new_value); + break; + } + std::vector values; read_column_from_gstate( gstate, @@ -1673,6 +1683,11 @@ t_stree::update_agg_table( break; } + if (spec.get_dependencies().size() < 2) { + dst->set_scalar(dst_ridx, new_value); + break; + } + std::vector values; read_column_from_gstate( gstate, @@ -2665,23 +2680,20 @@ t_stree::first_last_helper( return std::make_pair(mknone(), mknone()); } + const auto& deps = spec.get_dependencies(); + if (deps.size() < 2) { + return std::make_pair(mknone(), mknone()); + } + std::vector values; std::vector sort_values; read_column_from_gstate( - gstate, - expression_master_table, - spec.get_dependencies()[0].name(), - pkeys, - values + gstate, expression_master_table, deps[0].name(), pkeys, values ); read_column_from_gstate( - gstate, - expression_master_table, - spec.get_dependencies()[1].name(), - pkeys, - sort_values + gstate, expression_master_table, deps[1].name(), pkeys, sort_values ); auto minmax_idx = get_minmax_idx(sort_values, spec.get_sort_type()); diff --git a/rust/perspective-server/cpp/perspective/src/cpp/table.cpp b/rust/perspective-server/cpp/perspective/src/cpp/table.cpp index 86b9ddcae4..241a3b0e13 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/table.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/table.cpp @@ -967,8 +967,7 @@ Table::update_cols(const std::string_view& data, std::uint32_t port_id) { PSP_COMPLAIN_AND_ABORT("Can't create table from empty columns") } - nrows = it.value.Size(); - break; + nrows = std::max(nrows, static_cast(it.value.Size())); } bool is_implicit = m_index.empty(); @@ -1069,7 +1068,7 @@ Table::from_cols( is_implicit = false; } - nrows = it.value.Size(); + nrows = std::max(nrows, static_cast(it.value.Size())); bool found = false; for (const auto& column_value : it.value.GetArray()) { auto dtype = rapidjson_type_to_dtype(column_value); @@ -1603,6 +1602,7 @@ Table::from_ndjson( // 3.) Fill table bool is_finished = false; while (!is_finished) { + data_table->extend(ii + 1); for (const auto& it : document.GetObj()) { auto col = data_table->get_column(it.name.GetString()); const auto* col_name = it.name.GetString(); diff --git a/rust/perspective-server/cpp/perspective/src/include/perspective/residency.h b/rust/perspective-server/cpp/perspective/src/include/perspective/residency.h index 4ae9616e15..696369f0b0 100644 --- a/rust/perspective-server/cpp/perspective/src/include/perspective/residency.h +++ b/rust/perspective-server/cpp/perspective/src/include/perspective/residency.h @@ -81,13 +81,10 @@ class PERSPECTIVE_EXPORT t_residency_manager { std::uint64_t m_evictions = 0; std::uint64_t m_restores = 0; std::unordered_set m_stores; - // Victims selected by `prepare()`, evicted by `commit()`. Held between the - // two phases while the JS driver opens their OPFS handles. `m_pending_fnames` - // owns stable C-strings for `victim_fname()` (a `t_lstore::get_fname()` temp - // would dangle). std::vector m_pending; std::vector m_pending_fnames; std::mutex m_mutex; + std::mutex m_safepoint_mutex; }; } // namespace perspective diff --git a/rust/perspective-server/cpp/perspective/src/include/perspective/storage.h b/rust/perspective-server/cpp/perspective/src/include/perspective/storage.h index a4f293d790..eeec156d79 100644 --- a/rust/perspective-server/cpp/perspective/src/include/perspective/storage.h +++ b/rust/perspective-server/cpp/perspective/src/include/perspective/storage.h @@ -94,9 +94,14 @@ typedef std::vector t_lstore_argvec; #ifdef PSP_STORAGE_VERIFY #define STORAGE_CHECK_ACCESS_GET(idx) \ - PSP_VERBOSE_ASSERT( \ - sizeof(T) * idx < (m_capacity + sizeof(T)), "Invalid access" \ - ); + if (!(sizeof(T) * (idx) < (m_capacity + sizeof(T)))) { \ + std::stringstream __psp_oob__; \ + __psp_oob__ << "Invalid get access in " << repr() << " idx=" << (idx) \ + << " elemsize=" << sizeof(T) \ + << " byte_offset=" << (sizeof(T) * (idx)) \ + << " m_size=" << m_size << " m_capacity=" << m_capacity; \ + PSP_COMPLAIN_AND_ABORT(__psp_oob__.str()); \ + } #define PSP_CHECK_CAPACITY() \ PSP_VERBOSE_ASSERT( \ m_size <= m_capacity, \ @@ -104,7 +109,14 @@ typedef std::vector t_lstore_argvec; "mismatch" \ ) #define STORAGE_CHECK_ACCESS(idx) \ - PSP_VERBOSE_ASSERT(sizeof(T) * idx < m_capacity, "Invalid access"); + if (!(sizeof(T) * (idx) < m_capacity)) { \ + std::stringstream __psp_oob__; \ + __psp_oob__ << "Invalid set access in " << repr() << " idx=" << (idx) \ + << " elemsize=" << sizeof(T) \ + << " byte_offset=" << (sizeof(T) * (idx)) \ + << " m_size=" << m_size << " m_capacity=" << m_capacity; \ + PSP_COMPLAIN_AND_ABORT(__psp_oob__.str()); \ + } #else #define STORAGE_CHECK_ACCESS(idx) #define STORAGE_CHECK_ACCESS_GET(idx)