-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Fix memory safety issues #3188
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix memory safety issues #3188
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -162,5 +162,30 @@ test.describe("Arrow", function () { | |
| __ROW_PATH__: [[], [null], [""], ["AAAA"]], | ||
| }); | ||
| }); | ||
|
|
||
| test("Loads a time32 (millisecond) column without overflow", async function () { | ||
| const expected = Array.from({ length: 64 }, (_, i) => i); | ||
| const tableData = arrow.tableFromArrays({ | ||
| t: arrow.vectorFromArray(expected, new arrow.TimeMillisecond()), | ||
| }); | ||
|
|
||
| const table = await perspective.table(arrow.tableToIPC(tableData)); | ||
| const view = await table.view(); | ||
| expect(await table.size()).toEqual(64); | ||
| expect(await view.to_columns()).toEqual({ t: expected }); | ||
| await view.delete(); | ||
| await table.delete(); | ||
| }); | ||
| }); | ||
|
|
||
| test.describe("Malformed input", function () { | ||
| test("Rejects an Arrow with a row count that exceeds 32 bits", async function () { | ||
| const bytes = new Uint8Array( | ||
| fs.readFileSync(`${__dirname}/../../arrow/bad_row_count.arrow`), | ||
| ); | ||
| await expect(perspective.table(bytes)).rejects.toThrow( | ||
| /row count exceeds maximum supported size/, | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is brittle and irrelevent, we don't cate about the message. |
||
| ); | ||
| }); | ||
| }); | ||
| }); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ | |
| #include <arrow/type_fwd.h> | ||
| #include <cstdint> | ||
| #include <exception> | ||
| #include <limits> | ||
| #include <memory> | ||
| #include <mutex> | ||
| #include <perspective/arrow_loader.h> | ||
|
|
@@ -211,6 +212,11 @@ ArrowLoader::initialize(const std::uint8_t* ptr, const uint32_t length) { | |
| load_stream(ptr, length, m_table); | ||
| } | ||
|
|
||
| const auto validation = m_table->ValidateFull(); | ||
| if (!validation.ok()) { | ||
| PSP_COMPLAIN_AND_ABORT(validation.ToString()); | ||
| } | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I checked that this has a minimal (nonexistent) performance impact. |
||
|
|
||
| std::shared_ptr<arrow::Schema> schema = m_table->schema(); | ||
| std::vector<std::shared_ptr<arrow::Field>> fields = schema->fields(); | ||
|
|
||
|
|
@@ -782,9 +788,9 @@ copy_array( | |
| case arrow::Time32Type::type_id: { | ||
| auto scol = std::static_pointer_cast<arrow::Time32Array>(src); | ||
| std::memcpy( | ||
| dest->get_nth<std::uint64_t>(offset), | ||
| dest->get_nth<std::uint32_t>(offset), | ||
| (void*)scol->raw_values(), | ||
| len * 8 | ||
| len * 4 | ||
| ); | ||
| } break; | ||
| // case arrow::Type { | ||
|
|
@@ -972,7 +978,16 @@ ArrowLoader::fill_column( | |
|
|
||
| std::uint32_t | ||
| ArrowLoader::row_count() const { | ||
| return m_table->num_rows(); | ||
| const std::int64_t n = m_table->num_rows(); | ||
| if (n < 0 | ||
| || static_cast<std::uint64_t>(n) | ||
| > std::numeric_limits<std::uint32_t>::max()) { | ||
| PSP_COMPLAIN_AND_ABORT( | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not something Perspective itself will check internally, e.g. a subsequent |
||
| "Arrow table row count exceeds maximum supported size" | ||
| ); | ||
| } | ||
|
|
||
| return static_cast<std::uint32_t>(n); | ||
| } | ||
|
|
||
| std::vector<std::string> | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1885,6 +1885,11 @@ diff3::operator()(t_parameter_list parameters) { | |
| t_vector_view v2(parameters[1]); | ||
| t_vector_view out(parameters[2]); | ||
|
|
||
| if (v1.size() < 3 || v2.size() < 3 || out.size() < 3) { | ||
| rval.m_status = STATUS_CLEAR; | ||
| return rval; | ||
| } | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is an actual good catch but we can validate this better by just checking the exact cases and erroring else? |
||
|
|
||
| t_tscalar o1; | ||
| o1.set(v1[0] - v2[0]); | ||
|
|
||
|
|
@@ -1912,6 +1917,10 @@ norm3::operator()(t_parameter_list parameters) { | |
| rval.clear(); | ||
| rval.m_type = DTYPE_FLOAT64; | ||
| t_vector_view v1(parameters[0]); | ||
| if (v1.size() < 3) { | ||
| rval.m_status = STATUS_CLEAR; | ||
| return rval; | ||
| } | ||
| double a = v1[0].to_double(); | ||
| double b = v1[1].to_double(); | ||
| double c = v1[2].to_double(); | ||
|
|
@@ -1935,6 +1944,11 @@ cross_product3::operator()(t_parameter_list parameters) { | |
| t_vector_view v2(parameters[1]); | ||
| t_vector_view out(parameters[2]); | ||
|
|
||
| if (v1.size() < 3 || v2.size() < 3 || out.size() < 3) { | ||
| rval.m_status = STATUS_CLEAR; | ||
| return rval; | ||
| } | ||
|
|
||
| // a2 * b3 - a3 * b2 | ||
| t_tscalar o1; | ||
| o1.set(v1[1] * v2[2] - v1[2] * v2[1]); | ||
|
|
@@ -1969,6 +1983,11 @@ dot_product3::operator()(t_parameter_list parameters) { | |
| t_vector_view v1(parameters[0]); | ||
| t_vector_view v2(parameters[1]); | ||
|
|
||
| if (v1.size() < 3 || v2.size() < 3) { | ||
| rval.m_status = STATUS_CLEAR; | ||
| return rval; | ||
| } | ||
|
|
||
| rval.set(v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]); | ||
| return rval; | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These data structures are small, for readability its worth just asserting the entire structure instead of multiple playwright assertion calls in a row.