-
Notifications
You must be signed in to change notification settings - Fork 161
ci(vortex-duckdb): cross-platform build.rs and Windows DuckDB tests #8223
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
f1b1511
dc8be6c
e01c37c
7e6f013
6ae50fc
ee7b915
9b98e47
1973e3b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -332,6 +332,25 @@ jobs: | |
| --exclude lance-bench --exclude datafusion-bench --exclude random-access-bench ` | ||
| --exclude compress-bench --exclude xtask --exclude vortex-datafusion ` | ||
| --exclude gpu-scan-cli --exclude vortex-sqllogictest | ||
| - name: Rust Tests (Windows - DuckDB) | ||
| if: matrix.os == 'windows-x64' | ||
| shell: pwsh | ||
| # Non-blocking: vortex-duckdb's C++ extras call DuckDB's internal C++ API, but the | ||
| # prebuilt Windows duckdb.dll only exports the C API, so the link step fails with | ||
| # unresolved externals (LNK1120). Linking would require building DuckDB from source | ||
| # on Windows with all symbols exported. Until then this still validates that the | ||
| # crate configures, generates bindings (bindgen/libclang) and compiles on MSVC. | ||
| continue-on-error: true | ||
| run: | | ||
| # vortex-duckdb's build.rs runs bindgen, which needs libclang. Use the | ||
| # preinstalled LLVM if present, otherwise install it via Chocolatey. | ||
| $llvm = "C:\Program Files\LLVM\bin" | ||
| if (-not (Test-Path "$llvm\libclang.dll")) { | ||
| choco install llvm --no-progress -y | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. chocolatey has low ceiling rate-limiting, and strictly speaking requires a commercial license (maybe we can get away here without for vortex) but given the number of builds we run the rate-limiting is a real risk (i've recall this from embark studios where the ci shut down bc of that). |
||
| } | ||
| echo "LIBCLANG_PATH=$llvm" >> $env:GITHUB_ENV | ||
| $env:LIBCLANG_PATH = $llvm | ||
| cargo nextest run --cargo-profile ci --locked -p vortex-duckdb --all-features --no-fail-fast | ||
| - name: Rust Tests (Other) | ||
| if: matrix.os != 'windows-x64' | ||
| run: | | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
| use std::env; | ||
| use std::fs; | ||
| use std::io; | ||
| #[cfg(unix)] | ||
| use std::os::unix::fs::symlink; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we don't use a symlink what do we do for Windows. |
||
| use std::path::Path; | ||
| use std::path::PathBuf; | ||
|
|
@@ -22,8 +23,6 @@ const DUCKDB_SOURCE_RELEASE_URL: &str = "https://github.com/duckdb/duckdb/archiv | |
| const DUCKDB_SOURCE_COMMIT_URL: &str = "https://github.com/duckdb/duckdb/archive"; | ||
| const DEFAULT_DUCKDB_VERSION: &str = "1.5.3"; | ||
|
|
||
| const BUILD_ARTIFACTS: [&str; 3] = ["libduckdb.dylib", "libduckdb.so", "libduckdb_static.a"]; | ||
|
|
||
| const SOURCE_FILES: [&str; 17] = [ | ||
| "cpp/client_context.cpp", | ||
| "cpp/config.cpp", | ||
|
|
@@ -47,6 +46,35 @@ const SOURCE_FILES: [&str; 17] = [ | |
| const DOWNLOAD_MAX_RETRIES: i32 = 3; | ||
| const DOWNLOAD_TIMEOUT: u64 = 90; | ||
|
|
||
| /// Returns `true` if the build *target* uses the MSVC toolchain (Windows `cl`/`link`). | ||
| /// | ||
| /// Reads `CARGO_CFG_TARGET_ENV` rather than `cfg!(target_env = ..)` because a build | ||
| /// script's `cfg!` reflects the host, not the target being compiled for. | ||
| fn target_is_msvc() -> bool { | ||
| env::var("CARGO_CFG_TARGET_ENV").as_deref() == Ok("msvc") | ||
| } | ||
|
|
||
| /// Returns `true` if the build *target* OS is Windows. | ||
| /// | ||
| /// Reads `CARGO_CFG_TARGET_OS` rather than `cfg!(target_os = ..)` for the same | ||
| /// host-vs-target reason as [`target_is_msvc`]. | ||
| fn target_is_windows() -> bool { | ||
| env::var("CARGO_CFG_TARGET_OS").as_deref() == Ok("windows") | ||
| } | ||
|
|
||
| /// The DuckDB library files to look for on the current target. | ||
| /// | ||
| /// DuckDB names its shared library per-OS (`.dylib` / `.so` / `.dll`) and, on unix, also | ||
| /// emits a static archive. We probe for these after a download or source build and copy | ||
| /// whichever exist, so the list must reflect the target rather than mixing every OS. | ||
| fn build_artifacts() -> &'static [&'static str] { | ||
| match env::var("CARGO_CFG_TARGET_OS").as_deref() { | ||
| Ok("macos") => &["libduckdb.dylib", "libduckdb_static.a"], | ||
| Ok("windows") => &["duckdb.dll"], | ||
| _ => &["libduckdb.so", "libduckdb_static.a"], | ||
| } | ||
| } | ||
|
|
||
| #[derive(Debug)] | ||
| struct BindgenCargoCallbacks; | ||
|
|
||
|
|
@@ -178,6 +206,8 @@ fn download(version: &DuckDBVersion, library_dir: &Path) { | |
| "aarch64-apple-darwin" | "x86_64-apple-darwin" => ("osx", "universal"), | ||
| "x86_64-unknown-linux-gnu" => ("linux", "amd64"), | ||
| "aarch64-unknown-linux-gnu" => ("linux", "arm64"), | ||
| "x86_64-pc-windows-msvc" => ("windows", "amd64"), | ||
| "aarch64-pc-windows-msvc" => ("windows", "arm64"), | ||
| _ => { | ||
| println!("cargo:error=Unsupported target {target}"); | ||
| exit(1); | ||
|
|
@@ -192,7 +222,7 @@ fn download(version: &DuckDBVersion, library_dir: &Path) { | |
| download_url(&url, &archive_path); | ||
|
|
||
| let duckdb_lib_dir = archive_path.parent().unwrap().to_path_buf(); | ||
| for artifact in BUILD_ARTIFACTS { | ||
| for artifact in build_artifacts() { | ||
| if duckdb_lib_dir.join(artifact).exists() { | ||
| return; | ||
| } | ||
|
|
@@ -264,7 +294,7 @@ fn try_build_duckdb( | |
| let build_src_dir = build_dir.join("src"); | ||
|
|
||
| let mut build = true; | ||
| for artifact in BUILD_ARTIFACTS { | ||
| for artifact in build_artifacts() { | ||
| let path = build_src_dir.join(artifact); | ||
| if path.exists() { | ||
| println!("cargo:info=Found {artifact} in {}", path.display()); | ||
|
|
@@ -287,7 +317,7 @@ fn try_build_duckdb( | |
| fs::create_dir_all(library_dir).unwrap(); | ||
|
|
||
| let mut found_artifact = false; | ||
| for artifact in BUILD_ARTIFACTS { | ||
| for artifact in build_artifacts() { | ||
| let src = build_src_dir.join(artifact); | ||
| if !src.exists() { | ||
| continue; | ||
|
|
@@ -298,7 +328,7 @@ fn try_build_duckdb( | |
| } | ||
|
|
||
| if !found_artifact { | ||
| let artifacts = BUILD_ARTIFACTS.join(","); | ||
| let artifacts = build_artifacts().join(","); | ||
| println!("cargo:error=Failed to find any of {artifacts} after build"); | ||
| exit(1); | ||
| } | ||
|
|
@@ -351,15 +381,31 @@ fn bindgen_c2rust(crate_dir: &Path, duckdb_include_dir: &Path) { | |
| } | ||
| } | ||
|
|
||
| /// Configure the C++ compiler flags and the DuckDB header include path on `build`. | ||
| /// | ||
| /// MSVC's `cl` does not understand the GCC/Clang `-W*`/`-isystem` flags, so it gets `/W4` | ||
| /// and a plain include. It also needs `/EHsc` to enable standard C++ exception unwinding | ||
| /// (which the DuckDB headers rely on). We deliberately do not enable `/WX` | ||
| /// (warnings-as-errors) there, so warnings originating inside the DuckDB headers don't fail | ||
| /// the build. Other toolchains keep the stricter `-Werror` set with the headers added via | ||
| /// `-isystem` for the same reason. | ||
| fn configure_cpp_warnings(build: &mut cc::Build, duckdb_include_dir: &Path) { | ||
| if target_is_msvc() { | ||
| build.flag("/W4").flag("/EHsc").include(duckdb_include_dir); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we ignore header warnings for Windows? Ideally, we could match the UNIX setup here fully. |
||
| } else { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not just build with Clang actually. We need Clang anyway bc of libclang and clang is abi compatible with msvc on windows. |
||
| build | ||
| .flags(["-Wall", "-Wextra", "-Wpedantic", "-Werror"]) | ||
| .flag("-isystem") | ||
| .flag(duckdb_include_dir); | ||
| } | ||
| } | ||
|
|
||
| /// Generate libvortex_duckdb.* | ||
| fn compile_cpp(duckdb_include_dir: &Path) { | ||
| cc::Build::new() | ||
| .std("c++20") | ||
| .flags(["-Wall", "-Wextra", "-Wpedantic", "-Werror"]) | ||
| .cpp(true) | ||
| // We don't want compiler warnings inside duckdb headers, pass as flags | ||
| .flag("-isystem") | ||
| .flag(duckdb_include_dir) | ||
| let mut build = cc::Build::new(); | ||
| build.std("c++20").cpp(true); | ||
| configure_cpp_warnings(&mut build, duckdb_include_dir); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any way to retain the builder pattern style? |
||
| build | ||
| .include("include") | ||
| .include("cpp/include") | ||
| .files(SOURCE_FILES) | ||
|
|
@@ -396,6 +442,56 @@ fn cbindgen_rust2c(crate_dir: &Path) { | |
| } | ||
| } | ||
|
|
||
| /// The linker argument that embeds an rpath pointing at the DuckDB library directory, or | ||
| /// `None` on platforms whose linker rejects `-Wl,-rpath` (e.g. Windows/MSVC, where the DLL | ||
| /// is resolved via `PATH` / the executable directory instead). | ||
| fn rpath_link_arg(library_dir: &Path) -> Option<String> { | ||
| if target_is_windows() { | ||
| None | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How do we ensure then that libduckdb is added to the path? |
||
| } else { | ||
| Some(format!("-Wl,-rpath,{}", library_dir.display())) | ||
| } | ||
| } | ||
|
|
||
| /// On Windows, copy `duckdb.dll` next to the built artifacts so the loader can find it at | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For all comments, double check whether we can make them more concise and readable. |
||
| /// run time. Windows has no rpath: a DLL is resolved from the executable's own directory or | ||
| /// from `PATH`. We place it in the Cargo profile dir and its `deps/` subdir (where `nextest` | ||
| /// runs test executables). No-op elsewhere, where rpath / `*_LIBRARY_PATH` handle this. | ||
| fn copy_runtime_dll(library_dir: &Path, out_dir: &Path) { | ||
| if !target_is_windows() { | ||
| return; | ||
| } | ||
| let dll = library_dir.join("duckdb.dll"); | ||
| if !dll.exists() { | ||
| return; | ||
| } | ||
| // OUT_DIR is `<target>/<profile>/build/<pkg>/out`; the profile dir four levels up holds | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This differs for arch local and cross-compilation:
|
||
| // the binaries and its `deps/` holds the test executables. | ||
| let Some(profile_dir) = out_dir.ancestors().nth(3) else { | ||
| return; | ||
| }; | ||
| for dir in [profile_dir.to_path_buf(), profile_dir.join("deps")] { | ||
| if dir.is_dir() { | ||
| drop(fs::copy(&dll, dir.join("duckdb.dll"))); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Create a convenience symlink `crate_dir/duckdb` -> the extracted DuckDB source tree. | ||
| /// This is purely for editor/tooling navigation; the build uses the source path directly. | ||
| /// Only created on unix, where the symlink API needs no special privilege. | ||
| #[cfg(unix)] | ||
| fn symlink_duckdb_source(source_dir: &Path, duckdb_dir: &Path) { | ||
| drop(fs::remove_file(duckdb_dir)); | ||
| drop(fs::remove_dir_all(duckdb_dir)); | ||
| symlink(source_dir, duckdb_dir).unwrap(); | ||
| } | ||
|
|
||
| /// No-op on non-unix platforms (e.g. Windows): the symlink is only a convenience and | ||
| /// Windows directory symlinks require elevated privileges. | ||
| #[cfg(not(unix))] | ||
| fn symlink_duckdb_source(_source_dir: &Path, _duckdb_dir: &Path) {} | ||
|
|
||
| fn main() { | ||
| println!("cargo:rerun-if-env-changed=DUCKDB_VERSION"); | ||
| println!("cargo:rerun-if-env-changed=VX_DUCKDB_DEBUG"); | ||
|
|
@@ -427,8 +523,11 @@ fn main() { | |
| println!("cargo:rustc-link-lib=dylib=duckdb"); | ||
|
|
||
| // Set rpath for binaries built directly from this crate. This is not | ||
| // inherited by downstream crates. | ||
| println!("cargo:rustc-link-arg=-Wl,-rpath,{library_dir_str}"); | ||
| // inherited by downstream crates. Skipped on platforms whose linker rejects | ||
| // `-Wl,-rpath` (see `rpath_link_arg`). | ||
| if let Some(rpath_arg) = rpath_link_arg(&library_dir) { | ||
| println!("cargo:rustc-link-arg={rpath_arg}"); | ||
| } | ||
|
|
||
| // Export the library path for downstream crates via the `links` manifest key. | ||
| // Downstream crates can access this via `env::var("DEP_DUCKDB_LIB_DIR")` in their build.rs | ||
|
|
@@ -467,9 +566,7 @@ fn main() { | |
| fs::write(&extract_marker, version.to_string()).unwrap(); | ||
| } | ||
|
|
||
| drop(fs::remove_file(&duckdb_dir)); | ||
| drop(fs::remove_dir_all(&duckdb_dir)); | ||
| symlink(&source_dir, &duckdb_dir).unwrap(); | ||
| symlink_duckdb_source(&source_dir, &duckdb_dir); | ||
|
|
||
| let has_debug_env = | ||
| env::var("VX_DUCKDB_DEBUG").is_ok_and(|v| matches!(v.as_str(), "1" | "true")); | ||
|
|
@@ -485,6 +582,8 @@ fn main() { | |
| download(&version, &library_dir); | ||
| }; | ||
|
|
||
| copy_runtime_dll(&library_dir, &out_dir); | ||
|
|
||
| let duckdb_include_dir = inner_dir.join("src").join("include"); | ||
| println!("cargo:rerun-if-changed=cpp/include"); | ||
| bindgen_c2rust(&crate_dir, &duckdb_include_dir); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -127,12 +127,12 @@ extern "C" duckdb_value duckdb_vx_vector_get_value(duckdb_vector ffi_vector, idx | |
| return reinterpret_cast<duckdb_value>(value.release()); | ||
| } | ||
|
|
||
| void duckdb_vector_flatten(duckdb_vector vector, unsigned long len) { | ||
| void duckdb_vector_flatten(duckdb_vector vector, idx_t len) { | ||
| auto dvector = reinterpret_cast<Vector *>(vector); | ||
| dvector->Flatten(len); | ||
| } | ||
|
|
||
| const char *duckdb_vector_to_string(duckdb_vector vector, unsigned long len, duckdb_vx_error *err) { | ||
| const char *duckdb_vector_to_string(duckdb_vector vector, idx_t len, duckdb_vx_error *err) { | ||
| try { | ||
| auto dvector = reinterpret_cast<Vector *>(vector); | ||
| auto str = dvector->ToString(len); | ||
|
|
@@ -160,6 +160,10 @@ void duckdb_vx_vector_set_all_valid(duckdb_vector ffi_vector) { | |
| case FSST_VECTOR: | ||
| return FSSTVector::Validity(vector).Reset(); | ||
| default: | ||
| #if defined(_MSC_VER) | ||
| __assume(false); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we build with clang, we don't need any of these specializations. |
||
| #else | ||
| __builtin_unreachable(); | ||
| #endif | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this a robust way to get the path, can we query chocolatey?