From 34572a1935937a40cb92ea43b231a91c8c1d6b81 Mon Sep 17 00:00:00 2001 From: Shion Tanaka Date: Wed, 6 May 2026 22:56:17 +0900 Subject: [PATCH 1/3] feat(macOS): add vfkit backend for ephemeral and persistent VMs macOS has no KVM/QEMU, so this adds vfkit as the VM backend. Ephemeral VMs use direct kernel boot with SquashFS, persistent VMs use EFI boot. The vfkit/ module mirrors the libvirt/ directory structure, and CLI options match Linux where applicable. Build and run on macOS: cargo build --release codesign -fs - target/release/bcvk Tested on macOS (Apple Silicon) with rootful and rootless podman machine. Assisted-by: Claude Code (Claude Opus 4.6) Signed-off-by: Shion Tanaka --- Cargo.lock | 41 + crates/kit/Cargo.toml | 4 + crates/kit/src/ephemeral_macos.rs | 195 +++++ crates/kit/src/lib.rs | 10 + crates/kit/src/main.rs | 45 +- crates/kit/src/run_ephemeral_macos.rs | 1094 +++++++++++++++++++++++++ crates/kit/src/ssh_options.rs | 136 +++ crates/kit/src/vfkit/inspect.rs | 62 ++ crates/kit/src/vfkit/list.rs | 29 + crates/kit/src/vfkit/mod.rs | 271 ++++++ crates/kit/src/vfkit/rm.rs | 59 ++ crates/kit/src/vfkit/rm_all.rs | 44 + crates/kit/src/vfkit/run.rs | 188 +++++ crates/kit/src/vfkit/ssh.rs | 24 + crates/kit/src/vfkit/start.rs | 115 +++ crates/kit/src/vfkit/stop.rs | 63 ++ 16 files changed, 2370 insertions(+), 10 deletions(-) create mode 100644 crates/kit/src/ephemeral_macos.rs create mode 100644 crates/kit/src/run_ephemeral_macos.rs create mode 100644 crates/kit/src/ssh_options.rs create mode 100644 crates/kit/src/vfkit/inspect.rs create mode 100644 crates/kit/src/vfkit/list.rs create mode 100644 crates/kit/src/vfkit/mod.rs create mode 100644 crates/kit/src/vfkit/rm.rs create mode 100644 crates/kit/src/vfkit/rm_all.rs create mode 100644 crates/kit/src/vfkit/run.rs create mode 100644 crates/kit/src/vfkit/ssh.rs create mode 100644 crates/kit/src/vfkit/start.rs create mode 100644 crates/kit/src/vfkit/stop.rs diff --git a/Cargo.lock b/Cargo.lock index 
b651d1a79..f18fec2bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -268,6 +268,7 @@ dependencies = [ "xshell", "yaml-rust2", "zlink", + "zstd", ] [[package]] @@ -433,6 +434,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1354349954c6fc9cb0deab020f27f783cf0b604e8bb754dc4658ecf0d29c35f" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -1598,6 +1601,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.3", + "libc", +] + [[package]] name = "js-sys" version = "0.3.81" @@ -3878,3 +3891,31 @@ dependencies = [ "tokio-stream", "zlink-core", ] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/crates/kit/Cargo.toml b/crates/kit/Cargo.toml index a5ef6afdc..399f0764f 100644 --- a/crates/kit/Cargo.toml +++ b/crates/kit/Cargo.toml @@ -58,6 +58,10 @@ zlink = "0.4" futures-util = "0.3" libsystemd = "0.7" +# macOS-only dependencies (vfkit backend) +[target.'cfg(target_os = "macos")'.dependencies] +zstd = "0.13" + 
[dev-dependencies] similar-asserts = "1.5" diff --git a/crates/kit/src/ephemeral_macos.rs b/crates/kit/src/ephemeral_macos.rs new file mode 100644 index 000000000..ca3255247 --- /dev/null +++ b/crates/kit/src/ephemeral_macos.rs @@ -0,0 +1,195 @@ +//! Ephemeral VM management commands for macOS (vfkit backend). + +use std::io::Write; +use std::process::{Command, Stdio}; + +use clap::Subcommand; +use color_eyre::eyre::bail; +use color_eyre::Result; + +use crate::run_ephemeral_macos::{self, EphemeralVmMetadata}; + +/// Options for `ephemeral run-ssh`, combining run options with optional SSH arguments. +#[derive(Debug, clap::Parser)] +pub struct RunSshOpts { + #[command(flatten)] + pub run_opts: run_ephemeral_macos::RunEphemeralOpts, + + /// SSH command to execute (optional, defaults to interactive shell) + #[arg(trailing_var_arg = true)] + pub ssh_args: Vec, +} + +#[derive(Debug, Subcommand)] +pub enum EphemeralCommands { + /// Run bootc containers as ephemeral VMs + #[clap(name = "run")] + Run(run_ephemeral_macos::RunEphemeralOpts), + + /// Run ephemeral VM and SSH into it + #[clap(name = "run-ssh")] + RunSsh(RunSshOpts), + + /// Connect to a running ephemeral VM via SSH + #[clap(name = "ssh")] + Ssh { + /// VM name + name: String, + + /// Additional SSH arguments (e.g. -v, -L, commands to execute) + #[clap(allow_hyphen_values = true)] + args: Vec, + }, + + /// List ephemeral VM containers + #[clap(name = "ps")] + Ps { + /// Output as JSON + #[clap(long)] + json: bool, + }, + + /// Remove all ephemeral VM containers + #[clap(name = "rm-all")] + RmAll { + /// Force removal without confirmation + #[clap(short, long)] + force: bool, + }, +} + +impl EphemeralCommands { + /// Execute the ephemeral subcommand. 
+ pub fn run(self) -> Result<()> { + match self { + EphemeralCommands::Run(opts) => run_ephemeral_macos::run(opts), + EphemeralCommands::RunSsh(mut opts) => { + opts.run_opts.ssh_keygen = true; + if !opts.ssh_args.is_empty() { + let combined = shlex::try_join(opts.ssh_args.iter().map(|s| s.as_str())) + .map_err(|e| color_eyre::eyre::eyre!("failed to escape SSH args: {}", e))?; + opts.run_opts.execute.push(combined); + } + run_ephemeral_macos::run(opts.run_opts) + } + EphemeralCommands::Ssh { name, args } => cmd_ssh(&name, &args), + EphemeralCommands::Ps { json } => cmd_ps(json), + EphemeralCommands::RmAll { force } => cmd_rm_all(force), + } + } +} + +fn cmd_ps(json: bool) -> Result<()> { + let vms = EphemeralVmMetadata::list_all()?; + for vm in &vms { + if !vm.is_alive() { + EphemeralVmMetadata::remove(&vm.name); + } + } + let live: Vec<_> = vms.into_iter().filter(|vm| vm.is_alive()).collect(); + + if json { + println!("{}", serde_json::to_string_pretty(&live)?); + return Ok(()); + } + + if live.is_empty() { + println!("No running ephemeral VMs."); + return Ok(()); + } + + println!("{:<24} {:<50} SSH", "NAME", "IMAGE"); + for vm in &live { + println!( + "{:<24} {:<50} ssh -p {} -i {} root@localhost", + vm.name, vm.image, vm.ssh_port, vm.ssh_key + ); + } + Ok(()) +} + +fn cmd_rm_all(force: bool) -> Result<()> { + let vms = EphemeralVmMetadata::list_all()?; + if vms.is_empty() { + println!("No ephemeral VMs found."); + return Ok(()); + } + + if !force { + println!("Found {} ephemeral VM(s):", vms.len()); + for vm in &vms { + println!( + " {} ({})", + vm.name, + if vm.is_alive() { "running" } else { "stopped" } + ); + } + print!("Remove all ephemeral VMs? 
[y/N]: "); + std::io::stdout().flush()?; + let mut input = String::new(); + std::io::stdin().read_line(&mut input)?; + let input = input.trim().to_lowercase(); + if input != "y" && input != "yes" { + println!("Aborted."); + return Ok(()); + } + } + + for vm in &vms { + if vm.is_alive() { + if let Err(e) = Command::new("kill") + .args([&vm.pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::warn!("failed to kill VM process {}: {}", vm.pid, e); + } + if vm.gvproxy_pid > 0 { + if let Err(e) = Command::new("kill") + .args([&vm.gvproxy_pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::warn!("failed to kill gvproxy {}: {}", vm.gvproxy_pid, e); + } + } + } + EphemeralVmMetadata::remove(&vm.name); + println!("Removed {}", vm.name); + } + Ok(()) +} + +fn cmd_ssh(name: &str, args: &[String]) -> Result<()> { + let vm = EphemeralVmMetadata::load(name)?; + if !vm.is_alive() { + EphemeralVmMetadata::remove(name); + bail!("VM '{}' is not running", name); + } + + // Try to set up SSH port forwarding via VM-specific gvproxy socket + let svc_sock = format!("/private/tmp/bcvk/{}-gvproxy-svc.sock", name); + if std::path::Path::new(&svc_sock).exists() { + if let Err(e) = + run_ephemeral_macos::expose_ssh_port(&svc_sock, "192.168.127.2", vm.ssh_port) + { + tracing::debug!("SSH port forward re-expose: {}", e); + } + } + + let key_path = std::path::Path::new(&vm.ssh_key); + if args.is_empty() { + run_ephemeral_macos::run_ssh_interactive(vm.ssh_port, key_path, "root")?; + } else { + let combined = shlex::try_join(args.iter().map(|s| s.as_str())) + .map_err(|e| color_eyre::eyre::eyre!("failed to escape SSH command: {}", e))?; + let status = + run_ephemeral_macos::run_ssh_command(vm.ssh_port, key_path, "root", &combined)?; + if !status.success() { + std::process::exit(status.code().unwrap_or(1)); + } + } + Ok(()) +} diff --git a/crates/kit/src/lib.rs b/crates/kit/src/lib.rs index 279e5caa5..a3aa51578 
100644 --- a/crates/kit/src/lib.rs +++ b/crates/kit/src/lib.rs @@ -4,6 +4,16 @@ pub mod cpio; pub mod qemu_img; pub mod xml_utils; +// Cross-platform modules +pub mod ssh_options; + // Linux-only modules #[cfg(target_os = "linux")] pub mod kernel; + +// macOS-only modules (vfkit backend) +#[cfg(target_os = "macos")] +pub mod run_ephemeral_macos; + +#[cfg(target_os = "macos")] +pub mod vfkit; diff --git a/crates/kit/src/main.rs b/crates/kit/src/main.rs index de0a3107e..cc4969312 100644 --- a/crates/kit/src/main.rs +++ b/crates/kit/src/main.rs @@ -11,6 +11,7 @@ mod cpio; mod install_options; mod instancetypes; mod qemu_img; +mod ssh_options; mod xml_utils; // Linux-only modules @@ -60,6 +61,14 @@ mod utils; #[cfg(target_os = "linux")] mod varlink_ipc; +// macOS-only modules (vfkit backend) +#[cfg(target_os = "macos")] +mod ephemeral_macos; +#[cfg(target_os = "macos")] +mod run_ephemeral_macos; +#[cfg(target_os = "macos")] +mod vfkit; + /// Default state directory for bcvk container data #[cfg(target_os = "linux")] pub const CONTAINER_STATEDIR: &str = "/var/lib/bcvk"; @@ -104,8 +113,8 @@ enum InternalsCmds { DumpCliJson, } -/// Stub subcommands for macOS (shows error message when run) -#[cfg(not(target_os = "linux"))] +/// Stub subcommands for unsupported platforms +#[cfg(not(any(target_os = "linux", target_os = "macos")))] #[derive(Debug, Subcommand)] pub enum StubEphemeralCommands { /// Run bootc containers as ephemeral VMs @@ -139,9 +148,21 @@ enum Commands { #[clap(subcommand)] Ephemeral(ephemeral::EphemeralCommands), - // macOS stub: ephemeral command exists but errors out - #[cfg(not(target_os = "linux"))] - /// Run bootc images as stateless VMs via QEMU+Podman (not available on this platform) + // macOS: vfkit-based ephemeral VMs + #[cfg(target_os = "macos")] + /// Manage ephemeral VMs for bootc containers (vfkit backend) + #[clap(subcommand)] + Ephemeral(ephemeral_macos::EphemeralCommands), + + // macOS: vfkit-based persistent VMs + #[cfg(target_os = "macos")] 
+ /// Manage persistent VMs (vfkit backend) + #[clap(subcommand)] + Vm(vfkit::VmCommands), + + // Other platforms: stub + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + /// Manage ephemeral VMs for bootc containers (not available on this platform) #[clap(subcommand)] Ephemeral(StubEphemeralCommands), @@ -284,13 +305,17 @@ fn main() -> Result<(), Report> { #[cfg(target_os = "linux")] Commands::Ephemeral(cmd) => cmd.run()?, - // macOS stub: ephemeral command exists but errors out - #[cfg(not(target_os = "linux"))] + #[cfg(target_os = "macos")] + Commands::Ephemeral(cmd) => cmd.run()?, + + #[cfg(target_os = "macos")] + Commands::Vm(cmd) => cmd.run()?, + + #[cfg(not(any(target_os = "linux", target_os = "macos")))] Commands::Ephemeral(_) => { return Err(color_eyre::eyre::eyre!( - "The 'ephemeral' command is not available on macOS.\n\ - bcvk requires Linux with KVM/QEMU for VM operations.\n\ - See https://github.com/bootc-dev/bcvk/issues/21 for more information." + "The 'ephemeral' command is not available on this platform.\n\ + bcvk requires Linux with KVM/QEMU or macOS with vfkit for VM operations." )); } diff --git a/crates/kit/src/run_ephemeral_macos.rs b/crates/kit/src/run_ephemeral_macos.rs new file mode 100644 index 000000000..d7fe9257f --- /dev/null +++ b/crates/kit/src/run_ephemeral_macos.rs @@ -0,0 +1,1094 @@ +//! Ephemeral VM launch flow for macOS using vfkit + SquashFS. +//! +//! Boot flow: +//! 1. Extract kernel + initramfs from container image +//! 2. Create SquashFS rootfs (lz4, cached by digest) +//! 3. Decompress vmlinuz PE+zstd → uncompressed ARM64 Image +//! 4. Append bcvk units CPIO to initramfs (/etc overlay + /var tmpfs + SSH) +//! 5. Launch vfkit with virtio-blk (SquashFS) + virtio-net (gvproxy) +//! +//! Common helpers (gvproxy, SSH, vfkit detection) are pub for reuse by vfkit/ module. 
+ +use std::fs::{self, OpenOptions}; +use std::io::{Seek, SeekFrom, Write}; +use std::os::unix::net::UnixStream; +use std::path::Path; +use std::process::{Command, Stdio}; +use std::time::Duration; + +use color_eyre::{ + eyre::{bail, eyre, Context}, + Result, +}; +use tracing::{debug, info}; + +// --- Data structures --- + +/// Metadata for a running ephemeral VM, persisted as JSON for `ps` and `ssh`. +#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] +#[allow(dead_code)] +pub struct EphemeralVmMetadata { + /// VM name used as identifier for resource isolation. + pub name: String, + /// Container image reference used to boot the VM. + pub image: String, + /// PID of the vfkit process. + pub pid: u32, + /// PID of the gvproxy network proxy process. + pub gvproxy_pid: u32, + /// Host-side SSH port forwarded to the VM. + pub ssh_port: u16, + /// Path to the SSH private key for this VM. + pub ssh_key: String, + /// Path to the serial console log file. + pub serial_log: String, + /// Path to the vfkit process log file. + pub log_path: Option, + /// ISO 8601 timestamp when the VM was created. + pub created: String, +} + +#[allow(dead_code)] +impl EphemeralVmMetadata { + /// Return the directory path for ephemeral VM metadata files. + pub fn vms_dir() -> std::path::PathBuf { + std::path::PathBuf::from("/private/tmp/bcvk/vms") + } + + /// Save metadata to a JSON file in the VMs directory. + pub fn save(&self) -> Result<()> { + let dir = Self::vms_dir(); + fs::create_dir_all(&dir)?; + let path = dir.join(format!("{}.json", self.name)); + fs::write(&path, serde_json::to_string_pretty(self)?)?; + Ok(()) + } + + /// Remove metadata file for the named VM. + pub fn remove(name: &str) { + let path = Self::vms_dir().join(format!("{}.json", name)); + let _ = fs::remove_file(path); + } + + /// Load metadata for the named VM from its JSON file. 
+ pub fn load(name: &str) -> Result { + let path = Self::vms_dir().join(format!("{}.json", name)); + let data = fs::read_to_string(&path)?; + Ok(serde_json::from_str(&data)?) + } + + /// List all ephemeral VM metadata from the VMs directory. + pub fn list_all() -> Result> { + let dir = Self::vms_dir(); + if !dir.exists() { + return Ok(Vec::new()); + } + let mut vms = Vec::new(); + for entry in fs::read_dir(&dir)? { + let path = entry?.path(); + if path.extension().and_then(|e| e.to_str()) != Some("json") { + continue; + } + if let Ok(data) = fs::read_to_string(&path) { + if let Ok(meta) = serde_json::from_str::(&data) { + vms.push(meta); + } + } + } + Ok(vms) + } + + /// Check if the VM process is still alive via kill -0. + pub fn is_alive(&self) -> bool { + Command::new("kill") + .args(["-0", &self.pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .map(|s| s.success()) + .unwrap_or(false) + } +} + +/// Options for launching an ephemeral VM via vfkit. +#[derive(clap::Parser, Debug)] +pub struct RunEphemeralOpts { + /// Container image to boot + pub image: String, + /// Number of vCPUs + #[clap(long)] + pub vcpus: Option, + /// Memory size (e.g. 
"4G", "2048M", or plain number for MB) + #[clap(long, default_value = "4G")] + pub memory: String, + /// Generate a temporary SSH key pair for VM access + #[clap(long = "ssh-keygen", short = 'K')] + pub ssh_keygen: bool, + /// Command(s) to execute via SSH after boot + #[clap(long)] + pub execute: Vec, + /// VM name for identification and resource isolation + #[clap(long)] + pub name: Option, + /// Additional kernel command line arguments + #[clap(long = "karg")] + pub kernel_args: Vec, + /// Display VM console in GUI window + #[clap(long)] + pub gui: bool, + /// Run in background + #[clap(long, short = 'd')] + pub detach: bool, + /// Enable debug mode (reserved for future use) + #[clap(long)] + pub debug: bool, +} + +fn default_vcpus() -> u32 { + std::thread::available_parallelism() + .map(|n| n.get() as u32) + .unwrap_or(2) +} + +/// Parse memory specification string (e.g. "4G", "2048M") to megabytes. +pub fn parse_memory_to_mb(s: &str) -> Result { + let s = s.trim(); + if let Some(n) = s.strip_suffix('G').or_else(|| s.strip_suffix('g')) { + Ok((n.parse::()? * 1024.0) as u32) + } else if let Some(n) = s.strip_suffix('M').or_else(|| s.strip_suffix('m')) { + Ok(n.parse::()? as u32) + } else { + Ok(s.parse::()?) 
+ } +} + +// --- RAII cleanup guard --- + +struct VmCleanup { + vfkit_pid: u32, + gvproxy_pid: u32, + vm_name: String, +} + +impl Drop for VmCleanup { + fn drop(&mut self) { + tracing::debug!("cleaning up VM processes..."); + if let Err(e) = Command::new("kill") + .arg(self.vfkit_pid.to_string()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::warn!("failed to kill vfkit (PID {}): {}", self.vfkit_pid, e); + } + if let Err(e) = Command::new("kill") + .arg(self.gvproxy_pid.to_string()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::warn!("failed to kill gvproxy (PID {}): {}", self.gvproxy_pid, e); + } + EphemeralVmMetadata::remove(&self.vm_name); + } +} + +// --- Main entry point --- + +/// Run an ephemeral VM from a container image using vfkit + SquashFS. +pub fn run(opts: RunEphemeralOpts) -> Result<()> { + if opts.gui && opts.detach { + bail!("--gui and --detach cannot be used together (GUI requires foreground process)"); + } + + if opts.detach { + return run_detached(&opts); + } + + let vfkit_bin = find_vfkit()?; + info!(image = %opts.image, "starting ephemeral VM on macOS (vfkit + SquashFS)"); + + let cache_base = std::path::PathBuf::from("/private/tmp/bcvk"); + fs::create_dir_all(&cache_base)?; + + let machine = detect_machine_name()?; + let rootful = is_machine_rootful(&machine); + debug!( + "podman machine '{}' ({})", + machine, + if rootful { "rootful" } else { "rootless" } + ); + let digest = ensure_image_and_get_digest(&opts.image)?; + let digest_short = &digest[..16.min(digest.len())]; + info!("image digest: {}...", digest_short); + + let vm_name = opts + .name + .clone() + .unwrap_or_else(|| format!("ephemeral-{}", &digest_short[..8])); + let ssh_key_path = cache_base.join(format!("{}-key", vm_name)); + + let boot_dir = cache_base.join(format!("boot-{}", digest_short)); + fs::create_dir_all(&boot_dir)?; + let squashfs_cache = format!("/private/tmp/bcvk/rootfs-{}.squashfs", digest_short); + let 
squashfs_path = format!("/private/tmp/bcvk/{}-rootfs.squashfs", vm_name); + let vmlinuz_path = boot_dir.join("vmlinuz"); + let image_path = boot_dir.join("Image"); + let initramfs_orig = boot_dir.join("initramfs-orig.img"); + let initramfs_path = cache_base.join(format!("{}-initramfs.img", vm_name)); + + // Step 1+2: kernel extract + SquashFS creation (parallel) + let step2_handle = if !Path::new(&squashfs_cache).exists() { + let mc = machine.clone(); + let rf = rootful; + let img = opts.image.clone(); + let sc = squashfs_cache.clone(); + Some(std::thread::spawn(move || -> Result<()> { + info!("creating SquashFS image (lz4)..."); + create_squashfs_image(&mc, rf, &img, &sc) + })) + } else { + info!("using cached SquashFS: {}", squashfs_cache); + None + }; + + if !vmlinuz_path.exists() || !initramfs_orig.exists() { + info!("extracting kernel and initramfs..."); + extract_kernel(&machine, &opts.image, &boot_dir)?; + fs::rename(boot_dir.join("initramfs.img"), &initramfs_orig)?; + } + + // Step 3+4: kernel decompress + CPIO append (parallel after Step 1) + let step3_handle = if !image_path.exists() { + let vp = vmlinuz_path.clone(); + let ip = image_path.clone(); + Some(std::thread::spawn(move || -> Result<()> { + info!("decompressing kernel (vmlinuz → Image)..."); + extract_uncompressed_kernel(&vp, &ip) + })) + } else { + None + }; + + fs::copy(&initramfs_orig, &initramfs_path)?; + { + let cpio_data = crate::cpio::create_initramfs_units_cpio() + .map_err(|e| eyre!("failed to create CPIO: {e}"))?; + let mut f = OpenOptions::new().append(true).open(&initramfs_path)?; + let sz = f.seek(SeekFrom::End(0))?; + let pad = sz.next_multiple_of(4) - sz; + if pad > 0 { + f.write_all(&vec![0u8; pad as usize])?; + } + f.write_all(&cpio_data)?; + + if opts.ssh_keygen || !opts.execute.is_empty() { + info!("generating SSH keypair..."); + let _ = fs::remove_file(&ssh_key_path); + let _ = fs::remove_file(ssh_key_path.with_extension("pub")); + let status = Command::new("ssh-keygen") + 
.args([ + "-t", + "ed25519", + "-f", + &ssh_key_path.to_string_lossy(), + "-N", + "", + "-q", + ]) + .status()?; + if !status.success() { + bail!("ssh-keygen failed (exit code: {:?})", status.code()); + } + let pubkey = fs::read_to_string(ssh_key_path.with_extension("pub"))?; + let ssh_cpio = create_ssh_setup_cpio(pubkey.trim())?; + let pos = f.seek(SeekFrom::End(0))?; + let pad = pos.next_multiple_of(4) - pos; + if pad > 0 { + f.write_all(&vec![0u8; pad as usize])?; + } + f.write_all(&ssh_cpio)?; + } + info!("initramfs prepared"); + } + + if let Some(h) = step3_handle { + h.join() + .map_err(|_| eyre!("kernel decompression thread panicked"))??; + } + if let Some(h) = step2_handle { + h.join() + .map_err(|_| eyre!("squashfs creation thread panicked"))??; + } + + // CoW clone SquashFS for this VM (allows concurrent use of same image) + let _ = fs::remove_file(&squashfs_path); + let clone_status = Command::new("cp") + .args(["-c", &squashfs_cache, &squashfs_path]) + .status() + .context("cloning SquashFS")?; + if !clone_status.success() { + fs::copy(&squashfs_cache, &squashfs_path).context("copying SquashFS")?; + } + + // 5. 
gvproxy + vfkit + let gvproxy_sock = cache_base.join(format!("{}-gvproxy.sock", vm_name)); + let services_sock = cache_base.join(format!("{}-gvproxy-svc.sock", vm_name)); + let gvproxy_sock_str = gvproxy_sock.to_string_lossy().to_string(); + let services_sock_str = services_sock.to_string_lossy().to_string(); + info!("starting gvproxy..."); + let mut gvproxy_child = start_gvproxy(&gvproxy_sock_str, &services_sock_str)?; + + let mut cmdline_parts: Vec<&str> = vec![ + "root=/dev/vda", + "ro", + "rootfstype=squashfs", + "console=tty0", + "console=hvc0", + "loglevel=4", + "selinux=0", + "net.ifnames=0", + "systemd.journald.storage=volatile", + ]; + let user_args: Vec<&str> = opts.kernel_args.iter().map(|s| s.as_str()).collect(); + cmdline_parts.extend(&user_args); + let cmdline = cmdline_parts.join(" "); + + let mac = generate_mac(); + let mac_str = format!( + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] + ); + + let bootloader_arg = format!( + "linux,kernel={},initrd={},cmdline=\"{}\"", + image_path.display(), + initramfs_path.display(), + cmdline + ); + + let vcpus = opts.vcpus.unwrap_or_else(default_vcpus); + let memory_mb = parse_memory_to_mb(&opts.memory)?; + + let mut vfkit_args = vec![ + "--cpus".to_string(), + vcpus.to_string(), + "--memory".to_string(), + memory_mb.to_string(), + "--bootloader".to_string(), + bootloader_arg, + "--device".to_string(), + format!("virtio-blk,path={}", squashfs_path), + "--device".to_string(), + format!( + "virtio-net,unixSocketPath={},mac={}", + gvproxy_sock_str, mac_str + ), + "--device".to_string(), + "virtio-rng".to_string(), + ]; + if opts.gui { + vfkit_args.push("--gui".to_string()); + } + + info!("launching vfkit..."); + let vfkit_log = cache_base.join(format!("{}-vfkit.log", vm_name)); + let vfkit_log_file = fs::File::create(&vfkit_log)?; + let mut vfkit_child = Command::new(&vfkit_bin) + .args(&vfkit_args) + .stdout(vfkit_log_file.try_clone()?) 
+ .stderr(vfkit_log_file) + .spawn() + .context("failed to start vfkit")?; + + let ssh_port = find_available_ssh_port(); + debug!("allocated SSH port: {}", ssh_port); + + let metadata = EphemeralVmMetadata { + name: vm_name.clone(), + image: opts.image.clone(), + pid: vfkit_child.id(), + gvproxy_pid: gvproxy_child.id(), + ssh_port, + ssh_key: ssh_key_path.to_string_lossy().to_string(), + serial_log: String::new(), + log_path: None, + created: chrono::Utc::now().to_rfc3339(), + }; + metadata.save()?; + + let _cleanup = VmCleanup { + vfkit_pid: vfkit_child.id(), + gvproxy_pid: gvproxy_child.id(), + vm_name: vm_name.clone(), + }; + + if opts.ssh_keygen || !opts.execute.is_empty() { + info!("setting up SSH port forwarding..."); + for attempt in 0..15u32 { + match expose_ssh_port(&services_sock_str, "192.168.127.2", ssh_port) { + Ok(_) => { + info!("SSH port {} forwarded", ssh_port); + break; + } + Err(e) if attempt < 14 => { + debug!("SSH port forward attempt {}: {}", attempt, e); + let backoff = 200 * 2u64.pow(attempt.min(4)); + std::thread::sleep(Duration::from_millis(backoff)); + } + Err(e) => bail!("SSH port forward failed: {}", e), + } + } + + wait_for_ssh(ssh_port, &ssh_key_path, "root")?; + + if !opts.execute.is_empty() { + for cmd_str in &opts.execute { + info!("executing: {}", cmd_str); + let status = run_ssh_command(ssh_port, &ssh_key_path, "root", cmd_str)?; + if !status.success() { + bail!("command failed: {}", status); + } + } + return Ok(()); + } + + info!( + "SSH ready: ssh -p {} -i {} root@localhost", + ssh_port, + ssh_key_path.display() + ); + + use std::io::IsTerminal; + if std::io::stdin().is_terminal() { + let status = run_ssh_interactive(ssh_port, &ssh_key_path, "root")?; + let exit_code = status.code().unwrap_or(1); + drop(_cleanup); + std::process::exit(exit_code); + } + } + + // No SSH: wait for vfkit to exit (GUI window closed or VM shutdown) + std::mem::forget(_cleanup); + let status = vfkit_child.wait()?; + info!("vfkit exited: {}", status); 
+ if let Err(e) = gvproxy_child.kill() { + tracing::debug!("failed to kill gvproxy: {}", e); + } + EphemeralVmMetadata::remove(&vm_name); + Ok(()) +} + +fn run_detached(opts: &RunEphemeralOpts) -> Result<()> { + let cache_base = std::path::PathBuf::from("/private/tmp/bcvk"); + fs::create_dir_all(&cache_base)?; + let digest = ensure_image_and_get_digest(&opts.image)?; + let digest_short = &digest[..16.min(digest.len())]; + let vm_name = opts + .name + .clone() + .unwrap_or_else(|| format!("ephemeral-{}", &digest_short[..8])); + let log_path = cache_base.join(format!("bcvk-{}.log", vm_name)); + let log_file = fs::File::create(&log_path)?; + + let exe = std::env::current_exe()?; + let mut args: Vec = std::env::args() + .skip(1) + .filter(|a| a != "--detach" && a != "-d") + .collect(); + if !args.contains(&"-K".to_string()) && !args.contains(&"--ssh-keygen".to_string()) { + args.insert(args.len() - 1, "-K".to_string()); + } + if opts.name.is_none() { + args.insert(args.len() - 1, "--name".to_string()); + args.insert(args.len() - 1, vm_name.clone()); + } + + let child = Command::new(exe) + .args(&args) + .stdin(Stdio::null()) + .stdout(log_file.try_clone()?) 
+ .stderr(log_file) + .spawn()?; + + let metadata = EphemeralVmMetadata { + name: vm_name.clone(), + image: opts.image.clone(), + pid: child.id(), + gvproxy_pid: 0, + ssh_port: 0, + ssh_key: cache_base + .join(format!("{}-key", vm_name)) + .to_string_lossy() + .to_string(), + serial_log: String::new(), + log_path: Some(log_path.to_string_lossy().to_string()), + created: chrono::Utc::now().to_rfc3339(), + }; + metadata.save()?; + println!("{}", vm_name); + Ok(()) +} + +// --- SSH setup CPIO --- + +fn create_ssh_setup_cpio(pubkey: &str) -> Result> { + use cpio::newc::Builder as NewcBuilder; + let mut buf = Vec::new(); + + let script = format!( + "#!/bin/bash\n\ + mkdir -p /sysroot/var/roothome/.ssh\n\ + chmod 700 /sysroot/var/roothome/.ssh\n\ + echo '{}' > /sysroot/var/roothome/.ssh/authorized_keys\n\ + chmod 600 /sysroot/var/roothome/.ssh/authorized_keys\n\ + chown -R 0:0 /sysroot/var/roothome/.ssh\n", + pubkey + ); + + let service = "[Unit]\n\ + Description=Setup SSH authorized_keys for root\n\ + DefaultDependencies=no\n\ + ConditionPathExists=/etc/initrd-release\n\ + Before=initrd-fs.target\n\ + After=bcvk-var-ephemeral.service\n\ + Requires=bcvk-var-ephemeral.service\n\ + \n\ + [Service]\n\ + Type=oneshot\n\ + RemainAfterExit=yes\n\ + ExecStart=/usr/bin/bash /usr/lib/bcvk/setup-ssh.sh\n"; + + let dropin = "[Unit]\nWants=bcvk-ssh-setup.service\n"; + + let write_entry = + |buf: &mut Vec, path: &str, data: &[u8], executable: bool| -> std::io::Result<()> { + let mode = if executable { 0o100755 } else { 0o100644 }; + let builder = NewcBuilder::new(path).mode(mode).uid(0).gid(0); + let mut writer = builder.write(buf, data.len() as u32); + writer.write_all(data)?; + writer.finish()?; + Ok(()) + }; + + let write_dir = |buf: &mut Vec, path: &str| -> std::io::Result<()> { + NewcBuilder::new(path) + .mode(0o040755) + .uid(0) + .gid(0) + .write(buf, 0) + .finish()?; + Ok(()) + }; + + write_dir(&mut buf, "usr/lib/bcvk")?; + write_entry( + &mut buf, + 
"usr/lib/bcvk/setup-ssh.sh", + script.as_bytes(), + true, + )?; + write_entry( + &mut buf, + "usr/lib/systemd/system/bcvk-ssh-setup.service", + service.as_bytes(), + false, + )?; + write_entry( + &mut buf, + "usr/lib/systemd/system/initrd-fs.target.d/bcvk-ssh-setup.conf", + dropin.as_bytes(), + false, + )?; + cpio::newc::trailer(&mut buf).map_err(|e| eyre!("cpio trailer: {e}"))?; + Ok(buf) +} + +// --- vfkit kernel decompression --- + +fn extract_uncompressed_kernel(vmlinuz_path: &Path, output_path: &Path) -> Result<()> { + let data = fs::read(vmlinuz_path)?; + + // Parse zboot header: offset 0x08 = payload_offset (le32), 0x0c = payload_size (le32) + let (pos, payload_end) = if data.len() >= 16 && &data[4..8] == b"zimg" { + let payload_offset = u32::from_le_bytes(data[8..12].try_into().unwrap()) as usize; + let payload_size = u32::from_le_bytes(data[12..16].try_into().unwrap()) as usize; + if payload_offset + payload_size > data.len() { + bail!("zboot payload extends beyond file"); + } + info!( + "zboot header: payload at 0x{:x}, size 0x{:x}", + payload_offset, payload_size + ); + (payload_offset, payload_offset + payload_size) + } else { + let magic = [0x28u8, 0xb5, 0x2f, 0xfd]; + let p = data + .windows(4) + .position(|w| w == magic) + .ok_or_else(|| eyre!("zstd magic not found in vmlinuz"))?; + info!("zstd payload at offset 0x{:x} (no zboot header)", p); + (p, data.len()) + }; + + let mut kernel = Vec::new(); + zstd::stream::copy_decode(&data[pos..payload_end], &mut kernel) + .context("decompressing zstd payload from vmlinuz")?; + + if kernel.len() < 0x3c || &kernel[0x38..0x3c] != b"ARMd" { + bail!("decompressed kernel is not a valid ARM64 Image"); + } + fs::write(output_path, &kernel)?; + info!("decompressed kernel: {} bytes (ARM64 Image)", kernel.len()); + Ok(()) +} + +// --- Shared helpers (pub for vfkit/ module) --- + +fn detect_machine_name() -> Result { + let output = Command::new("podman") + .args(["machine", "info", "--format", 
"{{.Host.CurrentMachine}}"]) + .output()?; + let name = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if name.is_empty() { + bail!("no podman machine is running"); + } + Ok(name) +} + +fn ensure_image_and_get_digest(image: &str) -> Result { + let status = Command::new("podman") + .args(["image", "exists", image]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status()?; + if !status.success() { + info!("pulling image {}...", image); + if !Command::new("podman") + .args(["pull", image]) + .status()? + .success() + { + bail!("failed to pull image: {}", image); + } + } + let output = Command::new("podman") + .args(["image", "inspect", "--format", "{{.Digest}}", image]) + .output()?; + let digest = String::from_utf8_lossy(&output.stdout).trim().to_string(); + Ok(digest.trim_start_matches("sha256:").to_string()) +} + +fn extract_kernel(machine: &str, image: &str, boot_dir: &Path) -> Result<()> { + let boot_dir_str = boot_dir.to_string_lossy(); + let script = format!( + "KVER=$(podman run --rm {image} ls /usr/lib/modules/ | head -1) && \ + [ -n \"$KVER\" ] && \ + podman run --rm {image} cat /usr/lib/modules/$KVER/vmlinuz > {boot}/vmlinuz && \ + podman run --rm {image} cat /usr/lib/modules/$KVER/initramfs.img > {boot}/initramfs.img", + image = image, + boot = boot_dir_str + ); + let output = Command::new("podman") + .args(["machine", "ssh", machine, &script]) + .output() + .context("extracting kernel from container image")?; + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!( + "No kernel found in image '{}'.\n\ + Checked: /usr/lib/modules//vmlinuz + initramfs.img\n\ + This image may not be a bootable container (bootc) image.\n\ + {}", + image, + stderr.trim() + ); + } + Ok(()) +} + +fn is_machine_rootful(machine: &str) -> bool { + Command::new("podman") + .args(["machine", "ssh", machine, "id", "-u"]) + .output() + .map(|o| String::from_utf8_lossy(&o.stdout).trim() == "0") + .unwrap_or(false) +} + +fn 
create_squashfs_image( + machine: &str, + rootful: bool, + image: &str, + output_path: &str, +) -> Result<()> { + let script = if rootful { + format!( + "MERGED=$(podman image mount {}) && \ + mksquashfs $MERGED {} -noappend -comp lz4 -b 1M -quiet", + image, output_path + ) + } else { + info!("rootless mode: using podman unshare for SquashFS creation"); + format!( + "podman unshare sh -c 'MERGED=$(podman image mount {}) && \ + mksquashfs $MERGED {} -noappend -comp lz4 -b 1M -quiet'", + image, output_path + ) + }; + + let output = Command::new("podman") + .args(["machine", "ssh", machine, &script]) + .output() + .context("running mksquashfs")?; + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!("mksquashfs failed: {}", stderr.trim()); + } + Ok(()) +} + +/// Clear extended attributes from a file. +/// +/// Apple Virtualization.framework rejects disk images with xattrs like +/// `security.selinux` or `user.containers.override_stat` that are added +/// by podman/buildah when creating images inside containers. +pub fn clear_xattr(path: &Path) { + let _ = Command::new("xattr") + .args(["-c", &path.to_string_lossy()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); +} + +/// Find the vfkit binary, checking PATH and Podman PKG location. +pub fn find_vfkit() -> Result { + if let Ok(path) = which::which("vfkit") { + return Ok(path.to_string_lossy().to_string()); + } + let podman_path = "/opt/podman/bin/vfkit"; + if Path::new(podman_path).exists() { + return Ok(podman_path.to_string()); + } + bail!("vfkit not found. Install: brew install vfkit") +} + +/// Fixed MAC address matching gvproxy's DHCP static lease for 192.168.127.2. +const GVPROXY_STATIC_MAC: [u8; 6] = [0x5a, 0x94, 0xef, 0xe4, 0x0c, 0xee]; + +/// Generate the fixed MAC address for gvproxy DHCP static lease. +pub fn generate_mac() -> [u8; 6] { + GVPROXY_STATIC_MAC +} + +/// Find the gvproxy binary, checking PATH and Podman installation paths. 
+fn find_gvproxy() -> Result { + if let Ok(path) = which::which("gvproxy") { + return Ok(path.to_string_lossy().to_string()); + } + for candidate in [ + "/opt/homebrew/opt/podman/libexec/podman/gvproxy", + "/opt/podman/bin/gvproxy", + ] { + if Path::new(candidate).exists() { + return Ok(candidate.to_string()); + } + } + bail!("gvproxy not found. Ensure Podman is installed (brew install podman)") +} + +/// Start a gvproxy instance with the given socket paths. +pub fn start_gvproxy(gvproxy_sock: &str, services_sock: &str) -> Result { + let gvproxy_bin = find_gvproxy()?; + let _ = fs::remove_file(gvproxy_sock); + let _ = fs::remove_file(services_sock); + let child = Command::new(&gvproxy_bin) + .args([ + "-listen-vfkit", + &format!("unixgram://{}", gvproxy_sock), + "-ssh-port", + "-1", + "-services", + &format!("unix://{}", services_sock), + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn() + .context("failed to start gvproxy. Ensure gvproxy is installed (included in Podman)")?; + for _ in 0..50 { + if Path::new(gvproxy_sock).exists() { + break; + } + std::thread::sleep(Duration::from_millis(100)); + } + if !Path::new(gvproxy_sock).exists() { + bail!("gvproxy socket did not appear"); + } + Ok(child) +} + +/// Expose SSH port forwarding via gvproxy's HTTP API. 
+pub fn expose_ssh_port(services_sock: &str, vm_ip: &str, host_port: u16) -> Result<()> { + let body = format!( + r#"{{"local":":{}","remote":"{}:22","protocol":"tcp"}}"#, + host_port, vm_ip + ); + let mut stream = UnixStream::connect(services_sock)?; + let request = format!( + "POST /services/forwarder/expose HTTP/1.1\r\nHost: unix\r\n\ + Content-Type: application/json\r\nContent-Length: {}\r\n\r\n{}", + body.len(), + body + ); + std::io::Write::write_all(&mut stream, request.as_bytes())?; + std::io::Write::flush(&mut stream)?; + let mut response = vec![0u8; 1024]; + let _ = std::io::Read::read(&mut stream, &mut response); + let response_str = String::from_utf8_lossy(&response); + if !response_str.contains("200") { + bail!( + "gvproxy expose failed: {}", + response_str.trim_end_matches('\0') + ); + } + Ok(()) +} + +const SSH_TIMEOUT: Duration = Duration::from_secs(240); + +/// Find an available TCP port for SSH forwarding in range 2222-3000. +pub fn find_available_ssh_port() -> u16 { + use rand::Rng; + let mut rng = rand::rng(); + const PORT_RANGE_START: u16 = 2222; + const PORT_RANGE_END: u16 = 3000; + for _ in 0..100 { + let port = rng.random_range(PORT_RANGE_START..PORT_RANGE_END); + if std::net::TcpListener::bind(("127.0.0.1", port)).is_ok() { + return port; + } + } + for port in PORT_RANGE_START..PORT_RANGE_END { + if std::net::TcpListener::bind(("127.0.0.1", port)).is_ok() { + return port; + } + } + PORT_RANGE_START +} + +/// Wait for SSH connectivity with exponential backoff (240s timeout). 
+pub fn wait_for_ssh(port: u16, key_path: &Path, user: &str) -> Result<()> { + use crate::ssh_options::CommonSshOptions; + let ssh_opts = CommonSshOptions::default(); + let user_host = format!("{}@localhost", user); + info!("waiting for SSH on port {} ({}@localhost)...", port, user); + let start = std::time::Instant::now(); + let mut attempt = 0u32; + loop { + if start.elapsed() > SSH_TIMEOUT { + bail!("SSH connection timeout ({}s)", SSH_TIMEOUT.as_secs()); + } + let mut cmd = Command::new("ssh"); + cmd.args(["-p", &port.to_string(), "-i", &key_path.to_string_lossy()]); + ssh_opts.apply_to_command(&mut cmd); + cmd.args(["-o", "BatchMode=yes", &user_host, "true"]); + let status = cmd.stdout(Stdio::null()).stderr(Stdio::null()).status(); + if let Ok(s) = status { + if s.success() { + info!("SSH connected after {}s", start.elapsed().as_secs()); + return Ok(()); + } + } + let backoff = if attempt < 2 { + 500 + } else if attempt < 4 { + 1000 + } else { + 2000 + }; + std::thread::sleep(Duration::from_millis(backoff)); + attempt += 1; + } +} + +/// Execute a command via SSH and return the exit status. +pub fn run_ssh_command( + port: u16, + key_path: &Path, + user: &str, + command: &str, +) -> Result { + use crate::ssh_options::CommonSshOptions; + let ssh_opts = CommonSshOptions::default(); + let user_host = format!("{}@localhost", user); + let mut cmd = Command::new("ssh"); + cmd.args(["-p", &port.to_string(), "-i", &key_path.to_string_lossy()]); + ssh_opts.apply_to_command(&mut cmd); + cmd.args(["-o", "BatchMode=yes", &user_host, command]); + cmd.stdin(Stdio::inherit()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()) + .status() + .map_err(|e| eyre!("ssh failed: {}", e)) +} + +/// Start an interactive SSH session with TTY allocation. 
+pub fn run_ssh_interactive( + port: u16, + key_path: &Path, + user: &str, +) -> Result { + use crate::ssh_options::CommonSshOptions; + let ssh_opts = CommonSshOptions::default(); + let user_host = format!("{}@localhost", user); + let mut cmd = Command::new("ssh"); + cmd.args(["-p", &port.to_string(), "-i", &key_path.to_string_lossy()]); + ssh_opts.apply_to_command(&mut cmd); + cmd.args(["-t", &user_host]); + cmd.stdin(Stdio::inherit()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()) + .status() + .map_err(|e| eyre!("ssh failed: {}", e)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_memory_to_mb() { + let cases = [ + ("4G", 4096), + ("4g", 4096), + ("2048M", 2048), + ("2048m", 2048), + ("512", 512), + ("1G", 1024), + ]; + for (input, expected) in &cases { + assert_eq!( + parse_memory_to_mb(input).unwrap(), + *expected, + "parse_memory_to_mb({:?})", + input + ); + } + } + + #[test] + fn test_parse_memory_to_mb_errors() { + assert!(parse_memory_to_mb("").is_err()); + assert!(parse_memory_to_mb("abc").is_err()); + } + + #[test] + fn test_generate_mac() { + let mac = generate_mac(); + assert_eq!(mac, GVPROXY_STATIC_MAC); + } + + #[test] + fn test_default_vcpus() { + let vcpus = default_vcpus(); + assert!(vcpus >= 1); + assert_eq!( + vcpus, + std::thread::available_parallelism() + .map(|n| n.get() as u32) + .unwrap_or(2) + ); + } + + #[test] + fn test_find_available_ssh_port() { + let port = find_available_ssh_port(); + assert!((2222..3000).contains(&port)); + assert!(std::net::TcpListener::bind(("127.0.0.1", port)).is_ok()); + } + + #[test] + fn test_ephemeral_vm_metadata_roundtrip() { + let meta = EphemeralVmMetadata { + name: "test-vm".to_string(), + image: "quay.io/fedora/fedora-bootc:42".to_string(), + pid: 12345, + gvproxy_pid: 12346, + ssh_port: 2222, + ssh_key: "/tmp/test-key".to_string(), + serial_log: "/tmp/test-serial.log".to_string(), + log_path: Some("/tmp/test-vfkit.log".to_string()), + created: 
"2026-01-01T00:00:00Z".to_string(), + }; + let json = serde_json::to_string_pretty(&meta).unwrap(); + let loaded: EphemeralVmMetadata = serde_json::from_str(&json).unwrap(); + assert_eq!(loaded.name, "test-vm"); + assert_eq!(loaded.image, "quay.io/fedora/fedora-bootc:42"); + assert_eq!(loaded.pid, 12345); + assert_eq!(loaded.ssh_port, 2222); + assert_eq!(loaded.log_path.as_deref(), Some("/tmp/test-vfkit.log")); + } + + #[test] + fn test_ephemeral_vm_metadata_save_load_remove() { + let dir = tempfile::tempdir().unwrap(); + let json_path = dir.path().join("roundtrip-vm.json"); + let meta = EphemeralVmMetadata { + name: "roundtrip-vm".to_string(), + image: "localhost/test:latest".to_string(), + pid: 999, + gvproxy_pid: 1000, + ssh_port: 2250, + ssh_key: "/tmp/key".to_string(), + serial_log: "/tmp/serial.log".to_string(), + log_path: None, + created: "2026-05-04T00:00:00Z".to_string(), + }; + fs::write(&json_path, serde_json::to_string_pretty(&meta).unwrap()).unwrap(); + let data = fs::read_to_string(&json_path).unwrap(); + let loaded: EphemeralVmMetadata = serde_json::from_str(&data).unwrap(); + assert_eq!(loaded.name, "roundtrip-vm"); + assert_eq!(loaded.ssh_port, 2250); + assert!(loaded.log_path.is_none()); + fs::remove_file(&json_path).unwrap(); + assert!(!json_path.exists()); + } + + #[test] + fn test_ephemeral_vm_metadata_list_all_from_dir() { + let dir = tempfile::tempdir().unwrap(); + for i in 0..3 { + let meta = EphemeralVmMetadata { + name: format!("vm-{i}"), + image: "test:latest".to_string(), + pid: 100 + i, + gvproxy_pid: 200 + i, + ssh_port: 2222 + (i as u16), + ssh_key: "/tmp/key".to_string(), + serial_log: "/tmp/serial.log".to_string(), + log_path: None, + created: "2026-01-01T00:00:00Z".to_string(), + }; + let path = dir.path().join(format!("vm-{i}.json")); + fs::write(&path, serde_json::to_string(&meta).unwrap()).unwrap(); + } + // Also write a non-json file that should be skipped + fs::write(dir.path().join("README.txt"), "not json").unwrap(); + + 
let mut vms = Vec::new(); + for entry in fs::read_dir(dir.path()).unwrap() { + let path = entry.unwrap().path(); + if path.extension().and_then(|e| e.to_str()) != Some("json") { + continue; + } + if let Ok(data) = fs::read_to_string(&path) { + if let Ok(meta) = serde_json::from_str::(&data) { + vms.push(meta); + } + } + } + assert_eq!(vms.len(), 3); + let mut names: Vec<_> = vms.iter().map(|v| v.name.clone()).collect(); + names.sort(); + assert_eq!(names, vec!["vm-0", "vm-1", "vm-2"]); + } +} diff --git a/crates/kit/src/ssh_options.rs b/crates/kit/src/ssh_options.rs new file mode 100644 index 000000000..8e26be324 --- /dev/null +++ b/crates/kit/src/ssh_options.rs @@ -0,0 +1,136 @@ +//! Cross-platform SSH option types shared between Linux and macOS backends. +//! +//! Extracted from ssh.rs to avoid pulling in Linux-only dependencies on macOS. + +/// Common SSH options that can be shared between different SSH implementations +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct CommonSshOptions { + /// Use strict host key checking + pub strict_host_keys: bool, + /// SSH connection timeout in seconds + pub connect_timeout: u32, + /// Server alive interval in seconds + pub server_alive_interval: u32, + /// SSH log level + pub log_level: String, + /// Additional SSH options as key-value pairs + pub extra_options: Vec<(String, String)>, +} + +impl Default for CommonSshOptions { + fn default() -> Self { + Self { + strict_host_keys: false, + connect_timeout: 1, + server_alive_interval: 60, + log_level: "ERROR".to_string(), + extra_options: vec![], + } + } +} + +impl CommonSshOptions { + /// Apply these options to an SSH command + #[allow(dead_code)] + pub fn apply_to_command(&self, cmd: &mut std::process::Command) { + cmd.args(["-o", "IdentitiesOnly=yes"]); + cmd.args(["-o", "PasswordAuthentication=no"]); + cmd.args(["-o", "KbdInteractiveAuthentication=no"]); + cmd.args(["-o", "GSSAPIAuthentication=no"]); + + cmd.args(["-o", &format!("ConnectTimeout={}", 
self.connect_timeout)]); + cmd.args([ + "-o", + &format!("ServerAliveInterval={}", self.server_alive_interval), + ]); + cmd.args(["-o", &format!("LogLevel={}", self.log_level)]); + + if !self.strict_host_keys { + cmd.args(["-o", "StrictHostKeyChecking=no"]); + cmd.args(["-o", "UserKnownHostsFile=/dev/null"]); + } + + for (key, value) in &self.extra_options { + cmd.args(["-o", &format!("{}={}", key, value)]); + } + } +} + +/// SSH connection configuration options +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct SshConnectionOptions { + /// Common SSH options shared across implementations + pub common: CommonSshOptions, + /// Enable/disable TTY allocation (default: true) + pub allocate_tty: bool, + /// Suppress output to stdout/stderr (default: false) + pub suppress_output: bool, +} + +impl Default for SshConnectionOptions { + fn default() -> Self { + Self { + common: CommonSshOptions::default(), + allocate_tty: true, + suppress_output: false, + } + } +} + +impl SshConnectionOptions { + /// Create options suitable for quick connectivity tests (short timeout, no TTY) + #[allow(dead_code)] + pub fn for_connectivity_test() -> Self { + Self { + common: CommonSshOptions { + strict_host_keys: false, + connect_timeout: 2, + server_alive_interval: 60, + log_level: "ERROR".to_string(), + extra_options: vec![], + }, + allocate_tty: false, + suppress_output: true, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_common_ssh_options_default() { + let opts = CommonSshOptions::default(); + assert!(!opts.strict_host_keys); + assert_eq!(opts.connect_timeout, 1); + assert_eq!(opts.server_alive_interval, 60); + assert_eq!(opts.log_level, "ERROR"); + assert!(opts.extra_options.is_empty()); + } + + #[test] + fn test_connectivity_test_options() { + let opts = SshConnectionOptions::for_connectivity_test(); + assert_eq!(opts.common.connect_timeout, 2); + assert!(!opts.allocate_tty); + assert!(opts.suppress_output); + } + + #[test] + fn 
test_apply_to_command() { + let opts = CommonSshOptions::default(); + let mut cmd = std::process::Command::new("ssh"); + opts.apply_to_command(&mut cmd); + let args: Vec<_> = cmd + .get_args() + .map(|a| a.to_string_lossy().to_string()) + .collect(); + assert!(args.contains(&"IdentitiesOnly=yes".to_string())); + assert!(args.contains(&"PasswordAuthentication=no".to_string())); + assert!(args.contains(&"StrictHostKeyChecking=no".to_string())); + assert!(args.contains(&"ConnectTimeout=1".to_string())); + } +} diff --git a/crates/kit/src/vfkit/inspect.rs b/crates/kit/src/vfkit/inspect.rs new file mode 100644 index 000000000..67a506d7c --- /dev/null +++ b/crates/kit/src/vfkit/inspect.rs @@ -0,0 +1,62 @@ +//! vm inspect — Show detailed VM information. + +use super::VmMetadata; +use color_eyre::Result; + +/// Display detailed metadata for the named VM. +pub fn run(name: &str, json: bool) -> Result<()> { + let meta = VmMetadata::load(name)?; + + if json { + println!("{}", serde_json::to_string_pretty(&meta)?); + return Ok(()); + } + + let state = if meta.is_alive() { + "running" + } else { + "stopped" + }; + + println!("Name: {}", meta.name); + println!("State: {}", state); + println!("Disk: {}", meta.disk_image); + println!("CPUs: {}", meta.cpus); + println!("Memory: {} MiB", meta.memory); + println!("GUI: {}", meta.gui); + println!("Created: {}", meta.created); + println!(); + println!("Processes:"); + if meta.vfkit_pid > 0 { + println!( + " vfkit: PID {} ({})", + meta.vfkit_pid, + if meta.is_alive() { + "running" + } else { + "stopped" + } + ); + } + if meta.gvproxy_pid > 0 { + println!(" gvproxy: PID {}", meta.gvproxy_pid); + } + println!(); + println!("SSH:"); + println!(" Port: {}", meta.ssh_port); + println!(" User: {}", meta.ssh_user); + println!(" Key: {}", meta.ssh_key); + if state == "running" { + println!(); + println!( + " ssh -p {} -i {} {}@localhost", + meta.ssh_port, meta.ssh_key, meta.ssh_user + ); + } + println!(); + println!("Files:"); + println!(" EFI 
store: {}", meta.efi_store); + println!(" Serial log: {}", meta.serial_log); + + Ok(()) +} diff --git a/crates/kit/src/vfkit/list.rs b/crates/kit/src/vfkit/list.rs new file mode 100644 index 000000000..bdda3f295 --- /dev/null +++ b/crates/kit/src/vfkit/list.rs @@ -0,0 +1,29 @@ +//! vm list — List all persistent VMs. + +use super::VmMetadata; +use color_eyre::Result; + +/// List all persistent VMs, optionally as JSON. +pub fn run(json: bool) -> Result<()> { + let vms = VmMetadata::list_all()?; + + if json { + println!("{}", serde_json::to_string_pretty(&vms)?); + return Ok(()); + } + + if vms.is_empty() { + println!("No VMs found."); + return Ok(()); + } + + println!("{:<20} {:<10} {:<30} SSH", "NAME", "STATE", "DISK"); + for vm in &vms { + let state = if vm.is_alive() { "running" } else { "stopped" }; + println!( + "{:<20} {:<10} {:<30} ssh -p {} -i {} {}@localhost", + vm.name, state, vm.disk_image, vm.ssh_port, vm.ssh_key, vm.ssh_user + ); + } + Ok(()) +} diff --git a/crates/kit/src/vfkit/mod.rs b/crates/kit/src/vfkit/mod.rs new file mode 100644 index 000000000..62939254a --- /dev/null +++ b/crates/kit/src/vfkit/mod.rs @@ -0,0 +1,271 @@ +//! Persistent VM management for macOS using vfkit + EFI boot. +//! +//! Subcommands mirror the Linux libvirt/ module structure: +//! run, list, ssh, stop, start, rm, rm-all, inspect + +use std::fs; +use std::path::PathBuf; +use std::process::{Command, Stdio}; + +use clap::Subcommand; +use color_eyre::Result; + +pub mod inspect; +pub mod list; +pub mod rm; +pub mod rm_all; +pub mod run; +pub mod ssh; +pub mod start; +pub mod stop; + +/// Subcommands for persistent VM management via vfkit. 
+#[derive(Debug, Subcommand)] +pub enum VmCommands { + /// Run a persistent VM from a disk image + Run(run::VmRunOpts), + + /// List all persistent VMs + #[clap(name = "list", alias = "ls")] + List { + /// Output in JSON format + #[clap(long)] + json: bool, + }, + + /// SSH into a running VM + Ssh(ssh::VmSshOpts), + + /// Stop a running VM + Stop { + /// VM name + name: String, + }, + + /// Start a stopped VM + Start(start::VmStartOpts), + + /// Remove a VM and its metadata + #[clap(name = "rm")] + Remove(rm::VmRmOpts), + + /// Remove all VMs + #[clap(name = "rm-all")] + RemoveAll { + /// Force removal without confirmation + #[clap(short, long)] + force: bool, + }, + + /// Show detailed VM information + Inspect { + /// VM name + name: String, + /// Output in JSON format + #[clap(long)] + json: bool, + }, +} + +impl VmCommands { + /// Dispatch to the appropriate subcommand handler. + pub fn run(self) -> Result<()> { + match self { + VmCommands::Run(opts) => run::run(opts), + VmCommands::List { json } => list::run(json), + VmCommands::Ssh(opts) => ssh::run(opts), + VmCommands::Stop { name } => stop::run(&name), + VmCommands::Start(opts) => start::run(opts), + VmCommands::Remove(opts) => rm::run(opts), + VmCommands::RemoveAll { force } => rm_all::run(force), + VmCommands::Inspect { name, json } => inspect::run(&name, json), + } + } +} + +// --- VM Metadata --- + +/// Persistent VM metadata, stored as JSON in `~/.local/share/bcvk/vms/`. +#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] +pub struct VmMetadata { + /// VM name used as identifier. + pub name: String, + /// Path to the disk image file. + pub disk_image: String, + /// PID of the vfkit process. + pub vfkit_pid: u32, + /// PID of the gvproxy network proxy process. + pub gvproxy_pid: u32, + /// Host-side SSH port forwarded to the VM. + pub ssh_port: u16, + /// Path to the SSH private key. + pub ssh_key: String, + /// SSH username for connecting to the VM. 
+ pub ssh_user: String, + /// Number of vCPUs allocated. + pub cpus: u32, + /// Memory in megabytes. + pub memory: u32, + /// Path to the EFI variable store file. + pub efi_store: String, + /// Path to the serial console log file. + pub serial_log: String, + /// Whether GUI mode is enabled. + pub gui: bool, + /// ISO 8601 timestamp when the VM was created. + pub created: String, + /// Current VM state (running, stopped). + pub state: String, +} + +impl VmMetadata { + /// Return the directory path for persistent VM metadata files. + pub fn vms_dir() -> PathBuf { + dirs::home_dir() + .expect("cannot determine home directory") + .join(".local/share/bcvk/vms") + } + + /// Save metadata to a JSON file in the VMs directory. + pub fn save(&self) -> Result<()> { + let dir = Self::vms_dir(); + fs::create_dir_all(&dir)?; + let path = dir.join(format!("{}.json", self.name)); + fs::write(&path, serde_json::to_string_pretty(self)?)?; + Ok(()) + } + + /// Load metadata for the named VM from its JSON file. + pub fn load(name: &str) -> Result { + let path = Self::vms_dir().join(format!("{}.json", name)); + let data = fs::read_to_string(&path)?; + Ok(serde_json::from_str(&data)?) + } + + /// Remove metadata file for the named VM. + pub fn remove(name: &str) { + let path = Self::vms_dir().join(format!("{}.json", name)); + let _ = fs::remove_file(path); + } + + /// List all persistent VM metadata from the VMs directory. + pub fn list_all() -> Result> { + let dir = Self::vms_dir(); + if !dir.exists() { + return Ok(Vec::new()); + } + let mut vms = Vec::new(); + for entry in fs::read_dir(&dir)? { + let path = entry?.path(); + if path.extension().and_then(|e| e.to_str()) != Some("json") { + continue; + } + if let Ok(data) = fs::read_to_string(&path) { + if let Ok(meta) = serde_json::from_str::(&data) { + vms.push(meta); + } + } + } + Ok(vms) + } + + /// Check if the VM process is still alive via kill -0. 
+ pub fn is_alive(&self) -> bool { + if self.vfkit_pid == 0 { + return false; + } + Command::new("kill") + .args(["-0", &self.vfkit_pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .map(|s| s.success()) + .unwrap_or(false) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_vm_metadata(name: &str) -> VmMetadata { + VmMetadata { + name: name.to_string(), + disk_image: "/tmp/disk.raw".to_string(), + vfkit_pid: 0, + gvproxy_pid: 0, + ssh_port: 2222, + ssh_key: "/tmp/key".to_string(), + ssh_user: "root".to_string(), + cpus: 2, + memory: 4096, + efi_store: "/tmp/efi.fd".to_string(), + serial_log: "/tmp/serial.log".to_string(), + gui: false, + created: "2026-01-01T00:00:00Z".to_string(), + state: "running".to_string(), + } + } + + #[test] + fn test_vm_metadata_roundtrip() { + let meta = sample_vm_metadata("test-vm"); + let json = serde_json::to_string_pretty(&meta).unwrap(); + let loaded: VmMetadata = serde_json::from_str(&json).unwrap(); + assert_eq!(loaded.name, "test-vm"); + assert_eq!(loaded.disk_image, "/tmp/disk.raw"); + assert_eq!(loaded.cpus, 2); + assert_eq!(loaded.memory, 4096); + assert_eq!(loaded.ssh_user, "root"); + assert_eq!(loaded.state, "running"); + assert!(!loaded.gui); + } + + #[test] + fn test_vm_metadata_save_load_remove() { + let dir = tempfile::tempdir().unwrap(); + let json_path = dir.path().join("myvm.json"); + let meta = sample_vm_metadata("myvm"); + fs::write(&json_path, serde_json::to_string_pretty(&meta).unwrap()).unwrap(); + let data = fs::read_to_string(&json_path).unwrap(); + let loaded: VmMetadata = serde_json::from_str(&data).unwrap(); + assert_eq!(loaded.name, "myvm"); + assert_eq!(loaded.ssh_port, 2222); + fs::remove_file(&json_path).unwrap(); + assert!(!json_path.exists()); + } + + #[test] + fn test_vm_metadata_list_from_dir() { + let dir = tempfile::tempdir().unwrap(); + for i in 0..3 { + let meta = sample_vm_metadata(&format!("vm-{i}")); + let path = 
dir.path().join(format!("vm-{i}.json")); + fs::write(&path, serde_json::to_string(&meta).unwrap()).unwrap(); + } + fs::write(dir.path().join("notes.txt"), "ignored").unwrap(); + + let mut vms = Vec::new(); + for entry in fs::read_dir(dir.path()).unwrap() { + let path = entry.unwrap().path(); + if path.extension().and_then(|e| e.to_str()) != Some("json") { + continue; + } + if let Ok(data) = fs::read_to_string(&path) { + if let Ok(meta) = serde_json::from_str::(&data) { + vms.push(meta); + } + } + } + assert_eq!(vms.len(), 3); + let mut names: Vec<_> = vms.iter().map(|v| v.name.clone()).collect(); + names.sort(); + assert_eq!(names, vec!["vm-0", "vm-1", "vm-2"]); + } + + #[test] + fn test_vm_metadata_is_alive_zero_pid() { + let meta = sample_vm_metadata("dead-vm"); + assert!(!meta.is_alive()); + } +} diff --git a/crates/kit/src/vfkit/rm.rs b/crates/kit/src/vfkit/rm.rs new file mode 100644 index 000000000..ec48044e8 --- /dev/null +++ b/crates/kit/src/vfkit/rm.rs @@ -0,0 +1,59 @@ +//! vm rm — Remove a persistent VM and its metadata. + +use std::fs; + +use clap::Parser; +use color_eyre::Result; +use tracing::info; + +use super::VmMetadata; + +/// Options for `vm rm`. +#[derive(Parser, Debug)] +pub struct VmRmOpts { + /// VM name + pub name: String, + /// Force removal even if running + #[clap(short, long)] + pub force: bool, +} + +/// Remove a persistent VM, optionally force-killing it. +pub fn run(opts: VmRmOpts) -> Result<()> { + let meta = VmMetadata::load(&opts.name)?; + + if meta.is_alive() { + if !opts.force { + color_eyre::eyre::bail!( + "VM '{}' is running. 
Stop it first or use --force", + opts.name + ); + } + info!("force stopping VM '{}'...", opts.name); + crate::vfkit::stop::run(&opts.name)?; + } + + for path in [&meta.efi_store, &meta.serial_log] { + if !path.is_empty() { + if let Err(e) = fs::remove_file(path) { + if e.kind() != std::io::ErrorKind::NotFound { + tracing::debug!("failed to remove {}: {}", path, e); + } + } + } + } + + let vms_dir = VmMetadata::vms_dir(); + for suffix in ["-gvproxy.sock", "-gvproxy-svc.sock"] { + let p = vms_dir.join(format!("{}{}", meta.name, suffix)); + if let Err(e) = fs::remove_file(&p) { + if e.kind() != std::io::ErrorKind::NotFound { + tracing::debug!("failed to remove {}: {}", p.display(), e); + } + } + } + + VmMetadata::remove(&opts.name); + println!("Removed '{}'", opts.name); + Ok(()) +} diff --git a/crates/kit/src/vfkit/rm_all.rs b/crates/kit/src/vfkit/rm_all.rs new file mode 100644 index 000000000..2ed80df66 --- /dev/null +++ b/crates/kit/src/vfkit/rm_all.rs @@ -0,0 +1,44 @@ +//! vm rm-all — Remove all persistent VMs. + +use std::io::Write; + +use super::VmMetadata; +use color_eyre::Result; + +/// Remove all persistent VMs, prompting unless `force` is set. +pub fn run(force: bool) -> Result<()> { + let vms = VmMetadata::list_all()?; + if vms.is_empty() { + println!("No VMs found."); + return Ok(()); + } + + if !force { + println!("Found {} VM(s):", vms.len()); + for vm in &vms { + println!( + " {} ({})", + vm.name, + if vm.is_alive() { "running" } else { "stopped" } + ); + } + print!("Remove all VMs? 
[y/N]: "); + std::io::stdout().flush()?; + let mut input = String::new(); + std::io::stdin().read_line(&mut input)?; + let input = input.trim().to_lowercase(); + if input != "y" && input != "yes" { + println!("Aborted."); + return Ok(()); + } + } + + for vm in &vms { + let opts = super::rm::VmRmOpts { + name: vm.name.clone(), + force: true, + }; + super::rm::run(opts)?; + } + Ok(()) +} diff --git a/crates/kit/src/vfkit/run.rs b/crates/kit/src/vfkit/run.rs new file mode 100644 index 000000000..389aa0ca7 --- /dev/null +++ b/crates/kit/src/vfkit/run.rs @@ -0,0 +1,188 @@ +//! vm run — Start a persistent VM from a disk image using vfkit + EFI boot. + +use std::fs; +use std::path::Path; +use std::process::{Command, Stdio}; + +use clap::Parser; +use color_eyre::{eyre::bail, Result}; +use tracing::info; + +use super::VmMetadata; +use crate::run_ephemeral_macos::{ + clear_xattr, expose_ssh_port, find_available_ssh_port, find_vfkit, generate_mac, start_gvproxy, + wait_for_ssh, +}; + +/// Options for `vm run`. +#[derive(Parser, Debug)] +pub struct VmRunOpts { + /// Disk image path (.raw) + pub disk: String, + /// VM name for identification + #[clap(long)] + pub name: Option, + /// Number of vCPUs + #[clap(long)] + pub vcpus: Option, + /// Memory size (e.g. "4G", "2048M", or plain number for MB) + #[clap(long, default_value = "4G")] + pub memory: String, + /// Path to an existing SSH private key + #[clap(long)] + pub ssh_key: Option, + /// SSH username (default: root) + #[clap(long, default_value = "root")] + pub ssh_user: String, + /// SSH port (default: auto-allocate) + #[clap(long)] + pub ssh_port: Option, + /// Display VM console in GUI window + #[clap(long)] + pub gui: bool, +} + +/// Create and launch a persistent VM from a disk image via vfkit + EFI. 
+pub fn run(opts: VmRunOpts) -> Result<()> { + let vfkit_bin = find_vfkit()?; + + if !Path::new(&opts.disk).exists() { + bail!("disk image not found: {}", opts.disk); + } + clear_xattr(Path::new(&opts.disk)); + + let ssh_key_path = match &opts.ssh_key { + Some(p) => p.clone(), + None => find_ssh_key()?, + }; + if !Path::new(&ssh_key_path).exists() { + bail!( + "SSH key not found: {}. Specify with --ssh-key", + ssh_key_path + ); + } + + let vm_name = opts.name.clone().unwrap_or_else(|| { + Path::new(&opts.disk) + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("vm") + .to_string() + }); + + let vms_dir = VmMetadata::vms_dir(); + fs::create_dir_all(&vms_dir)?; + + let efi_store = vms_dir.join(format!("{}-efi-vars", vm_name)); + let serial_log = vms_dir.join(format!("{}-serial.log", vm_name)); + let gvproxy_sock = vms_dir.join(format!("{}-gvproxy.sock", vm_name)); + let services_sock = vms_dir.join(format!("{}-gvproxy-svc.sock", vm_name)); + + let gvproxy_sock_str = gvproxy_sock.to_string_lossy().to_string(); + let services_sock_str = services_sock.to_string_lossy().to_string(); + + info!("starting gvproxy..."); + let gvproxy_child = start_gvproxy(&gvproxy_sock_str, &services_sock_str)?; + + let mac = generate_mac(); + let mac_str = format!( + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] + ); + + let vcpus = opts.vcpus.unwrap_or(2); + let memory_mb = crate::run_ephemeral_macos::parse_memory_to_mb(&opts.memory)?; + + let mut vfkit_args = vec![ + "--cpus".to_string(), + vcpus.to_string(), + "--memory".to_string(), + memory_mb.to_string(), + "--bootloader".to_string(), + format!("efi,variable-store={},create", efi_store.display()), + "--device".to_string(), + format!("virtio-blk,path={}", opts.disk), + "--device".to_string(), + format!( + "virtio-net,unixSocketPath={},mac={}", + gvproxy_sock_str, mac_str + ), + "--device".to_string(), + format!("virtio-serial,logFilePath={}", serial_log.display()), + 
"--device".to_string(), + "virtio-rng".to_string(), + ]; + if opts.gui { + vfkit_args.push("--gui".to_string()); + } + + info!("launching vfkit (EFI boot)..."); + let vfkit_child = Command::new(&vfkit_bin) + .args(&vfkit_args) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn()?; + + let ssh_port = opts.ssh_port.unwrap_or_else(find_available_ssh_port); + info!("SSH port: {}", ssh_port); + + info!("setting up SSH port forwarding..."); + for attempt in 0..15u32 { + match expose_ssh_port(&services_sock_str, "192.168.127.2", ssh_port) { + Ok(_) => { + info!("SSH port {} forwarded", ssh_port); + break; + } + Err(e) if attempt < 14 => { + tracing::debug!("SSH port forward attempt {}: {}", attempt, e); + let backoff = 200 * 2u64.pow(attempt.min(4)); + std::thread::sleep(std::time::Duration::from_millis(backoff)); + } + Err(e) => bail!("SSH port forward failed: {}", e), + } + } + + let key_path = std::path::Path::new(&ssh_key_path); + wait_for_ssh(ssh_port, key_path, &opts.ssh_user)?; + + let metadata = VmMetadata { + name: vm_name.clone(), + disk_image: opts.disk.clone(), + vfkit_pid: vfkit_child.id(), + gvproxy_pid: gvproxy_child.id(), + ssh_port, + ssh_key: ssh_key_path.clone(), + ssh_user: opts.ssh_user.clone(), + cpus: vcpus, + memory: memory_mb, + efi_store: efi_store.to_string_lossy().to_string(), + serial_log: serial_log.to_string_lossy().to_string(), + gui: opts.gui, + created: chrono::Utc::now().to_rfc3339(), + state: "running".to_string(), + }; + metadata.save()?; + + println!("VM '{}' is running", vm_name); + println!( + " ssh -p {} -i {} {}@localhost", + ssh_port, ssh_key_path, opts.ssh_user + ); + println!(); + println!("To connect: bcvk vm ssh {}", vm_name); + println!("To stop: bcvk vm stop {}", vm_name); + + Ok(()) +} + +fn find_ssh_key() -> Result { + let home = dirs::home_dir() + .ok_or_else(|| color_eyre::eyre::eyre!("cannot determine home directory"))?; + for name in &["id_ed25519", "id_rsa"] { + let path = home.join(".ssh").join(name); + if 
path.exists() { + return Ok(path.to_string_lossy().to_string()); + } + } + bail!("no SSH key found in ~/.ssh/. Generate with: ssh-keygen -t ed25519") +} diff --git a/crates/kit/src/vfkit/ssh.rs b/crates/kit/src/vfkit/ssh.rs new file mode 100644 index 000000000..74af46736 --- /dev/null +++ b/crates/kit/src/vfkit/ssh.rs @@ -0,0 +1,24 @@ +//! vm ssh — SSH into a running persistent VM. + +use super::VmMetadata; +use crate::run_ephemeral_macos::run_ssh_interactive; +use clap::Parser; +use color_eyre::{eyre::bail, Result}; + +/// Options for `vm ssh`. +#[derive(Parser, Debug)] +pub struct VmSshOpts { + /// VM name + pub name: String, +} + +/// Open an interactive SSH session to a running persistent VM. +pub fn run(opts: VmSshOpts) -> Result<()> { + let vm = VmMetadata::load(&opts.name)?; + if !vm.is_alive() { + bail!("VM '{}' is not running", opts.name); + } + let key_path = std::path::Path::new(&vm.ssh_key); + run_ssh_interactive(vm.ssh_port, key_path, &vm.ssh_user)?; + Ok(()) +} diff --git a/crates/kit/src/vfkit/start.rs b/crates/kit/src/vfkit/start.rs new file mode 100644 index 000000000..f2f2a48f3 --- /dev/null +++ b/crates/kit/src/vfkit/start.rs @@ -0,0 +1,115 @@ +//! vm start — Restart a stopped persistent VM. + +use std::process::{Command, Stdio}; + +use clap::Parser; +use color_eyre::{eyre::bail, Result}; +use tracing::info; + +use super::VmMetadata; +use crate::run_ephemeral_macos::{ + clear_xattr, expose_ssh_port, find_vfkit, generate_mac, start_gvproxy, wait_for_ssh, +}; + +/// Options for `vm start`. +#[derive(Parser, Debug)] +pub struct VmStartOpts { + /// VM name + pub name: String, + /// Display VM console in GUI window + #[clap(long)] + pub gui: bool, +} + +/// Restart a stopped persistent VM by re-launching vfkit. 
+pub fn run(opts: VmStartOpts) -> Result<()> { + let mut meta = VmMetadata::load(&opts.name)?; + if meta.is_alive() { + bail!("VM '{}' is already running", opts.name); + } + + if !std::path::Path::new(&meta.disk_image).exists() { + bail!("disk image not found: {}", meta.disk_image); + } + clear_xattr(std::path::Path::new(&meta.disk_image)); + + let vfkit_bin = find_vfkit()?; + let vms_dir = VmMetadata::vms_dir(); + + let gvproxy_sock = vms_dir.join(format!("{}-gvproxy.sock", meta.name)); + let services_sock = vms_dir.join(format!("{}-gvproxy-svc.sock", meta.name)); + let gvproxy_sock_str = gvproxy_sock.to_string_lossy().to_string(); + let services_sock_str = services_sock.to_string_lossy().to_string(); + + info!("starting gvproxy..."); + let gvproxy_child = start_gvproxy(&gvproxy_sock_str, &services_sock_str)?; + + let mac = generate_mac(); + let mac_str = format!( + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] + ); + + let gui = opts.gui || meta.gui; + let mut vfkit_args = vec![ + "--cpus".to_string(), + meta.cpus.to_string(), + "--memory".to_string(), + meta.memory.to_string(), + "--bootloader".to_string(), + format!("efi,variable-store={},create", meta.efi_store), + "--device".to_string(), + format!("virtio-blk,path={}", meta.disk_image), + "--device".to_string(), + format!( + "virtio-net,unixSocketPath={},mac={}", + gvproxy_sock_str, mac_str + ), + "--device".to_string(), + format!("virtio-serial,logFilePath={}", meta.serial_log), + "--device".to_string(), + "virtio-rng".to_string(), + ]; + if gui { + vfkit_args.push("--gui".to_string()); + } + + info!("launching vfkit (EFI boot)..."); + let vfkit_child = Command::new(&vfkit_bin) + .args(&vfkit_args) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn()?; + + info!("setting up SSH port forwarding..."); + for attempt in 0..15u32 { + match expose_ssh_port(&services_sock_str, "192.168.127.2", meta.ssh_port) { + Ok(_) => { + info!("SSH port {} forwarded", 
meta.ssh_port); + break; + } + Err(e) if attempt < 14 => { + tracing::debug!("SSH port forward attempt {}: {}", attempt, e); + let backoff = 200 * 2u64.pow(attempt.min(4)); + std::thread::sleep(std::time::Duration::from_millis(backoff)); + } + Err(e) => bail!("SSH port forward failed: {}", e), + } + } + + let key_path = std::path::Path::new(&meta.ssh_key); + wait_for_ssh(meta.ssh_port, key_path, &meta.ssh_user)?; + + meta.vfkit_pid = vfkit_child.id(); + meta.gvproxy_pid = gvproxy_child.id(); + meta.state = "running".to_string(); + meta.gui = gui; + meta.save()?; + + println!("Started '{}'", meta.name); + println!( + " ssh -p {} -i {} {}@localhost", + meta.ssh_port, meta.ssh_key, meta.ssh_user + ); + Ok(()) +} diff --git a/crates/kit/src/vfkit/stop.rs b/crates/kit/src/vfkit/stop.rs new file mode 100644 index 000000000..24ea6ceba --- /dev/null +++ b/crates/kit/src/vfkit/stop.rs @@ -0,0 +1,63 @@ +//! vm stop — Stop a running persistent VM. + +use std::process::{Command, Stdio}; +use std::time::Duration; + +use super::VmMetadata; +use color_eyre::{eyre::bail, Result}; +use tracing::info; + +/// Stop a running persistent VM by sending SIGTERM to vfkit. 
+pub fn run(name: &str) -> Result<()> { + let mut meta = VmMetadata::load(name)?; + if !meta.is_alive() { + bail!("VM '{}' is not running", name); + } + + info!("stopping VM '{}'...", name); + + if meta.vfkit_pid > 0 { + if let Err(e) = Command::new("kill") + .args(["-TERM", &meta.vfkit_pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::debug!("failed to SIGTERM vfkit (PID {}): {}", meta.vfkit_pid, e); + } + std::thread::sleep(Duration::from_secs(3)); + if meta.is_alive() { + if let Err(e) = Command::new("kill") + .args(["-KILL", &meta.vfkit_pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::debug!("failed to SIGKILL vfkit (PID {}): {}", meta.vfkit_pid, e); + } + } + } + + if meta.gvproxy_pid > 0 { + if let Err(e) = Command::new("kill") + .args(["-KILL", &meta.gvproxy_pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::debug!( + "failed to SIGKILL gvproxy (PID {}): {}", + meta.gvproxy_pid, + e + ); + } + } + + meta.state = "stopped".to_string(); + meta.vfkit_pid = 0; + meta.gvproxy_pid = 0; + meta.save()?; + + println!("Stopped '{}'", name); + Ok(()) +} From b1c257347abeacde5f3ffb6ca3cbe16df3f331fe Mon Sep 17 00:00:00 2001 From: Shion Tanaka Date: Tue, 12 May 2026 01:10:31 +0900 Subject: [PATCH 2/3] macOS: add vfkit backend for ephemeral and persistent VMs macOS has no KVM/QEMU, so this adds vfkit as the VM backend. Ephemeral VMs use a custom nbdkit EROFS plugin that dynamically generates rootfs, ESP, and GPT from the container overlay via NBD. Persistent VMs use EFI boot. The vfkit/ module mirrors the libvirt/ directory structure, and CLI options match Linux where applicable. Plugin distribution method is TBD. Build and run on macOS: cargo build --release codesign -fs - target/release/bcvk Tested on macOS (Apple Silicon) with rootful and rootless podman machine. 
Assisted-by: Claude Code (Opus 4.6) Signed-off-by: Shion Tanaka --- Cargo.lock | 17 + crates/kit/Cargo.toml | 1 + crates/kit/src/ephemeral_macos.rs | 54 +- crates/kit/src/lib.rs | 2 + crates/kit/src/main.rs | 2 + crates/kit/src/nbdkit_macos.rs | 186 +++++++ crates/kit/src/run_ephemeral_macos.rs | 478 ++++++----------- crates/kit/src/vfkit/mod.rs | 14 +- crates/kit/src/vfkit/stop.rs | 26 +- crates/nbdkit-erofs-plugin/Cargo.lock | 39 ++ crates/nbdkit-erofs-plugin/Cargo.toml | 13 + crates/nbdkit-erofs-plugin/src/dir_walk.rs | 138 +++++ crates/nbdkit-erofs-plugin/src/erofs.rs | 502 ++++++++++++++++++ crates/nbdkit-erofs-plugin/src/fat32.rs | 548 ++++++++++++++++++++ crates/nbdkit-erofs-plugin/src/gpt.rs | 290 +++++++++++ crates/nbdkit-erofs-plugin/src/initramfs.rs | 182 +++++++ crates/nbdkit-erofs-plugin/src/lib.rs | 389 ++++++++++++++ crates/nbdkit-erofs-plugin/src/regions.rs | 80 +++ 18 files changed, 2595 insertions(+), 366 deletions(-) create mode 100644 crates/kit/src/nbdkit_macos.rs create mode 100644 crates/nbdkit-erofs-plugin/Cargo.lock create mode 100644 crates/nbdkit-erofs-plugin/Cargo.toml create mode 100644 crates/nbdkit-erofs-plugin/src/dir_walk.rs create mode 100644 crates/nbdkit-erofs-plugin/src/erofs.rs create mode 100644 crates/nbdkit-erofs-plugin/src/fat32.rs create mode 100644 crates/nbdkit-erofs-plugin/src/gpt.rs create mode 100644 crates/nbdkit-erofs-plugin/src/initramfs.rs create mode 100644 crates/nbdkit-erofs-plugin/src/lib.rs create mode 100644 crates/nbdkit-erofs-plugin/src/regions.rs diff --git a/Cargo.lock b/Cargo.lock index f18fec2bf..27f2d278d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -632,6 +632,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-channel" version = "0.5.15" @@ -1833,6 +1842,14 @@ 
dependencies = [ "tempfile", ] +[[package]] +name = "nbdkit-erofs-plugin" +version = "0.1.0" +dependencies = [ + "crc32fast", + "libc", +] + [[package]] name = "newtype-uuid" version = "1.3.2" diff --git a/crates/kit/Cargo.toml b/crates/kit/Cargo.toml index 399f0764f..034663be1 100644 --- a/crates/kit/Cargo.toml +++ b/crates/kit/Cargo.toml @@ -60,6 +60,7 @@ libsystemd = "0.7" # macOS-only dependencies (vfkit backend) [target.'cfg(target_os = "macos")'.dependencies] +rustix = { version = "1", features = ["process"] } zstd = "0.13" [dev-dependencies] diff --git a/crates/kit/src/ephemeral_macos.rs b/crates/kit/src/ephemeral_macos.rs index ca3255247..8d46075f4 100644 --- a/crates/kit/src/ephemeral_macos.rs +++ b/crates/kit/src/ephemeral_macos.rs @@ -137,28 +137,55 @@ fn cmd_rm_all(force: bool) -> Result<()> { for vm in &vms { if vm.is_alive() { - if let Err(e) = Command::new("kill") - .args([&vm.pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + if let Err(e) = rustix::process::kill_process( + rustix::process::Pid::from_raw(vm.pid as i32).unwrap(), + rustix::process::Signal::TERM, + ) { tracing::warn!("failed to kill VM process {}: {}", vm.pid, e); } if vm.gvproxy_pid > 0 { - if let Err(e) = Command::new("kill") - .args([&vm.gvproxy_pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + if let Err(e) = rustix::process::kill_process( + rustix::process::Pid::from_raw(vm.gvproxy_pid as i32).unwrap(), + rustix::process::Signal::TERM, + ) { tracing::warn!("failed to kill gvproxy {}: {}", vm.gvproxy_pid, e); } } } + if let Some(ref container) = vm.nbd_container { + crate::nbdkit_macos::stop_nbdkit_container(container); + } EphemeralVmMetadata::remove(&vm.name); println!("Removed {}", vm.name); } + + // Sweep orphaned resources inside podman machine + if let Ok(machine) = run_ephemeral_macos::detect_machine_name() { + // Remove orphaned nbdkit containers + let _ = Command::new("podman") + .args([ + "machine", + 
"ssh", + &machine, + "--", + "podman", + "rm", + "-f", + "--filter", + "name=bcvk-nbd-", + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + // Unmount any remaining container image overlays + let _ = Command::new("podman") + .args([ + "machine", "ssh", &machine, "--", "podman", "image", "umount", "--all", + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + } Ok(()) } @@ -170,7 +197,8 @@ fn cmd_ssh(name: &str, args: &[String]) -> Result<()> { } // Try to set up SSH port forwarding via VM-specific gvproxy socket - let svc_sock = format!("/private/tmp/bcvk/{}-gvproxy-svc.sock", name); + let base = run_ephemeral_macos::ephemeral_base_dir(); + let svc_sock = format!("{}/{}-gvproxy-svc.sock", base.display(), name); if std::path::Path::new(&svc_sock).exists() { if let Err(e) = run_ephemeral_macos::expose_ssh_port(&svc_sock, "192.168.127.2", vm.ssh_port) diff --git a/crates/kit/src/lib.rs b/crates/kit/src/lib.rs index a3aa51578..d7257cb8e 100644 --- a/crates/kit/src/lib.rs +++ b/crates/kit/src/lib.rs @@ -13,6 +13,8 @@ pub mod kernel; // macOS-only modules (vfkit backend) #[cfg(target_os = "macos")] +pub mod nbdkit_macos; +#[cfg(target_os = "macos")] pub mod run_ephemeral_macos; #[cfg(target_os = "macos")] diff --git a/crates/kit/src/main.rs b/crates/kit/src/main.rs index cc4969312..b92d35783 100644 --- a/crates/kit/src/main.rs +++ b/crates/kit/src/main.rs @@ -65,6 +65,8 @@ mod varlink_ipc; #[cfg(target_os = "macos")] mod ephemeral_macos; #[cfg(target_os = "macos")] +mod nbdkit_macos; +#[cfg(target_os = "macos")] mod run_ephemeral_macos; #[cfg(target_os = "macos")] mod vfkit; diff --git a/crates/kit/src/nbdkit_macos.rs b/crates/kit/src/nbdkit_macos.rs new file mode 100644 index 000000000..40c2cc20e --- /dev/null +++ b/crates/kit/src/nbdkit_macos.rs @@ -0,0 +1,186 @@ +//! nbdkit EROFS plugin management for macOS ephemeral VMs. 
+ +use color_eyre::{ + eyre::{bail, Context}, + Result, +}; +use std::process::{Command, Stdio}; +use std::time::Duration; +use tracing::info; + +use crate::run_ephemeral_macos::detect_machine_name; + +/// Path to the nbdkit EROFS plugin shared library inside podman machine. +const NBDKIT_EROFS_PLUGIN_PATH: &str = "/var/tmp/bcvk/libnbdkit_erofs_plugin.so"; + +/// Get the merged overlay path from podman image mount. +pub(crate) fn get_merged_path(machine: &str, rootful: bool, image: &str) -> Result { + let output = if rootful { + Command::new("podman") + .args([ + "machine", "ssh", machine, "--", "podman", "image", "mount", image, + ]) + .output() + .context("podman image mount")? + } else { + Command::new("podman") + .args([ + "machine", "ssh", machine, "--", "podman", "unshare", "podman", "image", "mount", + image, + ]) + .output() + .context("podman image mount")? + }; + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!("podman image mount failed: {}", stderr.trim()); + } + Ok(String::from_utf8_lossy(&output.stdout).trim().to_string()) +} + +/// Start nbdkit with the erofs plugin for dynamic EROFS + ESP + GPT generation. 
+pub(crate) fn start_nbdkit_erofs_plugin( + machine: &str, + merged_path: &str, + cmdline: &str, + ssh_pubkey: &str, + nbd_port: u16, + vm_name: &str, +) -> Result { + let container_name = format!("bcvk-nbd-{}", vm_name); + + let _ = Command::new("podman") + .args([ + "machine", + "ssh", + machine, + "--", + "podman", + "rm", + "-f", + &container_name, + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + + fn shell_escape(s: &str) -> String { + format!("'{}'", s.replace('\'', "'\\''")) + } + + let cmdline_esc = shell_escape(&format!("cmdline={}", cmdline)); + let dir_esc = shell_escape(&format!("dir={}", merged_path)); + + let mut ssh_param = String::new(); + if !ssh_pubkey.is_empty() { + ssh_param = format!(" {}", shell_escape(&format!("ssh_pubkey={}", ssh_pubkey))); + } + + let podman_cmd = format!( + "podman run -d --name {name} --security-opt label=disable \ + -p {port}:10809 \ + -v {merged}:{merged}:ro \ + -v {plugin}:/plugin.so:ro \ + -v /usr/bin/nbdkit:/usr/bin/nbdkit:ro \ + -v /usr/lib64/nbdkit:/usr/lib64/nbdkit:ro \ + quay.io/fedora/fedora:latest \ + nbdkit -f -p 10809 -r /plugin.so \ + {dir} {cmdline}{ssh}", + name = container_name, + port = nbd_port, + merged = merged_path, + plugin = NBDKIT_EROFS_PLUGIN_PATH, + dir = dir_esc, + cmdline = cmdline_esc, + ssh = ssh_param, + ); + + let output = Command::new("podman") + .args(["machine", "ssh", machine, "--", &podman_cmd]) + .output() + .context("failed to start nbdkit erofs plugin")?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!("failed to start nbdkit erofs plugin: {}", stderr.trim()); + } + + info!("waiting for nbdkit on port {}...", nbd_port); + let deadline = std::time::Instant::now() + Duration::from_secs(30); + loop { + if let Ok(mut stream) = std::net::TcpStream::connect_timeout( + &std::net::SocketAddr::from(([127, 0, 0, 1], nbd_port)), + Duration::from_millis(500), + ) { + use std::io::Read; + 
stream.set_read_timeout(Some(Duration::from_secs(2))).ok(); + let mut buf = [0u8; 8]; + if stream.read_exact(&mut buf).is_ok() && &buf == b"NBDMAGIC" { + break; + } + } + if std::time::Instant::now() > deadline { + let _ = Command::new("podman") + .args([ + "machine", + "ssh", + machine, + "--", + "podman", + "rm", + "-f", + &container_name, + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + bail!( + "nbdkit erofs plugin did not become ready on port {}", + nbd_port + ); + } + std::thread::sleep(Duration::from_millis(500)); + } + + Ok(container_name) +} + +/// Find an available TCP port for NBD in range 10800-10900. +pub fn find_available_nbd_port() -> u16 { + use rand::Rng; + let mut rng = rand::rng(); + const PORT_RANGE_START: u16 = 10800; + const PORT_RANGE_END: u16 = 10900; + for _ in 0..100 { + let port = rng.random_range(PORT_RANGE_START..PORT_RANGE_END); + if std::net::TcpListener::bind(("127.0.0.1", port)).is_ok() { + return port; + } + } + for port in PORT_RANGE_START..PORT_RANGE_END { + if std::net::TcpListener::bind(("127.0.0.1", port)).is_ok() { + return port; + } + } + PORT_RANGE_START +} + +/// Stop and remove an nbdkit container (best-effort). +pub fn stop_nbdkit_container(container_name: &str) { + if let Ok(machine) = detect_machine_name() { + let _ = Command::new("podman") + .args([ + "machine", + "ssh", + &machine, + "--", + "podman", + "rm", + "-f", + container_name, + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + } +} diff --git a/crates/kit/src/run_ephemeral_macos.rs b/crates/kit/src/run_ephemeral_macos.rs index d7fe9257f..2265aacb7 100644 --- a/crates/kit/src/run_ephemeral_macos.rs +++ b/crates/kit/src/run_ephemeral_macos.rs @@ -1,16 +1,14 @@ -//! Ephemeral VM launch flow for macOS using vfkit + SquashFS. +//! Ephemeral VM launch flow for macOS using vfkit + NBD EROFS plugin. //! -//! Boot flow: -//! 1. Extract kernel + initramfs from container image -//! 2. 
Create SquashFS rootfs (lz4, cached by digest) -//! 3. Decompress vmlinuz PE+zstd → uncompressed ARM64 Image -//! 4. Append bcvk units CPIO to initramfs (/etc overlay + /var tmpfs + SSH) -//! 5. Launch vfkit with virtio-blk (SquashFS) + virtio-net (gvproxy) +//! Boot flow (fully diskless): +//! 1. Mount container image overlay (`podman image mount`) +//! 2. Start nbdkit with erofs plugin (dynamically generates GPT + ESP + EROFS) +//! 3. Launch vfkit with EFI boot via NBD + virtio-net (gvproxy) +//! 4. Wait for SSH and execute commands //! //! Common helpers (gvproxy, SSH, vfkit detection) are pub for reuse by vfkit/ module. -use std::fs::{self, OpenOptions}; -use std::io::{Seek, SeekFrom, Write}; +use std::fs; use std::os::unix::net::UnixStream; use std::path::Path; use std::process::{Command, Stdio}; @@ -22,6 +20,13 @@ use color_eyre::{ }; use tracing::{debug, info}; +/// Base directory for ephemeral VM state on macOS host. +pub fn ephemeral_base_dir() -> std::path::PathBuf { + dirs::home_dir() + .unwrap_or_else(|| std::path::PathBuf::from("/tmp")) + .join(".local/share/bcvk/ephemeral") +} + // --- Data structures --- /// Metadata for a running ephemeral VM, persisted as JSON for `ps` and `ssh`. @@ -46,13 +51,19 @@ pub struct EphemeralVmMetadata { pub log_path: Option, /// ISO 8601 timestamp when the VM was created. pub created: String, + /// Name of the nbdkit podman container serving the rootfs. + #[serde(default)] + pub nbd_container: Option, + /// NBD port allocated for this VM's rootfs. + #[serde(default)] + pub nbd_port: Option, } #[allow(dead_code)] impl EphemeralVmMetadata { /// Return the directory path for ephemeral VM metadata files. pub fn vms_dir() -> std::path::PathBuf { - std::path::PathBuf::from("/private/tmp/bcvk/vms") + ephemeral_base_dir().join("vms") } /// Save metadata to a JSON file in the VMs directory. @@ -98,15 +109,10 @@ impl EphemeralVmMetadata { Ok(vms) } - /// Check if the VM process is still alive via kill -0. 
+ /// Check if the VM process is still alive via kill(pid, 0). pub fn is_alive(&self) -> bool { - Command::new("kill") - .args(["-0", &self.pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - .map(|s| s.success()) - .unwrap_or(false) + rustix::process::test_kill_process(rustix::process::Pid::from_raw(self.pid as i32).unwrap()) + .is_ok() } } @@ -167,35 +173,53 @@ pub fn parse_memory_to_mb(s: &str) -> Result { struct VmCleanup { vfkit_pid: u32, gvproxy_pid: u32, + nbd_container: Option, + image: String, vm_name: String, } impl Drop for VmCleanup { fn drop(&mut self) { tracing::debug!("cleaning up VM processes..."); - if let Err(e) = Command::new("kill") - .arg(self.vfkit_pid.to_string()) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + if let Some(ref name) = self.nbd_container { + crate::nbdkit_macos::stop_nbdkit_container(name); + } + if let Err(e) = rustix::process::kill_process( + rustix::process::Pid::from_raw(self.vfkit_pid as i32).unwrap(), + rustix::process::Signal::TERM, + ) { tracing::warn!("failed to kill vfkit (PID {}): {}", self.vfkit_pid, e); } - if let Err(e) = Command::new("kill") - .arg(self.gvproxy_pid.to_string()) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + if let Err(e) = rustix::process::kill_process( + rustix::process::Pid::from_raw(self.gvproxy_pid as i32).unwrap(), + rustix::process::Signal::TERM, + ) { tracing::warn!("failed to kill gvproxy (PID {}): {}", self.gvproxy_pid, e); } + // Release container image overlay mount + if let Ok(machine) = detect_machine_name() { + let _ = Command::new("podman") + .args([ + "machine", + "ssh", + &machine, + "--", + "podman", + "image", + "umount", + &self.image, + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + } EphemeralVmMetadata::remove(&self.vm_name); } } // --- Main entry point --- -/// Run an ephemeral VM from a container image using vfkit + SquashFS. 
+/// Run an ephemeral VM from a container image using vfkit + EROFS over NBD. pub fn run(opts: RunEphemeralOpts) -> Result<()> { if opts.gui && opts.detach { bail!("--gui and --detach cannot be used together (GUI requires foreground process)"); @@ -206,9 +230,9 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { } let vfkit_bin = find_vfkit()?; - info!(image = %opts.image, "starting ephemeral VM on macOS (vfkit + SquashFS)"); + info!(image = %opts.image, "starting ephemeral VM on macOS (vfkit + EROFS)"); - let cache_base = std::path::PathBuf::from("/private/tmp/bcvk"); + let cache_base = ephemeral_base_dir(); fs::create_dir_all(&cache_base)?; let machine = detect_machine_name()?; @@ -228,121 +252,37 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { .unwrap_or_else(|| format!("ephemeral-{}", &digest_short[..8])); let ssh_key_path = cache_base.join(format!("{}-key", vm_name)); - let boot_dir = cache_base.join(format!("boot-{}", digest_short)); - fs::create_dir_all(&boot_dir)?; - let squashfs_cache = format!("/private/tmp/bcvk/rootfs-{}.squashfs", digest_short); - let squashfs_path = format!("/private/tmp/bcvk/{}-rootfs.squashfs", vm_name); - let vmlinuz_path = boot_dir.join("vmlinuz"); - let image_path = boot_dir.join("Image"); - let initramfs_orig = boot_dir.join("initramfs-orig.img"); - let initramfs_path = cache_base.join(format!("{}-initramfs.img", vm_name)); - - // Step 1+2: kernel extract + SquashFS creation (parallel) - let step2_handle = if !Path::new(&squashfs_cache).exists() { - let mc = machine.clone(); - let rf = rootful; - let img = opts.image.clone(); - let sc = squashfs_cache.clone(); - Some(std::thread::spawn(move || -> Result<()> { - info!("creating SquashFS image (lz4)..."); - create_squashfs_image(&mc, rf, &img, &sc) - })) - } else { - info!("using cached SquashFS: {}", squashfs_cache); - None - }; - - if !vmlinuz_path.exists() || !initramfs_orig.exists() { - info!("extracting kernel and initramfs..."); - extract_kernel(&machine, &opts.image, 
&boot_dir)?; - fs::rename(boot_dir.join("initramfs.img"), &initramfs_orig)?; - } - - // Step 3+4: kernel decompress + CPIO append (parallel after Step 1) - let step3_handle = if !image_path.exists() { - let vp = vmlinuz_path.clone(); - let ip = image_path.clone(); - Some(std::thread::spawn(move || -> Result<()> { - info!("decompressing kernel (vmlinuz → Image)..."); - extract_uncompressed_kernel(&vp, &ip) - })) - } else { - None - }; - - fs::copy(&initramfs_orig, &initramfs_path)?; - { - let cpio_data = crate::cpio::create_initramfs_units_cpio() - .map_err(|e| eyre!("failed to create CPIO: {e}"))?; - let mut f = OpenOptions::new().append(true).open(&initramfs_path)?; - let sz = f.seek(SeekFrom::End(0))?; - let pad = sz.next_multiple_of(4) - sz; - if pad > 0 { - f.write_all(&vec![0u8; pad as usize])?; - } - f.write_all(&cpio_data)?; + fs::create_dir_all(&cache_base)?; - if opts.ssh_keygen || !opts.execute.is_empty() { - info!("generating SSH keypair..."); - let _ = fs::remove_file(&ssh_key_path); - let _ = fs::remove_file(ssh_key_path.with_extension("pub")); - let status = Command::new("ssh-keygen") - .args([ - "-t", - "ed25519", - "-f", - &ssh_key_path.to_string_lossy(), - "-N", - "", - "-q", - ]) - .status()?; - if !status.success() { - bail!("ssh-keygen failed (exit code: {:?})", status.code()); - } - let pubkey = fs::read_to_string(ssh_key_path.with_extension("pub"))?; - let ssh_cpio = create_ssh_setup_cpio(pubkey.trim())?; - let pos = f.seek(SeekFrom::End(0))?; - let pad = pos.next_multiple_of(4) - pos; - if pad > 0 { - f.write_all(&vec![0u8; pad as usize])?; - } - f.write_all(&ssh_cpio)?; + // Generate SSH keypair on macOS host + let mut ssh_pubkey = String::new(); + if opts.ssh_keygen || !opts.execute.is_empty() { + info!("generating SSH keypair..."); + let _ = fs::remove_file(&ssh_key_path); + let _ = fs::remove_file(ssh_key_path.with_extension("pub")); + let status = Command::new("ssh-keygen") + .args([ + "-t", + "ed25519", + "-f", + 
&ssh_key_path.to_string_lossy(), + "-N", + "", + "-q", + ]) + .status()?; + if !status.success() { + bail!("ssh-keygen failed"); } - info!("initramfs prepared"); - } - - if let Some(h) = step3_handle { - h.join() - .map_err(|_| eyre!("kernel decompression thread panicked"))??; - } - if let Some(h) = step2_handle { - h.join() - .map_err(|_| eyre!("squashfs creation thread panicked"))??; + ssh_pubkey = fs::read_to_string(ssh_key_path.with_extension("pub"))? + .trim() + .to_string(); } - // CoW clone SquashFS for this VM (allows concurrent use of same image) - let _ = fs::remove_file(&squashfs_path); - let clone_status = Command::new("cp") - .args(["-c", &squashfs_cache, &squashfs_path]) - .status() - .context("cloning SquashFS")?; - if !clone_status.success() { - fs::copy(&squashfs_cache, &squashfs_path).context("copying SquashFS")?; - } - - // 5. gvproxy + vfkit - let gvproxy_sock = cache_base.join(format!("{}-gvproxy.sock", vm_name)); - let services_sock = cache_base.join(format!("{}-gvproxy-svc.sock", vm_name)); - let gvproxy_sock_str = gvproxy_sock.to_string_lossy().to_string(); - let services_sock_str = services_sock.to_string_lossy().to_string(); - info!("starting gvproxy..."); - let mut gvproxy_child = start_gvproxy(&gvproxy_sock_str, &services_sock_str)?; - let mut cmdline_parts: Vec<&str> = vec![ - "root=/dev/vda", + "root=/dev/vda2", "ro", - "rootfstype=squashfs", + "rootfstype=erofs", "console=tty0", "console=hvc0", "loglevel=4", @@ -354,18 +294,39 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { cmdline_parts.extend(&user_args); let cmdline = cmdline_parts.join(" "); + // Get container image merged overlay path + let merged_path = crate::nbdkit_macos::get_merged_path(&machine, rootful, &opts.image)?; + info!("overlay merged: {}", merged_path); + + // Start nbdkit with erofs plugin (dynamic EROFS + ESP + GPT from overlay dir) + let nbd_port = crate::nbdkit_macos::find_available_nbd_port(); + let nbd_container_name = 
crate::nbdkit_macos::start_nbdkit_erofs_plugin( + &machine, + &merged_path, + &cmdline, + &ssh_pubkey, + nbd_port, + &vm_name, + )?; + std::thread::sleep(Duration::from_millis(500)); + info!("nbdkit ready on port {}", nbd_port); + + // gvproxy + vfkit (EFI boot) + let gvproxy_sock = cache_base.join(format!("{}-gvproxy.sock", vm_name)); + let services_sock = cache_base.join(format!("{}-gvproxy-svc.sock", vm_name)); + let gvproxy_sock_str = gvproxy_sock.to_string_lossy().to_string(); + let services_sock_str = services_sock.to_string_lossy().to_string(); + info!("starting gvproxy..."); + let mut gvproxy_child = start_gvproxy(&gvproxy_sock_str, &services_sock_str)?; + let mac = generate_mac(); let mac_str = format!( "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] ); - let bootloader_arg = format!( - "linux,kernel={},initrd={},cmdline=\"{}\"", - image_path.display(), - initramfs_path.display(), - cmdline - ); + let efi_var_store = cache_base.join(format!("{}-efi-vars", vm_name)); + let bootloader_arg = format!("efi,variable-store={},create", efi_var_store.display()); let vcpus = opts.vcpus.unwrap_or_else(default_vcpus); let memory_mb = parse_memory_to_mb(&opts.memory)?; @@ -378,7 +339,10 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { "--bootloader".to_string(), bootloader_arg, "--device".to_string(), - format!("virtio-blk,path={}", squashfs_path), + format!( + "nbd,uri=nbd://127.0.0.1:{}/,readonly,timeout=5000,deviceId=rootfs", + nbd_port + ), "--device".to_string(), format!( "virtio-net,unixSocketPath={},mac={}", @@ -387,6 +351,13 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { "--device".to_string(), "virtio-rng".to_string(), ]; + + let serial_log = cache_base.join(format!("{}-serial.log", vm_name)); + vfkit_args.extend([ + "--device".to_string(), + format!("virtio-serial,logFilePath={}", serial_log.display()), + ]); + if opts.gui { vfkit_args.push("--gui".to_string()); } @@ -411,15 +382,19 @@ pub fn run(opts: 
RunEphemeralOpts) -> Result<()> { gvproxy_pid: gvproxy_child.id(), ssh_port, ssh_key: ssh_key_path.to_string_lossy().to_string(), - serial_log: String::new(), + serial_log: serial_log.to_string_lossy().to_string(), log_path: None, created: chrono::Utc::now().to_rfc3339(), + nbd_container: Some(nbd_container_name.clone()), + nbd_port: Some(nbd_port), }; metadata.save()?; let _cleanup = VmCleanup { vfkit_pid: vfkit_child.id(), gvproxy_pid: gvproxy_child.id(), + nbd_container: Some(nbd_container_name.clone()), + image: opts.image.clone(), vm_name: vm_name.clone(), }; @@ -472,15 +447,31 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { std::mem::forget(_cleanup); let status = vfkit_child.wait()?; info!("vfkit exited: {}", status); + crate::nbdkit_macos::stop_nbdkit_container(&nbd_container_name); if let Err(e) = gvproxy_child.kill() { tracing::debug!("failed to kill gvproxy: {}", e); } + // Release container image overlay mount + let _ = Command::new("podman") + .args([ + "machine", + "ssh", + &machine, + "--", + "podman", + "image", + "umount", + &opts.image, + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); EphemeralVmMetadata::remove(&vm_name); Ok(()) } fn run_detached(opts: &RunEphemeralOpts) -> Result<()> { - let cache_base = std::path::PathBuf::from("/private/tmp/bcvk"); + let cache_base = ephemeral_base_dir(); fs::create_dir_all(&cache_base)?; let digest = ensure_image_and_get_digest(&opts.image)?; let digest_short = &digest[..16.min(digest.len())]; @@ -524,128 +515,18 @@ fn run_detached(opts: &RunEphemeralOpts) -> Result<()> { serial_log: String::new(), log_path: Some(log_path.to_string_lossy().to_string()), created: chrono::Utc::now().to_rfc3339(), + nbd_container: None, + nbd_port: None, }; metadata.save()?; println!("{}", vm_name); Ok(()) } -// --- SSH setup CPIO --- - -fn create_ssh_setup_cpio(pubkey: &str) -> Result> { - use cpio::newc::Builder as NewcBuilder; - let mut buf = Vec::new(); - - let script = format!( - "#!/bin/bash\n\ - 
mkdir -p /sysroot/var/roothome/.ssh\n\ - chmod 700 /sysroot/var/roothome/.ssh\n\ - echo '{}' > /sysroot/var/roothome/.ssh/authorized_keys\n\ - chmod 600 /sysroot/var/roothome/.ssh/authorized_keys\n\ - chown -R 0:0 /sysroot/var/roothome/.ssh\n", - pubkey - ); - - let service = "[Unit]\n\ - Description=Setup SSH authorized_keys for root\n\ - DefaultDependencies=no\n\ - ConditionPathExists=/etc/initrd-release\n\ - Before=initrd-fs.target\n\ - After=bcvk-var-ephemeral.service\n\ - Requires=bcvk-var-ephemeral.service\n\ - \n\ - [Service]\n\ - Type=oneshot\n\ - RemainAfterExit=yes\n\ - ExecStart=/usr/bin/bash /usr/lib/bcvk/setup-ssh.sh\n"; - - let dropin = "[Unit]\nWants=bcvk-ssh-setup.service\n"; - - let write_entry = - |buf: &mut Vec, path: &str, data: &[u8], executable: bool| -> std::io::Result<()> { - let mode = if executable { 0o100755 } else { 0o100644 }; - let builder = NewcBuilder::new(path).mode(mode).uid(0).gid(0); - let mut writer = builder.write(buf, data.len() as u32); - writer.write_all(data)?; - writer.finish()?; - Ok(()) - }; - - let write_dir = |buf: &mut Vec, path: &str| -> std::io::Result<()> { - NewcBuilder::new(path) - .mode(0o040755) - .uid(0) - .gid(0) - .write(buf, 0) - .finish()?; - Ok(()) - }; - - write_dir(&mut buf, "usr/lib/bcvk")?; - write_entry( - &mut buf, - "usr/lib/bcvk/setup-ssh.sh", - script.as_bytes(), - true, - )?; - write_entry( - &mut buf, - "usr/lib/systemd/system/bcvk-ssh-setup.service", - service.as_bytes(), - false, - )?; - write_entry( - &mut buf, - "usr/lib/systemd/system/initrd-fs.target.d/bcvk-ssh-setup.conf", - dropin.as_bytes(), - false, - )?; - cpio::newc::trailer(&mut buf).map_err(|e| eyre!("cpio trailer: {e}"))?; - Ok(buf) -} - -// --- vfkit kernel decompression --- - -fn extract_uncompressed_kernel(vmlinuz_path: &Path, output_path: &Path) -> Result<()> { - let data = fs::read(vmlinuz_path)?; - - // Parse zboot header: offset 0x08 = payload_offset (le32), 0x0c = payload_size (le32) - let (pos, payload_end) = if 
data.len() >= 16 && &data[4..8] == b"zimg" { - let payload_offset = u32::from_le_bytes(data[8..12].try_into().unwrap()) as usize; - let payload_size = u32::from_le_bytes(data[12..16].try_into().unwrap()) as usize; - if payload_offset + payload_size > data.len() { - bail!("zboot payload extends beyond file"); - } - info!( - "zboot header: payload at 0x{:x}, size 0x{:x}", - payload_offset, payload_size - ); - (payload_offset, payload_offset + payload_size) - } else { - let magic = [0x28u8, 0xb5, 0x2f, 0xfd]; - let p = data - .windows(4) - .position(|w| w == magic) - .ok_or_else(|| eyre!("zstd magic not found in vmlinuz"))?; - info!("zstd payload at offset 0x{:x} (no zboot header)", p); - (p, data.len()) - }; - - let mut kernel = Vec::new(); - zstd::stream::copy_decode(&data[pos..payload_end], &mut kernel) - .context("decompressing zstd payload from vmlinuz")?; - - if kernel.len() < 0x3c || &kernel[0x38..0x3c] != b"ARMd" { - bail!("decompressed kernel is not a valid ARM64 Image"); - } - fs::write(output_path, &kernel)?; - info!("decompressed kernel: {} bytes (ARM64 Image)", kernel.len()); - Ok(()) -} - // --- Shared helpers (pub for vfkit/ module) --- -fn detect_machine_name() -> Result { +/// Detect the name of the running podman machine. 
+pub fn detect_machine_name() -> Result { let output = Command::new("podman") .args(["machine", "info", "--format", "{{.Host.CurrentMachine}}"]) .output()?; @@ -679,34 +560,6 @@ fn ensure_image_and_get_digest(image: &str) -> Result { Ok(digest.trim_start_matches("sha256:").to_string()) } -fn extract_kernel(machine: &str, image: &str, boot_dir: &Path) -> Result<()> { - let boot_dir_str = boot_dir.to_string_lossy(); - let script = format!( - "KVER=$(podman run --rm {image} ls /usr/lib/modules/ | head -1) && \ - [ -n \"$KVER\" ] && \ - podman run --rm {image} cat /usr/lib/modules/$KVER/vmlinuz > {boot}/vmlinuz && \ - podman run --rm {image} cat /usr/lib/modules/$KVER/initramfs.img > {boot}/initramfs.img", - image = image, - boot = boot_dir_str - ); - let output = Command::new("podman") - .args(["machine", "ssh", machine, &script]) - .output() - .context("extracting kernel from container image")?; - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - bail!( - "No kernel found in image '{}'.\n\ - Checked: /usr/lib/modules//vmlinuz + initramfs.img\n\ - This image may not be a bootable container (bootc) image.\n\ - {}", - image, - stderr.trim() - ); - } - Ok(()) -} - fn is_machine_rootful(machine: &str) -> bool { Command::new("podman") .args(["machine", "ssh", machine, "id", "-u"]) @@ -715,38 +568,6 @@ fn is_machine_rootful(machine: &str) -> bool { .unwrap_or(false) } -fn create_squashfs_image( - machine: &str, - rootful: bool, - image: &str, - output_path: &str, -) -> Result<()> { - let script = if rootful { - format!( - "MERGED=$(podman image mount {}) && \ - mksquashfs $MERGED {} -noappend -comp lz4 -b 1M -quiet", - image, output_path - ) - } else { - info!("rootless mode: using podman unshare for SquashFS creation"); - format!( - "podman unshare sh -c 'MERGED=$(podman image mount {}) && \ - mksquashfs $MERGED {} -noappend -comp lz4 -b 1M -quiet'", - image, output_path - ) - }; - - let output = Command::new("podman") - 
.args(["machine", "ssh", machine, &script]) - .output() - .context("running mksquashfs")?; - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - bail!("mksquashfs failed: {}", stderr.trim()); - } - Ok(()) -} - /// Clear extended attributes from a file. /// /// Apple Virtualization.framework rejects disk images with xattrs like @@ -1018,12 +839,15 @@ mod tests { serial_log: "/tmp/test-serial.log".to_string(), log_path: Some("/tmp/test-vfkit.log".to_string()), created: "2026-01-01T00:00:00Z".to_string(), + nbd_container: Some("bcvk-nbd-test-vm".to_string()), + nbd_port: Some(10841), }; let json = serde_json::to_string_pretty(&meta).unwrap(); let loaded: EphemeralVmMetadata = serde_json::from_str(&json).unwrap(); assert_eq!(loaded.name, "test-vm"); assert_eq!(loaded.image, "quay.io/fedora/fedora-bootc:42"); assert_eq!(loaded.pid, 12345); + assert_eq!(loaded.nbd_container.as_deref(), Some("bcvk-nbd-test-vm")); assert_eq!(loaded.ssh_port, 2222); assert_eq!(loaded.log_path.as_deref(), Some("/tmp/test-vfkit.log")); } @@ -1042,6 +866,8 @@ mod tests { serial_log: "/tmp/serial.log".to_string(), log_path: None, created: "2026-05-04T00:00:00Z".to_string(), + nbd_container: None, + nbd_port: None, }; fs::write(&json_path, serde_json::to_string_pretty(&meta).unwrap()).unwrap(); let data = fs::read_to_string(&json_path).unwrap(); @@ -1067,6 +893,8 @@ mod tests { serial_log: "/tmp/serial.log".to_string(), log_path: None, created: "2026-01-01T00:00:00Z".to_string(), + nbd_container: Some(format!("bcvk-nbd-vm-{i}")), + nbd_port: Some(10800 + i as u16), }; let path = dir.path().join(format!("vm-{i}.json")); fs::write(&path, serde_json::to_string(&meta).unwrap()).unwrap(); diff --git a/crates/kit/src/vfkit/mod.rs b/crates/kit/src/vfkit/mod.rs index 62939254a..2062851d5 100644 --- a/crates/kit/src/vfkit/mod.rs +++ b/crates/kit/src/vfkit/mod.rs @@ -5,7 +5,6 @@ use std::fs; use std::path::PathBuf; -use std::process::{Command, Stdio}; use 
clap::Subcommand; use color_eyre::Result; @@ -169,18 +168,15 @@ impl VmMetadata { Ok(vms) } - /// Check if the VM process is still alive via kill -0. + /// Check if the VM process is still alive via kill(pid, 0). pub fn is_alive(&self) -> bool { if self.vfkit_pid == 0 { return false; } - Command::new("kill") - .args(["-0", &self.vfkit_pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - .map(|s| s.success()) - .unwrap_or(false) + rustix::process::test_kill_process( + rustix::process::Pid::from_raw(self.vfkit_pid as i32).unwrap(), + ) + .is_ok() } } diff --git a/crates/kit/src/vfkit/stop.rs b/crates/kit/src/vfkit/stop.rs index 24ea6ceba..52c69fb51 100644 --- a/crates/kit/src/vfkit/stop.rs +++ b/crates/kit/src/vfkit/stop.rs @@ -1,6 +1,5 @@ //! vm stop — Stop a running persistent VM. -use std::process::{Command, Stdio}; use std::time::Duration; use super::VmMetadata; @@ -17,34 +16,23 @@ pub fn run(name: &str) -> Result<()> { info!("stopping VM '{}'...", name); if meta.vfkit_pid > 0 { - if let Err(e) = Command::new("kill") - .args(["-TERM", &meta.vfkit_pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + let pid = rustix::process::Pid::from_raw(meta.vfkit_pid as i32).unwrap(); + if let Err(e) = rustix::process::kill_process(pid, rustix::process::Signal::TERM) { tracing::debug!("failed to SIGTERM vfkit (PID {}): {}", meta.vfkit_pid, e); } std::thread::sleep(Duration::from_secs(3)); if meta.is_alive() { - if let Err(e) = Command::new("kill") - .args(["-KILL", &meta.vfkit_pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + if let Err(e) = rustix::process::kill_process(pid, rustix::process::Signal::KILL) { tracing::debug!("failed to SIGKILL vfkit (PID {}): {}", meta.vfkit_pid, e); } } } if meta.gvproxy_pid > 0 { - if let Err(e) = Command::new("kill") - .args(["-KILL", &meta.gvproxy_pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + if let 
Err(e) = rustix::process::kill_process( + rustix::process::Pid::from_raw(meta.gvproxy_pid as i32).unwrap(), + rustix::process::Signal::KILL, + ) { tracing::debug!( "failed to SIGKILL gvproxy (PID {}): {}", meta.gvproxy_pid, diff --git a/crates/nbdkit-erofs-plugin/Cargo.lock b/crates/nbdkit-erofs-plugin/Cargo.lock new file mode 100644 index 000000000..b5064fd23 --- /dev/null +++ b/crates/nbdkit-erofs-plugin/Cargo.lock @@ -0,0 +1,39 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cpio" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938e716cb1ade5d6c8f959c13a7248b889c07491fc7e41167c3afe20f8f0de1e" + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "nbdkit-erofs-plugin" +version = "0.1.0" +dependencies = [ + "cpio", + "crc32fast", + "libc", +] diff --git a/crates/nbdkit-erofs-plugin/Cargo.toml b/crates/nbdkit-erofs-plugin/Cargo.toml new file mode 100644 index 000000000..0f645c08c --- /dev/null +++ b/crates/nbdkit-erofs-plugin/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "nbdkit-erofs-plugin" +version = "0.1.0" +edition = "2021" +publish = false + +[lib] +crate-type = ["cdylib"] + +[dependencies] +libc = "0.2" +cpio = "0.4" +crc32fast = "1.4" diff --git a/crates/nbdkit-erofs-plugin/src/dir_walk.rs b/crates/nbdkit-erofs-plugin/src/dir_walk.rs new file 
mode 100644 index 000000000..674556d4b --- /dev/null +++ b/crates/nbdkit-erofs-plugin/src/dir_walk.rs @@ -0,0 +1,138 @@ +use std::ffi::OsString; +use std::fs; +use std::os::unix::fs::MetadataExt; +use std::path::{Path, PathBuf}; + +#[derive(Debug)] +pub struct FileEntry { + pub host_path: PathBuf, + pub size: u64, + pub mode: u32, + pub uid: u32, + pub gid: u32, + pub mtime: u64, + pub nlink: u32, + pub inode_id: u64, +} + +#[derive(Debug)] +pub struct DirInfo { + pub name: OsString, + pub mode: u32, + pub uid: u32, + pub gid: u32, + pub mtime: u64, + pub inode_id: u64, + pub parent_inode_id: u64, + pub children: Vec, +} + +#[derive(Debug)] +pub struct SymlinkEntry { + pub name: Vec, + pub target: Vec, + pub mode: u32, + pub uid: u32, + pub gid: u32, + pub mtime: u64, + pub inode_id: u64, +} + +/// Child entry in a directory: either a file index, dir index, or symlink index +#[derive(Debug, Clone, Copy)] +pub enum ChildRef { + File(usize), + Dir(usize), + Symlink(usize), +} + +#[derive(Debug)] +pub struct WalkResult { + pub dirs: Vec, + pub files: Vec, + pub symlinks: Vec, +} + +pub fn walk_directory(root: &Path) -> std::io::Result { + let mut result = WalkResult { + dirs: Vec::new(), + files: Vec::new(), + symlinks: Vec::new(), + }; + let mut next_inode: u64 = 0; + + walk_recursive(root, root, &mut result, &mut next_inode, 0)?; + Ok(result) +} + +fn walk_recursive( + root: &Path, + dir: &Path, + result: &mut WalkResult, + next_inode: &mut u64, + parent_inode_id: u64, +) -> std::io::Result { + let meta = fs::symlink_metadata(dir)?; + let dir_inode = *next_inode; + *next_inode += 1; + + let di = result.dirs.len(); + result.dirs.push(DirInfo { + name: dir.file_name().unwrap_or_default().to_os_string(), + mode: meta.mode(), + uid: meta.uid(), + gid: meta.gid(), + mtime: meta.mtime() as u64, + inode_id: dir_inode, + parent_inode_id, + children: Vec::new(), + }); + + let mut entries: Vec<_> = fs::read_dir(dir)?.filter_map(|e| e.ok()).collect(); + entries.sort_by_key(|e| 
e.file_name()); + + for entry in entries { + let path = entry.path(); + let meta = fs::symlink_metadata(&path)?; + let ft = meta.file_type(); + + if ft.is_dir() { + let child_di = walk_recursive(root, &path, result, next_inode, dir_inode)?; + result.dirs[di].children.push(ChildRef::Dir(child_di)); + } else if ft.is_symlink() { + let target = fs::read_link(&path)?; + let target_bytes = target.as_os_str().as_encoded_bytes().to_vec(); + let name = entry.file_name().as_encoded_bytes().to_vec(); + let si = result.symlinks.len(); + let inode = *next_inode; + *next_inode += 1; + result.symlinks.push(SymlinkEntry { + name, + target: target_bytes, + mode: meta.mode(), + uid: meta.uid(), + gid: meta.gid(), + mtime: meta.mtime() as u64, + inode_id: inode, + }); + result.dirs[di].children.push(ChildRef::Symlink(si)); + } else if ft.is_file() { + let fi = result.files.len(); + let inode = *next_inode; + *next_inode += 1; + result.files.push(FileEntry { + host_path: path, + size: meta.len(), + mode: meta.mode(), + uid: meta.uid(), + gid: meta.gid(), + mtime: meta.mtime() as u64, + nlink: meta.nlink() as u32, + inode_id: inode, + }); + result.dirs[di].children.push(ChildRef::File(fi)); + } + } + + Ok(di) +} diff --git a/crates/nbdkit-erofs-plugin/src/erofs.rs b/crates/nbdkit-erofs-plugin/src/erofs.rs new file mode 100644 index 000000000..a795b076a --- /dev/null +++ b/crates/nbdkit-erofs-plugin/src/erofs.rs @@ -0,0 +1,502 @@ +use crate::dir_walk::{ChildRef, DirInfo, WalkResult}; +use crate::regions::{Region, RegionType}; +use std::sync::Arc; + +const EROFS_MAGIC: u32 = 0xE0F5E1E2; +const BLOCK_SIZE: u64 = 4096; +const BLOCK_BITS: u8 = 12; +const SUPERBLOCK_OFFSET: u64 = 1024; + +// EROFS inode formats +const EROFS_INODE_LAYOUT_COMPACT: u16 = 0; + +// EROFS data layouts +const EROFS_INODE_FLAT_PLAIN: u16 = 0; + +// EROFS file types (matching Linux FT_* values, not the DT_* d_type values) +const EROFS_FT_REG_FILE: u8 = 1; +const EROFS_FT_DIR: u8 = 2; +const EROFS_FT_SYMLINK: u8 = 7; + +#[derive(Debug)] +pub
struct FileRegion { + pub file_index: usize, + pub offset_in_erofs: u64, + pub size: u64, +} + +#[derive(Debug)] +pub struct ErofsLayout { + pub metadata: Vec, + pub file_regions: Vec, + pub total_size: u64, +} + +struct DirEntryOnDisk { + nid: u64, + file_type: u8, + name: Vec, +} + +pub fn build_erofs(walk: &WalkResult) -> std::io::Result { + let total_inodes = walk.dirs.len() + walk.files.len() + walk.symlinks.len(); + + // Phase 1: Assign inode positions + // Inodes start at block 1 (block 0 has superblock) + let inode_table_offset = BLOCK_SIZE; // block 1 + let inode_size: u64 = 32; // compact inode + let inode_table_size = align_up(total_inodes as u64 * inode_size, BLOCK_SIZE); + + // Phase 2: Build directory entry blocks + let dir_blocks_offset = inode_table_offset + inode_table_size; + let mut dir_data: Vec = Vec::new(); + let mut dir_block_offsets: Vec = Vec::new(); // per-directory offset in dir_data + + for dir in &walk.dirs { + let offset = align_up(dir_data.len() as u64, BLOCK_SIZE); + dir_data.resize(offset as usize, 0); + dir_block_offsets.push(dir_blocks_offset + offset); + + let mut entries = Vec::new(); + + // "." entry + entries.push(DirEntryOnDisk { + nid: dir.inode_id, + file_type: EROFS_FT_DIR, + name: b".".to_vec(), + }); + + // ".." 
entry (root points to self) + let parent_nid = dir.parent_inode_id; + entries.push(DirEntryOnDisk { + nid: parent_nid, + file_type: EROFS_FT_DIR, + name: b"..".to_vec(), + }); + + // children (sorted by name in walk) + for child in &dir.children { + match child { + ChildRef::Dir(di) => { + let child_dir = &walk.dirs[*di]; + entries.push(DirEntryOnDisk { + nid: child_dir.inode_id, + file_type: EROFS_FT_DIR, + name: child_dir.name.as_encoded_bytes().to_vec(), + }); + } + ChildRef::File(fi) => { + let file = &walk.files[*fi]; + entries.push(DirEntryOnDisk { + nid: file.inode_id, + file_type: EROFS_FT_REG_FILE, + name: file + .host_path + .file_name() + .unwrap_or_default() + .as_encoded_bytes() + .to_vec(), + }); + } + ChildRef::Symlink(si) => { + let symlink = &walk.symlinks[*si]; + entries.push(DirEntryOnDisk { + nid: symlink.inode_id, + file_type: EROFS_FT_SYMLINK, + name: symlink.name.clone(), + }); + } + } + } + + // Write EROFS directory blocks (splits at 4096-byte boundaries) + write_dir_blocks(&mut dir_data, &entries); + } + let dir_data_size = align_up(dir_data.len() as u64, BLOCK_SIZE); + dir_data.resize(dir_data_size as usize, 0); + + // Phase 3: Compute data region layout + let data_offset = dir_blocks_offset + dir_data_size; + let mut file_regions = Vec::new(); + let mut current_data_offset = data_offset; + + for (i, file) in walk.files.iter().enumerate() { + if file.size > 0 { + let aligned_offset = align_up(current_data_offset, BLOCK_SIZE); + file_regions.push(FileRegion { + file_index: i, + offset_in_erofs: aligned_offset, + size: file.size, + }); + current_data_offset = aligned_offset + align_up(file.size, BLOCK_SIZE); + } + } + + // Symlink targets also need data blocks + for (si, symlink) in walk.symlinks.iter().enumerate() { + if !symlink.target.is_empty() { + let aligned_offset = align_up(current_data_offset, BLOCK_SIZE); + file_regions.push(FileRegion { + file_index: walk.files.len() + si, // files.len() + symlink index + offset_in_erofs: 
aligned_offset, + size: symlink.target.len() as u64, + }); + current_data_offset = + aligned_offset + align_up(symlink.target.len() as u64, BLOCK_SIZE); + } + } + + let total_size = align_up(current_data_offset, BLOCK_SIZE); + let total_blocks = total_size / BLOCK_SIZE; + + // Phase 4: Build metadata blob + let mut metadata = vec![0u8; (dir_blocks_offset + dir_data_size) as usize]; + + // Write superblock at offset 1024 + write_superblock( + &mut metadata, + total_inodes as u32, + total_blocks as u32, + 0, // root nid + ); + + // Write inodes + // Directories + for (i, dir) in walk.dirs.iter().enumerate() { + let dir_size = compute_dir_size(dir, walk); + let dir_block = (dir_block_offsets[i] - dir_blocks_offset) / BLOCK_SIZE; + write_compact_inode( + &mut metadata, + inode_table_offset as usize + (dir.inode_id as usize * 32), + 0o040000 | (dir.mode & 0o7777), + dir.uid as u16, + dir.gid as u16, + dir_size as u32, + dir.mtime as u32, + 2 + dir + .children + .iter() + .filter(|c| matches!(c, ChildRef::Dir(_))) + .count() as u16, + EROFS_INODE_FLAT_PLAIN, + (dir_blocks_offset / BLOCK_SIZE + dir_block) as u32, + ); + } + + // Regular files + for (i, file) in walk.files.iter().enumerate() { + let data_block = if file.size > 0 { + let fr = file_regions.iter().find(|r| r.file_index == i); + fr.map(|r| (r.offset_in_erofs / BLOCK_SIZE) as u32) + .unwrap_or(0) + } else { + 0 + }; + write_compact_inode( + &mut metadata, + inode_table_offset as usize + (file.inode_id as usize * 32), + 0o100000 | (file.mode & 0o7777), + file.uid as u16, + file.gid as u16, + file.size as u32, + file.mtime as u32, + file.nlink as u16, + EROFS_INODE_FLAT_PLAIN, + data_block, + ); + } + + // Symlinks: FlatPlain with target in data region + // File regions for symlinks start after file regions + let file_region_count = walk.files.iter().filter(|f| f.size > 0).count(); + let mut sym_fr_idx = file_region_count; + for symlink in &walk.symlinks { + let data_block = if !symlink.target.is_empty() { + let 
fr = &file_regions[sym_fr_idx]; + sym_fr_idx += 1; + (fr.offset_in_erofs / BLOCK_SIZE) as u32 + } else { + 0 + }; + + write_compact_inode( + &mut metadata, + inode_table_offset as usize + (symlink.inode_id as usize * 32), + 0o120000 | (symlink.mode & 0o7777), + symlink.uid as u16, + symlink.gid as u16, + symlink.target.len() as u32, + symlink.mtime as u32, + 1, + EROFS_INODE_FLAT_PLAIN, + data_block, + ); + } + + // Write directory data + let dir_start = dir_blocks_offset as usize; + if dir_start + dir_data.len() <= metadata.len() { + metadata[dir_start..dir_start + dir_data.len()].copy_from_slice(&dir_data); + } + + Ok(ErofsLayout { + metadata, + file_regions, + total_size, + }) +} + +fn write_superblock(buf: &mut [u8], inodes: u32, blocks: u32, root_nid: u16) { + let off = SUPERBLOCK_OFFSET as usize; + // magic + buf[off..off + 4].copy_from_slice(&EROFS_MAGIC.to_le_bytes()); + // checksum (unused) + // feature_compat + buf[off + 8..off + 12].copy_from_slice(&0u32.to_le_bytes()); + // blkszbits + buf[off + 12] = BLOCK_BITS; + // sb_extslots + buf[off + 13] = 0; + // root_nid + buf[off + 14..off + 16].copy_from_slice(&root_nid.to_le_bytes()); + // inos + buf[off + 16..off + 24].copy_from_slice(&(inodes as u64).to_le_bytes()); + // build_time + buf[off + 24..off + 32].copy_from_slice(&0u64.to_le_bytes()); + // build_time_nsec + buf[off + 32..off + 36].copy_from_slice(&0u32.to_le_bytes()); + // blocks + buf[off + 36..off + 40].copy_from_slice(&blocks.to_le_bytes()); + // meta_blkaddr (inode table starts at block 1) + buf[off + 40..off + 44].copy_from_slice(&1u32.to_le_bytes()); + // xattr_blkaddr + buf[off + 44..off + 48].copy_from_slice(&0u32.to_le_bytes()); + // uuid (16 bytes) + // volume_name (16 bytes) + // feature_incompat + buf[off + 80..off + 84].copy_from_slice(&0u32.to_le_bytes()); + // available_compr_algs (union with checksum) + // lz4_max_distance +} + +fn write_compact_inode( + buf: &mut [u8], + offset: usize, + mode: u32, + uid: u16, + gid: u16, + 
size: u32, + _mtime: u32, + nlink: u16, + data_layout: u16, + u_field: u32, +) { + if offset + 32 > buf.len() { + return; + } + + // format: layout(compact=0) | data_layout << 1 + let format = (EROFS_INODE_LAYOUT_COMPACT) | (data_layout << 1); + buf[offset..offset + 2].copy_from_slice(&format.to_le_bytes()); + // xattr_icount + buf[offset + 2..offset + 4].copy_from_slice(&0u16.to_le_bytes()); + // mode + buf[offset + 4..offset + 6].copy_from_slice(&(mode as u16).to_le_bytes()); + // nlink + buf[offset + 6..offset + 8].copy_from_slice(&nlink.to_le_bytes()); + // size + buf[offset + 8..offset + 12].copy_from_slice(&size.to_le_bytes()); + // reserved + buf[offset + 12..offset + 16].copy_from_slice(&0u32.to_le_bytes()); + // u (union: raw_blkaddr for FlatPlain) + buf[offset + 16..offset + 20].copy_from_slice(&u_field.to_le_bytes()); + // ino (on-disk inode number, optional) + buf[offset + 20..offset + 24].copy_from_slice(&0u32.to_le_bytes()); + // uid + buf[offset + 24..offset + 26].copy_from_slice(&uid.to_le_bytes()); + // gid + buf[offset + 26..offset + 28].copy_from_slice(&gid.to_le_bytes()); + // reserved2 + buf[offset + 28..offset + 32].copy_from_slice(&0u32.to_le_bytes()); +} + +fn write_dir_blocks(buf: &mut Vec, entries: &[DirEntryOnDisk]) { + // EROFS directories are split into 4096-byte blocks. + // Each block contains: [headers...][names...] + // header = 12 bytes: nid(8) + nameoff(2) + file_type(1) + reserved(1) + // nameoff is relative to block start. 
+ + let mut remaining = entries; + + while !remaining.is_empty() { + // Determine how many entries fit in this block + let mut count = 0; + let mut total_size: usize = 0; + for entry in remaining { + let entry_size = 12 + entry.name.len(); + if total_size + entry_size > BLOCK_SIZE as usize && count > 0 { + break; + } + total_size += entry_size; + count += 1; + } + + let block_entries = &remaining[..count]; + remaining = &remaining[count..]; + + // Write headers + let header_total = 12 * block_entries.len(); + let mut nameoff = header_total as u16; + for entry in block_entries { + buf.extend_from_slice(&(entry.nid as u64).to_le_bytes()); + buf.extend_from_slice(&nameoff.to_le_bytes()); + buf.push(entry.file_type); + buf.push(0); + nameoff += entry.name.len() as u16; + } + + // Write names + for entry in block_entries { + buf.extend_from_slice(&entry.name); + } + + // Pad to block boundary (except last block which is sized by inode.size) + if !remaining.is_empty() { + let written = total_size; + let pad = BLOCK_SIZE as usize - (written % BLOCK_SIZE as usize); + if pad < BLOCK_SIZE as usize { + buf.resize(buf.len() + pad, 0); + } + } + } +} + +fn compute_dir_size(dir: &DirInfo, walk: &WalkResult) -> u64 { + // Build entry list to accurately compute size including block splits + let mut entries = Vec::new(); + entries.push(DirEntryOnDisk { + nid: 0, + file_type: EROFS_FT_DIR, + name: b".".to_vec(), + }); + entries.push(DirEntryOnDisk { + nid: 0, + file_type: EROFS_FT_DIR, + name: b"..".to_vec(), + }); + for child in &dir.children { + let name_len = match child { + ChildRef::Dir(di) => walk.dirs[*di].name.len(), + ChildRef::File(fi) => walk.files[*fi] + .host_path + .file_name() + .unwrap_or_default() + .len(), + ChildRef::Symlink(si) => walk.symlinks[*si].name.len(), + }; + entries.push(DirEntryOnDisk { + nid: 0, + file_type: 0, + name: vec![0; name_len], + }); + } + + // Simulate block splitting to get total size + let mut total = 0u64; + let mut remaining = 
&entries[..]; + while !remaining.is_empty() { + let mut count = 0; + let mut block_size = 0usize; + for entry in remaining { + let entry_size = 12 + entry.name.len(); + if block_size + entry_size > BLOCK_SIZE as usize && count > 0 { + break; + } + block_size += entry_size; + count += 1; + } + remaining = &remaining[count..]; + if remaining.is_empty() { + total += block_size as u64; // last block: actual size + } else { + total += BLOCK_SIZE; // full block + } + } + total +} + +fn align_up(val: u64, align: u64) -> u64 { + (val + align - 1) & !(align - 1) +} + +pub fn build_erofs_regions(layout: &ErofsLayout, walk: &WalkResult) -> Vec { + let files = &walk.files; + let mut regions = Vec::new(); + + // Metadata region (superblock + inode table + dir blocks) + regions.push(Region { + start: 0, + len: layout.metadata.len() as u64, + region_type: RegionType::Data(Arc::new(layout.metadata.clone())), + }); + + // File and symlink data regions + for fr in &layout.file_regions { + // Padding gap + let current_end = regions.last().map(|r| r.start + r.len).unwrap_or(0); + if fr.offset_in_erofs > current_end { + regions.push(Region { + start: current_end, + len: fr.offset_in_erofs - current_end, + region_type: RegionType::Zero, + }); + } + + if fr.file_index < files.len() { + // Regular file: read from host + regions.push(Region { + start: fr.offset_in_erofs, + len: fr.size, + region_type: RegionType::File { + path: files[fr.file_index].host_path.clone(), + }, + }); + } else { + // Symlink target: inline data + let sym_idx = fr.file_index - files.len(); + if sym_idx < walk.symlinks.len() { + // Pad symlink target to fill the block + let mut data = walk.symlinks[sym_idx].target.clone(); + data.resize(fr.size as usize, 0); + regions.push(Region { + start: fr.offset_in_erofs, + len: fr.size, + region_type: RegionType::Data(Arc::new(data)), + }); + } + } + + // Padding to block boundary + let end = fr.offset_in_erofs + fr.size; + let aligned_end = align_up(end, BLOCK_SIZE); + if 
aligned_end > end { + regions.push(Region { + start: end, + len: aligned_end - end, + region_type: RegionType::Zero, + }); + } + } + + // Ensure total size + let last_end = regions.last().map(|r| r.start + r.len).unwrap_or(0); + if last_end < layout.total_size { + regions.push(Region { + start: last_end, + len: layout.total_size - last_end, + region_type: RegionType::Zero, + }); + } + + regions +} diff --git a/crates/nbdkit-erofs-plugin/src/fat32.rs b/crates/nbdkit-erofs-plugin/src/fat32.rs new file mode 100644 index 000000000..ecc6992d5 --- /dev/null +++ b/crates/nbdkit-erofs-plugin/src/fat32.rs @@ -0,0 +1,548 @@ +//! FAT32 ESP generation using the regions pattern. +//! +//! Generates a virtual FAT32 filesystem with boot files for EFI boot. +//! Metadata (BPB, FAT tables, directory entries) are in-memory Data regions. +//! File data uses File regions for lazy pread from source files. + +use crate::regions::{Region, RegionType}; +use std::path::PathBuf; +use std::sync::Arc; + +const SECTOR_SIZE: u64 = 512; +const CLUSTER_SIZE: u64 = 512; +const SECTORS_PER_CLUSTER: u64 = 1; +const RESERVED_SECTORS: u64 = 32; +const NUM_FATS: u64 = 2; +const DIR_ENTRY_SIZE: u64 = 32; + +const FAT32_EOC: u32 = 0x0FFF_FFFF; +const FAT32_MEDIA: u32 = 0x0FFF_FFF8; + +// Fixed cluster assignments for the ESP directory structure. +// Root directory is always cluster 2 per FAT32 spec. 
+const CLUSTER_ROOT: u32 = 2; +const CLUSTER_EFI: u32 = 3; +const CLUSTER_EFI_BOOT: u32 = 4; +const CLUSTER_BOOT: u32 = 5; + +struct FatFile { + name_8_3: [u8; 11], + size: u64, + regions: Vec, +} + +pub enum FileDataRegion { + FromFile { path: PathBuf, len: u64 }, + FromData(Vec), + Zero(u64), +} + +struct FatDir { + name_8_3: [u8; 11], + cluster: u32, + entries: Vec, +} + +enum FatDirChild { + Dir(usize), + File(usize), +} + +fn clusters_for(size: u64) -> u64 { + if size == 0 { + 1 + } else { + (size + CLUSTER_SIZE - 1) / CLUSTER_SIZE + } +} + +fn make_8_3(name: &str, ext: &str) -> [u8; 11] { + let mut r = [b' '; 11]; + for (i, b) in name.bytes().take(8).enumerate() { + r[i] = b; + } + for (i, b) in ext.bytes().take(3).enumerate() { + r[8 + i] = b; + } + r +} + +pub fn build_esp_regions( + grub_path: &std::path::Path, + grub_size: u64, + grub_cfg: &[u8], + kernel_path: &std::path::Path, + kernel_size: u64, + initrd_parts: Vec<(FileDataRegion, u64)>, + initrd_total_size: u64, +) -> (Vec, u64) { + // Files + let mut files: Vec = Vec::new(); + + // BOOTAA64.EFI + files.push(FatFile { + name_8_3: make_8_3("BOOTAA64", "EFI"), + size: grub_size, + regions: vec![FileDataRegion::FromFile { + path: grub_path.to_path_buf(), + len: grub_size, + }], + }); + + // GRUB.CFG + files.push(FatFile { + name_8_3: make_8_3("GRUB", "CFG"), + size: grub_cfg.len() as u64, + regions: vec![FileDataRegion::FromData(grub_cfg.to_vec())], + }); + + // VMLINUZ + files.push(FatFile { + name_8_3: make_8_3("VMLINUZ", ""), + size: kernel_size, + regions: vec![FileDataRegion::FromFile { + path: kernel_path.to_path_buf(), + len: kernel_size, + }], + }); + + // INITRD.IMG + files.push(FatFile { + name_8_3: make_8_3("INITRD", "IMG"), + size: initrd_total_size, + regions: initrd_parts.into_iter().map(|(r, _)| r).collect(), + }); + + // Directory structure: + // / (root, cluster 2) → EFI/, boot/ + // /EFI (cluster 3) → BOOT/ + // /EFI/BOOT (cluster 4) → BOOTAA64.EFI, GRUB.CFG + // /boot (cluster 5) → 
VMLINUZ, INITRD.IMG + // Note: /EFI/BOOT and /boot both use 8.3 name "BOOT" but are in different + // parent directories so there is no conflict in the FAT32 namespace. + let dirs = vec![ + FatDir { + name_8_3: make_8_3("", ""), + cluster: CLUSTER_ROOT, + entries: vec![FatDirChild::Dir(1), FatDirChild::Dir(3)], + }, + FatDir { + name_8_3: make_8_3("EFI", ""), + cluster: CLUSTER_EFI, + entries: vec![FatDirChild::Dir(2)], + }, + FatDir { + name_8_3: make_8_3("BOOT", ""), + cluster: CLUSTER_EFI_BOOT, + entries: vec![FatDirChild::File(0), FatDirChild::File(1)], + }, + FatDir { + name_8_3: make_8_3("BOOT", ""), + cluster: CLUSTER_BOOT, + entries: vec![FatDirChild::File(2), FatDirChild::File(3)], + }, + ]; + + let dir_clusters = dirs.len() as u32; + + // Assign file clusters (starting after directory clusters) + let mut file_start_clusters: Vec = Vec::new(); + let mut next_cluster = 2 + dir_clusters; + for f in &files { + file_start_clusters.push(next_cluster); + next_cluster += clusters_for(f.size) as u32; + } + let total_clusters = next_cluster; + let data_clusters = total_clusters - 2; + + // FAT table + let fat_entries = total_clusters as usize; + let fat_bytes = ((fat_entries * 4 + SECTOR_SIZE as usize - 1) / SECTOR_SIZE as usize) + * SECTOR_SIZE as usize; + let fat_sectors = fat_bytes as u64 / SECTOR_SIZE; + + let mut fat = vec![0u8; fat_bytes]; + // Entry 0: media descriptor + fat[0..4].copy_from_slice(&FAT32_MEDIA.to_le_bytes()); + // Entry 1: EOC + fat[4..8].copy_from_slice(&FAT32_EOC.to_le_bytes()); + + // Directory clusters (each is single-cluster, EOC) + for d in &dirs { + let off = d.cluster as usize * 4; + fat[off..off + 4].copy_from_slice(&FAT32_EOC.to_le_bytes()); + } + + // File cluster chains + for (fi, f) in files.iter().enumerate() { + let start = file_start_clusters[fi]; + let num = clusters_for(f.size) as u32; + for c in 0..num { + let cluster = start + c; + let off = cluster as usize * 4; + if c == num - 1 { + fat[off..off + 
4].copy_from_slice(&FAT32_EOC.to_le_bytes()); + } else { + fat[off..off + 4].copy_from_slice(&(cluster + 1).to_le_bytes()); + } + } + } + + // Data region start (in sectors) + let data_start_sector = RESERVED_SECTORS + NUM_FATS * fat_sectors; + + // Build directory entry blocks + let mut dir_blocks: Vec> = Vec::new(); + for (di, d) in dirs.iter().enumerate() { + let mut block = vec![0u8; CLUSTER_SIZE as usize]; + let mut pos = 0usize; + + // "." and ".." entries for subdirectories + if di > 0 { + write_dir_entry(&mut block, pos, b". ", 0x10, d.cluster, 0); + pos += DIR_ENTRY_SIZE as usize; + // Parent cluster: dirs at index 1 (EFI) and 3 (boot) are children of root (0). + // Dir at index 2 (EFI/BOOT) is a child of EFI (dirs[1]). + debug_assert!(dirs.len() == 4, "directory structure changed"); + let parent_cluster = if di == 1 || di == 3 { + 0u32 + } else { + dirs[1].cluster + }; + write_dir_entry(&mut block, pos, b".. ", 0x10, parent_cluster, 0); + pos += DIR_ENTRY_SIZE as usize; + } + + for child in &d.entries { + match child { + FatDirChild::Dir(idx) => { + let cd = &dirs[*idx]; + write_dir_entry(&mut block, pos, &cd.name_8_3, 0x10, cd.cluster, 0); + } + FatDirChild::File(idx) => { + let cf = &files[*idx]; + write_dir_entry( + &mut block, + pos, + &cf.name_8_3, + 0x20, + file_start_clusters[*idx], + cf.size, + ); + } + } + pos += DIR_ENTRY_SIZE as usize; + } + dir_blocks.push(block); + } + + // Total size of ESP partition + let total_sectors = data_start_sector + data_clusters as u64 * SECTORS_PER_CLUSTER; + let total_size = total_sectors * SECTOR_SIZE; + + // BPB (BIOS Parameter Block) + let bpb = build_bpb( + total_sectors as u32, + fat_sectors as u32, + data_clusters as u64, + ); + + // FSInfo + let fsinfo = build_fsinfo( + (data_clusters as u32).saturating_sub( + dir_clusters as u32 + + files + .iter() + .map(|f| clusters_for(f.size) as u32) + .sum::(), + ), + next_cluster, + ); + + // Assemble regions + let mut regions: Vec = Vec::new(); + let mut offset =
0u64; + + // Sector 0: BPB + regions.push(Region { + start: offset, + len: SECTOR_SIZE, + region_type: RegionType::Data(Arc::new(bpb.clone())), + }); + offset += SECTOR_SIZE; + + // Sector 1: FSInfo + regions.push(Region { + start: offset, + len: SECTOR_SIZE, + region_type: RegionType::Data(Arc::new(fsinfo.clone())), + }); + offset += SECTOR_SIZE; + + // Sectors 2-5: zero padding + let pad_to_backup = 4 * SECTOR_SIZE; + regions.push(Region { + start: offset, + len: pad_to_backup, + region_type: RegionType::Zero, + }); + offset += pad_to_backup; + + // Sector 6: Backup BPB + regions.push(Region { + start: offset, + len: SECTOR_SIZE, + region_type: RegionType::Data(Arc::new(bpb)), + }); + offset += SECTOR_SIZE; + + // Sector 7: Backup FSInfo + regions.push(Region { + start: offset, + len: SECTOR_SIZE, + region_type: RegionType::Data(Arc::new(fsinfo)), + }); + offset += SECTOR_SIZE; + + // Sectors 8-31: zero padding to reserved end + let remaining_reserved = (RESERVED_SECTORS * SECTOR_SIZE) - offset; + if remaining_reserved > 0 { + regions.push(Region { + start: offset, + len: remaining_reserved, + region_type: RegionType::Zero, + }); + offset += remaining_reserved; + } + + // FAT1 + let fat_data = Arc::new(fat.clone()); + regions.push(Region { + start: offset, + len: fat_bytes as u64, + region_type: RegionType::Data(fat_data.clone()), + }); + offset += fat_bytes as u64; + + // FAT2 (copy) + regions.push(Region { + start: offset, + len: fat_bytes as u64, + region_type: RegionType::Data(fat_data), + }); + offset += fat_bytes as u64; + + // Data area: directory clusters + for block in &dir_blocks { + regions.push(Region { + start: offset, + len: CLUSTER_SIZE, + region_type: RegionType::Data(Arc::new(block.clone())), + }); + offset += CLUSTER_SIZE; + } + + // Data area: file clusters + for (_fi, f) in files.iter().enumerate() { + let mut file_offset = 0u64; + + for part in &f.regions { + match part { + FileDataRegion::FromFile { path, len } => { + regions.push(Region { + 
start: offset, + len: *len, + region_type: RegionType::File { path: path.clone() }, + }); + offset += len; + file_offset += len; + } + FileDataRegion::FromData(data) => { + let len = data.len() as u64; + regions.push(Region { + start: offset, + len, + region_type: RegionType::Data(Arc::new(data.clone())), + }); + offset += len; + file_offset += len; + } + FileDataRegion::Zero(len) => { + if *len > 0 { + regions.push(Region { + start: offset, + len: *len, + region_type: RegionType::Zero, + }); + offset += len; + file_offset += len; + } + } + } + } + + // Pad to cluster boundary + let used_in_last = file_offset % CLUSTER_SIZE; + if used_in_last > 0 { + let pad = CLUSTER_SIZE - used_in_last; + regions.push(Region { + start: offset, + len: pad, + region_type: RegionType::Zero, + }); + offset += pad; + } + } + + // Ensure total_size is correct + debug_assert!( + offset <= total_size, + "regions exceeded total_size: {} > {}", + offset, + total_size + ); + if offset < total_size { + regions.push(Region { + start: offset, + len: total_size - offset, + region_type: RegionType::Zero, + }); + } + + (regions, total_size) +} + +/// Build initrd regions: original file + 4-byte alignment + CPIO data. 
/// Fills in one 32-byte FAT short-name directory entry inside `buf`, starting at
/// byte offset `pos`.
///
/// Only the fields this image builder relies on are written: the 11-byte 8.3
/// name (bytes 0..11), the attribute byte (byte 11), the first-cluster number
/// split into its high (bytes 20..22) and low (bytes 26..28) 16-bit halves, and
/// the 32-bit file size (bytes 28..32). Every other byte of the entry keeps
/// whatever value it already had.
///
/// `size` is narrowed to its low 32 bits, which is all the on-disk field can hold.
///
/// # Panics
/// Panics if `buf` does not extend to at least `pos + 32` bytes.
fn write_dir_entry(buf: &mut [u8], pos: usize, name: &[u8; 11], attr: u8, cluster: u32, size: u64) {
    let entry = &mut buf[pos..pos + 32];

    let first_cluster_hi = (cluster >> 16) as u16;
    let first_cluster_lo = cluster as u16;
    let size_field = size as u32;

    entry[..11].copy_from_slice(name);
    entry[11] = attr;
    entry[20..22].copy_from_slice(&first_cluster_hi.to_le_bytes());
    entry[26..28].copy_from_slice(&first_cluster_lo.to_le_bytes());
    entry[28..32].copy_from_slice(&size_field.to_le_bytes());
}
u16).to_le_bytes()); + // Number of FATs + bpb[16] = NUM_FATS as u8; + // Root entry count (0 for FAT32) + bpb[17..19].copy_from_slice(&0u16.to_le_bytes()); + // Total sectors 16 (0 for FAT32) + bpb[19..21].copy_from_slice(&0u16.to_le_bytes()); + // Media type + bpb[21] = 0xF8; + // Sectors per FAT 16 (0 for FAT32) + bpb[22..24].copy_from_slice(&0u16.to_le_bytes()); + // Sectors per track + bpb[24..26].copy_from_slice(&32u16.to_le_bytes()); + // Number of heads + bpb[26..28].copy_from_slice(&64u16.to_le_bytes()); + // Hidden sectors + bpb[28..32].copy_from_slice(&0u32.to_le_bytes()); + // Total sectors 32 + bpb[32..36].copy_from_slice(&total_sectors.to_le_bytes()); + // --- FAT32 specific --- + // Sectors per FAT + bpb[36..40].copy_from_slice(&fat_sectors.to_le_bytes()); + // Extended flags + bpb[40..42].copy_from_slice(&0u16.to_le_bytes()); + // FS version + bpb[42..44].copy_from_slice(&0u16.to_le_bytes()); + // Root cluster + bpb[44..48].copy_from_slice(&2u32.to_le_bytes()); + // FSInfo sector + bpb[48..50].copy_from_slice(&1u16.to_le_bytes()); + // Backup boot sector + bpb[50..52].copy_from_slice(&6u16.to_le_bytes()); + // Reserved (12 bytes, already zero) + // Drive number + bpb[64] = 0x80; + // Boot signature + bpb[66] = 0x29; + // Volume serial number + bpb[67..71].copy_from_slice(&0x42424242u32.to_le_bytes()); + // Volume label + bpb[71..82].copy_from_slice(b"BCVK-ESP "); + // Filesystem type + bpb[82..90].copy_from_slice(b"FAT32 "); + // Boot signature + bpb[510] = 0x55; + bpb[511] = 0xAA; + bpb +} + +fn build_fsinfo(free_clusters: u32, next_free: u32) -> Vec { + let mut fs = vec![0u8; SECTOR_SIZE as usize]; + // Signature1 + fs[0..4].copy_from_slice(&0x41615252u32.to_le_bytes()); + // Signature2 + fs[484..488].copy_from_slice(&0x61417272u32.to_le_bytes()); + // Free cluster count + fs[488..492].copy_from_slice(&free_clusters.to_le_bytes()); + // Next free cluster + fs[492..496].copy_from_slice(&next_free.to_le_bytes()); + // Signature3 + 
fs[508..512].copy_from_slice(&0xAA550000u32.to_le_bytes()); + fs +} diff --git a/crates/nbdkit-erofs-plugin/src/gpt.rs b/crates/nbdkit-erofs-plugin/src/gpt.rs new file mode 100644 index 000000000..88e8bcf44 --- /dev/null +++ b/crates/nbdkit-erofs-plugin/src/gpt.rs @@ -0,0 +1,290 @@ +use crate::regions::{Region, RegionType}; +use std::sync::Arc; + +const SECTOR_SIZE: u64 = 512; +const GPT_HEADER_SIZE: u64 = 92; +const GPT_ENTRY_SIZE: u64 = 128; +const GPT_ENTRIES: u64 = 128; + +// EFI System Partition type GUID +const ESP_TYPE_GUID: [u8; 16] = [ + 0x28, 0x73, 0x2A, 0xC1, 0x1F, 0xF8, 0xD2, 0x11, 0xBA, 0x4B, 0x00, 0xA0, 0xC9, 0x3E, 0xC9, 0x3B, +]; + +// Linux filesystem type GUID +const LINUX_TYPE_GUID: [u8; 16] = [ + 0xAF, 0x3D, 0xC6, 0x0F, 0x83, 0x84, 0x72, 0x47, 0x8E, 0x79, 0x3D, 0x69, 0xD8, 0x47, 0x7D, 0xE4, +]; + +pub struct DiskLayout { + pub regions: Vec, + pub total_size: u64, +} + +pub fn build_gpt_disk( + esp_regions: Vec, + esp_size: u64, + erofs_regions: Vec, + erofs_size: u64, +) -> std::io::Result { + // GPT layout: + // LBA 0: Protective MBR + // LBA 1: GPT Header + // LBA 2-33: Partition Table (128 entries * 128 bytes = 16384 bytes = 32 sectors) + // LBA 34+: ESP partition (aligned to 2048 sectors / 1MB) + // After ESP: EROFS partition + // End: Backup GPT + + let partition_table_sectors = (GPT_ENTRIES * GPT_ENTRY_SIZE + SECTOR_SIZE - 1) / SECTOR_SIZE; + let first_usable_lba = 34u64; // standard + let esp_start_lba = 2048u64; // 1MB aligned + let esp_sectors = (esp_size + SECTOR_SIZE - 1) / SECTOR_SIZE; + let erofs_start_lba = esp_start_lba + esp_sectors; + // Align to 2048 sectors + let erofs_start_lba = (erofs_start_lba + 2047) & !2047; + let erofs_sectors = (erofs_size + SECTOR_SIZE - 1) / SECTOR_SIZE; + let last_usable_lba = erofs_start_lba + erofs_sectors - 1; + let backup_table_lba = last_usable_lba + 1; + let backup_header_lba = backup_table_lba + partition_table_sectors; + let total_sectors = backup_header_lba + 1; + let total_size = 
total_sectors * SECTOR_SIZE; + + // Build partition table entries + let mut partition_table = vec![0u8; (GPT_ENTRIES * GPT_ENTRY_SIZE) as usize]; + + // Entry 0: ESP + write_gpt_entry( + &mut partition_table, + 0, + &ESP_TYPE_GUID, + esp_start_lba, + esp_start_lba + esp_sectors - 1, + b"EFI System", + ); + + // Entry 1: EROFS rootfs + write_gpt_entry( + &mut partition_table, + 1, + &LINUX_TYPE_GUID, + erofs_start_lba, + erofs_start_lba + erofs_sectors - 1, + b"root", + ); + + let partition_table_crc = crc32fast::hash(&partition_table); + + // Build GPT header + let mut gpt_header = vec![0u8; SECTOR_SIZE as usize]; + write_gpt_header( + &mut gpt_header, + 1, // my LBA + backup_header_lba, + first_usable_lba, + last_usable_lba, + 2, // partition table LBA + 2, // num entries used + partition_table_crc, + ); + + // Build backup GPT header + let mut backup_header = vec![0u8; SECTOR_SIZE as usize]; + write_gpt_header( + &mut backup_header, + backup_header_lba, + 1, // alternate LBA + first_usable_lba, + last_usable_lba, + backup_table_lba, + 2, + partition_table_crc, + ); + + // Build protective MBR + let mut mbr = vec![0u8; SECTOR_SIZE as usize]; + write_protective_mbr(&mut mbr, total_sectors); + + // Assemble regions + let mut regions = Vec::new(); + + // MBR + regions.push(Region { + start: 0, + len: SECTOR_SIZE, + region_type: RegionType::Data(Arc::new(mbr)), + }); + + // GPT Header + regions.push(Region { + start: SECTOR_SIZE, + len: SECTOR_SIZE, + region_type: RegionType::Data(Arc::new(gpt_header)), + }); + + // Partition Table + regions.push(Region { + start: 2 * SECTOR_SIZE, + len: partition_table.len() as u64, + region_type: RegionType::Data(Arc::new(partition_table.clone())), + }); + + // Padding to ESP start + let pad_start = 2 * SECTOR_SIZE + partition_table.len() as u64; + let esp_byte_offset = esp_start_lba * SECTOR_SIZE; + if esp_byte_offset > pad_start { + regions.push(Region { + start: pad_start, + len: esp_byte_offset - pad_start, + region_type: 
RegionType::Zero, + }); + } + + // ESP partition (from provided regions, offset-adjusted) + for mut r in esp_regions { + r.start += esp_byte_offset; + regions.push(r); + } + + // Padding between ESP and EROFS + let esp_end = esp_byte_offset + esp_size; + let erofs_byte_offset = erofs_start_lba * SECTOR_SIZE; + if erofs_byte_offset > esp_end { + regions.push(Region { + start: esp_end, + len: erofs_byte_offset - esp_end, + region_type: RegionType::Zero, + }); + } + + // EROFS partition (offset all regions) + for mut r in erofs_regions { + r.start += erofs_byte_offset; + regions.push(r); + } + + // Padding to backup GPT + let erofs_end = erofs_byte_offset + erofs_size; + let backup_table_offset = backup_table_lba * SECTOR_SIZE; + if backup_table_offset > erofs_end { + regions.push(Region { + start: erofs_end, + len: backup_table_offset - erofs_end, + region_type: RegionType::Zero, + }); + } + + // Backup partition table + regions.push(Region { + start: backup_table_offset, + len: partition_table.len() as u64, + region_type: RegionType::Data(Arc::new(partition_table)), + }); + + // Backup GPT header + regions.push(Region { + start: backup_header_lba * SECTOR_SIZE, + len: SECTOR_SIZE, + region_type: RegionType::Data(Arc::new(backup_header)), + }); + + Ok(DiskLayout { + regions, + total_size, + }) +} + +fn write_gpt_entry( + table: &mut [u8], + index: usize, + type_guid: &[u8; 16], + first_lba: u64, + last_lba: u64, + name: &[u8], +) { + let off = index * GPT_ENTRY_SIZE as usize; + // Partition type GUID + table[off..off + 16].copy_from_slice(type_guid); + // Unique partition GUID (generate simple one from index) + let mut unique = [0u8; 16]; + unique[0] = index as u8 + 1; + unique[15] = 0x42; + table[off + 16..off + 32].copy_from_slice(&unique); + // First LBA + table[off + 32..off + 40].copy_from_slice(&first_lba.to_le_bytes()); + // Last LBA + table[off + 40..off + 48].copy_from_slice(&last_lba.to_le_bytes()); + // Attributes + table[off + 48..off + 
/// Writes the single protective partition record and the boot signature of a
/// GPT protective MBR into `buf`.
///
/// The 16-byte record is placed at offset 446: not bootable, starting CHS
/// `00 02 00`, type `0xEE`, ending CHS `FF FF FF`, starting LBA 1, and a sector
/// count of `total_sectors - 1` clamped to `0xFFFF_FFFF`. Bytes 510..512 receive
/// the `55 AA` signature. No other byte of `buf` is modified.
///
/// # Panics
/// Panics if `buf` is shorter than 512 bytes.
fn write_protective_mbr(buf: &mut [u8], total_sectors: u64) {
    const RECORD_OFFSET: usize = 446;
    const RECORD_LEN: usize = 16;

    // Everything after LBA 0, clamped to what the 32-bit field can express.
    let covered_sectors = (total_sectors - 1).min(0xFFFFFFFF) as u32;

    let mut record = [0u8; RECORD_LEN];
    record[0] = 0x00; // not bootable
    record[1..4].copy_from_slice(&[0x00, 0x02, 0x00]); // CHS start
    record[4] = 0xEE; // type: GPT protective
    record[5..8].copy_from_slice(&[0xFF, 0xFF, 0xFF]); // CHS end
    record[8..12].copy_from_slice(&1u32.to_le_bytes()); // LBA start
    record[12..16].copy_from_slice(&covered_sectors.to_le_bytes()); // LBA size

    buf[RECORD_OFFSET..RECORD_OFFSET + RECORD_LEN].copy_from_slice(&record);
    // Boot signature
    buf[510..512].copy_from_slice(&[0x55, 0xAA]);
}
ConditionPathExists=/etc/initrd-release\n\ + Before=initrd-fs.target\n\ + After=sysroot.mount initrd-parse-etc.service\n\ + Requires=sysroot.mount\n\ + \n\ + [Service]\n\ + Type=oneshot\n\ + RemainAfterExit=yes\n\ + TimeoutStartSec=60\n\ + ExecStart=/usr/bin/mkdir -p /run/var-ephemeral\n\ + ExecStart=/usr/bin/cp -a /sysroot/var/. /run/var-ephemeral/\n\ + ExecStart=/usr/bin/mount --bind /run/var-ephemeral /sysroot/var\n", + ); + + write_file( + &mut out, + "usr/lib/systemd/system/bcvk-etc-overlay.service", + b"[Unit]\n\ + Description=Setup ephemeral /etc overlay\n\ + DefaultDependencies=no\n\ + ConditionPathExists=/etc/initrd-release\n\ + Before=initrd-fs.target\n\ + After=sysroot.mount initrd-parse-etc.service\n\ + Requires=sysroot.mount\n\ + \n\ + [Service]\n\ + Type=oneshot\n\ + RemainAfterExit=yes\n\ + TimeoutStartSec=30\n\ + ExecStart=/usr/bin/mkdir -p /run/etc-lower /run/etc-upper /run/etc-work\n\ + ExecStart=/usr/bin/mount --bind /sysroot/etc /run/etc-lower\n\ + ExecStart=/usr/bin/mount -t overlay overlay -o lowerdir=/run/etc-lower,upperdir=/run/etc-upper,workdir=/run/etc-work,index=off,metacopy=off /sysroot/etc\n", + ); + + write_file( + &mut out, + "usr/lib/systemd/system/bcvk-copy-units.service", + b"[Unit]\n\ + Description=Copy bcvk units for post-switch-root on systemd <256\n\ + DefaultDependencies=no\n\ + ConditionPathExists=/etc/initrd-release\n\ + Before=initrd-fs.target\n\ + \n\ + [Service]\n\ + Type=oneshot\n\ + RemainAfterExit=yes\n\ + ExecStart=/bin/sh -c 'mkdir -p /run/systemd/system/sysinit.target.wants && cp /usr/lib/systemd/system/bcvk-journal-stream.service /run/systemd/system/ && ln -s ../bcvk-journal-stream.service /run/systemd/system/sysinit.target.wants/'\n", + ); + + write_file( + &mut out, + "usr/lib/systemd/system/bcvk-journal-stream.service", + b"[Unit]\n\ + Description=Stream journal to virtio-serial\n\ + DefaultDependencies=no\n\ + \n\ + [Service]\n\ + Type=simple\n\ + ExecStart=/bin/sh -c 'journalctl -f --no-hostname -o 
short-monotonic > /dev/hvc1 2>&1 || true'\n", + ); + + write_file( + &mut out, + "usr/lib/systemd/system/initrd-fs.target.d/bcvk-var-ephemeral.conf", + b"[Unit]\nWants=bcvk-var-ephemeral.service\n", + ); + write_file( + &mut out, + "usr/lib/systemd/system/initrd-fs.target.d/bcvk-etc-overlay.conf", + b"[Unit]\nWants=bcvk-etc-overlay.service\n", + ); + write_file( + &mut out, + "usr/lib/systemd/system/initrd-fs.target.d/bcvk-copy-units.conf", + b"[Unit]\nWants=bcvk-copy-units.service\n", + ); + + cpio::newc::trailer(out).unwrap() +} + +pub fn build_ssh_cpio(pubkey: &str) -> Vec { + let mut out = Vec::with_capacity(4096); + + write_dir(&mut out, "usr"); + write_dir(&mut out, "usr/lib"); + write_dir(&mut out, "usr/lib/bcvk"); + write_dir(&mut out, "usr/lib/systemd"); + write_dir(&mut out, "usr/lib/systemd/system"); + write_dir(&mut out, "usr/lib/systemd/system/initrd-fs.target.d"); + + let setup_script = format!( + "#!/bin/bash\n\ + mkdir -p /sysroot/var/roothome /sysroot/var/empty /sysroot/var/log /sysroot/var/tmp\n\ + chmod 700 /sysroot/var/roothome\n\ + chmod 711 /sysroot/var/empty\n\ + mkdir -p /sysroot/var/roothome/.ssh\n\ + chmod 700 /sysroot/var/roothome/.ssh\n\ + echo '{}' > /sysroot/var/roothome/.ssh/authorized_keys\n\ + chmod 600 /sysroot/var/roothome/.ssh/authorized_keys\n\ + chown -R 0:0 /sysroot/var/roothome/.ssh\n", + pubkey + ); + write_file_exec( + &mut out, + "usr/lib/bcvk/setup-ssh.sh", + setup_script.as_bytes(), + ); + + write_file( + &mut out, + "usr/lib/systemd/system/bcvk-ssh-setup.service", + b"[Unit]\n\ + Description=Setup SSH authorized_keys for root\n\ + DefaultDependencies=no\n\ + ConditionPathExists=/etc/initrd-release\n\ + Before=initrd-fs.target\n\ + After=bcvk-var-ephemeral.service\n\ + Requires=bcvk-var-ephemeral.service\n\ + \n\ + [Service]\n\ + Type=oneshot\n\ + RemainAfterExit=yes\n\ + ExecStart=/usr/bin/bash /usr/lib/bcvk/setup-ssh.sh\n", + ); + + write_file( + &mut out, + 
"usr/lib/systemd/system/initrd-fs.target.d/bcvk-ssh-setup.conf", + b"[Unit]\nWants=bcvk-ssh-setup.service\n", + ); + + cpio::newc::trailer(out).unwrap() +} diff --git a/crates/nbdkit-erofs-plugin/src/lib.rs b/crates/nbdkit-erofs-plugin/src/lib.rs new file mode 100644 index 000000000..b2cd4075c --- /dev/null +++ b/crates/nbdkit-erofs-plugin/src/lib.rs @@ -0,0 +1,389 @@ +mod dir_walk; +mod erofs; +mod fat32; +mod gpt; +mod initramfs; +mod regions; + +use std::ffi::{c_char, c_int, c_void, CStr, CString}; +use std::path::PathBuf; +use std::sync::Mutex; + +use regions::Region; + +static PLUGIN_STATE: Mutex> = Mutex::new(None); + +struct PluginState { + dir: PathBuf, + cmdline: Option, + ssh_pubkey: Option, + regions: Vec, + total_size: u64, +} + +// --- nbdkit C FFI --- + +extern "C" { + fn nbdkit_error(fmt: *const c_char, ...); +} + +fn log_error(msg: &str) { + let c = CString::new(msg).unwrap_or_default(); + unsafe { nbdkit_error(b"%s\0".as_ptr() as *const c_char, c.as_ptr()) }; +} + +// --- Plugin callbacks --- + +#[no_mangle] +pub extern "C" fn plugin_config(key: *const c_char, value: *const c_char) -> c_int { + let key = unsafe { CStr::from_ptr(key) }.to_str().unwrap_or(""); + let value = unsafe { CStr::from_ptr(value) }.to_str().unwrap_or(""); + + let mut state = PLUGIN_STATE.lock().unwrap(); + let state = state.get_or_insert_with(|| PluginState { + dir: PathBuf::new(), + cmdline: None, + ssh_pubkey: None, + regions: Vec::new(), + total_size: 0, + }); + + match key { + "dir" => state.dir = PathBuf::from(value), + "cmdline" => state.cmdline = Some(value.to_string()), + "ssh_pubkey" => state.ssh_pubkey = Some(value.to_string()), + _ => { + log_error(&format!("unknown parameter: {}", key)); + return -1; + } + } + 0 +} + +#[no_mangle] +pub extern "C" fn plugin_config_complete() -> c_int { + let state = PLUGIN_STATE.lock().unwrap(); + let state = match state.as_ref() { + Some(s) => s, + None => { + log_error("dir parameter is required"); + return -1; + } + }; + + if 
/// Locates a kernel and its initramfs inside an image root.
///
/// Looks at each entry of `<dir>/usr/lib/modules` and returns the paths
/// `(vmlinuz, initramfs.img)` of the first entry that contains both files.
/// Entries are visited in sorted path order so the result does not depend on
/// the order in which the filesystem happens to list them.
///
/// Returns `None` when the modules directory cannot be read or no entry holds
/// both files.
fn find_kernel_dir(dir: &std::path::Path) -> Option<(PathBuf, PathBuf)> {
    let modules = dir.join("usr/lib/modules");
    let mut kernel_dirs: Vec<PathBuf> = std::fs::read_dir(&modules)
        .ok()?
        .flatten()
        .map(|entry| entry.path())
        .collect();
    kernel_dirs.sort();

    kernel_dirs.into_iter().find_map(|kdir| {
        let vmlinuz = kdir.join("vmlinuz");
        let initramfs = kdir.join("initramfs.img");
        (vmlinuz.exists() && initramfs.exists()).then_some((vmlinuz, initramfs))
    })
}

/// Searches `<dir>/usr/lib` recursively for a file named `grubaa64.efi`.
///
/// Directories are walked depth-first with their entries in sorted path order,
/// so when several copies exist the same one is returned on every run.
/// Unreadable directories are skipped. Returns `None` when no such file exists.
fn find_grub(dir: &std::path::Path) -> Option<PathBuf> {
    fn walk(path: &std::path::Path, target: &str) -> Option<PathBuf> {
        let mut children: Vec<PathBuf> = std::fs::read_dir(path)
            .ok()?
            .flatten()
            .map(|entry| entry.path())
            .collect();
        children.sort();

        for p in children {
            if p.is_file() && p.file_name().map(|n| n == target).unwrap_or(false) {
                return Some(p);
            }
            if p.is_dir() {
                if let Some(found) = walk(&p, target) {
                    return Some(found);
                }
            }
        }
        None
    }
    walk(&dir.join("usr/lib"), "grubaa64.efi")
}
find_grub(&state.dir) { + Some(p) => p, + None => { + log_error("grubaa64.efi not found in dir/usr/lib/"); + return -1; + } + }; + + fn file_size(path: &std::path::Path) -> Option { + match std::fs::metadata(path) { + Ok(m) => Some(m.len()), + Err(e) => { + log_error(&format!("cannot stat {:?}: {}", path, e)); + None + } + } + } + + let Some(kernel_size) = file_size(&kernel_path) else { + return -1; + }; + let Some(initrd_size) = file_size(&initrd_path) else { + return -1; + }; + let Some(grub_size) = file_size(&grub_path) else { + return -1; + }; + + let cmdline = state.cmdline.as_deref().unwrap_or(""); + + // Generate grub.cfg + let grub_cfg = format!( + "set timeout=0\nset default=0\nmenuentry \"bcvk\" {{\n linux /boot/vmlinuz {}\n initrd /boot/initrd.img\n}}\n", + cmdline + ); + + // Generate CPIO archives + let units_cpio = initramfs::build_units_cpio(); + let ssh_cpio = state.ssh_pubkey.as_deref().map(initramfs::build_ssh_cpio); + + // Build initrd regions (original file + padding + CPIO) + let (initrd_parts, initrd_total) = + fat32::build_initrd_regions(&initrd_path, initrd_size, &units_cpio, ssh_cpio.as_deref()); + + // Build ESP regions + let (esp_regions, esp_size) = fat32::build_esp_regions( + &grub_path, + grub_size, + grub_cfg.as_bytes(), + &kernel_path, + kernel_size, + initrd_parts, + initrd_total, + ); + + // Build GPT disk with ESP + EROFS + match gpt::build_gpt_disk( + esp_regions, + esp_size, + erofs_regions, + erofs_layout.total_size, + ) { + Ok(disk) => { + state.regions = disk.regions; + state.total_size = disk.total_size; + } + Err(e) => { + log_error(&format!("failed to build GPT disk: {}", e)); + return -1; + } + } + + 0 +} + +#[no_mangle] +pub extern "C" fn plugin_open(_readonly: c_int) -> *mut c_void { + 1 as *mut c_void +} + +#[no_mangle] +pub extern "C" fn plugin_close(_handle: *mut c_void) {} + +#[no_mangle] +pub extern "C" fn plugin_get_size(_handle: *mut c_void) -> i64 { + let state = PLUGIN_STATE.lock().unwrap(); + 
state.as_ref().map(|s| s.total_size as i64).unwrap_or(-1) +} + +#[no_mangle] +pub extern "C" fn plugin_can_multi_conn(_handle: *mut c_void) -> c_int { + 1 +} + +#[no_mangle] +pub extern "C" fn plugin_pread( + _handle: *mut c_void, + buf: *mut c_void, + count: u32, + offset: u64, + _flags: u32, +) -> c_int { + let state = PLUGIN_STATE.lock().unwrap(); + let state = match state.as_ref() { + Some(s) => s, + None => return -1, + }; + + let buf = unsafe { std::slice::from_raw_parts_mut(buf as *mut u8, count as usize) }; + + match regions::pread(&state.regions, buf, offset) { + Ok(()) => 0, + Err(e) => { + log_error(&format!("pread error at offset {}: {}", offset, e)); + -1 + } + } +} + +// --- Plugin registration --- + +#[repr(C)] +pub struct NbdkitPlugin { + _struct_size: u64, + _api_version: c_int, + _thread_model: c_int, + name: *const c_char, + longname: *const c_char, + version: *const c_char, + description: *const c_char, + load: Option, + unload: Option, + config: Option c_int>, + config_complete: Option c_int>, + config_help: *const c_char, + open: Option *mut c_void>, + close: Option, + get_size: Option i64>, + can_write: Option c_int>, + can_flush: Option c_int>, + is_rotational: Option c_int>, + can_trim: Option c_int>, + _pread_v1: Option c_int>, + _pwrite_v1: Option c_int>, + _flush_v1: Option c_int>, + _trim_v1: Option c_int>, + _zero_v1: Option c_int>, + errno_is_preserved: c_int, + dump_plugin: Option, + can_zero: Option c_int>, + can_fua: Option c_int>, + pread: Option c_int>, + pwrite: Option c_int>, + flush: Option c_int>, + trim: Option c_int>, + zero: Option c_int>, + magic_config_key: *const c_char, + can_multi_conn: Option c_int>, + can_extents: Option c_int>, + extents: Option c_int>, + can_cache: Option c_int>, + cache: Option c_int>, + thread_model: Option c_int>, + can_fast_zero: Option c_int>, + preconnect: Option c_int>, + get_ready: Option c_int>, + after_fork: Option c_int>, + // Fields after after_fork (list_exports, default_export, 
export_description, + // cleanup, block_size) are omitted. nbdkit uses _struct_size to determine + // which fields are present, so omitting trailing fields is safe. +} + +unsafe impl Sync for NbdkitPlugin {} + +static PLUGIN_NAME: &[u8] = b"erofs\0"; +static PLUGIN_LONGNAME: &[u8] = b"nbdkit EROFS plugin\0"; +static PLUGIN_VERSION: &[u8] = b"0.2.0\0"; +static PLUGIN_DESCRIPTION: &[u8] = b"Create virtual EROFS+ESP disk from directory\0"; +static PLUGIN_CONFIG_HELP: &[u8] = b"dir= (required) Container overlay merged directory\ncmdline= (required) Kernel command line for grub.cfg\nssh_pubkey= SSH public key for root access\0"; +static PLUGIN_MAGIC_KEY: &[u8] = b"dir\0"; + +static PLUGIN: NbdkitPlugin = NbdkitPlugin { + _struct_size: std::mem::size_of::() as u64, + _api_version: 2, + _thread_model: 0, + name: PLUGIN_NAME.as_ptr() as *const c_char, + longname: PLUGIN_LONGNAME.as_ptr() as *const c_char, + version: PLUGIN_VERSION.as_ptr() as *const c_char, + description: PLUGIN_DESCRIPTION.as_ptr() as *const c_char, + load: None, + unload: None, + config: Some(plugin_config), + config_complete: Some(plugin_config_complete), + config_help: PLUGIN_CONFIG_HELP.as_ptr() as *const c_char, + open: Some(plugin_open), + close: Some(plugin_close), + get_size: Some(plugin_get_size), + can_write: None, + can_flush: None, + is_rotational: None, + can_trim: None, + _pread_v1: None, + _pwrite_v1: None, + _flush_v1: None, + _trim_v1: None, + _zero_v1: None, + errno_is_preserved: 1, + dump_plugin: None, + can_zero: None, + can_fua: None, + pread: Some(plugin_pread), + pwrite: None, + flush: None, + trim: None, + zero: None, + magic_config_key: PLUGIN_MAGIC_KEY.as_ptr() as *const c_char, + can_multi_conn: Some(plugin_can_multi_conn), + can_extents: None, + extents: None, + can_cache: None, + cache: None, + thread_model: None, + can_fast_zero: None, + preconnect: None, + get_ready: Some(plugin_get_ready), + after_fork: None, +}; + +#[no_mangle] +pub extern "C" fn plugin_init() -> *const 
/// Backing store of one contiguous span of the virtual disk.
#[derive(Debug, Clone)]
pub enum RegionType {
    /// Bytes held in memory; byte `i` of the region is `data[i]`.
    Data(Arc<Vec<u8>>),
    /// Bytes read from a file; byte `i` of the region is byte `i` of the file.
    File { path: PathBuf },
    /// A run of zero bytes.
    Zero,
}

/// One contiguous span `[start, start + len)` of the virtual disk.
#[derive(Debug, Clone)]
pub struct Region {
    /// Absolute byte offset of the first byte of the region.
    pub start: u64,
    /// Length of the region in bytes.
    pub len: u64,
    /// Where the region's bytes come from.
    pub region_type: RegionType,
}

impl Region {
    /// Absolute byte offset one past the last byte of the region.
    pub fn end(&self) -> u64 {
        self.start + self.len
    }
}

/// Returns the region containing absolute byte `offset`, if any.
///
/// Uses a binary search, so `regions` must be sorted by `start` and must not
/// overlap.
pub fn find_region(regions: &[Region], offset: u64) -> Option<&Region> {
    regions
        .binary_search_by(|r| {
            if offset < r.start {
                std::cmp::Ordering::Greater
            } else if offset >= r.end() {
                std::cmp::Ordering::Less
            } else {
                std::cmp::Ordering::Equal
            }
        })
        .ok()
        .map(|i| &regions[i])
}

/// Fills `buf` with the disk contents starting at absolute byte `offset`,
/// crossing region boundaries as needed.
///
/// # Errors
/// - `ErrorKind::InvalidInput` if any requested byte lies outside every region.
/// - `ErrorKind::InvalidData` if a `Data` region holds fewer bytes than its
///   declared length requires.
/// - Any I/O error from opening or reading a `File` region.
///
/// On error, `buf` may already have been partially written.
pub fn pread(regions: &[Region], buf: &mut [u8], offset: u64) -> std::io::Result<()> {
    let mut filled = 0usize;
    let mut disk_offset = offset;

    while filled < buf.len() {
        let region = find_region(regions, disk_offset).ok_or_else(|| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                format!("offset {} outside disk", disk_offset),
            )
        })?;

        let region_offset = disk_offset - region.start;
        // Take the minimum in u64 first: the result is bounded by the remaining
        // buffer length, so narrowing it back to usize cannot truncate.
        let wanted = (buf.len() - filled) as u64;
        let len = wanted.min(region.len - region_offset) as usize;
        let dest = &mut buf[filled..filled + len];

        match &region.region_type {
            RegionType::Data(data) => {
                // A region whose backing vector is too short is reported as an
                // error instead of panicking on an out-of-range slice.
                let src = data
                    .get(region_offset as usize..)
                    .and_then(|tail| tail.get(..len))
                    .ok_or_else(|| {
                        std::io::Error::new(
                            std::io::ErrorKind::InvalidData,
                            format!(
                                "data region at {} is shorter than its declared length {}",
                                region.start, region.len
                            ),
                        )
                    })?;
                dest.copy_from_slice(src);
            }
            RegionType::File { path } => {
                use std::os::unix::fs::FileExt;
                let f = std::fs::File::open(path)?;
                f.read_exact_at(dest, region_offset)?;
            }
            RegionType::Zero => dest.fill(0),
        }

        filled += len;
        disk_offset += len as u64;
    }

    Ok(())
}
"nbdkit-erofs-plugin" version = "0.1.0" dependencies = [ + "cpio", "crc32fast", "libc", ] diff --git a/crates/kit/src/ephemeral_macos.rs b/crates/kit/src/ephemeral_macos.rs index 8d46075f4..97d599ec7 100644 --- a/crates/kit/src/ephemeral_macos.rs +++ b/crates/kit/src/ephemeral_macos.rs @@ -11,7 +11,7 @@ use crate::run_ephemeral_macos::{self, EphemeralVmMetadata}; /// Options for `ephemeral run-ssh`, combining run options with optional SSH arguments. #[derive(Debug, clap::Parser)] -pub struct RunSshOpts { +pub struct RunEphemeralSshOpts { #[command(flatten)] pub run_opts: run_ephemeral_macos::RunEphemeralOpts, @@ -28,7 +28,7 @@ pub enum EphemeralCommands { /// Run ephemeral VM and SSH into it #[clap(name = "run-ssh")] - RunSsh(RunSshOpts), + RunSsh(RunEphemeralSshOpts), /// Connect to a running ephemeral VM via SSH #[clap(name = "ssh")] @@ -151,6 +151,12 @@ fn cmd_rm_all(force: bool) -> Result<()> { tracing::warn!("failed to kill gvproxy {}: {}", vm.gvproxy_pid, e); } } + // Wait for the VM process to exit so cleanup (VmCleanup::drop in + // the detached child) finishes before we proceed. 
+ let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5); + while vm.is_alive() && std::time::Instant::now() < deadline { + std::thread::sleep(std::time::Duration::from_millis(100)); + } } if let Some(ref container) = vm.nbd_container { crate::nbdkit_macos::stop_nbdkit_container(container); @@ -201,7 +207,7 @@ fn cmd_ssh(name: &str, args: &[String]) -> Result<()> { let svc_sock = format!("{}/{}-gvproxy-svc.sock", base.display(), name); if std::path::Path::new(&svc_sock).exists() { if let Err(e) = - run_ephemeral_macos::expose_ssh_port(&svc_sock, "192.168.127.2", vm.ssh_port) + run_ephemeral_macos::expose_port(&svc_sock, "192.168.127.2", vm.ssh_port, 22) { tracing::debug!("SSH port forward re-expose: {}", e); } diff --git a/crates/kit/src/install_options.rs b/crates/kit/src/install_options.rs index a284558cb..61908d275 100644 --- a/crates/kit/src/install_options.rs +++ b/crates/kit/src/install_options.rs @@ -5,7 +5,7 @@ //! and other installation-related commands. // On non-Linux, this module is unused as it's for installation operations -#![cfg_attr(not(target_os = "linux"), allow(dead_code))] +#![cfg_attr(not(any(target_os = "linux", target_os = "macos")), allow(dead_code))] use camino::Utf8PathBuf; use clap::Parser; @@ -52,6 +52,10 @@ pub struct InstallOptions { /// backend #[clap(long, requires = "composefs_backend")] pub allow_missing_fsverity: bool, + + /// Path to an authorized_keys file to inject into the root account + #[clap(long)] + pub root_ssh_authorized_keys: Option, } impl InstallOptions { @@ -91,6 +95,11 @@ impl InstallOptions { args.push("--allow-missing-fsverity".into()); } + if let Some(ref key_path) = self.root_ssh_authorized_keys { + args.push("--root-ssh-authorized-keys".to_string()); + args.push(key_path.to_string()); + } + args } } diff --git a/crates/kit/src/instancetypes.rs b/crates/kit/src/instancetypes.rs index 70a8c4ccb..aa43dfc0a 100644 --- a/crates/kit/src/instancetypes.rs +++ b/crates/kit/src/instancetypes.rs @@ 
-8,14 +8,14 @@ //! Instance types follow the format: u1.{size} //! Examples: u1.nano, u1.micro, u1.small, u1.medium, u1.large, etc. //! -//! Source: https://github.com/kubevirt/common-instancetypes +//! Source: <https://github.com/kubevirt/common-instancetypes> // On non-Linux, this module is unused as it's for VM instance types -#![cfg_attr(not(target_os = "linux"), allow(dead_code))] +#![cfg_attr(not(any(target_os = "linux", target_os = "macos")), allow(dead_code))] /// Instance type variants with associated vCPU and memory specifications /// -/// Source: https://github.com/kubevirt/common-instancetypes/blob/main/instancetypes/u/1/sizes.yaml +/// Source: <https://github.com/kubevirt/common-instancetypes/blob/main/instancetypes/u/1/sizes.yaml> #[derive( Debug, Clone, diff --git a/crates/kit/src/lib.rs b/crates/kit/src/lib.rs index d7257cb8e..f98fce92e 100644 --- a/crates/kit/src/lib.rs +++ b/crates/kit/src/lib.rs @@ -5,6 +5,7 @@ pub mod qemu_img; pub mod xml_utils; // Cross-platform modules +pub mod install_options; pub mod ssh_options; // Linux-only modules @@ -17,5 +18,10 @@ pub mod nbdkit_macos; #[cfg(target_os = "macos")] pub mod run_ephemeral_macos; +#[cfg(target_os = "macos")] +pub mod instancetypes; +#[cfg(target_os = "macos")] +pub mod to_disk_macos; #[cfg(target_os = "macos")] pub mod vfkit; +pub mod vm_helpers; diff --git a/crates/kit/src/main.rs b/crates/kit/src/main.rs index b92d35783..efa91d614 100644 --- a/crates/kit/src/main.rs +++ b/crates/kit/src/main.rs @@ -69,7 +69,11 @@ mod nbdkit_macos; #[cfg(target_os = "macos")] mod run_ephemeral_macos; #[cfg(target_os = "macos")] +mod to_disk_macos; +#[cfg(target_os = "macos")] mod vfkit; +#[cfg(target_os = "macos")] +mod vm_helpers; /// Default state directory for bcvk container data #[cfg(target_os = "linux")] @@ -159,9 +163,15 @@ enum Commands { // macOS: vfkit-based persistent VMs #[cfg(target_os = "macos")] /// Manage persistent VMs (vfkit backend) - #[clap(subcommand)] + #[clap(subcommand, alias = "vfkit")] Vm(vfkit::VmCommands), + // macOS: to-disk + #[cfg(target_os = "macos")] + /// Install bootc images to persistent disk images +
#[clap(name = "to-disk")] + ToDisk(to_disk_macos::ToDiskMacosOpts), + // Other platforms: stub #[cfg(not(any(target_os = "linux", target_os = "macos")))] /// Manage ephemeral VMs for bootc containers (not available on this platform) @@ -313,6 +323,11 @@ fn main() -> Result<(), Report> { #[cfg(target_os = "macos")] Commands::Vm(cmd) => cmd.run()?, + #[cfg(target_os = "macos")] + Commands::ToDisk(opts) => { + to_disk_macos::run(opts)?; + } + #[cfg(not(any(target_os = "linux", target_os = "macos")))] Commands::Ephemeral(_) => { return Err(color_eyre::eyre::eyre!( diff --git a/crates/kit/src/nbdkit_macos.rs b/crates/kit/src/nbdkit_macos.rs index 40c2cc20e..d8e13c91e 100644 --- a/crates/kit/src/nbdkit_macos.rs +++ b/crates/kit/src/nbdkit_macos.rs @@ -8,10 +8,14 @@ use std::process::{Command, Stdio}; use std::time::Duration; use tracing::info; -use crate::run_ephemeral_macos::detect_machine_name; +use crate::vm_helpers::detect_machine_name; -/// Path to the nbdkit EROFS plugin shared library inside podman machine. -const NBDKIT_EROFS_PLUGIN_PATH: &str = "/var/tmp/bcvk/libnbdkit_erofs_plugin.so"; +/// EROFS plugin shared library, embedded at compile time. +const EROFS_PLUGIN_SO: &[u8] = include_bytes!("../nbdkit-erofs-plugin.so"); + +fn shell_escape(s: &str) -> String { + format!("'{}'", s.replace('\'', "'\\''")) +} /// Get the merged overlay path from podman image mount. pub(crate) fn get_merged_path(machine: &str, rootful: bool, image: &str) -> Result { @@ -38,7 +42,33 @@ pub(crate) fn get_merged_path(machine: &str, rootful: bool, image: &str) -> Resu Ok(String::from_utf8_lossy(&output.stdout).trim().to_string()) } -/// Start nbdkit with the erofs plugin for dynamic EROFS + ESP + GPT generation. +/// Ensure the nbdkit container image exists in podman machine. +/// On first run, transfers embedded .so and builds container image. 
+pub(crate) fn ensure_nbdkit_ready(machine: &str) -> Result<()> { + let script = crate::vm_helpers::nbdkit_setup_script(EROFS_PLUGIN_SO); + info!("checking nbdkit container image..."); + let mut child = Command::new("podman") + .args(["machine", "ssh", machine, "--", "bash", "-s"]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .context("nbdkit setup in podman machine")?; + if let Some(mut stdin) = child.stdin.take() { + use std::io::Write; + stdin.write_all(script.as_bytes())?; + } + let output = child.wait_with_output()?; + if !output.status.success() { + bail!( + "nbdkit setup failed: {}", + String::from_utf8_lossy(&output.stderr).trim() + ); + } + Ok(()) +} + +#[allow(dead_code)] pub(crate) fn start_nbdkit_erofs_plugin( machine: &str, merged_path: &str, @@ -64,10 +94,6 @@ pub(crate) fn start_nbdkit_erofs_plugin( .stderr(Stdio::null()) .status(); - fn shell_escape(s: &str) -> String { - format!("'{}'", s.replace('\'', "'\\''")) - } - let cmdline_esc = shell_escape(&format!("cmdline={}", cmdline)); let dir_esc = shell_escape(&format!("dir={}", merged_path)); @@ -80,16 +106,13 @@ pub(crate) fn start_nbdkit_erofs_plugin( "podman run -d --name {name} --security-opt label=disable \ -p {port}:10809 \ -v {merged}:{merged}:ro \ - -v {plugin}:/plugin.so:ro \ - -v /usr/bin/nbdkit:/usr/bin/nbdkit:ro \ - -v /usr/lib64/nbdkit:/usr/lib64/nbdkit:ro \ - quay.io/fedora/fedora:latest \ - nbdkit -f -p 10809 -r /plugin.so \ + {image} \ + nbdkit -f --threads 4 -p 10809 -r /plugin.so \ {dir} {cmdline}{ssh}", name = container_name, port = nbd_port, merged = merged_path, - plugin = NBDKIT_EROFS_PLUGIN_PATH, + image = crate::vm_helpers::NBDKIT_IMAGE, dir = dir_esc, cmdline = cmdline_esc, ssh = ssh_param, @@ -106,7 +129,6 @@ pub(crate) fn start_nbdkit_erofs_plugin( } info!("waiting for nbdkit on port {}...", nbd_port); - let deadline = std::time::Instant::now() + Duration::from_secs(30); loop { if let Ok(mut stream) = 
std::net::TcpStream::connect_timeout( &std::net::SocketAddr::from(([127, 0, 0, 1], nbd_port)), @@ -119,25 +141,47 @@ pub(crate) fn start_nbdkit_erofs_plugin( break; } } - if std::time::Instant::now() > deadline { - let _ = Command::new("podman") - .args([ - "machine", - "ssh", - machine, - "--", - "podman", - "rm", - "-f", - &container_name, - ]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status(); - bail!( - "nbdkit erofs plugin did not become ready on port {}", - nbd_port - ); + // Check if container is still alive (no fixed timeout — wait as long + // as plugin_get_ready() is running, which scans the entire overlay + // directory and scales with image size) + let ps_output = Command::new("podman") + .args([ + "machine", + "ssh", + machine, + "--", + "podman", + "ps", + "-a", + "--filter", + &format!("name=^{}$", container_name), + "--format", + "{{.Status}}", + ]) + .output(); + if let Ok(out) = &ps_output { + let stdout = String::from_utf8_lossy(&out.stdout); + if stdout.contains("Exited") { + let _ = Command::new("podman") + .args([ + "machine", + "ssh", + machine, + "--", + "podman", + "rm", + "-f", + &container_name, + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + bail!( + "nbdkit container '{}' exited before becoming ready on port {}", + container_name, + nbd_port + ); + } } std::thread::sleep(Duration::from_millis(500)); } diff --git a/crates/kit/src/run_ephemeral_macos.rs b/crates/kit/src/run_ephemeral_macos.rs index 2265aacb7..156de0517 100644 --- a/crates/kit/src/run_ephemeral_macos.rs +++ b/crates/kit/src/run_ephemeral_macos.rs @@ -1,9 +1,9 @@ -//! Ephemeral VM launch flow for macOS using vfkit + NBD EROFS plugin. +//! Ephemeral VM launch flow for macOS using vfkit + NBD EROFS over TCP. //! //! Boot flow (fully diskless): //! 1. Mount container image overlay (`podman image mount`) -//! 2. Start nbdkit with erofs plugin (dynamically generates GPT + ESP + EROFS) -//! 3. 
Launch vfkit with EFI boot via NBD + virtio-net (gvproxy) +//! 2. Start nbdkit with erofs plugin in TCP mode (port forwarded via gvproxy) +//! 3. Launch vfkit with EFI boot via NBD TCP + virtio-net (gvproxy) //! 4. Wait for SSH and execute commands //! //! Common helpers (gvproxy, SSH, vfkit detection) are pub for reuse by vfkit/ module. @@ -15,11 +15,16 @@ use std::process::{Command, Stdio}; use std::time::Duration; use color_eyre::{ - eyre::{bail, eyre, Context}, + eyre::{bail, Context}, Result, }; use tracing::{debug, info}; +pub use crate::vm_helpers::{ + default_vcpus, detect_machine_name, ensure_image_and_get_digest, is_machine_rootful, + parse_memory_to_mb, run_ssh_command, run_ssh_interactive, wait_for_ssh, +}; + /// Base directory for ephemeral VM state on macOS host. pub fn ephemeral_base_dir() -> std::path::PathBuf { dirs::home_dir() @@ -121,10 +126,13 @@ impl EphemeralVmMetadata { pub struct RunEphemeralOpts { /// Container image to boot pub image: String, - /// Number of vCPUs + /// Instance type (e.g., u1.nano, u1.small). Overrides vcpus/memory if specified. + #[clap(long)] + pub itype: Option, + /// Number of vCPUs (overridden by --itype if specified) #[clap(long)] pub vcpus: Option, - /// Memory size (e.g. "4G", "2048M", or plain number for MB) + /// Memory size (overridden by --itype if specified) #[clap(long, default_value = "4G")] pub memory: String, /// Generate a temporary SSH key pair for VM access @@ -150,24 +158,6 @@ pub struct RunEphemeralOpts { pub debug: bool, } -fn default_vcpus() -> u32 { - std::thread::available_parallelism() - .map(|n| n.get() as u32) - .unwrap_or(2) -} - -/// Parse memory specification string (e.g. "4G", "2048M") to megabytes. -pub fn parse_memory_to_mb(s: &str) -> Result { - let s = s.trim(); - if let Some(n) = s.strip_suffix('G').or_else(|| s.strip_suffix('g')) { - Ok((n.parse::()? * 1024.0) as u32) - } else if let Some(n) = s.strip_suffix('M').or_else(|| s.strip_suffix('m')) { - Ok(n.parse::()? 
as u32) - } else { - Ok(s.parse::()?) - } -} - // --- RAII cleanup guard --- struct VmCleanup { @@ -219,18 +209,23 @@ impl Drop for VmCleanup { // --- Main entry point --- -/// Run an ephemeral VM from a container image using vfkit + EROFS over NBD. +/// Run an ephemeral VM from a container image. +/// pub fn run(opts: RunEphemeralOpts) -> Result<()> { if opts.gui && opts.detach { bail!("--gui and --detach cannot be used together (GUI requires foreground process)"); } + run_vfkit(opts) +} +/// Run an ephemeral VM using vfkit + EROFS over NBD (TCP transport). +fn run_vfkit(opts: RunEphemeralOpts) -> Result<()> { if opts.detach { return run_detached(&opts); } let vfkit_bin = find_vfkit()?; - info!(image = %opts.image, "starting ephemeral VM on macOS (vfkit + EROFS)"); + info!(image = %opts.image, "starting ephemeral VM on macOS (vfkit + NBD TCP)"); let cache_base = ephemeral_base_dir(); fs::create_dir_all(&cache_base)?; @@ -294,12 +289,15 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { cmdline_parts.extend(&user_args); let cmdline = cmdline_parts.join(" "); + // Ensure nbdkit container image is ready (auto-build on first run) + crate::nbdkit_macos::ensure_nbdkit_ready(&machine)?; + // Get container image merged overlay path let merged_path = crate::nbdkit_macos::get_merged_path(&machine, rootful, &opts.image)?; info!("overlay merged: {}", merged_path); - // Start nbdkit with erofs plugin (dynamic EROFS + ESP + GPT from overlay dir) let nbd_port = crate::nbdkit_macos::find_available_nbd_port(); + info!("NBD transport: TCP (port {})", nbd_port); let nbd_container_name = crate::nbdkit_macos::start_nbdkit_erofs_plugin( &machine, &merged_path, @@ -308,8 +306,6 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { nbd_port, &vm_name, )?; - std::thread::sleep(Duration::from_millis(500)); - info!("nbdkit ready on port {}", nbd_port); // gvproxy + vfkit (EFI boot) let gvproxy_sock = cache_base.join(format!("{}-gvproxy.sock", vm_name)); @@ -328,8 +324,16 @@ pub fn 
run(opts: RunEphemeralOpts) -> Result<()> { let efi_var_store = cache_base.join(format!("{}-efi-vars", vm_name)); let bootloader_arg = format!("efi,variable-store={},create", efi_var_store.display()); - let vcpus = opts.vcpus.unwrap_or_else(default_vcpus); - let memory_mb = parse_memory_to_mb(&opts.memory)?; + let vcpus = opts + .itype + .map(|t| t.vcpus()) + .or(opts.vcpus) + .unwrap_or_else(default_vcpus); + let memory_mb = opts + .itype + .map(|t| t.memory_mb()) + .map(Ok) + .unwrap_or_else(|| parse_memory_to_mb(&opts.memory))?; let mut vfkit_args = vec![ "--cpus".to_string(), @@ -350,8 +354,20 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { ), "--device".to_string(), "virtio-rng".to_string(), + "--device".to_string(), + format!( + "virtio-vsock,port=9000,socketURL={},connect", + cache_base.join(format!("{}-vsock.sock", vm_name)).display() + ), ]; + if let Ok(bench_nbd) = std::env::var("BCVK_BENCH_NBD") { + vfkit_args.extend([ + "--device".to_string(), + format!("nbd,uri={},readonly,timeout=5000,deviceId=bench", bench_nbd), + ]); + } + let serial_log = cache_base.join(format!("{}-serial.log", vm_name)); vfkit_args.extend([ "--device".to_string(), @@ -401,7 +417,7 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { if opts.ssh_keygen || !opts.execute.is_empty() { info!("setting up SSH port forwarding..."); for attempt in 0..15u32 { - match expose_ssh_port(&services_sock_str, "192.168.127.2", ssh_port) { + match expose_port(&services_sock_str, "192.168.127.2", ssh_port, 22) { Ok(_) => { info!("SSH port {} forwarded", ssh_port); break; @@ -523,50 +539,7 @@ fn run_detached(opts: &RunEphemeralOpts) -> Result<()> { Ok(()) } -// --- Shared helpers (pub for vfkit/ module) --- - -/// Detect the name of the running podman machine. 
-pub fn detect_machine_name() -> Result { - let output = Command::new("podman") - .args(["machine", "info", "--format", "{{.Host.CurrentMachine}}"]) - .output()?; - let name = String::from_utf8_lossy(&output.stdout).trim().to_string(); - if name.is_empty() { - bail!("no podman machine is running"); - } - Ok(name) -} - -fn ensure_image_and_get_digest(image: &str) -> Result { - let status = Command::new("podman") - .args(["image", "exists", image]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status()?; - if !status.success() { - info!("pulling image {}...", image); - if !Command::new("podman") - .args(["pull", image]) - .status()? - .success() - { - bail!("failed to pull image: {}", image); - } - } - let output = Command::new("podman") - .args(["image", "inspect", "--format", "{{.Digest}}", image]) - .output()?; - let digest = String::from_utf8_lossy(&output.stdout).trim().to_string(); - Ok(digest.trim_start_matches("sha256:").to_string()) -} - -fn is_machine_rootful(machine: &str) -> bool { - Command::new("podman") - .args(["machine", "ssh", machine, "id", "-u"]) - .output() - .map(|o| String::from_utf8_lossy(&o.stdout).trim() == "0") - .unwrap_or(false) -} +// --- macOS-specific helpers (pub for vfkit/ module) --- /// Clear extended attributes from a file. /// @@ -647,11 +620,16 @@ pub fn start_gvproxy(gvproxy_sock: &str, services_sock: &str) -> Result Result<()> { +/// Expose a TCP port forwarding rule via gvproxy's HTTP API. 
+pub fn expose_port( + services_sock: &str, + vm_ip: &str, + host_port: u16, + guest_port: u16, +) -> Result<()> { let body = format!( - r#"{{"local":":{}","remote":"{}:22","protocol":"tcp"}}"#, - host_port, vm_ip + r#"{{"local":":{}","remote":"{}:{}","protocol":"tcp"}}"#, + host_port, vm_ip, guest_port ); let mut stream = UnixStream::connect(services_sock)?; let request = format!( @@ -674,8 +652,6 @@ pub fn expose_ssh_port(services_sock: &str, vm_ip: &str, host_port: u16) -> Resu Ok(()) } -const SSH_TIMEOUT: Duration = Duration::from_secs(240); - /// Find an available TCP port for SSH forwarding in range 2222-3000. pub fn find_available_ssh_port() -> u16 { use rand::Rng; @@ -696,130 +672,16 @@ pub fn find_available_ssh_port() -> u16 { PORT_RANGE_START } -/// Wait for SSH connectivity with exponential backoff (240s timeout). -pub fn wait_for_ssh(port: u16, key_path: &Path, user: &str) -> Result<()> { - use crate::ssh_options::CommonSshOptions; - let ssh_opts = CommonSshOptions::default(); - let user_host = format!("{}@localhost", user); - info!("waiting for SSH on port {} ({}@localhost)...", port, user); - let start = std::time::Instant::now(); - let mut attempt = 0u32; - loop { - if start.elapsed() > SSH_TIMEOUT { - bail!("SSH connection timeout ({}s)", SSH_TIMEOUT.as_secs()); - } - let mut cmd = Command::new("ssh"); - cmd.args(["-p", &port.to_string(), "-i", &key_path.to_string_lossy()]); - ssh_opts.apply_to_command(&mut cmd); - cmd.args(["-o", "BatchMode=yes", &user_host, "true"]); - let status = cmd.stdout(Stdio::null()).stderr(Stdio::null()).status(); - if let Ok(s) = status { - if s.success() { - info!("SSH connected after {}s", start.elapsed().as_secs()); - return Ok(()); - } - } - let backoff = if attempt < 2 { - 500 - } else if attempt < 4 { - 1000 - } else { - 2000 - }; - std::thread::sleep(Duration::from_millis(backoff)); - attempt += 1; - } -} - -/// Execute a command via SSH and return the exit status. 
-pub fn run_ssh_command( - port: u16, - key_path: &Path, - user: &str, - command: &str, -) -> Result { - use crate::ssh_options::CommonSshOptions; - let ssh_opts = CommonSshOptions::default(); - let user_host = format!("{}@localhost", user); - let mut cmd = Command::new("ssh"); - cmd.args(["-p", &port.to_string(), "-i", &key_path.to_string_lossy()]); - ssh_opts.apply_to_command(&mut cmd); - cmd.args(["-o", "BatchMode=yes", &user_host, command]); - cmd.stdin(Stdio::inherit()) - .stdout(Stdio::inherit()) - .stderr(Stdio::inherit()) - .status() - .map_err(|e| eyre!("ssh failed: {}", e)) -} - -/// Start an interactive SSH session with TTY allocation. -pub fn run_ssh_interactive( - port: u16, - key_path: &Path, - user: &str, -) -> Result { - use crate::ssh_options::CommonSshOptions; - let ssh_opts = CommonSshOptions::default(); - let user_host = format!("{}@localhost", user); - let mut cmd = Command::new("ssh"); - cmd.args(["-p", &port.to_string(), "-i", &key_path.to_string_lossy()]); - ssh_opts.apply_to_command(&mut cmd); - cmd.args(["-t", &user_host]); - cmd.stdin(Stdio::inherit()) - .stdout(Stdio::inherit()) - .stderr(Stdio::inherit()) - .status() - .map_err(|e| eyre!("ssh failed: {}", e)) -} - #[cfg(test)] mod tests { use super::*; - #[test] - fn test_parse_memory_to_mb() { - let cases = [ - ("4G", 4096), - ("4g", 4096), - ("2048M", 2048), - ("2048m", 2048), - ("512", 512), - ("1G", 1024), - ]; - for (input, expected) in &cases { - assert_eq!( - parse_memory_to_mb(input).unwrap(), - *expected, - "parse_memory_to_mb({:?})", - input - ); - } - } - - #[test] - fn test_parse_memory_to_mb_errors() { - assert!(parse_memory_to_mb("").is_err()); - assert!(parse_memory_to_mb("abc").is_err()); - } - #[test] fn test_generate_mac() { let mac = generate_mac(); assert_eq!(mac, GVPROXY_STATIC_MAC); } - #[test] - fn test_default_vcpus() { - let vcpus = default_vcpus(); - assert!(vcpus >= 1); - assert_eq!( - vcpus, - std::thread::available_parallelism() - .map(|n| n.get() as u32) - 
.unwrap_or(2) - ); - } - #[test] fn test_find_available_ssh_port() { let port = find_available_ssh_port(); diff --git a/crates/kit/src/to_disk_macos.rs b/crates/kit/src/to_disk_macos.rs new file mode 100644 index 000000000..834ea1398 --- /dev/null +++ b/crates/kit/src/to_disk_macos.rs @@ -0,0 +1,402 @@ +//! Install bootc images to disk on macOS using loopback devices via podman machine. +//! +//! Uses losetup inside podman machine to create loop devices from raw disk files +//! accessible via virtiofs, then runs `bootc install to-disk` targeting the loop device. +//! Base disk caching with APFS clonefile (`cp -c`) provides fast VM creation. + +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; + +use clap::Parser; +use color_eyre::eyre::{bail, Context}; +use color_eyre::Result; +use tracing::{debug, info}; + +use crate::install_options::InstallOptions; +use crate::run_ephemeral_macos::clear_xattr; +use crate::vm_helpers::{ + detect_machine_name, ensure_image_and_get_digest, generate_ssh_keypair, is_machine_rootful, + parse_size, remove_file_if_exists, +}; +use sha2::{Digest, Sha256}; + +/// Options for `bcvk to-disk` on macOS. +#[derive(Parser, Debug)] +pub struct ToDiskMacosOpts { + /// Container image to install + pub source_image: String, + /// Target disk path (output .raw file) + pub target_disk: String, + /// Disk size (e.g. "10G", "5120M", or plain number for bytes) + #[clap(long, default_value = "20G")] + pub disk_size: String, + /// Installation options (filesystem, root-size, etc.) 
+ #[clap(flatten)] + pub install: InstallOptions, + /// Configure logging for `bootc install` by setting the `RUST_LOG` environment variable + #[clap(long)] + pub install_log: Option, + /// Add metadata to the container in key=value form + #[clap(long = "label")] + pub label: Vec, + /// Check if the disk would be regenerated without actually creating it + #[clap(long)] + pub dry_run: bool, +} + +fn base_dir() -> PathBuf { + dirs::home_dir() + .expect("cannot determine home directory") + .join(".local/share/bcvk/base") +} + +/// Directory for persistent VM disk images. +pub fn vms_dir() -> PathBuf { + dirs::home_dir() + .expect("cannot determine home directory") + .join(".local/share/bcvk/vms") +} + +fn resolve_path_in_machine(host_path: &str) -> String { + let resolved = if let Ok(canonical) = std::fs::canonicalize(host_path) { + canonical.to_string_lossy().to_string() + } else { + host_path.to_string() + }; + // macOS /tmp is a symlink to /private/tmp; podman machine mounts + // /private/tmp via virtiofs, so we need the canonical path. + // canonicalize() normally resolves this, but handle it explicitly. 
+ if resolved.starts_with("/tmp/") { + format!("/private{}", resolved) + } else { + resolved + } +} + +fn create_raw_disk(path: &str, size_bytes: u64) -> Result<()> { + let file = fs::File::create(path).with_context(|| format!("creating {}", path))?; + file.set_len(size_bytes) + .with_context(|| format!("setting size {} on {}", size_bytes, path))?; + drop(file); + clear_xattr(Path::new(path)); + Ok(()) +} + +fn generate_bootc_install_script( + disk_path_in_machine: &str, + image: &str, + install_opts: &InstallOptions, + ssh_pubkey: &str, + rootful: bool, + install_log: &Option, + labels: &[String], +) -> String { + let bootc_args = install_opts + .to_bootc_args() + .iter() + .map(|a| { + shlex::try_quote(a) + .unwrap_or(std::borrow::Cow::Borrowed(a)) + .to_string() + }) + .collect::>() + .join(" "); + + let image_quoted = shlex::try_quote(image) + .unwrap_or(std::borrow::Cow::Borrowed(image)) + .to_string(); + + use base64::Engine; + let pub_key_b64 = base64::engine::general_purpose::STANDARD.encode(ssh_pubkey); + + let sudo = if rootful { "" } else { "sudo " }; + + let rust_log_line = if let Some(ref level) = install_log { + format!( + "export RUST_LOG={}\n", + shlex::try_quote(level).unwrap_or(std::borrow::Cow::Borrowed(level)) + ) + } else { + String::new() + }; + + let label_args = labels + .iter() + .map(|l| { + format!( + "--label {}", + shlex::try_quote(l).unwrap_or(std::borrow::Cow::Borrowed(l)) + ) + }) + .collect::>() + .join(" \\\n "); + let label_line = if label_args.is_empty() { + String::new() + } else { + format!(" {} \\\n", label_args) + }; + + format!( + r#"set -euo pipefail +{rust_log} +LOOP=$({sudo}losetup -fP --show {disk_path}) +echo "Loop device: $LOOP" +trap '{sudo}losetup -d $LOOP 2>/dev/null' EXIT + +printf '%s' '{b64}' | base64 -d > /dev/shm/bcvk-ssh-key.pub + +echo "Running bootc install to-disk..." 
+podman run --rm --privileged --pid=host --net=none \ + -v /dev:/dev \ + -v /dev/shm:/dev/shm \ + -v /var/lib/containers:/var/lib/containers \ +{label_line} {image} bootc install to-disk \ + --generic-image --skip-fetch-check --wipe \ + --root-ssh-authorized-keys /dev/shm/bcvk-ssh-key.pub \ + {bootc_args} $LOOP + +rm -f /dev/shm/bcvk-ssh-key.pub + +echo "Installation complete!" +"#, + rust_log = rust_log_line, + sudo = sudo, + disk_path = disk_path_in_machine, + b64 = pub_key_b64, + image = image_quoted, + bootc_args = bootc_args, + label_line = label_line, + ) +} + +const CACHE_HASH_XATTR: &str = "user.bcvk.cache_hash"; + +fn compute_cache_hash( + image_digest: &str, + source_image: &str, + install_opts: &InstallOptions, +) -> String { + let bootc_args = install_opts.to_bootc_args().join(","); + let input = format!("{}|{}|{}", image_digest, source_image, bootc_args); + let hash = Sha256::digest(input.as_bytes()); + format!("sha256:{:x}", hash) +} + +fn read_xattr(path: &Path, name: &str) -> Option { + let output = Command::new("xattr") + .args(["-p", name, &path.to_string_lossy()]) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .output() + .ok()?; + if output.status.success() { + Some(String::from_utf8_lossy(&output.stdout).trim().to_string()) + } else { + None + } +} + +fn write_xattr(path: &Path, name: &str, value: &str) -> Result<()> { + let status = Command::new("xattr") + .args(["-w", name, value, &path.to_string_lossy()]) + .status() + .with_context(|| format!("writing xattr {} on {}", name, path.display()))?; + if !status.success() { + bail!("xattr -w failed for {} on {}", name, path.display()); + } + Ok(()) +} + +/// Find or create a cached base disk for the given image + install options. 
+pub fn find_or_create_base_disk( + source_image: &str, + image_digest: &str, + install_options: &InstallOptions, + disk_size: &str, + machine: &str, + install_log: &Option, + labels: &[String], +) -> Result { + let cache_hash = compute_cache_hash(image_digest, source_image, install_options); + let short_hash = cache_hash + .strip_prefix("sha256:") + .unwrap_or(&cache_hash) + .chars() + .take(16) + .collect::(); + + let base_dir = base_dir(); + fs::create_dir_all(&base_dir)?; + let base_disk_name = format!("bootc-base-{}.raw", short_hash); + let base_disk_path = base_dir.join(&base_disk_name); + + if base_disk_path.exists() { + debug!("checking existing base disk: {:?}", base_disk_path); + if let Some(stored_hash) = read_xattr(&base_disk_path, CACHE_HASH_XATTR) { + if stored_hash == cache_hash { + info!("reusing cached base disk: {:?}", base_disk_path); + return Ok(base_disk_path); + } + info!("base disk cache hash mismatch, recreating"); + } else { + info!("base disk has no cache hash, recreating"); + } + fs::remove_file(&base_disk_path)?; + } + + info!("creating base disk: {:?}", base_disk_path); + let base_disk_str = base_disk_path.to_string_lossy().to_string(); + + let size_bytes = parse_size(disk_size)?; + create_raw_disk(&base_disk_str, size_bytes)?; + + let key_path = PathBuf::from(format!("{}.key", base_disk_path.display())); + let ssh_pubkey = generate_ssh_keypair(&key_path)?; + + let disk_in_machine = resolve_path_in_machine(&base_disk_str); + let rootful = is_machine_rootful(machine); + let script = generate_bootc_install_script( + &disk_in_machine, + source_image, + install_options, + &ssh_pubkey, + rootful, + install_log, + labels, + ); + + info!("running bootc install to-disk in podman machine..."); + let mut child = Command::new("podman") + .args(["machine", "ssh", machine, "--", "bash", "-s"]) + .stdin(Stdio::piped()) + .spawn() + .context("podman machine ssh")?; + if let Some(mut stdin) = child.stdin.take() { + use std::io::Write; + 
stdin.write_all(script.as_bytes())?; + } + let status = child.wait()?; + + if !status.success() { + remove_file_if_exists(&base_disk_path); + remove_file_if_exists(&key_path); + remove_file_if_exists(&PathBuf::from(format!("{}.pub", key_path.display()))); + bail!("bootc install to-disk failed"); + } + + write_xattr(&base_disk_path, CACHE_HASH_XATTR, &cache_hash)?; + + Ok(base_disk_path) +} + +/// Clone a base disk to create a VM-specific disk via APFS clonefile (`cp -c`). +pub fn clone_base_disk(base_path: &Path, vm_disk_path: &Path) -> Result<()> { + if let Some(parent) = vm_disk_path.parent() { + fs::create_dir_all(parent)?; + } + let status = Command::new("cp") + .args([ + "-c", + &base_path.to_string_lossy(), + &vm_disk_path.to_string_lossy(), + ]) + .status() + .context("cp -c (APFS clonefile)")?; + if !status.success() { + bail!( + "APFS clonefile failed: {} -> {}", + base_path.display(), + vm_disk_path.display() + ); + } + clear_xattr(vm_disk_path); + Ok(()) +} + +/// Execute `bcvk to-disk` on macOS. 
+pub fn run(opts: ToDiskMacosOpts) -> Result<()> { + let machine = detect_machine_name()?; + let digest = ensure_image_and_get_digest(&opts.source_image)?; + info!("image digest: {}...", &digest[..16.min(digest.len())]); + + let cache_hash = compute_cache_hash(&digest, &opts.source_image, &opts.install); + let short_hash: String = cache_hash + .strip_prefix("sha256:") + .unwrap_or(&cache_hash) + .chars() + .take(16) + .collect(); + let base_disk_path = base_dir().join(format!("bootc-base-{}.raw", short_hash)); + + if opts.dry_run { + if base_disk_path.exists() { + if let Some(stored) = read_xattr(&base_disk_path, CACHE_HASH_XATTR) { + if stored == cache_hash { + println!("Would reuse cached base disk: {}", base_disk_path.display()); + if Path::new(&opts.target_disk).exists() { + println!("Output already exists: {}", opts.target_disk); + } else { + println!("Would create disk: {} (from base)", opts.target_disk); + } + return Ok(()); + } + } + println!("Would regenerate base disk (hash mismatch)"); + } else { + println!( + "Would create new base disk and output: {}", + opts.target_disk + ); + } + return Ok(()); + } + + let base_disk_path = find_or_create_base_disk( + &opts.source_image, + &digest, + &opts.install, + &opts.disk_size, + &machine, + &opts.install_log, + &opts.label, + )?; + + // Copy base disk to target via APFS clonefile + let target = Path::new(&opts.target_disk); + clone_base_disk(&base_disk_path, target)?; + + // Copy SSH key ({base}.raw.key → {target}.key) + let base_key = PathBuf::from(format!("{}.key", base_disk_path.display())); + let target_key = PathBuf::from(format!("{}.key", target.display())); + if base_key.exists() { + fs::copy(&base_key, &target_key).context("copying SSH key")?; + let base_pub = PathBuf::from(format!("{}.pub", base_key.display())); + let target_pub = PathBuf::from(format!("{}.pub", target_key.display())); + if base_pub.exists() { + fs::copy(&base_pub, &target_pub).context("copying SSH pubkey")?; + } + } + + println!("Disk 
image created: {}", opts.target_disk); + println!("SSH key: {}", target_key.display()); + println!( + "\nTo boot: bcvk vm run --ssh-key {} {}", + target_key.display(), + opts.target_disk + ); + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_resolve_path_in_machine() { + assert_eq!( + resolve_path_in_machine("/tmp/test.raw"), + "/private/tmp/test.raw" + ); + } +} diff --git a/crates/kit/src/vfkit/inspect.rs b/crates/kit/src/vfkit/inspect.rs index 67a506d7c..a539c2422 100644 --- a/crates/kit/src/vfkit/inspect.rs +++ b/crates/kit/src/vfkit/inspect.rs @@ -1,15 +1,34 @@ //! vm inspect — Show detailed VM information. -use super::VmMetadata; +use super::{OutputFormat, VmMetadata}; +use clap::Parser; use color_eyre::Result; +/// Options for `vm inspect`. +#[derive(Parser, Debug)] +pub struct VmInspectOpts { + /// VM name + pub name: String, + /// Output format + #[clap(long, value_enum, default_value_t = OutputFormat::Yaml)] + pub format: OutputFormat, +} + /// Display detailed metadata for the named VM. 
-pub fn run(name: &str, json: bool) -> Result<()> { - let meta = VmMetadata::load(name)?; +pub fn run(opts: VmInspectOpts) -> Result<()> { + let meta = VmMetadata::load(&opts.name)?; - if json { - println!("{}", serde_json::to_string_pretty(&meta)?); - return Ok(()); + match opts.format { + OutputFormat::Json => { + println!("{}", serde_json::to_string_pretty(&meta)?); + return Ok(()); + } + OutputFormat::Yaml | OutputFormat::Table => {} + OutputFormat::Xml => { + return Err(color_eyre::eyre::eyre!( + "XML format is not supported for inspect command" + )); + } } let state = if meta.is_alive() { @@ -21,8 +40,8 @@ pub fn run(name: &str, json: bool) -> Result<()> { println!("Name: {}", meta.name); println!("State: {}", state); println!("Disk: {}", meta.disk_image); - println!("CPUs: {}", meta.cpus); - println!("Memory: {} MiB", meta.memory); + println!("CPUs: {}", meta.vcpus); + println!("Memory: {} MiB", meta.memory_mb); println!("GUI: {}", meta.gui); println!("Created: {}", meta.created); println!(); @@ -53,6 +72,17 @@ pub fn run(name: &str, json: bool) -> Result<()> { meta.ssh_port, meta.ssh_key, meta.ssh_user ); } + if !meta.labels.is_empty() { + println!(); + println!("Labels: {}", meta.labels.join(", ")); + } + if !meta.port_mappings.is_empty() { + println!(); + println!("Port mappings:"); + for (h, g) in &meta.port_mappings { + println!(" {}:{}", h, g); + } + } println!(); println!("Files:"); println!(" EFI store: {}", meta.efi_store); diff --git a/crates/kit/src/vfkit/list.rs b/crates/kit/src/vfkit/list.rs index bdda3f295..397856573 100644 --- a/crates/kit/src/vfkit/list.rs +++ b/crates/kit/src/vfkit/list.rs @@ -1,29 +1,85 @@ //! vm list — List all persistent VMs. -use super::VmMetadata; +use super::{OutputFormat, VmMetadata}; +use clap::Parser; use color_eyre::Result; -/// List all persistent VMs, optionally as JSON. -pub fn run(json: bool) -> Result<()> { - let vms = VmMetadata::list_all()?; +/// Options for `vm list`. 
+#[derive(Parser, Debug)] +pub struct VmListOpts { + /// VM name to query (returns only this VM) + pub domain_name: Option, + /// Output format + #[clap(long, value_enum, default_value_t = OutputFormat::Table)] + pub format: OutputFormat, + /// Show all VMs including stopped ones + #[clap(long, short = 'a')] + pub all: bool, + /// Filter VMs by label + #[clap(long)] + pub label: Option, +} - if json { - println!("{}", serde_json::to_string_pretty(&vms)?); - return Ok(()); - } +/// List persistent VMs with optional filtering and format selection. +pub fn run(opts: VmListOpts) -> Result<()> { + let all_vms = if let Some(ref name) = opts.domain_name { + match VmMetadata::load(name) { + Ok(meta) => vec![meta], + Err(e) => { + return Err(color_eyre::eyre::eyre!( + "Failed to get VM '{}': {}", + name, + e + )); + } + } + } else { + VmMetadata::list_all()? + }; + + let mut vms: Vec<_> = all_vms + .into_iter() + .filter(|vm| opts.all || opts.domain_name.is_some() || vm.is_alive()) + .collect(); - if vms.is_empty() { - println!("No VMs found."); - return Ok(()); + if let Some(ref filter_label) = opts.label { + vms.retain(|vm| vm.labels.contains(filter_label)); } - println!("{:<20} {:<10} {:<30} SSH", "NAME", "STATE", "DISK"); - for vm in &vms { - let state = if vm.is_alive() { "running" } else { "stopped" }; - println!( - "{:<20} {:<10} {:<30} ssh -p {} -i {} {}@localhost", - vm.name, state, vm.disk_image, vm.ssh_port, vm.ssh_key, vm.ssh_user - ); + match opts.format { + OutputFormat::Table => { + if vms.is_empty() { + println!("No VMs found."); + return Ok(()); + } + println!("{:<20} {:<10} {:<30} SSH", "NAME", "STATE", "DISK"); + for vm in &vms { + let state = if vm.is_alive() { "running" } else { "stopped" }; + println!( + "{:<20} {:<10} {:<30} ssh -p {} -i {} {}@localhost", + vm.name, state, vm.disk_image, vm.ssh_port, vm.ssh_key, vm.ssh_user + ); + } + } + OutputFormat::Json => { + println!("{}", serde_json::to_string_pretty(&vms)?); + } + OutputFormat::Yaml => { + for 
vm in &vms { + let state = if vm.is_alive() { "running" } else { "stopped" }; + println!("- name: {}", vm.name); + println!(" state: {}", state); + println!(" disk: {}", vm.disk_image); + println!(" vcpus: {}", vm.vcpus); + println!(" memory_mb: {}", vm.memory_mb); + println!(" ssh_port: {}", vm.ssh_port); + } + } + OutputFormat::Xml => { + return Err(color_eyre::eyre::eyre!( + "XML format is not supported for list command" + )); + } } Ok(()) } diff --git a/crates/kit/src/vfkit/mod.rs b/crates/kit/src/vfkit/mod.rs index 2062851d5..347f242d5 100644 --- a/crates/kit/src/vfkit/mod.rs +++ b/crates/kit/src/vfkit/mod.rs @@ -18,6 +18,20 @@ pub mod ssh; pub mod start; pub mod stop; +/// Output format for inspect and list commands. +#[derive(Debug, Clone, clap::ValueEnum)] +#[clap(rename_all = "kebab-case")] +pub enum OutputFormat { + /// Table format (default for list) + Table, + /// JSON format + Json, + /// YAML-like key-value format (default for inspect) + Yaml, + /// XML format (not yet implemented) + Xml, +} + /// Subcommands for persistent VM management via vfkit. 
#[derive(Debug, Subcommand)] pub enum VmCommands { @@ -26,20 +40,13 @@ pub enum VmCommands { /// List all persistent VMs #[clap(name = "list", alias = "ls")] - List { - /// Output in JSON format - #[clap(long)] - json: bool, - }, + List(list::VmListOpts), /// SSH into a running VM Ssh(ssh::VmSshOpts), /// Stop a running VM - Stop { - /// VM name - name: String, - }, + Stop(stop::VmStopOpts), /// Start a stopped VM Start(start::VmStartOpts), @@ -50,20 +57,10 @@ pub enum VmCommands { /// Remove all VMs #[clap(name = "rm-all")] - RemoveAll { - /// Force removal without confirmation - #[clap(short, long)] - force: bool, - }, + RemoveAll(rm_all::VmRmAllOpts), /// Show detailed VM information - Inspect { - /// VM name - name: String, - /// Output in JSON format - #[clap(long)] - json: bool, - }, + Inspect(inspect::VmInspectOpts), } impl VmCommands { @@ -71,13 +68,13 @@ impl VmCommands { pub fn run(self) -> Result<()> { match self { VmCommands::Run(opts) => run::run(opts), - VmCommands::List { json } => list::run(json), + VmCommands::List(opts) => list::run(opts), VmCommands::Ssh(opts) => ssh::run(opts), - VmCommands::Stop { name } => stop::run(&name), + VmCommands::Stop(opts) => stop::run(opts), VmCommands::Start(opts) => start::run(opts), VmCommands::Remove(opts) => rm::run(opts), - VmCommands::RemoveAll { force } => rm_all::run(force), - VmCommands::Inspect { name, json } => inspect::run(&name, json), + VmCommands::RemoveAll(opts) => rm_all::run(opts), + VmCommands::Inspect(opts) => inspect::run(opts), } } } @@ -89,6 +86,9 @@ impl VmCommands { pub struct VmMetadata { /// VM name used as identifier. pub name: String, + /// Container image used to create this VM (None if created from disk directly). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub image: Option, /// Path to the disk image file. pub disk_image: String, /// PID of the vfkit process. @@ -102,9 +102,9 @@ pub struct VmMetadata { /// SSH username for connecting to the VM. 
pub ssh_user: String, /// Number of vCPUs allocated. - pub cpus: u32, + pub vcpus: u32, /// Memory in megabytes. - pub memory: u32, + pub memory_mb: u32, /// Path to the EFI variable store file. pub efi_store: String, /// Path to the serial console log file. @@ -115,6 +115,12 @@ pub struct VmMetadata { pub created: String, /// Current VM state (running, stopped). pub state: String, + /// User-defined labels for organizing VMs. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub labels: Vec, + /// Port mappings from host to VM (host_port, guest_port). + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub port_mappings: Vec<(u16, u16)>, } impl VmMetadata { @@ -187,19 +193,22 @@ mod tests { fn sample_vm_metadata(name: &str) -> VmMetadata { VmMetadata { name: name.to_string(), + image: None, disk_image: "/tmp/disk.raw".to_string(), vfkit_pid: 0, gvproxy_pid: 0, ssh_port: 2222, ssh_key: "/tmp/key".to_string(), ssh_user: "root".to_string(), - cpus: 2, - memory: 4096, + vcpus: 2, + memory_mb: 4096, efi_store: "/tmp/efi.fd".to_string(), serial_log: "/tmp/serial.log".to_string(), gui: false, created: "2026-01-01T00:00:00Z".to_string(), state: "running".to_string(), + labels: vec![], + port_mappings: vec![], } } @@ -210,8 +219,8 @@ mod tests { let loaded: VmMetadata = serde_json::from_str(&json).unwrap(); assert_eq!(loaded.name, "test-vm"); assert_eq!(loaded.disk_image, "/tmp/disk.raw"); - assert_eq!(loaded.cpus, 2); - assert_eq!(loaded.memory, 4096); + assert_eq!(loaded.vcpus, 2); + assert_eq!(loaded.memory_mb, 4096); assert_eq!(loaded.ssh_user, "root"); assert_eq!(loaded.state, "running"); assert!(!loaded.gui); diff --git a/crates/kit/src/vfkit/rm.rs b/crates/kit/src/vfkit/rm.rs index ec48044e8..e78f6212f 100644 --- a/crates/kit/src/vfkit/rm.rs +++ b/crates/kit/src/vfkit/rm.rs @@ -16,6 +16,9 @@ pub struct VmRmOpts { /// Force removal even if running #[clap(short, long)] pub force: bool, + /// Stop domain if it's running (implied by --force) + 
#[clap(long)] + pub stop: bool, } /// Remove a persistent VM, optionally force-killing it. @@ -23,14 +26,34 @@ pub fn run(opts: VmRmOpts) -> Result<()> { let meta = VmMetadata::load(&opts.name)?; if meta.is_alive() { - if !opts.force { + if !(opts.force || opts.stop) { color_eyre::eyre::bail!( - "VM '{}' is running. Stop it first or use --force", + "VM '{}' is running. Stop it first or use --force/--stop", opts.name ); } info!("force stopping VM '{}'...", opts.name); - crate::vfkit::stop::run(&opts.name)?; + crate::vfkit::stop::run(crate::vfkit::stop::VmStopOpts { + name: opts.name.clone(), + force: true, + })?; + } + + // Remove disk image and SSH keys if they are inside the bcvk vms directory + // (i.e., created by `bcvk run`). User-provided disks from `bcvk vm run` are left alone. + let vms_dir = VmMetadata::vms_dir(); + if std::path::Path::new(&meta.disk_image).starts_with(&vms_dir) { + for path in [ + meta.disk_image.clone(), + format!("{}.key", meta.disk_image), + format!("{}.key.pub", meta.disk_image), + ] { + if let Err(e) = fs::remove_file(&path) { + if e.kind() != std::io::ErrorKind::NotFound { + tracing::debug!("failed to remove {}: {}", path, e); + } + } + } } for path in [&meta.efi_store, &meta.serial_log] { @@ -43,7 +66,6 @@ pub fn run(opts: VmRmOpts) -> Result<()> { } } - let vms_dir = VmMetadata::vms_dir(); for suffix in ["-gvproxy.sock", "-gvproxy-svc.sock"] { let p = vms_dir.join(format!("{}{}", meta.name, suffix)); if let Err(e) = fs::remove_file(&p) { diff --git a/crates/kit/src/vfkit/rm_all.rs b/crates/kit/src/vfkit/rm_all.rs index 2ed80df66..570a906a3 100644 --- a/crates/kit/src/vfkit/rm_all.rs +++ b/crates/kit/src/vfkit/rm_all.rs @@ -3,17 +3,41 @@ use std::io::Write; use super::VmMetadata; +use clap::Parser; use color_eyre::Result; -/// Remove all persistent VMs, prompting unless `force` is set. -pub fn run(force: bool) -> Result<()> { - let vms = VmMetadata::list_all()?; +/// Options for `vm rm-all`. 
+#[derive(Parser, Debug)] +pub struct VmRmAllOpts { + /// Force removal without confirmation + #[clap(long, short = 'f')] + pub force: bool, + /// Stop running VMs before removal (gentler than --force kill) + #[clap(long)] + pub stop: bool, + /// Only remove VMs with this label + #[clap(long)] + pub label: Option, +} + +/// Remove all persistent VMs, with optional label filtering. +pub fn run(opts: VmRmAllOpts) -> Result<()> { + let mut vms = VmMetadata::list_all()?; + + if let Some(ref filter_label) = opts.label { + vms.retain(|v| v.labels.contains(filter_label)); + } + if vms.is_empty() { - println!("No VMs found."); + if let Some(ref label) = opts.label { + println!("No VMs found with label '{}'", label); + } else { + println!("No VMs found."); + } return Ok(()); } - if !force { + if !opts.force { println!("Found {} VM(s):", vms.len()); for vm in &vms { println!( @@ -34,11 +58,20 @@ pub fn run(force: bool) -> Result<()> { } for vm in &vms { - let opts = super::rm::VmRmOpts { + if vm.is_alive() && opts.stop { + if let Err(e) = super::stop::run(super::stop::VmStopOpts { + name: vm.name.clone(), + force: false, + }) { + tracing::warn!("failed to stop '{}': {}", vm.name, e); + } + } + let rm_opts = super::rm::VmRmOpts { name: vm.name.clone(), force: true, + stop: false, }; - super::rm::run(opts)?; + super::rm::run(rm_opts)?; } Ok(()) } diff --git a/crates/kit/src/vfkit/run.rs b/crates/kit/src/vfkit/run.rs index 389aa0ca7..5fadf89f9 100644 --- a/crates/kit/src/vfkit/run.rs +++ b/crates/kit/src/vfkit/run.rs @@ -1,7 +1,7 @@ -//! vm run — Start a persistent VM from a disk image using vfkit + EFI boot. +//! vm run — Start a persistent VM from a container image or disk image. 
use std::fs; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; use clap::Parser; @@ -10,22 +10,62 @@ use tracing::info; use super::VmMetadata; use crate::run_ephemeral_macos::{ - clear_xattr, expose_ssh_port, find_available_ssh_port, find_vfkit, generate_mac, start_gvproxy, - wait_for_ssh, + clear_xattr, expose_port, find_available_ssh_port, find_vfkit, generate_mac, start_gvproxy, }; +use crate::vm_helpers::{ + detect_machine_name, ensure_image_and_get_digest, parse_memory_to_mb, remove_file_if_exists, + run_ssh_interactive, sanitize_vm_name, wait_for_ssh, +}; + +/// Port mapping from host to VM (format: host_port:guest_port). +#[derive(Debug, Clone)] +pub struct PortMapping { + /// Host-side port number. + pub host_port: u16, + /// Guest-side port number. + pub guest_port: u16, +} + +impl std::str::FromStr for PortMapping { + type Err = color_eyre::Report; + fn from_str(s: &str) -> Result { + let (host_part, guest_part) = s.split_once(':').ok_or_else(|| { + color_eyre::eyre::eyre!( + "Invalid port format '{}'. Expected format: host_port:guest_port", + s + ) + })?; + let host_port = host_part + .trim() + .parse::() + .map_err(|_| color_eyre::eyre::eyre!("Invalid host port '{}'", host_part))?; + let guest_port = guest_part + .trim() + .parse::() + .map_err(|_| color_eyre::eyre::eyre!("Invalid guest port '{}'", guest_part))?; + Ok(PortMapping { + host_port, + guest_port, + }) + } +} /// Options for `vm run`. #[derive(Parser, Debug)] pub struct VmRunOpts { - /// Disk image path (.raw) - pub disk: String, - /// VM name for identification - #[clap(long)] + /// Container image or disk image path (.raw) + #[clap(default_value = "")] + pub image_or_disk: String, + /// VM name (default: derived from image or disk filename) + #[clap(long, short)] pub name: Option, - /// Number of vCPUs + /// Instance type (e.g., u1.nano, u1.small). Overrides vcpus/memory if specified. 
+ #[clap(long)] + pub itype: Option, + /// Number of vCPUs (overridden by --itype if specified) #[clap(long)] pub vcpus: Option, - /// Memory size (e.g. "4G", "2048M", or plain number for MB) + /// Memory size (overridden by --itype if specified) #[clap(long, default_value = "4G")] pub memory: String, /// Path to an existing SSH private key @@ -40,30 +80,159 @@ pub struct VmRunOpts { /// Display VM console in GUI window #[clap(long)] pub gui: bool, + /// Disk size for to-disk (e.g. "10G", "20G") + #[clap(long, default_value = "20G")] + pub disk_size: String, + /// Installation options (filesystem, root-size, etc.) + #[clap(flatten)] + pub install: crate::install_options::InstallOptions, + /// Replace existing VM with same name + #[clap(long, short = 'R')] + pub replace: bool, + /// Port mapping from host to VM (format: host_port:guest_port, e.g. 8080:80) + #[clap(long = "port", short = 'p', action = clap::ArgAction::Append)] + pub port_mappings: Vec, + /// User-defined labels for organizing VMs (comma not allowed in labels) + #[clap(long)] + pub label: Vec, + /// Automatically SSH into the VM after creation + #[clap(long)] + pub ssh: bool, + /// Wait for SSH to become available and verify connectivity (for testing) + #[clap(long, conflicts_with = "ssh")] + pub ssh_wait: bool, + /// Keep the VM running in background after creation (always true for vfkit) + #[clap(long, short = 'd')] + pub detach: bool, } -/// Create and launch a persistent VM from a disk image via vfkit + EFI. 
-pub fn run(opts: VmRunOpts) -> Result<()> { - let vfkit_bin = find_vfkit()?; +fn validate_labels(labels: &[String]) -> Result<()> { + for label in labels { + if label.contains(',') { + bail!("Label '{}' contains comma which is not allowed", label); + } + } + Ok(()) +} + +fn is_disk_path(input: &str) -> bool { + let p = Path::new(input); + p.extension() + .map(|e| e == "raw" || e == "img" || e == "qcow2") + .unwrap_or(false) + || p.exists() +} - if !Path::new(&opts.disk).exists() { - bail!("disk image not found: {}", opts.disk); +/// Create and launch a persistent VM. +pub fn run(opts: VmRunOpts) -> Result<()> { + if opts.image_or_disk.is_empty() { + bail!("container image or disk path required"); } - clear_xattr(Path::new(&opts.disk)); + validate_labels(&opts.label)?; + + let (disk_path_str, image_name) = if is_disk_path(&opts.image_or_disk) { + let p = Path::new(&opts.image_or_disk); + if !p.exists() { + bail!("disk image not found: {}", opts.image_or_disk); + } + (opts.image_or_disk.clone(), None) + } else { + let image = &opts.image_or_disk; + let vm_name = opts.name.clone().unwrap_or_else(|| sanitize_vm_name(image)); - let ssh_key_path = match &opts.ssh_key { - Some(p) => p.clone(), - None => find_ssh_key()?, + if vm_name.is_empty() { + bail!("could not derive VM name from image. 
Use --name to specify one."); + } + + // Check existing VM + if let Ok(existing) = VmMetadata::load(&vm_name) { + if opts.replace { + info!("replacing existing VM '{}'", vm_name); + if existing.is_alive() { + if let Some(pid) = rustix::process::Pid::from_raw(existing.vfkit_pid as i32) { + if let Err(e) = + rustix::process::kill_process(pid, rustix::process::Signal::KILL) + { + tracing::warn!( + "failed to kill vfkit (pid {}): {}", + existing.vfkit_pid, + e + ); + } + } + if let Some(pid) = rustix::process::Pid::from_raw(existing.gvproxy_pid as i32) { + if let Err(e) = + rustix::process::kill_process(pid, rustix::process::Signal::KILL) + { + tracing::warn!( + "failed to kill gvproxy (pid {}): {}", + existing.gvproxy_pid, + e + ); + } + } + std::thread::sleep(std::time::Duration::from_millis(500)); + } + VmMetadata::remove(&vm_name); + } else { + bail!( + "VM '{}' already exists. Use --replace to overwrite, or --name to choose a different name.", + vm_name + ); + } + } + + let vms_dir = crate::to_disk_macos::vms_dir(); + fs::create_dir_all(&vms_dir)?; + let disk_path = vms_dir.join(format!("{}.raw", vm_name)); + let key_path = PathBuf::from(format!("{}.key", disk_path.display())); + let key_pub_path = PathBuf::from(format!("{}.pub", key_path.display())); + + if opts.replace { + remove_file_if_exists(&disk_path); + remove_file_if_exists(&key_path); + remove_file_if_exists(&key_pub_path); + } + + if !disk_path.exists() { + info!("creating disk image for VM '{}'...", vm_name); + let machine = detect_machine_name()?; + let digest = ensure_image_and_get_digest(image)?; + + let base_disk_path = crate::to_disk_macos::find_or_create_base_disk( + image, + &digest, + &opts.install, + &opts.disk_size, + &machine, + &None, + &[], + )?; + + crate::to_disk_macos::clone_base_disk(&base_disk_path, &disk_path)?; + + let base_key = PathBuf::from(format!("{}.key", base_disk_path.display())); + if base_key.exists() { + fs::copy(&base_key, &key_path)?; + let base_pub = 
PathBuf::from(format!("{}.pub", base_key.display())); + if base_pub.exists() { + fs::copy(&base_pub, &key_pub_path)?; + } + } + } + + ( + disk_path.to_string_lossy().to_string(), + Some(image.to_string()), + ) }; - if !Path::new(&ssh_key_path).exists() { - bail!( - "SSH key not found: {}. Specify with --ssh-key", - ssh_key_path - ); - } + + clear_xattr(Path::new(&disk_path_str)); + + let ssh_key_path = find_ssh_key(&opts.ssh_key, &disk_path_str)?; let vm_name = opts.name.clone().unwrap_or_else(|| { - Path::new(&opts.disk) + Path::new(&disk_path_str) .file_stem() .and_then(|s| s.to_str()) .unwrap_or("vm") @@ -90,9 +259,14 @@ pub fn run(opts: VmRunOpts) -> Result<()> { mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] ); - let vcpus = opts.vcpus.unwrap_or(2); - let memory_mb = crate::run_ephemeral_macos::parse_memory_to_mb(&opts.memory)?; + let vcpus = opts.itype.map(|t| t.vcpus()).or(opts.vcpus).unwrap_or(2); + let memory_mb = opts + .itype + .map(|t| t.memory_mb()) + .map(Ok) + .unwrap_or_else(|| parse_memory_to_mb(&opts.memory))?; + let vfkit_bin = find_vfkit()?; let mut vfkit_args = vec![ "--cpus".to_string(), vcpus.to_string(), @@ -101,7 +275,7 @@ pub fn run(opts: VmRunOpts) -> Result<()> { "--bootloader".to_string(), format!("efi,variable-store={},create", efi_store.display()), "--device".to_string(), - format!("virtio-blk,path={}", opts.disk), + format!("virtio-blk,path={}", disk_path_str), "--device".to_string(), format!( "virtio-net,unixSocketPath={},mac={}", @@ -128,7 +302,7 @@ pub fn run(opts: VmRunOpts) -> Result<()> { info!("setting up SSH port forwarding..."); for attempt in 0..15u32 { - match expose_ssh_port(&services_sock_str, "192.168.127.2", ssh_port) { + match expose_port(&services_sock_str, "192.168.127.2", ssh_port, 22) { Ok(_) => { info!("SSH port {} forwarded", ssh_port); break; @@ -142,24 +316,41 @@ pub fn run(opts: VmRunOpts) -> Result<()> { } } + for pm in &opts.port_mappings { + expose_port( + &services_sock_str, + "192.168.127.2", + 
pm.host_port, + pm.guest_port, + )?; + info!("port {}:{} forwarded", pm.host_port, pm.guest_port); + } + let key_path = std::path::Path::new(&ssh_key_path); wait_for_ssh(ssh_port, key_path, &opts.ssh_user)?; let metadata = VmMetadata { name: vm_name.clone(), - disk_image: opts.disk.clone(), + image: image_name, + disk_image: disk_path_str.clone(), vfkit_pid: vfkit_child.id(), gvproxy_pid: gvproxy_child.id(), ssh_port, ssh_key: ssh_key_path.clone(), ssh_user: opts.ssh_user.clone(), - cpus: vcpus, - memory: memory_mb, + vcpus, + memory_mb, efi_store: efi_store.to_string_lossy().to_string(), serial_log: serial_log.to_string_lossy().to_string(), gui: opts.gui, created: chrono::Utc::now().to_rfc3339(), state: "running".to_string(), + labels: opts.label.clone(), + port_mappings: opts + .port_mappings + .iter() + .map(|pm| (pm.host_port, pm.guest_port)) + .collect(), }; metadata.save()?; @@ -172,10 +363,30 @@ pub fn run(opts: VmRunOpts) -> Result<()> { println!("To connect: bcvk vm ssh {}", vm_name); println!("To stop: bcvk vm stop {}", vm_name); + if opts.ssh_wait { + println!("Ready; use bcvk vm ssh to connect"); + return Ok(()); + } + if opts.ssh { + let status = run_ssh_interactive(ssh_port, key_path, &opts.ssh_user)?; + std::process::exit(status.code().unwrap_or(1)); + } + Ok(()) } -fn find_ssh_key() -> Result { +fn find_ssh_key(explicit: &Option, disk_path: &str) -> Result { + if let Some(p) = explicit { + if !Path::new(p).exists() { + bail!("SSH key not found: {}", p); + } + return Ok(p.clone()); + } + let auto_key = format!("{}.key", disk_path); + if Path::new(&auto_key).exists() { + info!("using auto-generated SSH key: {}", auto_key); + return Ok(auto_key); + } let home = dirs::home_dir() .ok_or_else(|| color_eyre::eyre::eyre!("cannot determine home directory"))?; for name in &["id_ed25519", "id_rsa"] { @@ -184,5 +395,5 @@ fn find_ssh_key() -> Result { return Ok(path.to_string_lossy().to_string()); } } - bail!("no SSH key found in ~/.ssh/. 
Generate with: ssh-keygen -t ed25519") + bail!("no SSH key found. Specify with --ssh-key") } diff --git a/crates/kit/src/vfkit/ssh.rs b/crates/kit/src/vfkit/ssh.rs index 74af46736..84527e8a8 100644 --- a/crates/kit/src/vfkit/ssh.rs +++ b/crates/kit/src/vfkit/ssh.rs @@ -1,7 +1,7 @@ //! vm ssh — SSH into a running persistent VM. use super::VmMetadata; -use crate::run_ephemeral_macos::run_ssh_interactive; +use crate::vm_helpers::{run_ssh_command, run_ssh_interactive}; use clap::Parser; use color_eyre::{eyre::bail, Result}; @@ -10,15 +10,25 @@ use color_eyre::{eyre::bail, Result}; pub struct VmSshOpts { /// VM name pub name: String, + /// Additional SSH arguments + #[clap(trailing_var_arg = true, allow_hyphen_values = true)] + pub args: Vec, } -/// Open an interactive SSH session to a running persistent VM. +/// Open an SSH session to a running persistent VM. pub fn run(opts: VmSshOpts) -> Result<()> { let vm = VmMetadata::load(&opts.name)?; if !vm.is_alive() { bail!("VM '{}' is not running", opts.name); } let key_path = std::path::Path::new(&vm.ssh_key); - run_ssh_interactive(vm.ssh_port, key_path, &vm.ssh_user)?; + if opts.args.is_empty() { + run_ssh_interactive(vm.ssh_port, key_path, &vm.ssh_user)?; + } else { + let cmd = shlex::try_join(opts.args.iter().map(|s| s.as_str())) + .map_err(|e| color_eyre::eyre::eyre!("failed to escape SSH args: {}", e))?; + let status = run_ssh_command(vm.ssh_port, key_path, &vm.ssh_user, &cmd)?; + std::process::exit(status.code().unwrap_or(1)); + } Ok(()) } diff --git a/crates/kit/src/vfkit/start.rs b/crates/kit/src/vfkit/start.rs index f2f2a48f3..4e97c2e30 100644 --- a/crates/kit/src/vfkit/start.rs +++ b/crates/kit/src/vfkit/start.rs @@ -8,8 +8,9 @@ use tracing::info; use super::VmMetadata; use crate::run_ephemeral_macos::{ - clear_xattr, expose_ssh_port, find_vfkit, generate_mac, start_gvproxy, wait_for_ssh, + clear_xattr, expose_port, find_vfkit, generate_mac, start_gvproxy, }; +use crate::vm_helpers::{run_ssh_interactive, 
wait_for_ssh}; /// Options for `vm start`. #[derive(Parser, Debug)] @@ -19,6 +20,9 @@ pub struct VmStartOpts { /// Display VM console in GUI window #[clap(long)] pub gui: bool, + /// Automatically SSH into the VM after starting + #[clap(long)] + pub ssh: bool, } /// Restart a stopped persistent VM by re-launching vfkit. @@ -53,9 +57,9 @@ pub fn run(opts: VmStartOpts) -> Result<()> { let gui = opts.gui || meta.gui; let mut vfkit_args = vec![ "--cpus".to_string(), - meta.cpus.to_string(), + meta.vcpus.to_string(), "--memory".to_string(), - meta.memory.to_string(), + meta.memory_mb.to_string(), "--bootloader".to_string(), format!("efi,variable-store={},create", meta.efi_store), "--device".to_string(), @@ -83,7 +87,7 @@ pub fn run(opts: VmStartOpts) -> Result<()> { info!("setting up SSH port forwarding..."); for attempt in 0..15u32 { - match expose_ssh_port(&services_sock_str, "192.168.127.2", meta.ssh_port) { + match expose_port(&services_sock_str, "192.168.127.2", meta.ssh_port, 22) { Ok(_) => { info!("SSH port {} forwarded", meta.ssh_port); break; @@ -97,6 +101,11 @@ pub fn run(opts: VmStartOpts) -> Result<()> { } } + for &(host_port, guest_port) in &meta.port_mappings { + expose_port(&services_sock_str, "192.168.127.2", host_port, guest_port)?; + info!("port {}:{} forwarded", host_port, guest_port); + } + let key_path = std::path::Path::new(&meta.ssh_key); wait_for_ssh(meta.ssh_port, key_path, &meta.ssh_user)?; @@ -111,5 +120,11 @@ pub fn run(opts: VmStartOpts) -> Result<()> { " ssh -p {} -i {} {}@localhost", meta.ssh_port, meta.ssh_key, meta.ssh_user ); + + if opts.ssh { + let status = run_ssh_interactive(meta.ssh_port, key_path, &meta.ssh_user)?; + std::process::exit(status.code().unwrap_or(1)); + } + Ok(()) } diff --git a/crates/kit/src/vfkit/stop.rs b/crates/kit/src/vfkit/stop.rs index 52c69fb51..acc8cd4de 100644 --- a/crates/kit/src/vfkit/stop.rs +++ b/crates/kit/src/vfkit/stop.rs @@ -6,25 +6,41 @@ use super::VmMetadata; use color_eyre::{eyre::bail, Result}; 
use tracing::info; -/// Stop a running persistent VM by sending SIGTERM to vfkit. -pub fn run(name: &str) -> Result<()> { - let mut meta = VmMetadata::load(name)?; +/// Options for `vm stop`. +#[derive(clap::Parser, Debug)] +pub struct VmStopOpts { + /// VM name + pub name: String, + /// Force immediate power-off (SIGKILL) instead of graceful shutdown + #[clap(long, short = 'f')] + pub force: bool, +} + +/// Stop a running persistent VM. +pub fn run(opts: VmStopOpts) -> Result<()> { + let mut meta = VmMetadata::load(&opts.name)?; if !meta.is_alive() { - bail!("VM '{}' is not running", name); + bail!("VM '{}' is not running", opts.name); } - info!("stopping VM '{}'...", name); + info!("stopping VM '{}'...", opts.name); if meta.vfkit_pid > 0 { let pid = rustix::process::Pid::from_raw(meta.vfkit_pid as i32).unwrap(); - if let Err(e) = rustix::process::kill_process(pid, rustix::process::Signal::TERM) { - tracing::debug!("failed to SIGTERM vfkit (PID {}): {}", meta.vfkit_pid, e); - } - std::thread::sleep(Duration::from_secs(3)); - if meta.is_alive() { + if opts.force { if let Err(e) = rustix::process::kill_process(pid, rustix::process::Signal::KILL) { tracing::debug!("failed to SIGKILL vfkit (PID {}): {}", meta.vfkit_pid, e); } + } else { + if let Err(e) = rustix::process::kill_process(pid, rustix::process::Signal::TERM) { + tracing::debug!("failed to SIGTERM vfkit (PID {}): {}", meta.vfkit_pid, e); + } + std::thread::sleep(Duration::from_secs(3)); + if meta.is_alive() { + if let Err(e) = rustix::process::kill_process(pid, rustix::process::Signal::KILL) { + tracing::debug!("failed to SIGKILL vfkit (PID {}): {}", meta.vfkit_pid, e); + } + } } } @@ -46,6 +62,6 @@ pub fn run(name: &str) -> Result<()> { meta.gvproxy_pid = 0; meta.save()?; - println!("Stopped '{}'", name); + println!("Stopped '{}'", opts.name); Ok(()) } diff --git a/crates/kit/src/vm_helpers.rs b/crates/kit/src/vm_helpers.rs new file mode 100644 index 000000000..0c03157d0 --- /dev/null +++ 
b/crates/kit/src/vm_helpers.rs @@ -0,0 +1,340 @@ +//! Shared helpers for macOS/Windows VM management. +//! +//! Functions in this module are OS-independent (use `podman` and `ssh` CLI). +//! Modelled after `ssh_options.rs` — designed for future cross-platform sharing. + +use std::path::Path; +use std::process::{Command, Stdio}; +use std::time::Duration; + +use color_eyre::{eyre::bail, eyre::eyre, eyre::Context, Result}; +use tracing::info; + +use crate::ssh_options::CommonSshOptions; + +/// SSH connection timeout (shared by wait_for_ssh). +pub const SSH_TIMEOUT: Duration = Duration::from_secs(240); + +/// Detect the currently active podman machine name. +pub fn detect_machine_name() -> Result { + let output = Command::new("podman") + .args(["machine", "info", "--format", "{{.Host.CurrentMachine}}"]) + .output()?; + let name = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if name.is_empty() { + bail!("no podman machine is running"); + } + Ok(name) +} + +/// Detect the podman machine VM type (e.g. "hyperv", "wsl", "libkrun", "applehv"). +#[allow(dead_code)] +pub fn detect_podman_vmtype() -> Result { + let output = Command::new("podman") + .args(["machine", "info", "--format", "{{.Host.VMType}}"]) + .output()?; + let vmtype = String::from_utf8_lossy(&output.stdout) + .trim() + .to_lowercase(); + if vmtype.is_empty() { + bail!("could not detect podman machine VM type"); + } + Ok(vmtype) +} + +/// Check if the podman machine is running as root (UID 0). +pub fn is_machine_rootful(machine: &str) -> bool { + Command::new("podman") + .args(["machine", "ssh", machine, "id", "-u"]) + .output() + .map(|o| String::from_utf8_lossy(&o.stdout).trim() == "0") + .unwrap_or(false) +} + +/// Parse memory specification string (e.g. "4G", "2048M") to megabytes. +pub fn parse_memory_to_mb(s: &str) -> Result { + let s = s.trim(); + if let Some(n) = s.strip_suffix('G').or_else(|| s.strip_suffix('g')) { + Ok((n.parse::()? 
* 1024.0) as u32) + } else if let Some(n) = s.strip_suffix('M').or_else(|| s.strip_suffix('m')) { + Ok(n.parse::()? as u32) + } else { + Ok(s.parse::()?) + } +} + +/// Return sensible default vCPU count based on available host parallelism. +pub fn default_vcpus() -> u32 { + std::thread::available_parallelism() + .map(|n| n.get() as u32) + .unwrap_or(2) +} + +/// Ensure image exists locally (pulling if needed) and return its short digest. +pub fn ensure_image_and_get_digest(image: &str) -> Result { + let status = Command::new("podman") + .args(["image", "exists", image]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status()?; + if !status.success() { + info!("pulling image {}...", image); + if !Command::new("podman") + .args(["pull", image]) + .status()? + .success() + { + bail!("failed to pull image: {}", image); + } + } + let output = Command::new("podman") + .args(["image", "inspect", "--format", "{{.Digest}}", image]) + .output()?; + let digest = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if digest.is_empty() { + bail!("failed to get image digest: {}", image); + } + Ok(digest + .trim_start_matches("sha256:") + .chars() + .take(16) + .collect()) +} + +/// Wait for SSH to become available on the given port. 
+pub fn wait_for_ssh(port: u16, key_path: &Path, user: &str) -> Result<()> { + let ssh_opts = CommonSshOptions::default(); + let user_host = format!("{}@localhost", user); + info!("waiting for SSH on port {}...", port); + let start = std::time::Instant::now(); + let mut attempt = 0u32; + loop { + if start.elapsed() > SSH_TIMEOUT { + bail!("SSH connection timeout ({}s)", SSH_TIMEOUT.as_secs()); + } + let mut cmd = Command::new("ssh"); + cmd.args(["-p", &port.to_string(), "-i", &key_path.to_string_lossy()]); + ssh_opts.apply_to_command(&mut cmd); + cmd.args(["-o", "BatchMode=yes", &user_host, "true"]); + if let Ok(s) = cmd.stdout(Stdio::null()).stderr(Stdio::null()).status() { + if s.success() { + info!("SSH connected after {}s", start.elapsed().as_secs()); + return Ok(()); + } + } + let backoff = if attempt < 2 { + 500 + } else if attempt < 4 { + 1000 + } else { + 2000 + }; + std::thread::sleep(Duration::from_millis(backoff)); + attempt += 1; + } +} + +/// Execute a command via SSH and return the exit status. +pub fn run_ssh_command( + port: u16, + key_path: &Path, + user: &str, + command: &str, +) -> Result { + let ssh_opts = CommonSshOptions::default(); + let user_host = format!("{}@localhost", user); + let mut cmd = Command::new("ssh"); + cmd.args(["-p", &port.to_string(), "-i", &key_path.to_string_lossy()]); + ssh_opts.apply_to_command(&mut cmd); + cmd.args(["-o", "BatchMode=yes", &user_host, command]); + cmd.stdin(Stdio::inherit()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()) + .status() + .map_err(|e| eyre!("ssh failed: {}", e)) +} + +/// Start an interactive SSH session with TTY allocation. 
+pub fn run_ssh_interactive( + port: u16, + key_path: &Path, + user: &str, +) -> Result { + let ssh_opts = CommonSshOptions::default(); + let user_host = format!("{}@localhost", user); + let mut cmd = Command::new("ssh"); + cmd.args(["-p", &port.to_string(), "-i", &key_path.to_string_lossy()]); + ssh_opts.apply_to_command(&mut cmd); + cmd.args(["-t", &user_host]); + cmd.stdin(Stdio::inherit()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()) + .status() + .map_err(|e| eyre!("ssh failed: {}", e)) +} + +/// Remove a file, ignoring NotFound errors. +pub fn remove_file_if_exists(path: &Path) { + if let Err(e) = std::fs::remove_file(path) { + if e.kind() != std::io::ErrorKind::NotFound { + tracing::debug!("failed to remove {}: {}", path.display(), e); + } + } +} + +/// Generate an SSH keypair and return the public key content. +pub fn generate_ssh_keypair(key_path: &Path) -> Result { + let pub_path = key_path.with_extension( + key_path + .extension() + .map(|e| format!("{}.pub", e.to_string_lossy())) + .unwrap_or_else(|| "pub".to_string()), + ); + remove_file_if_exists(key_path); + remove_file_if_exists(&pub_path); + let status = Command::new("ssh-keygen") + .args([ + "-t", + "ed25519", + "-N", + "", + "-q", + "-f", + &key_path.to_string_lossy(), + ]) + .status()?; + if !status.success() { + bail!("ssh-keygen failed"); + } + let pubkey = std::fs::read_to_string(&pub_path)?.trim().to_string(); + Ok(pubkey) +} + +/// Sanitize a container image name into a valid VM name. +pub fn sanitize_vm_name(image: &str) -> String { + image + .split('/') + .last() + .unwrap_or(image) + .replace(':', "-") + .replace('.', "-") + .chars() + .filter(|c| c.is_alphanumeric() || *c == '-' || *c == '_') + .collect::() + .trim_matches('-') + .to_string() +} + +/// Parse a size string (e.g. "10G", "20GB", "5120M", "1TB") to bytes. 
+pub fn parse_size(size_str: &str) -> Result { + let s = size_str.trim(); + if s.is_empty() { + bail!("empty size string"); + } + if let Ok(n) = s.parse::() { + return Ok(n); + } + let upper = s.to_uppercase(); + let (num_str, multiplier) = if let Some(n) = upper.strip_suffix("TB") { + (n, 1024_u64.pow(4)) + } else if let Some(n) = upper.strip_suffix("GB") { + (n, 1024_u64 * 1024 * 1024) + } else if let Some(n) = upper.strip_suffix("MB") { + (n, 1024_u64 * 1024) + } else if let Some(n) = upper.strip_suffix("KB") { + (n, 1024_u64) + } else if let Some(n) = upper.strip_suffix('T') { + (n, 1024_u64.pow(4)) + } else if let Some(n) = upper.strip_suffix('G') { + (n, 1024_u64 * 1024 * 1024) + } else if let Some(n) = upper.strip_suffix('M') { + (n, 1024_u64 * 1024) + } else if let Some(n) = upper.strip_suffix('K') { + (n, 1024_u64) + } else if let Some(n) = upper.strip_suffix('B') { + (n, 1) + } else { + bail!("invalid size format: '{}' (use e.g. 20G, 5120M, 1TB)", s); + }; + let num: u64 = num_str + .trim() + .parse() + .with_context(|| format!("invalid number in size: '{}'", num_str))?; + Ok(num * multiplier) +} + +/// Container image name for the nbdkit EROFS plugin. +pub const NBDKIT_IMAGE: &str = "localhost/bcvk-nbdkit:latest"; + +/// Generate a shell script that checks for and builds the nbdkit container image. +/// +/// The caller provides the plugin `.so` binary via `plugin_so` (typically from +/// `include_bytes!` in a platform-specific module). The script: +/// 1. Checks if the image already exists (early exit if so) +/// 2. Writes the `.so` to a temp path via base64 +/// 3. Builds a container image with nbdkit + the plugin baked in +/// 4. 
Cleans up the temp file +pub fn nbdkit_setup_script(plugin_so: &[u8]) -> String { + use base64::Engine; + let b64 = base64::engine::general_purpose::STANDARD.encode(plugin_so); + format!( + "set -e; \ + if podman image exists {image}; then exit 0; fi; \ + mkdir -p /var/tmp/bcvk; \ + printf '%s' '{b64}' | base64 -d > /var/tmp/bcvk/plugin.so; \ + printf 'FROM quay.io/fedora/fedora:latest\\nRUN dnf install -y nbdkit nbdkit-basic-plugins && dnf clean all\\nCOPY plugin.so /plugin.so\\n' | \ + podman build -t {image} -f - /var/tmp/bcvk; \ + rm -f /var/tmp/bcvk/plugin.so", + image = NBDKIT_IMAGE, + b64 = b64, + ) +} +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_size() { + assert_eq!(parse_size("10G").unwrap(), 10 * 1024 * 1024 * 1024); + assert_eq!(parse_size("20GB").unwrap(), 20 * 1024 * 1024 * 1024); + assert_eq!(parse_size("5120M").unwrap(), 5120 * 1024 * 1024); + assert_eq!(parse_size("512MB").unwrap(), 512 * 1024 * 1024); + assert_eq!(parse_size("1024K").unwrap(), 1024 * 1024); + assert_eq!(parse_size("1TB").unwrap(), 1024_u64.pow(4)); + assert_eq!(parse_size("1073741824").unwrap(), 1073741824); + assert_eq!(parse_size("100B").unwrap(), 100); + assert!(parse_size("abc").is_err()); + assert!(parse_size("10X").is_err()); + assert!(parse_size("").is_err()); + } + + #[test] + fn test_parse_memory_to_mb() { + assert_eq!(parse_memory_to_mb("4G").unwrap(), 4096); + assert_eq!(parse_memory_to_mb("2048M").unwrap(), 2048); + assert_eq!(parse_memory_to_mb("512").unwrap(), 512); + assert_eq!(parse_memory_to_mb("1g").unwrap(), 1024); + assert_eq!(parse_memory_to_mb("256m").unwrap(), 256); + } + + #[test] + fn test_parse_memory_to_mb_errors() { + assert!(parse_memory_to_mb("abc").is_err()); + } + + #[test] + fn test_default_vcpus() { + let vcpus = default_vcpus(); + assert!(vcpus >= 1); + } + + #[test] + fn test_sanitize_vm_name() { + assert_eq!( + sanitize_vm_name("quay.io/fedora/fedora-bootc:latest"), + "fedora-bootc-latest" + ); + 
assert_eq!(sanitize_vm_name("centos:stream10"), "centos-stream10"); + assert_eq!(sanitize_vm_name("simple"), "simple"); + } +} diff --git a/crates/nbdkit-erofs-plugin/src/erofs.rs b/crates/nbdkit-erofs-plugin/src/erofs.rs index a795b076a..317edba0d 100644 --- a/crates/nbdkit-erofs-plugin/src/erofs.rs +++ b/crates/nbdkit-erofs-plugin/src/erofs.rs @@ -458,7 +458,10 @@ pub fn build_erofs_regions(layout: &ErofsLayout, walk: &WalkResult) -> Vec = Vec::new(); - // BOOTAA64.EFI + let boot_efi_name = if grub_path + .file_name() + .map(|n| n == "grubx64.efi") + .unwrap_or(false) + { + "BOOTX64" + } else { + "BOOTAA64" + }; files.push(FatFile { - name_8_3: make_8_3("BOOTAA64", "EFI"), + name_8_3: make_8_3(boot_efi_name, "EFI"), size: grub_size, regions: vec![FileDataRegion::FromFile { path: grub_path.to_path_buf(), @@ -352,7 +360,11 @@ pub fn build_esp_regions( regions.push(Region { start: offset, len: *len, - region_type: RegionType::File { path: path.clone() }, + region_type: RegionType::File { + file: std::sync::Arc::new( + std::fs::File::open(path).expect("failed to open file for region"), + ), + }, }); offset += len; file_offset += len; diff --git a/crates/nbdkit-erofs-plugin/src/gpt.rs b/crates/nbdkit-erofs-plugin/src/gpt.rs index 88e8bcf44..02becd16d 100644 --- a/crates/nbdkit-erofs-plugin/src/gpt.rs +++ b/crates/nbdkit-erofs-plugin/src/gpt.rs @@ -69,7 +69,7 @@ pub fn build_gpt_disk( &LINUX_TYPE_GUID, erofs_start_lba, erofs_start_lba + erofs_sectors - 1, - b"root", + b"bcvk-root", ); let partition_table_crc = crc32fast::hash(&partition_table); diff --git a/crates/nbdkit-erofs-plugin/src/lib.rs b/crates/nbdkit-erofs-plugin/src/lib.rs index b2cd4075c..5f78a274d 100644 --- a/crates/nbdkit-erofs-plugin/src/lib.rs +++ b/crates/nbdkit-erofs-plugin/src/lib.rs @@ -7,11 +7,11 @@ mod regions; use std::ffi::{c_char, c_int, c_void, CStr, CString}; use std::path::PathBuf; -use std::sync::Mutex; +use std::sync::RwLock; use regions::Region; -static PLUGIN_STATE: Mutex> = 
Mutex::new(None); +static PLUGIN_STATE: RwLock> = RwLock::new(None); struct PluginState { dir: PathBuf, @@ -39,7 +39,7 @@ pub extern "C" fn plugin_config(key: *const c_char, value: *const c_char) -> c_i let key = unsafe { CStr::from_ptr(key) }.to_str().unwrap_or(""); let value = unsafe { CStr::from_ptr(value) }.to_str().unwrap_or(""); - let mut state = PLUGIN_STATE.lock().unwrap(); + let mut state = PLUGIN_STATE.write().unwrap(); let state = state.get_or_insert_with(|| PluginState { dir: PathBuf::new(), cmdline: None, @@ -62,7 +62,7 @@ pub extern "C" fn plugin_config(key: *const c_char, value: *const c_char) -> c_i #[no_mangle] pub extern "C" fn plugin_config_complete() -> c_int { - let state = PLUGIN_STATE.lock().unwrap(); + let state = PLUGIN_STATE.read().unwrap(); let state = match state.as_ref() { Some(s) => s, None => { @@ -116,12 +116,12 @@ fn find_grub(dir: &std::path::Path) -> Option { } None } - walk(&dir.join("usr/lib"), "grubaa64.efi") + walk(&dir.join("usr/lib"), "grubaa64.efi").or_else(|| walk(&dir.join("usr/lib"), "grubx64.efi")) } #[no_mangle] pub extern "C" fn plugin_get_ready() -> c_int { - let mut state_guard = PLUGIN_STATE.lock().unwrap(); + let mut state_guard = PLUGIN_STATE.write().unwrap(); let state = match state_guard.as_mut() { Some(s) => s, None => return -1, @@ -144,7 +144,8 @@ pub extern "C" fn plugin_get_ready() -> c_int { } }; - let erofs_regions = erofs::build_erofs_regions(&erofs_layout, &walk); + let erofs_regions = + regions::consolidate_regions(erofs::build_erofs_regions(&erofs_layout, &walk)); // Discover boot files from dir let (kernel_path, initrd_path) = match find_kernel_dir(&state.dir) { @@ -240,7 +241,7 @@ pub extern "C" fn plugin_close(_handle: *mut c_void) {} #[no_mangle] pub extern "C" fn plugin_get_size(_handle: *mut c_void) -> i64 { - let state = PLUGIN_STATE.lock().unwrap(); + let state = PLUGIN_STATE.read().unwrap(); state.as_ref().map(|s| s.total_size as i64).unwrap_or(-1) } @@ -257,7 +258,7 @@ pub extern "C" fn 
plugin_pread( offset: u64, _flags: u32, ) -> c_int { - let state = PLUGIN_STATE.lock().unwrap(); + let state = PLUGIN_STATE.read().unwrap(); let state = match state.as_ref() { Some(s) => s, None => return -1, @@ -339,7 +340,7 @@ static PLUGIN_MAGIC_KEY: &[u8] = b"dir\0"; static PLUGIN: NbdkitPlugin = NbdkitPlugin { _struct_size: std::mem::size_of::() as u64, _api_version: 2, - _thread_model: 0, + _thread_model: 3, // NBDKIT_THREAD_MODEL_PARALLEL name: PLUGIN_NAME.as_ptr() as *const c_char, longname: PLUGIN_LONGNAME.as_ptr() as *const c_char, version: PLUGIN_VERSION.as_ptr() as *const c_char, diff --git a/crates/nbdkit-erofs-plugin/src/regions.rs b/crates/nbdkit-erofs-plugin/src/regions.rs index 16268d623..d79e8e228 100644 --- a/crates/nbdkit-erofs-plugin/src/regions.rs +++ b/crates/nbdkit-erofs-plugin/src/regions.rs @@ -1,13 +1,13 @@ //! Region-based virtual block device composition. //! Inspired by the regions pattern in nbdkit's floppy plugin (BSD-3-Clause). -use std::path::PathBuf; +use std::fs::File; use std::sync::Arc; #[derive(Debug, Clone)] pub enum RegionType { Data(Arc>), - File { path: PathBuf }, + File { file: Arc }, Zero, } @@ -39,6 +39,76 @@ pub fn find_region(regions: &[Region], offset: u64) -> Option<&Region> { .map(|i| ®ions[i]) } +const PRELOAD_THRESHOLD: u64 = 4096; +const MERGE_CHUNK_MAX: u64 = 4 * 1024 * 1024; + +pub fn consolidate_regions(regions: Vec) -> Vec { + use std::os::unix::fs::FileExt; + + let mut out: Vec = Vec::new(); + let mut merge_buf: Vec = Vec::new(); + let mut merge_start: u64 = 0; + + for r in regions { + let should_inline = match &r.region_type { + RegionType::File { file } => r.len <= PRELOAD_THRESHOLD, + RegionType::Data(_) | RegionType::Zero => true, + }; + + if should_inline { + if merge_buf.is_empty() { + merge_start = r.start; + } + let needed = (r.start + r.len - merge_start) as usize; + if needed as u64 > MERGE_CHUNK_MAX && !merge_buf.is_empty() { + out.push(Region { + start: merge_start, + len: merge_buf.len() as 
u64, + region_type: RegionType::Data(Arc::new(merge_buf.clone())), + }); + merge_buf.clear(); + merge_start = r.start; + } + let offset_in_buf = (r.start - merge_start) as usize; + if merge_buf.len() < offset_in_buf + r.len as usize { + merge_buf.resize(offset_in_buf + r.len as usize, 0); + } + match &r.region_type { + RegionType::Data(data) => { + merge_buf[offset_in_buf..offset_in_buf + r.len as usize] + .copy_from_slice(&data[..r.len as usize]); + } + RegionType::File { file } => { + let _ = file.read_exact_at( + &mut merge_buf[offset_in_buf..offset_in_buf + r.len as usize], + 0, + ); + } + RegionType::Zero => { + merge_buf[offset_in_buf..offset_in_buf + r.len as usize].fill(0); + } + } + } else { + if !merge_buf.is_empty() { + out.push(Region { + start: merge_start, + len: merge_buf.len() as u64, + region_type: RegionType::Data(Arc::new(std::mem::take(&mut merge_buf))), + }); + } + out.push(r); + } + } + if !merge_buf.is_empty() { + out.push(Region { + start: merge_start, + len: merge_buf.len() as u64, + region_type: RegionType::Data(Arc::new(merge_buf)), + }); + } + out +} + pub fn pread(regions: &[Region], buf: &mut [u8], offset: u64) -> std::io::Result<()> { let mut remaining = buf.len(); let mut buf_offset = 0; @@ -61,10 +131,9 @@ pub fn pread(regions: &[Region], buf: &mut [u8], offset: u64) -> std::io::Result let start = region_offset as usize; buf[buf_offset..buf_offset + len].copy_from_slice(&data[start..start + len]); } - RegionType::File { path } => { + RegionType::File { file } => { use std::os::unix::fs::FileExt; - let f = std::fs::File::open(path)?; - f.read_exact_at(&mut buf[buf_offset..buf_offset + len], region_offset)?; + file.read_exact_at(&mut buf[buf_offset..buf_offset + len], region_offset)?; } RegionType::Zero => { buf[buf_offset..buf_offset + len].fill(0);