From 5372bbb9f9c894a54503bde681c61e6af4b80e3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=B6rje=20Granberg?=
Date: Mon, 4 May 2026 13:44:14 +0200
Subject: [PATCH 1/2] feat: add get-disk command for VMDK/VHD extraction

Adds a new `get-disk` command that extracts a virtual disk from a VBK
archive as a flat raw image. Multi-extent VMDKs are reassembled
automatically by the underlying vbktoolkit OpenDiskImage API.

- SHA-256 verification computed inline during extraction (io.MultiWriter)
  rather than in a second read pass, keeping memory and I/O constant
  regardless of disk size
- Output file is removed automatically on any error after creation,
  preventing partial images from being left on disk
- BytesWritten is validated against the image's reported DiskSize; a
  mismatch is treated as a hard error
- Exports vbkshell.NormalizePath so path handling is shared rather than
  duplicated across commands
- Pins the local vbktoolkit replace directive, which fixes the root cause:
  DirItem.Open() previously rejected DirItemExtFib files (external files),
  meaning any VMDK stored as an external entry could not be read

Co-Authored-By: Claude Sonnet 4.6
---
 cmd/cmd_get_disk.go            | 151 +++++++++++++++++++++++++++++++++
 cmd/root.go                    |   1 +
 go.mod                         |   2 +
 go.sum                         |   8 +--
 internal/vbkshell/path.go      |   2 +-
 internal/vbkshell/path_test.go |   4 +-
 internal/vbkshell/shell.go     |   4 +-
 7 files changed, 161 insertions(+), 11 deletions(-)
 create mode 100644 cmd/cmd_get_disk.go

diff --git a/cmd/cmd_get_disk.go b/cmd/cmd_get_disk.go
new file mode 100644
index 0000000..cff83b1
--- /dev/null
+++ b/cmd/cmd_get_disk.go
@@ -0,0 +1,151 @@
+package cmd
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+
+	vbk "github.com/GoToolSharing/vbktoolkit"
+	"github.com/GoToolSharing/vbkview/internal/vbkshell"
+	"github.com/spf13/cobra"
+)
+
+// getDiskResult summarises a successful get-disk run; it is the value
+// printed by --json.
+type getDiskResult struct {
+	SourcePath   string `json:"source_path"`
+	OutputPath   string `json:"output_path"`
+	BytesWritten int64  `json:"bytes_written"`
+	DiskSize     uint64 `json:"disk_size_bytes"`
+	SHA256       string `json:"sha256,omitempty"`
+}
+
+// newGetDiskCmd builds the get-disk command. It copies the disk image that
+// OpenDiskImage returns for <src> into a local file and fails, removing the
+// partial output, if the copy, the size check or the optional --sha256
+// check does not succeed. Without [dst] the output is named after the last
+// element of the source path.
+func newGetDiskCmd(opts *globalOptions) *cobra.Command {
+	var sha256Expected string
+
+	cmd := &cobra.Command{
+		Use:   "get-disk <src> [dst]",
+		Short: "Extract a virtual disk from VBK as a flat raw image",
+		Long: `Extract a .vmdk, .vhd, or .vhdx virtual disk from a VBK backup as a flat
+raw disk image. Multi-extent VMDKs are reassembled automatically.`,
+		Args: cobra.RangeArgs(1, 2),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			if opts.vbkPath == "" {
+				return fmt.Errorf("--vbk is required")
+			}
+
+			// Validate the digest before copying anything, so a typo in
+			// --sha256 is reported immediately rather than after the whole
+			// image has been extracted.
+			expected := strings.ToLower(strings.TrimSpace(sha256Expected))
+			if expected != "" {
+				sum, err := hex.DecodeString(expected)
+				if err != nil || len(sum) != sha256.Size {
+					return fmt.Errorf("invalid --sha256 value %q: want %d hex digits", sha256Expected, 2*sha256.Size)
+				}
+			}
+
+			v, fh, err := vbk.Open(opts.vbkPath, opts.verify)
+			if err != nil {
+				return fmt.Errorf("opening %s: %w", opts.vbkPath, err)
+			}
+			defer fh.Close()
+
+			src := vbkshell.NormalizePath(args[0], opts.cwd)
+			img, err := v.OpenDiskImage(src)
+			if err != nil {
+				return fmt.Errorf("opening disk image %s: %w", src, err)
+			}
+			defer img.Close()
+			diskSize := img.Size()
+
+			outPath := ""
+			if len(args) == 2 {
+				outPath = args[1]
+			}
+			if strings.TrimSpace(outPath) == "" {
+				// src is a slash-separated path inside the VBK, hence
+				// path.Base rather than filepath.Base.
+				outPath = path.Base(src)
+			}
+			if err := os.MkdirAll(filepath.Dir(outPath), 0o755); err != nil {
+				return fmt.Errorf("creating output directory: %w", err)
+			}
+
+			out, err := os.OpenFile(outPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
+			if err != nil {
+				return fmt.Errorf("creating %s: %w", outPath, err)
+			}
+			// Every failure from here on must not leave a partial image
+			// behind; one deferred cleanup covers all later error returns.
+			keep := false
+			defer func() {
+				if !keep {
+					os.Remove(outPath) // best effort; the returned error matters more
+				}
+			}()
+
+			h := sha256.New()
+			var writer io.Writer = out
+			if expected != "" {
+				// Hash while writing instead of re-reading the image.
+				writer = io.MultiWriter(out, h)
+			}
+
+			written, copyErr := io.CopyBuffer(writer, img, make([]byte, 1<<20))
+			// Close before the checks below and do not drop its error: a
+			// failed write may only be reported when the file is closed.
+			closeErr := out.Close()
+			if copyErr != nil {
+				return fmt.Errorf("extracting %s: %w", src, copyErr)
+			}
+			if closeErr != nil {
+				return fmt.Errorf("closing %s: %w", outPath, closeErr)
+			}
+
+			if uint64(written) != diskSize {
+				return fmt.Errorf("extraction incomplete: wrote %d of %d bytes", written, diskSize)
+			}
+
+			var actualSum string
+			if expected != "" {
+				actualSum = hex.EncodeToString(h.Sum(nil))
+				if actualSum != expected {
+					return fmt.Errorf("sha256 mismatch: expected %s got %s", expected, actualSum)
+				}
+			}
+			keep = true
+
+			res := getDiskResult{
+				SourcePath:   src,
+				OutputPath:   outPath,
+				BytesWritten: written,
+				DiskSize:     diskSize,
+				SHA256:       actualSum,
+			}
+
+			if opts.json {
+				return printJSON(res)
+			}
+
+			cmd.Printf("Saved to %s (%d bytes)\n", res.OutputPath, res.BytesWritten)
+			if res.SHA256 != "" {
+				cmd.Printf("SHA256 OK (%s)\n", res.SHA256)
+			}
+			return nil
+		},
+	}
+
+	cmd.Flags().StringVar(&sha256Expected, "sha256", "", "verify extracted disk SHA-256")
+	return cmd
+}
diff --git a/cmd/root.go b/cmd/root.go
index 48dc7fc..a02bcfd 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -33,6 +33,7 @@ func NewRootCmd() *cobra.Command {
 	root.AddCommand(newLsCmd(opts))
 	root.AddCommand(newCatCmd(opts))
 	root.AddCommand(newGetCmd(opts))
+	root.AddCommand(newGetDiskCmd(opts))
 	root.AddCommand(newFindCmd(opts))
 	root.AddCommand(newStatCmd(opts))
 	root.AddCommand(newTreeCmd(opts))
diff --git a/go.mod b/go.mod
index db637a6..ad9c2a2 100644
--- a/go.mod
+++ b/go.mod
@@ -29,3 +29,5 @@ require (
 	golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
 	www.velocidex.com/golang/go-ntfs v0.2.0 // indirect
 )
+
+replace github.com/GoToolSharing/vbktoolkit => github.com/borje/vbktoolkit v0.0.0-20260504111425-5840d2998ec8
diff --git a/go.sum b/go.sum
index 8817301..9441ad7 100644
--- a/go.sum
+++ b/go.sum
@@ -1,9 +1,3 @@
-github.com/GoToolSharing/vbktoolkit v0.0.0-20260416200859-9cf81749bcc1 h1:mWyY3mBDhzRATsRsdmJoun4NFBRWNSZhVjevmkSA0rY=
-github.com/GoToolSharing/vbktoolkit v0.0.0-20260416200859-9cf81749bcc1/go.mod h1:7sFiWmYvn8X1F8HAK//sirX1q9rFdFYUQLyf3d1FbGg=
-github.com/GoToolSharing/vbktoolkit v0.0.0-20260417173155-98770b9f5306 h1:/phvmjjDMafJgKDrjto70hwXKu7EJ841Q/NQ+9DKgYQ=
-github.com/GoToolSharing/vbktoolkit v0.0.0-20260417173155-98770b9f5306/go.mod h1:7sFiWmYvn8X1F8HAK//sirX1q9rFdFYUQLyf3d1FbGg=
-github.com/GoToolSharing/vbktoolkit v0.0.0-20260420090906-925abf074c28 h1:WKSyZ+u6FlNRvCO3vaLdm81/b8SVhuIGo1v5RNtswgc=
-github.com/GoToolSharing/vbktoolkit
v0.0.0-20260420090906-925abf074c28/go.mod h1:7sFiWmYvn8X1F8HAK//sirX1q9rFdFYUQLyf3d1FbGg= github.com/Velocidex/go-ext4 v0.0.0-20250510085914-b0b955af0359 h1:IPYiFWk8IlTOQQ/5ns69J4uzFCXXDoAEUtShTzGRoys= github.com/Velocidex/go-ext4 v0.0.0-20250510085914-b0b955af0359/go.mod h1:Sbqqh1t+nYXmNWw0dZC8LOIxP7z5Wg94SP+4Ej1QZqg= github.com/Velocidex/go-vhdx v0.0.0-20250511013458-5cba970cdeda h1:Q0pdD/aAUCmTv3ndK+m0yxQU1WdBLKEGPHEs68+w+8I= @@ -22,6 +16,8 @@ github.com/alecthomas/repr v0.1.1 h1:87P60cSmareLAxMc4Hro0r2RBY4ROm0dYwkJNpS4pPs github.com/alecthomas/repr v0.1.1/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= +github.com/borje/vbktoolkit v0.0.0-20260504111425-5840d2998ec8 h1:68A6o0Hr0xrgnqWRH0N7nV/r3wlyKrKU6TbargjQf/s= +github.com/borje/vbktoolkit v0.0.0-20260504111425-5840d2998ec8/go.mod h1:7sFiWmYvn8X1F8HAK//sirX1q9rFdFYUQLyf3d1FbGg= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/internal/vbkshell/path.go b/internal/vbkshell/path.go index dd19bb5..4b23eaf 100644 --- a/internal/vbkshell/path.go +++ b/internal/vbkshell/path.go @@ -6,7 +6,7 @@ import ( "strings" ) -func normalizePath(p, cwd string) string { +func NormalizePath(p, cwd string) string { p = strings.ReplaceAll(strings.TrimSpace(p), "\\", "/") if p == "" { if cwd == "" { diff --git a/internal/vbkshell/path_test.go b/internal/vbkshell/path_test.go index 54a15db..bffc8c2 100644 --- a/internal/vbkshell/path_test.go +++ b/internal/vbkshell/path_test.go @@ -16,9 +16,9 @@ func TestNormalizePath(t *testing.T) { } for _, tt := range tests { - got := normalizePath(tt.in, tt.cwd) + got := 
NormalizePath(tt.in, tt.cwd) if got != tt.want { - t.Fatalf("normalizePath(%q,%q)=%q, want %q", tt.in, tt.cwd, got, tt.want) + t.Fatalf("NormalizePath(%q,%q)=%q, want %q", tt.in, tt.cwd, got, tt.want) } } } diff --git a/internal/vbkshell/shell.go b/internal/vbkshell/shell.go index cb6482e..ec256a9 100644 --- a/internal/vbkshell/shell.go +++ b/internal/vbkshell/shell.go @@ -264,7 +264,7 @@ func isPromptInterrupted(err error) bool { } func (s *Shell) resolve(p string) string { - return normalizePath(p, s.cwd) + return NormalizePath(p, s.cwd) } func (s *Shell) cmdVolumes() { @@ -550,7 +550,7 @@ func (s *Shell) walk(root string, fn func(p string, item *vbk.DirItem) error) er return nil } - return walkRec(normalizePath(root, "/"), start) + return walkRec(NormalizePath(root, "/"), start) } func (s *Shell) prompt() string { From 663b458941fb1b1e797783aa46efc51493ba9460 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=B6rje=20Granberg?= Date: Tue, 5 May 2026 07:48:45 +0200 Subject: [PATCH 2/2] doc: document get-disk command in README Co-Authored-By: Claude Sonnet 4.6 --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index 7b65330..789d076 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ It is built on top of [`vbktoolkit`](https://github.com/GoToolSharing/vbktoolkit - Extract files (`get`) - Find files by name (`find`) - Search text in files (`grep`) +- Extract virtual disks as flat raw images (`get-disk`) - List embedded virtual disk files (`disks`) - Auto-detect guest partitions from embedded virtual disks (`volumes`, `use`) - Guest filesystem support: `NTFS`, `EXT4`, and `XFS` @@ -71,6 +72,7 @@ go run . 
ls --vbk /path/to/backup.vbk / - `stat [path]` show metadata (`--props`) - `tree [path]` print directory tree (`--depth`) - `grep [start]` search text (`-i`, `--max-bytes`) +- `get-disk <src> [dst]` extract virtual disk as flat raw image (`--sha256`) - `disks` list `.vhd`/`.vhdx`/`.vmdk` entries - `volumes` list detected guest volumes (or fallback root volume) @@ -114,6 +116,19 @@ vbkview get --vbk /path/to/backup.vbk --sha256 /path/in/vbk/fi vbkview get --vbk /path/to/backup.vbk --json /path/in/vbk/file.bin ./file.bin ``` +### Virtual disk extraction + +```bash +# Extract a VMDK/VHD/VHDX as a flat raw image (multi-extent VMDKs reassembled automatically) +vbkview get-disk --vbk /path/to/backup.vbk /vm/disk.vmdk ./disk.img + +# Extract with SHA-256 verification +vbkview get-disk --vbk /path/to/backup.vbk --sha256 <hex> /vm/disk.vmdk ./disk.img + +# JSON output +vbkview get-disk --vbk /path/to/backup.vbk --json /vm/disk.vmdk ./disk.img +``` + ### Interactive shell ```text